9#ifndef INCLUDED_SDSL_WT_AP
10#define INCLUDED_SDSL_WT_AP
57template <
class t_wt_
byte = wt_huff<bit_vector, rank_support_v5<>>,
class t_wt_
int = wm_
int<>>
60 static_assert(std::is_same<typename index_tag<t_wt_byte>::type,
wt_tag>::value,
61 "First template argument has to be a wavelet tree.");
62 static_assert(std::is_same<typename index_tag<t_wt_int>::type,
wt_tag>::value,
63 "Second template argument has to be a wavelet tree.");
91 inline std::tuple<bool, value_type, value_type> try_get_char_class_offset(
value_type c)
const
95 return std::make_tuple(
false, 0, 0);
100 return std::make_tuple(
false, 0, 0);
102 return std::make_tuple(
true, offset_class.second, offset_class.first);
118 template <
typename t_it>
121 const uint8_t wt_byte_width = wt_byte_type::alphabet_category::WIDTH;
122 const uint8_t wt_int_width = wt_int_type::alphabet_category::WIDTH;
126 std::vector<std::pair<size_type, value_type>> char_freq;
130 for (
auto it =
begin; it !=
end; ++it)
133 while (element >= max_symbol)
135 char_freq.emplace_back(0, max_symbol);
139 if (char_freq[element].first == 0)
143 char_freq[element].first++;
145 std::sort(char_freq.rbegin(), char_freq.rend());
146 m_sigma = max_symbol - pseudo_entries;
152 std::vector<std::pair<std::string, int_vector_buffer<wt_int_width>>> temp_file_offset_buffers;
158 m_char2class_buffer[char_freq[i].second] = i;
168 for (; offset < class_size && current_symbol <
m_sigma; ++offset, ++current_symbol)
170 m_char2class_buffer[char_freq[current_symbol].second] = i;
175 temp_file_offset_buffers.emplace_back(temp_file_offset,
186 std::string temp_file_class =
194 for (
auto it =
begin; it !=
end; ++it)
203 temp_file_offset_buffers[cl].second.push_back(offset);
206 class_buffer.
close();
220 auto & temp_file_offset_buffer = temp_file_offset_buffers[i];
221 temp_file_offset_buffer.second.close();
245 *
this = std::move(wt);
254 *
this = std::move(tmp);
269 m_class = std::move(wt.m_class);
300 auto textoffset_class =
m_class.inverse_select(i);
301 auto cl = textoffset_class.second;
322 auto success_class_offset = try_get_char_class_offset(c);
323 if (!std::get<0>(success_class_offset))
327 auto cl = std::get<1>(success_class_offset);
328 auto offset = std::get<2>(success_class_offset);
344 auto textoffset_class =
m_class.inverse_select(i);
345 auto textoffset = textoffset_class.first;
346 auto cl = textoffset_class.second;
349 return std::make_pair(textoffset,
m_char2class.select(1, cl));
352 return std::make_pair(class_result.first,
m_char2class.select(class_result.second + 1, cl));
368 assert(1 <= i and i <=
rank(
size(), c));
369 auto success_class_offset = try_get_char_class_offset(c);
370 if (!std::get<0>(success_class_offset))
374 auto cl = std::get<1>(success_class_offset);
375 auto offset = std::get<2>(success_class_offset);
378 return m_class.select(text_offset, cl);
390 written_bytes +=
m_char2class.serialize(out, child,
"char2class");
391 written_bytes +=
m_class.serialize(out, child,
"class");
394 written_bytes +=
m_offset[i].serialize(out, child,
"offset");
397 return written_bytes;
418 template <
typename archive_t>
431 template <
typename archive_t>
449 return {
this,
size()};
457 return {
this,
size()};
463 return (
m_size == other.m_size) && (
m_sigma == other.m_sigma)
471 return !(*
this == other);
bits.hpp contains the sdsl::bits class.
cereal.hpp offers cereal support
void close(bool remove_file=false)
Close the int_vector_buffer.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
int_vector_size_type size_type
ptrdiff_t difference_type
int_vector_trait< t_width >::value_type value_type
static mm_event_proxy event(std::string const &name)
Generic iterator for a random access container.
static structure_tree_node * add_child(structure_tree_node *v, std::string const &name, std::string const &type)
static void add_size(structure_tree_node *v, uint64_t value)
std::vector< wt_int_type > m_offset
const_iterator end() const
bool operator!=(wt_ap const &other) const noexcept
Inequality operator.
wt_ap(wt_ap const &wt)
Copy constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
bool operator==(wt_ap const &other) const noexcept
Equality operator.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
wt_ap & operator=(wt_ap &&wt)
Assignment move operator.
wt_ap(wt_ap &&wt)
Move copy constructor.
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
value_type m_singleton_class_cnt
wt_byte_type m_char2class
wt_ap(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
bool empty() const
Returns whether the wavelet tree contains no data.
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
void load(std::istream &in)
Loads the data structure from the given istream.
int_vector ::difference_type difference_type
int_vector ::size_type size_type
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
wt_ap & operator=(wt_ap const &wt)
Assignment operator.
int_alphabet_tag alphabet_category
wt_ap()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
int_vector ::value_type value_type
random_access_const_iterator< wt_ap > const_iterator
size_type size() const
Returns the size of the original vector.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
int_vector.hpp contains the sdsl::int_vector class.
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
memory_tracking.hpp contains two function for allocating and deallocating memory
std::string to_string(T const &t, int w=1)
Namespace for the succinct data structure library.
int remove(std::string const &)
Remove a file.
std::string ram_file_name(std::string const &file)
Returns the corresponding RAM-file name for file.
size_t write_member(T const &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
uint64_t count(t_k2_treap const &treap, k2_treap_ns::point_type p1, k2_treap_ns::point_type p2)
Count how many points are in the rectangle (p1,p2)
void read_member(T &t, std::istream &in)
void construct_im(t_index &idx, t_data &&data, uint8_t num_bytes=0)
rank_support_v5.hpp contains rank_support_v5.5
Contains declarations and definitions of data structure concepts.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
structure_tree.hpp contains a helper class which can represent the memory structure of a class.
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wm_int.hpp contains a specialized class for a wavelet tree for sequences over large alphabets.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.