9 #ifndef INCLUDED_SDSL_WT_PC
10 #define INCLUDED_SDSL_WT_PC
37 template <
class t_shape,
39 class t_rank =
typename t_bitvector::rank_1_type,
40 class t_select =
typename t_bitvector::select_1_type,
41 class t_select_zero =
typename t_bitvector::select_0_type,
42 class t_tree_strat = byte_tree<>>
58 typedef typename t_shape::template type<wt_pc>
shape_type;
63 using node_type =
typename tree_strat_type::node_type;
83 uint64_t p = m_tree.bit_path(old_chr);
84 uint32_t path_len = p >> 56;
86 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
88 if (p & 1) {
bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times); }
89 bv_node_pos[v] += times;
90 v = m_tree.child(v, p & 1);
95 size_type construct_tree_shape(
const std::vector<size_type> & C)
98 std::vector<pc_node> temp_nodes;
99 shape_type::construct_tree(C, temp_nodes);
107 void construct_init_rank_select()
109 util::init_support(m_bv_rank, &m_bv);
110 util::init_support(m_bv_select0, &m_bv);
111 util::init_support(m_bv_select1, &m_bv);
118 std::vector<value_type> & cs,
119 std::vector<size_type> & rank_c_i,
120 std::vector<size_type> & rank_c_j,
124 size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
125 size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
132 if (!m_tree.is_leaf(v_new)) { _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new); }
137 cs[k++] = m_tree.bv_pos_rank(v_new);
144 if (!m_tree.is_leaf(v_new)) { _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new); }
149 cs[k++] = m_tree.bv_pos_rank(v_new);
168 template <
typename t_it>
172 if (0 == m_size)
return;
177 std::vector<size_type> C;
183 size_type tree_size = construct_tree_shape(C);
188 std::vector<uint64_t> bv_node_pos(m_tree.size(), 0);
189 for (
size_type v = 0; v < m_tree.size(); ++v) { bv_node_pos[v] = m_tree.bv_pos(v); }
192 for (
auto it =
begin; it !=
end; ++it)
197 insert_char(old_chr, bv_node_pos, times, temp_bv);
206 insert_char(old_chr, bv_node_pos, times, temp_bv);
211 if (times > 0) { insert_char(old_chr, bv_node_pos, times, temp_bv); }
214 construct_init_rank_select();
216 m_tree.init_node_ranks(m_bv_rank);
219 template <
typename t_it>
227 , m_sigma(wt.m_sigma)
229 , m_bv_rank(wt.m_bv_rank)
230 , m_bv_select1(wt.m_bv_select1)
231 , m_bv_select0(wt.m_bv_select0)
234 m_bv_rank.set_vector(&m_bv);
235 m_bv_select1.set_vector(&m_bv);
236 m_bv_select0.set_vector(&m_bv);
241 , m_sigma(wt.m_sigma)
242 , m_bv(std::move(wt.m_bv))
243 , m_bv_rank(std::move(wt.m_bv_rank))
244 , m_bv_select1(std::move(wt.m_bv_select1))
245 , m_bv_select0(std::move(wt.m_bv_select0))
246 , m_tree(std::move(wt.m_tree))
248 m_bv_rank.set_vector(&m_bv);
249 m_bv_select1.set_vector(&m_bv);
250 m_bv_select0.set_vector(&m_bv);
259 *
this = std::move(tmp);
270 m_sigma = wt.m_sigma;
271 m_bv = std::move(wt.m_bv);
272 m_bv_rank = std::move(wt.m_bv_rank);
273 m_bv_rank.set_vector(&m_bv);
274 m_bv_select1 = std::move(wt.m_bv_select1);
275 m_bv_select1.set_vector(&m_bv);
276 m_bv_select0 = std::move(wt.m_bv_select0);
277 m_bv_select0.set_vector(&m_bv);
278 m_tree = std::move(wt.m_tree);
287 bool empty()
const {
return m_size == 0; }
306 while (!m_tree.is_leaf(v))
308 if (m_bv[m_tree.bv_pos(v) + i])
310 i = m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v);
311 v = m_tree.child(v, 1);
315 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
316 v = m_tree.child(v, 0);
320 return m_tree.bv_pos_rank(v);
338 if (!m_tree.is_valid(m_tree.c_to_leaf(c)))
346 uint64_t p = m_tree.bit_path(c);
347 uint32_t path_len = (p >> 56);
350 for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1)
352 if (p & 1) { result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v)); }
355 result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
357 v = m_tree.child(v, p & 1);
376 while (!m_tree.is_leaf(v))
378 if (m_bv[m_tree.bv_pos(v) + i])
380 i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
381 v = m_tree.child(v, 1);
385 i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
386 v = m_tree.child(v, 0);
390 return std::make_pair(i, (
value_type)m_tree.bv_pos_rank(v));
406 assert(1 <= i and i <=
rank(
size(), c));
408 if (!m_tree.is_valid(v))
412 if (m_sigma == 1) {
return std::min(i - 1, m_size); }
414 uint64_t p = m_tree.bit_path(c);
415 uint32_t path_len = (p >> 56);
417 p <<= (64 - path_len);
418 for (uint32_t l = 0; l < path_len; ++l, p <<= 1)
420 if ((p & 0x8000000000000000ULL) == 0)
422 v = m_tree.parent(v);
423 result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
427 v = m_tree.parent(v);
428 result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
459 std::vector<value_type> & cs,
460 std::vector<size_type> & rank_c_i,
461 std::vector<size_type> & rank_c_j)
const
463 assert(i <= j and j <=
size());
464 if (i == j) { k = 0; }
465 else if (1 == m_sigma)
468 cs[0] = m_tree.bv_pos_rank(m_tree.root());
469 rank_c_i[0] = std::min(i, m_size);
470 rank_c_j[0] = std::min(j, m_size);
472 else if ((j - i) == 1)
476 rank_c_i[0] = rc.first;
478 rank_c_j[0] = rank_c_i[0] + 1;
480 else if ((j - i) == 2)
483 rank_c_i[0] = rc.first;
486 rank_c_i[1] = rc.first;
492 rank_c_j[0] = rank_c_i[0] + 2;
502 rank_c_j[0] = rank_c_i[0] + 1;
503 rank_c_j[1] = rank_c_i[1] + 1;
509 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
528 template <
class t_ret_type = std::tuple<
size_type,
size_type,
size_type>>
533 assert(i <= j and j <=
size());
536 value_type _c = m_tree.bv_pos_rank(m_tree.root());
539 return t_ret_type{ i, 0, 0 };
543 return t_ret_type{ 0, 0, j - i };
547 return t_ret_type{ 0, j - i, 0 };
550 if (i == j) {
return t_ret_type{
rank(i, c), 0, 0 }; }
551 uint64_t p = m_tree.bit_path(c);
552 uint32_t path_len = p >> 56;
558 return t_ret_type{ 0, 0, j - i };
561 return t_ret_type{ 0, j - i - std::get<2>(res), std::get<2>(res) };
565 for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
567 size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
568 size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
572 smaller += j - r1_2 - i + r1_1;
578 greater += r1_2 - r1_1;
582 v = m_tree.child(v, p & 1);
584 return t_ret_type{ i, smaller, greater };
599 template <
class t_ret_type = std::tuple<
size_type,
size_type>>
606 value_type _c = m_tree.bv_pos_rank(m_tree.root());
609 return t_ret_type{ i, 0 };
613 return t_ret_type{ 0, 0 };
617 return t_ret_type{ 0, i };
621 uint64_t p = m_tree.bit_path(c);
622 uint32_t path_len = p >> 56;
628 return t_ret_type{ 0, 0 };
631 return t_ret_type{ 0, std::get<0>(res) + std::get<1>(res) };
636 for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1)
638 size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v));
641 result += all - ones;
648 v = m_tree.child(v, p & 1);
650 return t_ret_type{ all, result };
664 written_bytes +=
write_member(m_size, out, child,
"size");
665 written_bytes +=
write_member(m_sigma, out, child,
"sigma");
666 written_bytes += m_bv.serialize(out, child,
"bv");
667 written_bytes += m_bv_rank.serialize(out, child,
"bv_rank");
668 written_bytes += m_bv_select1.serialize(out, child,
"bv_select_1");
669 written_bytes += m_bv_select0.serialize(out, child,
"bv_select_0");
670 written_bytes += m_tree.serialize(out, child,
"tree");
672 return written_bytes;
681 m_bv_rank.load(in, &m_bv);
682 m_bv_select1.load(in, &m_bv);
683 m_bv_select0.load(in, &m_bv);
690 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv) &&
691 (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1) &&
692 (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree);
698 template <
typename archive_t>
710 template <
typename archive_t>
717 m_bv_rank.set_vector(&m_bv);
719 m_bv_select1.set_vector(&m_bv);
721 m_bv_select0.set_vector(&m_bv);
741 bool bit = *(
begin(vv) + i);
742 i = std::get<1>(rs[bit]);
768 auto parent = m_tree.parent(v);
770 if (m_tree.child(parent, 0) == v)
771 return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1;
773 return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1;
778 return m_tree.size(v);
792 return { { m_tree.child(v, 0), m_tree.child(v, 1) } };
807 auto ranges_copy = ranges;
808 return expand(v, std::move(ranges_copy));
823 auto v_sp_rank = m_tree.bv_pos_rank(v);
826 for (
auto & r : ranges)
828 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
829 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
830 auto left_size = (r[1] - r[0] + 1) - right_size;
832 auto right_sp = sp_rank - v_sp_rank;
833 auto left_sp = r[0] - right_sp;
835 r = { { left_sp, left_sp + left_size - 1 } };
836 res[i++] = { { right_sp, right_sp + right_size - 1 } };
838 return { { ranges, std::move(res) } };
853 auto v_sp_rank = m_tree.bv_pos_rank(v);
854 auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
855 auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
856 auto left_size = (r[1] - r[0] + 1) - right_size;
858 auto right_sp = sp_rank - v_sp_rank;
859 auto left_sp = r[0] - right_sp;
861 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
867 uint64_t
path = m_tree.bit_path(c);
868 uint64_t path_len =
path >> 56;
872 return { path_len,
path };
893 auto begin(
const node_type & v)
const -> decltype(m_bv.begin() + m_tree.bv_pos(v))
895 return m_bv.begin() + m_tree.bv_pos(v);
899 auto end(
const node_type & v)
const -> decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v))
901 return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v);
bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
A generic vector class for integers of width .
Generic iterator for a random access container.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A prefix code-shaped wavelet.
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
value_type sym(const node_type &v) const
Symbol for a leaf.
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
wt_pc(const wt_pc &wt)
Copy constructor.
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
t_bitvector::difference_type difference_type
std::pair< bool, value_type > symbol_gte(value_type c) const
Returns for a symbol c the next larger or equal symbol in the WT.
void load(std::istream &in)
Loads the data structure from the given istream.
int_vector ::size_type size_type
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
tree_strat_type::alphabet_category alphabet_category
t_bitvector bit_vector_type
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
wt_pc(t_it begin, t_it end, std::string)
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the original vector.
typename tree_strat_type::node_type node_type
node_type root() const
Returns the root node.
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
bool operator==(wt_pc const &other) const noexcept
Equality operator.
const_iterator end() const
Returns a const_iterator to the element after the last element.
wt_pc(t_it begin, t_it end)
Construct the wavelet tree from a sequence defined by two interators.
t_select_zero select_0_type
t_tree_strat::template type< wt_pc > tree_strat_type
bool empty(const node_type &v) const
Indicates if node v is empty.
const bit_vector_type & bv
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
tree_strat_type::value_type value_type
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
wt_pc & operator=(const wt_pc &wt)
Assignment operator.
bool empty() const
Returns whether the wavelet tree contains no data.
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
t_shape::template type< wt_pc > shape_type
auto size(const node_type &v) const -> decltype(m_tree.size(v))
Return the size of node v.
bool operator!=(wt_pc const &other) const noexcept
Inequality operator.
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
std::pair< bool, value_type > symbol_lte(value_type c) const
Returns for a symbol c the previous smaller or equal symbol in the WT.
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
wt_pc & operator=(wt_pc &&wt)
Move assignment operator.
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
random_access_const_iterator< wt_pc > const_iterator
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)>>
Random access container to sequence of node v.
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
std::array< int_vector<>::size_type, 2 > range_type
std::vector< range_type > range_vec_type
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
static SDSL_CONSTEXPR uint64_t rev(uint64_t x)
reverses a given 64 bit word