SDSL  3.0.0
Succinct Data Structure Library
wt_pc.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
9 #ifndef INCLUDED_SDSL_WT_PC
10 #define INCLUDED_SDSL_WT_PC
11 
12 #include <tuple>
13 #include <utility>
14 #include <vector>
15 
16 #include <sdsl/bit_vectors.hpp>
17 #include <sdsl/rank_support.hpp>
18 #include <sdsl/select_support.hpp>
19 #include <sdsl/wt_helper.hpp>
20 
22 namespace sdsl
23 {
24 
26 
37 template <class t_shape,
38  class t_bitvector = bit_vector,
39  class t_rank = typename t_bitvector::rank_1_type,
40  class t_select = typename t_bitvector::select_1_type,
41  class t_select_zero = typename t_bitvector::select_0_type,
42  class t_tree_strat = byte_tree<>>
43 class wt_pc
44 {
45  public:
46  typedef typename t_tree_strat::template type<wt_pc> tree_strat_type;
48  typedef typename tree_strat_type::value_type value_type;
49  typedef typename t_bitvector::difference_type difference_type;
52  typedef t_bitvector bit_vector_type;
53  typedef t_rank rank_1_type;
54  typedef t_select select_1_type;
55  typedef t_select_zero select_0_type;
57  typedef typename tree_strat_type::alphabet_category alphabet_category;
58  typedef typename t_shape::template type<wt_pc> shape_type;
59  enum
60  {
61  lex_ordered = shape_type::lex_ordered
62  };
63  using node_type = typename tree_strat_type::node_type;
64 
65  private:
66 #ifdef WT_PC_CACHE
67  mutable value_type m_last_access_answer;
68  mutable size_type m_last_access_i;
69  mutable size_type m_last_access_rl;
70 #endif
71 
72  size_type m_size = 0; // original text size
73  size_type m_sigma = 0; // alphabet size
74  bit_vector_type m_bv; // bit vector to store the wavelet tree
75  rank_1_type m_bv_rank; // rank support for the wavelet tree bit vector
76  select_1_type m_bv_select1; // select support for the wavelet tree bit vector
77  select_0_type m_bv_select0;
78  tree_strat_type m_tree;
79 
80  // insert a character into the wavelet tree, see construct method
81  void insert_char(value_type old_chr, std::vector<uint64_t> & bv_node_pos, size_type times, bit_vector & bv)
82  {
83  uint64_t p = m_tree.bit_path(old_chr);
84  uint32_t path_len = p >> 56;
85  node_type v = m_tree.root();
86  for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
87  {
88  if (p & 1) { bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times); }
89  bv_node_pos[v] += times;
90  v = m_tree.child(v, p & 1);
91  }
92  }
93 
94  // calculates the tree shape returns the size of the WT bit vector
95  size_type construct_tree_shape(const std::vector<size_type> & C)
96  {
97  // vector for node of the tree
98  std::vector<pc_node> temp_nodes; //(2*m_sigma-1);
99  shape_type::construct_tree(C, temp_nodes);
100  // Convert code tree into BFS order in memory and
101  // calculate bv_pos values
102  size_type bv_size = 0;
103  m_tree = tree_strat_type(temp_nodes, bv_size, this);
104  return bv_size;
105  }
106 
107  void construct_init_rank_select()
108  {
109  util::init_support(m_bv_rank, &m_bv);
110  util::init_support(m_bv_select0, &m_bv);
111  util::init_support(m_bv_select1, &m_bv);
112  }
113 
114  // recursive internal version of the method interval_symbols
115  void _interval_symbols(size_type i,
116  size_type j,
117  size_type & k,
118  std::vector<value_type> & cs,
119  std::vector<size_type> & rank_c_i,
120  std::vector<size_type> & rank_c_j,
121  node_type v) const
122  {
123  // invariant: j>i
124  size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
125  size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
126  // goto left child
127  i -= i_new;
128  j -= j_new;
129  if (i != j)
130  {
131  node_type v_new = m_tree.child(v, 0);
132  if (!m_tree.is_leaf(v_new)) { _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new); }
133  else
134  {
135  rank_c_i[k] = i;
136  rank_c_j[k] = j;
137  cs[k++] = m_tree.bv_pos_rank(v_new);
138  }
139  }
140  // goto right child
141  if (i_new != j_new)
142  {
143  node_type v_new = m_tree.child(v, 1);
144  if (!m_tree.is_leaf(v_new)) { _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new); }
145  else
146  {
147  rank_c_i[k] = i_new;
148  rank_c_j[k] = j_new;
149  cs[k++] = m_tree.bv_pos_rank(v_new);
150  }
151  }
152  }
153 
154  public:
155  const size_type & sigma = m_sigma;
156  const bit_vector_type & bv = m_bv;
157 
158  // Default constructor
159  wt_pc(){};
160 
162 
168  template <typename t_it>
169  wt_pc(t_it begin, t_it end)
170  : m_size(std::distance(begin, end))
171  {
172  if (0 == m_size) return;
173  // O(n + |\Sigma|\log|\Sigma|) algorithm for calculating node sizes
174  // TODO: C should also depend on the tree_strategy. C is just a mapping
175  // from a symbol to its frequency. So a map<uint64_t,uint64_t> could be
176  // used for integer alphabets...
177  std::vector<size_type> C;
178  // 1. Count occurrences of characters
180  // 2. Calculate effective alphabet size
182  // 3. Generate tree shape
183  size_type tree_size = construct_tree_shape(C);
184  // 4. Generate wavelet tree bit sequence m_bv
185  bit_vector temp_bv(tree_size, 0);
186 
187  // Initializing starting position of wavelet tree nodes
188  std::vector<uint64_t> bv_node_pos(m_tree.size(), 0);
189  for (size_type v = 0; v < m_tree.size(); ++v) { bv_node_pos[v] = m_tree.bv_pos(v); }
190  value_type old_chr = *begin;
191  uint32_t times = 0;
192  for (auto it = begin; it != end; ++it)
193  {
194  value_type chr = *it;
195  if (chr != old_chr)
196  {
197  insert_char(old_chr, bv_node_pos, times, temp_bv);
198  times = 1;
199  old_chr = chr;
200  }
201  else
202  { // chr == old_chr
203  ++times;
204  if (times == 64)
205  {
206  insert_char(old_chr, bv_node_pos, times, temp_bv);
207  times = 0;
208  }
209  }
210  }
211  if (times > 0) { insert_char(old_chr, bv_node_pos, times, temp_bv); }
212  m_bv = bit_vector_type(std::move(temp_bv));
213  // 5. Initialize rank and select data structures for m_bv
214  construct_init_rank_select();
215  // 6. Finish inner nodes by precalculating the bv_pos_rank values
216  m_tree.init_node_ranks(m_bv_rank);
217  }
218 
219  template <typename t_it>
220  wt_pc(t_it begin, t_it end, std::string)
221  : wt_pc(begin, end)
222  {}
223 
225  wt_pc(const wt_pc & wt)
226  : m_size(wt.m_size)
227  , m_sigma(wt.m_sigma)
228  , m_bv(wt.m_bv)
229  , m_bv_rank(wt.m_bv_rank)
230  , m_bv_select1(wt.m_bv_select1)
231  , m_bv_select0(wt.m_bv_select0)
232  , m_tree(wt.m_tree)
233  {
234  m_bv_rank.set_vector(&m_bv);
235  m_bv_select1.set_vector(&m_bv);
236  m_bv_select0.set_vector(&m_bv);
237  }
238 
239  wt_pc(wt_pc && wt)
240  : m_size(wt.m_size)
241  , m_sigma(wt.m_sigma)
242  , m_bv(std::move(wt.m_bv))
243  , m_bv_rank(std::move(wt.m_bv_rank))
244  , m_bv_select1(std::move(wt.m_bv_select1))
245  , m_bv_select0(std::move(wt.m_bv_select0))
246  , m_tree(std::move(wt.m_tree))
247  {
248  m_bv_rank.set_vector(&m_bv);
249  m_bv_select1.set_vector(&m_bv);
250  m_bv_select0.set_vector(&m_bv);
251  }
252 
254  wt_pc & operator=(const wt_pc & wt)
255  {
256  if (this != &wt)
257  {
258  wt_pc tmp(wt); // re-use copy-constructor
259  *this = std::move(tmp); // re-use move-assignment
260  }
261  return *this;
262  }
263 
266  {
267  if (this != &wt)
268  {
269  m_size = wt.m_size;
270  m_sigma = wt.m_sigma;
271  m_bv = std::move(wt.m_bv);
272  m_bv_rank = std::move(wt.m_bv_rank);
273  m_bv_rank.set_vector(&m_bv);
274  m_bv_select1 = std::move(wt.m_bv_select1);
275  m_bv_select1.set_vector(&m_bv);
276  m_bv_select0 = std::move(wt.m_bv_select0);
277  m_bv_select0.set_vector(&m_bv);
278  m_tree = std::move(wt.m_tree);
279  }
280  return *this;
281  }
282 
284  size_type size() const { return m_size; }
285 
287  bool empty() const { return m_size == 0; }
288 
290 
301  {
302  assert(i < size());
303  // which stores how many of the next symbols are equal
304  // with the current char
305  node_type v = m_tree.root(); // start at root node
306  while (!m_tree.is_leaf(v))
307  { // while not a leaf
308  if (m_bv[m_tree.bv_pos(v) + i])
309  { // goto right child
310  i = m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v);
311  v = m_tree.child(v, 1);
312  }
313  else
314  { // goto the left child
315  i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
316  v = m_tree.child(v, 0);
317  }
318  }
319  // if v is a leaf bv_pos_rank returns symbol itself
320  return m_tree.bv_pos_rank(v);
321  };
322 
324 
336  {
337  assert(i <= size());
338  if (!m_tree.is_valid(m_tree.c_to_leaf(c)))
339  {
340  return 0; // if `c` was not in the text
341  }
342  if (m_sigma == 1)
343  {
344  return i; // if m_sigma == 1 answer is trivial
345  }
346  uint64_t p = m_tree.bit_path(c);
347  uint32_t path_len = (p >> 56);
348  size_type result = i;
349  node_type v = m_tree.root();
350  for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1)
351  {
352  if (p & 1) { result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v)); }
353  else
354  {
355  result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v));
356  }
357  v = m_tree.child(v, p & 1); // goto child
358  }
359  return result;
360  };
361 
363 
372  std::pair<size_type, value_type> inverse_select(size_type i) const
373  {
374  assert(i < size());
375  node_type v = m_tree.root();
376  while (!m_tree.is_leaf(v))
377  { // while not a leaf
378  if (m_bv[m_tree.bv_pos(v) + i])
379  { // goto right child
380  i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
381  v = m_tree.child(v, 1);
382  }
383  else
384  { // goto left child
385  i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
386  v = m_tree.child(v, 0);
387  }
388  }
389  // if v is a leaf bv_pos_rank returns symbol itself
390  return std::make_pair(i, (value_type)m_tree.bv_pos_rank(v));
391  }
392 
394 
405  {
406  assert(1 <= i and i <= rank(size(), c));
407  node_type v = m_tree.c_to_leaf(c);
408  if (!m_tree.is_valid(v))
409  { // if c was not in the text
410  return m_size; // -> return a position right to the end
411  }
412  if (m_sigma == 1) { return std::min(i - 1, m_size); }
413  size_type result = i - 1; // otherwise
414  uint64_t p = m_tree.bit_path(c);
415  uint32_t path_len = (p >> 56);
416  // path_len > 0, since we have handled m_sigma = 1.
417  p <<= (64 - path_len);
418  for (uint32_t l = 0; l < path_len; ++l, p <<= 1)
419  {
420  if ((p & 0x8000000000000000ULL) == 0)
421  { // node was a left child
422  v = m_tree.parent(v);
423  result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
424  }
425  else
426  { // node was a right child
427  v = m_tree.parent(v);
428  result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v);
429  }
430  }
431  return result;
432  };
433 
435 
457  size_type j,
458  size_type & k,
459  std::vector<value_type> & cs,
460  std::vector<size_type> & rank_c_i,
461  std::vector<size_type> & rank_c_j) const
462  {
463  assert(i <= j and j <= size());
464  if (i == j) { k = 0; }
465  else if (1 == m_sigma)
466  {
467  k = 1;
468  cs[0] = m_tree.bv_pos_rank(m_tree.root());
469  rank_c_i[0] = std::min(i, m_size);
470  rank_c_j[0] = std::min(j, m_size);
471  }
472  else if ((j - i) == 1)
473  {
474  k = 1;
475  auto rc = inverse_select(i);
476  rank_c_i[0] = rc.first;
477  cs[0] = rc.second;
478  rank_c_j[0] = rank_c_i[0] + 1;
479  }
480  else if ((j - i) == 2)
481  {
482  auto rc = inverse_select(i);
483  rank_c_i[0] = rc.first;
484  cs[0] = rc.second;
485  rc = inverse_select(i + 1);
486  rank_c_i[1] = rc.first;
487  cs[1] = rc.second;
488 
489  if (cs[0] == cs[1])
490  {
491  k = 1;
492  rank_c_j[0] = rank_c_i[0] + 2;
493  }
494  else
495  {
496  k = 2;
497  if (lex_ordered and cs[0] > cs[1])
498  {
499  std::swap(cs[0], cs[1]);
500  std::swap(rank_c_i[0], rank_c_i[1]);
501  }
502  rank_c_j[0] = rank_c_i[0] + 1;
503  rank_c_j[1] = rank_c_i[1] + 1;
504  }
505  }
506  else
507  {
508  k = 0;
509  _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0);
510  }
511  }
512 
514 
528  template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
529  typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type lex_count(size_type i,
530  size_type j,
531  value_type c) const
532  {
533  assert(i <= j and j <= size());
534  if (1 == m_sigma)
535  {
536  value_type _c = m_tree.bv_pos_rank(m_tree.root());
537  if (c == _c)
538  { // c is the only symbol in the wt
539  return t_ret_type{ i, 0, 0 };
540  }
541  else if (c < _c)
542  {
543  return t_ret_type{ 0, 0, j - i };
544  }
545  else
546  {
547  return t_ret_type{ 0, j - i, 0 };
548  }
549  }
550  if (i == j) { return t_ret_type{ rank(i, c), 0, 0 }; }
551  uint64_t p = m_tree.bit_path(c);
552  uint32_t path_len = p >> 56;
553  if (path_len == 0)
554  { // path_len=0: => c is not present
555  value_type _c = (value_type)p;
556  if (c == _c)
557  { // c is smaller than any symbol in wt
558  return t_ret_type{ 0, 0, j - i };
559  }
560  auto res = lex_count(i, j, _c);
561  return t_ret_type{ 0, j - i - std::get<2>(res), std::get<2>(res) };
562  }
563  size_type smaller = 0, greater = 0;
564  node_type v = m_tree.root();
565  for (uint32_t l = 0; l < path_len; ++l, p >>= 1)
566  {
567  size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v));
568  size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v));
569 
570  if (p & 1)
571  {
572  smaller += j - r1_2 - i + r1_1;
573  i = r1_1;
574  j = r1_2;
575  }
576  else
577  {
578  greater += r1_2 - r1_1;
579  i -= r1_1;
580  j -= r1_2;
581  }
582  v = m_tree.child(v, p & 1);
583  }
584  return t_ret_type{ i, smaller, greater };
585  }
586 
588 
599  template <class t_ret_type = std::tuple<size_type, size_type>>
600  typename std::enable_if<shape_type::lex_ordered, t_ret_type>::type lex_smaller_count(size_type i,
601  value_type c) const
602  {
603  assert(i <= size());
604  if (1 == m_sigma)
605  {
606  value_type _c = m_tree.bv_pos_rank(m_tree.root());
607  if (c == _c)
608  { // c is the only symbol in the wt
609  return t_ret_type{ i, 0 };
610  }
611  else if (c < _c)
612  {
613  return t_ret_type{ 0, 0 };
614  }
615  else
616  {
617  return t_ret_type{ 0, i };
618  }
619  }
620 
621  uint64_t p = m_tree.bit_path(c);
622  uint32_t path_len = p >> 56;
623  if (path_len == 0)
624  { // path_len=0: => c is not present
625  value_type _c = (value_type)p;
626  if (c == _c)
627  { // c is smaller than any symbol in wt
628  return t_ret_type{ 0, 0 };
629  }
630  auto res = lex_smaller_count(i, _c);
631  return t_ret_type{ 0, std::get<0>(res) + std::get<1>(res) };
632  }
633  size_type result = 0;
634  size_type all = i; // possible occurrences of c
635  node_type v = m_tree.root();
636  for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1)
637  {
638  size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v));
639  if (p & 1)
640  {
641  result += all - ones;
642  all = ones;
643  }
644  else
645  {
646  all -= ones;
647  }
648  v = m_tree.child(v, p & 1);
649  }
650  return t_ret_type{ all, result };
651  }
652 
654  const_iterator begin() const { return const_iterator(this, 0); }
655 
657  const_iterator end() const { return const_iterator(this, size()); }
658 
660  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
661  {
662  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
663  size_type written_bytes = 0;
664  written_bytes += write_member(m_size, out, child, "size");
665  written_bytes += write_member(m_sigma, out, child, "sigma");
666  written_bytes += m_bv.serialize(out, child, "bv");
667  written_bytes += m_bv_rank.serialize(out, child, "bv_rank");
668  written_bytes += m_bv_select1.serialize(out, child, "bv_select_1");
669  written_bytes += m_bv_select0.serialize(out, child, "bv_select_0");
670  written_bytes += m_tree.serialize(out, child, "tree");
671  structure_tree::add_size(child, written_bytes);
672  return written_bytes;
673  }
674 
676  void load(std::istream & in)
677  {
678  read_member(m_size, in);
679  read_member(m_sigma, in);
680  m_bv.load(in);
681  m_bv_rank.load(in, &m_bv);
682  m_bv_select1.load(in, &m_bv);
683  m_bv_select0.load(in, &m_bv);
684  m_tree.load(in);
685  }
686 
688  bool operator==(wt_pc const & other) const noexcept
689  {
690  return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv) &&
691  (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1) &&
692  (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree);
693  }
694 
696  bool operator!=(wt_pc const & other) const noexcept { return !(*this == other); }
697 
698  template <typename archive_t>
699  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
700  {
701  ar(CEREAL_NVP(m_size));
702  ar(CEREAL_NVP(m_sigma));
703  ar(CEREAL_NVP(m_bv));
704  ar(CEREAL_NVP(m_bv_rank));
705  ar(CEREAL_NVP(m_bv_select1));
706  ar(CEREAL_NVP(m_bv_select0));
707  ar(CEREAL_NVP(m_tree));
708  }
709 
710  template <typename archive_t>
711  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
712  {
713  ar(CEREAL_NVP(m_size));
714  ar(CEREAL_NVP(m_sigma));
715  ar(CEREAL_NVP(m_bv));
716  ar(CEREAL_NVP(m_bv_rank));
717  m_bv_rank.set_vector(&m_bv);
718  ar(CEREAL_NVP(m_bv_select1));
719  m_bv_select1.set_vector(&m_bv);
720  ar(CEREAL_NVP(m_bv_select0));
721  m_bv_select0.set_vector(&m_bv);
722  ar(CEREAL_NVP(m_tree));
723  }
724 
727  {
729  }
730 
732  auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
733  {
734  return random_access_container<std::function<value_type(size_type)>>(
735  [&v, this](size_type i) {
736  node_type vv = v;
737  while (!is_leaf(vv))
738  {
739  auto vs = expand(vv);
740  auto rs = expand(vv, range_type{ { 0, i } });
741  bool bit = *(begin(vv) + i);
742  i = std::get<1>(rs[bit]);
743  vv = vs[bit];
744  }
745  return sym(vv);
746  },
747  size(v));
748  }
749 
751  bool is_leaf(const node_type & v) const { return m_tree.is_leaf(v); }
752 
754  value_type sym(const node_type & v) const { return m_tree.bv_pos_rank(v); }
755 
757  bool empty(const node_type & v) const { return size(v) == 0; }
758 
760  auto size(const node_type & v) const -> decltype(m_tree.size(v))
761  {
762  if (is_leaf(v))
763  {
764  if (v == root())
765  return size();
766  else
767  {
768  auto parent = m_tree.parent(v);
769  auto rs = expand(parent, range_type{ { 0, size(parent) - 1 } });
770  if (m_tree.child(parent, 0) == v)
771  return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1;
772  else
773  return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1;
774  }
775  }
776  else
777  {
778  return m_tree.size(v);
779  }
780  }
781 
783  node_type root() const { return m_tree.root(); }
784 
786 
790  std::array<node_type, 2> expand(const node_type & v) const
791  {
792  return { { m_tree.child(v, 0), m_tree.child(v, 1) } };
793  }
794 
796 
805  std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
806  {
807  auto ranges_copy = ranges;
808  return expand(v, std::move(ranges_copy));
809  }
810 
812 
821  std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
822  {
823  auto v_sp_rank = m_tree.bv_pos_rank(v);
824  range_vec_type res(ranges.size());
825  size_t i = 0;
826  for (auto & r : ranges)
827  {
828  auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
829  auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
830  auto left_size = (r[1] - r[0] + 1) - right_size;
831 
832  auto right_sp = sp_rank - v_sp_rank;
833  auto left_sp = r[0] - right_sp;
834 
835  r = { { left_sp, left_sp + left_size - 1 } };
836  res[i++] = { { right_sp, right_sp + right_size - 1 } };
837  }
838  return { { ranges, std::move(res) } };
839  }
840 
842 
851  std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
852  {
853  auto v_sp_rank = m_tree.bv_pos_rank(v);
854  auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]);
855  auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank;
856  auto left_size = (r[1] - r[0] + 1) - right_size;
857 
858  auto right_sp = sp_rank - v_sp_rank;
859  auto left_sp = r[0] - right_sp;
860 
861  return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
862  }
863 
865  std::pair<uint64_t, uint64_t> path(value_type c) const
866  {
867  uint64_t path = m_tree.bit_path(c);
868  uint64_t path_len = path >> 56;
869  // reverse the path till we fix the ordering
870  path = bits::rev(path);
871  path = path >> (64 - path_len); // remove the length
872  return { path_len, path };
873  }
874 
876 
881  std::pair<bool, value_type> symbol_gte(value_type c) const { return m_tree.symbol_gte(c); }
882 
884 
889  std::pair<bool, value_type> symbol_lte(value_type c) const { return m_tree.symbol_lte(c); }
890 
891  private:
893  auto begin(const node_type & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v))
894  {
895  return m_bv.begin() + m_tree.bv_pos(v);
896  }
897 
899  auto end(const node_type & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v))
900  {
901  return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v);
902  }
903 };
904 } // namespace sdsl
905 
906 #endif
bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A generic vector class for integers of width .
Definition: int_vector.hpp:253
Generic iterator for a random access container.
Definition: iterators.hpp:24
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A prefix code-shaped wavelet.
Definition: wt_pc.hpp:44
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition: wt_pc.hpp:456
value_type sym(const node_type &v) const
Symbol for a leaf.
Definition: wt_pc.hpp:754
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wt_pc.hpp:805
wt_pc(const wt_pc &wt)
Copy constructor.
Definition: wt_pc.hpp:225
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition: wt_pc.hpp:335
t_bitvector::difference_type difference_type
Definition: wt_pc.hpp:49
std::pair< bool, value_type > symbol_gte(value_type c) const
Returns for a symbol c the next larger or equal symbol in the WT.
Definition: wt_pc.hpp:881
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_pc.hpp:676
int_vector ::size_type size_type
Definition: wt_pc.hpp:47
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wt_pc.hpp:300
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_pc.hpp:711
tree_strat_type::alphabet_category alphabet_category
Definition: wt_pc.hpp:57
t_bitvector bit_vector_type
Definition: wt_pc.hpp:52
const_iterator iterator
Definition: wt_pc.hpp:51
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wt_pc.hpp:751
wt_pc(t_it begin, t_it end, std::string)
Definition: wt_pc.hpp:220
wt_tag index_category
Definition: wt_pc.hpp:56
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_pc.hpp:699
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wt_pc.hpp:654
size_type size() const
Returns the size of the original vector.
Definition: wt_pc.hpp:284
typename tree_strat_type::node_type node_type
Definition: wt_pc.hpp:63
node_type root() const
Returns the root node.
Definition: wt_pc.hpp:783
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wt_pc.hpp:821
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wt_pc.hpp:726
bool operator==(wt_pc const &other) const noexcept
Equality operator.
Definition: wt_pc.hpp:688
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wt_pc.hpp:657
wt_pc(t_it begin, t_it end)
Construct the wavelet tree from a sequence defined by two interators.
Definition: wt_pc.hpp:169
t_select_zero select_0_type
Definition: wt_pc.hpp:55
t_tree_strat::template type< wt_pc > tree_strat_type
Definition: wt_pc.hpp:46
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wt_pc.hpp:757
t_select select_1_type
Definition: wt_pc.hpp:54
wt_pc(wt_pc &&wt)
Definition: wt_pc.hpp:239
const bit_vector_type & bv
Definition: wt_pc.hpp:156
@ lex_ordered
Definition: wt_pc.hpp:61
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_pc.hpp:660
tree_strat_type::value_type value_type
Definition: wt_pc.hpp:48
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wt_pc.hpp:851
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many times symbol wt[i] occurs in the prefix [0..i-1].
Definition: wt_pc.hpp:372
wt_pc & operator=(const wt_pc &wt)
Assignment operator.
Definition: wt_pc.hpp:254
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wt_pc.hpp:287
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wt_pc.hpp:865
t_shape::template type< wt_pc > shape_type
Definition: wt_pc.hpp:58
auto size(const node_type &v) const -> decltype(m_tree.size(v))
Return the size of node v.
Definition: wt_pc.hpp:760
bool operator!=(wt_pc const &other) const noexcept
Inequality operator.
Definition: wt_pc.hpp:696
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition: wt_pc.hpp:600
std::enable_if< shape_type::lex_ordered, t_ret_type >::type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition: wt_pc.hpp:529
std::pair< bool, value_type > symbol_lte(value_type c) const
Returns for a symbol c the previous smaller or equal symbol in the WT.
Definition: wt_pc.hpp:889
size_type select(size_type i, value_type c) const
Calculates the ith occurrence of the symbol c in the supported vector.
Definition: wt_pc.hpp:404
wt_pc & operator=(wt_pc &&wt)
Move assignment operator.
Definition: wt_pc.hpp:265
t_rank rank_1_type
Definition: wt_pc.hpp:53
const size_type & sigma
Definition: wt_pc.hpp:155
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wt_pc.hpp:790
random_access_const_iterator< wt_pc > const_iterator
Definition: wt_pc.hpp:50
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)>>
Random access container to sequence of node v.
Definition: wt_pc.hpp:732
Namespace for the succinct data structure library.
void calculate_effective_alphabet_size(const t_rac &C, sigma_type &sigma)
Definition: wt_helper.hpp:52
void calculate_character_occurences(t_it begin, t_it end, t_rac &C)
Count for each character the number of occurrences in rac[0..size-1].
Definition: wt_helper.hpp:40
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:970
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
static SDSL_CONSTEXPR uint64_t rev(uint64_t x)
reverses a given 64 bit word
Definition: bits.hpp:942