SDSL  3.0.0
Succinct Data Structure Library
hyb_vector.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 /*
5  * Copyright (c) 2014 Juha Karkkainen, Dominik Kempa and Simon J. Puglisi
6  *
7  * Permission is hereby granted, free of charge, to any person
8  * obtaining a copy of this software and associated documentation
9  * files (the "Software"), to deal in the Software without
10  * restriction, including without limitation the rights to use,
11  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following
14  * conditions:
15  *
16  * The above copyright notice and this permission notice shall be
17  * included in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26  * OTHER DEALINGS IN THE SOFTWARE.
27  *
28  * Simon Gog made the following changes:
29  * - replace std::vectors by int_vectors
30  * - add support for rank0
31  * - added naive implementation of method get_int
32  * - TODO: added a naive implementation of select
33  */
34 #ifndef INCLUDED_SDSL_HYB_VECTOR
35 #define INCLUDED_SDSL_HYB_VECTOR
36 
37 #include <algorithm>
38 #include <cstdlib>
39 #include <iostream>
40 #include <vector>
41 
42 #include <sdsl/int_vector.hpp>
43 #include <sdsl/io.hpp>
44 #include <sdsl/iterators.hpp>
45 #include <sdsl/util.hpp>
46 
47 namespace sdsl
48 {
49 
50 // Needed for friend declarations.
51 template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
52 class rank_support_hyb;
53 template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
54 class select_support_hyb;
55 
57 
65 template <uint32_t k_sblock_rate = 16>
67 {
68  public:
77 
78  friend class rank_support_hyb<1, k_sblock_rate>;
79  friend class rank_support_hyb<0, k_sblock_rate>;
80  friend class select_support_hyb<1, k_sblock_rate>;
81  friend class select_support_hyb<0, k_sblock_rate>;
82 
83  private:
84  static const uint32_t k_block_size;
85  static const uint32_t k_block_bytes;
86  static const uint32_t k_sblock_header_size;
87  static const uint32_t k_sblock_size;
88  static const uint32_t k_hblock_rate;
89 
90  size_type m_size = 0; // original bitvector size
91  int_vector<8> m_trunk; // body of encoded blocks
92  int_vector<8> m_sblock_header; // sblock headers
93  int_vector<64> m_hblock_header; // hblock headers
94 
95  public:
97  hyb_vector() = default;
98  hyb_vector(const hyb_vector & hybrid) = default;
99  hyb_vector(hyb_vector && hybrid) = default;
100  hyb_vector & operator=(const hyb_vector & hybrid) = default;
101  hyb_vector & operator=(hyb_vector && hybrid) = default;
102 
104  hyb_vector(const bit_vector & bv)
105  {
106  m_size = bv.size();
107 
108  // Compute the number of blocks.
109  size_type n_blocks = (m_size + k_block_size - 1) / k_block_size;
110  size_type n_sblocks = (n_blocks + k_sblock_rate - 1) / k_sblock_rate;
111  size_type n_hblocks = (n_blocks + k_hblock_rate - 1) / k_hblock_rate;
112 
113  size_type trunk_size = 0;
114 
115  // runs_lookup[i] = number of runs - 1 in the binary encoding of i.
116  int_vector<8> runs_lookup(65536, 0);
117  runs_lookup[0] = 0;
118  for (uint32_t i = 1; i < 65536; ++i)
119  {
120  runs_lookup[i] = runs_lookup[i >> 1];
121  if (i >= 32768) --runs_lookup[i];
122  if ((i & 1) != ((i >> 1) & 1)) ++runs_lookup[i];
123  }
124 
125  // Compute optimal encoding for each block.
126  const uint64_t * bv_ptr = bv.data();
127  for (size_type block_id = 0; block_id < n_blocks; ++block_id)
128  {
129  size_type block_beg = block_id * k_block_size;
130  size_type block_end = block_beg + k_block_size;
131 
132  uint32_t ones = 0;
133  uint32_t runs = 0;
134 
135  if (block_end <= m_size)
136  {
137  // Count the number of ones, fast.
138  const uint64_t * ptr64 = bv_ptr;
139  for (uint8_t i = 0; i < 4; ++i) ones += bits::cnt(*ptr64++);
140 
141  // Count the number of runs, fast.
142  ptr64 = bv_ptr;
143  for (uint8_t i = 0; i < 4; ++i)
144  {
145  // Count changes of bits inside 16-bit words of *ptr64.
146  for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
147 
148  // Count changes of bits between 16-bit words of *ptr64.
149  for (uint8_t j = 0; j < 3; ++j)
150  runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
151  ++ptr64;
152  }
153 
154  // Count changes of bits between 64-bit words.
155  ptr64 = bv_ptr;
156  for (uint8_t i = 0; i < 3; ++i)
157  {
158  runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
159  ++ptr64;
160  }
161  ++runs;
162  }
163  else
164  {
165  // Count number of ones and runs, slow.
166  uint8_t prevbit = 2;
167  for (size_type i = block_beg; i < block_end; ++i)
168  {
169  uint8_t bit = (i < m_size ? bv[i] : 0);
170  if (bit == 1) ++ones;
171  if (bit != prevbit) ++runs;
172  prevbit = bit;
173  }
174  }
175 
176  // Choose best encoding.
177  uint32_t minority_enc_size = std::min(ones, k_block_size - ones);
178  uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
179  uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
180  best_enc_size = std::min(best_enc_size, k_block_bytes);
181 
182  // Update the answer.
183  trunk_size += best_enc_size;
184  bv_ptr += k_block_size / 64;
185  }
186 
187  // Allocate the memory.
188  m_sblock_header = int_vector<8>(n_sblocks * k_sblock_header_size, 0);
189  m_hblock_header = int_vector<64>(n_hblocks * 2, 0);
190  m_trunk = int_vector<8>(trunk_size, 0);
191 
192  // The actual encoding follows.
193  size_type tot_rank = 0; // stores current rank value
194  size_type sblock_ones = 0; // number of 1s inside superblock
195  size_type trunk_ptr = 0;
196 
197  // Process blocks left to right.
198  bv_ptr = bv.data();
199  for (size_type block_id = 0; block_id < n_blocks; ++block_id)
200  {
201  size_type block_beg = block_id * k_block_size;
202  size_type block_end = block_beg + k_block_size;
203  size_type sblock_id = block_id / k_sblock_rate;
204  size_type hblock_id = block_id / k_hblock_rate;
205 
206  // Update hblock header.
207  if (!(block_id % k_hblock_rate))
208  {
209  m_hblock_header[2 * hblock_id] = trunk_ptr;
210  m_hblock_header[2 * hblock_id + 1] = tot_rank;
211  }
212 
213  // Update sblock header.
214  if (!(block_id % k_sblock_rate))
215  {
216  uint32_t * ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * sblock_id);
217  *ptr++ = trunk_ptr - m_hblock_header[2 * hblock_id];
218  *ptr = tot_rank - m_hblock_header[2 * hblock_id + 1];
219 
220  // If the sblock is uniform, flip the bit.
221  if (sblock_id && (!sblock_ones || sblock_ones == k_sblock_size))
222  {
223  ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * (sblock_id - 1));
224  *ptr |= 0x80000000;
225  }
226 
227  // Reset the number of ones in sblock.
228  sblock_ones = 0;
229  }
230 
231  uint32_t ones = 0;
232  uint32_t runs = 0;
233 
234  // Compute the number of 1-bits and runs inside current block.
235  if (block_end <= m_size)
236  {
237  // Count the number of ones, fast.
238  const uint64_t * ptr64 = bv_ptr;
239  for (uint8_t i = 0; i < 4; ++i) ones += bits::cnt(*ptr64++);
240 
241  // Count the number of runs, fast.
242  ptr64 = bv_ptr;
243  for (uint8_t i = 0; i < 4; ++i)
244  {
245  for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
246  for (uint8_t j = 0; j < 3; ++j)
247  runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
248  ++ptr64;
249  }
250  ptr64 = bv_ptr;
251  for (uint8_t i = 0; i < 3; ++i)
252  {
253  runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
254  ++ptr64;
255  }
256  ++runs;
257  }
258  else
259  {
260  // Count number of ones and runs, slow.
261  uint8_t prevbit = 2;
262  for (size_type i = block_beg; i < block_end; ++i)
263  {
264  uint8_t bit = (i < m_size ? bv[i] : 0);
265  if (bit == 1) ++ones;
266  if (bit != prevbit) ++runs;
267  prevbit = bit;
268  }
269  }
270  uint32_t zeros = k_block_size - ones;
271 
272  // Store block popcount.
273  uint16_t * header_ptr16 = (uint16_t *)(((uint8_t *)m_sblock_header.data()) +
274  sblock_id * k_sblock_header_size + 8 +
275  (block_id % k_sblock_rate) * 2);
276 
277  (*header_ptr16) = ones;
278  if (ones == k_block_size) (*header_ptr16) |= 0x200;
279 
280  if (0 < ones && ones < k_block_size)
281  { // non uniform block
282  uint32_t minority_enc_size = std::min(ones, zeros);
283  uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
284  uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
285 
286  if (k_block_bytes <= best_enc_size)
287  {
288  // Use plain encoding.
289  (*header_ptr16) |= (k_block_bytes << 10);
290 
291  // Copy original 256 bits from bv into trunk.
292  if (block_end <= m_size)
293  {
294  for (uint8_t i = 0; i < 4; ++i)
295  {
296  *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = *(bv_ptr + i);
297  trunk_ptr += 8;
298  }
299  }
300  else
301  {
302  for (size_type i = block_beg; i < block_end; i += 64)
303  {
304  uint64_t w = 0;
305  for (size_type j = i; j < std::min(i + 64, block_end); ++j)
306  {
307  uint8_t bit = (j < m_size ? bv[j] : 0);
308  if (bit) w |= ((uint64_t)1 << (j - i));
309  }
310  *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = w;
311  trunk_ptr += 8;
312  }
313  }
314  }
315  else
316  {
317  if (runs_enc_size < minority_enc_size)
318  {
319 
320  // Use runs encoding.
321  (*header_ptr16) |= (runs_enc_size << 10);
322  (*header_ptr16) |= (bv[block_beg] << 9);
323 
324  if (block_end <= m_size)
325  {
326  // Find run ends, fast.
327  uint32_t runid = 0;
328  const uint64_t * ptr64 = bv_ptr;
329 
330  uint64_t w = 0;
331  for (uint8_t i = 0; runid < runs_enc_size && i < 4; ++i)
332  {
333  // Check if run end aligns with the end of the 64-bit word.
334  if (i > 0 && (w & 1) != ((*ptr64) & 1)) m_trunk[trunk_ptr + runid++] = 64 * i - 1;
335 
336  w = (*ptr64++);
337  for (uint8_t j = 0; runid < runs_enc_size && j < 63; ++j)
338  {
339  if ((w & 1) != ((w >> 1) & 1)) m_trunk[trunk_ptr + runid++] = j + i * 64;
340  w >>= 1;
341  }
342  }
343  trunk_ptr += runid;
344  }
345  else
346  {
347  // Find run ends, slow.
348  uint8_t prevbit = 2;
349  uint32_t runid = 0;
350 
351  for (size_type i = block_beg; runid < runs_enc_size; ++i)
352  {
353  uint8_t bit = (i < m_size ? bv[i] : 0);
354  if (bit != prevbit && i != block_beg)
355  m_trunk[trunk_ptr + runid++] = (i - block_beg - 1);
356  prevbit = bit;
357  }
358  trunk_ptr += runid;
359  }
360  }
361  else
362  {
363  // Use minority encoding.
364  // Update sblock header.
365  (*header_ptr16) |= (minority_enc_size << 10);
366  if (ones < zeros) (*header_ptr16) |= 0x200;
367  uint8_t keybit = (ones < zeros);
368 
369  // Find positions of 1-bits, fast.
370  if (block_end <= m_size)
371  {
372  const uint64_t * ptr64 = bv_ptr;
373  for (uint8_t i = 0; i < 4; ++i)
374  {
375  uint64_t w = (*ptr64++);
376  for (uint8_t j = 0; j < 64; ++j)
377  {
378  if ((w & 1) == keybit) m_trunk[trunk_ptr++] = j + 64 * i;
379  w >>= 1;
380  }
381  }
382  }
383  else
384  {
385  for (size_type i = block_beg; i < block_end; ++i)
386  {
387  uint8_t bit = (i < m_size ? bv[i] : 0);
388 
389  if (bit == keybit) m_trunk[trunk_ptr++] = i - block_beg;
390  }
391  }
392  }
393  }
394  }
395 
396  // Update global rank.
397  tot_rank += ones;
398  sblock_ones += ones;
399  bv_ptr += k_block_size / 64;
400  }
401  }
402 
403  private:
405  value_type access0(size_type i) const
406  {
407  assert(i > 0);
408  assert(i <= m_size);
409 
410  size_type block_id = (i - 1) / k_block_size;
411  size_type sblock_id = block_id / k_sblock_rate;
412  size_type hblock_id = block_id / k_hblock_rate;
413 
414  size_type trunk_base = m_hblock_header[2 * hblock_id];
415 
416  uint32_t local_i = i - block_id * k_block_size;
417 
418  // Read superblock header.
419  const uint8_t * header_ptr8 = ((const uint8_t *)m_sblock_header.data()) + (sblock_id * k_sblock_header_size);
420  uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
421  size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
422  header_ptr8 += 8;
423 
424  uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
425 
426  // Uniform superblock optimization.
427  if ((*header_ptr32) & 0x80000000) return (value_type)((*(header_ptr8 + 1)) & 0x01);
428 
429  // Fast forward through preceding blocks in the superblock.
430  for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
431  {
432  trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
433  ++header_ptr16;
434  }
435 
436  const uint8_t * trunk_p = ((const uint8_t *)m_trunk.data()) + trunk_ptr;
437 
438  uint32_t encoding_size = ((*header_ptr16) >> 10);
439  uint32_t ones = ((*header_ptr16) & 0x1ff);
440  uint32_t zeros = k_block_size - ones;
441 
442  // Number of runs <= 2.
443  uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
444  if (!encoding_size)
445  {
446  uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
447  uint8_t inside_second_run = (first_run_length < local_i);
448  return (inside_second_run ^ special_bit);
449  }
450 
451  // Number of runs > 2.
452  if (encoding_size < k_block_bytes)
453  {
454  if (std::min(ones, zeros) == encoding_size)
455  {
456  // Minority encoding.
457  uint32_t tot = 0;
458  while (tot < encoding_size && *trunk_p < local_i)
459  {
460  ++trunk_p;
461  ++tot;
462  }
463  uint8_t last_was_majority = ((!tot) || (*(trunk_p - 1) != local_i - 1));
464  return (last_was_majority ^ special_bit);
465  }
466 
467  // Runs encoding.
468  if (special_bit)
469  {
470  uint32_t j = 0;
471  uint32_t acc = 0;
472 
473  int32_t last = -1;
474  while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
475  {
476  acc += *trunk_p - last;
477  ++trunk_p;
478  last = *trunk_p;
479  ++trunk_p;
480  j += 2;
481  }
482 
483  uint8_t access_i = 0;
484  if (j + 1 >= encoding_size)
485  {
486  if (j < encoding_size)
487  { // j == encoding_size - 1
488  if (local_i <= (uint32_t)(*trunk_p) + 1)
489  access_i = (((int32_t)local_i - last - 1) > 0);
490  else
491  {
492  acc += (int32_t)(*trunk_p) - last;
493  if (ones - acc <= k_block_size - local_i)
494  access_i = 0;
495  else
496  access_i = 1;
497  }
498  }
499  else
500  { // j == encoding_size
501  if ((int32_t)(ones - acc) < (int32_t)local_i - last - 1)
502  access_i = 0;
503  else
504  access_i = (((int32_t)local_i - last - 1) > 0);
505  }
506  }
507  else
508  {
509  if ((*trunk_p) < local_i - 1)
510  access_i = 0;
511  else
512  access_i = (((int32_t)local_i - last - 1) > 0);
513  }
514 
515  return access_i;
516  }
517  else
518  {
519  uint32_t j = 0;
520  uint32_t acc = 0;
521  int32_t last = -1;
522 
523  while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
524  {
525  acc += *trunk_p - last;
526  ++trunk_p;
527  last = *trunk_p;
528  ++trunk_p;
529  j += 2;
530  }
531 
532  uint8_t access_i = 0;
533  if (j + 1 >= encoding_size)
534  {
535  if (j < encoding_size)
536  {
537  if (local_i <= (uint32_t)(*trunk_p) + 1)
538  access_i = (((int32_t)local_i - last - 1) == 0);
539  else
540  {
541  acc += (*trunk_p) - last;
542  if (zeros - acc <= k_block_size - local_i)
543  access_i = 1;
544  else
545  access_i = 0;
546  }
547  }
548  else
549  {
550  if ((int32_t)(zeros - acc) < (int32_t)local_i - last - 1)
551  access_i = 1;
552  else
553  access_i = ((local_i - last - 1) == 0);
554  }
555  }
556  else
557  {
558  if ((*trunk_p) < local_i - 1)
559  access_i = 1;
560  else
561  access_i = (((int32_t)local_i - last - 1) == 0);
562  }
563 
564  return access_i;
565  }
566  }
567  else
568  {
569  // plain encoding.
570  uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr);
571  uint32_t bit;
572  for (bit = 0; bit + 64 <= local_i; bit += 64) trunk_ptr64++;
573 
574  uint8_t access_i = 0;
575  if (bit != local_i)
576  access_i = (((*trunk_ptr64) >> (local_i - bit - 1)) & 1);
577  else
578  access_i = (((*(trunk_ptr64 - 1)) >> 63) & 1);
579  return access_i;
580  }
581  }
582 
583  public:
585 
592  uint64_t get_int(size_type idx, const uint8_t len = 64) const
593  {
594  uint64_t res = 0;
595  for (size_t i = 0; i < len; ++i)
596  {
597  res <<= 1;
598  res |= (*this)[idx + len - 1 - i];
599  }
600  return res;
601  }
602 
604  value_type operator[](size_type i) const { return access0(i + 1); }
605 
607  size_type size() const { return m_size; }
608 
610  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
611  {
612  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
613  size_type written_bytes = 0;
614  written_bytes += write_member(m_size, out, child, "size");
615  written_bytes += m_trunk.serialize(out, child, "trunk");
616  written_bytes += m_sblock_header.serialize(out, child, "sblock_header");
617  written_bytes += m_hblock_header.serialize(out, child, "hblock_header");
618  structure_tree::add_size(child, written_bytes);
619  return written_bytes;
620  }
621 
623  void load(std::istream & in)
624  {
625  read_member(m_size, in);
626  m_trunk.load(in);
627  m_sblock_header.load(in);
628  m_hblock_header.load(in);
629  }
630 
631  template <typename archive_t>
632  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
633  {
634  ar(CEREAL_NVP(m_size));
635  ar(CEREAL_NVP(m_trunk));
636  ar(CEREAL_NVP(m_sblock_header));
637  ar(CEREAL_NVP(m_hblock_header));
638  }
639 
640  template <typename archive_t>
641  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
642  {
643  ar(CEREAL_NVP(m_size));
644  ar(CEREAL_NVP(m_trunk));
645  ar(CEREAL_NVP(m_sblock_header));
646  ar(CEREAL_NVP(m_hblock_header));
647  }
648 
649  iterator begin() const { return iterator(this, 0); }
650 
651  iterator end() const { return iterator(this, size()); }
652 
653  bool operator==(const hyb_vector & v) const
654  {
655  return m_size == v.m_size && m_trunk == v.m_trunk && m_sblock_header == v.m_sblock_header &&
656  m_hblock_header == v.m_hblock_header;
657  }
658 
659  bool operator!=(const hyb_vector & v) const { return !(*this == v); }
660 };
661 
662 template <uint32_t k_sblock_rate>
663 const uint32_t hyb_vector<k_sblock_rate>::k_block_size = 256;
664 template <uint32_t k_sblock_rate>
665 const uint32_t hyb_vector<k_sblock_rate>::k_block_bytes = 32;
666 template <uint32_t k_sblock_rate>
667 const uint32_t hyb_vector<k_sblock_rate>::k_sblock_header_size = 8 + 2 * k_sblock_rate;
668 template <uint32_t k_sblock_rate>
669 const uint32_t hyb_vector<k_sblock_rate>::k_sblock_size = 256 * k_sblock_rate;
670 template <uint32_t k_sblock_rate>
671 const uint32_t hyb_vector<k_sblock_rate>::k_hblock_rate = (1U << 31) / 256;
672 
673 template <uint8_t t_bp>
675 {
677  static size_type adapt(size_type res, size_type) { return res; }
678 };
679 
680 template <>
681 struct rank_result<0>
682 {
684  static size_type adapt(size_type res, size_type i) { return i - res; }
685 };
686 
688 
692 // TODO:
693 template <uint8_t t_b, uint32_t k_sblock_rate>
695 {
696  public:
699  enum
700  {
701  bit_pat = t_b
702  };
703  enum
704  {
705  bit_pat_len = (uint8_t)1
706  };
707 
708  private:
709  const bit_vector_type * m_v;
710 
711  public:
713  explicit rank_support_hyb(const bit_vector_type * v = nullptr) { set_vector(v); }
714 
716  const size_type rank(size_type i) const
717  {
718  assert(m_v != nullptr);
719  assert(i <= m_v->size());
720 
721  // Handle easy case.
722  if (i <= 0) return 0;
723 
724  size_type block_id = (i - 1) / bit_vector_type::k_block_size;
725  size_type sblock_id = block_id / k_sblock_rate;
726  size_type hblock_id = block_id / bit_vector_type::k_hblock_rate;
727 
728  size_type trunk_base = m_v->m_hblock_header[2 * hblock_id];
729  size_type hblock_rank = m_v->m_hblock_header[2 * hblock_id + 1];
730 
731  uint32_t local_i = i - block_id * bit_vector_type::k_block_size;
732 
733  // Read superblock header.
734  const uint8_t * header_ptr8 = ((const uint8_t *)(m_v->m_sblock_header.data())) +
735  (sblock_id * bit_vector_type::k_sblock_header_size);
736  uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
737  size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
738  size_type sblock_rank = *(header_ptr32 + 1);
739  header_ptr8 += 8;
740 
741  uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
742 
743  // Uniform superblock optimization.
744  if ((*header_ptr32) & 0x80000000)
745  {
746  return rank_result<t_b>::adapt(hblock_rank + sblock_rank +
747  ((*(header_ptr8 + 1)) &
748  0x01) * (i -
749  sblock_id * bit_vector_type::k_sblock_size),
750  i);
751  }
752 
753  // Fast forward through preceding blocks in the superblock.
754  size_type block_rank = 0;
755  for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
756  {
757  trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
758  block_rank += ((*header_ptr16) & 0x1ff); // Add 1s in the block.
759  ++header_ptr16;
760  }
761 
762  const uint8_t * trunk_p = ((uint8_t *)m_v->m_trunk.data()) + trunk_ptr;
763 
764  uint32_t encoding_size = ((*header_ptr16) >> 10);
765  uint32_t ones = ((*header_ptr16) & 0x1ff);
766  uint32_t zeros = bit_vector_type::k_block_size - ones;
767 
768  // Number of runs <= 2.
769  uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
770  if (!encoding_size)
771  {
772  uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
773  uint32_t local_rank = std::min(local_i, first_run_length);
774 
775  return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank +
776  (special_bit * local_rank +
777  (1 -
778  special_bit) * (local_i -
779  local_rank)),
780  i);
781  }
782 
783  // Number of runs > 2.
784  if (encoding_size < bit_vector_type::k_block_bytes)
785  {
786  if (std::min(ones, zeros) == encoding_size)
787  {
788  // Minority encoding.
789  uint32_t tot = 0;
790  while (tot < encoding_size && (*trunk_p++) < local_i) ++tot;
791 
792  return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + special_bit * tot +
793  (1 -
794  special_bit) * (local_i -
795  tot),
796  i);
797  }
798 
799  // Runs encoding.
800  if (special_bit)
801  {
802  uint32_t j = 0;
803  uint32_t acc = 0;
804 
805  int32_t last = -1;
806  while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
807  {
808  acc += *trunk_p - last;
809  ++trunk_p;
810  last = *trunk_p;
811  ++trunk_p;
812  j += 2;
813  }
814 
815  if (j + 1 >= encoding_size)
816  {
817  if (j < encoding_size)
818  {
819  if (*trunk_p >= local_i)
820  acc += local_i - last - 1;
821  else
822  {
823  acc += (*trunk_p) - last;
824  acc += (ones - acc) - std::min(ones - acc, bit_vector_type::k_block_size - local_i);
825  }
826  }
827  else
828  acc += std::min(ones - acc, local_i - last - 1);
829  }
830  else
831  acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
832 
833  return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + acc, i);
834  }
835  else
836  {
837  uint32_t j = 0;
838  uint32_t acc = 0;
839  int32_t last = -1;
840 
841  while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
842  {
843  acc += *trunk_p - last;
844  ++trunk_p;
845  last = *trunk_p;
846  ++trunk_p;
847  j += 2;
848  }
849 
850  if (j + 1 >= encoding_size)
851  {
852  if (j < encoding_size)
853  {
854  if (*trunk_p >= local_i)
855  acc += local_i - last - 1;
856  else
857  {
858  acc += (*trunk_p) - last;
859  acc += (zeros - acc) - std::min(zeros - acc, bit_vector_type::k_block_size - local_i);
860  }
861  }
862  else
863  acc += std::min(zeros - acc, local_i - last - 1);
864  }
865  else
866  acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
867 
868  return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + (local_i - acc), i);
869  }
870  }
871  else
872  {
873  // plain encoding.
874  uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_v->m_trunk.data()) + trunk_ptr);
875  uint32_t bit;
876  for (bit = 0; bit + 64 <= local_i; bit += 64) block_rank += bits::cnt(*trunk_ptr64++);
877  if (bit != local_i) block_rank += bits::cnt((*trunk_ptr64) & (((uint64_t)1 << (local_i - bit)) - 1));
878 
879  return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank, i);
880  }
881  }
882 
884  const size_type operator()(size_type i) const { return rank(i); }
885 
887  const size_type size() const { return m_v->size(); }
888 
890  void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
891 
894  {
895  if (this != &rs) { set_vector(rs.m_v); }
896  return *this;
897  }
898 
900  void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
901 
903  size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
904  {
905  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
906  structure_tree::add_size(child, 0);
907  return 0;
908  }
909 
910  template <typename archive_t>
911  void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
912  {}
913 
914  template <typename archive_t>
915  void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
916  {}
917 
918  bool operator==(const rank_support_hyb & other) const noexcept { return *m_v == *other.m_v; }
919 
920  bool operator!=(const rank_support_hyb & other) const noexcept { return !(*this == other); }
921 };
922 
924 
929 template <uint8_t t_b, uint32_t k_sblock_rate>
931 {
932  public:
935  enum
936  {
937  bit_pat = t_b
938  };
939  enum
940  {
941  bit_pat_len = (uint8_t)1
942  };
943 
944  private:
945  const bit_vector_type * m_v;
946 
947  public:
949  explicit select_support_hyb(const bit_vector_type * v = nullptr) { set_vector(v); }
950 
953  {
954  fprintf(stderr, "\nhyb_vector: select queries are not currently supported\n");
955  std::exit(EXIT_FAILURE);
956  }
957 
959  const size_type operator()(size_type i) const { return select(i); }
960 
962  const size_type size() const { return m_v->size(); }
963 
965  void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
966 
969  {
970  if (this != &rs) { set_vector(rs.m_v); }
971  return *this;
972  }
973 
975  void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
976 
978  size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
979  {
980  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
981  structure_tree::add_size(child, 0);
982  return 0;
983  }
984 
985  template <typename archive_t>
986  void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
987  {}
988 
989  template <typename archive_t>
990  void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
991  {}
992 
993  bool operator==(const select_support_hyb & other) const noexcept { return *m_v == *other.m_v; }
994 
995  bool operator!=(const select_support_hyb & other) const noexcept { return !(*this == other); }
996 };
997 
998 } // end namespace sdsl
999 
1000 #endif // INCLUDED_SDSL_HYB_VECTOR
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
A hybrid-encoded compressed bitvector representation.
Definition: hyb_vector.hpp:67
iterator end() const
Definition: hyb_vector.hpp:651
hyb_vector()=default
Default constructor.
select_support_hyb< 0, k_sblock_rate > select_0_type
Definition: hyb_vector.hpp:76
uint64_t get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
Definition: hyb_vector.hpp:592
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: hyb_vector.hpp:623
hyb_vector(hyb_vector &&hybrid)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: hyb_vector.hpp:641
iterator begin() const
Definition: hyb_vector.hpp:649
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: hyb_vector.hpp:610
hyb_vector & operator=(hyb_vector &&hybrid)=default
hyb_vector & operator=(const hyb_vector &hybrid)=default
hyb_vector(const hyb_vector &hybrid)=default
rank_support_hyb< 0, k_sblock_rate > rank_0_type
Definition: hyb_vector.hpp:74
bool operator==(const hyb_vector &v) const
Definition: hyb_vector.hpp:653
select_support_hyb< 1, k_sblock_rate > select_1_type
Definition: hyb_vector.hpp:75
random_access_const_iterator< hyb_vector > iterator
Definition: hyb_vector.hpp:72
size_type size() const
Returns the size of the original bitvector.
Definition: hyb_vector.hpp:607
bit_vector::difference_type difference_type
Definition: hyb_vector.hpp:71
bit_vector::size_type size_type
Definition: hyb_vector.hpp:69
bool operator!=(const hyb_vector &v) const
Definition: hyb_vector.hpp:659
hyb_vector(const bit_vector &bv)
Constructor.
Definition: hyb_vector.hpp:104
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: hyb_vector.hpp:632
value_type operator[](size_type i) const
Accessing the i-th element of the original bitvector.
Definition: hyb_vector.hpp:604
rank_support_hyb< 1, k_sblock_rate > rank_1_type
Definition: hyb_vector.hpp:73
bit_vector::value_type value_type
Definition: hyb_vector.hpp:70
int_vector_size_type size_type
Definition: int_vector.hpp:266
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:255
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Generic iterator for a random access container.
Definition: iterators.hpp:24
Rank_support for the hyb_vector class.
Definition: hyb_vector.hpp:695
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
Definition: hyb_vector.hpp:911
const size_type size() const
Return the size of the original vector.
Definition: hyb_vector.hpp:887
rank_support_hyb & operator=(const rank_support_hyb &rs)
Assignment operator.
Definition: hyb_vector.hpp:893
bool operator==(const rank_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:918
rank_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
Definition: hyb_vector.hpp:713
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
Definition: hyb_vector.hpp:915
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
Definition: hyb_vector.hpp:903
bit_vector_type::size_type size_type
Definition: hyb_vector.hpp:698
const size_type rank(size_type i) const
Answers rank queries.
Definition: hyb_vector.hpp:716
bool operator!=(const rank_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:920
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
Definition: hyb_vector.hpp:890
const size_type operator()(size_type i) const
Shorthand for rank(i)
Definition: hyb_vector.hpp:884
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
Definition: hyb_vector.hpp:900
hyb_vector< k_sblock_rate > bit_vector_type
Definition: hyb_vector.hpp:697
Select support for the hyb_vector class.
Definition: hyb_vector.hpp:931
bool operator!=(const select_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:995
bit_vector_type::size_type size_type
Definition: hyb_vector.hpp:934
bool operator==(const select_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:993
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
Definition: hyb_vector.hpp:965
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
Definition: hyb_vector.hpp:990
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
Definition: hyb_vector.hpp:978
select_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
Definition: hyb_vector.hpp:949
select_support_hyb & operator=(const select_support_hyb &rs)
Assignment operator.
Definition: hyb_vector.hpp:968
size_type select(size_type) const
Answers select queries.
Definition: hyb_vector.hpp:952
const size_type operator()(size_type i) const
Shorthand for select(i)
Definition: hyb_vector.hpp:959
const size_type size() const
Return the size of the original vector.
Definition: hyb_vector.hpp:962
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
Definition: hyb_vector.hpp:975
hyb_vector< k_sblock_rate > bit_vector_type
Definition: hyb_vector.hpp:933
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
Definition: hyb_vector.hpp:986
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
static SDSL_CONSTEXPR uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:494
bit_vector::size_type size_type
Definition: hyb_vector.hpp:683
static size_type adapt(size_type res, size_type i)
Definition: hyb_vector.hpp:684
static size_type adapt(size_type res, size_type)
Definition: hyb_vector.hpp:677
bit_vector::size_type size_type
Definition: hyb_vector.hpp:676
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.