SDSL  3.0.0
Succinct Data Structure Library
coder_fibonacci.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef SDSL_CODER_FIBONACCI_INCLUDED
9 #define SDSL_CODER_FIBONACCI_INCLUDED
10 
11 #include <sdsl/int_vector.hpp>
12 
13 namespace sdsl
14 {
15 
16 namespace coder
17 {
18 
20 template <typename T = void>
21 class fibonacci
22 {
23  public:
24  static struct impl
25  {
26  uint64_t fib12bit_to_bin[(1 << 12) * 8];
28 
33  uint8_t fib2bin_shift[(1 << 13)];
35 
40  uint16_t fib2bin_16_greedy[(1 << 16)];
41 
43  uint64_t fib2bin_0_95[(1 << 12) * 8];
44 
45  impl()
46  {
47  for (uint32_t x = 0; x <= 0x1FFF; ++x)
48  {
49  if (bits::cnt11(x)) { fib2bin_shift[x] = bits::sel11(x, 1) + 1; }
50  else
51  {
52  fib2bin_shift[x] = 0;
53  }
54  }
55  for (uint32_t x = 0; x < 1 << 16; ++x)
56  {
57  uint16_t w = 0;
58  uint32_t offset = 0;
59  if (uint32_t cnt = bits::cnt11(x))
60  {
61  uint32_t y = x;
62  uint32_t fib_pos = 1;
63  do {
64  if (y & 1)
65  {
66  w += bits::lt_fib[fib_pos - 1];
67  if (y & 2)
68  {
69  --cnt;
70  ++offset;
71  fib_pos = 0;
72  y >>= 1;
73  }
74  }
75  ++fib_pos;
76  ++offset;
77  y >>= 1;
78  } while (cnt);
79  }
80  fib2bin_16_greedy[x] = (offset << 11) | w;
81  }
82  for (uint32_t p = 0; p < 8; ++p)
83  {
84  for (uint32_t x = 0; x <= 0xFFF; ++x)
85  {
86  uint64_t w = 0;
87  for (uint32_t j = 0; j < 12 and 12 * p + j < 92; ++j)
88  {
89  if ((x >> j) & 1ULL)
90  {
91  w += bits::lt_fib[12 * p + j];
92  if (x >> (j + 1) & 1ULL) { break; }
93  }
94  }
95  fib2bin_0_95[(p << 12) | x] = w;
96  }
97  }
98  }
99  } data;
100 
101  typedef uint64_t size_type;
102 
103  static const uint8_t min_codeword_length = 2; // 11 represents 1 and is the code word with minimum length
105 
107  static uint8_t encoding_length(uint64_t w);
109  /* \param data Bitstring
110  * \param start_idx Starting index of the decoding.
111  * \param n Number of values to decode from the bitstring.
112  * \param it Iterator
113  */
114  template <bool t_sumup, bool t_inc, class t_iter>
115  static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
116 
117  template <bool t_sumup, bool t_inc, class t_iter>
118  static uint64_t decode1(const uint64_t * data,
119  const size_type start_idx,
120  size_type n,
121  t_iter it = (t_iter) nullptr);
122 
125 
130  static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
131 
134 
136  static uint64_t decode_prefix_sum(const uint64_t * d,
137  const size_type start_idx,
138  const size_type end_idx,
139  size_type n);
140 
141  template <class int_vector1, class int_vector2>
142  static bool encode(const int_vector1 & v, int_vector2 & z);
143 
144  template <class int_vector>
145  static uint64_t * raw_data(int_vector & v)
146  {
147  return v.m_data;
148  }
149 
151 
155  static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
156 
157  template <class int_vector1, class int_vector2>
158  static bool decode(const int_vector1 & z, int_vector2 & v);
159 };
160 
161 template <typename T>
162 inline uint8_t fibonacci<T>::encoding_length(uint64_t w)
163 {
164  if (w == 0) { return 93; }
165  // This limit for the leftmost 1bit in the resulting fib code could be improved using a table
166  uint8_t len_1 = bits::hi(w); // len-1 of the fib code
167  while (++len_1 < (uint8_t)(sizeof(bits::lt_fib) / sizeof(bits::lt_fib[0])) && w >= bits::lt_fib[len_1])
168  ;
169  return len_1 + 1;
170 }
171 
172 template <typename T>
173 template <class int_vector1, class int_vector2>
174 inline bool fibonacci<T>::encode(const int_vector1 & v, int_vector2 & z)
175 {
176  uint64_t z_bit_size = 0;
177  uint64_t w;
178  const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
179  for (typename int_vector1::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
180  {
181  if ((w = *it) == 0)
182  {
183  if (v.width() < 64) { w = zero_val; }
184  }
185  z_bit_size += encoding_length(w);
186  }
187  z.bit_resize(z_bit_size);
188  z.shrink_to_fit();
189  if (z_bit_size & 0x3F)
190  { // if z_bit_size % 64 != 0
191  *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
192  }
193  uint64_t * z_data = z.m_data;
194  uint8_t offset = 0;
195  uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
196  uint64_t t;
197  for (typename int_vector1::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
198  {
199  w = *it;
200  if (w == 0) { w = zero_val; }
201  int8_t len_1 = encoding_length(w) - 1, j;
202  fibword_low = 0x0000000000000001ULL;
203 
204  if (len_1 >= 64)
205  { // length > 65
206  fibword_high = 0x0000000000000001ULL;
207  j = len_1 - 1;
208  if (w == 0)
209  { // handle special case
210  fibword_high <<= 1;
211  fibword_high |= 1;
212  fibword_high <<= 1;
213  w -= bits::lt_fib[len_1 - 1];
214  j -= 2;
215  }
216  for (; j > 63; --j)
217  {
218  fibword_high <<= 1;
219  if (w >= (t = bits::lt_fib[j]))
220  {
221  w -= t;
222  fibword_high |= 1;
223  if (w and j > 64)
224  {
225  fibword_high <<= 1;
226  --j;
227  }
228  else
229  {
230  fibword_high <<= (64 - j);
231  break;
232  }
233  }
234  }
235  j = 64;
236  }
237  else
238  {
239  j = len_1 - 1;
240  }
241 
242  for (; j >= 0; --j)
243  {
244  fibword_low <<= 1;
245  if (w >= (t = bits::lt_fib[j]))
246  {
247  w -= t;
248  fibword_low |= 1;
249  if (w)
250  {
251  fibword_low <<= 1;
252  --j;
253  }
254  else
255  {
256  fibword_low <<= (j);
257  break;
258  }
259  }
260  }
261  if (len_1 >= 64)
262  {
263  bits::write_int_and_move(z_data, fibword_low, offset, 64);
264  bits::write_int_and_move(z_data, fibword_high, offset, len_1 - 63);
265  }
266  else
267  {
268  bits::write_int_and_move(z_data, fibword_low, offset, (len_1 & 0x3F) + 1);
269  }
270  }
271  z.width(v.width());
272  return true;
273 }
274 
275 template <typename T>
276 inline void fibonacci<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
277 {
278  uint64_t fibword_high = 0x0000000000000001ULL, fibword_low;
279  uint64_t t;
280  int8_t len_1 = encoding_length(x) - 1, j;
281  fibword_low = 0x0000000000000001ULL;
282 
283  if (len_1 >= 64)
284  { // length > 65
285  fibword_high = 0x0000000000000001ULL;
286  j = len_1 - 1;
287  if (x == 0)
288  { // handle special case
289  fibword_high <<= 1;
290  fibword_high |= 1;
291  fibword_high <<= 1;
292  x -= bits::lt_fib[len_1 - 1];
293  j -= 2;
294  }
295  for (; j > 63; --j)
296  {
297  fibword_high <<= 1;
298  if (x >= (t = bits::lt_fib[j]))
299  {
300  x -= t;
301  fibword_high |= 1;
302  if (x and j > 64)
303  {
304  fibword_high <<= 1;
305  --j;
306  }
307  else
308  {
309  fibword_high <<= (64 - j);
310  break;
311  }
312  }
313  }
314  j = 64;
315  }
316  else
317  {
318  j = len_1 - 1;
319  }
320  for (; j >= 0; --j)
321  {
322  fibword_low <<= 1;
323  if (x >= (t = bits::lt_fib[j]))
324  {
325  x -= t;
326  fibword_low |= 1;
327  if (x)
328  {
329  fibword_low <<= 1;
330  --j;
331  }
332  else
333  {
334  fibword_low <<= (j);
335  break;
336  }
337  }
338  }
339  if (len_1 >= 64)
340  {
341  bits::write_int_and_move(z, fibword_low, offset, 64);
342  bits::write_int_and_move(z, fibword_high, offset, len_1 - 63);
343  }
344  else
345  {
346  bits::write_int_and_move(z, fibword_low, offset, (len_1 & 0x3F) + 1);
347  }
348 }
349 
350 template <typename T>
351 template <class int_vector1, class int_vector2>
352 inline bool fibonacci<T>::decode(const int_vector1 & z, int_vector2 & v)
353 {
354  uint64_t n = 0, carry = 0; // n = number of values to be decoded
355  const uint64_t * data = z.data();
356  // Determine size of v
357  if (z.empty())
358  { // if z is empty we are ready with decoding
359  v.width(z.width());
360  v.resize(0);
361  v.shrink_to_fit();
362  return true;
363  }
364  for (typename int_vector1::size_type i = 0; i < z.bit_data_size() - 1; ++i, ++data)
365  {
366  n += bits::cnt11(*data, carry);
367  }
368  if (z.bit_data_size() << 6 != z.bit_size())
369  {
370  n += bits::cnt11((*data) & bits::lo_set[z.bit_size() & 0x3F], carry);
371  }
372  else
373  {
374  n += bits::cnt11(*data, carry);
375  }
376  v.width(z.width());
377  v.resize(n);
378  v.shrink_to_fit();
379  return decode<false, true>(z.data(), 0, n, v.begin());
380 }
381 
382 template <typename T>
383 template <bool t_sumup, bool t_inc, class t_iter>
384 inline uint64_t fibonacci<T>::decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it)
385 {
386  data += (start_idx >> 6);
387  uint64_t w = 0, value = 0;
388  int8_t buffered = 0; // bits buffered in w, in 0..64
389  int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
390  int8_t shift = 0;
391  uint32_t fibtable = 0;
392  while (n)
393  { // while not all values are decoded
394  while (buffered < 13 and bits::cnt11(w) < n)
395  {
396  w |= (((*data) >> read) << buffered);
397  if (read >= buffered)
398  {
399  ++data;
400  buffered += 64 - read;
401  read = 0;
402  }
403  else
404  { // read < buffered
405  read += 64 - buffered;
406  buffered = 64;
407  }
408  }
409  value += fibonacci<T>::data.fib2bin_0_95[(fibtable << 12) | (w & 0xFFF)];
410  shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
411  if (shift > 0)
412  { // if end of decoding
413  w >>= shift;
414  buffered -= shift;
415  if (t_inc) *(it++) = value;
416  if (!t_sumup and n != 1) value = 0;
417  fibtable = 0;
418  --n;
419  }
420  else
421  { // not end of decoding
422  w >>= 12;
423  buffered -= 12;
424  ++fibtable;
425  }
426  }
427  return value;
428 }
429 
430 template <typename T>
431 template <bool t_sumup, bool t_inc, class t_iter>
432 inline uint64_t fibonacci<T>::decode1(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
433 {
434  d += (start_idx >> 6);
435  uint64_t w = 0, value = 0;
436  int8_t buffered = 0; // bits buffered in w, in 0..64
437  int8_t read = start_idx & 0x3F; // read bits in current *data 0..63
438  int8_t shift = 0;
439  uint32_t fibtable = 0;
440  uint8_t blocknr = (start_idx >> 6) % 9;
441  while (n)
442  { // while not all values are decoded
443  while (buffered < 13 and bits::cnt11(w) < n)
444  {
445  w |= (((*d) >> read) << buffered);
446  if (read >= buffered)
447  {
448  ++blocknr;
449  ++d;
450  if (blocknr == 8)
451  {
452  ++d;
453  blocknr = 0;
454  }
455  buffered += 64 - read;
456  read = 0;
457  }
458  else
459  { // read < buffered
460  read += 64 - buffered;
461  buffered = 64;
462  }
463  }
464  value += fibonacci<T>::data.fib2bin_0_95[(fibtable << 12) | (w & 0xFFF)];
465  shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
466  if (shift > 0)
467  { // if end of decoding
468  w >>= shift;
469  buffered -= shift;
470  if (t_inc) *(it++) = value;
471  if (!t_sumup) value = 0;
472  fibtable = 0;
473  --n;
474  }
475  else
476  { // not end of decoding
477  w >>= 12;
478  buffered -= 12;
479  ++fibtable;
480  }
481  }
482  return value;
483 }
484 
485 template <typename T>
486 inline uint64_t fibonacci<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
487 {
488  if (n == 0) return 0;
489  // return decode<true,false,int*>(data, start_idx, n);
490  d += (start_idx >> 6);
491  size_type i = 0;
492  int32_t bits_to_decode = 0;
493  uint64_t w = 0, value = 0;
494  int16_t buffered = 0, read = start_idx & 0x3F, shift = 0;
495  uint16_t temp = 0;
496  uint64_t carry = 0;
497  i = bits::cnt11(*d & ~bits::lo_set[read], carry);
498  if (i < n)
499  {
500  uint64_t oldcarry;
501  w = 0;
502  do {
503  oldcarry = carry;
504  i += (temp = bits::cnt11(*(d + (++w)), carry));
505  } while (i < n);
506  bits_to_decode += ((w - 1) << 6) + bits::sel11(*(d + w), n - (i - temp), oldcarry) + 65 - read;
507  w = 0;
508  }
509  else
510  { // i>=n
511  bits_to_decode = bits::sel11(*d >> read, n) + 1;
512  }
513  if (((size_type)bits_to_decode) == n << 1) return n;
514  if (((size_type)bits_to_decode) == (n << 1) + 1) return n + 1;
515  i = 0;
516  // while( bits_to_decode > 0 or buffered > 0){// while not all values are decoded
517  do {
518  while (buffered < 64 and bits_to_decode > 0)
519  {
520  w |= (((*d) >> read) << buffered);
521  if (read >= buffered)
522  {
523  ++d;
524  buffered += 64 - read;
525  bits_to_decode -= (64 - read);
526  read = 0;
527  }
528  else
529  { // read buffered
530  read += 64 - buffered;
531  bits_to_decode -= (64 - buffered);
532  buffered = 64;
533  }
534  if (bits_to_decode < 0)
535  {
536  buffered += bits_to_decode;
537  w &= bits::lo_set[buffered];
538  bits_to_decode = 0;
539  }
540  }
541  if (!i)
542  { // try do decode multiple values
543  if ((w & 0xFFFFFF) == 0xFFFFFF)
544  {
545  value += 12;
546  w >>= 24;
547  buffered -= 24;
548  if ((w & 0xFFFFFF) == 0xFFFFFF)
549  {
550  value += 12;
551  w >>= 24;
552  buffered -= 24;
553  }
554  }
555  do {
556  temp = fibonacci<T>::data.fib2bin_16_greedy[w & 0xFFFF];
557  if ((shift = (temp >> 11)) > 0)
558  {
559  value += (temp & 0x7FFULL);
560  w >>= shift;
561  buffered -= shift;
562  }
563  else
564  {
565  value += fibonacci<T>::data.fib2bin_0_95[w & 0xFFF];
566  w >>= 12;
567  buffered -= 12;
568  i = 1;
569  break;
570  }
571  } while (buffered > 15);
572  }
573  else
574  { // i > 0
575  value += fibonacci<T>::data.fib2bin_0_95[(i << 12) | (w & 0xFFF)];
576  shift = fibonacci<T>::data.fib2bin_shift[w & 0x1FFF];
577  if (shift > 0)
578  { // if end of decoding
579  w >>= shift;
580  buffered -= shift;
581  i = 0;
582  }
583  else
584  { // not end of decoding
585  w >>= 12;
586  buffered -= 12;
587  ++i;
588  }
589  }
590  } while (bits_to_decode > 0 or buffered > 0);
591  return value;
592 }
593 
594 template <typename T>
595 inline uint64_t fibonacci<T>::decode_prefix_sum(const uint64_t * d,
596  const size_type start_idx,
597  SDSL_UNUSED const size_type end_idx,
598  size_type n)
599 {
600  return decode_prefix_sum(d, start_idx, n);
601 }
602 
603 template <typename T>
605 
606 } // end namespace coder
607 } // end namespace sdsl
608 #endif
A class to encode and decode between Fibonacci and binary code.
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Fibonacci encoded bits beginning at start_idx in the bitstring "data".
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Fibonacci encoded integers beginning at start_idx in the bitstring "data" and return the sum...
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, const size_type end_idx, size_type n)
Decode n Fibonacci encoded integers beginning at start_idx and ending at end_idx (exclusive) in the b...
static uint64_t decode1(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
static struct sdsl::coder::fibonacci::impl data
static bool encode(const int_vector1 &v, int_vector2 &z)
static const uint8_t min_codeword_length
static uint8_t encoding_length(uint64_t w)
Get the number of bits that are necessary to encode the value w in Fibonacci code.
static uint64_t * raw_data(int_vector &v)
A generic vector class for integers of width .
Definition: int_vector.hpp:253
#define SDSL_UNUSED
Definition: config.hpp:13
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
constexpr static uint64_t lt_fib[92]
Array containing Fibonacci numbers less than .
Definition: bits.hpp:79
static SDSL_CONSTEXPR uint32_t cnt11(uint64_t x, uint64_t &c)
Count the number of consecutive and distinct 11 in the 64bit integer x.
Definition: bits.hpp:551
static SDSL_CONSTEXPR uint32_t sel11(uint64_t x, uint32_t i, uint32_t c=0)
Calculates the position of the i-th rightmost 11-bit-pattern which terminates a Fibonacci coded integ...
Definition: bits.hpp:721
static SDSL_CONSTEXPR void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:752
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661