SDSL  3.0.0
Succinct Data Structure Library
k2_tree.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_K2_TREE
9 #define INCLUDED_SDSL_K2_TREE
10 
11 #include <deque>
12 #include <queue>
13 #include <stdexcept>
14 #include <tuple>
15 
16 #include <sdsl/bit_vectors.hpp>
18 #include <sdsl/io.hpp>
19 #include <sdsl/k2_tree_helper.hpp>
20 
22 namespace sdsl
23 {
25 
36 template <uint8_t k, typename t_bv = bit_vector, typename t_rank = typename t_bv::rank_1_type>
37 class k2_tree
38 {
39  public:
42 
43  private:
46  t_bv k_t;
48  t_bv k_l;
49 
50  t_rank k_t_rank;
51 
52  uint8_t k_k;
53  uint16_t k_height;
54 
55  protected:
56  void build_from_matrix(const std::vector<std::vector<int>> & matrix)
57  {
58  // Makes the size a power of k.
59  int simulated_size = std::pow(k, k_height);
60  std::vector<std::deque<bit_vector>> acc(k_height + 1);
61 
62  k2_tree_ns::_build_from_matrix<bit_vector>(matrix, k, simulated_size, k_height, 1, 0, 0, acc);
63 
64  size_type t_size = 0;
65  size_type l_size = 0;
66  for (int i = 1; i < k_height; i++)
67  for (auto it = acc[i].begin(); it != acc[i].end(); it++) t_size += (*it).size();
68 
69  for (auto it = acc[k_height].begin(); it != acc[k_height].end(); it++) l_size += (*it).size();
70 
71  bit_vector k_t_(t_size, 0);
72  bit_vector k_l_(l_size, 0);
73 
74  int n = 0;
75  for (int j = 1; j < k_height; j++)
76  for (auto it = acc[j].begin(); it != acc[j].end(); it++)
77  for (unsigned i = 0; i < (*it).size(); i++)
78  {
79  // TODO there should be a better way to do this
80  k_t_.set_int(n, (*it).get_int(i, 1), 1);
81  n++;
82  }
83  n = 0;
84  for (auto it = acc[k_height].begin(); it != acc[k_height].end(); it++)
85  for (unsigned i = 0; i < (*it).size(); i++)
86  {
87  // TODO there should be a better way to do this
88  k_l_.set_int(n * 1, (*it).get_int(i, 1), 1);
89  n++;
90  }
91 
92  k2_tree_ns::build_template_vector<t_bv>(k_t_, k_l_, k_t, k_l);
93  }
94 
106  void _neigh(size_type n, idx_type row, idx_type col, size_type level, std::vector<idx_type> & acc) const
107  {
108  if (level >= k_t.size())
109  { // Last level
110  if (k_l[level - k_t.size()] == 1) acc.push_back(col);
111  return;
112  }
113 
114  if (k_t[level] == 1)
115  {
116  idx_type y = k_t_rank(level + 1) * std::pow(k_k, 2) + k_k * std::floor(row / static_cast<double>(n));
117  for (unsigned j = 0; j < k_k; j++) _neigh(n / k_k, row % n, col + n * j, y + j, acc);
118  }
119  }
120 
132  void _reverse_neigh(size_type n, idx_type row, idx_type col, size_type level, std::vector<idx_type> & acc) const
133  {
134  if (level >= k_t.size())
135  { // Last level
136  if (k_l[level - k_t.size()] == 1) { acc.push_back(row); }
137  return;
138  }
139 
140  if (k_t[level] == 1)
141  {
142  idx_type y = k_t_rank(level + 1) * std::pow(k_k, 2) + std::floor(col / static_cast<double>(n));
143  for (unsigned j = 0; j < k_k; j++) _reverse_neigh(n / k_k, row + n * j, col % n, y + j * k_k, acc);
144  }
145  }
146 
148 
156  void build_from_edges(std::vector<std::tuple<idx_type, idx_type>> & edges, const size_type size)
157  {
158 
159  typedef std::tuple<idx_type, idx_type, size_type, idx_type, idx_type> t_part_tuple;
160 
161  k_k = k;
162  k_height = std::ceil(std::log(size) / std::log(k_k));
163  k_height = k_height > 1 ? k_height : 1; // If size == 0
164  size_type k_2 = std::pow(k_k, 2);
165  bit_vector k_t_ = bit_vector(k_2 * k_height * edges.size(), 0);
166  bit_vector k_l_;
167 
168  std::queue<t_part_tuple> q;
169  idx_type t = 0, last_level = 0;
170  idx_type i, j, r_0, c_0, it, c, r;
171  size_type l = std::pow(k_k, k_height - 1);
172  std::vector<idx_type> pos_by_chunk(k_2 + 1, 0);
173 
174  q.push(t_part_tuple(0, edges.size(), l, 0, 0));
175 
176  while (!q.empty())
177  {
178  std::vector<idx_type> amount_by_chunk(k_2, 0);
179  std::tie(i, j, l, r_0, c_0) = q.front();
180  q.pop();
181  // Get size for each chunk
182  for (it = i; it < j; it++)
183  amount_by_chunk[k2_tree_ns::get_chunk_idx(std::get<0>(edges[it]),
184  std::get<1>(edges[it]),
185  c_0,
186  r_0,
187  l,
188  k_k)] += 1;
189  if (l == 1)
190  {
191  if (last_level == 0)
192  {
193  last_level = t;
194  k_l_ = bit_vector(k_t_.size() - last_level, 0);
195  k_t_.resize(last_level);
196  k_t_.shrink_to_fit();
197  last_level = 1; // if t was 0
198  t = 0; // Restart counter as we're storing at k_l_ now.
199  }
200  for (it = 0; it < k_2; it++, t++)
201  if (amount_by_chunk[it] != 0) k_l_[t] = 1;
202  // At l == 1 we do not put new elements at the queue.
203  continue;
204  }
205 
206  // Set starting position in the vector for each chunk
207  pos_by_chunk[0] = i;
208  for (it = 1; it < k_2; it++) pos_by_chunk[it] = pos_by_chunk[it - 1] + amount_by_chunk[it - 1];
209  // To handle the last case when it = k_2 - 1
210  pos_by_chunk[k_2] = j;
211  // Push to the queue every non zero elements chunk
212  for (it = 0; it < k_2; it++, t++)
213  // If not empty chunk, set bit to 1
214  if (amount_by_chunk[it] != 0)
215  {
216  r = it / k_k;
217  c = it % k_k;
218  k_t_[t] = 1;
219  q.push(t_part_tuple(pos_by_chunk[it], pos_by_chunk[it + 1], l / k_k, r_0 + r * l, c_0 + c * l));
220  }
221  idx_type chunk;
222 
223  // Sort edges' vector
224  for (unsigned ch = 0; ch < k_2; ch++)
225  {
226  idx_type be = ch == 0 ? i : pos_by_chunk[ch - 1];
227  for (it = pos_by_chunk[ch]; it < be + amount_by_chunk[ch];)
228  {
229  chunk = k2_tree_ns::get_chunk_idx(std::get<0>(edges[it]), std::get<1>(edges[it]), c_0, r_0, l, k_k);
230 
231  if (pos_by_chunk[chunk] != it)
232  std::iter_swap(edges.begin() + it, edges.begin() + pos_by_chunk[chunk]);
233  else
234  it++;
235  pos_by_chunk[chunk]++;
236  }
237  }
238  }
239  k_l_.resize(t);
240  k_l_.shrink_to_fit();
241  k2_tree_ns::build_template_vector<t_bv>(k_t_, k_l_, k_t, k_l);
242 
243  k_t_rank = t_rank(&k_t);
244  }
245 
246  public:
247  k2_tree() = default;
248 
250 
256  k2_tree(std::vector<std::vector<int>> & matrix)
257  {
258  if (matrix.size() < 1) { throw std::logic_error("Matrix has no elements"); }
259  std::vector<bit_vector> t;
260  k_k = k;
261  if (matrix.size() < k_k)
262  k_height = 1;
263  else // height = log_k n
264  k_height = std::ceil(std::log(matrix.size()) / std::log(k_k));
265 
266  build_from_matrix(matrix);
267 
268  k_t_rank = t_rank(&k_t);
269  }
270 
272 
280  k2_tree(std::vector<std::tuple<idx_type, idx_type>> & edges, const size_type size)
281  {
282  assert(size > 0);
283  assert(edges.size() > 0);
284 
285  build_from_edges(edges, size);
286  }
287 
289 
301  k2_tree(std::string filename, size_type size = 0)
302  {
303  int_vector_buffer<> buf_x(filename + ".x", std::ios::in);
304  int_vector_buffer<> buf_y(filename + ".y", std::ios::in);
305 
306  assert(buf_x.size() == buf_y.size());
307  assert(buf_x.size() > 0);
308 
309  std::vector<std::tuple<idx_type, idx_type>> edges;
310  edges.reserve(buf_x.size());
311 
312  if (size == 0)
313  {
314  size_type max = 0;
315  for (auto v : buf_x) max = std::max(static_cast<size_type>(v), max);
316  for (auto v : buf_y) max = std::max(static_cast<size_type>(v), max);
317  size = max + 1;
318  }
319 
320  for (uint64_t i = 0; i < buf_x.size(); i++)
321  edges.push_back(std::tuple<idx_type, idx_type>{ buf_x[i], buf_y[i] });
322 
323  build_from_edges(edges, size);
324  }
325 
326  k2_tree(const k2_tree & tr)
327  : k_t(tr.k_t)
328  , k_l(tr.k_l)
329  , k_k(tr.k_k)
330  , k_height(tr.k_height)
331  , k_t_rank(tr.k_t_rank)
332  {
333  k_t_rank.set_vector(&k_t);
334  }
335 
336  k2_tree(k2_tree && tr) { *this = std::move(tr); }
337 
340  {
341  if (this != &tr)
342  {
343  k_t = std::move(tr.k_t);
344  k_l = std::move(tr.k_l);
345  k_k = std::move(tr.k_k);
346  k_height = std::move(tr.k_height);
347  k_t_rank = std::move(tr.k_t_rank);
348  k_t_rank.set_vector(&k_t);
349  }
350  return *this;
351  }
352 
355  {
356  if (this != &tr)
357  {
358  k2_tree tmp(tr);
359  *this = std::move(tmp);
360  }
361  return *this;
362  }
363 
365  bool operator==(const k2_tree & tr) const
366  {
367  // TODO check the rank support equality?
368  if (k_k != tr.k_k || k_height != tr.k_height) return false;
369  if (k_t.size() != tr.k_t.size() || k_l.size() != tr.k_l.size()) return false;
370  for (unsigned i = 0; i < k_t.size(); i++)
371  if (k_t[i] != tr.k_t[i]) return false;
372  for (unsigned i = 0; i < k_l.size(); i++)
373  if (k_l[i] != tr.k_l[i]) return false;
374  return true;
375  }
376 
377  t_bv get_t() { return k_t; }
378 
379  t_bv get_l() { return k_l; }
380 
382 
388  bool adj(idx_type i, idx_type j) const
389  {
390  if (k_t.size() == 0 && k_l.size() == 0) return false;
391  size_type n = std::pow(k_k, k_height - 1);
392  size_type k_2 = std::pow(k_k, 2);
393  idx_type col, row;
394 
395  // This is duplicated to avoid an extra if at the loop. As idx_type
396  // is unsigned and rank has an offset of one, is not possible to run
397  // k_t_rank with zero as parameter at the first iteration.
398  row = std::floor(i / static_cast<double>(n));
399  col = std::floor(j / static_cast<double>(n));
400  i = i % n;
401  j = j % n;
402  idx_type level = k_k * row + col;
403  n = n / k_k;
404 
405  while (level < k_t.size())
406  {
407  if (k_t[level] == 0) return false;
408  row = std::floor(i / static_cast<double>(n));
409  col = std::floor(j / static_cast<double>(n));
410  i = i % n;
411  j = j % n;
412  level = k_t_rank(level + 1) * k_2 + k_k * row + col;
413  n = n / k_k;
414  }
415 
416  return k_l[level - k_t.size()] == 1;
417  }
418 
420 
424  std::vector<idx_type> neigh(idx_type i) const
425  {
426  std::vector<idx_type> acc{};
427  if (k_l.size() == 0 && k_t.size() == 0) return acc;
428  size_type n = static_cast<size_type>(std::pow(k_k, k_height)) / k_k;
429  idx_type y = k_k * std::floor(i / static_cast<double>(n));
430  for (unsigned j = 0; j < k_k; j++) _neigh(n / k_k, i % n, n * j, y + j, acc);
431  return acc;
432  }
433 
435 
439  std::vector<idx_type> reverse_neigh(idx_type i) const
440  {
441  std::vector<idx_type> acc{};
442  if (k_l.size() == 0 && k_t.size() == 0) return acc;
443  // Size of the first square division
444  size_type n = static_cast<size_type>(std::pow(k_k, k_height)) / k_k;
445  idx_type y = std::floor(i / static_cast<double>(n));
446  for (unsigned j = 0; j < k_k; j++) _reverse_neigh(n / k_k, n * j, i % n, y + j * k_k, acc);
447 
448  return acc;
449  }
450 
452 
458  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
459  {
460  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
461  size_type written_bytes = 0;
462 
463  written_bytes += k_t.serialize(out, child, "t");
464  written_bytes += k_l.serialize(out, child, "l");
465  written_bytes += k_t_rank.serialize(out, child, "t_rank");
466  written_bytes += write_member(k_k, out, child, "k");
467  written_bytes += write_member(k_height, out, child, "height");
468  structure_tree::add_size(child, written_bytes);
469  return written_bytes;
470  }
471 
473 
476  void load(std::istream & in)
477  {
478  k_t.load(in);
479  k_l.load(in);
480  k_t_rank.load(in);
481  k_t_rank.set_vector(&k_t);
482  read_member(k_k, in);
483  read_member(k_height, in);
484  }
485 
487  template <typename archive_t>
488  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
489  {
490  ar(CEREAL_NVP(k_k));
491  ar(CEREAL_NVP(k_height));
492  ar(CEREAL_NVP(k_t));
493  ar(CEREAL_NVP(k_l));
494  ar(CEREAL_NVP(k_t_rank));
495  }
496 
498  template <typename archive_t>
499  typename std::enable_if<cereal::traits::is_output_serializable<k2_tree, archive_t>::value, void>::type
501  {
502  ar(CEREAL_NVP(k_k));
503  ar(CEREAL_NVP(k_height));
504  ar(CEREAL_NVP(k_t));
505  ar(CEREAL_NVP(k_l));
506  ar(CEREAL_NVP(k_t_rank));
507  k_t_rank.set_vector(&k_t);
508  }
509 };
510 } // namespace sdsl
511 
512 #endif
bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
uint64_t size() const
Returns the number of elements currently stored.
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:530
size_type size() const noexcept
The number of elements in the int_vector.
void push_back(value_type value)
Insert element at the end.
Definition: int_vector.hpp:472
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:545
void set_int(size_type idx, value_type x, const uint8_t len=64)
Set the bits from position idx to idx+len-1 to the binary representation of integer x.
A k^2-tree.
Definition: k2_tree.hpp:38
bool adj(idx_type i, idx_type j) const
Indicates wheter node j is adjacent to node i or not.
Definition: k2_tree.hpp:388
std::enable_if< cereal::traits::is_output_serializable< k2_tree, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
Definition: k2_tree.hpp:500
void load(std::istream &in)
Load from istream.
Definition: k2_tree.hpp:476
void _neigh(size_type n, idx_type row, idx_type col, size_type level, std::vector< idx_type > &acc) const
Recursive function to retrieve list of neighbors.
Definition: k2_tree.hpp:106
k2_tree & operator=(k2_tree &tr)
Assignment operator.
Definition: k2_tree.hpp:354
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: k2_tree.hpp:488
k2_tree_ns::idx_type idx_type
Definition: k2_tree.hpp:40
void _reverse_neigh(size_type n, idx_type row, idx_type col, size_type level, std::vector< idx_type > &acc) const
Recursive function to retrieve list of reverse neighbors.
Definition: k2_tree.hpp:132
t_bv get_l()
Definition: k2_tree.hpp:379
bool operator==(const k2_tree &tr) const
Equal operator.
Definition: k2_tree.hpp:365
t_bv get_t()
Definition: k2_tree.hpp:377
std::vector< idx_type > neigh(idx_type i) const
Returns a list of neighbors of node i.
Definition: k2_tree.hpp:424
void build_from_edges(std::vector< std::tuple< idx_type, idx_type >> &edges, const size_type size)
Build a tree from an edges collection.
Definition: k2_tree.hpp:156
std::vector< idx_type > reverse_neigh(idx_type i) const
Returns a list of reverse neighbors of node i.
Definition: k2_tree.hpp:439
k2_tree(std::vector< std::vector< int >> &matrix)
Constructor.
Definition: k2_tree.hpp:256
void build_from_matrix(const std::vector< std::vector< int >> &matrix)
Definition: k2_tree.hpp:56
k2_tree(const k2_tree &tr)
Definition: k2_tree.hpp:326
k2_tree_ns::size_type size_type
Definition: k2_tree.hpp:41
k2_tree(k2_tree &&tr)
Definition: k2_tree.hpp:336
k2_tree(std::vector< std::tuple< idx_type, idx_type >> &edges, const size_type size)
Constructor.
Definition: k2_tree.hpp:280
k2_tree()=default
k2_tree(std::string filename, size_type size=0)
Constructor.
Definition: k2_tree.hpp:301
k2_tree & operator=(k2_tree &&tr)
Move assignment operator.
Definition: k2_tree.hpp:339
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: k2_tree.hpp:458
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
int_vector_buffer.hpp contains the sdsl::int_vector_buffer class.
io.hpp contains some methods for reading/writing sdsl structures.
k2_tree_helper.hpp contains helper functions and definitions for a k^2-tree implementation.
uint16_t get_chunk_idx(idx_type v, idx_type u, idx_type c_0, idx_type r_0, size_type l, uint8_t k)
Get the chunk index ([0, k^2[) of a submatrix point.
int_vector ::size_type size_type
int_vector ::size_type idx_type
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:51
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787