SDSL  3.0.0
Succinct Data Structure Library
lcp_wt.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
8 #ifndef INCLUDED_SDSL_LCP_WT
9 #define INCLUDED_SDSL_LCP_WT
10 
11 #include <algorithm> // for lower_bound
12 #include <cassert>
13 #include <cstring> // for strlen
14 #include <iomanip>
15 #include <iostream>
16 #include <iterator>
17 #include <stdexcept>
18 #include <utility> // for pair
19 #include <vector>
20 
21 #include <sdsl/int_vector.hpp>
22 #include <sdsl/iterators.hpp>
23 #include <sdsl/util.hpp>
24 #include <sdsl/wt_huff.hpp>
25 
26 namespace sdsl
27 {
28 
30 
37 template <uint8_t t_width = 0>
38 class lcp_wt
39 {
40  public:
44  typedef const value_type const_reference;
47  typedef const pointer const_pointer;
49  typedef ptrdiff_t difference_type;
51 
54 
55  enum
56  {
59  sa_order = 1
60  }; // as the lcp_wt is not fast for texts with long repetition
61 
62  template <class Cst>
63  using type = lcp_wt;
64 
65  private:
66  small_lcp_type m_small_lcp; // vector for lcp values < 255
67  int_vector<t_width> m_big_lcp; // vector for lcp values > 254
68 
69  typedef std::pair<size_type, size_type> tPII;
70  typedef std::vector<tPII> tVPII;
71 
72  public:
74  lcp_wt() = default;
76  lcp_wt(const lcp_wt &) = default;
77  lcp_wt(lcp_wt &&) = default;
78  lcp_wt & operator=(const lcp_wt &) = default;
79  lcp_wt & operator=(lcp_wt &&) = default;
80 
82  lcp_wt(cache_config & config, std::string other_key = "")
83  {
84  std::string temp_file = tmp_file(config, "_lcp_sml");
85  std::string lcp_key = conf::KEY_LCP;
86  if ("" != other_key) { lcp_key = other_key; }
87  int_vector_buffer<> lcp_buf(cache_file_name(lcp_key, config));
88  size_type l = 0, max_l = 0, big_sum = 0, n = lcp_buf.size();
89  {
90  int_vector<8> small_lcp = int_vector<8>(n);
91  for (size_type i = 0; i < n; ++i)
92  {
93  if ((l = lcp_buf[i]) < 255) { small_lcp[i] = l; }
94  else
95  {
96  small_lcp[i] = 255;
97  if (l > max_l) max_l = l;
98  ++big_sum;
99  }
100  }
101  store_to_file(small_lcp, temp_file);
102  }
103  {
104  int_vector_buffer<8> lcp_sml_buf(temp_file);
105  m_small_lcp = small_lcp_type(lcp_sml_buf.begin(), lcp_sml_buf.end(), config.dir);
106  }
107  sdsl::remove(temp_file);
108  m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
109  {
110  for (size_type i = 0, ii = 0; i < n; ++i)
111  {
112  if (lcp_buf[i] >= 255) { m_big_lcp[ii++] = lcp_buf[i]; }
113  }
114  }
115  }
116 
118  size_type size() const { return m_small_lcp.size(); }
119 
122 
124  bool empty() const { return 0 == m_small_lcp.size(); }
125 
127  const_iterator begin() const { return const_iterator(this, 0); }
128 
130  const_iterator end() const { return const_iterator(this, size()); }
131 
133 
136  {
137  if (m_small_lcp[i] != 255) { return m_small_lcp[i]; }
138  else
139  {
140  return m_big_lcp[m_small_lcp.rank(i, 255)];
141  }
142  }
143 
145  size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
146  {
147  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
148  size_type written_bytes = 0;
149  written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
150  written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
151  structure_tree::add_size(child, written_bytes);
152  return written_bytes;
153  }
154 
156  void load(std::istream & in)
157  {
158  m_small_lcp.load(in);
159  m_big_lcp.load(in);
160  }
161 
162  template <typename archive_t>
163  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
164  {
165  ar(CEREAL_NVP(m_small_lcp));
166  ar(CEREAL_NVP(m_big_lcp));
167  }
168 
169  template <typename archive_t>
170  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
171  {
172  ar(CEREAL_NVP(m_small_lcp));
173  ar(CEREAL_NVP(m_big_lcp));
174  }
175 
177  bool operator==(lcp_wt const & other) const noexcept
178  {
179  return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp);
180  }
181 
183  bool operator!=(lcp_wt const & other) const noexcept { return !(*this == other); }
184 };
185 
186 } // end namespace sdsl
187 #endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:30
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:253
void load(std::istream &in)
Load the int_vector for a stream.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:255
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:566
A class for the compressed version of lcp information of an suffix array.
Definition: lcp_wt.hpp:39
random_access_const_iterator< lcp_wt > const_iterator
Definition: lcp_wt.hpp:42
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: lcp_wt.hpp:163
lcp_plain_tag lcp_category
Definition: lcp_wt.hpp:52
bool operator!=(lcp_wt const &other) const noexcept
Inequality operator.
Definition: lcp_wt.hpp:183
lcp_wt & operator=(const lcp_wt &)=default
const value_type const_reference
Definition: lcp_wt.hpp:44
const_iterator iterator
Definition: lcp_wt.hpp:43
static size_type max_size()
Returns the largest size that lcp_wt can ever have.
Definition: lcp_wt.hpp:121
lcp_wt()=default
Default Constructor.
int_vector< t_width >::value_type value_type
Definition: lcp_wt.hpp:41
lcp_tag index_category
Definition: lcp_wt.hpp:53
int_vector ::size_type size_type
Definition: lcp_wt.hpp:48
void load(std::istream &in)
Load from a stream.
Definition: lcp_wt.hpp:156
ptrdiff_t difference_type
Definition: lcp_wt.hpp:49
const pointer const_pointer
Definition: lcp_wt.hpp:47
value_type operator[](size_type i) const
[]-operator
Definition: lcp_wt.hpp:135
bool operator==(lcp_wt const &other) const noexcept
Equality operator.
Definition: lcp_wt.hpp:177
bool empty() const
Returns if the data structure is empty.
Definition: lcp_wt.hpp:124
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: lcp_wt.hpp:127
size_type size() const
Number of elements in the instance.
Definition: lcp_wt.hpp:118
const_reference * pointer
Definition: lcp_wt.hpp:46
const_reference reference
Definition: lcp_wt.hpp:45
lcp_wt(cache_config &config, std::string other_key="")
Constructor.
Definition: lcp_wt.hpp:82
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: lcp_wt.hpp:145
lcp_wt(const lcp_wt &)=default
Copy / Move constructor.
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: lcp_wt.hpp:130
wt_huff< bit_vector, rank_support_v<>, select_support_scan< 1 >, select_support_scan< 0 > > small_lcp_type
Definition: lcp_wt.hpp:50
lcp_wt & operator=(lcp_wt &&)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: lcp_wt.hpp:170
lcp_wt(lcp_wt &&)=default
Generic iterator for a random access container.
Definition: iterators.hpp:24
A class supporting linear time select queries.
static void add_size(structure_tree_node *v, uint64_t value)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
A prefix code-shaped wavelet.
Definition: wt_pc.hpp:44
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
Definition: wt_pc.hpp:335
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_pc.hpp:676
size_type size() const
Returns the size of the original vector.
Definition: wt_pc.hpp:284
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_pc.hpp:660
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
constexpr char KEY_LCP[]
Definition: config.hpp:44
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
Definition: io.hpp:798
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
static SDSL_CONSTEXPR uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:661
Helper class for construction process.
Definition: config.hpp:67
std::string dir
Definition: config.hpp:71
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.
wt_huff.hpp contains a class for a Huffman shaped wavelet tree over byte sequences.