SDSL  3.0.0
Succinct Data Structure Library
int_vector_mapper.hpp
Go to the documentation of this file.
1 // Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 #ifndef SDSL_INT_VECTOR_MAPPER
5 #define SDSL_INT_VECTOR_MAPPER
6 
7 #include <cinttypes>
8 #include <cstdio>
9 #include <ios>
10 
11 #include <sdsl/int_vector.hpp>
13 
14 namespace sdsl
15 {
16 
17 template <uint8_t t_width = 0, std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
19 {
20  static_assert(t_width <= 64, "int_vector_mapper: width must be at most 64 bits.");
21 
22  public:
27  static constexpr uint8_t fixed_int_width = t_width;
28 
29  public:
// Number of elements by which the backing file is enlarged when an append
// finds no spare capacity (see the 'push_back' body, which resizes to
// (size() + append_block_size) * width() bits).
30  const size_type append_block_size = 1000000;
31 
32  private:
// Start of the memory mapping returned by memory_manager::mmap_file;
// nullptr while nothing is mapped.
33  uint8_t * m_mapped_data = nullptr;
// Size of the backing file in bytes (header, if any, plus payload).
34  uint64_t m_file_size_bytes = 0;
// Byte offset of the payload inside the file: the value returned by
// int_vector::read_header, or 0 for plain files.
35  off_t m_data_offset = 0;
// Descriptor returned by memory_manager::open_file_for_mmap; -1 when not open.
36  int m_fd = -1;
// int_vector whose m_data points into the mapping (m_mapped_data + m_data_offset);
// all element access is delegated to it.
37  int_vector<t_width> m_wrapper;
// Path of the backing file.
38  std::string m_file_name;
// If true, the destructor removes the file after closing it.
// NOTE(review): no default member initializer; every constructor must set it.
39  bool m_delete_on_close;
40 
41  public:
42  int_vector_mapper() = delete;
45 
46  public:
// Destructor body (the signature line is not present in this listing).
// Sequence: unmap the file, rewrite the int_vector header if the file has one,
// shrink or grow the file to the bytes actually in use, close the descriptor,
// and finally delete the file when m_delete_on_close is set.
// Every failure is only reported on std::cerr; nothing is thrown.
48  {
49  if (m_mapped_data)
50  {
51  auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
52  if (ret != 0)
53  {
54  std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
55  << std::endl;
56  }
57 
// Persist the current size/width into the on-disk header. Skipped for
// read-only mappings and for plain files (m_data_offset == 0).
58  if (t_mode & std::ios_base::out)
59  { // write was possible
60  if (m_data_offset)
61  { // if the file is not a plain file
62  // set std::ios::in to not truncate the file
63  osfstream out(m_file_name, std::ios::in);
64  if (out)
65  {
66  out.seekp(0, std::ios::beg);
67  int_vector<t_width>::write_header(m_wrapper.m_size, m_wrapper.m_width, out);
68 
69  // out.seekp(0, std::ios::end);
70  }
71  else
72  {
73  std::cerr << "int_vector_mapper: could not open file for header update" << std::endl;
74  /*
75  * throw std::runtime_error("int_vector_mapper: \
76  * could not open file for header update");
77  */
78  }
79  }
80  }
81 
82  if (t_mode & std::ios_base::out)
83  {
84  // do we have to truncate?
85  size_type current_bit_size = m_wrapper.m_size;
// Payload is kept in whole 64-bit words: round the bit count up to a
// multiple of 64, then convert words to bytes.
86  size_type data_size_in_bytes = ((current_bit_size + 63) >> 6) << 3;
87  if (m_file_size_bytes != data_size_in_bytes + m_data_offset)
88  {
89  int tret = memory_manager::truncate_file_mmap(m_fd, data_size_in_bytes + m_data_offset);
90  if (tret == -1)
91  {
92  std::string truncate_error = std::string("int_vector_mapper: truncate error. ") +
93  std::string(util::str_from_errno());
// NOTE(review): unlike the other diagnostics here, this one is written
// without a trailing newline or flush.
94  std::cerr << truncate_error;
95  }
96  }
97  }
98  }
// Closing (and optional deletion) happens whenever a descriptor is held,
// independently of whether a mapping existed.
99  if (m_fd != -1)
100  {
101  auto ret = memory_manager::close_file_for_mmap(m_fd);
102  if (ret != 0)
103  {
104  std::cerr << "int_vector_mapper: error closing file mapping'" << m_file_name << "': " << ret
105  << std::endl;
106  }
107  if (m_delete_on_close)
108  {
109  int ret_code = sdsl::remove(m_file_name);
110  if (ret_code != 0)
111  {
112  std::cerr << "int_vector_mapper: error deleting file '" << m_file_name << "': " << ret_code
113  << std::endl;
114  }
115  }
116  }
// Detach the wrapper from the mapping before m_wrapper itself is destroyed.
// NOTE(review): presumably this keeps int_vector's destructor from releasing
// memory it does not own - confirm against int_vector.
117  m_wrapper.m_data = nullptr;
118  m_wrapper.m_size = 0;
119  }
120 
122  {
123  m_wrapper.m_data = ivm.m_wrapper.m_data;
124  m_wrapper.m_size = ivm.m_wrapper.m_size;
125  m_wrapper.width(ivm.m_wrapper.width());
126  m_file_name = ivm.m_file_name;
127  m_delete_on_close = ivm.m_delete_on_close;
128  ivm.m_wrapper.m_data = nullptr;
129  ivm.m_wrapper.m_size = 0;
130  ivm.m_mapped_data = nullptr;
131  ivm.m_fd = -1;
132  }
133 
135  {
136  m_wrapper.m_data = ivm.m_wrapper.m_data;
137  m_wrapper.m_size = ivm.m_wrapper.m_size;
138  m_wrapper.width(ivm.m_wrapper.width());
139  m_file_name = ivm.m_file_name;
140  m_delete_on_close = ivm.m_delete_on_close;
141  ivm.m_wrapper.m_data = nullptr;
142  ivm.m_wrapper.m_size = 0;
143  ivm.m_mapped_data = nullptr;
144  ivm.m_fd = -1;
145  return (*this);
146  }
147 
148  int_vector_mapper(const std::string & key, const cache_config & config)
149  : int_vector_mapper(cache_file_name(key, config))
150  {}
151 
152  int_vector_mapper(const std::string filename, bool is_plain = false, bool delete_on_close = false)
153  : m_data_offset(0)
154  , m_file_name(filename)
155  , m_delete_on_close(delete_on_close)
156  {
157  size_type size_in_bits = 0;
158  uint8_t int_width = t_width;
159  {
160  isfstream f(filename, std::ifstream::binary);
161  if (!f.is_open())
162  {
163  throw std::runtime_error("int_vector_mapper: file " + m_file_name + " does not exist.");
164  }
165  if (!is_plain) { m_data_offset = int_vector<t_width>::read_header(size_in_bits, int_width, f); }
166  }
167 
168  m_file_size_bytes = util::file_size(m_file_name);
169 
170  if (is_plain)
171  {
172  if (8 != t_width and 16 != t_width and 32 != t_width and 64 != t_width)
173  {
174  throw std::runtime_error("int_vector_mapper: plain vector can "
175  "only be of width 8, 16, 32, 64.");
176  }
177  else
178  {
179  uint8_t byte_width = t_width / 8;
180  // if( m_file_size_bytes % (t_width/8) != 0)
181  if ((m_file_size_bytes & bits::lo_set[bits::cnt(byte_width - 1)]) != 0)
182  {
183  throw std::runtime_error("int_vector_mapper: plain vector not a multiple of byte: " +
184  std::to_string(m_file_size_bytes) + " mod " + std::to_string(byte_width) +
185  " != 0");
186  }
187  }
188  size_in_bits = m_file_size_bytes * 8;
189  }
190 
191  // open backend file depending on mode
192  m_fd = memory_manager::open_file_for_mmap(m_file_name, t_mode);
193  if (m_fd == -1)
194  {
195  std::string open_error = std::string("int_vector_mapper: open file error.") +
196  std::string(util::str_from_errno());
197  throw std::runtime_error(open_error);
198  }
199 
200  // prepare for mmap
201  m_wrapper.width(int_width);
202  // mmap data
203  m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
204  if (m_mapped_data == nullptr)
205  {
206  std::string mmap_error = std::string("int_vector_mapper: mmap error. ") +
207  std::string(util::str_from_errno());
208  throw std::runtime_error(mmap_error);
209  }
210 
211  m_wrapper.m_size = size_in_bits;
212  free(m_wrapper.m_data);
213  m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
214  }
215 
216  std::string file_name() const { return m_file_name; }
217  width_type width() const { return m_wrapper.width(); }
218  void width(const uint8_t new_int_width)
219  {
220  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'width'");
221  m_wrapper.width(new_int_width);
222  }
223  size_type size() const { return m_wrapper.size(); }
// Body of the bit-level resize (the signature line is not present in this
// listing; the static_assert message names it 'bit_resize').
// If the required file size differs from the current one, the file is
// unmapped, truncated/extended to the new size and mapped again; the wrapper
// is then pointed at the new mapping. The logical size is always set to
// bit_size. Throws std::runtime_error when truncation or re-mapping fails.
225  {
226  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'bit_resize'");
// Payload is stored in whole 64-bit words: round up to a multiple of 64 bits
// and convert to bytes.
227  size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3;
228  if (m_file_size_bytes != new_size_in_bytes + m_data_offset)
229  {
230  if (m_mapped_data)
231  {
232  auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
233  if (ret != 0)
234  {
235  std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
236  << std::endl;
237  }
238  }
// NOTE(review): from here until the re-mapping succeeds, m_mapped_data and
// m_wrapper.m_data still hold the address of the mapping released above. If
// the throw below is taken they are left as they are, and the destructor
// will hand m_mapped_data to mem_unmap a second time.
239  int tret = memory_manager::truncate_file_mmap(m_fd, new_size_in_bytes + m_data_offset);
240  if (tret == -1)
241  {
242  std::string truncate_error = std::string("int_vector_mapper: truncate error. ") +
243  std::string(util::str_from_errno());
244  throw std::runtime_error(truncate_error);
245  }
246  m_file_size_bytes = new_size_in_bytes + m_data_offset;
247 
248  // perform the actual mapping
249  m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
250  if (m_mapped_data == nullptr)
251  {
// NOTE(review): on this path m_wrapper.m_data is not reset and keeps the
// old, unmapped address.
252  std::string mmap_error = std::string("int_vector_mapper: mmap error. ") +
253  std::string(util::str_from_errno());
254  throw std::runtime_error(mmap_error);
255  }
256 
257  // update wrapper
258  m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
259  }
260  m_wrapper.m_size = bit_size;
261  }
262 
263  void resize(const size_type size)
264  {
265  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'resize'");
266  size_type size_in_bits = size * width();
267  bit_resize(size_in_bits);
268  }
269 
// Bodies of the mutable begin() and end() accessors (their signature lines
// are not present in this listing; the static_assert messages name them).
// Both are only available for mappings opened with std::ios_base::out and
// forward to the wrapped int_vector.
271  {
272  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'begin'");
273  return m_wrapper.begin();
274  }
276  {
277  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'end'");
278  return m_wrapper.end();
279  }
280  auto begin() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.begin(); }
281  auto end() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.end(); }
282  auto cbegin() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.begin(); }
283  auto cend() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.end(); }
284  auto operator[](const size_type & idx) const -> typename int_vector<t_width>::const_reference
285  {
286  return m_wrapper[idx];
287  }
// Body of the mutable operator[] (the signature line is not present in this
// listing; the static_assert message names it). Only available for mappings
// opened with std::ios_base::out; forwards to the wrapped int_vector.
289  {
290  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'operator[]'");
291  return m_wrapper[idx];
292  }
293  const uint64_t * data() const { return m_wrapper.data(); }
294  uint64_t * data()
295  {
296  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'data'");
297  return m_wrapper.data();
298  }
299  value_type get_int(size_type idx, const uint8_t len = 64) const { return m_wrapper.get_int(idx, len); }
300  void set_int(size_type idx, value_type x, const uint8_t len = 64)
301  {
302  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'set_int'");
303  m_wrapper.set_int(idx, x, len);
304  }
// Body of push_back (the signature line is not present in this listing; the
// static_assert message names it). Appends x as a new last element and
// enlarges the backing file in steps of append_block_size elements when no
// spare capacity is left.
306  {
307  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'push_back'");
308  if (capacity() < size() + 1)
309  {
// bit_resize sets m_wrapper.m_size to the enlarged bit count, so the
// logical size is saved here and restored afterwards.
310  size_type old_size = m_wrapper.m_size;
311  size_type size_in_bits = (size() + append_block_size) * width();
312  bit_resize(size_in_bits);
313  m_wrapper.m_size = old_size;
314  }
315  // update size in wrapper only
316  m_wrapper.m_size += width();
317  m_wrapper[size() - 1] = x;
318  }
// Body whose signature line is not present in this listing - presumably
// capacity(), which the 'push_back' body calls; confirm against the header.
// Returns how many elements of the current width fit into the payload region
// of the file (file size minus data offset).
// NOTE(review): divides by width(); assumes the width is never 0 here.
320  {
321  size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset);
322  return data_size_in_bits / width();
323  }
324  size_type bit_size() const { return m_wrapper.bit_size(); }
325  template <class container>
326  bool operator==(const container & v) const
327  {
328  return std::equal(begin(), end(), v.begin());
329  }
330  bool operator==(const int_vector<t_width> & v) const { return m_wrapper == v; }
331  bool operator==(const int_vector_mapper & v) const { return m_wrapper == v.m_wrapper; }
332  template <class container>
333  bool operator!=(const container & v) const
334  {
335  return !(*this == v);
336  }
337  void flip()
338  {
339  static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'flip'");
340  m_wrapper.flip();
341  }
342  bool empty() const { return m_wrapper.empty(); }
343 };
344 
345 template <uint8_t t_width = 0>
347 {
348  private:
349  static std::string tmp_file(const std::string & dir)
350  {
351  char tmp_file_name[1024] = { 0 };
352 #ifdef _WIN32
353  auto ret = GetTempFileName(dir.c_str(), "tmp_mapper_file_", 0, tmp_file_name);
354  if (ret == 0) { throw std::runtime_error("could not create temporary file."); }
355 #else
356  sprintf(tmp_file_name, "%s/tmp_mapper_file_%" PRIu64 "_XXXXXX.sdsl", dir.c_str(), util::pid());
357  int fd = mkstemps(tmp_file_name, 5);
358  if (fd == -1) { throw std::runtime_error("could not create temporary file."); }
359  close(fd);
360 #endif
361  return std::string(tmp_file_name, strlen(tmp_file_name));
362  }
363 
364  public:
// Body of the parameterless create() (the signature line is not present in
// this listing). Picks a temporary file in the system temp directory and
// forwards to create(file_name).
// NOTE(review): this branch is selected with MSVC_COMPILER while tmp_file()
// uses _WIN32; the value returned by GetTempPath is stored but never checked.
366  {
367 #ifdef MSVC_COMPILER
368  char tmp_dir_name[1024] = { 0 };
369  auto tmp_dir = GetTempPath(1024, tmp_dir_name);
370  auto file_name = tmp_file(tmp_dir_name);
371 #else
// The directory is hard-coded to "/tmp" on all other platforms.
372  auto file_name = tmp_file("/tmp");
373 #endif
374  return create(file_name);
375  }
// Body of create(config) (the signature line is not present in this listing).
// Creates the temporary file inside config.dir and forwards to
// create(file_name).
377  {
378  auto file_name = tmp_file(config.dir);
379  return create(file_name);
380  }
// Initialise `file_name` with an empty int_vector so that a mapper can be
// opened on it.
// NOTE(review): the return statement (listing line 386) is missing from this
// listing, and the bool returned by store_to_file is not checked.
381  static int_vector_mapper<t_width> create(const std::string & file_name)
382  {
383  // write empty int_vector to init the file
384  int_vector<t_width> tmp_vector;
385  store_to_file(tmp_vector, file_name);
387  }
388 };
389 
390 // creates empty int_vector<> that will not be deleted
391 template <uint8_t t_width = 0>
393 {
394  public:
395  static int_vector_mapper<t_width> create(const std::string & key, cache_config & config)
396  {
397  auto file_name = cache_file_name(key, config);
398  auto tmp = create(file_name);
399  register_cache_file(key, config);
400  return std::move(tmp);
401  }
// Initialise `file_name` with an empty int_vector so that a mapper can be
// opened on it.
// NOTE(review): the return statement (listing line 407) is missing from this
// listing, and the bool returned by store_to_file is not checked.
402  static int_vector_mapper<t_width> create(const std::string & file_name)
403  {
404  // write empty int_vector to init the file
405  int_vector<t_width> tmp_vector;
406  store_to_file(tmp_vector, file_name);
408  }
// Initialise `file_name` with an empty int_vector of width `int_width`, then
// resize the mapper opened on it to `size` elements and return it.
// NOTE(review): the declaration of `mapper` (listing line 414) is missing
// from this listing, and the bool returned by store_to_file is not checked.
409  static int_vector_mapper<t_width> create(const std::string & file_name, size_t size, uint8_t int_width = t_width)
410  {
411  // write empty int_vector to init the file
412  int_vector<t_width> tmp_vector(0, 0, int_width);
413  store_to_file(tmp_vector, file_name);
415  mapper.resize(size);
416  return mapper;
417  }
418 };
419 
420 template <std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
422 
423 template <uint8_t t_width = 0>
425 
426 } // namespace sdsl
427 
428 #endif
int_vector_mapper(const std::string filename, bool is_plain=false, bool delete_on_close=false)
bool operator!=(const container &v) const
auto cbegin() const -> typename int_vector< t_width >::const_iterator
value_type get_int(size_type idx, const uint8_t len=64) const
int_vector_mapper(const std::string &key, const cache_config &config)
std::string file_name() const
void bit_resize(const size_type bit_size)
bool operator==(const int_vector< t_width > &v) const
auto begin() -> typename int_vector< t_width >::iterator
const size_type append_block_size
void width(const uint8_t new_int_width)
void set_int(size_type idx, value_type x, const uint8_t len=64)
int_vector< t_width >::int_width_type width_type
int_vector_mapper(int_vector_mapper &&ivm)
static constexpr uint8_t fixed_int_width
auto operator[](const size_type &idx) -> typename int_vector< t_width >::reference
int_vector_mapper(const int_vector_mapper &)=delete
bool operator==(const int_vector_mapper &v) const
auto end() const -> typename int_vector< t_width >::const_iterator
int_vector_mapper & operator=(int_vector_mapper &&ivm)
void push_back(value_type x)
auto cend() const -> typename int_vector< t_width >::const_iterator
const uint64_t * data() const
auto end() -> typename int_vector< t_width >::iterator
int_vector_mapper & operator=(const int_vector_mapper &)=delete
auto operator[](const size_type &idx) const -> typename int_vector< t_width >::const_reference
bool operator==(const container &v) const
int_vector< t_width >::size_type size_type
auto begin() const -> typename int_vector< t_width >::const_iterator
int_vector< t_width >::difference_type difference_type
void resize(const size_type size)
int_vector< t_width >::value_type value_type
A proxy class that acts as a reference to an integer of length len bits in a int_vector.
Definition: int_vector.hpp:869
A generic vector class for integers of width $w \in [1..64]$.
Definition: int_vector.hpp:253
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:788
void flip()
Flip all bits of bit_vector.
Definition: int_vector.hpp:803
bool empty() const noexcept
Equivalent to size() == 0.
Definition: int_vector.hpp:524
value_type get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx in the int_vector.
int_vector_size_type size_type
Definition: int_vector.hpp:266
ptrdiff_t difference_type
Definition: int_vector.hpp:265
int_vector_trait< t_width >::const_reference const_reference
Definition: int_vector.hpp:262
int_vector_trait< t_width >::int_width_type int_width_type
Definition: int_vector.hpp:267
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:571
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:590
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:619
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:255
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
Definition: int_vector.hpp:813
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:830
void set_int(size_type idx, value_type x, const uint8_t len=64)
Set the bits from position idx to idx+len-1 to the binary representation of integer x.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:783
bool is_open()
Is the stream open?
Definition: sfstream.hpp:222
static int close_file_for_mmap(int fd)
static int mem_unmap(int fd, void *addr, const uint64_t size)
static int open_file_for_mmap(std::string &filename, std::ios_base::openmode mode)
static int truncate_file_mmap(int fd, const uint64_t new_size)
static void * mmap_file(int fd, uint64_t file_size, std::ios_base::openmode mode)
osfstream & seekp(pos_type pos)
Definition: sfstream.hpp:111
static int_vector_mapper< t_width > create(const cache_config &config)
static int_vector_mapper< t_width > create()
static int_vector_mapper< t_width > create(const std::string &file_name)
static int_vector_mapper< t_width > create(const std::string &key, cache_config &config)
static int_vector_mapper< t_width > create(const std::string &file_name)
static int_vector_mapper< t_width > create(const std::string &file_name, size_t size, uint8_t int_width=t_width)
int_vector.hpp contains the sdsl::int_vector class.
memory_management.hpp contains two functions for allocating and deallocating memory
int close(const int fd)
Get fd for file.
Definition: ram_fs.hpp:110
size_t file_size(const std::string &file)
Get the size of a file in bytes.
Definition: util.hpp:186
uint64_t pid()
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition: io.hpp:656
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
Definition: io.hpp:798
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
constexpr static uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:197
static SDSL_CONSTEXPR uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:494
Helper class for construction process.
Definition: config.hpp:67
std::string dir
Definition: config.hpp:71