MMTF-C++
The C++ language MMTF libraries
binary_decoder.hpp
Go to the documentation of this file.
1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The authors of this code are: Gerardo Tauriello
6 //
7 // Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
8 // Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
9 // Gazal Kalyan, Alexander Rose.
10 //
11 // *************************************************************************
12 
13 #ifndef MMTF_BINARY_DECODER_H
14 #define MMTF_BINARY_DECODER_H
15 
16 #include "structure_data.hpp"
17 
18 #include <msgpack.hpp>
19 #include <cstring> // low level mem
20 #include <sstream>
21 #include <limits>
22 #include <algorithm>
23 
24 namespace mmtf {
25 
30 public:
38  BinaryDecoder(const msgpack::object& obj,
39  const std::string& key = "UNNAMED_BINARY");
40 
56  template<typename T>
57  void decode(T& target);
58 
59 private:
60  // for error reporting
61  std::string key_;
62  // data from binary header
63  int32_t strategy_;
64  int32_t length_;
65  int32_t parameter_;
66  const char* encodedData_;
67  uint32_t encodedDataLength_; // max. size for binary is 2^32 - 1
68 
69  // check length consistency (throws)
70  void checkLength_(int32_t exp_length);
71  // check if binary data is divisible by x (throws)
72  void checkDivisibleBy_(int32_t item_size);
73 
74  // byte decoders
75  void decodeFromBytes_(std::vector<float>& output);
76  void decodeFromBytes_(std::vector<int8_t>& output);
77  void decodeFromBytes_(std::vector<int16_t>& output);
78  void decodeFromBytes_(std::vector<int32_t>& output);
79  // special one: decode to vector of strings
80  void decodeFromBytes_(std::vector<std::string>& output);
81 
82  // run length decoding
83  // -> Int and IntOut can be any integer types
84  // -> Int values are blindly converted to IntOut
85  template<typename Int, typename IntOut>
86  void runLengthDecode_(const std::vector<Int>& input,
87  std::vector<IntOut>& output);
88 
89  // delta decoding -> Int can be any integer type
90  template<typename Int>
91  void deltaDecode_(const std::vector<Int>& input, std::vector<Int>& output);
92  // variant doing it in-place
93  template<typename Int>
94  void deltaDecode_(std::vector<Int>& in_out);
95 
96  // recursive indexing decode -> SmallInt must be smaller than Int
97  template<typename SmallInt, typename Int>
98  void recursiveIndexDecode_(const std::vector<SmallInt>& input,
99  std::vector<Int>& output);
100 
101  // decode integer to float -> Int can be any integer type
102  template<typename Int>
103  void decodeDivide_(const std::vector<Int>& input, float divisor,
104  std::vector<float>& output);
105 };
106 
107 // *************************************************************************
108 // IMPLEMENTATION
109 // *************************************************************************
110 
111 // helpers in anonymous namespace (only visible in this file)
112 namespace {
113 
114 // byteorder functions ("ntohl" etc.)
115 #ifdef WIN32
116 #include <winsock2.h>
117 #else
118 #include <arpa/inet.h>
119 #endif
120 
121 #ifndef __EMSCRIPTEN__
/**
 * Read a 4-byte big-endian value from `src` and store it, in host byte order,
 * into the 4 bytes starting at `dst`.
 *
 * The value is assembled byte by byte and written with memcpy, so neither
 * pointer needs to be aligned for uint32_t and no object is accessed through
 * an incompatible pointer type. The result is the same on little- and
 * big-endian hosts.
 *
 * @param dst  Destination with room for at least 4 bytes.
 * @param src  Source with at least 4 readable bytes (most significant first).
 */
void assignBigendian4(void* dst, const char* src) {
    // go through unsigned char so that bytes >= 0x80 are not sign-extended
    const uint32_t value =
        (static_cast<uint32_t>(static_cast<unsigned char>(src[0])) << 24) |
        (static_cast<uint32_t>(static_cast<unsigned char>(src[1])) << 16) |
        (static_cast<uint32_t>(static_cast<unsigned char>(src[2])) << 8) |
        static_cast<uint32_t>(static_cast<unsigned char>(src[3]));
    std::memcpy(dst, &value, sizeof(value));
}
125 
/**
 * Read a 2-byte big-endian value from `src` and store it, in host byte order,
 * into the 2 bytes starting at `dst`.
 *
 * The value is assembled byte by byte and written with memcpy, so neither
 * pointer needs to be aligned for uint16_t and no object is accessed through
 * an incompatible pointer type. The result is the same on little- and
 * big-endian hosts.
 *
 * @param dst  Destination with room for at least 2 bytes.
 * @param src  Source with at least 2 readable bytes (most significant first).
 */
void assignBigendian2(void* dst, const char* src) {
    // go through unsigned char so that bytes >= 0x80 are not sign-extended
    const uint16_t value = static_cast<uint16_t>(
        (static_cast<unsigned int>(static_cast<unsigned char>(src[0])) << 8) |
        static_cast<unsigned int>(static_cast<unsigned char>(src[1])));
    std::memcpy(dst, &value, sizeof(value));
}
129 #else
130 // Need to avoid how emscripten handles memory
131 // Note that this will only work on little endian machines, but this should not be a major
132 // issue as Emscripten only supports little endian hardware.
133 // see: https://kripken.github.io/emscripten-site/docs/porting/guidelines/portability_guidelines.html
134 
// Emscripten variant: copy the 4 bytes at `src` to `dst` in reversed order,
// one byte at a time (no multi-byte loads or stores are performed).
void assignBigendian4(void* dst, const char* src) {
    uint8_t* const out = static_cast<uint8_t*>(dst);
    for (int k = 0; k < 4; ++k) {
        out[k] = src[3 - k];
    }
}
141 
// Emscripten variant: copy the 2 bytes at `src` to `dst` in swapped order,
// one byte at a time (no multi-byte loads or stores are performed).
void assignBigendian2(void* dst, const char* src) {
    uint8_t* const out = static_cast<uint8_t*>(dst);
    out[0] = src[1];
    out[1] = src[0];
}
146 #endif
147 
148 void arrayCopyBigendian4(void* dst, const char* src, size_t n) {
149  for (size_t i = 0; i < n; i += 4) {
150  assignBigendian4(((char*)dst) + i, src + i);
151  }
152 }
153 
154 void arrayCopyBigendian2(void* dst, const char* src, size_t n) {
155  for (size_t i = 0; i < n; i += 2) {
156  assignBigendian2(((char*)dst) + i, src + i);
157  }
158 }
159 
160 } // anon ns
161 
162 inline BinaryDecoder::BinaryDecoder(const msgpack::object& obj,
163  const std::string& key)
164  : key_(key) {
165  // sanity checks
166  if (obj.type != msgpack::type::BIN) {
167  throw DecodeError("The '" + key + "' entry is not binary data");
168  }
169  if (obj.via.bin.size < 12) {
170  std::stringstream err;
171  err << "The '" + key + "' entry is too short " << obj.via.bin.size;
172  throw DecodeError(err.str());
173  }
174  // get data (encoded data is only pointed to and not parsed here)
175  const char* bytes = obj.via.bin.ptr;
176 
177  assignBigendian4(&strategy_, bytes);
178  assignBigendian4(&length_, bytes + 4);
179  assignBigendian4(&parameter_, bytes + 8);
180  encodedData_ = bytes + 12;
181  encodedDataLength_ = obj.via.bin.size - 12;
182 }
183 
184 template<typename T>
185 void BinaryDecoder::decode(T& target) {
186  throw mmtf::DecodeError("Invalid target type for binary '" + key_ + "'");
187 }
188 
189 template<>
190 inline void BinaryDecoder::decode(std::vector<float>& output) {
191 
192  // check strategy to parse
193  switch (strategy_) {
194  case 1: {
195  decodeFromBytes_(output);
196  break;
197  }
198  case 9: {
199  std::vector<int32_t> step1;
200  std::vector<int32_t> step2;
201  decodeFromBytes_(step1);
202  runLengthDecode_(step1, step2);
203  decodeDivide_(step2, static_cast<float>(parameter_), output);
204  break;
205  }
206  case 10: {
207  std::vector<int16_t> step1;
208  std::vector<int32_t> step2;
209  decodeFromBytes_(step1);
210  recursiveIndexDecode_(step1, step2);
211  deltaDecode_(step2);
212  decodeDivide_(step2, static_cast<float>(parameter_), output);
213  break;
214  }
215  case 11: {
216  std::vector<int16_t> step1;
217  decodeFromBytes_(step1);
218  decodeDivide_(step1, static_cast<float>(parameter_), output);
219  break;
220  }
221  case 12: {
222  std::vector<int16_t> step1;
223  std::vector<int32_t> step2;
224  decodeFromBytes_(step1);
225  recursiveIndexDecode_(step1, step2);
226  decodeDivide_(step2, static_cast<float>(parameter_), output);
227  break;
228  }
229  case 13: {
230  std::vector<int8_t> step1;
231  std::vector<int32_t> step2;
232  decodeFromBytes_(step1);
233  recursiveIndexDecode_(step1, step2);
234  decodeDivide_(step2, static_cast<float>(parameter_), output);
235  break;
236  }
237  default: {
238  std::stringstream err;
239  err << "Invalid strategy " << strategy_ << " for binary '" + key_
240  << "': does not decode to float array";
241  throw DecodeError(err.str());
242  }
243  }
244 
245  // check size
246  checkLength_(output.size());
247 }
248 
249 template<>
250 inline void BinaryDecoder::decode(std::vector<int8_t>& output) {
251 
252  // check strategy to parse
253  switch (strategy_) {
254  case 2: {
255  decodeFromBytes_(output);
256  break;
257  }
258  default: {
259  std::stringstream err;
260  err << "Invalid strategy " << strategy_ << " for binary '" + key_
261  << "': does not decode to int8 array";
262  throw DecodeError(err.str());
263  }
264  }
265 
266  // check size
267  checkLength_(output.size());
268 }
269 
270 template<>
271 inline void BinaryDecoder::decode(std::vector<int16_t>& output) {
272 
273  // check strategy to parse
274  switch (strategy_) {
275  case 3: {
276  decodeFromBytes_(output);
277  break;
278  }
279  default: {
280  std::stringstream err;
281  err << "Invalid strategy " << strategy_ << " for binary '" + key_
282  << "': does not decode to int16 array";
283  throw DecodeError(err.str());
284  }
285  }
286 
287  // check size
288  checkLength_(output.size());
289 }
290 
291 template<>
292 inline void BinaryDecoder::decode(std::vector<int32_t>& output) {
293 
294  // check strategy to parse
295  switch (strategy_) {
296  case 4: {
297  decodeFromBytes_(output);
298  break;
299  }
300  case 7: {
301  std::vector<int32_t> step1;
302  decodeFromBytes_(step1);
303  runLengthDecode_(step1, output);
304  break;
305  }
306  case 8: {
307  std::vector<int32_t> step1;
308  decodeFromBytes_(step1);
309  runLengthDecode_(step1, output);
310  deltaDecode_(output);
311  break;
312  }
313  case 14: {
314  std::vector<int16_t> step1;
315  decodeFromBytes_(step1);
316  recursiveIndexDecode_(step1, output);
317  break;
318  }
319  case 15: {
320  std::vector<int8_t> step1;
321  decodeFromBytes_(step1);
322  recursiveIndexDecode_(step1, output);
323  break;
324  }
325  default: {
326  std::stringstream err;
327  err << "Invalid strategy " << strategy_ << " for binary '" + key_
328  << "': does not decode to int32 array";
329  throw DecodeError(err.str());
330  }
331  }
332 
333  // check size
334  checkLength_(output.size());
335 }
336 
337 template<>
338 inline void BinaryDecoder::decode(std::vector<std::string>& output) {
339 
340  // check strategy to parse
341  switch (strategy_) {
342  case 5: {
343  decodeFromBytes_(output);
344  break;
345  }
346  default: {
347  std::stringstream err;
348  err << "Invalid strategy " << strategy_ << " for binary '" + key_
349  << "': does not decode to string array";
350  throw DecodeError(err.str());
351  }
352  }
353 
354  // check size
355  checkLength_(output.size());
356 }
357 
358 template<>
359 inline void BinaryDecoder::decode(std::vector<char>& output) {
360 
361  // check strategy to parse
362  switch (strategy_) {
363  case 6: {
364  std::vector<int32_t> step1;
365  decodeFromBytes_(step1);
366  runLengthDecode_(step1, output);
367  break;
368  }
369  default: {
370  std::stringstream err;
371  err << "Invalid strategy " << strategy_ << " for binary '" + key_
372  << "': does not decode to string array";
373  throw DecodeError(err.str());
374  }
375  }
376 
377  // check size
378  checkLength_(output.size());
379 }
380 
381 // checks
382 inline void BinaryDecoder::checkLength_(int32_t exp_length) {
383  if (length_ != exp_length) {
384  std::stringstream err;
385  err << "Length mismatch for binary '" + key_ + "': "
386  << length_ << " vs " << exp_length;
387  throw DecodeError(err.str());
388  }
389 }
390 
391 inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) {
392  if (encodedDataLength_ % item_size != 0) {
393  std::stringstream err;
394  err << "Binary length of '" + key_ + "': "
395  << encodedDataLength_ << " is not a multiple of " << item_size;
396  throw DecodeError(err.str());
397  }
398 }
399 
400 // byte decoders
401 inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) {
402  checkDivisibleBy_(4);
403  // prepare memory
404  output.resize(encodedDataLength_ / 4);
405  // get data
406  if(!output.empty()) {
407  arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
408  }
409 }
410 inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) {
411  // prepare memory
412  output.resize(encodedDataLength_);
413  // get data
414  if (!output.empty()) {
415  memcpy(&output[0], encodedData_, encodedDataLength_);
416  }
417 }
418 inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) {
419  checkDivisibleBy_(2);
420  // prepare memory
421  output.resize(encodedDataLength_ / 2);
422  // get data
423  if (!output.empty()) {
424  arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
425  }
426 }
427 inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) {
428  checkDivisibleBy_(4);
429  // prepare memory
430  output.resize(encodedDataLength_ / 4);
431  // get data
432  if (!output.empty()) {
433  arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
434  }
435 }
436 // special one: decode to vector of strings
437 inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) {
438  char NULL_BYTE = 0x00;
439  // check parameter
440  const int32_t str_len = parameter_;
441  checkDivisibleBy_(str_len);
442  // prepare memory
443  output.resize(encodedDataLength_ / str_len);
444  // get data
445  for (size_t i = 0; i < output.size(); ++i) {
446  output[i].assign(encodedData_ + i * str_len, str_len);
447  output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
448  }
449 }
450 
451 // run length decoding
452 template<typename Int, typename IntOut>
453 void BinaryDecoder::runLengthDecode_(const std::vector<Int>& input,
454  std::vector<IntOut>& output) {
455  // we work with pairs of numbers
456  checkDivisibleBy_(2);
457  // find out size of resulting vector (for speed)
458  size_t out_size = 0;
459  for (size_t i = 0; i < input.size(); i += 2) {
460  out_size += input[i + 1];
461  }
462  // reserve space (for speed)
463  output.clear();
464  output.reserve(out_size);
465  // get data
466  for (size_t i = 0; i < input.size(); i += 2) {
467  const IntOut value = IntOut(input[i]);
468  const Int number = input[i+1];
469  for (Int j = 0; j < number; ++j) {
470  output.push_back(value);
471  }
472  }
473 }
474 
475 // delta decoding
476 template<typename Int>
477 void BinaryDecoder::deltaDecode_(const std::vector<Int>& input,
478  std::vector<Int>& output) {
479  // reserve space (for speed)
480  output.clear();
481  if (input.empty()) return; // ensure we have some values
482  output.reserve(input.size());
483  // get data
484  output.push_back(input[0]);
485  for (size_t i = 1; i < input.size(); ++i) {
486  output.push_back(output[i - 1] + input[i]);
487  }
488 }
489 template<typename Int>
490 void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) {
491  for (size_t i = 1; i < in_out.size(); ++i) {
492  in_out[i] = in_out[i - 1] + in_out[i];
493  }
494 }
495 
496 // recursive indexing decode
497 template<typename SmallInt, typename Int>
498 void BinaryDecoder::recursiveIndexDecode_(const std::vector<SmallInt>& input,
499  std::vector<Int>& output) {
500  // get limits
501  const SmallInt min_int = std::numeric_limits<SmallInt>::min();
502  const SmallInt max_int = std::numeric_limits<SmallInt>::max();
503  // find out size of resulting vector (for speed)
504  size_t out_size = 0;
505  for (size_t i = 0; i < input.size(); ++i) {
506  if (input[i] != min_int && input[i] != max_int) ++out_size;
507  }
508  // reserve space (for speed)
509  output.clear();
510  output.reserve(out_size);
511  // get data
512  Int cur_val = 0;
513  for (size_t i = 0; i < input.size(); ++i) {
514  cur_val += input[i];
515  if (input[i] != min_int && input[i] != max_int) {
516  output.push_back(cur_val);
517  cur_val = 0;
518  }
519  }
520 }
521 
522 // decode integer to float
523 template<typename Int>
524 void BinaryDecoder::decodeDivide_(const std::vector<Int>& input, float divisor,
525  std::vector<float>& output) {
526  // reserve space and get inverted divisor (for speed)
527  output.clear();
528  output.reserve(input.size());
529  float inv_div = float(1) / divisor;
530  // get data
531  for (size_t i = 0; i < input.size(); ++i) {
532  output.push_back(float(input[i]) * inv_div);
533  }
534 }
535 
536 } // mmtf namespace
537 
538 #endif
structure_data.hpp
mmtf::DecodeError
Exception thrown when failing during decoding.
Definition: errors.hpp:23
mmtf::BinaryDecoder::decode
void decode(T &target)
Decode binary msgpack object into the given target.
Definition: binary_decoder.hpp:185
mmtf::BinaryDecoder::BinaryDecoder
BinaryDecoder(const msgpack::object &obj, const std::string &key="UNNAMED_BINARY")
Initialize object given a msgpack object. Reads out binary header to prepare for call of decode.
Definition: binary_decoder.hpp:162
mmtf::BinaryDecoder
Helper class to decode msgpack binary into a vector.
Definition: binary_decoder.hpp:29
mmtf
Definition: binary_decoder.hpp:24