13 #ifndef MMTF_BINARY_DECODER_H
14 #define MMTF_BINARY_DECODER_H
18 #include <msgpack.hpp>
39 const std::string& key =
"UNNAMED_BINARY");
// The key is only used to label error messages; "UNNAMED_BINARY" is the
// label when the caller does not supply one.

// Start of the encoded payload: the constructor sets this 12 bytes into the
// msgpack binary. It points into the msgpack object's buffer, so presumably
// that object has to outlive the decoder -- TODO confirm with callers.
66 const char* encodedData_;
// Payload size in bytes (size of the msgpack binary minus the 12 header bytes).
67 uint32_t encodedDataLength_;
// Compares the header's length_ with exp_length and formats a
// "Length mismatch" message when they differ.
70 void checkLength_(int32_t exp_length);
// Throws DecodeError unless encodedDataLength_ is a multiple of item_size.
72 void checkDivisibleBy_(int32_t item_size);
// Raw payload readers, one overload per element type:
//  - float / int32_t: consecutive big-endian 4-byte items
//  - int16_t:         consecutive big-endian 2-byte items
//  - int8_t:          plain byte copy
//  - std::string:     fixed-width fields of parameter_ bytes with the
//                     zero bytes removed
75 void decodeFromBytes_(std::vector<float>& output);
76 void decodeFromBytes_(std::vector<int8_t>& output);
77 void decodeFromBytes_(std::vector<int16_t>& output);
78 void decodeFromBytes_(std::vector<int32_t>& output);
80 void decodeFromBytes_(std::vector<std::string>& output);
// Expands (value, count) pairs taken from input: each value is converted to
// IntOut and appended count times.
85 template<
typename Int,
typename IntOut>
86 void runLengthDecode_(
const std::vector<Int>& input,
87 std::vector<IntOut>& output);
// Running sum: output element i is output element i-1 plus input element i.
90 template<
typename Int>
91 void deltaDecode_(
const std::vector<Int>& input, std::vector<Int>& output);
// In-place variant of the running sum above.
93 template<
typename Int>
94 void deltaDecode_(std::vector<Int>& in_out);
// Produces one output element for every input element that is neither the
// minimum nor the maximum value of SmallInt.
97 template<
typename SmallInt,
typename Int>
98 void recursiveIndexDecode_(
const std::vector<SmallInt>& input,
99 std::vector<Int>& output);
// Converts every input element to float and multiplies it by 1/divisor.
102 template<
typename Int>
103 void decodeDivide_(
const std::vector<Int>& input,
float divisor,
104 std::vector<float>& output);
116 #include <winsock2.h>
118 #include <arpa/inet.h>
121 #ifndef __EMSCRIPTEN__
/**
 * Read a 32-bit big-endian value from src and store it, in host byte order,
 * in the 4 bytes at dst.
 *
 * The bytes are combined with shifts and written with memcpy instead of
 * dereferencing the pointers as uint32_t: src is an arbitrary offset into a
 * byte buffer and dst may be float or int32_t storage, so neither alignment
 * nor the pointee type can be assumed. The result does not depend on the
 * byte order of the host.
 *
 * @param dst  destination with room for 4 bytes
 * @param src  source holding 4 bytes, most significant first
 */
void assignBigendian4(void* dst, const char* src) {
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(src);
    const uint32_t value = (static_cast<uint32_t>(bytes[0]) << 24) |
                           (static_cast<uint32_t>(bytes[1]) << 16) |
                           (static_cast<uint32_t>(bytes[2]) << 8) |
                           static_cast<uint32_t>(bytes[3]);
    memcpy(dst, &value, sizeof(value));
}
/**
 * Read a 16-bit big-endian value from src and store it, in host byte order,
 * in the 2 bytes at dst.
 *
 * The bytes are combined with a shift and written with memcpy instead of
 * dereferencing the pointers as uint16_t, so neither pointer has to be
 * aligned and the result does not depend on the byte order of the host.
 *
 * @param dst  destination with room for 2 bytes
 * @param src  source holding 2 bytes, most significant first
 */
void assignBigendian2(void* dst, const char* src) {
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(src);
    const uint16_t value = static_cast<uint16_t>(
        (static_cast<unsigned int>(bytes[0]) << 8) |
        static_cast<unsigned int>(bytes[1]));
    memcpy(dst, &value, sizeof(value));
}
/**
 * Byte-wise variant: read a 32-bit big-endian value from src and store it,
 * in host byte order, in the 4 bytes at dst.
 *
 * Like the byte-reversing code it replaces, this never dereferences src or
 * dst as a wider integer, so unaligned pointers are fine. Unlike a fixed
 * byte reversal it is also correct on a big-endian host, because the value
 * is assembled arithmetically and only then copied out.
 *
 * @param dst  destination with room for 4 bytes
 * @param src  source holding 4 bytes, most significant first
 */
void assignBigendian4(void* dst, const char* src) {
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(src);
    const uint32_t value = (static_cast<uint32_t>(bytes[0]) << 24) |
                           (static_cast<uint32_t>(bytes[1]) << 16) |
                           (static_cast<uint32_t>(bytes[2]) << 8) |
                           static_cast<uint32_t>(bytes[3]);
    memcpy(dst, &value, sizeof(value));
}
/**
 * Byte-wise variant: read a 16-bit big-endian value from src and store it,
 * in host byte order, in the 2 bytes at dst.
 *
 * No pointer is dereferenced as a wider integer, so unaligned pointers are
 * fine, and the value is assembled arithmetically so the result is correct
 * on hosts of either byte order.
 *
 * @param dst  destination with room for 2 bytes
 * @param src  source holding 2 bytes, most significant first
 */
void assignBigendian2(void* dst, const char* src) {
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(src);
    const uint16_t value = static_cast<uint16_t>(
        (static_cast<unsigned int>(bytes[0]) << 8) |
        static_cast<unsigned int>(bytes[1]));
    memcpy(dst, &value, sizeof(value));
}
148 void arrayCopyBigendian4(
void* dst,
const char* src,
size_t n) {
149 for (
size_t i = 0; i < n; i += 4) {
150 assignBigendian4(((
char*)dst) + i, src + i);
154 void arrayCopyBigendian2(
void* dst,
const char* src,
size_t n) {
155 for (
size_t i = 0; i < n; i += 2) {
156 assignBigendian2(((
char*)dst) + i, src + i);
163 const std::string& key)
// Only a msgpack BIN object can be decoded; anything else is reported with
// the caller-supplied key in the message.
166 if (obj.type != msgpack::type::BIN) {
167 throw DecodeError(
"The '" + key +
"' entry is not binary data");
// The header read below occupies the first 12 bytes (three 4-byte words),
// so shorter binaries are reported as too short.
169 if (obj.via.bin.size < 12) {
170 std::stringstream err;
171 err <<
"The '" + key +
"' entry is too short " << obj.via.bin.size;
175 const char* bytes = obj.via.bin.ptr;
// Header words are read as big-endian 32-bit values:
// offset 0 -> strategy_, offset 4 -> length_, offset 8 -> parameter_.
177 assignBigendian4(&strategy_, bytes);
178 assignBigendian4(&length_, bytes + 4);
// NOTE(review): the argument below looks like a mis-encoded "&parameter_"
// (an "&para" sequence rendered as a pilcrow) -- restore it before compiling.
179 assignBigendian4(¶meter_, bytes + 8);
// Everything after the 12-byte header is the encoded payload. The pointer
// refers to the msgpack object's own buffer; no copy is made here.
180 encodedData_ = bytes + 12;
181 encodedDataLength_ = obj.via.bin.size - 12;
// Fragment of the routine that fills a float output (see the error text at
// the end). The enclosing signature and the dispatch on strategy_ are not
// part of this view, so which strategy selects which pipeline cannot be
// stated here.

// Pipeline: payload read directly into output.
195 decodeFromBytes_(output);
// Pipeline: int32 payload -> run-length expansion -> scaled by 1/parameter_.
199 std::vector<int32_t> step1;
200 std::vector<int32_t> step2;
201 decodeFromBytes_(step1);
202 runLengthDecode_(step1, step2);
203 decodeDivide_(step2,
static_cast<float>(parameter_), output);
// Pipeline: int16 payload -> recursive-index decoding into int32 -> scaled
// by 1/parameter_.
// NOTE(review): the visible steps are identical to the int16 pipeline
// further down; a step may be missing from this view -- compare with the
// full file.
207 std::vector<int16_t> step1;
208 std::vector<int32_t> step2;
209 decodeFromBytes_(step1);
210 recursiveIndexDecode_(step1, step2);
212 decodeDivide_(step2,
static_cast<float>(parameter_), output);
// Pipeline: int16 payload scaled directly by 1/parameter_.
216 std::vector<int16_t> step1;
217 decodeFromBytes_(step1);
218 decodeDivide_(step1,
static_cast<float>(parameter_), output);
// Pipeline: int16 payload -> recursive-index decoding into int32 -> scaled
// by 1/parameter_.
222 std::vector<int16_t> step1;
223 std::vector<int32_t> step2;
224 decodeFromBytes_(step1);
225 recursiveIndexDecode_(step1, step2);
226 decodeDivide_(step2,
static_cast<float>(parameter_), output);
// Pipeline: int8 payload -> recursive-index decoding into int32 -> scaled
// by 1/parameter_.
230 std::vector<int8_t> step1;
231 std::vector<int32_t> step2;
232 decodeFromBytes_(step1);
233 recursiveIndexDecode_(step1, step2);
234 decodeDivide_(step2,
static_cast<float>(parameter_), output);
// Error text naming the offending strategy_ and the key of the binary;
// presumably the fallback for a strategy none of the pipelines handles.
238 std::stringstream err;
239 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
240 <<
"': does not decode to float array";
// Final consistency check: number of decoded elements against length_
// from the header.
246 checkLength_(output.size());
// Fragment of the routine that fills an int8 output (see the error text);
// its signature and strategy dispatch are not part of this view.
// Only visible pipeline: the payload is read directly into output.
255 decodeFromBytes_(output);
// Error text naming the offending strategy_ and the key of the binary;
// presumably the fallback for any other strategy.
259 std::stringstream err;
260 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
261 <<
"': does not decode to int8 array";
// Final consistency check against length_ from the header.
267 checkLength_(output.size());
// Fragment of the routine that fills an int16 output (see the error text);
// its signature and strategy dispatch are not part of this view.
// Only visible pipeline: the payload is read directly into output.
276 decodeFromBytes_(output);
// Error text naming the offending strategy_ and the key of the binary;
// presumably the fallback for any other strategy.
280 std::stringstream err;
281 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
282 <<
"': does not decode to int16 array";
// Final consistency check against length_ from the header.
288 checkLength_(output.size());
// Fragment of the routine that fills an int32 output (see the error text);
// its signature and strategy dispatch are not part of this view.

// Pipeline: payload read directly into output.
297 decodeFromBytes_(output);
// Pipeline: int32 payload -> run-length expansion into output.
301 std::vector<int32_t> step1;
302 decodeFromBytes_(step1);
303 runLengthDecode_(step1, output);
// Pipeline: int32 payload -> run-length expansion -> in-place running sum.
307 std::vector<int32_t> step1;
308 decodeFromBytes_(step1);
309 runLengthDecode_(step1, output);
310 deltaDecode_(output);
// Pipeline: int16 payload -> recursive-index decoding into output.
314 std::vector<int16_t> step1;
315 decodeFromBytes_(step1);
316 recursiveIndexDecode_(step1, output);
// Pipeline: int8 payload -> recursive-index decoding into output.
320 std::vector<int8_t> step1;
321 decodeFromBytes_(step1);
322 recursiveIndexDecode_(step1, output);
// Error text naming the offending strategy_ and the key of the binary;
// presumably the fallback for a strategy none of the pipelines handles.
326 std::stringstream err;
327 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
328 <<
"': does not decode to int32 array";
// Final consistency check against length_ from the header.
334 checkLength_(output.size());
// Fragment of the routine that fills a string output (see the error text);
// its signature and strategy dispatch are not part of this view.
// Only visible pipeline: the payload is read directly into output (the
// string reader splits it into fields of parameter_ bytes).
343 decodeFromBytes_(output);
// Error text naming the offending strategy_ and the key of the binary;
// presumably the fallback for any other strategy.
347 std::stringstream err;
348 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
349 <<
"': does not decode to string array";
// Final consistency check against length_ from the header.
355 checkLength_(output.size());
// Fragment of a further decode routine; its signature (and therefore the
// element type of output) is not part of this view.
// Only visible pipeline: int32 payload -> run-length expansion into output.
364 std::vector<int32_t> step1;
365 decodeFromBytes_(step1);
366 runLengthDecode_(step1, output);
// NOTE(review): the text below says "string array", yet this path converts
// run-length encoded integers into output, which cannot be a vector of
// strings. The wording looks copied from the string routine -- confirm the
// intended element type and adjust the message.
370 std::stringstream err;
371 err <<
"Invalid strategy " << strategy_ <<
" for binary '" + key_
372 <<
"': does not decode to string array";
// Final consistency check against length_ from the header.
378 checkLength_(output.size());
// Consistency check between the element count announced in the header
// (length_) and the number of elements actually decoded (exp_length).
// Callers pass output.size(), which is narrowed to int32_t by this
// signature. On a mismatch a message naming the binary's key and both
// values is formatted; the statement that consumes the message is not part
// of this view (presumably a DecodeError is thrown, as in
// checkDivisibleBy_ -- TODO confirm).
382 inline void BinaryDecoder::checkLength_(int32_t exp_length) {
383 if (length_ != exp_length) {
384 std::stringstream err;
385 err <<
"Length mismatch for binary '" + key_ +
"': "
386 << length_ <<
" vs " << exp_length;
391 inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) {
392 if (encodedDataLength_ % item_size != 0) {
393 std::stringstream err;
394 err <<
"Binary length of '" + key_ +
"': "
395 << encodedDataLength_ <<
" is not a multiple of " << item_size;
396 throw DecodeError(err.str());
401 inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) {
402 checkDivisibleBy_(4);
404 output.resize(encodedDataLength_ / 4);
406 if(!output.empty()) {
407 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
410 inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) {
412 output.resize(encodedDataLength_);
414 if (!output.empty()) {
415 memcpy(&output[0], encodedData_, encodedDataLength_);
418 inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) {
419 checkDivisibleBy_(2);
421 output.resize(encodedDataLength_ / 2);
423 if (!output.empty()) {
424 arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
427 inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) {
428 checkDivisibleBy_(4);
430 output.resize(encodedDataLength_ / 4);
432 if (!output.empty()) {
433 arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
437 inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) {
438 char NULL_BYTE = 0x00;
440 const int32_t str_len = parameter_;
441 checkDivisibleBy_(str_len);
443 output.resize(encodedDataLength_ / str_len);
445 for (
size_t i = 0; i < output.size(); ++i) {
446 output[i].assign(encodedData_ + i * str_len, str_len);
447 output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
452 template<
typename Int,
typename IntOut>
453 void BinaryDecoder::runLengthDecode_(
const std::vector<Int>& input,
454 std::vector<IntOut>& output) {
456 checkDivisibleBy_(2);
459 for (
size_t i = 0; i < input.size(); i += 2) {
460 out_size += input[i + 1];
464 output.reserve(out_size);
466 for (
size_t i = 0; i < input.size(); i += 2) {
467 const IntOut value = IntOut(input[i]);
468 const Int number = input[i+1];
469 for (Int j = 0; j < number; ++j) {
470 output.push_back(value);
476 template<
typename Int>
477 void BinaryDecoder::deltaDecode_(
const std::vector<Int>& input,
478 std::vector<Int>& output) {
481 if (input.empty())
return;
482 output.reserve(input.size());
484 output.push_back(input[0]);
485 for (
size_t i = 1; i < input.size(); ++i) {
486 output.push_back(output[i - 1] + input[i]);
489 template<
typename Int>
490 void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) {
491 for (
size_t i = 1; i < in_out.size(); ++i) {
492 in_out[i] = in_out[i - 1] + in_out[i];
// Recursive-index decoding from a narrow integer type (SmallInt) into a
// wider one (Int). Input elements equal to the minimum or maximum value of
// SmallInt are treated as markers: they never produce an output element on
// their own. The lines that maintain cur_val and out_size are not part of
// this view; presumably cur_val accumulates marker values until a
// non-marker element completes the number -- TODO confirm against the full
// file.
497 template<
typename SmallInt,
typename Int>
498 void BinaryDecoder::recursiveIndexDecode_(
const std::vector<SmallInt>& input,
499 std::vector<Int>& output) {
// the two marker values
501 const SmallInt min_int = std::numeric_limits<SmallInt>::min();
502 const SmallInt max_int = std::numeric_limits<SmallInt>::max();
// first pass: count the non-marker elements, i.e. the number of outputs
505 for (
size_t i = 0; i < input.size(); ++i) {
506 if (input[i] != min_int && input[i] != max_int) ++out_size;
// allocate once for the second pass
510 output.reserve(out_size);
// second pass: one value is emitted per non-marker element
513 for (
size_t i = 0; i < input.size(); ++i) {
515 if (input[i] != min_int && input[i] != max_int) {
516 output.push_back(cur_val);
523 template<
typename Int>
524 void BinaryDecoder::decodeDivide_(
const std::vector<Int>& input,
float divisor,
525 std::vector<float>& output) {
528 output.reserve(input.size());
529 float inv_div = float(1) / divisor;
531 for (
size_t i = 0; i < input.size(); ++i) {
532 output.push_back(
float(input[i]) * inv_div);