MMTF-C++
The C++ language MMTF libraries
binary_encoder.hpp
Go to the documentation of this file.
1 // *************************************************************************
2 //
3 // Licensed under the MIT License (see accompanying LICENSE file).
4 //
5 // The author of this code is: Daniel Farrell
6 //
7 // Based on mmtf_python, adapted to c++ standards 2018
8 //
9 // *************************************************************************
10 
11 
12 #ifndef MMTF_BINARY_ENCODER_H
13 #define MMTF_BINARY_ENCODER_H
#include <math.h>
#include <stdint.h>

#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// byteorder functions
#ifdef WIN32
#include <winsock2.h>
#else
#include <arpa/inet.h>
#endif
22 
23 namespace mmtf {
24 
25 // *************************************************************************
26 // PRIVATE FUNCTIONS (only visible in this header)
27 // *************************************************************************
28 
29 namespace { // private helpers
30 
/**
 * @brief Scale every float by a multiplier and round it to an int32.
 * @param[in] vec_in     Floats to convert.
 * @param[in] multiplier Factor applied to each value before rounding.
 * @return One int32 per input element.
 */
inline std::vector<int32_t> convertFloatsToInts(
    std::vector<float> const & vec_in,
    int multiplier);

/**
 * @brief Delta-encode a vector: first element verbatim, then successive
 *        differences.
 * @param[in] vec_in Values to encode.
 * @return Vector of the same length as the input.
 */
inline std::vector<int32_t> deltaEncode(
    std::vector<int32_t> const & vec_in);

/**
 * @brief Run-length encode a vector into (value, count) pairs.
 * @param[in] vec_in Values to encode.
 * @return Flattened pairs; empty when the input is empty.
 */
inline std::vector<int32_t> runLengthEncode(
    std::vector<int32_t> const & vec_in);

/**
 * @brief Recursive-index encode a vector so every output element lies in
 *        [min, max].
 * @param[in] vec_in Values to encode.
 * @param[in] max    Upper limit emitted for positive overflow (default 32767).
 * @param[in] min    Lower limit emitted for negative overflow (default -32768).
 * @return Encoded vector, at least as long as the input.
 */
inline std::vector<int32_t> recursiveIndexEncode(
    std::vector<int32_t> const & vec_in,
    int max=32767,
    int min=-32768);

/**
 * @brief Widen every char to an int32.
 * @param[in] vec_in Characters to convert.
 * @return One int32 per input character.
 */
inline std::vector<int32_t> convertCharsToInts(
    std::vector<char> const & vec_in);

/**
 * @brief Write the 12-byte header (codec, array size, parameter) to a stream,
 *        each field as a big-endian 32-bit integer.
 * @param[in,out] ss         Stream that receives the header.
 * @param[in]     array_size Value stored in the array-size field.
 * @param[in]     codec      Value stored in the codec field.
 * @param[in]     param      Value stored in the parameter field (default 0).
 */
inline void add_header(
    std::stringstream & ss,
    uint32_t array_size,
    uint32_t codec,
    uint32_t param=0);

/**
 * @brief Copy the current contents of a stringstream into a char vector.
 * @param[in] ss Stream to read from.
 * @return The stream's bytes.
 */
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss);
88 
89 } // anon ns
90 
91 // *************************************************************************
92 // PUBLIC FUNCTIONS
93 // *************************************************************************
94 
/**
 * @brief Encode 8-bit integers as raw bytes behind a header with codec id 2.
 * @param[in] vec_in Values to encode.
 * @return Header followed by one byte per value.
 */
inline std::vector<char> encodeInt8ToByte(
    std::vector<int8_t> vec_in);

/**
 * @brief Encode 32-bit integers as big-endian words behind a header with
 *        codec id 4.
 * @param[in] vec_in Values to encode.
 * @return Header followed by four bytes per value.
 */
inline std::vector<char> encodeFourByteInt(
    std::vector<int32_t> vec_in);

/**
 * @brief Encode strings as fixed-width, null-padded fields behind a header
 *        with codec id 5.
 * @param[in] in_sv     Strings to encode.
 * @param[in] CHAIN_LEN Width of every field; also stored as header parameter.
 * @return Header followed by the padded strings.
 */
inline std::vector<char> encodeStringVector(
    std::vector<std::string> in_sv,
    int32_t CHAIN_LEN);

/**
 * @brief Run-length encode characters behind a header with codec id 6.
 * @param[in] in_cv Characters to encode.
 * @return Header followed by big-endian 32-bit (value, count) pairs.
 */
inline std::vector<char> encodeRunLengthChar(
    std::vector<char> in_cv);

/**
 * @brief Delta- then run-length encode integers behind a header with
 *        codec id 8.
 * @param[in] int_vec Values to encode.
 * @return Header followed by big-endian 32-bit (value, count) pairs.
 */
inline std::vector<char> encodeRunLengthDeltaInt(
    std::vector<int32_t> int_vec);

/**
 * @brief Convert floats to integers, then run-length encode them behind a
 *        header with codec id 9.
 * @param[in] floats_in  Values to encode.
 * @param[in] multiplier Scale factor; also stored as header parameter.
 * @return Header followed by big-endian 32-bit (value, count) pairs.
 */
inline std::vector<char> encodeRunLengthFloat(
    std::vector<float> floats_in,
    int32_t multiplier);

/**
 * @brief Convert floats to integers, then delta- and recursive-index encode
 *        them behind a header with codec id 10.
 * @param[in] floats_in  Values to encode.
 * @param[in] multiplier Scale factor; also stored as header parameter.
 * @return Header followed by big-endian 16-bit values.
 */
inline std::vector<char> encodeDeltaRecursiveFloat(
    std::vector<float> floats_in,
    int32_t multiplier);
141 
142 // *************************************************************************
143 // IMPLEMENTATION
144 // *************************************************************************
145 
146 namespace { // private helpers
147 
/**
 * @brief Scale every float by a multiplier and round it to an int32.
 *
 * The product is formed in the same arithmetic as before (float times int)
 * and rounded with round(), i.e. halfway cases go away from zero.
 *
 * @param[in] vec_in     Floats to convert.
 * @param[in] multiplier Factor applied to each value before rounding.
 * @return One int32 per input element, in input order.
 */
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
                                                int multiplier) {
  std::vector<int32_t> vec_out;
  // The output length is known up front; reserve once instead of regrowing.
  vec_out.reserve(vec_in.size());
  for (size_t i = 0; i < vec_in.size(); ++i) {
    vec_out.push_back(static_cast<int32_t>(round(vec_in[i] * multiplier)));
  }
  return vec_out;
}
156 
157 
/**
 * @brief Delta-encode a vector of integers.
 *
 * The first element is copied verbatim; every following output element is
 * the difference between the corresponding input element and its
 * predecessor.
 *
 * @param[in] vec_in Values to encode.
 * @return Vector of the same length as the input (empty for empty input).
 */
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in) {
  std::vector<int32_t> vec_out;
  if (vec_in.empty()) return vec_out;
  vec_out.reserve(vec_in.size());
  vec_out.push_back(vec_in[0]);
  // Index with size_t: casting size() to int would truncate for very large
  // inputs and mixes signed/unsigned comparisons.
  for (size_t i = 1; i < vec_in.size(); ++i) {
    vec_out.push_back(vec_in[i] - vec_in[i - 1]);
  }
  return vec_out;
}
167 
168 
/**
 * @brief Run-length encode a vector of integers.
 *
 * Consecutive equal values are collapsed into a (value, count) pair; the
 * pairs are stored flattened, so the output always has an even length.
 *
 * @param[in] vec_in Values to encode.
 * @return Flattened (value, count) pairs; empty when the input is empty.
 */
inline std::vector<int32_t> runLengthEncode(std::vector<int32_t> const & vec_in ) {
  std::vector<int32_t> ret;
  if (vec_in.empty()) return ret;

  int32_t run_value = vec_in[0];
  int32_t run_length = 1;
  // Index with size_t: casting size() to int would truncate for very large
  // inputs and mixes signed/unsigned comparisons.
  for (size_t i = 1; i < vec_in.size(); ++i) {
    if (vec_in[i] == run_value) {
      ++run_length;
      continue;
    }
    // The run ended: flush it and start a new one at the current element.
    ret.push_back(run_value);
    ret.push_back(run_length);
    run_value = vec_in[i];
    run_length = 1;
  }
  // Flush the final run.
  ret.push_back(run_value);
  ret.push_back(run_length);
  return ret;
}
188 
189 
/**
 * @brief Recursive-index encode a vector of integers.
 *
 * A non-negative value that is >= max is emitted as repeated `max` entries
 * followed by the remainder; a negative value that is <= min is emitted as
 * repeated `min` entries followed by the remainder. The remainder is always
 * written, so a value equal to a limit is followed by a 0.
 *
 * @pre max > 0 and min < 0; otherwise the splitting loops cannot make
 *      progress towards zero.
 *
 * @param[in] vec_in Values to encode.
 * @param[in] max    Upper limit (the declaration defaults it to 32767).
 * @param[in] min    Lower limit (the declaration defaults it to -32768).
 * @return Encoded vector, at least as long as the input.
 */
inline std::vector<int32_t> recursiveIndexEncode(
    std::vector<int32_t> const & vec_in,
    int max /* =32767 */, int min /*=-32768 */) {
  std::vector<int32_t> vec_out;
  vec_out.reserve(vec_in.size());
  for (size_t i = 0; i < vec_in.size(); ++i) {
    int32_t x = vec_in[i];
    if (x >= 0) {
      while (x >= max) {
        vec_out.push_back(max);
        x -= max;
      }
    } else {
      while (x <= min) {
        vec_out.push_back(min);
        // Subtracting the (negative) limit moves x towards zero. Unlike
        // std::abs(min) this needs no <cstdlib> and stays defined even when
        // min is the most negative int.
        x -= min;
      }
    }
    vec_out.push_back(x);
  }
  return vec_out;
}
211 
212 
/**
 * @brief Widen every char to an int32.
 *
 * @param[in] vec_in Characters to convert.
 * @return One int32 per input character, in input order; each element is the
 *         integer value of the corresponding char.
 */
inline std::vector<int32_t> convertCharsToInts(std::vector<char> const & vec_in) {
  // The range constructor performs the same char -> int conversion per
  // element, with a single allocation and no C-style cast.
  return std::vector<int32_t>(vec_in.begin(), vec_in.end());
}
220 
/**
 * @brief Write the 12-byte array header to a stream.
 *
 * The header consists of three 32-bit fields written most-significant byte
 * first, in this order: codec, array size, parameter. The bytes are produced
 * with shifts, so the result does not depend on the host byte order or on
 * the platform's htonl().
 *
 * @param[in,out] ss         Stream that receives the header.
 * @param[in]     array_size Value stored in the second field.
 * @param[in]     codec      Value stored in the first field.
 * @param[in]     param      Value stored in the third field (the declaration
 *                           defaults it to 0).
 */
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param /* =0 */) {
  const uint32_t fields[3] = { codec, array_size, param };
  char bytes[12];
  for (int f = 0; f < 3; ++f) {
    bytes[4 * f + 0] = static_cast<char>((fields[f] >> 24) & 0xFF);
    bytes[4 * f + 1] = static_cast<char>((fields[f] >> 16) & 0xFF);
    bytes[4 * f + 2] = static_cast<char>((fields[f] >> 8) & 0xFF);
    bytes[4 * f + 3] = static_cast<char>(fields[f] & 0xFF);
  }
  ss.write(bytes, sizeof(bytes));
}
229 
230 
/**
 * @brief Copy the current contents of a stringstream into a char vector.
 *
 * The stream is read through str(), so its read/write positions are not
 * moved and embedded null bytes are preserved.
 *
 * @param[in] ss Stream to read from.
 * @return The stream's bytes, in order.
 */
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss) {
  const std::string buffer(ss.str());
  return std::vector<char>(buffer.begin(), buffer.end());
}
236 
237 } // anon ns
238 
239 
240 inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in) {
241  std::stringstream ss;
242  add_header(ss, vec_in.size(), 2, 0);
243  for (size_t i=0; i<vec_in.size(); ++i) {
244  ss.write(reinterpret_cast< char * >(&vec_in[i]), sizeof(vec_in[i]));
245  }
246  return stringstreamToCharVector(ss);
247 }
248 
249 
250 inline std::vector<char> encodeFourByteInt(std::vector<int32_t> vec_in) {
251  std::stringstream ss;
252  add_header(ss, vec_in.size(), 4, 0);
253  for (size_t i=0; i<vec_in.size(); ++i) {
254  int32_t be_x = htonl(vec_in[i]);
255  ss.write(reinterpret_cast< char * >(&be_x), sizeof(be_x));
256  }
257  return stringstreamToCharVector(ss);
258 }
259 
260 
261 inline std::vector<char> encodeStringVector(std::vector<std::string> in_sv, int32_t CHAIN_LEN) {
262  char NULL_BYTE = 0x00;
263  std::stringstream ss;
264  add_header(ss, in_sv.size(), 5, CHAIN_LEN);
265  std::vector<char> char_vec;
266  for (size_t i=0; i<in_sv.size(); ++i) {
267  char_vec.insert(char_vec.end(), in_sv[i].begin(), in_sv[i].end());
268  for (size_t j=0; j<CHAIN_LEN-in_sv[i].size(); ++j) {
269  char_vec.push_back(NULL_BYTE);
270  }
271  }
272  for (size_t i=0; i<char_vec.size(); ++i) {
273  ss.write(reinterpret_cast< char * >(&char_vec[i]), sizeof(char_vec[i]));
274  }
275  return stringstreamToCharVector(ss);
276 }
277 
278 
279 inline std::vector<char> encodeRunLengthChar(std::vector<char> in_cv) {
280  std::stringstream ss;
281  add_header(ss, in_cv.size(), 6, 0);
282  std::vector<int32_t> int_vec;
283  int_vec = convertCharsToInts(in_cv);
284  int_vec = runLengthEncode(int_vec);
285  for (size_t i=0; i<int_vec.size(); ++i) {
286  int32_t temp = htonl(int_vec[i]);
287  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
288  }
289  return stringstreamToCharVector(ss);
290 }
291 
292 
293 inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec) {
294  std::stringstream ss;
295  add_header(ss, int_vec.size(), 8, 0);
296  int_vec = deltaEncode(int_vec);
297  int_vec = runLengthEncode(int_vec);
298  for (size_t i=0; i<int_vec.size(); ++i) {
299  int32_t temp = htonl(int_vec[i]);
300  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
301  }
302  return stringstreamToCharVector(ss);
303 }
304 
305 inline std::vector<char> encodeRunLengthFloat(std::vector<float> floats_in, int32_t multiplier) {
306  std::stringstream ss;
307  add_header(ss, floats_in.size(), 9, multiplier);
308  std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
309  int_vec = runLengthEncode(int_vec);
310  for (size_t i=0; i<int_vec.size(); ++i) {
311  int32_t temp = htonl(int_vec[i]);
312  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
313  }
314  return stringstreamToCharVector(ss);
315 }
316 
317 
318 
319 inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> floats_in, int32_t multiplier) {
320  std::stringstream ss;
321  add_header(ss, floats_in.size(), 10, multiplier);
322  std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
323  int_vec = deltaEncode(int_vec);
324  int_vec = recursiveIndexEncode(int_vec);
325  for (size_t i=0; i<int_vec.size(); ++i) {
326  int16_t temp = htons(int_vec[i]);
327  ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
328  }
329  return stringstreamToCharVector(ss);
330 }
331 
332 } // mmtf namespace
333 #endif
mmtf::encodeInt8ToByte
std::vector< char > encodeInt8ToByte(std::vector< int8_t > vec_in)
Definition: binary_encoder.hpp:240
mmtf::encodeRunLengthChar
std::vector< char > encodeRunLengthChar(std::vector< char > in_cv)
Definition: binary_encoder.hpp:279
mmtf::encodeRunLengthFloat
std::vector< char > encodeRunLengthFloat(std::vector< float > floats_in, int32_t multiplier)
Definition: binary_encoder.hpp:305
mmtf::encodeRunLengthDeltaInt
std::vector< char > encodeRunLengthDeltaInt(std::vector< int32_t > int_vec)
Definition: binary_encoder.hpp:293
mmtf::encodeFourByteInt
std::vector< char > encodeFourByteInt(std::vector< int32_t > vec_in)
Definition: binary_encoder.hpp:250
mmtf::encodeStringVector
std::vector< char > encodeStringVector(std::vector< std::string > in_sv, int32_t CHAIN_LEN)
Definition: binary_encoder.hpp:261
mmtf
Definition: binary_decoder.hpp:24
mmtf::encodeDeltaRecursiveFloat
std::vector< char > encodeDeltaRecursiveFloat(std::vector< float > floats_in, int32_t multiplier)
Definition: binary_encoder.hpp:319