12 #ifndef MMTF_BINARY_ENCODER_H
13 #define MMTF_BINARY_ENCODER_H
20 #include <arpa/inet.h>
/** Scale each float by a multiplier, round it, and store it as int32_t. */
37 inline std::vector<int32_t> convertFloatsToInts(std::vector<float>
const & vec_in,
/**
 * Delta-encode: the first element is kept, every following element is
 * replaced by its difference to the previous input element.
 * An empty input gives an empty output.
 */
45 inline std::vector<int32_t> deltaEncode(std::vector<int32_t>
const & vec_in);
/**
 * Run-length encode a vector of integers; the definition emits values and
 * run counters. An empty input gives an empty output.
 */
53 inline std::vector<int32_t> runLengthEncode(std::vector<int32_t>
const & vec_in );
/**
 * Recursive-index encode a vector of integers; the definition emits `max`,
 * `min` and remainder values. The limits default to the int16 range
 * (32767 / -32768).
 */
62 inline std::vector<int32_t> recursiveIndexEncode(std::vector<int32_t>
const & vec_in,
63 int max=32767,
int min=-32768);
/** Widen every char of the input to a 32-bit integer. */
70 inline std::vector<int32_t> convertCharsToInts(std::vector<char>
const & vec_in);
/**
 * Write codec, array size and param (in that order) to the stream, each as a
 * 32-bit value converted with htonl(). `param` defaults to 0.
 */
80 inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param=0);
/** Copy the current contents of the stream into a vector of chars. */
87 inline std::vector<char> stringstreamToCharVector(std::stringstream & ss);
/**
 * Encode strings as null-padded fields of CHAIN_LEN bytes, preceded by a
 * header carrying codec 5 and CHAIN_LEN as the parameter.
 */
112 inline std::vector<char>
encodeStringVector(std::vector<std::string> in_sv, int32_t CHAIN_LEN);
/**
 * Convert floats to integers using `multiplier`, run-length encode them and
 * write the result as big-endian 32-bit values after a codec 9 header.
 */
133 inline std::vector<char>
encodeRunLengthFloat(std::vector<float> floats_in, int32_t multiplier);
/**
 * @brief Convert floats to fixed-point integers.
 *
 * Every input value is multiplied by @p multiplier, rounded with round()
 * and cast to int32_t.
 *
 * @param[in] vec_in      values to convert
 * @param[in] multiplier  scale factor applied before rounding
 * @return one int32_t per input element, in input order
 */
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
                                                int multiplier) {
  std::vector<int32_t> vec_out;
  // The output has exactly as many elements as the input.
  vec_out.reserve(vec_in.size());
  for (size_t i = 0; i < vec_in.size(); ++i) {
    vec_out.push_back(static_cast<int32_t>(round(vec_in[i] * multiplier)));
  }
  return vec_out;
}
/**
 * @brief Delta-encode a vector of integers.
 *
 * @param[in] vec_in  values to encode
 * @return vec_in[0] followed by vec_in[i] - vec_in[i-1] for every later
 *         element; an empty vector if the input is empty
 */
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in) {
  std::vector<int32_t> vec_out;
  if (vec_in.empty()) return vec_out;
  vec_out.reserve(vec_in.size());
  vec_out.push_back(vec_in[0]);
  // size_t index: no narrowing cast of size() is needed.
  for (size_t i = 1; i < vec_in.size(); ++i) {
    vec_out.push_back(vec_in[i] - vec_in[i - 1]);
  }
  return vec_out;
}
/**
 * @brief Run-length encode a vector of integers.
 *
 * @param[in] vec_in  values to encode
 * @return a flat sequence of (value, run length) pairs, one pair per run of
 *         equal consecutive values; an empty vector if the input is empty
 */
inline std::vector<int32_t> runLengthEncode(std::vector<int32_t> const & vec_in ) {
  std::vector<int32_t> ret;
  if (vec_in.empty()) return ret;
  int32_t curr = vec_in[0];
  ret.push_back(curr);
  int32_t counter = 1;
  for (size_t i = 1; i < vec_in.size(); ++i) {
    if (vec_in[i] == curr) {
      ++counter;
    } else {
      // Close the finished run, then open a new one with the new value.
      ret.push_back(counter);
      ret.push_back(vec_in[i]);
      curr = vec_in[i];
      counter = 1;
    }
  }
  // Length of the final run.
  ret.push_back(counter);
  return ret;
}
/**
 * @brief Recursive-index encode a vector of integers.
 *
 * A value that does not fit strictly between @p min and @p max is split
 * into a run of limit values followed by the remainder. A value equal to a
 * limit is emitted as that limit followed by 0.
 *
 * The default limits (32767 / -32768) are supplied by the declaration.
 *
 * @param[in] vec_in  values to encode
 * @param[in] max     upper limit; must be > 0 or the split loop cannot end
 * @param[in] min     lower limit; must be < 0 or the split loop cannot end
 * @return the encoded sequence
 */
inline std::vector<int32_t> recursiveIndexEncode(
    std::vector<int32_t> const & vec_in,
    int max /* =32767 */, int min /* =-32768 */) {
  std::vector<int32_t> vec_out;
  // Every input element yields at least one output element.
  vec_out.reserve(vec_in.size());
  for (size_t i = 0; i < vec_in.size(); ++i) {
    int32_t x = vec_in[i];
    if (x >= 0) {
      while (x >= max) {
        vec_out.push_back(max);
        x -= max;
      }
    } else {
      while (x <= min) {
        vec_out.push_back(min);
        // Subtracting the negative limit moves x towards zero.
        x -= min;
      }
    }
    vec_out.push_back(x);
  }
  return vec_out;
}
/**
 * @brief Widen every char to a 32-bit integer.
 *
 * @param[in] vec_in  characters to convert
 * @return one int32_t per input char, in input order
 */
inline std::vector<int32_t> convertCharsToInts(std::vector<char> const & vec_in) {
  std::vector<int32_t> vec_out;
  // The output has exactly as many elements as the input.
  vec_out.reserve(vec_in.size());
  for (size_t i = 0; i < vec_in.size(); ++i) {
    vec_out.push_back(static_cast<int32_t>(vec_in[i]));
  }
  return vec_out;
}
/**
 * @brief Append the 12-byte array header to a stream.
 *
 * Three 32-bit fields are written in this order: codec, array size, param.
 * Each field is converted to network byte order with htonl() first.
 * The default for @p param (0) is supplied by the declaration.
 *
 * @param[in,out] ss          stream receiving the header bytes
 * @param[in]     array_size  value of the array-size field
 * @param[in]     codec       value of the codec field
 * @param[in]     param       value of the parameter field
 */
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param /* =0 */) {
  // Field order in the array is the order in which the bytes are emitted.
  const uint32_t be_fields[3] = { htonl(codec), htonl(array_size), htonl(param) };
  ss.write(reinterpret_cast<const char *>(be_fields), sizeof(be_fields));
}
/**
 * @brief Copy the contents of a stringstream into a vector of chars.
 *
 * @param[in] ss  stream whose buffered contents (ss.str()) are copied
 * @return the bytes of ss.str(), embedded null bytes included
 */
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss) {
  const std::string s = ss.str();
  return std::vector<char>(s.begin(), s.end());
}
// Buffer that accumulates the encoded bytes.
241 std::stringstream ss;
// Header: array size = number of input elements, codec = 2, param = 0.
242 add_header(ss, vec_in.size(), 2, 0);
// Append the raw bytes of every element as-is; no byte-order conversion is
// applied in this loop.
243 for (
size_t i=0; i<vec_in.size(); ++i) {
244 ss.write(
reinterpret_cast< char *
>(&vec_in[i]),
sizeof(vec_in[i]));
// Return the accumulated bytes as a vector<char>.
246 return stringstreamToCharVector(ss);
// Buffer that accumulates the encoded bytes.
251 std::stringstream ss;
// Header: array size = number of input elements, codec = 4, param = 0.
252 add_header(ss, vec_in.size(), 4, 0);
// Each element is converted with htonl() (network byte order) and the bytes
// of the resulting int32_t are appended to the stream.
253 for (
size_t i=0; i<vec_in.size(); ++i) {
254 int32_t be_x = htonl(vec_in[i]);
255 ss.write(
reinterpret_cast< char *
>(&be_x),
sizeof(be_x));
// Return the accumulated bytes as a vector<char>.
257 return stringstreamToCharVector(ss);
262 char NULL_BYTE = 0x00;
263 std::stringstream ss;
264 add_header(ss, in_sv.size(), 5, CHAIN_LEN);
265 std::vector<char> char_vec;
266 for (
size_t i=0; i<in_sv.size(); ++i) {
267 char_vec.insert(char_vec.end(), in_sv[i].begin(), in_sv[i].end());
268 for (
size_t j=0; j<CHAIN_LEN-in_sv[i].size(); ++j) {
269 char_vec.push_back(NULL_BYTE);
272 for (
size_t i=0; i<char_vec.size(); ++i) {
273 ss.write(
reinterpret_cast< char *
>(&char_vec[i]),
sizeof(char_vec[i]));
275 return stringstreamToCharVector(ss);
// Buffer that accumulates the encoded bytes.
280 std::stringstream ss;
// Header: array size = number of input chars (before any encoding),
// codec = 6, param = 0.
281 add_header(ss, in_cv.size(), 6, 0);
282 std::vector<int32_t> int_vec;
// Widen the chars to 32-bit integers, then run-length encode them.
283 int_vec = convertCharsToInts(in_cv);
284 int_vec = runLengthEncode(int_vec);
// Each encoded integer is converted with htonl() (network byte order) and
// appended to the stream.
285 for (
size_t i=0; i<int_vec.size(); ++i) {
286 int32_t temp = htonl(int_vec[i]);
287 ss.write(
reinterpret_cast< char *
>(&temp),
sizeof(temp));
// Return the accumulated bytes as a vector<char>.
289 return stringstreamToCharVector(ss);
// Buffer that accumulates the encoded bytes.
294 std::stringstream ss;
// Header: codec = 8, param = 0. The array size is taken here, before
// int_vec is overwritten below, so it is the element count of the input.
295 add_header(ss, int_vec.size(), 8, 0);
// Delta-encode first, then run-length encode the deltas.
296 int_vec = deltaEncode(int_vec);
297 int_vec = runLengthEncode(int_vec);
// Each encoded integer is converted with htonl() (network byte order) and
// appended to the stream.
298 for (
size_t i=0; i<int_vec.size(); ++i) {
299 int32_t temp = htonl(int_vec[i]);
300 ss.write(
reinterpret_cast< char *
>(&temp),
sizeof(temp));
// Return the accumulated bytes as a vector<char>.
302 return stringstreamToCharVector(ss);
306 std::stringstream ss;
307 add_header(ss, floats_in.size(), 9, multiplier);
308 std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
309 int_vec = runLengthEncode(int_vec);
310 for (
size_t i=0; i<int_vec.size(); ++i) {
311 int32_t temp = htonl(int_vec[i]);
312 ss.write(
reinterpret_cast< char *
>(&temp),
sizeof(temp));
314 return stringstreamToCharVector(ss);
// Buffer that accumulates the encoded bytes.
320 std::stringstream ss;
// Header: array size = number of input floats, codec = 10,
// param = multiplier.
321 add_header(ss, floats_in.size(), 10, multiplier);
// Floats -> scaled integers -> deltas -> recursive-index encoding (called
// without limits, so the defaults of its declaration apply).
322 std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
323 int_vec = deltaEncode(int_vec);
324 int_vec = recursiveIndexEncode(int_vec);
// Each encoded value is narrowed to 16 bits, converted with htons()
// (network byte order) and appended to the stream.
325 for (
size_t i=0; i<int_vec.size(); ++i) {
326 int16_t temp = htons(int_vec[i]);
327 ss.write(
reinterpret_cast< char *
>(&temp),
sizeof(temp));
// Return the accumulated bytes as a vector<char>.
329 return stringstreamToCharVector(ss);