Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13#include <string.h>
14#include <volk/volk.h>
16
/*
 * Two pointer views onto the same decision storage: byte-wise (t) and
 * unsigned-int-wise (w).
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
22
/*
 * Return 1 when x has an odd number of set bits in its low 32 bits,
 * 0 otherwise.
 *
 * The fold is done on an unsigned copy so that the result does not depend on
 * how the implementation right-shifts negative signed values.
 */
static inline int parity(int x)
{
    unsigned int bits = (unsigned int)x;

    /* Repeatedly XOR the upper half onto the lower half; bit 0 ends up
     * holding the XOR of all 32 input bits. */
    bits ^= bits >> 16;
    bits ^= bits >> 8;
    bits ^= bits >> 4;
    bits ^= bits >> 2;
    bits ^= bits >> 1;

    return (int)(bits & 1u);
}
32
/*
 * Trace back through stored Viterbi decisions and write the decoded bits,
 * one 0/1 value per output byte.
 *
 * data      - output buffer; step s is stored at index
 *             (s + tailsize - 6) % nbits, computed in unsigned arithmetic.
 * nbits     - number of traceback steps (and of output bytes written).
 * endstate  - state to start from; reduced modulo the 64 states first.
 * tailsize  - number of leading 8-byte decision records to skip.
 * decisions - decision records, 8 bytes (64 bits, one per state) per step,
 *             interpreted as native unsigned int words.
 *
 * Returns the state reached after the first K-1 (= 6) traceback steps. When
 * nbits is smaller than 6 that point is never reached, and the reduced
 * starting state is returned so the result is always defined.
 *
 * NOTE(review): for tailsize < 6 the output index relies on unsigned
 * wrap-around before the modulo; every caller in this file passes 6.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int d_numstates = 1u << 6;
    const unsigned int d_k = 7;
    const size_t record_bytes = d_numstates / 8;
    const unsigned int flush_steps = d_k - 1;
    const unsigned int dif = tailsize - flush_steps;

    /* Look past tail */
    const unsigned char* d = decisions + (size_t)tailsize * record_bytes;

    endstate %= d_numstates;
    int retval = (int)endstate;

    for (unsigned int step = nbits; step-- != 0;) {
        /* Fetch the word holding this state's decision bit. memcpy keeps the
         * native word layout without reading the byte buffer through an
         * unsigned int pointer. */
        unsigned int word;
        memcpy(&word,
               d + (size_t)step * record_bytes + (endstate / 32) * sizeof(word),
               sizeof(word));
        const unsigned int k = (word >> (endstate % 32)) & 1u;

        /* Shift the decided bit in at the top of the 6-bit state. */
        endstate = (endstate >> 1) | (k << (d_k - 2));
        data[(step + dif) % nbits] = (unsigned char)k;

        /* Remember the state once the first K-1 steps are done. */
        if (nbits >= flush_steps && step == nbits - flush_steps) {
            retval = (int)endstate;
        }
    }

    return retval;
}
93
94
95#if LV_HAVE_SSE3
96
97#include <emmintrin.h>
98#include <mmintrin.h>
99#include <pmmintrin.h>
100#include <stdio.h>
101#include <xmmintrin.h>
102
/*
 * Runs volk_8u_x4_conv_k7_r2_8u_spiral behind a (dec, syms, framebits)
 * signature.
 *
 * dec       - output, filled by chainback_viterbi with one decoded bit per byte.
 * syms      - input symbols, passed straight through to the kernel.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and decision buffer live in function-static
 * storage that is allocated on demand and never released, so the function is
 * not reentrant. The decision buffer is re-allocated whenever a call needs
 * more room than a previous one provided. If an allocation fails the function
 * returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec,
                                                      unsigned char* syms,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static unsigned char* X = NULL;
    static unsigned char* Y = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;

    /* One-time setup of the metric buffers (X and Y share one allocation). */
    if (X == NULL) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;
    }

    /* One-time setup of the branch table; it is published only once filled. */
    if (Branchtab == NULL) {
        unsigned char* table =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (table == NULL) {
            return;
        }
        for (int state = 0; state < d_numstates / 2; state++) {
            for (int i = 0; i < rate; i++) {
                table[i * (d_numstates / 2) + state] =
                    parity((2 * state) & d_polys[i]) ? 255 : 0;
            }
        }
        Branchtab = table;
    }

    /* The decision buffer depends on framebits, so grow it when needed. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D_capacity = 0;
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric
     * (lowest index wins ties). */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 1; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
166
167#endif /*LV_HAVE_SSE3*/
168
169
170#if LV_HAVE_NEON
171
/*
 * Runs volk_8u_x4_conv_k7_r2_8u_neonspiral behind a (dec, syms, framebits)
 * signature.
 *
 * dec       - output, filled by chainback_viterbi with one decoded bit per byte.
 * syms      - input symbols, passed straight through to the kernel.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and decision buffer live in function-static
 * storage that is allocated on demand and never released, so the function is
 * not reentrant. The decision buffer is re-allocated whenever a call needs
 * more room than a previous one provided. If an allocation fails the function
 * returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec,
                                                          unsigned char* syms,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static unsigned char* X = NULL;
    static unsigned char* Y = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;

    /* One-time setup of the metric buffers (X and Y share one allocation). */
    if (X == NULL) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;
    }

    /* One-time setup of the branch table; it is published only once filled. */
    if (Branchtab == NULL) {
        unsigned char* table =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (table == NULL) {
            return;
        }
        for (int state = 0; state < d_numstates / 2; state++) {
            for (int i = 0; i < rate; i++) {
                table[i * (d_numstates / 2) + state] =
                    parity((2 * state) & d_polys[i]) ? 255 : 0;
            }
        }
        Branchtab = table;
    }

    /* The decision buffer depends on framebits, so grow it when needed. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D_capacity = 0;
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric
     * (lowest index wins ties). */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 1; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
235
236#endif /*LV_HAVE_NEON*/
237
238
239#if LV_HAVE_AVX2
240
241#include <immintrin.h>
242#include <stdio.h>
243
/*
 * Runs volk_8u_x4_conv_k7_r2_8u_avx2 behind a (dec, syms, framebits)
 * signature.
 *
 * dec       - output, filled by chainback_viterbi with one decoded bit per byte.
 * syms      - input symbols, passed straight through to the kernel.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and decision buffer live in function-static
 * storage that is allocated on demand and never released, so the function is
 * not reentrant. The decision buffer is re-allocated whenever a call needs
 * more room than a previous one provided. If an allocation fails the function
 * returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec,
                                                    unsigned char* syms,
                                                    unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static unsigned char* X = NULL;
    static unsigned char* Y = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;

    /* One-time setup of the metric buffers (X and Y share one allocation). */
    if (X == NULL) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;
    }

    /* One-time setup of the branch table; it is published only once filled. */
    if (Branchtab == NULL) {
        unsigned char* table =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (table == NULL) {
            return;
        }
        for (int state = 0; state < d_numstates / 2; state++) {
            for (int i = 0; i < rate; i++) {
                table[i * (d_numstates / 2) + state] =
                    parity((2 * state) & d_polys[i]) ? 255 : 0;
            }
        }
        Branchtab = table;
    }

    /* The decision buffer depends on framebits, so grow it when needed. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D_capacity = 0;
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_avx2(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric
     * (lowest index wins ties). */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 1; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
307
308#endif /*LV_HAVE_AVX2*/
309
310
311#if LV_HAVE_GENERIC
312
313
/*
 * Runs volk_8u_x4_conv_k7_r2_8u_generic behind a (dec, syms, framebits)
 * signature.
 *
 * dec       - output, filled by chainback_viterbi with one decoded bit per byte.
 * syms      - input symbols, passed straight through to the kernel.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers, branch table and decision buffer live in function-static
 * storage that is allocated on demand and never released, so the function is
 * not reentrant. The decision buffer is re-allocated whenever a call needs
 * more room than a previous one provided. If an allocation fails the function
 * returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec,
                                                       unsigned char* syms,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const int rate = 2;
    const unsigned int excess = 6;
    const int d_polys[2] = { 79, 109 };

    static unsigned char* X = NULL;
    static unsigned char* Y = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;

    /* One-time setup of the metric buffers (X and Y share one allocation). */
    if (X == NULL) {
        X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        if (X == NULL) {
            return;
        }
        Y = X + d_numstates;
    }

    /* One-time setup of the branch table; it is published only once filled. */
    if (Branchtab == NULL) {
        unsigned char* table =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (table == NULL) {
            return;
        }
        for (int state = 0; state < d_numstates / 2; state++) {
            for (int i = 0; i < rate; i++) {
                table[i * (d_numstates / 2) + state] =
                    parity((2 * state) & d_polys[i]) ? 255 : 0;
            }
        }
        Branchtab = table;
    }

    /* The decision buffer depends on framebits, so grow it when needed. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D_capacity = 0;
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            return;
        }
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric
     * (lowest index wins ties). */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 1; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
378
379#endif /* LV_HAVE_GENERIC */
380
381#if LV_HAVE_RVV
382#include <riscv_vector.h>
383
/*
 * Runs volk_8u_x4_conv_k7_r2_8u_rvv behind a (dec, syms, framebits)
 * signature.
 *
 * dec       - output, filled by chainback_viterbi with one decoded bit per byte.
 * syms      - input symbols, passed straight through to the kernel.
 * framebits - frame size; calls with framebits < 12 return without doing
 *             anything.
 *
 * The metric buffers and branch table share one function-static allocation;
 * the decision buffer is a second one. Neither is ever released, so the
 * function is not reentrant. The decision buffer is re-allocated whenever a
 * call needs more room than a previous one provided. If an allocation fails
 * the function returns without touching dec.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_rvv(unsigned char* dec,
                                                   unsigned char* syms,
                                                   unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    const int d_numstates = (1 << 6);
    const unsigned int excess = 6;

    static unsigned char* X = NULL;
    static unsigned char* Y = NULL;
    static unsigned char* Branchtab = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;

    /* One-time setup: X, Y and the branch table are carved out of a single
     * allocation of three state-sized regions. */
    if (X == NULL) {
        unsigned char* block =
            (unsigned char*)volk_malloc(3 * d_numstates, volk_get_alignment());
        if (block == NULL) {
            return;
        }
        unsigned char* table = block + 2 * d_numstates;

        /* Initialize the branch table */
        for (int state = 0; state < d_numstates / 2; state++) {
            table[state] = (unsigned char)(parity(state & 39) * 255);
            table[state + d_numstates / 2] = (unsigned char)(parity(state & 54) * 255);
        }

        Y = block + d_numstates;
        Branchtab = table;
        X = block;
    }

    /* The decision buffer depends on framebits, so grow it when needed. */
    const size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);
    if (decision_bytes > D_capacity) {
        if (D != NULL) {
            volk_free(D);
        }
        D_capacity = 0;
        D = (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (D == NULL) {
            return;
        }
        D_capacity = decision_bytes;
    }

    memset(X, 31, d_numstates);   // unbias the old_metrics
    memset(D, 0, decision_bytes); // initialize decisions

    volk_8u_x4_conv_k7_r2_8u_rvv(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric
     * (lowest index wins ties). */
    unsigned int min = X[0];
    int state = 0;
    for (int i = 1; i < d_numstates; ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
}
434#endif /*LV_HAVE_RVV*/
435
436#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
Definition volk_8u_conv_k7_r2puppet_8u.h:17
unsigned int * w
Definition volk_8u_conv_k7_r2puppet_8u.h:20
unsigned char * t
Definition volk_8u_conv_k7_r2puppet_8u.h:19
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition volk.tmpl.c:90
static int parity(int x)
Definition volk_8u_conv_k7_r2puppet_8u.h:23
static void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:172
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition volk_8u_conv_k7_r2puppet_8u.h:33
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:103
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition volk_8u_conv_k7_r2puppet_8u.h:314
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:212
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:310
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition volk_8u_x4_conv_k7_r2_8u.h:443
for i
Definition volk_config_fixed.tmpl.h:13
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition volk_malloc.c:38