41#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H
42#define INCLUDED_volk_16ic_deinterleave_real_8i_a_H
51static inline void volk_16ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
56 const int8_t* complexVectorPtr = (int8_t*)complexVector;
57 int8_t* iBufferPtr = iBuffer;
58 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
90 __m256i iMoveMask2 = _mm256_set_epi8(13,
122 __m256i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
124 unsigned int thirtysecondPoints = num_points / 32;
126 for (number = 0; number < thirtysecondPoints; number++) {
127 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
128 complexVectorPtr += 32;
129 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
130 complexVectorPtr += 32;
132 complexVal3 = _mm256_load_si256((__m256i*)complexVectorPtr);
133 complexVectorPtr += 32;
134 complexVal4 = _mm256_load_si256((__m256i*)complexVectorPtr);
135 complexVectorPtr += 32;
137 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
138 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
140 complexVal1 = _mm256_or_si256(complexVal1, complexVal2);
141 complexVal1 = _mm256_permute4x64_epi64(complexVal1, 0xd8);
143 complexVal3 = _mm256_shuffle_epi8(complexVal3, iMoveMask1);
144 complexVal4 = _mm256_shuffle_epi8(complexVal4, iMoveMask2);
146 complexVal3 = _mm256_or_si256(complexVal3, complexVal4);
147 complexVal3 = _mm256_permute4x64_epi64(complexVal3, 0xd8);
149 complexVal1 = _mm256_srai_epi16(complexVal1, 8);
150 complexVal3 = _mm256_srai_epi16(complexVal3, 8);
152 iOutputVal = _mm256_packs_epi16(complexVal1, complexVal3);
153 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
155 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
160 number = thirtysecondPoints * 32;
161 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
162 for (; number < num_points; number++) {
163 *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
164 int16ComplexVectorPtr++;
171#include <tmmintrin.h>
175 unsigned int num_points)
177 unsigned int number = 0;
178 const int8_t* complexVectorPtr = (int8_t*)complexVector;
179 int8_t* iBufferPtr = iBuffer;
180 __m128i iMoveMask1 = _mm_set_epi8(
181 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
182 __m128i iMoveMask2 = _mm_set_epi8(
183 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
184 __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
186 unsigned int sixteenthPoints = num_points / 16;
188 for (number = 0; number < sixteenthPoints; number++) {
189 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
190 complexVectorPtr += 16;
191 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
192 complexVectorPtr += 16;
194 complexVal3 = _mm_load_si128((__m128i*)complexVectorPtr);
195 complexVectorPtr += 16;
196 complexVal4 = _mm_load_si128((__m128i*)complexVectorPtr);
197 complexVectorPtr += 16;
199 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
200 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
202 complexVal1 = _mm_or_si128(complexVal1, complexVal2);
204 complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
205 complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);
207 complexVal3 = _mm_or_si128(complexVal3, complexVal4);
210 complexVal1 = _mm_srai_epi16(complexVal1, 8);
211 complexVal3 = _mm_srai_epi16(complexVal3, 8);
213 iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);
215 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
220 number = sixteenthPoints * 16;
221 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
222 for (; number < num_points; number++) {
223 *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
224 int16ComplexVectorPtr++;
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable reference: keeps the high byte of the real part of each
 *        16-bit complex sample.
 *
 * The input is walked as interleaved int16 values (real, imaginary). For each
 * point the real value is arithmetically shifted right by 8 and written as
 * one int8_t; the imaginary value is skipped.
 *
 * \param iBuffer       Output, one int8_t per point.
 * \param complexVector Input, num_points interleaved complex samples.
 * \param num_points    Number of complex samples to convert.
 */
static inline void volk_16ic_deinterleave_real_8i_generic(int8_t* iBuffer,
                                                          const lv_16sc_t* complexVector,
                                                          unsigned int num_points)
{
    unsigned int number = 0;
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    int8_t* iBufferPtr = iBuffer;
    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8));
        /* Step over the imaginary component; otherwise every second output
         * would be taken from an imaginary value. */
        complexVectorPtr++;
    }
}

#endif /* LV_HAVE_GENERIC */
250 unsigned int num_points)
252 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
253 int8_t* iBufferPtr = iBuffer;
254 unsigned int eighth_points = num_points / 8;
257 int16x8x2_t complexInput;
259 for (number = 0; number < eighth_points; number++) {
260 complexInput = vld2q_s16(complexVectorPtr);
261 realOutput = vshrn_n_s16(complexInput.val[0], 8);
262 vst1_s8(iBufferPtr, realOutput);
263 complexVectorPtr += 16;
267 for (number = eighth_points * 8; number < num_points; number++) {
268 *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8));
/* Declaration of the ORC implementation, which is defined outside this file.
 * NOTE(review): only the first parameter of the prototype is present here;
 * the rest of the parameter list is missing - restore it from the upstream
 * header before relying on this declaration. */
276extern void volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer,
/* Thin wrapper: forwards iBuffer, complexVector and num_points unchanged to
 * volk_16ic_deinterleave_real_8i_a_orc_impl and does no work of its own.
 * NOTE(review): the complexVector parameter line and the function braces are
 * missing from this copy of the definition. */
280static inline void volk_16ic_deinterleave_real_8i_u_orc(int8_t* iBuffer,
282 unsigned int num_points)
284 volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points);
291#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_u_H
292#define INCLUDED_volk_16ic_deinterleave_real_8i_u_H
299#include <immintrin.h>
301static inline void volk_16ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
303 unsigned int num_points)
305 unsigned int number = 0;
306 const int8_t* complexVectorPtr = (int8_t*)complexVector;
307 int8_t* iBufferPtr = iBuffer;
308 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
340 __m256i iMoveMask2 = _mm256_set_epi8(13,
372 __m256i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
374 unsigned int thirtysecondPoints = num_points / 32;
376 for (number = 0; number < thirtysecondPoints; number++) {
377 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
378 complexVectorPtr += 32;
379 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
380 complexVectorPtr += 32;
382 complexVal3 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
383 complexVectorPtr += 32;
384 complexVal4 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
385 complexVectorPtr += 32;
387 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
388 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
390 complexVal1 = _mm256_or_si256(complexVal1, complexVal2);
391 complexVal1 = _mm256_permute4x64_epi64(complexVal1, 0xd8);
393 complexVal3 = _mm256_shuffle_epi8(complexVal3, iMoveMask1);
394 complexVal4 = _mm256_shuffle_epi8(complexVal4, iMoveMask2);
396 complexVal3 = _mm256_or_si256(complexVal3, complexVal4);
397 complexVal3 = _mm256_permute4x64_epi64(complexVal3, 0xd8);
399 complexVal1 = _mm256_srai_epi16(complexVal1, 8);
400 complexVal3 = _mm256_srai_epi16(complexVal3, 8);
402 iOutputVal = _mm256_packs_epi16(complexVal1, complexVal3);
403 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
405 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
410 number = thirtysecondPoints * 32;
411 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
412 for (; number < num_points; number++) {
413 *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
414 int16ComplexVectorPtr++;
421#include <riscv_vector.h>
/*!
 * \brief Keeps the high byte of the real part of each 16-bit complex sample
 *        (RISC-V Vector).
 *
 * Each complex sample is loaded as a single 32-bit element. Two narrowing
 * right shifts then reduce it to one byte: the first (by 0) keeps the low
 * 16 bits, the second (by 8) keeps the upper byte of those 16 bits.
 * NOTE(review): this relies on the real component occupying the low 16 bits
 * of each 32-bit element (real stored first, little-endian) - confirm.
 *
 * \param iBuffer       Output, one int8_t per point.
 * \param complexVector Input, num_points interleaved complex samples.
 * \param num_points    Number of complex samples to convert.
 */
static inline void volk_16ic_deinterleave_real_8i_rvv(int8_t* iBuffer,
                                                      const lv_16sc_t* complexVector,
                                                      unsigned int num_points)
{
    const uint32_t* in = (const uint32_t*)complexVector;
    size_t n = num_points;
    /* vl is set at the top of each pass to the number of elements handled,
     * and all three cursors then advance by that amount. */
    for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vuint32m8_t vc = __riscv_vle32_v_u32m8(in, vl);
        /* The store must actually be issued: the narrowed bytes are written
         * straight to the output. The shifts are logical, but only the bit
         * pattern of the selected byte is kept, so the sign is preserved. */
        __riscv_vse8(
            (uint8_t*)iBuffer, __riscv_vnsrl(__riscv_vnsrl(vc, 0, vl), 8, vl), vl);
    }
}
static void volk_16ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_8i.h:231
static void volk_16ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_8i.h:248
static void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_8i.h:173
short complex lv_16sc_t
Definition volk_complex.h:71