41#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
42#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
50static inline void volk_8ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
56 const int8_t* complexVectorPtr = (int8_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
58 int16_t* qBufferPtr = qBuffer;
59 __m256i MoveMask = _mm256_set_epi8(15,
91 __m256i complexVal, iOutputVal, qOutputVal;
92 __m128i iOutputVal0, qOutputVal0;
94 unsigned int sixteenthPoints = num_points / 16;
96 for (number = 0; number < sixteenthPoints; number++) {
97 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
98 complexVectorPtr += 32;
100 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
101 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
103 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
104 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
106 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
107 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
109 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
110 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
112 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
113 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
119 number = sixteenthPoints * 16;
120 for (; number < num_points; number++) {
122 ((int16_t)*complexVectorPtr++) *
124 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
130#include <smmintrin.h>
132static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer,
135 unsigned int num_points)
137 unsigned int number = 0;
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
141 __m128i iMoveMask = _mm_set_epi8(0x80,
157 __m128i qMoveMask = _mm_set_epi8(
158 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
159 __m128i complexVal, iOutputVal, qOutputVal;
161 unsigned int eighthPoints = num_points / 8;
163 for (number = 0; number < eighthPoints; number++) {
164 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
165 complexVectorPtr += 16;
167 iOutputVal = _mm_shuffle_epi8(complexVal,
169 qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
171 iOutputVal = _mm_cvtepi8_epi16(iOutputVal);
174 _mm_slli_epi16(iOutputVal, 8);
177 qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
178 qOutputVal = _mm_slli_epi16(qOutputVal, 8);
180 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
181 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
187 number = eighthPoints * 8;
188 for (; number < num_points; number++) {
190 ((int16_t)*complexVectorPtr++) *
192 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
199#include <immintrin.h>
204 unsigned int num_points)
206 unsigned int number = 0;
207 const int8_t* complexVectorPtr = (int8_t*)complexVector;
208 int16_t* iBufferPtr = iBuffer;
209 int16_t* qBufferPtr = qBuffer;
210 __m128i iMoveMask = _mm_set_epi8(0x80,
226 __m128i qMoveMask = _mm_set_epi8(
227 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
228 __m256i complexVal, iOutputVal, qOutputVal;
229 __m128i complexVal1, complexVal0;
230 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
232 unsigned int sixteenthPoints = num_points / 16;
234 for (number = 0; number < sixteenthPoints; number++) {
235 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
236 complexVectorPtr += 32;
239 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
240 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
242 iOutputVal1 = _mm_shuffle_epi8(
243 complexVal1, iMoveMask);
244 iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
245 qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
246 qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
249 _mm_cvtepi8_epi16(iOutputVal1);
252 _mm_slli_epi16(iOutputVal1, 8);
254 iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
255 iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
257 qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
258 qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
259 qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
260 qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
263 __m256i dummy = _mm256_setzero_si256();
264 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
265 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
266 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
267 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
269 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
270 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
276 number = sixteenthPoints * 16;
277 for (; number < num_points; number++) {
279 ((int16_t)*complexVectorPtr++) *
281 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable reference implementation: splits interleaved 8-bit complex
 *        samples into separate 16-bit I and Q buffers, multiplying every
 *        sample by 256.
 *
 * \param iBuffer       Output: in-phase samples, one int16_t per complex point.
 * \param qBuffer       Output: quadrature samples, one int16_t per complex point.
 * \param complexVector Input: interleaved (I, Q) signed 8-bit pairs.
 * \param num_points    Number of complex points to convert.
 */
static inline void volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer,
                                                        int16_t* qBuffer,
                                                        const lv_8sc_t* complexVector,
                                                        unsigned int num_points)
{
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    int16_t* qBufferPtr = qBuffer;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        /* Multiply rather than shift: left-shifting a negative value is
         * undefined behaviour in C. */
        *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
        *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
    }
}

#endif /* LV_HAVE_GENERIC */
308#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
309#define INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
315#include <immintrin.h>
317static inline void volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
320 unsigned int num_points)
322 unsigned int number = 0;
323 const int8_t* complexVectorPtr = (int8_t*)complexVector;
324 int16_t* iBufferPtr = iBuffer;
325 int16_t* qBufferPtr = qBuffer;
326 __m256i MoveMask = _mm256_set_epi8(15,
358 __m256i complexVal, iOutputVal, qOutputVal;
359 __m128i iOutputVal0, qOutputVal0;
361 unsigned int sixteenthPoints = num_points / 16;
363 for (number = 0; number < sixteenthPoints; number++) {
364 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
365 complexVectorPtr += 32;
367 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
368 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
370 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
371 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
373 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
374 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
376 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
377 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
379 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
380 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
386 number = sixteenthPoints * 16;
387 for (; number < num_points; number++) {
389 ((int16_t)*complexVectorPtr++) *
391 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
397#include <riscv_vector.h>
399static inline void volk_8ic_deinterleave_16i_x2_rvv(int16_t* iBuffer,
402 unsigned int num_points)
404 const uint16_t* in = (
const uint16_t*)complexVector;
405 size_t n = num_points;
406 for (
size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl, qBuffer += vl) {
407 vl = __riscv_vsetvl_e16m8(n);
408 vuint16m8_t vc = __riscv_vle16_v_u16m8(in, vl);
409 vuint16m8_t vr = __riscv_vsll(vc, 8, vl);
410 vuint16m8_t vi = __riscv_vand(vc, 0xFF00, vl);
411 __riscv_vse16((uint16_t*)iBuffer, vr, vl);
412 __riscv_vse16((uint16_t*)qBuffer, vi, vl);
static void volk_8ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_16i_x2.h:289
static void volk_8ic_deinterleave_16i_x2_a_avx(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_16i_x2.h:201
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70