43#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
44#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
55volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
59 unsigned int num_points)
61 float* iBufferPtr = iBuffer;
62 float* qBufferPtr = qBuffer;
64 unsigned int number = 0;
65 const unsigned int eighthPoints = num_points / 8;
66 __m128 iFloatValue, qFloatValue;
68 const float iScalar = 1.0 / scalar;
69 __m128 invScalar = _mm_set_ps1(iScalar);
70 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
71 int8_t* complexVectorPtr = (int8_t*)complexVector;
73 __m128i iMoveMask = _mm_set_epi8(
74 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
75 __m128i qMoveMask = _mm_set_epi8(
76 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
78 for (; number < eighthPoints; number++) {
79 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
80 complexVectorPtr += 16;
81 iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
82 qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
84 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
85 iFloatValue = _mm_cvtepi32_ps(iIntVal);
86 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
87 _mm_store_ps(iBufferPtr, iFloatValue);
90 iComplexVal = _mm_srli_si128(iComplexVal, 4);
92 iIntVal = _mm_cvtepi8_epi32(iComplexVal);
93 iFloatValue = _mm_cvtepi32_ps(iIntVal);
94 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
95 _mm_store_ps(iBufferPtr, iFloatValue);
98 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
99 qFloatValue = _mm_cvtepi32_ps(qIntVal);
100 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
101 _mm_store_ps(qBufferPtr, qFloatValue);
104 qComplexVal = _mm_srli_si128(qComplexVal, 4);
106 qIntVal = _mm_cvtepi8_epi32(qComplexVal);
107 qFloatValue = _mm_cvtepi32_ps(qIntVal);
108 qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
109 _mm_store_ps(qBufferPtr, qFloatValue);
114 number = eighthPoints * 8;
115 for (; number < num_points; number++) {
116 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
117 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
124#include <xmmintrin.h>
130 unsigned int num_points)
132 float* iBufferPtr = iBuffer;
133 float* qBufferPtr = qBuffer;
135 unsigned int number = 0;
136 const unsigned int quarterPoints = num_points / 4;
137 __m128 cplxValue1, cplxValue2, iValue, qValue;
139 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
140 int8_t* complexVectorPtr = (int8_t*)complexVector;
144 for (; number < quarterPoints; number++) {
145 floatBuffer[0] = (float)(complexVectorPtr[0]);
146 floatBuffer[1] = (float)(complexVectorPtr[1]);
147 floatBuffer[2] = (float)(complexVectorPtr[2]);
148 floatBuffer[3] = (float)(complexVectorPtr[3]);
150 floatBuffer[4] = (float)(complexVectorPtr[4]);
151 floatBuffer[5] = (float)(complexVectorPtr[5]);
152 floatBuffer[6] = (float)(complexVectorPtr[6]);
153 floatBuffer[7] = (float)(complexVectorPtr[7]);
155 cplxValue1 = _mm_load_ps(&floatBuffer[0]);
156 cplxValue2 = _mm_load_ps(&floatBuffer[4]);
158 complexVectorPtr += 8;
160 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
161 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
164 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
165 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
167 _mm_store_ps(iBufferPtr, iValue);
168 _mm_store_ps(qBufferPtr, qValue);
174 number = quarterPoints * 4;
175 complexVectorPtr = (int8_t*)&complexVector[number];
176 for (; number < num_points; number++) {
177 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
178 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
185#include <immintrin.h>
187static inline void volk_8ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
191 unsigned int num_points)
193 float* iBufferPtr = iBuffer;
194 float* qBufferPtr = qBuffer;
196 unsigned int number = 0;
197 const unsigned int sixteenthPoints = num_points / 16;
198 __m256 iFloatValue, qFloatValue;
200 const float iScalar = 1.0 / scalar;
201 __m256 invScalar = _mm256_set1_ps(iScalar);
202 __m256i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
205 __m256i iMoveMask = _mm256_set_epi8(0x80,
237 __m256i qMoveMask = _mm256_set_epi8(0x80,
270 for (; number < sixteenthPoints; number++) {
271 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
272 complexVectorPtr += 32;
273 iComplexVal = _mm256_shuffle_epi8(complexVal, iMoveMask);
274 qComplexVal = _mm256_shuffle_epi8(complexVal, qMoveMask);
276 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
277 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
278 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
279 _mm256_store_ps(iBufferPtr, iFloatValue);
282 iComplexVal = _mm256_permute4x64_epi64(iComplexVal, 0b11000110);
283 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
284 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
285 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
286 _mm256_store_ps(iBufferPtr, iFloatValue);
289 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
290 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
291 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
292 _mm256_store_ps(qBufferPtr, qFloatValue);
295 qComplexVal = _mm256_permute4x64_epi64(qComplexVal, 0b11000110);
296 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
297 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
298 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
299 _mm256_store_ps(qBufferPtr, qFloatValue);
303 number = sixteenthPoints * 16;
304 for (; number < num_points; number++) {
305 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
306 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable reference implementation: splits interleaved 8-bit complex
 *        samples into separate I and Q float buffers, multiplying every value
 *        by 1/scalar.
 *
 * \param iBuffer       Output: in-phase (first byte of each pair) values.
 * \param qBuffer       Output: quadrature (second byte of each pair) values.
 * \param complexVector Input: interleaved signed 8-bit I/Q pairs.
 * \param scalar        Every converted sample is multiplied by 1/scalar.
 * \param num_points    Number of complex samples to convert.
 */
static inline void
volk_8ic_s32f_deinterleave_32f_x2_generic(float* iBuffer,
                                          float* qBuffer,
                                          const lv_8sc_t* complexVector,
                                          const float scalar,
                                          unsigned int num_points)
{
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;
    unsigned int number;
    /* One division up front; the loop only multiplies. */
    const float invScalar = 1.0f / scalar;
    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
        *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
    }
}
#endif /* LV_HAVE_GENERIC */
337#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
338#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
345#include <immintrin.h>
347static inline void volk_8ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
351 unsigned int num_points)
353 float* iBufferPtr = iBuffer;
354 float* qBufferPtr = qBuffer;
356 unsigned int number = 0;
357 const unsigned int sixteenthPoints = num_points / 16;
358 __m256 iFloatValue, qFloatValue;
360 const float iScalar = 1.0 / scalar;
361 __m256 invScalar = _mm256_set1_ps(iScalar);
362 __m256i complexVal, iIntVal, qIntVal;
363 __m128i iComplexVal, qComplexVal;
364 int8_t* complexVectorPtr = (int8_t*)complexVector;
366 __m256i MoveMask = _mm256_set_epi8(15,
399 for (; number < sixteenthPoints; number++) {
400 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
401 complexVectorPtr += 32;
402 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
403 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
404 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
405 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
407 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
408 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
409 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
410 _mm256_storeu_ps(iBufferPtr, iFloatValue);
413 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
414 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
415 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
416 _mm256_storeu_ps(qBufferPtr, qFloatValue);
419 complexVal = _mm256_srli_si256(complexVal, 8);
420 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
421 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
423 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
424 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
425 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
426 _mm256_storeu_ps(iBufferPtr, iFloatValue);
429 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
430 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
431 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
432 _mm256_storeu_ps(qBufferPtr, qFloatValue);
436 number = sixteenthPoints * 16;
437 for (; number < num_points; number++) {
438 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
439 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
445#include <riscv_vector.h>
447static inline void volk_8ic_s32f_deinterleave_32f_x2_rvv(
float* iBuffer,
451 unsigned int num_points)
453 const uint16_t* in = (
const uint16_t*)complexVector;
454 size_t n = num_points;
455 for (
size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl, qBuffer += vl) {
456 vl = __riscv_vsetvl_e16m4(n);
457 vuint16m4_t vc = __riscv_vle16_v_u16m4(in, vl);
458 vint8m2_t vr = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 0, vl));
459 vint8m2_t vi = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 8, vl));
460 vfloat32m8_t vrf = __riscv_vfwcvt_f(__riscv_vsext_vf2(vr, vl), vl);
461 vfloat32m8_t vif = __riscv_vfwcvt_f(__riscv_vsext_vf2(vi, vl), vl);
462 __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl);
463 __riscv_vse32(qBuffer, __riscv_vfmul(vif, 1.0f / scalar, vl), vl);
/*
 * Cross-references (Doxygen tooltips from the rendered listing):
 *
 *   static void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_8ic_s32f_deinterleave_32f_x2.h:126
 *
 *   static void volk_8ic_s32f_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_8ic_s32f_deinterleave_32f_x2.h:315
 *
 *   #define __VOLK_ATTR_ALIGNED(x)
 *     Definition: volk_common.h:62
 *
 *   char complex lv_8sc_t
 *     Provide typedefs and operators for all complex types in C and C++.
 *     Definition: volk_complex.h:70
 */