43#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
44#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
/*
 * AVX2 kernel, aligned variant (visible fragment).
 *
 * Vector loop: for each of num_points / 8 iterations it loads 32 bytes from
 * complexVector, rearranges them with moveMask and a 64-bit permute, takes
 * the low 128 bits, widens eight 16-bit integers to 32-bit, converts them to
 * float, multiplies by 1/scalar and stores eight floats at iBufferPtr.
 * Scalar loop: handles the remaining num_points % 8 samples one at a time.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. complexVector, scalar and iFloatValue are used below but their
 * declarations are not visible, and the moveMask initializer is cut off
 * after its first argument. Confirm every detail against the real header.
 */
54volk_16ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
57 unsigned int num_points)
59 float* iBufferPtr = iBuffer;
61 unsigned int number = 0;
/* Number of full 8-sample vector iterations. */
62 const unsigned int eighthPoints = num_points / 8;
/* Reciprocal is computed once so both loops multiply instead of divide. */
66 const float iScalar = 1.0 / scalar;
67 __m256 invScalar = _mm256_set1_ps(iScalar);
68 __m256i complexVal, iIntVal;
69 __m128i complexVal128;
/* Byte pointer so the input can be advanced 32 bytes per iteration. */
70 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte shuffle mask; only its first argument is visible in this listing. */
72 __m256i moveMask = _mm256_set_epi8(0x80,
105 for (; number < eighthPoints; number++) {
/* Aligned 256-bit load: the address must be 32-byte aligned. */
106 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
107 complexVectorPtr += 32;
/* Shuffles bytes independently inside each 128-bit lane. */
108 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/*
 * 0xd8 selects the 64-bit quarters in the order 0, 2, 1, 3, which places
 * the low quarter of each 128-bit lane side by side in the low 128 bits.
 */
109 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
110 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend eight 16-bit integers to 32-bit, then convert to float. */
112 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
113 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/*
 * Aligned store: iBufferPtr must be 32-byte aligned.
 * NOTE(review): no advance of iBufferPtr is visible after this store;
 * presumably it is on an omitted line. TODO confirm.
 */
117 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Scalar tail: restart at the first sample the vector loop did not cover. */
122 number = eighthPoints * 8;
123 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
124 for (; number < num_points; number++) {
/* Convert one 16-bit value and scale it, then skip the following one. */
125 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
126 sixteenTComplexVectorPtr++;
132#include <smmintrin.h>
/*
 * SSE4.1 kernel, aligned variant (visible fragment).
 *
 * Vector loop: for each of num_points / 4 iterations it loads 16 bytes from
 * complexVector, keeps the first 16-bit value of every 4-byte group, widens
 * those four values to 32-bit, converts them to float, multiplies by
 * 1/scalar and stores four floats at iBufferPtr.
 * Scalar loop: handles the remaining num_points % 4 samples one at a time.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. complexVector, scalar and iFloatValue are used below but their
 * declarations are not visible. Confirm against the real header.
 */
135volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
138 unsigned int num_points)
140 float* iBufferPtr = iBuffer;
142 unsigned int number = 0;
/* Number of full 4-sample vector iterations. */
143 const unsigned int quarterPoints = num_points / 4;
/* Reciprocal is computed once so both loops multiply instead of divide. */
147 const float iScalar = 1.0 / scalar;
148 __m128 invScalar = _mm_set_ps1(iScalar);
149 __m128i complexVal, iIntVal;
/* Byte pointer so the input can be advanced 16 bytes per iteration. */
150 int8_t* complexVectorPtr = (int8_t*)complexVector;
/*
 * Arguments run from the most significant byte to the least significant.
 * Result bytes 0..7 take source bytes 0, 1, 4, 5, 8, 9, 12, 13, that is the
 * first two bytes of each 4-byte group. A mask byte of 0x80 makes the
 * shuffle write zero, so the upper eight result bytes are cleared.
 */
152 __m128i moveMask = _mm_set_epi8(
153 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
155 for (; number < quarterPoints; number++) {
/* Aligned 128-bit load: the address must be 16-byte aligned. */
156 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
157 complexVectorPtr += 16;
158 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
/* Sign-extend the four low 16-bit integers to 32-bit, then convert to float. */
160 iIntVal = _mm_cvtepi16_epi32(complexVal);
161 iFloatValue = _mm_cvtepi32_ps(iIntVal);
163 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
/*
 * Aligned store: iBufferPtr must be 16-byte aligned.
 * NOTE(review): no advance of iBufferPtr is visible after this store;
 * presumably it is on an omitted line. TODO confirm.
 */
165 _mm_store_ps(iBufferPtr, iFloatValue);
/* Scalar tail: restart at the first sample the vector loop did not cover. */
170 number = quarterPoints * 4;
171 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
172 for (; number < num_points; number++) {
/* Convert one 16-bit value and scale it, then skip the following one. */
173 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
174 sixteenTComplexVectorPtr++;
180#include <xmmintrin.h>
/*
 * SSE kernel, aligned variant (visible fragment; the function name and the
 * leading parameters are not in this range. Presumably this is
 * volk_16ic_s32f_deinterleave_real_32f_a_sse. TODO confirm).
 *
 * Vector loop: for each of num_points / 4 iterations it converts four 16-bit
 * values, taken two int16 elements apart, to float in a small staging array,
 * loads that array into a vector register, multiplies by 1/scalar and stores
 * four floats at iBufferPtr.
 * Scalar loop: handles the remaining num_points % 4 samples.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. iBuffer, complexVector, scalar, floatBuffer and iValue are used
 * below but their declarations are not visible. Confirm against the real
 * header.
 */
186 unsigned int num_points)
188 float* iBufferPtr = iBuffer;
190 unsigned int number = 0;
/* Number of full 4-sample vector iterations. */
191 const unsigned int quarterPoints = num_points / 4;
/* Reciprocal is computed once so both loops multiply instead of divide. */
194 const float iScalar = 1.0 / scalar;
195 __m128 invScalar = _mm_set_ps1(iScalar);
196 int16_t* complexVectorPtr = (int16_t*)complexVector;
200 for (; number < quarterPoints; number++) {
/* Read every second int16 element; the element in between is skipped. */
201 floatBuffer[0] = (float)(*complexVectorPtr);
202 complexVectorPtr += 2;
203 floatBuffer[1] = (float)(*complexVectorPtr);
204 complexVectorPtr += 2;
205 floatBuffer[2] = (float)(*complexVectorPtr);
206 complexVectorPtr += 2;
207 floatBuffer[3] = (float)(*complexVectorPtr);
208 complexVectorPtr += 2;
/* Aligned load: floatBuffer must be 16-byte aligned. */
210 iValue = _mm_load_ps(floatBuffer);
212 iValue = _mm_mul_ps(iValue, invScalar);
/*
 * Aligned store: iBufferPtr must be 16-byte aligned.
 * NOTE(review): no advance of iBufferPtr is visible after this store;
 * presumably it is on an omitted line. TODO confirm.
 */
214 _mm_store_ps(iBufferPtr, iValue);
/* Scalar tail: restart at the first sample the vector loop did not cover. */
219 number = quarterPoints * 4;
220 complexVectorPtr = (int16_t*)&complexVector[number];
221 for (; number < num_points; number++) {
/*
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here, while the vector loop above steps by two int16 elements per output.
 * A second increment may sit on an omitted line. TODO confirm.
 */
222 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
228#ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (visible fragment; the function name and the
 * leading parameters are not in this range. Presumably this is
 * volk_16ic_s32f_deinterleave_real_32f_generic. TODO confirm).
 *
 * Walks complexVector as an array of 16-bit integers and, for each of
 * num_points outputs, converts one value to float, multiplies it by
 * 1/scalar and writes it to iBuffer.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. iBuffer, complexVector and scalar are used below but their
 * declarations are not visible. Confirm against the real header.
 */
233 unsigned int num_points)
235 unsigned int number = 0;
236 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
237 float* iBufferPtr = iBuffer;
/* Reciprocal is computed once so the loop multiplies instead of divides. */
238 const float invScalar = 1.0 / scalar;
239 for (number = 0; number < num_points; number++) {
/*
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here. If each input sample is a pair of 16-bit values, a second increment
 * would be needed to skip the unused one; it may sit on an omitted line.
 * TODO confirm.
 */
240 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
249#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
250#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
257#include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant (visible fragment).
 *
 * Vector loop: for each of num_points / 8 iterations it loads 32 bytes from
 * complexVector, rearranges them with moveMask and a 64-bit permute, takes
 * the low 128 bits, widens eight 16-bit integers to 32-bit, converts them to
 * float, multiplies by 1/scalar and stores eight floats at iBufferPtr.
 * Scalar loop: handles the remaining num_points % 8 samples one at a time.
 * The load and store used here are the unaligned forms, so neither buffer
 * needs 32-byte alignment.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. complexVector, scalar and iFloatValue are used below but their
 * declarations are not visible, and the moveMask initializer is cut off
 * after its first argument. Confirm every detail against the real header.
 */
260volk_16ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
263 unsigned int num_points)
265 float* iBufferPtr = iBuffer;
267 unsigned int number = 0;
/* Number of full 8-sample vector iterations. */
268 const unsigned int eighthPoints = num_points / 8;
/* Reciprocal is computed once so both loops multiply instead of divide. */
272 const float iScalar = 1.0 / scalar;
273 __m256 invScalar = _mm256_set1_ps(iScalar);
274 __m256i complexVal, iIntVal;
275 __m128i complexVal128;
/* Byte pointer so the input can be advanced 32 bytes per iteration. */
276 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte shuffle mask; only its first argument is visible in this listing. */
278 __m256i moveMask = _mm256_set_epi8(0x80,
311 for (; number < eighthPoints; number++) {
/* Unaligned 256-bit load. */
312 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
313 complexVectorPtr += 32;
/* Shuffles bytes independently inside each 128-bit lane. */
314 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/*
 * 0xd8 selects the 64-bit quarters in the order 0, 2, 1, 3, which places
 * the low quarter of each 128-bit lane side by side in the low 128 bits.
 */
315 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
316 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
/* Sign-extend eight 16-bit integers to 32-bit, then convert to float. */
318 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
319 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
321 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/*
 * Unaligned store.
 * NOTE(review): no advance of iBufferPtr is visible after this store;
 * presumably it is on an omitted line. TODO confirm.
 */
323 _mm256_storeu_ps(iBufferPtr, iFloatValue);
/* Scalar tail: restart at the first sample the vector loop did not cover. */
328 number = eighthPoints * 8;
329 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
330 for (; number < num_points; number++) {
/* Convert one 16-bit value and scale it, then skip the following one. */
331 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
332 sixteenTComplexVectorPtr++;
338#include <riscv_vector.h>
/*
 * RISC-V Vector kernel (visible fragment).
 *
 * Treats complexVector as an array of 32-bit elements and processes it in
 * strips whose length is chosen by the hardware on each pass. Every 32-bit
 * element is narrowed to 16 bits, converted to float, multiplied by
 * 1/scalar and stored to iBuffer.
 *
 * NOTE(review): this listing carries fused line-number prefixes and omits
 * lines. complexVector and scalar are used below but their declarations are
 * not visible. Confirm against the real header.
 */
341volk_16ic_s32f_deinterleave_real_32f_rvv(
float* iBuffer,
344 unsigned int num_points)
/* One 32-bit element per input sample; presumably a pair of 16-bit values. */
346 const int32_t* in = (
const int32_t*)complexVector;
347 size_t n = num_points;
/* Each pass consumes vl elements and advances both pointers by vl. */
348 for (
size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
/* vl is the element count for this pass, at most n. */
349 vl = __riscv_vsetvl_e32m8(n);
350 vint32m8_t vc = __riscv_vle32_v_i32m8(in, vl);
/*
 * The narrowing conversion keeps the low 16 bits of each 32-bit element and
 * the widening conversion turns that 16-bit integer into a 32-bit float.
 * NOTE(review): the low half is presumably the real part on a little-endian
 * target. TODO confirm.
 */
351 vfloat32m8_t vr = __riscv_vfwcvt_f(__riscv_vncvt_x(vc, vl), vl);
352 __riscv_vse32(iBuffer, __riscv_vfmul(vr, 1.0f / scalar, vl), vl);
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_16ic_s32f_deinterleave_real_32f.h:230
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_16ic_s32f_deinterleave_real_32f.h:183
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
short complex lv_16sc_t
Definition volk_complex.h:71