42#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
43#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
53volk_8ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
56 unsigned int num_points)
58 float* iBufferPtr = iBuffer;
60 unsigned int number = 0;
61 const unsigned int sixteenthPoints = num_points / 16;
64 const float iScalar = 1.0 / scalar;
65 __m256 invScalar = _mm256_set1_ps(iScalar);
66 __m256i complexVal, iIntVal;
67 int8_t* complexVectorPtr = (int8_t*)complexVector;
69 __m256i moveMask = _mm256_set_epi8(0x80,
101 for (; number < sixteenthPoints; number++) {
102 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
103 complexVectorPtr += 32;
104 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
106 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
107 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
108 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
109 _mm256_store_ps(iBufferPtr, iFloatValue);
112 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
113 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
114 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116 _mm256_store_ps(iBufferPtr, iFloatValue);
120 number = sixteenthPoints * 16;
121 for (; number < num_points; number++) {
122 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
130#include <smmintrin.h>
133volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
136 unsigned int num_points)
138 float* iBufferPtr = iBuffer;
140 unsigned int number = 0;
141 const unsigned int eighthPoints = num_points / 8;
144 const float iScalar = 1.0 / scalar;
145 __m128 invScalar = _mm_set_ps1(iScalar);
146 __m128i complexVal, iIntVal;
147 int8_t* complexVectorPtr = (int8_t*)complexVector;
149 __m128i moveMask = _mm_set_epi8(
150 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
152 for (; number < eighthPoints; number++) {
153 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
154 complexVectorPtr += 16;
155 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
157 iIntVal = _mm_cvtepi8_epi32(complexVal);
158 iFloatValue = _mm_cvtepi32_ps(iIntVal);
160 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
162 _mm_store_ps(iBufferPtr, iFloatValue);
166 complexVal = _mm_srli_si128(complexVal, 4);
167 iIntVal = _mm_cvtepi8_epi32(complexVal);
168 iFloatValue = _mm_cvtepi32_ps(iIntVal);
170 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
172 _mm_store_ps(iBufferPtr, iFloatValue);
177 number = eighthPoints * 8;
178 for (; number < num_points; number++) {
179 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
187#include <xmmintrin.h>
193 unsigned int num_points)
195 float* iBufferPtr = iBuffer;
197 unsigned int number = 0;
198 const unsigned int quarterPoints = num_points / 4;
201 const float iScalar = 1.0 / scalar;
202 __m128 invScalar = _mm_set_ps1(iScalar);
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
207 for (; number < quarterPoints; number++) {
208 floatBuffer[0] = (float)(*complexVectorPtr);
209 complexVectorPtr += 2;
210 floatBuffer[1] = (float)(*complexVectorPtr);
211 complexVectorPtr += 2;
212 floatBuffer[2] = (float)(*complexVectorPtr);
213 complexVectorPtr += 2;
214 floatBuffer[3] = (float)(*complexVectorPtr);
215 complexVectorPtr += 2;
217 iValue = _mm_load_ps(floatBuffer);
219 iValue = _mm_mul_ps(iValue, invScalar);
221 _mm_store_ps(iBufferPtr, iValue);
226 number = quarterPoints * 4;
227 for (; number < num_points; number++) {
228 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
235#ifdef LV_HAVE_GENERIC
241 unsigned int num_points)
243 unsigned int number = 0;
244 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
245 float* iBufferPtr = iBuffer;
246 const float invScalar = 1.0 / scalar;
247 for (number = 0; number < num_points; number++) {
248 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
257#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
258#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
265#include <immintrin.h>
268volk_8ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
271 unsigned int num_points)
273 float* iBufferPtr = iBuffer;
275 unsigned int number = 0;
276 const unsigned int sixteenthPoints = num_points / 16;
279 const float iScalar = 1.0 / scalar;
280 __m256 invScalar = _mm256_set1_ps(iScalar);
281 __m256i complexVal, iIntVal;
283 int8_t* complexVectorPtr = (int8_t*)complexVector;
285 __m256i moveMask = _mm256_set_epi8(0x80,
318 for (; number < sixteenthPoints; number++) {
319 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
320 complexVectorPtr += 32;
321 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
323 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
324 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
325 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
327 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
329 _mm256_storeu_ps(iBufferPtr, iFloatValue);
333 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
334 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
335 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
337 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
339 _mm256_storeu_ps(iBufferPtr, iFloatValue);
344 number = sixteenthPoints * 16;
345 for (; number < num_points; number++) {
346 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
353#include <riscv_vector.h>
/*
 * RISC-V Vector kernel: loads each two-byte input sample as one 16-bit
 * element, keeps its low 8 bits, widens that to float and multiplies by
 * 1/scalar.
 *
 * iBuffer        output, num_points floats.
 * complexVector  input, num_points two-byte samples.
 * scalar         divisor applied to every sample.
 * num_points     number of complex input samples / float outputs.
 */
static inline void volk_8ic_s32f_deinterleave_real_32f_rvv(float* iBuffer,
                                                           const lv_8sc_t* complexVector,
                                                           const float scalar,
                                                           unsigned int num_points)
{
    const uint16_t* in = (const uint16_t*)complexVector;
    const float invScalar = 1.0f / scalar; /* loop-invariant */
    size_t n = num_points;

    for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
        vl = __riscv_vsetvl_e16m4(n);
        vuint16m4_t vc = __riscv_vle16_v_u16m4(in, vl);
        /* A narrowing shift by 0 keeps the low byte of each 16-bit element.
         * NOTE(review): that is the first byte of the pair only on a
         * little-endian target - confirm. */
        vint8m2_t vr = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 0, vl));
        /* Sign-extend int8 -> int16, then widen-convert int16 -> float32. */
        vfloat32m8_t vrf = __riscv_vfwcvt_f(__riscv_vsext_vf2(vr, vl), vl);
        __riscv_vse32(iBuffer, __riscv_vfmul(vrf, invScalar, vl), vl);
    }
}
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_8ic_s32f_deinterleave_real_32f.h:238
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_8ic_s32f_deinterleave_real_32f.h:190
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70