60#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
72volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
75 unsigned int num_points)
77 unsigned int number = 0;
78 const unsigned int eighthPoints = num_points / 8;
80 const float* complexVectorPtr = (
float*)complexVector;
81 int16_t* iBufferPtr = iBuffer;
83 __m256 vScalar = _mm256_set1_ps(scalar);
85 __m256 cplxValue1, cplxValue2, iValue;
89 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
91 for (; number < eighthPoints; number++) {
92 cplxValue1 = _mm256_load_ps(complexVectorPtr);
93 complexVectorPtr += 8;
95 cplxValue2 = _mm256_load_ps(complexVectorPtr);
96 complexVectorPtr += 8;
99 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
101 iValue = _mm256_mul_ps(iValue, vScalar);
103 a = _mm256_cvtps_epi32(iValue);
104 a = _mm256_packs_epi32(a, a);
105 a = _mm256_permutevar8x32_epi32(a, idx);
106 b = _mm256_extracti128_si256(a, 0);
108 _mm_store_si128((__m128i*)iBufferPtr, b);
112 number = eighthPoints * 8;
113 iBufferPtr = &iBuffer[number];
114 for (; number < num_points; number++) {
115 *iBufferPtr++ = (int16_t)
rintf(*complexVectorPtr++ * scalar);
124#include <xmmintrin.h>
130 unsigned int num_points)
132 unsigned int number = 0;
133 const unsigned int quarterPoints = num_points / 4;
135 const float* complexVectorPtr = (
float*)complexVector;
136 int16_t* iBufferPtr = iBuffer;
138 __m128 vScalar = _mm_set_ps1(scalar);
140 __m128 cplxValue1, cplxValue2, iValue;
144 for (; number < quarterPoints; number++) {
145 cplxValue1 = _mm_load_ps(complexVectorPtr);
146 complexVectorPtr += 4;
148 cplxValue2 = _mm_load_ps(complexVectorPtr);
149 complexVectorPtr += 4;
152 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
154 iValue = _mm_mul_ps(iValue, vScalar);
156 _mm_store_ps(floatBuffer, iValue);
157 *iBufferPtr++ = (int16_t)
rintf(floatBuffer[0]);
158 *iBufferPtr++ = (int16_t)
rintf(floatBuffer[1]);
159 *iBufferPtr++ = (int16_t)
rintf(floatBuffer[2]);
160 *iBufferPtr++ = (int16_t)
rintf(floatBuffer[3]);
163 number = quarterPoints * 4;
164 iBufferPtr = &iBuffer[number];
165 for (; number < num_points; number++) {
166 *iBufferPtr++ = (int16_t)
rintf(*complexVectorPtr++ * scalar);
174#ifdef LV_HAVE_GENERIC
180 unsigned int num_points)
182 const float* complexVectorPtr = (
float*)complexVector;
183 int16_t* iBufferPtr = iBuffer;
184 unsigned int number = 0;
185 for (number = 0; number < num_points; number++) {
186 *iBufferPtr++ = (int16_t)
rintf(*complexVectorPtr++ * scalar);
195#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
196#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
203#include <immintrin.h>
206volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
209 unsigned int num_points)
211 unsigned int number = 0;
212 const unsigned int eighthPoints = num_points / 8;
214 const float* complexVectorPtr = (
float*)complexVector;
215 int16_t* iBufferPtr = iBuffer;
217 __m256 vScalar = _mm256_set1_ps(scalar);
219 __m256 cplxValue1, cplxValue2, iValue;
223 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
225 for (; number < eighthPoints; number++) {
226 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
227 complexVectorPtr += 8;
229 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
230 complexVectorPtr += 8;
233 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235 iValue = _mm256_mul_ps(iValue, vScalar);
237 a = _mm256_cvtps_epi32(iValue);
238 a = _mm256_packs_epi32(a, a);
239 a = _mm256_permutevar8x32_epi32(a, idx);
240 b = _mm256_extracti128_si256(a, 0);
242 _mm_storeu_si128((__m128i*)iBufferPtr, b);
246 number = eighthPoints * 8;
247 iBufferPtr = &iBuffer[number];
248 for (; number < num_points; number++) {
249 *iBufferPtr++ = (int16_t)
rintf(*complexVectorPtr++ * scalar);
257#include <riscv_vector.h>
/*
 * RISC-V Vector kernel: takes the real part of every complex sample,
 * multiplies it by `scalar`, converts it to an integer and stores it as a
 * 16-bit value.
 *
 * iBuffer        output, num_points int16_t values
 * complexVector  input, num_points complex samples, each read as one 64-bit
 *                element
 * scalar         factor applied to each real part before conversion
 * num_points     number of complex samples to process
 */
static inline void
volk_32fc_s32f_deinterleave_real_16i_rvv(int16_t* iBuffer,
                                         const lv_32fc_t* complexVector,
                                         const float scalar,
                                         unsigned int num_points)
{
    // Treat each (real, imag) float pair as a single 64-bit lane.
    const uint64_t* in = (const uint64_t*)complexVector;
    size_t n = num_points;
    // vl is the number of samples the hardware accepts this pass; both
    // pointers and the remaining count advance by that amount.
    for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
        vl = __riscv_vsetvl_e64m8(n);
        // A narrowing shift by 0 keeps the low 32 bits of each 64-bit lane.
        // NOTE(review): that is the real part only on a little-endian layout
        // - confirm for the supported targets.
        vuint32m4_t vi = __riscv_vnsrl(__riscv_vle64_v_u64m8(in, vl), 0, vl);
        vfloat32m4_t vif = __riscv_vfmul(__riscv_vreinterpret_f32m4(vi), scalar, vl);
        // float -> int32 conversion, then narrow to int16 for the store.
        // NOTE(review): the narrowing step appears to truncate rather than
        // saturate, unlike the packs-based AVX2 kernels - confirm.
        __riscv_vse16(iBuffer, __riscv_vncvt_x(__riscv_vfcvt_x(vif, vl), vl), vl);
    }
}
static float rintf(float x)
Definition config.h:45
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_32fc_s32f_deinterleave_real_16i.h:177
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_32fc_s32f_deinterleave_real_16i.h:127
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
float complex lv_32fc_t
Definition volk_complex.h:74