62#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
63#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
72static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(
lv_16sc_t* complexVector,
76 unsigned int num_points)
78 unsigned int number = 0;
79 const float* iBufferPtr = iBuffer;
80 const float* qBufferPtr = qBuffer;
82 __m256 vScalar = _mm256_set1_ps(scalar);
84 const unsigned int eighthPoints = num_points / 8;
86 __m256 iValue, qValue, cplxValue1, cplxValue2;
87 __m256i intValue1, intValue2;
89 int16_t* complexVectorPtr = (int16_t*)complexVector;
91 for (; number < eighthPoints; number++) {
92 iValue = _mm256_load_ps(iBufferPtr);
93 qValue = _mm256_load_ps(qBufferPtr);
96 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
97 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
100 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
101 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
103 intValue1 = _mm256_cvtps_epi32(cplxValue1);
104 intValue2 = _mm256_cvtps_epi32(cplxValue2);
106 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
108 _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
109 complexVectorPtr += 16;
115 number = eighthPoints * 8;
116 complexVectorPtr = (int16_t*)(&complexVector[number]);
117 for (; number < num_points; number++) {
118 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
119 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
126#include <emmintrin.h>
129 const float* iBuffer,
130 const float* qBuffer,
132 unsigned int num_points)
134 unsigned int number = 0;
135 const float* iBufferPtr = iBuffer;
136 const float* qBufferPtr = qBuffer;
138 __m128 vScalar = _mm_set_ps1(scalar);
140 const unsigned int quarterPoints = num_points / 4;
142 __m128 iValue, qValue, cplxValue1, cplxValue2;
143 __m128i intValue1, intValue2;
145 int16_t* complexVectorPtr = (int16_t*)complexVector;
147 for (; number < quarterPoints; number++) {
148 iValue = _mm_load_ps(iBufferPtr);
149 qValue = _mm_load_ps(qBufferPtr);
152 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
153 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
156 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
157 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
159 intValue1 = _mm_cvtps_epi32(cplxValue1);
160 intValue2 = _mm_cvtps_epi32(cplxValue2);
162 intValue1 = _mm_packs_epi32(intValue1, intValue2);
164 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
165 complexVectorPtr += 8;
171 number = quarterPoints * 4;
172 complexVectorPtr = (int16_t*)(&complexVector[number]);
173 for (; number < num_points; number++) {
174 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
175 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
182#include <xmmintrin.h>
185 const float* iBuffer,
186 const float* qBuffer,
188 unsigned int num_points)
190 unsigned int number = 0;
191 const float* iBufferPtr = iBuffer;
192 const float* qBufferPtr = qBuffer;
194 __m128 vScalar = _mm_set_ps1(scalar);
196 const unsigned int quarterPoints = num_points / 4;
198 __m128 iValue, qValue, cplxValue;
200 int16_t* complexVectorPtr = (int16_t*)complexVector;
204 for (; number < quarterPoints; number++) {
205 iValue = _mm_load_ps(iBufferPtr);
206 qValue = _mm_load_ps(qBufferPtr);
209 cplxValue = _mm_unpacklo_ps(iValue, qValue);
210 cplxValue = _mm_mul_ps(cplxValue, vScalar);
212 _mm_store_ps(floatBuffer, cplxValue);
214 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
215 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
216 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
217 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
220 cplxValue = _mm_unpackhi_ps(iValue, qValue);
221 cplxValue = _mm_mul_ps(cplxValue, vScalar);
223 _mm_store_ps(floatBuffer, cplxValue);
225 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
226 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
227 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
228 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
234 number = quarterPoints * 4;
235 complexVectorPtr = (int16_t*)(&complexVector[number]);
236 for (; number < num_points; number++) {
237 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
238 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
244#ifdef LV_HAVE_GENERIC
247 const float* iBuffer,
248 const float* qBuffer,
250 unsigned int num_points)
252 int16_t* complexVectorPtr = (int16_t*)complexVector;
253 const float* iBufferPtr = iBuffer;
254 const float* qBufferPtr = qBuffer;
255 unsigned int number = 0;
257 for (number = 0; number < num_points; number++) {
258 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
259 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
267#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
268#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
275#include <immintrin.h>
277static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(
lv_16sc_t* complexVector,
278 const float* iBuffer,
279 const float* qBuffer,
281 unsigned int num_points)
283 unsigned int number = 0;
284 const float* iBufferPtr = iBuffer;
285 const float* qBufferPtr = qBuffer;
287 __m256 vScalar = _mm256_set1_ps(scalar);
289 const unsigned int eighthPoints = num_points / 8;
291 __m256 iValue, qValue, cplxValue1, cplxValue2;
292 __m256i intValue1, intValue2;
294 int16_t* complexVectorPtr = (int16_t*)complexVector;
296 for (; number < eighthPoints; number++) {
297 iValue = _mm256_loadu_ps(iBufferPtr);
298 qValue = _mm256_loadu_ps(qBufferPtr);
301 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
302 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
305 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
306 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
308 intValue1 = _mm256_cvtps_epi32(cplxValue1);
309 intValue2 = _mm256_cvtps_epi32(cplxValue2);
311 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
313 _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
314 complexVectorPtr += 16;
320 number = eighthPoints * 8;
321 complexVectorPtr = (int16_t*)(&complexVector[number]);
322 for (; number < num_points; number++) {
323 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
324 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
330#include <riscv_vector.h>
/*!
 * \brief Scales two float streams and interleaves them into 16-bit complex
 *        samples (RISC-V Vector, no segment stores).
 *
 * Each I/Q pair is assembled as one 32-bit word (I in the low half, Q in the
 * high half) and written with a plain 32-bit vector store.
 * NOTE(review): this yields I-then-Q in memory only on a little-endian
 * target - confirm if big-endian RISC-V ever matters.
 *
 * \param complexVector Output buffer, num_points interleaved I/Q pairs.
 * \param iBuffer       Input samples placed in the low 16 bits of each word.
 * \param qBuffer       Input samples placed in the high 16 bits of each word.
 * \param scalar        Factor applied to every input before conversion.
 * \param num_points    Number of I/Q pairs to produce.
 */
static inline void volk_32f_x2_s32f_interleave_16ic_rvv(lv_16sc_t* complexVector,
                                                        const float* iBuffer,
                                                        const float* qBuffer,
                                                        const float scalar,
                                                        unsigned int num_points)
{
    // One uint32_t per complex sample.
    uint32_t* out = (uint32_t*)complexVector;
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t vrf = __riscv_vle32_v_f32m8(iBuffer, vl);
        vfloat32m8_t vif = __riscv_vle32_v_f32m8(qBuffer, vl);
        // Scale, then narrow float32 -> int16.
        vint16m4_t vri = __riscv_vfncvt_x(__riscv_vfmul(vrf, scalar, vl), vl);
        vint16m4_t vii = __riscv_vfncvt_x(__riscv_vfmul(vif, scalar, vl), vl);
        // Work on the raw bit patterns so widening does not sign-extend.
        vuint16m4_t vr = __riscv_vreinterpret_u16m4(vri);
        vuint16m4_t vi = __riscv_vreinterpret_u16m4(vii);
        // (vr + vi) + 0xFFFF * vi == vr + 0x10000 * vi == (vi << 16) | vr
        vuint32m8_t vc = __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFF, vi, vl);
        __riscv_vse32(out, vc, vl);
    }
}
355#include <riscv_vector.h>
/*!
 * \brief Scales two float streams and interleaves them into 16-bit complex
 *        samples (RISC-V Vector, two-field segment store).
 *
 * The converted I and Q vectors are bundled into a two-register tuple and
 * written with a 16-bit segment store, which places field 0 (I) and field 1
 * (Q) of each element next to each other in memory.
 *
 * \param complexVector Output buffer, num_points interleaved I/Q pairs.
 * \param iBuffer       Input samples written to the first slot of each pair.
 * \param qBuffer       Input samples written to the second slot of each pair.
 * \param scalar        Factor applied to every input before conversion.
 * \param num_points    Number of I/Q pairs to produce.
 */
static inline void volk_32f_x2_s32f_interleave_16ic_rvvseg(lv_16sc_t* complexVector,
                                                           const float* iBuffer,
                                                           const float* qBuffer,
                                                           const float scalar,
                                                           unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t vrf = __riscv_vle32_v_f32m8(iBuffer, vl);
        vfloat32m8_t vif = __riscv_vle32_v_f32m8(qBuffer, vl);
        // Scale, then narrow float32 -> int16.
        vint16m4_t vri = __riscv_vfncvt_x(__riscv_vfmul(vrf, scalar, vl), vl);
        vint16m4_t vii = __riscv_vfncvt_x(__riscv_vfmul(vif, scalar, vl), vl);
        // Segment store interleaves the two fields: I0 Q0 I1 Q1 ...
        __riscv_vsseg2e16(
            (int16_t*)complexVector, __riscv_vcreate_v_i16m4x2(vri, vii), vl);
    }
}
/*
 * Cross-references (from the generated documentation):
 *
 *   static float rintf(float x)
 *       Definition: config.h:45
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t *complexVector,
 *       const float *iBuffer, const float *qBuffer, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_32f_x2_s32f_interleave_16ic.h:128
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t *complexVector,
 *       const float *iBuffer, const float *qBuffer, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_32f_x2_s32f_interleave_16ic.h:184
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t *complexVector,
 *       const float *iBuffer, const float *qBuffer, const float scalar,
 *       unsigned int num_points)
 *       Definition: volk_32f_x2_s32f_interleave_16ic.h:246
 *
 *   #define __VOLK_ATTR_ALIGNED(x)
 *       Definition: volk_common.h:62
 *
 *   short complex lv_16sc_t
 *       Definition: volk_complex.h:71
 */