60#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
/* NOTE(review): the LV_HAVE_AVX guard and the <immintrin.h> include are
 * restored by analogy with the LV_HAVE_GENERIC guard and the other per-kernel
 * includes in this file -- confirm against the build configuration. */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Interleave two float buffers into one buffer of complex samples.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, written to the even float slots of the output
 * qBuffer        second input, written to the odd float slots of the output
 * num_points     number of samples in each input buffer
 *
 * Uses _mm256_load_ps / _mm256_store_ps, which require 32-byte aligned
 * addresses. Eight points are produced per vector iteration; the remaining
 * num_points % 8 points are handled by the scalar tail loop.
 */
static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
                                                     const float* iBuffer,
                                                     const float* qBuffer,
                                                     unsigned int num_points)
{
    unsigned int number = 0;
    float* complexVectorPtr = (float*)complexVector;
    const float* iBufferPtr = iBuffer;
    const float* qBufferPtr = qBuffer;

    const uint64_t eighthPoints = num_points / 8;

    __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
    for (; number < eighthPoints; number++) {
        iValue = _mm256_load_ps(iBufferPtr);
        qValue = _mm256_load_ps(qBufferPtr);

        /* Unpack works per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5 */
        cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
        /* i2 q2 i3 q3 | i6 q6 i7 q7 */
        cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);

        /* 0x20 joins the two low lanes: points 0..3 in order. */
        cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        _mm256_store_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 8;

        /* 0x31 joins the two high lanes: points 4..7 in order. */
        cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
        _mm256_store_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 8;

        /* Advance the inputs; without this every iteration would re-read the
         * first eight samples and the tail loop would start at offset 0. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *complexVectorPtr++ = *iBufferPtr++;
        *complexVectorPtr++ = *qBufferPtr++;
    }
}
#endif /* LV_HAVE_AVX */
/* NOTE(review): the LV_HAVE_SSE guard is restored by analogy with the
 * LV_HAVE_GENERIC guard in this file -- confirm against the build
 * configuration. */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*
 * Interleave two float buffers into one buffer of complex samples.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, written to the even float slots of the output
 * qBuffer        second input, written to the odd float slots of the output
 * num_points     number of samples in each input buffer
 *
 * Uses _mm_load_ps / _mm_store_ps, which require 16-byte aligned addresses.
 * Four points are produced per vector iteration; the remaining
 * num_points % 4 points are handled by the scalar tail loop.
 */
static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
                                                     const float* iBuffer,
                                                     const float* qBuffer,
                                                     unsigned int num_points)
{
    unsigned int number = 0;
    float* complexVectorPtr = (float*)complexVector;
    const float* iBufferPtr = iBuffer;
    const float* qBufferPtr = qBuffer;

    const uint64_t quarterPoints = num_points / 4;

    __m128 iValue, qValue, cplxValue;
    for (; number < quarterPoints; number++) {
        iValue = _mm_load_ps(iBufferPtr);
        qValue = _mm_load_ps(qBufferPtr);

        /* i0 q0 i1 q1 */
        cplxValue = _mm_unpacklo_ps(iValue, qValue);
        _mm_store_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 4;

        /* i2 q2 i3 q3 */
        cplxValue = _mm_unpackhi_ps(iValue, qValue);
        _mm_store_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 4;

        /* Advance the inputs; without this every iteration would re-read the
         * first four samples and the tail loop would start at offset 0. */
        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *complexVectorPtr++ = *iBufferPtr++;
        *complexVectorPtr++ = *qBufferPtr++;
    }
}
#endif /* LV_HAVE_SSE */
/* NOTE(review): the LV_HAVE_NEON guard and the <arm_neon.h> include are
 * restored by analogy with the other kernels in this file -- confirm against
 * the build configuration. */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*
 * Interleave two float buffers into one buffer of complex samples.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, written to the even float slots of the output
 * qBuffer        second input, written to the odd float slots of the output
 * num_points     number of samples in each input buffer
 *
 * Four points are produced per vector iteration; the remaining
 * num_points % 4 points are handled by the scalar tail loop.
 */
static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
                                                    const float* iBuffer,
                                                    const float* qBuffer,
                                                    unsigned int num_points)
{
    unsigned int quarter_points = num_points / 4;
    unsigned int number;
    float* complexVectorPtr = (float*)complexVector;

    float32x4x2_t complex_vec;
    for (number = 0; number < quarter_points; ++number) {
        complex_vec.val[0] = vld1q_f32(iBuffer);
        complex_vec.val[1] = vld1q_f32(qBuffer);
        /* vst2q_f32 stores val[0] and val[1] element-interleaved, i.e.
         * i0 q0 i1 q1 i2 q2 i3 q3. */
        vst2q_f32(complexVectorPtr, complex_vec);
        /* Advance the inputs; without this every iteration would re-read the
         * first four samples and the tail loop would start at offset 0. */
        iBuffer += 4;
        qBuffer += 4;
        complexVectorPtr += 8;
    }

    for (number = quarter_points * 4; number < num_points; ++number) {
        *complexVectorPtr++ = *iBuffer++;
        *complexVectorPtr++ = *qBuffer++;
    }
}
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC

/*
 * Interleave two float buffers into one buffer of complex samples
 * (portable scalar implementation).
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, written to the even float slots of the output
 * qBuffer        second input, written to the odd float slots of the output
 * num_points     number of samples in each input buffer
 */
static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
                                                       const float* iBuffer,
                                                       const float* qBuffer,
                                                       unsigned int num_points)
{
    /* The output is written through a float view: two floats per point. */
    float* complexVectorPtr = (float*)complexVector;
    const float* iBufferPtr = iBuffer;
    const float* qBufferPtr = qBuffer;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *complexVectorPtr++ = *iBufferPtr++;
        *complexVectorPtr++ = *qBufferPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
207#ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
208#define INCLUDED_volk_32f_x2_interleave_32fc_u_H
/* NOTE(review): the LV_HAVE_AVX guard is restored by analogy with the
 * LV_HAVE_GENERIC guard in this file -- confirm against the build
 * configuration. */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Interleave two float buffers into one buffer of complex samples.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, written to the even float slots of the output
 * qBuffer        second input, written to the odd float slots of the output
 * num_points     number of samples in each input buffer
 *
 * Uses _mm256_loadu_ps / _mm256_storeu_ps, which place no alignment
 * requirement on the buffers. Eight points are produced per vector iteration;
 * the remaining num_points % 8 points are handled by the scalar tail loop.
 */
static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
                                                     const float* iBuffer,
                                                     const float* qBuffer,
                                                     unsigned int num_points)
{
    unsigned int number = 0;
    float* complexVectorPtr = (float*)complexVector;
    const float* iBufferPtr = iBuffer;
    const float* qBufferPtr = qBuffer;

    const uint64_t eighthPoints = num_points / 8;

    __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
    for (; number < eighthPoints; number++) {
        iValue = _mm256_loadu_ps(iBufferPtr);
        qValue = _mm256_loadu_ps(qBufferPtr);

        /* Unpack works per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5 */
        cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
        /* i2 q2 i3 q3 | i6 q6 i7 q7 */
        cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);

        /* 0x20 joins the two low lanes: points 0..3 in order. */
        cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        _mm256_storeu_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 8;

        /* 0x31 joins the two high lanes: points 4..7 in order. */
        cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
        _mm256_storeu_ps(complexVectorPtr, cplxValue);
        complexVectorPtr += 8;

        /* Advance the inputs; without this every iteration would re-read the
         * first eight samples and the tail loop would start at offset 0. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *complexVectorPtr++ = *iBufferPtr++;
        *complexVectorPtr++ = *qBufferPtr++;
    }
}
#endif /* LV_HAVE_AVX */
/* NOTE(review): the LV_HAVE_RVV guard is restored by analogy with the
 * LV_HAVE_GENERIC guard in this file -- confirm against the build
 * configuration. */
#ifdef LV_HAVE_RVV
#include <riscv_vector.h>

/*
 * Interleave two float buffers into one buffer of complex samples using
 * widening integer arithmetic on the raw 32-bit patterns.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, placed in the low 32 bits of each 64-bit store
 * qBuffer        second input, placed in the high 32 bits of each 64-bit store
 * num_points     number of samples in each input buffer
 */
static inline void volk_32f_x2_interleave_32fc_rvv(lv_32fc_t* complexVector,
                                                   const float* iBuffer,
                                                   const float* qBuffer,
                                                   unsigned int num_points)
{
    /* Each complex sample is written as one 64-bit element. */
    uint64_t* out = (uint64_t*)complexVector;
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vuint32m4_t vr = __riscv_vle32_v_u32m4((const uint32_t*)iBuffer, vl);
        vuint32m4_t vi = __riscv_vle32_v_u32m4((const uint32_t*)qBuffer, vl);
        /* Widening add gives vr + vi; the widening multiply-accumulate adds
         * 0xFFFFFFFF * vi, for a total of vr + (vi << 32). The result must be
         * kept in vc, which is what gets stored below. */
        vuint64m8_t vc =
            __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFFFFFF, vi, vl);
        __riscv_vse64(out, vc, vl);
    }
}
#endif /* LV_HAVE_RVV */
/* NOTE(review): the LV_HAVE_RVVSEG guard is restored by analogy with the
 * LV_HAVE_GENERIC guard in this file -- confirm against the build
 * configuration. */
#ifdef LV_HAVE_RVVSEG
#include <riscv_vector.h>

/*
 * Interleave two float buffers into one buffer of complex samples using a
 * two-field segment store.
 *
 * complexVector  output; receives num_points pairs laid out as
 *                iBuffer[k], qBuffer[k]
 * iBuffer        first input, stored as field 0 of each segment
 * qBuffer        second input, stored as field 1 of each segment
 * num_points     number of samples in each input buffer
 */
static inline void volk_32f_x2_interleave_32fc_rvvseg(lv_32fc_t* complexVector,
                                                      const float* iBuffer,
                                                      const float* qBuffer,
                                                      unsigned int num_points)
{
    size_t n = num_points;
    /* complexVector advances by vl complex elements per pass, matching the
     * vl floats consumed from each input. */
    for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t vr = __riscv_vle32_v_f32m4(iBuffer, vl);
        vfloat32m4_t vi = __riscv_vle32_v_f32m4(qBuffer, vl);
        __riscv_vsseg2e32(
            (float*)complexVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl);
    }
}
#endif /* LV_HAVE_RVVSEG */
/*
 * Symbol cross-reference (left over from the generated documentation listing):
 *
 * static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
 *     Definition: volk_32f_x2_interleave_32fc.h:69
 * static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
 *     Definition: volk_32f_x2_interleave_32fc.h:187
 * static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
 *     Definition: volk_32f_x2_interleave_32fc.h:158
 * static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
 *     Definition: volk_32f_x2_interleave_32fc.h:216
 * static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
 *     Definition: volk_32f_x2_interleave_32fc.h:115
 * float complex lv_32fc_t
 *     Definition: volk_complex.h:74
 */