60#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
61#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
71 unsigned int num_points)
73 const float* complexVectorPtr = (
float*)complexVector;
74 float* iBufferPtr = iBuffer;
75 float* qBufferPtr = qBuffer;
77 unsigned int number = 0;
79 const unsigned int eighthPoints = num_points / 8;
80 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
81 for (; number < eighthPoints; number++) {
82 cplxValue1 = _mm256_load_ps(complexVectorPtr);
83 complexVectorPtr += 8;
85 cplxValue2 = _mm256_load_ps(complexVectorPtr);
86 complexVectorPtr += 8;
88 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
89 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
92 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
94 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
96 _mm256_store_ps(iBufferPtr, iValue);
97 _mm256_store_ps(qBufferPtr, qValue);
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *iBufferPtr++ = *complexVectorPtr++;
106 *qBufferPtr++ = *complexVectorPtr++;
112#include <xmmintrin.h>
117 unsigned int num_points)
119 const float* complexVectorPtr = (
float*)complexVector;
120 float* iBufferPtr = iBuffer;
121 float* qBufferPtr = qBuffer;
123 unsigned int number = 0;
124 const unsigned int quarterPoints = num_points / 4;
125 __m128 cplxValue1, cplxValue2, iValue, qValue;
126 for (; number < quarterPoints; number++) {
127 cplxValue1 = _mm_load_ps(complexVectorPtr);
128 complexVectorPtr += 4;
130 cplxValue2 = _mm_load_ps(complexVectorPtr);
131 complexVectorPtr += 4;
134 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
136 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
138 _mm_store_ps(iBufferPtr, iValue);
139 _mm_store_ps(qBufferPtr, qValue);
145 number = quarterPoints * 4;
146 for (; number < num_points; number++) {
147 *iBufferPtr++ = *complexVectorPtr++;
148 *qBufferPtr++ = *complexVectorPtr++;
160 unsigned int num_points)
162 unsigned int number = 0;
163 unsigned int quarter_points = num_points / 4;
164 const float* complexVectorPtr = (
float*)complexVector;
165 float* iBufferPtr = iBuffer;
166 float* qBufferPtr = qBuffer;
167 float32x4x2_t complexInput;
169 for (number = 0; number < quarter_points; number++) {
170 complexInput = vld2q_f32(complexVectorPtr);
171 vst1q_f32(iBufferPtr, complexInput.val[0]);
172 vst1q_f32(qBufferPtr, complexInput.val[1]);
173 complexVectorPtr += 8;
178 for (number = quarter_points * 4; number < num_points; number++) {
179 *iBufferPtr++ = *complexVectorPtr++;
180 *qBufferPtr++ = *complexVectorPtr++;
186#ifdef LV_HAVE_GENERIC
191 unsigned int num_points)
193 const float* complexVectorPtr = (
float*)complexVector;
194 float* iBufferPtr = iBuffer;
195 float* qBufferPtr = qBuffer;
197 for (number = 0; number < num_points; number++) {
198 *iBufferPtr++ = *complexVectorPtr++;
199 *qBufferPtr++ = *complexVectorPtr++;
207#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
208#define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
214#include <immintrin.h>
218 unsigned int num_points)
220 const float* complexVectorPtr = (
float*)complexVector;
221 float* iBufferPtr = iBuffer;
222 float* qBufferPtr = qBuffer;
224 unsigned int number = 0;
226 const unsigned int eighthPoints = num_points / 8;
227 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
228 for (; number < eighthPoints; number++) {
229 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
230 complexVectorPtr += 8;
232 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
233 complexVectorPtr += 8;
235 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
236 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
239 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
241 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
243 _mm256_storeu_ps(iBufferPtr, iValue);
244 _mm256_storeu_ps(qBufferPtr, qValue);
250 number = eighthPoints * 8;
251 for (; number < num_points; number++) {
252 *iBufferPtr++ = *complexVectorPtr++;
253 *qBufferPtr++ = *complexVectorPtr++;
259#include <riscv_vector.h>
261static inline void volk_32fc_deinterleave_32f_x2_rvv(
float* iBuffer,
264 unsigned int num_points)
266 size_t n = num_points;
267 for (
size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
268 vl = __riscv_vsetvl_e32m4(n);
269 vuint64m8_t vc = __riscv_vle64_v_u64m8((
const uint64_t*)complexVector, vl);
270 vuint32m4_t vr = __riscv_vnsrl(vc, 0, vl);
271 vuint32m4_t vi = __riscv_vnsrl(vc, 32, vl);
272 __riscv_vse32((uint32_t*)iBuffer, vr, vl);
273 __riscv_vse32((uint32_t*)qBuffer, vi, vl);
279#include <riscv_vector.h>
281static inline void volk_32fc_deinterleave_32f_x2_rvvseg(
float* iBuffer,
284 unsigned int num_points)
286 size_t n = num_points;
287 for (
size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
288 vl = __riscv_vsetvl_e32m4(n);
290 __riscv_vlseg2e32_v_u32m4x2((
const uint32_t*)complexVector, vl);
291 vuint32m4_t vr = __riscv_vget_u32m4(vc, 0);
292 vuint32m4_t vi = __riscv_vget_u32m4(vc, 1);
293 __riscv_vse32((uint32_t*)iBuffer, vr, vl);
294 __riscv_vse32((uint32_t*)qBuffer, vi, vl);
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:188
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:68
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:114
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:157
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:215
float complex lv_32fc_t
Definition volk_complex.h:74