60#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
61#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
/*
 * Body fragment of volk_32fc_deinterleave_64f_x2_u_avx (the cross-reference
 * entries at the end of this listing place its definition at line 69).
 *
 * Splits an interleaved buffer of float pairs into two double buffers using
 * AVX with unaligned loads/stores: the first float of every pair is widened
 * and written through iBufferPtr, the second through qBufferPtr.
 *
 * NOTE(review): this is a numbered listing with gaps. The start of the
 * signature, the braces and the declarations of cplxValue / dVal are not
 * visible here - confirm against the original header.
 */
72 unsigned int num_points)
74 unsigned int number = 0;
/* View the complex input as a flat float array: two floats per point. */
76 const float* complexVectorPtr = (
float*)complexVector;
77 double* iBufferPtr = iBuffer;
78 double* qBufferPtr = qBuffer;
/* Each vector iteration consumes 8 floats, i.e. 4 complex points. */
80 const unsigned int quarterPoints = num_points / 4;
82 __m128 complexH, complexL, fVal;
85 for (; number < quarterPoints; number++) {
/* Unaligned 256-bit load of 8 consecutive floats. */
87 cplxValue = _mm256_loadu_ps(complexVectorPtr);
88 complexVectorPtr += 8;
/* Split into upper (floats 4..7) and lower (floats 0..3) 128-bit halves. */
90 complexH = _mm256_extractf128_ps(cplxValue, 1);
91 complexL = _mm256_extractf128_ps(cplxValue, 0);
/* Elements 0 and 2 of each half: the first float of each of the 4 pairs. */
94 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
/* Widen 4 floats to 4 doubles and store them unaligned. */
95 dVal = _mm256_cvtps_pd(fVal);
96 _mm256_storeu_pd(iBufferPtr, dVal);
/* Elements 1 and 3 of each half: the second float of each of the 4 pairs. */
99 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
100 dVal = _mm256_cvtps_pd(fVal);
101 _mm256_storeu_pd(qBufferPtr, dVal);
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible inside the
 * vector loop; the embedded numbering skips lines 97 and 102-103, so it is
 * presumably on lines omitted from this listing - confirm.
 */
/* Scalar tail: handle the num_points % 4 points the vector loop left over. */
107 number = quarterPoints * 4;
108 for (; number < num_points; number++) {
/* float -> double happens through the implicit assignment conversion. */
109 *iBufferPtr++ = *complexVectorPtr++;
110 *qBufferPtr++ = *complexVectorPtr++;
116#include <emmintrin.h>
/*
 * Body fragment of volk_32fc_deinterleave_64f_x2_u_sse2 (the cross-reference
 * entries at the end of this listing place its definition at line 118).
 *
 * SSE2 variant with unaligned loads/stores: processes two complex points per
 * iteration, widening the first float of each pair into iBufferPtr and the
 * second into qBufferPtr.
 *
 * NOTE(review): the start of the signature, the braces and the declaration
 * of dVal are not visible in this listing - confirm against the original.
 */
121 unsigned int num_points)
123 unsigned int number = 0;
/* View the complex input as a flat float array: two floats per point. */
125 const float* complexVectorPtr = (
float*)complexVector;
126 double* iBufferPtr = iBuffer;
127 double* qBufferPtr = qBuffer;
/* Each vector iteration consumes 4 floats, i.e. 2 complex points. */
129 const unsigned int halfPoints = num_points / 2;
130 __m128 cplxValue, fVal;
133 for (; number < halfPoints; number++) {
/* Unaligned 128-bit load of 4 consecutive floats. */
135 cplxValue = _mm_loadu_ps(complexVectorPtr);
136 complexVectorPtr += 4;
/* Move elements 0 and 2 into the two low lanes; _mm_cvtps_pd only widens
 * the two low floats, so the duplicated high lanes are ignored. */
139 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
140 dVal = _mm_cvtps_pd(fVal);
141 _mm_storeu_pd(iBufferPtr, dVal);
/* Same for elements 1 and 3: the second float of each pair. */
144 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
145 dVal = _mm_cvtps_pd(fVal);
146 _mm_storeu_pd(qBufferPtr, dVal);
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible inside the
 * vector loop; the embedded numbering skips lines 142 and 147-148, so it is
 * presumably on lines omitted from this listing - confirm.
 */
/* Scalar tail: handles the one remaining point when num_points is odd. */
152 number = halfPoints * 2;
153 for (; number < num_points; number++) {
154 *iBufferPtr++ = *complexVectorPtr++;
155 *qBufferPtr++ = *complexVectorPtr++;
160#ifdef LV_HAVE_GENERIC
/*
 * Body fragment of volk_32fc_deinterleave_64f_x2_generic (the cross-reference
 * entries at the end of this listing place its definition at line 162).
 *
 * Portable scalar version: for each of num_points points, reads two
 * consecutive floats and writes them, converted to double, to the I and Q
 * output buffers respectively.
 *
 * NOTE(review): the start of the signature and the braces are not visible in
 * this listing.
 */
165 unsigned int num_points)
167 unsigned int number = 0;
/* View the complex input as a flat float array: two floats per point. */
168 const float* complexVectorPtr = (
float*)complexVector;
169 double* iBufferPtr = iBuffer;
170 double* qBufferPtr = qBuffer;
172 for (number = 0; number < num_points; number++) {
/* First float of the pair goes to I, second to Q; both pointers advance. */
173 *iBufferPtr++ = (double)*complexVectorPtr++;
174 *qBufferPtr++ = (double)*complexVectorPtr++;
180#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
181#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
187#include <immintrin.h>
/*
 * Body fragment of volk_32fc_deinterleave_64f_x2_a_avx (the cross-reference
 * entries at the end of this listing place its definition at line 189).
 *
 * Aligned AVX variant: same data flow as the unaligned AVX path, but it uses
 * _mm256_load_ps / _mm256_store_pd, which require 32-byte-aligned addresses.
 *
 * NOTE(review): the start of the signature, the braces and the declarations
 * of cplxValue / dVal are not visible in this listing - confirm against the
 * original header.
 */
192 unsigned int num_points)
194 unsigned int number = 0;
/* View the complex input as a flat float array: two floats per point. */
196 const float* complexVectorPtr = (
float*)complexVector;
197 double* iBufferPtr = iBuffer;
198 double* qBufferPtr = qBuffer;
/* Each vector iteration consumes 8 floats, i.e. 4 complex points. */
200 const unsigned int quarterPoints = num_points / 4;
202 __m128 complexH, complexL, fVal;
205 for (; number < quarterPoints; number++) {
/* Aligned 256-bit load of 8 consecutive floats. */
207 cplxValue = _mm256_load_ps(complexVectorPtr);
208 complexVectorPtr += 8;
/* Split into upper (floats 4..7) and lower (floats 0..3) 128-bit halves. */
210 complexH = _mm256_extractf128_ps(cplxValue, 1);
211 complexL = _mm256_extractf128_ps(cplxValue, 0);
/* Elements 0 and 2 of each half: the first float of each of the 4 pairs. */
214 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
/* Widen 4 floats to 4 doubles and store them to an aligned address. */
215 dVal = _mm256_cvtps_pd(fVal);
216 _mm256_store_pd(iBufferPtr, dVal);
/* Elements 1 and 3 of each half: the second float of each of the 4 pairs. */
219 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
220 dVal = _mm256_cvtps_pd(fVal);
221 _mm256_store_pd(qBufferPtr, dVal);
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible inside the
 * vector loop; the embedded numbering skips lines 217 and 222-223, so it is
 * presumably on lines omitted from this listing - confirm.
 */
/* Scalar tail: handle the num_points % 4 points the vector loop left over. */
227 number = quarterPoints * 4;
228 for (; number < num_points; number++) {
229 *iBufferPtr++ = *complexVectorPtr++;
230 *qBufferPtr++ = *complexVectorPtr++;
236#include <emmintrin.h>
/*
 * Body fragment of volk_32fc_deinterleave_64f_x2_a_sse2 (the cross-reference
 * entries at the end of this listing place its definition at line 238).
 *
 * Aligned SSE2 variant: same data flow as the unaligned SSE2 path, but it
 * uses _mm_load_ps / _mm_store_pd, which require 16-byte-aligned addresses.
 *
 * NOTE(review): the start of the signature, the braces and the declaration
 * of dVal are not visible in this listing - confirm against the original.
 */
241 unsigned int num_points)
243 unsigned int number = 0;
/* View the complex input as a flat float array: two floats per point. */
245 const float* complexVectorPtr = (
float*)complexVector;
246 double* iBufferPtr = iBuffer;
247 double* qBufferPtr = qBuffer;
/* Each vector iteration consumes 4 floats, i.e. 2 complex points. */
249 const unsigned int halfPoints = num_points / 2;
250 __m128 cplxValue, fVal;
253 for (; number < halfPoints; number++) {
/* Aligned 128-bit load of 4 consecutive floats. */
255 cplxValue = _mm_load_ps(complexVectorPtr);
256 complexVectorPtr += 4;
/* Move elements 0 and 2 into the two low lanes; _mm_cvtps_pd only widens
 * the two low floats, so the duplicated high lanes are ignored. */
259 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
260 dVal = _mm_cvtps_pd(fVal);
261 _mm_store_pd(iBufferPtr, dVal);
/* Same for elements 1 and 3: the second float of each pair. */
264 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
265 dVal = _mm_cvtps_pd(fVal);
266 _mm_store_pd(qBufferPtr, dVal);
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible inside the
 * vector loop; the embedded numbering skips lines 262 and 267-268, so it is
 * presumably on lines omitted from this listing - confirm.
 */
/* Scalar tail: handles the one remaining point when num_points is odd. */
272 number = halfPoints * 2;
273 for (; number < num_points; number++) {
274 *iBufferPtr++ = *complexVectorPtr++;
275 *qBufferPtr++ = *complexVectorPtr++;
/*
 * NEON variant: de-interleaves float pairs into two double buffers, two
 * complex points per vector iteration, with a scalar loop for the remainder.
 *
 * NOTE(review): the body uses qBuffer and complexVector, but their parameter
 * declarations (listing lines 284-285) and the braces are not visible in
 * this listing - confirm against the original header.
 */
283static inline void volk_32fc_deinterleave_64f_x2_neon(
double* iBuffer,
286 unsigned int num_points)
288 unsigned int number = 0;
/* Each vector iteration consumes 4 floats, i.e. 2 complex points. */
289 unsigned int half_points = num_points / 2;
/* View the complex input as a flat float array: two floats per point. */
290 const float* complexVectorPtr = (
float*)complexVector;
291 double* iBufferPtr = iBuffer;
292 double* qBufferPtr = qBuffer;
293 float32x2x2_t complexInput;
294 float64x2_t iVal, qVal;
296 for (number = 0; number < half_points; number++) {
/* De-interleaving load of 4 floats: val[0] receives elements 0 and 2,
 * val[1] receives elements 1 and 3. */
297 complexInput = vld2_f32(complexVectorPtr);
/* Widen each 2 x float vector to 2 x double. */
299 iVal = vcvt_f64_f32(complexInput.val[0]);
300 qVal = vcvt_f64_f32(complexInput.val[1]);
302 vst1q_f64(iBufferPtr, iVal);
303 vst1q_f64(qBufferPtr, qVal);
305 complexVectorPtr += 4;
/*
 * NOTE(review): no advance of iBufferPtr / qBufferPtr is visible inside the
 * vector loop; the embedded numbering skips lines 306-307, so it is
 * presumably on lines omitted from this listing - confirm.
 */
/* Scalar tail: handles the one remaining point when num_points is odd. */
310 for (number = half_points * 2; number < num_points; number++) {
311 *iBufferPtr++ = (double)*complexVectorPtr++;
312 *qBufferPtr++ = (double)*complexVectorPtr++;
318#include <riscv_vector.h>
/*
 * RISC-V Vector variant that separates the two floats of each point with
 * narrowing shifts instead of a segmented load.
 *
 * NOTE(review): the body uses qBuffer and complexVector, but their parameter
 * declarations and the braces are not visible in this listing - confirm
 * against the original header.
 */
320static inline void volk_32fc_deinterleave_64f_x2_rvv(
double* iBuffer,
323 unsigned int num_points)
/* n counts the points still to be processed. */
325 size_t n = num_points;
/* Each pass handles vl points, then advances all three pointers by vl. */
326 for (
size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
/* Ask the hardware how many e32/m4 elements it will process this pass. */
327 vl = __riscv_vsetvl_e32m4(n);
/* Load each complex point (two 32-bit floats) as one 64-bit integer. */
328 vuint64m8_t vc = __riscv_vle64_v_u64m8((
const uint64_t*)complexVector, vl);
/* Narrowing shift by 0 keeps the low 32 bits of each element, shift by 32
 * keeps the high 32 bits; the bits are then reinterpreted as floats.
 * Presumably low = first float in memory (little-endian) - confirm. */
329 vfloat32m4_t vr = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 0, vl));
330 vfloat32m4_t vi = __riscv_vreinterpret_f32m4(__riscv_vnsrl(vc, 32, vl));
/* Widen float -> double and store vl results to each output buffer. */
331 __riscv_vse64(iBuffer, __riscv_vfwcvt_f(vr, vl), vl);
332 __riscv_vse64(qBuffer, __riscv_vfwcvt_f(vi, vl), vl);
338#include <riscv_vector.h>
/*
 * RISC-V Vector variant that de-interleaves with a two-field segmented load.
 *
 * NOTE(review): the body uses qBuffer and complexVector, but their parameter
 * declarations and the braces are not visible in this listing - confirm
 * against the original header.
 */
340static inline void volk_32fc_deinterleave_64f_x2_rvvseg(
double* iBuffer,
343 unsigned int num_points)
/* n counts the points still to be processed. */
345 size_t n = num_points;
/* Each pass handles vl points, then advances all three pointers by vl. */
346 for (
size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
/* Ask the hardware how many e32/m4 elements it will process this pass. */
347 vl = __riscv_vsetvl_e32m4(n);
/* Segmented load: field 0 gets the first float of every pair, field 1 the
 * second. */
348 vfloat32m4x2_t vc = __riscv_vlseg2e32_v_f32m4x2((
const float*)complexVector, vl);
349 vfloat32m4_t vr = __riscv_vget_f32m4(vc, 0);
350 vfloat32m4_t vi = __riscv_vget_f32m4(vc, 1);
/* Widen float -> double and store vl results to each output buffer. */
351 __riscv_vse64(iBuffer, __riscv_vfwcvt_f(vr, vl), vl);
352 __riscv_vse64(qBuffer, __riscv_vfwcvt_f(vi, vl), vl);
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:189
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:118
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:162
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:238
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:69
float complex lv_32fc_t
Definition volk_complex.h:74