Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32fc_deinterleave_64f_x2.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
59
60#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
61#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
62
63#include <inttypes.h>
64#include <stdio.h>
65
66#ifdef LV_HAVE_AVX
67#include <immintrin.h>
68
69static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
70 double* qBuffer,
71 const lv_32fc_t* complexVector,
72 unsigned int num_points)
73{
74 unsigned int number = 0;
75
76 const float* complexVectorPtr = (float*)complexVector;
77 double* iBufferPtr = iBuffer;
78 double* qBufferPtr = qBuffer;
79
80 const unsigned int quarterPoints = num_points / 4;
81 __m256 cplxValue;
82 __m128 complexH, complexL, fVal;
83 __m256d dVal;
84
85 for (; number < quarterPoints; number++) {
86
87 cplxValue = _mm256_loadu_ps(complexVectorPtr);
88 complexVectorPtr += 8;
89
90 complexH = _mm256_extractf128_ps(cplxValue, 1);
91 complexL = _mm256_extractf128_ps(cplxValue, 0);
92
93 // Arrange in i1i2i1i2 format
94 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
95 dVal = _mm256_cvtps_pd(fVal);
96 _mm256_storeu_pd(iBufferPtr, dVal);
97
98 // Arrange in q1q2q1q2 format
99 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
100 dVal = _mm256_cvtps_pd(fVal);
101 _mm256_storeu_pd(qBufferPtr, dVal);
102
103 iBufferPtr += 4;
104 qBufferPtr += 4;
105 }
106
107 number = quarterPoints * 4;
108 for (; number < num_points; number++) {
109 *iBufferPtr++ = *complexVectorPtr++;
110 *qBufferPtr++ = *complexVectorPtr++;
111 }
112}
113#endif /* LV_HAVE_AVX */
114
115#ifdef LV_HAVE_SSE2
116#include <emmintrin.h>
117
118static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
119 double* qBuffer,
120 const lv_32fc_t* complexVector,
121 unsigned int num_points)
122{
123 unsigned int number = 0;
124
125 const float* complexVectorPtr = (float*)complexVector;
126 double* iBufferPtr = iBuffer;
127 double* qBufferPtr = qBuffer;
128
129 const unsigned int halfPoints = num_points / 2;
130 __m128 cplxValue, fVal;
131 __m128d dVal;
132
133 for (; number < halfPoints; number++) {
134
135 cplxValue = _mm_loadu_ps(complexVectorPtr);
136 complexVectorPtr += 4;
137
138 // Arrange in i1i2i1i2 format
139 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
140 dVal = _mm_cvtps_pd(fVal);
141 _mm_storeu_pd(iBufferPtr, dVal);
142
143 // Arrange in q1q2q1q2 format
144 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
145 dVal = _mm_cvtps_pd(fVal);
146 _mm_storeu_pd(qBufferPtr, dVal);
147
148 iBufferPtr += 2;
149 qBufferPtr += 2;
150 }
151
152 number = halfPoints * 2;
153 for (; number < num_points; number++) {
154 *iBufferPtr++ = *complexVectorPtr++;
155 *qBufferPtr++ = *complexVectorPtr++;
156 }
157}
158#endif /* LV_HAVE_SSE2 */
159
160#ifdef LV_HAVE_GENERIC
161
162static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
163 double* qBuffer,
164 const lv_32fc_t* complexVector,
165 unsigned int num_points)
166{
167 unsigned int number = 0;
168 const float* complexVectorPtr = (float*)complexVector;
169 double* iBufferPtr = iBuffer;
170 double* qBufferPtr = qBuffer;
171
172 for (number = 0; number < num_points; number++) {
173 *iBufferPtr++ = (double)*complexVectorPtr++;
174 *qBufferPtr++ = (double)*complexVectorPtr++;
175 }
176}
177#endif /* LV_HAVE_GENERIC */
178
179#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
180#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
181#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
182
183#include <inttypes.h>
184#include <stdio.h>
185
186#ifdef LV_HAVE_AVX
187#include <immintrin.h>
188
189static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
190 double* qBuffer,
191 const lv_32fc_t* complexVector,
192 unsigned int num_points)
193{
194 unsigned int number = 0;
195
196 const float* complexVectorPtr = (float*)complexVector;
197 double* iBufferPtr = iBuffer;
198 double* qBufferPtr = qBuffer;
199
200 const unsigned int quarterPoints = num_points / 4;
201 __m256 cplxValue;
202 __m128 complexH, complexL, fVal;
203 __m256d dVal;
204
205 for (; number < quarterPoints; number++) {
206
207 cplxValue = _mm256_load_ps(complexVectorPtr);
208 complexVectorPtr += 8;
209
210 complexH = _mm256_extractf128_ps(cplxValue, 1);
211 complexL = _mm256_extractf128_ps(cplxValue, 0);
212
213 // Arrange in i1i2i1i2 format
214 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
215 dVal = _mm256_cvtps_pd(fVal);
216 _mm256_store_pd(iBufferPtr, dVal);
217
218 // Arrange in q1q2q1q2 format
219 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
220 dVal = _mm256_cvtps_pd(fVal);
221 _mm256_store_pd(qBufferPtr, dVal);
222
223 iBufferPtr += 4;
224 qBufferPtr += 4;
225 }
226
227 number = quarterPoints * 4;
228 for (; number < num_points; number++) {
229 *iBufferPtr++ = *complexVectorPtr++;
230 *qBufferPtr++ = *complexVectorPtr++;
231 }
232}
233#endif /* LV_HAVE_AVX */
234
235#ifdef LV_HAVE_SSE2
236#include <emmintrin.h>
237
238static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
239 double* qBuffer,
240 const lv_32fc_t* complexVector,
241 unsigned int num_points)
242{
243 unsigned int number = 0;
244
245 const float* complexVectorPtr = (float*)complexVector;
246 double* iBufferPtr = iBuffer;
247 double* qBufferPtr = qBuffer;
248
249 const unsigned int halfPoints = num_points / 2;
250 __m128 cplxValue, fVal;
251 __m128d dVal;
252
253 for (; number < halfPoints; number++) {
254
255 cplxValue = _mm_load_ps(complexVectorPtr);
256 complexVectorPtr += 4;
257
258 // Arrange in i1i2i1i2 format
259 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
260 dVal = _mm_cvtps_pd(fVal);
261 _mm_store_pd(iBufferPtr, dVal);
262
263 // Arrange in q1q2q1q2 format
264 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
265 dVal = _mm_cvtps_pd(fVal);
266 _mm_store_pd(qBufferPtr, dVal);
267
268 iBufferPtr += 2;
269 qBufferPtr += 2;
270 }
271
272 number = halfPoints * 2;
273 for (; number < num_points; number++) {
274 *iBufferPtr++ = *complexVectorPtr++;
275 *qBufferPtr++ = *complexVectorPtr++;
276 }
277}
278#endif /* LV_HAVE_SSE2 */
279
280#ifdef LV_HAVE_NEONV8
281#include <arm_neon.h>
282
/*
 * Split an interleaved complex-float vector into two double vectors
 * (ARMv8 NEON).
 *
 * iBuffer       - output; receives num_points real parts widened to double
 * qBuffer       - output; receives num_points imaginary parts widened to double
 * complexVector - input; num_points complex samples, read as interleaved
 *                 float pairs (re, im, re, im, ...)
 * num_points    - number of complex samples to process
 *
 * Two samples are handled per vector iteration; an odd trailing sample is
 * handled by a scalar loop.
 */
static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
                                                      double* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    const unsigned int half_points = num_points / 2;
    /* View the input as floats without dropping its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    double* iBufferPtr = iBuffer;
    double* qBufferPtr = qBuffer;

    for (unsigned int number = 0; number < half_points; number++) {
        /* Deinterleaving load: val[0] gets the even floats, val[1] the odd ones */
        const float32x2x2_t complexInput = vld2_f32(complexVectorPtr);

        /* Widen each pair of floats to a pair of doubles */
        const float64x2_t iVal = vcvt_f64_f32(complexInput.val[0]);
        const float64x2_t qVal = vcvt_f64_f32(complexInput.val[1]);

        vst1q_f64(iBufferPtr, iVal);
        vst1q_f64(qBufferPtr, qVal);

        complexVectorPtr += 4;
        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    /* Scalar tail; the pointers already sit just past the vectorised part. */
    for (unsigned int number = half_points * 2; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
315#endif /* LV_HAVE_NEONV8 */
316
317#ifdef LV_HAVE_RVV
318#include <riscv_vector.h>
319
/*
 * Split an interleaved complex-float vector into two double vectors
 * (RISC-V Vector, without segment loads).
 *
 * iBuffer       - output; receives num_points values widened to double
 * qBuffer       - output; receives num_points values widened to double
 * complexVector - input; num_points complex samples
 * num_points    - number of complex samples to process
 *
 * Each complex sample is loaded as one 64-bit word. A narrowing shift by 0
 * keeps the low 32 bits of every word and a narrowing shift by 32 keeps the
 * high 32 bits; both halves are reinterpreted as floats and widened.
 * NOTE(review): this assumes a little-endian target, so that the low half is
 * the first float (real part) of each sample - confirm.
 */
static inline void volk_32fc_deinterleave_64f_x2_rvv(double* iBuffer,
                                                     double* qBuffer,
                                                     const lv_32fc_t* complexVector,
                                                     unsigned int num_points)
{
    size_t remaining = num_points;

    while (remaining > 0) {
        /* Number of samples the hardware processes in this pass */
        const size_t vl = __riscv_vsetvl_e32m4(remaining);

        /* One 64-bit element per complex sample */
        vuint64m8_t packed = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl);

        /* Low halves go to iBuffer, high halves go to qBuffer */
        vfloat32m4_t lowHalf = __riscv_vreinterpret_f32m4(__riscv_vnsrl(packed, 0, vl));
        vfloat32m4_t highHalf = __riscv_vreinterpret_f32m4(__riscv_vnsrl(packed, 32, vl));

        __riscv_vse64(iBuffer, __riscv_vfwcvt_f(lowHalf, vl), vl);
        __riscv_vse64(qBuffer, __riscv_vfwcvt_f(highHalf, vl), vl);

        remaining -= vl;
        complexVector += vl;
        iBuffer += vl;
        qBuffer += vl;
    }
}
335#endif /*LV_HAVE_RVV*/
336
337#ifdef LV_HAVE_RVVSEG
338#include <riscv_vector.h>
339
/*
 * Split an interleaved complex-float vector into two double vectors
 * (RISC-V Vector, using a two-field segment load).
 *
 * iBuffer       - output; receives num_points real parts widened to double
 * qBuffer       - output; receives num_points imaginary parts widened to double
 * complexVector - input; num_points complex samples, read as interleaved
 *                 float pairs (re, im, re, im, ...)
 * num_points    - number of complex samples to process
 *
 * The segment load deinterleaves the float pairs into two vector registers
 * (field 0 and field 1), which are then widened and stored separately.
 */
static inline void volk_32fc_deinterleave_64f_x2_rvvseg(double* iBuffer,
                                                        double* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    size_t remaining = num_points;

    while (remaining > 0) {
        /* Number of samples the hardware processes in this pass */
        const size_t vl = __riscv_vsetvl_e32m4(remaining);

        /* Field 0 = first float of each pair, field 1 = second float */
        vfloat32m4x2_t fields =
            __riscv_vlseg2e32_v_f32m4x2((const float*)complexVector, vl);
        vfloat32m4_t firstField = __riscv_vget_f32m4(fields, 0);
        vfloat32m4_t secondField = __riscv_vget_f32m4(fields, 1);

        __riscv_vse64(iBuffer, __riscv_vfwcvt_f(firstField, vl), vl);
        __riscv_vse64(qBuffer, __riscv_vfwcvt_f(secondField, vl), vl);

        remaining -= vl;
        complexVector += vl;
        iBuffer += vl;
        qBuffer += vl;
    }
}
355#endif /*LV_HAVE_RVVSEG*/
356
357#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:189
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:118
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:162
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:238
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_64f_x2.h:69
float complex lv_32fc_t
Definition volk_complex.h:74