Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32fc_deinterleave_32f_x2.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
59
60#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
61#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
62
63#include <inttypes.h>
64#include <stdio.h>
65
66#ifdef LV_HAVE_AVX
67#include <immintrin.h>
68static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
69 float* qBuffer,
70 const lv_32fc_t* complexVector,
71 unsigned int num_points)
72{
73 const float* complexVectorPtr = (float*)complexVector;
74 float* iBufferPtr = iBuffer;
75 float* qBufferPtr = qBuffer;
76
77 unsigned int number = 0;
78 // Mask for real and imaginary parts
79 const unsigned int eighthPoints = num_points / 8;
80 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
81 for (; number < eighthPoints; number++) {
82 cplxValue1 = _mm256_load_ps(complexVectorPtr);
83 complexVectorPtr += 8;
84
85 cplxValue2 = _mm256_load_ps(complexVectorPtr);
86 complexVectorPtr += 8;
87
88 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
89 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
90
91 // Arrange in i1i2i3i4 format
92 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
93 // Arrange in q1q2q3q4 format
94 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
95
96 _mm256_store_ps(iBufferPtr, iValue);
97 _mm256_store_ps(qBufferPtr, qValue);
98
99 iBufferPtr += 8;
100 qBufferPtr += 8;
101 }
102
103 number = eighthPoints * 8;
104 for (; number < num_points; number++) {
105 *iBufferPtr++ = *complexVectorPtr++;
106 *qBufferPtr++ = *complexVectorPtr++;
107 }
108}
109#endif /* LV_HAVE_AVX */
110
111#ifdef LV_HAVE_SSE
112#include <xmmintrin.h>
113
114static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
115 float* qBuffer,
116 const lv_32fc_t* complexVector,
117 unsigned int num_points)
118{
119 const float* complexVectorPtr = (float*)complexVector;
120 float* iBufferPtr = iBuffer;
121 float* qBufferPtr = qBuffer;
122
123 unsigned int number = 0;
124 const unsigned int quarterPoints = num_points / 4;
125 __m128 cplxValue1, cplxValue2, iValue, qValue;
126 for (; number < quarterPoints; number++) {
127 cplxValue1 = _mm_load_ps(complexVectorPtr);
128 complexVectorPtr += 4;
129
130 cplxValue2 = _mm_load_ps(complexVectorPtr);
131 complexVectorPtr += 4;
132
133 // Arrange in i1i2i3i4 format
134 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
135 // Arrange in q1q2q3q4 format
136 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
137
138 _mm_store_ps(iBufferPtr, iValue);
139 _mm_store_ps(qBufferPtr, qValue);
140
141 iBufferPtr += 4;
142 qBufferPtr += 4;
143 }
144
145 number = quarterPoints * 4;
146 for (; number < num_points; number++) {
147 *iBufferPtr++ = *complexVectorPtr++;
148 *qBufferPtr++ = *complexVectorPtr++;
149 }
150}
151#endif /* LV_HAVE_SSE */
152
153
154#ifdef LV_HAVE_NEON
155#include <arm_neon.h>
156
157static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
158 float* qBuffer,
159 const lv_32fc_t* complexVector,
160 unsigned int num_points)
161{
162 unsigned int number = 0;
163 unsigned int quarter_points = num_points / 4;
164 const float* complexVectorPtr = (float*)complexVector;
165 float* iBufferPtr = iBuffer;
166 float* qBufferPtr = qBuffer;
167 float32x4x2_t complexInput;
168
169 for (number = 0; number < quarter_points; number++) {
170 complexInput = vld2q_f32(complexVectorPtr);
171 vst1q_f32(iBufferPtr, complexInput.val[0]);
172 vst1q_f32(qBufferPtr, complexInput.val[1]);
173 complexVectorPtr += 8;
174 iBufferPtr += 4;
175 qBufferPtr += 4;
176 }
177
178 for (number = quarter_points * 4; number < num_points; number++) {
179 *iBufferPtr++ = *complexVectorPtr++;
180 *qBufferPtr++ = *complexVectorPtr++;
181 }
182}
183#endif /* LV_HAVE_NEON */
184
185
186#ifdef LV_HAVE_GENERIC
187
188static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
189 float* qBuffer,
190 const lv_32fc_t* complexVector,
191 unsigned int num_points)
192{
193 const float* complexVectorPtr = (float*)complexVector;
194 float* iBufferPtr = iBuffer;
195 float* qBufferPtr = qBuffer;
196 unsigned int number;
197 for (number = 0; number < num_points; number++) {
198 *iBufferPtr++ = *complexVectorPtr++;
199 *qBufferPtr++ = *complexVectorPtr++;
200 }
201}
202#endif /* LV_HAVE_GENERIC */
203
204#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
205
206
207#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
208#define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
209
210#include <inttypes.h>
211#include <stdio.h>
212
213#ifdef LV_HAVE_AVX
214#include <immintrin.h>
215static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
216 float* qBuffer,
217 const lv_32fc_t* complexVector,
218 unsigned int num_points)
219{
220 const float* complexVectorPtr = (float*)complexVector;
221 float* iBufferPtr = iBuffer;
222 float* qBufferPtr = qBuffer;
223
224 unsigned int number = 0;
225 // Mask for real and imaginary parts
226 const unsigned int eighthPoints = num_points / 8;
227 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
228 for (; number < eighthPoints; number++) {
229 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
230 complexVectorPtr += 8;
231
232 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
233 complexVectorPtr += 8;
234
235 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
236 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
237
238 // Arrange in i1i2i3i4 format
239 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
240 // Arrange in q1q2q3q4 format
241 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
242
243 _mm256_storeu_ps(iBufferPtr, iValue);
244 _mm256_storeu_ps(qBufferPtr, qValue);
245
246 iBufferPtr += 8;
247 qBufferPtr += 8;
248 }
249
250 number = eighthPoints * 8;
251 for (; number < num_points; number++) {
252 *iBufferPtr++ = *complexVectorPtr++;
253 *qBufferPtr++ = *complexVectorPtr++;
254 }
255}
256#endif /* LV_HAVE_AVX */
257
258#ifdef LV_HAVE_RVV
259#include <riscv_vector.h>
260
261static inline void volk_32fc_deinterleave_32f_x2_rvv(float* iBuffer,
262 float* qBuffer,
263 const lv_32fc_t* complexVector,
264 unsigned int num_points)
265{
266 size_t n = num_points;
267 for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
268 vl = __riscv_vsetvl_e32m4(n);
269 vuint64m8_t vc = __riscv_vle64_v_u64m8((const uint64_t*)complexVector, vl);
270 vuint32m4_t vr = __riscv_vnsrl(vc, 0, vl);
271 vuint32m4_t vi = __riscv_vnsrl(vc, 32, vl);
272 __riscv_vse32((uint32_t*)iBuffer, vr, vl);
273 __riscv_vse32((uint32_t*)qBuffer, vi, vl);
274 }
275}
276#endif /*LV_HAVE_RVV*/
277
278#ifdef LV_HAVE_RVVSEG
279#include <riscv_vector.h>
280
281static inline void volk_32fc_deinterleave_32f_x2_rvvseg(float* iBuffer,
282 float* qBuffer,
283 const lv_32fc_t* complexVector,
284 unsigned int num_points)
285{
286 size_t n = num_points;
287 for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
288 vl = __riscv_vsetvl_e32m4(n);
289 vuint32m4x2_t vc =
290 __riscv_vlseg2e32_v_u32m4x2((const uint32_t*)complexVector, vl);
291 vuint32m4_t vr = __riscv_vget_u32m4(vc, 0);
292 vuint32m4_t vi = __riscv_vget_u32m4(vc, 1);
293 __riscv_vse32((uint32_t*)iBuffer, vr, vl);
294 __riscv_vse32((uint32_t*)qBuffer, vi, vl);
295 }
296}
297#endif /*LV_HAVE_RVVSEG*/
298
299#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:188
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:68
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:114
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:157
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition volk_32fc_deinterleave_32f_x2.h:215
float complex lv_32fc_t
Definition volk_complex.h:74