Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_8ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
41
42#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
43#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
44
45#include <inttypes.h>
46#include <stdio.h>
47#include <volk/volk_common.h>
48
49#ifdef LV_HAVE_AVX2
50#include <immintrin.h>
51
52static inline void
53volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
54 const lv_8sc_t* complexVector,
55 const float scalar,
56 unsigned int num_points)
57{
58 float* iBufferPtr = iBuffer;
59
60 unsigned int number = 0;
61 const unsigned int sixteenthPoints = num_points / 16;
62 __m256 iFloatValue;
63
64 const float iScalar = 1.0 / scalar;
65 __m256 invScalar = _mm256_set1_ps(iScalar);
66 __m256i complexVal, iIntVal;
67 int8_t* complexVectorPtr = (int8_t*)complexVector;
68
69 __m256i moveMask = _mm256_set_epi8(0x80,
70 0x80,
71 0x80,
72 0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 14,
78 12,
79 10,
80 8,
81 6,
82 4,
83 2,
84 0,
85 0x80,
86 0x80,
87 0x80,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 14,
94 12,
95 10,
96 8,
97 6,
98 4,
99 2,
100 0);
101 for (; number < sixteenthPoints; number++) {
102 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
103 complexVectorPtr += 32;
104 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
105
106 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
107 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
108 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
109 _mm256_store_ps(iBufferPtr, iFloatValue);
110 iBufferPtr += 8;
111
112 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
113 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
114 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116 _mm256_store_ps(iBufferPtr, iFloatValue);
117 iBufferPtr += 8;
118 }
119
120 number = sixteenthPoints * 16;
121 for (; number < num_points; number++) {
122 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
123 complexVectorPtr++;
124 }
125}
126#endif /* LV_HAVE_AVX2 */
127
128
129#ifdef LV_HAVE_SSE4_1
130#include <smmintrin.h>
131
132static inline void
133volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
134 const lv_8sc_t* complexVector,
135 const float scalar,
136 unsigned int num_points)
137{
138 float* iBufferPtr = iBuffer;
139
140 unsigned int number = 0;
141 const unsigned int eighthPoints = num_points / 8;
142 __m128 iFloatValue;
143
144 const float iScalar = 1.0 / scalar;
145 __m128 invScalar = _mm_set_ps1(iScalar);
146 __m128i complexVal, iIntVal;
147 int8_t* complexVectorPtr = (int8_t*)complexVector;
148
149 __m128i moveMask = _mm_set_epi8(
150 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
151
152 for (; number < eighthPoints; number++) {
153 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
154 complexVectorPtr += 16;
155 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
156
157 iIntVal = _mm_cvtepi8_epi32(complexVal);
158 iFloatValue = _mm_cvtepi32_ps(iIntVal);
159
160 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
161
162 _mm_store_ps(iBufferPtr, iFloatValue);
163
164 iBufferPtr += 4;
165
166 complexVal = _mm_srli_si128(complexVal, 4);
167 iIntVal = _mm_cvtepi8_epi32(complexVal);
168 iFloatValue = _mm_cvtepi32_ps(iIntVal);
169
170 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
171
172 _mm_store_ps(iBufferPtr, iFloatValue);
173
174 iBufferPtr += 4;
175 }
176
177 number = eighthPoints * 8;
178 for (; number < num_points; number++) {
179 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
180 complexVectorPtr++;
181 }
182}
183#endif /* LV_HAVE_SSE4_1 */
184
185
186#ifdef LV_HAVE_SSE
187#include <xmmintrin.h>
188
189static inline void
191 const lv_8sc_t* complexVector,
192 const float scalar,
193 unsigned int num_points)
194{
195 float* iBufferPtr = iBuffer;
196
197 unsigned int number = 0;
198 const unsigned int quarterPoints = num_points / 4;
199 __m128 iValue;
200
201 const float iScalar = 1.0 / scalar;
202 __m128 invScalar = _mm_set_ps1(iScalar);
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
204
205 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
206
207 for (; number < quarterPoints; number++) {
208 floatBuffer[0] = (float)(*complexVectorPtr);
209 complexVectorPtr += 2;
210 floatBuffer[1] = (float)(*complexVectorPtr);
211 complexVectorPtr += 2;
212 floatBuffer[2] = (float)(*complexVectorPtr);
213 complexVectorPtr += 2;
214 floatBuffer[3] = (float)(*complexVectorPtr);
215 complexVectorPtr += 2;
216
217 iValue = _mm_load_ps(floatBuffer);
218
219 iValue = _mm_mul_ps(iValue, invScalar);
220
221 _mm_store_ps(iBufferPtr, iValue);
222
223 iBufferPtr += 4;
224 }
225
226 number = quarterPoints * 4;
227 for (; number < num_points; number++) {
228 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
229 complexVectorPtr++;
230 }
231}
232#endif /* LV_HAVE_SSE */
233
234
235#ifdef LV_HAVE_GENERIC
236
237static inline void
239 const lv_8sc_t* complexVector,
240 const float scalar,
241 unsigned int num_points)
242{
243 unsigned int number = 0;
244 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
245 float* iBufferPtr = iBuffer;
246 const float invScalar = 1.0 / scalar;
247 for (number = 0; number < num_points; number++) {
248 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
249 complexVectorPtr++;
250 }
251}
252#endif /* LV_HAVE_GENERIC */
253
254
255#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
256
257#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
258#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
259
260#include <inttypes.h>
261#include <stdio.h>
262#include <volk/volk_common.h>
263
264#ifdef LV_HAVE_AVX2
265#include <immintrin.h>
266
267static inline void
268volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
269 const lv_8sc_t* complexVector,
270 const float scalar,
271 unsigned int num_points)
272{
273 float* iBufferPtr = iBuffer;
274
275 unsigned int number = 0;
276 const unsigned int sixteenthPoints = num_points / 16;
277 __m256 iFloatValue;
278
279 const float iScalar = 1.0 / scalar;
280 __m256 invScalar = _mm256_set1_ps(iScalar);
281 __m256i complexVal, iIntVal;
282 __m128i hcomplexVal;
283 int8_t* complexVectorPtr = (int8_t*)complexVector;
284
285 __m256i moveMask = _mm256_set_epi8(0x80,
286 0x80,
287 0x80,
288 0x80,
289 0x80,
290 0x80,
291 0x80,
292 0x80,
293 14,
294 12,
295 10,
296 8,
297 6,
298 4,
299 2,
300 0,
301 0x80,
302 0x80,
303 0x80,
304 0x80,
305 0x80,
306 0x80,
307 0x80,
308 0x80,
309 14,
310 12,
311 10,
312 8,
313 6,
314 4,
315 2,
316 0);
317
318 for (; number < sixteenthPoints; number++) {
319 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
320 complexVectorPtr += 32;
321 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
322
323 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
324 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
325 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
326
327 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
328
329 _mm256_storeu_ps(iBufferPtr, iFloatValue);
330
331 iBufferPtr += 8;
332
333 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
334 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
335 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
336
337 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
338
339 _mm256_storeu_ps(iBufferPtr, iFloatValue);
340
341 iBufferPtr += 8;
342 }
343
344 number = sixteenthPoints * 16;
345 for (; number < num_points; number++) {
346 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
347 complexVectorPtr++;
348 }
349}
350#endif /* LV_HAVE_AVX2 */
351
352#ifdef LV_HAVE_RVV
353#include <riscv_vector.h>
354
355static inline void volk_8ic_s32f_deinterleave_real_32f_rvv(float* iBuffer,
356 const lv_8sc_t* complexVector,
357 const float scalar,
358 unsigned int num_points)
359{
360 const uint16_t* in = (const uint16_t*)complexVector;
361 size_t n = num_points;
362 for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
363 vl = __riscv_vsetvl_e16m4(n);
364 vuint16m4_t vc = __riscv_vle16_v_u16m4(in, vl);
365 vint8m2_t vr = __riscv_vreinterpret_i8m2(__riscv_vnsrl(vc, 0, vl));
366 vfloat32m8_t vrf = __riscv_vfwcvt_f(__riscv_vsext_vf2(vr, vl), vl);
367 __riscv_vse32(iBuffer, __riscv_vfmul(vrf, 1.0f / scalar, vl), vl);
368 }
369}
370#endif /*LV_HAVE_RVV*/
371
372#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_8ic_s32f_deinterleave_real_32f.h:238
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_8ic_s32f_deinterleave_real_32f.h:190
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70