Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_16ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
42
43#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
44#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
45
46#include <inttypes.h>
47#include <stdio.h>
48#include <volk/volk_common.h>
49
50#ifdef LV_HAVE_AVX2
51#include <immintrin.h>
52
53static inline void
54volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
55 const lv_16sc_t* complexVector,
56 const float scalar,
57 unsigned int num_points)
58{
59 float* iBufferPtr = iBuffer;
60
61 unsigned int number = 0;
62 const unsigned int eighthPoints = num_points / 8;
63
64 __m256 iFloatValue;
65
66 const float iScalar = 1.0 / scalar;
67 __m256 invScalar = _mm256_set1_ps(iScalar);
68 __m256i complexVal, iIntVal;
69 __m128i complexVal128;
70 int8_t* complexVectorPtr = (int8_t*)complexVector;
71
72 __m256i moveMask = _mm256_set_epi8(0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 13,
81 12,
82 9,
83 8,
84 5,
85 4,
86 1,
87 0,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 0x80,
94 0x80,
95 0x80,
96 13,
97 12,
98 9,
99 8,
100 5,
101 4,
102 1,
103 0);
104
105 for (; number < eighthPoints; number++) {
106 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
107 complexVectorPtr += 32;
108 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
109 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
110 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
111
112 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
113 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
114
115 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116
117 _mm256_store_ps(iBufferPtr, iFloatValue);
118
119 iBufferPtr += 8;
120 }
121
122 number = eighthPoints * 8;
123 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
124 for (; number < num_points; number++) {
125 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
126 sixteenTComplexVectorPtr++;
127 }
128}
129#endif /* LV_HAVE_AVX2 */
130
131#ifdef LV_HAVE_SSE4_1
132#include <smmintrin.h>
133
134static inline void
135volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
136 const lv_16sc_t* complexVector,
137 const float scalar,
138 unsigned int num_points)
139{
140 float* iBufferPtr = iBuffer;
141
142 unsigned int number = 0;
143 const unsigned int quarterPoints = num_points / 4;
144
145 __m128 iFloatValue;
146
147 const float iScalar = 1.0 / scalar;
148 __m128 invScalar = _mm_set_ps1(iScalar);
149 __m128i complexVal, iIntVal;
150 int8_t* complexVectorPtr = (int8_t*)complexVector;
151
152 __m128i moveMask = _mm_set_epi8(
153 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
154
155 for (; number < quarterPoints; number++) {
156 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
157 complexVectorPtr += 16;
158 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
159
160 iIntVal = _mm_cvtepi16_epi32(complexVal);
161 iFloatValue = _mm_cvtepi32_ps(iIntVal);
162
163 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
164
165 _mm_store_ps(iBufferPtr, iFloatValue);
166
167 iBufferPtr += 4;
168 }
169
170 number = quarterPoints * 4;
171 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
172 for (; number < num_points; number++) {
173 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
174 sixteenTComplexVectorPtr++;
175 }
176}
177#endif /* LV_HAVE_SSE4_1 */
178
179#ifdef LV_HAVE_SSE
180#include <xmmintrin.h>
181
182static inline void
184 const lv_16sc_t* complexVector,
185 const float scalar,
186 unsigned int num_points)
187{
188 float* iBufferPtr = iBuffer;
189
190 unsigned int number = 0;
191 const unsigned int quarterPoints = num_points / 4;
192 __m128 iValue;
193
194 const float iScalar = 1.0 / scalar;
195 __m128 invScalar = _mm_set_ps1(iScalar);
196 int16_t* complexVectorPtr = (int16_t*)complexVector;
197
198 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
199
200 for (; number < quarterPoints; number++) {
201 floatBuffer[0] = (float)(*complexVectorPtr);
202 complexVectorPtr += 2;
203 floatBuffer[1] = (float)(*complexVectorPtr);
204 complexVectorPtr += 2;
205 floatBuffer[2] = (float)(*complexVectorPtr);
206 complexVectorPtr += 2;
207 floatBuffer[3] = (float)(*complexVectorPtr);
208 complexVectorPtr += 2;
209
210 iValue = _mm_load_ps(floatBuffer);
211
212 iValue = _mm_mul_ps(iValue, invScalar);
213
214 _mm_store_ps(iBufferPtr, iValue);
215
216 iBufferPtr += 4;
217 }
218
219 number = quarterPoints * 4;
220 complexVectorPtr = (int16_t*)&complexVector[number];
221 for (; number < num_points; number++) {
222 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
223 complexVectorPtr++;
224 }
225}
226#endif /* LV_HAVE_SSE */
227
228#ifdef LV_HAVE_GENERIC
229static inline void
231 const lv_16sc_t* complexVector,
232 const float scalar,
233 unsigned int num_points)
234{
235 unsigned int number = 0;
236 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
237 float* iBufferPtr = iBuffer;
238 const float invScalar = 1.0 / scalar;
239 for (number = 0; number < num_points; number++) {
240 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
241 complexVectorPtr++;
242 }
243}
244#endif /* LV_HAVE_GENERIC */
245
246
247#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
248
249#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
250#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
251
252#include <inttypes.h>
253#include <stdio.h>
254#include <volk/volk_common.h>
255
256#ifdef LV_HAVE_AVX2
257#include <immintrin.h>
258
259static inline void
260volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
261 const lv_16sc_t* complexVector,
262 const float scalar,
263 unsigned int num_points)
264{
265 float* iBufferPtr = iBuffer;
266
267 unsigned int number = 0;
268 const unsigned int eighthPoints = num_points / 8;
269
270 __m256 iFloatValue;
271
272 const float iScalar = 1.0 / scalar;
273 __m256 invScalar = _mm256_set1_ps(iScalar);
274 __m256i complexVal, iIntVal;
275 __m128i complexVal128;
276 int8_t* complexVectorPtr = (int8_t*)complexVector;
277
278 __m256i moveMask = _mm256_set_epi8(0x80,
279 0x80,
280 0x80,
281 0x80,
282 0x80,
283 0x80,
284 0x80,
285 0x80,
286 13,
287 12,
288 9,
289 8,
290 5,
291 4,
292 1,
293 0,
294 0x80,
295 0x80,
296 0x80,
297 0x80,
298 0x80,
299 0x80,
300 0x80,
301 0x80,
302 13,
303 12,
304 9,
305 8,
306 5,
307 4,
308 1,
309 0);
310
311 for (; number < eighthPoints; number++) {
312 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
313 complexVectorPtr += 32;
314 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
315 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
316 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
317
318 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
319 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
320
321 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
322
323 _mm256_storeu_ps(iBufferPtr, iFloatValue);
324
325 iBufferPtr += 8;
326 }
327
328 number = eighthPoints * 8;
329 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
330 for (; number < num_points; number++) {
331 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
332 sixteenTComplexVectorPtr++;
333 }
334}
335#endif /* LV_HAVE_AVX2 */
336
337#ifdef LV_HAVE_RVV
338#include <riscv_vector.h>
339
/*!
 * \brief Extracts the real component of interleaved 16-bit complex samples,
 *        converts it to float and multiplies it by 1 / scalar (RISC-V Vector).
 *
 * Each complex sample is loaded as one 32-bit word; the vector length is
 * re-negotiated every pass, so no separate scalar tail is needed.
 *
 * \param iBuffer       Output buffer receiving num_points floats.
 * \param complexVector Input samples, one 32-bit word per complex value.
 * \param scalar        Divisor applied to every output value.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_real_32f_rvv(float* iBuffer,
                                         const lv_16sc_t* complexVector,
                                         const float scalar,
                                         unsigned int num_points)
{
    const int32_t* src = (const int32_t*)complexVector;
    float* dst = iBuffer;
    size_t remaining = num_points;

    while (remaining > 0) {
        /* Ask the hardware how many 32-bit elements it will handle this pass. */
        const size_t vl = __riscv_vsetvl_e32m8(remaining);

        vint32m8_t packed = __riscv_vle32_v_i32m8(src, vl);
        /* Narrowing keeps the low 16 bits of every word; on a little-endian
         * target that is the first int16 of the pair. */
        vint16m4_t lowHalf = __riscv_vncvt_x(packed, vl);
        /* Widening conversion int16 -> float32. */
        vfloat32m8_t asFloat = __riscv_vfwcvt_f(lowHalf, vl);

        __riscv_vse32(dst, __riscv_vfmul(asFloat, 1.0f / scalar, vl), vl);

        remaining -= vl;
        src += vl;
        dst += vl;
    }
}
355#endif /*LV_HAVE_RVV*/
356
357#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_16ic_s32f_deinterleave_real_32f.h:230
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_16ic_s32f_deinterleave_real_32f.h:183
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
short complex lv_16sc_t
Definition volk_complex.h:71