Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32i_s32f_convert_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
50
51#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52#define INCLUDED_volk_32i_s32f_convert_32f_u_H
53
54#include <inttypes.h>
55#include <stdio.h>
56
57#ifdef LV_HAVE_AVX512F
58#include <immintrin.h>
59
60static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
61 const int32_t* inputVector,
62 const float scalar,
63 unsigned int num_points)
64{
65 unsigned int number = 0;
66 const unsigned int onesixteenthPoints = num_points / 16;
67
68 float* outputVectorPtr = outputVector;
69 const float iScalar = 1.0 / scalar;
70 __m512 invScalar = _mm512_set1_ps(iScalar);
71 int32_t* inputPtr = (int32_t*)inputVector;
72 __m512i inputVal;
73 __m512 ret;
74
75 for (; number < onesixteenthPoints; number++) {
76 // Load the values
77 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
78
79 ret = _mm512_cvtepi32_ps(inputVal);
80 ret = _mm512_mul_ps(ret, invScalar);
81
82 _mm512_storeu_ps(outputVectorPtr, ret);
83
84 outputVectorPtr += 16;
85 inputPtr += 16;
86 }
87
88 number = onesixteenthPoints * 16;
89 for (; number < num_points; number++) {
90 outputVector[number] = ((float)(inputVector[number])) * iScalar;
91 }
92}
93#endif /* LV_HAVE_AVX512F */
94
95
96#ifdef LV_HAVE_AVX2
97#include <immintrin.h>
98
99static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
100 const int32_t* inputVector,
101 const float scalar,
102 unsigned int num_points)
103{
104 unsigned int number = 0;
105 const unsigned int oneEightPoints = num_points / 8;
106
107 float* outputVectorPtr = outputVector;
108 const float iScalar = 1.0 / scalar;
109 __m256 invScalar = _mm256_set1_ps(iScalar);
110 int32_t* inputPtr = (int32_t*)inputVector;
111 __m256i inputVal;
112 __m256 ret;
113
114 for (; number < oneEightPoints; number++) {
115 // Load the 4 values
116 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
117
118 ret = _mm256_cvtepi32_ps(inputVal);
119 ret = _mm256_mul_ps(ret, invScalar);
120
121 _mm256_storeu_ps(outputVectorPtr, ret);
122
123 outputVectorPtr += 8;
124 inputPtr += 8;
125 }
126
127 number = oneEightPoints * 8;
128 for (; number < num_points; number++) {
129 outputVector[number] = ((float)(inputVector[number])) * iScalar;
130 }
131}
132#endif /* LV_HAVE_AVX2 */
133
134
135#ifdef LV_HAVE_SSE2
136#include <emmintrin.h>
137
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by
 *        1 / \p scalar using SSE2 with unaligned loads and stores.
 *
 * The reciprocal of \p scalar is computed once, so every element costs one
 * conversion and one multiplication. Four elements are handled per vector
 * iteration; the remaining (num_points % 4) elements use the scalar loop.
 *
 * \param outputVector destination buffer receiving num_points floats
 * \param inputVector  source buffer holding num_points int32_t values (read only)
 * \param scalar       divisor applied to every converted value
 * \param num_points   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    // The input is only read, so the const qualifier is kept on the cursor.
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        // Load 4 integers (unaligned load)
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

        // Convert to float, then scale by the reciprocal
        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    // Scalar tail for the elements that do not fill a whole vector
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
171#endif /* LV_HAVE_SSE2 */
172
173
174#ifdef LV_HAVE_GENERIC
175
/*!
 * \brief Portable reference kernel: converts 32-bit signed integers to floats
 *        and scales each one by 1 / \p scalar.
 *
 * \param outputVector destination buffer receiving num_points floats
 * \param inputVector  source buffer holding num_points int32_t values
 * \param scalar       divisor applied to every converted value
 * \param num_points   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    // One division up front; the loop body only multiplies.
    const float reciprocal = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        const float asFloat = (float)inputVector[idx];
        outputVector[idx] = asFloat * reciprocal;
    }
}
190#endif /* LV_HAVE_GENERIC */
191
192#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
193
194
195#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196#define INCLUDED_volk_32i_s32f_convert_32f_a_H
197
198#include <inttypes.h>
199#include <stdio.h>
200
201#ifdef LV_HAVE_AVX512F
202#include <immintrin.h>
203
204static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
205 const int32_t* inputVector,
206 const float scalar,
207 unsigned int num_points)
208{
209 unsigned int number = 0;
210 const unsigned int onesixteenthPoints = num_points / 16;
211
212 float* outputVectorPtr = outputVector;
213 const float iScalar = 1.0 / scalar;
214 __m512 invScalar = _mm512_set1_ps(iScalar);
215 int32_t* inputPtr = (int32_t*)inputVector;
216 __m512i inputVal;
217 __m512 ret;
218
219 for (; number < onesixteenthPoints; number++) {
220 // Load the values
221 inputVal = _mm512_load_si512((__m512i*)inputPtr);
222
223 ret = _mm512_cvtepi32_ps(inputVal);
224 ret = _mm512_mul_ps(ret, invScalar);
225
226 _mm512_store_ps(outputVectorPtr, ret);
227
228 outputVectorPtr += 16;
229 inputPtr += 16;
230 }
231
232 number = onesixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = ((float)(inputVector[number])) * iScalar;
235 }
236}
237#endif /* LV_HAVE_AVX512F */
238
239#ifdef LV_HAVE_AVX2
240#include <immintrin.h>
241
242static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
243 const int32_t* inputVector,
244 const float scalar,
245 unsigned int num_points)
246{
247 unsigned int number = 0;
248 const unsigned int oneEightPoints = num_points / 8;
249
250 float* outputVectorPtr = outputVector;
251 const float iScalar = 1.0 / scalar;
252 __m256 invScalar = _mm256_set1_ps(iScalar);
253 int32_t* inputPtr = (int32_t*)inputVector;
254 __m256i inputVal;
255 __m256 ret;
256
257 for (; number < oneEightPoints; number++) {
258 // Load the 4 values
259 inputVal = _mm256_load_si256((__m256i*)inputPtr);
260
261 ret = _mm256_cvtepi32_ps(inputVal);
262 ret = _mm256_mul_ps(ret, invScalar);
263
264 _mm256_store_ps(outputVectorPtr, ret);
265
266 outputVectorPtr += 8;
267 inputPtr += 8;
268 }
269
270 number = oneEightPoints * 8;
271 for (; number < num_points; number++) {
272 outputVector[number] = ((float)(inputVector[number])) * iScalar;
273 }
274}
275#endif /* LV_HAVE_AVX2 */
276
277
278#ifdef LV_HAVE_SSE2
279#include <emmintrin.h>
280
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by
 *        1 / \p scalar using SSE2 with aligned loads and stores.
 *
 * The vector loop uses the aligned load/store intrinsics, so both buffers
 * must be 16-byte aligned. Four elements are handled per vector iteration;
 * the remaining (num_points % 4) elements use the scalar loop.
 *
 * \param outputVector destination buffer receiving num_points floats
 * \param inputVector  source buffer holding num_points int32_t values (read only)
 * \param scalar       divisor applied to every converted value
 * \param num_points   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    // The input is only read, so the const qualifier is kept on the cursor.
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        // Load 4 integers (aligned load)
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);

        // Convert to float, then scale by the reciprocal
        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    // Scalar tail for the elements that do not fill a whole vector
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
314#endif /* LV_HAVE_SSE2 */
315
316#ifdef LV_HAVE_RVV
317#include <riscv_vector.h>
318
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by
 *        1 / \p scalar using the RISC-V Vector extension.
 *
 * The loop is strip-mined: each pass asks the hardware how many 32-bit
 * elements (LMUL = 8) it will process, handles that many, and advances both
 * pointers by the same amount until no elements remain.
 *
 * \param outputVector destination buffer receiving num_points floats
 * \param inputVector  source buffer holding num_points int32_t values
 * \param scalar       divisor applied to every converted value
 * \param num_points   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_rvv(float* outputVector,
                                                 const int32_t* inputVector,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    // Loop-invariant reciprocal, computed once instead of on every pass.
    const float invScalar = 1.0f / scalar;

    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t v = __riscv_vfcvt_f(__riscv_vle32_v_i32m8(inputVector, vl), vl);
        __riscv_vse32(outputVector, __riscv_vfmul(v, invScalar, vl), vl);
    }
}
331#endif /*LV_HAVE_RVV*/
332
333#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:138
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:281
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:176