51#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52#define INCLUDED_volk_32i_s32f_convert_32f_u_H
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (AVX-512F, unaligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int onesixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 16 elements per iteration, unaligned load and store. */
    for (; number < onesixteenthPoints; number++) {
        const __m512i inputVal = _mm512_loadu_si512(inputPtr);

        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_storeu_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 16 inputs are
         * converted on every iteration. */
        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail for the remaining num_points % 16 elements. */
    number = onesixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (AVX2, unaligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int oneEightPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 8 elements per iteration, unaligned load and store. */
    for (; number < oneEightPoints; number++) {
        const __m256i inputVal = _mm256_loadu_si256((const __m256i*)inputPtr);

        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_storeu_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 8 inputs are
         * converted on every iteration. */
        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 elements. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
136#include <emmintrin.h>
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (SSE2, unaligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 4 elements per iteration, unaligned load and store. */
    for (; number < quarterPoints; number++) {
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);

        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_storeu_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 4 inputs are
         * converted on every iteration. */
        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
174#ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (portable C implementation).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const int32_t* inputVectorPtr = inputVector;
    unsigned int number = 0;
    /* The reciprocal is computed once so the loop multiplies instead of divides. */
    const float iScalar = 1.0 / scalar;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
    }
}
195#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196#define INCLUDED_volk_32i_s32f_convert_32f_a_H
201#ifdef LV_HAVE_AVX512F
202#include <immintrin.h>
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (AVX-512F, aligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 * The vector body uses _mm512_load_si512 / _mm512_store_ps, which require
 * 64-byte aligned addresses.
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int onesixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 16 elements per iteration, aligned load and store. */
    for (; number < onesixteenthPoints; number++) {
        const __m512i inputVal = _mm512_load_si512(inputPtr);

        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_store_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 16 inputs are
         * converted on every iteration. */
        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail for the remaining num_points % 16 elements. */
    number = onesixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
240#include <immintrin.h>
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (AVX2, aligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 * The vector body uses _mm256_load_si256 / _mm256_store_ps, which require
 * 32-byte aligned addresses.
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int oneEightPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 8 elements per iteration, aligned load and store. */
    for (; number < oneEightPoints; number++) {
        const __m256i inputVal = _mm256_load_si256((const __m256i*)inputPtr);

        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_store_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 8 inputs are
         * converted on every iteration. */
        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail for the remaining num_points % 8 elements. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
279#include <emmintrin.h>
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (SSE2, aligned buffers).
 *
 * Each output element is ((float)inputVector[i]) * (1.0 / scalar).
 * The vector body uses _mm_load_si128 / _mm_store_ps, which require
 * 16-byte aligned addresses.
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    const int32_t* inputPtr = inputVector;

    /* Vector body: 4 elements per iteration, aligned load and store. */
    for (; number < quarterPoints; number++) {
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);

        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_store_ps(outputVectorPtr, ret);

        /* Both cursors must advance, otherwise the same 4 inputs are
         * converted on every iteration. */
        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 elements. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
317#include <riscv_vector.h>
/*!
 * \brief Converts 32-bit signed integers to floats and scales them by 1/scalar
 *        (RISC-V Vector extension).
 *
 * \param outputVector Destination buffer, at least num_points floats.
 * \param inputVector  Source buffer, at least num_points int32_t values.
 * \param scalar       Divisor applied to every converted value.
 * \param num_points   Number of elements to convert.
 */
static inline void volk_32i_s32f_convert_32f_rvv(float* outputVector,
                                                 const int32_t* inputVector,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    size_t n = num_points;
    /* Each pass asks vsetvl for a vector length vl for the n elements still
     * pending, then advances both pointers and the remaining count by vl. */
    for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t v = __riscv_vfcvt_f(__riscv_vle32_v_i32m8(inputVector, vl), vl);
        __riscv_vse32(outputVector, __riscv_vfmul(v, 1.0f / scalar, vl), vl);
    }
}
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:138
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:281
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition volk_32i_s32f_convert_32f.h:176