51#ifndef INCLUDED_volk_64f_convert_32f_u_H
52#define INCLUDED_volk_64f_convert_32f_u_H
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * Convert num_points doubles to floats using AVX-512F with unaligned
 * loads and stores.
 *
 * Built only when LV_HAVE_AVX512F is defined, mirroring the guard used by
 * the aligned AVX-512F kernel in this file.
 *
 * \param outputVector  destination for num_points floats (any alignment)
 * \param inputVector   source of num_points doubles (any alignment)
 * \param num_points    number of elements to convert
 *
 * The main loop handles 16 points per iteration as two groups of 8;
 * the remaining (num_points % 16) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    for (; number < oneSixteenthPoints; number++) {
        /* Each 512-bit load consumes 8 doubles, so step the input by 8
         * after every load; otherwise both loads would read the same data. */
        inputVal1 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;

        /* 8 doubles narrow to 8 floats (one 256-bit register). */
        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the points that do not fill a 16-wide iteration. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Convert num_points doubles to floats using AVX with unaligned loads and
 * stores.
 *
 * Built only when LV_HAVE_AVX is defined (the file's LV_HAVE_<ISA> guard
 * convention).
 *
 * \param outputVector  destination for num_points floats (any alignment)
 * \param inputVector   source of num_points doubles (any alignment)
 * \param num_points    number of elements to convert
 *
 * The main loop handles 8 points per iteration as two groups of 4;
 * the remaining (num_points % 8) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    for (; number < oneEightPoints; number++) {
        /* Each 256-bit load consumes 4 doubles; advance after every load so
         * the second load sees the next 4 values rather than the same ones. */
        inputVal1 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;

        /* 4 doubles narrow to 4 floats (one 128-bit register). */
        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that do not fill an 8-wide iteration. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*
 * Convert num_points doubles to floats using SSE2 with unaligned loads and
 * stores.
 *
 * Built only when LV_HAVE_SSE2 is defined (the file's LV_HAVE_<ISA> guard
 * convention).
 *
 * \param outputVector  destination for num_points floats (any alignment)
 * \param inputVector   source of num_points doubles (any alignment)
 * \param num_points    number of elements to convert
 *
 * The main loop handles 4 points per iteration as two groups of 2;
 * the remaining (num_points % 4) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    for (; number < quarterPoints; number++) {
        /* Each 128-bit load consumes 2 doubles; advance after every load so
         * the second load picks up the following pair. */
        inputVal1 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves its 2 floats in the low half of the register. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack: low half of ret2 becomes the high half of ret -> 4 floats. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that do not fill a 4-wide iteration. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC

/*
 * Portable reference kernel: convert num_points doubles to floats with a
 * plain C cast, one element at a time.
 *
 * \param outputVector  destination for num_points floats
 * \param inputVector   source of num_points doubles
 * \param num_points    number of elements to convert; 0 writes nothing
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */
194#ifndef INCLUDED_volk_64f_convert_32f_a_H
195#define INCLUDED_volk_64f_convert_32f_a_H
#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*
 * Convert num_points doubles to floats using AVX-512F with aligned loads
 * and stores.
 *
 * \param outputVector  destination for num_points floats; written with
 *                      _mm256_store_ps, so it must be 32-byte aligned
 * \param inputVector   source of num_points doubles; read with
 *                      _mm512_load_pd, so it must be 64-byte aligned
 * \param num_points    number of elements to convert
 *
 * The main loop handles 16 points per iteration as two groups of 8;
 * the remaining (num_points % 16) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    for (; number < oneSixteenthPoints; number++) {
        /* Each 512-bit load consumes 8 doubles (64 bytes), so stepping by 8
         * keeps the pointer aligned and moves on to fresh data. */
        inputVal1 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;

        /* 8 doubles narrow to 8 floats (one 256-bit register). */
        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the points that do not fill a 16-wide iteration. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*
 * Convert num_points doubles to floats using AVX with aligned loads and
 * stores.
 *
 * Built only when LV_HAVE_AVX is defined (the file's LV_HAVE_<ISA> guard
 * convention).
 *
 * \param outputVector  destination for num_points floats; written with
 *                      _mm_store_ps, so it must be 16-byte aligned
 * \param inputVector   source of num_points doubles; read with
 *                      _mm256_load_pd, so it must be 32-byte aligned
 * \param num_points    number of elements to convert
 *
 * The main loop handles 8 points per iteration as two groups of 4;
 * the remaining (num_points % 8) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    for (; number < oneEightPoints; number++) {
        /* Each 256-bit load consumes 4 doubles (32 bytes), so stepping by 4
         * keeps the pointer aligned and moves on to fresh data. */
        inputVal1 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;

        /* 4 doubles narrow to 4 floats (one 128-bit register). */
        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that do not fill an 8-wide iteration. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*
 * Convert num_points doubles to floats using SSE2 with aligned loads and
 * stores.
 *
 * Built only when LV_HAVE_SSE2 is defined (the file's LV_HAVE_<ISA> guard
 * convention).
 *
 * \param outputVector  destination for num_points floats; written with
 *                      _mm_store_ps, so it must be 16-byte aligned
 * \param inputVector   source of num_points doubles; read with
 *                      _mm_load_pd, so it must be 16-byte aligned
 * \param num_points    number of elements to convert
 *
 * The main loop handles 4 points per iteration as two groups of 2;
 * the remaining (num_points % 4) points are converted with a scalar cast.
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    for (; number < quarterPoints; number++) {
        /* Each 128-bit load consumes 2 doubles (16 bytes), so stepping by 2
         * keeps the pointer aligned and moves on to the following pair. */
        inputVal1 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion leaves its 2 floats in the low half of the register. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack: low half of ret2 becomes the high half of ret -> 4 floats. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_store_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that do not fill a 4-wide iteration. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_RVV
#include <riscv_vector.h>

/*
 * Convert num_points doubles to floats using the RISC-V Vector extension.
 *
 * The guard name follows the file's LV_HAVE_<ISA> convention.
 *
 * \param outputVector  destination for num_points floats
 * \param inputVector   source of num_points doubles
 * \param num_points    number of elements to convert
 *
 * Each pass asks the hardware how many 64-bit elements (vl) it will process
 * for the remaining count, converts that many, then advances both pointers
 * and the remaining count by vl. No scalar tail is needed because the final
 * pass simply runs with a shorter vl.
 */
static inline void volk_64f_convert_32f_rvv(float* outputVector,
                                            const double* inputVector,
                                            unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
        vl = __riscv_vsetvl_e64m8(n);
        vfloat64m8_t v = __riscv_vle64_v_f64m8(inputVector, vl);
        /* Narrowing convert f64 -> f32, then store vl floats. */
        __riscv_vse32(outputVector, __riscv_vfncvt_f(v, vl), vl);
    }
}
#endif /* LV_HAVE_RVV */
/*
 * Cross-references (from the generated documentation listing):
 *
 *   static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *       Definition: volk_64f_convert_32f.h:100
 *   static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *       Definition: volk_64f_convert_32f.h:178
 *   static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *       Definition: volk_64f_convert_32f.h:243
 *   static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *       Definition: volk_64f_convert_32f.h:140
 *   static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *       Definition: volk_64f_convert_32f.h:283
 */