52#ifndef INCLUDED_volk_32f_sqrt_32f_a_H
53#define INCLUDED_volk_32f_sqrt_32f_a_H
65 unsigned int number = 0;
66 const unsigned int quarterPoints = num_points / 4;
68 float* cPtr = cVector;
69 const float* aPtr = aVector;
72 for (; number < quarterPoints; number++) {
73 aVal = _mm_load_ps(aPtr);
75 cVal = _mm_sqrt_ps(aVal);
77 _mm_store_ps(cPtr, cVal);
83 number = quarterPoints * 4;
84 for (; number < num_points; number++) {
85 *cPtr++ = sqrtf(*aPtr++);
/*!
 * \brief Computes the element-wise square root of a float vector (AVX, aligned).
 *
 * Processes eight floats per iteration with _mm256_sqrt_ps and finishes the
 * remaining (num_points % 8) elements with sqrtf.
 *
 * \param cVector    Output buffer receiving num_points results. Written with
 *                   _mm256_store_ps, so it must be 32-byte aligned.
 * \param aVector    Input buffer of num_points floats. Read with
 *                   _mm256_load_ps, so it must be 32-byte aligned.
 * \param num_points Number of floats to process; 0 is a no-op.
 */
static inline void
volk_32f_sqrt_32f_a_avx(float* cVector, const float* aVector, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm256_load_ps(aPtr);

        cVal = _mm256_sqrt_ps(aVal);

        _mm256_store_ps(cPtr, cVal);

        /* Step to the next group of eight so each vector is handled once and
         * the scalar tail starts right after the last full vector. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the elements that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = sqrtf(*aPtr++);
    }
}
/*!
 * \brief Computes the element-wise square root of a float vector using NEON.
 *
 * Processes four floats per iteration and finishes the remaining
 * (num_points % 4) elements with sqrtf.
 *
 * On AArch64 the vector body uses vsqrtq_f32, so its results agree with the
 * scalar tail. On 32-bit ARM, where that intrinsic is unavailable, it falls
 * back to the reciprocal of the reciprocal-square-root estimate
 * (vrecpeq_f32(vrsqrteq_f32(x))), which is only an approximation.
 *
 * \param cVector    Output buffer receiving num_points results.
 * \param aVector    Input buffer of num_points floats.
 * \param num_points Number of floats to process; 0 is a no-op.
 */
static inline void
volk_32f_sqrt_32f_neon(float* cVector, const float* aVector, unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    float32x4_t in_vec, out_vec;

    for (number = 0; number < quarter_points; number++) {
        in_vec = vld1q_f32(aPtr);
#if defined(__aarch64__)
        /* A64 provides a true vector square root. */
        out_vec = vsqrtq_f32(in_vec);
#else
        /* ARMv7 NEON has no vector sqrt: sqrt(x) ~= 1 / (1 / sqrt(x)),
         * built from two hardware estimates. */
        out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
#endif
        vst1q_f32(cPtr, out_vec);

        /* Step to the next group of four so each vector is handled once and
         * the scalar tail starts right after the last full vector. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the elements that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *cPtr++ = sqrtf(*aPtr++);
    }
}
153#ifdef LV_HAVE_GENERIC
158 float* cPtr = cVector;
159 const float* aPtr = aVector;
160 unsigned int number = 0;
162 for (number = 0; number < num_points; number++) {
163 *cPtr++ = sqrtf(*aPtr++);
171#ifndef INCLUDED_volk_32f_sqrt_32f_u_H
172#define INCLUDED_volk_32f_sqrt_32f_u_H
178#include <immintrin.h>
/*!
 * \brief Computes the element-wise square root of a float vector (AVX, unaligned).
 *
 * Processes eight floats per iteration with _mm256_sqrt_ps and finishes the
 * remaining (num_points % 8) elements with sqrtf. Uses _mm256_loadu_ps and
 * _mm256_storeu_ps, so neither buffer needs any particular alignment.
 *
 * \param cVector    Output buffer receiving num_points results.
 * \param aVector    Input buffer of num_points floats.
 * \param num_points Number of floats to process; 0 is a no-op.
 */
static inline void
volk_32f_sqrt_32f_u_avx(float* cVector, const float* aVector, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    __m256 aVal, cVal;
    for (; number < eighthPoints; number++) {
        aVal = _mm256_loadu_ps(aPtr);

        cVal = _mm256_sqrt_ps(aVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Step to the next group of eight so each vector is handled once and
         * the scalar tail starts right after the last full vector. */
        aPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the elements that do not fill a whole vector. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        *cPtr++ = sqrtf(*aPtr++);
    }
}
210#include <riscv_vector.h>
/*!
 * \brief Computes the element-wise square root of a float vector using the
 *        RISC-V vector extension.
 *
 * Each iteration asks __riscv_vsetvl_e32m8 how many 32-bit elements (vl) it
 * may handle out of the n still pending, loads that many floats, takes their
 * square root with __riscv_vfsqrt, stores them, and then advances both
 * pointers and decrements n by vl. No separate scalar tail is needed because
 * the last iteration simply runs with a shorter vl.
 *
 * \param cVector    Output buffer receiving num_points results.
 * \param aVector    Input buffer of num_points floats.
 * \param num_points Number of floats to process; 0 is a no-op.
 */
static inline void
volk_32f_sqrt_32f_rvv(float* cVector, const float* aVector, unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl);
        __riscv_vse32(cVector, __riscv_vfsqrt(v, vl), vl);
    }
}
static void volk_32f_sqrt_32f_neon(float *cVector, const float *aVector, unsigned int num_points)
Definition volk_32f_sqrt_32f.h:128
static void volk_32f_sqrt_32f_a_avx(float *cVector, const float *aVector, unsigned int num_points)
Definition volk_32f_sqrt_32f.h:95
static void volk_32f_sqrt_32f_a_sse(float *cVector, const float *aVector, unsigned int num_points)
Definition volk_32f_sqrt_32f.h:63
static void volk_32f_sqrt_32f_u_avx(float *cVector, const float *aVector, unsigned int num_points)
Definition volk_32f_sqrt_32f.h:181
static void volk_32f_sqrt_32f_generic(float *cVector, const float *aVector, unsigned int num_points)
Definition volk_32f_sqrt_32f.h:156