53#ifndef INCLUDED_volk_32f_reciprocal_32f_a_H
54#define INCLUDED_volk_32f_reciprocal_32f_a_H
60 for (
unsigned int i = 0;
i < num_points;
i++) {
71 const __m128 ONE = _mm_set_ps1(1.f);
72 const unsigned int quarter_points = num_points / 4;
74 for (
unsigned int number = 0; number < quarter_points; number++) {
75 __m128 x = _mm_load_ps(in);
77 __m128 r = _mm_div_ps(ONE, x);
82 const unsigned int done = quarter_points * 4;
93 const __m256 ONE = _mm256_set1_ps(1.f);
94 const unsigned int eighth_points = num_points / 8;
96 for (
unsigned int number = 0; number < eighth_points; number++) {
97 __m256 x = _mm256_load_ps(in);
99 __m256 r = _mm256_div_ps(ONE, x);
100 _mm256_store_ps(out, r);
104 const unsigned int done = eighth_points * 8;
110#ifdef LV_HAVE_AVX512F
111#include <immintrin.h>
113volk_32f_reciprocal_32f_a_avx512(
float* out,
const float* in,
unsigned int num_points)
115 const unsigned int sixteenth_points = num_points / 16;
117 for (
unsigned int number = 0; number < sixteenth_points; number++) {
118 __m512 x = _mm512_load_ps(in);
120 __m512 r = _mm512_rcp14_ps(x);
121 _mm512_store_ps(out, r);
125 const unsigned int done = sixteenth_points * 16;
133#ifndef INCLUDED_volk_32f_reciprocal_32f_u_H
134#define INCLUDED_volk_32f_reciprocal_32f_u_H
137#include <xmmintrin.h>
141 const __m128 ONE = _mm_set_ps1(1.f);
142 const unsigned int quarter_points = num_points / 4;
144 for (
unsigned int number = 0; number < quarter_points; number++) {
145 __m128 x = _mm_loadu_ps(in);
147 __m128 r = _mm_div_ps(ONE, x);
148 _mm_storeu_ps(out, r);
152 const unsigned int done = quarter_points * 4;
159#include <immintrin.h>
163 const __m256 ONE = _mm256_set1_ps(1.f);
164 const unsigned int eighth_points = num_points / 8;
166 for (
unsigned int number = 0; number < eighth_points; number++) {
167 __m256 x = _mm256_loadu_ps(in);
169 __m256 r = _mm256_div_ps(ONE, x);
170 _mm256_storeu_ps(out, r);
174 const unsigned int done = eighth_points * 8;
180#ifdef LV_HAVE_AVX512F
181#include <immintrin.h>
183volk_32f_reciprocal_32f_u_avx512(
float* out,
const float* in,
unsigned int num_points)
185 const unsigned int sixteenth_points = num_points / 16;
187 for (
unsigned int number = 0; number < sixteenth_points; number++) {
188 __m512 x = _mm512_loadu_ps(in);
190 __m512 r = _mm512_rcp14_ps(x);
191 _mm512_storeu_ps(out, r);
195 const unsigned int done = sixteenth_points * 16;
202#include <riscv_vector.h>
205volk_32f_reciprocal_32f_rvv(
float* out,
const float* in,
unsigned int num_points)
207 size_t n = num_points;
208 for (
size_t vl; n > 0; n -= vl, in += vl, out += vl) {
209 vl = __riscv_vsetvl_e32m8(n);
210 vfloat32m8_t v = __riscv_vle32_v_f32m8(in, vl);
211 __riscv_vse32(out, __riscv_vfrdiv(v, 1.0f, vl), vl);
static void volk_32f_reciprocal_32f_a_sse(float *out, const float *in, unsigned int num_points)
Definition volk_32f_reciprocal_32f.h:69
static void volk_32f_reciprocal_32f_u_avx(float *out, const float *in, unsigned int num_points)
Definition volk_32f_reciprocal_32f.h:161
static void volk_32f_reciprocal_32f_a_avx(float *out, const float *in, unsigned int num_points)
Definition volk_32f_reciprocal_32f.h:91
static void volk_32f_reciprocal_32f_generic(float *out, const float *in, unsigned int num_points)
Definition volk_32f_reciprocal_32f.h:58
static void volk_32f_reciprocal_32f_u_sse(float *out, const float *in, unsigned int num_points)
Definition volk_32f_reciprocal_32f.h:139
for i
Definition volk_config_fixed.tmpl.h:13