58#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
59#define INCLUDED_volk_32f_x2_subtract_32f_a_H
/*!
 * \brief Element-wise subtraction of two float vectors: c[i] = a[i] - b[i].
 *
 * Portable scalar implementation; it is also the routine the SIMD variants
 * hand their leftover tail elements to.
 *
 * \param cVector    Output buffer receiving num_points differences.
 * \param aVector    Minuend input buffer (num_points floats).
 * \param bVector    Subtrahend input buffer (num_points floats).
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_subtract_32f_generic(float* cVector,
                                                    const float* aVector,
                                                    const float* bVector,
                                                    unsigned int num_points)
{
    for (unsigned int number = 0; number < num_points; number++) {
        *cVector++ = (*aVector++) - (*bVector++);
    }
}
/*!
 * \brief AVX-512F subtraction c[i] = a[i] - b[i] using aligned loads/stores.
 *
 * Processes 16 floats per iteration with _mm512_load_ps/_mm512_store_ps,
 * which require 64-byte aligned addresses, then passes the remaining
 * (num_points % 16) elements to the generic scalar kernel.
 *
 * \param cVector    Output buffer (aligned), num_points floats.
 * \param aVector    Minuend input buffer (aligned), num_points floats.
 * \param bVector    Subtrahend input buffer (aligned), num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_a_avx512f(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        __m512 aVal = _mm512_load_ps(aVector);
        __m512 bVal = _mm512_load_ps(bVector);

        __m512 cVal = _mm512_sub_ps(aVal, bVal);

        _mm512_store_ps(cVector, cVal);

        /* Step to the next group of 16 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 16;
        bVector += 16;
        cVector += 16;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - sixteenthPoints * 16);
}
108#include <immintrin.h>
/*!
 * \brief AVX subtraction c[i] = a[i] - b[i] using aligned loads/stores.
 *
 * Processes 8 floats per iteration with _mm256_load_ps/_mm256_store_ps,
 * which require 32-byte aligned addresses, then passes the remaining
 * (num_points % 8) elements to the generic scalar kernel.
 *
 * \param cVector    Output buffer (aligned), num_points floats.
 * \param aVector    Minuend input buffer (aligned), num_points floats.
 * \param bVector    Subtrahend input buffer (aligned), num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_a_avx(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_load_ps(aVector);
        __m256 bVal = _mm256_load_ps(bVector);

        __m256 cVal = _mm256_sub_ps(aVal, bVal);

        _mm256_store_ps(cVector, cVal);

        /* Step to the next group of 8 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 8;
        bVector += 8;
        cVector += 8;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - eighthPoints * 8);
}
136#include <xmmintrin.h>
/*!
 * \brief SSE subtraction c[i] = a[i] - b[i] using aligned loads/stores.
 *
 * Processes 4 floats per iteration with _mm_load_ps/_mm_store_ps, which
 * require 16-byte aligned addresses, then passes the remaining
 * (num_points % 4) elements to the generic scalar kernel.
 *
 * \param cVector    Output buffer (aligned), num_points floats.
 * \param aVector    Minuend input buffer (aligned), num_points floats.
 * \param bVector    Subtrahend input buffer (aligned), num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_load_ps(aVector);
        __m128 bVal = _mm_load_ps(bVector);

        __m128 cVal = _mm_sub_ps(aVal, bVal);

        _mm_store_ps(cVector, cVal);

        /* Step to the next group of 4 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 4;
        bVector += 4;
        cVector += 4;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - quarterPoints * 4);
}
/*!
 * \brief NEON subtraction c[i] = a[i] - b[i].
 *
 * Processes 4 floats per iteration with vld1q_f32/vsubq_f32/vst1q_f32, then
 * passes the remaining (num_points % 4) elements to the generic scalar
 * kernel.
 *
 * \param cVector    Output buffer, num_points floats.
 * \param aVector    Minuend input buffer, num_points floats.
 * \param bVector    Subtrahend input buffer, num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_neon(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        float32x4_t a_vec = vld1q_f32(aVector);
        float32x4_t b_vec = vld1q_f32(bVector);

        float32x4_t c_vec = vsubq_f32(a_vec, b_vec);

        vst1q_f32(cVector, c_vec);

        /* Step to the next group of 4 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 4;
        bVector += 4;
        cVector += 4;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - quarterPoints * 4);
}
/*
 * Prototype of the ORC-backed implementation, which is defined outside this
 * header.
 *
 * NOTE(review): the final parameter line was missing from this listing.
 * `int num_points` is assumed here to match an ORC-generated `int n`
 * argument; confirm against the implementation before relying on it.
 */
extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector,
                                                const float* aVector,
                                                const float* bVector,
                                                int num_points);
/*!
 * \brief ORC subtraction c[i] = a[i] - b[i].
 *
 * Thin wrapper that forwards all four arguments unchanged to
 * volk_32f_x2_subtract_32f_a_orc_impl.
 *
 * \param cVector    Output buffer, num_points floats.
 * \param aVector    Minuend input buffer, num_points floats.
 * \param bVector    Subtrahend input buffer, num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_u_orc(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
212#ifndef INCLUDED_volk_32f_x2_subtract_32f_u_H
213#define INCLUDED_volk_32f_x2_subtract_32f_u_H
218#ifdef LV_HAVE_AVX512F
219#include <immintrin.h>
/*!
 * \brief AVX-512F subtraction c[i] = a[i] - b[i] using unaligned loads/stores.
 *
 * Processes 16 floats per iteration with _mm512_loadu_ps/_mm512_storeu_ps,
 * so the buffers need no particular alignment, then passes the remaining
 * (num_points % 16) elements to the generic scalar kernel.
 *
 * \param cVector    Output buffer, num_points floats.
 * \param aVector    Minuend input buffer, num_points floats.
 * \param bVector    Subtrahend input buffer, num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_u_avx512f(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        __m512 aVal = _mm512_loadu_ps(aVector);
        __m512 bVal = _mm512_loadu_ps(bVector);

        __m512 cVal = _mm512_sub_ps(aVal, bVal);

        _mm512_storeu_ps(cVector, cVal);

        /* Step to the next group of 16 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 16;
        bVector += 16;
        cVector += 16;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - sixteenthPoints * 16);
}
248#include <immintrin.h>
/*!
 * \brief AVX subtraction c[i] = a[i] - b[i] using unaligned loads/stores.
 *
 * Processes 8 floats per iteration with _mm256_loadu_ps/_mm256_storeu_ps,
 * so the buffers need no particular alignment, then passes the remaining
 * (num_points % 8) elements to the generic scalar kernel.
 *
 * \param cVector    Output buffer, num_points floats.
 * \param aVector    Minuend input buffer, num_points floats.
 * \param bVector    Subtrahend input buffer, num_points floats.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_x2_subtract_32f_u_avx(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_loadu_ps(aVector);
        __m256 bVal = _mm256_loadu_ps(bVector);

        __m256 cVal = _mm256_sub_ps(aVal, bVal);

        _mm256_storeu_ps(cVector, cVal);

        /* Step to the next group of 8 floats; the tail call below relies on
         * the pointers ending up just past the last full vector. */
        aVector += 8;
        bVector += 8;
        cVector += 8;
    }

    volk_32f_x2_subtract_32f_generic(
        cVector, aVector, bVector, num_points - eighthPoints * 8);
}
276#include <riscv_vector.h>
/*!
 * \brief RISC-V Vector subtraction c[i] = a[i] - b[i].
 *
 * Each pass asks __riscv_vsetvl_e32m8 for a vector length vl given the n
 * elements still outstanding, handles that many elements, and advances all
 * three pointers by vl, so no separate scalar tail loop is needed.
 *
 * \param cVector    Output buffer, num_points floats.
 * \param aVector    Minuend input buffer, num_points floats.
 * \param bVector    Subtrahend input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 is a no-op.
 */
static inline void volk_32f_x2_subtract_32f_rvv(float* cVector,
                                                const float* aVector,
                                                const float* bVector,
                                                unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) {
        /* vl is assigned here, before the loop's step expression reads it. */
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl);
        vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl);
        __riscv_vse32(cVector, __riscv_vfsub(va, vb, vl), vl);
    }
}
static void volk_32f_x2_subtract_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_subtract_32f.h:167
static void volk_32f_x2_subtract_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_subtract_32f.h:110
static void volk_32f_x2_subtract_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_subtract_32f.h:250
static void volk_32f_x2_subtract_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_subtract_32f.h:138
static void volk_32f_x2_subtract_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_subtract_32f.h:67