58#ifndef INCLUDED_volk_32f_x2_min_32f_a_H
59#define INCLUDED_volk_32f_x2_min_32f_a_H
/*
 * Element-wise minimum of two float vectors (SSE, aligned buffers).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 *
 * The vector loop uses _mm_load_ps / _mm_store_ps, which require 16-byte
 * aligned addresses, so all three buffers must be 16-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: four floats per iteration. */
    for (unsigned int number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);
        const __m128 cVal = _mm_min_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Advance to the next group of four; without this every iteration
         * would reprocess the first four elements. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
/*
 * Element-wise minimum of two float vectors (ARM NEON).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 */
static inline void volk_32f_x2_min_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: four floats per iteration. */
    for (unsigned int number = 0; number < quarter_points; number++) {
        const float32x4_t a_vec = vld1q_f32(aPtr);
        const float32x4_t b_vec = vld1q_f32(bPtr);
        const float32x4_t c_vec = vminq_f32(a_vec, b_vec);

        vst1q_f32(cPtr, c_vec);

        /* Advance to the next group of four elements. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the remaining num_points % 4 elements. */
    for (unsigned int number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
139#ifdef LV_HAVE_GENERIC
/*
 * Element-wise minimum of two float vectors (portable C implementation).
 *
 * Writes cVector[i] = (a < b ? a : b) with a = aVector[i], b = bVector[i],
 * for i in [0, num_points). When num_points is 0 nothing is read or written.
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 */
static inline void volk_32f_x2_min_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
/*
 * External declaration of the ORC implementation that
 * volk_32f_x2_min_32f_u_orc forwards to.
 *
 * NOTE(review): this declaration is truncated in the listing. The final
 * parameter and the closing ");" are not visible. The caller passes
 * num_points as the fourth argument; confirm the declared type of that
 * parameter against the ORC-generated definition before restoring it.
 */
162extern void volk_32f_x2_min_32f_a_orc_impl(
float* cVector,
163 const float* aVector,
164 const float* bVector,
/*
 * Element-wise minimum of two float vectors, ORC-backed variant.
 *
 * Thin wrapper: forwards all four arguments unchanged to
 * volk_32f_x2_min_32f_a_orc_impl, which is defined outside this file.
 *
 * cVector    output buffer
 * aVector    first input buffer
 * bVector    second input buffer
 * num_points number of elements to process
 */
static inline void volk_32f_x2_min_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
177#include <immintrin.h>
/*
 * Element-wise minimum of two float vectors (AVX, aligned buffers).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 *
 * The vector loop uses _mm256_load_ps / _mm256_store_ps, which require
 * 32-byte aligned addresses, so all three buffers must be 32-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: eight floats per iteration. */
    for (unsigned int number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_load_ps(aPtr);
        const __m256 bVal = _mm256_load_ps(bPtr);
        const __m256 cVal = _mm256_min_ps(aVal, bVal);

        _mm256_store_ps(cPtr, cVal);

        /* Advance to the next group of eight elements. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining num_points % 8 elements. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
214#ifdef LV_HAVE_AVX512F
215#include <immintrin.h>
/*
 * Element-wise minimum of two float vectors (AVX-512F, aligned buffers).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 *
 * The vector loop uses _mm512_load_ps / _mm512_store_ps, which require
 * 64-byte aligned addresses, so all three buffers must be 64-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: sixteen floats per iteration. */
    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_load_ps(aPtr);
        const __m512 bVal = _mm512_load_ps(bPtr);
        const __m512 cVal = _mm512_min_ps(aVal, bVal);

        _mm512_store_ps(cPtr, cVal);

        /* Advance to the next group of sixteen elements. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the remaining num_points % 16 elements. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
255#ifndef INCLUDED_volk_32f_x2_min_32f_u_H
256#define INCLUDED_volk_32f_x2_min_32f_u_H
261#ifdef LV_HAVE_AVX512F
262#include <immintrin.h>
/*
 * Element-wise minimum of two float vectors (AVX-512F, unaligned buffers).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 *
 * Uses the unaligned _mm512_loadu_ps / _mm512_storeu_ps accessors, so the
 * buffers carry no alignment requirement beyond that of float.
 */
static inline void volk_32f_x2_min_32f_u_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: sixteen floats per iteration. */
    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_loadu_ps(aPtr);
        const __m512 bVal = _mm512_loadu_ps(bPtr);
        const __m512 cVal = _mm512_min_ps(aVal, bVal);

        _mm512_storeu_ps(cPtr, cVal);

        /* Advance to the next group of sixteen elements. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the remaining num_points % 16 elements. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
300#include <immintrin.h>
/*
 * Element-wise minimum of two float vectors (AVX, unaligned buffers).
 *
 * Writes cVector[i] = min(aVector[i], bVector[i]) for i in [0, num_points).
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 *
 * Uses the unaligned _mm256_loadu_ps / _mm256_storeu_ps accessors, so the
 * buffers carry no alignment requirement beyond that of float.
 */
static inline void volk_32f_x2_min_32f_u_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: eight floats per iteration. */
    for (unsigned int number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_loadu_ps(aPtr);
        const __m256 bVal = _mm256_loadu_ps(bPtr);
        const __m256 cVal = _mm256_min_ps(aVal, bVal);

        _mm256_storeu_ps(cPtr, cVal);

        /* Advance to the next group of eight elements. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the remaining num_points % 8 elements. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
338#include <riscv_vector.h>
/*
 * Element-wise minimum of two float vectors (RISC-V Vector extension).
 *
 * Processes the input in strips: each pass asks the hardware how many
 * 32-bit elements (LMUL = 8) it will handle, loads that many from both
 * inputs, stores their minimum, and advances all three pointers by the
 * same count until no elements remain.
 *
 * cVector    output buffer, at least num_points floats
 * aVector    first input buffer, at least num_points floats
 * bVector    second input buffer, at least num_points floats
 * num_points number of elements to process
 */
static inline void volk_32f_x2_min_32f_rvv(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) {
        /* vl = number of elements handled in this pass */
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl);
        vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl);
        __riscv_vse32(cVector, __riscv_vfmin(va, vb, vl), vl);
    }
}
/*
 * Cross-reference index (residue of the generated documentation listing):
 *
 *   static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *       Definition: volk_32f_x2_min_32f.h:67
 *   static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *       Definition: volk_32f_x2_min_32f.h:106
 *   static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *       Definition: volk_32f_x2_min_32f.h:141
 *   static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *       Definition: volk_32f_x2_min_32f.h:302
 *   static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
 *       Definition: volk_32f_x2_min_32f.h:179
 */