Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32f_x2_min_32f.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of VOLK
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 */
9
57
58#ifndef INCLUDED_volk_32f_x2_min_32f_a_H
59#define INCLUDED_volk_32f_x2_min_32f_a_H
60
61#include <inttypes.h>
62#include <stdio.h>
63
64#ifdef LV_HAVE_SSE
65#include <xmmintrin.h>
66
/*
 * SSE kernel (aligned variant): stores the element-wise minimum of aVector
 * and bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * The vector loop uses the aligned intrinsics _mm_load_ps / _mm_store_ps, so
 * all three buffers must be 16-byte aligned whenever num_points >= 4.
 */
static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: four floats per iteration. */
    for (unsigned int number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);

        _mm_store_ps(cPtr, _mm_min_ps(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements the vector loop did not reach. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
100#endif /* LV_HAVE_SSE */
101
102
103#ifdef LV_HAVE_NEON
104#include <arm_neon.h>
105
/*
 * NEON kernel: stores the element-wise minimum of aVector and bVector into
 * cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * Full groups of four floats go through vminq_f32; the remaining
 * num_points % 4 elements are handled by a scalar loop.
 */
static inline void volk_32f_x2_min_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: four floats per iteration. */
    for (unsigned int number = 0; number < quarter_points; number++) {
        const float32x4_t a_vec = vld1q_f32(aPtr);
        const float32x4_t b_vec = vld1q_f32(bPtr);

        vst1q_f32(cPtr, vminq_f32(a_vec, b_vec));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the elements the vector loop did not reach. */
    for (unsigned int number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
136#endif /* LV_HAVE_NEON */
137
138
139#ifdef LV_HAVE_GENERIC
140
/*
 * Portable reference kernel: stores the element-wise minimum of aVector and
 * bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * Each output is (a < b ? a : b), i.e. the element from bVector is stored
 * whenever the comparison a < b is false.
 */
static inline void volk_32f_x2_min_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
157#endif /* LV_HAVE_GENERIC */
158
159
160#ifdef LV_HAVE_ORC
161
/* ORC-generated implementation; defined outside this header. */
extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           int num_points);

/*
 * ORC kernel: forwards all arguments unchanged to
 * volk_32f_x2_min_32f_a_orc_impl.
 *
 * cVector    - output buffer.
 * aVector    - first input buffer.
 * bVector    - second input buffer.
 * num_points - number of elements to process.
 *
 * NOTE(review): num_points is implicitly converted from unsigned int to the
 * int parameter of the implementation, so values above INT_MAX are not
 * passed through faithfully - confirm callers never exceed that.
 */
static inline void volk_32f_x2_min_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
174#endif /* LV_HAVE_ORC */
175
176#ifdef LV_HAVE_AVX
177#include <immintrin.h>
178
/*
 * AVX kernel (aligned variant): stores the element-wise minimum of aVector
 * and bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * The vector loop uses the aligned intrinsics _mm256_load_ps /
 * _mm256_store_ps, so all three buffers must be 32-byte aligned whenever
 * num_points >= 8.
 */
static inline void volk_32f_x2_min_32f_a_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: eight floats per iteration. */
    for (unsigned int number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_load_ps(aPtr);
        const __m256 bVal = _mm256_load_ps(bPtr);

        _mm256_store_ps(cPtr, _mm256_min_ps(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements the vector loop did not reach. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
212#endif /* LV_HAVE_AVX */
213
214#ifdef LV_HAVE_AVX512F
215#include <immintrin.h>
216
/*
 * AVX-512F kernel (aligned variant): stores the element-wise minimum of
 * aVector and bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * The vector loop uses the aligned intrinsics _mm512_load_ps /
 * _mm512_store_ps, so all three buffers must be 64-byte aligned whenever
 * num_points >= 16.
 */
static inline void volk_32f_x2_min_32f_a_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: sixteen floats per iteration. */
    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_load_ps(aPtr);
        const __m512 bVal = _mm512_load_ps(bPtr);

        _mm512_store_ps(cPtr, _mm512_min_ps(aVal, bVal));

        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the num_points % 16 elements the vector loop did not reach. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
250#endif /* LV_HAVE_AVX512F */
251
252#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */
253
254
255#ifndef INCLUDED_volk_32f_x2_min_32f_u_H
256#define INCLUDED_volk_32f_x2_min_32f_u_H
257
258#include <inttypes.h>
259#include <stdio.h>
260
261#ifdef LV_HAVE_AVX512F
262#include <immintrin.h>
263
/*
 * AVX-512F kernel (unaligned variant): stores the element-wise minimum of
 * aVector and bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * The vector loop uses the unaligned intrinsics _mm512_loadu_ps /
 * _mm512_storeu_ps, so the buffers do not need any particular alignment.
 */
static inline void volk_32f_x2_min_32f_u_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: sixteen floats per iteration. */
    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_loadu_ps(aPtr);
        const __m512 bVal = _mm512_loadu_ps(bPtr);

        _mm512_storeu_ps(cPtr, _mm512_min_ps(aVal, bVal));

        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the num_points % 16 elements the vector loop did not reach. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
297#endif /* LV_HAVE_AVX512F */
298
299#ifdef LV_HAVE_AVX
300#include <immintrin.h>
301
/*
 * AVX kernel (unaligned variant): stores the element-wise minimum of aVector
 * and bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * The vector loop uses the unaligned intrinsics _mm256_loadu_ps /
 * _mm256_storeu_ps, so the buffers do not need any particular alignment.
 */
static inline void volk_32f_x2_min_32f_u_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Vector body: eight floats per iteration. */
    for (unsigned int number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_loadu_ps(aPtr);
        const __m256 bVal = _mm256_loadu_ps(bPtr);

        _mm256_storeu_ps(cPtr, _mm256_min_ps(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements the vector loop did not reach. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
335#endif /* LV_HAVE_AVX */
336
337#ifdef LV_HAVE_RVV
338#include <riscv_vector.h>
339
/*
 * RISC-V Vector kernel: stores the element-wise minimum of aVector and
 * bVector into cVector.
 *
 * cVector    - output buffer; num_points floats are written.
 * aVector    - first input buffer; num_points floats are read.
 * bVector    - second input buffer; num_points floats are read.
 * num_points - number of elements to process; 0 performs no work.
 *
 * Each iteration asks __riscv_vsetvl_e32m8 how many elements (vl) to handle,
 * processes that many, then advances all three pointers by vl. No separate
 * scalar tail loop is used.
 */
static inline void volk_32f_x2_min_32f_rvv(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points)
{
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, aVector += vl, bVector += vl, cVector += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t va = __riscv_vle32_v_f32m8(aVector, vl);
        vfloat32m8_t vb = __riscv_vle32_v_f32m8(bVector, vl);
        __riscv_vse32(cVector, __riscv_vfmin(va, vb, vl), vl);
    }
}
353#endif /*LV_HAVE_RVV*/
354
355#endif /* INCLUDED_volk_32f_x2_min_32f_u_H */
static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_min_32f.h:67
static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_min_32f.h:106
static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_min_32f.h:141
static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_min_32f.h:302
static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition volk_32f_x2_min_32f.h:179