Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32f_s32f_calc_spectral_noise_floor_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
47
48#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
49#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
50
51#include <inttypes.h>
52#include <stdio.h>
53#include <volk/volk_common.h>
54
56
57#ifdef LV_HAVE_AVX
58#include <immintrin.h>
59
60static inline void
62 const float* realDataPoints,
63 const float spectralExclusionValue,
64 const unsigned int num_points)
65{
66 unsigned int number = 0;
67 const unsigned int eighthPoints = num_points / 8;
68
69 const float* dataPointsPtr = realDataPoints;
70 __VOLK_ATTR_ALIGNED(32) float avgPointsVector[8];
71
72 __m256 dataPointsVal;
73 __m256 avgPointsVal = _mm256_setzero_ps();
74 // Calculate the sum (for mean) for all points
75 for (; number < eighthPoints; number++) {
76
77 dataPointsVal = _mm256_load_ps(dataPointsPtr);
78
79 dataPointsPtr += 8;
80
81 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
82 }
83
84 _mm256_store_ps(avgPointsVector, avgPointsVal);
85
86 float sumMean = 0.0;
87 sumMean += avgPointsVector[0];
88 sumMean += avgPointsVector[1];
89 sumMean += avgPointsVector[2];
90 sumMean += avgPointsVector[3];
91 sumMean += avgPointsVector[4];
92 sumMean += avgPointsVector[5];
93 sumMean += avgPointsVector[6];
94 sumMean += avgPointsVector[7];
95
96 number = eighthPoints * 8;
97 for (; number < num_points; number++) {
98 sumMean += realDataPoints[number];
99 }
100
101 // calculate the spectral mean
102 // +20 because for the comparison below we only want to throw out bins
103 // that are significantly higher (and would, thus, affect the mean more
104 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
105
106 dataPointsPtr = realDataPoints; // Reset the dataPointsPtr
107 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
108 __m256 vOnesVector = _mm256_set1_ps(1.0);
109 __m256 vValidBinCount = _mm256_setzero_ps();
110 avgPointsVal = _mm256_setzero_ps();
111 __m256 compareMask;
112 number = 0;
113 // Calculate the sum (for mean) for any points which do NOT exceed the mean amplitude
114 for (; number < eighthPoints; number++) {
115
116 dataPointsVal = _mm256_load_ps(dataPointsPtr);
117
118 dataPointsPtr += 8;
119
120 // Identify which items do not exceed the mean amplitude
121 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
122
123 // Mask off the items that exceed the mean amplitude and add the avg Points that
124 // do not exceed the mean amplitude
125 avgPointsVal =
126 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
127
128 // Count the number of bins which do not exceed the mean amplitude
129 vValidBinCount =
130 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
131 }
132
133 // Calculate the mean from the remaining data points
134 _mm256_store_ps(avgPointsVector, avgPointsVal);
135
136 sumMean = 0.0;
137 sumMean += avgPointsVector[0];
138 sumMean += avgPointsVector[1];
139 sumMean += avgPointsVector[2];
140 sumMean += avgPointsVector[3];
141 sumMean += avgPointsVector[4];
142 sumMean += avgPointsVector[5];
143 sumMean += avgPointsVector[6];
144 sumMean += avgPointsVector[7];
145
146 // Calculate the number of valid bins from the remaining count
147 __VOLK_ATTR_ALIGNED(32) float validBinCountVector[8];
148 _mm256_store_ps(validBinCountVector, vValidBinCount);
149
150 float validBinCount = 0;
151 validBinCount += validBinCountVector[0];
152 validBinCount += validBinCountVector[1];
153 validBinCount += validBinCountVector[2];
154 validBinCount += validBinCountVector[3];
155 validBinCount += validBinCountVector[4];
156 validBinCount += validBinCountVector[5];
157 validBinCount += validBinCountVector[6];
158 validBinCount += validBinCountVector[7];
159
160 number = eighthPoints * 8;
161 for (; number < num_points; number++) {
162 if (realDataPoints[number] <= meanAmplitude) {
163 sumMean += realDataPoints[number];
164 validBinCount += 1.0;
165 }
166 }
167
168 float localNoiseFloorAmplitude = 0;
169 if (validBinCount > 0.0) {
170 localNoiseFloorAmplitude = sumMean / validBinCount;
171 } else {
172 localNoiseFloorAmplitude =
173 meanAmplitude; // For the odd case that all the amplitudes are equal...
174 }
175
176 *noiseFloorAmplitude = localNoiseFloorAmplitude;
177}
178#endif /* LV_HAVE_AVX */
179
180#ifdef LV_HAVE_SSE
181#include <xmmintrin.h>
182
183static inline void
185 const float* realDataPoints,
186 const float spectralExclusionValue,
187 const unsigned int num_points)
188{
189 unsigned int number = 0;
190 const unsigned int quarterPoints = num_points / 4;
191
192 const float* dataPointsPtr = realDataPoints;
193 __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4];
194
195 __m128 dataPointsVal;
196 __m128 avgPointsVal = _mm_setzero_ps();
197 // Calculate the sum (for mean) for all points
198 for (; number < quarterPoints; number++) {
199
200 dataPointsVal = _mm_load_ps(dataPointsPtr);
201
202 dataPointsPtr += 4;
203
204 avgPointsVal = _mm_add_ps(avgPointsVal, dataPointsVal);
205 }
206
207 _mm_store_ps(avgPointsVector, avgPointsVal);
208
209 float sumMean = 0.0;
210 sumMean += avgPointsVector[0];
211 sumMean += avgPointsVector[1];
212 sumMean += avgPointsVector[2];
213 sumMean += avgPointsVector[3];
214
215 number = quarterPoints * 4;
216 for (; number < num_points; number++) {
217 sumMean += realDataPoints[number];
218 }
219
220 // calculate the spectral mean
221 // +20 because for the comparison below we only want to throw out bins
222 // that are significantly higher (and would, thus, affect the mean more
223 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
224
225 dataPointsPtr = realDataPoints; // Reset the dataPointsPtr
226 __m128 vMeanAmplitudeVector = _mm_set_ps1(meanAmplitude);
227 __m128 vOnesVector = _mm_set_ps1(1.0);
228 __m128 vValidBinCount = _mm_setzero_ps();
229 avgPointsVal = _mm_setzero_ps();
230 __m128 compareMask;
231 number = 0;
232 // Calculate the sum (for mean) for any points which do NOT exceed the mean amplitude
233 for (; number < quarterPoints; number++) {
234
235 dataPointsVal = _mm_load_ps(dataPointsPtr);
236
237 dataPointsPtr += 4;
238
239 // Identify which items do not exceed the mean amplitude
240 compareMask = _mm_cmple_ps(dataPointsVal, vMeanAmplitudeVector);
241
242 // Mask off the items that exceed the mean amplitude and add the avg Points that
243 // do not exceed the mean amplitude
244 avgPointsVal = _mm_add_ps(avgPointsVal, _mm_and_ps(compareMask, dataPointsVal));
245
246 // Count the number of bins which do not exceed the mean amplitude
247 vValidBinCount = _mm_add_ps(vValidBinCount, _mm_and_ps(compareMask, vOnesVector));
248 }
249
250 // Calculate the mean from the remaining data points
251 _mm_store_ps(avgPointsVector, avgPointsVal);
252
253 sumMean = 0.0;
254 sumMean += avgPointsVector[0];
255 sumMean += avgPointsVector[1];
256 sumMean += avgPointsVector[2];
257 sumMean += avgPointsVector[3];
258
259 // Calculate the number of valid bins from the remaining count
260 __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4];
261 _mm_store_ps(validBinCountVector, vValidBinCount);
262
263 float validBinCount = 0;
264 validBinCount += validBinCountVector[0];
265 validBinCount += validBinCountVector[1];
266 validBinCount += validBinCountVector[2];
267 validBinCount += validBinCountVector[3];
268
269 number = quarterPoints * 4;
270 for (; number < num_points; number++) {
271 if (realDataPoints[number] <= meanAmplitude) {
272 sumMean += realDataPoints[number];
273 validBinCount += 1.0;
274 }
275 }
276
277 float localNoiseFloorAmplitude = 0;
278 if (validBinCount > 0.0) {
279 localNoiseFloorAmplitude = sumMean / validBinCount;
280 } else {
281 localNoiseFloorAmplitude =
282 meanAmplitude; // For the odd case that all the amplitudes are equal...
283 }
284
285 *noiseFloorAmplitude = localNoiseFloorAmplitude;
286}
287#endif /* LV_HAVE_SSE */
288
289
290#ifdef LV_HAVE_GENERIC
291
292static inline void
294 const float* realDataPoints,
295 const float spectralExclusionValue,
296 const unsigned int num_points)
297{
298 float sumMean = 0.0;
299 unsigned int number;
300 // find the sum (for mean), etc
301 for (number = 0; number < num_points; number++) {
302 // sum (for mean)
303 sumMean += realDataPoints[number];
304 }
305
306 // calculate the spectral mean
307 // +20 because for the comparison below we only want to throw out bins
308 // that are significantly higher (and would, thus, affect the mean more)
309 const float meanAmplitude = (sumMean / num_points) + spectralExclusionValue;
310
311 // now throw out any bins higher than the mean
312 sumMean = 0.0;
313 unsigned int newNumDataPoints = num_points;
314 for (number = 0; number < num_points; number++) {
315 if (realDataPoints[number] <= meanAmplitude)
316 sumMean += realDataPoints[number];
317 else
318 newNumDataPoints--;
319 }
320
321 float localNoiseFloorAmplitude = 0.0;
322 if (newNumDataPoints == 0) // in the odd case that all
323 localNoiseFloorAmplitude = meanAmplitude; // amplitudes are equal!
324 else
325 localNoiseFloorAmplitude = sumMean / ((float)newNumDataPoints);
326
327 *noiseFloorAmplitude = localNoiseFloorAmplitude;
328}
329#endif /* LV_HAVE_GENERIC */
330
331
332#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H */
333
334#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
335#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
336
337#include <inttypes.h>
338#include <stdio.h>
339#include <volk/volk_common.h>
340
341#ifdef LV_HAVE_AVX
342#include <immintrin.h>
343
344static inline void
346 const float* realDataPoints,
347 const float spectralExclusionValue,
348 const unsigned int num_points)
349{
350 unsigned int number = 0;
351 const unsigned int eighthPoints = num_points / 8;
352
353 const float* dataPointsPtr = realDataPoints;
354 __VOLK_ATTR_ALIGNED(16) float avgPointsVector[8];
355
356 __m256 dataPointsVal;
357 __m256 avgPointsVal = _mm256_setzero_ps();
358 // Calculate the sum (for mean) for all points
359 for (; number < eighthPoints; number++) {
360
361 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
362
363 dataPointsPtr += 8;
364
365 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
366 }
367
368 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
369
370 float sumMean = 0.0;
371 sumMean += avgPointsVector[0];
372 sumMean += avgPointsVector[1];
373 sumMean += avgPointsVector[2];
374 sumMean += avgPointsVector[3];
375 sumMean += avgPointsVector[4];
376 sumMean += avgPointsVector[5];
377 sumMean += avgPointsVector[6];
378 sumMean += avgPointsVector[7];
379
380 number = eighthPoints * 8;
381 for (; number < num_points; number++) {
382 sumMean += realDataPoints[number];
383 }
384
385 // calculate the spectral mean
386 // +20 because for the comparison below we only want to throw out bins
387 // that are significantly higher (and would, thus, affect the mean more
388 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
389
390 dataPointsPtr = realDataPoints; // Reset the dataPointsPtr
391 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
392 __m256 vOnesVector = _mm256_set1_ps(1.0);
393 __m256 vValidBinCount = _mm256_setzero_ps();
394 avgPointsVal = _mm256_setzero_ps();
395 __m256 compareMask;
396 number = 0;
397 // Calculate the sum (for mean) for any points which do NOT exceed the mean amplitude
398 for (; number < eighthPoints; number++) {
399
400 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
401
402 dataPointsPtr += 8;
403
404 // Identify which items do not exceed the mean amplitude
405 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
406
407 // Mask off the items that exceed the mean amplitude and add the avg Points that
408 // do not exceed the mean amplitude
409 avgPointsVal =
410 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
411
412 // Count the number of bins which do not exceed the mean amplitude
413 vValidBinCount =
414 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
415 }
416
417 // Calculate the mean from the remaining data points
418 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
419
420 sumMean = 0.0;
421 sumMean += avgPointsVector[0];
422 sumMean += avgPointsVector[1];
423 sumMean += avgPointsVector[2];
424 sumMean += avgPointsVector[3];
425 sumMean += avgPointsVector[4];
426 sumMean += avgPointsVector[5];
427 sumMean += avgPointsVector[6];
428 sumMean += avgPointsVector[7];
429
430 // Calculate the number of valid bins from the remaining count
431 __VOLK_ATTR_ALIGNED(16) float validBinCountVector[8];
432 _mm256_storeu_ps(validBinCountVector, vValidBinCount);
433
434 float validBinCount = 0;
435 validBinCount += validBinCountVector[0];
436 validBinCount += validBinCountVector[1];
437 validBinCount += validBinCountVector[2];
438 validBinCount += validBinCountVector[3];
439 validBinCount += validBinCountVector[4];
440 validBinCount += validBinCountVector[5];
441 validBinCount += validBinCountVector[6];
442 validBinCount += validBinCountVector[7];
443
444 number = eighthPoints * 8;
445 for (; number < num_points; number++) {
446 if (realDataPoints[number] <= meanAmplitude) {
447 sumMean += realDataPoints[number];
448 validBinCount += 1.0;
449 }
450 }
451
452 float localNoiseFloorAmplitude = 0;
453 if (validBinCount > 0.0) {
454 localNoiseFloorAmplitude = sumMean / validBinCount;
455 } else {
456 localNoiseFloorAmplitude =
457 meanAmplitude; // For the odd case that all the amplitudes are equal...
458 }
459
460 *noiseFloorAmplitude = localNoiseFloorAmplitude;
461}
462#endif /* LV_HAVE_AVX */
463
464#ifdef LV_HAVE_RVV
465#include <riscv_vector.h>
466
467static inline void
468volk_32f_s32f_calc_spectral_noise_floor_32f_rvv(float* noiseFloorAmplitude,
469 const float* realDataPoints,
470 const float spectralExclusionValue,
471 const unsigned int num_points)
472{
473 float sum;
474 volk_32f_accumulator_s32f_rvv(&sum, realDataPoints, num_points);
475 float meanAmplitude = sum / num_points + spectralExclusionValue;
476
477 vfloat32m8_t vbin = __riscv_vfmv_v_f_f32m8(meanAmplitude, __riscv_vsetvlmax_e32m8());
478 vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8());
479 size_t n = num_points, binCount = 0;
480 for (size_t vl; n > 0; n -= vl, realDataPoints += vl) {
481 vl = __riscv_vsetvl_e32m8(n);
482 vfloat32m8_t v = __riscv_vle32_v_f32m8(realDataPoints, vl);
483 vbool4_t m = __riscv_vmfle(v, vbin, vl);
484 binCount += __riscv_vcpop(m, vl);
485 vsum = __riscv_vfadd_tumu(m, vsum, vsum, v, vl);
486 }
487 size_t vl = __riscv_vsetvlmax_e32m1();
488 vfloat32m1_t v = RISCV_SHRINK8(vfadd, f, 32, vsum);
489 vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl);
490 sum = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl));
491
492 *noiseFloorAmplitude = binCount == 0 ? meanAmplitude : sum / binCount;
493}
494#endif /*LV_HAVE_RVV*/
495
496#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H */
static void volk_32f_s32f_calc_spectral_noise_floor_32f_u_avx(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition volk_32f_s32f_calc_spectral_noise_floor_32f.h:345
static void volk_32f_s32f_calc_spectral_noise_floor_32f_generic(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition volk_32f_s32f_calc_spectral_noise_floor_32f.h:293
static void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition volk_32f_s32f_calc_spectral_noise_floor_32f.h:184
static void volk_32f_s32f_calc_spectral_noise_floor_32f_a_avx(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition volk_32f_s32f_calc_spectral_noise_floor_32f.h:61
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:62
#define RISCV_SHRINK8(op, T, S, v)
Definition volk_rvv_intrinsics.h:33