40#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
52 const float normalizationFactor,
53 unsigned int num_points)
56 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
80 volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
83 volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
87 volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88 volk_32f_s32f_multiply_32f(
100 const float normalizationFactor,
101 unsigned int num_points)
103 float* logPowerOutputPtr = logPowerOutput;
104 const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
105 const float iNormalizationFactor = 1.0 / normalizationFactor;
107 unsigned int quarter_points = num_points / 4;
108 float32x4x2_t fft_vec;
109 float32x4_t log_pwr_vec;
110 float32x4_t mag_squared_vec;
112 const float inv_ln10_10 = 4.34294481903f;
114 for (number = 0; number < quarter_points; number++) {
116 fft_vec = vld2q_f32((
float*)complexFFTInputPtr);
120 fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
121 fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
123 log_pwr_vec = vmulq_n_f32(
_vlogq_f32(mag_squared_vec), inv_ln10_10);
125 vst1q_f32(logPowerOutputPtr, log_pwr_vec);
127 complexFFTInputPtr += 4;
128 logPowerOutputPtr += 4;
132 for (number = quarter_points * 4; number < num_points; number++) {
133 const float real =
lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
134 const float imag =
lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
138 complexFFTInputPtr++;
147#include <riscv_vector.h>
149static inline void volk_32fc_s32f_power_spectrum_32f_rvv(
float* logPowerOutput,
151 const float normalizationFactor,
152 unsigned int num_points)
154 size_t vlmax = __riscv_vsetvlmax_e32m2();
156#if LOG_POLY_DEGREE == 6
157 const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
158 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
159 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
160 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
161 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
162 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
163#elif LOG_POLY_DEGREE == 5
164 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
165 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
166 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
167 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
168 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
169#elif LOG_POLY_DEGREE == 4
170 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
171 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
172 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
173 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
174#elif LOG_POLY_DEGREE == 3
175 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
176 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
177 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
182 const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
183 const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
184 const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
185 const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
187 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
189 size_t n = num_points;
190 for (
size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
191 vl = __riscv_vsetvl_e32m2(n);
192 vuint64m4_t vc = __riscv_vle64_v_u64m4((
const uint64_t*)complexFFTInput, vl);
193 vfloat32m2_t vr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 0, vl));
194 vfloat32m2_t vi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 32, vl));
195 vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
196 v = __riscv_vfmul(v, normFactSq, vl);
198 vfloat32m2_t a = __riscv_vfabs(v, vl);
199 vfloat32m2_t exp = __riscv_vfcvt_f(
200 __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
202 vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
203 __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
205 vfloat32m2_t mant = c0;
206 mant = __riscv_vfmadd(mant, frac, c1, vl);
207 mant = __riscv_vfmadd(mant, frac, c2, vl);
208#if LOG_POLY_DEGREE >= 4
209 mant = __riscv_vfmadd(mant, frac, c3, vl);
210#if LOG_POLY_DEGREE >= 5
211 mant = __riscv_vfmadd(mant, frac, c4, vl);
212#if LOG_POLY_DEGREE >= 6
213 mant = __riscv_vfmadd(mant, frac, c5, vl);
217 v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
220 __riscv_vse32(logPowerOutput, v, vl);
227#include <riscv_vector.h>
230volk_32fc_s32f_power_spectrum_32f_rvvseg(
float* logPowerOutput,
232 const float normalizationFactor,
233 unsigned int num_points)
235 size_t vlmax = __riscv_vsetvlmax_e32m2();
237#if LOG_POLY_DEGREE == 6
238 const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
239 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
240 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
241 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
242 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
243 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
244#elif LOG_POLY_DEGREE == 5
245 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
246 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
247 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
248 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
249 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
250#elif LOG_POLY_DEGREE == 4
251 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
252 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
253 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
254 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
255#elif LOG_POLY_DEGREE == 3
256 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
257 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
258 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
263 const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
264 const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
265 const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
266 const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
268 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
270 size_t n = num_points;
271 for (
size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
272 vl = __riscv_vsetvl_e32m2(n);
274 __riscv_vlseg2e32_v_f32m2x2((
const float*)complexFFTInput, vl);
275 vfloat32m2_t vr = __riscv_vget_f32m2(vc, 0);
276 vfloat32m2_t vi = __riscv_vget_f32m2(vc, 1);
277 vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
278 v = __riscv_vfmul(v, normFactSq, vl);
280 vfloat32m2_t a = __riscv_vfabs(v, vl);
281 vfloat32m2_t exp = __riscv_vfcvt_f(
282 __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
284 vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
285 __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
287 vfloat32m2_t mant = c0;
288 mant = __riscv_vfmadd(mant, frac, c1, vl);
289 mant = __riscv_vfmadd(mant, frac, c2, vl);
290#if LOG_POLY_DEGREE >= 4
291 mant = __riscv_vfmadd(mant, frac, c3, vl);
292#if LOG_POLY_DEGREE >= 5
293 mant = __riscv_vfmadd(mant, frac, c4, vl);
294#if LOG_POLY_DEGREE >= 6
295 mant = __riscv_vfmadd(mant, frac, c5, vl);
299 v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
302 __riscv_vse32(logPowerOutput, v, vl);
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition volk_32fc_s32f_power_spectrum_32f.h:50
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition volk_32fc_s32f_power_spectrum_32f.h:98
#define volk_log2to10factor
Definition volk_common.h:165
static float log2f_non_ieee(float f)
Definition volk_common.h:155
#define __VOLK_PREFETCH(addr)
Definition volk_common.h:68
#define lv_cimag(x)
Definition volk_complex.h:98
#define lv_creal(x)
Definition volk_complex.h:96
float complex lv_32fc_t
Definition volk_complex.h:74
static float32x4_t _vlogq_f32(float32x4_t x)
Definition volk_neon_intrinsics.h:143
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition volk_neon_intrinsics.h:73