56#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
57#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
/**
 * Portable reference kernel: multiplies every element of a float vector by a
 * single scalar.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process; 0 leaves cVector untouched.
 */
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
                                                      const float* aVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    for (unsigned int number = 0; number < num_points; number++) {
        *cVector++ = (*aVector++) * scalar;
    }
}
#include <xmmintrin.h>

/**
 * SSE kernel for buffers with no alignment guarantee: multiplies every element
 * of a float vector by a single scalar, four elements per iteration.
 *
 * NOTE(review): the build-time feature guard for this kernel (compare the
 * `#ifdef LV_HAVE_RISCV64` guard used in this file) is not visible here;
 * confirm it still encloses this block.
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    Input buffer; num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar once so each iteration is a single vector multiply. */
    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_loadu_ps(aPtr); /* unaligned load */

        __m128 cVal = _mm_mul_ps(aVal, bVal);

        _mm_storeu_ps(cPtr, cVal); /* unaligned store */

        /* Step both cursors past the four floats just handled so the scalar
         * tail below resumes at the first unprocessed element. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
106#include <immintrin.h>
109 const float* aVector,
111 unsigned int num_points)
113 const unsigned int eighthPoints = num_points / 8;
115 float* cPtr = cVector;
116 const float* aPtr = aVector;
118 const __m256 bVal = _mm256_set1_ps(scalar);
119 for (
unsigned int number = 0; number < eighthPoints; number++) {
120 __m256 aVal = _mm256_loadu_ps(aPtr);
122 __m256 cVal = _mm256_mul_ps(aVal, bVal);
124 _mm256_storeu_ps(cPtr, cVal);
130 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
131 *cPtr++ = (*aPtr++) * scalar;
#ifdef LV_HAVE_RISCV64
/**
 * Declaration of the SiFive U74 kernel; the definition is not in this header.
 *
 * NOTE(review): the `scalar` parameter line is missing from this listing and
 * was restored to match the other kernels in this file, which all take
 * (cVector, aVector, scalar, num_points). Confirm against the out-of-line
 * definition.
 */
extern void volk_32f_s32f_multiply_32f_sifive_u74(float* cVector,
                                                  const float* aVector,
                                                  const float scalar,
                                                  unsigned int num_points);
#endif /* LV_HAVE_RISCV64 */
147#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
148#define INCLUDED_volk_32f_s32f_multiply_32f_a_H
#include <xmmintrin.h>

/**
 * SSE kernel for aligned buffers: multiplies every element of a float vector
 * by a single scalar, four elements per iteration.
 *
 * Uses `_mm_load_ps` / `_mm_store_ps`, which require 16-byte aligned
 * addresses, so both cVector and aVector must be 16-byte aligned.
 *
 * NOTE(review): the build-time feature guard for this kernel (compare the
 * `#ifdef LV_HAVE_RISCV64` guard used in this file) is not visible here;
 * confirm it still encloses this block.
 *
 * \param cVector    Output buffer (16-byte aligned); receives num_points products.
 * \param aVector    Input buffer (16-byte aligned); num_points floats are read.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    /* Broadcast the scalar once so each iteration is a single vector multiply. */
    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_load_ps(aPtr); /* aligned load */

        __m128 cVal = _mm_mul_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal); /* aligned store */

        /* Step both cursors past the four floats just handled so the scalar
         * tail below resumes at the first unprocessed element. */
        aPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the last num_points % 4 elements. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
185#include <immintrin.h>
188 const float* aVector,
190 unsigned int num_points)
192 const unsigned int eighthPoints = num_points / 8;
194 float* cPtr = cVector;
195 const float* aPtr = aVector;
197 const __m256 bVal = _mm256_set1_ps(scalar);
198 for (
unsigned int number = 0; number < eighthPoints; number++) {
199 __m256 aVal = _mm256_load_ps(aPtr);
201 __m256 cVal = _mm256_mul_ps(aVal, bVal);
203 _mm256_store_ps(cPtr, cVal);
209 for (
unsigned int number = eighthPoints * 8; number < num_points; number++) {
210 *cPtr++ = (*aPtr++) * scalar;
219 const float* aVector,
221 unsigned int num_points)
223 const unsigned int quarterPoints = num_points / 4;
225 const float* inputPtr = aVector;
226 float* outputPtr = cVector;
228 for (
unsigned int number = 0; number < quarterPoints; number++) {
229 float32x4_t aVal = vld1q_f32(inputPtr);
230 float32x4_t cVal = vmulq_n_f32(aVal, scalar);
231 vst1q_f32(outputPtr, cVal);
236 for (
unsigned int number = quarterPoints * 4; number < num_points; number++) {
237 *outputPtr++ = (*inputPtr++) * scalar;
/*
 * ORC implementation, defined outside this header.
 *
 * NOTE(review): only the first parameter (`float* dst`) is visible in this
 * listing. The remaining three were restored from the call in the wrapper
 * below, which passes (cVector, aVector, scalar, num_points). Confirm the
 * parameter types, in particular the signedness of the element count,
 * against the ORC-generated definition.
 */
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
                                                  const float* src,
                                                  const float scalar,
                                                  int num_points);

/**
 * ORC entry point: forwards all four arguments unchanged to
 * volk_32f_s32f_multiply_32f_a_orc_impl.
 *
 * \param cVector    Output buffer; passed as the implementation's destination.
 * \param aVector    Input buffer; passed as the implementation's source.
 * \param scalar     Factor applied to every input element.
 * \param num_points Number of elements to process.
 */
static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
261#include <riscv_vector.h>
263static inline void volk_32f_s32f_multiply_32f_rvv(
float* cVector,
264 const float* aVector,
266 unsigned int num_points)
268 size_t n = num_points;
269 for (
size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) {
270 vl = __riscv_vsetvl_e32m8(n);
271 vfloat32m8_t v = __riscv_vle32_v_f32m8(aVector, vl);
272 __riscv_vse32(cVector, __riscv_vfmul(v, scalar, vl), vl);
static void volk_32f_s32f_multiply_32f_a_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:187
static void volk_32f_s32f_multiply_32f_u_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:77
static void volk_32f_s32f_multiply_32f_u_avx(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:108
static void volk_32f_s32f_multiply_32f_a_sse(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:156
static void volk_32f_s32f_multiply_32f_generic(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:63
static void volk_32f_s32f_multiply_32f_u_neon(float *cVector, const float *aVector, const float scalar, unsigned int num_points)
Definition volk_32f_s32f_multiply_32f.h:218