Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_32fc_conjugate_32fc.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
54
55#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
56#define INCLUDED_volk_32fc_conjugate_32fc_u_H
57
58#include <float.h>
59#include <inttypes.h>
60#include <stdio.h>
61#include <volk/volk_complex.h>
62
63#ifdef LV_HAVE_AVX
64#include <immintrin.h>
65
66static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector,
67 const lv_32fc_t* aVector,
68 unsigned int num_points)
69{
70 unsigned int number = 0;
71 const unsigned int quarterPoints = num_points / 4;
72
73 __m256 x;
74 lv_32fc_t* c = cVector;
75 const lv_32fc_t* a = aVector;
76
77 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
78
79 for (; number < quarterPoints; number++) {
80
81 x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
82
83 x = _mm256_xor_ps(x, conjugator); // conjugate register
84
85 _mm256_storeu_ps((float*)c, x); // Store the results back into the C container
86
87 a += 4;
88 c += 4;
89 }
90
91 number = quarterPoints * 4;
92
93 for (; number < num_points; number++) {
94 *c++ = lv_conj(*a++);
95 }
96}
97#endif /* LV_HAVE_AVX */
98
99#ifdef LV_HAVE_SSE3
100#include <pmmintrin.h>
101
102static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector,
103 const lv_32fc_t* aVector,
104 unsigned int num_points)
105{
106 unsigned int number = 0;
107 const unsigned int halfPoints = num_points / 2;
108
109 __m128 x;
110 lv_32fc_t* c = cVector;
111 const lv_32fc_t* a = aVector;
112
113 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
114
115 for (; number < halfPoints; number++) {
116
117 x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
118
119 x = _mm_xor_ps(x, conjugator); // conjugate register
120
121 _mm_storeu_ps((float*)c, x); // Store the results back into the C container
122
123 a += 2;
124 c += 2;
125 }
126
127 if ((num_points % 2) != 0) {
128 *c = lv_conj(*a);
129 }
130}
131#endif /* LV_HAVE_SSE3 */
132
133#ifdef LV_HAVE_GENERIC
134
135static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector,
136 const lv_32fc_t* aVector,
137 unsigned int num_points)
138{
139 lv_32fc_t* cPtr = cVector;
140 const lv_32fc_t* aPtr = aVector;
141 unsigned int number = 0;
142
143 for (number = 0; number < num_points; number++) {
144 *cPtr++ = lv_conj(*aPtr++);
145 }
146}
147#endif /* LV_HAVE_GENERIC */
148
149
150#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
151#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
152#define INCLUDED_volk_32fc_conjugate_32fc_a_H
153
154#include <float.h>
155#include <inttypes.h>
156#include <stdio.h>
157#include <volk/volk_complex.h>
158
159#ifdef LV_HAVE_AVX
160#include <immintrin.h>
161
162static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector,
163 const lv_32fc_t* aVector,
164 unsigned int num_points)
165{
166 unsigned int number = 0;
167 const unsigned int quarterPoints = num_points / 4;
168
169 __m256 x;
170 lv_32fc_t* c = cVector;
171 const lv_32fc_t* a = aVector;
172
173 __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
174
175 for (; number < quarterPoints; number++) {
176
177 x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
178
179 x = _mm256_xor_ps(x, conjugator); // conjugate register
180
181 _mm256_store_ps((float*)c, x); // Store the results back into the C container
182
183 a += 4;
184 c += 4;
185 }
186
187 number = quarterPoints * 4;
188
189 for (; number < num_points; number++) {
190 *c++ = lv_conj(*a++);
191 }
192}
193#endif /* LV_HAVE_AVX */
194
195#ifdef LV_HAVE_SSE3
196#include <pmmintrin.h>
197
198static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector,
199 const lv_32fc_t* aVector,
200 unsigned int num_points)
201{
202 unsigned int number = 0;
203 const unsigned int halfPoints = num_points / 2;
204
205 __m128 x;
206 lv_32fc_t* c = cVector;
207 const lv_32fc_t* a = aVector;
208
209 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
210
211 for (; number < halfPoints; number++) {
212
213 x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
214
215 x = _mm_xor_ps(x, conjugator); // conjugate register
216
217 _mm_store_ps((float*)c, x); // Store the results back into the C container
218
219 a += 2;
220 c += 2;
221 }
222
223 if ((num_points % 2) != 0) {
224 *c = lv_conj(*a);
225 }
226}
227#endif /* LV_HAVE_SSE3 */
228
229#ifdef LV_HAVE_NEON
230#include <arm_neon.h>
231
232static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector,
233 const lv_32fc_t* aVector,
234 unsigned int num_points)
235{
236 unsigned int number;
237 const unsigned int quarterPoints = num_points / 4;
238
239 float32x4x2_t x;
240 lv_32fc_t* c = cVector;
241 const lv_32fc_t* a = aVector;
242
243 for (number = 0; number < quarterPoints; number++) {
244 __VOLK_PREFETCH(a + 4);
245 x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; ai,bi,ci,di
246
247 // xor the imaginary lane
248 x.val[1] = vnegq_f32(x.val[1]);
249
250 vst2q_f32((float*)c, x); // Store the results back into the C container
251
252 a += 4;
253 c += 4;
254 }
255
256 for (number = quarterPoints * 4; number < num_points; number++) {
257 *c++ = lv_conj(*a++);
258 }
259}
260#endif /* LV_HAVE_NEON */
261
262
263#ifdef LV_HAVE_RVV
264#include <riscv_vector.h>
265
266static inline void volk_32fc_conjugate_32fc_rvv(lv_32fc_t* cVector,
267 const lv_32fc_t* aVector,
268 unsigned int num_points)
269{
270 size_t n = num_points;
271 vuint64m8_t m = __riscv_vmv_v_x_u64m8(1ull << 63, __riscv_vsetvlmax_e64m8());
272 for (size_t vl; n > 0; n -= vl, aVector += vl, cVector += vl) {
273 vl = __riscv_vsetvl_e64m8(n);
274 vuint64m8_t v = __riscv_vle64_v_u64m8((const uint64_t*)aVector, vl);
275 __riscv_vse64((uint64_t*)cVector, __riscv_vxor(v, m, vl), vl);
276 }
277}
278#endif /*LV_HAVE_RVV*/
279
280#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
static void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:162
static void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:102
static void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:198
static void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:232
static void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:66
static void volk_32fc_conjugate_32fc_generic(lv_32fc_t *cVector, const lv_32fc_t *aVector, unsigned int num_points)
Definition volk_32fc_conjugate_32fc.h:135
#define __VOLK_PREFETCH(addr)
Definition volk_common.h:68
#define lv_conj(x)
Definition volk_complex.h:100
float complex lv_32fc_t
Definition volk_complex.h:74