Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_8ic_deinterleave_real_16i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
39
40#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
41#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
42
43#include <inttypes.h>
44#include <stdio.h>
45
46
47#ifdef LV_HAVE_AVX2
48#include <immintrin.h>
49
50static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
51 const lv_8sc_t* complexVector,
52 unsigned int num_points)
53{
54 unsigned int number = 0;
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
57 __m256i moveMask = _mm256_set_epi8(0x80,
58 0x80,
59 0x80,
60 0x80,
61 0x80,
62 0x80,
63 0x80,
64 0x80,
65 14,
66 12,
67 10,
68 8,
69 6,
70 4,
71 2,
72 0,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 0x80,
81 14,
82 12,
83 10,
84 8,
85 6,
86 4,
87 2,
88 0);
89 __m256i complexVal, outputVal;
90 __m128i outputVal0;
91
92 unsigned int sixteenthPoints = num_points / 16;
93
94 for (number = 0; number < sixteenthPoints; number++) {
95 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
96 complexVectorPtr += 32;
97
98 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
99 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
100
101 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
102
103 outputVal = _mm256_cvtepi8_epi16(outputVal0);
104 outputVal = _mm256_slli_epi16(outputVal, 7);
105
106 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
107
108 iBufferPtr += 16;
109 }
110
111 number = sixteenthPoints * 16;
112 for (; number < num_points; number++) {
113 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
114 complexVectorPtr++;
115 }
116}
117#endif /* LV_HAVE_AVX2 */
118
119#ifdef LV_HAVE_SSE4_1
120#include <smmintrin.h>
121
122static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
123 const lv_8sc_t* complexVector,
124 unsigned int num_points)
125{
126 unsigned int number = 0;
127 const int8_t* complexVectorPtr = (int8_t*)complexVector;
128 int16_t* iBufferPtr = iBuffer;
129 __m128i moveMask = _mm_set_epi8(
130 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
131 __m128i complexVal, outputVal;
132
133 unsigned int eighthPoints = num_points / 8;
134
135 for (number = 0; number < eighthPoints; number++) {
136 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
137 complexVectorPtr += 16;
138
139 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
140
141 outputVal = _mm_cvtepi8_epi16(complexVal);
142 outputVal = _mm_slli_epi16(outputVal, 7);
143
144 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
145 iBufferPtr += 8;
146 }
147
148 number = eighthPoints * 8;
149 for (; number < num_points; number++) {
150 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
151 complexVectorPtr++;
152 }
153}
154#endif /* LV_HAVE_SSE4_1 */
155
156
157#ifdef LV_HAVE_AVX
158#include <immintrin.h>
159
160static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer,
161 const lv_8sc_t* complexVector,
162 unsigned int num_points)
163{
164 unsigned int number = 0;
165 const int8_t* complexVectorPtr = (int8_t*)complexVector;
166 int16_t* iBufferPtr = iBuffer;
167 __m128i moveMask = _mm_set_epi8(
168 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
169 __m256i complexVal, outputVal;
170 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
171
172 unsigned int sixteenthPoints = num_points / 16;
173
174 for (number = 0; number < sixteenthPoints; number++) {
175 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
176 complexVectorPtr += 32;
177
178 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
179 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
180
181 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
182 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
183
184 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
185 outputVal1 = _mm_slli_epi16(outputVal1, 7);
186 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
187 outputVal0 = _mm_slli_epi16(outputVal0, 7);
188
189 __m256i dummy = _mm256_setzero_si256();
190 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
191 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
192 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
193
194 iBufferPtr += 16;
195 }
196
197 number = sixteenthPoints * 16;
198 for (; number < num_points; number++) {
199 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
200 complexVectorPtr++;
201 }
202}
203#endif /* LV_HAVE_AVX */
204
205
206#ifdef LV_HAVE_GENERIC
207
208static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer,
209 const lv_8sc_t* complexVector,
210 unsigned int num_points)
211{
212 unsigned int number = 0;
213 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
214 int16_t* iBufferPtr = iBuffer;
215 for (number = 0; number < num_points; number++) {
216 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
217 complexVectorPtr++;
218 }
219}
220#endif /* LV_HAVE_GENERIC */
221
222
223#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
224
225#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
226#define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
227
228#include <inttypes.h>
229#include <stdio.h>
230
231
232#ifdef LV_HAVE_AVX2
233#include <immintrin.h>
234
235static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
236 const lv_8sc_t* complexVector,
237 unsigned int num_points)
238{
239 unsigned int number = 0;
240 const int8_t* complexVectorPtr = (int8_t*)complexVector;
241 int16_t* iBufferPtr = iBuffer;
242 __m256i moveMask = _mm256_set_epi8(0x80,
243 0x80,
244 0x80,
245 0x80,
246 0x80,
247 0x80,
248 0x80,
249 0x80,
250 14,
251 12,
252 10,
253 8,
254 6,
255 4,
256 2,
257 0,
258 0x80,
259 0x80,
260 0x80,
261 0x80,
262 0x80,
263 0x80,
264 0x80,
265 0x80,
266 14,
267 12,
268 10,
269 8,
270 6,
271 4,
272 2,
273 0);
274 __m256i complexVal, outputVal;
275 __m128i outputVal0;
276
277 unsigned int sixteenthPoints = num_points / 16;
278
279 for (number = 0; number < sixteenthPoints; number++) {
280 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
281 complexVectorPtr += 32;
282
283 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
284 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
285
286 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
287
288 outputVal = _mm256_cvtepi8_epi16(outputVal0);
289 outputVal = _mm256_slli_epi16(outputVal, 7);
290
291 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
292
293 iBufferPtr += 16;
294 }
295
296 number = sixteenthPoints * 16;
297 for (; number < num_points; number++) {
298 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
299 complexVectorPtr++;
300 }
301}
302#endif /* LV_HAVE_AVX2 */
303
304#ifdef LV_HAVE_RVV
305#include <riscv_vector.h>
306
307static inline void volk_8ic_deinterleave_real_16i_rvv(int16_t* iBuffer,
308 const lv_8sc_t* complexVector,
309 unsigned int num_points)
310{
311 const int16_t* in = (const int16_t*)complexVector;
312 size_t n = num_points;
313 for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
314 vl = __riscv_vsetvl_e16m8(n);
315 vint16m8_t v = __riscv_vle16_v_i16m8(in, vl);
316 __riscv_vse16(iBuffer, __riscv_vsra(__riscv_vsll(v, 8, vl), 1, vl), vl);
317 }
318}
319#endif /*LV_HAVE_RVV*/
320
321#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_16i.h:160
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_16i.h:208
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70