40#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
41#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
/*
 * volk_8ic_deinterleave_real_8i_a_avx2: AVX2 kernel built on the aligned
 * 256-bit load/store intrinsics. Each main-loop pass reads two 32-byte
 * vectors from the input and writes one 32-byte vector to iBuffer; points
 * left over after the last full group of 32 go through the scalar loop at
 * the end.
 *
 * iBuffer    - destination for the extracted bytes.
 * num_points - number of output points to produce.
 * NOTE(review): complexVector is used below but its parameter line is not
 * visible in this excerpt - confirm against the full header.
 */
49static inline void volk_8ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
51 unsigned int num_points)
53 unsigned int number = 0;
/* Walk the complex input as a plain byte stream. */
54 const int8_t* complexVectorPtr = (int8_t*)complexVector;
55 int8_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle control masks. NOTE(review): only the first argument of each
 * mask is visible here; presumably each 128-bit lane follows the same layout
 * as the SSSE3 kernel's masks (even-indexed bytes gathered into one half of
 * the lane, the other half zeroed) - confirm against the full source.
 */
56 __m256i moveMask1 = _mm256_set_epi8(0x80,
88 __m256i moveMask2 = _mm256_set_epi8(14,
120 __m256i complexVal1, complexVal2, outputVal;
/* Number of full 32-point groups handled by the vector loop. */
122 unsigned int thirtysecondPoints = num_points / 32;
124 for (number = 0; number < thirtysecondPoints; number++) {
/* Load 64 consecutive input bytes as two vectors. */
126 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
127 complexVectorPtr += 32;
128 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
129 complexVectorPtr += 32;
/* Shuffle each vector with its own mask, then merge the two results. */
131 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
132 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
133 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/*
 * 0xd8 selects the source 64-bit quarters in the order 0, 2, 1, 3, i.e. it
 * swaps the two middle quarters. _mm256_shuffle_epi8 shuffles within each
 * 128-bit lane, so this step presumably puts the merged bytes back into
 * sequential order - confirm against the full masks.
 */
134 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
136 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/*
 * NOTE(review): no advance of iBufferPtr and no closing brace for the loop
 * are visible in this excerpt - confirm they exist in the full source.
 */
/* Resume the point counter where the vector loop stopped. */
140 number = thirtysecondPoints * 32;
141 for (; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
142 *iBufferPtr++ = *complexVectorPtr++;
150#include <tmmintrin.h>
/*
 * Tail of the parameter list of volk_8ic_deinterleave_real_8i_a_ssse3
 * (int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points).
 * The kernel uses aligned 128-bit loads/stores: each main-loop pass reads two
 * 16-byte vectors and writes one 16-byte vector holding their even-indexed
 * bytes; leftover points go through the scalar loop at the end.
 */
154 unsigned int num_points)
156 unsigned int number = 0;
/* Walk the complex input as a plain byte stream. */
157 const int8_t* complexVectorPtr = (int8_t*)complexVector;
158 int8_t* iBufferPtr = iBuffer;
/*
 * _mm_set_epi8 lists bytes from most to least significant, and a control
 * byte of 0x80 makes _mm_shuffle_epi8 write zero. moveMask1 therefore places
 * source bytes 0,2,...,14 in the low 8 output bytes and zeroes the high 8.
 */
159 __m128i moveMask1 = _mm_set_epi8(
160 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* moveMask2 is the mirror image: even bytes go to the high 8, low 8 zeroed. */
161 __m128i moveMask2 = _mm_set_epi8(
162 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
163 __m128i complexVal1, complexVal2, outputVal;
/* Number of full 16-point groups handled by the vector loop. */
165 unsigned int sixteenthPoints = num_points / 16;
167 for (number = 0; number < sixteenthPoints; number++) {
/* Load 32 consecutive input bytes as two vectors. */
168 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
169 complexVectorPtr += 16;
170 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
171 complexVectorPtr += 16;
/* Pack the even bytes of the first vector low and of the second high. */
173 complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
174 complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
/* The zeroed halves do not overlap, so OR joins them into 16 output bytes. */
176 outputVal = _mm_or_si128(complexVal1, complexVal2);
178 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
/*
 * NOTE(review): no advance of iBufferPtr and no closing brace for the loop
 * are visible in this excerpt - confirm they exist in the full source.
 */
/* Resume the point counter where the vector loop stopped. */
182 number = sixteenthPoints * 16;
183 for (; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
184 *iBufferPtr++ = *complexVectorPtr++;
192#include <immintrin.h>
/*
 * Tail of the parameter list of volk_8ic_deinterleave_real_8i_a_avx
 * (int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points).
 * The kernel loads and stores 256-bit vectors but does the byte shuffling on
 * their 128-bit halves: each main-loop pass reads 64 input bytes and writes
 * 32 output bytes; leftover points go through the scalar loop at the end.
 */
196 unsigned int num_points)
198 unsigned int number = 0;
/* Walk the complex input as a plain byte stream. */
199 const int8_t* complexVectorPtr = (int8_t*)complexVector;
200 int8_t* iBufferPtr = iBuffer;
/*
 * _mm_set_epi8 lists bytes from most to least significant, and a control
 * byte of 0x80 makes _mm_shuffle_epi8 write zero. moveMaskL places source
 * bytes 0,2,...,14 in the low 8 output bytes and zeroes the high 8.
 */
201 __m128i moveMaskL = _mm_set_epi8(
202 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
/* moveMaskH is the mirror image: even bytes go to the high 8, low 8 zeroed. */
203 __m128i moveMaskH = _mm_set_epi8(
204 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
205 __m256i complexVal1, complexVal2, outputVal;
/*
 * NOTE(review): this declaration ends in a comma, so its continuation
 * (presumably outputVal2, which is used below) is missing from this excerpt.
 */
206 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1,
/* Number of full 32-point groups handled by the vector loop. */
209 unsigned int thirtysecondPoints = num_points / 32;
211 for (number = 0; number < thirtysecondPoints; number++) {
/* Load 64 consecutive input bytes as two 256-bit vectors. */
213 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
214 complexVectorPtr += 32;
215 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
216 complexVectorPtr += 32;
/* Split each 256-bit vector into its upper (1) and lower (0) 128-bit half. */
218 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
219 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
220 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
221 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
/* First vector: even bytes of the lower half go low, of the upper half high. */
223 complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
224 complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
225 outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
/* Second vector: same packing. */
228 complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
229 complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
230 outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
/* Assemble the 256-bit result: outputVal1 in the lower half, outputVal2 above. */
232 __m256i dummy = _mm256_setzero_si256();
233 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
234 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
237 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/*
 * NOTE(review): no advance of iBufferPtr and no closing brace for the loop
 * are visible in this excerpt - confirm they exist in the full source.
 */
/* Resume the point counter where the vector loop stopped. */
241 number = thirtysecondPoints * 32;
242 for (; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
243 *iBufferPtr++ = *complexVectorPtr++;
250#ifdef LV_HAVE_GENERIC
/*
 * Tail of the parameter list of volk_8ic_deinterleave_real_8i_generic
 * (int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points).
 * Plain scalar implementation: one loop pass per output point.
 */
254 unsigned int num_points)
256 unsigned int number = 0;
/* Walk the complex input as a plain byte stream. */
257 const int8_t* complexVectorPtr = (int8_t*)complexVector;
258 int8_t* iBufferPtr = iBuffer;
259 for (number = 0; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
260 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Tail of the parameter list of volk_8ic_deinterleave_real_8i_neon
 * (int8_t* iBuffer, const lv_8sc_t* complexVector, unsigned int num_points).
 * NEON implementation: 16 output points per vector pass, then a scalar loop
 * for the remainder.
 * NOTE(review): the declaration of `number` is not visible in this excerpt.
 */
272 unsigned int num_points)
/* Number of full 16-point groups handled by the vector loop. */
275 unsigned int sixteenth_points = num_points / 16;
277 int8x16x2_t input_vector;
278 for (number = 0; number < sixteenth_points; ++number) {
/*
 * vld2q_s8 is a de-interleaving load of 32 bytes: val[0] receives the
 * even-indexed bytes and val[1] the odd-indexed ones. Only val[0] is stored.
 */
279 input_vector = vld2q_s8((int8_t*)complexVector);
280 vst1q_s8(iBuffer, input_vector.val[0]);
/*
 * NOTE(review): no advance of complexVector or iBuffer and no closing brace
 * are visible in this excerpt - confirm they exist in the full source.
 */
/* The scalar tail starts from the current values of complexVector/iBuffer. */
285 const int8_t* complexVectorPtr = (int8_t*)complexVector;
286 int8_t* iBufferPtr = iBuffer;
287 for (number = sixteenth_points * 16; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
288 *iBufferPtr++ = *complexVectorPtr++;
297#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
298#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
304#include <immintrin.h>
/*
 * volk_8ic_deinterleave_real_8i_u_avx2: AVX2 kernel built on the unaligned
 * 256-bit load/store intrinsics (loadu/storeu). Each main-loop pass reads
 * two 32-byte vectors from the input and writes one 32-byte vector to
 * iBuffer; points left over after the last full group of 32 go through the
 * scalar loop at the end.
 *
 * iBuffer    - destination for the extracted bytes.
 * num_points - number of output points to produce.
 * NOTE(review): complexVector is used below but its parameter line is not
 * visible in this excerpt - confirm against the full header.
 */
306static inline void volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
308 unsigned int num_points)
310 unsigned int number = 0;
/* Walk the complex input as a plain byte stream. */
311 const int8_t* complexVectorPtr = (int8_t*)complexVector;
312 int8_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle control masks. NOTE(review): only the first argument of each
 * mask is visible here; presumably each 128-bit lane gathers the even-indexed
 * bytes into one half and zeroes the other - confirm against the full source.
 */
313 __m256i moveMask1 = _mm256_set_epi8(0x80,
345 __m256i moveMask2 = _mm256_set_epi8(14,
377 __m256i complexVal1, complexVal2, outputVal;
/* Number of full 32-point groups handled by the vector loop. */
379 unsigned int thirtysecondPoints = num_points / 32;
381 for (number = 0; number < thirtysecondPoints; number++) {
/* Load 64 consecutive input bytes as two vectors (no alignment required). */
383 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
384 complexVectorPtr += 32;
385 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
386 complexVectorPtr += 32;
/* Shuffle each vector with its own mask, then merge the two results. */
388 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
389 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
390 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/*
 * 0xd8 selects the source 64-bit quarters in the order 0, 2, 1, 3, i.e. it
 * swaps the two middle quarters. _mm256_shuffle_epi8 shuffles within each
 * 128-bit lane, so this step presumably puts the merged bytes back into
 * sequential order - confirm against the full masks.
 */
391 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
393 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/*
 * NOTE(review): no advance of iBufferPtr and no closing brace for the loop
 * are visible in this excerpt - confirm they exist in the full source.
 */
/* Resume the point counter where the vector loop stopped. */
397 number = thirtysecondPoints * 32;
398 for (; number < num_points; number++) {
/*
 * Copies one byte and steps both pointers by one. NOTE(review): skipping the
 * second byte of each input pair would need a further input increment, which
 * is not visible here - confirm against the full source.
 */
399 *iBufferPtr++ = *complexVectorPtr++;
406#include <riscv_vector.h>
/*
 * volk_8ic_deinterleave_real_8i_rvv: RISC-V Vector kernel. The input is read
 * as 16-bit elements, each element is narrowed to its low 8 bits, and the
 * narrowed bytes are stored to iBuffer. The loop is sized by the vector
 * length returned by vsetvl, so no separate scalar tail is needed.
 *
 * iBuffer    - destination for the extracted bytes; advanced inside the loop.
 * num_points - number of output points to produce.
 * NOTE(review): complexVector is used below but its parameter line is not
 * visible in this excerpt - confirm against the full header.
 */
408static inline void volk_8ic_deinterleave_real_8i_rvv(int8_t* iBuffer,
410 unsigned int num_points)
/* View the input as one 16-bit element per output point. */
412 const uint16_t* in = (
const uint16_t*)complexVector;
/* Points still to be processed. */
413 size_t n = num_points;
/*
 * vl is assigned at the top of the body before the increment expression
 * reads it; each pass consumes vl input elements and emits vl output bytes.
 */
414 for (
size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
/* Ask for as many 16-bit elements (LMUL=8) as the hardware grants, up to n. */
415 vl = __riscv_vsetvl_e16m8(n);
416 vuint16m8_t vc = __riscv_vle16_v_u16m8(in, vl);
/*
 * A narrowing right shift by 0 keeps the low 8 bits of every 16-bit element.
 * NOTE(review): on a little-endian target that is the first byte of each
 * pair in memory - confirm the target byte order assumption.
 */
417 __riscv_vse8((uint8_t*)iBuffer, __riscv_vnsrl(vc, 0, vl), vl);
static void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_8i.h:152
static void volk_8ic_deinterleave_real_8i_a_avx(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_8i.h:194
static void volk_8ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_8i.h:270
static void volk_8ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_8i.h:252
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70