41#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
42#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
51static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
56 const int16_t* complexVectorPtr = (int16_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
59 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
91 __m256i iMoveMask2 = _mm256_set_epi8(13,
124 __m256i complexVal1, complexVal2, iOutputVal;
126 unsigned int sixteenthPoints = num_points / 16;
128 for (number = 0; number < sixteenthPoints; number++) {
129 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
130 complexVectorPtr += 16;
131 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
132 complexVectorPtr += 16;
134 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
135 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
137 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
138 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
140 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
145 number = sixteenthPoints * 16;
146 for (; number < num_points; number++) {
147 *iBufferPtr++ = *complexVectorPtr++;
154#include <tmmintrin.h>
158 unsigned int num_points)
160 unsigned int number = 0;
161 const int16_t* complexVectorPtr = (int16_t*)complexVector;
162 int16_t* iBufferPtr = iBuffer;
164 __m128i iMoveMask1 = _mm_set_epi8(
165 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
166 __m128i iMoveMask2 = _mm_set_epi8(
167 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
169 __m128i complexVal1, complexVal2, iOutputVal;
171 unsigned int eighthPoints = num_points / 8;
173 for (number = 0; number < eighthPoints; number++) {
174 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
175 complexVectorPtr += 8;
176 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
177 complexVectorPtr += 8;
179 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
180 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
182 iOutputVal = _mm_or_si128(complexVal1, complexVal2);
184 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
189 number = eighthPoints * 8;
190 for (; number < num_points; number++) {
191 *iBufferPtr++ = *complexVectorPtr++;
199#include <emmintrin.h>
203 unsigned int num_points)
205 unsigned int number = 0;
206 const int16_t* complexVectorPtr = (int16_t*)complexVector;
207 int16_t* iBufferPtr = iBuffer;
208 __m128i complexVal1, complexVal2, iOutputVal;
209 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
210 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
212 unsigned int eighthPoints = num_points / 8;
214 for (number = 0; number < eighthPoints; number++) {
215 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
216 complexVectorPtr += 8;
217 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
218 complexVectorPtr += 8;
220 complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
222 complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
224 complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
226 complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
228 complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
230 complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
232 iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask),
233 _mm_and_si128(complexVal2, highMask));
235 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
240 number = eighthPoints * 8;
241 for (; number < num_points; number++) {
242 *iBufferPtr++ = *complexVectorPtr++;
#ifdef LV_HAVE_GENERIC

/*
 * Portable scalar implementation: copies the real (first) 16-bit component of
 * each interleaved complex sample in complexVector into iBuffer.
 *
 * iBuffer:       output, receives num_points int16_t values
 * complexVector: input, num_points interleaved (real, imag) int16_t pairs
 * num_points:    number of complex samples to process
 */
static inline void volk_16ic_deinterleave_real_16i_generic(int16_t* iBuffer,
                                                           const lv_16sc_t* complexVector,
                                                           unsigned int num_points)
{
    unsigned int number = 0;
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        complexVectorPtr++; /* skip the imaginary component */
    }
}

#endif /* LV_HAVE_GENERIC */
268#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H
269#define INCLUDED_volk_16ic_deinterleave_real_16i_u_H
276#include <immintrin.h>
278static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
280 unsigned int num_points)
282 unsigned int number = 0;
283 const int16_t* complexVectorPtr = (int16_t*)complexVector;
284 int16_t* iBufferPtr = iBuffer;
286 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
318 __m256i iMoveMask2 = _mm256_set_epi8(13,
351 __m256i complexVal1, complexVal2, iOutputVal;
353 unsigned int sixteenthPoints = num_points / 16;
355 for (number = 0; number < sixteenthPoints; number++) {
356 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
357 complexVectorPtr += 16;
358 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
359 complexVectorPtr += 16;
361 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
362 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
364 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
365 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
367 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
372 number = sixteenthPoints * 16;
373 for (; number < num_points; number++) {
374 *iBufferPtr++ = *complexVectorPtr++;
381#include <riscv_vector.h>
/*
 * Extracts the real (first) 16-bit component of each interleaved complex
 * sample in complexVector and writes it to iBuffer (RISC-V Vector variant).
 *
 * iBuffer:       output, receives num_points int16_t values
 * complexVector: input, num_points interleaved (real, imag) int16_t pairs
 * num_points:    number of complex samples to process
 *
 * Each complex sample is loaded as one 32-bit element; a narrowing shift by
 * zero keeps its low 16 bits. This yields the real component when the first
 * int16_t of the pair sits in the low half (little-endian layout assumed).
 */
static inline void volk_16ic_deinterleave_real_16i_rvv(int16_t* iBuffer,
                                                       const lv_16sc_t* complexVector,
                                                       unsigned int num_points)
{
    const uint32_t* in = (const uint32_t*)complexVector;
    size_t n = num_points;
    /* vl is the number of samples handled this pass, as granted by vsetvl. */
    for (size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vuint32m8_t vc = __riscv_vle32_v_u32m8(in, vl);
        __riscv_vse16((uint16_t*)iBuffer, __riscv_vnsrl(vc, 0, vl), vl);
    }
}
static void volk_16ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_16i.h:250
static void volk_16ic_deinterleave_real_16i_a_sse2(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_16i.h:201
static void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t *iBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_real_16i.h:156
short complex lv_16sc_t
Definition volk_complex.h:71