40#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
41#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
50static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
52 unsigned int num_points)
54 unsigned int number = 0;
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
57 __m256i moveMask = _mm256_set_epi8(0x80,
89 __m256i complexVal, outputVal;
92 unsigned int sixteenthPoints = num_points / 16;
94 for (number = 0; number < sixteenthPoints; number++) {
95 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
96 complexVectorPtr += 32;
98 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
99 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
101 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
103 outputVal = _mm256_cvtepi8_epi16(outputVal0);
104 outputVal = _mm256_slli_epi16(outputVal, 7);
106 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
111 number = sixteenthPoints * 16;
112 for (; number < num_points; number++) {
113 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
120#include <smmintrin.h>
122static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
124 unsigned int num_points)
126 unsigned int number = 0;
127 const int8_t* complexVectorPtr = (int8_t*)complexVector;
128 int16_t* iBufferPtr = iBuffer;
129 __m128i moveMask = _mm_set_epi8(
130 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
131 __m128i complexVal, outputVal;
133 unsigned int eighthPoints = num_points / 8;
135 for (number = 0; number < eighthPoints; number++) {
136 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
137 complexVectorPtr += 16;
139 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
141 outputVal = _mm_cvtepi8_epi16(complexVal);
142 outputVal = _mm_slli_epi16(outputVal, 7);
144 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
148 number = eighthPoints * 8;
149 for (; number < num_points; number++) {
150 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
158#include <immintrin.h>
162 unsigned int num_points)
164 unsigned int number = 0;
165 const int8_t* complexVectorPtr = (int8_t*)complexVector;
166 int16_t* iBufferPtr = iBuffer;
167 __m128i moveMask = _mm_set_epi8(
168 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
169 __m256i complexVal, outputVal;
170 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
172 unsigned int sixteenthPoints = num_points / 16;
174 for (number = 0; number < sixteenthPoints; number++) {
175 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
176 complexVectorPtr += 32;
178 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
179 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
181 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
182 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
184 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
185 outputVal1 = _mm_slli_epi16(outputVal1, 7);
186 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
187 outputVal0 = _mm_slli_epi16(outputVal0, 7);
189 __m256i dummy = _mm256_setzero_si256();
190 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
191 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
192 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
197 number = sixteenthPoints * 16;
198 for (; number < num_points; number++) {
199 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
206#ifdef LV_HAVE_GENERIC
210 unsigned int num_points)
212 unsigned int number = 0;
213 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
214 int16_t* iBufferPtr = iBuffer;
215 for (number = 0; number < num_points; number++) {
216 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
225#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
226#define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
233#include <immintrin.h>
235static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
237 unsigned int num_points)
239 unsigned int number = 0;
240 const int8_t* complexVectorPtr = (int8_t*)complexVector;
241 int16_t* iBufferPtr = iBuffer;
242 __m256i moveMask = _mm256_set_epi8(0x80,
274 __m256i complexVal, outputVal;
277 unsigned int sixteenthPoints = num_points / 16;
279 for (number = 0; number < sixteenthPoints; number++) {
280 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
281 complexVectorPtr += 32;
283 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
284 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
286 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
288 outputVal = _mm256_cvtepi8_epi16(outputVal0);
289 outputVal = _mm256_slli_epi16(outputVal, 7);
291 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
296 number = sixteenthPoints * 16;
297 for (; number < num_points; number++) {
298 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
305#include <riscv_vector.h>
307static inline void volk_8ic_deinterleave_real_16i_rvv(int16_t* iBuffer,
309 unsigned int num_points)
311 const int16_t* in = (
const int16_t*)complexVector;
312 size_t n = num_points;
313 for (
size_t vl; n > 0; n -= vl, in += vl, iBuffer += vl) {
314 vl = __riscv_vsetvl_e16m8(n);
315 vint16m8_t v = __riscv_vle16_v_i16m8(in, vl);
316 __riscv_vse16(iBuffer, __riscv_vsra(__riscv_vsll(v, 8, vl), 1, vl), vl);
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_16i.h:160
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition volk_8ic_deinterleave_real_16i.h:208
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition volk_complex.h:70