40#ifndef INCLUDED_volk_8i_convert_16i_u_H
41#define INCLUDED_volk_8i_convert_16i_u_H
/*
 * Unaligned AVX2 kernel: widens each signed 8-bit input sample to 16 bits and
 * scales it by 256 (left shift by 8).
 *
 * outputVector  destination buffer; num_points int16_t values are written
 * inputVector   source buffer; num_points int8_t values are read
 * num_points    number of samples to convert
 *
 * Uses the unaligned load/store intrinsics, so neither buffer needs any
 * particular alignment.
 *
 * NOTE(review): the declarations of inputVal and ret, the per-iteration
 * pointer advances and the brace lines are not visible in this listing --
 * confirm against the upstream header.
 */
49static inline void volk_8i_convert_16i_u_avx2(int16_t* outputVector,
50 const int8_t* inputVector,
51 unsigned int num_points)
53 unsigned int number = 0;
// Number of complete 16-sample groups handled by the vector loop.
54 const unsigned int sixteenthPoints = num_points / 16;
// Input is consumed 128 bits (16 x int8) at a time, output is produced
// 256 bits (16 x int16) at a time.
56 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
57 __m256i* outputVectorPtr = (__m256i*)outputVector;
61 for (; number < sixteenthPoints; number++) {
62 inputVal = _mm_loadu_si128(inputVectorPtr);
// Sign-extend the 16 bytes to 16 signed 16-bit lanes.
63 ret = _mm256_cvtepi8_epi16(inputVal);
// Shift left by 8 bits, i.e. multiply by 256 (matches the scalar tail below).
64 ret = _mm256_slli_epi16(ret, 8);
65 _mm256_storeu_si256(outputVectorPtr, ret);
// Scalar tail: convert the remaining num_points % 16 samples.
71 number = sixteenthPoints * 16;
72 for (; number < num_points; number++) {
73 outputVector[number] = (int16_t)(inputVector[number]) * 256;
/*
 * Unaligned SSE4.1 kernel: widens each signed 8-bit input sample to 16 bits
 * and scales it by 256 (left shift by 8).
 *
 * outputVector  destination buffer; num_points int16_t values are written
 * inputVector   source buffer; num_points int8_t values are read
 * num_points    number of samples to convert
 *
 * Each iteration loads 16 input bytes once and converts them in two halves of
 * 8 samples, because a 128-bit register holds only 8 int16 results.
 *
 * NOTE(review): the declarations of inputVal and ret, the pointer advances
 * (including the one expected between the two stores) and the brace lines are
 * not visible in this listing -- confirm against the upstream header.
 */
82static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector,
83 const int8_t* inputVector,
84 unsigned int num_points)
86 unsigned int number = 0;
// Number of complete 16-sample groups handled by the vector loop.
87 const unsigned int sixteenthPoints = num_points / 16;
89 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
90 __m128i* outputVectorPtr = (__m128i*)outputVector;
94 for (; number < sixteenthPoints; number++) {
95 inputVal = _mm_loadu_si128(inputVectorPtr);
// Low half: sign-extend the lower 8 bytes to 8 x int16, then multiply by 256.
96 ret = _mm_cvtepi8_epi16(inputVal);
97 ret = _mm_slli_epi16(ret, 8);
98 _mm_storeu_si128(outputVectorPtr, ret);
// High half: byte-shift the register right by 8 so the upper 8 input bytes
// land in the low half, then repeat the widen-and-scale step.
102 inputVal = _mm_srli_si128(inputVal, 8);
103 ret = _mm_cvtepi8_epi16(inputVal);
104 ret = _mm_slli_epi16(ret, 8);
105 _mm_storeu_si128(outputVectorPtr, ret);
// Scalar tail: convert the remaining num_points % 16 samples.
112 number = sixteenthPoints * 16;
113 for (; number < num_points; number++) {
114 outputVector[number] = (int16_t)(inputVector[number]) * 256;
120#ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: writes (int16_t)input * 256 for each of the
 * num_points signed 8-bit input samples.
 *
 * NOTE(review): the first line of the signature (function name and the
 * outputVector parameter) is not visible in this listing; the cross-reference
 * at the end of the listing names the function defined here as
 * volk_8i_convert_16i_generic(int16_t *outputVector, ...) -- confirm against
 * the upstream header.
 */
123 const int8_t* inputVector,
124 unsigned int num_points)
// Walking copies of the caller's pointers; the parameters themselves are
// left unmodified.
126 int16_t* outputVectorPtr = outputVector;
127 const int8_t* inputVectorPtr = inputVector;
128 unsigned int number = 0;
130 for (number = 0; number < num_points; number++) {
// Widen to 16 bits first, then scale by 256; both pointers advance by one.
131 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
140#ifndef INCLUDED_volk_8i_convert_16i_a_H
141#define INCLUDED_volk_8i_convert_16i_a_H
147#include <immintrin.h>
/*
 * Aligned AVX2 kernel: widens each signed 8-bit input sample to 16 bits and
 * scales it by 256 (left shift by 8).
 *
 * outputVector  destination buffer; num_points int16_t values are written
 * inputVector   source buffer; num_points int8_t values are read
 * num_points    number of samples to convert
 *
 * Uses the aligned load/store intrinsics: inputVector must be 16-byte aligned
 * (_mm_load_si128) and outputVector 32-byte aligned (_mm256_store_si256).
 *
 * NOTE(review): the declarations of inputVal and ret, the per-iteration
 * pointer advances and the brace lines are not visible in this listing --
 * confirm against the upstream header.
 */
149static inline void volk_8i_convert_16i_a_avx2(int16_t* outputVector,
150 const int8_t* inputVector,
151 unsigned int num_points)
153 unsigned int number = 0;
// Number of complete 16-sample groups handled by the vector loop.
154 const unsigned int sixteenthPoints = num_points / 16;
156 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
157 __m256i* outputVectorPtr = (__m256i*)outputVector;
161 for (; number < sixteenthPoints; number++) {
162 inputVal = _mm_load_si128(inputVectorPtr);
// Sign-extend the 16 bytes to 16 signed 16-bit lanes.
163 ret = _mm256_cvtepi8_epi16(inputVal);
// Shift left by 8 bits, i.e. multiply by 256 (matches the scalar tail below).
164 ret = _mm256_slli_epi16(ret, 8);
165 _mm256_store_si256(outputVectorPtr, ret);
// Scalar tail: convert the remaining num_points % 16 samples.
171 number = sixteenthPoints * 16;
172 for (; number < num_points; number++) {
173 outputVector[number] = (int16_t)(inputVector[number]) * 256;
180#include <smmintrin.h>
/*
 * Aligned SSE4.1 kernel: widens each signed 8-bit input sample to 16 bits and
 * scales it by 256 (left shift by 8).
 *
 * outputVector  destination buffer; num_points int16_t values are written
 * inputVector   source buffer; num_points int8_t values are read
 * num_points    number of samples to convert
 *
 * Uses the aligned load/store intrinsics (_mm_load_si128 / _mm_store_si128),
 * so both buffers must be 16-byte aligned. Each iteration loads 16 input bytes
 * once and converts them in two halves of 8 samples.
 *
 * NOTE(review): the declarations of inputVal and ret, the pointer advances
 * (including the one expected between the two stores) and the brace lines are
 * not visible in this listing -- confirm against the upstream header.
 */
182static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector,
183 const int8_t* inputVector,
184 unsigned int num_points)
186 unsigned int number = 0;
// Number of complete 16-sample groups handled by the vector loop.
187 const unsigned int sixteenthPoints = num_points / 16;
189 const __m128i* inputVectorPtr = (
const __m128i*)inputVector;
190 __m128i* outputVectorPtr = (__m128i*)outputVector;
194 for (; number < sixteenthPoints; number++) {
195 inputVal = _mm_load_si128(inputVectorPtr);
// Low half: sign-extend the lower 8 bytes to 8 x int16, then multiply by 256.
196 ret = _mm_cvtepi8_epi16(inputVal);
197 ret = _mm_slli_epi16(ret, 8);
198 _mm_store_si128(outputVectorPtr, ret);
// High half: byte-shift the register right by 8 so the upper 8 input bytes
// land in the low half, then repeat the widen-and-scale step.
202 inputVal = _mm_srli_si128(inputVal, 8);
203 ret = _mm_cvtepi8_epi16(inputVal);
204 ret = _mm_slli_epi16(ret, 8);
205 _mm_store_si128(outputVectorPtr, ret);
// Scalar tail: convert the remaining num_points % 16 samples.
212 number = sixteenthPoints * 16;
213 for (; number < num_points; number++) {
214 outputVector[number] = (int16_t)(inputVector[number]) * 256;
/*
 * NEON kernel: widens each signed 8-bit input sample to 16 bits and scales it
 * by 256 (left shift by 8), eight samples per vector iteration.
 *
 * NOTE(review): the first line of the signature (function name and the
 * outputVector parameter), the declarations of number and input_vec, the
 * advance of inputVectorPtr inside the vector loop and the brace lines are not
 * visible in this listing; the cross-reference at the end of the listing names
 * the function defined here as volk_8i_convert_16i_neon(int16_t *outputVector,
 * ...) -- confirm against the upstream header.
 */
224 const int8_t* inputVector,
225 unsigned int num_points)
// Walking copies of the caller's pointers, shared by the vector loop and the
// scalar tail.
227 int16_t* outputVectorPtr = outputVector;
228 const int8_t* inputVectorPtr = inputVector;
// Number of complete 8-sample groups handled by the vector loop.
230 const unsigned int eighth_points = num_points / 8;
233 int16x8_t converted_vec;
238 for (number = 0; number < eighth_points; ++number) {
// Load 8 x int8, widen with sign extension to 8 x int16.
239 input_vec = vld1_s8(inputVectorPtr);
240 converted_vec = vmovl_s8(input_vec);
// Shift each lane left by 8 bits, i.e. multiply by 256.
242 converted_vec = vshlq_n_s16(converted_vec, 8);
243 vst1q_s16(outputVectorPtr, converted_vec);
246 outputVectorPtr += 8;
// Scalar tail: continue from the walking pointers for the remaining
// num_points % 8 samples.
249 for (number = eighth_points * 8; number < num_points; number++) {
250 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/*
 * Declaration of the implementation routine that volk_8i_convert_16i_u_orc
 * forwards to; it is declared extern, so its definition is not in this file.
 *
 * NOTE(review): the remainder of the parameter list is not visible in this
 * listing; the call site passes a third num_points argument -- confirm against
 * the upstream header.
 */
257extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector,
258 const int8_t* inputVector,
/*
 * ORC entry point: a thin wrapper that forwards its three arguments unchanged
 * to volk_8i_convert_16i_a_orc_impl.
 *
 * outputVector  destination buffer, passed through
 * inputVector   source buffer, passed through
 * num_points    sample count, passed through
 *
 * Note that this "_u_" wrapper calls the "_a_" implementation symbol.
 */
261static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector,
262 const int8_t* inputVector,
263 unsigned int num_points)
265 volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
270#include <riscv_vector.h>
/*
 * RISC-V Vector kernel: widens each signed 8-bit input sample to 16 bits and
 * scales it by 256 (left shift by 8).
 *
 * outputVector  destination buffer; num_points int16_t values are written
 * inputVector   source buffer; num_points int8_t values are read
 * num_points    number of samples to convert
 *
 * The loop is length-driven: each pass asks the hardware how many elements it
 * will process (vl), handles exactly that many, and then advances both
 * pointers and decrements the remaining count by vl, so no separate scalar
 * tail is needed.
 */
272static inline void volk_8i_convert_16i_rvv(int16_t* outputVector,
273 const int8_t* inputVector,
274 unsigned int num_points)
// Remaining sample count.
276 size_t n = num_points;
277 for (
size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
// vl = number of 8-bit elements (LMUL=4) to process in this pass, at most n.
278 vl = __riscv_vsetvl_e8m4(n);
// Load vl int8 values and sign-extend them to int16 (register group doubles
// from m4 to m8).
279 vint16m8_t v = __riscv_vsext_vf2(__riscv_vle8_v_i8m4(inputVector, vl), vl);
// Shift left by 8 bits (multiply by 256) and store vl int16 results.
280 __riscv_vse16(outputVector, __riscv_vsll(v, 8, vl), vl);
static void volk_8i_convert_16i_generic(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition volk_8i_convert_16i.h:122
static void volk_8i_convert_16i_neon(int16_t *outputVector, const int8_t *inputVector, unsigned int num_points)
Definition volk_8i_convert_16i.h:223