#ifndef INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_AVX2_INTRINSICS_H_
21static inline __m256
_mm256_real(
const __m256 z1,
const __m256 z2)
23 const __m256i permute_mask = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
24 __m256 r = _mm256_shuffle_ps(z1, z2, _MM_SHUFFLE(2, 0, 2, 0));
25 return _mm256_permutevar8x32_ps(r, permute_mask);
28static inline __m256
_mm256_imag(
const __m256 z1,
const __m256 z2)
30 const __m256i permute_mask = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
31 __m256
i = _mm256_shuffle_ps(z1, z2, _MM_SHUFFLE(3, 1, 3, 1));
32 return _mm256_permutevar8x32_ps(
i, permute_mask);
37 const __m128i zeros = _mm_set1_epi8(0x00);
38 const __m128i sign_extract = _mm_set1_epi8(0x80);
39 const __m256i shuffle_mask = _mm256_setr_epi8(0xff,
71 __m256i sign_bits = _mm256_setzero_si256();
73 fbits = _mm_cmpgt_epi8(fbits, zeros);
74 fbits = _mm_and_si128(fbits, sign_extract);
75 sign_bits = _mm256_insertf128_si256(sign_bits, fbits, 0);
76 sign_bits = _mm256_insertf128_si256(sign_bits, fbits, 1);
77 sign_bits = _mm256_shuffle_epi8(sign_bits, shuffle_mask);
79 return _mm256_castsi256_ps(sign_bits);
92 llr0 = _mm256_xor_ps(llr0, sign_mask);
93 __m256 dst = _mm256_add_ps(llr0, llr1);
98 const __m256 cplxValue1)
100 const __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
101 const __m256 squared0 = _mm256_mul_ps(cplxValue0, cplxValue0);
102 const __m256 squared1 = _mm256_mul_ps(cplxValue1, cplxValue1);
103 const __m256 complex_result = _mm256_hadd_ps(squared0, squared1);
104 return _mm256_permutevar8x32_ps(complex_result, idx);
108 const __m256 symbols1,
109 const __m256 points0,
110 const __m256 points1,
118 const __m256 diff0 = _mm256_sub_ps(symbols0, points0);
119 const __m256 diff1 = _mm256_sub_ps(symbols1, points1);
121 return _mm256_mul_ps(norms, scalar);
144 __m256i* max_indices,
145 __m256i* current_indices,
146 __m256i indices_increment)
148 in0 = _mm256_mul_ps(in0, in0);
149 in1 = _mm256_mul_ps(in1, in1);
169 __m256 abs_squared = _mm256_hadd_ps(in0, in1);
180 __m256 compare_mask = _mm256_cmp_ps(abs_squared, *max_values, _CMP_GT_OS);
183 *max_values = _mm256_blendv_ps(*max_values, abs_squared, compare_mask);
194 _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*max_indices),
195 _mm256_castsi256_ps(*current_indices),
199 *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
206 __m256i* max_indices,
207 __m256i* current_indices,
208 __m256i indices_increment)
210 in0 = _mm256_mul_ps(in0, in0);
211 in1 = _mm256_mul_ps(in1, in1);
213 __m256 abs_squared = _mm256_hadd_ps(in0, in1);
214 __m256 compare_mask = _mm256_cmp_ps(abs_squared, *max_values, _CMP_GT_OS);
226 *max_values = _mm256_max_ps(abs_squared, *max_values);
229 _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*max_indices),
230 _mm256_castsi256_ps(*current_indices),
233 *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
256 __m256i* min_indices,
257 __m256i* current_indices,
258 __m256i indices_increment)
260 in0 = _mm256_mul_ps(in0, in0);
261 in1 = _mm256_mul_ps(in1, in1);
281 __m256 abs_squared = _mm256_hadd_ps(in0, in1);
292 __m256 compare_mask = _mm256_cmp_ps(abs_squared, *min_values, _CMP_LT_OS);
295 *min_values = _mm256_blendv_ps(*min_values, abs_squared, compare_mask);
306 _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*min_indices),
307 _mm256_castsi256_ps(*current_indices),
311 *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
318 __m256i* min_indices,
319 __m256i* current_indices,
320 __m256i indices_increment)
322 in0 = _mm256_mul_ps(in0, in0);
323 in1 = _mm256_mul_ps(in1, in1);
325 __m256 abs_squared = _mm256_hadd_ps(in0, in1);
326 __m256 compare_mask = _mm256_cmp_ps(abs_squared, *min_values, _CMP_LT_OS);
338 *min_values = _mm256_min_ps(abs_squared, *min_values);
341 _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(*min_indices),
342 _mm256_castsi256_ps(*current_indices),
345 *current_indices = _mm256_add_epi32(*current_indices, indices_increment);
static __m256 _mm256_scaled_norm_dist_ps_avx2(const __m256 symbols0, const __m256 symbols1, const __m256 points0, const __m256 points1, const __m256 scalar)
Definition volk_avx2_intrinsics.h:107
static __m256 _mm256_polar_sign_mask_avx2(__m128i fbits)
Definition volk_avx2_intrinsics.h:35
static void vector_32fc_index_max_variant1(__m256 in0, __m256 in1, __m256 *max_values, __m256i *max_indices, __m256i *current_indices, __m256i indices_increment)
Definition volk_avx2_intrinsics.h:203
static __m256 _mm256_magnitudesquared_ps_avx2(const __m256 cplxValue0, const __m256 cplxValue1)
Definition volk_avx2_intrinsics.h:97
static void vector_32fc_index_min_variant0(__m256 in0, __m256 in1, __m256 *min_values, __m256i *min_indices, __m256i *current_indices, __m256i indices_increment)
Definition volk_avx2_intrinsics.h:253
static __m256 _mm256_polar_fsign_add_llrs_avx2(__m256 src0, __m256 src1, __m128i fbits)
Definition volk_avx2_intrinsics.h:83
static void vector_32fc_index_max_variant0(__m256 in0, __m256 in1, __m256 *max_values, __m256i *max_indices, __m256i *current_indices, __m256i indices_increment)
Definition volk_avx2_intrinsics.h:141
static __m256 _mm256_real(const __m256 z1, const __m256 z2)
Definition volk_avx2_intrinsics.h:21
static __m256 _mm256_imag(const __m256 z1, const __m256 z2)
Definition volk_avx2_intrinsics.h:28
static void vector_32fc_index_min_variant1(__m256 in0, __m256 in1, __m256 *min_values, __m256i *min_indices, __m256i *current_indices, __m256i indices_increment)
Definition volk_avx2_intrinsics.h:315
static void _mm256_polar_deinterleave(__m256 *llr0, __m256 *llr1, __m256 src0, __m256 src1)
Definition volk_avx_intrinsics.h:183
for i
Definition volk_config_fixed.tmpl.h:13