59 const unsigned int num_bytes = num_points * 2;
61 short candidate = src0[0];
63 __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
67 p_src0 = (__m128i*)src0;
69 int bound = num_bytes >> 4;
70 int leftovers = (num_bytes >> 1) & 7;
74 xmm1 = _mm_setzero_si128();
75 xmm0 = _mm_setzero_si128();
78 xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
80 for (
i = 0;
i < bound; ++
i) {
81 xmm1 = _mm_load_si128(p_src0);
85 xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
86 xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
87 xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
89 xmm6 = _mm_xor_si128(xmm4, xmm5);
91 xmm3 = _mm_and_si128(xmm3, xmm0);
92 xmm4 = _mm_and_si128(xmm6, xmm1);
94 xmm0 = _mm_add_epi16(xmm3, xmm4);
97 _mm_store_si128((__m128i*)cands, xmm0);
99 for (
i = 0;
i < 8; ++
i) {
100 candidate = ((short)(candidate - cands[
i]) > 0) ? candidate : cands[
i];
103 for (
i = 0;
i < leftovers; ++
i) {
104 candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0)
106 : src0[(bound << 3) +
i];
109 target[0] = candidate;
static void volk_16i_max_star_16i_a_ssse3(short *target, short *src0, unsigned int num_points)
Definition volk_16i_max_star_16i.h:57
static void volk_16i_max_star_16i_generic(short *target, short *src0, unsigned int num_points)
Definition volk_16i_max_star_16i.h:117