Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_64u_popcnt.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of VOLK
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 */
9
46
47#ifndef INCLUDED_volk_64u_popcnt_a_H
48#define INCLUDED_volk_64u_popcnt_a_H
49
50#include <inttypes.h>
51#include <stdio.h>
52
53
54#ifdef LV_HAVE_GENERIC
55
56
/*!
 * \brief Counts the set bits of a 64-bit word (portable implementation).
 *
 * The word is processed as two 32-bit halves. Each half is reduced with the
 * same sequence of masked shift-and-add steps, and the two partial counts are
 * added together.
 *
 * \param ret   Output location; receives the number of 1 bits in \p value
 *              (0 to 64).
 * \param value The 64-bit word to examine.
 */
static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t value)
{
    uint64_t total = 0;

    /* Low half first (shift 0), then the high half (shift 32). */
    for (unsigned int shift = 0; shift < 64; shift += 32) {
        uint32_t bits = (uint32_t)(value >> shift);

        /* Adjacent 1-bit fields -> 2-bit counts -> 4-bit counts. */
        bits = (bits & 0x55555555u) + ((bits >> 1) & 0x55555555u);
        bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
        /* One count per byte; each is at most 8, so bytes cannot carry. */
        bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;
        /* Fold the four byte counts into the low byte. */
        bits += bits >> 8;
        bits += bits >> 16;
        /* A 32-bit half holds at most 32 ones, which fits in 6 bits. */
        total += bits & 0x0000003Fu;
    }

    *ret = total;
}
83
84#endif /*LV_HAVE_GENERIC*/
85
86
87#if LV_HAVE_SSE4_2 && LV_HAVE_64
88
89#include <nmmintrin.h>
90
/*!
 * \brief Counts the set bits of a 64-bit word with the _mm_popcnt_u64()
 *        intrinsic.
 *
 * \param ret   Output location; receives the intrinsic's result.
 * \param value The 64-bit word to examine.
 */
static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t value)
{
    /* The intrinsic is declared with a signed 64-bit return type; a bit count
     * is never negative, so the conversion to uint64_t is made explicit. */
    *ret = (uint64_t)_mm_popcnt_u64(value);
}
95
96#endif /*LV_HAVE_SSE4_2*/
97
98
99#if LV_HAVE_NEON
100#include <arm_neon.h>
/*!
 * \brief Counts the set bits of a 64-bit word using NEON intrinsics.
 *
 * \param ret   Output location; the 64-bit sum is stored here.
 * \param value The 64-bit word to examine.
 */
static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value)
{
    /* Load the word as eight bytes. The load only reads, so the pointer keeps
     * its const qualifier. */
    const uint8x8_t bytes = vld1_u8((const uint8_t*)&value);

    /* One bit count per byte. */
    const uint8x8_t byte_counts = vcnt_u8(bytes);

    /* Widening pairwise adds: 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64. */
    const uint16x4_t sums16 = vpaddl_u8(byte_counts);
    const uint32x2_t sums32 = vpaddl_u16(sums16);
    const uint64x1_t sum64 = vpaddl_u32(sums32);

    vst1_u64(ret, sum64);
}
117#endif /*LV_HAVE_NEON*/
118
119#ifdef LV_HAVE_RVV
120#include <riscv_vector.h>
121
/*!
 * \brief Counts the set bits of a 64-bit word using RISC-V vector intrinsics.
 *
 * \param ret   Output location; receives the result of __riscv_vcpop().
 * \param value The 64-bit word to examine.
 */
static inline void volk_64u_popcnt_rvv(uint64_t* ret, const uint64_t value)
{
    /* Innermost to outermost: move the scalar into a u64m1 vector (vl = 1),
     * reinterpret that register as a b2 mask, then count the set mask bits
     * among the first 64 positions.
     * NOTE(review): relies on 64 not exceeding VLMAX for the b2 mask type
     * on the target - confirm. */
    *ret = __riscv_vcpop(
        __riscv_vreinterpret_b2(__riscv_vmv_s_x_u64m1(value, 1)), 64);
}
126#endif /*LV_HAVE_RVV*/
127
128#ifdef LV_HAVE_RVA22V
129#include <riscv_bitmanip.h>
130
/*!
 * \brief Counts the set bits of a 64-bit word with the __riscv_cpop_64()
 *        scalar intrinsic.
 *
 * \param ret   Output location; receives the intrinsic's result.
 * \param value The 64-bit word to examine.
 */
static inline void volk_64u_popcnt_rva22(uint64_t* ret, const uint64_t value)
{
    *ret = __riscv_cpop_64(value);
}
135#endif /*LV_HAVE_RVA22V*/
136
137#endif /*INCLUDED_volk_64u_popcnt_a_H*/
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition volk_64u_popcnt.h:101
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition volk_64u_popcnt.h:57