Vector Optimized Library of Kernels 3.2.0
Architecture-tuned implementations of math kernels
Loading...
Searching...
No Matches
volk_common.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2010, 2011, 2015-2017, 2019, 2020 Free Software Foundation, Inc.
4 * Copyright 2023 Magnus Lundmark <magnuslundmark@gmail.com>
5 *
6 * This file is part of VOLK
7 *
8 * SPDX-License-Identifier: LGPL-3.0-or-later
9 */
10
11#ifndef INCLUDED_LIBVOLK_COMMON_H
12#define INCLUDED_LIBVOLK_COMMON_H
13
// Cross-platform attribute macros
//
// Each branch defines the same set of names:
//   __VOLK_ATTR_ALIGNED(x)  request x-byte alignment for a declaration
//   __VOLK_ATTR_UNUSED      mark a declaration as possibly unused
//   __VOLK_ATTR_INLINE      ask the compiler to always inline
//   __VOLK_ATTR_DEPRECATED  mark a declaration as deprecated
//   __VOLK_ATTR_EXPORT      symbol exported from the library
//   __VOLK_ATTR_IMPORT      symbol imported from the library
//   __VOLK_PREFETCH(addr)   prefetch hint (expands to nothing where unsupported)
//   __VOLK_ASM              inline-assembly keyword
//
// Compilers are tested in the order MSVC, Clang, GCC; anything else gets the
// empty fallback definitions.
#if defined(_MSC_VER)
#define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE __forceinline
#define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
#define __VOLK_ATTR_EXPORT __declspec(dllexport)
#define __VOLK_ATTR_IMPORT __declspec(dllimport)
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm
#elif defined(__clang__)
// AppleClang also defines __GNUC__, so do this check first. These
// will probably be the same as for __GNUC__, but let's keep them
// separate just to be safe.
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(__GNUC__)
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
// The visibility attribute is only used from GCC 4 onwards.
#if __GNUC__ >= 4
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#else
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#endif
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#else
// Unknown compiler: every attribute expands to nothing.
#define __VOLK_ATTR_ALIGNED(x)
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE
#define __VOLK_ATTR_DEPRECATED
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm__
#endif
71
// Ignore annoying warnings in MSVC
#if defined(_MSC_VER)
// C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data
#pragma warning(disable : 4244)
// C4305: 'identifier' : truncation from 'type1' to 'type2'
#pragma warning(disable : 4305)
#endif
80
// C-linkage declaration macros
// FIXME: due to the usage of complex.h, require gcc for c-linkage
//
// __VOLK_DECL_BEGIN / __VOLK_DECL_END open and close an extern "C" block when
// compiled as C++ by a compiler that defines __GNUC__; otherwise both expand
// to nothing.
#if defined(__cplusplus) && defined(__GNUC__)
#define __VOLK_DECL_BEGIN extern "C" {
#define __VOLK_DECL_END }
#else
#define __VOLK_DECL_BEGIN
#define __VOLK_DECL_END
#endif
92
// Define VOLK_API for library symbols
// https://gcc.gnu.org/wiki/Visibility
//
// VOLK_API is the export attribute when volk_EXPORTS is defined and the import
// attribute otherwise.
// NOTE(review): volk_EXPORTS is presumably set by the build system while the
// library itself is being compiled -- confirm against the build files.
#ifdef volk_EXPORTS
#define VOLK_API __VOLK_ATTR_EXPORT
#else
#define VOLK_API __VOLK_ATTR_IMPORT
#endif
102
// The bit128 and bit256 unions, plus the headers their members need
106#include <stdint.h>
107
108#ifdef LV_HAVE_SSE
109#ifdef _WIN32
110#include <intrin.h>
111#else
112#include <x86intrin.h>
113#endif
114#endif
115
/*
 * 128 bits of storage that can be viewed as different lane types:
 * 16 x uint8_t, 8 x uint16_t, 4 x uint32_t, 4 x float or 2 x double.
 *
 * When LV_HAVE_SSE is defined the same storage is also available as an
 * __m128, and when LV_HAVE_SSE2 is defined as an __m128i or __m128d.
 */
union bit128 {
    uint8_t i8[16];
    uint16_t i16[8];
    uint32_t i[4];
    float f[4];
    double d[2];

#ifdef LV_HAVE_SSE
    __m128 float_vec;
#endif

#ifdef LV_HAVE_SSE2
    __m128i int_vec;
    __m128d double_vec;
#endif
};
132
/*
 * 256 bits of storage that can be viewed as different lane types:
 * 32 x uint8_t, 16 x uint16_t, 8 x uint32_t, 8 x float or 4 x double.
 *
 * When LV_HAVE_AVX is defined the same storage is also available as an
 * __m256, __m256i or __m256d.
 */
union bit256 {
    uint8_t i8[32];
    uint16_t i16[16];
    uint32_t i[8];
    float f[8];
    double d[4];

#ifdef LV_HAVE_AVX
    __m256 float_vec;
    __m256i int_vec;
    __m256d double_vec;
#endif
};
146
// Reinterpret a pointer as a pointer to the 128-bit / 256-bit lane union.
// These are plain casts: no size or alignment check is performed.
#define bit128_p(x) ((union bit128*)(x))
#define bit256_p(x) ((union bit256*)(x))
149
151// log2f
153#include <math.h>
154// +-Inf -> +-127.0f in order to match the behaviour of the SIMD kernels
155static inline float log2f_non_ieee(float f)
156{
157 float const result = log2f(f);
158 return isinf(result) ? copysignf(127.0f, result) : result;
159}
160
// Constant used to do log10 calculations as faster log2:
//   10 * log10(x) == (10 / log2(10)) * log2(x)
// precalculated 10.0 / log2f_non_ieee(10.0) to allow for constexpr
// The literal has no 'f' suffix, so the macro expands to a double constant.
#define volk_log2to10factor (0x1.815182p1) // 3.01029995663981209120
166
168// arctan(x) polynomial expansion
170static inline float volk_arctan_poly(const float x)
171{
172 /*
173 * arctan(x) polynomial expansion on the interval [-1, 1]
174 * Maximum relative error < 6.6e-7
175 */
176 const float a1 = +0x1.ffffeap-1f;
177 const float a3 = -0x1.55437p-2f;
178 const float a5 = +0x1.972be6p-3f;
179 const float a7 = -0x1.1436ap-3f;
180 const float a9 = +0x1.5785aap-4f;
181 const float a11 = -0x1.2f3004p-5f;
182 const float a13 = +0x1.01a37cp-7f;
183
184 const float x_times_x = x * x;
185 float arctan = a13;
186 arctan = fmaf(x_times_x, arctan, a11);
187 arctan = fmaf(x_times_x, arctan, a9);
188 arctan = fmaf(x_times_x, arctan, a7);
189 arctan = fmaf(x_times_x, arctan, a5);
190 arctan = fmaf(x_times_x, arctan, a3);
191 arctan = fmaf(x_times_x, arctan, a1);
192 arctan *= x;
193
194 return arctan;
195}
196
// arctan(x)
/*
 * Arctangent for any float argument, built on volk_arctan_poly().
 *
 * For |x| < 1 the polynomial is used directly. Otherwise the argument is
 * inverted so the polynomial still sees a value in [-1, 1], using
 *   arctan(x) + arctan(1 / x) == sign(x) * pi / 2
 */
static inline float volk_arctan(const float x)
{
    const float pi_2 = 0x1.921fb6p0f;

    // fabsf keeps the comparison in single precision.
    if (fabsf(x) < 1.f) {
        return volk_arctan_poly(x);
    }
    return copysignf(pi_2, x) - volk_arctan_poly(1.f / x);
}
212
// arctan2(y, x)
/*
 * Four-quadrant arctangent built on volk_arctan_poly().
 *
 *                /  arctan(y / x)       if x > 0
 *                |  arctan(y / x) + PI  if x < 0 and y >= 0
 * atan2(y, x) =  |  arctan(y / x) - PI  if x < 0 and y < 0
 *                |  sign(y) * PI / 2    if x = 0
 *                \  undefined           if x = 0 and y = 0
 *
 * When x and y are both zero (of either sign) this function returns a zero
 * with the sign of y:
 *   volk_atan2(0.f, 0.f)  returns  0.f
 *   volk_atan2(-0.f, 0.f) returns -0.f
 *
 * When |y| > |x| the quotient is inverted so that the polynomial argument
 * stays in [-1, 1], and the result is recovered from
 *   arctan(1 / t) == sign(t) * PI / 2 - arctan(t)
 */
static inline float volk_atan2(const float y, const float x)
{
    const float pi = 0x1.921fb6p1f;
    const float pi_2 = 0x1.921fb6p0f;

    if (fabsf(x) == 0.f) {
        return (fabsf(y) == 0.f) ? copysignf(0.f, y) : copysignf(pi_2, y);
    }

    const int swap = fabsf(x) < fabsf(y);
    const float input = swap ? (x / y) : (y / x);
    float result = volk_arctan_poly(input);
    if (swap) {
        // Take the sign from the sign bit rather than from `input >= 0.f`:
        // x / y can be -0.f (infinite y, or underflow of a tiny quotient), and
        // -0.f compares equal to 0.f, which would select the wrong half-plane.
        result = copysignf(pi_2, input) - result;
    }
    if (x < 0.f) {
        result += copysignf(pi, y);
    }
    return result;
}
241
242#endif /*INCLUDED_LIBVOLK_COMMON_H*/
Definition volk_common.h:116
float f[4]
Definition volk_common.h:120
__m128i int_vec
Definition volk_common.h:128
__m128d double_vec
Definition volk_common.h:129
uint8_t i8[16]
Definition volk_common.h:117
uint32_t i[4]
Definition volk_common.h:119
double d[2]
Definition volk_common.h:121
uint16_t i16[8]
Definition volk_common.h:118
__m128 float_vec
Definition volk_common.h:124
Definition volk_common.h:133
float f[8]
Definition volk_common.h:137
__m256d double_vec
Definition volk_common.h:143
uint8_t i8[32]
Definition volk_common.h:134
uint16_t i16[16]
Definition volk_common.h:135
double d[4]
Definition volk_common.h:138
uint32_t i[8]
Definition volk_common.h:136
__m256 float_vec
Definition volk_common.h:141
__m256i int_vec
Definition volk_common.h:142
static float volk_arctan(const float x)
Definition volk_common.h:199
static float volk_arctan_poly(const float x)
Definition volk_common.h:170
static float log2f_non_ieee(float f)
Definition volk_common.h:155
static float volk_atan2(const float y, const float x)
Definition volk_common.h:215