1 /*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
9 #ifndef __CLANG_CUDA_MATH_H__
10 #define __CLANG_CUDA_MATH_H__
12 #error "This file is for CUDA compilation only."
15 #ifndef __OPENMP_NVPTX__
16 #if CUDA_VERSION < 9000
17 #error This file is intended to be used with CUDA-9+ only.
21 // __DEVICE__ is a helper macro with common set of attributes for the wrappers
22 // we implement in this file. We need static in order to avoid emitting unused
23 // functions and __forceinline__ helps inlining these wrappers at -O1.
24 #pragma push_macro("__DEVICE__")
25 #ifdef __OPENMP_NVPTX__
26 #if defined(__cplusplus)
27 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
29 #define __DEVICE__ static __attribute__((always_inline, nothrow))
32 #define __DEVICE__ static __device__ __forceinline__
35 // Specialized version of __DEVICE__ for functions with void return type. Needed
36 // because the OpenMP overlay requires constexpr functions here but prior to
37 // c++14 void return functions could not be constexpr.
38 #pragma push_macro("__DEVICE_VOID__")
39 #ifdef __OPENMP_NVPTX__ && defined(__cplusplus) && __cplusplus < 201402L
40 #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
42 #define __DEVICE_VOID__ __DEVICE__
45 // libdevice provides fast low precision and slow full-recision implementations
46 // for some functions. Which one gets selected depends on
47 // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
48 // -ffast-math or -fcuda-approx-transcendentals are in effect.
49 #pragma push_macro("__FAST_OR_SLOW")
50 #if defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
51 #define __FAST_OR_SLOW(fast, slow) fast
53 #define __FAST_OR_SLOW(fast, slow) slow
56 __DEVICE__
int abs(int __a
) { return __nv_abs(__a
); }
57 __DEVICE__
double fabs(double __a
) { return __nv_fabs(__a
); }
58 __DEVICE__
double acos(double __a
) { return __nv_acos(__a
); }
59 __DEVICE__
float acosf(float __a
) { return __nv_acosf(__a
); }
60 __DEVICE__
double acosh(double __a
) { return __nv_acosh(__a
); }
61 __DEVICE__
float acoshf(float __a
) { return __nv_acoshf(__a
); }
62 __DEVICE__
double asin(double __a
) { return __nv_asin(__a
); }
63 __DEVICE__
float asinf(float __a
) { return __nv_asinf(__a
); }
64 __DEVICE__
double asinh(double __a
) { return __nv_asinh(__a
); }
65 __DEVICE__
float asinhf(float __a
) { return __nv_asinhf(__a
); }
66 __DEVICE__
double atan(double __a
) { return __nv_atan(__a
); }
67 __DEVICE__
double atan2(double __a
, double __b
) { return __nv_atan2(__a
, __b
); }
68 __DEVICE__
float atan2f(float __a
, float __b
) { return __nv_atan2f(__a
, __b
); }
69 __DEVICE__
float atanf(float __a
) { return __nv_atanf(__a
); }
70 __DEVICE__
double atanh(double __a
) { return __nv_atanh(__a
); }
71 __DEVICE__
float atanhf(float __a
) { return __nv_atanhf(__a
); }
72 __DEVICE__
double cbrt(double __a
) { return __nv_cbrt(__a
); }
73 __DEVICE__
float cbrtf(float __a
) { return __nv_cbrtf(__a
); }
74 __DEVICE__
double ceil(double __a
) { return __nv_ceil(__a
); }
75 __DEVICE__
float ceilf(float __a
) { return __nv_ceilf(__a
); }
76 __DEVICE__
double copysign(double __a
, double __b
) {
77 return __nv_copysign(__a
, __b
);
79 __DEVICE__
float copysignf(float __a
, float __b
) {
80 return __nv_copysignf(__a
, __b
);
82 __DEVICE__
double cos(double __a
) { return __nv_cos(__a
); }
83 __DEVICE__
float cosf(float __a
) {
84 return __FAST_OR_SLOW(__nv_fast_cosf
, __nv_cosf
)(__a
);
86 __DEVICE__
double cosh(double __a
) { return __nv_cosh(__a
); }
87 __DEVICE__
float coshf(float __a
) { return __nv_coshf(__a
); }
88 __DEVICE__
double cospi(double __a
) { return __nv_cospi(__a
); }
89 __DEVICE__
float cospif(float __a
) { return __nv_cospif(__a
); }
90 __DEVICE__
double cyl_bessel_i0(double __a
) { return __nv_cyl_bessel_i0(__a
); }
91 __DEVICE__
float cyl_bessel_i0f(float __a
) { return __nv_cyl_bessel_i0f(__a
); }
92 __DEVICE__
double cyl_bessel_i1(double __a
) { return __nv_cyl_bessel_i1(__a
); }
93 __DEVICE__
float cyl_bessel_i1f(float __a
) { return __nv_cyl_bessel_i1f(__a
); }
94 __DEVICE__
double erf(double __a
) { return __nv_erf(__a
); }
95 __DEVICE__
double erfc(double __a
) { return __nv_erfc(__a
); }
96 __DEVICE__
float erfcf(float __a
) { return __nv_erfcf(__a
); }
97 __DEVICE__
double erfcinv(double __a
) { return __nv_erfcinv(__a
); }
98 __DEVICE__
float erfcinvf(float __a
) { return __nv_erfcinvf(__a
); }
99 __DEVICE__
double erfcx(double __a
) { return __nv_erfcx(__a
); }
100 __DEVICE__
float erfcxf(float __a
) { return __nv_erfcxf(__a
); }
101 __DEVICE__
float erff(float __a
) { return __nv_erff(__a
); }
102 __DEVICE__
double erfinv(double __a
) { return __nv_erfinv(__a
); }
103 __DEVICE__
float erfinvf(float __a
) { return __nv_erfinvf(__a
); }
104 __DEVICE__
double exp(double __a
) { return __nv_exp(__a
); }
105 __DEVICE__
double exp10(double __a
) { return __nv_exp10(__a
); }
106 __DEVICE__
float exp10f(float __a
) { return __nv_exp10f(__a
); }
107 __DEVICE__
double exp2(double __a
) { return __nv_exp2(__a
); }
108 __DEVICE__
float exp2f(float __a
) { return __nv_exp2f(__a
); }
109 __DEVICE__
float expf(float __a
) { return __nv_expf(__a
); }
110 __DEVICE__
double expm1(double __a
) { return __nv_expm1(__a
); }
111 __DEVICE__
float expm1f(float __a
) { return __nv_expm1f(__a
); }
112 __DEVICE__
float fabsf(float __a
) { return __nv_fabsf(__a
); }
113 __DEVICE__
double fdim(double __a
, double __b
) { return __nv_fdim(__a
, __b
); }
114 __DEVICE__
float fdimf(float __a
, float __b
) { return __nv_fdimf(__a
, __b
); }
115 __DEVICE__
double fdivide(double __a
, double __b
) { return __a
/ __b
; }
116 __DEVICE__
float fdividef(float __a
, float __b
) {
117 #if __FAST_MATH__ && !__CUDA_PREC_DIV
118 return __nv_fast_fdividef(__a
, __b
);
123 __DEVICE__
double floor(double __f
) { return __nv_floor(__f
); }
124 __DEVICE__
float floorf(float __f
) { return __nv_floorf(__f
); }
125 __DEVICE__
double fma(double __a
, double __b
, double __c
) {
126 return __nv_fma(__a
, __b
, __c
);
128 __DEVICE__
float fmaf(float __a
, float __b
, float __c
) {
129 return __nv_fmaf(__a
, __b
, __c
);
131 __DEVICE__
double fmax(double __a
, double __b
) { return __nv_fmax(__a
, __b
); }
132 __DEVICE__
float fmaxf(float __a
, float __b
) { return __nv_fmaxf(__a
, __b
); }
133 __DEVICE__
double fmin(double __a
, double __b
) { return __nv_fmin(__a
, __b
); }
134 __DEVICE__
float fminf(float __a
, float __b
) { return __nv_fminf(__a
, __b
); }
135 __DEVICE__
double fmod(double __a
, double __b
) { return __nv_fmod(__a
, __b
); }
136 __DEVICE__
float fmodf(float __a
, float __b
) { return __nv_fmodf(__a
, __b
); }
137 __DEVICE__
double frexp(double __a
, int *__b
) { return __nv_frexp(__a
, __b
); }
138 __DEVICE__
float frexpf(float __a
, int *__b
) { return __nv_frexpf(__a
, __b
); }
139 __DEVICE__
double hypot(double __a
, double __b
) { return __nv_hypot(__a
, __b
); }
140 __DEVICE__
float hypotf(float __a
, float __b
) { return __nv_hypotf(__a
, __b
); }
141 __DEVICE__
int ilogb(double __a
) { return __nv_ilogb(__a
); }
142 __DEVICE__
int ilogbf(float __a
) { return __nv_ilogbf(__a
); }
143 __DEVICE__
double j0(double __a
) { return __nv_j0(__a
); }
144 __DEVICE__
float j0f(float __a
) { return __nv_j0f(__a
); }
145 __DEVICE__
double j1(double __a
) { return __nv_j1(__a
); }
146 __DEVICE__
float j1f(float __a
) { return __nv_j1f(__a
); }
147 __DEVICE__
double jn(int __n
, double __a
) { return __nv_jn(__n
, __a
); }
148 __DEVICE__
float jnf(int __n
, float __a
) { return __nv_jnf(__n
, __a
); }
149 #if defined(__LP64__) || defined(_WIN64)
150 __DEVICE__
long labs(long __a
) { return __nv_llabs(__a
); };
152 __DEVICE__
long labs(long __a
) { return __nv_abs(__a
); };
154 __DEVICE__
double ldexp(double __a
, int __b
) { return __nv_ldexp(__a
, __b
); }
155 __DEVICE__
float ldexpf(float __a
, int __b
) { return __nv_ldexpf(__a
, __b
); }
156 __DEVICE__
double lgamma(double __a
) { return __nv_lgamma(__a
); }
157 __DEVICE__
float lgammaf(float __a
) { return __nv_lgammaf(__a
); }
158 __DEVICE__
long long llabs(long long __a
) { return __nv_llabs(__a
); }
159 __DEVICE__
long long llmax(long long __a
, long long __b
) {
160 return __nv_llmax(__a
, __b
);
162 __DEVICE__
long long llmin(long long __a
, long long __b
) {
163 return __nv_llmin(__a
, __b
);
165 __DEVICE__
long long llrint(double __a
) { return __nv_llrint(__a
); }
166 __DEVICE__
long long llrintf(float __a
) { return __nv_llrintf(__a
); }
167 __DEVICE__
long long llround(double __a
) { return __nv_llround(__a
); }
168 __DEVICE__
long long llroundf(float __a
) { return __nv_llroundf(__a
); }
169 __DEVICE__
double round(double __a
) { return __nv_round(__a
); }
170 __DEVICE__
float roundf(float __a
) { return __nv_roundf(__a
); }
171 __DEVICE__
double log(double __a
) { return __nv_log(__a
); }
172 __DEVICE__
double log10(double __a
) { return __nv_log10(__a
); }
173 __DEVICE__
float log10f(float __a
) { return __nv_log10f(__a
); }
174 __DEVICE__
double log1p(double __a
) { return __nv_log1p(__a
); }
175 __DEVICE__
float log1pf(float __a
) { return __nv_log1pf(__a
); }
176 __DEVICE__
double log2(double __a
) { return __nv_log2(__a
); }
177 __DEVICE__
float log2f(float __a
) {
178 return __FAST_OR_SLOW(__nv_fast_log2f
, __nv_log2f
)(__a
);
180 __DEVICE__
double logb(double __a
) { return __nv_logb(__a
); }
181 __DEVICE__
float logbf(float __a
) { return __nv_logbf(__a
); }
182 __DEVICE__
float logf(float __a
) {
183 return __FAST_OR_SLOW(__nv_fast_logf
, __nv_logf
)(__a
);
185 #if defined(__LP64__) || defined(_WIN64)
186 __DEVICE__
long lrint(double __a
) { return llrint(__a
); }
187 __DEVICE__
long lrintf(float __a
) { return __float2ll_rn(__a
); }
188 __DEVICE__
long lround(double __a
) { return llround(__a
); }
189 __DEVICE__
long lroundf(float __a
) { return llroundf(__a
); }
191 __DEVICE__
long lrint(double __a
) { return (long)rint(__a
); }
192 __DEVICE__
long lrintf(float __a
) { return __float2int_rn(__a
); }
193 __DEVICE__
long lround(double __a
) { return round(__a
); }
194 __DEVICE__
long lroundf(float __a
) { return roundf(__a
); }
196 __DEVICE__
int max(int __a
, int __b
) { return __nv_max(__a
, __b
); }
197 __DEVICE__
int min(int __a
, int __b
) { return __nv_min(__a
, __b
); }
198 __DEVICE__
double modf(double __a
, double *__b
) { return __nv_modf(__a
, __b
); }
199 __DEVICE__
float modff(float __a
, float *__b
) { return __nv_modff(__a
, __b
); }
200 __DEVICE__
double nearbyint(double __a
) { return __builtin_nearbyint(__a
); }
201 __DEVICE__
float nearbyintf(float __a
) { return __builtin_nearbyintf(__a
); }
202 __DEVICE__
double nextafter(double __a
, double __b
) {
203 return __nv_nextafter(__a
, __b
);
205 __DEVICE__
float nextafterf(float __a
, float __b
) {
206 return __nv_nextafterf(__a
, __b
);
208 __DEVICE__
double norm(int __dim
, const double *__t
) {
209 return __nv_norm(__dim
, __t
);
211 __DEVICE__
double norm3d(double __a
, double __b
, double __c
) {
212 return __nv_norm3d(__a
, __b
, __c
);
214 __DEVICE__
float norm3df(float __a
, float __b
, float __c
) {
215 return __nv_norm3df(__a
, __b
, __c
);
217 __DEVICE__
double norm4d(double __a
, double __b
, double __c
, double __d
) {
218 return __nv_norm4d(__a
, __b
, __c
, __d
);
220 __DEVICE__
float norm4df(float __a
, float __b
, float __c
, float __d
) {
221 return __nv_norm4df(__a
, __b
, __c
, __d
);
223 __DEVICE__
double normcdf(double __a
) { return __nv_normcdf(__a
); }
224 __DEVICE__
float normcdff(float __a
) { return __nv_normcdff(__a
); }
225 __DEVICE__
double normcdfinv(double __a
) { return __nv_normcdfinv(__a
); }
226 __DEVICE__
float normcdfinvf(float __a
) { return __nv_normcdfinvf(__a
); }
227 __DEVICE__
float normf(int __dim
, const float *__t
) {
228 return __nv_normf(__dim
, __t
);
230 __DEVICE__
double pow(double __a
, double __b
) { return __nv_pow(__a
, __b
); }
231 __DEVICE__
float powf(float __a
, float __b
) { return __nv_powf(__a
, __b
); }
232 __DEVICE__
double powi(double __a
, int __b
) { return __nv_powi(__a
, __b
); }
233 __DEVICE__
float powif(float __a
, int __b
) { return __nv_powif(__a
, __b
); }
234 __DEVICE__
double rcbrt(double __a
) { return __nv_rcbrt(__a
); }
235 __DEVICE__
float rcbrtf(float __a
) { return __nv_rcbrtf(__a
); }
236 __DEVICE__
double remainder(double __a
, double __b
) {
237 return __nv_remainder(__a
, __b
);
239 __DEVICE__
float remainderf(float __a
, float __b
) {
240 return __nv_remainderf(__a
, __b
);
242 __DEVICE__
double remquo(double __a
, double __b
, int *__c
) {
243 return __nv_remquo(__a
, __b
, __c
);
245 __DEVICE__
float remquof(float __a
, float __b
, int *__c
) {
246 return __nv_remquof(__a
, __b
, __c
);
248 __DEVICE__
double rhypot(double __a
, double __b
) {
249 return __nv_rhypot(__a
, __b
);
251 __DEVICE__
float rhypotf(float __a
, float __b
) {
252 return __nv_rhypotf(__a
, __b
);
254 // __nv_rint* in libdevice is buggy and produces incorrect results.
255 __DEVICE__
double rint(double __a
) { return __builtin_rint(__a
); }
256 __DEVICE__
float rintf(float __a
) { return __builtin_rintf(__a
); }
257 __DEVICE__
double rnorm(int __a
, const double *__b
) {
258 return __nv_rnorm(__a
, __b
);
260 __DEVICE__
double rnorm3d(double __a
, double __b
, double __c
) {
261 return __nv_rnorm3d(__a
, __b
, __c
);
263 __DEVICE__
float rnorm3df(float __a
, float __b
, float __c
) {
264 return __nv_rnorm3df(__a
, __b
, __c
);
266 __DEVICE__
double rnorm4d(double __a
, double __b
, double __c
, double __d
) {
267 return __nv_rnorm4d(__a
, __b
, __c
, __d
);
269 __DEVICE__
float rnorm4df(float __a
, float __b
, float __c
, float __d
) {
270 return __nv_rnorm4df(__a
, __b
, __c
, __d
);
272 __DEVICE__
float rnormf(int __dim
, const float *__t
) {
273 return __nv_rnormf(__dim
, __t
);
275 __DEVICE__
double rsqrt(double __a
) { return __nv_rsqrt(__a
); }
276 __DEVICE__
float rsqrtf(float __a
) { return __nv_rsqrtf(__a
); }
277 __DEVICE__
double scalbn(double __a
, int __b
) { return __nv_scalbn(__a
, __b
); }
278 __DEVICE__
float scalbnf(float __a
, int __b
) { return __nv_scalbnf(__a
, __b
); }
279 __DEVICE__
double scalbln(double __a
, long __b
) {
281 return __a
> 0 ? HUGE_VAL
: -HUGE_VAL
;
283 return __a
> 0 ? 0.0 : -0.0;
284 return scalbn(__a
, (int)__b
);
286 __DEVICE__
float scalblnf(float __a
, long __b
) {
288 return __a
> 0 ? HUGE_VALF
: -HUGE_VALF
;
290 return __a
> 0 ? 0.f
: -0.f
;
291 return scalbnf(__a
, (int)__b
);
293 __DEVICE__
double sin(double __a
) { return __nv_sin(__a
); }
294 __DEVICE_VOID__
void sincos(double __a
, double *__s
, double *__c
) {
295 return __nv_sincos(__a
, __s
, __c
);
297 __DEVICE_VOID__
void sincosf(float __a
, float *__s
, float *__c
) {
298 return __FAST_OR_SLOW(__nv_fast_sincosf
, __nv_sincosf
)(__a
, __s
, __c
);
300 __DEVICE_VOID__
void sincospi(double __a
, double *__s
, double *__c
) {
301 return __nv_sincospi(__a
, __s
, __c
);
303 __DEVICE_VOID__
void sincospif(float __a
, float *__s
, float *__c
) {
304 return __nv_sincospif(__a
, __s
, __c
);
306 __DEVICE__
float sinf(float __a
) {
307 return __FAST_OR_SLOW(__nv_fast_sinf
, __nv_sinf
)(__a
);
309 __DEVICE__
double sinh(double __a
) { return __nv_sinh(__a
); }
310 __DEVICE__
float sinhf(float __a
) { return __nv_sinhf(__a
); }
311 __DEVICE__
double sinpi(double __a
) { return __nv_sinpi(__a
); }
312 __DEVICE__
float sinpif(float __a
) { return __nv_sinpif(__a
); }
313 __DEVICE__
double sqrt(double __a
) { return __nv_sqrt(__a
); }
314 __DEVICE__
float sqrtf(float __a
) { return __nv_sqrtf(__a
); }
315 __DEVICE__
double tan(double __a
) { return __nv_tan(__a
); }
316 __DEVICE__
float tanf(float __a
) { return __nv_tanf(__a
); }
317 __DEVICE__
double tanh(double __a
) { return __nv_tanh(__a
); }
318 __DEVICE__
float tanhf(float __a
) { return __nv_tanhf(__a
); }
319 __DEVICE__
double tgamma(double __a
) { return __nv_tgamma(__a
); }
320 __DEVICE__
float tgammaf(float __a
) { return __nv_tgammaf(__a
); }
321 __DEVICE__
double trunc(double __a
) { return __nv_trunc(__a
); }
322 __DEVICE__
float truncf(float __a
) { return __nv_truncf(__a
); }
323 __DEVICE__
unsigned long long ullmax(unsigned long long __a
,
324 unsigned long long __b
) {
325 return __nv_ullmax(__a
, __b
);
327 __DEVICE__
unsigned long long ullmin(unsigned long long __a
,
328 unsigned long long __b
) {
329 return __nv_ullmin(__a
, __b
);
331 __DEVICE__
unsigned int umax(unsigned int __a
, unsigned int __b
) {
332 return __nv_umax(__a
, __b
);
334 __DEVICE__
unsigned int umin(unsigned int __a
, unsigned int __b
) {
335 return __nv_umin(__a
, __b
);
337 __DEVICE__
double y0(double __a
) { return __nv_y0(__a
); }
338 __DEVICE__
float y0f(float __a
) { return __nv_y0f(__a
); }
339 __DEVICE__
double y1(double __a
) { return __nv_y1(__a
); }
340 __DEVICE__
float y1f(float __a
) { return __nv_y1f(__a
); }
341 __DEVICE__
double yn(int __a
, double __b
) { return __nv_yn(__a
, __b
); }
342 __DEVICE__
float ynf(int __a
, float __b
) { return __nv_ynf(__a
, __b
); }
344 #pragma pop_macro("__DEVICE__")
345 #pragma pop_macro("__DEVICE_VOID__")
346 #pragma pop_macro("__FAST_OR_SLOW")
348 #endif // __CLANG_CUDA_MATH_H__