1 /*===---- __clang_cuda_math.h - Device-side CUDA math support --------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
9 #ifndef __CLANG_CUDA_MATH_H__
10 #define __CLANG_CUDA_MATH_H__
12 #error "This file is for CUDA compilation only."
15 // The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
16 // libcalls reach the link step instead of being eagerly replaced.
17 #ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
19 #ifndef __OPENMP_NVPTX__
20 #if CUDA_VERSION < 9000
21 #error This file is intended to be used with CUDA-9+ only.
25 // __DEVICE__ is a helper macro with common set of attributes for the wrappers
26 // we implement in this file. We need static in order to avoid emitting unused
27 // functions and __forceinline__ helps inlining these wrappers at -O1.
28 #pragma push_macro("__DEVICE__")
29 #ifdef __OPENMP_NVPTX__
30 #if defined(__cplusplus)
31 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
33 #define __DEVICE__ static __attribute__((always_inline, nothrow))
36 #define __DEVICE__ static __device__ __forceinline__
39 // Specialized version of __DEVICE__ for functions with void return type. Needed
40 // because the OpenMP overlay requires constexpr functions here but prior to
41 // c++14 void return functions could not be constexpr.
42 #pragma push_macro("__DEVICE_VOID__")
43 #if defined(__OPENMP_NVPTX__) && defined(__cplusplus) && __cplusplus < 201402L
44 #define __DEVICE_VOID__ static __attribute__((always_inline, nothrow))
46 #define __DEVICE_VOID__ __DEVICE__
49 // libdevice provides fast low precision and slow full-recision implementations
50 // for some functions. Which one gets selected depends on
51 // __CLANG_CUDA_APPROX_TRANSCENDENTALS__ which gets defined by clang if
52 // -ffast-math or -fgpu-approx-transcendentals are in effect.
53 #pragma push_macro("__FAST_OR_SLOW")
54 #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
55 #define __FAST_OR_SLOW(fast, slow) fast
57 #define __FAST_OR_SLOW(fast, slow) slow
60 __DEVICE__
int abs(int __a
) { return __nv_abs(__a
); }
61 __DEVICE__
double fabs(double __a
) { return __nv_fabs(__a
); }
62 __DEVICE__
double acos(double __a
) { return __nv_acos(__a
); }
63 __DEVICE__
float acosf(float __a
) { return __nv_acosf(__a
); }
64 __DEVICE__
double acosh(double __a
) { return __nv_acosh(__a
); }
65 __DEVICE__
float acoshf(float __a
) { return __nv_acoshf(__a
); }
66 __DEVICE__
double asin(double __a
) { return __nv_asin(__a
); }
67 __DEVICE__
float asinf(float __a
) { return __nv_asinf(__a
); }
68 __DEVICE__
double asinh(double __a
) { return __nv_asinh(__a
); }
69 __DEVICE__
float asinhf(float __a
) { return __nv_asinhf(__a
); }
70 __DEVICE__
double atan(double __a
) { return __nv_atan(__a
); }
71 __DEVICE__
double atan2(double __a
, double __b
) { return __nv_atan2(__a
, __b
); }
72 __DEVICE__
float atan2f(float __a
, float __b
) { return __nv_atan2f(__a
, __b
); }
73 __DEVICE__
float atanf(float __a
) { return __nv_atanf(__a
); }
74 __DEVICE__
double atanh(double __a
) { return __nv_atanh(__a
); }
75 __DEVICE__
float atanhf(float __a
) { return __nv_atanhf(__a
); }
76 __DEVICE__
double cbrt(double __a
) { return __nv_cbrt(__a
); }
77 __DEVICE__
float cbrtf(float __a
) { return __nv_cbrtf(__a
); }
78 __DEVICE__
double ceil(double __a
) { return __nv_ceil(__a
); }
79 __DEVICE__
float ceilf(float __a
) { return __nv_ceilf(__a
); }
80 __DEVICE__
double copysign(double __a
, double __b
) {
81 return __nv_copysign(__a
, __b
);
83 __DEVICE__
float copysignf(float __a
, float __b
) {
84 return __nv_copysignf(__a
, __b
);
86 __DEVICE__
double cos(double __a
) { return __nv_cos(__a
); }
87 __DEVICE__
float cosf(float __a
) {
88 return __FAST_OR_SLOW(__nv_fast_cosf
, __nv_cosf
)(__a
);
90 __DEVICE__
double cosh(double __a
) { return __nv_cosh(__a
); }
91 __DEVICE__
float coshf(float __a
) { return __nv_coshf(__a
); }
92 __DEVICE__
double cospi(double __a
) { return __nv_cospi(__a
); }
93 __DEVICE__
float cospif(float __a
) { return __nv_cospif(__a
); }
94 __DEVICE__
double cyl_bessel_i0(double __a
) { return __nv_cyl_bessel_i0(__a
); }
95 __DEVICE__
float cyl_bessel_i0f(float __a
) { return __nv_cyl_bessel_i0f(__a
); }
96 __DEVICE__
double cyl_bessel_i1(double __a
) { return __nv_cyl_bessel_i1(__a
); }
97 __DEVICE__
float cyl_bessel_i1f(float __a
) { return __nv_cyl_bessel_i1f(__a
); }
98 __DEVICE__
double erf(double __a
) { return __nv_erf(__a
); }
99 __DEVICE__
double erfc(double __a
) { return __nv_erfc(__a
); }
100 __DEVICE__
float erfcf(float __a
) { return __nv_erfcf(__a
); }
101 __DEVICE__
double erfcinv(double __a
) { return __nv_erfcinv(__a
); }
102 __DEVICE__
float erfcinvf(float __a
) { return __nv_erfcinvf(__a
); }
103 __DEVICE__
double erfcx(double __a
) { return __nv_erfcx(__a
); }
104 __DEVICE__
float erfcxf(float __a
) { return __nv_erfcxf(__a
); }
105 __DEVICE__
float erff(float __a
) { return __nv_erff(__a
); }
106 __DEVICE__
double erfinv(double __a
) { return __nv_erfinv(__a
); }
107 __DEVICE__
float erfinvf(float __a
) { return __nv_erfinvf(__a
); }
108 __DEVICE__
double exp(double __a
) { return __nv_exp(__a
); }
109 __DEVICE__
double exp10(double __a
) { return __nv_exp10(__a
); }
110 __DEVICE__
float exp10f(float __a
) { return __nv_exp10f(__a
); }
111 __DEVICE__
double exp2(double __a
) { return __nv_exp2(__a
); }
112 __DEVICE__
float exp2f(float __a
) { return __nv_exp2f(__a
); }
113 __DEVICE__
float expf(float __a
) { return __nv_expf(__a
); }
114 __DEVICE__
double expm1(double __a
) { return __nv_expm1(__a
); }
115 __DEVICE__
float expm1f(float __a
) { return __nv_expm1f(__a
); }
116 __DEVICE__
float fabsf(float __a
) { return __nv_fabsf(__a
); }
117 __DEVICE__
double fdim(double __a
, double __b
) { return __nv_fdim(__a
, __b
); }
118 __DEVICE__
float fdimf(float __a
, float __b
) { return __nv_fdimf(__a
, __b
); }
119 __DEVICE__
double fdivide(double __a
, double __b
) { return __a
/ __b
; }
120 __DEVICE__
float fdividef(float __a
, float __b
) {
121 #if __FAST_MATH__ && !__CUDA_PREC_DIV
122 return __nv_fast_fdividef(__a
, __b
);
127 __DEVICE__
double floor(double __f
) { return __nv_floor(__f
); }
128 __DEVICE__
float floorf(float __f
) { return __nv_floorf(__f
); }
129 __DEVICE__
double fma(double __a
, double __b
, double __c
) {
130 return __nv_fma(__a
, __b
, __c
);
132 __DEVICE__
float fmaf(float __a
, float __b
, float __c
) {
133 return __nv_fmaf(__a
, __b
, __c
);
135 __DEVICE__
double fmax(double __a
, double __b
) { return __nv_fmax(__a
, __b
); }
136 __DEVICE__
float fmaxf(float __a
, float __b
) { return __nv_fmaxf(__a
, __b
); }
137 __DEVICE__
double fmin(double __a
, double __b
) { return __nv_fmin(__a
, __b
); }
138 __DEVICE__
float fminf(float __a
, float __b
) { return __nv_fminf(__a
, __b
); }
139 __DEVICE__
double fmod(double __a
, double __b
) { return __nv_fmod(__a
, __b
); }
140 __DEVICE__
float fmodf(float __a
, float __b
) { return __nv_fmodf(__a
, __b
); }
141 __DEVICE__
double frexp(double __a
, int *__b
) { return __nv_frexp(__a
, __b
); }
142 __DEVICE__
float frexpf(float __a
, int *__b
) { return __nv_frexpf(__a
, __b
); }
143 __DEVICE__
double hypot(double __a
, double __b
) { return __nv_hypot(__a
, __b
); }
144 __DEVICE__
float hypotf(float __a
, float __b
) { return __nv_hypotf(__a
, __b
); }
145 __DEVICE__
int ilogb(double __a
) { return __nv_ilogb(__a
); }
146 __DEVICE__
int ilogbf(float __a
) { return __nv_ilogbf(__a
); }
147 __DEVICE__
double j0(double __a
) { return __nv_j0(__a
); }
148 __DEVICE__
float j0f(float __a
) { return __nv_j0f(__a
); }
149 __DEVICE__
double j1(double __a
) { return __nv_j1(__a
); }
150 __DEVICE__
float j1f(float __a
) { return __nv_j1f(__a
); }
151 __DEVICE__
double jn(int __n
, double __a
) { return __nv_jn(__n
, __a
); }
152 __DEVICE__
float jnf(int __n
, float __a
) { return __nv_jnf(__n
, __a
); }
153 #if defined(__LP64__) || defined(_WIN64)
154 __DEVICE__
long labs(long __a
) { return __nv_llabs(__a
); };
156 __DEVICE__
long labs(long __a
) { return __nv_abs(__a
); };
158 __DEVICE__
double ldexp(double __a
, int __b
) { return __nv_ldexp(__a
, __b
); }
159 __DEVICE__
float ldexpf(float __a
, int __b
) { return __nv_ldexpf(__a
, __b
); }
160 __DEVICE__
double lgamma(double __a
) { return __nv_lgamma(__a
); }
161 __DEVICE__
float lgammaf(float __a
) { return __nv_lgammaf(__a
); }
162 __DEVICE__
long long llabs(long long __a
) { return __nv_llabs(__a
); }
163 __DEVICE__
long long llmax(long long __a
, long long __b
) {
164 return __nv_llmax(__a
, __b
);
166 __DEVICE__
long long llmin(long long __a
, long long __b
) {
167 return __nv_llmin(__a
, __b
);
169 __DEVICE__
long long llrint(double __a
) { return __nv_llrint(__a
); }
170 __DEVICE__
long long llrintf(float __a
) { return __nv_llrintf(__a
); }
171 __DEVICE__
long long llround(double __a
) { return __nv_llround(__a
); }
172 __DEVICE__
long long llroundf(float __a
) { return __nv_llroundf(__a
); }
173 __DEVICE__
double round(double __a
) { return __nv_round(__a
); }
174 __DEVICE__
float roundf(float __a
) { return __nv_roundf(__a
); }
175 __DEVICE__
double log(double __a
) { return __nv_log(__a
); }
176 __DEVICE__
double log10(double __a
) { return __nv_log10(__a
); }
177 __DEVICE__
float log10f(float __a
) { return __nv_log10f(__a
); }
178 __DEVICE__
double log1p(double __a
) { return __nv_log1p(__a
); }
179 __DEVICE__
float log1pf(float __a
) { return __nv_log1pf(__a
); }
180 __DEVICE__
double log2(double __a
) { return __nv_log2(__a
); }
181 __DEVICE__
float log2f(float __a
) {
182 return __FAST_OR_SLOW(__nv_fast_log2f
, __nv_log2f
)(__a
);
184 __DEVICE__
double logb(double __a
) { return __nv_logb(__a
); }
185 __DEVICE__
float logbf(float __a
) { return __nv_logbf(__a
); }
186 __DEVICE__
float logf(float __a
) {
187 return __FAST_OR_SLOW(__nv_fast_logf
, __nv_logf
)(__a
);
189 #if defined(__LP64__) || defined(_WIN64)
190 __DEVICE__
long lrint(double __a
) { return llrint(__a
); }
191 __DEVICE__
long lrintf(float __a
) { return __float2ll_rn(__a
); }
192 __DEVICE__
long lround(double __a
) { return llround(__a
); }
193 __DEVICE__
long lroundf(float __a
) { return llroundf(__a
); }
195 __DEVICE__
long lrint(double __a
) { return (long)rint(__a
); }
196 __DEVICE__
long lrintf(float __a
) { return __float2int_rn(__a
); }
197 __DEVICE__
long lround(double __a
) { return round(__a
); }
198 __DEVICE__
long lroundf(float __a
) { return roundf(__a
); }
200 __DEVICE__
int max(int __a
, int __b
) { return __nv_max(__a
, __b
); }
201 __DEVICE__
int min(int __a
, int __b
) { return __nv_min(__a
, __b
); }
202 __DEVICE__
double modf(double __a
, double *__b
) { return __nv_modf(__a
, __b
); }
203 __DEVICE__
float modff(float __a
, float *__b
) { return __nv_modff(__a
, __b
); }
204 __DEVICE__
double nearbyint(double __a
) { return __builtin_nearbyint(__a
); }
205 __DEVICE__
float nearbyintf(float __a
) { return __builtin_nearbyintf(__a
); }
206 __DEVICE__
double nextafter(double __a
, double __b
) {
207 return __nv_nextafter(__a
, __b
);
209 __DEVICE__
float nextafterf(float __a
, float __b
) {
210 return __nv_nextafterf(__a
, __b
);
212 __DEVICE__
double norm(int __dim
, const double *__t
) {
213 return __nv_norm(__dim
, __t
);
215 __DEVICE__
double norm3d(double __a
, double __b
, double __c
) {
216 return __nv_norm3d(__a
, __b
, __c
);
218 __DEVICE__
float norm3df(float __a
, float __b
, float __c
) {
219 return __nv_norm3df(__a
, __b
, __c
);
221 __DEVICE__
double norm4d(double __a
, double __b
, double __c
, double __d
) {
222 return __nv_norm4d(__a
, __b
, __c
, __d
);
224 __DEVICE__
float norm4df(float __a
, float __b
, float __c
, float __d
) {
225 return __nv_norm4df(__a
, __b
, __c
, __d
);
227 __DEVICE__
double normcdf(double __a
) { return __nv_normcdf(__a
); }
228 __DEVICE__
float normcdff(float __a
) { return __nv_normcdff(__a
); }
229 __DEVICE__
double normcdfinv(double __a
) { return __nv_normcdfinv(__a
); }
230 __DEVICE__
float normcdfinvf(float __a
) { return __nv_normcdfinvf(__a
); }
231 __DEVICE__
float normf(int __dim
, const float *__t
) {
232 return __nv_normf(__dim
, __t
);
234 __DEVICE__
double pow(double __a
, double __b
) { return __nv_pow(__a
, __b
); }
235 __DEVICE__
float powf(float __a
, float __b
) { return __nv_powf(__a
, __b
); }
236 __DEVICE__
double powi(double __a
, int __b
) { return __nv_powi(__a
, __b
); }
237 __DEVICE__
float powif(float __a
, int __b
) { return __nv_powif(__a
, __b
); }
238 __DEVICE__
double rcbrt(double __a
) { return __nv_rcbrt(__a
); }
239 __DEVICE__
float rcbrtf(float __a
) { return __nv_rcbrtf(__a
); }
240 __DEVICE__
double remainder(double __a
, double __b
) {
241 return __nv_remainder(__a
, __b
);
243 __DEVICE__
float remainderf(float __a
, float __b
) {
244 return __nv_remainderf(__a
, __b
);
246 __DEVICE__
double remquo(double __a
, double __b
, int *__c
) {
247 return __nv_remquo(__a
, __b
, __c
);
249 __DEVICE__
float remquof(float __a
, float __b
, int *__c
) {
250 return __nv_remquof(__a
, __b
, __c
);
252 __DEVICE__
double rhypot(double __a
, double __b
) {
253 return __nv_rhypot(__a
, __b
);
255 __DEVICE__
float rhypotf(float __a
, float __b
) {
256 return __nv_rhypotf(__a
, __b
);
258 // __nv_rint* in libdevice is buggy and produces incorrect results.
259 __DEVICE__
double rint(double __a
) { return __builtin_rint(__a
); }
260 __DEVICE__
float rintf(float __a
) { return __builtin_rintf(__a
); }
261 __DEVICE__
double rnorm(int __a
, const double *__b
) {
262 return __nv_rnorm(__a
, __b
);
264 __DEVICE__
double rnorm3d(double __a
, double __b
, double __c
) {
265 return __nv_rnorm3d(__a
, __b
, __c
);
267 __DEVICE__
float rnorm3df(float __a
, float __b
, float __c
) {
268 return __nv_rnorm3df(__a
, __b
, __c
);
270 __DEVICE__
double rnorm4d(double __a
, double __b
, double __c
, double __d
) {
271 return __nv_rnorm4d(__a
, __b
, __c
, __d
);
273 __DEVICE__
float rnorm4df(float __a
, float __b
, float __c
, float __d
) {
274 return __nv_rnorm4df(__a
, __b
, __c
, __d
);
276 __DEVICE__
float rnormf(int __dim
, const float *__t
) {
277 return __nv_rnormf(__dim
, __t
);
279 __DEVICE__
double rsqrt(double __a
) { return __nv_rsqrt(__a
); }
280 __DEVICE__
float rsqrtf(float __a
) { return __nv_rsqrtf(__a
); }
281 __DEVICE__
double scalbn(double __a
, int __b
) { return __nv_scalbn(__a
, __b
); }
282 __DEVICE__
float scalbnf(float __a
, int __b
) { return __nv_scalbnf(__a
, __b
); }
283 __DEVICE__
double scalbln(double __a
, long __b
) {
285 return __a
> 0 ? HUGE_VAL
: -HUGE_VAL
;
287 return __a
> 0 ? 0.0 : -0.0;
288 return scalbn(__a
, (int)__b
);
290 __DEVICE__
float scalblnf(float __a
, long __b
) {
292 return __a
> 0 ? HUGE_VALF
: -HUGE_VALF
;
294 return __a
> 0 ? 0.f
: -0.f
;
295 return scalbnf(__a
, (int)__b
);
297 __DEVICE__
double sin(double __a
) { return __nv_sin(__a
); }
298 __DEVICE_VOID__
void sincos(double __a
, double *__s
, double *__c
) {
299 return __nv_sincos(__a
, __s
, __c
);
301 __DEVICE_VOID__
void sincosf(float __a
, float *__s
, float *__c
) {
302 return __FAST_OR_SLOW(__nv_fast_sincosf
, __nv_sincosf
)(__a
, __s
, __c
);
304 __DEVICE_VOID__
void sincospi(double __a
, double *__s
, double *__c
) {
305 return __nv_sincospi(__a
, __s
, __c
);
307 __DEVICE_VOID__
void sincospif(float __a
, float *__s
, float *__c
) {
308 return __nv_sincospif(__a
, __s
, __c
);
310 __DEVICE__
float sinf(float __a
) {
311 return __FAST_OR_SLOW(__nv_fast_sinf
, __nv_sinf
)(__a
);
313 __DEVICE__
double sinh(double __a
) { return __nv_sinh(__a
); }
314 __DEVICE__
float sinhf(float __a
) { return __nv_sinhf(__a
); }
315 __DEVICE__
double sinpi(double __a
) { return __nv_sinpi(__a
); }
316 __DEVICE__
float sinpif(float __a
) { return __nv_sinpif(__a
); }
317 __DEVICE__
double sqrt(double __a
) { return __nv_sqrt(__a
); }
318 __DEVICE__
float sqrtf(float __a
) { return __nv_sqrtf(__a
); }
319 __DEVICE__
double tan(double __a
) { return __nv_tan(__a
); }
320 __DEVICE__
float tanf(float __a
) { return __nv_tanf(__a
); }
321 __DEVICE__
double tanh(double __a
) { return __nv_tanh(__a
); }
322 __DEVICE__
float tanhf(float __a
) { return __nv_tanhf(__a
); }
323 __DEVICE__
double tgamma(double __a
) { return __nv_tgamma(__a
); }
324 __DEVICE__
float tgammaf(float __a
) { return __nv_tgammaf(__a
); }
325 __DEVICE__
double trunc(double __a
) { return __nv_trunc(__a
); }
326 __DEVICE__
float truncf(float __a
) { return __nv_truncf(__a
); }
327 __DEVICE__
unsigned long long ullmax(unsigned long long __a
,
328 unsigned long long __b
) {
329 return __nv_ullmax(__a
, __b
);
331 __DEVICE__
unsigned long long ullmin(unsigned long long __a
,
332 unsigned long long __b
) {
333 return __nv_ullmin(__a
, __b
);
335 __DEVICE__
unsigned int umax(unsigned int __a
, unsigned int __b
) {
336 return __nv_umax(__a
, __b
);
338 __DEVICE__
unsigned int umin(unsigned int __a
, unsigned int __b
) {
339 return __nv_umin(__a
, __b
);
341 __DEVICE__
double y0(double __a
) { return __nv_y0(__a
); }
342 __DEVICE__
float y0f(float __a
) { return __nv_y0f(__a
); }
343 __DEVICE__
double y1(double __a
) { return __nv_y1(__a
); }
344 __DEVICE__
float y1f(float __a
) { return __nv_y1f(__a
); }
345 __DEVICE__
double yn(int __a
, double __b
) { return __nv_yn(__a
, __b
); }
346 __DEVICE__
float ynf(int __a
, float __b
) { return __nv_ynf(__a
, __b
); }
348 #pragma pop_macro("__DEVICE__")
349 #pragma pop_macro("__DEVICE_VOID__")
350 #pragma pop_macro("__FAST_OR_SLOW")
352 #endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
353 #endif // __CLANG_CUDA_MATH_H__