1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
9 #ifndef __CLANG_CUDA_CMATH_H__
10 #define __CLANG_CUDA_CMATH_H__
12 #error "This file is for CUDA compilation only."
15 #ifndef __OPENMP_NVPTX__
19 // CUDA lets us use various std math functions on the device side. This file
20 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
22 // Specifically, the forward-declares header declares __device__ overloads for
23 // these functions in the global namespace, then pulls them into namespace std
24 // with 'using' statements. Then this file implements those functions, after
25 // their implementations have been pulled in.
27 // It's important that we declare the functions in the global namespace and pull
28 // them into namespace std with using statements, as opposed to simply declaring
29 // these functions in namespace std, because our device functions need to
30 // overload the standard library functions, which may be declared in the global
31 // namespace or in std, depending on the degree of conformance of the stdlib
32 // implementation. Declaring in the global namespace and pulling into namespace
33 // std covers all of the known knowns.
35 #ifdef __OPENMP_NVPTX__
36 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
38 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
41 __DEVICE__
long long abs(long long __n
) { return ::llabs(__n
); }
42 __DEVICE__
long abs(long __n
) { return ::labs(__n
); }
43 __DEVICE__
float abs(float __x
) { return ::fabsf(__x
); }
44 __DEVICE__
double abs(double __x
) { return ::fabs(__x
); }
45 __DEVICE__
float acos(float __x
) { return ::acosf(__x
); }
46 __DEVICE__
float asin(float __x
) { return ::asinf(__x
); }
47 __DEVICE__
float atan(float __x
) { return ::atanf(__x
); }
48 __DEVICE__
float atan2(float __x
, float __y
) { return ::atan2f(__x
, __y
); }
49 __DEVICE__
float ceil(float __x
) { return ::ceilf(__x
); }
50 __DEVICE__
float cos(float __x
) { return ::cosf(__x
); }
51 __DEVICE__
float cosh(float __x
) { return ::coshf(__x
); }
52 __DEVICE__
float exp(float __x
) { return ::expf(__x
); }
53 __DEVICE__
float fabs(float __x
) { return ::fabsf(__x
); }
54 __DEVICE__
float floor(float __x
) { return ::floorf(__x
); }
55 __DEVICE__
float fmod(float __x
, float __y
) { return ::fmodf(__x
, __y
); }
56 __DEVICE__
int fpclassify(float __x
) {
57 return __builtin_fpclassify(FP_NAN
, FP_INFINITE
, FP_NORMAL
, FP_SUBNORMAL
,
60 __DEVICE__
int fpclassify(double __x
) {
61 return __builtin_fpclassify(FP_NAN
, FP_INFINITE
, FP_NORMAL
, FP_SUBNORMAL
,
64 __DEVICE__
float frexp(float __arg
, int *__exp
) {
65 return ::frexpf(__arg
, __exp
);
68 // For inscrutable reasons, the CUDA headers define these functions for us on
70 #if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
72 // For OpenMP we work around some old system headers that have non-conforming
73 // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
74 // this by providing two versions of these functions, differing only in the
75 // return type. To avoid conflicting definitions we disable implicit base
76 // function generation. That means we will end up with two specializations, one
77 // per type, but only one has a base function defined by the system header.
78 #if defined(__OPENMP_NVPTX__)
79 #pragma omp begin declare variant match( \
80 implementation = {extension(disable_implicit_base)})
82 // FIXME: We lack an extension to customize the mangling of the variants, e.g.,
83 // add a suffix. This means we would clash with the names of the variants
84 // (note that we do not create implicit base functions here). To avoid
85 // this clash we add a new trait to some of them that is always true
86 // (this is LLVM after all ;)). It will only influence the mangled name
87 // of the variants inside the inner region and avoid the clash.
88 #pragma omp begin declare variant match(implementation = {vendor(llvm)})
90 __DEVICE__
int isinf(float __x
) { return ::__isinff(__x
); }
91 __DEVICE__
int isinf(double __x
) { return ::__isinf(__x
); }
92 __DEVICE__
int isfinite(float __x
) { return ::__finitef(__x
); }
93 __DEVICE__
int isfinite(double __x
) { return ::__isfinited(__x
); }
94 __DEVICE__
int isnan(float __x
) { return ::__isnanf(__x
); }
95 __DEVICE__
int isnan(double __x
) { return ::__isnan(__x
); }
97 #pragma omp end declare variant
101 __DEVICE__
bool isinf(float __x
) { return ::__isinff(__x
); }
102 __DEVICE__
bool isinf(double __x
) { return ::__isinf(__x
); }
103 __DEVICE__
bool isfinite(float __x
) { return ::__finitef(__x
); }
104 // For inscrutable reasons, __finite(), the double-precision version of
105 // __finitef, does not exist when compiling for MacOS. __isfinited is available
106 // everywhere and is just as good.
107 __DEVICE__
bool isfinite(double __x
) { return ::__isfinited(__x
); }
108 __DEVICE__
bool isnan(float __x
) { return ::__isnanf(__x
); }
109 __DEVICE__
bool isnan(double __x
) { return ::__isnan(__x
); }
111 #if defined(__OPENMP_NVPTX__)
112 #pragma omp end declare variant
117 __DEVICE__
bool isgreater(float __x
, float __y
) {
118 return __builtin_isgreater(__x
, __y
);
120 __DEVICE__
bool isgreater(double __x
, double __y
) {
121 return __builtin_isgreater(__x
, __y
);
123 __DEVICE__
bool isgreaterequal(float __x
, float __y
) {
124 return __builtin_isgreaterequal(__x
, __y
);
126 __DEVICE__
bool isgreaterequal(double __x
, double __y
) {
127 return __builtin_isgreaterequal(__x
, __y
);
129 __DEVICE__
bool isless(float __x
, float __y
) {
130 return __builtin_isless(__x
, __y
);
132 __DEVICE__
bool isless(double __x
, double __y
) {
133 return __builtin_isless(__x
, __y
);
135 __DEVICE__
bool islessequal(float __x
, float __y
) {
136 return __builtin_islessequal(__x
, __y
);
138 __DEVICE__
bool islessequal(double __x
, double __y
) {
139 return __builtin_islessequal(__x
, __y
);
141 __DEVICE__
bool islessgreater(float __x
, float __y
) {
142 return __builtin_islessgreater(__x
, __y
);
144 __DEVICE__
bool islessgreater(double __x
, double __y
) {
145 return __builtin_islessgreater(__x
, __y
);
147 __DEVICE__
bool isnormal(float __x
) { return __builtin_isnormal(__x
); }
148 __DEVICE__
bool isnormal(double __x
) { return __builtin_isnormal(__x
); }
149 __DEVICE__
bool isunordered(float __x
, float __y
) {
150 return __builtin_isunordered(__x
, __y
);
152 __DEVICE__
bool isunordered(double __x
, double __y
) {
153 return __builtin_isunordered(__x
, __y
);
155 __DEVICE__
float ldexp(float __arg
, int __exp
) {
156 return ::ldexpf(__arg
, __exp
);
158 __DEVICE__
float log(float __x
) { return ::logf(__x
); }
159 __DEVICE__
float log10(float __x
) { return ::log10f(__x
); }
160 __DEVICE__
float modf(float __x
, float *__iptr
) { return ::modff(__x
, __iptr
); }
161 __DEVICE__
float pow(float __base
, float __exp
) {
162 return ::powf(__base
, __exp
);
164 __DEVICE__
float pow(float __base
, int __iexp
) {
165 return ::powif(__base
, __iexp
);
167 __DEVICE__
double pow(double __base
, int __iexp
) {
168 return ::powi(__base
, __iexp
);
170 __DEVICE__
bool signbit(float __x
) { return ::__signbitf(__x
); }
171 __DEVICE__
bool signbit(double __x
) { return ::__signbitd(__x
); }
172 __DEVICE__
float sin(float __x
) { return ::sinf(__x
); }
173 __DEVICE__
float sinh(float __x
) { return ::sinhf(__x
); }
174 __DEVICE__
float sqrt(float __x
) { return ::sqrtf(__x
); }
175 __DEVICE__
float tan(float __x
) { return ::tanf(__x
); }
176 __DEVICE__
float tanh(float __x
) { return ::tanhf(__x
); }
178 // There was a redefinition error for this this overload in CUDA mode.
179 // We restrict it to OpenMP mode for now, that is where it is actually needed
181 #ifdef __OPENMP_NVPTX__
182 __DEVICE__
float remquo(float __n
, float __d
, int *__q
) {
183 return ::remquof(__n
, __d
, __q
);
187 // Notably missing above is nexttoward. We omit it because
188 // libdevice doesn't provide an implementation, and we don't want to be in the
189 // business of implementing tricky libm functions in this header.
191 #ifndef __OPENMP_NVPTX__
193 // Now we've defined everything we promised we'd define in
194 // __clang_cuda_math_forward_declares.h. We need to do two additional things to
195 // fix up our math functions.
197 // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
198 // only sin(float) and sin(double), which means that e.g. sin(0) is
201 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
202 // std. These are defined in the CUDA headers in the global namespace,
203 // independent of everything else we've done here.
205 // We can't use std::enable_if, because we want to be pre-C++11 compatible. But
206 // we go ahead and unconditionally define functions that are only available when
207 // compiling for C++11 to match the behavior of the CUDA headers.
208 template<bool __B
, class __T
= void>
209 struct __clang_cuda_enable_if
{};
211 template <class __T
> struct __clang_cuda_enable_if
<true, __T
> {
215 // Defines an overload of __fn that accepts one integral argument, calls
216 // __fn((double)x), and returns __retty.
217 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \
218 template <typename __T> \
220 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \
223 return ::__fn((double)__x); \
226 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
227 // __fn((double)x, (double)y), and returns a double.
229 // Note this is different from OVERLOAD_1, which generates an overload that
230 // accepts only *integral* arguments.
231 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \
232 template <typename __T1, typename __T2> \
233 __DEVICE__ typename __clang_cuda_enable_if< \
234 std::numeric_limits<__T1>::is_specialized && \
235 std::numeric_limits<__T2>::is_specialized, \
237 __fn(__T1 __x, __T2 __y) { \
238 return __fn((double)__x, (double)__y); \
241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos
)
242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh
)
243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin
)
244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh
)
245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan
)
246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2
);
247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh
)
248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt
)
249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil
)
250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign
);
251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos
)
252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh
)
253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf
)
254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc
)
255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp
)
256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2
)
257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1
)
258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs
)
259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim
);
260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor
)
261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax
);
262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin
);
263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod
);
264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify
)
265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot
);
266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb
)
267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite
)
268 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater
);
269 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal
);
270 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf
);
271 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless
);
272 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal
);
273 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater
);
274 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan
);
275 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal
)
276 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered
);
277 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma
)
278 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log
)
279 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10
)
280 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p
)
281 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2
)
282 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb
)
283 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint
)
284 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround
)
285 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint
)
286 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround
)
287 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint
);
288 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter
);
289 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow
);
290 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder
);
291 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint
);
292 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round
);
293 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit
)
294 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin
)
295 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh
)
296 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt
)
297 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan
)
298 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh
)
299 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma
)
300 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc
);
302 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
303 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
305 // Overloads for functions that don't match the patterns expected by
306 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
307 template <typename __T1
, typename __T2
, typename __T3
>
308 __DEVICE__ typename __clang_cuda_enable_if
<
309 std::numeric_limits
<__T1
>::is_specialized
&&
310 std::numeric_limits
<__T2
>::is_specialized
&&
311 std::numeric_limits
<__T3
>::is_specialized
,
313 fma(__T1 __x
, __T2 __y
, __T3 __z
) {
314 return std::fma((double)__x
, (double)__y
, (double)__z
);
317 template <typename __T
>
318 __DEVICE__ typename __clang_cuda_enable_if
<std::numeric_limits
<__T
>::is_integer
,
320 frexp(__T __x
, int *__exp
) {
321 return std::frexp((double)__x
, __exp
);
324 template <typename __T
>
325 __DEVICE__ typename __clang_cuda_enable_if
<std::numeric_limits
<__T
>::is_integer
,
327 ldexp(__T __x
, int __exp
) {
328 return std::ldexp((double)__x
, __exp
);
331 template <typename __T1
, typename __T2
>
332 __DEVICE__ typename __clang_cuda_enable_if
<
333 std::numeric_limits
<__T1
>::is_specialized
&&
334 std::numeric_limits
<__T2
>::is_specialized
,
336 remquo(__T1 __x
, __T2 __y
, int *__quo
) {
337 return std::remquo((double)__x
, (double)__y
, __quo
);
340 template <typename __T
>
341 __DEVICE__ typename __clang_cuda_enable_if
<std::numeric_limits
<__T
>::is_integer
,
343 scalbln(__T __x
, long __exp
) {
344 return std::scalbln((double)__x
, __exp
);
347 template <typename __T
>
348 __DEVICE__ typename __clang_cuda_enable_if
<std::numeric_limits
<__T
>::is_integer
,
350 scalbn(__T __x
, int __exp
) {
351 return std::scalbn((double)__x
, __exp
);
354 // We need to define these overloads in exactly the namespace our standard
355 // library uses (including the right inline namespace), otherwise they won't be
356 // picked up by other functions in the standard library (e.g. functions in
357 // <complex>). Thus the ugliness below.
358 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
359 _LIBCPP_BEGIN_NAMESPACE_STD
362 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
363 _GLIBCXX_BEGIN_NAMESPACE_VERSION
367 // Pull the new overloads we defined above into namespace std.
398 using ::isgreaterequal
;
401 using ::islessgreater
;
433 // Well this is fun: We need to pull these symbols in for libc++, but we can't
434 // pull them in with libstdc++, because its ::isinf and ::isnan are different
435 // than its std::isinf and std::isnan.
441 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
499 #ifdef _LIBCPP_END_NAMESPACE_STD
500 _LIBCPP_END_NAMESPACE_STD
502 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
503 _GLIBCXX_END_NAMESPACE_VERSION
508 #endif // __OPENMP_NVPTX__