/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
#include <omp.h>
#endif
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
#define __RETURN_TYPE int
#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template<bool>
struct __compare_result{};
template<>
struct __compare_result<true> {
  static const __device__ bool valid;
};

__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__ void __static_assert_equal_size() {
  __suppress_unused_warning(__compare_result<S == T>::valid);
}

#define __static_assert_type_size_equal(A, B) \
  __static_assert_equal_size<A,B>()

#else
#define __static_assert_type_size_equal(A,B) \
  static_assert((A) == (B), "")

#endif
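// Note on the pre-C++11 emulation above: only the __compare_result<true>
// specialization declares `valid`, so naming __compare_result<S == T>::valid
// compiles exactly when the two sizes match. Illustrative sketch, assuming a
// target where sizeof(float) == sizeof(uint32_t) but != sizeof(uint64_t):
//
//   __static_assert_type_size_equal(sizeof(float), sizeof(uint32_t)); // OK
//   __static_assert_type_size_equal(sizeof(float), sizeof(uint64_t));
//     // error: no member named 'valid' in '__compare_result<false>'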
__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '7')
      __r = (__r * 8u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 10u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 16u) + __tmp - '0';
    else if (__tmp >= 'a' && __tmp <= 'f')
      __r = (__r * 16u) + __tmp - 'a' + 10;
    else if (__tmp >= 'A' && __tmp <= 'F')
      __r = (__r * 16u) + __tmp - 'A' + 10;
    else
      return 0;

    ++__tagp;
  }

  return __r;
}
__DEVICE__
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  if (*__tagp == '0') {
    ++__tagp;

    if (*__tagp == 'x' || *__tagp == 'X')
      return __make_mantissa_base16(__tagp + 1); // skip the 'x'/'X' prefix
    else
      return __make_mantissa_base8(__tagp);
  }

  return __make_mantissa_base10(__tagp);
}
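// __make_mantissa() implements the tag-string convention of C's nan()/nanf():
// a leading "0x"/"0X" selects the base-16 parser, a bare leading '0' selects
// base 8, anything else base 10, and any invalid character makes the whole
// parse return 0. For example:
//
//   __make_mantissa("017") == 15   // octal
//   __make_mantissa("42")  == 42   // decimal
//   __make_mantissa("4z")  == 0    // invalid digit
//
// The result is used below by nanf()/nan() to fill the NaN payload bits.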
// BEGIN FLOAT
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
#endif
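// The abs/labs/llabs overloads above are branchless: __sgn is all ones when
// __x is negative (arithmetic shift of the sign bit) and zero otherwise, so
// (__x ^ __sgn) - __sgn negates negative values and leaves non-negative ones
// unchanged. Worked example for abs(-5) with a 32-bit int:
//   __sgn = -5 >> 31 = 0xFFFFFFFF (-1)
//   (-5 ^ -1) - (-1) = 4 + 1 = 5
// As with the C library functions, the result is undefined for the most
// negative value of each type, whose magnitude is not representable.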
__DEVICE__
float acosf(float __x) { return __ocml_acos_f32(__x); }

__DEVICE__
float acoshf(float __x) { return __ocml_acosh_f32(__x); }

__DEVICE__
float asinf(float __x) { return __ocml_asin_f32(__x); }

__DEVICE__
float asinhf(float __x) { return __ocml_asinh_f32(__x); }

__DEVICE__
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }

__DEVICE__
float atanf(float __x) { return __ocml_atan_f32(__x); }

__DEVICE__
float atanhf(float __x) { return __ocml_atanh_f32(__x); }

__DEVICE__
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }

__DEVICE__
float ceilf(float __x) { return __ocml_ceil_f32(__x); }

__DEVICE__
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }

__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }

__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }

__DEVICE__
float cospif(float __x) { return __ocml_cospi_f32(__x); }

__DEVICE__
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }

__DEVICE__
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }

__DEVICE__
float erfcf(float __x) { return __ocml_erfc_f32(__x); }

__DEVICE__
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }

__DEVICE__
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }

__DEVICE__
float erff(float __x) { return __ocml_erf_f32(__x); }

__DEVICE__
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }

__DEVICE__
float exp10f(float __x) { return __ocml_exp10_f32(__x); }

__DEVICE__
float exp2f(float __x) { return __ocml_exp2_f32(__x); }

__DEVICE__
float expf(float __x) { return __ocml_exp_f32(__x); }

__DEVICE__
float expm1f(float __x) { return __ocml_expm1_f32(__x); }

__DEVICE__
float fabsf(float __x) { return __builtin_fabsf(__x); }

__DEVICE__
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }

__DEVICE__
float fdividef(float __x, float __y) { return __x / __y; }

__DEVICE__
float floorf(float __x) { return __ocml_floor_f32(__x); }

__DEVICE__
float fmaf(float __x, float __y, float __z) {
  return __builtin_fmaf(__x, __y, __z);
}

__DEVICE__
float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); }

__DEVICE__
float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); }

__DEVICE__
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }

__DEVICE__
float frexpf(float __x, int *__nptr) {
  *__nptr = __builtin_amdgcn_frexp_expf(__x);
  return __builtin_amdgcn_frexp_mantf(__x);
}

__DEVICE__
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }

__DEVICE__
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }

__DEVICE__
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }

__DEVICE__
__RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); }

__DEVICE__
__RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); }

__DEVICE__
float j0f(float __x) { return __ocml_j0_f32(__x); }

__DEVICE__
float j1f(float __x) { return __ocml_j1_f32(__x); }

__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case.
  if (__n == 0)
    return j0f(__x);
  if (__n == 1)
    return j1f(__x);

  float __x0 = j0f(__x);
  float __x1 = j1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
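// jnf() above evaluates the standard forward recurrence for Bessel functions
// of the first kind, J_{n+1}(x) = (2n/x) * J_n(x) - J_{n-1}(x), seeded with
// j0f()/j1f(). Forward recurrence loses accuracy for J_n once n grows large
// relative to |x|, so large-order results should be treated with care.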
__DEVICE__
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }

__DEVICE__
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }

__DEVICE__
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long long int llroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float log10f(float __x) { return __ocml_log10_f32(__x); }

__DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }

__DEVICE__
float log2f(float __x) { return __ocml_log2_f32(__x); }

__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }

__DEVICE__
float logf(float __x) { return __ocml_log_f32(__x); }

__DEVICE__
long int lrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long int lroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float modff(float __x, float *__iptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__iptr = __tmp;
  return __r;
}

__DEVICE__
float nanf(const char *__tagp __attribute__((nonnull))) {
  union {
    float val;
    struct ieee_float {
      unsigned int mantissa : 22;
      unsigned int quiet : 1;
      unsigned int exponent : 8;
      unsigned int sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
}
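// nanf() assembles a quiet NaN directly from its fields: sign 0, all-ones
// exponent, quiet bit set, and the low mantissa bits taken from the parsed
// tag. For example, nanf("") and nanf("0") both produce the default quiet
// NaN (payload 0); a payload wider than the 22-bit field is truncated.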
__DEVICE__
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }

__DEVICE__
float nextafterf(float __x, float __y) {
  return __ocml_nextafter_f32(__x, __y);
}

__DEVICE__
float norm3df(float __x, float __y, float __z) {
  return __ocml_len3_f32(__x, __y, __z);
}

__DEVICE__
float norm4df(float __x, float __y, float __z, float __w) {
  return __ocml_len4_f32(__x, __y, __z, __w);
}

__DEVICE__
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }

__DEVICE__
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }

__DEVICE__
float normf(int __dim,
            const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f32(__r);
}

__DEVICE__
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }

__DEVICE__
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }

__DEVICE__
float remainderf(float __x, float __y) {
  return __ocml_remainder_f32(__x, __y);
}

__DEVICE__
float remquof(float __x, float __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r = __ocml_remquo_f32(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}

__DEVICE__
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }

__DEVICE__
float rintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
float rnorm3df(float __x, float __y, float __z) {
  return __ocml_rlen3_f32(__x, __y, __z);
}

__DEVICE__
float rnorm4df(float __x, float __y, float __z, float __w) {
  return __ocml_rlen4_f32(__x, __y, __z, __w);
}

__DEVICE__
float rnormf(int __dim,
             const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f32(__r);
}

__DEVICE__
float roundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }

__DEVICE__
float scalblnf(float __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
                         : __ocml_scalb_f32(__x, __n);
}

__DEVICE__
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); }

__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr =
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}
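// sincosf() and the other out-parameter wrappers route the second result
// through a local __tmp cast to address_space(5), the AMDGPU private
// (scratch) address space, because the OCML entry points expect a private
// pointer while the caller's pointer may be generic. Under OpenMP the
// `omp allocate ... allocator(omp_thread_mem_alloc)` pragma keeps that
// temporary thread-local. Usage sketch (variable names are illustrative):
//
//   float __s, __c;
//   sincosf(__theta, &__s, &__c); // __s = sinf(__theta), __c = cosf(__theta)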
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f32(
      __x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }

__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }

__DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }

__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }

__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }

__DEVICE__
float tanhf(float __x) { return __ocml_tanh_f32(__x); }

__DEVICE__
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }

__DEVICE__
float truncf(float __x) { return __ocml_trunc_f32(__x); }

__DEVICE__
float y0f(float __x) { return __ocml_y0_f32(__x); }

__DEVICE__
float y1f(float __x) { return __ocml_y1_f32(__x); }

__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return y0f(__x);
  if (__n == 1)
    return y1f(__x);

  float __x0 = y0f(__x);
  float __x1 = y1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}

// BEGIN INTRINSICS

__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }

__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }

__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
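// Naming note for the intrinsics above and below: the _rd/_rn/_ru/_rz
// suffixes are the CUDA-style rounding modes (round down toward -inf, round
// to nearest even, round up toward +inf, round toward zero), mapped onto the
// OCML _rtn/_rte/_rtp/_rtz entry points. The directed-rounding variants exist
// only when OCML_BASIC_ROUNDED_OPERATIONS is defined; otherwise only the
// round-to-nearest form is provided, lowered to the plain operator or builtin.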
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif

__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
  return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
  return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
  return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __builtin_fmaf(__x, __y, __z);
}
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif

__DEVICE__
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif

__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }

__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }

__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }

__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }

__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  *__sinptr = __ocml_native_sin_f32(__x);
  *__cosptr = __ocml_native_cos_f32(__x);
}

__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }

__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__DEVICE__
double acos(double __x) { return __ocml_acos_f64(__x); }

__DEVICE__
double acosh(double __x) { return __ocml_acosh_f64(__x); }

__DEVICE__
double asin(double __x) { return __ocml_asin_f64(__x); }

__DEVICE__
double asinh(double __x) { return __ocml_asinh_f64(__x); }

__DEVICE__
double atan(double __x) { return __ocml_atan_f64(__x); }

__DEVICE__
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }

__DEVICE__
double atanh(double __x) { return __ocml_atanh_f64(__x); }

__DEVICE__
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }

__DEVICE__
double ceil(double __x) { return __ocml_ceil_f64(__x); }

__DEVICE__
double copysign(double __x, double __y) {
  return __builtin_copysign(__x, __y);
}

__DEVICE__
double cos(double __x) { return __ocml_cos_f64(__x); }

__DEVICE__
double cosh(double __x) { return __ocml_cosh_f64(__x); }

__DEVICE__
double cospi(double __x) { return __ocml_cospi_f64(__x); }

__DEVICE__
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }

__DEVICE__
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }

__DEVICE__
double erf(double __x) { return __ocml_erf_f64(__x); }

__DEVICE__
double erfc(double __x) { return __ocml_erfc_f64(__x); }

__DEVICE__
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }

__DEVICE__
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }

__DEVICE__
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }

__DEVICE__
double exp(double __x) { return __ocml_exp_f64(__x); }

__DEVICE__
double exp10(double __x) { return __ocml_exp10_f64(__x); }

__DEVICE__
double exp2(double __x) { return __ocml_exp2_f64(__x); }

__DEVICE__
double expm1(double __x) { return __ocml_expm1_f64(__x); }

__DEVICE__
double fabs(double __x) { return __builtin_fabs(__x); }

__DEVICE__
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }

__DEVICE__
double floor(double __x) { return __ocml_floor_f64(__x); }

__DEVICE__
double fma(double __x, double __y, double __z) {
  return __builtin_fma(__x, __y, __z);
}

__DEVICE__
double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); }

__DEVICE__
double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); }

__DEVICE__
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }

__DEVICE__
double frexp(double __x, int *__nptr) {
  *__nptr = __builtin_amdgcn_frexp_exp(__x);
  return __builtin_amdgcn_frexp_mant(__x);
}

__DEVICE__
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }

__DEVICE__
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }

__DEVICE__
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }

__DEVICE__
__RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); }

__DEVICE__
__RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); }

__DEVICE__
double j0(double __x) { return __ocml_j0_f64(__x); }

__DEVICE__
double j1(double __x) { return __ocml_j1_f64(__x); }

__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return j0(__x);
  if (__n == 1)
    return j1(__x);

  double __x0 = j0(__x);
  double __x1 = j1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
__DEVICE__
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }

__DEVICE__
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }

__DEVICE__
long long int llrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long long int llround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double log(double __x) { return __ocml_log_f64(__x); }

__DEVICE__
double log10(double __x) { return __ocml_log10_f64(__x); }

__DEVICE__
double log1p(double __x) { return __ocml_log1p_f64(__x); }

__DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }

__DEVICE__
double logb(double __x) { return __ocml_logb_f64(__x); }

__DEVICE__
long int lrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long int lround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double modf(double __x, double *__iptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
  *__iptr = __tmp;

  return __r;
}
__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  uint64_t __val = __make_mantissa(__tagp);
  // Set the exponent field (all ones) and the quiet bit; use a 64-bit
  // constant so the shift is well defined.
  __val |= 0xFFFULL << 51;
  return *reinterpret_cast<double *>(&__val);
#endif
}
__DEVICE__
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }

__DEVICE__
double nextafter(double __x, double __y) {
  return __ocml_nextafter_f64(__x, __y);
}

__DEVICE__
double norm(int __dim,
            const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f64(__r);
}

__DEVICE__
double norm3d(double __x, double __y, double __z) {
  return __ocml_len3_f64(__x, __y, __z);
}

__DEVICE__
double norm4d(double __x, double __y, double __z, double __w) {
  return __ocml_len4_f64(__x, __y, __z, __w);
}

__DEVICE__
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }

__DEVICE__
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }

__DEVICE__
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }

__DEVICE__
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }

__DEVICE__
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }

__DEVICE__
double remainder(double __x, double __y) {
  return __ocml_remainder_f64(__x, __y);
}

__DEVICE__
double remquo(double __x, double __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r = __ocml_remquo_f64(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}
__DEVICE__
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }

__DEVICE__
double rint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
double rnorm(int __dim,
             const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f64(__r);
}

__DEVICE__
double rnorm3d(double __x, double __y, double __z) {
  return __ocml_rlen3_f64(__x, __y, __z);
}

__DEVICE__
double rnorm4d(double __x, double __y, double __z, double __w) {
  return __ocml_rlen4_f64(__x, __y, __z, __w);
}

__DEVICE__
double round(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }

__DEVICE__
double scalbln(double __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
                         : __ocml_scalb_f64(__x, __n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); }

__DEVICE__
double sin(double __x) { return __ocml_sin_f64(__x); }

__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincos_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
double sinh(double __x) { return __ocml_sinh_f64(__x); }

__DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }

__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }

__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }

__DEVICE__
double tanh(double __x) { return __ocml_tanh_f64(__x); }

__DEVICE__
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }

__DEVICE__
double trunc(double __x) { return __ocml_trunc_f64(__x); }

__DEVICE__
double y0(double __x) { return __ocml_y0_f64(__x); }

__DEVICE__
double y1(double __x) { return __ocml_y1_f64(__x); }

__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return y0(__x);
  if (__n == 1)
    return y1(__x);

  double __x0 = y0(__x);
  double __x1 = y1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dadd_rd(double __x, double __y) {
  return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__
double __dadd_rn(double __x, double __y) {
  return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__
double __dadd_ru(double __x, double __y) {
  return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__
double __dadd_rz(double __x, double __y) {
  return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __ddiv_rd(double __x, double __y) {
  return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__
double __ddiv_rn(double __x, double __y) {
  return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__
double __ddiv_ru(double __x, double __y) {
  return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__
double __ddiv_rz(double __x, double __y) {
  return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dmul_rd(double __x, double __y) {
  return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__
double __dmul_rn(double __x, double __y) {
  return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__
double __dmul_ru(double __x, double __y) {
  return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__
double __dmul_rz(double __x, double __y) {
  return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__
double __drcp_rn(double __x) { return 1.0 / __x; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsub_rd(double __x, double __y) {
  return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__
double __dsub_rn(double __x, double __y) {
  return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__
double __dsub_ru(double __x, double __y) {
  return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__
double __dsub_rz(double __x, double __y) {
  return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __fma_rd(double __x, double __y, double __z) {
  return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__
double __fma_ru(double __x, double __y, double __z) {
  return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rz(double __x, double __y, double __z) {
  return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __builtin_fma(__x, __y, __z);
}
#endif
// END INTRINSICS
// END DOUBLE
// C only macros
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
  _Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
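// With C11 _Generic the classification macros above dispatch on the static
// type of the argument, so one spelling covers both float and double. Usage
// sketch (C device code; the variables are illustrative):
//
//   float __f = __builtin_nanf("");
//   double __d = 1.0;
//   int __a = isnan(__f);  // expands to __isnanf(__f), nonzero
//   int __b = isnan(__d);  // expands to __isnan(__d), 0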
#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}

template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__ int min(int __arg1, int __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int max(int __arg1, int __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__
float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); }

__DEVICE__
double max(double __x, double __y) { return __builtin_fmax(__x, __y); }

__DEVICE__
float min(float __x, float __y) { return __builtin_fminf(__x, __y); }

__DEVICE__
double min(double __x, double __y) { return __builtin_fmin(__x, __y); }

#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
  return std::min(__arg1, __arg2);
}

__host__ inline static int max(int __arg1, int __arg2) {
  return std::max(__arg1, __arg2);
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")

#endif // __CLANG_HIP_MATH_H__