1 /*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
9 #ifndef __CLANG_HIP_MATH_H__
10 #define __CLANG_HIP_MATH_H__
12 #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
16 #if !defined(__HIPCC_RTC__)
17 #if defined(__cplusplus)
22 #ifdef __OPENMP_AMDGCN__
25 #endif // !defined(__HIPCC_RTC__)
27 #pragma push_macro("__DEVICE__")
29 #ifdef __OPENMP_AMDGCN__
30 #define __DEVICE__ static inline __attribute__((always_inline, nothrow))
32 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
35 // A few functions return bool type starting only in C++11.
36 #pragma push_macro("__RETURN_TYPE")
37 #ifdef __OPENMP_AMDGCN__
38 #define __RETURN_TYPE int
40 #if defined(__cplusplus)
41 #define __RETURN_TYPE bool
43 #define __RETURN_TYPE int
45 #endif // __OPENMP_AMDGCN__
47 #if defined (__cplusplus) && __cplusplus < 201103L
48 // emulate static_assert on type sizes
50 struct __compare_result
{};
52 struct __compare_result
<true> {
53 static const __device__
bool valid
;
57 void __suppress_unused_warning(bool b
){};
58 template <unsigned int S
, unsigned int T
>
59 __DEVICE__
void __static_assert_equal_size() {
60 __suppress_unused_warning(__compare_result
<S
== T
>::valid
);
63 #define __static_assert_type_size_equal(A, B) \
64 __static_assert_equal_size<A,B>()
67 #define __static_assert_type_size_equal(A,B) \
68 static_assert((A) == (B), "")
73 uint64_t __make_mantissa_base8(const char *__tagp
__attribute__((nonnull
))) {
75 while (*__tagp
!= '\0') {
78 if (__tmp
>= '0' && __tmp
<= '7')
79 __r
= (__r
* 8u) + __tmp
- '0';
90 uint64_t __make_mantissa_base10(const char *__tagp
__attribute__((nonnull
))) {
92 while (*__tagp
!= '\0') {
95 if (__tmp
>= '0' && __tmp
<= '9')
96 __r
= (__r
* 10u) + __tmp
- '0';
107 uint64_t __make_mantissa_base16(const char *__tagp
__attribute__((nonnull
))) {
109 while (*__tagp
!= '\0') {
110 char __tmp
= *__tagp
;
112 if (__tmp
>= '0' && __tmp
<= '9')
113 __r
= (__r
* 16u) + __tmp
- '0';
114 else if (__tmp
>= 'a' && __tmp
<= 'f')
115 __r
= (__r
* 16u) + __tmp
- 'a' + 10;
116 else if (__tmp
>= 'A' && __tmp
<= 'F')
117 __r
= (__r
* 16u) + __tmp
- 'A' + 10;
128 uint64_t __make_mantissa(const char *__tagp
__attribute__((nonnull
))) {
129 if (*__tagp
== '0') {
132 if (*__tagp
== 'x' || *__tagp
== 'X')
133 return __make_mantissa_base16(__tagp
);
135 return __make_mantissa_base8(__tagp
);
138 return __make_mantissa_base10(__tagp
);
142 #if defined(__cplusplus)
145 int __sgn
= __x
>> (sizeof(int) * CHAR_BIT
- 1);
146 return (__x
^ __sgn
) - __sgn
;
149 long labs(long __x
) {
150 long __sgn
= __x
>> (sizeof(long) * CHAR_BIT
- 1);
151 return (__x
^ __sgn
) - __sgn
;
154 long long llabs(long long __x
) {
155 long long __sgn
= __x
>> (sizeof(long long) * CHAR_BIT
- 1);
156 return (__x
^ __sgn
) - __sgn
;
161 float acosf(float __x
) { return __ocml_acos_f32(__x
); }
164 float acoshf(float __x
) { return __ocml_acosh_f32(__x
); }
167 float asinf(float __x
) { return __ocml_asin_f32(__x
); }
170 float asinhf(float __x
) { return __ocml_asinh_f32(__x
); }
173 float atan2f(float __x
, float __y
) { return __ocml_atan2_f32(__x
, __y
); }
176 float atanf(float __x
) { return __ocml_atan_f32(__x
); }
179 float atanhf(float __x
) { return __ocml_atanh_f32(__x
); }
182 float cbrtf(float __x
) { return __ocml_cbrt_f32(__x
); }
185 float ceilf(float __x
) { return __ocml_ceil_f32(__x
); }
188 float copysignf(float __x
, float __y
) { return __builtin_copysignf(__x
, __y
); }
191 float cosf(float __x
) { return __ocml_cos_f32(__x
); }
194 float coshf(float __x
) { return __ocml_cosh_f32(__x
); }
197 float cospif(float __x
) { return __ocml_cospi_f32(__x
); }
200 float cyl_bessel_i0f(float __x
) { return __ocml_i0_f32(__x
); }
203 float cyl_bessel_i1f(float __x
) { return __ocml_i1_f32(__x
); }
206 float erfcf(float __x
) { return __ocml_erfc_f32(__x
); }
209 float erfcinvf(float __x
) { return __ocml_erfcinv_f32(__x
); }
212 float erfcxf(float __x
) { return __ocml_erfcx_f32(__x
); }
215 float erff(float __x
) { return __ocml_erf_f32(__x
); }
218 float erfinvf(float __x
) { return __ocml_erfinv_f32(__x
); }
221 float exp10f(float __x
) { return __ocml_exp10_f32(__x
); }
224 float exp2f(float __x
) { return __ocml_exp2_f32(__x
); }
227 float expf(float __x
) { return __ocml_exp_f32(__x
); }
230 float expm1f(float __x
) { return __ocml_expm1_f32(__x
); }
233 float fabsf(float __x
) { return __builtin_fabsf(__x
); }
236 float fdimf(float __x
, float __y
) { return __ocml_fdim_f32(__x
, __y
); }
239 float fdividef(float __x
, float __y
) { return __x
/ __y
; }
242 float floorf(float __x
) { return __ocml_floor_f32(__x
); }
245 float fmaf(float __x
, float __y
, float __z
) {
246 return __builtin_fmaf(__x
, __y
, __z
);
250 float fmaxf(float __x
, float __y
) { return __builtin_fmaxf(__x
, __y
); }
253 float fminf(float __x
, float __y
) { return __builtin_fminf(__x
, __y
); }
256 float fmodf(float __x
, float __y
) { return __ocml_fmod_f32(__x
, __y
); }
259 float frexpf(float __x
, int *__nptr
) {
260 *__nptr
= __builtin_amdgcn_frexp_expf(__x
);
261 return __builtin_amdgcn_frexp_mantf(__x
);
265 float hypotf(float __x
, float __y
) { return __ocml_hypot_f32(__x
, __y
); }
268 int ilogbf(float __x
) { return __ocml_ilogb_f32(__x
); }
271 __RETURN_TYPE
__finitef(float __x
) { return __ocml_isfinite_f32(__x
); }
274 __RETURN_TYPE
__isinff(float __x
) { return __builtin_isinf(__x
); }
277 __RETURN_TYPE
__isnanf(float __x
) { return __builtin_isnan(__x
); }
280 float j0f(float __x
) { return __ocml_j0_f32(__x
); }
283 float j1f(float __x
) { return __ocml_j1_f32(__x
); }
286 float jnf(int __n
, float __x
) { // TODO: we could use Ahmes multiplication
287 // and the Miller & Brown algorithm
288 // for linear recurrences to get O(log n) steps, but it's unclear if
289 // it'd be beneficial in this case.
295 float __x0
= j0f(__x
);
296 float __x1
= j1f(__x
);
297 for (int __i
= 1; __i
< __n
; ++__i
) {
298 float __x2
= (2 * __i
) / __x
* __x1
- __x0
;
307 float ldexpf(float __x
, int __e
) { return __ocml_ldexp_f32(__x
, __e
); }
310 float lgammaf(float __x
) { return __ocml_lgamma_f32(__x
); }
313 long long int llrintf(float __x
) { return __ocml_rint_f32(__x
); }
316 long long int llroundf(float __x
) { return __ocml_round_f32(__x
); }
319 float log10f(float __x
) { return __ocml_log10_f32(__x
); }
322 float log1pf(float __x
) { return __ocml_log1p_f32(__x
); }
325 float log2f(float __x
) { return __ocml_log2_f32(__x
); }
328 float logbf(float __x
) { return __ocml_logb_f32(__x
); }
331 float logf(float __x
) { return __ocml_log_f32(__x
); }
334 long int lrintf(float __x
) { return __ocml_rint_f32(__x
); }
337 long int lroundf(float __x
) { return __ocml_round_f32(__x
); }
340 float modff(float __x
, float *__iptr
) {
342 #ifdef __OPENMP_AMDGCN__
343 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
346 __ocml_modf_f32(__x
, (__attribute__((address_space(5))) float *)&__tmp
);
352 float nanf(const char *__tagp
__attribute__((nonnull
))) {
356 unsigned int mantissa
: 22;
357 unsigned int quiet
: 1;
358 unsigned int exponent
: 8;
359 unsigned int sign
: 1;
362 __static_assert_type_size_equal(sizeof(__tmp
.val
), sizeof(__tmp
.bits
));
364 __tmp
.bits
.sign
= 0u;
365 __tmp
.bits
.exponent
= ~0u;
366 __tmp
.bits
.quiet
= 1u;
367 __tmp
.bits
.mantissa
= __make_mantissa(__tagp
);
373 float nearbyintf(float __x
) { return __ocml_nearbyint_f32(__x
); }
376 float nextafterf(float __x
, float __y
) {
377 return __ocml_nextafter_f32(__x
, __y
);
381 float norm3df(float __x
, float __y
, float __z
) {
382 return __ocml_len3_f32(__x
, __y
, __z
);
386 float norm4df(float __x
, float __y
, float __z
, float __w
) {
387 return __ocml_len4_f32(__x
, __y
, __z
, __w
);
391 float normcdff(float __x
) { return __ocml_ncdf_f32(__x
); }
394 float normcdfinvf(float __x
) { return __ocml_ncdfinv_f32(__x
); }
397 float normf(int __dim
,
398 const float *__a
) { // TODO: placeholder until OCML adds support.
401 __r
+= __a
[0] * __a
[0];
405 return __ocml_sqrt_f32(__r
);
409 float powf(float __x
, float __y
) { return __ocml_pow_f32(__x
, __y
); }
412 float powif(float __x
, int __y
) { return __ocml_pown_f32(__x
, __y
); }
415 float rcbrtf(float __x
) { return __ocml_rcbrt_f32(__x
); }
418 float remainderf(float __x
, float __y
) {
419 return __ocml_remainder_f32(__x
, __y
);
423 float remquof(float __x
, float __y
, int *__quo
) {
425 #ifdef __OPENMP_AMDGCN__
426 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
428 float __r
= __ocml_remquo_f32(
429 __x
, __y
, (__attribute__((address_space(5))) int *)&__tmp
);
436 float rhypotf(float __x
, float __y
) { return __ocml_rhypot_f32(__x
, __y
); }
439 float rintf(float __x
) { return __ocml_rint_f32(__x
); }
442 float rnorm3df(float __x
, float __y
, float __z
) {
443 return __ocml_rlen3_f32(__x
, __y
, __z
);
447 float rnorm4df(float __x
, float __y
, float __z
, float __w
) {
448 return __ocml_rlen4_f32(__x
, __y
, __z
, __w
);
452 float rnormf(int __dim
,
453 const float *__a
) { // TODO: placeholder until OCML adds support.
456 __r
+= __a
[0] * __a
[0];
460 return __ocml_rsqrt_f32(__r
);
464 float roundf(float __x
) { return __ocml_round_f32(__x
); }
467 float rsqrtf(float __x
) { return __ocml_rsqrt_f32(__x
); }
470 float scalblnf(float __x
, long int __n
) {
471 return (__n
< INT_MAX
) ? __ocml_scalbn_f32(__x
, __n
)
472 : __ocml_scalb_f32(__x
, __n
);
476 float scalbnf(float __x
, int __n
) { return __ocml_scalbn_f32(__x
, __n
); }
479 __RETURN_TYPE
__signbitf(float __x
) { return __builtin_signbitf(__x
); }
482 void sincosf(float __x
, float *__sinptr
, float *__cosptr
) {
484 #ifdef __OPENMP_AMDGCN__
485 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
488 __ocml_sincos_f32(__x
, (__attribute__((address_space(5))) float *)&__tmp
);
493 void sincospif(float __x
, float *__sinptr
, float *__cosptr
) {
495 #ifdef __OPENMP_AMDGCN__
496 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
498 *__sinptr
= __ocml_sincospi_f32(
499 __x
, (__attribute__((address_space(5))) float *)&__tmp
);
504 float sinf(float __x
) { return __ocml_sin_f32(__x
); }
507 float sinhf(float __x
) { return __ocml_sinh_f32(__x
); }
510 float sinpif(float __x
) { return __ocml_sinpi_f32(__x
); }
513 float sqrtf(float __x
) { return __ocml_sqrt_f32(__x
); }
516 float tanf(float __x
) { return __ocml_tan_f32(__x
); }
519 float tanhf(float __x
) { return __ocml_tanh_f32(__x
); }
522 float tgammaf(float __x
) { return __ocml_tgamma_f32(__x
); }
525 float truncf(float __x
) { return __ocml_trunc_f32(__x
); }
528 float y0f(float __x
) { return __ocml_y0_f32(__x
); }
531 float y1f(float __x
) { return __ocml_y1_f32(__x
); }
534 float ynf(int __n
, float __x
) { // TODO: we could use Ahmes multiplication
535 // and the Miller & Brown algorithm
536 // for linear recurrences to get O(log n) steps, but it's unclear if
537 // it'd be beneficial in this case. Placeholder until OCML adds
544 float __x0
= y0f(__x
);
545 float __x1
= y1f(__x
);
546 for (int __i
= 1; __i
< __n
; ++__i
) {
547 float __x2
= (2 * __i
) / __x
* __x1
- __x0
;
558 float __cosf(float __x
) { return __ocml_native_cos_f32(__x
); }
561 float __exp10f(float __x
) { return __ocml_native_exp10_f32(__x
); }
564 float __expf(float __x
) { return __ocml_native_exp_f32(__x
); }
566 #if defined OCML_BASIC_ROUNDED_OPERATIONS
568 float __fadd_rd(float __x
, float __y
) { return __ocml_add_rtn_f32(__x
, __y
); }
570 float __fadd_rn(float __x
, float __y
) { return __ocml_add_rte_f32(__x
, __y
); }
572 float __fadd_ru(float __x
, float __y
) { return __ocml_add_rtp_f32(__x
, __y
); }
574 float __fadd_rz(float __x
, float __y
) { return __ocml_add_rtz_f32(__x
, __y
); }
577 float __fadd_rn(float __x
, float __y
) { return __x
+ __y
; }
580 #if defined OCML_BASIC_ROUNDED_OPERATIONS
582 float __fdiv_rd(float __x
, float __y
) { return __ocml_div_rtn_f32(__x
, __y
); }
584 float __fdiv_rn(float __x
, float __y
) { return __ocml_div_rte_f32(__x
, __y
); }
586 float __fdiv_ru(float __x
, float __y
) { return __ocml_div_rtp_f32(__x
, __y
); }
588 float __fdiv_rz(float __x
, float __y
) { return __ocml_div_rtz_f32(__x
, __y
); }
591 float __fdiv_rn(float __x
, float __y
) { return __x
/ __y
; }
595 float __fdividef(float __x
, float __y
) { return __x
/ __y
; }
597 #if defined OCML_BASIC_ROUNDED_OPERATIONS
599 float __fmaf_rd(float __x
, float __y
, float __z
) {
600 return __ocml_fma_rtn_f32(__x
, __y
, __z
);
603 float __fmaf_rn(float __x
, float __y
, float __z
) {
604 return __ocml_fma_rte_f32(__x
, __y
, __z
);
607 float __fmaf_ru(float __x
, float __y
, float __z
) {
608 return __ocml_fma_rtp_f32(__x
, __y
, __z
);
611 float __fmaf_rz(float __x
, float __y
, float __z
) {
612 return __ocml_fma_rtz_f32(__x
, __y
, __z
);
616 float __fmaf_rn(float __x
, float __y
, float __z
) {
617 return __builtin_fmaf(__x
, __y
, __z
);
621 #if defined OCML_BASIC_ROUNDED_OPERATIONS
623 float __fmul_rd(float __x
, float __y
) { return __ocml_mul_rtn_f32(__x
, __y
); }
625 float __fmul_rn(float __x
, float __y
) { return __ocml_mul_rte_f32(__x
, __y
); }
627 float __fmul_ru(float __x
, float __y
) { return __ocml_mul_rtp_f32(__x
, __y
); }
629 float __fmul_rz(float __x
, float __y
) { return __ocml_mul_rtz_f32(__x
, __y
); }
632 float __fmul_rn(float __x
, float __y
) { return __x
* __y
; }
635 #if defined OCML_BASIC_ROUNDED_OPERATIONS
637 float __frcp_rd(float __x
) { return __ocml_div_rtn_f32(1.0f
, __x
); }
639 float __frcp_rn(float __x
) { return __ocml_div_rte_f32(1.0f
, __x
); }
641 float __frcp_ru(float __x
) { return __ocml_div_rtp_f32(1.0f
, __x
); }
643 float __frcp_rz(float __x
) { return __ocml_div_rtz_f32(1.0f
, __x
); }
646 float __frcp_rn(float __x
) { return 1.0f
/ __x
; }
650 float __frsqrt_rn(float __x
) { return __llvm_amdgcn_rsq_f32(__x
); }
652 #if defined OCML_BASIC_ROUNDED_OPERATIONS
654 float __fsqrt_rd(float __x
) { return __ocml_sqrt_rtn_f32(__x
); }
656 float __fsqrt_rn(float __x
) { return __ocml_sqrt_rte_f32(__x
); }
658 float __fsqrt_ru(float __x
) { return __ocml_sqrt_rtp_f32(__x
); }
660 float __fsqrt_rz(float __x
) { return __ocml_sqrt_rtz_f32(__x
); }
663 float __fsqrt_rn(float __x
) { return __ocml_native_sqrt_f32(__x
); }
666 #if defined OCML_BASIC_ROUNDED_OPERATIONS
668 float __fsub_rd(float __x
, float __y
) { return __ocml_sub_rtn_f32(__x
, __y
); }
670 float __fsub_rn(float __x
, float __y
) { return __ocml_sub_rte_f32(__x
, __y
); }
672 float __fsub_ru(float __x
, float __y
) { return __ocml_sub_rtp_f32(__x
, __y
); }
674 float __fsub_rz(float __x
, float __y
) { return __ocml_sub_rtz_f32(__x
, __y
); }
677 float __fsub_rn(float __x
, float __y
) { return __x
- __y
; }
681 float __log10f(float __x
) { return __ocml_native_log10_f32(__x
); }
684 float __log2f(float __x
) { return __ocml_native_log2_f32(__x
); }
687 float __logf(float __x
) { return __ocml_native_log_f32(__x
); }
690 float __powf(float __x
, float __y
) { return __ocml_pow_f32(__x
, __y
); }
693 float __saturatef(float __x
) { return (__x
< 0) ? 0 : ((__x
> 1) ? 1 : __x
); }
696 void __sincosf(float __x
, float *__sinptr
, float *__cosptr
) {
697 *__sinptr
= __ocml_native_sin_f32(__x
);
698 *__cosptr
= __ocml_native_cos_f32(__x
);
702 float __sinf(float __x
) { return __ocml_native_sin_f32(__x
); }
705 float __tanf(float __x
) { return __ocml_tan_f32(__x
); }
711 double acos(double __x
) { return __ocml_acos_f64(__x
); }
714 double acosh(double __x
) { return __ocml_acosh_f64(__x
); }
717 double asin(double __x
) { return __ocml_asin_f64(__x
); }
720 double asinh(double __x
) { return __ocml_asinh_f64(__x
); }
723 double atan(double __x
) { return __ocml_atan_f64(__x
); }
726 double atan2(double __x
, double __y
) { return __ocml_atan2_f64(__x
, __y
); }
729 double atanh(double __x
) { return __ocml_atanh_f64(__x
); }
732 double cbrt(double __x
) { return __ocml_cbrt_f64(__x
); }
735 double ceil(double __x
) { return __ocml_ceil_f64(__x
); }
738 double copysign(double __x
, double __y
) {
739 return __builtin_copysign(__x
, __y
);
743 double cos(double __x
) { return __ocml_cos_f64(__x
); }
746 double cosh(double __x
) { return __ocml_cosh_f64(__x
); }
749 double cospi(double __x
) { return __ocml_cospi_f64(__x
); }
752 double cyl_bessel_i0(double __x
) { return __ocml_i0_f64(__x
); }
755 double cyl_bessel_i1(double __x
) { return __ocml_i1_f64(__x
); }
758 double erf(double __x
) { return __ocml_erf_f64(__x
); }
761 double erfc(double __x
) { return __ocml_erfc_f64(__x
); }
764 double erfcinv(double __x
) { return __ocml_erfcinv_f64(__x
); }
767 double erfcx(double __x
) { return __ocml_erfcx_f64(__x
); }
770 double erfinv(double __x
) { return __ocml_erfinv_f64(__x
); }
773 double exp(double __x
) { return __ocml_exp_f64(__x
); }
776 double exp10(double __x
) { return __ocml_exp10_f64(__x
); }
779 double exp2(double __x
) { return __ocml_exp2_f64(__x
); }
782 double expm1(double __x
) { return __ocml_expm1_f64(__x
); }
785 double fabs(double __x
) { return __builtin_fabs(__x
); }
788 double fdim(double __x
, double __y
) { return __ocml_fdim_f64(__x
, __y
); }
791 double floor(double __x
) { return __ocml_floor_f64(__x
); }
794 double fma(double __x
, double __y
, double __z
) {
795 return __builtin_fma(__x
, __y
, __z
);
799 double fmax(double __x
, double __y
) { return __builtin_fmax(__x
, __y
); }
802 double fmin(double __x
, double __y
) { return __builtin_fmin(__x
, __y
); }
805 double fmod(double __x
, double __y
) { return __ocml_fmod_f64(__x
, __y
); }
808 double frexp(double __x
, int *__nptr
) {
809 *__nptr
= __builtin_amdgcn_frexp_exp(__x
);
810 return __builtin_amdgcn_frexp_mant(__x
);
814 double hypot(double __x
, double __y
) { return __ocml_hypot_f64(__x
, __y
); }
817 int ilogb(double __x
) { return __ocml_ilogb_f64(__x
); }
820 __RETURN_TYPE
__finite(double __x
) { return __ocml_isfinite_f64(__x
); }
823 __RETURN_TYPE
__isinf(double __x
) { return __builtin_isinf(__x
); }
826 __RETURN_TYPE
__isnan(double __x
) { return __builtin_isnan(__x
); }
829 double j0(double __x
) { return __ocml_j0_f64(__x
); }
832 double j1(double __x
) { return __ocml_j1_f64(__x
); }
835 double jn(int __n
, double __x
) { // TODO: we could use Ahmes multiplication
836 // and the Miller & Brown algorithm
837 // for linear recurrences to get O(log n) steps, but it's unclear if
838 // it'd be beneficial in this case. Placeholder until OCML adds
845 double __x0
= j0(__x
);
846 double __x1
= j1(__x
);
847 for (int __i
= 1; __i
< __n
; ++__i
) {
848 double __x2
= (2 * __i
) / __x
* __x1
- __x0
;
856 double ldexp(double __x
, int __e
) { return __ocml_ldexp_f64(__x
, __e
); }
859 double lgamma(double __x
) { return __ocml_lgamma_f64(__x
); }
862 long long int llrint(double __x
) { return __ocml_rint_f64(__x
); }
865 long long int llround(double __x
) { return __ocml_round_f64(__x
); }
868 double log(double __x
) { return __ocml_log_f64(__x
); }
871 double log10(double __x
) { return __ocml_log10_f64(__x
); }
874 double log1p(double __x
) { return __ocml_log1p_f64(__x
); }
877 double log2(double __x
) { return __ocml_log2_f64(__x
); }
880 double logb(double __x
) { return __ocml_logb_f64(__x
); }
883 long int lrint(double __x
) { return __ocml_rint_f64(__x
); }
886 long int lround(double __x
) { return __ocml_round_f64(__x
); }
889 double modf(double __x
, double *__iptr
) {
891 #ifdef __OPENMP_AMDGCN__
892 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
895 __ocml_modf_f64(__x
, (__attribute__((address_space(5))) double *)&__tmp
);
902 double nan(const char *__tagp
) {
907 uint64_t mantissa
: 51;
909 uint32_t exponent
: 11;
913 __static_assert_type_size_equal(sizeof(__tmp
.val
), sizeof(__tmp
.bits
));
915 __tmp
.bits
.sign
= 0u;
916 __tmp
.bits
.exponent
= ~0u;
917 __tmp
.bits
.quiet
= 1u;
918 __tmp
.bits
.mantissa
= __make_mantissa(__tagp
);
922 __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
923 uint64_t __val
= __make_mantissa(__tagp
);
924 __val
|= 0xFFF << 51;
925 return *reinterpret_cast<double *>(&__val
);
930 double nearbyint(double __x
) { return __ocml_nearbyint_f64(__x
); }
933 double nextafter(double __x
, double __y
) {
934 return __ocml_nextafter_f64(__x
, __y
);
938 double norm(int __dim
,
939 const double *__a
) { // TODO: placeholder until OCML adds support.
942 __r
+= __a
[0] * __a
[0];
946 return __ocml_sqrt_f64(__r
);
950 double norm3d(double __x
, double __y
, double __z
) {
951 return __ocml_len3_f64(__x
, __y
, __z
);
955 double norm4d(double __x
, double __y
, double __z
, double __w
) {
956 return __ocml_len4_f64(__x
, __y
, __z
, __w
);
960 double normcdf(double __x
) { return __ocml_ncdf_f64(__x
); }
963 double normcdfinv(double __x
) { return __ocml_ncdfinv_f64(__x
); }
966 double pow(double __x
, double __y
) { return __ocml_pow_f64(__x
, __y
); }
969 double powi(double __x
, int __y
) { return __ocml_pown_f64(__x
, __y
); }
972 double rcbrt(double __x
) { return __ocml_rcbrt_f64(__x
); }
975 double remainder(double __x
, double __y
) {
976 return __ocml_remainder_f64(__x
, __y
);
980 double remquo(double __x
, double __y
, int *__quo
) {
982 #ifdef __OPENMP_AMDGCN__
983 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
985 double __r
= __ocml_remquo_f64(
986 __x
, __y
, (__attribute__((address_space(5))) int *)&__tmp
);
993 double rhypot(double __x
, double __y
) { return __ocml_rhypot_f64(__x
, __y
); }
996 double rint(double __x
) { return __ocml_rint_f64(__x
); }
999 double rnorm(int __dim
,
1000 const double *__a
) { // TODO: placeholder until OCML adds support.
1003 __r
+= __a
[0] * __a
[0];
1007 return __ocml_rsqrt_f64(__r
);
1011 double rnorm3d(double __x
, double __y
, double __z
) {
1012 return __ocml_rlen3_f64(__x
, __y
, __z
);
1016 double rnorm4d(double __x
, double __y
, double __z
, double __w
) {
1017 return __ocml_rlen4_f64(__x
, __y
, __z
, __w
);
1021 double round(double __x
) { return __ocml_round_f64(__x
); }
1024 double rsqrt(double __x
) { return __ocml_rsqrt_f64(__x
); }
1027 double scalbln(double __x
, long int __n
) {
1028 return (__n
< INT_MAX
) ? __ocml_scalbn_f64(__x
, __n
)
1029 : __ocml_scalb_f64(__x
, __n
);
1032 double scalbn(double __x
, int __n
) { return __ocml_scalbn_f64(__x
, __n
); }
1035 __RETURN_TYPE
__signbit(double __x
) { return __builtin_signbit(__x
); }
1038 double sin(double __x
) { return __ocml_sin_f64(__x
); }
1041 void sincos(double __x
, double *__sinptr
, double *__cosptr
) {
1043 #ifdef __OPENMP_AMDGCN__
1044 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1046 *__sinptr
= __ocml_sincos_f64(
1047 __x
, (__attribute__((address_space(5))) double *)&__tmp
);
1052 void sincospi(double __x
, double *__sinptr
, double *__cosptr
) {
1054 #ifdef __OPENMP_AMDGCN__
1055 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1057 *__sinptr
= __ocml_sincospi_f64(
1058 __x
, (__attribute__((address_space(5))) double *)&__tmp
);
1063 double sinh(double __x
) { return __ocml_sinh_f64(__x
); }
1066 double sinpi(double __x
) { return __ocml_sinpi_f64(__x
); }
1069 double sqrt(double __x
) { return __ocml_sqrt_f64(__x
); }
1072 double tan(double __x
) { return __ocml_tan_f64(__x
); }
1075 double tanh(double __x
) { return __ocml_tanh_f64(__x
); }
1078 double tgamma(double __x
) { return __ocml_tgamma_f64(__x
); }
1081 double trunc(double __x
) { return __ocml_trunc_f64(__x
); }
1084 double y0(double __x
) { return __ocml_y0_f64(__x
); }
1087 double y1(double __x
) { return __ocml_y1_f64(__x
); }
1090 double yn(int __n
, double __x
) { // TODO: we could use Ahmes multiplication
1091 // and the Miller & Brown algorithm
1092 // for linear recurrences to get O(log n) steps, but it's unclear if
1093 // it'd be beneficial in this case. Placeholder until OCML adds
1100 double __x0
= y0(__x
);
1101 double __x1
= y1(__x
);
1102 for (int __i
= 1; __i
< __n
; ++__i
) {
1103 double __x2
= (2 * __i
) / __x
* __x1
- __x0
;
1112 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1114 double __dadd_rd(double __x
, double __y
) {
1115 return __ocml_add_rtn_f64(__x
, __y
);
1118 double __dadd_rn(double __x
, double __y
) {
1119 return __ocml_add_rte_f64(__x
, __y
);
1122 double __dadd_ru(double __x
, double __y
) {
1123 return __ocml_add_rtp_f64(__x
, __y
);
1126 double __dadd_rz(double __x
, double __y
) {
1127 return __ocml_add_rtz_f64(__x
, __y
);
1131 double __dadd_rn(double __x
, double __y
) { return __x
+ __y
; }
1134 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1136 double __ddiv_rd(double __x
, double __y
) {
1137 return __ocml_div_rtn_f64(__x
, __y
);
1140 double __ddiv_rn(double __x
, double __y
) {
1141 return __ocml_div_rte_f64(__x
, __y
);
1144 double __ddiv_ru(double __x
, double __y
) {
1145 return __ocml_div_rtp_f64(__x
, __y
);
1148 double __ddiv_rz(double __x
, double __y
) {
1149 return __ocml_div_rtz_f64(__x
, __y
);
1153 double __ddiv_rn(double __x
, double __y
) { return __x
/ __y
; }
1156 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1158 double __dmul_rd(double __x
, double __y
) {
1159 return __ocml_mul_rtn_f64(__x
, __y
);
1162 double __dmul_rn(double __x
, double __y
) {
1163 return __ocml_mul_rte_f64(__x
, __y
);
1166 double __dmul_ru(double __x
, double __y
) {
1167 return __ocml_mul_rtp_f64(__x
, __y
);
1170 double __dmul_rz(double __x
, double __y
) {
1171 return __ocml_mul_rtz_f64(__x
, __y
);
1175 double __dmul_rn(double __x
, double __y
) { return __x
* __y
; }
1178 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1180 double __drcp_rd(double __x
) { return __ocml_div_rtn_f64(1.0, __x
); }
1182 double __drcp_rn(double __x
) { return __ocml_div_rte_f64(1.0, __x
); }
1184 double __drcp_ru(double __x
) { return __ocml_div_rtp_f64(1.0, __x
); }
1186 double __drcp_rz(double __x
) { return __ocml_div_rtz_f64(1.0, __x
); }
1189 double __drcp_rn(double __x
) { return 1.0 / __x
; }
1192 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1194 double __dsqrt_rd(double __x
) { return __ocml_sqrt_rtn_f64(__x
); }
1196 double __dsqrt_rn(double __x
) { return __ocml_sqrt_rte_f64(__x
); }
1198 double __dsqrt_ru(double __x
) { return __ocml_sqrt_rtp_f64(__x
); }
1200 double __dsqrt_rz(double __x
) { return __ocml_sqrt_rtz_f64(__x
); }
1203 double __dsqrt_rn(double __x
) { return __ocml_sqrt_f64(__x
); }
1206 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1208 double __dsub_rd(double __x
, double __y
) {
1209 return __ocml_sub_rtn_f64(__x
, __y
);
1212 double __dsub_rn(double __x
, double __y
) {
1213 return __ocml_sub_rte_f64(__x
, __y
);
1216 double __dsub_ru(double __x
, double __y
) {
1217 return __ocml_sub_rtp_f64(__x
, __y
);
1220 double __dsub_rz(double __x
, double __y
) {
1221 return __ocml_sub_rtz_f64(__x
, __y
);
1225 double __dsub_rn(double __x
, double __y
) { return __x
- __y
; }
1228 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1230 double __fma_rd(double __x
, double __y
, double __z
) {
1231 return __ocml_fma_rtn_f64(__x
, __y
, __z
);
1234 double __fma_rn(double __x
, double __y
, double __z
) {
1235 return __ocml_fma_rte_f64(__x
, __y
, __z
);
1238 double __fma_ru(double __x
, double __y
, double __z
) {
1239 return __ocml_fma_rtp_f64(__x
, __y
, __z
);
1242 double __fma_rz(double __x
, double __y
, double __z
) {
1243 return __ocml_fma_rtz_f64(__x
, __y
, __z
);
1247 double __fma_rn(double __x
, double __y
, double __z
) {
1248 return __builtin_fma(__x
, __y
, __z
);
1255 #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1256 #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1257 #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1258 #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1259 #define signbit(__x) \
1260 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1261 #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1263 #if defined(__cplusplus)
1264 template <class T
> __DEVICE__ T
min(T __arg1
, T __arg2
) {
1265 return (__arg1
< __arg2
) ? __arg1
: __arg2
;
1268 template <class T
> __DEVICE__ T
max(T __arg1
, T __arg2
) {
1269 return (__arg1
> __arg2
) ? __arg1
: __arg2
;
1272 __DEVICE__
int min(int __arg1
, int __arg2
) {
1273 return (__arg1
< __arg2
) ? __arg1
: __arg2
;
1275 __DEVICE__
int max(int __arg1
, int __arg2
) {
1276 return (__arg1
> __arg2
) ? __arg1
: __arg2
;
1280 float max(float __x
, float __y
) { return __builtin_fmaxf(__x
, __y
); }
1283 double max(double __x
, double __y
) { return __builtin_fmax(__x
, __y
); }
1286 float min(float __x
, float __y
) { return __builtin_fminf(__x
, __y
); }
1289 double min(double __x
, double __y
) { return __builtin_fmin(__x
, __y
); }
1291 #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1292 __host__
inline static int min(int __arg1
, int __arg2
) {
1293 return std::min(__arg1
, __arg2
);
1296 __host__
inline static int max(int __arg1
, int __arg2
) {
1297 return std::max(__arg1
, __arg2
);
1299 #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1302 #pragma pop_macro("__DEVICE__")
1303 #pragma pop_macro("__RETURN_TYPE")
1305 #endif // __CLANG_HIP_MATH_H__