/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
#include <omp.h>
#endif
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
#define __RETURN_TYPE int
#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template <bool>
struct __compare_result {};
template <>
struct __compare_result<true> {
  static const __device__ bool valid;
};

__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__
void __static_assert_equal_size() {
  __suppress_unused_warning(__compare_result<S == T>::valid);
}

#define __static_assert_type_size_equal(A, B) \
  __static_assert_equal_size<A,B>()

#else
#define __static_assert_type_size_equal(A,B) \
  static_assert((A) == (B), "")

#endif
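// Usage sketch: __static_assert_type_size_equal(sizeof(A), sizeof(B)) compiles
// to nothing when the two sizes match and fails to compile otherwise, either
// through the missing __compare_result<false> specialization (pre-C++11) or a
// plain static_assert (C++11 and later). It is used below by nanf() and nan().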
__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '7')
      __r = (__r * 8u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}
__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 10u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}
__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 16u) + __tmp - '0';
    else if (__tmp >= 'a' && __tmp <= 'f')
      __r = (__r * 16u) + __tmp - 'a' + 10;
    else if (__tmp >= 'A' && __tmp <= 'F')
      __r = (__r * 16u) + __tmp - 'A' + 10;
    else
      return 0;

    ++__tagp;
  }

  return __r;
}
__DEVICE__
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  if (*__tagp == '0') {
    ++__tagp;

    if (*__tagp == 'x' || *__tagp == 'X')
      return __make_mantissa_base16(__tagp);
    else
      return __make_mantissa_base8(__tagp);
  }

  return __make_mantissa_base10(__tagp);
}
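// Tag parsing convention, as implemented above: a leading "0x"/"0X" selects the
// base-16 parser, any other leading "0" selects base 8, and everything else is
// parsed as base 10; a character outside the selected base makes the helpers
// return 0. The result becomes the NaN payload in nanf()/nan() below.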
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
#endif
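// The three overloads above share a branchless idiom: __sgn is 0 for
// non-negative inputs and -1 (all bits set) for negative ones, so
// (__x ^ __sgn) - __sgn returns __x unchanged or ~__x + 1 == -__x.
// Worked example: abs(-5) computes (-5 ^ -1) - (-1) == 4 + 1 == 5. As with
// the C library functions, the most negative value has no representable
// absolute value.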
__DEVICE__ float acosf(float __x) { return __ocml_acos_f32(__x); }
__DEVICE__ float acoshf(float __x) { return __ocml_acosh_f32(__x); }
__DEVICE__ float asinf(float __x) { return __ocml_asin_f32(__x); }
__DEVICE__ float asinhf(float __x) { return __ocml_asinh_f32(__x); }
__DEVICE__ float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
__DEVICE__ float atanf(float __x) { return __ocml_atan_f32(__x); }
__DEVICE__ float atanhf(float __x) { return __ocml_atanh_f32(__x); }
__DEVICE__ float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
__DEVICE__ float ceilf(float __x) { return __ocml_ceil_f32(__x); }
__DEVICE__ float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
__DEVICE__ float cosf(float __x) { return __ocml_cos_f32(__x); }
__DEVICE__ float coshf(float __x) { return __ocml_cosh_f32(__x); }
__DEVICE__ float cospif(float __x) { return __ocml_cospi_f32(__x); }
__DEVICE__ float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
__DEVICE__ float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
__DEVICE__ float erfcf(float __x) { return __ocml_erfc_f32(__x); }
__DEVICE__ float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
__DEVICE__ float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
__DEVICE__ float erff(float __x) { return __ocml_erf_f32(__x); }
__DEVICE__ float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
__DEVICE__ float exp10f(float __x) { return __ocml_exp10_f32(__x); }
__DEVICE__ float exp2f(float __x) { return __ocml_exp2_f32(__x); }
__DEVICE__ float expf(float __x) { return __ocml_exp_f32(__x); }
__DEVICE__ float expm1f(float __x) { return __ocml_expm1_f32(__x); }
__DEVICE__ float fabsf(float __x) { return __builtin_fabsf(__x); }
__DEVICE__ float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
__DEVICE__ float fdividef(float __x, float __y) { return __x / __y; }
__DEVICE__ float floorf(float __x) { return __ocml_floor_f32(__x); }
__DEVICE__ float fmaf(float __x, float __y, float __z) {
  return __ocml_fma_f32(__x, __y, __z);
}
__DEVICE__ float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
__DEVICE__ float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
__DEVICE__ float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
__DEVICE__
float frexpf(float __x, int *__nptr) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
  *__nptr = __tmp;

  return __r;
}
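// Several wrappers in this header (frexpf, modff, remquof, sincosf, sincospif
// and their double counterparts) follow the pattern above: the OCML entry
// point writes its secondary result through a private-memory (address space 5)
// pointer, so a local __tmp is passed with an explicit address-space cast and
// then copied out to the caller's pointer.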
__DEVICE__ float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
__DEVICE__ int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
__DEVICE__ __RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
__DEVICE__ __RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
__DEVICE__ __RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
__DEVICE__ float j0f(float __x) { return __ocml_j0_f32(__x); }
__DEVICE__ float j1f(float __x) { return __ocml_j1_f32(__x); }
__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case.
  if (__n == 0)
    return j0f(__x);
  if (__n == 1)
    return j1f(__x);

  float __x0 = j0f(__x);
  float __x1 = j1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
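// The loop above is the standard forward Bessel recurrence
// J(n+1, x) = (2 * n / x) * J(n, x) - J(n-1, x), seeded with j0f/j1f. Forward
// recurrence for J is known to lose accuracy once __n grows large relative to
// __x.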
__DEVICE__ float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
__DEVICE__ float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
__DEVICE__ long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__ long long int llroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__ float log10f(float __x) { return __ocml_log10_f32(__x); }
__DEVICE__ float log1pf(float __x) { return __ocml_log1p_f32(__x); }
__DEVICE__ float log2f(float __x) { return __ocml_log2_f32(__x); }
__DEVICE__ float logbf(float __x) { return __ocml_logb_f32(__x); }
__DEVICE__ float logf(float __x) { return __ocml_log_f32(__x); }
__DEVICE__ long int lrintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__ long int lroundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__
float modff(float __x, float *__iptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__iptr = __tmp;

  return __r;
}
__DEVICE__
float nanf(const char *__tagp __attribute__((nonnull))) {
  union {
    float val;
    struct ieee_float {
      unsigned int mantissa : 22;
      unsigned int quiet : 1;
      unsigned int exponent : 8;
      unsigned int sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
}
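// Example (assuming the IEEE-754 binary32 layout described by the bit-field
// above): nanf("2") parses the tag in base 10 and returns a quiet NaN with
// sign 0, exponent 0xFF, the quiet bit set, and payload 2 in the low 22
// mantissa bits.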
__DEVICE__ float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
__DEVICE__ float nextafterf(float __x, float __y) {
  return __ocml_nextafter_f32(__x, __y);
}
__DEVICE__ float norm3df(float __x, float __y, float __z) {
  return __ocml_len3_f32(__x, __y, __z);
}
__DEVICE__ float norm4df(float __x, float __y, float __z, float __w) {
  return __ocml_len4_f32(__x, __y, __z, __w);
}
__DEVICE__ float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
__DEVICE__ float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
__DEVICE__
float normf(int __dim,
            const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f32(__r);
}
__DEVICE__ float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__ float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
__DEVICE__ float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
__DEVICE__ float remainderf(float __x, float __y) {
  return __ocml_remainder_f32(__x, __y);
}
__DEVICE__
float remquof(float __x, float __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r = __ocml_remquo_f32(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}
__DEVICE__ float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
__DEVICE__ float rintf(float __x) { return __ocml_rint_f32(__x); }
__DEVICE__ float rnorm3df(float __x, float __y, float __z) {
  return __ocml_rlen3_f32(__x, __y, __z);
}
__DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w) {
  return __ocml_rlen4_f32(__x, __y, __z, __w);
}
__DEVICE__
float rnormf(int __dim,
             const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f32(__r);
}
__DEVICE__ float roundf(float __x) { return __ocml_round_f32(__x); }
__DEVICE__ float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
__DEVICE__ float scalblnf(float __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
                         : __ocml_scalb_f32(__x, __n);
}
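// scalblnf takes a long exponent: when the value is below INT_MAX the
// integer-exponent __ocml_scalbn_f32 path is used; otherwise it falls back to
// __ocml_scalb_f32 so the exponent is not truncated.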
__DEVICE__ float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
__DEVICE__ __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr =
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f32(
      __x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}
__DEVICE__ float sinf(float __x) { return __ocml_sin_f32(__x); }
__DEVICE__ float sinhf(float __x) { return __ocml_sinh_f32(__x); }
__DEVICE__ float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
__DEVICE__ float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
__DEVICE__ float tanf(float __x) { return __ocml_tan_f32(__x); }
__DEVICE__ float tanhf(float __x) { return __ocml_tanh_f32(__x); }
__DEVICE__ float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
__DEVICE__ float truncf(float __x) { return __ocml_trunc_f32(__x); }
__DEVICE__ float y0f(float __x) { return __ocml_y0_f32(__x); }
__DEVICE__ float y1f(float __x) { return __ocml_y1_f32(__x); }
__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return y0f(__x);
  if (__n == 1)
    return y1f(__x);

  float __x0 = y0f(__x);
  float __x1 = y1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}

__DEVICE__ float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
__DEVICE__ float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
__DEVICE__ float __expf(float __x) { return __ocml_native_exp_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__ float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__ float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__ float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__ float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
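// Naming convention for the explicitly rounded intrinsics in this section:
// _rd rounds toward negative infinity (OCML *_rtn), _rn rounds to nearest
// even (*_rte), _ru rounds toward positive infinity (*_rtp), and _rz rounds
// toward zero (*_rtz). Only the _rn variants have a plain-arithmetic fallback
// when OCML_BASIC_ROUNDED_OPERATIONS is not defined.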
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__ float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__ float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__ float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__ float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif

__DEVICE__ float __fdividef(float __x, float __y) { return __x / __y; }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fmaf_rd(float __x, float __y, float __z) {
  return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__ float __fmaf_ru(float __x, float __y, float __z) {
  return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__ float __fmaf_rz(float __x, float __y, float __z) {
  return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_f32(__x, __y, __z);
}
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__ float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__ float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__ float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__ float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__ float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__ float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__ float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__ float __frcp_rn(float __x) { return 1.0f / __x; }
#endif

__DEVICE__ float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__ float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__ float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__ float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__ float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__ float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__ float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__ float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__ float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif

__DEVICE__ float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
__DEVICE__ float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
__DEVICE__ float __logf(float __x) { return __ocml_native_log_f32(__x); }
__DEVICE__ float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
__DEVICE__ float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  *__sinptr = __ocml_native_sin_f32(__x);
  *__cosptr = __ocml_native_cos_f32(__x);
}
__DEVICE__ float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
__DEVICE__ float __tanf(float __x) { return __ocml_tan_f32(__x); }
__DEVICE__ double acos(double __x) { return __ocml_acos_f64(__x); }
__DEVICE__ double acosh(double __x) { return __ocml_acosh_f64(__x); }
__DEVICE__ double asin(double __x) { return __ocml_asin_f64(__x); }
__DEVICE__ double asinh(double __x) { return __ocml_asinh_f64(__x); }
__DEVICE__ double atan(double __x) { return __ocml_atan_f64(__x); }
__DEVICE__ double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
__DEVICE__ double atanh(double __x) { return __ocml_atanh_f64(__x); }
__DEVICE__ double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
__DEVICE__ double ceil(double __x) { return __ocml_ceil_f64(__x); }
__DEVICE__ double copysign(double __x, double __y) {
  return __ocml_copysign_f64(__x, __y);
}
__DEVICE__ double cos(double __x) { return __ocml_cos_f64(__x); }
__DEVICE__ double cosh(double __x) { return __ocml_cosh_f64(__x); }
__DEVICE__ double cospi(double __x) { return __ocml_cospi_f64(__x); }
__DEVICE__ double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
__DEVICE__ double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
__DEVICE__ double erf(double __x) { return __ocml_erf_f64(__x); }
__DEVICE__ double erfc(double __x) { return __ocml_erfc_f64(__x); }
__DEVICE__ double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
__DEVICE__ double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
__DEVICE__ double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
__DEVICE__ double exp(double __x) { return __ocml_exp_f64(__x); }
__DEVICE__ double exp10(double __x) { return __ocml_exp10_f64(__x); }
__DEVICE__ double exp2(double __x) { return __ocml_exp2_f64(__x); }
__DEVICE__ double expm1(double __x) { return __ocml_expm1_f64(__x); }
__DEVICE__ double fabs(double __x) { return __builtin_fabs(__x); }
__DEVICE__ double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
__DEVICE__ double floor(double __x) { return __ocml_floor_f64(__x); }
__DEVICE__ double fma(double __x, double __y, double __z) {
  return __ocml_fma_f64(__x, __y, __z);
}
__DEVICE__ double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
__DEVICE__ double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
__DEVICE__ double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
__DEVICE__
double frexp(double __x, int *__nptr) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
  *__nptr = __tmp;

  return __r;
}
__DEVICE__ double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
__DEVICE__ int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
__DEVICE__ __RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
__DEVICE__ __RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
__DEVICE__ __RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
__DEVICE__ double j0(double __x) { return __ocml_j0_f64(__x); }
__DEVICE__ double j1(double __x) { return __ocml_j1_f64(__x); }
__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return j0(__x);
  if (__n == 1)
    return j1(__x);

  double __x0 = j0(__x);
  double __x1 = j1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
__DEVICE__ double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
__DEVICE__ double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
__DEVICE__ long long int llrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__ long long int llround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__ double log(double __x) { return __ocml_log_f64(__x); }
__DEVICE__ double log10(double __x) { return __ocml_log10_f64(__x); }
__DEVICE__ double log1p(double __x) { return __ocml_log1p_f64(__x); }
__DEVICE__ double log2(double __x) { return __ocml_log2_f64(__x); }
__DEVICE__ double logb(double __x) { return __ocml_logb_f64(__x); }
__DEVICE__ long int lrint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__ long int lround(double __x) { return __ocml_round_f64(__x); }
__DEVICE__
double modf(double __x, double *__iptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
  *__iptr = __tmp;

  return __r;
}
__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  uint64_t __val = __make_mantissa(__tagp);
  __val |= 0xFFFull << 51;
  return *reinterpret_cast<double *>(&__val);
#endif
}
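// In the bit-cast branch above, 0xFFFull << 51 sets bits 51..62, i.e. the
// quiet bit plus the 11 exponent bits of an IEEE-754 binary64 value, turning
// the parsed payload into a quiet NaN; the unsigned long long suffix keeps the
// shift from overflowing a 32-bit int constant.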
__DEVICE__ double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
__DEVICE__ double nextafter(double __x, double __y) {
  return __ocml_nextafter_f64(__x, __y);
}
__DEVICE__
double norm(int __dim,
            const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f64(__r);
}
__DEVICE__ double norm3d(double __x, double __y, double __z) {
  return __ocml_len3_f64(__x, __y, __z);
}
__DEVICE__ double norm4d(double __x, double __y, double __z, double __w) {
  return __ocml_len4_f64(__x, __y, __z, __w);
}
__DEVICE__ double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
__DEVICE__ double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
__DEVICE__ double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
__DEVICE__ double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
__DEVICE__ double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
__DEVICE__ double remainder(double __x, double __y) {
  return __ocml_remainder_f64(__x, __y);
}
__DEVICE__
double remquo(double __x, double __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r = __ocml_remquo_f64(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}
__DEVICE__ double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
__DEVICE__ double rint(double __x) { return __ocml_rint_f64(__x); }
__DEVICE__
double rnorm(int __dim,
             const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f64(__r);
}
__DEVICE__ double rnorm3d(double __x, double __y, double __z) {
  return __ocml_rlen3_f64(__x, __y, __z);
}
__DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w) {
  return __ocml_rlen4_f64(__x, __y, __z, __w);
}
__DEVICE__ double round(double __x) { return __ocml_round_f64(__x); }
__DEVICE__ double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
__DEVICE__ double scalbln(double __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
                         : __ocml_scalb_f64(__x, __n);
}
__DEVICE__ double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
__DEVICE__ __RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
__DEVICE__ double sin(double __x) { return __ocml_sin_f64(__x); }
__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincos_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}
__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}
__DEVICE__ double sinh(double __x) { return __ocml_sinh_f64(__x); }
__DEVICE__ double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
__DEVICE__ double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
__DEVICE__ double tan(double __x) { return __ocml_tan_f64(__x); }
__DEVICE__ double tanh(double __x) { return __ocml_tanh_f64(__x); }
__DEVICE__ double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
__DEVICE__ double trunc(double __x) { return __ocml_trunc_f64(__x); }
__DEVICE__ double y0(double __x) { return __ocml_y0_f64(__x); }
__DEVICE__ double y1(double __x) { return __ocml_y1_f64(__x); }
__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return y0(__x);
  if (__n == 1)
    return y1(__x);

  double __x0 = y0(__x);
  double __x1 = y1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __dadd_rd(double __x, double __y) {
  return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__ double __dadd_rn(double __x, double __y) {
  return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__ double __dadd_ru(double __x, double __y) {
  return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__ double __dadd_rz(double __x, double __y) {
  return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__ double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __ddiv_rd(double __x, double __y) {
  return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__ double __ddiv_rn(double __x, double __y) {
  return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__ double __ddiv_ru(double __x, double __y) {
  return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__ double __ddiv_rz(double __x, double __y) {
  return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__ double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __dmul_rd(double __x, double __y) {
  return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__ double __dmul_rn(double __x, double __y) {
  return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__ double __dmul_ru(double __x, double __y) {
  return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__ double __dmul_rz(double __x, double __y) {
  return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__ double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__ double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__ double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__ double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__ double __drcp_rn(double __x) { return 1.0 / __x; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__ double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__ double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__ double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__ double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __dsub_rd(double __x, double __y) {
  return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__ double __dsub_rn(double __x, double __y) {
  return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__ double __dsub_ru(double __x, double __y) {
  return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__ double __dsub_rz(double __x, double __y) {
  return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__ double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__ double __fma_rd(double __x, double __y, double __z) {
  return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__ double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__ double __fma_ru(double __x, double __y, double __z) {
  return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__ double __fma_rz(double __x, double __y, double __z) {
  return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__ double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_f64(__x, __y, __z);
}
#endif
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
  _Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
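// C11 usage sketch: with the macros above, isnan(__v) expands to
// _Generic((__v), float : __isnanf, double : __isnan)(__v), so a float
// argument dispatches to __isnanf and the result has type __RETURN_TYPE
// (int for C).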
#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}

template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__
int min(int __arg1, int __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__
int max(int __arg1, int __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__ float max(float __x, float __y) { return fmaxf(__x, __y); }

__DEVICE__ double max(double __x, double __y) { return fmax(__x, __y); }

__DEVICE__ float min(float __x, float __y) { return fminf(__x, __y); }

__DEVICE__ double min(double __x, double __y) { return fmin(__x, __y); }

#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
  return std::min(__arg1, __arg2);
}

__host__ inline static int max(int __arg1, int __arg2) {
  return std::max(__arg1, __arg2);
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif // defined(__cplusplus)

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")

#endif // __CLANG_HIP_MATH_H__