1 /*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
9 #ifndef __CLANG_HIP_MATH_H__
10 #define __CLANG_HIP_MATH_H__
12 #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
16 #if !defined(__HIPCC_RTC__)
17 #if defined(__cplusplus)
22 #ifdef __OPENMP_AMDGCN__
25 #endif // !defined(__HIPCC_RTC__)
27 #pragma push_macro("__DEVICE__")
29 #ifdef __OPENMP_AMDGCN__
30 #define __DEVICE__ static inline __attribute__((always_inline, nothrow))
32 #define __DEVICE__ static __device__ inline __attribute__((always_inline))
35 // A few functions return bool type starting only in C++11.
36 #pragma push_macro("__RETURN_TYPE")
37 #ifdef __OPENMP_AMDGCN__
38 #define __RETURN_TYPE int
40 #if defined(__cplusplus)
41 #define __RETURN_TYPE bool
43 #define __RETURN_TYPE int
45 #endif // __OPENMP_AMDGCN__
47 #if defined (__cplusplus) && __cplusplus < 201103L
48 // emulate static_assert on type sizes
50 struct __compare_result
{};
52 struct __compare_result
<true> {
53 static const __device__
bool valid
;
57 void __suppress_unused_warning(bool b
){};
58 template <unsigned int S
, unsigned int T
>
59 __DEVICE__
void __static_assert_equal_size() {
60 __suppress_unused_warning(__compare_result
<S
== T
>::valid
);
63 #define __static_assert_type_size_equal(A, B) \
64 __static_assert_equal_size<A,B>()
67 #define __static_assert_type_size_equal(A,B) \
68 static_assert((A) == (B), "")
73 uint64_t __make_mantissa_base8(const char *__tagp
) {
78 if (__tmp
>= '0' && __tmp
<= '7')
79 __r
= (__r
* 8u) + __tmp
- '0';
90 uint64_t __make_mantissa_base10(const char *__tagp
) {
95 if (__tmp
>= '0' && __tmp
<= '9')
96 __r
= (__r
* 10u) + __tmp
- '0';
107 uint64_t __make_mantissa_base16(const char *__tagp
) {
110 char __tmp
= *__tagp
;
112 if (__tmp
>= '0' && __tmp
<= '9')
113 __r
= (__r
* 16u) + __tmp
- '0';
114 else if (__tmp
>= 'a' && __tmp
<= 'f')
115 __r
= (__r
* 16u) + __tmp
- 'a' + 10;
116 else if (__tmp
>= 'A' && __tmp
<= 'F')
117 __r
= (__r
* 16u) + __tmp
- 'A' + 10;
128 uint64_t __make_mantissa(const char *__tagp
) {
132 if (*__tagp
== '0') {
135 if (*__tagp
== 'x' || *__tagp
== 'X')
136 return __make_mantissa_base16(__tagp
);
138 return __make_mantissa_base8(__tagp
);
141 return __make_mantissa_base10(__tagp
);
145 #if defined(__cplusplus)
148 int __sgn
= __x
>> (sizeof(int) * CHAR_BIT
- 1);
149 return (__x
^ __sgn
) - __sgn
;
152 long labs(long __x
) {
153 long __sgn
= __x
>> (sizeof(long) * CHAR_BIT
- 1);
154 return (__x
^ __sgn
) - __sgn
;
157 long long llabs(long long __x
) {
158 long long __sgn
= __x
>> (sizeof(long long) * CHAR_BIT
- 1);
159 return (__x
^ __sgn
) - __sgn
;
164 float acosf(float __x
) { return __ocml_acos_f32(__x
); }
167 float acoshf(float __x
) { return __ocml_acosh_f32(__x
); }
170 float asinf(float __x
) { return __ocml_asin_f32(__x
); }
173 float asinhf(float __x
) { return __ocml_asinh_f32(__x
); }
176 float atan2f(float __x
, float __y
) { return __ocml_atan2_f32(__x
, __y
); }
179 float atanf(float __x
) { return __ocml_atan_f32(__x
); }
182 float atanhf(float __x
) { return __ocml_atanh_f32(__x
); }
185 float cbrtf(float __x
) { return __ocml_cbrt_f32(__x
); }
188 float ceilf(float __x
) { return __ocml_ceil_f32(__x
); }
191 float copysignf(float __x
, float __y
) { return __ocml_copysign_f32(__x
, __y
); }
194 float cosf(float __x
) { return __ocml_cos_f32(__x
); }
197 float coshf(float __x
) { return __ocml_cosh_f32(__x
); }
200 float cospif(float __x
) { return __ocml_cospi_f32(__x
); }
203 float cyl_bessel_i0f(float __x
) { return __ocml_i0_f32(__x
); }
206 float cyl_bessel_i1f(float __x
) { return __ocml_i1_f32(__x
); }
209 float erfcf(float __x
) { return __ocml_erfc_f32(__x
); }
212 float erfcinvf(float __x
) { return __ocml_erfcinv_f32(__x
); }
215 float erfcxf(float __x
) { return __ocml_erfcx_f32(__x
); }
218 float erff(float __x
) { return __ocml_erf_f32(__x
); }
221 float erfinvf(float __x
) { return __ocml_erfinv_f32(__x
); }
224 float exp10f(float __x
) { return __ocml_exp10_f32(__x
); }
227 float exp2f(float __x
) { return __ocml_exp2_f32(__x
); }
230 float expf(float __x
) { return __ocml_exp_f32(__x
); }
233 float expm1f(float __x
) { return __ocml_expm1_f32(__x
); }
236 float fabsf(float __x
) { return __ocml_fabs_f32(__x
); }
239 float fdimf(float __x
, float __y
) { return __ocml_fdim_f32(__x
, __y
); }
242 float fdividef(float __x
, float __y
) { return __x
/ __y
; }
245 float floorf(float __x
) { return __ocml_floor_f32(__x
); }
248 float fmaf(float __x
, float __y
, float __z
) {
249 return __ocml_fma_f32(__x
, __y
, __z
);
253 float fmaxf(float __x
, float __y
) { return __ocml_fmax_f32(__x
, __y
); }
256 float fminf(float __x
, float __y
) { return __ocml_fmin_f32(__x
, __y
); }
259 float fmodf(float __x
, float __y
) { return __ocml_fmod_f32(__x
, __y
); }
262 float frexpf(float __x
, int *__nptr
) {
264 #ifdef __OPENMP_AMDGCN__
265 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
268 __ocml_frexp_f32(__x
, (__attribute__((address_space(5))) int *)&__tmp
);
275 float hypotf(float __x
, float __y
) { return __ocml_hypot_f32(__x
, __y
); }
278 int ilogbf(float __x
) { return __ocml_ilogb_f32(__x
); }
281 __RETURN_TYPE
__finitef(float __x
) { return __ocml_isfinite_f32(__x
); }
284 __RETURN_TYPE
__isinff(float __x
) { return __ocml_isinf_f32(__x
); }
287 __RETURN_TYPE
__isnanf(float __x
) { return __ocml_isnan_f32(__x
); }
290 float j0f(float __x
) { return __ocml_j0_f32(__x
); }
293 float j1f(float __x
) { return __ocml_j1_f32(__x
); }
296 float jnf(int __n
, float __x
) { // TODO: we could use Ahmes multiplication
297 // and the Miller & Brown algorithm
298 // for linear recurrences to get O(log n) steps, but it's unclear if
299 // it'd be beneficial in this case.
305 float __x0
= j0f(__x
);
306 float __x1
= j1f(__x
);
307 for (int __i
= 1; __i
< __n
; ++__i
) {
308 float __x2
= (2 * __i
) / __x
* __x1
- __x0
;
317 float ldexpf(float __x
, int __e
) { return __ocml_ldexp_f32(__x
, __e
); }
320 float lgammaf(float __x
) { return __ocml_lgamma_f32(__x
); }
323 long long int llrintf(float __x
) { return __ocml_rint_f32(__x
); }
326 long long int llroundf(float __x
) { return __ocml_round_f32(__x
); }
329 float log10f(float __x
) { return __ocml_log10_f32(__x
); }
332 float log1pf(float __x
) { return __ocml_log1p_f32(__x
); }
335 float log2f(float __x
) { return __ocml_log2_f32(__x
); }
338 float logbf(float __x
) { return __ocml_logb_f32(__x
); }
341 float logf(float __x
) { return __ocml_log_f32(__x
); }
344 long int lrintf(float __x
) { return __ocml_rint_f32(__x
); }
347 long int lroundf(float __x
) { return __ocml_round_f32(__x
); }
350 float modff(float __x
, float *__iptr
) {
352 #ifdef __OPENMP_AMDGCN__
353 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
356 __ocml_modf_f32(__x
, (__attribute__((address_space(5))) float *)&__tmp
);
362 float nanf(const char *__tagp
) {
366 unsigned int mantissa
: 22;
367 unsigned int quiet
: 1;
368 unsigned int exponent
: 8;
369 unsigned int sign
: 1;
372 __static_assert_type_size_equal(sizeof(__tmp
.val
), sizeof(__tmp
.bits
));
374 __tmp
.bits
.sign
= 0u;
375 __tmp
.bits
.exponent
= ~0u;
376 __tmp
.bits
.quiet
= 1u;
377 __tmp
.bits
.mantissa
= __make_mantissa(__tagp
);
383 float nearbyintf(float __x
) { return __ocml_nearbyint_f32(__x
); }
386 float nextafterf(float __x
, float __y
) {
387 return __ocml_nextafter_f32(__x
, __y
);
391 float norm3df(float __x
, float __y
, float __z
) {
392 return __ocml_len3_f32(__x
, __y
, __z
);
396 float norm4df(float __x
, float __y
, float __z
, float __w
) {
397 return __ocml_len4_f32(__x
, __y
, __z
, __w
);
401 float normcdff(float __x
) { return __ocml_ncdf_f32(__x
); }
404 float normcdfinvf(float __x
) { return __ocml_ncdfinv_f32(__x
); }
407 float normf(int __dim
,
408 const float *__a
) { // TODO: placeholder until OCML adds support.
411 __r
+= __a
[0] * __a
[0];
415 return __ocml_sqrt_f32(__r
);
419 float powf(float __x
, float __y
) { return __ocml_pow_f32(__x
, __y
); }
422 float powif(float __x
, int __y
) { return __ocml_pown_f32(__x
, __y
); }
425 float rcbrtf(float __x
) { return __ocml_rcbrt_f32(__x
); }
428 float remainderf(float __x
, float __y
) {
429 return __ocml_remainder_f32(__x
, __y
);
433 float remquof(float __x
, float __y
, int *__quo
) {
435 #ifdef __OPENMP_AMDGCN__
436 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
438 float __r
= __ocml_remquo_f32(
439 __x
, __y
, (__attribute__((address_space(5))) int *)&__tmp
);
446 float rhypotf(float __x
, float __y
) { return __ocml_rhypot_f32(__x
, __y
); }
449 float rintf(float __x
) { return __ocml_rint_f32(__x
); }
452 float rnorm3df(float __x
, float __y
, float __z
) {
453 return __ocml_rlen3_f32(__x
, __y
, __z
);
457 float rnorm4df(float __x
, float __y
, float __z
, float __w
) {
458 return __ocml_rlen4_f32(__x
, __y
, __z
, __w
);
462 float rnormf(int __dim
,
463 const float *__a
) { // TODO: placeholder until OCML adds support.
466 __r
+= __a
[0] * __a
[0];
470 return __ocml_rsqrt_f32(__r
);
474 float roundf(float __x
) { return __ocml_round_f32(__x
); }
477 float rsqrtf(float __x
) { return __ocml_rsqrt_f32(__x
); }
480 float scalblnf(float __x
, long int __n
) {
481 return (__n
< INT_MAX
) ? __ocml_scalbn_f32(__x
, __n
)
482 : __ocml_scalb_f32(__x
, __n
);
486 float scalbnf(float __x
, int __n
) { return __ocml_scalbn_f32(__x
, __n
); }
489 __RETURN_TYPE
__signbitf(float __x
) { return __ocml_signbit_f32(__x
); }
492 void sincosf(float __x
, float *__sinptr
, float *__cosptr
) {
494 #ifdef __OPENMP_AMDGCN__
495 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
498 __ocml_sincos_f32(__x
, (__attribute__((address_space(5))) float *)&__tmp
);
503 void sincospif(float __x
, float *__sinptr
, float *__cosptr
) {
505 #ifdef __OPENMP_AMDGCN__
506 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
508 *__sinptr
= __ocml_sincospi_f32(
509 __x
, (__attribute__((address_space(5))) float *)&__tmp
);
514 float sinf(float __x
) { return __ocml_sin_f32(__x
); }
517 float sinhf(float __x
) { return __ocml_sinh_f32(__x
); }
520 float sinpif(float __x
) { return __ocml_sinpi_f32(__x
); }
523 float sqrtf(float __x
) { return __ocml_sqrt_f32(__x
); }
526 float tanf(float __x
) { return __ocml_tan_f32(__x
); }
529 float tanhf(float __x
) { return __ocml_tanh_f32(__x
); }
532 float tgammaf(float __x
) { return __ocml_tgamma_f32(__x
); }
535 float truncf(float __x
) { return __ocml_trunc_f32(__x
); }
538 float y0f(float __x
) { return __ocml_y0_f32(__x
); }
541 float y1f(float __x
) { return __ocml_y1_f32(__x
); }
544 float ynf(int __n
, float __x
) { // TODO: we could use Ahmes multiplication
545 // and the Miller & Brown algorithm
546 // for linear recurrences to get O(log n) steps, but it's unclear if
547 // it'd be beneficial in this case. Placeholder until OCML adds
554 float __x0
= y0f(__x
);
555 float __x1
= y1f(__x
);
556 for (int __i
= 1; __i
< __n
; ++__i
) {
557 float __x2
= (2 * __i
) / __x
* __x1
- __x0
;
568 float __cosf(float __x
) { return __ocml_native_cos_f32(__x
); }
571 float __exp10f(float __x
) { return __ocml_native_exp10_f32(__x
); }
574 float __expf(float __x
) { return __ocml_native_exp_f32(__x
); }
576 #if defined OCML_BASIC_ROUNDED_OPERATIONS
578 float __fadd_rd(float __x
, float __y
) { return __ocml_add_rtn_f32(__x
, __y
); }
580 float __fadd_rn(float __x
, float __y
) { return __ocml_add_rte_f32(__x
, __y
); }
582 float __fadd_ru(float __x
, float __y
) { return __ocml_add_rtp_f32(__x
, __y
); }
584 float __fadd_rz(float __x
, float __y
) { return __ocml_add_rtz_f32(__x
, __y
); }
587 float __fadd_rn(float __x
, float __y
) { return __x
+ __y
; }
590 #if defined OCML_BASIC_ROUNDED_OPERATIONS
592 float __fdiv_rd(float __x
, float __y
) { return __ocml_div_rtn_f32(__x
, __y
); }
594 float __fdiv_rn(float __x
, float __y
) { return __ocml_div_rte_f32(__x
, __y
); }
596 float __fdiv_ru(float __x
, float __y
) { return __ocml_div_rtp_f32(__x
, __y
); }
598 float __fdiv_rz(float __x
, float __y
) { return __ocml_div_rtz_f32(__x
, __y
); }
601 float __fdiv_rn(float __x
, float __y
) { return __x
/ __y
; }
605 float __fdividef(float __x
, float __y
) { return __x
/ __y
; }
607 #if defined OCML_BASIC_ROUNDED_OPERATIONS
609 float __fmaf_rd(float __x
, float __y
, float __z
) {
610 return __ocml_fma_rtn_f32(__x
, __y
, __z
);
613 float __fmaf_rn(float __x
, float __y
, float __z
) {
614 return __ocml_fma_rte_f32(__x
, __y
, __z
);
617 float __fmaf_ru(float __x
, float __y
, float __z
) {
618 return __ocml_fma_rtp_f32(__x
, __y
, __z
);
621 float __fmaf_rz(float __x
, float __y
, float __z
) {
622 return __ocml_fma_rtz_f32(__x
, __y
, __z
);
626 float __fmaf_rn(float __x
, float __y
, float __z
) {
627 return __ocml_fma_f32(__x
, __y
, __z
);
631 #if defined OCML_BASIC_ROUNDED_OPERATIONS
633 float __fmul_rd(float __x
, float __y
) { return __ocml_mul_rtn_f32(__x
, __y
); }
635 float __fmul_rn(float __x
, float __y
) { return __ocml_mul_rte_f32(__x
, __y
); }
637 float __fmul_ru(float __x
, float __y
) { return __ocml_mul_rtp_f32(__x
, __y
); }
639 float __fmul_rz(float __x
, float __y
) { return __ocml_mul_rtz_f32(__x
, __y
); }
642 float __fmul_rn(float __x
, float __y
) { return __x
* __y
; }
645 #if defined OCML_BASIC_ROUNDED_OPERATIONS
647 float __frcp_rd(float __x
) { return __ocml_div_rtn_f32(1.0f
, __x
); }
649 float __frcp_rn(float __x
) { return __ocml_div_rte_f32(1.0f
, __x
); }
651 float __frcp_ru(float __x
) { return __ocml_div_rtp_f32(1.0f
, __x
); }
653 float __frcp_rz(float __x
) { return __ocml_div_rtz_f32(1.0f
, __x
); }
656 float __frcp_rn(float __x
) { return 1.0f
/ __x
; }
660 float __frsqrt_rn(float __x
) { return __llvm_amdgcn_rsq_f32(__x
); }
662 #if defined OCML_BASIC_ROUNDED_OPERATIONS
664 float __fsqrt_rd(float __x
) { return __ocml_sqrt_rtn_f32(__x
); }
666 float __fsqrt_rn(float __x
) { return __ocml_sqrt_rte_f32(__x
); }
668 float __fsqrt_ru(float __x
) { return __ocml_sqrt_rtp_f32(__x
); }
670 float __fsqrt_rz(float __x
) { return __ocml_sqrt_rtz_f32(__x
); }
673 float __fsqrt_rn(float __x
) { return __ocml_native_sqrt_f32(__x
); }
676 #if defined OCML_BASIC_ROUNDED_OPERATIONS
678 float __fsub_rd(float __x
, float __y
) { return __ocml_sub_rtn_f32(__x
, __y
); }
680 float __fsub_rn(float __x
, float __y
) { return __ocml_sub_rte_f32(__x
, __y
); }
682 float __fsub_ru(float __x
, float __y
) { return __ocml_sub_rtp_f32(__x
, __y
); }
684 float __fsub_rz(float __x
, float __y
) { return __ocml_sub_rtz_f32(__x
, __y
); }
687 float __fsub_rn(float __x
, float __y
) { return __x
- __y
; }
691 float __log10f(float __x
) { return __ocml_native_log10_f32(__x
); }
694 float __log2f(float __x
) { return __ocml_native_log2_f32(__x
); }
697 float __logf(float __x
) { return __ocml_native_log_f32(__x
); }
700 float __powf(float __x
, float __y
) { return __ocml_pow_f32(__x
, __y
); }
703 float __saturatef(float __x
) { return (__x
< 0) ? 0 : ((__x
> 1) ? 1 : __x
); }
706 void __sincosf(float __x
, float *__sinptr
, float *__cosptr
) {
707 *__sinptr
= __ocml_native_sin_f32(__x
);
708 *__cosptr
= __ocml_native_cos_f32(__x
);
712 float __sinf(float __x
) { return __ocml_native_sin_f32(__x
); }
715 float __tanf(float __x
) { return __ocml_tan_f32(__x
); }
721 double acos(double __x
) { return __ocml_acos_f64(__x
); }
724 double acosh(double __x
) { return __ocml_acosh_f64(__x
); }
727 double asin(double __x
) { return __ocml_asin_f64(__x
); }
730 double asinh(double __x
) { return __ocml_asinh_f64(__x
); }
733 double atan(double __x
) { return __ocml_atan_f64(__x
); }
736 double atan2(double __x
, double __y
) { return __ocml_atan2_f64(__x
, __y
); }
739 double atanh(double __x
) { return __ocml_atanh_f64(__x
); }
742 double cbrt(double __x
) { return __ocml_cbrt_f64(__x
); }
745 double ceil(double __x
) { return __ocml_ceil_f64(__x
); }
748 double copysign(double __x
, double __y
) {
749 return __ocml_copysign_f64(__x
, __y
);
753 double cos(double __x
) { return __ocml_cos_f64(__x
); }
756 double cosh(double __x
) { return __ocml_cosh_f64(__x
); }
759 double cospi(double __x
) { return __ocml_cospi_f64(__x
); }
762 double cyl_bessel_i0(double __x
) { return __ocml_i0_f64(__x
); }
765 double cyl_bessel_i1(double __x
) { return __ocml_i1_f64(__x
); }
768 double erf(double __x
) { return __ocml_erf_f64(__x
); }
771 double erfc(double __x
) { return __ocml_erfc_f64(__x
); }
774 double erfcinv(double __x
) { return __ocml_erfcinv_f64(__x
); }
777 double erfcx(double __x
) { return __ocml_erfcx_f64(__x
); }
780 double erfinv(double __x
) { return __ocml_erfinv_f64(__x
); }
783 double exp(double __x
) { return __ocml_exp_f64(__x
); }
786 double exp10(double __x
) { return __ocml_exp10_f64(__x
); }
789 double exp2(double __x
) { return __ocml_exp2_f64(__x
); }
792 double expm1(double __x
) { return __ocml_expm1_f64(__x
); }
795 double fabs(double __x
) { return __ocml_fabs_f64(__x
); }
798 double fdim(double __x
, double __y
) { return __ocml_fdim_f64(__x
, __y
); }
801 double floor(double __x
) { return __ocml_floor_f64(__x
); }
804 double fma(double __x
, double __y
, double __z
) {
805 return __ocml_fma_f64(__x
, __y
, __z
);
809 double fmax(double __x
, double __y
) { return __ocml_fmax_f64(__x
, __y
); }
812 double fmin(double __x
, double __y
) { return __ocml_fmin_f64(__x
, __y
); }
815 double fmod(double __x
, double __y
) { return __ocml_fmod_f64(__x
, __y
); }
818 double frexp(double __x
, int *__nptr
) {
820 #ifdef __OPENMP_AMDGCN__
821 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
824 __ocml_frexp_f64(__x
, (__attribute__((address_space(5))) int *)&__tmp
);
830 double hypot(double __x
, double __y
) { return __ocml_hypot_f64(__x
, __y
); }
833 int ilogb(double __x
) { return __ocml_ilogb_f64(__x
); }
836 __RETURN_TYPE
__finite(double __x
) { return __ocml_isfinite_f64(__x
); }
839 __RETURN_TYPE
__isinf(double __x
) { return __ocml_isinf_f64(__x
); }
842 __RETURN_TYPE
__isnan(double __x
) { return __ocml_isnan_f64(__x
); }
845 double j0(double __x
) { return __ocml_j0_f64(__x
); }
848 double j1(double __x
) { return __ocml_j1_f64(__x
); }
851 double jn(int __n
, double __x
) { // TODO: we could use Ahmes multiplication
852 // and the Miller & Brown algorithm
853 // for linear recurrences to get O(log n) steps, but it's unclear if
854 // it'd be beneficial in this case. Placeholder until OCML adds
861 double __x0
= j0(__x
);
862 double __x1
= j1(__x
);
863 for (int __i
= 1; __i
< __n
; ++__i
) {
864 double __x2
= (2 * __i
) / __x
* __x1
- __x0
;
872 double ldexp(double __x
, int __e
) { return __ocml_ldexp_f64(__x
, __e
); }
875 double lgamma(double __x
) { return __ocml_lgamma_f64(__x
); }
878 long long int llrint(double __x
) { return __ocml_rint_f64(__x
); }
881 long long int llround(double __x
) { return __ocml_round_f64(__x
); }
884 double log(double __x
) { return __ocml_log_f64(__x
); }
887 double log10(double __x
) { return __ocml_log10_f64(__x
); }
890 double log1p(double __x
) { return __ocml_log1p_f64(__x
); }
893 double log2(double __x
) { return __ocml_log2_f64(__x
); }
896 double logb(double __x
) { return __ocml_logb_f64(__x
); }
899 long int lrint(double __x
) { return __ocml_rint_f64(__x
); }
902 long int lround(double __x
) { return __ocml_round_f64(__x
); }
905 double modf(double __x
, double *__iptr
) {
907 #ifdef __OPENMP_AMDGCN__
908 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
911 __ocml_modf_f64(__x
, (__attribute__((address_space(5))) double *)&__tmp
);
918 double nan(const char *__tagp
) {
923 uint64_t mantissa
: 51;
925 uint32_t exponent
: 11;
929 __static_assert_type_size_equal(sizeof(__tmp
.val
), sizeof(__tmp
.bits
));
931 __tmp
.bits
.sign
= 0u;
932 __tmp
.bits
.exponent
= ~0u;
933 __tmp
.bits
.quiet
= 1u;
934 __tmp
.bits
.mantissa
= __make_mantissa(__tagp
);
938 __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
939 uint64_t __val
= __make_mantissa(__tagp
);
940 __val
|= 0xFFF << 51;
941 return *reinterpret_cast<double *>(&__val
);
946 double nearbyint(double __x
) { return __ocml_nearbyint_f64(__x
); }
949 double nextafter(double __x
, double __y
) {
950 return __ocml_nextafter_f64(__x
, __y
);
954 double norm(int __dim
,
955 const double *__a
) { // TODO: placeholder until OCML adds support.
958 __r
+= __a
[0] * __a
[0];
962 return __ocml_sqrt_f64(__r
);
966 double norm3d(double __x
, double __y
, double __z
) {
967 return __ocml_len3_f64(__x
, __y
, __z
);
971 double norm4d(double __x
, double __y
, double __z
, double __w
) {
972 return __ocml_len4_f64(__x
, __y
, __z
, __w
);
976 double normcdf(double __x
) { return __ocml_ncdf_f64(__x
); }
979 double normcdfinv(double __x
) { return __ocml_ncdfinv_f64(__x
); }
982 double pow(double __x
, double __y
) { return __ocml_pow_f64(__x
, __y
); }
985 double powi(double __x
, int __y
) { return __ocml_pown_f64(__x
, __y
); }
988 double rcbrt(double __x
) { return __ocml_rcbrt_f64(__x
); }
991 double remainder(double __x
, double __y
) {
992 return __ocml_remainder_f64(__x
, __y
);
996 double remquo(double __x
, double __y
, int *__quo
) {
998 #ifdef __OPENMP_AMDGCN__
999 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1001 double __r
= __ocml_remquo_f64(
1002 __x
, __y
, (__attribute__((address_space(5))) int *)&__tmp
);
1009 double rhypot(double __x
, double __y
) { return __ocml_rhypot_f64(__x
, __y
); }
1012 double rint(double __x
) { return __ocml_rint_f64(__x
); }
1015 double rnorm(int __dim
,
1016 const double *__a
) { // TODO: placeholder until OCML adds support.
1019 __r
+= __a
[0] * __a
[0];
1023 return __ocml_rsqrt_f64(__r
);
1027 double rnorm3d(double __x
, double __y
, double __z
) {
1028 return __ocml_rlen3_f64(__x
, __y
, __z
);
1032 double rnorm4d(double __x
, double __y
, double __z
, double __w
) {
1033 return __ocml_rlen4_f64(__x
, __y
, __z
, __w
);
1037 double round(double __x
) { return __ocml_round_f64(__x
); }
1040 double rsqrt(double __x
) { return __ocml_rsqrt_f64(__x
); }
1043 double scalbln(double __x
, long int __n
) {
1044 return (__n
< INT_MAX
) ? __ocml_scalbn_f64(__x
, __n
)
1045 : __ocml_scalb_f64(__x
, __n
);
1048 double scalbn(double __x
, int __n
) { return __ocml_scalbn_f64(__x
, __n
); }
1051 __RETURN_TYPE
__signbit(double __x
) { return __ocml_signbit_f64(__x
); }
1054 double sin(double __x
) { return __ocml_sin_f64(__x
); }
1057 void sincos(double __x
, double *__sinptr
, double *__cosptr
) {
1059 #ifdef __OPENMP_AMDGCN__
1060 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1062 *__sinptr
= __ocml_sincos_f64(
1063 __x
, (__attribute__((address_space(5))) double *)&__tmp
);
1068 void sincospi(double __x
, double *__sinptr
, double *__cosptr
) {
1070 #ifdef __OPENMP_AMDGCN__
1071 #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1073 *__sinptr
= __ocml_sincospi_f64(
1074 __x
, (__attribute__((address_space(5))) double *)&__tmp
);
1079 double sinh(double __x
) { return __ocml_sinh_f64(__x
); }
1082 double sinpi(double __x
) { return __ocml_sinpi_f64(__x
); }
1085 double sqrt(double __x
) { return __ocml_sqrt_f64(__x
); }
1088 double tan(double __x
) { return __ocml_tan_f64(__x
); }
1091 double tanh(double __x
) { return __ocml_tanh_f64(__x
); }
1094 double tgamma(double __x
) { return __ocml_tgamma_f64(__x
); }
1097 double trunc(double __x
) { return __ocml_trunc_f64(__x
); }
1100 double y0(double __x
) { return __ocml_y0_f64(__x
); }
1103 double y1(double __x
) { return __ocml_y1_f64(__x
); }
1106 double yn(int __n
, double __x
) { // TODO: we could use Ahmes multiplication
1107 // and the Miller & Brown algorithm
1108 // for linear recurrences to get O(log n) steps, but it's unclear if
1109 // it'd be beneficial in this case. Placeholder until OCML adds
1116 double __x0
= y0(__x
);
1117 double __x1
= y1(__x
);
1118 for (int __i
= 1; __i
< __n
; ++__i
) {
1119 double __x2
= (2 * __i
) / __x
* __x1
- __x0
;
1128 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1130 double __dadd_rd(double __x
, double __y
) {
1131 return __ocml_add_rtn_f64(__x
, __y
);
1134 double __dadd_rn(double __x
, double __y
) {
1135 return __ocml_add_rte_f64(__x
, __y
);
1138 double __dadd_ru(double __x
, double __y
) {
1139 return __ocml_add_rtp_f64(__x
, __y
);
1142 double __dadd_rz(double __x
, double __y
) {
1143 return __ocml_add_rtz_f64(__x
, __y
);
1147 double __dadd_rn(double __x
, double __y
) { return __x
+ __y
; }
1150 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1152 double __ddiv_rd(double __x
, double __y
) {
1153 return __ocml_div_rtn_f64(__x
, __y
);
1156 double __ddiv_rn(double __x
, double __y
) {
1157 return __ocml_div_rte_f64(__x
, __y
);
1160 double __ddiv_ru(double __x
, double __y
) {
1161 return __ocml_div_rtp_f64(__x
, __y
);
1164 double __ddiv_rz(double __x
, double __y
) {
1165 return __ocml_div_rtz_f64(__x
, __y
);
1169 double __ddiv_rn(double __x
, double __y
) { return __x
/ __y
; }
1172 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1174 double __dmul_rd(double __x
, double __y
) {
1175 return __ocml_mul_rtn_f64(__x
, __y
);
1178 double __dmul_rn(double __x
, double __y
) {
1179 return __ocml_mul_rte_f64(__x
, __y
);
1182 double __dmul_ru(double __x
, double __y
) {
1183 return __ocml_mul_rtp_f64(__x
, __y
);
1186 double __dmul_rz(double __x
, double __y
) {
1187 return __ocml_mul_rtz_f64(__x
, __y
);
1191 double __dmul_rn(double __x
, double __y
) { return __x
* __y
; }
1194 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1196 double __drcp_rd(double __x
) { return __ocml_div_rtn_f64(1.0, __x
); }
1198 double __drcp_rn(double __x
) { return __ocml_div_rte_f64(1.0, __x
); }
1200 double __drcp_ru(double __x
) { return __ocml_div_rtp_f64(1.0, __x
); }
1202 double __drcp_rz(double __x
) { return __ocml_div_rtz_f64(1.0, __x
); }
1205 double __drcp_rn(double __x
) { return 1.0 / __x
; }
1208 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1210 double __dsqrt_rd(double __x
) { return __ocml_sqrt_rtn_f64(__x
); }
1212 double __dsqrt_rn(double __x
) { return __ocml_sqrt_rte_f64(__x
); }
1214 double __dsqrt_ru(double __x
) { return __ocml_sqrt_rtp_f64(__x
); }
1216 double __dsqrt_rz(double __x
) { return __ocml_sqrt_rtz_f64(__x
); }
1219 double __dsqrt_rn(double __x
) { return __ocml_sqrt_f64(__x
); }
1222 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1224 double __dsub_rd(double __x
, double __y
) {
1225 return __ocml_sub_rtn_f64(__x
, __y
);
1228 double __dsub_rn(double __x
, double __y
) {
1229 return __ocml_sub_rte_f64(__x
, __y
);
1232 double __dsub_ru(double __x
, double __y
) {
1233 return __ocml_sub_rtp_f64(__x
, __y
);
1236 double __dsub_rz(double __x
, double __y
) {
1237 return __ocml_sub_rtz_f64(__x
, __y
);
1241 double __dsub_rn(double __x
, double __y
) { return __x
- __y
; }
1244 #if defined OCML_BASIC_ROUNDED_OPERATIONS
1246 double __fma_rd(double __x
, double __y
, double __z
) {
1247 return __ocml_fma_rtn_f64(__x
, __y
, __z
);
1250 double __fma_rn(double __x
, double __y
, double __z
) {
1251 return __ocml_fma_rte_f64(__x
, __y
, __z
);
1254 double __fma_ru(double __x
, double __y
, double __z
) {
1255 return __ocml_fma_rtp_f64(__x
, __y
, __z
);
1258 double __fma_rz(double __x
, double __y
, double __z
) {
1259 return __ocml_fma_rtz_f64(__x
, __y
, __z
);
1263 double __fma_rn(double __x
, double __y
, double __z
) {
1264 return __ocml_fma_f64(__x
, __y
, __z
);
1271 #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1272 #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1273 #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1274 #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1275 #define signbit(__x) \
1276 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1277 #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1279 #if defined(__cplusplus)
1280 template <class T
> __DEVICE__ T
min(T __arg1
, T __arg2
) {
1281 return (__arg1
< __arg2
) ? __arg1
: __arg2
;
1284 template <class T
> __DEVICE__ T
max(T __arg1
, T __arg2
) {
1285 return (__arg1
> __arg2
) ? __arg1
: __arg2
;
1288 __DEVICE__
int min(int __arg1
, int __arg2
) {
1289 return (__arg1
< __arg2
) ? __arg1
: __arg2
;
1291 __DEVICE__
int max(int __arg1
, int __arg2
) {
1292 return (__arg1
> __arg2
) ? __arg1
: __arg2
;
1296 float max(float __x
, float __y
) { return fmaxf(__x
, __y
); }
1299 double max(double __x
, double __y
) { return fmax(__x
, __y
); }
1302 float min(float __x
, float __y
) { return fminf(__x
, __y
); }
1305 double min(double __x
, double __y
) { return fmin(__x
, __y
); }
1307 #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1308 __host__
inline static int min(int __arg1
, int __arg2
) {
1309 return std::min(__arg1
, __arg2
);
1312 __host__
inline static int max(int __arg1
, int __arg2
) {
1313 return std::max(__arg1
, __arg2
);
1315 #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1318 #pragma pop_macro("__DEVICE__")
1319 #pragma pop_macro("__RETURN_TYPE")
1321 #endif // __CLANG_HIP_MATH_H__