/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
#include <omp.h>
#endif
#endif // !defined(__HIPCC_RTC__)

#pragma push_macro("__DEVICE__")

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
#define __RETURN_TYPE int
#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
#endif // __OPENMP_AMDGCN__

#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template<bool>
struct __compare_result{};
template<>
struct __compare_result<true> {
  static const __device__ bool valid;
};

__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__ void __static_assert_equal_size() {
  __suppress_unused_warning(__compare_result<S == T>::valid);
}

#define __static_assert_type_size_equal(A, B) \
  __static_assert_equal_size<A,B>()

#else
#define __static_assert_type_size_equal(A,B) \
  static_assert((A) == (B), "")

#endif
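// Only the __compare_result<true> specialization above declares `valid`, so
// referencing __compare_result<S == T>::valid compiles exactly when the two
// sizes match, which emulates static_assert for pre-C++11 translation units;
// __suppress_unused_warning is there to keep the reference from being flagged
// as an unused value.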

__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '7')
      __r = (__r * 8u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 10u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 16u) + __tmp - '0';
    else if (__tmp >= 'a' && __tmp <= 'f')
      __r = (__r * 16u) + __tmp - 'a' + 10;
    else if (__tmp >= 'A' && __tmp <= 'F')
      __r = (__r * 16u) + __tmp - 'A' + 10;
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa(const char *__tagp) {
  if (!__tagp)
    return 0u;

  if (*__tagp == '0') {
    ++__tagp;

    if (*__tagp == 'x' || *__tagp == 'X')
      return __make_mantissa_base16(__tagp);
    else
      return __make_mantissa_base8(__tagp);
  }

  return __make_mantissa_base10(__tagp);
}
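// __tagp is the tag string passed to nanf()/nan() below: a leading '0' selects
// base-8 parsing, a leading "0x"/"0X" selects base-16, anything else is parsed
// as base 10, and any character outside the expected digit set makes the whole
// payload 0.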

// BEGIN FLOAT
#if defined(__cplusplus)
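// The three overloads below compute |x| without a branch: __sgn is 0 when __x
// is non-negative and -1 (all bits set) when it is negative, so
// (__x ^ __sgn) - __sgn leaves non-negative values unchanged and negates
// negative ones.  This relies on the right shift of a negative signed value
// sign-extending, which the targeted compilers implement.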
__DEVICE__
int abs(int __x) {
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
#endif

__DEVICE__
float acosf(float __x) { return __ocml_acos_f32(__x); }

__DEVICE__
float acoshf(float __x) { return __ocml_acosh_f32(__x); }

__DEVICE__
float asinf(float __x) { return __ocml_asin_f32(__x); }

__DEVICE__
float asinhf(float __x) { return __ocml_asinh_f32(__x); }

__DEVICE__
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }

__DEVICE__
float atanf(float __x) { return __ocml_atan_f32(__x); }

__DEVICE__
float atanhf(float __x) { return __ocml_atanh_f32(__x); }

__DEVICE__
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }

__DEVICE__
float ceilf(float __x) { return __ocml_ceil_f32(__x); }

__DEVICE__
float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }

__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }

__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }

__DEVICE__
float cospif(float __x) { return __ocml_cospi_f32(__x); }

__DEVICE__
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }

__DEVICE__
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }

__DEVICE__
float erfcf(float __x) { return __ocml_erfc_f32(__x); }

__DEVICE__
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }

__DEVICE__
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }

__DEVICE__
float erff(float __x) { return __ocml_erf_f32(__x); }

__DEVICE__
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }

__DEVICE__
float exp10f(float __x) { return __ocml_exp10_f32(__x); }

__DEVICE__
float exp2f(float __x) { return __ocml_exp2_f32(__x); }

__DEVICE__
float expf(float __x) { return __ocml_exp_f32(__x); }

__DEVICE__
float expm1f(float __x) { return __ocml_expm1_f32(__x); }

__DEVICE__
float fabsf(float __x) { return __ocml_fabs_f32(__x); }

__DEVICE__
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }

__DEVICE__
float fdividef(float __x, float __y) { return __x / __y; }

__DEVICE__
float floorf(float __x) { return __ocml_floor_f32(__x); }

__DEVICE__
float fmaf(float __x, float __y, float __z) {
  return __ocml_fma_f32(__x, __y, __z);
}

__DEVICE__
float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }

__DEVICE__
float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }

__DEVICE__
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }

__DEVICE__
float frexpf(float __x, int *__nptr) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
  *__nptr = __tmp;

  return __r;
}

__DEVICE__
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }

__DEVICE__
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }

__DEVICE__
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }

__DEVICE__
__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }

__DEVICE__
__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }

__DEVICE__
float j0f(float __x) { return __ocml_j0_f32(__x); }

__DEVICE__
float j1f(float __x) { return __ocml_j1_f32(__x); }

__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case.
  if (__n == 0)
    return j0f(__x);
  if (__n == 1)
    return j1f(__x);

  float __x0 = j0f(__x);
  float __x1 = j1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
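// jnf/ynf (and the double-precision jn/yn below) evaluate higher orders with
// the standard three-term forward recurrence
// C(n+1, x) = (2n / x) * C(n, x) - C(n-1, x), seeded with the order-0 and
// order-1 values; note that the forward direction can lose accuracy for J_n
// once n grows past x.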

__DEVICE__
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }

__DEVICE__
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }

__DEVICE__
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long long int llroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float log10f(float __x) { return __ocml_log10_f32(__x); }

__DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }

__DEVICE__
float log2f(float __x) { return __ocml_log2_f32(__x); }

__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }

__DEVICE__
float logf(float __x) { return __ocml_log_f32(__x); }

__DEVICE__
long int lrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long int lroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float modff(float __x, float *__iptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__iptr = __tmp;
  return __r;
}

__DEVICE__
float nanf(const char *__tagp) {
  union {
    float val;
    struct ieee_float {
      unsigned int mantissa : 22;
      unsigned int quiet : 1;
      unsigned int exponent : 8;
      unsigned int sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
}
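// nanf builds the quiet NaN directly in the binary32 bit layout: exponent all
// ones, the top mantissa bit (the quiet bit) set, and the low 22 mantissa bits
// taken from the parsed tag; for example, nanf("1") should produce a quiet NaN
// whose payload is 1, and payloads wider than 22 bits are truncated by the
// bit-field assignment.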

__DEVICE__
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }

__DEVICE__
float nextafterf(float __x, float __y) {
  return __ocml_nextafter_f32(__x, __y);
}

__DEVICE__
float norm3df(float __x, float __y, float __z) {
  return __ocml_len3_f32(__x, __y, __z);
}

__DEVICE__
float norm4df(float __x, float __y, float __z, float __w) {
  return __ocml_len4_f32(__x, __y, __z, __w);
}

__DEVICE__
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }

__DEVICE__
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }

__DEVICE__
float normf(int __dim,
            const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f32(__r);
}

__DEVICE__
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }

__DEVICE__
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }

__DEVICE__
float remainderf(float __x, float __y) {
  return __ocml_remainder_f32(__x, __y);
}

__DEVICE__
float remquof(float __x, float __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r = __ocml_remquo_f32(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}

__DEVICE__
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }

__DEVICE__
float rintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
float rnorm3df(float __x, float __y, float __z) {
  return __ocml_rlen3_f32(__x, __y, __z);
}

__DEVICE__
float rnorm4df(float __x, float __y, float __z, float __w) {
  return __ocml_rlen4_f32(__x, __y, __z, __w);
}

__DEVICE__
float rnormf(int __dim,
             const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f32(__r);
}

__DEVICE__
float roundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }

__DEVICE__
float scalblnf(float __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
                         : __ocml_scalb_f32(__x, __n);
}
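// scalblnf (and scalbln below) take a long exponent, but __ocml_scalbn_* take
// an int; exponents of INT_MAX or more therefore fall back to the
// __ocml_scalb_* variant, which takes the exponent as a floating-point value.
// Any float/double result overflows or flushes to zero long before exponents
// of that size anyway.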

__DEVICE__
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }

__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr =
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f32(
      __x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }

__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }

__DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }

__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }

__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }

__DEVICE__
float tanhf(float __x) { return __ocml_tanh_f32(__x); }

__DEVICE__
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }

__DEVICE__
float truncf(float __x) { return __ocml_trunc_f32(__x); }

__DEVICE__
float y0f(float __x) { return __ocml_y0_f32(__x); }

__DEVICE__
float y1f(float __x) { return __ocml_y1_f32(__x); }

__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return y0f(__x);
  if (__n == 1)
    return y1f(__x);

  float __x0 = y0f(__x);
  float __x1 = y1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}

// BEGIN INTRINSICS
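// The __-prefixed single-precision intrinsics below are the fast, approximate
// variants: most map to OCML's native_* entry points, which are typically
// implemented with cheaper, lower-accuracy hardware approximations than the
// identically named library functions above.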

__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }

__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }

__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
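// In the explicitly rounded intrinsics, the _rd/_rn/_ru/_rz suffixes request
// rounding toward negative infinity, to nearest even, toward positive
// infinity, and toward zero respectively (OCML's rtn/rte/rtp/rtz); when
// OCML_BASIC_ROUNDED_OPERATIONS is not defined, only the _rn form is provided
// and it uses the default rounding of the plain operation.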

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif

__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
  return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
  return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
  return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_f32(__x, __y, __z);
}
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif

__DEVICE__
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif

__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }

__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }

__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }

__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }

__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  *__sinptr = __ocml_native_sin_f32(__x);
  *__cosptr = __ocml_native_cos_f32(__x);
}

__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }

__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT

// BEGIN DOUBLE
__DEVICE__
double acos(double __x) { return __ocml_acos_f64(__x); }

__DEVICE__
double acosh(double __x) { return __ocml_acosh_f64(__x); }

__DEVICE__
double asin(double __x) { return __ocml_asin_f64(__x); }

__DEVICE__
double asinh(double __x) { return __ocml_asinh_f64(__x); }

__DEVICE__
double atan(double __x) { return __ocml_atan_f64(__x); }

__DEVICE__
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }

__DEVICE__
double atanh(double __x) { return __ocml_atanh_f64(__x); }

__DEVICE__
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }

__DEVICE__
double ceil(double __x) { return __ocml_ceil_f64(__x); }

__DEVICE__
double copysign(double __x, double __y) {
  return __ocml_copysign_f64(__x, __y);
}

__DEVICE__
double cos(double __x) { return __ocml_cos_f64(__x); }

__DEVICE__
double cosh(double __x) { return __ocml_cosh_f64(__x); }

__DEVICE__
double cospi(double __x) { return __ocml_cospi_f64(__x); }

__DEVICE__
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }

__DEVICE__
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }

__DEVICE__
double erf(double __x) { return __ocml_erf_f64(__x); }

__DEVICE__
double erfc(double __x) { return __ocml_erfc_f64(__x); }

__DEVICE__
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }

__DEVICE__
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }

__DEVICE__
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }

__DEVICE__
double exp(double __x) { return __ocml_exp_f64(__x); }

__DEVICE__
double exp10(double __x) { return __ocml_exp10_f64(__x); }

__DEVICE__
double exp2(double __x) { return __ocml_exp2_f64(__x); }

__DEVICE__
double expm1(double __x) { return __ocml_expm1_f64(__x); }

__DEVICE__
double fabs(double __x) { return __ocml_fabs_f64(__x); }

__DEVICE__
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }

__DEVICE__
double floor(double __x) { return __ocml_floor_f64(__x); }

__DEVICE__
double fma(double __x, double __y, double __z) {
  return __ocml_fma_f64(__x, __y, __z);
}

__DEVICE__
double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }

__DEVICE__
double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }

__DEVICE__
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }

__DEVICE__
double frexp(double __x, int *__nptr) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
  *__nptr = __tmp;
  return __r;
}

__DEVICE__
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }

__DEVICE__
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }

__DEVICE__
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }

__DEVICE__
__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }

__DEVICE__
__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }

__DEVICE__
double j0(double __x) { return __ocml_j0_f64(__x); }

__DEVICE__
double j1(double __x) { return __ocml_j1_f64(__x); }

__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return j0(__x);
  if (__n == 1)
    return j1(__x);

  double __x0 = j0(__x);
  double __x1 = j1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }
  return __x1;
}

__DEVICE__
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }

__DEVICE__
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }

__DEVICE__
long long int llrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long long int llround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double log(double __x) { return __ocml_log_f64(__x); }

__DEVICE__
double log10(double __x) { return __ocml_log10_f64(__x); }

__DEVICE__
double log1p(double __x) { return __ocml_log1p_f64(__x); }

__DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }

__DEVICE__
double logb(double __x) { return __ocml_logb_f64(__x); }

__DEVICE__
long int lrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long int lround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double modf(double __x, double *__iptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
  *__iptr = __tmp;

  return __r;
}

__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  uint64_t __val = __make_mantissa(__tagp);
  __val |= 0xFFFull << 51;
  return *reinterpret_cast<double *>(&__val);
#endif
}
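// On the _WIN32 path the quiet NaN is assembled as raw bits instead of
// bit-fields: shifting 0xFFF left by 51 sets the 11 exponent bits plus the
// quiet bit, and the parsed tag supplies the low mantissa bits.  The shifted
// constant must be an unsigned 64-bit value, since shifting a plain int by 51
// would be undefined behavior.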

__DEVICE__
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }

__DEVICE__
double nextafter(double __x, double __y) {
  return __ocml_nextafter_f64(__x, __y);
}

__DEVICE__
double norm(int __dim,
            const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f64(__r);
}

__DEVICE__
double norm3d(double __x, double __y, double __z) {
  return __ocml_len3_f64(__x, __y, __z);
}

__DEVICE__
double norm4d(double __x, double __y, double __z, double __w) {
  return __ocml_len4_f64(__x, __y, __z, __w);
}

__DEVICE__
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }

__DEVICE__
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }

__DEVICE__
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }

__DEVICE__
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }

__DEVICE__
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }

__DEVICE__
double remainder(double __x, double __y) {
  return __ocml_remainder_f64(__x, __y);
}

__DEVICE__
double remquo(double __x, double __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r = __ocml_remquo_f64(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}

__DEVICE__
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }

__DEVICE__
double rint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
double rnorm(int __dim,
             const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f64(__r);
}

__DEVICE__
double rnorm3d(double __x, double __y, double __z) {
  return __ocml_rlen3_f64(__x, __y, __z);
}

__DEVICE__
double rnorm4d(double __x, double __y, double __z, double __w) {
  return __ocml_rlen4_f64(__x, __y, __z, __w);
}

__DEVICE__
double round(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }

__DEVICE__
double scalbln(double __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
                         : __ocml_scalb_f64(__x, __n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }

__DEVICE__
double sin(double __x) { return __ocml_sin_f64(__x); }

__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincos_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
double sinh(double __x) { return __ocml_sinh_f64(__x); }

__DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }

__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }

__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }

__DEVICE__
double tanh(double __x) { return __ocml_tanh_f64(__x); }

__DEVICE__
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }

__DEVICE__
double trunc(double __x) { return __ocml_trunc_f64(__x); }

__DEVICE__
double y0(double __x) { return __ocml_y0_f64(__x); }

__DEVICE__
double y1(double __x) { return __ocml_y1_f64(__x); }

__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  //       for linear recurrences to get O(log n) steps, but it's unclear if
  //       it'd be beneficial in this case. Placeholder until OCML adds
  //       support.
  if (__n == 0)
    return y0(__x);
  if (__n == 1)
    return y1(__x);

  double __x0 = y0(__x);
  double __x1 = y1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}

// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dadd_rd(double __x, double __y) {
  return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__
double __dadd_rn(double __x, double __y) {
  return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__
double __dadd_ru(double __x, double __y) {
  return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__
double __dadd_rz(double __x, double __y) {
  return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __ddiv_rd(double __x, double __y) {
  return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__
double __ddiv_rn(double __x, double __y) {
  return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__
double __ddiv_ru(double __x, double __y) {
  return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__
double __ddiv_rz(double __x, double __y) {
  return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dmul_rd(double __x, double __y) {
  return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__
double __dmul_rn(double __x, double __y) {
  return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__
double __dmul_ru(double __x, double __y) {
  return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__
double __dmul_rz(double __x, double __y) {
  return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__
double __drcp_rn(double __x) { return 1.0 / __x; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsub_rd(double __x, double __y) {
  return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__
double __dsub_rn(double __x, double __y) {
  return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__
double __dsub_ru(double __x, double __y) {
  return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__
double __dsub_rz(double __x, double __y) {
  return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __fma_rd(double __x, double __y, double __z) {
  return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__
double __fma_ru(double __x, double __y, double __z) {
  return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rz(double __x, double __y, double __z) {
  return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_f64(__x, __y, __z);
}
#endif
// END INTRINSICS
// END DOUBLE

// C only macros
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
  _Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
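// In C11 mode these classification macros dispatch on the argument type at
// compile time, e.g. isnan(1.0f) expands to
// _Generic((1.0f), float : __isnanf, double : __isnan)(1.0f) and therefore
// calls __isnanf; integer and long double arguments are not covered by these
// definitions.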

#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}

template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__ int min(int __arg1, int __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int max(int __arg1, int __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__
float max(float __x, float __y) { return fmaxf(__x, __y); }

__DEVICE__
double max(double __x, double __y) { return fmax(__x, __y); }

__DEVICE__
float min(float __x, float __y) { return fminf(__x, __y); }

__DEVICE__
double min(double __x, double __y) { return fmin(__x, __y); }

#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
  return std::min(__arg1, __arg2);
}

__host__ inline static int max(int __arg1, int __arg2) {
  return std::max(__arg1, __arg2);
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")

#endif // __CLANG_HIP_MATH_H__