/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_HIP_MATH_H__
#define __CLANG_HIP_MATH_H__

#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
#error "This file is for HIP and OpenMP AMDGCN device compilation only."
#endif

#if !defined(__HIPCC_RTC__)
#if defined(__cplusplus)
#include <algorithm>
#endif
#include <limits.h>
#include <stdint.h>
#ifdef __OPENMP_AMDGCN__
#include <omp.h>
#endif
#endif // !defined(__HIPCC_RTC__)
#pragma push_macro("__DEVICE__")

#ifdef __OPENMP_AMDGCN__
#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ inline __attribute__((always_inline))
#endif

// A few functions return bool type starting only in C++11.
#pragma push_macro("__RETURN_TYPE")
#ifdef __OPENMP_AMDGCN__
#define __RETURN_TYPE int
#else
#if defined(__cplusplus)
#define __RETURN_TYPE bool
#else
#define __RETURN_TYPE int
#endif
#endif // __OPENMP_AMDGCN__
#if defined (__cplusplus) && __cplusplus < 201103L
// emulate static_assert on type sizes
template<bool>
struct __compare_result{};
template<>
struct __compare_result<true> {
  static const __device__ bool valid;
};

__DEVICE__
void __suppress_unused_warning(bool b){};
template <unsigned int S, unsigned int T>
__DEVICE__ void __static_assert_equal_size() {
  __suppress_unused_warning(__compare_result<S == T>::valid);
}

#define __static_assert_type_size_equal(A, B) \
  __static_assert_equal_size<A,B>()

#else
#define __static_assert_type_size_equal(A,B) \
  static_assert((A) == (B), "")

#endif
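// Note on the pre-C++11 emulation above: only the __compare_result<true>
// specialization declares `valid`, so naming __compare_result<S == T>::valid
// compiles exactly when the two sizes match. Illustrative sketch, assuming a
// target where sizeof(float) == sizeof(uint32_t) but != sizeof(uint64_t):
//
//   __static_assert_type_size_equal(sizeof(float), sizeof(uint32_t)); // OK
//   __static_assert_type_size_equal(sizeof(float), sizeof(uint64_t));
//     // error: no member named 'valid' in '__compare_result<false>'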
__DEVICE__
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '7')
      __r = (__r * 8u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 10u) + __tmp - '0';
    else
      return 0;

    ++__tagp;
  }

  return __r;
}

__DEVICE__
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  while (*__tagp != '\0') {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 16u) + __tmp - '0';
    else if (__tmp >= 'a' && __tmp <= 'f')
      __r = (__r * 16u) + __tmp - 'a' + 10;
    else if (__tmp >= 'A' && __tmp <= 'F')
      __r = (__r * 16u) + __tmp - 'A' + 10;
    else
      return 0;

    ++__tagp;
  }

  return __r;
}
__DEVICE__
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  if (*__tagp == '0') {
    ++__tagp;

    if (*__tagp == 'x' || *__tagp == 'X')
      return __make_mantissa_base16(__tagp + 1); // skip the 'x'/'X' prefix
    else
      return __make_mantissa_base8(__tagp);
  }

  return __make_mantissa_base10(__tagp);
}
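// __make_mantissa() implements the tag-string convention of C's nan()/nanf():
// a leading "0x"/"0X" selects the base-16 parser, a bare leading '0' selects
// base 8, anything else base 10, and any invalid character makes the whole
// parse return 0. For example:
//
//   __make_mantissa("017") == 15   // octal
//   __make_mantissa("42")  == 42   // decimal
//   __make_mantissa("4z")  == 0    // invalid digit
//
// The result is used below by nanf()/nan() to fill the NaN payload bits.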
// BEGIN FLOAT
#if defined(__cplusplus)
__DEVICE__
int abs(int __x) {
  int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long labs(long __x) {
  long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
__DEVICE__
long long llabs(long long __x) {
  long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
  return (__x ^ __sgn) - __sgn;
}
#endif
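// The abs/labs/llabs overloads above are branchless: __sgn is all ones when
// __x is negative (arithmetic shift of the sign bit) and zero otherwise, so
// (__x ^ __sgn) - __sgn negates negative values and leaves non-negative ones
// unchanged. Worked example for abs(-5) with a 32-bit int:
//   __sgn = -5 >> 31 = 0xFFFFFFFF (-1)
//   (-5 ^ -1) - (-1) = 4 + 1 = 5
// As with the C library functions, the result is undefined for the most
// negative value of each type, whose magnitude is not representable.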
__DEVICE__
float acosf(float __x) { return __ocml_acos_f32(__x); }

__DEVICE__
float acoshf(float __x) { return __ocml_acosh_f32(__x); }

__DEVICE__
float asinf(float __x) { return __ocml_asin_f32(__x); }

__DEVICE__
float asinhf(float __x) { return __ocml_asinh_f32(__x); }

__DEVICE__
float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }

__DEVICE__
float atanf(float __x) { return __ocml_atan_f32(__x); }

__DEVICE__
float atanhf(float __x) { return __ocml_atanh_f32(__x); }

__DEVICE__
float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }

__DEVICE__
float ceilf(float __x) { return __ocml_ceil_f32(__x); }

__DEVICE__
float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }

__DEVICE__
float cosf(float __x) { return __ocml_cos_f32(__x); }

__DEVICE__
float coshf(float __x) { return __ocml_cosh_f32(__x); }

__DEVICE__
float cospif(float __x) { return __ocml_cospi_f32(__x); }

__DEVICE__
float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }

__DEVICE__
float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }

__DEVICE__
float erfcf(float __x) { return __ocml_erfc_f32(__x); }

__DEVICE__
float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }

__DEVICE__
float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }

__DEVICE__
float erff(float __x) { return __ocml_erf_f32(__x); }

__DEVICE__
float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }

__DEVICE__
float exp10f(float __x) { return __ocml_exp10_f32(__x); }

__DEVICE__
float exp2f(float __x) { return __ocml_exp2_f32(__x); }

__DEVICE__
float expf(float __x) { return __ocml_exp_f32(__x); }

__DEVICE__
float expm1f(float __x) { return __ocml_expm1_f32(__x); }

__DEVICE__
float fabsf(float __x) { return __builtin_fabsf(__x); }

__DEVICE__
float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }

__DEVICE__
float fdividef(float __x, float __y) { return __x / __y; }

__DEVICE__
float floorf(float __x) { return __ocml_floor_f32(__x); }

__DEVICE__
float fmaf(float __x, float __y, float __z) {
  return __builtin_fmaf(__x, __y, __z);
}

__DEVICE__
float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); }

__DEVICE__
float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); }

__DEVICE__
float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }

__DEVICE__
float frexpf(float __x, int *__nptr) {
  *__nptr = __builtin_amdgcn_frexp_expf(__x);
  return __builtin_amdgcn_frexp_mantf(__x);
}

__DEVICE__
float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }

__DEVICE__
int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }

__DEVICE__
__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }

__DEVICE__
__RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); }

__DEVICE__
__RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); }

__DEVICE__
float j0f(float __x) { return __ocml_j0_f32(__x); }

__DEVICE__
float j1f(float __x) { return __ocml_j1_f32(__x); }

__DEVICE__
float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case.
  if (__n == 0)
    return j0f(__x);
  if (__n == 1)
    return j1f(__x);

  float __x0 = j0f(__x);
  float __x1 = j1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
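// jnf() above evaluates the standard forward recurrence for Bessel functions
// of the first kind, J_{n+1}(x) = (2n/x) * J_n(x) - J_{n-1}(x), seeded with
// j0f()/j1f(). Forward recurrence loses accuracy for J_n once n grows large
// relative to |x|, so large-order results should be treated with care.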
__DEVICE__
float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }

__DEVICE__
float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }

__DEVICE__
long long int llrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long long int llroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float log10f(float __x) { return __ocml_log10_f32(__x); }

__DEVICE__
float log1pf(float __x) { return __ocml_log1p_f32(__x); }

__DEVICE__
float log2f(float __x) { return __ocml_log2_f32(__x); }

__DEVICE__
float logbf(float __x) { return __ocml_logb_f32(__x); }

__DEVICE__
float logf(float __x) { return __ocml_log_f32(__x); }

__DEVICE__
long int lrintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
long int lroundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float modff(float __x, float *__iptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r =
      __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__iptr = __tmp;
  return __r;
}

__DEVICE__
float nanf(const char *__tagp __attribute__((nonnull))) {
  union {
    float val;
    struct ieee_float {
      unsigned int mantissa : 22;
      unsigned int quiet : 1;
      unsigned int exponent : 8;
      unsigned int sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
}
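// nanf() assembles a quiet NaN directly from its fields: sign 0, all-ones
// exponent, quiet bit set, and the low mantissa bits taken from the parsed
// tag. For example, nanf("") and nanf("0") both produce the default quiet
// NaN (payload 0); a payload wider than the 22-bit field is truncated.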
__DEVICE__
float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }

__DEVICE__
float nextafterf(float __x, float __y) {
  return __ocml_nextafter_f32(__x, __y);
}

__DEVICE__
float norm3df(float __x, float __y, float __z) {
  return __ocml_len3_f32(__x, __y, __z);
}

__DEVICE__
float norm4df(float __x, float __y, float __z, float __w) {
  return __ocml_len4_f32(__x, __y, __z, __w);
}

__DEVICE__
float normcdff(float __x) { return __ocml_ncdf_f32(__x); }

__DEVICE__
float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }

__DEVICE__
float normf(int __dim,
            const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f32(__r);
}

__DEVICE__
float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }

__DEVICE__
float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }

__DEVICE__
float remainderf(float __x, float __y) {
  return __ocml_remainder_f32(__x, __y);
}

__DEVICE__
float remquof(float __x, float __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  float __r = __ocml_remquo_f32(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}

__DEVICE__
float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }

__DEVICE__
float rintf(float __x) { return __ocml_rint_f32(__x); }

__DEVICE__
float rnorm3df(float __x, float __y, float __z) {
  return __ocml_rlen3_f32(__x, __y, __z);
}

__DEVICE__
float rnorm4df(float __x, float __y, float __z, float __w) {
  return __ocml_rlen4_f32(__x, __y, __z, __w);
}

__DEVICE__
float rnormf(int __dim,
             const float *__a) { // TODO: placeholder until OCML adds support.
  float __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f32(__r);
}

__DEVICE__
float roundf(float __x) { return __ocml_round_f32(__x); }

__DEVICE__
float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }

__DEVICE__
float scalblnf(float __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
                         : __ocml_scalb_f32(__x, __n);
}

__DEVICE__
float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); }

__DEVICE__
void sincosf(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr =
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}
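// sincosf() and the other out-parameter wrappers route the second result
// through a local __tmp cast to address_space(5), the AMDGPU private
// (scratch) address space, because the OCML entry points expect a private
// pointer while the caller's pointer may be generic. Under OpenMP the
// `omp allocate ... allocator(omp_thread_mem_alloc)` pragma keeps that
// temporary thread-local. Usage sketch (variable names are illustrative):
//
//   float __s, __c;
//   sincosf(__theta, &__s, &__c); // __s = sinf(__theta), __c = cosf(__theta)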
__DEVICE__
void sincospif(float __x, float *__sinptr, float *__cosptr) {
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f32(
      __x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
float sinf(float __x) { return __ocml_sin_f32(__x); }

__DEVICE__
float sinhf(float __x) { return __ocml_sinh_f32(__x); }

__DEVICE__
float sinpif(float __x) { return __ocml_sinpi_f32(__x); }

__DEVICE__
float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }

__DEVICE__
float tanf(float __x) { return __ocml_tan_f32(__x); }

__DEVICE__
float tanhf(float __x) { return __ocml_tanh_f32(__x); }

__DEVICE__
float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }

__DEVICE__
float truncf(float __x) { return __ocml_trunc_f32(__x); }

__DEVICE__
float y0f(float __x) { return __ocml_y0_f32(__x); }

__DEVICE__
float y1f(float __x) { return __ocml_y1_f32(__x); }

__DEVICE__
float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
                                // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return y0f(__x);
  if (__n == 1)
    return y1f(__x);

  float __x0 = y0f(__x);
  float __x1 = y1f(__x);
  for (int __i = 1; __i < __n; ++__i) {
    float __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}

// BEGIN INTRINSICS

__DEVICE__
float __cosf(float __x) { return __ocml_native_cos_f32(__x); }

__DEVICE__
float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }

__DEVICE__
float __expf(float __x) { return __ocml_native_exp_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
__DEVICE__
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
__DEVICE__
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
__DEVICE__
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fadd_rn(float __x, float __y) { return __x + __y; }
#endif
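// Naming note for the intrinsics above and below: the _rd/_rn/_ru/_rz
// suffixes are the CUDA-style rounding modes (round down toward -inf, round
// to nearest even, round up toward +inf, round toward zero), mapped onto the
// OCML _rtn/_rte/_rtp/_rtz entry points. The directed-rounding variants exist
// only when OCML_BASIC_ROUNDED_OPERATIONS is defined; otherwise only the
// round-to-nearest form is provided, lowered to the plain operator or builtin.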
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
__DEVICE__
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
__DEVICE__
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fdiv_rn(float __x, float __y) { return __x / __y; }
#endif

__DEVICE__
float __fdividef(float __x, float __y) { return __x / __y; }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmaf_rd(float __x, float __y, float __z) {
  return __ocml_fma_rtn_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __ocml_fma_rte_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_ru(float __x, float __y, float __z) {
  return __ocml_fma_rtp_f32(__x, __y, __z);
}
__DEVICE__
float __fmaf_rz(float __x, float __y, float __z) {
  return __ocml_fma_rtz_f32(__x, __y, __z);
}
#else
__DEVICE__
float __fmaf_rn(float __x, float __y, float __z) {
  return __builtin_fmaf(__x, __y, __z);
}
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
__DEVICE__
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
__DEVICE__
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
__DEVICE__
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fmul_rn(float __x, float __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
__DEVICE__
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
__DEVICE__
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
__DEVICE__
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
#else
__DEVICE__
float __frcp_rn(float __x) { return 1.0f / __x; }
#endif

__DEVICE__
float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
__DEVICE__
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
__DEVICE__
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
#else
__DEVICE__
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
__DEVICE__
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
__DEVICE__
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
__DEVICE__
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
#else
__DEVICE__
float __fsub_rn(float __x, float __y) { return __x - __y; }
#endif

__DEVICE__
float __log10f(float __x) { return __ocml_native_log10_f32(__x); }

__DEVICE__
float __log2f(float __x) { return __ocml_native_log2_f32(__x); }

__DEVICE__
float __logf(float __x) { return __ocml_native_log_f32(__x); }

__DEVICE__
float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }

__DEVICE__
float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }

__DEVICE__
void __sincosf(float __x, float *__sinptr, float *__cosptr) {
  *__sinptr = __ocml_native_sin_f32(__x);
  *__cosptr = __ocml_native_cos_f32(__x);
}

__DEVICE__
float __sinf(float __x) { return __ocml_native_sin_f32(__x); }

__DEVICE__
float __tanf(float __x) { return __ocml_tan_f32(__x); }
// END INTRINSICS
// END FLOAT
// BEGIN DOUBLE
__DEVICE__
double acos(double __x) { return __ocml_acos_f64(__x); }

__DEVICE__
double acosh(double __x) { return __ocml_acosh_f64(__x); }

__DEVICE__
double asin(double __x) { return __ocml_asin_f64(__x); }

__DEVICE__
double asinh(double __x) { return __ocml_asinh_f64(__x); }

__DEVICE__
double atan(double __x) { return __ocml_atan_f64(__x); }

__DEVICE__
double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }

__DEVICE__
double atanh(double __x) { return __ocml_atanh_f64(__x); }

__DEVICE__
double cbrt(double __x) { return __ocml_cbrt_f64(__x); }

__DEVICE__
double ceil(double __x) { return __ocml_ceil_f64(__x); }

__DEVICE__
double copysign(double __x, double __y) {
  return __builtin_copysign(__x, __y);
}

__DEVICE__
double cos(double __x) { return __ocml_cos_f64(__x); }

__DEVICE__
double cosh(double __x) { return __ocml_cosh_f64(__x); }

__DEVICE__
double cospi(double __x) { return __ocml_cospi_f64(__x); }

__DEVICE__
double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }

__DEVICE__
double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }

__DEVICE__
double erf(double __x) { return __ocml_erf_f64(__x); }

__DEVICE__
double erfc(double __x) { return __ocml_erfc_f64(__x); }

__DEVICE__
double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }

__DEVICE__
double erfcx(double __x) { return __ocml_erfcx_f64(__x); }

__DEVICE__
double erfinv(double __x) { return __ocml_erfinv_f64(__x); }

__DEVICE__
double exp(double __x) { return __ocml_exp_f64(__x); }

__DEVICE__
double exp10(double __x) { return __ocml_exp10_f64(__x); }

__DEVICE__
double exp2(double __x) { return __ocml_exp2_f64(__x); }

__DEVICE__
double expm1(double __x) { return __ocml_expm1_f64(__x); }

__DEVICE__
double fabs(double __x) { return __builtin_fabs(__x); }

__DEVICE__
double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }

__DEVICE__
double floor(double __x) { return __ocml_floor_f64(__x); }

__DEVICE__
double fma(double __x, double __y, double __z) {
  return __builtin_fma(__x, __y, __z);
}

__DEVICE__
double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); }

__DEVICE__
double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); }

__DEVICE__
double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }

__DEVICE__
double frexp(double __x, int *__nptr) {
  *__nptr = __builtin_amdgcn_frexp_exp(__x);
  return __builtin_amdgcn_frexp_mant(__x);
}

__DEVICE__
double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }

__DEVICE__
int ilogb(double __x) { return __ocml_ilogb_f64(__x); }

__DEVICE__
__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }

__DEVICE__
__RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); }

__DEVICE__
__RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); }

__DEVICE__
double j0(double __x) { return __ocml_j0_f64(__x); }

__DEVICE__
double j1(double __x) { return __ocml_j1_f64(__x); }

__DEVICE__
double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return j0(__x);
  if (__n == 1)
    return j1(__x);

  double __x0 = j0(__x);
  double __x1 = j1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
__DEVICE__
double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }

__DEVICE__
double lgamma(double __x) { return __ocml_lgamma_f64(__x); }

__DEVICE__
long long int llrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long long int llround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double log(double __x) { return __ocml_log_f64(__x); }

__DEVICE__
double log10(double __x) { return __ocml_log10_f64(__x); }

__DEVICE__
double log1p(double __x) { return __ocml_log1p_f64(__x); }

__DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }

__DEVICE__
double logb(double __x) { return __ocml_logb_f64(__x); }

__DEVICE__
long int lrint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
long int lround(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double modf(double __x, double *__iptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r =
      __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
  *__iptr = __tmp;

  return __r;
}
__DEVICE__
double nan(const char *__tagp) {
#if !_WIN32
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  uint64_t __val = __make_mantissa(__tagp);
  // Set the exponent field (all ones) and the quiet bit; use a 64-bit
  // constant so the shift is well defined.
  __val |= 0xFFFULL << 51;
  return *reinterpret_cast<double *>(&__val);
#endif
}
__DEVICE__
double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }

__DEVICE__
double nextafter(double __x, double __y) {
  return __ocml_nextafter_f64(__x, __y);
}

__DEVICE__
double norm(int __dim,
            const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_sqrt_f64(__r);
}

__DEVICE__
double norm3d(double __x, double __y, double __z) {
  return __ocml_len3_f64(__x, __y, __z);
}

__DEVICE__
double norm4d(double __x, double __y, double __z, double __w) {
  return __ocml_len4_f64(__x, __y, __z, __w);
}

__DEVICE__
double normcdf(double __x) { return __ocml_ncdf_f64(__x); }

__DEVICE__
double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }

__DEVICE__
double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }

__DEVICE__
double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }

__DEVICE__
double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }

__DEVICE__
double remainder(double __x, double __y) {
  return __ocml_remainder_f64(__x, __y);
}

__DEVICE__
double remquo(double __x, double __y, int *__quo) {
  int __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  double __r = __ocml_remquo_f64(
      __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
  *__quo = __tmp;

  return __r;
}
__DEVICE__
double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }

__DEVICE__
double rint(double __x) { return __ocml_rint_f64(__x); }

__DEVICE__
double rnorm(int __dim,
             const double *__a) { // TODO: placeholder until OCML adds support.
  double __r = 0;
  while (__dim--) {
    __r += __a[0] * __a[0];
    ++__a;
  }

  return __ocml_rsqrt_f64(__r);
}

__DEVICE__
double rnorm3d(double __x, double __y, double __z) {
  return __ocml_rlen3_f64(__x, __y, __z);
}

__DEVICE__
double rnorm4d(double __x, double __y, double __z, double __w) {
  return __ocml_rlen4_f64(__x, __y, __z, __w);
}

__DEVICE__
double round(double __x) { return __ocml_round_f64(__x); }

__DEVICE__
double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }

__DEVICE__
double scalbln(double __x, long int __n) {
  return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
                         : __ocml_scalb_f64(__x, __n);
}
__DEVICE__
double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }

__DEVICE__
__RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); }

__DEVICE__
double sin(double __x) { return __ocml_sin_f64(__x); }

__DEVICE__
void sincos(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincos_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
void sincospi(double __x, double *__sinptr, double *__cosptr) {
  double __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr = __ocml_sincospi_f64(
      __x, (__attribute__((address_space(5))) double *)&__tmp);
  *__cosptr = __tmp;
}

__DEVICE__
double sinh(double __x) { return __ocml_sinh_f64(__x); }

__DEVICE__
double sinpi(double __x) { return __ocml_sinpi_f64(__x); }

__DEVICE__
double sqrt(double __x) { return __ocml_sqrt_f64(__x); }

__DEVICE__
double tan(double __x) { return __ocml_tan_f64(__x); }

__DEVICE__
double tanh(double __x) { return __ocml_tanh_f64(__x); }

__DEVICE__
double tgamma(double __x) { return __ocml_tgamma_f64(__x); }

__DEVICE__
double trunc(double __x) { return __ocml_trunc_f64(__x); }

__DEVICE__
double y0(double __x) { return __ocml_y0_f64(__x); }

__DEVICE__
double y1(double __x) { return __ocml_y1_f64(__x); }

__DEVICE__
double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
                                 // and the Miller & Brown algorithm
  // for linear recurrences to get O(log n) steps, but it's unclear if
  // it'd be beneficial in this case. Placeholder until OCML adds
  // support.
  if (__n == 0)
    return y0(__x);
  if (__n == 1)
    return y1(__x);

  double __x0 = y0(__x);
  double __x1 = y1(__x);
  for (int __i = 1; __i < __n; ++__i) {
    double __x2 = (2 * __i) / __x * __x1 - __x0;
    __x0 = __x1;
    __x1 = __x2;
  }

  return __x1;
}
// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dadd_rd(double __x, double __y) {
  return __ocml_add_rtn_f64(__x, __y);
}
__DEVICE__
double __dadd_rn(double __x, double __y) {
  return __ocml_add_rte_f64(__x, __y);
}
__DEVICE__
double __dadd_ru(double __x, double __y) {
  return __ocml_add_rtp_f64(__x, __y);
}
__DEVICE__
double __dadd_rz(double __x, double __y) {
  return __ocml_add_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dadd_rn(double __x, double __y) { return __x + __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __ddiv_rd(double __x, double __y) {
  return __ocml_div_rtn_f64(__x, __y);
}
__DEVICE__
double __ddiv_rn(double __x, double __y) {
  return __ocml_div_rte_f64(__x, __y);
}
__DEVICE__
double __ddiv_ru(double __x, double __y) {
  return __ocml_div_rtp_f64(__x, __y);
}
__DEVICE__
double __ddiv_rz(double __x, double __y) {
  return __ocml_div_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __ddiv_rn(double __x, double __y) { return __x / __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dmul_rd(double __x, double __y) {
  return __ocml_mul_rtn_f64(__x, __y);
}
__DEVICE__
double __dmul_rn(double __x, double __y) {
  return __ocml_mul_rte_f64(__x, __y);
}
__DEVICE__
double __dmul_ru(double __x, double __y) {
  return __ocml_mul_rtp_f64(__x, __y);
}
__DEVICE__
double __dmul_rz(double __x, double __y) {
  return __ocml_mul_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dmul_rn(double __x, double __y) { return __x * __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
__DEVICE__
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
__DEVICE__
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
__DEVICE__
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
#else
__DEVICE__
double __drcp_rn(double __x) { return 1.0 / __x; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
__DEVICE__
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
__DEVICE__
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
#else
__DEVICE__
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __dsub_rd(double __x, double __y) {
  return __ocml_sub_rtn_f64(__x, __y);
}
__DEVICE__
double __dsub_rn(double __x, double __y) {
  return __ocml_sub_rte_f64(__x, __y);
}
__DEVICE__
double __dsub_ru(double __x, double __y) {
  return __ocml_sub_rtp_f64(__x, __y);
}
__DEVICE__
double __dsub_rz(double __x, double __y) {
  return __ocml_sub_rtz_f64(__x, __y);
}
#else
__DEVICE__
double __dsub_rn(double __x, double __y) { return __x - __y; }
#endif

#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
double __fma_rd(double __x, double __y, double __z) {
  return __ocml_fma_rtn_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __ocml_fma_rte_f64(__x, __y, __z);
}
__DEVICE__
double __fma_ru(double __x, double __y, double __z) {
  return __ocml_fma_rtp_f64(__x, __y, __z);
}
__DEVICE__
double __fma_rz(double __x, double __y, double __z) {
  return __ocml_fma_rtz_f64(__x, __y, __z);
}
#else
__DEVICE__
double __fma_rn(double __x, double __y, double __z) {
  return __builtin_fma(__x, __y, __z);
}
#endif
// END INTRINSICS
// END DOUBLE
// C only macros
#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
#define signbit(__x) \
  _Generic((__x), float : __signbitf, double : __signbit)(__x)
#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
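// With C11 _Generic the classification macros above dispatch on the static
// type of the argument, so one spelling covers both float and double. Usage
// sketch (C device code; the variables are illustrative):
//
//   float __f = __builtin_nanf("");
//   double __d = 1.0;
//   int __a = isnan(__f);  // expands to __isnanf(__f), nonzero
//   int __b = isnan(__d);  // expands to __isnan(__d), 0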
#if defined(__cplusplus)
template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}

template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__ int min(int __arg1, int __arg2) {
  return (__arg1 < __arg2) ? __arg1 : __arg2;
}
__DEVICE__ int max(int __arg1, int __arg2) {
  return (__arg1 > __arg2) ? __arg1 : __arg2;
}

__DEVICE__
float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); }

__DEVICE__
double max(double __x, double __y) { return __builtin_fmax(__x, __y); }

__DEVICE__
float min(float __x, float __y) { return __builtin_fminf(__x, __y); }

__DEVICE__
double min(double __x, double __y) { return __builtin_fmin(__x, __y); }

#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
__host__ inline static int min(int __arg1, int __arg2) {
  return std::min(__arg1, __arg2);
}

__host__ inline static int max(int __arg1, int __arg2) {
  return std::max(__arg1, __arg2);
}
#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
#endif

#pragma pop_macro("__DEVICE__")
#pragma pop_macro("__RETURN_TYPE")

#endif // __CLANG_HIP_MATH_H__