libclc/generic/lib/math/exp_helper.cl

   1 /*
   2  * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20  * THE SOFTWARE.
  21  */
  22
  23 #include <clc/clc.h>
  24
  25 #include "math.h"
  26 #include "tables.h"
  27
  28 #ifdef cl_khr_fp64
  29
  30 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  31
  32 _CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
  33
  34     int j = n & 0x3f;
  35     int m = n >> 6;
  36
  37     // 6 term tail of Taylor expansion of e^r
  38     double z2 = r * fma(r,
  39                         fma(r,
  40                             fma(r,
  41                                 fma(r,
  42                                     fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
  43                                     0x1.5555555555555p-5),
  44                                 0x1.5555555555555p-3),
  45                             0x1.0000000000000p-1),
  46                         1.0);
  47
  48     double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
  49     z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
  50
  51     int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
  52
  53     int n1 = m >> 2;
  54     int n2 = m-n1;
  55     double z3= z2 * as_double(((long)n1 + 1023) << 52);
  56     z3 *= as_double(((long)n2 + 1023) << 52);
  57
  58     z2 = ldexp(z2, m);
  59     z2 = small_value ? z3: z2;
  60
  61     z2 = isnan(x) ? x : z2;
  62
  63     z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2;
  64     z2 = x < x_min ? 0.0 : z2;
  65
  66     return z2;
  67 }
  68
  69 #endif // cl_khr_fp64