libclc/generic/lib/math/clc_fmod.cl

   1 /*
   2  * Copyright (c) 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20  * THE SOFTWARE.
  21  */
  22
  23 #include <clc/clc.h>
  24 #include <clc/clcmacro.h>
  25 #include <clc/math/clc_floor.h>
  26 #include <clc/math/clc_trunc.h>
  27 #include <clc/shared/clc_max.h>
  28
  29 #include <math/clc_remainder.h>
  30 #include "config.h"
  31 #include "math.h"
  32
  33 _CLC_DEF _CLC_OVERLOAD float __clc_fmod(float x, float y)
  34 {
  35     int ux = as_int(x);
  36     int ax = ux & EXSIGNBIT_SP32;
  37     float xa = as_float(ax);
  38     int sx = ux ^ ax;
  39     int ex = ax >> EXPSHIFTBITS_SP32;
  40
  41     int uy = as_int(y);
  42     int ay = uy & EXSIGNBIT_SP32;
  43     float ya = as_float(ay);
  44     int ey = ay >> EXPSHIFTBITS_SP32;
  45
  46     float xr = as_float(0x3f800000 | (ax & 0x007fffff));
  47     float yr = as_float(0x3f800000 | (ay & 0x007fffff));
  48     int c;
  49     int k = ex - ey;
  50
  51     while (k > 0) {
  52         c = xr >= yr;
  53         xr -= c ? yr : 0.0f;
  54         xr += xr;
  55         --k;
  56     }
  57
  58     c = xr >= yr;
  59     xr -= c ? yr : 0.0f;
  60
  61     int lt = ex < ey;
  62
  63     xr = lt ? xa : xr;
  64     yr = lt ? ya : yr;
  65
  66
  67     float s = as_float(ey << EXPSHIFTBITS_SP32);
  68     xr *= lt ? 1.0f : s;
  69
  70     c = ax == ay;
  71     xr = c ? 0.0f : xr;
  72
  73     xr = as_float(sx ^ as_int(xr));
  74
  75     c = ax > PINFBITPATT_SP32 | ay > PINFBITPATT_SP32 | ax == PINFBITPATT_SP32 | ay == 0;
  76     xr = c ? as_float(QNANBITPATT_SP32) : xr;
  77
  78     return xr;
  79
  80 }
  81 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_fmod, float, float);
  82
  83 #ifdef cl_khr_fp64
  84 _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y)
  85 {
  86     ulong ux = as_ulong(x);
  87     ulong ax = ux & ~SIGNBIT_DP64;
  88     ulong xsgn = ux ^ ax;
  89     double dx = as_double(ax);
  90     int xexp = convert_int(ax >> EXPSHIFTBITS_DP64);
  91     int xexp1 = 11 - (int) clz(ax & MANTBITS_DP64);
  92     xexp1 = xexp < 1 ? xexp1 : xexp;
  93
  94     ulong uy = as_ulong(y);
  95     ulong ay = uy & ~SIGNBIT_DP64;
  96     double dy = as_double(ay);
  97     int yexp = convert_int(ay >> EXPSHIFTBITS_DP64);
  98     int yexp1 = 11 - (int) clz(ay & MANTBITS_DP64);
  99     yexp1 = yexp < 1 ? yexp1 : yexp;
 100
 101     // First assume |x| > |y|
 102
 103     // Set ntimes to the number of times we need to do a
 104     // partial remainder. If the exponent of x is an exact multiple
 105     // of 53 larger than the exponent of y, and the mantissa of x is
 106     // less than the mantissa of y, ntimes will be one too large
 107     // but it doesn't matter - it just means that we'll go round
 108     // the loop below one extra time.
 109     int ntimes = __clc_max(0, (xexp1 - yexp1) / 53);
 110     double w =  ldexp(dy, ntimes * 53);
 111     w = ntimes == 0 ? dy : w;
 112     double scale = ntimes == 0 ? 1.0 : 0x1.0p-53;
 113
 114     // Each time round the loop we compute a partial remainder.
 115     // This is done by subtracting a large multiple of w
 116     // from x each time, where w is a scaled up version of y.
 117     // The subtraction must be performed exactly in quad
 118     // precision, though the result at each stage can
 119     // fit exactly in a double precision number.
 120     int i;
 121     double t, v, p, pp;
 122
 123     for (i = 0; i < ntimes; i++) {
 124         // Compute integral multiplier
 125         t = __clc_trunc(dx / w);
 126
 127         // Compute w * t in quad precision
 128         p = w * t;
 129         pp = fma(w, t, -p);
 130
 131         // Subtract w * t from dx
 132         v = dx - p;
 133         dx = v + (((dx - v) - p) - pp);
 134
 135         // If t was one too large, dx will be negative. Add back one w.
 136         dx += dx < 0.0 ? w : 0.0;
 137
 138         // Scale w down by 2^(-53) for the next iteration
 139         w *= scale;
 140     }
 141
 142     // One more time
 143     // Variable todd says whether the integer t is odd or not
 144     t = __clc_floor(dx / w);
 145     long lt = (long)t;
 146     int todd = lt & 1;
 147
 148     p = w * t;
 149     pp = fma(w, t, -p);
 150     v = dx - p;
 151     dx = v + (((dx - v) - p) - pp);
 152     i = dx < 0.0;
 153     todd ^= i;
 154     dx += i ? w : 0.0;
 155
 156     // At this point, dx lies in the range [0,dy)
 157     double ret = as_double(xsgn ^ as_ulong(dx));
 158     dx = as_double(ax);
 159
 160     // Now handle |x| == |y|
 161     int c = dx == dy;
 162     t = as_double(xsgn);
 163     ret = c ? t : ret;
 164
 165     // Next, handle |x| < |y|
 166     c = dx < dy;
 167     ret = c ? x : ret;
 168
 169     // We don't need anything special for |x| == 0
 170
 171     // |y| is 0
 172     c = dy == 0.0;
 173     ret = c ? as_double(QNANBITPATT_DP64) : ret;
 174
 175     // y is +-Inf, NaN
 176     c = yexp > BIASEDEMAX_DP64;
 177     t = y == y ? x : y;
 178     ret = c ? t : ret;
 179
 180     // x is +=Inf, NaN
 181     c = xexp > BIASEDEMAX_DP64;
 182     ret = c ? as_double(QNANBITPATT_DP64) : ret;
 183
 184     return ret;
 185 }
 186 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_fmod, double, double);
 187 #endif