libclc/generic/lib/math/clc_hypot.cl

   1 /*
   2  * Copyright (c) 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20  * THE SOFTWARE.
  21  */
  22
  23 #include <clc/clc.h>
  24 #include <clc/clcmacro.h>
  25 #include <clc/integer/clc_abs.h>
  26 #include <clc/relational/clc_isnan.h>
  27 #include <clc/shared/clc_clamp.h>
  28 #include <math/clc_hypot.h>
  29
  30 #include "config.h"
  31 #include "math.h"
  32
  33 // Returns sqrt(x*x + y*y) with no overflow or underflow unless the result
  34 // warrants it
  35 _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) {
  36   uint ux = as_uint(x);
  37   uint aux = ux & EXSIGNBIT_SP32;
  38   uint uy = as_uint(y);
  39   uint auy = uy & EXSIGNBIT_SP32;
  40   float retval;
  41   int c = aux > auy;
  42   ux = c ? aux : auy;
  43   uy = c ? auy : aux;
  44
  45   int xexp =
  46       __clc_clamp((int)(ux >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32, -126, 126);
  47   float fx_exp = as_float((xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
  48   float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);
  49   float fx = as_float(ux) * fi_exp;
  50   float fy = as_float(uy) * fi_exp;
  51   retval = sqrt(mad(fx, fx, fy * fy)) * fx_exp;
  52
  53   retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval;
  54   retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32
  55                ? as_float(PINFBITPATT_SP32)
  56                : retval;
  57   return retval;
  58 }
  59 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_hypot, float, float)
  60
  61 #ifdef cl_khr_fp64
  62 _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
  63   ulong ux = as_ulong(x) & ~SIGNBIT_DP64;
  64   int xexp = ux >> EXPSHIFTBITS_DP64;
  65   x = as_double(ux);
  66
  67   ulong uy = as_ulong(y) & ~SIGNBIT_DP64;
  68   int yexp = uy >> EXPSHIFTBITS_DP64;
  69   y = as_double(uy);
  70
  71   int c = xexp > EXPBIAS_DP64 + 500 | yexp > EXPBIAS_DP64 + 500;
  72   double preadjust = c ? 0x1.0p-600 : 1.0;
  73   double postadjust = c ? 0x1.0p+600 : 1.0;
  74
  75   c = xexp < EXPBIAS_DP64 - 500 | yexp < EXPBIAS_DP64 - 500;
  76   preadjust = c ? 0x1.0p+600 : preadjust;
  77   postadjust = c ? 0x1.0p-600 : postadjust;
  78
  79   double ax = x * preadjust;
  80   double ay = y * preadjust;
  81
  82   // The post adjust may overflow, but this can't be avoided in any case
  83   double r = sqrt(fma(ax, ax, ay * ay)) * postadjust;
  84
  85   // If the difference in exponents between x and y is large
  86   double s = x + y;
  87   c = __clc_abs(xexp - yexp) > MANTLENGTH_DP64 + 1;
  88   r = c ? s : r;
  89
  90   // Check for NaN
  91   // c = x != x | y != y;
  92   c = __clc_isnan(x) | __clc_isnan(y);
  93   r = c ? as_double(QNANBITPATT_DP64) : r;
  94
  95   // If either is Inf, we must return Inf
  96   c = x == as_double(PINFBITPATT_DP64) | y == as_double(PINFBITPATT_DP64);
  97   r = c ? as_double(PINFBITPATT_DP64) : r;
  98
  99   return r;
 100 }
 101
 102 _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_hypot, double,
 103                       double)
 104 #endif