usr/src/lib/libc/sparc/fp/_D_cplx_div.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 /*
  30  * _D_cplx_div(z, w) returns z / w with infinities handled according
  31  * to C99.
  32  *
  33  * If z and w are both finite and w is nonzero, _D_cplx_div(z, w)
  34  * delivers the complex quotient q according to the usual formula:
  35  * let a = Re(z), b = Im(z), c = Re(w), and d = Im(w); then q = x +
  36  * I * y where x = (a * c + b * d) / r and y = (b * c - a * d) / r
  37  * with r = c * c + d * d.  This implementation scales to avoid
  38  * premature underflow or overflow.
  39  *
  40  * If z is neither NaN nor zero and w is zero, or if z is infinite
  41  * and w is finite and nonzero, _D_cplx_div delivers an infinite
  42  * result.  If z is finite and w is infinite, _D_cplx_div delivers
  43  * a zero result.
  44  *
  45  * If z and w are both zero or both infinite, or if either z or w is
  46  * a complex NaN, _D_cplx_div delivers NaN + I * NaN.  C99 doesn't
  47  * specify these cases.
  48  *
  49  * This implementation can raise spurious underflow, overflow, in-
  50  * valid operation, inexact, and division-by-zero exceptions.  C99
  51  * allows this.
  52  *
  53  * Warning: Do not attempt to "optimize" this code by removing multi-
  54  * plications by zero.
  55  */
  56
  57 #if !defined(sparc) && !defined(__sparc)
  58 #error This code is for SPARC only
  59 #endif
  60
  61 static union {
  62         int     i[2];
  63         double  d;
  64 } inf = {
  65         0x7ff00000, 0
  66 };
  67
  68 /*
  69  * Return +1 if x is +Inf, -1 if x is -Inf, and 0 otherwise
  70  */
  71 static int
  72 testinf(double x)
  73 {
  74         union {
  75                 int     i[2];
  76                 double  d;
  77         } xx;
  78
  79         xx.d = x;
  80         return (((((xx.i[0] << 1) - 0xffe00000) | xx.i[1]) == 0)?
  81                 (1 | (xx.i[0] >> 31)) : 0);
  82 }
  83
/*
 * Return the complex quotient z / w.
 *
 *   z  dividend; a = Re(z), b = Im(z) below
 *   w  divisor;  c = Re(w), d = Im(w) below
 *
 * The function handles, in order:
 *   1. w infinite or NaN,
 *   2. w zero,
 *   3. z infinite or NaN (with w finite and nonzero),
 *   4. the general finite case, using power-of-two scaling of w.
 *
 * Every path forms the result as (a*c + b*d) and (b*c - a*d), possibly
 * multiplied by a factor r, so that signs, infinities and NaNs come out
 * of ordinary floating-point arithmetic.  The multiplications by zero
 * and by one are deliberate and must not be removed.
 *
 * The unions below rely on i[0] being the high-order word of the
 * double (the SPARC word order).
 */
double _Complex
_D_cplx_div(double _Complex z, double _Complex w)
{
        double _Complex v;
        union {
                int     i[2];
                double  d;
        } aa, bb, cc, dd, ss;
        double          a, b, c, d, r;
        int             ha, hb, hc, hd, hz, hw, hs, i, j;

        /*
         * The following is equivalent to
         *
         *  a = creal(z); b = cimag(z);
         *  c = creal(w); d = cimag(w);
         */
        a = ((double *)&z)[0];
        b = ((double *)&z)[1];
        c = ((double *)&w)[0];
        d = ((double *)&w)[1];

        /*
         * extract high-order words to estimate |z| and |w|; the sign
         * bit is masked off, and hz (resp. hw) is the larger of the two
         * high words of z (resp. w)
         */
        aa.d = a;
        bb.d = b;
        ha = aa.i[0] & ~0x80000000;
        hb = bb.i[0] & ~0x80000000;
        hz = (ha > hb)? ha : hb;

        cc.d = c;
        dd.d = d;
        hc = cc.i[0] & ~0x80000000;
        hd = dd.i[0] & ~0x80000000;
        hw = (hc > hd)? hc : hd;

        /* check for special cases */
        if (hw >= 0x7ff00000) { /* w is inf or nan */
                /*
                 * The final multiplication by r = 0 produces a zero
                 * when the parenthesized sums are finite and a NaN
                 * when they are infinite or NaN.
                 */
                r = 0.0;
                i = testinf(c);
                j = testinf(d);
                if (i | j) { /* w is infinite */
                        /*
                         * "factor out" infinity, being careful to preserve
                         * signs of finite values
                         */
                        c = i? i : ((cc.i[0] < 0)? -0.0 : 0.0);
                        d = j? j : ((dd.i[0] < 0)? -0.0 : 0.0);
                        if (hz >= 0x7fe00000) {
                                /* scale to avoid overflow below */
                                c *= 0.5;
                                d *= 0.5;
                        }
                }
                ((double *)&v)[0] = (a * c + b * d) * r;
                ((double *)&v)[1] = (b * c - a * d) * r;
                return (v);
        }

        if (hw < 0x00100000) {
                /*
                 * This nonsense is needed to work around some SPARC
                 * implementations of nonstandard mode; if both parts
                 * of w are subnormal, multiply them by one to force
                 * them to be flushed to zero when nonstandard mode
                 * is enabled.  Sheesh.
                 */
                cc.d = c = c * 1.0;
                dd.d = d = d * 1.0;
                /* recompute the magnitude estimate from the new values */
                hc = cc.i[0] & ~0x80000000;
                hd = dd.i[0] & ~0x80000000;
                hw = (hc > hd)? hc : hd;
        }

        /*
         * Both sign-stripped high words and both low words are zero,
         * i.e. c and d are each a (signed) zero.
         */
        if (hw == 0 && (cc.i[1] | dd.i[1]) == 0) {
                /* w is zero; multiply z by 1/Re(w) - I * Im(w) */
                c = 1.0 / c;    /* infinity carrying the sign of Re(w) */
                i = testinf(a);
                j = testinf(b);
                if (i | j) { /* z is infinite */
                        /* replace the parts of z by +1, -1 or 0 */
                        a = i;
                        b = j;
                }
                ((double *)&v)[0] = a * c + b * d;
                ((double *)&v)[1] = b * c - a * d;
                return (v);
        }

        /* from here on w is finite and nonzero */
        if (hz >= 0x7ff00000) { /* z is inf or nan */
                /* r stays 1 for a NaN z, so the NaN just propagates */
                r = 1.0;
                i = testinf(a);
                j = testinf(b);
                if (i | j) { /* z is infinite */
                        /*
                         * replace the parts of z by +1, -1 or 0 and
                         * reintroduce the infinity through r
                         */
                        a = i;
                        b = j;
                        r = inf.d;
                }
                ((double *)&v)[0] = (a * c + b * d) * r;
                ((double *)&v)[1] = (b * c - a * d) * r;
                return (v);
        }

        /*
         * Scale c and d to compute 1/|w|^2 and the real and imaginary
         * parts of the quotient.
         *
         * Note that for any s, if we let c' = sc, d' = sd, c'' = sc',
         * and d'' = sd', then
         *
         *  (ac'' + bd'') / (c'^2 + d'^2) = (ac + bd) / (c^2 + d^2)
         *
         * and similarly for the imaginary part of the quotient.  We want
         * to choose s such that (i) r := 1/(c'^2 + d'^2) can be computed
         * without overflow or harmful underflow, and (ii) (ac'' + bd'')
         * and (bc'' - ad'') can be computed without spurious overflow or
         * harmful underflow.  To avoid unnecessary rounding, we restrict
         * s to a power of two.
         *
         * To satisfy (i), we need to choose s such that max(|c'|,|d'|)
         * is not too far from one.  To satisfy (ii), we need to choose
         * s such that max(|c''|,|d''|) is also not too far from one.
         * There is some leeway in our choice, but to keep the logic
         * from getting overly complicated, we simply attempt to roughly
         * balance these constraints by choosing s so as to make r about
         * the same size as max(|c''|,|d''|).  This corresponds to choos-
         * ing s to be a power of two near |w|^(-3/4).
         *
         * Regarding overflow, observe that if max(|c''|,|d''|) <= 1/2,
         * then the computation of (ac'' + bd'') and (bc'' - ad'') can-
         * not overflow; otherwise, the computation of either of these
         * values can only incur overflow if the true result would be
         * within a factor of two of the overflow threshold.  In other
         * words, if we bias the choice of s such that at least one of
         *
         *  max(|c''|,|d''|) <= 1/2   or   r >= 2
         *
         * always holds, then no undeserved overflow can occur.
         *
         * To cope with underflow, note that if r < 2^-53, then any
         * intermediate results that underflow are insignificant; either
         * they will be added to normal results, rendering the under-
         * flow no worse than ordinary roundoff, or they will contribute
         * to a final result that is smaller than the smallest subnormal
         * number.  Therefore, we need only modify the preceding logic
         * when z is very small and w is not too far from one.  In that
         * case, we can reduce the effect of any intermediate underflow
         * to no worse than ordinary roundoff error by choosing s so as
         * to make max(|c''|,|d''|) large enough that at least one of
         * (ac'' + bd'') or (bc'' - ad'') is normal.
         */
        /* default choice: high word of s derived from hw alone */
        hs = (((hw >> 2) - hw) + 0x6fd7ffff) & 0xfff00000;
        if (hz < 0x07200000) { /* |z| < 2^-909 */
                /*
                 * The OR of the two differences is non-negative only if
                 * both are, i.e. only if 0x32800000 <= hw <= 0x47100000;
                 * in that range use the alternative scale factor.
                 */
                if (((hw - 0x32800000) | (0x47100000 - hw)) >= 0)
                        hs = (((0x47100000 - hw) >> 1) & 0xfff00000)
                                + 0x3ff00000;
        }
        /*
         * Build s from its high word; the masks above cleared the
         * fraction bits and the low word is zero.
         */
        ss.i[0] = hs;
        ss.i[1] = 0;

        /* c' = sc, d' = sd, r = 1/(c'^2 + d'^2) */
        c *= ss.d;
        d *= ss.d;
        r = 1.0 / (c * c + d * d);

        /* c'' = sc', d'' = sd' */
        c *= ss.d;
        d *= ss.d;
        ((double *)&v)[0] = (a * c + b * d) * r;
        ((double *)&v)[1] = (b * c - a * d) * r;
        return (v);
}