4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
42 #define restrict _Restrict
47 /* double rhypot(double x, double y)
52 * x or y = NaN => QNaN
53 * x and y = 0 => Inf + divide-by-zero
54 * 2. Computes rhypot(x,y):
55 * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm))
61 * Compute 1/(xnm * xnm + ynm * ynm) by simulating
62 * muti-precision arithmetic.
65 * Maximum error observed: less than 0.869 ulp after 1.000.000.000
69 extern double sqrt(double);
70 extern double fabs(double);
72 static const int __vlibm_TBL_rhypot
[] = {
74 * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */
75 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
76 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
77 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
78 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
79 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
80 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
81 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
82 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
83 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
84 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
85 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
86 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
87 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
88 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
89 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
90 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
91 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
92 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
93 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
94 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
95 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
96 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
97 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
98 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
99 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
100 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
101 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
102 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
103 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
104 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
105 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
106 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
109 static const unsigned long long LCONST
[] = {
110 0x3ff0000000000000ULL
, /* DONE = 1.0 */
111 0x4000000000000000ULL
, /* DTWO = 2.0 */
112 0x4230000000000000ULL
, /* D2ON36 = 2**36 */
113 0x7fd0000000000000ULL
, /* D2ON1022 = 2**1022 */
114 0x3cb0000000000000ULL
, /* D2ONM52 = 2**-52 */
125 #define RETURN(I, ret) \
134 hx##I &= 0x7fffffff; \
135 hy##I &= 0x7fffffff; \
137 if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000) /* |X| or |Y| = Inf,NaN */ \
143 if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0; /* |X| = Inf */ \
144 else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0; /* |Y| = Inf */ \
145 else res0 = fabs(x) + fabs(y); \
151 diff0 = hy##I - hx##I; \
153 if (hx##I < 0x00100000 && hy##I < 0x00100000) /* |X| and |Y| = subnormal or zero */ \
160 if ((hx##I | hy##I | lx | ly) == 0) /* |X| and |Y| = 0 */ \
161 RETURN (I, DONE / 0.0) \
166 x = *(long long*)&x; \
167 y = *(long long*)&y; \
172 x_hi0 = (x + D2ON36) - D2ON36; \
173 y_hi0 = (y + D2ON36) - D2ON36; \
176 res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \
177 res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \
179 dres0 = res0_hi + res0_lo; \
181 iarr0 = HI(&dres0); \
182 iexp0 = iarr0 & 0xfff00000; \
184 iarr0 = (iarr0 >> 11) & 0x1fc; \
185 itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \
190 dd0 = dd0 * (DTWO - dd0 * dres0); \
191 dd0 = dd0 * (DTWO - dd0 * dres0); \
192 dres0 = dd0 * (DTWO - dd0 * dres0); \
194 HI(&res0) = HI(&dres0) & 0xffffff00; \
196 res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; \
197 res0 = sqrt (res0); \
199 res0 = D2ON1022 * res0; \
202 j0 = hy##I - (diff0 & j0); \
204 HI(&scl##I) = 0x7ff00000 - j0;
207 __vrhypot(int n
, double * restrict px
, int stridex
, double * restrict py
,
208 int stridey
, double * restrict pz
, int stridez
)
212 double x_hi0
, x_lo0
, y_hi0
, y_lo0
, scl0
= 0;
213 double x0
, y0
, res0
, dd0
;
214 double res0_hi
,res0_lo
, dres0
;
215 double x_hi1
, x_lo1
, y_hi1
, y_lo1
, scl1
= 0;
216 double x1
= 0.0L, y1
= 0.0L, res1
, dd1
;
217 double res1_hi
,res1_lo
, dres1
;
218 double x_hi2
, x_lo2
, y_hi2
, y_lo2
, scl2
= 0;
219 double x2
, y2
, res2
, dd2
;
220 double res2_hi
,res2_lo
, dres2
;
222 int hx0
, hy0
, j0
, diff0
;
223 int iarr0
, iexp0
, itbl0
;
225 int iarr1
, iexp1
, itbl1
;
227 int iarr2
, iexp2
, itbl2
;
231 double DONE
= ((double*)LCONST
)[0];
232 double DTWO
= ((double*)LCONST
)[1];
233 double D2ON36
= ((double*)LCONST
)[2];
234 double D2ON1022
= ((double*)LCONST
)[3];
235 double D2ONM52
= ((double*)LCONST
)[4];
237 double *pz0
, *pz1
= 0, *pz2
;
269 x_hi0
= (x0
+ D2ON36
) - D2ON36
;
270 y_hi0
= (y0
+ D2ON36
) - D2ON36
;
271 x_hi1
= (x1
+ D2ON36
) - D2ON36
;
272 y_hi1
= (y1
+ D2ON36
) - D2ON36
;
273 x_hi2
= (x2
+ D2ON36
) - D2ON36
;
274 y_hi2
= (y2
+ D2ON36
) - D2ON36
;
281 res0_hi
= (x_hi0
* x_hi0
+ y_hi0
* y_hi0
);
282 res1_hi
= (x_hi1
* x_hi1
+ y_hi1
* y_hi1
);
283 res2_hi
= (x_hi2
* x_hi2
+ y_hi2
* y_hi2
);
284 res0_lo
= ((x0
+ x_hi0
) * x_lo0
+ (y0
+ y_hi0
) * y_lo0
);
285 res1_lo
= ((x1
+ x_hi1
) * x_lo1
+ (y1
+ y_hi1
) * y_lo1
);
286 res2_lo
= ((x2
+ x_hi2
) * x_lo2
+ (y2
+ y_hi2
) * y_lo2
);
288 dres0
= res0_hi
+ res0_lo
;
289 dres1
= res1_hi
+ res1_lo
;
290 dres2
= res2_hi
+ res2_lo
;
295 iexp0
= iarr0
& 0xfff00000;
296 iexp1
= iarr1
& 0xfff00000;
297 iexp2
= iarr2
& 0xfff00000;
299 iarr0
= (iarr0
>> 11) & 0x1fc;
300 iarr1
= (iarr1
>> 11) & 0x1fc;
301 iarr2
= (iarr2
>> 11) & 0x1fc;
302 itbl0
= ((int*)((char*)__vlibm_TBL_rhypot
+ iarr0
))[0];
303 itbl1
= ((int*)((char*)__vlibm_TBL_rhypot
+ iarr1
))[0];
304 itbl2
= ((int*)((char*)__vlibm_TBL_rhypot
+ iarr2
))[0];
315 dd0
= dd0
* (DTWO
- dd0
* dres0
);
316 dd1
= dd1
* (DTWO
- dd1
* dres1
);
317 dd2
= dd2
* (DTWO
- dd2
* dres2
);
318 dd0
= dd0
* (DTWO
- dd0
* dres0
);
319 dd1
= dd1
* (DTWO
- dd1
* dres1
);
320 dd2
= dd2
* (DTWO
- dd2
* dres2
);
321 dres0
= dd0
* (DTWO
- dd0
* dres0
);
322 dres1
= dd1
* (DTWO
- dd1
* dres1
);
323 dres2
= dd2
* (DTWO
- dd2
* dres2
);
325 HI(&res0
) = HI(&dres0
) & 0xffffff00;
326 HI(&res1
) = HI(&dres1
) & 0xffffff00;
327 HI(&res2
) = HI(&dres2
) & 0xffffff00;
331 res0
+= (DONE
- res0_hi
* res0
- res0_lo
* res0
) * dres0
;
332 res1
+= (DONE
- res1_hi
* res1
- res1_lo
* res1
) * dres1
;
333 res2
+= (DONE
- res2_hi
* res2
- res2_lo
* res2
) * dres2
;
358 x_hi0
= (x0
+ D2ON36
) - D2ON36
;
359 y_hi0
= (y0
+ D2ON36
) - D2ON36
;
362 res0_hi
= (x_hi0
* x_hi0
+ y_hi0
* y_hi0
);
363 res0_lo
= ((x0
+ x_hi0
) * x_lo0
+ (y0
+ y_hi0
) * y_lo0
);
365 dres0
= res0_hi
+ res0_lo
;
368 iexp0
= iarr0
& 0xfff00000;
370 iarr0
= (iarr0
>> 11) & 0x1fc;
371 itbl0
= ((int*)((char*)__vlibm_TBL_rhypot
+ iarr0
))[0];
376 dd0
= dd0
* (DTWO
- dd0
* dres0
);
377 dd0
= dd0
* (DTWO
- dd0
* dres0
);
378 dres0
= dd0
* (DTWO
- dd0
* dres0
);
380 HI(&res0
) = HI(&dres0
) & 0xffffff00;
382 res0
+= (DONE
- res0_hi
* res0
- res0_lo
* res0
) * dres0
;
394 x_hi1
= (x1
+ D2ON36
) - D2ON36
;
395 y_hi1
= (y1
+ D2ON36
) - D2ON36
;
398 res1_hi
= (x_hi1
* x_hi1
+ y_hi1
* y_hi1
);
399 res1_lo
= ((x1
+ x_hi1
) * x_lo1
+ (y1
+ y_hi1
) * y_lo1
);
401 dres1
= res1_hi
+ res1_lo
;
404 iexp1
= iarr1
& 0xfff00000;
406 iarr1
= (iarr1
>> 11) & 0x1fc;
407 itbl1
= ((int*)((char*)__vlibm_TBL_rhypot
+ iarr1
))[0];
412 dd1
= dd1
* (DTWO
- dd1
* dres1
);
413 dd1
= dd1
* (DTWO
- dd1
* dres1
);
414 dres1
= dd1
* (DTWO
- dd1
* dres1
);
416 HI(&res1
) = HI(&dres1
) & 0xffffff00;
418 res1
+= (DONE
- res1_hi
* res1
- res1_lo
* res1
) * dres1
;