4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
42 #define restrict _Restrict
47 /* double hypot(double x, double y)
51 * x or y is +Inf or -Inf => +Inf
52 * x or y is NaN => QNaN
53 * 2. Computes hypot(x,y):
54 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
60 * Compute xnm * xnm + ynm * ynm by simulating
61 * muti-precision arithmetic.
64 * Maximum error observed: less than 0.872 ulp after 16.777.216.000
68 extern double sqrt(double);
69 extern double fabs(double);
71 static const unsigned long long LCONST
[] = {
72 0x41b0000000000000ULL
, /* D2ON28 = 2 ** 28 */
73 0x0010000000000000ULL
, /* D2ONM1022 = 2 ** -1022 */
74 0x7fd0000000000000ULL
/* D2ONP1022 = 2 ** 1022 */
78 __vhypot_n(int n
, double * restrict px
, int stridex
, double * restrict py
,
79 int stridey
, double * restrict pz
, int stridez
);
81 #pragma no_inline(__vhypot_n)
92 spx = px; spy = py; spz = pz; \
100 __vhypot(int n
, double * restrict px
, int stridex
, double * restrict py
,
101 int stridey
, double * restrict pz
, int stridez
)
103 int hx0
, hx1
, hy0
, j0
, diff
;
104 double x_hi
, x_lo
, y_hi
, y_lo
;
107 double *spx
, *spy
, *spz
;
109 double D2ON28
= ((double*)LCONST
)[0]; /* 2 ** 28 */
110 double D2ONM1022
= ((double*)LCONST
)[1]; /* 2 **-1022 */
111 double D2ONP1022
= ((double*)LCONST
)[2]; /* 2 ** 1022 */
127 if (hx0
>= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
131 j0
= hy0
- (diff
& j0
);
137 if (j0
>= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
139 int lx
= LO((px
- stridex
));
141 if (hx0
== 0x7ff00000 && lx
== 0) res
= x
== y
? y
: x
;
142 else if (hy0
== 0x7ff00000 && ly
== 0) res
= x
== y
? x
: y
;
149 if (((diff
^ j0
) - j0
) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
154 x_hi
= (x
+ D2ON28
) - D2ON28
;
156 y_hi
= (y
+ D2ON28
) - D2ON28
;
158 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
159 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
163 res
= D2ONP1022
* res
;
169 if (hy0
>= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
173 j0
= hy0
- (diff
& j0
);
179 if (j0
>= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
181 int lx
= LO((px
- stridex
));
183 if (hx0
== 0x7ff00000 && lx
== 0) res
= x
== y
? y
: x
;
184 else if (hy0
== 0x7ff00000 && ly
== 0) res
= x
== y
? x
: y
;
191 if (((diff
^ j0
) - j0
) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
196 x_hi
= (x
+ D2ON28
) - D2ON28
;
198 y_hi
= (y
+ D2ON28
) - D2ON28
;
200 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
201 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
205 res
= D2ONP1022
* res
;
214 if (hx0
< 0x00100000 && hy0
< 0x00100000) /* X and Y are subnormal */
222 x_hi
= (x
+ D2ON28
) - D2ON28
;
224 y_hi
= (y
+ D2ON28
) - D2ON28
;
226 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
227 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
231 res
= D2ONM1022
* res
;
242 __vhypot_n (n_n
, spx
, stridex
, spy
, stridey
, spz
, stridez
);
257 j0
= hy0
- (diff
& j0
);
260 if (j0
>= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
264 if (j0
>= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
268 if (hx0
== 0x7ff00000 && lx
== 0) res
= x
== y
? y
: x
;
269 else if (hy0
== 0x7ff00000 && ly
== 0) res
= x
== y
? x
: y
;
277 if (((diff
^ j0
) - j0
) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
282 x_hi
= (x
+ D2ON28
) - D2ON28
;
284 y_hi
= (y
+ D2ON28
) - D2ON28
;
286 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
287 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
291 res
= D2ONP1022
* res
;
303 if (j0
< 0x00100000) /* X and Y are subnormal */
308 x_hi
= (x
+ D2ON28
) - D2ON28
;
310 y_hi
= (y
+ D2ON28
) - D2ON28
;
312 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
313 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
317 res
= D2ONM1022
* res
;
322 HI(&scl
) = (0x7fe00000 - j0
);
327 x_hi
= (x
+ D2ON28
) - D2ON28
;
328 y_hi
= (y
+ D2ON28
) - D2ON28
;
332 res
= (x_hi
* x_hi
+ y_hi
* y_hi
);
333 res
+= ((x
+ x_hi
) * x_lo
+ (y
+ y_hi
) * y_lo
);
345 __vhypot_n(int n
, double * restrict px
, int stridex
, double * restrict py
,
346 int stridey
, double * restrict pz
, int stridez
)
348 int hx0
, hy0
, j0
, diff0
;
349 double x_hi0
, x_lo0
, y_hi0
, y_lo0
, scl0
= 0;
351 double D2ON28
= ((double*)LCONST
)[0]; /* 2 ** 28 */
365 j0
= hy0
- (diff0
& j0
);
371 HI(&scl0
) = (0x7fe00000 - j0
);
376 x_hi0
= (x0
+ D2ON28
) - D2ON28
;
377 y_hi0
= (y0
+ D2ON28
) - D2ON28
;
381 res0
= (x_hi0
* x_hi0
+ y_hi0
* y_hi0
);
382 res0
+= ((x0
+ x_hi0
) * x_lo0
+ (y0
+ y_hi0
) * y_lo0
);