/*---------------------------------------------------------------*/
/*--- begin                               host_generic_maddf.c ---*/
/*---------------------------------------------------------------*/

/*
   Compute x * y + z as ternary operation.
   Copyright (C) 2010-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.
*/
/* Generic helper functions for doing FMA, i.e. compute x * y + z
   as ternary operation.
   These are purely back-end entities and cannot be seen/referenced
   from IR. */
#include "libvex_basictypes.h"
#include "host_generic_maddf.h"
#include "main_util.h"
/* This implementation relies on Double being more than twice as
   precise as Float and uses rounding to odd in order to avoid problems
   with double rounding.
   See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
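/* (Explanatory note.)  Round to odd here means: perform the intermediate
   addition with the FPU rounding toward zero, and if that addition was
   inexact, force the lowest mantissa bit of the truncated sum to 1.  The
   final conversion to the narrower type then rounds to nearest correctly,
   because the sticky low bit rules out the halfway cases that would
   otherwise cause double-rounding errors. */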
#define FORCE_EVAL(X) __asm __volatile__ ("" : : "m" (X))
#if defined(__x86_64__) && defined(__SSE2_MATH__)
# define ENV_TYPE unsigned int
/* Save current rounding mode into ENV, hold exceptions, set rounding
   mode to rounding toward zero.  */
# define ROUNDTOZERO(env) \
   do {                                                   \
      unsigned int mxcsr;                                  \
      __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr));    \
      (env) = mxcsr;                                       \
      mxcsr = (mxcsr | 0x7f80) & ~0x3f;                    \
      __asm __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));   \
   } while (0)
/* Restore exceptions from ENV, return if inexact exception has been raised
   since ROUNDTOZERO.  */
# define RESET_TESTINEXACT(env) \
   ({                                                      \
      unsigned int mxcsr, ret;                             \
      __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr));    \
      ret = (mxcsr >> 5) & 1;                              \
      mxcsr = (mxcsr & 0x3d) | (env);                      \
      __asm __volatile__ ("ldmxcsr %0" : : "m" (mxcsr));   \
      ret;                                                 \
   })
/* Return if inexact exception has been raised since ROUNDTOZERO.  */
# define TESTINEXACT() \
   ({                                                      \
      unsigned int mxcsr;                                  \
      __asm __volatile__ ("stmxcsr %0" : "=m" (mxcsr));    \
      (mxcsr >> 5) & 1;                                    \
   })
#endif
#define DBL_MANT_DIG 53
#define IEEE754_DOUBLE_BIAS 0x3ff
union vg_ieee754_double {
   Double d;

   /* This is the IEEE 754 double-precision format.  */
   struct {
      /* Field order depends on host endianness.  BIG_ENDIAN_HOST is a
         placeholder name; substitute the build's real endianness test. */
#ifdef BIG_ENDIAN_HOST
      unsigned int negative:1;
      unsigned int exponent:11;
      unsigned int mantissa0:20;
      unsigned int mantissa1:32;
#else
      unsigned int mantissa1:32;
      unsigned int mantissa0:20;
      unsigned int exponent:11;
      unsigned int negative:1;
#endif
   } ieee;
};
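/* For orientation: with this layout the Double 1.0 has negative=0,
   exponent=0x3ff (the bias), mantissa0=0 and mantissa1=0; a biased exponent
   of 0 means zero or a subnormal and 0x7ff means Inf/NaN.  mantissa1 holds
   the low 32 mantissa bits, so its bit 0 is the last (odd/even) bit that
   the round-to-odd trick manipulates. */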
void h_generic_calc_MAddF32 ( /*OUT*/Float* res,
                              Float* argX, Float* argY, Float* argZ )
{
#ifndef ENV_TYPE
   /* Lame fallback implementation.  */
   *res = *argX * *argY + *argZ;
#else
   ENV_TYPE env;
   /* Multiplication is always exact.  */
   Double temp = (Double) *argX * (Double) *argY;
   union vg_ieee754_double u;

   ROUNDTOZERO (env);

   /* Perform addition with round to odd.  */
   u.d = temp + (Double) *argZ;
   /* Ensure the addition is not scheduled after fetestexcept call.  */
   FORCE_EVAL (u.d);

   /* Reset rounding mode and test for inexact simultaneously.  */
   int j = RESET_TESTINEXACT (env);

   if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
      u.ieee.mantissa1 |= j;

   /* And finally truncation with round to nearest.  */
   *res = (Float) u.d;
#endif
}
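/* Usage sketch (illustrative only, values chosen so everything is exact):

      Float x = 2.5f, y = 4.0f, z = 1.0f, r;
      h_generic_calc_MAddF32 ( &r, &x, &y, &z );   // r == 11.0f

   The round-to-odd machinery above only changes the outcome when the
   intermediate Double sum lies between two representable Floats in a way
   that a naive convert-then-round would double-round. */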
void h_generic_calc_MAddF64 ( /*OUT*/Double* res,
                              Double* argX, Double* argY, Double* argZ )
{
#ifndef ENV_TYPE
   /* Lame fallback implementation.  */
   *res = *argX * *argY + *argZ;
#else
   Double x = *argX, y = *argY, z = *argZ;
   union vg_ieee754_double u, v, w;
   int adjust = 0;
   u.d = x;
   v.d = y;
   w.d = z;
   if (UNLIKELY (u.ieee.exponent + v.ieee.exponent
                 >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
       || UNLIKELY (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
       || UNLIKELY (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
       || UNLIKELY (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
       || UNLIKELY (u.ieee.exponent + v.ieee.exponent
                    <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG)) {
      /* If z is Inf, but x and y are finite, the result should be
         z rather than NaN.  */
      if (w.ieee.exponent == 0x7ff
          && u.ieee.exponent != 0x7ff
          && v.ieee.exponent != 0x7ff) {
         *res = (z + x) + y;
         return;
      }
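      /* Example of the case just handled: x = y = 1e300, z = -Inf.  The
         naive x * y + z overflows the product to +Inf and then produces
         +Inf + -Inf = NaN, while the correct fused result is z, i.e. -Inf. */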
      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
         or if x * y is less than half of DBL_DENORM_MIN,
         compute as x * y + z.  */
      if (u.ieee.exponent == 0x7ff
          || v.ieee.exponent == 0x7ff
          || w.ieee.exponent == 0x7ff
          || u.ieee.exponent + v.ieee.exponent
             > 0x7ff + IEEE754_DOUBLE_BIAS
          || u.ieee.exponent + v.ieee.exponent
             < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) {
         *res = x * y + z;
         return;
      }
      if (u.ieee.exponent + v.ieee.exponent
          >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG) {
         /* Compute 1p-53 times smaller result and multiply
            at the end.  */
         if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent -= DBL_MANT_DIG;
         else
            v.ieee.exponent -= DBL_MANT_DIG;
         /* If the combined x and y exponent is very large and z's exponent
            is very small, it doesn't matter if we don't adjust it.  */
         if (w.ieee.exponent > DBL_MANT_DIG)
            w.ieee.exponent -= DBL_MANT_DIG;
         adjust = 1;
      } else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG) {
         /* Similarly.
            If z exponent is very large and x and y exponents are
            very small, it doesn't matter if we don't adjust it.  */
         if (u.ieee.exponent > v.ieee.exponent) {
            if (u.ieee.exponent > DBL_MANT_DIG)
               u.ieee.exponent -= DBL_MANT_DIG;
         } else if (v.ieee.exponent > DBL_MANT_DIG)
            v.ieee.exponent -= DBL_MANT_DIG;
         w.ieee.exponent -= DBL_MANT_DIG;
         adjust = 1;
      } else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG) {
         u.ieee.exponent -= DBL_MANT_DIG;
         if (v.ieee.exponent)
            v.ieee.exponent += DBL_MANT_DIG;
         else
            v.d *= 0x1p53;
      } else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG) {
         v.ieee.exponent -= DBL_MANT_DIG;
         if (u.ieee.exponent)
            u.ieee.exponent += DBL_MANT_DIG;
         else
            u.d *= 0x1p53;
      } else /* if (u.ieee.exponent + v.ieee.exponent
                    <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */ {
         if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent += 2 * DBL_MANT_DIG;
         else
            v.ieee.exponent += 2 * DBL_MANT_DIG;
         if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4) {
            if (w.ieee.exponent)
               w.ieee.exponent += 2 * DBL_MANT_DIG;
            else
               w.d *= 0x1p106;
            adjust = -1;
         }
         /* Otherwise x * y should just affect inexact
            and nothing else.  */
      }
      x = u.d;
      y = v.d;
      z = w.d;
   }
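   /* Summary of the scaling above: adjust == 1 means the computation now
      yields a result 2^-53 times too small, so the final sum is multiplied
      by 0x1p53; adjust == -1 means it yields a result 2^106 times too
      large, so the final sum is multiplied by 0x1p-106; adjust == 0 means
      no compensation is needed. */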
   /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
# define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
   Double x1 = x * C;
   Double y1 = y * C;
   Double m1 = x * y;
   x1 = (x - x1) + x1;
   y1 = (y - y1) + y1;
   Double x2 = x - x1;
   Double y2 = y - y1;
   Double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
# undef C
   /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
   Double a1 = z + m1;
   Double t1 = a1 - z;
   Double t2 = a1 - t1;
   t1 = m1 - t1;
   t2 = z - t2;
   Double a2 = t1 + t2;

   ENV_TYPE env;
   ROUNDTOZERO (env);

   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;

   if (UNLIKELY (adjust < 0)) {
      if ((u.ieee.mantissa1 & 1) == 0)
         u.ieee.mantissa1 |= TESTINEXACT ();
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after fetestexcept call.  */
      FORCE_EVAL (v.d);
   }
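   /* The Knuth two-sum above gives a1 = round(z + m1) and a2 as the exact
      leftover, i.e. z + m1 == a1 + a2 without error and without needing to
      know which operand is larger.  The true result is therefore
      a1 + a2 + m2, and a2 + m2 has just been added with round to odd so
      the remaining additions cannot double-round. */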
   /* Reset rounding mode and test for inexact simultaneously.  */
   int j = RESET_TESTINEXACT (env) != 0;

   if (LIKELY (adjust == 0)) {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
         u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d.  */
      *res = a1 + u.d;
   } else if (LIKELY (adjust > 0)) {
      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
         u.ieee.mantissa1 |= j;
      /* Result is a1 + u.d, scaled up.  */
      *res = (a1 + u.d) * 0x1p53;
   } else {
      /* If a1 + u.d is exact, the only rounding happens during
         the scaling down.  */
      if (j == 0) {
         *res = v.d * 0x1p-106;
         return;
      }
      /* If result rounded to zero is not subnormal, no double
         rounding will occur.  */
      if (v.ieee.exponent > 106) {
         *res = (a1 + u.d) * 0x1p-106;
         return;
      }
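      /* Rationale for the 106 threshold: multiplying by 0x1p-106 lowers the
         biased exponent by 106, so any v with v.ieee.exponent > 106 is
         still a normal number after the scaling and therefore sees only a
         single rounding. */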
      /* If v.d * 0x1p-106 with round to zero is a subnormal above
         or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
         down just by 1 bit, which means v.ieee.mantissa1 |= j would
         change the round bit, not sticky or guard bit.
         v.d * 0x1p-106 never normalizes by shifting up,
         so round bit plus sticky bit should be already enough
         for proper rounding.  */
      if (v.ieee.exponent == 106) {
         /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
            v.ieee.mantissa1 & 1 is the round bit and j is our sticky
            bit.  In round-to-nearest 001 rounds down like 00,
            011 rounds up, even though 01 rounds down (thus we need
            to adjust), 101 rounds down like 10 and 111 rounds up
            like 11.  */
         if ((v.ieee.mantissa1 & 3) == 1) {
            v.d *= 0x1p-106;
            if (v.ieee.negative)
               *res = v.d - 0x1p-1074;
            else
               *res = v.d + 0x1p-1074;
         } else
            *res = v.d * 0x1p-106;
         return;
      }
      v.ieee.mantissa1 |= j;
      *res = v.d * 0x1p-106;
   }
#endif
}
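/* Illustrative property of the fused operation above: calling it with
   z == -(x * y) (the negated, already rounded Double product) leaves the
   rounding error of x * y in *res, whereas the unfused expression
   x * y + z is exactly zero.  That is the observable difference a single
   rounding makes. */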
/*---------------------------------------------------------------*/
/*--- end                                 host_generic_maddf.c ---*/
/*---------------------------------------------------------------*/