Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / useful / fp_80_64.c
blob2c328b72223f2148be021cb9c07286a25324ac08
2 #ifndef USED_AS_INCLUDE
4 #include "../pub/libvex_basictypes.h"
5 #include <stdio.h>
6 #include <malloc.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <assert.h>
12 /* Test program for developing code for conversions between
13 x87 64-bit and 80-bit floats.
15 80-bit format exists only for x86/x86-64, and so the routines
16 hardwire it as little-endian. The 64-bit format (IEEE double)
17 could exist on any platform, little or big-endian and so we
18 have to take that into account. IOW, these routines have to
19 work correctly when compiled on both big- and little-endian
20 targets, but the 80-bit images only ever have to exist in
21 little-endian format.
23 static void show_f80 ( UChar* );
24 static void show_f64 ( UChar* );
26 static inline
27 UInt read_bit_array ( UChar* arr, UInt n )
29 UChar c = arr[n >> 3];
30 c >>= (n&7);
31 return c & 1;
34 static inline
35 void write_bit_array ( UChar* arr, UInt n, UInt b )
37 UChar c = arr[n >> 3];
38 c &= ~(1 << (n&7));
39 c |= ((b&1) << (n&7));
40 arr[n >> 3] = c;
44 static void convert_f80le_to_f64le_HW ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
46 asm volatile ("ffree %%st(7); fldt (%0); fstpl (%1)"
48 : "r" (&f80[0]), "r" (&f64[0])
49 : "memory" );
52 static void convert_f64le_to_f80le_HW ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
54 asm volatile ("ffree %%st(7); fldl (%0); fstpt (%1)"
56 : "r" (&f64[0]), "r" (&f80[0])
57 : "memory" );
60 #endif /* ndef USED_AS_INCLUDE */
64 /* 80 and 64-bit floating point formats:
66 80-bit:
68 S 0 0-------0 zero
69 S 0 0X------X denormals
70 S 1-7FFE 1X------X normals (all normals have leading 1)
71 S 7FFF 10------0 infinity
72 S 7FFF 10X-----X snan
73 S 7FFF 11X-----X qnan
75 S is the sign bit. For runs X----X, at least one of the Xs must be
76 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
77 there is an explicitly represented leading 1, and a sign bit,
78 giving 80 in total.
80 64-bit avoids the confusion of an explicitly represented leading 1
81 and so is simpler:
83 S 0 0------0 zero
84 S 0 X------X denormals
85 S 1-7FE any normals
86 S 7FF 0------0 infinity
87 S 7FF 0X-----X snan
88 S 7FF 1X-----X qnan
90 Exponent is 11 bits, fractional part is 52 bits, and there is a
91 sign bit, giving 64 in total.
94 /* Convert a IEEE754 double (64-bit) into an x87 extended double
95 (80-bit), mimicing the hardware fairly closely. Both numbers are
96 stored little-endian. Limitations, all of which could be fixed,
97 given some level of hassle:
99 * Identity of NaNs is not preserved.
101 See comments in the code for more details.
103 static void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
105 Bool mantissaIsZero;
106 Int bexp, i, j, shift;
107 UChar sign;
109 sign = toUChar( (f64[7] >> 7) & 1 );
110 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
111 bexp &= 0x7FF;
113 mantissaIsZero = False;
114 if (bexp == 0 || bexp == 0x7FF) {
115 /* We'll need to know whether or not the mantissa (bits 51:0) is
116 all zeroes in order to handle these cases. So figure it
117 out. */
118 mantissaIsZero
119 = toBool(
120 (f64[6] & 0x0F) == 0
121 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
122 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
126 /* If the exponent is zero, either we have a zero or a denormal.
127 Produce a zero. This is a hack in that it forces denormals to
128 zero. Could do better. */
129 if (bexp == 0) {
130 f80[9] = toUChar( sign << 7 );
131 f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
132 = f80[3] = f80[2] = f80[1] = f80[0] = 0;
134 if (mantissaIsZero)
135 /* It really is zero, so that's all we can do. */
136 return;
138 /* There is at least one 1-bit in the mantissa. So it's a
139 potentially denormalised double -- but we can produce a
140 normalised long double. Count the leading zeroes in the
141 mantissa so as to decide how much to bump the exponent down
142 by. Note, this is SLOW. */
143 shift = 0;
144 for (i = 51; i >= 0; i--) {
145 if (read_bit_array(f64, i))
146 break;
147 shift++;
150 /* and copy into place as many bits as we can get our hands on. */
151 j = 63;
152 for (i = 51 - shift; i >= 0; i--) {
153 write_bit_array( f80, j,
154 read_bit_array( f64, i ) );
155 j--;
158 /* Set the exponent appropriately, and we're done. */
159 bexp -= shift;
160 bexp += (16383 - 1023);
161 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
162 f80[8] = toUChar( bexp & 0xFF );
163 return;
166 /* If the exponent is 7FF, this is either an Infinity, a SNaN or
167 QNaN, as determined by examining bits 51:0, thus:
168 0 ... 0 Inf
169 0X ... X SNaN
170 1X ... X QNaN
171 where at least one of the Xs is not zero.
173 if (bexp == 0x7FF) {
174 if (mantissaIsZero) {
175 /* Produce an appropriately signed infinity:
176 S 1--1 (15) 1 0--0 (63)
178 f80[9] = toUChar( (sign << 7) | 0x7F );
179 f80[8] = 0xFF;
180 f80[7] = 0x80;
181 f80[6] = f80[5] = f80[4] = f80[3]
182 = f80[2] = f80[1] = f80[0] = 0;
183 return;
185 /* So it's either a QNaN or SNaN. Distinguish by considering
186 bit 51. Note, this destroys all the trailing bits
187 (identity?) of the NaN. IEEE754 doesn't require preserving
188 these (it only requires that there be one QNaN value and one
189 SNaN value), but x87 does seem to have some ability to
190 preserve them. Anyway, here, the NaN's identity is
191 destroyed. Could be improved. */
192 if (f64[6] & 8) {
193 /* QNaN. Make a QNaN:
194 S 1--1 (15) 1 1--1 (63)
196 f80[9] = toUChar( (sign << 7) | 0x7F );
197 f80[8] = 0xFF;
198 f80[7] = 0xFF;
199 f80[6] = f80[5] = f80[4] = f80[3]
200 = f80[2] = f80[1] = f80[0] = 0xFF;
201 } else {
202 /* SNaN. Make a SNaN:
203 S 1--1 (15) 0 1--1 (63)
205 f80[9] = toUChar( (sign << 7) | 0x7F );
206 f80[8] = 0xFF;
207 f80[7] = 0x7F;
208 f80[6] = f80[5] = f80[4] = f80[3]
209 = f80[2] = f80[1] = f80[0] = 0xFF;
211 return;
214 /* It's not a zero, denormal, infinity or nan. So it must be a
215 normalised number. Rebias the exponent and build the new
216 number. */
217 bexp += (16383 - 1023);
219 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
220 f80[8] = toUChar( bexp & 0xFF );
221 f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
222 | ((f64[5] >> 5) & 7) );
223 f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
224 f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
225 f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
226 f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
227 f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
228 f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
229 f80[0] = toUChar( 0 );
233 /* Convert a x87 extended double (80-bit) into an IEEE 754 double
234 (64-bit), mimicking the hardware fairly closely. Both numbers are
235 stored little-endian. Limitations, both of which could be fixed,
236 given some level of hassle:
238 * Rounding following truncation could be a bit better.
240 * Identity of NaNs is not preserved.
242 See comments in the code for more details.
244 static void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
246 Bool isInf;
247 Int bexp, i, j;
248 UChar sign;
250 sign = toUChar((f80[9] >> 7) & 1);
251 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
252 bexp &= 0x7FFF;
254 /* If the exponent is zero, either we have a zero or a denormal.
255 But an extended precision denormal becomes a double precision
256 zero, so in either case, just produce the appropriately signed
257 zero. */
258 if (bexp == 0) {
259 f64[7] = toUChar(sign << 7);
260 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
261 return;
264 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
265 QNaN, as determined by examining bits 62:0, thus:
266 0 ... 0 Inf
267 0X ... X SNaN
268 1X ... X QNaN
269 where at least one of the Xs is not zero.
271 if (bexp == 0x7FFF) {
272 isInf = toBool(
273 (f80[7] & 0x7F) == 0
274 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
275 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
276 && f80[0] == 0
278 if (isInf) {
279 if (0 == (f80[7] & 0x80))
280 goto wierd_NaN;
281 /* Produce an appropriately signed infinity:
282 S 1--1 (11) 0--0 (52)
284 f64[7] = toUChar((sign << 7) | 0x7F);
285 f64[6] = 0xF0;
286 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
287 return;
289 /* So it's either a QNaN or SNaN. Distinguish by considering
290 bit 62. Note, this destroys all the trailing bits
291 (identity?) of the NaN. IEEE754 doesn't require preserving
292 these (it only requires that there be one QNaN value and one
293 SNaN value), but x87 does seem to have some ability to
294 preserve them. Anyway, here, the NaN's identity is
295 destroyed. Could be improved. */
296 if (f80[8] & 0x40) {
297 /* QNaN. Make a QNaN:
298 S 1--1 (11) 1 1--1 (51)
300 f64[7] = toUChar((sign << 7) | 0x7F);
301 f64[6] = 0xFF;
302 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
303 } else {
304 /* SNaN. Make a SNaN:
305 S 1--1 (11) 0 1--1 (51)
307 f64[7] = toUChar((sign << 7) | 0x7F);
308 f64[6] = 0xF7;
309 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
311 return;
314 /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
315 zero, the x87 FPU appears to consider the number denormalised
316 and converts it to a QNaN. */
317 if (0 == (f80[7] & 0x80)) {
318 wierd_NaN:
319 /* Strange hardware QNaN:
320 S 1--1 (11) 1 0--0 (51)
322 /* On a PIII, these QNaNs always appear with sign==1. I have
323 no idea why. */
324 f64[7] = (1 /*sign*/ << 7) | 0x7F;
325 f64[6] = 0xF8;
326 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
327 return;
330 /* It's not a zero, denormal, infinity or nan. So it must be a
331 normalised number. Rebias the exponent and consider. */
332 bexp -= (16383 - 1023);
333 if (bexp >= 0x7FF) {
334 /* It's too big for a double. Construct an infinity. */
335 f64[7] = toUChar((sign << 7) | 0x7F);
336 f64[6] = 0xF0;
337 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
338 return;
341 if (bexp <= 0) {
342 /* It's too small for a normalised double. First construct a
343 zero and then see if it can be improved into a denormal. */
344 f64[7] = toUChar(sign << 7);
345 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
347 if (bexp < -52)
348 /* Too small even for a denormal. */
349 return;
351 /* Ok, let's make a denormal. Note, this is SLOW. */
352 /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
353 indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
354 /* bexp is in range -52 .. 0 inclusive */
355 for (i = 63; i >= 0; i--) {
356 j = i - 12 + bexp;
357 if (j < 0) break;
358 /* We shouldn't really call vassert from generated code. */
359 assert(j >= 0 && j < 52);
360 write_bit_array ( f64,
362 read_bit_array ( f80, i ) );
364 /* and now we might have to round ... */
365 if (read_bit_array(f80, 10+1 - bexp) == 1)
366 goto do_rounding;
368 return;
371 /* Ok, it's a normalised number which is representable as a double.
372 Copy the exponent and mantissa into place. */
374 for (i = 0; i < 52; i++)
375 write_bit_array ( f64,
377 read_bit_array ( f80, i+11 ) );
379 f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
380 f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
381 f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
382 f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
383 f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
384 f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
386 f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
388 f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
390 /* Now consider any rounding that needs to happen as a result of
391 truncating the mantissa. */
392 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
394 /* If the bottom bits of f80 are "100 0000 0000", then the
395 infinitely precise value is deemed to be mid-way between the
396 two closest representable values. Since we're doing
397 round-to-nearest (the default mode), in that case it is the
398 bit immediately above which indicates whether we should round
399 upwards or not -- if 0, we don't. All that is encapsulated
400 in the following simple test. */
401 if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
402 return;
404 do_rounding:
405 /* Round upwards. This is a kludge. Once in every 2^24
406 roundings (statistically) the bottom three bytes are all 0xFF
407 and so we don't round at all. Could be improved. */
408 if (f64[0] != 0xFF) {
409 f64[0]++;
411 else
412 if (f64[0] == 0xFF && f64[1] != 0xFF) {
413 f64[0] = 0;
414 f64[1]++;
416 else
417 if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
418 f64[0] = 0;
419 f64[1] = 0;
420 f64[2]++;
422 /* else we don't round, but we should. */
427 #ifndef USED_AS_INCLUDE
429 //////////////
431 static void show_f80 ( UChar* f80 )
433 Int i;
434 printf("%d ", read_bit_array(f80, 79));
436 for (i = 78; i >= 64; i--)
437 printf("%d", read_bit_array(f80, i));
439 printf(" %d ", read_bit_array(f80, 63));
441 for (i = 62; i >= 0; i--)
442 printf("%d", read_bit_array(f80, i));
445 static void show_f64le ( UChar* f64 )
447 Int i;
448 printf("%d ", read_bit_array(f64, 63));
450 for (i = 62; i >= 52; i--)
451 printf("%d", read_bit_array(f64, i));
453 printf(" ");
454 for (i = 51; i >= 0; i--)
455 printf("%d", read_bit_array(f64, i));
458 //////////////
461 /* Convert f80 to a 64-bit IEEE double using both the hardware and the
462 soft version, and compare the results. If they differ, print
463 details and return 1. If they are identical, return 0.
465 int do_80_to_64_test ( Int test_no, UChar* f80, UChar* f64h, UChar* f64s)
467 Char buf64s[100], buf64h[100];
468 Bool same;
469 Int k;
470 convert_f80le_to_f64le_HW(f80, f64h);
471 convert_f80le_to_f64le(f80, f64s);
472 same = True;
473 for (k = 0; k < 8; k++) {
474 if (f64s[k] != f64h[k]) {
475 same = False; break;
478 /* bitwise identical */
479 if (same)
480 return 0;
482 sprintf(buf64s, "%.16e", *(double*)f64s);
483 sprintf(buf64h, "%.16e", *(double*)f64h);
485 /* Not bitwise identical, but pretty darn close */
486 if (0 == strcmp(buf64s, buf64h))
487 return 0;
489 printf("\n");
490 printf("f80: "); show_f80(f80); printf("\n");
491 printf("f64h: "); show_f64le(f64h); printf("\n");
492 printf("f64s: "); show_f64le(f64s); printf("\n");
494 printf("[test %d] %.16Le -> (hw %s, sw %s)\n",
495 test_no, *(long double*)f80,
496 buf64h, buf64s );
498 return 1;
502 /* Convert an IEEE 64-bit double to a x87 extended double (80 bit)
503 using both the hardware and the soft version, and compare the
504 results. If they differ, print details and return 1. If they are
505 identical, return 0.
507 int do_64_to_80_test ( Int test_no, UChar* f64, UChar* f80h, UChar* f80s)
509 Char buf80s[100], buf80h[100];
510 Bool same;
511 Int k;
512 convert_f64le_to_f80le_HW(f64, f80h);
513 convert_f64le_to_f80le(f64, f80s);
514 same = True;
515 for (k = 0; k < 10; k++) {
516 if (f80s[k] != f80h[k]) {
517 same = False; break;
520 /* bitwise identical */
521 if (same)
522 return 0;
524 sprintf(buf80s, "%.20Le", *(long double*)f80s);
525 sprintf(buf80h, "%.20Le", *(long double*)f80h);
527 /* Not bitwise identical, but pretty darn close */
528 if (0 == strcmp(buf80s, buf80h))
529 return 0;
531 printf("\n");
532 printf("f64: "); show_f64le(f64); printf("\n");
533 printf("f80h: "); show_f80(f80h); printf("\n");
534 printf("f80s: "); show_f80(f80s); printf("\n");
536 printf("[test %d] %.16e -> (hw %s, sw %s)\n",
537 test_no, *(double*)f64,
538 buf80h, buf80s );
540 return 1;
545 void do_80_to_64_tests ( void )
547 UInt b9,b8,b7,i, j;
548 Int fails=0, tests=0;
549 UChar* f64h = malloc(8);
550 UChar* f64s = malloc(8);
551 UChar* f80 = malloc(10);
552 int STEP = 1;
554 srandom(4343);
556 /* Ten million random bit patterns */
557 for (i = 0; i < 10000000; i++) {
558 tests++;
559 for (j = 0; j < 10; j++)
560 f80[j] = (random() >> 7) & 255;
562 fails += do_80_to_64_test(tests, f80, f64h, f64s);
565 /* 2^24 numbers in which the first 24 bits are tested exhaustively
566 -- this covers the sign, exponent and leading part of the
567 mantissa. */
568 for (b9 = 0; b9 < 256; b9 += STEP) {
569 for (b8 = 0; b8 < 256; b8 += STEP) {
570 for (b7 = 0; b7 < 256; b7 += STEP) {
571 tests++;
572 for (i = 0; i < 10; i++)
573 f80[i] = 0;
574 for (i = 0; i < 8; i++)
575 f64h[i] = f64s[i] = 0;
576 f80[9] = b9;
577 f80[8] = b8;
578 f80[7] = b7;
580 fails += do_80_to_64_test(tests, f80, f64h, f64s);
583 printf("\n80 -> 64: %d tests, %d fails\n\n", tests, fails);
587 void do_64_to_80_tests ( void )
589 UInt b7,b6,b5,i, j;
590 Int fails=0, tests=0;
591 UChar* f80h = malloc(10);
592 UChar* f80s = malloc(10);
593 UChar* f64 = malloc(8);
594 int STEP = 1;
596 srandom(2323);
598 /* Ten million random bit patterns */
599 for (i = 0; i < 10000000; i++) {
600 tests++;
601 for (j = 0; j < 8; j++)
602 f64[j] = (random() >> 13) & 255;
604 fails += do_64_to_80_test(tests, f64, f80h, f80s);
607 /* 2^24 numbers in which the first 24 bits are tested exhaustively
608 -- this covers the sign, exponent and leading part of the
609 mantissa. */
610 for (b7 = 0; b7 < 256; b7 += STEP) {
611 for (b6 = 0; b6 < 256; b6 += STEP) {
612 for (b5 = 0; b5 < 256; b5 += STEP) {
613 tests++;
614 for (i = 0; i < 8; i++)
615 f64[i] = 0;
616 for (i = 0; i < 10; i++)
617 f80h[i] = f80s[i] = 0;
618 f64[7] = b7;
619 f64[6] = b6;
620 f64[5] = b5;
622 fails += do_64_to_80_test(tests, f64, f80h, f80s);
625 printf("\n64 -> 80: %d tests, %d fails\n\n", tests, fails);
629 int main ( void )
631 do_80_to_64_tests();
632 do_64_to_80_tests();
633 return 0;
636 #endif /* ndef USED_AS_INCLUDE */