2 * Test Floating Point Conversion
5 /* we want additional float type definitions */
6 #define __STDC_WANT_IEC_60559_BFP_EXT__
7 #define __STDC_WANT_IEC_60559_TYPES_EXT__
/* Element count of the array x, computed with sizeof; x must be a real array, not a pointer. */
15 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* File-scope scratch buffer that get_flag_state() formats its text into. */
17 static char flag_str
[256];
/*
 * Render the floating-point exception bits in @flags as text in the
 * file-scope buffer flag_str.
 *
 * The first snprintf() emits five space-separated fields, each either the
 * flag's label (OVERFLOW, UNDERFLOW, DIV0, INEXACT, INVALID) or an empty
 * string when the corresponding FE_* bit is clear. The second snprintf()
 * writes the literal "OK".
 *
 * NOTE(review): the condition choosing between the two snprintf() calls
 * and the return statement are not visible in this excerpt - presumably
 * "OK" is used when no flag is set and flag_str is what gets returned;
 * confirm against the full file.
 *
 * Both calls write into the same static buffer, so every call overwrites
 * the text produced by the previous one.
 */
19 static char *get_flag_state(int flags
)
22 snprintf(flag_str
, sizeof(flag_str
), "%s %s %s %s %s",
23 flags
& FE_OVERFLOW
? "OVERFLOW" : "",
24 flags
& FE_UNDERFLOW
? "UNDERFLOW" : "",
25 flags
& FE_DIVBYZERO
? "DIV0" : "",
26 flags
& FE_INEXACT
? "INEXACT" : "",
27 flags
& FE_INVALID
? "INVALID" : "");
29 snprintf(flag_str
, sizeof(flag_str
), "OK");
/*
 * Print one double-precision value together with the currently raised
 * floating-point exception flags.
 *
 * @i:   index printed as a two-digit prefix
 * @num: value to print, shown both in %e notation and as its raw 64-bit
 *       pattern in hex
 *
 * The pending exceptions are sampled with fetestexcept(FE_ALL_EXCEPT) and
 * printed both as a hex mask and as the text from get_flag_state().
 */
static void print_double_number(int i, double num)
{
    /*
     * Fetch the bit pattern through a union. Dereferencing a uint64_t
     * pointer aimed at a double violates the effective-type (strict
     * aliasing) rules, whereas reading another union member is well
     * defined in C99 and later.
     */
    union {
        double d;
        uint64_t u;
    } bits = { .d = num };
    uint64_t double_as_hex = bits.u;
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    printf("%02d DOUBLE: %02.20e / %#020" PRIx64 " (%#x => %s)\n",
           i, num, double_as_hex, flags, fstr);
}
/*
 * Print one single-precision value together with the currently raised
 * floating-point exception flags.
 *
 * @i:   index printed as a two-digit prefix
 * @num: value to print, shown both in %e notation and as its raw 32-bit
 *       pattern in hex
 *
 * The pending exceptions are sampled with fetestexcept(FE_ALL_EXCEPT) and
 * printed both as a hex mask and as the text from get_flag_state().
 */
static void print_single_number(int i, float num)
{
    /*
     * Fetch the bit pattern through a union. Dereferencing a uint32_t
     * pointer aimed at a float violates the effective-type (strict
     * aliasing) rules, whereas reading another union member is well
     * defined in C99 and later.
     */
    union {
        float f;
        uint32_t u;
    } bits = { .f = num };
    uint32_t single_as_hex = bits.u;
    int flags = fetestexcept(FE_ALL_EXCEPT);
    char *fstr = get_flag_state(flags);

    printf("%02d SINGLE: %02.20e / %#010x (%#x => %s)\n",
           i, num, single_as_hex, flags, fstr);
}
/*
 * Print one half-precision value, carried as a uint16_t, together with the
 * currently raised floating-point exception flags.
 *
 * The flags are sampled with fetestexcept(FE_ALL_EXCEPT) and converted to
 * text by get_flag_state(). The format string expects, in order: a decimal
 * index, the 16-bit value in hex, the flag mask in hex and the flag text.
 *
 * NOTE(review): the printf() argument list is not visible in this excerpt;
 * presumably it is (i, num, flags, fstr) - confirm against the full file.
 */
55 static void print_half_number(int i
, uint16_t num
)
57 int flags
= fetestexcept(FE_ALL_EXCEPT
);
58 char *fstr
= get_flag_state(flags
);
60 printf("%02d HALF: %#04x (%#x => %s)\n",
/*
 * Report a 64-bit integer conversion result.
 *
 * @i:   index printed as a two-digit prefix
 * @num: value to print, once as signed decimal and once as its 64-bit
 *       hex pattern
 *
 * The line also carries the exception flags currently reported by
 * fetestexcept(FE_ALL_EXCEPT), as a hex mask and as the text produced by
 * get_flag_state().
 */
static void print_int64(int i, int64_t num)
{
    /* Same bits as num, reinterpreted as unsigned for the hex column. */
    const uint64_t raw_bits = (uint64_t)num;
    const int raised = fetestexcept(FE_ALL_EXCEPT);
    char *const description = get_flag_state(raised);

    printf("%02d INT64: %20" PRId64 "/%#020" PRIx64 " (%#x => %s)\n",
           i,
           num,
           raw_bits,
           raised,
           description);
}
75 /*
 * Signaling NaN macros, if supported.
 *
 * SNANF, SNAN and SNANL expand to the __builtin_nansf, __builtin_nans and
 * __builtin_nansl builtins called with an empty string argument. They are
 * only defined inside the __GNUC_PREREQ(3, 3) conditional.
 * NOTE(review): the end of that conditional (and any fallback) is not
 * visible in this excerpt.
 */
76 # if __GNUC_PREREQ(3, 3)
77 # define SNANF (__builtin_nansf (""))
78 # define SNAN (__builtin_nans (""))
79 # define SNANL (__builtin_nansl (""))
/*
 * Single-precision inputs for the convert_single_to_*() functions, which
 * walk the whole table using ARRAY_SIZE(). The first entry is a negated
 * signaling NaN (-SNANF); the commented entries mark half-precision
 * boundary values.
 * NOTE(review): only part of the initializer list is visible in this
 * excerpt.
 */
83 float single_numbers
[] = { -SNANF
,
95 5.96046E-8, /* min positive FP16 subnormal */
96 6.09756E-5, /* max subnormal FP16 */
97 6.10352E-5, /* min positive normal FP16 */
99 1.0009765625, /* smallest float after 1.0 FP16 */
103 65504.0, /* max FP16 */
106 131008.0, /* max AFP */
/*
 * Convert every entry of single_numbers[] from single to half precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_single_number(), execute the conversion instruction through
 * inline asm, then print the result with print_half_number().
 *
 * Two asm statements are visible: "vcvtb.f16.f32" and "fcvt %h0, %s1".
 * NOTE(review): presumably these are the AArch32 and AArch64 alternatives
 * selected by preprocessor conditionals; those lines, and the declarations
 * of i and output, are not visible in this excerpt.
 */
114 static void convert_single_to_half(void)
118 printf("Converting single-precision to half-precision\n");
120 for (i
= 0; i
< ARRAY_SIZE(single_numbers
); ++i
) {
121 float input
= single_numbers
[i
];
123 feclearexcept(FE_ALL_EXCEPT
);
125 print_single_number(i
, input
);
128 asm("vcvtb.f16.f32 %0, %1" : "=t" (output
) : "x" (input
));
131 asm("fcvt %h0, %s1" : "=w" (output
) : "x" (input
));
133 print_half_number(i
, output
);
/*
 * Convert every entry of single_numbers[] from single to double precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_single_number(), execute the conversion instruction through
 * inline asm, then print the result with print_double_number().
 *
 * Two asm statements are visible: "vcvt.f64.f32" and "fcvt %d0, %s1".
 * NOTE(review): presumably these are the AArch32 and AArch64 alternatives
 * selected by preprocessor conditionals; those lines, and the declarations
 * of i and output, are not visible in this excerpt.
 */
137 static void convert_single_to_double(void)
141 printf("Converting single-precision to double-precision\n");
143 for (i
= 0; i
< ARRAY_SIZE(single_numbers
); ++i
) {
144 float input
= single_numbers
[i
];
145 /* uint64_t output; */
148 feclearexcept(FE_ALL_EXCEPT
);
150 print_single_number(i
, input
);
152 asm("vcvt.f64.f32 %P0, %1" : "=w" (output
) : "t" (input
));
154 asm("fcvt %d0, %s1" : "=w" (output
) : "x" (input
));
156 print_double_number(i
, output
);
/*
 * Convert every entry of single_numbers[] to an integer.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_single_number(), run the conversion through inline asm, then print
 * the result with print_int64().
 *
 * The only live asm statement visible is "fcvtzs %0, %s1", which writes a
 * general-purpose register ("=r"). FCVTZS is the A64 floating-point to
 * signed integer conversion that rounds toward zero. The vcvt.s32.f32
 * variant is commented out.
 * NOTE(review): the declarations of i and output, and any preprocessor
 * conditionals around the asm, are not visible in this excerpt.
 */
160 static void convert_single_to_integer(void)
164 printf("Converting single-precision to integer\n");
166 for (i
= 0; i
< ARRAY_SIZE(single_numbers
); ++i
) {
167 float input
= single_numbers
[i
];
170 feclearexcept(FE_ALL_EXCEPT
);
172 print_single_number(i
, input
);
174 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
177 asm("fcvtzs %0, %s1" : "=r" (output
) : "w" (input
));
179 print_int64(i
, output
);
183 /* This allows us to initialise some doubles as pure hex */
/*
 * Double-precision inputs for the convert_double_to_*() functions, which
 * read each element through its .d member. Most entries are written as
 * floating-point literals; those using the .h designator supply a raw
 * 64-bit hex pattern instead.
 * NOTE(review): the declaration of test_doubles is not visible in this
 * excerpt - presumably a union overlaying a double (d) and a 64-bit
 * integer (h); only part of the initializer list is visible as well.
 */
189 test_doubles double_numbers
[] = {
197 {-1.111E+30}, /* half prec */
204 {5.96046E-8}, /* min positive FP16 subnormal */
205 {6.09756E-5}, /* max subnormal FP16 */
206 {6.10352E-5}, /* min positive normal FP16 */
208 {1.0009765625}, /* smallest float after 1.0 FP16 */
210 {1.3789972848607228e-308},
211 {1.4914738736681624e-308},
215 {65504.0}, /* max FP16 */
218 {131008.0}, /* max AFP */
220 {.h
= 0x41dfffffffc00000 }, /* to int = 0x7fffffff */
226 {.h
= 0x7ff0000000000001}, /* SNAN */
/*
 * Convert every entry of double_numbers[] from double to half precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_double_number(), execute the conversion instruction through
 * inline asm, then print the result with print_half_number().
 *
 * The only live asm statement visible is "fcvt %h0, %d1"; the
 * vcvtb.f16.f64 variant is commented out.
 * NOTE(review): the declarations of i and output, and any preprocessor
 * conditionals around the asm, are not visible in this excerpt.
 */
230 static void convert_double_to_half(void)
234 printf("Converting double-precision to half-precision\n");
236 for (i
= 0; i
< ARRAY_SIZE(double_numbers
); ++i
) {
237 double input
= double_numbers
[i
].d
;
240 feclearexcept(FE_ALL_EXCEPT
);
242 print_double_number(i
, input
);
244 /* as we don't have _Float16 support */
246 /* asm("vcvtb.f16.f64 %0, %P1" : "=t" (output) : "x" (input)); */
249 asm("fcvt %h0, %d1" : "=w" (output
) : "x" (input
));
251 print_half_number(i
, output
);
/*
 * Convert every entry of double_numbers[] from double to single precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_double_number(), execute the conversion instruction through
 * inline asm, then print the result with print_single_number().
 *
 * Two asm statements are visible: "vcvt.f32.f64" and "fcvt %s0, %d1".
 * NOTE(review): presumably these are the AArch32 and AArch64 alternatives
 * selected by preprocessor conditionals; those lines, and the declarations
 * of i and output, are not visible in this excerpt.
 */
255 static void convert_double_to_single(void)
259 printf("Converting double-precision to single-precision\n");
261 for (i
= 0; i
< ARRAY_SIZE(double_numbers
); ++i
) {
262 double input
= double_numbers
[i
].d
;
265 feclearexcept(FE_ALL_EXCEPT
);
267 print_double_number(i
, input
);
270 asm("vcvt.f32.f64 %0, %P1" : "=w" (output
) : "x" (input
));
272 asm("fcvt %s0, %d1" : "=w" (output
) : "x" (input
));
275 print_single_number(i
, output
);
/*
 * Convert every entry of double_numbers[] to an integer.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with
 * print_double_number(), run the conversion through inline asm, then print
 * the result with print_int64().
 *
 * The only live asm statement visible is "fcvtzs %0, %d1", which writes a
 * general-purpose register ("=r"). FCVTZS is the A64 floating-point to
 * signed integer conversion that rounds toward zero. The commented-out
 * alternative names vcvt.s32.f32.
 * NOTE(review): the declarations of i and output, and any preprocessor
 * conditionals around the asm, are not visible in this excerpt.
 */
279 static void convert_double_to_integer(void)
283 printf("Converting double-precision to integer\n");
285 for (i
= 0; i
< ARRAY_SIZE(double_numbers
); ++i
) {
286 double input
= double_numbers
[i
].d
;
289 feclearexcept(FE_ALL_EXCEPT
);
291 print_double_number(i
, input
);
293 /* asm("vcvt.s32.f32 %s0, %s1" : "=t" (output) : "t" (input)); */
296 asm("fcvtzs %0, %d1" : "=r" (output
) : "w" (input
));
298 print_int64(i
, output
);
302 /*
 * Half-precision inputs for the convert_half_to_*() functions, written as
 * raw 16-bit patterns because there are no handy defines for these
 * numbers. Where a comment gives two readings ("NaN / AHP"), the pattern
 * is annotated both as a NaN and under the AHP label.
 * NOTE(review): only part of the initializer list is visible in this
 * excerpt.
 */
303 uint16_t half_numbers
[] = {
304 0xffff, /* -NaN / AHP -Max */
305 0xfcff, /* -NaN / AHP */
306 0xfc01, /* -NaN / AHP */
311 0x8001, /* -MIN subnormal */
314 0x0001, /* MIN subnormal */
318 0x7c01, /* NaN / AHP */
319 0x7cff, /* NaN / AHP */
320 0x7fff, /* NaN / AHP +Max */
/*
 * Convert every entry of half_numbers[] from half to double precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with print_half_number(),
 * execute the conversion instruction through inline asm, then print the
 * result with print_double_number().
 *
 * The only live asm statement visible is "fcvt %d0, %h1"; the
 * vcvtb.f64.f16 variant is commented out.
 * NOTE(review): the declarations of i and output, and any preprocessor
 * conditionals around the asm, are not visible in this excerpt.
 */
323 static void convert_half_to_double(void)
327 printf("Converting half-precision to double-precision\n");
329 for (i
= 0; i
< ARRAY_SIZE(half_numbers
); ++i
) {
330 uint16_t input
= half_numbers
[i
];
333 feclearexcept(FE_ALL_EXCEPT
);
335 print_half_number(i
, input
);
337 /* asm("vcvtb.f64.f16 %P0, %1" : "=w" (output) : "t" (input)); */
340 asm("fcvt %d0, %h1" : "=w" (output
) : "x" (input
));
342 print_double_number(i
, output
);
/*
 * Convert every entry of half_numbers[] from half to single precision.
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with print_half_number(),
 * execute the conversion instruction through inline asm, then print the
 * result with print_single_number().
 *
 * Two asm statements are visible: "vcvtb.f32.f16", whose input operand is
 * cast to uint32_t, and "fcvt %s0, %h1".
 * NOTE(review): presumably these are the AArch32 and AArch64 alternatives
 * selected by preprocessor conditionals; those lines, and the declarations
 * of i and output, are not visible in this excerpt.
 */
346 static void convert_half_to_single(void)
350 printf("Converting half-precision to single-precision\n");
352 for (i
= 0; i
< ARRAY_SIZE(half_numbers
); ++i
) {
353 uint16_t input
= half_numbers
[i
];
356 feclearexcept(FE_ALL_EXCEPT
);
358 print_half_number(i
, input
);
360 asm("vcvtb.f32.f16 %0, %1" : "=w" (output
) : "x" ((uint32_t)input
));
362 asm("fcvt %s0, %h1" : "=w" (output
) : "x" (input
));
364 print_single_number(i
, output
);
/*
 * Run every entry of half_numbers[] through the "half to integer" step and
 * print the result with print_int64().
 *
 * For each entry: clear the accumulated exception flags with
 * feclearexcept(FE_ALL_EXCEPT), print the input with print_half_number(),
 * execute the inline asm, then print output as a 64-bit integer.
 *
 * NOTE(review): the only live asm statement visible is "fcvt %s0, %h1",
 * the same half -> single conversion used by convert_half_to_single(). It
 * is not an integer conversion like the "fcvtzs" used by the other
 * *_to_integer functions, so the value printed here may not be an integer
 * conversion result at all - confirm the intent. The commented-out
 * vcvt.s32.f16 form is tagged "v8.2".
 * NOTE(review): the declarations of i and output, and any preprocessor
 * conditionals around the asm, are not visible in this excerpt.
 */
368 static void convert_half_to_integer(void)
372 printf("Converting half-precision to integer\n");
374 for (i
= 0; i
< ARRAY_SIZE(half_numbers
); ++i
) {
375 uint16_t input
= half_numbers
[i
];
378 feclearexcept(FE_ALL_EXCEPT
);
380 print_half_number(i
, input
);
382 /* asm("vcvt.s32.f16 %0, %1" : "=t" (output) : "t" (input)); v8.2 */
385 asm("fcvt %s0, %h1" : "=w" (output
) : "x" (input
));
387 print_int64(i
, output
);
/*
 * Rounding modes exercised by main(): each entry pairs an FE_* rounding
 * constant, which main() passes to fesetround() via .flag, with the text
 * it prints via .desc.
 * NOTE(review): the declaration of float_mapping is not visible in this
 * excerpt.
 */
396 float_mapping round_flags
[] = {
397 { FE_TONEAREST
, "to nearest" },
398 { FE_UPWARD
, "upwards" },
399 { FE_DOWNWARD
, "downwards" },
400 { FE_TOWARDZERO
, "to zero" }
403 int main(int argc
, char *argv
[argc
])
407 printf("#### Enabling IEEE Half Precision\n");
409 for (i
= 0; i
< ARRAY_SIZE(round_flags
); ++i
) {
410 fesetround(round_flags
[i
].flag
);
411 printf("### Rounding %s\n", round_flags
[i
].desc
);
412 convert_single_to_half();
413 convert_single_to_double();
414 convert_double_to_half();
415 convert_double_to_single();
416 convert_half_to_single();
417 convert_half_to_double();
420 /* convert to integer */
421 convert_single_to_integer();
422 convert_double_to_integer();
423 convert_half_to_integer();
425 /* And now with ARM alternative FP16 */
427 /* See glibc sysdeps/arm/fpu_control.h */
428 asm("mrc p10, 7, r1, cr1, cr0, 0\n\t"
429 "orr r1, r1, %[flags]\n\t"
430 "mcr p10, 7, r1, cr1, cr0, 0\n\t"
431 : /* no output */ : [flags
] "n" (1 << 26) : "r1" );
433 asm("mrs x1, fpcr\n\t"
434 "orr x1, x1, %[flags]\n\t"
436 : /* no output */ : [flags
] "n" (1 << 26) : "x1" );
439 printf("#### Enabling ARM Alternative Half Precision\n");
441 for (i
= 0; i
< ARRAY_SIZE(round_flags
); ++i
) {
442 fesetround(round_flags
[i
].flag
);
443 printf("### Rounding %s\n", round_flags
[i
].desc
);
444 convert_single_to_half();
445 convert_single_to_double();
446 convert_double_to_half();
447 convert_double_to_single();
448 convert_half_to_single();
449 convert_half_to_double();
452 /* convert to integer */
453 convert_single_to_integer();
454 convert_double_to_integer();
455 convert_half_to_integer();