1 # Copyright 2010 Luca Barbieri
3 # Permission is hereby granted, free of charge, to any person obtaining
4 # a copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish,
7 # distribute, sublicense, and/or sell copies of the Software, and to
8 # permit persons to whom the Software is furnished to do so, subject to
9 # the following conditions:
11 # The above copyright notice and this permission notice (including the
12 # next paragraph) shall be included in all copies or substantial
13 # portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 # IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
19 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 # *************************************************************************
25 # The code is a reimplementation of the algorithm in
26 # www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
27 # "Fast Half Float Conversions" by Jeroen van der Zijp, Nov 2008
29 # The table contents have been slightly changed so that the exponent
30 # bias is now in the exponent table instead of the mantissa table (mostly
31 # for cosmetic reasons, and because it theoretically allows a variant
32 # that flushes denormal to zero but uses a mantissa table with 24-bit entries).
35 # The tables are also constructed slightly differently.
38 # Note that using a 64K * 4 table is a terrible idea since it will not fit
39 # in the L1 cache and will massively pollute the L2 cache as well.
41 # The tables generated below should instead fit in the L1 cache.
43 # TODO: we could use a denormal bias table instead of the mantissa/offset
44 # tables: this would reduce the L1 cache usage from 8704 to 2304 bytes
45 # but would involve more computation
47 # Note however that if denormals are never encountered, the L1 cache usage
48 # is only about 4608 bytes anyway.
# NOTE(review): the three fragments below look like the bodies of the
# begin()/value()/end() helpers that the rest of the script calls. Their
# `def` lines and any other statements are not present in this view, so that
# mapping is an assumption -- confirm against the complete file.

# Prints the opening line of a C array definition in the form
# "const <t> <n>[<l>] = {".
59 print "const " + t
+ " " + n
+ "[" + str(l
) + "] = {"
# Prints one array element: a tab, v formatted with hex(), then a comma.
64 print "\t" + hex(v
) + ","
# Fails if table_index differs from table_length; presumably this checks that
# exactly the declared number of elements was emitted -- TODO confirm.
70 assert table_index
== table_length
# Emit the preamble of the generated C source: a banner comment stating that
# the output is autogenerated, followed by the include of util/u_half.h.
72 print "/* This file is autogenerated by u_half.py. Do not edit directly. */"
73 print "#include \"util/u_half.h\""
# Mantissa table: declared as uint32_t with 2048 entries.
# NOTE(review): several statements of this section (loop bodies and any
# standalone value()/end() calls) are not visible in this view.
75 begin("uint32_t", "util_half_to_float_mantissa_table", 2048)
# i runs over 1..1023.
80 for i
in xrange(1, 1024):
# Repeats for as long as bit 23 (0x00800000) of m is clear; how m is
# initialised and updated is not visible here.
85 while (m
& 0x00800000) == 0:
# i runs over 1024..2047; each emitted value is (i - 1024) shifted left by
# 13 bits.
94 for i
in xrange(1024, 2048):
95 value((i
- 1024) << 13)
# Exponent table: declared as uint32_t with 64 entries.
# NOTE(review): the values that belong under the "zero or denormals" and
# "infinity/NaN" comments are not visible in this view.
98 begin("uint32_t", "util_half_to_float_exponent_table", 64)
99 # positive zero or denormals
# i runs over 1..30. Each value is i shifted to bit 23 and up, plus
# 0x38000000 (== 112 << 23); 112 == 127 - 15, presumably the difference
# between the float32 and float16 exponent biases -- TODO confirm.
103 for i
in xrange(1, 31):
104 value(0x38000000 + (i
<< 23))
106 # positive infinity/NaN
109 # negative zero or denormals
# i runs over 33..62. Same formula applied to (i - 32), with the constant
# 0xb8000000 (0x38000000 with bit 31 additionally set).
113 for i
in range(33, 63):
114 value(0xb8000000 + ((i
- 32) << 23))
116 # negative infinity/NaN
# Offset table: declared as uint32_t with 64 entries.
# NOTE(review): the emitted values (loop bodies and the standalone entries
# under the "zero or denormals" comments) are not visible in this view.
120 begin("uint32_t", "util_half_to_float_offset_table", 64)
121 # positive zero or denormals
# i runs over 1..31.
125 for i
in range(1, 32):
128 # negative zero or denormals
# i runs over 33..63.
132 for i
in xrange(33, 64):
# Base table: declared as uint16_t with 512 entries.
# NOTE(review): indentation is lost in this view; the i-loops below are
# presumably nested inside the sign loop -- confirm against the full file.
136 begin("uint16_t", "util_float_to_half_base_table", 512)
# Two passes: sign == 0, then sign == 0x8000 (bit 15 set).
137 for sign
in (0, 0x8000):
138 # very small numbers mapping to zero
# i runs over -127..-25; the emitted value is not visible in this view.
139 for i
in xrange(-127, -24):
142 # small numbers mapping to denormals
# i runs over -24..-15. 0x400 is shifted right by (-14 - i), i.e. by 10 bits
# at i == -24 (giving 0x001) down to 1 bit at i == -15 (giving 0x200), and
# the result is OR-ed with sign.
143 for i
in xrange(-24, -14):
144 value(sign |
(0x400 >> (-14 -i
)))
# i runs over -14..15. (i + 15), which spans 1..30, is placed at bit 10 and
# up, and the result is OR-ed with sign.
147 for i
in xrange(-14, 16):
148 value(sign |
((i
+ 15) << 10))
150 # large numbers mapping to infinity
# i runs over 16..127; the emitted value is not visible in this view.
151 for i
in xrange(16, 128):
# Shift table: declared as uint8_t with 512 entries.
# NOTE(review): none of the emitted values are visible in this view, and
# indentation is lost; the i-loops are presumably nested inside the sign
# loop -- confirm against the full file.
158 begin("uint8_t", "util_float_to_half_shift_table", 512)
# Two passes: sign == 0, then sign == 0x8000.
159 for sign
in (0, 0x8000):
160 # very small numbers mapping to zero
# i runs over -127..-25.
161 for i
in xrange(-127, -24):
164 # small numbers mapping to denormals
# i runs over -24..-15.
165 for i
in xrange(-24, -14):
# i runs over -14..15.
169 for i
in xrange(-14, 16):
172 # large numbers mapping to infinity
# i runs over 16..127.
173 for i
in xrange(16, 128):