/*
===============================================================================

This C header file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/softfloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these three paragraphs for those parts of
this code that are retained.

===============================================================================
*/
32 #ifndef __SOFTFLOAT_H__
33 #define __SOFTFLOAT_H__
/*
-------------------------------------------------------------------------------
The macro `FLOATX80' must be defined to enable the extended double-precision
floating-point format `floatx80'. If this macro is not defined, the
`floatx80' type will not be defined, and none of the functions that either
input or output the `floatx80' type will be defined. The same applies to
the `FLOAT128' macro and the quadruple-precision format `float128'.
-------------------------------------------------------------------------------
*/
/* #define FLOAT128 */
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point types.
-------------------------------------------------------------------------------
*/
/*
float32 must hold exactly 32 bits. `unsigned long int' is 64 bits wide on
LP64 targets, so `unsigned int' (32 bits on ILP32, LP64 and LLP64) is used.
*/
typedef unsigned int float32;
typedef unsigned long long float64;
#ifdef FLOATX80
/*
NOTE(review): only the `low' member survived in this copy of the header.
The `high' member is reconstructed as a 16-bit field, by analogy with the
`high, low' pair of float128 and the 80-bit width of the format -- confirm
against the upstream header before relying on this layout.
*/
typedef struct {
    unsigned short high;
    unsigned long long low;
} floatx80;
#endif
#ifdef FLOAT128
typedef struct {
    unsigned long long high, low;
} float128;
#endif
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point underflow tininess-detection mode.
-------------------------------------------------------------------------------
*/
extern signed char float_detect_tininess;
enum {
    float_tininess_after_rounding  = 0,
    float_tininess_before_rounding = 1
};
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point rounding mode.
-------------------------------------------------------------------------------
*/
extern signed char float_rounding_mode;
/*
NOTE(review): in this copy of the header `float_round_to_zero = 1' was
followed by a comma, which suggests further rounding-mode enumerators
existed. They are not visible here and have not been invented; restore them
from the upstream header.
*/
enum {
    float_round_nearest_even = 0,
    float_round_to_zero      = 1
};
/*
-------------------------------------------------------------------------------
Software IEC/IEEE floating-point exception flags.
-------------------------------------------------------------------------------
extern signed char float_exception_flags;
enum {
    float_flag_inexact   =  1,
    float_flag_underflow =  2,
    float_flag_overflow  =  4,
    float_flag_divbyzero =  8,
    float_flag_invalid   = 16
};

ScottB: November 4, 1998
Changed the enumeration to match the bit order in the FPA11.
*/
/*
The enumeration quoted in the comment above is the earlier bit assignment,
kept for reference only; the declarations below are the ones in effect.
*/
extern signed char float_exception_flags;
enum {
    float_flag_invalid   =  1,
    float_flag_divbyzero =  2,
    float_flag_overflow  =  4,
    float_flag_underflow =  8,
    float_flag_inexact   = 16
};
/*
-------------------------------------------------------------------------------
Routine to raise any or all of the software IEC/IEEE floating-point
exception flags.
-------------------------------------------------------------------------------
*/
void float_raise( signed char flags );
125 -------------------------------------------------------------------------------
126 Software IEC/IEEE integer-to-floating-point conversion routines.
127 -------------------------------------------------------------------------------
129 float32
int32_to_float32( signed int );
130 float64
int32_to_float64( signed int );
132 floatx80
int32_to_floatx80( signed int );
135 float128
int32_to_float128( signed int );
139 -------------------------------------------------------------------------------
140 Software IEC/IEEE single-precision conversion routines.
141 -------------------------------------------------------------------------------
143 signed int float32_to_int32( float32
);
144 signed int float32_to_int32_round_to_zero( float32
);
145 float64
float32_to_float64( float32
);
147 floatx80
float32_to_floatx80( float32
);
150 float128
float32_to_float128( float32
);
154 -------------------------------------------------------------------------------
155 Software IEC/IEEE single-precision operations.
156 -------------------------------------------------------------------------------
158 float32
float32_round_to_int( float32
);
159 float32
float32_add( float32
, float32
);
160 float32
float32_sub( float32
, float32
);
161 float32
float32_mul( float32
, float32
);
162 float32
float32_div( float32
, float32
);
163 float32
float32_rem( float32
, float32
);
164 float32
float32_sqrt( float32
);
165 char float32_eq( float32
, float32
);
166 char float32_le( float32
, float32
);
167 char float32_lt( float32
, float32
);
168 char float32_eq_signaling( float32
, float32
);
169 char float32_le_quiet( float32
, float32
);
170 char float32_lt_quiet( float32
, float32
);
171 char float32_is_signaling_nan( float32
);
174 -------------------------------------------------------------------------------
175 Software IEC/IEEE double-precision conversion routines.
176 -------------------------------------------------------------------------------
178 signed int float64_to_int32( float64
);
179 signed int float64_to_int32_round_to_zero( float64
);
180 float32
float64_to_float32( float64
);
182 floatx80
float64_to_floatx80( float64
);
185 float128
float64_to_float128( float64
);
189 -------------------------------------------------------------------------------
190 Software IEC/IEEE double-precision operations.
191 -------------------------------------------------------------------------------
193 float64
float64_round_to_int( float64
);
194 float64
float64_add( float64
, float64
);
195 float64
float64_sub( float64
, float64
);
196 float64
float64_mul( float64
, float64
);
197 float64
float64_div( float64
, float64
);
198 float64
float64_rem( float64
, float64
);
199 float64
float64_sqrt( float64
);
200 char float64_eq( float64
, float64
);
201 char float64_le( float64
, float64
);
202 char float64_lt( float64
, float64
);
203 char float64_eq_signaling( float64
, float64
);
204 char float64_le_quiet( float64
, float64
);
205 char float64_lt_quiet( float64
, float64
);
206 char float64_is_signaling_nan( float64
);
#ifdef FLOATX80
/*
-------------------------------------------------------------------------------
Software IEC/IEEE extended double-precision conversion routines.
-------------------------------------------------------------------------------
*/
signed int floatx80_to_int32( floatx80 a );
signed int floatx80_to_int32_round_to_zero( floatx80 a );
float32 floatx80_to_float32( floatx80 a );
float64 floatx80_to_float64( floatx80 a );
#ifdef FLOAT128
float128 floatx80_to_float128( floatx80 a );
#endif
#endif
/*
-------------------------------------------------------------------------------
Software IEC/IEEE extended double-precision rounding precision. Valid
values are 32, 64, and 80.
-------------------------------------------------------------------------------
*/
extern signed char floatx80_rounding_precision;
#ifdef FLOATX80
/*
-------------------------------------------------------------------------------
Software IEC/IEEE extended double-precision operations.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_round_to_int( floatx80 a );
floatx80 floatx80_add( floatx80 a, floatx80 b );
floatx80 floatx80_sub( floatx80 a, floatx80 b );
floatx80 floatx80_mul( floatx80 a, floatx80 b );
floatx80 floatx80_div( floatx80 a, floatx80 b );
floatx80 floatx80_rem( floatx80 a, floatx80 b );
floatx80 floatx80_sqrt( floatx80 a );
/* Comparisons and the NaN test return a char (presumably nonzero for true
   -- confirm against the implementation). */
char floatx80_eq( floatx80 a, floatx80 b );
char floatx80_le( floatx80 a, floatx80 b );
char floatx80_lt( floatx80 a, floatx80 b );
char floatx80_eq_signaling( floatx80 a, floatx80 b );
char floatx80_le_quiet( floatx80 a, floatx80 b );
char floatx80_lt_quiet( floatx80 a, floatx80 b );
char floatx80_is_signaling_nan( floatx80 a );
#endif
#ifdef FLOAT128
/*
-------------------------------------------------------------------------------
Software IEC/IEEE quadruple-precision conversion routines.
-------------------------------------------------------------------------------
*/
signed int float128_to_int32( float128 a );
signed int float128_to_int32_round_to_zero( float128 a );
float32 float128_to_float32( float128 a );
float64 float128_to_float64( float128 a );
#ifdef FLOATX80
floatx80 float128_to_floatx80( float128 a );
#endif
#endif
#ifdef FLOAT128
/*
-------------------------------------------------------------------------------
Software IEC/IEEE quadruple-precision operations.
-------------------------------------------------------------------------------
*/
float128 float128_round_to_int( float128 a );
float128 float128_add( float128 a, float128 b );
float128 float128_sub( float128 a, float128 b );
float128 float128_mul( float128 a, float128 b );
float128 float128_div( float128 a, float128 b );
float128 float128_rem( float128 a, float128 b );
float128 float128_sqrt( float128 a );
/* Comparisons and the NaN test return a char (presumably nonzero for true
   -- confirm against the implementation). */
char float128_eq( float128 a, float128 b );
char float128_le( float128 a, float128 b );
char float128_lt( float128 a, float128 b );
char float128_eq_signaling( float128 a, float128 b );
char float128_le_quiet( float128 a, float128 b );
char float128_lt_quiet( float128 a, float128 b );
char float128_is_signaling_nan( float128 a );
#endif