1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H
10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/common.h"
19 namespace __llvm_libc
{
// A class which stores the normalized form of a floating point value.
// The special IEEE-754 bit patterns of zero, infinity and NaNs are
// not handled by this class.
//
// A normalized floating point number is of this form:
//    (-1)^sign * 2^exponent * <mantissa>
// where <mantissa> is of the form 1.<...>.
29 template <typename T
> struct NormalFloat
{
31 cpp::is_floating_point_v
<T
>,
32 "NormalFloat template parameter has to be a floating point type.");
34 using UIntType
= typename FPBits
<T
>::UIntType
;
35 static constexpr UIntType ONE
= (UIntType(1) << MantissaWidth
<T
>::VALUE
);
37 // Unbiased exponent value.
// We want |UIntType| to have at least one bit more than the actual mantissa
// bit width to accommodate the implicit 1 value.
43 static_assert(sizeof(UIntType
) * 8 >= MantissaWidth
<T
>::VALUE
+ 1,
44 "Bad type for mantissa in NormalFloat.");
48 LIBC_INLINE
NormalFloat(int32_t e
, UIntType m
, bool s
)
49 : exponent(e
), mantissa(m
), sign(s
) {
53 unsigned normalization_shift
= evaluate_normalization_shift(mantissa
);
54 mantissa
= mantissa
<< normalization_shift
;
55 exponent
-= normalization_shift
;
58 LIBC_INLINE
explicit NormalFloat(T x
) { init_from_bits(FPBits
<T
>(x
)); }
60 LIBC_INLINE
explicit NormalFloat(FPBits
<T
> bits
) { init_from_bits(bits
); }
// Compares this normalized number with another normalized number.
// Returns -1 if this number is less than |other|, 0 if this number is equal
// to |other|, and 1 if this number is greater than |other|.
65 LIBC_INLINE
int cmp(const NormalFloat
<T
> &other
) const {
66 if (sign
!= other
.sign
)
69 if (exponent
> other
.exponent
) {
71 } else if (exponent
== other
.exponent
) {
72 if (mantissa
> other
.mantissa
)
74 else if (mantissa
== other
.mantissa
)
// Returns a new normalized floating point number which is equal in value
// to this number multiplied by 2^e. That is:
//     result = this * 2^e
86 LIBC_INLINE NormalFloat
<T
> mul2(int e
) const {
87 NormalFloat
<T
> result
= *this;
92 LIBC_INLINE
operator T() const {
93 int biased_exponent
= exponent
+ FPBits
<T
>::EXPONENT_BIAS
;
94 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
95 constexpr int MAX_EXPONENT_VALUE
= (1 << ExponentWidth
<T
>::VALUE
) - 2;
96 if (biased_exponent
> MAX_EXPONENT_VALUE
) {
97 return sign
? T(FPBits
<T
>::neg_inf()) : T(FPBits
<T
>::inf());
100 FPBits
<T
> result(T(0.0));
101 result
.set_sign(sign
);
103 constexpr int SUBNORMAL_EXPONENT
= -FPBits
<T
>::EXPONENT_BIAS
+ 1;
104 if (exponent
< SUBNORMAL_EXPONENT
) {
105 unsigned shift
= SUBNORMAL_EXPONENT
- exponent
;
// Since exponent < SUBNORMAL_EXPONENT, shift is strictly greater than zero.
108 if (shift
<= MantissaWidth
<T
>::VALUE
+ 1) {
109 // Generate a subnormal number. Might lead to loss of precision.
110 // We round to nearest and round halfway cases to even.
111 const UIntType shift_out_mask
= (UIntType(1) << shift
) - 1;
112 const UIntType shift_out_value
= mantissa
& shift_out_mask
;
113 const UIntType halfway_value
= UIntType(1) << (shift
- 1);
114 result
.set_unbiased_exponent(0);
115 result
.set_mantissa(mantissa
>> shift
);
116 UIntType new_mantissa
= result
.get_mantissa();
117 if (shift_out_value
> halfway_value
) {
119 } else if (shift_out_value
== halfway_value
) {
121 if (result
.get_mantissa() & 0x1)
124 result
.set_mantissa(new_mantissa
);
125 // Adding 1 to mantissa can lead to overflow. This can only happen if
126 // mantissa was all ones (0b111..11). For such a case, we will carry
127 // the overflow into the exponent.
128 if (new_mantissa
== ONE
)
129 result
.set_unbiased_exponent(1);
136 result
.set_unbiased_exponent(exponent
+ FPBits
<T
>::EXPONENT_BIAS
);
137 result
.set_mantissa(mantissa
);
142 LIBC_INLINE
void init_from_bits(FPBits
<T
> bits
) {
143 sign
= bits
.get_sign();
145 if (bits
.is_inf_or_nan() || bits
.is_zero()) {
146 // Ignore special bit patterns. Implementations deal with them separately
147 // anyway so this should not be a problem.
153 // Normalize subnormal numbers.
154 if (bits
.get_unbiased_exponent() == 0) {
155 unsigned shift
= evaluate_normalization_shift(bits
.get_mantissa());
156 mantissa
= UIntType(bits
.get_mantissa()) << shift
;
157 exponent
= 1 - FPBits
<T
>::EXPONENT_BIAS
- shift
;
159 exponent
= bits
.get_unbiased_exponent() - FPBits
<T
>::EXPONENT_BIAS
;
160 mantissa
= ONE
| bits
.get_mantissa();
164 LIBC_INLINE
unsigned evaluate_normalization_shift(UIntType m
) {
166 for (; (ONE
& m
) == 0 && (shift
< MantissaWidth
<T
>::VALUE
);
173 #ifdef SPECIAL_X86_LONG_DOUBLE
176 NormalFloat
<long double>::init_from_bits(FPBits
<long double> bits
) {
177 sign
= bits
.get_sign();
179 if (bits
.is_inf_or_nan() || bits
.is_zero()) {
180 // Ignore special bit patterns. Implementations deal with them separately
181 // anyway so this should not be a problem.
187 if (bits
.get_unbiased_exponent() == 0) {
188 if (bits
.get_implicit_bit() == 0) {
189 // Since we ignore zero value, the mantissa in this case is non-zero.
190 int normalization_shift
=
191 evaluate_normalization_shift(bits
.get_mantissa());
192 exponent
= -16382 - normalization_shift
;
193 mantissa
= (bits
.get_mantissa() << normalization_shift
);
196 mantissa
= ONE
| bits
.get_mantissa();
199 if (bits
.get_implicit_bit() == 0) {
200 // Invalid number so just store 0 similar to a NaN.
204 exponent
= bits
.get_unbiased_exponent() - 16383;
205 mantissa
= ONE
| bits
.get_mantissa();
210 template <> LIBC_INLINE NormalFloat
<long double>::operator long double() const {
211 int biased_exponent
= exponent
+ FPBits
<long double>::EXPONENT_BIAS
;
212 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
213 constexpr int MAX_EXPONENT_VALUE
=
214 (1 << ExponentWidth
<long double>::VALUE
) - 2;
215 if (biased_exponent
> MAX_EXPONENT_VALUE
) {
216 return sign
? FPBits
<long double>::neg_inf() : FPBits
<long double>::inf();
219 FPBits
<long double> result(0.0l);
220 result
.set_sign(sign
);
222 constexpr int SUBNORMAL_EXPONENT
= -FPBits
<long double>::EXPONENT_BIAS
+ 1;
223 if (exponent
< SUBNORMAL_EXPONENT
) {
224 unsigned shift
= SUBNORMAL_EXPONENT
- exponent
;
225 if (shift
<= MantissaWidth
<long double>::VALUE
+ 1) {
226 // Generate a subnormal number. Might lead to loss of precision.
227 // We round to nearest and round halfway cases to even.
228 const UIntType shift_out_mask
= (UIntType(1) << shift
) - 1;
229 const UIntType shift_out_value
= mantissa
& shift_out_mask
;
230 const UIntType halfway_value
= UIntType(1) << (shift
- 1);
231 result
.set_unbiased_exponent(0);
232 result
.set_mantissa(mantissa
>> shift
);
233 UIntType new_mantissa
= result
.get_mantissa();
234 if (shift_out_value
> halfway_value
) {
236 } else if (shift_out_value
== halfway_value
) {
238 if (result
.get_mantissa() & 0x1)
241 result
.set_mantissa(new_mantissa
);
242 // Adding 1 to mantissa can lead to overflow. This can only happen if
243 // mantissa was all ones (0b111..11). For such a case, we will carry
244 // the overflow into the exponent and set the implicit bit to 1.
245 if (new_mantissa
== ONE
) {
246 result
.set_unbiased_exponent(1);
247 result
.set_implicit_bit(1);
249 result
.set_implicit_bit(0);
251 return static_cast<long double>(result
);
253 return static_cast<long double>(result
);
257 result
.set_unbiased_exponent(biased_exponent
);
258 result
.set_mantissa(mantissa
);
259 result
.set_implicit_bit(1);
260 return static_cast<long double>(result
);
262 #endif // SPECIAL_X86_LONG_DOUBLE
264 } // namespace fputil
265 } // namespace __llvm_libc
267 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H