1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/common.h"
16 #include "src/__support/macros/config.h"
20 namespace LIBC_NAMESPACE_DECL
{
23 // A class which stores the normalized form of a floating point value.
24 // The special IEEE-754 bit patterns of zero, infinity and NaNs
25 // are not handled by this class.
27 // A normalized floating point number is of this form:
28 // (-1)^sign * 2^exponent * <mantissa>
29 // where <mantissa> is of the form 1.<...>.
30 template <typename T
> struct NormalFloat
{
32 cpp::is_floating_point_v
<T
>,
33 "NormalFloat template parameter has to be a floating point type.");
35 using StorageType
= typename FPBits
<T
>::StorageType
;
36 static constexpr StorageType ONE
=
37 (StorageType(1) << FPBits
<T
>::FRACTION_LEN
);
39 // Unbiased exponent value.
43 // We want |StorageType| to have at least one bit more than the actual
44 // mantissa bit width to accommodate the implicit 1 value.
45 static_assert(sizeof(StorageType
) * 8 >= FPBits
<T
>::FRACTION_LEN
+ 1,
46 "Bad type for mantissa in NormalFloat.");
48 Sign sign
= Sign::POS
;
50 LIBC_INLINE
NormalFloat(Sign s
, int32_t e
, StorageType m
)
51 : exponent(e
), mantissa(m
), sign(s
) {
55 unsigned normalization_shift
= evaluate_normalization_shift(mantissa
);
56 mantissa
<<= normalization_shift
;
57 exponent
-= normalization_shift
;
60 LIBC_INLINE
explicit NormalFloat(T x
) { init_from_bits(FPBits
<T
>(x
)); }
62 LIBC_INLINE
explicit NormalFloat(FPBits
<T
> bits
) { init_from_bits(bits
); }
64 // Compares this normalized number with another normalized number.
65 // Returns -1 if this number is less than |other|, 0 if this number is equal
66 // to |other|, and 1 if this number is greater than |other|.
67 LIBC_INLINE
int cmp(const NormalFloat
<T
> &other
) const {
68 const int result
= sign
.is_neg() ? -1 : 1;
69 if (sign
!= other
.sign
)
72 if (exponent
> other
.exponent
) {
74 } else if (exponent
== other
.exponent
) {
75 if (mantissa
> other
.mantissa
)
77 else if (mantissa
== other
.mantissa
)
86 // Returns a new normalized floating point number which is equal in value
87 // to this number multiplied by 2^e. That is: new = this * 2^e.
89 LIBC_INLINE NormalFloat
<T
> mul2(int e
) const {
90 NormalFloat
<T
> result
= *this;
95 LIBC_INLINE
operator T() const {
96 int biased_exponent
= exponent
+ FPBits
<T
>::EXP_BIAS
;
97 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
98 constexpr int MAX_EXPONENT_VALUE
= (1 << FPBits
<T
>::EXP_LEN
) - 2;
99 if (biased_exponent
> MAX_EXPONENT_VALUE
) {
100 return FPBits
<T
>::inf(sign
).get_val();
103 FPBits
<T
> result(T(0.0));
104 result
.set_sign(sign
);
106 constexpr int SUBNORMAL_EXPONENT
= -FPBits
<T
>::EXP_BIAS
+ 1;
107 if (exponent
< SUBNORMAL_EXPONENT
) {
108 unsigned shift
= SUBNORMAL_EXPONENT
- exponent
;
109 // Since exponent < SUBNORMAL_EXPONENT, shift is strictly greater than zero.
111 if (shift
<= FPBits
<T
>::FRACTION_LEN
+ 1) {
112 // Generate a subnormal number. Might lead to loss of precision.
113 // We round to nearest and round halfway cases to even.
114 const StorageType shift_out_mask
=
115 static_cast<StorageType
>(StorageType(1) << shift
) - 1;
116 const StorageType shift_out_value
= mantissa
& shift_out_mask
;
117 const StorageType halfway_value
=
118 static_cast<StorageType
>(StorageType(1) << (shift
- 1));
119 result
.set_biased_exponent(0);
120 result
.set_mantissa(mantissa
>> shift
);
121 StorageType new_mantissa
= result
.get_mantissa();
122 if (shift_out_value
> halfway_value
) {
124 } else if (shift_out_value
== halfway_value
) {
126 if (result
.get_mantissa() & 0x1)
129 result
.set_mantissa(new_mantissa
);
130 // Adding 1 to mantissa can lead to overflow. This can only happen if
131 // mantissa was all ones (0b111..11). For such a case, we will carry
132 // the overflow into the exponent.
133 if (new_mantissa
== ONE
)
134 result
.set_biased_exponent(1);
135 return result
.get_val();
137 return result
.get_val();
141 result
.set_biased_exponent(
142 static_cast<StorageType
>(exponent
+ FPBits
<T
>::EXP_BIAS
));
143 result
.set_mantissa(mantissa
);
144 return result
.get_val();
148 LIBC_INLINE
void init_from_bits(FPBits
<T
> bits
) {
151 if (bits
.is_inf_or_nan() || bits
.is_zero()) {
152 // Ignore special bit patterns. Implementations deal with them separately
153 // anyway so this should not be a problem.
159 // Normalize subnormal numbers.
160 if (bits
.is_subnormal()) {
161 unsigned shift
= evaluate_normalization_shift(bits
.get_mantissa());
162 mantissa
= static_cast<StorageType
>(bits
.get_mantissa() << shift
);
163 exponent
= 1 - FPBits
<T
>::EXP_BIAS
- shift
;
165 exponent
= bits
.get_biased_exponent() - FPBits
<T
>::EXP_BIAS
;
166 mantissa
= ONE
| bits
.get_mantissa();
170 LIBC_INLINE
unsigned evaluate_normalization_shift(StorageType m
) {
172 for (; (ONE
& m
) == 0 && (shift
< FPBits
<T
>::FRACTION_LEN
);
179 #ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
182 NormalFloat
<long double>::init_from_bits(FPBits
<long double> bits
) {
185 if (bits
.is_inf_or_nan() || bits
.is_zero()) {
186 // Ignore special bit patterns. Implementations deal with them separately
187 // anyway so this should not be a problem.
193 if (bits
.is_subnormal()) {
194 if (bits
.get_implicit_bit() == 0) {
195 // Since we ignore zero value, the mantissa in this case is non-zero.
196 int normalization_shift
=
197 evaluate_normalization_shift(bits
.get_mantissa());
198 exponent
= -16382 - normalization_shift
;
199 mantissa
= (bits
.get_mantissa() << normalization_shift
);
202 mantissa
= ONE
| bits
.get_mantissa();
205 if (bits
.get_implicit_bit() == 0) {
206 // Invalid number so just store 0 similar to a NaN.
210 exponent
= bits
.get_biased_exponent() - 16383;
211 mantissa
= ONE
| bits
.get_mantissa();
216 template <> LIBC_INLINE NormalFloat
<long double>::operator long double() const {
217 using LDBits
= FPBits
<long double>;
218 int biased_exponent
= exponent
+ LDBits::EXP_BIAS
;
219 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
220 constexpr int MAX_EXPONENT_VALUE
= (1 << LDBits::EXP_LEN
) - 2;
221 if (biased_exponent
> MAX_EXPONENT_VALUE
) {
222 return LDBits::inf(sign
).get_val();
225 FPBits
<long double> result(0.0l);
226 result
.set_sign(sign
);
228 constexpr int SUBNORMAL_EXPONENT
= -LDBits::EXP_BIAS
+ 1;
229 if (exponent
< SUBNORMAL_EXPONENT
) {
230 unsigned shift
= SUBNORMAL_EXPONENT
- exponent
;
231 if (shift
<= LDBits::FRACTION_LEN
+ 1) {
232 // Generate a subnormal number. Might lead to loss of precision.
233 // We round to nearest and round halfway cases to even.
234 const StorageType shift_out_mask
= (StorageType(1) << shift
) - 1;
235 const StorageType shift_out_value
= mantissa
& shift_out_mask
;
236 const StorageType halfway_value
= StorageType(1) << (shift
- 1);
237 result
.set_biased_exponent(0);
238 result
.set_mantissa(mantissa
>> shift
);
239 StorageType new_mantissa
= result
.get_mantissa();
240 if (shift_out_value
> halfway_value
) {
242 } else if (shift_out_value
== halfway_value
) {
244 if (result
.get_mantissa() & 0x1)
247 result
.set_mantissa(new_mantissa
);
248 // Adding 1 to mantissa can lead to overflow. This can only happen if
249 // mantissa was all ones (0b111..11). For such a case, we will carry
250 // the overflow into the exponent and set the implicit bit to 1.
251 if (new_mantissa
== ONE
) {
252 result
.set_biased_exponent(1);
253 result
.set_implicit_bit(1);
255 result
.set_implicit_bit(0);
257 return result
.get_val();
259 return result
.get_val();
263 result
.set_biased_exponent(biased_exponent
);
264 result
.set_mantissa(mantissa
);
265 result
.set_implicit_bit(1);
266 return result
.get_val();
268 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
270 } // namespace fputil
271 } // namespace LIBC_NAMESPACE_DECL
273 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H