1 //===-- String to float conversion utils ------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LIBC_SRC_SUPPORT_STR_TO_FLOAT_H
10 #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H
12 #include "src/__support/CPP/limits.h"
13 #include "src/__support/CPP/optional.h"
14 #include "src/__support/FPUtil/FEnvImpl.h"
15 #include "src/__support/FPUtil/FPBits.h"
16 #include "src/__support/UInt128.h"
17 #include "src/__support/builtin_wrappers.h"
18 #include "src/__support/common.h"
19 #include "src/__support/ctype_utils.h"
20 #include "src/__support/detailed_powers_of_ten.h"
21 #include "src/__support/high_precision_decimal.h"
22 #include "src/__support/str_to_integer.h"
23 #include "src/__support/str_to_num_result.h"
24 #include "src/errno/libc_errno.h" // For ERANGE
26 namespace __llvm_libc
{
29 template <class T
> struct ExpandedFloat
{
30 typename
fputil::FPBits
<T
>::UIntType mantissa
;
34 template <class T
> struct FloatConvertReturn
{
35 ExpandedFloat
<T
> num
= {0, 0};
39 template <class T
> LIBC_INLINE
uint32_t leading_zeroes(T inputNumber
) {
40 constexpr uint32_t BITS_IN_T
= sizeof(T
) * 8;
41 if (inputNumber
== 0) {
44 uint32_t cur_guess
= BITS_IN_T
/ 2;
45 uint32_t range_size
= BITS_IN_T
/ 2;
46 // while either shifting by curGuess does not get rid of all of the bits or
47 // shifting by one less also gets rid of all of the bits then we have not
48 // found the first bit.
49 while (((inputNumber
>> cur_guess
) > 0) ||
50 ((inputNumber
>> (cur_guess
- 1)) == 0)) {
51 // Binary search for the first set bit
53 if (range_size
== 0) {
56 if ((inputNumber
>> cur_guess
) > 0) {
57 cur_guess
+= range_size
;
59 cur_guess
-= range_size
;
62 if (inputNumber
>> cur_guess
> 0) {
65 return BITS_IN_T
- cur_guess
;
69 LIBC_INLINE
uint32_t leading_zeroes
<uint32_t>(uint32_t inputNumber
) {
70 return safe_clz(inputNumber
);
74 LIBC_INLINE
uint32_t leading_zeroes
<uint64_t>(uint64_t inputNumber
) {
75 return safe_clz(inputNumber
);
78 LIBC_INLINE
uint64_t low64(const UInt128
&num
) {
79 return static_cast<uint64_t>(num
& 0xffffffffffffffff);
82 LIBC_INLINE
uint64_t high64(const UInt128
&num
) {
83 return static_cast<uint64_t>(num
>> 64);
86 template <class T
> LIBC_INLINE
void set_implicit_bit(fputil::FPBits
<T
> &) {
90 #if defined(SPECIAL_X86_LONG_DOUBLE)
93 set_implicit_bit
<long double>(fputil::FPBits
<long double> &result
) {
94 result
.set_implicit_bit(result
.get_unbiased_exponent() != 0);
98 // This Eisel-Lemire implementation is based on the algorithm described in the
99 // paper Number Parsing at a Gigabyte per Second, Software: Practice and
100 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the
101 // description by Nigel Tao
102 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang
103 // implementation, also by Nigel Tao
104 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25)
105 // for some optimizations as well as handling 32 bit floats.
107 LIBC_INLINE
cpp::optional
<ExpandedFloat
<T
>>
108 eisel_lemire(ExpandedFloat
<T
> init_num
,
109 RoundDirection round
= RoundDirection::Nearest
) {
111 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
113 BitsType mantissa
= init_num
.mantissa
;
114 int32_t exp10
= init_num
.exponent
;
116 constexpr uint32_t BITS_IN_MANTISSA
= sizeof(mantissa
) * 8;
118 if (sizeof(T
) > 8) { // This algorithm cannot handle anything longer than a
119 // double, so we skip straight to the fallback.
124 if (exp10
< DETAILED_POWERS_OF_TEN_MIN_EXP_10
||
125 exp10
> DETAILED_POWERS_OF_TEN_MAX_EXP_10
) {
130 uint32_t clz
= leading_zeroes
<BitsType
>(mantissa
);
133 uint32_t exp2
= static_cast<uint32_t>(exp10_to_exp2(exp10
)) +
134 BITS_IN_MANTISSA
+ fputil::FloatProperties
<T
>::EXPONENT_BIAS
-
138 const uint64_t *power_of_ten
=
139 DETAILED_POWERS_OF_TEN
[exp10
- DETAILED_POWERS_OF_TEN_MIN_EXP_10
];
141 UInt128 first_approx
=
142 static_cast<UInt128
>(mantissa
) * static_cast<UInt128
>(power_of_ten
[1]);
144 // Wider Approximation
145 UInt128 final_approx
;
146 // The halfway constant is used to check if the bits that will be shifted away
147 // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final
148 // mantissa size) - 3 (we shift away the last two bits separately for
149 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
150 // it's 6 bits for floats in this case.
151 const uint64_t halfway_constant
=
152 (uint64_t(1) << (BITS_IN_MANTISSA
-
153 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
- 3)) -
155 if ((high64(first_approx
) & halfway_constant
) == halfway_constant
&&
156 low64(first_approx
) + mantissa
< mantissa
) {
158 static_cast<UInt128
>(mantissa
) * static_cast<UInt128
>(power_of_ten
[0]);
159 UInt128 second_approx
=
160 first_approx
+ static_cast<UInt128
>(high64(low_bits
));
162 if ((high64(second_approx
) & halfway_constant
) == halfway_constant
&&
163 low64(second_approx
) + 1 == 0 &&
164 low64(low_bits
) + mantissa
< mantissa
) {
167 final_approx
= second_approx
;
169 final_approx
= first_approx
;
172 // Shifting to 54 bits for doubles and 25 bits for floats
174 static_cast<BitsType
>(high64(final_approx
) >> (BITS_IN_MANTISSA
- 1));
175 BitsType final_mantissa
=
176 static_cast<BitsType
>(high64(final_approx
) >>
177 (msb
+ BITS_IN_MANTISSA
-
178 (fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+ 3)));
179 exp2
-= static_cast<uint32_t>(1 ^ msb
); // same as !msb
181 if (round
== RoundDirection::Nearest
) {
182 // Half-way ambiguity
183 if (low64(final_approx
) == 0 &&
184 (high64(final_approx
) & halfway_constant
) == 0 &&
185 (final_mantissa
& 3) == 1) {
190 final_mantissa
+= final_mantissa
& 1;
192 } else if (round
== RoundDirection::Up
) {
193 // If any of the bits being rounded away are non-zero, then round up.
194 if (low64(final_approx
) > 0 ||
195 (high64(final_approx
) & halfway_constant
) > 0) {
196 // Add two since the last current lowest bit is about to be shifted away.
200 // else round down, which has no effect.
202 // From 54 to 53 bits for doubles and 25 to 24 bits for floats
203 final_mantissa
>>= 1;
204 if ((final_mantissa
>> (fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+ 1)) >
206 final_mantissa
>>= 1;
210 // The if block is equivalent to (but has fewer branches than):
211 // if exp2 <= 0 || exp2 >= 0x7FF { etc }
212 if (exp2
- 1 >= (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 2) {
216 ExpandedFloat
<T
> output
;
217 output
.mantissa
= final_mantissa
;
218 output
.exponent
= exp2
;
222 #if !defined(LONG_DOUBLE_IS_DOUBLE)
224 LIBC_INLINE
cpp::optional
<ExpandedFloat
<long double>>
225 eisel_lemire
<long double>(ExpandedFloat
<long double> init_num
,
226 RoundDirection round
) {
227 using BitsType
= typename
fputil::FPBits
<long double>::UIntType
;
229 BitsType mantissa
= init_num
.mantissa
;
230 int32_t exp10
= init_num
.exponent
;
232 constexpr uint32_t BITS_IN_MANTISSA
= sizeof(mantissa
) * 8;
235 // This doesn't reach very far into the range for long doubles, since it's
236 // sized for doubles and their 11 exponent bits, and not for long doubles and
237 // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long
238 // double). This is a known tradeoff, and was made because a proper long
239 // double table would be approximately 16 times larger. This would have
240 // significant memory and storage costs all the time to speed up a relatively
241 // uncommon path. In addition the exp10_to_exp2 function only approximates
242 // multiplying by log(10)/log(2), and that approximation may not be accurate
243 // out to the full long double range.
244 if (exp10
< DETAILED_POWERS_OF_TEN_MIN_EXP_10
||
245 exp10
> DETAILED_POWERS_OF_TEN_MAX_EXP_10
) {
250 uint32_t clz
= leading_zeroes
<BitsType
>(mantissa
);
253 uint32_t exp2
= static_cast<uint32_t>(exp10_to_exp2(exp10
)) +
255 fputil::FloatProperties
<long double>::EXPONENT_BIAS
- clz
;
258 const uint64_t *power_of_ten
=
259 DETAILED_POWERS_OF_TEN
[exp10
- DETAILED_POWERS_OF_TEN_MIN_EXP_10
];
261 // Since the input mantissa is more than 64 bits, we have to multiply with the
262 // full 128 bits of the power of ten to get an approximation with the same
263 // number of significant bits. This means that we only get the one
264 // approximation, and that approximation is 256 bits long.
265 UInt128 approx_upper
= static_cast<UInt128
>(high64(mantissa
)) *
266 static_cast<UInt128
>(power_of_ten
[1]);
268 UInt128 approx_middle
= static_cast<UInt128
>(high64(mantissa
)) *
269 static_cast<UInt128
>(power_of_ten
[0]) +
270 static_cast<UInt128
>(low64(mantissa
)) *
271 static_cast<UInt128
>(power_of_ten
[1]);
273 UInt128 approx_lower
= static_cast<UInt128
>(low64(mantissa
)) *
274 static_cast<UInt128
>(power_of_ten
[0]);
276 UInt128 final_approx_lower
=
277 approx_lower
+ (static_cast<UInt128
>(low64(approx_middle
)) << 64);
278 UInt128 final_approx_upper
= approx_upper
+ high64(approx_middle
) +
279 (final_approx_lower
< approx_lower
? 1 : 0);
281 // The halfway constant is used to check if the bits that will be shifted away
282 // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64
283 // (final mantissa size) - 3 (we shift away the last two bits separately for
284 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
285 // it's 12 bits for 128 bit floats in this case.
286 constexpr UInt128 HALFWAY_CONSTANT
=
287 (UInt128(1) << (BITS_IN_MANTISSA
-
288 fputil::FloatProperties
<long double>::MANTISSA_WIDTH
-
292 if ((final_approx_upper
& HALFWAY_CONSTANT
) == HALFWAY_CONSTANT
&&
293 final_approx_lower
+ mantissa
< mantissa
) {
297 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
298 BitsType msb
= final_approx_upper
>> (BITS_IN_MANTISSA
- 1);
299 BitsType final_mantissa
=
300 final_approx_upper
>>
301 (msb
+ BITS_IN_MANTISSA
-
302 (fputil::FloatProperties
<long double>::MANTISSA_WIDTH
+ 3));
303 exp2
-= static_cast<uint32_t>(1 ^ msb
); // same as !msb
305 if (round
== RoundDirection::Nearest
) {
306 // Half-way ambiguity
307 if (final_approx_lower
== 0 &&
308 (final_approx_upper
& HALFWAY_CONSTANT
) == 0 &&
309 (final_mantissa
& 3) == 1) {
313 final_mantissa
+= final_mantissa
& 1;
315 } else if (round
== RoundDirection::Up
) {
316 // If any of the bits being rounded away are non-zero, then round up.
317 if (final_approx_lower
> 0 || (final_approx_upper
& HALFWAY_CONSTANT
) > 0) {
318 // Add two since the last current lowest bit is about to be shifted away.
322 // else round down, which has no effect.
324 // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit
326 final_mantissa
>>= 1;
327 if ((final_mantissa
>>
328 (fputil::FloatProperties
<long double>::MANTISSA_WIDTH
+ 1)) > 0) {
329 final_mantissa
>>= 1;
333 // The if block is equivalent to (but has fewer branches than):
334 // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
336 (1 << fputil::FloatProperties
<long double>::EXPONENT_WIDTH
) - 2) {
340 ExpandedFloat
<long double> output
;
341 output
.mantissa
= final_mantissa
;
342 output
.exponent
= exp2
;
347 // The nth item in POWERS_OF_TWO represents the greatest power of two less than
348 // 10^n. This tells us how much we can safely shift without overshooting.
349 constexpr uint8_t POWERS_OF_TWO
[19] = {
350 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59,
352 constexpr int32_t NUM_POWERS_OF_TWO
=
353 sizeof(POWERS_OF_TWO
) / sizeof(POWERS_OF_TWO
[0]);
355 // Takes a mantissa and base 10 exponent and converts it into its closest
356 // floating point type T equivalent. This is the fallback algorithm used when
357 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based
358 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
359 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
361 LIBC_INLINE FloatConvertReturn
<T
>
362 simple_decimal_conversion(const char *__restrict numStart
,
363 RoundDirection round
= RoundDirection::Nearest
) {
366 HighPrecisionDecimal hpd
= HighPrecisionDecimal(numStart
);
368 FloatConvertReturn
<T
> output
;
370 if (hpd
.get_num_digits() == 0) {
375 // If the exponent is too large and can't be represented in this size of
376 // float, return inf.
377 if (hpd
.get_decimal_point() > 0 &&
378 exp10_to_exp2(hpd
.get_decimal_point() - 1) >
379 static_cast<int64_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
)) {
380 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
381 output
.error
= ERANGE
;
384 // If the exponent is too small even for a subnormal, return 0.
385 if (hpd
.get_decimal_point() < 0 &&
386 exp10_to_exp2(-hpd
.get_decimal_point()) >
387 static_cast<int64_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
+
388 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
)) {
390 output
.error
= ERANGE
;
394 // Right shift until the number is smaller than 1.
395 while (hpd
.get_decimal_point() > 0) {
396 int32_t shift_amount
= 0;
397 if (hpd
.get_decimal_point() >= NUM_POWERS_OF_TWO
) {
400 shift_amount
= POWERS_OF_TWO
[hpd
.get_decimal_point()];
402 exp2
+= shift_amount
;
403 hpd
.shift(-shift_amount
);
406 // Left shift until the number is between 1/2 and 1
407 while (hpd
.get_decimal_point() < 0 ||
408 (hpd
.get_decimal_point() == 0 && hpd
.get_digits()[0] < 5)) {
409 int32_t shift_amount
= 0;
411 if (-hpd
.get_decimal_point() >= NUM_POWERS_OF_TWO
) {
413 } else if (hpd
.get_decimal_point() != 0) {
414 shift_amount
= POWERS_OF_TWO
[-hpd
.get_decimal_point()];
415 } else { // This handles the case of the number being between .1 and .5
418 exp2
-= shift_amount
;
419 hpd
.shift(shift_amount
);
422 // Left shift once so that the number is between 1 and 2
426 // Get the biased exponent
427 exp2
+= fputil::FloatProperties
<T
>::EXPONENT_BIAS
;
429 // Handle the exponent being too large (and return inf).
430 if (exp2
>= fputil::FPBits
<T
>::MAX_EXPONENT
) {
431 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
432 output
.error
= ERANGE
;
436 // Shift left to fill the mantissa
437 hpd
.shift(fputil::FloatProperties
<T
>::MANTISSA_WIDTH
);
438 typename
fputil::FPBits
<T
>::UIntType final_mantissa
=
439 hpd
.round_to_integer_type
<typename
fputil::FPBits
<T
>::UIntType
>();
443 // Shift right until there is a valid exponent
448 // Shift right one more time to compensate for the left shift to get it
452 hpd
.round_to_integer_type
<typename
fputil::FPBits
<T
>::UIntType
>(round
);
454 // Check if by shifting right we've caused this to round to a normal number.
455 if ((final_mantissa
>> fputil::FloatProperties
<T
>::MANTISSA_WIDTH
) != 0) {
460 // Check if rounding added a bit, and shift down if that's the case.
461 if (final_mantissa
== typename
fputil::FPBits
<T
>::UIntType(2)
462 << fputil::FloatProperties
<T
>::MANTISSA_WIDTH
) {
463 final_mantissa
>>= 1;
466 // Check if this rounding causes exp2 to go out of range and make the result
467 // INF. If this is the case, then finalMantissa and exp2 are already the
468 // correct values for an INF result.
469 if (exp2
>= fputil::FPBits
<T
>::MAX_EXPONENT
) {
470 output
.error
= ERANGE
;
475 output
.error
= ERANGE
;
478 output
.num
= {final_mantissa
, exp2
};
482 // This class is used for templating the constants for Clinger's Fast Path,
483 // described as a method of approximation in
484 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990
485 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557.
486 // As well as the additions by Gay that extend the useful range by the number of
487 // exact digits stored by the float type, described in
488 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions;
489 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10.
490 template <class T
> class ClingerConsts
;
492 template <> class ClingerConsts
<float> {
494 static constexpr float POWERS_OF_TEN_ARRAY
[] = {1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
,
495 1e6
, 1e7
, 1e8
, 1e9
, 1e10
};
496 static constexpr int32_t EXACT_POWERS_OF_TEN
= 10;
497 static constexpr int32_t DIGITS_IN_MANTISSA
= 7;
498 static constexpr float MAX_EXACT_INT
= 16777215.0;
501 template <> class ClingerConsts
<double> {
503 static constexpr double POWERS_OF_TEN_ARRAY
[] = {
504 1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
, 1e6
, 1e7
, 1e8
, 1e9
, 1e10
, 1e11
,
505 1e12
, 1e13
, 1e14
, 1e15
, 1e16
, 1e17
, 1e18
, 1e19
, 1e20
, 1e21
, 1e22
};
506 static constexpr int32_t EXACT_POWERS_OF_TEN
= 22;
507 static constexpr int32_t DIGITS_IN_MANTISSA
= 15;
508 static constexpr double MAX_EXACT_INT
= 9007199254740991.0;
511 #if defined(LONG_DOUBLE_IS_DOUBLE)
512 template <> class ClingerConsts
<long double> {
514 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
515 1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
, 1e6
, 1e7
, 1e8
, 1e9
, 1e10
, 1e11
,
516 1e12
, 1e13
, 1e14
, 1e15
, 1e16
, 1e17
, 1e18
, 1e19
, 1e20
, 1e21
, 1e22
};
517 static constexpr int32_t EXACT_POWERS_OF_TEN
=
518 ClingerConsts
<double>::EXACT_POWERS_OF_TEN
;
519 static constexpr int32_t DIGITS_IN_MANTISSA
=
520 ClingerConsts
<double>::DIGITS_IN_MANTISSA
;
521 static constexpr long double MAX_EXACT_INT
=
522 ClingerConsts
<double>::MAX_EXACT_INT
;
524 #elif defined(SPECIAL_X86_LONG_DOUBLE)
525 template <> class ClingerConsts
<long double> {
527 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
528 1e0L
, 1e1L
, 1e2L
, 1e3L
, 1e4L
, 1e5L
, 1e6L
, 1e7L
, 1e8L
, 1e9L
,
529 1e10L
, 1e11L
, 1e12L
, 1e13L
, 1e14L
, 1e15L
, 1e16L
, 1e17L
, 1e18L
, 1e19L
,
530 1e20L
, 1e21L
, 1e22L
, 1e23L
, 1e24L
, 1e25L
, 1e26L
, 1e27L
};
531 static constexpr int32_t EXACT_POWERS_OF_TEN
= 27;
532 static constexpr int32_t DIGITS_IN_MANTISSA
= 21;
533 static constexpr long double MAX_EXACT_INT
= 18446744073709551615.0L;
536 template <> class ClingerConsts
<long double> {
538 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
539 1e0L
, 1e1L
, 1e2L
, 1e3L
, 1e4L
, 1e5L
, 1e6L
, 1e7L
, 1e8L
, 1e9L
,
540 1e10L
, 1e11L
, 1e12L
, 1e13L
, 1e14L
, 1e15L
, 1e16L
, 1e17L
, 1e18L
, 1e19L
,
541 1e20L
, 1e21L
, 1e22L
, 1e23L
, 1e24L
, 1e25L
, 1e26L
, 1e27L
, 1e28L
, 1e29L
,
542 1e30L
, 1e31L
, 1e32L
, 1e33L
, 1e34L
, 1e35L
, 1e36L
, 1e37L
, 1e38L
, 1e39L
,
543 1e40L
, 1e41L
, 1e42L
, 1e43L
, 1e44L
, 1e45L
, 1e46L
, 1e47L
, 1e48L
};
544 static constexpr int32_t EXACT_POWERS_OF_TEN
= 48;
545 static constexpr int32_t DIGITS_IN_MANTISSA
= 33;
546 static constexpr long double MAX_EXACT_INT
=
547 10384593717069655257060992658440191.0L;
551 // Take an exact mantissa and exponent and attempt to convert it using only
552 // exact floating point arithmetic. This only handles numbers with low
553 // exponents, but handles them quickly. This is an implementation of Clinger's
554 // Fast Path, as described above.
556 LIBC_INLINE
cpp::optional
<ExpandedFloat
<T
>>
557 clinger_fast_path(ExpandedFloat
<T
> init_num
,
558 RoundDirection round
= RoundDirection::Nearest
) {
560 typename
fputil::FPBits
<T
>::UIntType mantissa
= init_num
.mantissa
;
561 int32_t exp10
= init_num
.exponent
;
563 if (mantissa
>> fputil::FloatProperties
<T
>::MANTISSA_WIDTH
> 0) {
567 fputil::FPBits
<T
> result
;
568 T float_mantissa
= static_cast<T
>(mantissa
);
571 result
= fputil::FPBits
<T
>(float_mantissa
);
574 if (exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
+
575 ClingerConsts
<T
>::DIGITS_IN_MANTISSA
) {
578 if (exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
) {
579 float_mantissa
= float_mantissa
*
580 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
581 [exp10
- ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
];
582 exp10
= ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
;
584 if (float_mantissa
> ClingerConsts
<T
>::MAX_EXACT_INT
) {
587 result
= fputil::FPBits
<T
>(float_mantissa
*
588 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[exp10
]);
589 } else if (exp10
< 0) {
590 if (-exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
) {
593 result
= fputil::FPBits
<T
>(float_mantissa
/
594 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[-exp10
]);
597 // If the rounding mode is not nearest, then the sign of the number may affect
598 // the result. To make sure the rounding mode is respected properly, the
599 // calculation is redone with a negative result, and the rounding mode is used
600 // to select the correct result.
601 if (round
!= RoundDirection::Nearest
) {
602 fputil::FPBits
<T
> negative_result
;
603 // I'm 99% sure this will break under fast math optimizations.
604 negative_result
= fputil::FPBits
<T
>(
605 (-float_mantissa
) * ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[exp10
]);
607 // If the results are equal, then we don't need to use the rounding mode.
608 if (T(result
) != -T(negative_result
)) {
609 fputil::FPBits
<T
> lower_result
;
610 fputil::FPBits
<T
> higher_result
;
612 if (T(result
) < -T(negative_result
)) {
613 lower_result
= result
;
614 higher_result
= negative_result
;
616 lower_result
= negative_result
;
617 higher_result
= result
;
620 if (round
== RoundDirection::Up
) {
621 result
= higher_result
;
623 result
= lower_result
;
628 ExpandedFloat
<T
> output
;
629 output
.mantissa
= result
.get_mantissa();
630 output
.exponent
= result
.get_unbiased_exponent();
634 // The upper bound is the highest base-10 exponent that could possibly give a
635 // non-inf result for this size of float. The value is
636 // log10(2^(exponent bias)).
637 // The generic approximation uses the fact that log10(2^x) ~= x/3
638 template <typename T
> constexpr int32_t get_upper_bound() {
639 return static_cast<int32_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
) / 3;
642 template <> constexpr int32_t get_upper_bound
<float>() { return 39; }
644 template <> constexpr int32_t get_upper_bound
<double>() { return 309; }
646 // The lower bound is the largest negative base-10 exponent that could possibly
647 // give a non-zero result for this size of float. The value is
648 // log10(2^(exponent bias + final mantissa width + intermediate mantissa width))
649 // The intermediate mantissa is the integer that's been parsed from the string,
650 // and the final mantissa is the fractional part of the output number. A very
651 // low base 10 exponent with a very high intermediate mantissa can cancel each
652 // other out, and subnormal numbers allow for the result to be at the very low
653 // end of the final mantissa.
654 template <typename T
> constexpr int32_t get_lower_bound() {
655 return -(static_cast<int32_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
+
656 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+
661 template <> constexpr int32_t get_lower_bound
<float>() {
662 return -(39 + 6 + 10);
665 template <> constexpr int32_t get_lower_bound
<double>() {
666 return -(309 + 15 + 20);
669 // Takes a mantissa and base 10 exponent and converts it into its closest
670 // floating point type T equivalient. First we try the Eisel-Lemire algorithm,
671 // then if that fails then we fall back to a more accurate algorithm for
672 // accuracy. The resulting mantissa and exponent are placed in outputMantissa
675 LIBC_INLINE FloatConvertReturn
<T
>
676 decimal_exp_to_float(ExpandedFloat
<T
> init_num
, const char *__restrict numStart
,
677 bool truncated
, RoundDirection round
) {
679 typename
fputil::FPBits
<T
>::UIntType mantissa
= init_num
.mantissa
;
680 int32_t exp10
= init_num
.exponent
;
682 FloatConvertReturn
<T
> output
;
683 cpp::optional
<ExpandedFloat
<T
>> opt_output
;
685 // If the exponent is too large and can't be represented in this size of
686 // float, return inf. These bounds are relatively loose, but are mostly
687 // serving as a first pass. Some close numbers getting through is okay.
688 if (exp10
> get_upper_bound
<T
>()) {
689 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
690 output
.error
= ERANGE
;
693 // If the exponent is too small even for a subnormal, return 0.
694 if (exp10
< get_lower_bound
<T
>()) {
696 output
.error
= ERANGE
;
700 // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail.
701 // For this reason the "error" field in their return values is used to
702 // represent whether they've failed as opposed to the errno value. Any
703 // non-zero value represents a failure.
705 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
707 opt_output
= clinger_fast_path
<T
>(init_num
, round
);
708 // If the algorithm succeeded the error will be 0, else it will be a
710 if (opt_output
.has_value()) {
711 return {opt_output
.value(), 0};
714 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
716 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
718 opt_output
= eisel_lemire
<T
>(init_num
, round
);
719 if (opt_output
.has_value()) {
721 return {opt_output
.value(), 0};
723 // If the mantissa is truncated, then the result may be off by the LSB, so
724 // check if rounding the mantissa up changes the result. If not, then it's
725 // safe, else use the fallback.
726 auto secound_output
= eisel_lemire
<T
>({mantissa
+ 1, exp10
}, round
);
727 if (secound_output
.has_value()) {
728 if (opt_output
->mantissa
== secound_output
->mantissa
&&
729 opt_output
->exponent
== secound_output
->exponent
) {
730 return {opt_output
.value(), 0};
734 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
736 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
737 output
= simple_decimal_conversion
<T
>(numStart
, round
);
739 #warning "Simple decimal conversion is disabled, result may not be correct."
740 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
745 // Takes a mantissa and base 2 exponent and converts it into its closest
746 // floating point type T equivalient. Since the exponent is already in the right
747 // form, this is mostly just shifting and rounding. This is used for hexadecimal
748 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent.
750 LIBC_INLINE FloatConvertReturn
<T
> binary_exp_to_float(ExpandedFloat
<T
> init_num
,
752 RoundDirection round
) {
753 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
755 BitsType mantissa
= init_num
.mantissa
;
756 int32_t exp2
= init_num
.exponent
;
758 FloatConvertReturn
<T
> output
;
760 // This is the number of leading zeroes a properly normalized float of type T
762 constexpr int32_t NUMBITS
= sizeof(BitsType
) * 8;
763 constexpr int32_t INF_EXP
=
764 (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 1;
766 // Normalization step 1: Bring the leading bit to the highest bit of BitsType.
767 uint32_t amount_to_shift_left
= leading_zeroes
<BitsType
>(mantissa
);
768 mantissa
<<= amount_to_shift_left
;
770 // Keep exp2 representing the exponent of the lowest bit of BitsType.
771 exp2
-= amount_to_shift_left
;
773 // biasedExponent represents the biased exponent of the most significant bit.
774 int32_t biased_exponent
=
775 exp2
+ NUMBITS
+ fputil::FPBits
<T
>::EXPONENT_BIAS
- 1;
777 // Handle numbers that're too large and get squashed to inf
778 if (biased_exponent
>= INF_EXP
) {
779 // This indicates an overflow, so we make the result INF and set errno.
780 output
.num
= {0, (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 1};
781 output
.error
= ERANGE
;
785 uint32_t amount_to_shift_right
=
786 NUMBITS
- fputil::FloatProperties
<T
>::MANTISSA_WIDTH
- 1;
788 // Handle subnormals.
789 if (biased_exponent
<= 0) {
790 amount_to_shift_right
+= 1 - biased_exponent
;
793 if (amount_to_shift_right
> NUMBITS
) {
794 // Return 0 if the exponent is too small.
796 output
.error
= ERANGE
;
801 BitsType round_bit_mask
= BitsType(1) << (amount_to_shift_right
- 1);
802 BitsType sticky_mask
= round_bit_mask
- 1;
803 bool round_bit
= mantissa
& round_bit_mask
;
804 bool sticky_bit
= static_cast<bool>(mantissa
& sticky_mask
) || truncated
;
806 if (amount_to_shift_right
< NUMBITS
) {
807 // Shift the mantissa and clear the implicit bit.
808 mantissa
>>= amount_to_shift_right
;
809 mantissa
&= fputil::FloatProperties
<T
>::MANTISSA_MASK
;
813 bool least_significant_bit
= mantissa
& BitsType(1);
815 // TODO: check that this rounding behavior is correct.
817 if (round
== RoundDirection::Nearest
) {
818 // Perform rounding-to-nearest, tie-to-even.
819 if (round_bit
&& (least_significant_bit
|| sticky_bit
)) {
822 } else if (round
== RoundDirection::Up
) {
823 if (round_bit
|| sticky_bit
) {
826 } else /* (round == RoundDirection::Down)*/ {
827 if (round_bit
&& sticky_bit
) {
832 if (mantissa
> fputil::FloatProperties
<T
>::MANTISSA_MASK
) {
833 // Rounding causes the exponent to increase.
836 if (biased_exponent
== INF_EXP
) {
837 output
.error
= ERANGE
;
841 if (biased_exponent
== 0) {
842 output
.error
= ERANGE
;
845 output
.num
= {mantissa
& fputil::FloatProperties
<T
>::MANTISSA_MASK
,
850 // checks if the next 4 characters of the string pointer are the start of a
851 // hexadecimal floating point number. Does not advance the string pointer.
852 LIBC_INLINE
bool is_float_hex_start(const char *__restrict src
,
853 const char decimalPoint
) {
854 if (!(src
[0] == '0' && tolower(src
[1]) == 'x')) {
857 size_t first_digit
= 2;
858 if (src
[2] == decimalPoint
) {
861 return isalnum(src
[first_digit
]) && b36_char_to_int(src
[first_digit
]) < 16;
864 // Takes the start of a string representing a decimal float, as well as the
865 // local decimalPoint. It returns if it suceeded in parsing any digits, and if
866 // the return value is true then the outputs are pointer to the end of the
867 // number, and the mantissa and exponent for the closest float T representation.
868 // If the return value is false, then it is assumed that there is no number
871 LIBC_INLINE StrToNumResult
<ExpandedFloat
<T
>>
872 decimal_string_to_float(const char *__restrict src
, const char DECIMAL_POINT
,
873 RoundDirection round
) {
874 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
875 constexpr uint32_t BASE
= 10;
876 constexpr char EXPONENT_MARKER
= 'e';
878 bool truncated
= false;
879 bool seen_digit
= false;
880 bool after_decimal
= false;
881 BitsType mantissa
= 0;
882 int32_t exponent
= 0;
886 StrToNumResult
<ExpandedFloat
<T
>> output({0, 0});
888 // The goal for the first step of parsing is to convert the number in src to
889 // the format mantissa * (base ^ exponent)
891 // The loop fills the mantissa with as many digits as it can hold
892 const BitsType bitstype_max_div_by_base
=
893 cpp::numeric_limits
<BitsType
>::max() / BASE
;
895 if (isdigit(src
[index
])) {
896 uint32_t digit
= src
[index
] - '0';
899 if (mantissa
< bitstype_max_div_by_base
) {
900 mantissa
= (mantissa
* BASE
) + digit
;
914 if (src
[index
] == DECIMAL_POINT
) {
916 break; // this means that src[index] points to a second decimal point,
917 // ending the number.
919 after_decimal
= true;
923 // The character is neither a digit nor a decimal point.
930 if (tolower(src
[index
]) == EXPONENT_MARKER
) {
931 if (src
[index
+ 1] == '+' || src
[index
+ 1] == '-' ||
932 isdigit(src
[index
+ 1])) {
934 auto result
= strtointeger
<int32_t>(src
+ index
, 10);
935 if (result
.has_error())
936 output
.error
= result
.error
;
937 int32_t add_to_exponent
= result
.value
;
938 index
+= result
.parsed_len
;
940 // Here we do this operation as int64 to avoid overflow.
941 int64_t temp_exponent
= static_cast<int64_t>(exponent
) +
942 static_cast<int64_t>(add_to_exponent
);
944 // If the result is in the valid range, then we use it. The valid range is
945 // also within the int32 range, so this prevents overflow issues.
946 if (temp_exponent
> fputil::FPBits
<T
>::MAX_EXPONENT
) {
947 exponent
= fputil::FPBits
<T
>::MAX_EXPONENT
;
948 } else if (temp_exponent
< -fputil::FPBits
<T
>::MAX_EXPONENT
) {
949 exponent
= -fputil::FPBits
<T
>::MAX_EXPONENT
;
951 exponent
= static_cast<int32_t>(temp_exponent
);
956 output
.parsed_len
= index
;
957 if (mantissa
== 0) { // if we have a 0, then also 0 the exponent.
958 output
.value
= {0, 0};
961 decimal_exp_to_float
<T
>({mantissa
, exponent
}, src
, truncated
, round
);
962 output
.value
= temp
.num
;
963 output
.error
= temp
.error
;
968 // Takes the start of a string representing a hexadecimal float, as well as the
969 // local decimal point. It returns if it suceeded in parsing any digits, and if
970 // the return value is true then the outputs are pointer to the end of the
971 // number, and the mantissa and exponent for the closest float T representation.
972 // If the return value is false, then it is assumed that there is no number
975 LIBC_INLINE StrToNumResult
<ExpandedFloat
<T
>>
976 hexadecimal_string_to_float(const char *__restrict src
,
977 const char DECIMAL_POINT
, RoundDirection round
) {
978 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
979 constexpr uint32_t BASE
= 16;
980 constexpr char EXPONENT_MARKER
= 'p';
982 bool truncated
= false;
983 bool seen_digit
= false;
984 bool after_decimal
= false;
985 BitsType mantissa
= 0;
986 int32_t exponent
= 0;
990 StrToNumResult
<ExpandedFloat
<T
>> output({0, 0});
992 // The goal for the first step of parsing is to convert the number in src to
993 // the format mantissa * (base ^ exponent)
995 // The loop fills the mantissa with as many digits as it can hold
996 const BitsType bitstype_max_div_by_base
=
997 cpp::numeric_limits
<BitsType
>::max() / BASE
;
999 if (isalnum(src
[index
])) {
1000 uint32_t digit
= b36_char_to_int(src
[index
]);
1006 if (mantissa
< bitstype_max_div_by_base
) {
1007 mantissa
= (mantissa
* BASE
) + digit
;
1019 if (src
[index
] == DECIMAL_POINT
) {
1020 if (after_decimal
) {
1021 break; // this means that src[index] points to a second decimal point,
1022 // ending the number.
1024 after_decimal
= true;
1028 // The character is neither a hexadecimal digit nor a decimal point.
1035 // Convert the exponent from having a base of 16 to having a base of 2.
1038 if (tolower(src
[index
]) == EXPONENT_MARKER
) {
1039 if (src
[index
+ 1] == '+' || src
[index
+ 1] == '-' ||
1040 isdigit(src
[index
+ 1])) {
1042 auto result
= strtointeger
<int32_t>(src
+ index
, 10);
1043 if (result
.has_error())
1044 output
.error
= result
.error
;
1046 int32_t add_to_exponent
= result
.value
;
1047 index
+= result
.parsed_len
;
1049 // Here we do this operation as int64 to avoid overflow.
1050 int64_t temp_exponent
= static_cast<int64_t>(exponent
) +
1051 static_cast<int64_t>(add_to_exponent
);
1053 // If the result is in the valid range, then we use it. The valid range is
1054 // also within the int32 range, so this prevents overflow issues.
1055 if (temp_exponent
> fputil::FPBits
<T
>::MAX_EXPONENT
) {
1056 exponent
= fputil::FPBits
<T
>::MAX_EXPONENT
;
1057 } else if (temp_exponent
< -fputil::FPBits
<T
>::MAX_EXPONENT
) {
1058 exponent
= -fputil::FPBits
<T
>::MAX_EXPONENT
;
1060 exponent
= static_cast<int32_t>(temp_exponent
);
1064 output
.parsed_len
= index
;
1065 if (mantissa
== 0) { // if we have a 0, then also 0 the exponent.
1066 output
.value
.exponent
= 0;
1067 output
.value
.mantissa
= 0;
1069 auto temp
= binary_exp_to_float
<T
>({mantissa
, exponent
}, truncated
, round
);
1070 output
.error
= temp
.error
;
1071 output
.value
= temp
.num
;
1076 // Takes a pointer to a string and a pointer to a string pointer. This function
1077 // is used as the backend for all of the string to float functions.
1079 LIBC_INLINE StrToNumResult
<T
> strtofloatingpoint(const char *__restrict src
) {
1080 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
1081 fputil::FPBits
<T
> result
= fputil::FPBits
<T
>();
1082 bool seen_digit
= false;
1087 ptrdiff_t index
= first_non_whitespace(src
) - src
;
1089 if (src
[index
] == '+' || src
[index
] == '-') {
1095 result
.set_sign(true);
1098 static constexpr char DECIMAL_POINT
= '.';
1099 static const char *inf_string
= "infinity";
1100 static const char *nan_string
= "nan";
1102 if (isdigit(src
[index
]) || src
[index
] == DECIMAL_POINT
) { // regular number
1104 if (is_float_hex_start(src
+ index
, DECIMAL_POINT
)) {
1110 RoundDirection round_direction
= RoundDirection::Nearest
;
1112 switch (fputil::get_round()) {
1114 round_direction
= RoundDirection::Nearest
;
1118 round_direction
= RoundDirection::Up
;
1120 round_direction
= RoundDirection::Down
;
1125 round_direction
= RoundDirection::Down
;
1127 round_direction
= RoundDirection::Up
;
1131 round_direction
= RoundDirection::Down
;
1135 StrToNumResult
<ExpandedFloat
<T
>> parse_result({0, 0});
1137 parse_result
= hexadecimal_string_to_float
<T
>(src
+ index
, DECIMAL_POINT
,
1139 } else { // base is 10
1140 parse_result
= decimal_string_to_float
<T
>(src
+ index
, DECIMAL_POINT
,
1143 seen_digit
= parse_result
.parsed_len
!= 0;
1144 result
.set_mantissa(parse_result
.value
.mantissa
);
1145 result
.set_unbiased_exponent(parse_result
.value
.exponent
);
1146 index
+= parse_result
.parsed_len
;
1147 error
= parse_result
.error
;
1148 } else if (tolower(src
[index
]) == 'n') { // NaN
1149 if (tolower(src
[index
+ 1]) == nan_string
[1] &&
1150 tolower(src
[index
+ 2]) == nan_string
[2]) {
1153 BitsType nan_mantissa
= 0;
1154 // this handles the case of `NaN(n-character-sequence)`, where the
1155 // n-character-sequence is made of 0 or more letters and numbers in any
1157 if (src
[index
] == '(') {
1158 size_t left_paren
= index
;
1160 while (isalnum(src
[index
]))
1162 if (src
[index
] == ')') {
1164 if (isdigit(src
[left_paren
+ 1])) {
1165 // This is to prevent errors when BitsType is larger than 64 bits,
1166 // since strtointeger only supports up to 64 bits. This is actually
1167 // more than is required by the specification, which says for the
1168 // input type "NAN(n-char-sequence)" that "the meaning of
1169 // the n-char sequence is implementation-defined."
1171 auto strtoint_result
=
1172 strtointeger
<uint64_t>(src
+ (left_paren
+ 1), 0);
1173 if (strtoint_result
.has_error()) {
1174 error
= strtoint_result
.error
;
1176 nan_mantissa
= strtoint_result
.value
;
1177 if (src
[left_paren
+ 1 + strtoint_result
.parsed_len
] != ')')
1184 nan_mantissa
|= fputil::FloatProperties
<T
>::QUIET_NAN_MASK
;
1185 if (result
.get_sign()) {
1186 result
= fputil::FPBits
<T
>(result
.build_quiet_nan(nan_mantissa
));
1187 result
.set_sign(true);
1189 result
.set_sign(false);
1190 result
= fputil::FPBits
<T
>(result
.build_quiet_nan(nan_mantissa
));
1193 } else if (tolower(src
[index
]) == 'i') { // INF
1194 if (tolower(src
[index
+ 1]) == inf_string
[1] &&
1195 tolower(src
[index
+ 2]) == inf_string
[2]) {
1197 if (result
.get_sign())
1198 result
= result
.neg_inf();
1200 result
= result
.inf();
1201 if (tolower(src
[index
+ 3]) == inf_string
[3] &&
1202 tolower(src
[index
+ 4]) == inf_string
[4] &&
1203 tolower(src
[index
+ 5]) == inf_string
[5] &&
1204 tolower(src
[index
+ 6]) == inf_string
[6] &&
1205 tolower(src
[index
+ 7]) == inf_string
[7]) {
1206 // if the string is "INFINITY" then consume 8 characters.
1213 if (!seen_digit
) { // If there is nothing to actually parse, then return 0.
1214 return {T(0), 0, error
};
1217 // This function only does something if T is long double and the platform uses
1218 // special 80 bit long doubles. Otherwise it should be inlined out.
1219 set_implicit_bit
<T
>(result
);
1221 return {T(result
), index
, error
};
1224 } // namespace internal
1225 } // namespace __llvm_libc
1227 #endif // LIBC_SRC_SUPPORT_STR_TO_FLOAT_H