1 //===-- String to float conversion utils ------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LIBC_SRC_SUPPORT_STR_TO_FLOAT_H
10 #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H
12 #include "src/__support/CPP/limits.h"
13 #include "src/__support/CPP/optional.h"
14 #include "src/__support/FPUtil/FEnvImpl.h"
15 #include "src/__support/FPUtil/FPBits.h"
16 #include "src/__support/FPUtil/dyadic_float.h"
17 #include "src/__support/FPUtil/rounding_mode.h"
18 #include "src/__support/UInt128.h"
19 #include "src/__support/builtin_wrappers.h"
20 #include "src/__support/common.h"
21 #include "src/__support/ctype_utils.h"
22 #include "src/__support/detailed_powers_of_ten.h"
23 #include "src/__support/high_precision_decimal.h"
24 #include "src/__support/str_to_integer.h"
25 #include "src/__support/str_to_num_result.h"
26 #include "src/errno/libc_errno.h" // For ERANGE
28 namespace __llvm_libc
{
31 template <class T
> struct ExpandedFloat
{
32 typename
fputil::FPBits
<T
>::UIntType mantissa
;
36 template <class T
> struct FloatConvertReturn
{
37 ExpandedFloat
<T
> num
= {0, 0};
41 template <class T
> LIBC_INLINE
uint32_t leading_zeroes(T inputNumber
) {
42 constexpr uint32_t BITS_IN_T
= sizeof(T
) * 8;
43 if (inputNumber
== 0) {
46 uint32_t cur_guess
= BITS_IN_T
/ 2;
47 uint32_t range_size
= BITS_IN_T
/ 2;
48 // while either shifting by curGuess does not get rid of all of the bits or
49 // shifting by one less also gets rid of all of the bits then we have not
50 // found the first bit.
51 while (((inputNumber
>> cur_guess
) > 0) ||
52 ((inputNumber
>> (cur_guess
- 1)) == 0)) {
53 // Binary search for the first set bit
55 if (range_size
== 0) {
58 if ((inputNumber
>> cur_guess
) > 0) {
59 cur_guess
+= range_size
;
61 cur_guess
-= range_size
;
64 if (inputNumber
>> cur_guess
> 0) {
67 return BITS_IN_T
- cur_guess
;
71 LIBC_INLINE
uint32_t leading_zeroes
<uint32_t>(uint32_t inputNumber
) {
72 return safe_clz(inputNumber
);
76 LIBC_INLINE
uint32_t leading_zeroes
<uint64_t>(uint64_t inputNumber
) {
77 return safe_clz(inputNumber
);
80 LIBC_INLINE
uint64_t low64(const UInt128
&num
) {
81 return static_cast<uint64_t>(num
& 0xffffffffffffffff);
84 LIBC_INLINE
uint64_t high64(const UInt128
&num
) {
85 return static_cast<uint64_t>(num
>> 64);
88 template <class T
> LIBC_INLINE
void set_implicit_bit(fputil::FPBits
<T
> &) {
92 #if defined(SPECIAL_X86_LONG_DOUBLE)
95 set_implicit_bit
<long double>(fputil::FPBits
<long double> &result
) {
96 result
.set_implicit_bit(result
.get_unbiased_exponent() != 0);
100 // This Eisel-Lemire implementation is based on the algorithm described in the
101 // paper Number Parsing at a Gigabyte per Second, Software: Practice and
102 // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the
103 // description by Nigel Tao
104 // (https://nigeltao.github.io/blog/2020/eisel-lemire.html) and the golang
105 // implementation, also by Nigel Tao
106 // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25)
107 // for some optimizations as well as handling 32 bit floats.
109 LIBC_INLINE
cpp::optional
<ExpandedFloat
<T
>>
110 eisel_lemire(ExpandedFloat
<T
> init_num
,
111 RoundDirection round
= RoundDirection::Nearest
) {
113 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
115 BitsType mantissa
= init_num
.mantissa
;
116 int32_t exp10
= init_num
.exponent
;
118 constexpr uint32_t BITS_IN_MANTISSA
= sizeof(mantissa
) * 8;
120 if (sizeof(T
) > 8) { // This algorithm cannot handle anything longer than a
121 // double, so we skip straight to the fallback.
126 if (exp10
< DETAILED_POWERS_OF_TEN_MIN_EXP_10
||
127 exp10
> DETAILED_POWERS_OF_TEN_MAX_EXP_10
) {
132 uint32_t clz
= leading_zeroes
<BitsType
>(mantissa
);
135 uint32_t exp2
= static_cast<uint32_t>(exp10_to_exp2(exp10
)) +
136 BITS_IN_MANTISSA
+ fputil::FloatProperties
<T
>::EXPONENT_BIAS
-
140 const uint64_t *power_of_ten
=
141 DETAILED_POWERS_OF_TEN
[exp10
- DETAILED_POWERS_OF_TEN_MIN_EXP_10
];
143 UInt128 first_approx
=
144 static_cast<UInt128
>(mantissa
) * static_cast<UInt128
>(power_of_ten
[1]);
146 // Wider Approximation
147 UInt128 final_approx
;
148 // The halfway constant is used to check if the bits that will be shifted away
149 // intially are all 1. For doubles this is 64 (bitstype size) - 52 (final
150 // mantissa size) - 3 (we shift away the last two bits separately for
151 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
152 // it's 6 bits for floats in this case.
153 const uint64_t halfway_constant
=
154 (uint64_t(1) << (BITS_IN_MANTISSA
-
155 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
- 3)) -
157 if ((high64(first_approx
) & halfway_constant
) == halfway_constant
&&
158 low64(first_approx
) + mantissa
< mantissa
) {
160 static_cast<UInt128
>(mantissa
) * static_cast<UInt128
>(power_of_ten
[0]);
161 UInt128 second_approx
=
162 first_approx
+ static_cast<UInt128
>(high64(low_bits
));
164 if ((high64(second_approx
) & halfway_constant
) == halfway_constant
&&
165 low64(second_approx
) + 1 == 0 &&
166 low64(low_bits
) + mantissa
< mantissa
) {
169 final_approx
= second_approx
;
171 final_approx
= first_approx
;
174 // Shifting to 54 bits for doubles and 25 bits for floats
176 static_cast<BitsType
>(high64(final_approx
) >> (BITS_IN_MANTISSA
- 1));
177 BitsType final_mantissa
=
178 static_cast<BitsType
>(high64(final_approx
) >>
179 (msb
+ BITS_IN_MANTISSA
-
180 (fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+ 3)));
181 exp2
-= static_cast<uint32_t>(1 ^ msb
); // same as !msb
183 if (round
== RoundDirection::Nearest
) {
184 // Half-way ambiguity
185 if (low64(final_approx
) == 0 &&
186 (high64(final_approx
) & halfway_constant
) == 0 &&
187 (final_mantissa
& 3) == 1) {
192 final_mantissa
+= final_mantissa
& 1;
194 } else if (round
== RoundDirection::Up
) {
195 // If any of the bits being rounded away are non-zero, then round up.
196 if (low64(final_approx
) > 0 ||
197 (high64(final_approx
) & halfway_constant
) > 0) {
198 // Add two since the last current lowest bit is about to be shifted away.
202 // else round down, which has no effect.
204 // From 54 to 53 bits for doubles and 25 to 24 bits for floats
205 final_mantissa
>>= 1;
206 if ((final_mantissa
>> (fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+ 1)) >
208 final_mantissa
>>= 1;
212 // The if block is equivalent to (but has fewer branches than):
213 // if exp2 <= 0 || exp2 >= 0x7FF { etc }
214 if (exp2
- 1 >= (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 2) {
218 ExpandedFloat
<T
> output
;
219 output
.mantissa
= final_mantissa
;
220 output
.exponent
= exp2
;
224 #if !defined(LONG_DOUBLE_IS_DOUBLE)
226 LIBC_INLINE
cpp::optional
<ExpandedFloat
<long double>>
227 eisel_lemire
<long double>(ExpandedFloat
<long double> init_num
,
228 RoundDirection round
) {
229 using BitsType
= typename
fputil::FPBits
<long double>::UIntType
;
231 BitsType mantissa
= init_num
.mantissa
;
232 int32_t exp10
= init_num
.exponent
;
234 constexpr uint32_t BITS_IN_MANTISSA
= sizeof(mantissa
) * 8;
237 // This doesn't reach very far into the range for long doubles, since it's
238 // sized for doubles and their 11 exponent bits, and not for long doubles and
239 // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long
240 // double). This is a known tradeoff, and was made because a proper long
241 // double table would be approximately 16 times larger. This would have
242 // significant memory and storage costs all the time to speed up a relatively
243 // uncommon path. In addition the exp10_to_exp2 function only approximates
244 // multiplying by log(10)/log(2), and that approximation may not be accurate
245 // out to the full long double range.
246 if (exp10
< DETAILED_POWERS_OF_TEN_MIN_EXP_10
||
247 exp10
> DETAILED_POWERS_OF_TEN_MAX_EXP_10
) {
252 uint32_t clz
= leading_zeroes
<BitsType
>(mantissa
);
255 uint32_t exp2
= static_cast<uint32_t>(exp10_to_exp2(exp10
)) +
257 fputil::FloatProperties
<long double>::EXPONENT_BIAS
- clz
;
260 const uint64_t *power_of_ten
=
261 DETAILED_POWERS_OF_TEN
[exp10
- DETAILED_POWERS_OF_TEN_MIN_EXP_10
];
263 // Since the input mantissa is more than 64 bits, we have to multiply with the
264 // full 128 bits of the power of ten to get an approximation with the same
265 // number of significant bits. This means that we only get the one
266 // approximation, and that approximation is 256 bits long.
267 UInt128 approx_upper
= static_cast<UInt128
>(high64(mantissa
)) *
268 static_cast<UInt128
>(power_of_ten
[1]);
270 UInt128 approx_middle_a
= static_cast<UInt128
>(high64(mantissa
)) *
271 static_cast<UInt128
>(power_of_ten
[0]);
272 UInt128 approx_middle_b
= static_cast<UInt128
>(low64(mantissa
)) *
273 static_cast<UInt128
>(power_of_ten
[1]);
275 UInt128 approx_middle
= approx_middle_a
+ approx_middle_b
;
277 // Handle overflow in the middle
278 approx_upper
+= (approx_middle
< approx_middle_a
) ? UInt128(1) << 64 : 0;
280 UInt128 approx_lower
= static_cast<UInt128
>(low64(mantissa
)) *
281 static_cast<UInt128
>(power_of_ten
[0]);
283 UInt128 final_approx_lower
=
284 approx_lower
+ (static_cast<UInt128
>(low64(approx_middle
)) << 64);
285 UInt128 final_approx_upper
= approx_upper
+ high64(approx_middle
) +
286 (final_approx_lower
< approx_lower
? 1 : 0);
288 // The halfway constant is used to check if the bits that will be shifted away
289 // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64
290 // (final mantissa size) - 3 (we shift away the last two bits separately for
291 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
292 // it's 12 bits for 128 bit floats in this case.
293 constexpr UInt128 HALFWAY_CONSTANT
=
294 (UInt128(1) << (BITS_IN_MANTISSA
-
295 fputil::FloatProperties
<long double>::MANTISSA_WIDTH
-
299 if ((final_approx_upper
& HALFWAY_CONSTANT
) == HALFWAY_CONSTANT
&&
300 final_approx_lower
+ mantissa
< mantissa
) {
304 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
306 static_cast<uint32_t>(final_approx_upper
>> (BITS_IN_MANTISSA
- 1));
307 BitsType final_mantissa
=
308 final_approx_upper
>>
309 (msb
+ BITS_IN_MANTISSA
-
310 (fputil::FloatProperties
<long double>::MANTISSA_WIDTH
+ 3));
311 exp2
-= static_cast<uint32_t>(1 ^ msb
); // same as !msb
313 if (round
== RoundDirection::Nearest
) {
314 // Half-way ambiguity
315 if (final_approx_lower
== 0 &&
316 (final_approx_upper
& HALFWAY_CONSTANT
) == 0 &&
317 (final_mantissa
& 3) == 1) {
321 final_mantissa
+= final_mantissa
& 1;
323 } else if (round
== RoundDirection::Up
) {
324 // If any of the bits being rounded away are non-zero, then round up.
325 if (final_approx_lower
> 0 || (final_approx_upper
& HALFWAY_CONSTANT
) > 0) {
326 // Add two since the last current lowest bit is about to be shifted away.
330 // else round down, which has no effect.
332 // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit
334 final_mantissa
>>= 1;
335 if ((final_mantissa
>>
336 (fputil::FloatProperties
<long double>::MANTISSA_WIDTH
+ 1)) > 0) {
337 final_mantissa
>>= 1;
341 // The if block is equivalent to (but has fewer branches than):
342 // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
344 (1 << fputil::FloatProperties
<long double>::EXPONENT_WIDTH
) - 2) {
348 ExpandedFloat
<long double> output
;
349 output
.mantissa
= final_mantissa
;
350 output
.exponent
= exp2
;
355 // The nth item in POWERS_OF_TWO represents the greatest power of two less than
356 // 10^n. This tells us how much we can safely shift without overshooting.
357 constexpr uint8_t POWERS_OF_TWO
[19] = {
358 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, 33, 36, 39, 43, 46, 49, 53, 56, 59,
360 constexpr int32_t NUM_POWERS_OF_TWO
=
361 sizeof(POWERS_OF_TWO
) / sizeof(POWERS_OF_TWO
[0]);
363 // Takes a mantissa and base 10 exponent and converts it into its closest
364 // floating point type T equivalent. This is the fallback algorithm used when
365 // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based
366 // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this
367 // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html
369 LIBC_INLINE FloatConvertReturn
<T
>
370 simple_decimal_conversion(const char *__restrict numStart
,
371 RoundDirection round
= RoundDirection::Nearest
) {
374 HighPrecisionDecimal hpd
= HighPrecisionDecimal(numStart
);
376 FloatConvertReturn
<T
> output
;
378 if (hpd
.get_num_digits() == 0) {
383 // If the exponent is too large and can't be represented in this size of
384 // float, return inf.
385 if (hpd
.get_decimal_point() > 0 &&
386 exp10_to_exp2(hpd
.get_decimal_point() - 1) >
387 static_cast<int64_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
)) {
388 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
389 output
.error
= ERANGE
;
392 // If the exponent is too small even for a subnormal, return 0.
393 if (hpd
.get_decimal_point() < 0 &&
394 exp10_to_exp2(-hpd
.get_decimal_point()) >
395 static_cast<int64_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
+
396 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
)) {
398 output
.error
= ERANGE
;
402 // Right shift until the number is smaller than 1.
403 while (hpd
.get_decimal_point() > 0) {
404 int32_t shift_amount
= 0;
405 if (hpd
.get_decimal_point() >= NUM_POWERS_OF_TWO
) {
408 shift_amount
= POWERS_OF_TWO
[hpd
.get_decimal_point()];
410 exp2
+= shift_amount
;
411 hpd
.shift(-shift_amount
);
414 // Left shift until the number is between 1/2 and 1
415 while (hpd
.get_decimal_point() < 0 ||
416 (hpd
.get_decimal_point() == 0 && hpd
.get_digits()[0] < 5)) {
417 int32_t shift_amount
= 0;
419 if (-hpd
.get_decimal_point() >= NUM_POWERS_OF_TWO
) {
421 } else if (hpd
.get_decimal_point() != 0) {
422 shift_amount
= POWERS_OF_TWO
[-hpd
.get_decimal_point()];
423 } else { // This handles the case of the number being between .1 and .5
426 exp2
-= shift_amount
;
427 hpd
.shift(shift_amount
);
430 // Left shift once so that the number is between 1 and 2
434 // Get the biased exponent
435 exp2
+= fputil::FloatProperties
<T
>::EXPONENT_BIAS
;
437 // Handle the exponent being too large (and return inf).
438 if (exp2
>= fputil::FPBits
<T
>::MAX_EXPONENT
) {
439 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
440 output
.error
= ERANGE
;
444 // Shift left to fill the mantissa
445 hpd
.shift(fputil::FloatProperties
<T
>::MANTISSA_WIDTH
);
446 typename
fputil::FPBits
<T
>::UIntType final_mantissa
=
447 hpd
.round_to_integer_type
<typename
fputil::FPBits
<T
>::UIntType
>();
451 // Shift right until there is a valid exponent
456 // Shift right one more time to compensate for the left shift to get it
460 hpd
.round_to_integer_type
<typename
fputil::FPBits
<T
>::UIntType
>(round
);
462 // Check if by shifting right we've caused this to round to a normal number.
463 if ((final_mantissa
>> fputil::FloatProperties
<T
>::MANTISSA_WIDTH
) != 0) {
468 // Check if rounding added a bit, and shift down if that's the case.
469 if (final_mantissa
== typename
fputil::FPBits
<T
>::UIntType(2)
470 << fputil::FloatProperties
<T
>::MANTISSA_WIDTH
) {
471 final_mantissa
>>= 1;
474 // Check if this rounding causes exp2 to go out of range and make the result
475 // INF. If this is the case, then finalMantissa and exp2 are already the
476 // correct values for an INF result.
477 if (exp2
>= fputil::FPBits
<T
>::MAX_EXPONENT
) {
478 output
.error
= ERANGE
;
483 output
.error
= ERANGE
;
486 output
.num
= {final_mantissa
, exp2
};
490 // This class is used for templating the constants for Clinger's Fast Path,
491 // described as a method of approximation in
492 // Clinger WD. How to Read Floating Point Numbers Accurately. SIGPLAN Not 1990
493 // Jun;25(6):92–101. https://doi.org/10.1145/93548.93557.
494 // As well as the additions by Gay that extend the useful range by the number of
495 // exact digits stored by the float type, described in
496 // Gay DM, Correctly rounded binary-decimal and decimal-binary conversions;
497 // 1990. AT&T Bell Laboratories Numerical Analysis Manuscript 90-10.
498 template <class T
> class ClingerConsts
;
500 template <> class ClingerConsts
<float> {
502 static constexpr float POWERS_OF_TEN_ARRAY
[] = {1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
,
503 1e6
, 1e7
, 1e8
, 1e9
, 1e10
};
504 static constexpr int32_t EXACT_POWERS_OF_TEN
= 10;
505 static constexpr int32_t DIGITS_IN_MANTISSA
= 7;
506 static constexpr float MAX_EXACT_INT
= 16777215.0;
509 template <> class ClingerConsts
<double> {
511 static constexpr double POWERS_OF_TEN_ARRAY
[] = {
512 1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
, 1e6
, 1e7
, 1e8
, 1e9
, 1e10
, 1e11
,
513 1e12
, 1e13
, 1e14
, 1e15
, 1e16
, 1e17
, 1e18
, 1e19
, 1e20
, 1e21
, 1e22
};
514 static constexpr int32_t EXACT_POWERS_OF_TEN
= 22;
515 static constexpr int32_t DIGITS_IN_MANTISSA
= 15;
516 static constexpr double MAX_EXACT_INT
= 9007199254740991.0;
519 #if defined(LONG_DOUBLE_IS_DOUBLE)
520 template <> class ClingerConsts
<long double> {
522 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
523 1e0
, 1e1
, 1e2
, 1e3
, 1e4
, 1e5
, 1e6
, 1e7
, 1e8
, 1e9
, 1e10
, 1e11
,
524 1e12
, 1e13
, 1e14
, 1e15
, 1e16
, 1e17
, 1e18
, 1e19
, 1e20
, 1e21
, 1e22
};
525 static constexpr int32_t EXACT_POWERS_OF_TEN
=
526 ClingerConsts
<double>::EXACT_POWERS_OF_TEN
;
527 static constexpr int32_t DIGITS_IN_MANTISSA
=
528 ClingerConsts
<double>::DIGITS_IN_MANTISSA
;
529 static constexpr long double MAX_EXACT_INT
=
530 ClingerConsts
<double>::MAX_EXACT_INT
;
532 #elif defined(SPECIAL_X86_LONG_DOUBLE)
533 template <> class ClingerConsts
<long double> {
535 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
536 1e0L
, 1e1L
, 1e2L
, 1e3L
, 1e4L
, 1e5L
, 1e6L
, 1e7L
, 1e8L
, 1e9L
,
537 1e10L
, 1e11L
, 1e12L
, 1e13L
, 1e14L
, 1e15L
, 1e16L
, 1e17L
, 1e18L
, 1e19L
,
538 1e20L
, 1e21L
, 1e22L
, 1e23L
, 1e24L
, 1e25L
, 1e26L
, 1e27L
};
539 static constexpr int32_t EXACT_POWERS_OF_TEN
= 27;
540 static constexpr int32_t DIGITS_IN_MANTISSA
= 21;
541 static constexpr long double MAX_EXACT_INT
= 18446744073709551615.0L;
544 template <> class ClingerConsts
<long double> {
546 static constexpr long double POWERS_OF_TEN_ARRAY
[] = {
547 1e0L
, 1e1L
, 1e2L
, 1e3L
, 1e4L
, 1e5L
, 1e6L
, 1e7L
, 1e8L
, 1e9L
,
548 1e10L
, 1e11L
, 1e12L
, 1e13L
, 1e14L
, 1e15L
, 1e16L
, 1e17L
, 1e18L
, 1e19L
,
549 1e20L
, 1e21L
, 1e22L
, 1e23L
, 1e24L
, 1e25L
, 1e26L
, 1e27L
, 1e28L
, 1e29L
,
550 1e30L
, 1e31L
, 1e32L
, 1e33L
, 1e34L
, 1e35L
, 1e36L
, 1e37L
, 1e38L
, 1e39L
,
551 1e40L
, 1e41L
, 1e42L
, 1e43L
, 1e44L
, 1e45L
, 1e46L
, 1e47L
, 1e48L
};
552 static constexpr int32_t EXACT_POWERS_OF_TEN
= 48;
553 static constexpr int32_t DIGITS_IN_MANTISSA
= 33;
554 static constexpr long double MAX_EXACT_INT
=
555 10384593717069655257060992658440191.0L;
559 // Take an exact mantissa and exponent and attempt to convert it using only
560 // exact floating point arithmetic. This only handles numbers with low
561 // exponents, but handles them quickly. This is an implementation of Clinger's
562 // Fast Path, as described above.
564 LIBC_INLINE
cpp::optional
<ExpandedFloat
<T
>>
565 clinger_fast_path(ExpandedFloat
<T
> init_num
,
566 RoundDirection round
= RoundDirection::Nearest
) {
568 typename
fputil::FPBits
<T
>::UIntType mantissa
= init_num
.mantissa
;
569 int32_t exp10
= init_num
.exponent
;
571 if (mantissa
>> fputil::FloatProperties
<T
>::MANTISSA_WIDTH
> 0) {
575 fputil::FPBits
<T
> result
;
577 if constexpr (cpp::is_same_v
<typename
fputil::FPBits
<T
>::UIntType
,
579 float_mantissa
= static_cast<T
>(fputil::DyadicFloat
<128>(
581 fputil::DyadicFloat
<128>::MantissaType(
582 {uint64_t(mantissa
), uint64_t(mantissa
>> 64)})));
584 float_mantissa
= static_cast<T
>(mantissa
);
588 result
= fputil::FPBits
<T
>(float_mantissa
);
591 if (exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
+
592 ClingerConsts
<T
>::DIGITS_IN_MANTISSA
) {
595 if (exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
) {
596 float_mantissa
= float_mantissa
*
597 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
598 [exp10
- ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
];
599 exp10
= ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
;
601 if (float_mantissa
> ClingerConsts
<T
>::MAX_EXACT_INT
) {
604 result
= fputil::FPBits
<T
>(float_mantissa
*
605 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[exp10
]);
606 } else if (exp10
< 0) {
607 if (-exp10
> ClingerConsts
<T
>::EXACT_POWERS_OF_TEN
) {
610 result
= fputil::FPBits
<T
>(float_mantissa
/
611 ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[-exp10
]);
614 // If the rounding mode is not nearest, then the sign of the number may affect
615 // the result. To make sure the rounding mode is respected properly, the
616 // calculation is redone with a negative result, and the rounding mode is used
617 // to select the correct result.
618 if (round
!= RoundDirection::Nearest
) {
619 fputil::FPBits
<T
> negative_result
;
620 // I'm 99% sure this will break under fast math optimizations.
621 negative_result
= fputil::FPBits
<T
>(
622 (-float_mantissa
) * ClingerConsts
<T
>::POWERS_OF_TEN_ARRAY
[exp10
]);
624 // If the results are equal, then we don't need to use the rounding mode.
625 if (T(result
) != -T(negative_result
)) {
626 fputil::FPBits
<T
> lower_result
;
627 fputil::FPBits
<T
> higher_result
;
629 if (T(result
) < -T(negative_result
)) {
630 lower_result
= result
;
631 higher_result
= negative_result
;
633 lower_result
= negative_result
;
634 higher_result
= result
;
637 if (round
== RoundDirection::Up
) {
638 result
= higher_result
;
640 result
= lower_result
;
645 ExpandedFloat
<T
> output
;
646 output
.mantissa
= result
.get_mantissa();
647 output
.exponent
= result
.get_unbiased_exponent();
651 // The upper bound is the highest base-10 exponent that could possibly give a
652 // non-inf result for this size of float. The value is
653 // log10(2^(exponent bias)).
654 // The generic approximation uses the fact that log10(2^x) ~= x/3
655 template <typename T
> constexpr int32_t get_upper_bound() {
656 return static_cast<int32_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
) / 3;
659 template <> constexpr int32_t get_upper_bound
<float>() { return 39; }
661 template <> constexpr int32_t get_upper_bound
<double>() { return 309; }
663 // The lower bound is the largest negative base-10 exponent that could possibly
664 // give a non-zero result for this size of float. The value is
665 // log10(2^(exponent bias + final mantissa width + intermediate mantissa width))
666 // The intermediate mantissa is the integer that's been parsed from the string,
667 // and the final mantissa is the fractional part of the output number. A very
668 // low base 10 exponent with a very high intermediate mantissa can cancel each
669 // other out, and subnormal numbers allow for the result to be at the very low
670 // end of the final mantissa.
671 template <typename T
> constexpr int32_t get_lower_bound() {
672 return -(static_cast<int32_t>(fputil::FloatProperties
<T
>::EXPONENT_BIAS
+
673 fputil::FloatProperties
<T
>::MANTISSA_WIDTH
+
678 template <> constexpr int32_t get_lower_bound
<float>() {
679 return -(39 + 6 + 10);
682 template <> constexpr int32_t get_lower_bound
<double>() {
683 return -(309 + 15 + 20);
686 // Takes a mantissa and base 10 exponent and converts it into its closest
687 // floating point type T equivalient. First we try the Eisel-Lemire algorithm,
688 // then if that fails then we fall back to a more accurate algorithm for
689 // accuracy. The resulting mantissa and exponent are placed in outputMantissa
692 LIBC_INLINE FloatConvertReturn
<T
>
693 decimal_exp_to_float(ExpandedFloat
<T
> init_num
, const char *__restrict numStart
,
694 bool truncated
, RoundDirection round
) {
696 typename
fputil::FPBits
<T
>::UIntType mantissa
= init_num
.mantissa
;
697 int32_t exp10
= init_num
.exponent
;
699 FloatConvertReturn
<T
> output
;
700 cpp::optional
<ExpandedFloat
<T
>> opt_output
;
702 // If the exponent is too large and can't be represented in this size of
703 // float, return inf. These bounds are relatively loose, but are mostly
704 // serving as a first pass. Some close numbers getting through is okay.
705 if (exp10
> get_upper_bound
<T
>()) {
706 output
.num
= {0, fputil::FPBits
<T
>::MAX_EXPONENT
};
707 output
.error
= ERANGE
;
710 // If the exponent is too small even for a subnormal, return 0.
711 if (exp10
< get_lower_bound
<T
>()) {
713 output
.error
= ERANGE
;
717 // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail.
718 // For this reason the "error" field in their return values is used to
719 // represent whether they've failed as opposed to the errno value. Any
720 // non-zero value represents a failure.
722 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
724 opt_output
= clinger_fast_path
<T
>(init_num
, round
);
725 // If the algorithm succeeded the error will be 0, else it will be a
727 if (opt_output
.has_value()) {
728 return {opt_output
.value(), 0};
731 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH
733 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
735 opt_output
= eisel_lemire
<T
>(init_num
, round
);
736 if (opt_output
.has_value()) {
738 return {opt_output
.value(), 0};
740 // If the mantissa is truncated, then the result may be off by the LSB, so
741 // check if rounding the mantissa up changes the result. If not, then it's
742 // safe, else use the fallback.
743 auto secound_output
= eisel_lemire
<T
>({mantissa
+ 1, exp10
}, round
);
744 if (secound_output
.has_value()) {
745 if (opt_output
->mantissa
== secound_output
->mantissa
&&
746 opt_output
->exponent
== secound_output
->exponent
) {
747 return {opt_output
.value(), 0};
751 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE
753 #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
754 output
= simple_decimal_conversion
<T
>(numStart
, round
);
756 #warning "Simple decimal conversion is disabled, result may not be correct."
757 #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION
762 // Takes a mantissa and base 2 exponent and converts it into its closest
763 // floating point type T equivalient. Since the exponent is already in the right
764 // form, this is mostly just shifting and rounding. This is used for hexadecimal
765 // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent.
767 LIBC_INLINE FloatConvertReturn
<T
> binary_exp_to_float(ExpandedFloat
<T
> init_num
,
769 RoundDirection round
) {
770 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
772 BitsType mantissa
= init_num
.mantissa
;
773 int32_t exp2
= init_num
.exponent
;
775 FloatConvertReturn
<T
> output
;
777 // This is the number of leading zeroes a properly normalized float of type T
779 constexpr int32_t NUMBITS
= sizeof(BitsType
) * 8;
780 constexpr int32_t INF_EXP
=
781 (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 1;
783 // Normalization step 1: Bring the leading bit to the highest bit of BitsType.
784 uint32_t amount_to_shift_left
= leading_zeroes
<BitsType
>(mantissa
);
785 mantissa
<<= amount_to_shift_left
;
787 // Keep exp2 representing the exponent of the lowest bit of BitsType.
788 exp2
-= amount_to_shift_left
;
790 // biasedExponent represents the biased exponent of the most significant bit.
791 int32_t biased_exponent
=
792 exp2
+ NUMBITS
+ fputil::FPBits
<T
>::EXPONENT_BIAS
- 1;
794 // Handle numbers that're too large and get squashed to inf
795 if (biased_exponent
>= INF_EXP
) {
796 // This indicates an overflow, so we make the result INF and set errno.
797 output
.num
= {0, (1 << fputil::FloatProperties
<T
>::EXPONENT_WIDTH
) - 1};
798 output
.error
= ERANGE
;
802 uint32_t amount_to_shift_right
=
803 NUMBITS
- fputil::FloatProperties
<T
>::MANTISSA_WIDTH
- 1;
805 // Handle subnormals.
806 if (biased_exponent
<= 0) {
807 amount_to_shift_right
+= 1 - biased_exponent
;
810 if (amount_to_shift_right
> NUMBITS
) {
811 // Return 0 if the exponent is too small.
813 output
.error
= ERANGE
;
818 BitsType round_bit_mask
= BitsType(1) << (amount_to_shift_right
- 1);
819 BitsType sticky_mask
= round_bit_mask
- 1;
820 bool round_bit
= static_cast<bool>(mantissa
& round_bit_mask
);
821 bool sticky_bit
= static_cast<bool>(mantissa
& sticky_mask
) || truncated
;
823 if (amount_to_shift_right
< NUMBITS
) {
824 // Shift the mantissa and clear the implicit bit.
825 mantissa
>>= amount_to_shift_right
;
826 mantissa
&= fputil::FloatProperties
<T
>::MANTISSA_MASK
;
830 bool least_significant_bit
= static_cast<bool>(mantissa
& BitsType(1));
832 // TODO: check that this rounding behavior is correct.
834 if (round
== RoundDirection::Nearest
) {
835 // Perform rounding-to-nearest, tie-to-even.
836 if (round_bit
&& (least_significant_bit
|| sticky_bit
)) {
839 } else if (round
== RoundDirection::Up
) {
840 if (round_bit
|| sticky_bit
) {
843 } else /* (round == RoundDirection::Down)*/ {
844 if (round_bit
&& sticky_bit
) {
849 if (mantissa
> fputil::FloatProperties
<T
>::MANTISSA_MASK
) {
850 // Rounding causes the exponent to increase.
853 if (biased_exponent
== INF_EXP
) {
854 output
.error
= ERANGE
;
858 if (biased_exponent
== 0) {
859 output
.error
= ERANGE
;
862 output
.num
= {mantissa
& fputil::FloatProperties
<T
>::MANTISSA_MASK
,
867 // checks if the next 4 characters of the string pointer are the start of a
868 // hexadecimal floating point number. Does not advance the string pointer.
869 LIBC_INLINE
bool is_float_hex_start(const char *__restrict src
,
870 const char decimalPoint
) {
871 if (!(src
[0] == '0' && tolower(src
[1]) == 'x')) {
874 size_t first_digit
= 2;
875 if (src
[2] == decimalPoint
) {
878 return isalnum(src
[first_digit
]) && b36_char_to_int(src
[first_digit
]) < 16;
881 // Takes the start of a string representing a decimal float, as well as the
882 // local decimalPoint. It returns if it suceeded in parsing any digits, and if
883 // the return value is true then the outputs are pointer to the end of the
884 // number, and the mantissa and exponent for the closest float T representation.
885 // If the return value is false, then it is assumed that there is no number
888 LIBC_INLINE StrToNumResult
<ExpandedFloat
<T
>>
889 decimal_string_to_float(const char *__restrict src
, const char DECIMAL_POINT
,
890 RoundDirection round
) {
891 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
892 constexpr uint32_t BASE
= 10;
893 constexpr char EXPONENT_MARKER
= 'e';
895 bool truncated
= false;
896 bool seen_digit
= false;
897 bool after_decimal
= false;
898 BitsType mantissa
= 0;
899 int32_t exponent
= 0;
903 StrToNumResult
<ExpandedFloat
<T
>> output({0, 0});
905 // The goal for the first step of parsing is to convert the number in src to
906 // the format mantissa * (base ^ exponent)
908 // The loop fills the mantissa with as many digits as it can hold
909 const BitsType bitstype_max_div_by_base
=
910 cpp::numeric_limits
<BitsType
>::max() / BASE
;
912 if (isdigit(src
[index
])) {
913 uint32_t digit
= src
[index
] - '0';
916 if (mantissa
< bitstype_max_div_by_base
) {
917 mantissa
= (mantissa
* BASE
) + digit
;
931 if (src
[index
] == DECIMAL_POINT
) {
933 break; // this means that src[index] points to a second decimal point,
934 // ending the number.
936 after_decimal
= true;
940 // The character is neither a digit nor a decimal point.
947 if (tolower(src
[index
]) == EXPONENT_MARKER
) {
948 bool has_sign
= false;
949 if (src
[index
+ 1] == '+' || src
[index
+ 1] == '-') {
952 if (isdigit(src
[index
+ 1 + static_cast<size_t>(has_sign
)])) {
954 auto result
= strtointeger
<int32_t>(src
+ index
, 10);
955 if (result
.has_error())
956 output
.error
= result
.error
;
957 int32_t add_to_exponent
= result
.value
;
958 index
+= result
.parsed_len
;
960 // Here we do this operation as int64 to avoid overflow.
961 int64_t temp_exponent
= static_cast<int64_t>(exponent
) +
962 static_cast<int64_t>(add_to_exponent
);
964 // If the result is in the valid range, then we use it. The valid range is
965 // also within the int32 range, so this prevents overflow issues.
966 if (temp_exponent
> fputil::FPBits
<T
>::MAX_EXPONENT
) {
967 exponent
= fputil::FPBits
<T
>::MAX_EXPONENT
;
968 } else if (temp_exponent
< -fputil::FPBits
<T
>::MAX_EXPONENT
) {
969 exponent
= -fputil::FPBits
<T
>::MAX_EXPONENT
;
971 exponent
= static_cast<int32_t>(temp_exponent
);
976 output
.parsed_len
= index
;
977 if (mantissa
== 0) { // if we have a 0, then also 0 the exponent.
978 output
.value
= {0, 0};
981 decimal_exp_to_float
<T
>({mantissa
, exponent
}, src
, truncated
, round
);
982 output
.value
= temp
.num
;
983 output
.error
= temp
.error
;
988 // Takes the start of a string representing a hexadecimal float, as well as the
989 // local decimal point. It returns if it suceeded in parsing any digits, and if
990 // the return value is true then the outputs are pointer to the end of the
991 // number, and the mantissa and exponent for the closest float T representation.
992 // If the return value is false, then it is assumed that there is no number
995 LIBC_INLINE StrToNumResult
<ExpandedFloat
<T
>>
996 hexadecimal_string_to_float(const char *__restrict src
,
997 const char DECIMAL_POINT
, RoundDirection round
) {
998 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
999 constexpr uint32_t BASE
= 16;
1000 constexpr char EXPONENT_MARKER
= 'p';
1002 bool truncated
= false;
1003 bool seen_digit
= false;
1004 bool after_decimal
= false;
1005 BitsType mantissa
= 0;
1006 int32_t exponent
= 0;
1010 StrToNumResult
<ExpandedFloat
<T
>> output({0, 0});
1012 // The goal for the first step of parsing is to convert the number in src to
1013 // the format mantissa * (base ^ exponent)
1015 // The loop fills the mantissa with as many digits as it can hold
1016 const BitsType bitstype_max_div_by_base
=
1017 cpp::numeric_limits
<BitsType
>::max() / BASE
;
1019 if (isalnum(src
[index
])) {
1020 uint32_t digit
= b36_char_to_int(src
[index
]);
1026 if (mantissa
< bitstype_max_div_by_base
) {
1027 mantissa
= (mantissa
* BASE
) + digit
;
1039 if (src
[index
] == DECIMAL_POINT
) {
1040 if (after_decimal
) {
1041 break; // this means that src[index] points to a second decimal point,
1042 // ending the number.
1044 after_decimal
= true;
1048 // The character is neither a hexadecimal digit nor a decimal point.
1055 // Convert the exponent from having a base of 16 to having a base of 2.
1058 if (tolower(src
[index
]) == EXPONENT_MARKER
) {
1059 bool has_sign
= false;
1060 if (src
[index
+ 1] == '+' || src
[index
+ 1] == '-') {
1063 if (isdigit(src
[index
+ 1 + static_cast<size_t>(has_sign
)])) {
1065 auto result
= strtointeger
<int32_t>(src
+ index
, 10);
1066 if (result
.has_error())
1067 output
.error
= result
.error
;
1069 int32_t add_to_exponent
= result
.value
;
1070 index
+= result
.parsed_len
;
1072 // Here we do this operation as int64 to avoid overflow.
1073 int64_t temp_exponent
= static_cast<int64_t>(exponent
) +
1074 static_cast<int64_t>(add_to_exponent
);
1076 // If the result is in the valid range, then we use it. The valid range is
1077 // also within the int32 range, so this prevents overflow issues.
1078 if (temp_exponent
> fputil::FPBits
<T
>::MAX_EXPONENT
) {
1079 exponent
= fputil::FPBits
<T
>::MAX_EXPONENT
;
1080 } else if (temp_exponent
< -fputil::FPBits
<T
>::MAX_EXPONENT
) {
1081 exponent
= -fputil::FPBits
<T
>::MAX_EXPONENT
;
1083 exponent
= static_cast<int32_t>(temp_exponent
);
1087 output
.parsed_len
= index
;
1088 if (mantissa
== 0) { // if we have a 0, then also 0 the exponent.
1089 output
.value
.exponent
= 0;
1090 output
.value
.mantissa
= 0;
1092 auto temp
= binary_exp_to_float
<T
>({mantissa
, exponent
}, truncated
, round
);
1093 output
.error
= temp
.error
;
1094 output
.value
= temp
.num
;
1099 // Takes a pointer to a string and a pointer to a string pointer. This function
1100 // is used as the backend for all of the string to float functions.
1102 LIBC_INLINE StrToNumResult
<T
> strtofloatingpoint(const char *__restrict src
) {
1103 using BitsType
= typename
fputil::FPBits
<T
>::UIntType
;
1104 fputil::FPBits
<T
> result
= fputil::FPBits
<T
>();
1105 bool seen_digit
= false;
1110 ptrdiff_t index
= first_non_whitespace(src
) - src
;
1112 if (src
[index
] == '+' || src
[index
] == '-') {
1118 result
.set_sign(true);
1121 static constexpr char DECIMAL_POINT
= '.';
1122 static const char *inf_string
= "infinity";
1123 static const char *nan_string
= "nan";
1125 if (isdigit(src
[index
]) || src
[index
] == DECIMAL_POINT
) { // regular number
1127 if (is_float_hex_start(src
+ index
, DECIMAL_POINT
)) {
1133 RoundDirection round_direction
= RoundDirection::Nearest
;
1135 switch (fputil::quick_get_round()) {
1137 round_direction
= RoundDirection::Nearest
;
1141 round_direction
= RoundDirection::Up
;
1143 round_direction
= RoundDirection::Down
;
1148 round_direction
= RoundDirection::Down
;
1150 round_direction
= RoundDirection::Up
;
1154 round_direction
= RoundDirection::Down
;
1158 StrToNumResult
<ExpandedFloat
<T
>> parse_result({0, 0});
1160 parse_result
= hexadecimal_string_to_float
<T
>(src
+ index
, DECIMAL_POINT
,
1162 } else { // base is 10
1163 parse_result
= decimal_string_to_float
<T
>(src
+ index
, DECIMAL_POINT
,
1166 seen_digit
= parse_result
.parsed_len
!= 0;
1167 result
.set_mantissa(parse_result
.value
.mantissa
);
1168 result
.set_unbiased_exponent(parse_result
.value
.exponent
);
1169 index
+= parse_result
.parsed_len
;
1170 error
= parse_result
.error
;
1171 } else if (tolower(src
[index
]) == 'n') { // NaN
1172 if (tolower(src
[index
+ 1]) == nan_string
[1] &&
1173 tolower(src
[index
+ 2]) == nan_string
[2]) {
1176 BitsType nan_mantissa
= 0;
1177 // this handles the case of `NaN(n-character-sequence)`, where the
1178 // n-character-sequence is made of 0 or more letters and numbers in any
1180 if (src
[index
] == '(') {
1181 size_t left_paren
= index
;
1183 while (isalnum(src
[index
]))
1185 if (src
[index
] == ')') {
1187 if (isdigit(src
[left_paren
+ 1])) {
1188 // This is to prevent errors when BitsType is larger than 64 bits,
1189 // since strtointeger only supports up to 64 bits. This is actually
1190 // more than is required by the specification, which says for the
1191 // input type "NAN(n-char-sequence)" that "the meaning of
1192 // the n-char sequence is implementation-defined."
1194 auto strtoint_result
=
1195 strtointeger
<uint64_t>(src
+ (left_paren
+ 1), 0);
1196 if (strtoint_result
.has_error()) {
1197 error
= strtoint_result
.error
;
1199 nan_mantissa
= strtoint_result
.value
;
1200 if (src
[left_paren
+ 1 + strtoint_result
.parsed_len
] != ')')
1207 nan_mantissa
|= fputil::FloatProperties
<T
>::QUIET_NAN_MASK
;
1208 if (result
.get_sign()) {
1209 result
= fputil::FPBits
<T
>(result
.build_quiet_nan(nan_mantissa
));
1210 result
.set_sign(true);
1212 result
.set_sign(false);
1213 result
= fputil::FPBits
<T
>(result
.build_quiet_nan(nan_mantissa
));
1216 } else if (tolower(src
[index
]) == 'i') { // INF
1217 if (tolower(src
[index
+ 1]) == inf_string
[1] &&
1218 tolower(src
[index
+ 2]) == inf_string
[2]) {
1220 if (result
.get_sign())
1221 result
= result
.neg_inf();
1223 result
= result
.inf();
1224 if (tolower(src
[index
+ 3]) == inf_string
[3] &&
1225 tolower(src
[index
+ 4]) == inf_string
[4] &&
1226 tolower(src
[index
+ 5]) == inf_string
[5] &&
1227 tolower(src
[index
+ 6]) == inf_string
[6] &&
1228 tolower(src
[index
+ 7]) == inf_string
[7]) {
1229 // if the string is "INFINITY" then consume 8 characters.
1236 if (!seen_digit
) { // If there is nothing to actually parse, then return 0.
1237 return {T(0), 0, error
};
1240 // This function only does something if T is long double and the platform uses
1241 // special 80 bit long doubles. Otherwise it should be inlined out.
1242 set_implicit_bit
<T
>(result
);
1244 return {T(result
), index
, error
};
1247 } // namespace internal
1248 } // namespace __llvm_libc
1250 #endif // LIBC_SRC_SUPPORT_STR_TO_FLOAT_H