1 //===-- Utilities to convert floating point values to string ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
10 #define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
14 #include "src/__support/CPP/limits.h"
15 #include "src/__support/CPP/type_traits.h"
16 #include "src/__support/FPUtil/FPBits.h"
17 #include "src/__support/FPUtil/dyadic_float.h"
18 #include "src/__support/big_int.h"
19 #include "src/__support/common.h"
20 #include "src/__support/libc_assert.h"
21 #include "src/__support/macros/attributes.h"
22 #include "src/__support/macros/config.h"
23 #include "src/__support/sign.h"
25 // This file has 5 compile-time flags to allow the user to configure the float
26 // to string behavior. These were used to explore tradeoffs during the design
27 // phase, and can still be used to gain specific properties. Unless you
28 // specifically know what you're doing, you should leave all these flags off.
30 // LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
31 // This flag disables the separate long double conversion implementation. It is
32 // not based on the Ryu algorithm, instead generating the digits by
33 // multiplying/dividing the written-out number by 10^9 to get blocks. It's
34 // significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE,
35 // and is small in binary size. Its downside is that it always calculates all
36 // of the digits above the decimal point, making it inefficient for %e calls
37 // with large exponents. This specialization overrides other flags, so this
38 // flag must be set for other flags to effect the long double behavior.
40 // LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
41 // The Mega Table is ~5 megabytes when compiled. It lists the constants needed
42 // to perform the Ryu Printf algorithm (described below) for all long double
43 // values. This makes it extremely fast for both doubles and long doubles, in
44 // exchange for large binary size.
46 // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
47 // Dyadic floats are software floating point numbers, and their accuracy can be
48 // as high as necessary. This option uses 256 bit dyadic floats to calculate
49 // the table values that Ryu Printf needs. This is reasonably fast and very
50 // small compared to the Mega Table, but the 256 bit floats only give accurate
51 // results for the first ~50 digits of the output. In practice this shouldn't
52 // be a problem since long doubles are only accurate for ~35 digits, but the
53 // trailing values all being 0s may cause brittle tests to fail.
55 // LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC
56 // Integer Calculation uses wide integers to do the calculations for the Ryu
57 // Printf table, which is just as accurate as the Mega Table without requiring
58 // as much code size. These integers can be very large (~32KB at max, though
59 // always on the stack) to handle the edges of the long double range. They are
60 // also very slow, taking multiple seconds on a powerful CPU to calculate the
61 // values at the end of the range. If no flag is set, this is used for long
62 // doubles, the flag only changes the double behavior.
64 // LIBC_COPT_FLOAT_TO_STR_NO_TABLE
65 // This flag doesn't change the actual calculation method, instead it is used
66 // to disable the normal Ryu Printf table for configurations that don't use any
70 // If no flags are set, doubles use the normal (and much more reasonably sized)
71 // Ryu Printf table and long doubles use their specialized implementation. This
72 // provides good performance and binary size.
74 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
75 #include "src/__support/ryu_long_double_constants.h"
76 #elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE)
77 #include "src/__support/ryu_constants.h"
79 constexpr size_t IDX_SIZE
= 1;
80 constexpr size_t MID_INT_SIZE
= 192;
83 // This implementation is based on the Ryu Printf algorithm by Ulf Adams:
84 // Ulf Adams. 2019. Ryƫ revisited: printf floating point conversion.
85 // Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages.
86 // https://doi.org/10.1145/3360595
88 // This version is modified to require significantly less memory (it doesn't use
89 // a large buffer to store the result).
91 // The general concept of this algorithm is as follows:
92 // We want to calculate a 9 digit segment of a floating point number using this
93 // formula: floor((mantissa * 2^exponent)/10^i) % 10^9.
94 // To do so normally would involve large integers (~1000 bits for doubles), so
95 // we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a
96 // lookup table. The resulting intermediate value needs to be about 192 bits to
97 // store the result with enough precision. Since this is all being done with
98 // integers for appropriate precision, we would run into a problem if
99 // i > exponent since then 2^exponent / 10^i would be less than 1. To correct
100 // for this, the actual calculation done is 2^(exponent + c) / 10^i, and then
101 // when multiplying by the mantissa we reverse this by dividing by 2^c, like so:
102 // floor((mantissa * table[exponent][i])/(2^c)) % 10^9.
103 // This gives a 9 digit value, which is small enough to fit in a 32 bit integer,
104 // and that integer is converted into a string as normal, and called a block. In
105 // this implementation, the most recent block is buffered, so that if rounding
106 // is necessary the block can be adjusted before being written to the output.
107 // Any block that is all 9s adds one to the max block counter and doesn't clear
108 // the buffer because they can cause the block above them to be rounded up.
110 namespace LIBC_NAMESPACE_DECL
{
112 using BlockInt
= uint32_t;
113 constexpr uint32_t BLOCK_SIZE
= 9;
114 constexpr uint64_t EXP5_9
= 1953125;
115 constexpr uint64_t EXP10_9
= 1000000000;
117 using FPBits
= fputil::FPBits
<long double>;
119 // Larger numbers prefer a slightly larger constant than is used for the smaller
121 constexpr size_t CALC_SHIFT_CONST
= 128;
125 // Returns floor(log_10(2^e)); requires 0 <= e <= 42039.
126 LIBC_INLINE
constexpr uint32_t log10_pow2(uint64_t e
) {
127 LIBC_ASSERT(e
<= 42039 &&
128 "Incorrect exponent to perform log10_pow2 approximation.");
129 // This approximation is based on the float value for log_10(2). It first
130 // gives an incorrect result for our purposes at 42039 (well beyond the 16383
131 // maximum for long doubles).
133 // To get these constants I first evaluated log_10(2) to get an approximation
134 // of 0.301029996. Next I passed that value through a string to double
135 // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent
136 // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional
137 // number). Next I shifted the mantissa right 12 bits to create more space for
138 // the multiplication result, adding 12 to the exponent to compensate. To
139 // check that this approximation works for our purposes I used the following
141 // for i in range(16384):
142 // if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)):
144 // The reason we add 1 is because this evaluation truncates the result, giving
145 // us the floor, whereas counting the digits of the power of 2 gives us the
146 // ceiling. With a similar loop I checked the maximum valid value and found
148 return static_cast<uint32_t>((e
* 0x13441350fbdll
) >> 42);
151 // Same as above, but with different constants.
152 LIBC_INLINE
constexpr uint32_t log2_pow5(uint64_t e
) {
153 return static_cast<uint32_t>((e
* 0x12934f0979bll
) >> 39);
156 // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any
157 // power of 2 was also a power of 10, but since that doesn't exist this is
158 // always accurate. This is used to calculate the maximum number of base-10
159 // digits a given e-bit number could have.
160 LIBC_INLINE
constexpr uint32_t ceil_log10_pow2(uint32_t e
) {
161 return log10_pow2(e
) + 1;
164 LIBC_INLINE
constexpr uint32_t div_ceil(uint32_t num
, uint32_t denom
) {
165 return (num
+ (denom
- 1)) / denom
;
168 // Returns the maximum number of 9 digit blocks a number described by the given
169 // index (which is ceil(exponent/16)) and mantissa width could need.
170 LIBC_INLINE
constexpr uint32_t length_for_num(uint32_t idx
,
171 uint32_t mantissa_width
) {
172 return div_ceil(ceil_log10_pow2(idx
) + ceil_log10_pow2(mantissa_width
+ 1),
176 // The formula for the table when i is positive (or zero) is as follows:
177 // floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1)
178 // Rewritten slightly we get:
179 // floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1)
181 // TODO: Fix long doubles (needs bigger table or alternate algorithm.)
182 // Currently the table values are generated, which is very slow.
183 template <size_t INT_SIZE
>
184 LIBC_INLINE
constexpr UInt
<MID_INT_SIZE
> get_table_positive(int exponent
,
186 // INT_SIZE is the size of int that is used for the internal calculations of
187 // this function. It should be large enough to hold 2^(exponent+constant), so
188 // ~1000 for double and ~16000 for long double. Be warned that the time
189 // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of
190 // bits in the number being exponentiated and m is the exponent.
191 const int shift_amount
=
192 static_cast<int>(exponent
+ CALC_SHIFT_CONST
- (BLOCK_SIZE
* i
));
193 if (shift_amount
< 0) {
196 UInt
<INT_SIZE
> num(0);
197 // MOD_SIZE is one of the limiting factors for how big the constant argument
198 // can get, since it needs to be small enough to fit in the result UInt,
199 // otherwise we'll get truncation on return.
200 constexpr UInt
<INT_SIZE
> MOD_SIZE
=
201 (UInt
<INT_SIZE
>(EXP10_9
)
202 << (CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0)));
204 num
= UInt
<INT_SIZE
>(1) << (shift_amount
);
206 UInt
<INT_SIZE
> fives(EXP5_9
);
212 if (num
> MOD_SIZE
) {
213 auto rem
= num
.div_uint_half_times_pow_2(
214 EXP10_9
, CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0))
221 template <size_t INT_SIZE
>
222 LIBC_INLINE UInt
<MID_INT_SIZE
> get_table_positive_df(int exponent
, size_t i
) {
223 static_assert(INT_SIZE
== 256,
224 "Only 256 is supported as an int size right now.");
225 // This version uses dyadic floats with 256 bit mantissas to perform the same
226 // calculation as above. Due to floating point imprecision it is only accurate
227 // for the first 50 digits, but it's much faster. Since even 128 bit long
228 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
229 // enough for these floats to be converted back and forth safely. This is
230 // ideal for avoiding the size of the long double table.
231 const int shift_amount
=
232 static_cast<int>(exponent
+ CALC_SHIFT_CONST
- (9 * i
));
233 if (shift_amount
< 0) {
236 fputil::DyadicFloat
<INT_SIZE
> num(Sign::POS
, 0, 1);
237 constexpr UInt
<INT_SIZE
> MOD_SIZE
=
238 (UInt
<INT_SIZE
>(EXP10_9
)
239 << (CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0)));
241 constexpr UInt
<INT_SIZE
> FIVE_EXP_MINUS_NINE_MANT
{
242 {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030,
243 0x89705f4136b4a597}};
245 static const fputil::DyadicFloat
<INT_SIZE
> FIVE_EXP_MINUS_NINE(
246 Sign::POS
, -276, FIVE_EXP_MINUS_NINE_MANT
);
249 fputil::DyadicFloat
<INT_SIZE
> fives
=
250 fputil::pow_n(FIVE_EXP_MINUS_NINE
, static_cast<uint32_t>(i
));
253 num
= mul_pow_2(num
, shift_amount
);
255 // Adding one is part of the formula.
256 UInt
<INT_SIZE
> int_num
= num
.as_mantissa_type() + 1;
257 if (int_num
> MOD_SIZE
) {
260 .div_uint_half_times_pow_2(
261 EXP10_9
, CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0))
266 UInt
<MID_INT_SIZE
> result
= int_num
;
271 // The formula for the table when i is negative (or zero) is as follows:
272 // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0)
273 // Since we know i is always negative, we just take it as unsigned and treat it
274 // as negative. We do the same with exponent, while they're both always negative
275 // in theory, in practice they're converted to positive for simpler
277 // The formula being used looks more like this:
278 // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0)
279 template <size_t INT_SIZE
>
280 LIBC_INLINE UInt
<MID_INT_SIZE
> get_table_negative(int exponent
, size_t i
) {
281 int shift_amount
= CALC_SHIFT_CONST
- exponent
;
282 UInt
<INT_SIZE
> num(1);
283 constexpr UInt
<INT_SIZE
> MOD_SIZE
=
284 (UInt
<INT_SIZE
>(EXP10_9
)
285 << (CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0)));
287 size_t ten_blocks
= i
;
288 size_t five_blocks
= 0;
289 if (shift_amount
< 0) {
290 int block_shifts
= (-shift_amount
) / BLOCK_SIZE
;
291 if (block_shifts
< static_cast<int>(ten_blocks
)) {
292 ten_blocks
= ten_blocks
- block_shifts
;
293 five_blocks
= block_shifts
;
294 shift_amount
= shift_amount
+ (block_shifts
* BLOCK_SIZE
);
298 shift_amount
= shift_amount
+ (static_cast<int>(i
) * BLOCK_SIZE
);
302 if (five_blocks
> 0) {
303 UInt
<INT_SIZE
> fives(EXP5_9
);
304 fives
.pow_n(five_blocks
);
307 if (ten_blocks
> 0) {
308 UInt
<INT_SIZE
> tens(EXP10_9
);
309 tens
.pow_n(ten_blocks
);
310 if (five_blocks
<= 0) {
317 if (shift_amount
> 0) {
318 num
= num
<< shift_amount
;
320 num
= num
>> (-shift_amount
);
322 if (num
> MOD_SIZE
) {
323 auto rem
= num
.div_uint_half_times_pow_2(
324 EXP10_9
, CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0))
331 template <size_t INT_SIZE
>
332 LIBC_INLINE UInt
<MID_INT_SIZE
> get_table_negative_df(int exponent
, size_t i
) {
333 static_assert(INT_SIZE
== 256,
334 "Only 256 is supported as an int size right now.");
335 // This version uses dyadic floats with 256 bit mantissas to perform the same
336 // calculation as above. Due to floating point imprecision it is only accurate
337 // for the first 50 digits, but it's much faster. Since even 128 bit long
338 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
339 // enough for these floats to be converted back and forth safely. This is
340 // ideal for avoiding the size of the long double table.
342 int shift_amount
= CALC_SHIFT_CONST
- exponent
;
344 fputil::DyadicFloat
<INT_SIZE
> num(Sign::POS
, 0, 1);
345 constexpr UInt
<INT_SIZE
> MOD_SIZE
=
346 (UInt
<INT_SIZE
>(EXP10_9
)
347 << (CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0)));
349 constexpr UInt
<INT_SIZE
> TEN_EXP_NINE_MANT(EXP10_9
);
351 static const fputil::DyadicFloat
<INT_SIZE
> TEN_EXP_NINE(Sign::POS
, 0,
355 fputil::DyadicFloat
<INT_SIZE
> tens
=
356 fputil::pow_n(TEN_EXP_NINE
, static_cast<uint32_t>(i
));
359 num
= mul_pow_2(num
, shift_amount
);
361 UInt
<INT_SIZE
> int_num
= num
.as_mantissa_type();
362 if (int_num
> MOD_SIZE
) {
365 .div_uint_half_times_pow_2(
366 EXP10_9
, CALC_SHIFT_CONST
+ (IDX_SIZE
> 1 ? IDX_SIZE
: 0))
371 UInt
<MID_INT_SIZE
> result
= int_num
;
376 LIBC_INLINE
uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa
,
377 const UInt
<MID_INT_SIZE
> &large
,
378 const int32_t shift_amount
) {
379 // make sure the number of bits is always divisible by 64
380 UInt
<internal::div_ceil(MID_INT_SIZE
+ FPBits::STORAGE_LEN
, 64) * 64> val(
382 val
= (val
* mantissa
) >> shift_amount
;
383 return static_cast<uint32_t>(
384 val
.div_uint_half_times_pow_2(static_cast<uint32_t>(EXP10_9
), 0).value());
387 } // namespace internal
389 // Convert floating point values to their string representation.
390 // Because the result may not fit in a reasonably sized array, the caller must
391 // request blocks of digits and convert them from integers to strings themself.
392 // Blocks contain the most digits that can be stored in an BlockInt. This is 9
393 // digits for a 32 bit int and 18 digits for a 64 bit int.
394 // The intended use pattern is to create a FloatToString object of the
395 // appropriate type, then call get_positive_blocks to get an approximate number
396 // of blocks there are before the decimal point. Now the client code can start
397 // calling get_positive_block in a loop from the number of positive blocks to
398 // zero. This will give all digits before the decimal point. Then the user can
399 // start calling get_negative_block in a loop from 0 until the number of digits
400 // they need is reached. As an optimization, the client can use
401 // zero_blocks_after_point to find the number of blocks that are guaranteed to
402 // be zero after the decimal point and before the non-zero digits. Additionally,
403 // is_lowest_block will return if the current block is the lowest non-zero
405 template <typename T
, cpp::enable_if_t
<cpp::is_floating_point_v
<T
>, int> = 0>
406 class FloatToString
{
407 fputil::FPBits
<T
> float_bits
;
409 FPBits::StorageType mantissa
;
411 static constexpr int FRACTION_LEN
= fputil::FPBits
<T
>::FRACTION_LEN
;
412 static constexpr int EXP_BIAS
= fputil::FPBits
<T
>::EXP_BIAS
;
415 LIBC_INLINE
constexpr FloatToString(T init_float
) : float_bits(init_float
) {
416 exponent
= float_bits
.get_explicit_exponent();
417 mantissa
= float_bits
.get_explicit_mantissa();
419 // Adjust for the width of the mantissa.
420 exponent
-= FRACTION_LEN
;
423 LIBC_INLINE
constexpr bool is_nan() { return float_bits
.is_nan(); }
424 LIBC_INLINE
constexpr bool is_inf() { return float_bits
.is_inf(); }
425 LIBC_INLINE
constexpr bool is_inf_or_nan() {
426 return float_bits
.is_inf_or_nan();
429 // get_block returns an integer that represents the digits in the requested
431 LIBC_INLINE
constexpr BlockInt
get_positive_block(int block_index
) {
432 if (exponent
>= -FRACTION_LEN
) {
433 // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to
434 // find the coarse section of the POW10_SPLIT table that will be used to
435 // calculate the 9 digit window, as well as some other related values.
439 : static_cast<uint32_t>(exponent
+ (IDX_SIZE
- 1)) / IDX_SIZE
;
441 // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) -
444 const uint32_t pos_exp
= idx
* IDX_SIZE
;
446 UInt
<MID_INT_SIZE
> val
;
448 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
449 // ----------------------- DYADIC FLOAT CALC MODE ------------------------
450 const int32_t SHIFT_CONST
= CALC_SHIFT_CONST
;
451 val
= internal::get_table_positive_df
<256>(IDX_SIZE
* idx
, block_index
);
452 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
454 // ---------------------------- INT CALC MODE ----------------------------
455 const int32_t SHIFT_CONST
= CALC_SHIFT_CONST
;
456 const uint64_t MAX_POW_2_SIZE
=
457 pos_exp
+ CALC_SHIFT_CONST
- (BLOCK_SIZE
* block_index
);
458 const uint64_t MAX_POW_5_SIZE
=
459 internal::log2_pow5(BLOCK_SIZE
* block_index
);
460 const uint64_t MAX_INT_SIZE
=
461 (MAX_POW_2_SIZE
> MAX_POW_5_SIZE
) ? MAX_POW_2_SIZE
: MAX_POW_5_SIZE
;
463 if (MAX_INT_SIZE
< 1024) {
464 val
= internal::get_table_positive
<1024>(pos_exp
, block_index
);
465 } else if (MAX_INT_SIZE
< 2048) {
466 val
= internal::get_table_positive
<2048>(pos_exp
, block_index
);
467 } else if (MAX_INT_SIZE
< 4096) {
468 val
= internal::get_table_positive
<4096>(pos_exp
, block_index
);
469 } else if (MAX_INT_SIZE
< 8192) {
470 val
= internal::get_table_positive
<8192>(pos_exp
, block_index
);
471 } else if (MAX_INT_SIZE
< 16384) {
472 val
= internal::get_table_positive
<16384>(pos_exp
, block_index
);
474 val
= internal::get_table_positive
<16384 + 128>(pos_exp
, block_index
);
477 // ----------------------------- TABLE MODE ------------------------------
478 const int32_t SHIFT_CONST
= TABLE_SHIFT_CONST
;
480 val
= POW10_SPLIT
[POW10_OFFSET
[idx
] + block_index
];
482 const uint32_t shift_amount
= SHIFT_CONST
+ pos_exp
- exponent
;
484 const BlockInt digits
=
485 internal::mul_shift_mod_1e9(mantissa
, val
, (int32_t)(shift_amount
));
492 LIBC_INLINE
constexpr BlockInt
get_negative_block(int block_index
) {
494 const int32_t idx
= -exponent
/ IDX_SIZE
;
496 UInt
<MID_INT_SIZE
> val
;
498 const uint32_t pos_exp
= static_cast<uint32_t>(idx
* IDX_SIZE
);
500 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
501 // ----------------------- DYADIC FLOAT CALC MODE ------------------------
502 const int32_t SHIFT_CONST
= CALC_SHIFT_CONST
;
503 val
= internal::get_table_negative_df
<256>(pos_exp
, block_index
+ 1);
504 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
505 // ---------------------------- INT CALC MODE ----------------------------
506 const int32_t SHIFT_CONST
= CALC_SHIFT_CONST
;
508 const uint64_t NUM_FIVES
= (block_index
+ 1) * BLOCK_SIZE
;
509 // Round MAX_INT_SIZE up to the nearest 64 (adding 1 because log2_pow5
510 // implicitly rounds down).
511 const uint64_t MAX_INT_SIZE
=
512 ((internal::log2_pow5(NUM_FIVES
) / 64) + 1) * 64;
514 if (MAX_INT_SIZE
< 1024) {
515 val
= internal::get_table_negative
<1024>(pos_exp
, block_index
+ 1);
516 } else if (MAX_INT_SIZE
< 2048) {
517 val
= internal::get_table_negative
<2048>(pos_exp
, block_index
+ 1);
518 } else if (MAX_INT_SIZE
< 4096) {
519 val
= internal::get_table_negative
<4096>(pos_exp
, block_index
+ 1);
520 } else if (MAX_INT_SIZE
< 8192) {
521 val
= internal::get_table_negative
<8192>(pos_exp
, block_index
+ 1);
522 } else if (MAX_INT_SIZE
< 16384) {
523 val
= internal::get_table_negative
<16384>(pos_exp
, block_index
+ 1);
525 val
= internal::get_table_negative
<16384 + 8192>(pos_exp
,
529 // ----------------------------- TABLE MODE ------------------------------
530 // if the requested block is zero
531 const int32_t SHIFT_CONST
= TABLE_SHIFT_CONST
;
532 if (block_index
< MIN_BLOCK_2
[idx
]) {
535 const uint32_t p
= POW10_OFFSET_2
[idx
] + block_index
- MIN_BLOCK_2
[idx
];
536 // If every digit after the requested block is zero.
537 if (p
>= POW10_OFFSET_2
[idx
+ 1]) {
541 val
= POW10_SPLIT_2
[p
];
543 const int32_t shift_amount
=
544 SHIFT_CONST
+ (-exponent
- static_cast<int32_t>(pos_exp
));
546 internal::mul_shift_mod_1e9(mantissa
, val
, shift_amount
);
553 LIBC_INLINE
constexpr BlockInt
get_block(int block_index
) {
554 if (block_index
>= 0) {
555 return get_positive_block(block_index
);
557 return get_negative_block(-1 - block_index
);
561 LIBC_INLINE
constexpr size_t get_positive_blocks() {
562 if (exponent
< -FRACTION_LEN
)
567 : static_cast<uint32_t>(exponent
+ (IDX_SIZE
- 1)) / IDX_SIZE
;
568 return internal::length_for_num(idx
* IDX_SIZE
, FRACTION_LEN
);
571 // This takes the index of a block after the decimal point (a negative block)
572 // and return if it's sure that all of the digits after it are zero.
573 LIBC_INLINE
constexpr bool is_lowest_block(size_t negative_block_index
) {
574 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
575 // The decimal representation of 2**(-i) will have exactly i digits after
576 // the decimal point.
577 int num_requested_digits
=
578 static_cast<int>((negative_block_index
+ 1) * BLOCK_SIZE
);
580 return num_requested_digits
> -exponent
;
582 const int32_t idx
= -exponent
/ IDX_SIZE
;
584 POW10_OFFSET_2
[idx
] + negative_block_index
- MIN_BLOCK_2
[idx
];
585 // If the remaining digits are all 0, then this is the lowest block.
586 return p
>= POW10_OFFSET_2
[idx
+ 1];
590 LIBC_INLINE
constexpr size_t zero_blocks_after_point() {
591 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
592 if (exponent
< -FRACTION_LEN
) {
593 const int pos_exp
= -exponent
- 1;
594 const uint32_t pos_idx
=
595 static_cast<uint32_t>(pos_exp
+ (IDX_SIZE
- 1)) / IDX_SIZE
;
596 const int32_t pos_len
= ((internal::ceil_log10_pow2(pos_idx
* IDX_SIZE
) -
597 internal::ceil_log10_pow2(FRACTION_LEN
+ 1)) /
600 return static_cast<uint32_t>(pos_len
> 0 ? pos_len
: 0);
604 return MIN_BLOCK_2
[-exponent
/ IDX_SIZE
];
609 #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) && \
610 !defined(LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD)
611 // --------------------------- LONG DOUBLE FUNCTIONS ---------------------------
613 // this algorithm will work exactly the same for 80 bit and 128 bit long
614 // doubles. They have the same max exponent, but even if they didn't the
615 // constants should be calculated to be correct for any provided floating point
618 template <> class FloatToString
<long double> {
619 fputil::FPBits
<long double> float_bits
;
620 bool is_negative
= 0;
622 FPBits::StorageType mantissa
= 0;
624 static constexpr int FRACTION_LEN
= fputil::FPBits
<long double>::FRACTION_LEN
;
625 static constexpr int EXP_BIAS
= fputil::FPBits
<long double>::EXP_BIAS
;
626 static constexpr size_t UINT_WORD_SIZE
= 64;
628 static constexpr size_t FLOAT_AS_INT_WIDTH
=
629 internal::div_ceil(fputil::FPBits
<long double>::MAX_BIASED_EXPONENT
-
633 static constexpr size_t EXTRA_INT_WIDTH
=
634 internal::div_ceil(sizeof(long double) * CHAR_BIT
, UINT_WORD_SIZE
) *
637 using wide_int
= UInt
<FLOAT_AS_INT_WIDTH
+ EXTRA_INT_WIDTH
>;
639 // float_as_fixed represents the floating point number as a fixed point number
640 // with the point EXTRA_INT_WIDTH bits from the left of the number. This can
641 // store any number with a negative exponent.
642 wide_int float_as_fixed
= 0;
643 int int_block_index
= 0;
645 static constexpr size_t BLOCK_BUFFER_LEN
=
646 internal::div_ceil(internal::log10_pow2(FLOAT_AS_INT_WIDTH
), BLOCK_SIZE
) +
648 BlockInt block_buffer
[BLOCK_BUFFER_LEN
] = {0};
649 size_t block_buffer_valid
= 0;
651 template <size_t Bits
>
652 LIBC_INLINE
static constexpr BlockInt
grab_digits(UInt
<Bits
> &int_num
) {
653 auto wide_result
= int_num
.div_uint_half_times_pow_2(EXP5_9
, 9);
654 // the optional only comes into effect when dividing by 0, which will
655 // never happen here. Thus, we just assert that it has value.
656 LIBC_ASSERT(wide_result
.has_value());
657 return static_cast<BlockInt
>(wide_result
.value());
660 LIBC_INLINE
static constexpr void zero_leading_digits(wide_int
&int_num
) {
661 // WORD_SIZE is the width of the numbers used to internally represent the
663 for (size_t i
= 0; i
< EXTRA_INT_WIDTH
/ wide_int::WORD_SIZE
; ++i
)
664 int_num
[i
+ (FLOAT_AS_INT_WIDTH
/ wide_int::WORD_SIZE
)] = 0;
667 // init_convert initializes float_as_int, cur_block, and block_buffer based on
668 // the mantissa and exponent of the initial number. Calling it will always
669 // return the class to the starting state.
670 LIBC_INLINE
constexpr void init_convert() {
671 // No calculation necessary for the 0 case.
672 if (mantissa
== 0 && exponent
== 0)
676 // if the exponent is positive, then the number is fully above the decimal
677 // point. In this case we represent the float as an integer, then divide
678 // by 10^BLOCK_SIZE and take the remainder as our next block. This
679 // generates the digits from right to left, but the digits will be written
680 // from left to right, so it caches the results so they can be read in
683 wide_int float_as_int
= mantissa
;
685 float_as_int
<<= exponent
;
688 while (float_as_int
> 0) {
689 LIBC_ASSERT(int_block_index
< static_cast<int>(BLOCK_BUFFER_LEN
));
690 block_buffer
[int_block_index
] =
691 grab_digits
<FLOAT_AS_INT_WIDTH
+ EXTRA_INT_WIDTH
>(float_as_int
);
694 block_buffer_valid
= int_block_index
;
697 // if the exponent is not positive, then the number is at least partially
698 // below the decimal point. In this case we represent the float as a fixed
699 // point number with the decimal point after the top EXTRA_INT_WIDTH bits.
700 float_as_fixed
= mantissa
;
702 const int SHIFT_AMOUNT
= FLOAT_AS_INT_WIDTH
+ exponent
;
703 static_assert(EXTRA_INT_WIDTH
>= sizeof(long double) * 8);
704 float_as_fixed
<<= SHIFT_AMOUNT
;
706 // If there are still digits above the decimal point, handle those.
707 if (cpp::countl_zero(float_as_fixed
) <
708 static_cast<int>(EXTRA_INT_WIDTH
)) {
709 UInt
<EXTRA_INT_WIDTH
> above_decimal_point
=
710 float_as_fixed
>> FLOAT_AS_INT_WIDTH
;
712 size_t positive_int_block_index
= 0;
713 while (above_decimal_point
> 0) {
714 block_buffer
[positive_int_block_index
] =
715 grab_digits
<EXTRA_INT_WIDTH
>(above_decimal_point
);
716 ++positive_int_block_index
;
718 block_buffer_valid
= positive_int_block_index
;
720 // Zero all digits above the decimal point.
721 zero_leading_digits(float_as_fixed
);
728 LIBC_INLINE
constexpr FloatToString(long double init_float
)
729 : float_bits(init_float
) {
730 is_negative
= float_bits
.is_neg();
731 exponent
= float_bits
.get_explicit_exponent();
732 mantissa
= float_bits
.get_explicit_mantissa();
734 // Adjust for the width of the mantissa.
735 exponent
-= FRACTION_LEN
;
737 this->init_convert();
740 LIBC_INLINE
constexpr size_t get_positive_blocks() {
741 if (exponent
< -FRACTION_LEN
)
747 : static_cast<uint32_t>(exponent
+ (IDX_SIZE
- 1)) / IDX_SIZE
;
748 return internal::length_for_num(idx
* IDX_SIZE
, FRACTION_LEN
);
751 LIBC_INLINE
constexpr size_t zero_blocks_after_point() {
752 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
753 return MIN_BLOCK_2
[-exponent
/ IDX_SIZE
];
755 if (exponent
>= -FRACTION_LEN
)
758 const int pos_exp
= -exponent
- 1;
759 const uint32_t pos_idx
=
760 static_cast<uint32_t>(pos_exp
+ (IDX_SIZE
- 1)) / IDX_SIZE
;
761 const int32_t pos_len
= ((internal::ceil_log10_pow2(pos_idx
* IDX_SIZE
) -
762 internal::ceil_log10_pow2(FRACTION_LEN
+ 1)) /
765 return static_cast<uint32_t>(pos_len
> 0 ? pos_len
: 0);
769 LIBC_INLINE
constexpr bool is_lowest_block(size_t negative_block_index
) {
770 // The decimal representation of 2**(-i) will have exactly i digits after
771 // the decimal point.
772 const int num_requested_digits
=
773 static_cast<int>((negative_block_index
+ 1) * BLOCK_SIZE
);
775 return num_requested_digits
> -exponent
;
778 LIBC_INLINE
constexpr BlockInt
get_positive_block(int block_index
) {
779 if (exponent
< -FRACTION_LEN
)
781 if (block_index
> static_cast<int>(block_buffer_valid
) || block_index
< 0)
784 LIBC_ASSERT(block_index
< static_cast<int>(BLOCK_BUFFER_LEN
));
786 return block_buffer
[block_index
];
789 LIBC_INLINE
constexpr BlockInt
get_negative_block(int negative_block_index
) {
793 // negative_block_index starts at 0 with the first block after the decimal
794 // point, and 1 with the second and so on. This converts to the same
795 // block_index used everywhere else.
797 const int block_index
= -1 - negative_block_index
;
799 // If we're currently after the requested block (remember these are
800 // negative indices) we reset the number to the start. This is only
801 // likely to happen in %g calls. This will also reset int_block_index.
802 // if (block_index > int_block_index) {
806 // Printf is the only existing user of this code and it will only ever move
807 // downwards, except for %g but that currently creates a second
808 // float_to_string object so this assertion still holds. If a new user needs
809 // the ability to step backwards, uncomment the code above.
810 LIBC_ASSERT(block_index
<= int_block_index
);
812 // If we are currently before the requested block. Step until we reach the
813 // requested block. This is likely to only be one step.
814 while (block_index
< int_block_index
) {
815 zero_leading_digits(float_as_fixed
);
816 float_as_fixed
.mul(EXP10_9
);
820 // We're now on the requested block, return the current block.
821 return static_cast<BlockInt
>(float_as_fixed
>> FLOAT_AS_INT_WIDTH
);
824 LIBC_INLINE
constexpr BlockInt
get_block(int block_index
) {
825 if (block_index
>= 0)
826 return get_positive_block(block_index
);
828 return get_negative_block(-1 - block_index
);
832 #endif // !LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64 &&
833 // !LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD
835 } // namespace LIBC_NAMESPACE_DECL
837 #endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H