Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / libc / src / __support / float_to_string.h
blobeb06cd9c08af287a1ac25fb13a4f50cfd33545b6
1 //===-- Utilities to convert floating point values to string ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
10 #define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H
12 #include <stdint.h>
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/FPUtil/FPBits.h"
16 #include "src/__support/FPUtil/dyadic_float.h"
17 #include "src/__support/UInt.h"
18 #include "src/__support/common.h"
19 #include "src/__support/libc_assert.h"
21 // This file has 5 compile-time flags to allow the user to configure the float
22 // to string behavior. These allow the user to select which 2 of the 3 useful
23 // properties they want. The useful properties are:
24 // 1) Speed of Evaluation
25 // 2) Small Size of Binary
26 // 3) Centered Output Value
27 // These are explained below with the flags that are missing each one.
29 // LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
30 // The Mega Table is ~5 megabytes when compiled. It lists the constants needed
31 // to perform the Ryu Printf algorithm (described below) for all long double
32 // values. This makes it extremely fast for both doubles and long doubles, in
33 // exchange for large binary size.
35 // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT
36 // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD
37 // Dyadic floats are software floating point numbers, and their accuracy can be
38 // as high as necessary. This option uses 256 bit dyadic floats to calculate
39 // the table values that Ryu Printf needs. This is reasonably fast and very
40 // small compared to the Mega Table, but the 256 bit floats only give accurate
41 // results for the first ~50 digits of the output. In practice this shouldn't
42 // be a problem since long doubles are only accurate for ~35 digits, but the
43 // trailing values all being 0s may cause brittle tests to fail. The _LD
44 // version of this flag only effects the long double calculations, and the
45 // other version effects both long double and double.
47 // LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC
48 // Integer Calculation uses wide integers to do the calculations for the Ryu
49 // Printf table, which is just as accurate as the Mega Table without requiring
50 // as much code size. These integers can be very large (~32KB at max, though
51 // always on the stack) to handle the edges of the long double range. They are
52 // also very slow, taking multiple seconds on a powerful CPU to calculate the
53 // values at the end of the range. If no flag is set, this is used for long
54 // doubles, the flag only changes the double behavior.
56 // LIBC_COPT_FLOAT_TO_STR_NO_TABLE
57 // This flag doesn't change the actual calculation method, instead it is used
58 // to disable the normal Ryu Printf table for configurations that don't use any
59 // table at all.
61 // Default Config:
62 // If no flags are set, doubles use the normal (and much more reasonably sized)
63 // Ryu Printf table and long doubles use Integer Calculation. This is because
64 // long doubles are rarely used and the normal Ryu Printf table is very fast
65 // for doubles.
67 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
68 #include "src/__support/ryu_long_double_constants.h"
69 #elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE)
70 #include "src/__support/ryu_constants.h"
71 #else
72 constexpr size_t IDX_SIZE = 1;
73 constexpr size_t MID_INT_SIZE = 192;
74 #endif
76 // This implementation is based on the Ryu Printf algorithm by Ulf Adams:
77 // Ulf Adams. 2019. Ryƫ revisited: printf floating point conversion.
78 // Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages.
79 // https://doi.org/10.1145/3360595
81 // This version is modified to require significantly less memory (it doesn't use
82 // a large buffer to store the result).
84 // The general concept of this algorithm is as follows:
85 // We want to calculate a 9 digit segment of a floating point number using this
86 // formula: floor((mantissa * 2^exponent)/10^i) % 10^9.
87 // To do so normally would involve large integers (~1000 bits for doubles), so
88 // we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a
89 // lookup table. The resulting intermediate value needs to be about 192 bits to
90 // store the result with enough precision. Since this is all being done with
91 // integers for appropriate precision, we would run into a problem if
92 // i > exponent since then 2^exponent / 10^i would be less than 1. To correct
93 // for this, the actual calculation done is 2^(exponent + c) / 10^i, and then
94 // when multiplying by the mantissa we reverse this by dividing by 2^c, like so:
95 // floor((mantissa * table[exponent][i])/(2^c)) % 10^9.
96 // This gives a 9 digit value, which is small enough to fit in a 32 bit integer,
97 // and that integer is converted into a string as normal, and called a block. In
98 // this implementation, the most recent block is buffered, so that if rounding
99 // is necessary the block can be adjusted before being written to the output.
100 // Any block that is all 9s adds one to the max block counter and doesn't clear
101 // the buffer because they can cause the block above them to be rounded up.
103 namespace LIBC_NAMESPACE {
105 using BlockInt = uint32_t;
106 constexpr size_t BLOCK_SIZE = 9;
108 using MantissaInt = fputil::FPBits<long double>::UIntType;
110 // Larger numbers prefer a slightly larger constant than is used for the smaller
111 // numbers.
112 constexpr size_t CALC_SHIFT_CONST = 128;
114 namespace internal {
116 // Returns floor(log_10(2^e)); requires 0 <= e <= 42039.
117 LIBC_INLINE constexpr uint32_t log10_pow2(const uint64_t e) {
118 LIBC_ASSERT(e <= 42039 &&
119 "Incorrect exponent to perform log10_pow2 approximation.");
120 // This approximation is based on the float value for log_10(2). It first
121 // gives an incorrect result for our purposes at 42039 (well beyond the 16383
122 // maximum for long doubles).
124 // To get these constants I first evaluated log_10(2) to get an approximation
125 // of 0.301029996. Next I passed that value through a string to double
126 // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent
127 // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional
128 // number). Next I shifted the mantissa right 12 bits to create more space for
129 // the multiplication result, adding 12 to the exponent to compensate. To
130 // check that this approximation works for our purposes I used the following
131 // python code:
132 // for i in range(16384):
133 // if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)):
134 // print(i)
135 // The reason we add 1 is because this evaluation truncates the result, giving
136 // us the floor, whereas counting the digits of the power of 2 gives us the
137 // ceiling. With a similar loop I checked the maximum valid value and found
138 // 42039.
139 return (e * 0x13441350fbdll) >> 42;
142 // Same as above, but with different constants.
143 LIBC_INLINE constexpr uint32_t log2_pow5(const uint64_t e) {
144 return (e * 0x12934f0979bll) >> 39;
147 // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any
148 // power of 2 was also a power of 10, but since that doesn't exist this is
149 // always accurate. This is used to calculate the maximum number of base-10
150 // digits a given e-bit number could have.
151 LIBC_INLINE constexpr uint32_t ceil_log10_pow2(const uint32_t e) {
152 return log10_pow2(e) + 1;
155 // Returns the maximum number of 9 digit blocks a number described by the given
156 // index (which is ceil(exponent/16)) and mantissa width could need.
157 LIBC_INLINE constexpr uint32_t length_for_num(const uint32_t idx,
158 const uint32_t mantissa_width) {
159 //+8 to round up when dividing by 9
160 return (ceil_log10_pow2(idx) + ceil_log10_pow2(mantissa_width + 1) +
161 (BLOCK_SIZE - 1)) /
162 BLOCK_SIZE;
163 // return (ceil_log10_pow2(16 * idx + mantissa_width) + 8) / 9;
166 // The formula for the table when i is positive (or zero) is as follows:
167 // floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1)
168 // Rewritten slightly we get:
169 // floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1)
171 // TODO: Fix long doubles (needs bigger table or alternate algorithm.)
172 // Currently the table values are generated, which is very slow.
173 template <size_t INT_SIZE>
174 LIBC_INLINE constexpr cpp::UInt<MID_INT_SIZE> get_table_positive(int exponent,
175 size_t i) {
176 // INT_SIZE is the size of int that is used for the internal calculations of
177 // this function. It should be large enough to hold 2^(exponent+constant), so
178 // ~1000 for double and ~16000 for long double. Be warned that the time
179 // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of
180 // bits in the number being exponentiated and m is the exponent.
181 const int shift_amount =
182 static_cast<int>(exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * i));
183 if (shift_amount < 0) {
184 return 1;
186 cpp::UInt<INT_SIZE> num(0);
187 // MOD_SIZE is one of the limiting factors for how big the constant argument
188 // can get, since it needs to be small enough to fit in the result UInt,
189 // otherwise we'll get truncation on return.
190 constexpr cpp::UInt<INT_SIZE> MOD_SIZE =
191 (cpp::UInt<INT_SIZE>(1000000000)
192 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
194 constexpr uint64_t FIVE_EXP_NINE = 1953125;
196 num = cpp::UInt<INT_SIZE>(1) << (shift_amount);
197 if (i > 0) {
198 cpp::UInt<INT_SIZE> fives(FIVE_EXP_NINE);
199 fives.pow_n(i);
200 num = num / fives;
203 num = num + 1;
204 if (num > MOD_SIZE) {
205 auto rem =
206 num.div_uint32_times_pow_2(
207 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
208 .value();
209 num = rem;
211 return num;
214 template <size_t INT_SIZE>
215 LIBC_INLINE cpp::UInt<MID_INT_SIZE> get_table_positive_df(int exponent,
216 size_t i) {
217 static_assert(INT_SIZE == 256,
218 "Only 256 is supported as an int size right now.");
219 // This version uses dyadic floats with 256 bit mantissas to perform the same
220 // calculation as above. Due to floating point imprecision it is only accurate
221 // for the first 50 digits, but it's much faster. Since even 128 bit long
222 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
223 // enough for these floats to be converted back and forth safely. This is
224 // ideal for avoiding the size of the long double table.
225 const int shift_amount =
226 static_cast<int>(exponent + CALC_SHIFT_CONST - (9 * i));
227 if (shift_amount < 0) {
228 return 1;
230 fputil::DyadicFloat<INT_SIZE> num(false, 0, 1);
231 constexpr cpp::UInt<INT_SIZE> MOD_SIZE =
232 (cpp::UInt<INT_SIZE>(1000000000)
233 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
235 constexpr cpp::UInt<INT_SIZE> FIVE_EXP_MINUS_NINE_MANT{
236 {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030,
237 0x89705f4136b4a597}};
239 static const fputil::DyadicFloat<INT_SIZE> FIVE_EXP_MINUS_NINE(
240 false, -276, FIVE_EXP_MINUS_NINE_MANT);
242 if (i > 0) {
243 fputil::DyadicFloat<INT_SIZE> fives = fputil::pow_n(FIVE_EXP_MINUS_NINE, i);
244 num = fives;
246 num = mul_pow_2(num, shift_amount);
248 // Adding one is part of the formula.
249 cpp::UInt<INT_SIZE> int_num = static_cast<cpp::UInt<INT_SIZE>>(num) + 1;
250 if (int_num > MOD_SIZE) {
251 auto rem =
252 int_num
253 .div_uint32_times_pow_2(
254 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
255 .value();
256 int_num = rem;
259 cpp::UInt<MID_INT_SIZE> result = int_num;
261 return result;
264 // The formula for the table when i is negative (or zero) is as follows:
265 // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0)
266 // Since we know i is always negative, we just take it as unsigned and treat it
267 // as negative. We do the same with exponent, while they're both always negative
268 // in theory, in practice they're converted to positive for simpler
269 // calculations.
270 // The formula being used looks more like this:
271 // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0)
272 template <size_t INT_SIZE>
273 LIBC_INLINE cpp::UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i) {
274 int shift_amount = CALC_SHIFT_CONST - exponent;
275 cpp::UInt<INT_SIZE> num(1);
276 constexpr cpp::UInt<INT_SIZE> MOD_SIZE =
277 (cpp::UInt<INT_SIZE>(1000000000)
278 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
280 constexpr uint64_t TEN_EXP_NINE = 1000000000;
281 constexpr uint64_t FIVE_EXP_NINE = 1953125;
282 size_t ten_blocks = i;
283 size_t five_blocks = 0;
284 if (shift_amount < 0) {
285 int block_shifts = (-shift_amount) / BLOCK_SIZE;
286 if (block_shifts < static_cast<int>(ten_blocks)) {
287 ten_blocks = ten_blocks - block_shifts;
288 five_blocks = block_shifts;
289 shift_amount = shift_amount + (block_shifts * BLOCK_SIZE);
290 } else {
291 ten_blocks = 0;
292 five_blocks = i;
293 shift_amount = static_cast<int>(shift_amount + (i * BLOCK_SIZE));
297 if (five_blocks > 0) {
298 cpp::UInt<INT_SIZE> fives(FIVE_EXP_NINE);
299 fives.pow_n(five_blocks);
300 num = fives;
302 if (ten_blocks > 0) {
303 cpp::UInt<INT_SIZE> tens(TEN_EXP_NINE);
304 tens.pow_n(ten_blocks);
305 if (five_blocks <= 0) {
306 num = tens;
307 } else {
308 num *= tens;
312 if (shift_amount > 0) {
313 num = num << shift_amount;
314 } else {
315 num = num >> (-shift_amount);
317 if (num > MOD_SIZE) {
318 auto rem =
319 num.div_uint32_times_pow_2(
320 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
321 .value();
322 num = rem;
324 return num;
327 template <size_t INT_SIZE>
328 LIBC_INLINE cpp::UInt<MID_INT_SIZE> get_table_negative_df(int exponent,
329 size_t i) {
330 static_assert(INT_SIZE == 256,
331 "Only 256 is supported as an int size right now.");
332 // This version uses dyadic floats with 256 bit mantissas to perform the same
333 // calculation as above. Due to floating point imprecision it is only accurate
334 // for the first 50 digits, but it's much faster. Since even 128 bit long
335 // doubles are only accurate to ~35 digits, the 50 digits of accuracy are
336 // enough for these floats to be converted back and forth safely. This is
337 // ideal for avoiding the size of the long double table.
339 int shift_amount = CALC_SHIFT_CONST - exponent;
341 fputil::DyadicFloat<INT_SIZE> num(false, 0, 1);
342 constexpr cpp::UInt<INT_SIZE> MOD_SIZE =
343 (cpp::UInt<INT_SIZE>(1000000000)
344 << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)));
346 constexpr cpp::UInt<INT_SIZE> TEN_EXP_NINE_MANT(1000000000);
348 static const fputil::DyadicFloat<INT_SIZE> TEN_EXP_NINE(false, 0,
349 TEN_EXP_NINE_MANT);
351 if (i > 0) {
352 fputil::DyadicFloat<INT_SIZE> tens = fputil::pow_n(TEN_EXP_NINE, i);
353 num = tens;
355 num = mul_pow_2(num, shift_amount);
357 cpp::UInt<INT_SIZE> int_num = static_cast<cpp::UInt<INT_SIZE>>(num);
358 if (int_num > MOD_SIZE) {
359 auto rem =
360 int_num
361 .div_uint32_times_pow_2(
362 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))
363 .value();
364 int_num = rem;
367 cpp::UInt<MID_INT_SIZE> result = int_num;
369 return result;
372 LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
373 // The formula for mult_const is:
374 // 1 + floor((2^(bits in target integer size + log_2(divider))) / divider)
375 // Where divider is 10^9 and target integer size is 128.
376 const cpp::UInt<MID_INT_SIZE> mult_const(
377 {0x31680A88F8953031u, 0x89705F4136B4A597u, 0});
378 const auto middle = (mult_const * val);
379 const uint64_t result = static_cast<uint64_t>(middle[2]);
380 const uint64_t shifted = result >> 29;
381 return static_cast<uint32_t>(static_cast<uint32_t>(val) -
382 (1000000000 * shifted));
385 LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
386 const cpp::UInt<MID_INT_SIZE> &large,
387 const int32_t shift_amount) {
388 constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
389 cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
390 val = (val * mantissa) >> shift_amount;
391 return static_cast<uint32_t>(
392 val.div_uint32_times_pow_2(1000000000, 0).value());
395 } // namespace internal
397 // Convert floating point values to their string representation.
398 // Because the result may not fit in a reasonably sized array, the caller must
399 // request blocks of digits and convert them from integers to strings themself.
400 // Blocks contain the most digits that can be stored in an BlockInt. This is 9
401 // digits for a 32 bit int and 18 digits for a 64 bit int.
402 // The intended use pattern is to create a FloatToString object of the
403 // appropriate type, then call get_positive_blocks to get an approximate number
404 // of blocks there are before the decimal point. Now the client code can start
405 // calling get_positive_block in a loop from the number of positive blocks to
406 // zero. This will give all digits before the decimal point. Then the user can
407 // start calling get_negative_block in a loop from 0 until the number of digits
408 // they need is reached. As an optimization, the client can use
409 // zero_blocks_after_point to find the number of blocks that are guaranteed to
410 // be zero after the decimal point and before the non-zero digits. Additionally,
411 // is_lowest_block will return if the current block is the lowest non-zero
412 // block.
413 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
414 class FloatToString {
415 fputil::FPBits<T> float_bits;
416 bool is_negative;
417 int exponent;
418 MantissaInt mantissa;
420 static constexpr int MANT_WIDTH = fputil::MantissaWidth<T>::VALUE;
421 static constexpr int EXP_BIAS = fputil::FPBits<T>::EXPONENT_BIAS;
423 public:
424 LIBC_INLINE constexpr FloatToString(T init_float) : float_bits(init_float) {
425 is_negative = float_bits.get_sign();
426 exponent = float_bits.get_explicit_exponent();
427 mantissa = float_bits.get_explicit_mantissa();
429 // Adjust for the width of the mantissa.
430 exponent -= MANT_WIDTH;
432 // init_convert();
435 LIBC_INLINE constexpr bool is_nan() { return float_bits.is_nan(); }
436 LIBC_INLINE constexpr bool is_inf() { return float_bits.is_inf(); }
437 LIBC_INLINE constexpr bool is_inf_or_nan() {
438 return float_bits.is_inf_or_nan();
441 // get_block returns an integer that represents the digits in the requested
442 // block.
443 LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) {
444 if (exponent >= -MANT_WIDTH) {
445 // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to
446 // find the coarse section of the POW10_SPLIT table that will be used to
447 // calculate the 9 digit window, as well as some other related values.
448 const uint32_t idx =
449 exponent < 0
451 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
453 // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) -
454 // exponent
456 cpp::UInt<MID_INT_SIZE> val;
458 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
459 // ----------------------- DYADIC FLOAT CALC MODE ------------------------
460 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
461 val = internal::get_table_positive_df<256>(IDX_SIZE * idx, block_index);
462 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
464 // ---------------------------- INT CALC MODE ----------------------------
465 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
467 const uint64_t MAX_POW_2_SIZE =
468 exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index);
469 const uint64_t MAX_POW_5_SIZE =
470 internal::log2_pow5(BLOCK_SIZE * block_index);
471 const uint64_t MAX_INT_SIZE =
472 (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE;
474 if (MAX_INT_SIZE < 1024) {
475 val = internal::get_table_positive<1024>(IDX_SIZE * idx, block_index);
476 } else if (MAX_INT_SIZE < 2048) {
477 val = internal::get_table_positive<2048>(IDX_SIZE * idx, block_index);
478 } else if (MAX_INT_SIZE < 4096) {
479 val = internal::get_table_positive<4096>(IDX_SIZE * idx, block_index);
480 } else if (MAX_INT_SIZE < 8192) {
481 val = internal::get_table_positive<8192>(IDX_SIZE * idx, block_index);
482 } else {
483 val = internal::get_table_positive<16384>(IDX_SIZE * idx, block_index);
485 #else
486 // ----------------------------- TABLE MODE ------------------------------
487 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
489 val = POW10_SPLIT[POW10_OFFSET[idx] + block_index];
490 #endif
491 const uint32_t shift_amount = SHIFT_CONST + (IDX_SIZE * idx) - exponent;
492 const uint32_t digits =
493 internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount));
494 return digits;
495 } else {
496 return 0;
500 LIBC_INLINE constexpr BlockInt get_negative_block(int block_index) {
501 if (exponent < 0) {
502 const int32_t idx = -exponent / IDX_SIZE;
504 cpp::UInt<MID_INT_SIZE> val;
506 #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT)
507 // ----------------------- DYADIC FLOAT CALC MODE ------------------------
508 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
509 val =
510 internal::get_table_negative_df<256>(idx * IDX_SIZE, block_index + 1);
511 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC)
512 // ---------------------------- INT CALC MODE ----------------------------
513 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
514 const uint64_t TEN_BLOCKS = (block_index + 1) * BLOCK_SIZE;
515 const uint64_t MAX_INT_SIZE = internal::log2_pow5(TEN_BLOCKS);
517 if (MAX_INT_SIZE < 1024) {
518 val =
519 internal::get_table_negative<1024>(idx * IDX_SIZE, block_index + 1);
520 } else if (MAX_INT_SIZE < 2048) {
521 val =
522 internal::get_table_negative<2048>(idx * IDX_SIZE, block_index + 1);
523 } else if (MAX_INT_SIZE < 4096) {
524 val =
525 internal::get_table_negative<4096>(idx * IDX_SIZE, block_index + 1);
526 } else if (MAX_INT_SIZE < 8192) {
527 val =
528 internal::get_table_negative<8192>(idx * IDX_SIZE, block_index + 1);
529 } else if (MAX_INT_SIZE < 16384) {
530 val = internal::get_table_negative<16384>(idx * IDX_SIZE,
531 block_index + 1);
532 } else {
533 val = internal::get_table_negative<32768>(idx * IDX_SIZE,
534 block_index + 1);
536 #else
537 // ----------------------------- TABLE MODE ------------------------------
538 // if the requested block is zero
539 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
540 if (block_index < MIN_BLOCK_2[idx]) {
541 return 0;
543 const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx];
544 // If every digit after the requested block is zero.
545 if (p >= POW10_OFFSET_2[idx + 1]) {
546 return 0;
549 val = POW10_SPLIT_2[p];
550 #endif
551 const int32_t shift_amount = SHIFT_CONST + (-exponent - IDX_SIZE * idx);
552 uint32_t digits =
553 internal::mul_shift_mod_1e9(mantissa, val, shift_amount);
554 return digits;
555 } else {
556 return 0;
560 LIBC_INLINE constexpr BlockInt get_block(int block_index) {
561 if (block_index >= 0) {
562 return get_positive_block(block_index);
563 } else {
564 return get_negative_block(-1 - block_index);
568 LIBC_INLINE constexpr size_t get_positive_blocks() {
569 if (exponent >= -MANT_WIDTH) {
570 const uint32_t idx =
571 exponent < 0
573 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
574 const uint32_t len = internal::length_for_num(idx * IDX_SIZE, MANT_WIDTH);
575 return len;
576 } else {
577 return 0;
581 // This takes the index of a block after the decimal point (a negative block)
582 // and return if it's sure that all of the digits after it are zero.
583 LIBC_INLINE constexpr bool is_lowest_block(size_t block_index) {
584 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
585 return false;
586 #else
587 const int32_t idx = -exponent / IDX_SIZE;
588 const size_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx];
589 // If the remaining digits are all 0, then this is the lowest block.
590 return p >= POW10_OFFSET_2[idx + 1];
591 #endif
594 LIBC_INLINE constexpr size_t zero_blocks_after_point() {
595 #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE
596 return 0;
597 // TODO (michaelrj): Find a good algorithm for this that doesn't use a
598 // table.
599 #else
600 return MIN_BLOCK_2[-exponent / IDX_SIZE];
601 #endif
605 #ifndef LONG_DOUBLE_IS_DOUBLE
606 // --------------------------- LONG DOUBLE FUNCTIONS ---------------------------
608 template <>
609 LIBC_INLINE constexpr size_t FloatToString<long double>::get_positive_blocks() {
610 if (exponent >= -MANT_WIDTH) {
611 const uint32_t idx =
612 exponent < 0
614 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
615 const uint32_t len = internal::length_for_num(idx * IDX_SIZE, MANT_WIDTH);
616 return len;
617 } else {
618 return 0;
622 template <>
623 LIBC_INLINE constexpr size_t
624 FloatToString<long double>::zero_blocks_after_point() {
625 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
626 return MIN_BLOCK_2[-exponent / IDX_SIZE];
627 #else
628 return 0;
629 // TODO (michaelrj): Find a good algorithm for this that doesn't use a table.
630 #endif
633 template <>
634 LIBC_INLINE constexpr bool FloatToString<long double>::is_lowest_block(size_t) {
635 return false;
638 template <>
639 LIBC_INLINE constexpr BlockInt
640 FloatToString<long double>::get_positive_block(int block_index) {
641 if (exponent >= -MANT_WIDTH) {
643 // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to
644 // find the coarse section of the POW10_SPLIT table that will be used to
645 // calculate the 9 digit window, as well as some other related values.
646 const uint32_t idx =
647 exponent < 0
649 : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE;
650 const uint32_t pos_exp = idx * IDX_SIZE;
652 // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - exponent
654 cpp::UInt<MID_INT_SIZE> val;
655 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
656 // ------------------------------ TABLE MODE -------------------------------
657 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
658 val = POW10_SPLIT[POW10_OFFSET[idx] + block_index];
660 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) || \
661 defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD)
662 // ------------------------ DYADIC FLOAT CALC MODE -------------------------
663 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
664 val = internal::get_table_positive_df<256>(pos_exp, block_index);
665 #else
666 // ----------------------------- INT CALC MODE -----------------------------
667 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
668 const uint64_t MAX_POW_2_SIZE =
669 pos_exp + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index);
670 const uint64_t MAX_POW_5_SIZE =
671 internal::log2_pow5(BLOCK_SIZE * block_index);
672 const uint64_t MAX_INT_SIZE =
673 (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE;
675 if (MAX_INT_SIZE < 1024) {
676 val = internal::get_table_positive<1024>(pos_exp, block_index);
677 } else if (MAX_INT_SIZE < 2048) {
678 val = internal::get_table_positive<2048>(pos_exp, block_index);
679 } else if (MAX_INT_SIZE < 4096) {
680 val = internal::get_table_positive<4096>(pos_exp, block_index);
681 } else if (MAX_INT_SIZE < 8192) {
682 val = internal::get_table_positive<8192>(pos_exp, block_index);
683 } else if (MAX_INT_SIZE < 16384) {
684 val = internal::get_table_positive<16384>(pos_exp, block_index);
685 } else {
686 val = internal::get_table_positive<16384 + 128>(pos_exp, block_index);
688 #endif
689 const uint32_t shift_amount = SHIFT_CONST + pos_exp - exponent;
691 const BlockInt digits =
692 internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount));
693 return digits;
694 } else {
695 return 0;
699 template <>
700 LIBC_INLINE constexpr BlockInt
701 FloatToString<long double>::get_negative_block(int block_index) {
702 if (exponent < 0) {
703 const int32_t idx = -exponent / IDX_SIZE;
705 cpp::UInt<MID_INT_SIZE> val;
706 #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE
707 // ------------------------------ TABLE MODE -------------------------------
708 const int32_t SHIFT_CONST = TABLE_SHIFT_CONST;
710 // if the requested block is zero
711 if (block_index < MIN_BLOCK_2[idx]) {
712 return 0;
714 const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx];
715 // If every digit after the requested block is zero.
716 if (p >= POW10_OFFSET_2[idx + 1]) {
717 return 0;
719 val = POW10_SPLIT_2[p];
720 #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) || \
721 defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD)
722 // ------------------------ DYADIC FLOAT CALC MODE -------------------------
723 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
725 val = internal::get_table_negative_df<256>(idx * IDX_SIZE, block_index + 1);
726 #else // table mode
727 // ----------------------------- INT CALC MODE -----------------------------
728 const int32_t SHIFT_CONST = CALC_SHIFT_CONST;
730 const uint64_t TEN_BLOCKS = (block_index + 1) * BLOCK_SIZE;
731 const uint64_t MAX_INT_SIZE = internal::log2_pow5(TEN_BLOCKS);
733 if (MAX_INT_SIZE < 1024) {
734 val = internal::get_table_negative<1024>(idx * IDX_SIZE, block_index + 1);
735 } else if (MAX_INT_SIZE < 2048) {
736 val = internal::get_table_negative<2048>(idx * IDX_SIZE, block_index + 1);
737 } else if (MAX_INT_SIZE < 4096) {
738 val = internal::get_table_negative<4096>(idx * IDX_SIZE, block_index + 1);
739 } else if (MAX_INT_SIZE < 8192) {
740 val = internal::get_table_negative<8192>(idx * IDX_SIZE, block_index + 1);
741 } else if (MAX_INT_SIZE < 16384) {
742 val =
743 internal::get_table_negative<16384>(idx * IDX_SIZE, block_index + 1);
744 } else {
745 val = internal::get_table_negative<16384 + 8192>(idx * IDX_SIZE,
746 block_index + 1);
748 #endif
749 const int32_t shift_amount = SHIFT_CONST + (-exponent - IDX_SIZE * idx);
750 BlockInt digits = internal::mul_shift_mod_1e9(mantissa, val, shift_amount);
751 return digits;
752 } else {
753 return 0;
757 #endif // LONG_DOUBLE_IS_DOUBLE
759 } // namespace LIBC_NAMESPACE
761 #endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H