//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
//
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

// Avoid formatting to keep the changes with the original code minimal.
// clang-format off
46 #include "include/ryu/common.h"
47 #include "include/ryu/d2fixed.h"
48 #include "include/ryu/d2s_intrinsics.h"
49 #include "include/ryu/digit_table.h"
50 #include "include/ryu/f2s.h"
51 #include "include/ryu/ryu.h"
53 _LIBCPP_BEGIN_NAMESPACE_STD

// Layout parameters of the 32-bit float format decoded by this file.
inline constexpr int __FLOAT_MANTISSA_BITS = 23; // width of the stored mantissa field
inline constexpr int __FLOAT_EXPONENT_BITS = 8;  // width of the biased exponent field
inline constexpr int __FLOAT_BIAS = 127;         // subtracted from the exponent field to unbias it

// Fixed-point multipliers used by __mulPow5InvDivPow2(), indexed by __q.
// __FLOAT_POW5_INV_BITCOUNT is the scaling term that __f2d() adds when it
// derives the matching shift amount. Entry 0 is 2^59 + 1.
inline constexpr int __FLOAT_POW5_INV_BITCOUNT = 59;
inline constexpr uint64_t __FLOAT_POW5_INV_SPLIT[31] = {
  576460752303423489u, 461168601842738791u, 368934881474191033u, 295147905179352826u,
  472236648286964522u, 377789318629571618u, 302231454903657294u, 483570327845851670u,
  386856262276681336u, 309485009821345069u, 495176015714152110u, 396140812571321688u,
  316912650057057351u, 507060240091291761u, 405648192073033409u, 324518553658426727u,
  519229685853482763u, 415383748682786211u, 332306998946228969u, 531691198313966350u,
  425352958651173080u, 340282366920938464u, 544451787073501542u, 435561429658801234u,
  348449143727040987u, 557518629963265579u, 446014903970612463u, 356811923176489971u,
  570899077082383953u, 456719261665907162u, 365375409332725730u
};

// Fixed-point multipliers used by __mulPow5divPow2(), indexed by __i.
// __FLOAT_POW5_BITCOUNT is the scaling term that __f2d() subtracts when it
// derives the matching shift amount. Every entry lies in [2^60, 2^61);
// entry 0 is 2^60 and entry 1 is 5 * 2^58.
inline constexpr int __FLOAT_POW5_BITCOUNT = 61;
inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
  1152921504606846976u, 1441151880758558720u, 1801439850948198400u, 2251799813685248000u,
  1407374883553280000u, 1759218604441600000u, 2199023255552000000u, 1374389534720000000u,
  1717986918400000000u, 2147483648000000000u, 1342177280000000000u, 1677721600000000000u,
  2097152000000000000u, 1310720000000000000u, 1638400000000000000u, 2048000000000000000u,
  1280000000000000000u, 1600000000000000000u, 2000000000000000000u, 1250000000000000000u,
  1562500000000000000u, 1953125000000000000u, 1220703125000000000u, 1525878906250000000u,
  1907348632812500000u, 1192092895507812500u, 1490116119384765625u, 1862645149230957031u,
  1164153218269348144u, 1455191522836685180u, 1818989403545856475u, 2273736754432320594u,
  1421085471520200371u, 1776356839400250464u, 2220446049250313080u, 1387778780781445675u,
  1734723475976807094u, 2168404344971008868u, 1355252715606880542u, 1694065894508600678u,
  2117582368135750847u, 1323488980084844279u, 1654361225106055349u, 2067951531382569187u,
  1292469707114105741u, 1615587133892632177u, 2019483917365790221u
};

// Returns the largest __count such that 5^__count divides __value.
// Precondition: __value != 0 (zero is divisible by every power of 5, so the
// loop below would never terminate).
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint32_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
    const uint32_t __q = __value / 5;
    const uint32_t __r = __value % 5;
    if (__r != 0) {
      break; // no further factor of 5
    }
    __value = __q;
    ++__count;
  }
  return __count;
}
101 // Returns true if __value is divisible by 5^__p.
102 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline bool __multipleOfPowerOf5(const uint32_t __value
, const uint32_t __p
) {
103 return __pow5Factor(__value
) >= __p
;

// Returns true if __value is divisible by 2^__p.
// Preconditions: __value != 0 and __p < 32 (so the shift below is defined).
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
  _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
  // __builtin_ctz doesn't appear to be faster here.
  return (__value & ((1u << __p) - 1)) == 0; // the low __p bits must all be zero
}

// Returns (__m * __factor) >> __shift, computed without a 128-bit product.
// Precondition: __shift > 32. The 64-bit path additionally asserts that the
// shifted result fits in 32 bits.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulShift(const uint32_t __m, const uint64_t __factor, const int32_t __shift) {
  _LIBCPP_ASSERT_INTERNAL(__shift > 32, "");

  // The casts here help MSVC to avoid calls to the __allmul library
  // function.
  const uint32_t __factorLo = static_cast<uint32_t>(__factor);
  const uint32_t __factorHi = static_cast<uint32_t>(__factor >> 32);
  const uint64_t __bits0 = static_cast<uint64_t>(__m) * __factorLo;
  const uint64_t __bits1 = static_cast<uint64_t>(__m) * __factorHi;

#ifndef _LIBCPP_64_BIT
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  const uint32_t __bits0Hi = static_cast<uint32_t>(__bits0 >> 32);
  uint32_t __bits1Lo = static_cast<uint32_t>(__bits1);
  uint32_t __bits1Hi = static_cast<uint32_t>(__bits1 >> 32);
  __bits1Lo += __bits0Hi;
  __bits1Hi += (__bits1Lo < __bits0Hi); // propagate the carry out of the low word
  const int32_t __s = __shift - 32;
  return (__bits1Hi << (32 - __s)) | (__bits1Lo >> __s);
#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv
  const uint64_t __sum = (__bits0 >> 32) + __bits1;
  const uint64_t __shiftedSum = __sum >> (__shift - 32);
  _LIBCPP_ASSERT_INTERNAL(__shiftedSum <= UINT32_MAX, "");
  return static_cast<uint32_t>(__shiftedSum);
#endif // ^^^ 64-bit ^^^
}
142 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline uint32_t __mulPow5InvDivPow2(const uint32_t __m
, const uint32_t __q
, const int32_t __j
) {
143 return __mulShift(__m
, __FLOAT_POW5_INV_SPLIT
[__q
], __j
);
146 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline uint32_t __mulPow5divPow2(const uint32_t __m
, const uint32_t __i
, const int32_t __j
) {
147 return __mulShift(__m
, __FLOAT_POW5_SPLIT
[__i
], __j
);

// A floating decimal representing m * 10^e.
struct __floating_decimal_32 {
  uint32_t __mantissa; // m: decimal digits produced by __f2d()
  int32_t __exponent;  // e: power of ten applied to __mantissa
};
156 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline __floating_decimal_32
__f2d(const uint32_t __ieeeMantissa
, const uint32_t __ieeeExponent
) {
159 if (__ieeeExponent
== 0) {
160 // We subtract 2 so that the bounds computation has 2 additional bits.
161 __e2
= 1 - __FLOAT_BIAS
- __FLOAT_MANTISSA_BITS
- 2;
162 __m2
= __ieeeMantissa
;
164 __e2
= static_cast<int32_t>(__ieeeExponent
) - __FLOAT_BIAS
- __FLOAT_MANTISSA_BITS
- 2;
165 __m2
= (1u << __FLOAT_MANTISSA_BITS
) | __ieeeMantissa
;
167 const bool __even
= (__m2
& 1) == 0;
168 const bool __acceptBounds
= __even
;
170 // Step 2: Determine the interval of valid decimal representations.
171 const uint32_t __mv
= 4 * __m2
;
172 const uint32_t __mp
= 4 * __m2
+ 2;
173 // Implicit bool -> int conversion. True is 1, false is 0.
174 const uint32_t __mmShift
= __ieeeMantissa
!= 0 || __ieeeExponent
<= 1;
175 const uint32_t __mm
= 4 * __m2
- 1 - __mmShift
;
177 // Step 3: Convert to a decimal power base using 64-bit arithmetic.
178 uint32_t __vr
, __vp
, __vm
;
180 bool __vmIsTrailingZeros
= false;
181 bool __vrIsTrailingZeros
= false;
182 uint8_t __lastRemovedDigit
= 0;
184 const uint32_t __q
= __log10Pow2(__e2
);
185 __e10
= static_cast<int32_t>(__q
);
186 const int32_t __k
= __FLOAT_POW5_INV_BITCOUNT
+ __pow5bits(static_cast<int32_t>(__q
)) - 1;
187 const int32_t __i
= -__e2
+ static_cast<int32_t>(__q
) + __k
;
188 __vr
= __mulPow5InvDivPow2(__mv
, __q
, __i
);
189 __vp
= __mulPow5InvDivPow2(__mp
, __q
, __i
);
190 __vm
= __mulPow5InvDivPow2(__mm
, __q
, __i
);
191 if (__q
!= 0 && (__vp
- 1) / 10 <= __vm
/ 10) {
192 // We need to know one removed digit even if we are not going to loop below. We could use
193 // __q = X - 1 above, except that would require 33 bits for the result, and we've found that
194 // 32-bit arithmetic is faster even on 64-bit machines.
195 const int32_t __l
= __FLOAT_POW5_INV_BITCOUNT
+ __pow5bits(static_cast<int32_t>(__q
- 1)) - 1;
196 __lastRemovedDigit
= static_cast<uint8_t>(__mulPow5InvDivPow2(__mv
, __q
- 1,
197 -__e2
+ static_cast<int32_t>(__q
) - 1 + __l
) % 10);
200 // The largest power of 5 that fits in 24 bits is 5^10, but __q <= 9 seems to be safe as well.
201 // Only one of __mp, __mv, and __mm can be a multiple of 5, if any.
203 __vrIsTrailingZeros
= __multipleOfPowerOf5(__mv
, __q
);
204 } else if (__acceptBounds
) {
205 __vmIsTrailingZeros
= __multipleOfPowerOf5(__mm
, __q
);
207 __vp
-= __multipleOfPowerOf5(__mp
, __q
);
211 const uint32_t __q
= __log10Pow5(-__e2
);
212 __e10
= static_cast<int32_t>(__q
) + __e2
;
213 const int32_t __i
= -__e2
- static_cast<int32_t>(__q
);
214 const int32_t __k
= __pow5bits(__i
) - __FLOAT_POW5_BITCOUNT
;
215 int32_t __j
= static_cast<int32_t>(__q
) - __k
;
216 __vr
= __mulPow5divPow2(__mv
, static_cast<uint32_t>(__i
), __j
);
217 __vp
= __mulPow5divPow2(__mp
, static_cast<uint32_t>(__i
), __j
);
218 __vm
= __mulPow5divPow2(__mm
, static_cast<uint32_t>(__i
), __j
);
219 if (__q
!= 0 && (__vp
- 1) / 10 <= __vm
/ 10) {
220 __j
= static_cast<int32_t>(__q
) - 1 - (__pow5bits(__i
+ 1) - __FLOAT_POW5_BITCOUNT
);
221 __lastRemovedDigit
= static_cast<uint8_t>(__mulPow5divPow2(__mv
, static_cast<uint32_t>(__i
+ 1), __j
) % 10);
224 // {__vr,__vp,__vm} is trailing zeros if {__mv,__mp,__mm} has at least __q trailing 0 bits.
225 // __mv = 4 * __m2, so it always has at least two trailing 0 bits.
226 __vrIsTrailingZeros
= true;
227 if (__acceptBounds
) {
228 // __mm = __mv - 1 - __mmShift, so it has 1 trailing 0 bit iff __mmShift == 1.
229 __vmIsTrailingZeros
= __mmShift
== 1;
231 // __mp = __mv + 2, so it always has at least one trailing 0 bit.
234 } else if (__q
< 31) { // TRANSITION(ulfjack): Use a tighter bound here.
235 __vrIsTrailingZeros
= __multipleOfPowerOf2(__mv
, __q
- 1);
239 // Step 4: Find the shortest decimal representation in the interval of valid representations.
240 int32_t __removed
= 0;
242 if (__vmIsTrailingZeros
|| __vrIsTrailingZeros
) {
243 // General case, which happens rarely (~4.0%).
244 while (__vp
/ 10 > __vm
/ 10) {
245 #ifdef __clang__ // TRANSITION, LLVM-23106
246 __vmIsTrailingZeros
&= __vm
- (__vm
/ 10) * 10 == 0;
248 __vmIsTrailingZeros
&= __vm
% 10 == 0;
250 __vrIsTrailingZeros
&= __lastRemovedDigit
== 0;
251 __lastRemovedDigit
= static_cast<uint8_t>(__vr
% 10);
257 if (__vmIsTrailingZeros
) {
258 while (__vm
% 10 == 0) {
259 __vrIsTrailingZeros
&= __lastRemovedDigit
== 0;
260 __lastRemovedDigit
= static_cast<uint8_t>(__vr
% 10);
267 if (__vrIsTrailingZeros
&& __lastRemovedDigit
== 5 && __vr
% 2 == 0) {
268 // Round even if the exact number is .....50..0.
269 __lastRemovedDigit
= 4;
271 // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
272 _Output
= __vr
+ ((__vr
== __vm
&& (!__acceptBounds
|| !__vmIsTrailingZeros
)) || __lastRemovedDigit
>= 5);
274 // Specialized for the common case (~96.0%). Percentages below are relative to this.
275 // Loop iterations below (approximately):
276 // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
277 while (__vp
/ 10 > __vm
/ 10) {
278 __lastRemovedDigit
= static_cast<uint8_t>(__vr
% 10);
284 // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
285 _Output
= __vr
+ (__vr
== __vm
|| __lastRemovedDigit
>= 5);
287 const int32_t __exp
= __e10
+ __removed
;
289 __floating_decimal_32 __fd
;
290 __fd
.__exponent
= __exp
;
291 __fd
.__mantissa
= _Output
;
295 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline to_chars_result
_Large_integer_to_chars(char* const _First
, char* const _Last
,
296 const uint32_t _Mantissa2
, const int32_t _Exponent2
) {
298 // Print the integer _Mantissa2 * 2^_Exponent2 exactly.
300 // For nonzero integers, _Exponent2 >= -23. (The minimum value occurs when _Mantissa2 * 2^_Exponent2 is 1.
301 // In that case, _Mantissa2 is the implicit 1 bit followed by 23 zeros, so _Exponent2 is -23 to shift away
302 // the zeros.) The dense range of exactly representable integers has negative or zero exponents
303 // (as positive exponents make the range non-dense). For that dense range, Ryu will always be used:
304 // every digit is necessary to uniquely identify the value, so Ryu must print them all.
306 // Positive exponents are the non-dense range of exactly representable integers.
307 // This contains all of the values for which Ryu can't be used (and a few Ryu-friendly values).
309 // Performance note: Long division appears to be faster than losslessly widening float to double and calling
310 // __d2fixed_buffered_n(). If __f2fixed_buffered_n() is implemented, it might be faster than long division.
312 _LIBCPP_ASSERT_INTERNAL(_Exponent2
> 0, "");
313 _LIBCPP_ASSERT_INTERNAL(_Exponent2
<= 104, ""); // because __ieeeExponent <= 254
315 // Manually represent _Mantissa2 * 2^_Exponent2 as a large integer. _Mantissa2 is always 24 bits
316 // (due to the implicit bit), while _Exponent2 indicates a shift of at most 104 bits.
317 // 24 + 104 equals 128 equals 4 * 32, so we need exactly 4 32-bit elements.
318 // We use a little-endian representation, visualized like this:
322 // _Data[3] _Data[2] _Data[1] _Data[0]
326 constexpr uint32_t _Data_size
= 4;
327 uint32_t _Data
[_Data_size
]{};
329 // _Maxidx is the index of the most significant nonzero element.
330 uint32_t _Maxidx
= ((24 + static_cast<uint32_t>(_Exponent2
) + 31) / 32) - 1;
331 _LIBCPP_ASSERT_INTERNAL(_Maxidx
< _Data_size
, "");
333 const uint32_t _Bit_shift
= static_cast<uint32_t>(_Exponent2
) % 32;
334 if (_Bit_shift
<= 8) { // _Mantissa2's 24 bits don't cross an element boundary
335 _Data
[_Maxidx
] = _Mantissa2
<< _Bit_shift
;
336 } else { // _Mantissa2's 24 bits cross an element boundary
337 _Data
[_Maxidx
- 1] = _Mantissa2
<< _Bit_shift
;
338 _Data
[_Maxidx
] = _Mantissa2
>> (32 - _Bit_shift
);
341 // If Ryu hasn't determined the total output length, we need to buffer the digits generated from right to left
342 // by long division. The largest possible float is: 340'282346638'528859811'704183484'516925440
344 int32_t _Filled_blocks
= 0;
345 // From left to right, we're going to print:
346 // _Data[0] will be [1, 10] digits.
347 // Then if _Filled_blocks > 0:
348 // _Blocks[_Filled_blocks - 1], ..., _Blocks[0] will be 0-filled 9-digit blocks.
350 if (_Maxidx
!= 0) { // If the integer is actually large, perform long division.
351 // Otherwise, skip to printing _Data[0].
353 // Loop invariant: _Maxidx != 0 (i.e. the integer is actually large)
355 const uint32_t _Most_significant_elem
= _Data
[_Maxidx
];
356 const uint32_t _Initial_remainder
= _Most_significant_elem
% 1000000000;
357 const uint32_t _Initial_quotient
= _Most_significant_elem
/ 1000000000;
358 _Data
[_Maxidx
] = _Initial_quotient
;
359 uint64_t _Remainder
= _Initial_remainder
;
361 // Process less significant elements.
362 uint32_t _Idx
= _Maxidx
;
364 --_Idx
; // Initially, _Remainder is at most 10^9 - 1.
366 // Now, _Remainder is at most (10^9 - 1) * 2^32 + 2^32 - 1, simplified to 10^9 * 2^32 - 1.
367 _Remainder
= (_Remainder
<< 32) | _Data
[_Idx
];
369 // floor((10^9 * 2^32 - 1) / 10^9) == 2^32 - 1, so uint32_t _Quotient is lossless.
370 const uint32_t _Quotient
= static_cast<uint32_t>(__div1e9(_Remainder
));
372 // _Remainder is at most 10^9 - 1 again.
373 // For uint32_t truncation, see the __mod1e9() comment in d2s_intrinsics.h.
374 _Remainder
= static_cast<uint32_t>(_Remainder
) - 1000000000u * _Quotient
;
376 _Data
[_Idx
] = _Quotient
;
379 // Store a 0-filled 9-digit block.
380 _Blocks
[_Filled_blocks
++] = static_cast<uint32_t>(_Remainder
);
382 if (_Initial_quotient
== 0) { // Is the large integer shrinking?
383 --_Maxidx
; // log2(10^9) is 29.9, so we can't shrink by more than one element.
385 break; // We've finished long division. Now we need to print _Data[0].
391 _LIBCPP_ASSERT_INTERNAL(_Data
[0] != 0, "");
392 for (uint32_t _Idx
= 1; _Idx
< _Data_size
; ++_Idx
) {
393 _LIBCPP_ASSERT_INTERNAL(_Data
[_Idx
] == 0, "");
396 const uint32_t _Data_olength
= _Data
[0] >= 1000000000 ? 10 : __decimalLength9(_Data
[0]);
397 const uint32_t _Total_fixed_length
= _Data_olength
+ 9 * _Filled_blocks
;
399 if (_Last
- _First
< static_cast<ptrdiff_t>(_Total_fixed_length
)) {
400 return { _Last
, errc::value_too_large
};
403 char* _Result
= _First
;
405 // Print _Data[0]. While it's up to 10 digits,
406 // which is more than Ryu generates, the code below can handle this.
407 __append_n_digits(_Data_olength
, _Data
[0], _Result
);
408 _Result
+= _Data_olength
;
410 // Print 0-filled 9-digit blocks.
411 for (int32_t _Idx
= _Filled_blocks
- 1; _Idx
>= 0; --_Idx
) {
412 __append_nine_digits(_Blocks
[_Idx
], _Result
);
416 return { _Result
, errc
{} };
419 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
inline to_chars_result
__to_chars(char* const _First
, char* const _Last
, const __floating_decimal_32 __v
,
420 chars_format _Fmt
, const uint32_t __ieeeMantissa
, const uint32_t __ieeeExponent
) {
421 // Step 5: Print the decimal representation.
422 uint32_t _Output
= __v
.__mantissa
;
423 int32_t _Ryu_exponent
= __v
.__exponent
;
424 const uint32_t __olength
= __decimalLength9(_Output
);
425 int32_t _Scientific_exponent
= _Ryu_exponent
+ static_cast<int32_t>(__olength
) - 1;
427 if (_Fmt
== chars_format
{}) {
431 if (__olength
== 1) {
432 // Value | Fixed | Scientific
433 // 1e-3 | "0.001" | "1e-03"
434 // 1e4 | "10000" | "1e+04"
438 // Value | Fixed | Scientific
439 // 1234e-7 | "0.0001234" | "1.234e-04"
440 // 1234e5 | "123400000" | "1.234e+08"
441 _Lower
= -static_cast<int32_t>(__olength
+ 3);
445 if (_Lower
<= _Ryu_exponent
&& _Ryu_exponent
<= _Upper
) {
446 _Fmt
= chars_format::fixed
;
448 _Fmt
= chars_format::scientific
;
450 } else if (_Fmt
== chars_format::general
) {
451 // C11 7.21.6.1 "The fprintf function"/8:
452 // "Let P equal [...] 6 if the precision is omitted [...].
453 // Then, if a conversion with style E would have an exponent of X:
454 // - if P > X >= -4, the conversion is with style f [...].
455 // - otherwise, the conversion is with style e [...]."
456 if (-4 <= _Scientific_exponent
&& _Scientific_exponent
< 6) {
457 _Fmt
= chars_format::fixed
;
459 _Fmt
= chars_format::scientific
;
463 if (_Fmt
== chars_format::fixed
) {
464 // Example: _Output == 1729, __olength == 4
466 // _Ryu_exponent | Printed | _Whole_digits | _Total_fixed_length | Notes
467 // --------------|----------|---------------|----------------------|---------------------------------------
468 // 2 | 172900 | 6 | _Whole_digits | Ryu can't be used for printing
469 // 1 | 17290 | 5 | (sometimes adjusted) | when the trimmed digits are nonzero.
470 // --------------|----------|---------------|----------------------|---------------------------------------
471 // 0 | 1729 | 4 | _Whole_digits | Unified length cases.
472 // --------------|----------|---------------|----------------------|---------------------------------------
473 // -1 | 172.9 | 3 | __olength + 1 | This case can't happen for
474 // -2 | 17.29 | 2 | | __olength == 1, but no additional
475 // -3 | 1.729 | 1 | | code is needed to avoid it.
476 // --------------|----------|---------------|----------------------|---------------------------------------
477 // -4 | 0.1729 | 0 | 2 - _Ryu_exponent | C11 7.21.6.1 "The fprintf function"/8:
478 // -5 | 0.01729 | -1 | | "If a decimal-point character appears,
479 // -6 | 0.001729 | -2 | | at least one digit appears before it."
481 const int32_t _Whole_digits
= static_cast<int32_t>(__olength
) + _Ryu_exponent
;
483 uint32_t _Total_fixed_length
;
484 if (_Ryu_exponent
>= 0) { // cases "172900" and "1729"
485 _Total_fixed_length
= static_cast<uint32_t>(_Whole_digits
);
487 // Rounding can affect the number of digits.
488 // For example, 1e11f is exactly "99999997952" which is 11 digits instead of 12.
489 // We can use a lookup table to detect this and adjust the total length.
490 static constexpr uint8_t _Adjustment
[39] = {
491 0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1 };
492 _Total_fixed_length
-= _Adjustment
[_Ryu_exponent
];
493 // _Whole_digits doesn't need to be adjusted because these cases won't refer to it later.
495 } else if (_Whole_digits
> 0) { // case "17.29"
496 _Total_fixed_length
= __olength
+ 1;
497 } else { // case "0.001729"
498 _Total_fixed_length
= static_cast<uint32_t>(2 - _Ryu_exponent
);
501 if (_Last
- _First
< static_cast<ptrdiff_t>(_Total_fixed_length
)) {
502 return { _Last
, errc::value_too_large
};
506 if (_Ryu_exponent
> 0) { // case "172900"
509 if (_Ryu_exponent
> 10) { // 10^10 is the largest power of 10 that's exactly representable as a float.
510 _Can_use_ryu
= false;
512 // Ryu generated X: __v.__mantissa * 10^_Ryu_exponent
513 // __v.__mantissa == 2^_Trailing_zero_bits * (__v.__mantissa >> _Trailing_zero_bits)
514 // 10^_Ryu_exponent == 2^_Ryu_exponent * 5^_Ryu_exponent
516 // _Trailing_zero_bits is [0, 29] (aside: because 2^29 is the largest power of 2
517 // with 9 decimal digits, which is float's round-trip limit.)
518 // _Ryu_exponent is [1, 10].
519 // Normalization adds [2, 23] (aside: at least 2 because the pre-normalized mantissa is at least 5).
520 // This adds up to [3, 62], which is well below float's maximum binary exponent 127.
522 // Therefore, we just need to consider (__v.__mantissa >> _Trailing_zero_bits) * 5^_Ryu_exponent.
524 // If that product would exceed 24 bits, then X can't be exactly represented as a float.
525 // (That's not a problem for round-tripping, because X is close enough to the original float,
526 // but X isn't mathematically equal to the original float.) This requires a high-precision fallback.
528 // If the product is 24 bits or smaller, then X can be exactly represented as a float (and we don't
529 // need to re-synthesize it; the original float must have been X, because Ryu wouldn't produce the
530 // same output for two different floats X and Y). This allows Ryu's output to be used (zero-filled).
532 // (2^24 - 1) / 5^0 (for indexing), (2^24 - 1) / 5^1, ..., (2^24 - 1) / 5^10
533 static constexpr uint32_t _Max_shifted_mantissa
[11] = {
534 16777215, 3355443, 671088, 134217, 26843, 5368, 1073, 214, 42, 8, 1 };
536 unsigned long _Trailing_zero_bits
;
537 (void) _BitScanForward(&_Trailing_zero_bits
, __v
.__mantissa
); // __v.__mantissa is guaranteed nonzero
538 const uint32_t _Shifted_mantissa
= __v
.__mantissa
>> _Trailing_zero_bits
;
539 _Can_use_ryu
= _Shifted_mantissa
<= _Max_shifted_mantissa
[_Ryu_exponent
];
543 const uint32_t _Mantissa2
= __ieeeMantissa
| (1u << __FLOAT_MANTISSA_BITS
); // restore implicit bit
544 const int32_t _Exponent2
= static_cast<int32_t>(__ieeeExponent
)
545 - __FLOAT_BIAS
- __FLOAT_MANTISSA_BITS
; // bias and normalization
547 // Performance note: We've already called Ryu, so this will redundantly perform buffering and bounds checking.
548 return _Large_integer_to_chars(_First
, _Last
, _Mantissa2
, _Exponent2
);
552 // Print the decimal digits, left-aligned within [_First, _First + _Total_fixed_length).
553 _Mid
= _First
+ __olength
;
554 } else { // cases "1729", "17.29", and "0.001729"
555 // Print the decimal digits, right-aligned within [_First, _First + _Total_fixed_length).
556 _Mid
= _First
+ _Total_fixed_length
;
559 while (_Output
>= 10000) {
560 #ifdef __clang__ // TRANSITION, LLVM-38217
561 const uint32_t __c
= _Output
- 10000 * (_Output
/ 10000);
563 const uint32_t __c
= _Output
% 10000;
566 const uint32_t __c0
= (__c
% 100) << 1;
567 const uint32_t __c1
= (__c
/ 100) << 1;
568 std::memcpy(_Mid
-= 2, __DIGIT_TABLE
+ __c0
, 2);
569 std::memcpy(_Mid
-= 2, __DIGIT_TABLE
+ __c1
, 2);
571 if (_Output
>= 100) {
572 const uint32_t __c
= (_Output
% 100) << 1;
574 std::memcpy(_Mid
-= 2, __DIGIT_TABLE
+ __c
, 2);
577 const uint32_t __c
= _Output
<< 1;
578 std::memcpy(_Mid
-= 2, __DIGIT_TABLE
+ __c
, 2);
580 *--_Mid
= static_cast<char>('0' + _Output
);
583 if (_Ryu_exponent
> 0) { // case "172900" with _Can_use_ryu
584 // Performance note: it might be more efficient to do this immediately after setting _Mid.
585 std::memset(_First
+ __olength
, '0', static_cast<size_t>(_Ryu_exponent
));
586 } else if (_Ryu_exponent
== 0) { // case "1729"
588 } else if (_Whole_digits
> 0) { // case "17.29"
589 // Performance note: moving digits might not be optimal.
590 std::memmove(_First
, _First
+ 1, static_cast<size_t>(_Whole_digits
));
591 _First
[_Whole_digits
] = '.';
592 } else { // case "0.001729"
593 // Performance note: a larger memset() followed by overwriting '.' might be more efficient.
596 std::memset(_First
+ 2, '0', static_cast<size_t>(-_Whole_digits
));
599 return { _First
+ _Total_fixed_length
, errc
{} };
602 const uint32_t _Total_scientific_length
=
603 __olength
+ (__olength
> 1) + 4; // digits + possible decimal point + scientific exponent
604 if (_Last
- _First
< static_cast<ptrdiff_t>(_Total_scientific_length
)) {
605 return { _Last
, errc::value_too_large
};
607 char* const __result
= _First
;
609 // Print the decimal digits.
611 while (_Output
>= 10000) {
612 #ifdef __clang__ // TRANSITION, LLVM-38217
613 const uint32_t __c
= _Output
- 10000 * (_Output
/ 10000);
615 const uint32_t __c
= _Output
% 10000;
618 const uint32_t __c0
= (__c
% 100) << 1;
619 const uint32_t __c1
= (__c
/ 100) << 1;
620 std::memcpy(__result
+ __olength
- __i
- 1, __DIGIT_TABLE
+ __c0
, 2);
621 std::memcpy(__result
+ __olength
- __i
- 3, __DIGIT_TABLE
+ __c1
, 2);
624 if (_Output
>= 100) {
625 const uint32_t __c
= (_Output
% 100) << 1;
627 std::memcpy(__result
+ __olength
- __i
- 1, __DIGIT_TABLE
+ __c
, 2);
631 const uint32_t __c
= _Output
<< 1;
632 // We can't use memcpy here: the decimal dot goes between these two digits.
633 __result
[2] = __DIGIT_TABLE
[__c
+ 1];
634 __result
[0] = __DIGIT_TABLE
[__c
];
636 __result
[0] = static_cast<char>('0' + _Output
);
639 // Print decimal point if needed.
643 __index
= __olength
+ 1;
648 // Print the exponent.
649 __result
[__index
++] = 'e';
650 if (_Scientific_exponent
< 0) {
651 __result
[__index
++] = '-';
652 _Scientific_exponent
= -_Scientific_exponent
;
654 __result
[__index
++] = '+';
657 std::memcpy(__result
+ __index
, __DIGIT_TABLE
+ 2 * _Scientific_exponent
, 2);
660 return { _First
+ _Total_scientific_length
, errc
{} };
663 [[nodiscard
]] to_chars_result
__f2s_buffered_n(char* const _First
, char* const _Last
, const float __f
,
664 const chars_format _Fmt
) {
666 // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
667 const uint32_t __bits
= __float_to_bits(__f
);
669 // Case distinction; exit early for the easy cases.
671 if (_Fmt
== chars_format::scientific
) {
672 if (_Last
- _First
< 5) {
673 return { _Last
, errc::value_too_large
};
676 std::memcpy(_First
, "0e+00", 5);
678 return { _First
+ 5, errc
{} };
681 // Print "0" for chars_format::fixed, chars_format::general, and chars_format{}.
682 if (_First
== _Last
) {
683 return { _Last
, errc::value_too_large
};
688 return { _First
+ 1, errc
{} };
691 // Decode __bits into mantissa and exponent.
692 const uint32_t __ieeeMantissa
= __bits
& ((1u << __FLOAT_MANTISSA_BITS
) - 1);
693 const uint32_t __ieeeExponent
= __bits
>> __FLOAT_MANTISSA_BITS
;
695 // When _Fmt == chars_format::fixed and the floating-point number is a large integer,
696 // it's faster to skip Ryu and immediately print the integer exactly.
697 if (_Fmt
== chars_format::fixed
) {
698 const uint32_t _Mantissa2
= __ieeeMantissa
| (1u << __FLOAT_MANTISSA_BITS
); // restore implicit bit
699 const int32_t _Exponent2
= static_cast<int32_t>(__ieeeExponent
)
700 - __FLOAT_BIAS
- __FLOAT_MANTISSA_BITS
; // bias and normalization
702 // Normal values are equal to _Mantissa2 * 2^_Exponent2.
703 // (Subnormals are different, but they'll be rejected by the _Exponent2 test here, so they can be ignored.)
705 if (_Exponent2
> 0) {
706 return _Large_integer_to_chars(_First
, _Last
, _Mantissa2
, _Exponent2
);
710 const __floating_decimal_32 __v
= __f2d(__ieeeMantissa
, __ieeeExponent
);
711 return __to_chars(_First
, _Last
, __v
, _Fmt
, __ieeeMantissa
, __ieeeExponent
);
714 _LIBCPP_END_NAMESPACE_STD