1 /*---------------------------------------------------------------------------
3 * Ryu floating-point output for single precision.
5 * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group
10 * This is a modification of code taken from github.com/ulfjack/ryu under the
11 * terms of the Boost license (not the Apache license). The original copyright
14 * Copyright 2018 Ulf Adams
16 * The contents of this file may be used under the terms of the Apache
17 * License, Version 2.0.
19 * (See accompanying file LICENSE-Apache or copy at
20 * http://www.apache.org/licenses/LICENSE-2.0)
22 * Alternatively, the contents of this file may be used under the terms of the
23 * Boost Software License, Version 1.0.
25 * (See accompanying file LICENSE-Boost or copy at
26 * https://www.boost.org/LICENSE_1_0.txt)
28 * Unless required by applicable law or agreed to in writing, this software is
29 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
30 * KIND, either express or implied.
32 *---------------------------------------------------------------------------
38 #include "postgres_fe.h"
41 #include "common/shortest_dec.h"
42 #include "digit_table.h"
43 #include "ryu_common.h"
45 #define FLOAT_MANTISSA_BITS 23
46 #define FLOAT_EXPONENT_BITS 8
47 #define FLOAT_BIAS 127
50 * This table is generated (by the upstream) by PrintFloatLookupTable,
51 * and modified (by us) to add UINT64CONST.
53 #define FLOAT_POW5_INV_BITCOUNT 59
54 static const uint64 FLOAT_POW5_INV_SPLIT
[31] = {
55 UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826),
56 UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670),
57 UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688),
58 UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727),
59 UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350),
60 UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234),
61 UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971),
62 UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730)
64 #define FLOAT_POW5_BITCOUNT 61
65 static const uint64 FLOAT_POW5_SPLIT
[47] = {
66 UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000),
67 UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000),
68 UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000),
69 UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000),
70 UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000),
71 UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000),
72 UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031),
73 UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594),
74 UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675),
75 UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678),
76 UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187),
77 UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)
81 pow5Factor(uint32 value
)
88 const uint32 q
= value
/ 5;
89 const uint32 r
= value
% 5;
100 /* Returns true if value is divisible by 5^p. */
102 multipleOfPowerOf5(const uint32 value
, const uint32 p
)
104 return pow5Factor(value
) >= p
;
107 /* Returns true if value is divisible by 2^p. */
109 multipleOfPowerOf2(const uint32 value
, const uint32 p
)
111 /* return __builtin_ctz(value) >= p; */
112 return (value
& ((1u << p
) - 1)) == 0;
116 * It seems to be slightly faster to avoid uint128_t here, although the
117 * generated code for uint128_t looks slightly nicer.
120 mulShift(const uint32 m
, const uint64 factor
, const int32 shift
)
123 * The casts here help MSVC to avoid calls to the __allmul library
126 const uint32 factorLo
= (uint32
) (factor
);
127 const uint32 factorHi
= (uint32
) (factor
>> 32);
128 const uint64 bits0
= (uint64
) m
* factorLo
;
129 const uint64 bits1
= (uint64
) m
* factorHi
;
133 #ifdef RYU_32_BIT_PLATFORM
136 * On 32-bit platforms we can avoid a 64-bit shift-right since we only
137 * need the upper 32 bits of the result and the shift value is > 32.
139 const uint32 bits0Hi
= (uint32
) (bits0
>> 32);
140 uint32 bits1Lo
= (uint32
) (bits1
);
141 uint32 bits1Hi
= (uint32
) (bits1
>> 32);
144 bits1Hi
+= (bits1Lo
< bits0Hi
);
146 const int32 s
= shift
- 32;
148 return (bits1Hi
<< (32 - s
)) | (bits1Lo
>> s
);
150 #else /* RYU_32_BIT_PLATFORM */
152 const uint64 sum
= (bits0
>> 32) + bits1
;
153 const uint64 shiftedSum
= sum
>> (shift
- 32);
155 Assert(shiftedSum
<= PG_UINT32_MAX
);
156 return (uint32
) shiftedSum
;
158 #endif /* RYU_32_BIT_PLATFORM */
162 mulPow5InvDivPow2(const uint32 m
, const uint32 q
, const int32 j
)
164 return mulShift(m
, FLOAT_POW5_INV_SPLIT
[q
], j
);
168 mulPow5divPow2(const uint32 m
, const uint32 i
, const int32 j
)
170 return mulShift(m
, FLOAT_POW5_SPLIT
[i
], j
);
174 decimalLength(const uint32 v
)
176 /* Function precondition: v is not a 10-digit number. */
177 /* (9 digits are sufficient for round-tripping.) */
178 Assert(v
< 1000000000);
214 /* A floating decimal representing m * 10^e. */
215 typedef struct floating_decimal_32
219 } floating_decimal_32
;
221 static inline floating_decimal_32
222 f2d(const uint32 ieeeMantissa
, const uint32 ieeeExponent
)
227 if (ieeeExponent
== 0)
229 /* We subtract 2 so that the bounds computation has 2 additional bits. */
230 e2
= 1 - FLOAT_BIAS
- FLOAT_MANTISSA_BITS
- 2;
235 e2
= ieeeExponent
- FLOAT_BIAS
- FLOAT_MANTISSA_BITS
- 2;
236 m2
= (1u << FLOAT_MANTISSA_BITS
) | ieeeMantissa
;
239 #if STRICTLY_SHORTEST
240 const bool even
= (m2
& 1) == 0;
241 const bool acceptBounds
= even
;
243 const bool acceptBounds
= false;
246 /* Step 2: Determine the interval of legal decimal representations. */
247 const uint32 mv
= 4 * m2
;
248 const uint32 mp
= 4 * m2
+ 2;
250 /* Implicit bool -> int conversion. True is 1, false is 0. */
251 const uint32 mmShift
= ieeeMantissa
!= 0 || ieeeExponent
<= 1;
252 const uint32 mm
= 4 * m2
- 1 - mmShift
;
254 /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
259 bool vmIsTrailingZeros
= false;
260 bool vrIsTrailingZeros
= false;
261 uint8 lastRemovedDigit
= 0;
265 const uint32 q
= log10Pow2(e2
);
269 const int32 k
= FLOAT_POW5_INV_BITCOUNT
+ pow5bits(q
) - 1;
270 const int32 i
= -e2
+ q
+ k
;
272 vr
= mulPow5InvDivPow2(mv
, q
, i
);
273 vp
= mulPow5InvDivPow2(mp
, q
, i
);
274 vm
= mulPow5InvDivPow2(mm
, q
, i
);
276 if (q
!= 0 && (vp
- 1) / 10 <= vm
/ 10)
279 * We need to know one removed digit even if we are not going to
280 * loop below. We could use q = X - 1 above, except that would
281 * require 33 bits for the result, and we've found that 32-bit
282 * arithmetic is faster even on 64-bit machines.
284 const int32 l
= FLOAT_POW5_INV_BITCOUNT
+ pow5bits(q
- 1) - 1;
286 lastRemovedDigit
= (uint8
) (mulPow5InvDivPow2(mv
, q
- 1, -e2
+ q
- 1 + l
) % 10);
291 * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
292 * seems to be safe as well.
294 * Only one of mp, mv, and mm can be a multiple of 5, if any.
298 vrIsTrailingZeros
= multipleOfPowerOf5(mv
, q
);
300 else if (acceptBounds
)
302 vmIsTrailingZeros
= multipleOfPowerOf5(mm
, q
);
306 vp
-= multipleOfPowerOf5(mp
, q
);
312 const uint32 q
= log10Pow5(-e2
);
316 const int32 i
= -e2
- q
;
317 const int32 k
= pow5bits(i
) - FLOAT_POW5_BITCOUNT
;
320 vr
= mulPow5divPow2(mv
, i
, j
);
321 vp
= mulPow5divPow2(mp
, i
, j
);
322 vm
= mulPow5divPow2(mm
, i
, j
);
324 if (q
!= 0 && (vp
- 1) / 10 <= vm
/ 10)
326 j
= q
- 1 - (pow5bits(i
+ 1) - FLOAT_POW5_BITCOUNT
);
327 lastRemovedDigit
= (uint8
) (mulPow5divPow2(mv
, i
+ 1, j
) % 10);
332 * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
335 /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
336 vrIsTrailingZeros
= true;
340 * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
343 vmIsTrailingZeros
= mmShift
== 1;
348 * mp = mv + 2, so it always has at least one trailing 0 bit.
355 /* TODO(ulfjack):Use a tighter bound here. */
356 vrIsTrailingZeros
= multipleOfPowerOf2(mv
, q
- 1);
361 * Step 4: Find the shortest decimal representation in the interval of
362 * legal representations.
367 if (vmIsTrailingZeros
|| vrIsTrailingZeros
)
369 /* General case, which happens rarely (~4.0%). */
370 while (vp
/ 10 > vm
/ 10)
372 vmIsTrailingZeros
&= vm
- (vm
/ 10) * 10 == 0;
373 vrIsTrailingZeros
&= lastRemovedDigit
== 0;
374 lastRemovedDigit
= (uint8
) (vr
% 10);
380 if (vmIsTrailingZeros
)
384 vrIsTrailingZeros
&= lastRemovedDigit
== 0;
385 lastRemovedDigit
= (uint8
) (vr
% 10);
393 if (vrIsTrailingZeros
&& lastRemovedDigit
== 5 && vr
% 2 == 0)
395 /* Round even if the exact number is .....50..0. */
396 lastRemovedDigit
= 4;
400 * We need to take vr + 1 if vr is outside bounds or we need to round
403 output
= vr
+ ((vr
== vm
&& (!acceptBounds
|| !vmIsTrailingZeros
)) || lastRemovedDigit
>= 5);
408 * Specialized for the common case (~96.0%). Percentages below are
411 * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
412 * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
414 while (vp
/ 10 > vm
/ 10)
416 lastRemovedDigit
= (uint8
) (vr
% 10);
424 * We need to take vr + 1 if vr is outside bounds or we need to round
427 output
= vr
+ (vr
== vm
|| lastRemovedDigit
>= 5);
430 const int32 exp
= e10
+ removed
;
432 floating_decimal_32 fd
;
435 fd
.mantissa
= output
;
440 to_chars_f(const floating_decimal_32 v
, const uint32 olength
, char *const result
)
442 /* Step 5: Print the decimal representation. */
445 uint32 output
= v
.mantissa
;
446 int32 exp
= v
.exponent
;
449 * On entry, mantissa * 10^exp is the result to be output.
450 * Caller has already done the - sign if needed.
452 * We want to insert the point somewhere depending on the output length
453 * and exponent, which might mean adding zeros:
456 * 1+ | ddddddddd000000
458 * -1 .. -len+1 | dddddddd.d to d.ddddddddd
459 * -len ... | 0.ddddddddd to 0.000dddddd
462 int32 nexp
= exp
+ olength
;
466 /* -nexp is number of 0s to add after '.' */
470 /* copy 8 bytes rather than 5 to let compiler optimize */
471 memcpy(result
, "0.000000", 8);
476 * dddd.dddd; leave space at the start and move the '.' in after
483 * We can save some code later by pre-filling with zeros. We know that
484 * there can be no more than 6 output digits in this form, otherwise
485 * we would not choose fixed-point output. memset 8 rather than 6
486 * bytes to let the compiler optimize it.
488 Assert(exp
< 6 && exp
+ olength
<= 6);
489 memset(result
, '0', 8);
492 while (output
>= 10000)
494 const uint32 c
= output
- 10000 * (output
/ 10000);
495 const uint32 c0
= (c
% 100) << 1;
496 const uint32 c1
= (c
/ 100) << 1;
500 memcpy(result
+ index
+ olength
- i
- 2, DIGIT_TABLE
+ c0
, 2);
501 memcpy(result
+ index
+ olength
- i
- 4, DIGIT_TABLE
+ c1
, 2);
506 const uint32 c
= (output
% 100) << 1;
509 memcpy(result
+ index
+ olength
- i
- 2, DIGIT_TABLE
+ c
, 2);
514 const uint32 c
= output
<< 1;
516 memcpy(result
+ index
+ olength
- i
- 2, DIGIT_TABLE
+ c
, 2);
520 result
[index
] = (char) ('0' + output
);
526 * nexp is 1..6 here, representing the number of digits before the
527 * point. A value of 7+ is not possible because we switch to
528 * scientific notation when the display exponent reaches 6.
531 /* gcc only seems to want to optimize memmove for small 2^n */
534 memmove(result
+ index
- 1, result
+ index
, 4);
539 memmove(result
+ index
- 1, result
+ index
, 2);
544 result
[index
- 1] = result
[index
];
551 /* we supplied the trailing zeros earlier, now just set the length. */
552 index
= olength
+ exp
;
556 index
= olength
+ (2 - nexp
);
563 to_chars(const floating_decimal_32 v
, const bool sign
, char *const result
)
565 /* Step 5: Print the decimal representation. */
568 uint32 output
= v
.mantissa
;
569 uint32 olength
= decimalLength(output
);
570 int32 exp
= v
.exponent
+ olength
- 1;
573 result
[index
++] = '-';
576 * The thresholds for fixed-point output are chosen to match printf
577 * defaults. Beware that both the code of to_chars_f and the value of
578 * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
580 if (exp
>= -4 && exp
< 6)
581 return to_chars_f(v
, olength
, result
+ index
) + sign
;
584 * If v.exponent is exactly 0, we might have reached here via the small
585 * integer fast path, in which case v.mantissa might contain trailing
586 * (decimal) zeros. For scientific notation we need to move these zeros
587 * into the exponent. (For fixed point this doesn't matter, which is why
588 * we do this here rather than above.)
590 * Since we already calculated the display exponent (exp) above based on
591 * the old decimal length, that value does not change here. Instead, we
592 * just reduce the display length for each digit removed.
594 * If we didn't get here via the fast path, the raw exponent will not
595 * usually be 0, and there will be no trailing zeros, so we pay no more
596 * than one div10/multiply extra cost. We claw back half of that by
597 * checking for divisibility by 2 before dividing by 10.
601 while ((output
& 1) == 0)
603 const uint32 q
= output
/ 10;
604 const uint32 r
= output
- 10 * q
;
614 * Print the decimal digits.
615 * The following code is equivalent to:
617 * for (uint32 i = 0; i < olength - 1; ++i) {
618 * const uint32 c = output % 10; output /= 10;
619 * result[index + olength - i] = (char) ('0' + c);
621 * result[index] = '0' + output % 10;
625 while (output
>= 10000)
627 const uint32 c
= output
- 10000 * (output
/ 10000);
628 const uint32 c0
= (c
% 100) << 1;
629 const uint32 c1
= (c
/ 100) << 1;
633 memcpy(result
+ index
+ olength
- i
- 1, DIGIT_TABLE
+ c0
, 2);
634 memcpy(result
+ index
+ olength
- i
- 3, DIGIT_TABLE
+ c1
, 2);
639 const uint32 c
= (output
% 100) << 1;
642 memcpy(result
+ index
+ olength
- i
- 1, DIGIT_TABLE
+ c
, 2);
647 const uint32 c
= output
<< 1;
650 * We can't use memcpy here: the decimal dot goes between these two
653 result
[index
+ olength
- i
] = DIGIT_TABLE
[c
+ 1];
654 result
[index
] = DIGIT_TABLE
[c
];
658 result
[index
] = (char) ('0' + output
);
661 /* Print decimal point if needed. */
664 result
[index
+ 1] = '.';
665 index
+= olength
+ 1;
672 /* Print the exponent. */
673 result
[index
++] = 'e';
676 result
[index
++] = '-';
680 result
[index
++] = '+';
682 memcpy(result
+ index
, DIGIT_TABLE
+ 2 * exp
, 2);
689 f2d_small_int(const uint32 ieeeMantissa
,
690 const uint32 ieeeExponent
,
691 floating_decimal_32
*v
)
693 const int32 e2
= (int32
) ieeeExponent
- FLOAT_BIAS
- FLOAT_MANTISSA_BITS
;
696 * Avoid using multiple "return false;" here since it tends to provoke the
697 * compiler into inlining multiple copies of f2d, which is undesirable.
700 if (e2
>= -FLOAT_MANTISSA_BITS
&& e2
<= 0)
703 * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23:
704 * 1 <= f = m2 / 2^-e2 < 2^24.
706 * Test if the lower -e2 bits of the significand are 0, i.e. whether
707 * the fraction is 0. We can use ieeeMantissa here, since the implied
708 * 1 bit can never be tested by this; the implied 1 can only be part
709 * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already
710 * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24)
712 const uint32 mask
= (1U << -e2
) - 1;
713 const uint32 fraction
= ieeeMantissa
& mask
;
718 * f is an integer in the range [1, 2^24).
719 * Note: mantissa might contain trailing (decimal) 0's.
720 * Note: since 2^24 < 10^9, there is no need to adjust
723 const uint32 m2
= (1U << FLOAT_MANTISSA_BITS
) | ieeeMantissa
;
725 v
->mantissa
= m2
>> -e2
;
735 * Store the shortest decimal representation of the given float as an
736 * UNTERMINATED string in the caller's supplied buffer (which must be at least
737 * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long).
739 * Returns the number of bytes stored.
742 float_to_shortest_decimal_bufn(float f
, char *result
)
745 * Step 1: Decode the floating-point number, and unify normalized and
748 const uint32 bits
= float_to_bits(f
);
750 /* Decode bits into sign, mantissa, and exponent. */
751 const bool ieeeSign
= ((bits
>> (FLOAT_MANTISSA_BITS
+ FLOAT_EXPONENT_BITS
)) & 1) != 0;
752 const uint32 ieeeMantissa
= bits
& ((1u << FLOAT_MANTISSA_BITS
) - 1);
753 const uint32 ieeeExponent
= (bits
>> FLOAT_MANTISSA_BITS
) & ((1u << FLOAT_EXPONENT_BITS
) - 1);
755 /* Case distinction; exit early for the easy cases. */
756 if (ieeeExponent
== ((1u << FLOAT_EXPONENT_BITS
) - 1u) || (ieeeExponent
== 0 && ieeeMantissa
== 0))
758 return copy_special_str(result
, ieeeSign
, (ieeeExponent
!= 0), (ieeeMantissa
!= 0));
761 floating_decimal_32 v
;
762 const bool isSmallInt
= f2d_small_int(ieeeMantissa
, ieeeExponent
, &v
);
766 v
= f2d(ieeeMantissa
, ieeeExponent
);
769 return to_chars(v
, ieeeSign
, result
);
773 * Store the shortest decimal representation of the given float as a
774 * null-terminated string in the caller's supplied buffer (which must be at
775 * least FLOAT_SHORTEST_DECIMAL_LEN bytes long).
777 * Returns the string length.
780 float_to_shortest_decimal_buf(float f
, char *result
)
782 const int index
= float_to_shortest_decimal_bufn(f
, result
);
784 /* Terminate the string. */
785 Assert(index
< FLOAT_SHORTEST_DECIMAL_LEN
);
786 result
[index
] = '\0';
791 * Return the shortest decimal representation as a null-terminated palloc'd
792 * string (outside the backend, uses malloc() instead).
794 * Caller is responsible for freeing the result.
797 float_to_shortest_decimal(float f
)
799 char *const result
= (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN
);
801 float_to_shortest_decimal_buf(f
, result
);