src/common/f2s.c

   1 /*---------------------------------------------------------------------------
   2  *
   3  * Ryu floating-point output for single precision.
   4  *
   5  * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group
   6  *
   7  * IDENTIFICATION
   8  *        src/common/f2s.c
   9  *
  10  * This is a modification of code taken from github.com/ulfjack/ryu under the
  11  * terms of the Boost license (not the Apache license). The original copyright
  12  * notice follows:
  13  *
  14  * Copyright 2018 Ulf Adams
  15  *
  16  * The contents of this file may be used under the terms of the Apache
  17  * License, Version 2.0.
  18  *
  19  *     (See accompanying file LICENSE-Apache or copy at
  20  *      http://www.apache.org/licenses/LICENSE-2.0)
  21  *
  22  * Alternatively, the contents of this file may be used under the terms of the
  23  * Boost Software License, Version 1.0.
  24  *
  25  *     (See accompanying file LICENSE-Boost or copy at
  26  *      https://www.boost.org/LICENSE_1_0.txt)
  27  *
  28  * Unless required by applicable law or agreed to in writing, this software is
  29  * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  30  * KIND, either express or implied.
  31  *
  32  *---------------------------------------------------------------------------
  33  */
  34
  35 #ifndef FRONTEND
  36 #include "postgres.h"
  37 #else
  38 #include "postgres_fe.h"
  39 #endif
  40
  41 #include "common/shortest_dec.h"
  42 #include "digit_table.h"
  43 #include "ryu_common.h"
  44
  45 #define FLOAT_MANTISSA_BITS 23
  46 #define FLOAT_EXPONENT_BITS 8
  47 #define FLOAT_BIAS 127
  48
  49 /*
  50  * This table is generated (by the upstream) by PrintFloatLookupTable,
  51  * and modified (by us) to add UINT64CONST.
  52  */
  53 #define FLOAT_POW5_INV_BITCOUNT 59
  54 static const uint64 FLOAT_POW5_INV_SPLIT[31] = {
  55         UINT64CONST(576460752303423489), UINT64CONST(461168601842738791), UINT64CONST(368934881474191033), UINT64CONST(295147905179352826),
  56         UINT64CONST(472236648286964522), UINT64CONST(377789318629571618), UINT64CONST(302231454903657294), UINT64CONST(483570327845851670),
  57         UINT64CONST(386856262276681336), UINT64CONST(309485009821345069), UINT64CONST(495176015714152110), UINT64CONST(396140812571321688),
  58         UINT64CONST(316912650057057351), UINT64CONST(507060240091291761), UINT64CONST(405648192073033409), UINT64CONST(324518553658426727),
  59         UINT64CONST(519229685853482763), UINT64CONST(415383748682786211), UINT64CONST(332306998946228969), UINT64CONST(531691198313966350),
  60         UINT64CONST(425352958651173080), UINT64CONST(340282366920938464), UINT64CONST(544451787073501542), UINT64CONST(435561429658801234),
  61         UINT64CONST(348449143727040987), UINT64CONST(557518629963265579), UINT64CONST(446014903970612463), UINT64CONST(356811923176489971),
  62         UINT64CONST(570899077082383953), UINT64CONST(456719261665907162), UINT64CONST(365375409332725730)
  63 };
  64 #define FLOAT_POW5_BITCOUNT 61
  65 static const uint64 FLOAT_POW5_SPLIT[47] = {
  66         UINT64CONST(1152921504606846976), UINT64CONST(1441151880758558720), UINT64CONST(1801439850948198400), UINT64CONST(2251799813685248000),
  67         UINT64CONST(1407374883553280000), UINT64CONST(1759218604441600000), UINT64CONST(2199023255552000000), UINT64CONST(1374389534720000000),
  68         UINT64CONST(1717986918400000000), UINT64CONST(2147483648000000000), UINT64CONST(1342177280000000000), UINT64CONST(1677721600000000000),
  69         UINT64CONST(2097152000000000000), UINT64CONST(1310720000000000000), UINT64CONST(1638400000000000000), UINT64CONST(2048000000000000000),
  70         UINT64CONST(1280000000000000000), UINT64CONST(1600000000000000000), UINT64CONST(2000000000000000000), UINT64CONST(1250000000000000000),
  71         UINT64CONST(1562500000000000000), UINT64CONST(1953125000000000000), UINT64CONST(1220703125000000000), UINT64CONST(1525878906250000000),
  72         UINT64CONST(1907348632812500000), UINT64CONST(1192092895507812500), UINT64CONST(1490116119384765625), UINT64CONST(1862645149230957031),
  73         UINT64CONST(1164153218269348144), UINT64CONST(1455191522836685180), UINT64CONST(1818989403545856475), UINT64CONST(2273736754432320594),
  74         UINT64CONST(1421085471520200371), UINT64CONST(1776356839400250464), UINT64CONST(2220446049250313080), UINT64CONST(1387778780781445675),
  75         UINT64CONST(1734723475976807094), UINT64CONST(2168404344971008868), UINT64CONST(1355252715606880542), UINT64CONST(1694065894508600678),
  76         UINT64CONST(2117582368135750847), UINT64CONST(1323488980084844279), UINT64CONST(1654361225106055349), UINT64CONST(2067951531382569187),
  77         UINT64CONST(1292469707114105741), UINT64CONST(1615587133892632177), UINT64CONST(2019483917365790221)
  78 };
  79
  80 static inline uint32
  81 pow5Factor(uint32 value)
  82 {
  83         uint32          count = 0;
  84
  85         for (;;)
  86         {
  87                 Assert(value != 0);
  88                 const uint32 q = value / 5;
  89                 const uint32 r = value % 5;
  90
  91                 if (r != 0)
  92                         break;
  93
  94                 value = q;
  95                 ++count;
  96         }
  97         return count;
  98 }
  99
 100 /*  Returns true if value is divisible by 5^p. */
 101 static inline bool
 102 multipleOfPowerOf5(const uint32 value, const uint32 p)
 103 {
 104         return pow5Factor(value) >= p;
 105 }
 106
 107 /*  Returns true if value is divisible by 2^p. */
 108 static inline bool
 109 multipleOfPowerOf2(const uint32 value, const uint32 p)
 110 {
 111         /* return __builtin_ctz(value) >= p; */
 112         return (value & ((1u << p) - 1)) == 0;
 113 }
 114
 115 /*
 116  * It seems to be slightly faster to avoid uint128_t here, although the
 117  * generated code for uint128_t looks slightly nicer.
 118  */
 119 static inline uint32
 120 mulShift(const uint32 m, const uint64 factor, const int32 shift)
 121 {
 122         /*
 123          * The casts here help MSVC to avoid calls to the __allmul library
 124          * function.
 125          */
 126         const uint32 factorLo = (uint32) (factor);
 127         const uint32 factorHi = (uint32) (factor >> 32);
 128         const uint64 bits0 = (uint64) m * factorLo;
 129         const uint64 bits1 = (uint64) m * factorHi;
 130
 131         Assert(shift > 32);
 132
 133 #ifdef RYU_32_BIT_PLATFORM
 134
 135         /*
 136          * On 32-bit platforms we can avoid a 64-bit shift-right since we only
 137          * need the upper 32 bits of the result and the shift value is > 32.
 138          */
 139         const uint32 bits0Hi = (uint32) (bits0 >> 32);
 140         uint32          bits1Lo = (uint32) (bits1);
 141         uint32          bits1Hi = (uint32) (bits1 >> 32);
 142
 143         bits1Lo += bits0Hi;
 144         bits1Hi += (bits1Lo < bits0Hi);
 145
 146         const int32 s = shift - 32;
 147
 148         return (bits1Hi << (32 - s)) | (bits1Lo >> s);
 149
 150 #else                                                   /* RYU_32_BIT_PLATFORM */
 151
 152         const uint64 sum = (bits0 >> 32) + bits1;
 153         const uint64 shiftedSum = sum >> (shift - 32);
 154
 155         Assert(shiftedSum <= PG_UINT32_MAX);
 156         return (uint32) shiftedSum;
 157
 158 #endif                                                  /* RYU_32_BIT_PLATFORM */
 159 }
 160
 161 static inline uint32
 162 mulPow5InvDivPow2(const uint32 m, const uint32 q, const int32 j)
 163 {
 164         return mulShift(m, FLOAT_POW5_INV_SPLIT[q], j);
 165 }
 166
 167 static inline uint32
 168 mulPow5divPow2(const uint32 m, const uint32 i, const int32 j)
 169 {
 170         return mulShift(m, FLOAT_POW5_SPLIT[i], j);
 171 }
 172
 173 static inline uint32
 174 decimalLength(const uint32 v)
 175 {
 176         /* Function precondition: v is not a 10-digit number. */
 177         /* (9 digits are sufficient for round-tripping.) */
 178         Assert(v < 1000000000);
 179         if (v >= 100000000)
 180         {
 181                 return 9;
 182         }
 183         if (v >= 10000000)
 184         {
 185                 return 8;
 186         }
 187         if (v >= 1000000)
 188         {
 189                 return 7;
 190         }
 191         if (v >= 100000)
 192         {
 193                 return 6;
 194         }
 195         if (v >= 10000)
 196         {
 197                 return 5;
 198         }
 199         if (v >= 1000)
 200         {
 201                 return 4;
 202         }
 203         if (v >= 100)
 204         {
 205                 return 3;
 206         }
 207         if (v >= 10)
 208         {
 209                 return 2;
 210         }
 211         return 1;
 212 }
 213
 214 /*  A floating decimal representing m * 10^e. */
 215 typedef struct floating_decimal_32
 216 {
 217         uint32          mantissa;
 218         int32           exponent;
 219 } floating_decimal_32;
 220
 221 static inline floating_decimal_32
 222 f2d(const uint32 ieeeMantissa, const uint32 ieeeExponent)
 223 {
 224         int32           e2;
 225         uint32          m2;
 226
 227         if (ieeeExponent == 0)
 228         {
 229                 /* We subtract 2 so that the bounds computation has 2 additional bits. */
 230                 e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
 231                 m2 = ieeeMantissa;
 232         }
 233         else
 234         {
 235                 e2 = ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
 236                 m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
 237         }
 238
 239 #if STRICTLY_SHORTEST
 240         const bool      even = (m2 & 1) == 0;
 241         const bool      acceptBounds = even;
 242 #else
 243         const bool      acceptBounds = false;
 244 #endif
 245
 246         /* Step 2: Determine the interval of legal decimal representations. */
 247         const uint32 mv = 4 * m2;
 248         const uint32 mp = 4 * m2 + 2;
 249
 250         /* Implicit bool -> int conversion. True is 1, false is 0. */
 251         const uint32 mmShift = ieeeMantissa != 0 || ieeeExponent <= 1;
 252         const uint32 mm = 4 * m2 - 1 - mmShift;
 253
 254         /* Step 3: Convert to a decimal power base using 64-bit arithmetic. */
 255         uint32          vr,
 256                                 vp,
 257                                 vm;
 258         int32           e10;
 259         bool            vmIsTrailingZeros = false;
 260         bool            vrIsTrailingZeros = false;
 261         uint8           lastRemovedDigit = 0;
 262
 263         if (e2 >= 0)
 264         {
 265                 const uint32 q = log10Pow2(e2);
 266
 267                 e10 = q;
 268
 269                 const int32 k = FLOAT_POW5_INV_BITCOUNT + pow5bits(q) - 1;
 270                 const int32 i = -e2 + q + k;
 271
 272                 vr = mulPow5InvDivPow2(mv, q, i);
 273                 vp = mulPow5InvDivPow2(mp, q, i);
 274                 vm = mulPow5InvDivPow2(mm, q, i);
 275
 276                 if (q != 0 && (vp - 1) / 10 <= vm / 10)
 277                 {
 278                         /*
 279                          * We need to know one removed digit even if we are not going to
 280                          * loop below. We could use q = X - 1 above, except that would
 281                          * require 33 bits for the result, and we've found that 32-bit
 282                          * arithmetic is faster even on 64-bit machines.
 283                          */
 284                         const int32 l = FLOAT_POW5_INV_BITCOUNT + pow5bits(q - 1) - 1;
 285
 286                         lastRemovedDigit = (uint8) (mulPow5InvDivPow2(mv, q - 1, -e2 + q - 1 + l) % 10);
 287                 }
 288                 if (q <= 9)
 289                 {
 290                         /*
 291                          * The largest power of 5 that fits in 24 bits is 5^10, but q <= 9
 292                          * seems to be safe as well.
 293                          *
 294                          * Only one of mp, mv, and mm can be a multiple of 5, if any.
 295                          */
 296                         if (mv % 5 == 0)
 297                         {
 298                                 vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
 299                         }
 300                         else if (acceptBounds)
 301                         {
 302                                 vmIsTrailingZeros = multipleOfPowerOf5(mm, q);
 303                         }
 304                         else
 305                         {
 306                                 vp -= multipleOfPowerOf5(mp, q);
 307                         }
 308                 }
 309         }
 310         else
 311         {
 312                 const uint32 q = log10Pow5(-e2);
 313
 314                 e10 = q + e2;
 315
 316                 const int32 i = -e2 - q;
 317                 const int32 k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
 318                 int32           j = q - k;
 319
 320                 vr = mulPow5divPow2(mv, i, j);
 321                 vp = mulPow5divPow2(mp, i, j);
 322                 vm = mulPow5divPow2(mm, i, j);
 323
 324                 if (q != 0 && (vp - 1) / 10 <= vm / 10)
 325                 {
 326                         j = q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
 327                         lastRemovedDigit = (uint8) (mulPow5divPow2(mv, i + 1, j) % 10);
 328                 }
 329                 if (q <= 1)
 330                 {
 331                         /*
 332                          * {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q
 333                          * trailing 0 bits.
 334                          */
 335                         /* mv = 4 * m2, so it always has at least two trailing 0 bits. */
 336                         vrIsTrailingZeros = true;
 337                         if (acceptBounds)
 338                         {
 339                                 /*
 340                                  * mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff
 341                                  * mmShift == 1.
 342                                  */
 343                                 vmIsTrailingZeros = mmShift == 1;
 344                         }
 345                         else
 346                         {
 347                                 /*
 348                                  * mp = mv + 2, so it always has at least one trailing 0 bit.
 349                                  */
 350                                 --vp;
 351                         }
 352                 }
 353                 else if (q < 31)
 354                 {
 355                         /* TODO(ulfjack):Use a tighter bound here. */
 356                         vrIsTrailingZeros = multipleOfPowerOf2(mv, q - 1);
 357                 }
 358         }
 359
 360         /*
 361          * Step 4: Find the shortest decimal representation in the interval of
 362          * legal representations.
 363          */
 364         uint32          removed = 0;
 365         uint32          output;
 366
 367         if (vmIsTrailingZeros || vrIsTrailingZeros)
 368         {
 369                 /* General case, which happens rarely (~4.0%). */
 370                 while (vp / 10 > vm / 10)
 371                 {
 372                         vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0;
 373                         vrIsTrailingZeros &= lastRemovedDigit == 0;
 374                         lastRemovedDigit = (uint8) (vr % 10);
 375                         vr /= 10;
 376                         vp /= 10;
 377                         vm /= 10;
 378                         ++removed;
 379                 }
 380                 if (vmIsTrailingZeros)
 381                 {
 382                         while (vm % 10 == 0)
 383                         {
 384                                 vrIsTrailingZeros &= lastRemovedDigit == 0;
 385                                 lastRemovedDigit = (uint8) (vr % 10);
 386                                 vr /= 10;
 387                                 vp /= 10;
 388                                 vm /= 10;
 389                                 ++removed;
 390                         }
 391                 }
 392
 393                 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
 394                 {
 395                         /* Round even if the exact number is .....50..0. */
 396                         lastRemovedDigit = 4;
 397                 }
 398
 399                 /*
 400                  * We need to take vr + 1 if vr is outside bounds or we need to round
 401                  * up.
 402                  */
 403                 output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5);
 404         }
 405         else
 406         {
 407                 /*
 408                  * Specialized for the common case (~96.0%). Percentages below are
 409                  * relative to this.
 410                  *
 411                  * Loop iterations below (approximately): 0: 13.6%, 1: 70.7%, 2:
 412                  * 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
 413                  */
 414                 while (vp / 10 > vm / 10)
 415                 {
 416                         lastRemovedDigit = (uint8) (vr % 10);
 417                         vr /= 10;
 418                         vp /= 10;
 419                         vm /= 10;
 420                         ++removed;
 421                 }
 422
 423                 /*
 424                  * We need to take vr + 1 if vr is outside bounds or we need to round
 425                  * up.
 426                  */
 427                 output = vr + (vr == vm || lastRemovedDigit >= 5);
 428         }
 429
 430         const int32 exp = e10 + removed;
 431
 432         floating_decimal_32 fd;
 433
 434         fd.exponent = exp;
 435         fd.mantissa = output;
 436         return fd;
 437 }
 438
 439 static inline int
 440 to_chars_f(const floating_decimal_32 v, const uint32 olength, char *const result)
 441 {
 442         /* Step 5: Print the decimal representation. */
 443         int                     index = 0;
 444
 445         uint32          output = v.mantissa;
 446         int32           exp = v.exponent;
 447
 448         /*----
 449          * On entry, mantissa * 10^exp is the result to be output.
 450          * Caller has already done the - sign if needed.
 451          *
 452          * We want to insert the point somewhere depending on the output length
 453          * and exponent, which might mean adding zeros:
 454          *
 455          *            exp  | format
 456          *            1+   |  ddddddddd000000
 457          *            0    |  ddddddddd
 458          *  -1 .. -len+1   |  dddddddd.d to d.ddddddddd
 459          *  -len ...       |  0.ddddddddd to 0.000dddddd
 460          */
 461         uint32          i = 0;
 462         int32           nexp = exp + olength;
 463
 464         if (nexp <= 0)
 465         {
 466                 /* -nexp is number of 0s to add after '.' */
 467                 Assert(nexp >= -3);
 468                 /* 0.000ddddd */
 469                 index = 2 - nexp;
 470                 /* copy 8 bytes rather than 5 to let compiler optimize */
 471                 memcpy(result, "0.000000", 8);
 472         }
 473         else if (exp < 0)
 474         {
 475                 /*
 476                  * dddd.dddd; leave space at the start and move the '.' in after
 477                  */
 478                 index = 1;
 479         }
 480         else
 481         {
 482                 /*
 483                  * We can save some code later by pre-filling with zeros. We know that
 484                  * there can be no more than 6 output digits in this form, otherwise
 485                  * we would not choose fixed-point output. memset 8 rather than 6
 486                  * bytes to let the compiler optimize it.
 487                  */
 488                 Assert(exp < 6 && exp + olength <= 6);
 489                 memset(result, '0', 8);
 490         }
 491
 492         while (output >= 10000)
 493         {
 494                 const uint32 c = output - 10000 * (output / 10000);
 495                 const uint32 c0 = (c % 100) << 1;
 496                 const uint32 c1 = (c / 100) << 1;
 497
 498                 output /= 10000;
 499
 500                 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c0, 2);
 501                 memcpy(result + index + olength - i - 4, DIGIT_TABLE + c1, 2);
 502                 i += 4;
 503         }
 504         if (output >= 100)
 505         {
 506                 const uint32 c = (output % 100) << 1;
 507
 508                 output /= 100;
 509                 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
 510                 i += 2;
 511         }
 512         if (output >= 10)
 513         {
 514                 const uint32 c = output << 1;
 515
 516                 memcpy(result + index + olength - i - 2, DIGIT_TABLE + c, 2);
 517         }
 518         else
 519         {
 520                 result[index] = (char) ('0' + output);
 521         }
 522
 523         if (index == 1)
 524         {
 525                 /*
 526                  * nexp is 1..6 here, representing the number of digits before the
 527                  * point. A value of 7+ is not possible because we switch to
 528                  * scientific notation when the display exponent reaches 6.
 529                  */
 530                 Assert(nexp < 7);
 531                 /* gcc only seems to want to optimize memmove for small 2^n */
 532                 if (nexp & 4)
 533                 {
 534                         memmove(result + index - 1, result + index, 4);
 535                         index += 4;
 536                 }
 537                 if (nexp & 2)
 538                 {
 539                         memmove(result + index - 1, result + index, 2);
 540                         index += 2;
 541                 }
 542                 if (nexp & 1)
 543                 {
 544                         result[index - 1] = result[index];
 545                 }
 546                 result[nexp] = '.';
 547                 index = olength + 1;
 548         }
 549         else if (exp >= 0)
 550         {
 551                 /* we supplied the trailing zeros earlier, now just set the length. */
 552                 index = olength + exp;
 553         }
 554         else
 555         {
 556                 index = olength + (2 - nexp);
 557         }
 558
 559         return index;
 560 }
 561
 562 static inline int
 563 to_chars(const floating_decimal_32 v, const bool sign, char *const result)
 564 {
 565         /* Step 5: Print the decimal representation. */
 566         int                     index = 0;
 567
 568         uint32          output = v.mantissa;
 569         uint32          olength = decimalLength(output);
 570         int32           exp = v.exponent + olength - 1;
 571
 572         if (sign)
 573                 result[index++] = '-';
 574
 575         /*
 576          * The thresholds for fixed-point output are chosen to match printf
 577          * defaults. Beware that both the code of to_chars_f and the value of
 578          * FLOAT_SHORTEST_DECIMAL_LEN are sensitive to these thresholds.
 579          */
 580         if (exp >= -4 && exp < 6)
 581                 return to_chars_f(v, olength, result + index) + sign;
 582
 583         /*
 584          * If v.exponent is exactly 0, we might have reached here via the small
 585          * integer fast path, in which case v.mantissa might contain trailing
 586          * (decimal) zeros. For scientific notation we need to move these zeros
 587          * into the exponent. (For fixed point this doesn't matter, which is why
 588          * we do this here rather than above.)
 589          *
 590          * Since we already calculated the display exponent (exp) above based on
 591          * the old decimal length, that value does not change here. Instead, we
 592          * just reduce the display length for each digit removed.
 593          *
 594          * If we didn't get here via the fast path, the raw exponent will not
 595          * usually be 0, and there will be no trailing zeros, so we pay no more
 596          * than one div10/multiply extra cost. We claw back half of that by
 597          * checking for divisibility by 2 before dividing by 10.
 598          */
 599         if (v.exponent == 0)
 600         {
 601                 while ((output & 1) == 0)
 602                 {
 603                         const uint32 q = output / 10;
 604                         const uint32 r = output - 10 * q;
 605
 606                         if (r != 0)
 607                                 break;
 608                         output = q;
 609                         --olength;
 610                 }
 611         }
 612
 613         /*----
 614          * Print the decimal digits.
 615          * The following code is equivalent to:
 616          *
 617          * for (uint32 i = 0; i < olength - 1; ++i) {
 618          *   const uint32 c = output % 10; output /= 10;
 619          *   result[index + olength - i] = (char) ('0' + c);
 620          * }
 621          * result[index] = '0' + output % 10;
 622          */
 623         uint32          i = 0;
 624
 625         while (output >= 10000)
 626         {
 627                 const uint32 c = output - 10000 * (output / 10000);
 628                 const uint32 c0 = (c % 100) << 1;
 629                 const uint32 c1 = (c / 100) << 1;
 630
 631                 output /= 10000;
 632
 633                 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2);
 634                 memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2);
 635                 i += 4;
 636         }
 637         if (output >= 100)
 638         {
 639                 const uint32 c = (output % 100) << 1;
 640
 641                 output /= 100;
 642                 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2);
 643                 i += 2;
 644         }
 645         if (output >= 10)
 646         {
 647                 const uint32 c = output << 1;
 648
 649                 /*
 650                  * We can't use memcpy here: the decimal dot goes between these two
 651                  * digits.
 652                  */
 653                 result[index + olength - i] = DIGIT_TABLE[c + 1];
 654                 result[index] = DIGIT_TABLE[c];
 655         }
 656         else
 657         {
 658                 result[index] = (char) ('0' + output);
 659         }
 660
 661         /* Print decimal point if needed. */
 662         if (olength > 1)
 663         {
 664                 result[index + 1] = '.';
 665                 index += olength + 1;
 666         }
 667         else
 668         {
 669                 ++index;
 670         }
 671
 672         /* Print the exponent. */
 673         result[index++] = 'e';
 674         if (exp < 0)
 675         {
 676                 result[index++] = '-';
 677                 exp = -exp;
 678         }
 679         else
 680                 result[index++] = '+';
 681
 682         memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
 683         index += 2;
 684
 685         return index;
 686 }
 687
 688 static inline bool
 689 f2d_small_int(const uint32 ieeeMantissa,
 690                           const uint32 ieeeExponent,
 691                           floating_decimal_32 *v)
 692 {
 693         const int32 e2 = (int32) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS;
 694
 695         /*
 696          * Avoid using multiple "return false;" here since it tends to provoke the
 697          * compiler into inlining multiple copies of f2d, which is undesirable.
 698          */
 699
 700         if (e2 >= -FLOAT_MANTISSA_BITS && e2 <= 0)
 701         {
 702                 /*----
 703                  * Since 2^23 <= m2 < 2^24 and 0 <= -e2 <= 23:
 704                  *   1 <= f = m2 / 2^-e2 < 2^24.
 705                  *
 706                  * Test if the lower -e2 bits of the significand are 0, i.e. whether
 707                  * the fraction is 0. We can use ieeeMantissa here, since the implied
 708                  * 1 bit can never be tested by this; the implied 1 can only be part
 709                  * of a fraction if e2 < -FLOAT_MANTISSA_BITS which we already
 710                  * checked. (e.g. 0.5 gives ieeeMantissa == 0 and e2 == -24)
 711                  */
 712                 const uint32 mask = (1U << -e2) - 1;
 713                 const uint32 fraction = ieeeMantissa & mask;
 714
 715                 if (fraction == 0)
 716                 {
 717                         /*----
 718                          * f is an integer in the range [1, 2^24).
 719                          * Note: mantissa might contain trailing (decimal) 0's.
 720                          * Note: since 2^24 < 10^9, there is no need to adjust
 721                          * decimalLength().
 722                          */
 723                         const uint32 m2 = (1U << FLOAT_MANTISSA_BITS) | ieeeMantissa;
 724
 725                         v->mantissa = m2 >> -e2;
 726                         v->exponent = 0;
 727                         return true;
 728                 }
 729         }
 730
 731         return false;
 732 }
 733
 734 /*
 735  * Store the shortest decimal representation of the given float as an
 736  * UNTERMINATED string in the caller's supplied buffer (which must be at least
 737  * FLOAT_SHORTEST_DECIMAL_LEN-1 bytes long).
 738  *
 739  * Returns the number of bytes stored.
 740  */
 741 int
 742 float_to_shortest_decimal_bufn(float f, char *result)
 743 {
 744         /*
 745          * Step 1: Decode the floating-point number, and unify normalized and
 746          * subnormal cases.
 747          */
 748         const uint32 bits = float_to_bits(f);
 749
 750         /* Decode bits into sign, mantissa, and exponent. */
 751         const bool      ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
 752         const uint32 ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1);
 753         const uint32 ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1);
 754
 755         /* Case distinction; exit early for the easy cases. */
 756         if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
 757         {
 758                 return copy_special_str(result, ieeeSign, (ieeeExponent != 0), (ieeeMantissa != 0));
 759         }
 760
 761         floating_decimal_32 v;
 762         const bool      isSmallInt = f2d_small_int(ieeeMantissa, ieeeExponent, &v);
 763
 764         if (!isSmallInt)
 765         {
 766                 v = f2d(ieeeMantissa, ieeeExponent);
 767         }
 768
 769         return to_chars(v, ieeeSign, result);
 770 }
 771
 772 /*
 773  * Store the shortest decimal representation of the given float as a
 774  * null-terminated string in the caller's supplied buffer (which must be at
 775  * least FLOAT_SHORTEST_DECIMAL_LEN bytes long).
 776  *
 777  * Returns the string length.
 778  */
 779 int
 780 float_to_shortest_decimal_buf(float f, char *result)
 781 {
 782         const int       index = float_to_shortest_decimal_bufn(f, result);
 783
 784         /* Terminate the string. */
 785         Assert(index < FLOAT_SHORTEST_DECIMAL_LEN);
 786         result[index] = '\0';
 787         return index;
 788 }
 789
 790 /*
 791  * Return the shortest decimal representation as a null-terminated palloc'd
 792  * string (outside the backend, uses malloc() instead).
 793  *
 794  * Caller is responsible for freeing the result.
 795  */
 796 char *
 797 float_to_shortest_decimal(float f)
 798 {
 799         char       *const result = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN);
 800
 801         float_to_shortest_decimal_buf(f, result);
 802         return result;
 803 }