libcxx/src/include/from_chars_floating_point.h

   1 //===----------------------------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
  10 #define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
  11
  12 // These headers are in the shared LLVM-libc header library.
  13 #include "shared/fp_bits.h"
  14 #include "shared/str_to_float.h"
  15 #include "shared/str_to_integer.h"
  16
  17 #include <__assert>
  18 #include <__config>
  19 #include <cctype>
  20 #include <charconv>
  21 #include <concepts>
  22 #include <limits>
  23
  24 // Included for the _Floating_type_traits class
  25 #include "to_chars_floating_point.h"
  26
  27 _LIBCPP_BEGIN_NAMESPACE_STD
  28
  29 // Parses an infinity string.
  30 // Valid strings are case insensitive and contain INF or INFINITY.
  31 //
  32 // - __first is the first argument to std::from_chars. When the string is invalid
  33 //   this value is returned as ptr in the result.
  34 // - __last is the last argument of std::from_chars.
  35 // - __value is the value argument of std::from_chars,
  36 // - __ptr is the current position is the input string. This is points beyond
  37 //   the initial I character.
  38 // - __negative whether a valid string represents -inf or +inf.
  39 template <floating_point _Fp>
  40 __from_chars_result<_Fp>
  41 __from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
  42   if (__last - __ptr < 2) [[unlikely]]
  43     return {_Fp{0}, 0, errc::invalid_argument};
  44
  45   if (std::tolower(__ptr[0]) != 'n' || std::tolower(__ptr[1]) != 'f') [[unlikely]]
  46     return {_Fp{0}, 0, errc::invalid_argument};
  47
  48   __ptr += 2;
  49
  50   // At this point the result is valid and contains INF.
  51   // When the remaining part contains INITY this will be consumed. Otherwise
  52   // only INF is consumed. For example INFINITZ will consume INF and ignore
  53   // INITZ.
  54
  55   if (__last - __ptr >= 5              //
  56       && std::tolower(__ptr[0]) == 'i' //
  57       && std::tolower(__ptr[1]) == 'n' //
  58       && std::tolower(__ptr[2]) == 'i' //
  59       && std::tolower(__ptr[3]) == 't' //
  60       && std::tolower(__ptr[4]) == 'y')
  61     __ptr += 5;
  62
  63   if constexpr (numeric_limits<_Fp>::has_infinity) {
  64     if (__negative)
  65       return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
  66
  67     return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
  68   } else {
  69     return {_Fp{0}, __ptr - __first, errc::result_out_of_range};
  70   }
  71 }
  72
  73 // Parses a nan string.
  74 // Valid strings are case insensitive and contain INF or INFINITY.
  75 //
  76 // - __first is the first argument to std::from_chars. When the string is invalid
  77 //   this value is returned as ptr in the result.
  78 // - __last is the last argument of std::from_chars.
  79 // - __value is the value argument of std::from_chars,
  80 // - __ptr is the current position is the input string. This is points beyond
  81 //   the initial N character.
  82 // - __negative whether a valid string represents -nan or +nan.
  83 template <floating_point _Fp>
  84 __from_chars_result<_Fp>
  85 __from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
  86   if (__last - __ptr < 2) [[unlikely]]
  87     return {_Fp{0}, 0, errc::invalid_argument};
  88
  89   if (std::tolower(__ptr[0]) != 'a' || std::tolower(__ptr[1]) != 'n') [[unlikely]]
  90     return {_Fp{0}, 0, errc::invalid_argument};
  91
  92   __ptr += 2;
  93
  94   // At this point the result is valid and contains NAN. When the remaining
  95   // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
  96   // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
  97   // (abcd.
  98   if (__last - __ptr >= 2 && __ptr[0] == '(') {
  99     size_t __offset = 1;
 100     do {
 101       if (__ptr[__offset] == ')') {
 102         __ptr += __offset + 1;
 103         break;
 104       }
 105       if (__ptr[__offset] != '_' && !std::isalnum(__ptr[__offset]))
 106         break;
 107       ++__offset;
 108     } while (__ptr + __offset != __last);
 109   }
 110
 111   if (__negative)
 112     return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
 113
 114   return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
 115 }
 116
 117 template <class _Tp>
 118 struct __fractional_constant_result {
 119   size_t __offset{size_t(-1)};
 120   _Tp __mantissa{0};
 121   int __exponent{0};
 122   bool __truncated{false};
 123   bool __is_valid{false};
 124 };
 125
 126 // Parses the hex constant part of the hexadecimal floating-point value.
 127 // - input start of buffer given to from_chars
 128 // - __n the number of elements in the buffer
 129 // - __offset where to start parsing. The input can have an optional sign, the
 130 //   offset starts after this sign.
 131 template <class _Tp>
 132 __fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
 133   __fractional_constant_result<_Tp> __result;
 134
 135   const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16;
 136   bool __fraction                         = false;
 137   for (; __offset < __n; ++__offset) {
 138     if (std::isxdigit(__input[__offset])) {
 139       __result.__is_valid = true;
 140
 141       uint32_t __digit = __input[__offset] - '0';
 142       switch (std::tolower(__input[__offset])) {
 143       case 'a':
 144         __digit = 10;
 145         break;
 146       case 'b':
 147         __digit = 11;
 148         break;
 149       case 'c':
 150         __digit = 12;
 151         break;
 152       case 'd':
 153         __digit = 13;
 154         break;
 155       case 'e':
 156         __digit = 14;
 157         break;
 158       case 'f':
 159         __digit = 15;
 160         break;
 161       }
 162
 163       if (__result.__mantissa < __mantissa_truncate_threshold) {
 164         __result.__mantissa = (__result.__mantissa * 16) + __digit;
 165         if (__fraction)
 166           __result.__exponent -= 4;
 167       } else {
 168         if (__digit > 0)
 169           __result.__truncated = true;
 170         if (!__fraction)
 171           __result.__exponent += 4;
 172       }
 173     } else if (__input[__offset] == '.') {
 174       if (__fraction)
 175         break; // this means that __input[__offset] points to a second decimal point, ending the number.
 176
 177       __fraction = true;
 178     } else
 179       break;
 180   }
 181
 182   __result.__offset = __offset;
 183   return __result;
 184 }
 185
 186 struct __exponent_result {
 187   size_t __offset{size_t(-1)};
 188   int __value{0};
 189   bool __present{false};
 190 };
 191
 192 // When the exponent is not present the result of the struct contains
 193 // __offset, 0, false. This allows using the results unconditionally, the
 194 // __present is important for the scientific notation, where the value is
 195 // mandatory.
 196 __exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
 197   if (__offset + 1 < __n &&                          // an exponent always needs at least one digit.
 198       std::tolower(__input[__offset]) == __marker && //
 199       !std::isspace(__input[__offset + 1])           // leading whitespace is not allowed.
 200   ) {
 201     ++__offset;
 202     LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
 203         LIBC_NAMESPACE::shared::strtointeger<int32_t>(__input + __offset, 10, __n - __offset);
 204     // __result.error contains the errno value, 0 or ERANGE these are not interesting.
 205     // If the number of characters parsed is 0 it means there was no number.
 206     if (__e.parsed_len != 0)
 207       return {__offset + __e.parsed_len, __e.value, true};
 208     else
 209       --__offset; // the assumption of a valid exponent was not true, undo eating the exponent character.
 210   }
 211
 212   return {__offset, 0, false};
 213 }
 214
 215 // Here we do this operation as int64 to avoid overflow.
 216 int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
 217   int64_t __sum = __fractional + __exponent;
 218
 219   if (__sum > __max_biased_exponent)
 220     return __max_biased_exponent;
 221
 222   if (__sum < -__max_biased_exponent)
 223     return -__max_biased_exponent;
 224
 225   return __sum;
 226 }
 227
 228 template <class _Fp, class _Tp>
 229 __from_chars_result<_Fp>
 230 __calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
 231   auto __r = LIBC_NAMESPACE::shared::FPBits<_Fp>();
 232   __r.set_mantissa(__mantissa);
 233   __r.set_biased_exponent(__exponent);
 234
 235   // C17 7.12.1/6
 236   // The result underflows if the magnitude of the mathematical result is so
 237   // small that the mathematical result cannot be represented, without
 238   // extraordinary roundoff error, in an object of the specified type.237) If
 239   // the result underflows, the function returns an implementation-defined
 240   // value whose magnitude is no greater than the smallest normalized positive
 241   // number in the specified type; if the integer expression math_errhandling
 242   // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
 243   // implementation-defined; if the integer expression math_errhandling &
 244   // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
 245   // exception is raised is implementation-defined.
 246   //
 247   // LLVM-LIBC sets ERAGNE for subnormal values
 248   //
 249   // [charconv.from.chars]/1
 250   //   ... If the parsed value is not in the range representable by the type of
 251   //   value, value is unmodified and the member ec of the return value is
 252   //   equal to errc::result_out_of_range. ...
 253   //
 254   // Undo the ERANGE for subnormal values.
 255   if (__result.__ec == errc::result_out_of_range && __r.is_subnormal() && !__r.is_zero())
 256     __result.__ec = errc{};
 257
 258   if (__negative)
 259     __result.__value = -__r.get_val();
 260   else
 261     __result.__value = __r.get_val();
 262
 263   return __result;
 264 }
 265
 266 // Implements from_chars for decimal floating-point values.
 267 // __first forwarded from from_chars
 268 // __last forwarded from from_chars
 269 // __value forwarded from from_chars
 270 // __fmt forwarded from from_chars
 271 // __ptr the start of the buffer to parse. This is after the optional sign character.
 272 // __negative should __value be set to a negative value?
 273 //
 274 // This function and __from_chars_floating_point_decimal are similar. However
 275 // the similar parts are all in helper functions. So the amount of code
 276 // duplication is minimal.
 277 template <floating_point _Fp>
 278 __from_chars_result<_Fp>
 279 __from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
 280   size_t __n         = __last - __first;
 281   ptrdiff_t __offset = __ptr - __first;
 282
 283   auto __fractional =
 284       std::__parse_fractional_hex_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
 285   if (!__fractional.__is_valid)
 286     return {_Fp{0}, 0, errc::invalid_argument};
 287
 288   auto __parsed_exponent = std::__parse_exponent(__first, __n, __fractional.__offset, 'p');
 289   __offset               = __parsed_exponent.__offset;
 290   int __exponent         = std::__merge_exponents(
 291       __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
 292
 293   __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
 294   LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
 295   if (__fractional.__mantissa != 0) {
 296     auto __temp = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
 297         {__fractional.__mantissa, __exponent},
 298         __fractional.__truncated,
 299         LIBC_NAMESPACE::shared::RoundDirection::Nearest);
 300     __expanded_float = __temp.num;
 301     if (__temp.error == ERANGE) {
 302       __result.__ec = errc::result_out_of_range;
 303     }
 304   }
 305
 306   return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
 307 }
 308
 309 // Parses the hex constant part of the decimal float value.
 310 // - input start of buffer given to from_chars
 311 // - __n the number of elements in the buffer
 312 // - __offset where to start parsing. The input can have an optional sign, the
 313 //   offset starts after this sign.
 314 template <class _Tp>
 315 __fractional_constant_result<_Tp>
 316 __parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
 317   __fractional_constant_result<_Tp> __result;
 318
 319   const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10;
 320   bool __fraction                         = false;
 321   for (; __offset < __n; ++__offset) {
 322     if (std::isdigit(__input[__offset])) {
 323       __result.__is_valid = true;
 324
 325       uint32_t __digit = __input[__offset] - '0';
 326       if (__result.__mantissa < __mantissa_truncate_threshold) {
 327         __result.__mantissa = (__result.__mantissa * 10) + __digit;
 328         if (__fraction)
 329           --__result.__exponent;
 330       } else {
 331         if (__digit > 0)
 332           __result.__truncated = true;
 333         if (!__fraction)
 334           ++__result.__exponent;
 335       }
 336     } else if (__input[__offset] == '.') {
 337       if (__fraction)
 338         break; // this means that __input[__offset] points to a second decimal point, ending the number.
 339
 340       __fraction = true;
 341     } else
 342       break;
 343   }
 344
 345   __result.__offset = __offset;
 346   return __result;
 347 }
 348
 349 // Implements from_chars for decimal floating-point values.
 350 // __first forwarded from from_chars
 351 // __last forwarded from from_chars
 352 // __value forwarded from from_chars
 353 // __fmt forwarded from from_chars
 354 // __ptr the start of the buffer to parse. This is after the optional sign character.
 355 // __negative should __value be set to a negative value?
 356 template <floating_point _Fp>
 357 __from_chars_result<_Fp> __from_chars_floating_point_decimal(
 358     const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
 359   ptrdiff_t __n      = __last - __first;
 360   ptrdiff_t __offset = __ptr - __first;
 361
 362   auto __fractional =
 363       std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
 364   if (!__fractional.__is_valid)
 365     return {_Fp{0}, 0, errc::invalid_argument};
 366
 367   __offset = __fractional.__offset;
 368
 369   // LWG3456 Pattern used by std::from_chars is underspecified
 370   // This changes fixed to ignore a possible exponent instead of making its
 371   // existance an error.
 372   int __exponent;
 373   if (__fmt == chars_format::fixed) {
 374     __exponent =
 375         std::__merge_exponents(__fractional.__exponent, 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
 376   } else {
 377     auto __parsed_exponent = std::__parse_exponent(__first, __n, __offset, 'e');
 378     if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
 379       // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
 380       // the otherwise optional exponent part shall appear;
 381       return {_Fp{0}, 0, errc::invalid_argument};
 382     }
 383
 384     __offset   = __parsed_exponent.__offset;
 385     __exponent = std::__merge_exponents(
 386         __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
 387   }
 388
 389   __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
 390   LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
 391   if (__fractional.__mantissa != 0) {
 392     // This function expects to parse a positive value. This means it does not
 393     // take a __first, __n as arguments, since __first points to '-' for
 394     // negative values.
 395     auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
 396         {__fractional.__mantissa, __exponent},
 397         __fractional.__truncated,
 398         LIBC_NAMESPACE::shared::RoundDirection::Nearest,
 399         __ptr,
 400         __last - __ptr);
 401     __expanded_float = __temp.num;
 402     if (__temp.error == ERANGE) {
 403       __result.__ec = errc::result_out_of_range;
 404     }
 405   }
 406
 407   return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
 408 }
 409
 410 template <floating_point _Fp>
 411 __from_chars_result<_Fp>
 412 __from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
 413   if (__first == __last) [[unlikely]]
 414     return {_Fp{0}, 0, errc::invalid_argument};
 415
 416   const char* __ptr = __first;
 417   bool __negative   = *__ptr == '-';
 418   if (__negative) {
 419     ++__ptr;
 420     if (__ptr == __last) [[unlikely]]
 421       return {_Fp{0}, 0, errc::invalid_argument};
 422   }
 423
 424   // [charconv.from.chars]
 425   //   [Note 1: If the pattern allows for an optional sign, but the string has
 426   //   no digit characters following the sign, no characters match the pattern.
 427   //   -- end note]
 428   // This is true for integrals, floating point allows -.0
 429
 430   // [charconv.from.chars]/6.2
 431   //   if fmt has chars_format::scientific set but not chars_format::fixed, the
 432   //   otherwise optional exponent part shall appear;
 433   // Since INF/NAN do not have an exponent this value is not valid.
 434   //
 435   // LWG3456 Pattern used by std::from_chars is underspecified
 436   // Does not address this point, but proposed option B does solve this issue,
 437   // Both MSVC STL and libstdc++ implement this this behaviour.
 438   switch (std::tolower(*__ptr)) {
 439   case 'i':
 440     return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative);
 441   case 'n':
 442     if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
 443       // NOTE: The pointer passed here will be parsed in the default C locale.
 444       // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
 445       return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative);
 446     return {_Fp{0}, 0, errc::invalid_argument};
 447   }
 448
 449   if (__fmt == chars_format::hex)
 450     return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);
 451
 452   return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
 453 }
 454
 455 _LIBCPP_END_NAMESPACE_STD
 456
 457 #endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H