libcxx/include/__format/parser_std_format_spec.h

   1 // -*- C++ -*-
   2 //===----------------------------------------------------------------------===//
   3 //
   4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5 // See https://llvm.org/LICENSE.txt for license information.
   6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
  11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
  12
  13 /// \file Contains the std-format-spec parser.
  14 ///
  15 /// Most of the code can be reused in the chrono-format-spec.
  16 /// This header has some support for the chrono-format-spec since it doesn't
  17 /// affect the std-format-spec.
  18
  19 #include <__algorithm/copy_n.h>
  20 #include <__algorithm/min.h>
  21 #include <__assert>
  22 #include <__concepts/arithmetic.h>
  23 #include <__concepts/same_as.h>
  24 #include <__config>
  25 #include <__format/format_arg.h>
  26 #include <__format/format_error.h>
  27 #include <__format/format_parse_context.h>
  28 #include <__format/format_string.h>
  29 #include <__format/unicode.h>
  30 #include <__format/width_estimation_table.h>
  31 #include <__iterator/concepts.h>
  32 #include <__iterator/iterator_traits.h> // iter_value_t
  33 #include <__memory/addressof.h>
  34 #include <__type_traits/common_type.h>
  35 #include <__type_traits/is_constant_evaluated.h>
  36 #include <__type_traits/is_trivially_copyable.h>
  37 #include <__variant/monostate.h>
  38 #include <cstdint>
  39 #include <string>
  40 #include <string_view>
  41
  42 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
  43 #  pragma GCC system_header
  44 #endif
  45
  46 _LIBCPP_PUSH_MACROS
  47 #include <__undef_macros>
  48
  49 _LIBCPP_BEGIN_NAMESPACE_STD
  50
  51 #if _LIBCPP_STD_VER >= 20
  52
  53 namespace __format_spec {
  54
  55 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void
  56 __throw_invalid_option_format_error(const char* __id, const char* __option) {
  57   std::__throw_format_error(
  58       (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
  59 }
  60
  61 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
  62   std::__throw_format_error(
  63       (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
  64 }
  65
  66 template <contiguous_iterator _Iterator, class _ParseContext>
  67 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
  68 __parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) {
  69   using _CharT = iter_value_t<_Iterator>;
  70   // This function is a wrapper to call the real parser. But it does the
  71   // validation for the pre-conditions and post-conditions.
  72   if (__begin == __end)
  73     std::__throw_format_error("End of input while parsing an argument index");
  74
  75   __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx);
  76
  77   if (__r.__last == __end || *__r.__last != _CharT('}'))
  78     std::__throw_format_error("The argument index is invalid");
  79
  80   ++__r.__last;
  81   return __r;
  82 }
  83
  84 template <class _Context>
  85 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
  86 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
  87   // [format.string.std]/8
  88   //   If the corresponding formatting argument is not of integral type...
  89   // This wording allows char and bool too. LWG-3720 changes the wording to
  90   //    If the corresponding formatting argument is not of standard signed or
  91   //    unsigned integer type,
  92   // This means the 128-bit will not be valid anymore.
  93   // TODO FMT Verify this resolution is accepted and add a test to verify
  94   //          128-bit integrals fail and switch to visit_format_arg.
  95   return _VSTD::__visit_format_arg(
  96       [](auto __arg) -> uint32_t {
  97         using _Type = decltype(__arg);
  98         if constexpr (same_as<_Type, monostate>)
  99           std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
 100
 101         // [format.string.std]/8
 102         // If { arg-idopt } is used in a width or precision, the value of the
 103         // corresponding formatting argument is used in its place. If the
 104         // corresponding formatting argument is not of standard signed or unsigned
 105         // integer type, or its value is negative for precision or non-positive for
 106         // width, an exception of type format_error is thrown.
 107         //
 108         // When an integral is used in a format function, it is stored as one of
 109         // the types checked below. Other integral types are promoted. For example,
 110         // a signed char is stored as an int.
 111         if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || //
 112                       same_as<_Type, long long> || same_as<_Type, unsigned long long>) {
 113           if constexpr (signed_integral<_Type>) {
 114             if (__arg < 0)
 115               std::__throw_format_error("An argument index may not have a negative value");
 116           }
 117
 118           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
 119           if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
 120             std::__throw_format_error("The value of the argument index exceeds its maximum value");
 121
 122           return __arg;
 123         } else
 124           std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
 125       },
 126       __format_arg);
 127 }
 128
/// These fields are a filter for which elements to parse.
///
/// They default to false so when a new field is added it needs to be opted in
/// explicitly.
///
/// An instance is passed to __parser::__parse and __parser::__validate to
/// select the options a formatter accepts.
// TODO FMT Use an ABI tag for this struct.
struct __fields {
  // Each of the following flags enables parsing of the option with the same
  // name in __parser::__parse.
  uint16_t __sign_                 : 1 {false};
  uint16_t __alternate_form_       : 1 {false};
  uint16_t __zero_padding_         : 1 {false};
  uint16_t __precision_            : 1 {false};
  uint16_t __locale_specific_form_ : 1 {false};
  uint16_t __type_                 : 1 {false};
  // Determines the valid values for fill.
  //
  // Originally the fill could be any character except { and }. Range-based
  // formatters use the colon to mark the beginning of the
  // underlying-format-spec. To avoid parsing ambiguities these formatter
  // specializations prohibit the use of the colon as a fill character.
  uint16_t __use_range_fill_ : 1 {false};
  // Enables parsing of the 'n' option.
  uint16_t __clear_brackets_ : 1 {false};
  // When set, __parser::__parse requires that the parsed format-spec is
  // followed by the end of the input or a '}'.
  uint16_t __consume_all_    : 1 {false};
};
 151
// Predefined field filters to pass to __parser::__parse and
// __parser::__validate.
//
// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.

// Accepts: locale-specific form and type.
inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true};
// Accepts: sign, alternate form, zero-padding, locale-specific form, and type.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true,
    .__consume_all_          = true};
// Accepts everything __fields_integral accepts and the precision.
inline constexpr __fields __fields_floating_point{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__precision_            = true,
    .__locale_specific_form_ = true,
    .__type_                 = true,
    .__consume_all_          = true};
// Accepts: precision and type.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true};
// Accepts: zero-padding and type.
inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true};

#  if _LIBCPP_STD_VER >= 23
// The filters below don't set __consume_all_, so __parser::__parse returns
// without checking what follows the parsed part of the format-spec.
inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true};
inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true};
// No optional field is enabled; only the fill, align, and width are parsed.
inline constexpr __fields __fields_fill_align_width{};
#  endif
 178
/// The alignment of the formatted output.
///
/// The value is stored in a 3-bit bit-field in the parser and in the parsed
/// specifications.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  /// Set by the '<' align option.
  __left,
  /// Set by the '^' align option.
  __center,
  /// Set by the '>' align option.
  __right,
  /// Set by the zero-padding option when no align option is present.
  __zero_padding
};
 187
/// The sign option of the format string.
///
/// The value is stored in a 2-bit bit-field in the parser and in the parsed
/// specifications.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  /// Set by the '-' sign option.
  __minus,
  /// Set by the '+' sign option.
  __plus,
  /// Set by the ' ' sign option.
  __space
};
 199
/// The display type selected by the type option of the format string.
///
/// The numeric value of an enumerator is used as bit position by
/// \ref __create_type_mask. That function rejects values above 31, so new
/// enumerators need to keep their value in the range [0, 31].
enum class __type : uint8_t {
  /// No type is set in the format string.
  __default = 0,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer_lower_case,
  __pointer_upper_case,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case,
  __debug
};
 222
 223 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
 224   uint32_t __shift = static_cast<uint32_t>(__t);
 225   if (__shift == 0)
 226     return 1;
 227
 228   if (__shift > 31)
 229     std::__throw_format_error("The type does not fit in the mask");
 230
 231   return 1 << __shift;
 232 }
 233
/// The mask with the binary, decimal, octal, and hexadecimal display types.
///
/// The mask has the format expected by the \c __type_mask argument of
/// __parser::__validate.
inline constexpr uint32_t __type_mask_integer =
    __create_type_mask(__type::__binary_lower_case) |      //
    __create_type_mask(__type::__binary_upper_case) |      //
    __create_type_mask(__type::__decimal) |                //
    __create_type_mask(__type::__octal) |                  //
    __create_type_mask(__type::__hexadecimal_lower_case) | //
    __create_type_mask(__type::__hexadecimal_upper_case);
 241
/// The std-format-spec specific part of \ref __parsed_specifications.
///
/// The values are copied from the parser by
/// __parser::__get_parsed_std_specifications.
struct __std {
  // Must remain the first member; __parsed_specifications reads the alignment
  // through its union regardless of the active member.
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};
 249
/// The chrono-format-spec specific part of \ref __parsed_specifications.
///
/// The values are copied from the parser by
/// __parser::__get_parsed_chrono_specifications.
struct __chrono {
  // Must remain the first member; __parsed_specifications reads the alignment
  // through its union regardless of the active member.
  __alignment __alignment_ : 3;
  bool __locale_specific_form_ : 1;
  bool __hour_                 : 1;
  bool __weekday_name_ : 1;
  bool __weekday_              : 1;
  bool __day_of_year_          : 1;
  bool __week_of_year_         : 1;
  bool __month_name_ : 1;
};
 260
// The fill UCS scalar value.
//
// This is always an array, with 1, 2, or 4 elements.
// The size of the data structure is always 32-bits.
//
// The array stores the code units of the fill character. The default fill is
// a space.
template <class _CharT>
struct __code_point;

// char: room for four code units.
template <>
struct __code_point<char> {
  char __data[4] = {' '};
};

#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
// wchar_t: the number of code units depends on sizeof(wchar_t).
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
#  endif
 279
/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The code units of the fill character.
  __code_point<_CharT> __fill_;

  /// \returns whether a width is set; a width of zero or less means no width.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is set; a negative value means no precision.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};
 321
// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions. The expected size is 16 bytes for every
// supported character type.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif
 330
 331 /// The parser for the std-format-spec.
 332 ///
 333 /// Note this class is a member of std::formatter specializations. It's
 334 /// expected developers will create their own formatter specializations that
 335 /// inherit from the std::formatter specializations. This means this class
 336 /// must be ABI stable. To aid the stability the unused bits in the class are
 337 /// set to zero. That way they can be repurposed if a future revision of the
 338 /// Standards adds new fields to std-format-spec.
 339 template <class _CharT>
 340 class _LIBCPP_TEMPLATE_VIS __parser {
 341 public:
  // Parses the format specification.
  //
  // Depending on whether the parsing is done compile-time or run-time
  // the method slightly differs.
  // - Only parses a field when it is in the __fields. Accepting all
  //   fields and then validating the valid ones has a performance impact.
  //   This is faster but gives slightly worse error messages.
  // - At compile-time when a field is not accepted the parser will still
  //   parse it and give an error when it's present. This gives a more
  //   accurate error.
  // The idea is that most times the format instead of the vformat
  // functions are used. In that case the error will be detected during
  // compilation and there is no need to pay for the run-time overhead.
  //
  // The options are parsed in the order: fill and align, sign, alternate
  // form, zero-padding, width, precision, locale-specific form, 'n', type.
  //
  // \param __ctx    The parse context containing the format-spec to parse.
  // \param __fields The filter selecting the optional fields to parse.
  //
  // \returns An iterator to the first character that was not consumed.
  template <class _ParseContext>
  _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) {
    auto __begin = __ctx.begin();
    auto __end   = __ctx.end();
    if (__begin == __end)
      return __begin;

    // The parse helpers below inspect *__begin without checking for the end
    // of the input. Therefore every successful step that advanced __begin is
    // followed by an end-of-input test before the next helper is called.
    if (__parse_fill_align(__begin, __end, __fields.__use_range_fill_) && __begin == __end)
      return __begin;

    if (__fields.__sign_) {
      if (__parse_sign(__begin) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_sign(__begin)) {
      std::__throw_format_error("The format specification does not allow the sign option");
    }

    if (__fields.__alternate_form_) {
      if (__parse_alternate_form(__begin) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) {
      std::__throw_format_error("The format specifier does not allow the alternate form option");
    }

    if (__fields.__zero_padding_) {
      if (__parse_zero_padding(__begin) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) {
      std::__throw_format_error("The format specifier does not allow the zero-padding option");
    }

    // The width is always parsed; it isn't controlled by a flag in __fields.
    if (__parse_width(__begin, __end, __ctx) && __begin == __end)
      return __begin;

    if (__fields.__precision_) {
      if (__parse_precision(__begin, __end, __ctx) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) {
      std::__throw_format_error("The format specifier does not allow the precision option");
    }

    if (__fields.__locale_specific_form_) {
      if (__parse_locale_specific_form(__begin) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) {
      std::__throw_format_error("The format specifier does not allow the locale-specific form option");
    }

    if (__fields.__clear_brackets_) {
      if (__parse_clear_brackets(__begin) && __begin == __end)
        return __begin;
    } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) {
      std::__throw_format_error("The format specifier does not allow the n option");
    }

    if (__fields.__type_)
      __parse_type(__begin);

    // Without __consume_all_ the caller is responsible for the remaining
    // input.
    if (!__fields.__consume_all_)
      return __begin;

    if (__begin != __end && *__begin != _CharT('}'))
      std::__throw_format_error("The format specifier should consume the input or end with a '}'");

    return __begin;
  }
 421
  // Validates the parsed data against the selected fields.
  //
  // The valid fields in the parser may depend on the display type
  // selected. But the type is the last optional field, so by the time
  // it's known an option can't be used, it already has been parsed.
  // This does the validation again.
  //
  // For example an integral may have a sign, zero-padding, or alternate
  // form when the type option is not 'c'. So the generic approach is:
  //
  // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
  // if (__parser_.__type_ == __format_spec::__type::__char) {
  //   __parser_.__validate(__format_spec::__fields_bool, "an integer");
  //   ... // more char adjustments
  // } else {
  //   ... // validate an integral type.
  // }
  //
  // For some types all valid options need a second validation run, like
  // boolean types.
  //
  // Depending on whether the validation is done at compile-time or
  // run-time the error differs
  // - run-time the exception is thrown and contains the type of field
  //   being validated.
  // - at compile-time the line with `std::__throw_format_error` is shown
  //   in the output. In that case it's important for the error to be on one
  //   line.
  // Note future versions of C++ may allow better compile-time error
  // reporting.
  //
  // \param __fields    The fields that are allowed to be set.
  // \param __id        Description of the formatting argument, used in the
  //                    run-time error message.
  // \param __type_mask The allowed display types, a bitwise or of
  //                    __create_type_mask results. The default allows all
  //                    display types.
  _LIBCPP_HIDE_FROM_ABI constexpr void
  __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const {
    if (!__fields.__sign_ && __sign_ != __sign::__default) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the sign option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "sign");
    }

    if (!__fields.__alternate_form_ && __alternate_form_) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the alternate form option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "alternate form");
    }

    // The zero-padding option is stored as an alignment value.
    if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the zero-padding option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "zero-padding");
    }

    if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id.
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the precision option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "precision");
    }

    if (!__fields.__locale_specific_form_ && __locale_specific_form_) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier does not allow the locale-specific form option");
      else
        __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
    }

    if ((__create_type_mask(__type_) & __type_mask) == 0) {
      if (std::is_constant_evaluated())
        std::__throw_format_error("The format specifier uses an invalid value for the type option");
      else
        __format_spec::__throw_invalid_type_format_error(__id);
    }
  }
 496
  /// Creates the specifications for a std-format-spec.
  ///
  /// The width and precision are obtained from __get_width and
  /// __get_precision, which receive \a __ctx.
  ///
  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
  _LIBCPP_HIDE_FROM_ABI
  __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
    return __parsed_specifications<_CharT>{
        .__std_ = __std{.__alignment_            = __alignment_,
                        .__sign_                 = __sign_,
                        .__alternate_form_       = __alternate_form_,
                        .__locale_specific_form_ = __locale_specific_form_,
                        .__type_                 = __type_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }
 510
  /// Creates the specifications for a chrono-format-spec.
  ///
  /// The width and precision are obtained from __get_width and
  /// __get_precision, which receive \a __ctx.
  ///
  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
  _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
    return __parsed_specifications<_CharT>{
        .__chrono_ =
            __chrono{.__alignment_            = __alignment_,
                     .__locale_specific_form_ = __locale_specific_form_,
                     .__hour_                 = __hour_,
                     .__weekday_name_         = __weekday_name_,
                     .__weekday_              = __weekday_,
                     .__day_of_year_          = __day_of_year_,
                     .__week_of_year_         = __week_of_year_,
                     .__month_name_           = __month_name_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }
 526
  // The parsed options of the std-format-spec. The layout of these members
  // is part of the ABI of this class; see the class documentation.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  // Set when the 'n' option is parsed.
  bool __clear_brackets_       : 1 {false};
  __type __type_{__type::__default};

  // These flags are only used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __hour_ : 1 {false};

  bool __weekday_name_ : 1 {false};
  bool __weekday_      : 1 {false};

  bool __day_of_year_  : 1 {false};
  bool __week_of_year_ : 1 {false};

  bool __month_name_ : 1 {false};

  // Unused bits, kept zero so they can be repurposed later.
  uint8_t __reserved_0_ : 2 {0};
  uint8_t __reserved_1_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  //
  // When a flag is set the matching __width_ or __precision_ member holds an
  // arg-id instead of a value.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  ///
  /// The value -1 means no precision is set.
  int32_t __precision_{-1};

  /// The code units of the fill character, a space by default.
  __code_point<_CharT> __fill_{};
 560
 561 private:
 562   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
 563     switch (__c) {
 564     case _CharT('<'):
 565       __alignment_ = __alignment::__left;
 566       return true;
 567
 568     case _CharT('^'):
 569       __alignment_ = __alignment::__center;
 570       return true;
 571
 572     case _CharT('>'):
 573       __alignment_ = __alignment::__right;
 574       return true;
 575     }
 576     return false;
 577   }
 578
 579   _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) {
 580     // The forbidden fill characters all code points formed from a single code unit, thus the
 581     // check can be omitted when more code units are used.
 582     if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':')))
 583       std::__throw_format_error("The fill option contains an invalid value");
 584     else if (__fill == _CharT('{') || __fill == _CharT('}'))
 585       std::__throw_format_error("The fill option contains an invalid value");
 586   }
 587
 588 #  ifndef _LIBCPP_HAS_NO_UNICODE
 589   // range-fill and tuple-fill are identical
 590   template <contiguous_iterator _Iterator>
 591     requires same_as<_CharT, char>
 592 #    ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
 593           || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
 594 #    endif
 595   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
 596     _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end,
 597                                  "when called with an empty input the function will cause "
 598                                  "undefined behavior by evaluating data not in the input");
 599     __unicode::__code_point_view<_CharT> __view{__begin, __end};
 600     __unicode::__consume_result __consumed = __view.__consume();
 601     if (__consumed.__status != __unicode::__consume_result::__ok)
 602       std::__throw_format_error("The format specifier contains malformed Unicode characters");
 603
 604     if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
 605       ptrdiff_t __code_units = __view.__position() - __begin;
 606       if (__code_units == 1)
 607         // The forbidden fill characters all are code points encoded
 608         // in one code unit, thus the check can be omitted when more
 609         // code units are used.
 610         __validate_fill_character(*__begin, __use_range_fill);
 611
 612       std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
 613       __begin += __code_units + 1;
 614       return true;
 615     }
 616
 617     if (!__parse_alignment(*__begin))
 618       return false;
 619
 620     ++__begin;
 621     return true;
 622   }
 623
 624 #    ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
 625   template <contiguous_iterator _Iterator>
 626     requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
 627   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
 628     _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end,
 629                                  "when called with an empty input the function will cause "
 630                                  "undefined behavior by evaluating data not in the input");
 631     if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
 632       if (!__unicode::__is_scalar_value(*__begin))
 633         std::__throw_format_error("The fill option contains an invalid value");
 634
 635       __validate_fill_character(*__begin, __use_range_fill);
 636
 637       __fill_.__data[0] = *__begin;
 638       __begin += 2;
 639       return true;
 640     }
 641
 642     if (!__parse_alignment(*__begin))
 643       return false;
 644
 645     ++__begin;
 646     return true;
 647   }
 648
 649 #    endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
 650
 651 #  else // _LIBCPP_HAS_NO_UNICODE
 652   // range-fill and tuple-fill are identical
 653   template <contiguous_iterator _Iterator>
 654   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
 655     _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end,
 656                                  "when called with an empty input the function will cause "
 657                                  "undefined behavior by evaluating data not in the input");
 658     if (__begin + 1 != __end) {
 659       if (__parse_alignment(*(__begin + 1))) {
 660         __validate_fill_character(*__begin, __use_range_fill);
 661
 662         __fill_.__data[0] = *__begin;
 663         __begin += 2;
 664         return true;
 665       }
 666     }
 667
 668     if (!__parse_alignment(*__begin))
 669       return false;
 670
 671     ++__begin;
 672     return true;
 673   }
 674
 675 #  endif // _LIBCPP_HAS_NO_UNICODE
 676
 677   template <contiguous_iterator _Iterator>
 678   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
 679     switch (*__begin) {
 680     case _CharT('-'):
 681       __sign_ = __sign::__minus;
 682       break;
 683     case _CharT('+'):
 684       __sign_ = __sign::__plus;
 685       break;
 686     case _CharT(' '):
 687       __sign_ = __sign::__space;
 688       break;
 689     default:
 690       return false;
 691     }
 692     ++__begin;
 693     return true;
 694   }
 695
 696   template <contiguous_iterator _Iterator>
 697   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
 698     if (*__begin != _CharT('#'))
 699       return false;
 700
 701     __alternate_form_ = true;
 702     ++__begin;
 703     return true;
 704   }
 705
 706   template <contiguous_iterator _Iterator>
 707   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
 708     if (*__begin != _CharT('0'))
 709       return false;
 710
 711     if (__alignment_ == __alignment::__default)
 712       __alignment_ = __alignment::__zero_padding;
 713     ++__begin;
 714     return true;
 715   }
 716
 717   template <contiguous_iterator _Iterator>
 718   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) {
 719     if (*__begin == _CharT('0'))
 720       std::__throw_format_error("The width option should not have a leading zero");
 721
 722     if (*__begin == _CharT('{')) {
 723       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
 724       __width_as_arg_ = true;
 725       __width_ = __r.__value;
 726       __begin = __r.__last;
 727       return true;
 728     }
 729
 730     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
 731       return false;
 732
 733     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
 734     __width_ = __r.__value;
 735     _LIBCPP_ASSERT_UNCATEGORIZED(__width_ != 0, "A zero value isn't allowed and should be impossible, "
 736                                                 "due to validations in this function");
 737     __begin = __r.__last;
 738     return true;
 739   }
 740
 741   template <contiguous_iterator _Iterator>
 742   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) {
 743     if (*__begin != _CharT('.'))
 744       return false;
 745
 746     ++__begin;
 747     if (__begin == __end)
 748       std::__throw_format_error("End of input while parsing format specifier precision");
 749
 750     if (*__begin == _CharT('{')) {
 751       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
 752       __precision_as_arg_ = true;
 753       __precision_ = __arg_id.__value;
 754       __begin = __arg_id.__last;
 755       return true;
 756     }
 757
 758     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
 759       std::__throw_format_error("The precision option does not contain a value or an argument index");
 760
 761     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
 762     __precision_ = __r.__value;
 763     __precision_as_arg_ = false;
 764     __begin = __r.__last;
 765     return true;
 766   }
 767
 768   template <contiguous_iterator _Iterator>
 769   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
 770     if (*__begin != _CharT('L'))
 771       return false;
 772
 773     __locale_specific_form_ = true;
 774     ++__begin;
 775     return true;
 776   }
 777
 778   template <contiguous_iterator _Iterator>
 779   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) {
 780     if (*__begin != _CharT('n'))
 781       return false;
 782
 783     __clear_brackets_ = true;
 784     ++__begin;
 785     return true;
 786   }
 787
 788   template <contiguous_iterator _Iterator>
 789   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
 790     // Determines the type. It does not validate whether the selected type is
 791     // valid. Most formatters have optional fields that are only allowed for
 792     // certain types. These parsers need to do validation after the type has
 793     // been parsed. So its easier to implement the validation for all types in
 794     // the specific parse function.
 795     switch (*__begin) {
 796     case 'A':
 797       __type_ = __type::__hexfloat_upper_case;
 798       break;
 799     case 'B':
 800       __type_ = __type::__binary_upper_case;
 801       break;
 802     case 'E':
 803       __type_ = __type::__scientific_upper_case;
 804       break;
 805     case 'F':
 806       __type_ = __type::__fixed_upper_case;
 807       break;
 808     case 'G':
 809       __type_ = __type::__general_upper_case;
 810       break;
 811     case 'X':
 812       __type_ = __type::__hexadecimal_upper_case;
 813       break;
 814     case 'a':
 815       __type_ = __type::__hexfloat_lower_case;
 816       break;
 817     case 'b':
 818       __type_ = __type::__binary_lower_case;
 819       break;
 820     case 'c':
 821       __type_ = __type::__char;
 822       break;
 823     case 'd':
 824       __type_ = __type::__decimal;
 825       break;
 826     case 'e':
 827       __type_ = __type::__scientific_lower_case;
 828       break;
 829     case 'f':
 830       __type_ = __type::__fixed_lower_case;
 831       break;
 832     case 'g':
 833       __type_ = __type::__general_lower_case;
 834       break;
 835     case 'o':
 836       __type_ = __type::__octal;
 837       break;
 838     case 'p':
 839       __type_ = __type::__pointer_lower_case;
 840       break;
 841     case 'P':
 842       __type_ = __type::__pointer_upper_case;
 843       break;
 844     case 's':
 845       __type_ = __type::__string;
 846       break;
 847     case 'x':
 848       __type_ = __type::__hexadecimal_lower_case;
 849       break;
 850 #  if _LIBCPP_STD_VER >= 23
 851     case '?':
 852       __type_ = __type::__debug;
 853       break;
 854 #  endif
 855     default:
 856       return;
 857     }
 858     ++__begin;
 859   }
 860
 861   _LIBCPP_HIDE_FROM_ABI
 862   int32_t __get_width(auto& __ctx) const {
 863     if (!__width_as_arg_)
 864       return __width_;
 865
 866     return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
 867   }
 868
 869   _LIBCPP_HIDE_FROM_ABI
 870   int32_t __get_precision(auto& __ctx) const {
 871     if (!__precision_as_arg_)
 872       return __precision_;
 873
 874     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
 875   }
 876 };
 877
// Validates whether the reserved bitfields don't change the size.
//
// The parser is expected to occupy exactly 16 bytes for every character type
// it is instantiated with. The wchar_t instantiation is only checked when
// _LIBCPP_HAS_NO_WIDE_CHARACTERS is not defined.
static_assert(sizeof(__parser<char>) == 16);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parser<wchar_t>) == 16);
#  endif
 883
 884 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
 885   switch (__type) {
 886   case __format_spec::__type::__default:
 887   case __format_spec::__type::__string:
 888   case __format_spec::__type::__debug:
 889     break;
 890
 891   default:
 892     std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
 893   }
 894 }
 895
 896 template <class _CharT>
 897 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) {
 898   __parser.__validate(__format_spec::__fields_bool, __id);
 899   if (__parser.__alignment_ == __alignment::__default)
 900     __parser.__alignment_ = __alignment::__left;
 901 }
 902
/// Post-processes a parser whose argument is displayed as a character.
///
/// This forwards to \c __process_display_type_bool_string, so the same field
/// validation and default alignment adjustment are applied.
///
/// \param __parser The parser to validate and adjust.
/// \param __id     Forwarded unchanged to the callee.
template <class _CharT>
_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) {
  __format_spec::__process_display_type_bool_string(__parser, __id);
}
 907
 908 template <class _CharT>
 909 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) {
 910   switch (__parser.__type_) {
 911   case __format_spec::__type::__default:
 912   case __format_spec::__type::__string:
 913     __format_spec::__process_display_type_bool_string(__parser, __id);
 914     break;
 915
 916   case __format_spec::__type::__binary_lower_case:
 917   case __format_spec::__type::__binary_upper_case:
 918   case __format_spec::__type::__octal:
 919   case __format_spec::__type::__decimal:
 920   case __format_spec::__type::__hexadecimal_lower_case:
 921   case __format_spec::__type::__hexadecimal_upper_case:
 922     break;
 923
 924   default:
 925     __format_spec::__throw_invalid_type_format_error(__id);
 926   }
 927 }
 928
 929 template <class _CharT>
 930 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) {
 931   switch (__parser.__type_) {
 932   case __format_spec::__type::__default:
 933   case __format_spec::__type::__char:
 934   case __format_spec::__type::__debug:
 935     __format_spec::__process_display_type_char(__parser, __id);
 936     break;
 937
 938   case __format_spec::__type::__binary_lower_case:
 939   case __format_spec::__type::__binary_upper_case:
 940   case __format_spec::__type::__octal:
 941   case __format_spec::__type::__decimal:
 942   case __format_spec::__type::__hexadecimal_lower_case:
 943   case __format_spec::__type::__hexadecimal_upper_case:
 944     break;
 945
 946   default:
 947     __format_spec::__throw_invalid_type_format_error(__id);
 948   }
 949 }
 950
 951 template <class _CharT>
 952 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) {
 953   switch (__parser.__type_) {
 954   case __format_spec::__type::__default:
 955   case __format_spec::__type::__binary_lower_case:
 956   case __format_spec::__type::__binary_upper_case:
 957   case __format_spec::__type::__octal:
 958   case __format_spec::__type::__decimal:
 959   case __format_spec::__type::__hexadecimal_lower_case:
 960   case __format_spec::__type::__hexadecimal_upper_case:
 961     break;
 962
 963   case __format_spec::__type::__char:
 964     __format_spec::__process_display_type_char(__parser, __id);
 965     break;
 966
 967   default:
 968     __format_spec::__throw_invalid_type_format_error(__id);
 969   }
 970 }
 971
 972 template <class _CharT>
 973 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) {
 974   switch (__parser.__type_) {
 975   case __format_spec::__type::__default:
 976   case __format_spec::__type::__hexfloat_lower_case:
 977   case __format_spec::__type::__hexfloat_upper_case:
 978     // Precision specific behavior will be handled later.
 979     break;
 980   case __format_spec::__type::__scientific_lower_case:
 981   case __format_spec::__type::__scientific_upper_case:
 982   case __format_spec::__type::__fixed_lower_case:
 983   case __format_spec::__type::__fixed_upper_case:
 984   case __format_spec::__type::__general_lower_case:
 985   case __format_spec::__type::__general_upper_case:
 986     if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
 987       // Set the default precision for the call to to_chars.
 988       __parser.__precision_ = 6;
 989     break;
 990
 991   default:
 992     __format_spec::__throw_invalid_type_format_error(__id);
 993   }
 994 }
 995
 996 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) {
 997   switch (__type) {
 998   case __format_spec::__type::__default:
 999   case __format_spec::__type::__pointer_lower_case:
1000   case __format_spec::__type::__pointer_upper_case:
1001     break;
1002
1003   default:
1004     __format_spec::__throw_invalid_type_format_error(__id);
1005   }
1006 }
1007
/// The result of a column width estimation.
///
/// Pairs the estimated number of output columns with the position in the
/// input where the estimation stopped.
template <contiguous_iterator _Iterator>
struct __column_width_result {
  /// The number of output columns.
  size_t __width_;
  /// One beyond the last code unit used in the estimation.
  ///
  /// This limits the original output to fit in the wanted number of columns.
  _Iterator __last_;
};

/// Deduction guide; deduces the iterator type from the second initializer.
template <contiguous_iterator _Iterator>
__column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
1020
/// Selects how a column width estimation behaves at its limit.
///
/// Since the estimated width of a single element can be two columns, it's
/// possible that the requested column width can't be achieved exactly.
/// Depending on the intended usage the policy can be selected.
/// - When used as a precision the maximum width may not be exceeded and the
///   result should be "rounded down" to the previous boundary.
/// - When used as a width we're done once the minimum is reached, but
///   exceeding it is not an issue. Rounding down is an issue since that would
///   result in writing fill characters. Therefore the result needs to be
///   "rounded up".
enum class __column_width_rounding { __down, __up };
1031
1032 #  ifndef _LIBCPP_HAS_NO_UNICODE
1033
1034 namespace __detail {
1035 template <contiguous_iterator _Iterator>
1036 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
1037     _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
1038   using _CharT = iter_value_t<_Iterator>;
1039   __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
1040
1041   __column_width_result<_Iterator> __result{0, __first};
1042   while (__result.__last_ != __last && __result.__width_ <= __maximum) {
1043     typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
1044     int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_);
1045
1046     // When the next entry would exceed the maximum width the previous width
1047     // might be returned. For example when a width of 100 is requested the
1048     // returned width might be 99, since the next code point has an estimated
1049     // column width of 2. This depends on the rounding flag.
1050     // When the maximum is exceeded the loop will abort the next iteration.
1051     if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
1052       return __result;
1053
1054     __result.__width_ += __width;
1055     __result.__last_ = __cluster.__last_;
1056   }
1057
1058   return __result;
1059 }
1060
1061 } // namespace __detail
1062
1063 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
1064 // Depending on format the relation between the number of code units stored and
1065 // the number of output columns differs. The first relation is the number of
1066 // code units forming a code point. (The text assumes the code units are
1067 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is a high surrogate, in the range [0xd800,0xdc00), it means the
//   code point uses two code units.
1074 // - UTF-32: The number of code units is always one.
1075 //
// The mapping from a code point to its number of output columns is specified
// in [format.string.std]/11. This list might change in the future.
1078 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in the
// output. For example:
1082 // - an ASCII character with a combined diacritical mark
1083 // - an emoji with a skin tone modifier
1084 // - a group of combined people emoji to create a family
1085 // - a combination of flag emoji
1086 //
1087 // See also:
1088 // - [format.string.general]/11
1089 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
1090 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1091
1092 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
1093
/// Determines the number of output columns needed to render the input.
///
/// \note When the scanner encounters malformed Unicode it acts as-if every
/// code unit is a one column code point. Typically a terminal uses the same
/// strategy and replaces every malformed code unit with a one column
/// replacement character.
///
/// \param __str      The input to estimate.
/// \param __maximum  The maximum number of output columns. Depending on
///                   \a __rounding the returned estimate is bounded by this
///                   value or by this value plus one.
/// \param __rounding Selects the rounding method.
///                   \c __down result.__width_ <= __maximum
///                   \c __up result.__width_ <= __maximum + 1
///
/// \returns The estimated number of columns and an iterator one beyond the
///          last code unit of \a __str that was used for the estimate.
template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
    basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
  // The width estimation is done in two steps:
  // - Quickly process the ASCII part. ASCII has the following properties
  //   - One code unit is one code point
  //   - Every code point has an estimated width of one
  // - When needed it will use a Unicode Grapheme clustering algorithm to find
  //   the proper place for truncation.

  // Nothing to estimate; this also guarantees *__str.begin() is valid below.
  if (__str.empty() || __maximum == 0)
    return {0, __str.begin()};

  // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
  // character they might be part of an extended grapheme cluster. For example:
  //   an ASCII letter and a COMBINING ACUTE ACCENT
  // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
  // need to scan one code unit beyond the requested precision. When this code
  // unit is non-ASCII we omit the current code unit and let the Grapheme
  // clustering algorithm do its work.
  auto __it = __str.begin();
  if (__format_spec::__is_ascii(*__it)) {
    do {
      // Account for the ASCII code unit at __it: one code unit, one column.
      --__maximum;
      ++__it;
      // The entire input was ASCII and fits in the requested maximum.
      if (__it == __str.end())
        return {__str.size(), __str.end()};

      if (__maximum == 0) {
        // The budget is used. When the next code unit is ASCII too it can't
        // extend the previous character, so truncating here is safe.
        if (__format_spec::__is_ascii(*__it))
          return {static_cast<size_t>(__it - __str.begin()), __it};

        break;
      }
    } while (__format_spec::__is_ascii(*__it));
    // *__it is non-ASCII. Hand the preceding ASCII code unit back, together
    // with its column, so the clustering step can combine the two.
    --__it;
    ++__maximum;
  }

  // The number of ASCII code units, and thus columns, handled by the fast
  // path. The remainder is estimated with the Grapheme clustering algorithm.
  ptrdiff_t __ascii_size = __it - __str.begin();
  __column_width_result __result =
      __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);

  __result.__width_ += __ascii_size;
  return __result;
}
1154 #  else // !defined(_LIBCPP_HAS_NO_UNICODE)
1155 template <class _CharT>
1156 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
1157 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
1158   // When Unicode isn't supported assume ASCII and every code unit is one code
1159   // point. In ASCII the estimated column width is always one. Thus there's no
1160   // need for rounding.
1161   size_t __width_ = _VSTD::min(__str.size(), __maximum);
1162   return {__width_, __str.begin() + __width_};
1163 }
1164
1165 #  endif // !defined(_LIBCPP_HAS_NO_UNICODE)
1166
1167 } // namespace __format_spec
1168
1169 #endif //_LIBCPP_STD_VER >= 20
1170
1171 _LIBCPP_END_NAMESPACE_STD
1172
1173 _LIBCPP_POP_MACROS
1174
1175 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H