2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
13 /// \file Contains the std-format-spec parser.
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
19 #include <__algorithm/copy_n.h>
20 #include <__algorithm/min.h>
22 #include <__concepts/arithmetic.h>
23 #include <__concepts/same_as.h>
25 #include <__format/format_arg.h>
26 #include <__format/format_error.h>
27 #include <__format/format_parse_context.h>
28 #include <__format/format_string.h>
29 #include <__format/unicode.h>
30 #include <__format/width_estimation_table.h>
31 #include <__iterator/concepts.h>
32 #include <__iterator/iterator_traits.h> // iter_value_t
33 #include <__memory/addressof.h>
34 #include <__type_traits/common_type.h>
35 #include <__type_traits/is_constant_evaluated.h>
36 #include <__type_traits/is_trivially_copyable.h>
37 #include <__variant/monostate.h>
40 #include <string_view>
42 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
43 # pragma GCC system_header
47 #include <__undef_macros>
49 _LIBCPP_BEGIN_NAMESPACE_STD
51 #if _LIBCPP_STD_VER >= 20
53 namespace __format_spec
{
55 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
inline void
56 __throw_invalid_option_format_error(const char* __id
, const char* __option
) {
57 std::__throw_format_error(
58 (string("The format specifier for ") + __id
+ " does not allow the " + __option
+ " option").c_str());
61 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
inline void __throw_invalid_type_format_error(const char* __id
) {
62 std::__throw_format_error(
63 (string("The type option contains an invalid value for ") + __id
+ " formatting argument").c_str());
66 template <contiguous_iterator _Iterator
, class _ParseContext
>
67 _LIBCPP_HIDE_FROM_ABI
constexpr __format::__parse_number_result
<_Iterator
>
68 __parse_arg_id(_Iterator __begin
, _Iterator __end
, _ParseContext
& __ctx
) {
69 using _CharT
= iter_value_t
<_Iterator
>;
70 // This function is a wrapper to call the real parser. But it does the
71 // validation for the pre-conditions and post-conditions.
73 std::__throw_format_error("End of input while parsing an argument index");
75 __format::__parse_number_result __r
= __format::__parse_arg_id(__begin
, __end
, __ctx
);
77 if (__r
.__last
== __end
|| *__r
.__last
!= _CharT('}'))
78 std::__throw_format_error("The argument index is invalid");
84 template <class _Context
>
85 _LIBCPP_HIDE_FROM_ABI
constexpr uint32_t
86 __substitute_arg_id(basic_format_arg
<_Context
> __format_arg
) {
87 // [format.string.std]/8
88 // If the corresponding formatting argument is not of integral type...
89 // This wording allows char and bool too. LWG-3720 changes the wording to
90 // If the corresponding formatting argument is not of standard signed or
91 // unsigned integer type,
92 // This means the 128-bit will not be valid anymore.
93 // TODO FMT Verify this resolution is accepted and add a test to verify
94 // 128-bit integrals fail and switch to visit_format_arg.
95 return _VSTD::__visit_format_arg(
96 [](auto __arg
) -> uint32_t {
97 using _Type
= decltype(__arg
);
98 if constexpr (same_as
<_Type
, monostate
>)
99 std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
101 // [format.string.std]/8
102 // If { arg-idopt } is used in a width or precision, the value of the
103 // corresponding formatting argument is used in its place. If the
104 // corresponding formatting argument is not of standard signed or unsigned
105 // integer type, or its value is negative for precision or non-positive for
106 // width, an exception of type format_error is thrown.
108 // When an integral is used in a format function, it is stored as one of
109 // the types checked below. Other integral types are promoted. For example,
110 // a signed char is stored as an int.
111 if constexpr (same_as
<_Type
, int> || same_as
<_Type
, unsigned int> || //
112 same_as
<_Type
, long long> || same_as
<_Type
, unsigned long long>) {
113 if constexpr (signed_integral
<_Type
>) {
115 std::__throw_format_error("An argument index may not have a negative value");
118 using _CT
= common_type_t
<_Type
, decltype(__format::__number_max
)>;
119 if (static_cast<_CT
>(__arg
) > static_cast<_CT
>(__format::__number_max
))
120 std::__throw_format_error("The value of the argument index exceeds its maximum value");
124 std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
129 /// These fields are a filter for which elements to parse.
131 /// They default to false so when a new field is added it needs to be opted in
133 // TODO FMT Use an ABI tag for this struct.
135 uint16_t __sign_
: 1 {false};
136 uint16_t __alternate_form_
: 1 {false};
137 uint16_t __zero_padding_
: 1 {false};
138 uint16_t __precision_
: 1 {false};
139 uint16_t __locale_specific_form_
: 1 {false};
140 uint16_t __type_
: 1 {false};
141 // Determines the valid values for fill.
143 // Originally the fill could be any character except { and }. Range-based
144 // formatters use the colon to mark the beginning of the
145 // underlying-format-spec. To avoid parsing ambiguities these formatter
146 // specializations prohibit the use of the colon as a fill character.
147 uint16_t __use_range_fill_
: 1 {false};
148 uint16_t __clear_brackets_
: 1 {false};
149 uint16_t __consume_all_
: 1 {false};
152 // By not placing this constant in the formatter class it's not duplicated for
154 inline constexpr __fields __fields_bool
{.__locale_specific_form_
= true, .__type_
= true, .__consume_all_
= true};
155 inline constexpr __fields __fields_integral
{
157 .__alternate_form_
= true,
158 .__zero_padding_
= true,
159 .__locale_specific_form_
= true,
161 .__consume_all_
= true};
162 inline constexpr __fields __fields_floating_point
{
164 .__alternate_form_
= true,
165 .__zero_padding_
= true,
166 .__precision_
= true,
167 .__locale_specific_form_
= true,
169 .__consume_all_
= true};
170 inline constexpr __fields __fields_string
{.__precision_
= true, .__type_
= true, .__consume_all_
= true};
171 inline constexpr __fields __fields_pointer
{.__zero_padding_
= true, .__type_
= true, .__consume_all_
= true};
173 # if _LIBCPP_STD_VER >= 23
174 inline constexpr __fields __fields_tuple
{.__use_range_fill_
= true, .__clear_brackets_
= true};
175 inline constexpr __fields __fields_range
{.__use_range_fill_
= true, .__clear_brackets_
= true};
176 inline constexpr __fields __fields_fill_align_width
{};
179 enum class __alignment
: uint8_t {
180 /// No alignment is set in the format string.
188 enum class __sign
: uint8_t {
189 /// No sign is set in the format string.
191 /// The sign isn't allowed for certain format-types. By using this value
192 /// it's possible to detect whether or not the user explicitly set the sign
193 /// flag. For formatting purposes it behaves the same as \ref __minus.
200 enum class __type
: uint8_t {
207 __hexadecimal_lower_case
,
208 __hexadecimal_upper_case
,
209 __pointer_lower_case
,
210 __pointer_upper_case
,
212 __hexfloat_lower_case
,
213 __hexfloat_upper_case
,
214 __scientific_lower_case
,
215 __scientific_upper_case
,
218 __general_lower_case
,
219 __general_upper_case
,
223 _LIBCPP_HIDE_FROM_ABI
inline constexpr uint32_t __create_type_mask(__type __t
) {
224 uint32_t __shift
= static_cast<uint32_t>(__t
);
229 std::__throw_format_error("The type does not fit in the mask");
234 inline constexpr uint32_t __type_mask_integer
=
235 __create_type_mask(__type::__binary_lower_case
) | //
236 __create_type_mask(__type::__binary_upper_case
) | //
237 __create_type_mask(__type::__decimal
) | //
238 __create_type_mask(__type::__octal
) | //
239 __create_type_mask(__type::__hexadecimal_lower_case
) | //
240 __create_type_mask(__type::__hexadecimal_upper_case
);
243 __alignment __alignment_
: 3;
245 bool __alternate_form_
: 1;
246 bool __locale_specific_form_
: 1;
251 __alignment __alignment_
: 3;
252 bool __locale_specific_form_
: 1;
254 bool __weekday_name_
: 1;
256 bool __day_of_year_
: 1;
257 bool __week_of_year_
: 1;
258 bool __month_name_
: 1;
261 // The fill UCS scalar value.
263 // This is always an array, with 1, 2, or 4 elements.
264 // The size of the data structure is always 32-bits.
265 template <class _CharT
>
269 struct __code_point
<char> {
270 char __data
[4] = {' '};
273 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
275 struct __code_point
<wchar_t> {
276 wchar_t __data
[4 / sizeof(wchar_t)] = {L
' '};
280 /// Contains the parsed formatting specifications.
282 /// This contains information for both the std-format-spec and the
283 /// chrono-format-spec. This results in some unused members for both
284 /// specifications. However these unused members don't increase the size
285 /// of the structure.
287 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be
289 template <class _CharT
>
290 struct __parsed_specifications
{
292 // The field __alignment_ is the first element in __std_ and __chrono_.
293 // This allows the code to always inspect this value regardless of which member
294 // of the union is the active member [class.union.general]/2.
296 // This is needed since the generic output routines handle the alignment of
298 __alignment __alignment_
: 3;
303 /// The requested width.
305 /// When the format-spec used an arg-id for this field it has already been
306 /// replaced with the value of that arg-id.
309 /// The requested precision.
311 /// When the format-spec used an arg-id for this field it has already been
312 /// replaced with the value of that arg-id.
313 int32_t __precision_
;
315 __code_point
<_CharT
> __fill_
;
317 _LIBCPP_HIDE_FROM_ABI
constexpr bool __has_width() const { return __width_
> 0; }
319 _LIBCPP_HIDE_FROM_ABI
constexpr bool __has_precision() const { return __precision_
>= 0; }
322 // Validate the struct is small and cheap to copy since the struct is passed by
323 // value in formatting functions.
324 static_assert(sizeof(__parsed_specifications
<char>) == 16);
325 static_assert(is_trivially_copyable_v
<__parsed_specifications
<char>>);
326 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
327 static_assert(sizeof(__parsed_specifications
<wchar_t>) == 16);
328 static_assert(is_trivially_copyable_v
<__parsed_specifications
<wchar_t>>);
331 /// The parser for the std-format-spec.
333 /// Note this class is a member of std::formatter specializations. It's
334 /// expected developers will create their own formatter specializations that
335 /// inherit from the std::formatter specializations. This means this class
336 /// must be ABI stable. To aid the stability the unused bits in the class are
337 /// set to zero. That way they can be repurposed if a future revision of the
338 /// Standard adds new fields to std-format-spec.
339 template <class _CharT
>
340 class _LIBCPP_TEMPLATE_VIS __parser
{
342 // Parses the format specification.
344 // Depending on whether the parsing is done compile-time or run-time
345 // the method slightly differs.
346 // - Only parses a field when it is in the __fields. Accepting all
347 // fields and then validating the valid ones has a performance impact.
348 // This is faster but gives slightly worse error messages.
349 // - At compile-time when a field is not accepted the parser will still
350 // parse it and give an error when it's present. This gives a more
352 // The idea is that most times the format instead of the vformat
353 // functions are used. In that case the error will be detected during
354 // compilation and there is no need to pay for the run-time overhead.
355 template <class _ParseContext
>
356 _LIBCPP_HIDE_FROM_ABI
constexpr typename
_ParseContext::iterator
__parse(_ParseContext
& __ctx
, __fields __fields
) {
357 auto __begin
= __ctx
.begin();
358 auto __end
= __ctx
.end();
359 if (__begin
== __end
)
362 if (__parse_fill_align(__begin
, __end
, __fields
.__use_range_fill_
) && __begin
== __end
)
365 if (__fields
.__sign_
) {
366 if (__parse_sign(__begin
) && __begin
== __end
)
368 } else if (std::is_constant_evaluated() && __parse_sign(__begin
)) {
369 std::__throw_format_error("The format specification does not allow the sign option");
372 if (__fields
.__alternate_form_
) {
373 if (__parse_alternate_form(__begin
) && __begin
== __end
)
375 } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin
)) {
376 std::__throw_format_error("The format specifier does not allow the alternate form option");
379 if (__fields
.__zero_padding_
) {
380 if (__parse_zero_padding(__begin
) && __begin
== __end
)
382 } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin
)) {
383 std::__throw_format_error("The format specifier does not allow the zero-padding option");
386 if (__parse_width(__begin
, __end
, __ctx
) && __begin
== __end
)
389 if (__fields
.__precision_
) {
390 if (__parse_precision(__begin
, __end
, __ctx
) && __begin
== __end
)
392 } else if (std::is_constant_evaluated() && __parse_precision(__begin
, __end
, __ctx
)) {
393 std::__throw_format_error("The format specifier does not allow the precision option");
396 if (__fields
.__locale_specific_form_
) {
397 if (__parse_locale_specific_form(__begin
) && __begin
== __end
)
399 } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin
)) {
400 std::__throw_format_error("The format specifier does not allow the locale-specific form option");
403 if (__fields
.__clear_brackets_
) {
404 if (__parse_clear_brackets(__begin
) && __begin
== __end
)
406 } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin
)) {
407 std::__throw_format_error("The format specifier does not allow the n option");
410 if (__fields
.__type_
)
411 __parse_type(__begin
);
413 if (!__fields
.__consume_all_
)
416 if (__begin
!= __end
&& *__begin
!= _CharT('}'))
417 std::__throw_format_error("The format specifier should consume the input or end with a '}'");
422 // Validates the parsed data against the selected fields.
424 // The valid fields in the parser may depend on the display type
425 // selected. But the type is the last optional field, so by the time
426 // it's known an option can't be used, it already has been parsed.
427 // This does the validation again.
429 // For example an integral may have a sign, zero-padding, or alternate
430 // form when the type option is not 'c'. So the generic approach is:
432 // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
433 // if (__parser.__type_ == __format_spec::__type::__char) {
434 // __parser.__validate(__format_spec::__fields_bool, "an integer");
435 // ... // more char adjustments
437 // ... // validate an integral type.
440 // For some types all valid options need a second validation run, like
443 // Depending on whether the validation is done at compile-time or
444 // run-time the error differs
445 // - run-time the exception is thrown and contains the type of field
447 // - at compile-time the line with `std::__throw_format_error` is shown
448 // in the output. In that case it's important for the error to be on one
450 // Note future versions of C++ may allow better compile-time error
452 _LIBCPP_HIDE_FROM_ABI
constexpr void
453 __validate(__fields __fields
, const char* __id
, uint32_t __type_mask
= -1) const {
454 if (!__fields
.__sign_
&& __sign_
!= __sign::__default
) {
455 if (std::is_constant_evaluated())
456 std::__throw_format_error("The format specifier does not allow the sign option");
458 __format_spec::__throw_invalid_option_format_error(__id
, "sign");
461 if (!__fields
.__alternate_form_
&& __alternate_form_
) {
462 if (std::is_constant_evaluated())
463 std::__throw_format_error("The format specifier does not allow the alternate form option");
465 __format_spec::__throw_invalid_option_format_error(__id
, "alternate form");
468 if (!__fields
.__zero_padding_
&& __alignment_
== __alignment::__zero_padding
) {
469 if (std::is_constant_evaluated())
470 std::__throw_format_error("The format specifier does not allow the zero-padding option");
472 __format_spec::__throw_invalid_option_format_error(__id
, "zero-padding");
475 if (!__fields
.__precision_
&& __precision_
!= -1) { // Works both when the precision has a value or an arg-id.
476 if (std::is_constant_evaluated())
477 std::__throw_format_error("The format specifier does not allow the precision option");
479 __format_spec::__throw_invalid_option_format_error(__id
, "precision");
482 if (!__fields
.__locale_specific_form_
&& __locale_specific_form_
) {
483 if (std::is_constant_evaluated())
484 std::__throw_format_error("The format specifier does not allow the locale-specific form option");
486 __format_spec::__throw_invalid_option_format_error(__id
, "locale-specific form");
489 if ((__create_type_mask(__type_
) & __type_mask
) == 0) {
490 if (std::is_constant_evaluated())
491 std::__throw_format_error("The format specifier uses an invalid value for the type option");
493 __format_spec::__throw_invalid_type_format_error(__id
);
497 /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
498 _LIBCPP_HIDE_FROM_ABI
499 __parsed_specifications
<_CharT
> __get_parsed_std_specifications(auto& __ctx
) const {
500 return __parsed_specifications
<_CharT
>{
501 .__std_
= __std
{.__alignment_
= __alignment_
,
503 .__alternate_form_
= __alternate_form_
,
504 .__locale_specific_form_
= __locale_specific_form_
,
506 .__width_
{__get_width(__ctx
)},
507 .__precision_
{__get_precision(__ctx
)},
511 _LIBCPP_HIDE_FROM_ABI __parsed_specifications
<_CharT
> __get_parsed_chrono_specifications(auto& __ctx
) const {
512 return __parsed_specifications
<_CharT
>{
514 __chrono
{.__alignment_
= __alignment_
,
515 .__locale_specific_form_
= __locale_specific_form_
,
517 .__weekday_name_
= __weekday_name_
,
518 .__weekday_
= __weekday_
,
519 .__day_of_year_
= __day_of_year_
,
520 .__week_of_year_
= __week_of_year_
,
521 .__month_name_
= __month_name_
},
522 .__width_
{__get_width(__ctx
)},
523 .__precision_
{__get_precision(__ctx
)},
527 __alignment __alignment_
: 3 {__alignment::__default
};
528 __sign __sign_
: 2 {__sign::__default
};
529 bool __alternate_form_
: 1 {false};
530 bool __locale_specific_form_
: 1 {false};
531 bool __clear_brackets_
: 1 {false};
532 __type __type_
{__type::__default
};
534 // These flags are only used for formatting chrono. Since the struct has
535 // padding space left it's added to this structure.
536 bool __hour_
: 1 {false};
538 bool __weekday_name_
: 1 {false};
539 bool __weekday_
: 1 {false};
541 bool __day_of_year_
: 1 {false};
542 bool __week_of_year_
: 1 {false};
544 bool __month_name_
: 1 {false};
546 uint8_t __reserved_0_
: 2 {0};
547 uint8_t __reserved_1_
: 6 {0};
548 // These two flags are only used internally and not part of the
549 // __parsed_specifications. Therefore put them at the end.
550 bool __width_as_arg_
: 1 {false};
551 bool __precision_as_arg_
: 1 {false};
553 /// The requested width, either the value or the arg-id.
556 /// The requested precision, either the value or the arg-id.
557 int32_t __precision_
{-1};
559 __code_point
<_CharT
> __fill_
{};
562 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_alignment(_CharT __c
) {
565 __alignment_
= __alignment::__left
;
569 __alignment_
= __alignment::__center
;
573 __alignment_
= __alignment::__right
;
579 _LIBCPP_HIDE_FROM_ABI
constexpr void __validate_fill_character(_CharT __fill
, bool __use_range_fill
) {
580 // The forbidden fill characters all code points formed from a single code unit, thus the
581 // check can be omitted when more code units are used.
582 if (__use_range_fill
&& (__fill
== _CharT('{') || __fill
== _CharT('}') || __fill
== _CharT(':')))
583 std::__throw_format_error("The fill option contains an invalid value");
584 else if (__fill
== _CharT('{') || __fill
== _CharT('}'))
585 std::__throw_format_error("The fill option contains an invalid value");
588 # ifndef _LIBCPP_HAS_NO_UNICODE
589 // range-fill and tuple-fill are identical
590 template <contiguous_iterator _Iterator
>
591 requires same_as
<_CharT
, char>
592 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
593 || (same_as
<_CharT
, wchar_t> && sizeof(wchar_t) == 2)
595 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_fill_align(_Iterator
& __begin
, _Iterator __end
, bool __use_range_fill
) {
596 _LIBCPP_ASSERT_UNCATEGORIZED(__begin
!= __end
,
597 "when called with an empty input the function will cause "
598 "undefined behavior by evaluating data not in the input");
599 __unicode::__code_point_view
<_CharT
> __view
{__begin
, __end
};
600 __unicode::__consume_result __consumed
= __view
.__consume();
601 if (__consumed
.__status
!= __unicode::__consume_result::__ok
)
602 std::__throw_format_error("The format specifier contains malformed Unicode characters");
604 if (__view
.__position() < __end
&& __parse_alignment(*__view
.__position())) {
605 ptrdiff_t __code_units
= __view
.__position() - __begin
;
606 if (__code_units
== 1)
607 // The forbidden fill characters all are code points encoded
608 // in one code unit, thus the check can be omitted when more
609 // code units are used.
610 __validate_fill_character(*__begin
, __use_range_fill
);
612 std::copy_n(__begin
, __code_units
, std::addressof(__fill_
.__data
[0]));
613 __begin
+= __code_units
+ 1;
617 if (!__parse_alignment(*__begin
))
624 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
625 template <contiguous_iterator _Iterator
>
626 requires(same_as
<_CharT
, wchar_t> && sizeof(wchar_t) == 4)
627 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_fill_align(_Iterator
& __begin
, _Iterator __end
, bool __use_range_fill
) {
628 _LIBCPP_ASSERT_UNCATEGORIZED(__begin
!= __end
,
629 "when called with an empty input the function will cause "
630 "undefined behavior by evaluating data not in the input");
631 if (__begin
+ 1 != __end
&& __parse_alignment(*(__begin
+ 1))) {
632 if (!__unicode::__is_scalar_value(*__begin
))
633 std::__throw_format_error("The fill option contains an invalid value");
635 __validate_fill_character(*__begin
, __use_range_fill
);
637 __fill_
.__data
[0] = *__begin
;
642 if (!__parse_alignment(*__begin
))
649 # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
651 # else // _LIBCPP_HAS_NO_UNICODE
652 // range-fill and tuple-fill are identical
653 template <contiguous_iterator _Iterator
>
654 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_fill_align(_Iterator
& __begin
, _Iterator __end
, bool __use_range_fill
) {
655 _LIBCPP_ASSERT_UNCATEGORIZED(__begin
!= __end
,
656 "when called with an empty input the function will cause "
657 "undefined behavior by evaluating data not in the input");
658 if (__begin
+ 1 != __end
) {
659 if (__parse_alignment(*(__begin
+ 1))) {
660 __validate_fill_character(*__begin
, __use_range_fill
);
662 __fill_
.__data
[0] = *__begin
;
668 if (!__parse_alignment(*__begin
))
675 # endif // _LIBCPP_HAS_NO_UNICODE
677 template <contiguous_iterator _Iterator
>
678 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_sign(_Iterator
& __begin
) {
681 __sign_
= __sign::__minus
;
684 __sign_
= __sign::__plus
;
687 __sign_
= __sign::__space
;
696 template <contiguous_iterator _Iterator
>
697 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_alternate_form(_Iterator
& __begin
) {
698 if (*__begin
!= _CharT('#'))
701 __alternate_form_
= true;
706 template <contiguous_iterator _Iterator
>
707 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_zero_padding(_Iterator
& __begin
) {
708 if (*__begin
!= _CharT('0'))
711 if (__alignment_
== __alignment::__default
)
712 __alignment_
= __alignment::__zero_padding
;
717 template <contiguous_iterator _Iterator
>
718 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_width(_Iterator
& __begin
, _Iterator __end
, auto& __ctx
) {
719 if (*__begin
== _CharT('0'))
720 std::__throw_format_error("The width option should not have a leading zero");
722 if (*__begin
== _CharT('{')) {
723 __format::__parse_number_result __r
= __format_spec::__parse_arg_id(++__begin
, __end
, __ctx
);
724 __width_as_arg_
= true;
725 __width_
= __r
.__value
;
726 __begin
= __r
.__last
;
730 if (*__begin
< _CharT('0') || *__begin
> _CharT('9'))
733 __format::__parse_number_result __r
= __format::__parse_number(__begin
, __end
);
734 __width_
= __r
.__value
;
735 _LIBCPP_ASSERT_UNCATEGORIZED(__width_
!= 0, "A zero value isn't allowed and should be impossible, "
736 "due to validations in this function");
737 __begin
= __r
.__last
;
741 template <contiguous_iterator _Iterator
>
742 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_precision(_Iterator
& __begin
, _Iterator __end
, auto& __ctx
) {
743 if (*__begin
!= _CharT('.'))
747 if (__begin
== __end
)
748 std::__throw_format_error("End of input while parsing format specifier precision");
750 if (*__begin
== _CharT('{')) {
751 __format::__parse_number_result __arg_id
= __format_spec::__parse_arg_id(++__begin
, __end
, __ctx
);
752 __precision_as_arg_
= true;
753 __precision_
= __arg_id
.__value
;
754 __begin
= __arg_id
.__last
;
758 if (*__begin
< _CharT('0') || *__begin
> _CharT('9'))
759 std::__throw_format_error("The precision option does not contain a value or an argument index");
761 __format::__parse_number_result __r
= __format::__parse_number(__begin
, __end
);
762 __precision_
= __r
.__value
;
763 __precision_as_arg_
= false;
764 __begin
= __r
.__last
;
768 template <contiguous_iterator _Iterator
>
769 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_locale_specific_form(_Iterator
& __begin
) {
770 if (*__begin
!= _CharT('L'))
773 __locale_specific_form_
= true;
778 template <contiguous_iterator _Iterator
>
779 _LIBCPP_HIDE_FROM_ABI
constexpr bool __parse_clear_brackets(_Iterator
& __begin
) {
780 if (*__begin
!= _CharT('n'))
783 __clear_brackets_
= true;
788 template <contiguous_iterator _Iterator
>
789 _LIBCPP_HIDE_FROM_ABI
constexpr void __parse_type(_Iterator
& __begin
) {
790 // Determines the type. It does not validate whether the selected type is
791 // valid. Most formatters have optional fields that are only allowed for
792 // certain types. These parsers need to do validation after the type has
793 // been parsed. So it's easier to implement the validation for all types in
794 // the specific parse function.
797 __type_
= __type::__hexfloat_upper_case
;
800 __type_
= __type::__binary_upper_case
;
803 __type_
= __type::__scientific_upper_case
;
806 __type_
= __type::__fixed_upper_case
;
809 __type_
= __type::__general_upper_case
;
812 __type_
= __type::__hexadecimal_upper_case
;
815 __type_
= __type::__hexfloat_lower_case
;
818 __type_
= __type::__binary_lower_case
;
821 __type_
= __type::__char
;
824 __type_
= __type::__decimal
;
827 __type_
= __type::__scientific_lower_case
;
830 __type_
= __type::__fixed_lower_case
;
833 __type_
= __type::__general_lower_case
;
836 __type_
= __type::__octal
;
839 __type_
= __type::__pointer_lower_case
;
842 __type_
= __type::__pointer_upper_case
;
845 __type_
= __type::__string
;
848 __type_
= __type::__hexadecimal_lower_case
;
850 # if _LIBCPP_STD_VER >= 23
852 __type_
= __type::__debug
;
861 _LIBCPP_HIDE_FROM_ABI
862 int32_t __get_width(auto& __ctx
) const {
863 if (!__width_as_arg_
)
866 return __format_spec::__substitute_arg_id(__ctx
.arg(__width_
));
869 _LIBCPP_HIDE_FROM_ABI
870 int32_t __get_precision(auto& __ctx
) const {
871 if (!__precision_as_arg_
)
874 return __format_spec::__substitute_arg_id(__ctx
.arg(__precision_
));
878 // Validates whether the reserved bitfields don't change the size.
879 static_assert(sizeof(__parser
<char>) == 16);
880 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
881 static_assert(sizeof(__parser
<wchar_t>) == 16);
884 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_display_type_string(__format_spec::__type __type
) {
886 case __format_spec::__type::__default
:
887 case __format_spec::__type::__string
:
888 case __format_spec::__type::__debug
:
892 std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
896 template <class _CharT
>
897 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_display_type_bool_string(__parser
<_CharT
>& __parser
, const char* __id
) {
898 __parser
.__validate(__format_spec::__fields_bool
, __id
);
899 if (__parser
.__alignment_
== __alignment::__default
)
900 __parser
.__alignment_
= __alignment::__left
;
903 template <class _CharT
>
904 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_display_type_char(__parser
<_CharT
>& __parser
, const char* __id
) {
905 __format_spec::__process_display_type_bool_string(__parser
, __id
);
908 template <class _CharT
>
909 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_parsed_bool(__parser
<_CharT
>& __parser
, const char* __id
) {
910 switch (__parser
.__type_
) {
911 case __format_spec::__type::__default
:
912 case __format_spec::__type::__string
:
913 __format_spec::__process_display_type_bool_string(__parser
, __id
);
916 case __format_spec::__type::__binary_lower_case
:
917 case __format_spec::__type::__binary_upper_case
:
918 case __format_spec::__type::__octal
:
919 case __format_spec::__type::__decimal
:
920 case __format_spec::__type::__hexadecimal_lower_case
:
921 case __format_spec::__type::__hexadecimal_upper_case
:
925 __format_spec::__throw_invalid_type_format_error(__id
);
929 template <class _CharT
>
930 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_parsed_char(__parser
<_CharT
>& __parser
, const char* __id
) {
931 switch (__parser
.__type_
) {
932 case __format_spec::__type::__default
:
933 case __format_spec::__type::__char
:
934 case __format_spec::__type::__debug
:
935 __format_spec::__process_display_type_char(__parser
, __id
);
938 case __format_spec::__type::__binary_lower_case
:
939 case __format_spec::__type::__binary_upper_case
:
940 case __format_spec::__type::__octal
:
941 case __format_spec::__type::__decimal
:
942 case __format_spec::__type::__hexadecimal_lower_case
:
943 case __format_spec::__type::__hexadecimal_upper_case
:
947 __format_spec::__throw_invalid_type_format_error(__id
);
951 template <class _CharT
>
952 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_parsed_integer(__parser
<_CharT
>& __parser
, const char* __id
) {
953 switch (__parser
.__type_
) {
954 case __format_spec::__type::__default
:
955 case __format_spec::__type::__binary_lower_case
:
956 case __format_spec::__type::__binary_upper_case
:
957 case __format_spec::__type::__octal
:
958 case __format_spec::__type::__decimal
:
959 case __format_spec::__type::__hexadecimal_lower_case
:
960 case __format_spec::__type::__hexadecimal_upper_case
:
963 case __format_spec::__type::__char
:
964 __format_spec::__process_display_type_char(__parser
, __id
);
968 __format_spec::__throw_invalid_type_format_error(__id
);
972 template <class _CharT
>
973 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_parsed_floating_point(__parser
<_CharT
>& __parser
, const char* __id
) {
974 switch (__parser
.__type_
) {
975 case __format_spec::__type::__default
:
976 case __format_spec::__type::__hexfloat_lower_case
:
977 case __format_spec::__type::__hexfloat_upper_case
:
978 // Precision specific behavior will be handled later.
980 case __format_spec::__type::__scientific_lower_case
:
981 case __format_spec::__type::__scientific_upper_case
:
982 case __format_spec::__type::__fixed_lower_case
:
983 case __format_spec::__type::__fixed_upper_case
:
984 case __format_spec::__type::__general_lower_case
:
985 case __format_spec::__type::__general_upper_case
:
986 if (!__parser
.__precision_as_arg_
&& __parser
.__precision_
== -1)
987 // Set the default precision for the call to to_chars.
988 __parser
.__precision_
= 6;
992 __format_spec::__throw_invalid_type_format_error(__id
);
996 _LIBCPP_HIDE_FROM_ABI
constexpr void __process_display_type_pointer(__format_spec::__type __type
, const char* __id
) {
998 case __format_spec::__type::__default
:
999 case __format_spec::__type::__pointer_lower_case
:
1000 case __format_spec::__type::__pointer_upper_case
:
1004 __format_spec::__throw_invalid_type_format_error(__id
);
1008 template <contiguous_iterator _Iterator
>
1009 struct __column_width_result
{
1010 /// The number of output columns.
1012 /// One beyond the last code unit used in the estimation.
1014 /// This limits the original output to fit in the wanted number of columns.
1018 template <contiguous_iterator _Iterator
>
1019 __column_width_result(size_t, _Iterator
) -> __column_width_result
<_Iterator
>;
/// Since a column width can be two it's possible that the requested column
/// width can't be achieved. Depending on the intended usage the policy can be
/// different.
/// - When used as precision the maximum width may not be exceeded and the
///   result should be "rounded down" to the previous boundary.
/// - When used as a width we're done once the minimum is reached, but
///   exceeding is not an issue. Rounding down is an issue since that will
///   result in writing fill characters. Therefore the result needs to be
///   "rounded up".
enum class __column_width_rounding {
  /// Never exceed the requested number of columns.
  __down,
  /// Allow exceeding the requested number of columns.
  __up
};
1032 # ifndef _LIBCPP_HAS_NO_UNICODE
1034 namespace __detail
{
1035 template <contiguous_iterator _Iterator
>
1036 _LIBCPP_HIDE_FROM_ABI
constexpr __column_width_result
<_Iterator
> __estimate_column_width_grapheme_clustering(
1037 _Iterator __first
, _Iterator __last
, size_t __maximum
, __column_width_rounding __rounding
) noexcept
{
1038 using _CharT
= iter_value_t
<_Iterator
>;
1039 __unicode::__extended_grapheme_cluster_view
<_CharT
> __view
{__first
, __last
};
1041 __column_width_result
<_Iterator
> __result
{0, __first
};
1042 while (__result
.__last_
!= __last
&& __result
.__width_
<= __maximum
) {
1043 typename
__unicode::__extended_grapheme_cluster_view
<_CharT
>::__cluster __cluster
= __view
.__consume();
1044 int __width
= __width_estimation_table::__estimated_width(__cluster
.__code_point_
);
1046 // When the next entry would exceed the maximum width the previous width
1047 // might be returned. For example when a width of 100 is requested the
1048 // returned width might be 99, since the next code point has an estimated
1049 // column width of 2. This depends on the rounding flag.
1050 // When the maximum is exceeded the loop will abort the next iteration.
1051 if (__rounding
== __column_width_rounding::__down
&& __result
.__width_
+ __width
> __maximum
)
1054 __result
.__width_
+= __width
;
1055 __result
.__last_
= __cluster
.__last_
;
1061 } // namespace __detail
// Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
// Depending on format the relation between the number of code units stored and
// the number of output columns differs. The first relation is the number of
// code units forming a code point. (The text assumes the code units are
// Unicode code units.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points (U+0000 to U+007F) match the ASCII character set. When
//   the highest bit is set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is a high surrogate, in the range [0xd800, 0xdc00), it means the
//   code point uses two code units.
// - UTF-32: The number of code units is always one.
//
// The mapping from a code point to its number of columns is specified in
// [format.string.std]/11. This list might change in the future.
//
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in the
// output. For example:
// - an ASCII character with a combined diacritical mark
// - an emoji with a skin tone modifier
// - a group of combined people emoji to create a family
// - a combination of flag emoji
//
// See also:
// - [format.string.general]/11
// - https://en.wikipedia.org/wiki/UTF-8#Encoding
// - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
/// Returns whether \a __c is in the ASCII range, i.e. smaller than 0x80.
_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
1094 /// Determines the number of output columns needed to render the input.
1096 /// \note When the scanner encounters malformed Unicode it acts as-if every
1097 /// code unit is a one column code point. Typically a terminal uses the same
1098 /// strategy and replaces every malformed code unit with a one column
1099 /// replacement character.
1101 /// \param __first Points to the first element of the input range.
1102 /// \param __last Points beyond the last element of the input range.
1103 /// \param __maximum The maximum number of output columns. The returned number
1104 /// of estimated output columns will not exceed this value.
1105 /// \param __rounding Selects the rounding method.
1106 /// \c __down result.__width_ <= __maximum
1107 /// \c __up result.__width_ <= __maximum + 1
1108 template <class _CharT
, class _Iterator
= typename basic_string_view
<_CharT
>::const_iterator
>
1109 _LIBCPP_HIDE_FROM_ABI
constexpr __column_width_result
<_Iterator
> __estimate_column_width(
1110 basic_string_view
<_CharT
> __str
, size_t __maximum
, __column_width_rounding __rounding
) noexcept
{
1111 // The width estimation is done in two steps:
1112 // - Quickly process for the ASCII part. ASCII has the following properties
1113 // - One code unit is one code point
1114 // - Every code point has an estimated width of one
1115 // - When needed it will a Unicode Grapheme clustering algorithm to find
1116 // the proper place for truncation.
1118 if (__str
.empty() || __maximum
== 0)
1119 return {0, __str
.begin()};
1121 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
1122 // character they might be part of an extended grapheme cluster. For example:
1123 // an ASCII letter and a COMBINING ACUTE ACCENT
1124 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
1125 // need to scan one code unit beyond the requested precision. When this code
1126 // unit is non-ASCII we omit the current code unit and let the Grapheme
1127 // clustering algorithm do its work.
1128 auto __it
= __str
.begin();
1129 if (__format_spec::__is_ascii(*__it
)) {
1133 if (__it
== __str
.end())
1134 return {__str
.size(), __str
.end()};
1136 if (__maximum
== 0) {
1137 if (__format_spec::__is_ascii(*__it
))
1138 return {static_cast<size_t>(__it
- __str
.begin()), __it
};
1142 } while (__format_spec::__is_ascii(*__it
));
1147 ptrdiff_t __ascii_size
= __it
- __str
.begin();
1148 __column_width_result __result
=
1149 __detail::__estimate_column_width_grapheme_clustering(__it
, __str
.end(), __maximum
, __rounding
);
1151 __result
.__width_
+= __ascii_size
;
1154 # else // !defined(_LIBCPP_HAS_NO_UNICODE)
1155 template <class _CharT
>
1156 _LIBCPP_HIDE_FROM_ABI
constexpr __column_width_result
<typename basic_string_view
<_CharT
>::const_iterator
>
1157 __estimate_column_width(basic_string_view
<_CharT
> __str
, size_t __maximum
, __column_width_rounding
) noexcept
{
1158 // When Unicode isn't supported assume ASCII and every code unit is one code
1159 // point. In ASCII the estimated column width is always one. Thus there's no
1160 // need for rounding.
1161 size_t __width_
= _VSTD::min(__str
.size(), __maximum
);
1162 return {__width_
, __str
.begin() + __width_
};
1165 # endif // !defined(_LIBCPP_HAS_NO_UNICODE)
1167 } // namespace __format_spec
1169 #endif //_LIBCPP_STD_VER >= 20
1171 _LIBCPP_END_NAMESPACE_STD
1175 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H