1 //===-- Format string parser for printf -------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
10 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
12 #include "include/llvm-libc-macros/stdfix-macros.h"
13 #include "src/__support/CPP/algorithm.h" // max
14 #include "src/__support/CPP/limits.h"
15 #include "src/__support/CPP/optional.h"
16 #include "src/__support/CPP/type_traits.h"
17 #include "src/__support/macros/config.h"
18 #include "src/__support/str_to_integer.h"
19 #include "src/stdio/printf_core/core_structs.h"
20 #include "src/stdio/printf_core/printf_config.h"
24 #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
25 #include "src/__support/fixed_point/fx_rep.h"
26 #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
27 #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
28 #include "src/errno/libc_errno.h"
29 #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
31 namespace LIBC_NAMESPACE_DECL
{
32 namespace printf_core
{
// int_type_of maps an argument type T to the type that WRITE_ARG_VAL_SIMPLEST
// bit_casts a fetched argument to before assigning it to its destination.
// NOTE(review): the body of the primary template and the closing braces of the
// specializations are not visible in this extract.
34 template <typename T
> struct int_type_of
{
// double maps to the StorageType of fputil::FPBits<double>.
37 template <> struct int_type_of
<double> {
38 using type
= fputil::FPBits
<double>::StorageType
;
// long double maps to the StorageType of fputil::FPBits<long double>.
40 template <> struct int_type_of
<long double> {
41 using type
= fputil::FPBits
<long double>::StorageType
;
44 #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
// Partial specialization of int_type_of written in terms of
// cpp::is_fixed_point_v<T>: the mapped type is the StorageType of
// fixed_point::FXRep<T>.
// NOTE(review): the template header line that declares T is not visible in
// this extract.
46 struct int_type_of
<cpp::enable_if
<cpp::is_fixed_point_v
<T
>, T
>> {
47 using type
= typename
fixed_point::FXRep
<T
>::StorageType
;
49 #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
// Shorthand alias for int_type_of<T>::type.
51 template <typename T
> using int_type_of_v
= typename int_type_of
<T
>::type
;
// WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) fetches an argument of type
// arg_type, bit_casts it to int_type_of_v<arg_type> and assigns it to dst.
// With index mode enabled the value comes from get_arg_value<arg_type>(index);
// when that returns no value, section.has_conv is set to false. With index
// mode disabled the third parameter is ignored and the value comes from
// get_next_arg_value<arg_type>().
// NOTE(review): the braces, the else branch and the #else that separates the
// two definitions are not visible in this extract.
53 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
54 #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) \
56 auto temp = get_arg_value<arg_type>(index); \
57 if (!temp.has_value()) { \
58 section.has_conv = false; \
60 dst = cpp::bit_cast<int_type_of_v<arg_type>>(temp.value()); \
64 #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _) \
65 dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>())
66 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
// Parser walks a printf format string (str) and fetches the matching arguments
// from an ArgProvider.
// NOTE(review): the declarations of cur_pos and args_cur, which the methods of
// this class use, are not visible in this extract.
68 template <typename ArgProvider
> class Parser
{
// The format string being parsed.
69 const char *__restrict str
;
74 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
75 // args_start stores the start of the va_args, which allows getting the
76 // value of arguments that have already been passed. args_index is tracked so
77 // that we know which argument args_cur is on.
78 ArgProvider args_start
;
// Argument indexes are 1-based, so tracking starts at 1.
79 size_t args_index
= 1;
81 // Defined in printf_config.h
82 static constexpr size_t DESC_ARR_LEN
= LIBC_COPT_PRINTF_INDEX_ARR_LEN
;
84 // desc_arr stores the sizes of the variables in the ArgProvider. This is used
85 // in index mode to reduce repeated string parsing. The sizes are stored as
86 // TypeDesc objects, which store the size as well as minimal type information.
87 // This is necessary because some systems separate the floating point and
88 // integer values in va_args.
89 TypeDesc desc_arr
[DESC_ARR_LEN
] = {type_desc_from_type
<void>()};
91 // TODO: Look into object stores for optimization.
93 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
// Constructors. Both take the format string and the argument provider and
// initialize str and args_cur from them. The first variant also copies args
// into args_start, which only exists when index mode is enabled.
// NOTE(review): the preprocessor line that separates the two variants is not
// visible in this extract.
96 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
97 LIBC_INLINE
Parser(const char *__restrict new_str
, ArgProvider
&args
)
98 : str(new_str
), args_cur(args
), args_start(args
) {}
100 LIBC_INLINE
Parser(const char *__restrict new_str
, ArgProvider
&args
)
101 : str(new_str
), args_cur(args
) {}
102 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
104 // get_next_section will parse the format string until it has a fully
105 // specified format section. This can either be a raw format section with no
106 // conversion, or a format section with a conversion that has all of its
107 // variables stored in the format section.
// The returned FormatSection has has_conv set for sections that start with a
// percent sign; it is cleared again when the conversion is not recognized or,
// in index mode, when an argument cannot be fetched. raw_string always covers
// the text from the starting position up to the final cur_pos.
// NOTE(review): several lines of this function (case labels, cursor
// increments, closing braces) are not visible in this extract.
108 LIBC_INLINE FormatSection
get_next_section() {
109 FormatSection section
;
110 size_t starting_pos
= cur_pos
;
111 if (str
[cur_pos
] == '%') {
113 section
.has_conv
= true;
// conv_index stays 0 unless parse_index finds an explicit index.
116 [[maybe_unused
]] size_t conv_index
= 0;
118 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
119 conv_index
= parse_index(&cur_pos
);
120 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
122 section
.flags
= parse_flags(&cur_pos
);
// Minimum width: either read from an int argument or parsed from the digits in
// the format string.
125 section
.min_width
= 0;
126 if (str
[cur_pos
] == '*') {
129 WRITE_ARG_VAL_SIMPLEST(section
.min_width
, int, parse_index(&cur_pos
));
130 } else if (internal::isdigit(str
[cur_pos
])) {
131 auto result
= internal::strtointeger
<int>(str
+ cur_pos
, 10);
132 section
.min_width
= result
.value
;
133 cur_pos
= cur_pos
+ result
.parsed_len
;
// A negative width sets LEFT_JUSTIFIED. The ternary below yields INT_MAX for
// INT_MIN (which cannot be negated) and the negated width otherwise.
135 if (section
.min_width
< 0) {
137 (section
.min_width
== INT_MIN
) ? INT_MAX
: -section
.min_width
;
138 section
.flags
= static_cast<FormatFlags
>(section
.flags
|
139 FormatFlags::LEFT_JUSTIFIED
);
143 section
.precision
= -1; // negative precisions are ignored.
144 if (str
[cur_pos
] == '.') {
146 section
.precision
= 0; // if there's a . but no specified precision, the
147 // precision is implicitly 0.
148 if (str
[cur_pos
] == '*') {
151 WRITE_ARG_VAL_SIMPLEST(section
.precision
, int, parse_index(&cur_pos
));
153 } else if (internal::isdigit(str
[cur_pos
])) {
154 auto result
= internal::strtointeger
<int>(str
+ cur_pos
, 10);
155 section
.precision
= result
.value
;
156 cur_pos
= cur_pos
+ result
.parsed_len
;
// Length modifier (lm) and, for the w/wf modifiers, the bit width (bw).
160 auto [lm
, bw
] = parse_length_modifier(&cur_pos
);
161 section
.length_modifier
= lm
;
162 section
.conv_name
= str
[cur_pos
];
163 section
.bit_width
= bw
;
// Dispatch on the conversion character to fetch an argument of the right type.
164 switch (str
[cur_pos
]) {
166 // Regardless of options, a % conversion is always safe. The standard
167 // says that "The complete conversion specification shall be %%" but it
168 // also says that "If a conversion specification is invalid, the
169 // behavior is undefined." Based on that we define that any conversion
170 // specification ending in '%' shall display as '%' regardless of any
171 // valid or invalid options.
172 section
.has_conv
= true;
175 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, int, conv_index
);
// Integer conversions: the argument type fetched depends on the length
// modifier.
186 case (LengthModifier::hh
):
187 case (LengthModifier::h
):
188 case (LengthModifier::none
):
189 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, int, conv_index
);
191 case (LengthModifier::l
):
192 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long, conv_index
);
194 case (LengthModifier::ll
):
195 case (LengthModifier::L
): // This isn't in the standard, but is in other
196 // libc implementations.
198 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long long, conv_index
);
200 case (LengthModifier::j
):
202 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, intmax_t, conv_index
);
204 case (LengthModifier::z
):
206 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, size_t, conv_index
);
208 case (LengthModifier::t
):
210 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, ptrdiff_t, conv_index
);
// w/wf: pick the smallest of int, long, long long whose unsigned digit count
// covers the requested bit width, falling back to intmax_t.
213 case (LengthModifier::w
):
214 case (LengthModifier::wf
):
216 section
.has_conv
= false;
217 } else if (bw
<= cpp::numeric_limits
<unsigned int>::digits
) {
218 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, int, conv_index
);
219 } else if (bw
<= cpp::numeric_limits
<unsigned long>::digits
) {
220 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long, conv_index
);
221 } else if (bw
<= cpp::numeric_limits
<unsigned long long>::digits
) {
222 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long long, conv_index
);
224 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, intmax_t, conv_index
);
229 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
// Floating point conversions: long double only with the L modifier, double
// otherwise.
238 if (lm
!= LengthModifier::L
) {
239 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, double, conv_index
);
241 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long double, conv_index
);
244 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
245 #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
246 // Capitalization represents sign, but we only need to get the right
247 // bitwidth here so we ignore that.
250 // all fract sizes we support are less than 32 bits, and currently doing
251 // va_args with fixed point types just doesn't work.
252 // TODO: Move to fixed point types once va_args supports it.
253 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, uint32_t, conv_index
);
257 if (lm
== LengthModifier::l
) {
258 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, uint64_t, conv_index
);
260 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, uint32_t, conv_index
);
263 #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
264 #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
266 // %m is an odd conversion in that it doesn't consume an argument, it
267 // just takes the current value of errno as its argument.
268 section
.conv_val_raw
= static_cast<int>(libc_errno
);
270 #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
271 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
272 case ('n'): // Intentional fallthrough
273 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
// Pointer-valued conversions are stored in conv_val_ptr instead of
// conv_val_raw.
275 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_ptr
, void *, conv_index
);
278 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_ptr
, char *, conv_index
);
281 // if the conversion is undefined, change this to a raw section.
282 section
.has_conv
= false;
285 // If the format section ends on the '\0', then cur_pos must not be
286 // advanced.
287 if (str
[cur_pos
] != '\0')
// Not at a percent sign: this is a raw section that runs until the next
// percent sign or the end of the string.
292 section
.has_conv
= false;
293 while (str
[cur_pos
] != '%' && str
[cur_pos
] != '\0')
296 section
.raw_string
= {str
+ starting_pos
, cur_pos
- starting_pos
};
301 // parse_flags parses the flags inside a format string. It assumes that
302 // str[*local_pos] is inside a format specifier, and parses any flags it
303 // finds. It returns a FormatFlags object containing the set of found flags
304 // bitwise or'd together. local_pos will be moved past any flags found.
// NOTE(review): the loop header, the case labels and the cursor increment are
// not visible in this extract; only the flag accumulation is shown.
305 LIBC_INLINE FormatFlags
parse_flags(size_t *local_pos
) {
306 bool found_flag
= true;
307 FormatFlags flags
= FormatFlags(0);
309 switch (str
[*local_pos
]) {
311 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::LEFT_JUSTIFIED
);
314 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::FORCE_SIGN
);
317 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::SPACE_PREFIX
);
320 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::ALTERNATE_FORM
);
323 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::LEADING_ZEROES
);
334 // parse_length_modifier parses the length modifier inside a format string. It
335 // assumes that str[*local_pos] is inside a format specifier. It returns a
336 // LengthSpec with the length modifier it found. It will advance local_pos
337 // past the length modifier if one is found.
// The second member of the returned LengthSpec is the bit width parsed after a
// w or wf modifier (negative parse results are clamped to 0); every other
// return shown here uses 0.
// NOTE(review): the case labels and most cursor increments are not visible in
// this extract.
338 LIBC_INLINE LengthSpec
parse_length_modifier(size_t *local_pos
) {
339 switch (str
[*local_pos
]) {
// A second l selects ll, otherwise l.
341 if (str
[*local_pos
+ 1] == 'l') {
343 return {LengthModifier::ll
, 0};
346 return {LengthModifier::l
, 0};
// A following f selects wf, otherwise w. The digits after it give the bit
// width.
350 if (str
[*local_pos
+ 1] == 'f') {
352 lm
= LengthModifier::wf
;
355 lm
= LengthModifier::w
;
357 if (internal::isdigit(str
[*local_pos
])) {
358 const auto result
= internal::strtointeger
<int>(str
+ *local_pos
, 10);
359 *local_pos
+= result
.parsed_len
;
360 return {lm
, static_cast<size_t>(cpp::max(0, result
.value
))};
// A second h selects hh, otherwise h.
365 if (str
[*local_pos
+ 1] == 'h') {
367 return {LengthModifier::hh
, 0};
370 return {LengthModifier::h
, 0};
374 return {LengthModifier::L
, 0};
377 return {LengthModifier::j
, 0};
380 return {LengthModifier::z
, 0};
383 return {LengthModifier::t
, 0};
385 return {LengthModifier::none
, 0};
389 // get_next_arg_value gets the next value from the arg list as type T.
// It forwards to args_cur.next_var<T>().
390 template <class T
> LIBC_INLINE T
get_next_arg_value() {
391 return args_cur
.template next_var
<T
>();
394 //----------------------------------------------------
395 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
396 //----------------------------------------------------
398 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
400 // parse_index parses the index of a value inside a format string. It
401 // assumes that str[*local_pos] points to character after a '%' or '*', and
402 // returns 0 if there is no closing $, or if it finds no number. If it finds a
403 // number, it will move local_pos past the end of the $, else it will not move
// local_pos.
// The number is parsed as a base 10 int with strtointeger.
// NOTE(review): the return statements are not visible in this extract.
405 LIBC_INLINE
size_t parse_index(size_t *local_pos
) {
406 if (internal::isdigit(str
[*local_pos
])) {
407 auto result
= internal::strtointeger
<int>(str
+ *local_pos
, 10);
408 size_t index
= result
.value
;
// The digits only count as an index when they are followed by a dollar sign.
409 if (str
[*local_pos
+ result
.parsed_len
] != '$')
// Skip the digits and the dollar sign.
411 *local_pos
= 1 + result
.parsed_len
+ *local_pos
;
// set_type_desc records value as the TypeDesc of the argument at the given
// 1-based index in desc_arr. An index of 0 or one greater than DESC_ARR_LEN is
// ignored.
417 LIBC_INLINE
void set_type_desc(size_t index
, TypeDesc value
) {
418 if (index
!= 0 && index
<= DESC_ARR_LEN
)
419 desc_arr
[index
- 1] = value
;
422 // get_arg_value gets the value from the arg list at index (starting at 1).
423 // This may require parsing the format string. An index of 0 is interpreted as
424 // the next value. If the format string is not valid, it may have gaps in its
425 // indexes. Requesting the value for any index after a gap will fail, since
426 // the arg list must be read in order and with the correct types.
// On the visible success path the type T is recorded for index through
// set_type_desc and the value is read with get_next_arg_value<T>().
// NOTE(review): the check of success and the update of args_index are not
// visible in this extract.
427 template <class T
> LIBC_INLINE
cpp::optional
<T
> get_arg_value(size_t index
) {
// Only reposition the arg list when a specific index other than the current
// one is requested.
428 if (!(index
== 0 || index
== args_index
)) {
429 bool success
= args_to_index(index
);
431 // If we can't get to this index, then the value of the arg can't be
433 return cpp::optional
<T
>();
437 set_type_desc(index
, type_desc_from_type
<T
>());
440 return get_next_arg_value
<T
>();
443 // the ArgProvider can only return the next item in the list. This function is
444 // used in index mode when the item that needs to be read is not the next one.
445 // It moves args_cur to the index requested so the appropriate value may
446 // be read. This may involve parsing the format string, and is in the worst
447 // case an O(n^2) operation.
// NOTE(review): the return statements and the updates of args_index are not
// visible in this extract.
448 LIBC_INLINE
bool args_to_index(size_t index
) {
// Going backwards is only possible by restarting from args_start.
449 if (args_index
> index
) {
451 args_cur
= args_start
;
454 while (args_index
< index
) {
// Look up the type of the current argument: first in desc_arr, then by
// scanning the format string with get_type_desc.
455 TypeDesc cur_type_desc
= type_desc_from_type
<void>();
456 if (args_index
<= DESC_ARR_LEN
)
457 cur_type_desc
= desc_arr
[args_index
- 1];
459 if (cur_type_desc
== type_desc_from_type
<void>())
460 cur_type_desc
= get_type_desc(args_index
);
462 // A type of void represents the type being unknown. If the type for the
463 // requested index isn't in the desc_arr and isn't found by parsing the
464 // string, then advancing to the requested index is impossible. In
465 // that case the function returns false.
466 if (cur_type_desc
== type_desc_from_type
<void>())
// Consume one argument of the described type; the value itself is discarded.
469 if (cur_type_desc
== type_desc_from_type
<uint32_t>())
470 args_cur
.template next_var
<uint32_t>();
471 else if (cur_type_desc
== type_desc_from_type
<uint64_t>())
472 args_cur
.template next_var
<uint64_t>();
473 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
474 // Floating point numbers are stored separately from the other arguments.
475 else if (cur_type_desc
== type_desc_from_type
<double>())
476 args_cur
.template next_var
<double>();
477 else if (cur_type_desc
== type_desc_from_type
<long double>())
478 args_cur
.template next_var
<long double>();
479 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
480 #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
481 // Floating point numbers may be stored separately from the other
483 else if (cur_type_desc
== type_desc_from_type
<short fract
>())
484 args_cur
.template next_var
<short fract
>();
485 else if (cur_type_desc
== type_desc_from_type
<fract
>())
486 args_cur
.template next_var
<fract
>();
487 else if (cur_type_desc
== type_desc_from_type
<long fract
>())
488 args_cur
.template next_var
<long fract
>();
489 else if (cur_type_desc
== type_desc_from_type
<short accum
>())
490 args_cur
.template next_var
<short accum
>();
491 else if (cur_type_desc
== type_desc_from_type
<accum
>())
492 args_cur
.template next_var
<accum
>();
493 else if (cur_type_desc
== type_desc_from_type
<long accum
>())
494 args_cur
.template next_var
<long accum
>();
495 #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
496 // pointers may be stored separately from normal values.
497 else if (cur_type_desc
== type_desc_from_type
<void *>())
498 args_cur
.template next_var
<void *>();
// NOTE(review): presumably the fallback branch for any other descriptor; the
// line introducing it is not visible in this extract.
500 args_cur
.template next_var
<uint32_t>();
507 // get_type_desc assumes that this format string uses index mode. It iterates
508 // through the format string until it finds a format specifier that defines
509 // the type of index, and returns a TypeDesc describing that type. It does not
// NOTE(review): the sentence above is cut off in this extract. The function
// scans with its own local_pos cursor starting at the beginning of str, and
// records every indexed width, precision and conversion type it passes through
// set_type_desc. If no specifier defines the requested index, the TypeDesc for
// void is returned.
// NOTE(review): case labels, cursor increments and closing braces are not
// visible in this extract.
511 LIBC_INLINE TypeDesc
get_type_desc(size_t index
) {
512 // index mode is assumed, and the indices start at 1, so an index
514 size_t local_pos
= 0;
516 while (str
[local_pos
]) {
517 if (str
[local_pos
] == '%') {
520 size_t conv_index
= parse_index(&local_pos
);
522 // the flags aren't relevant for this situation, but I need to skip past
523 // them so they're parsed but the result is discarded.
524 parse_flags(&local_pos
);
// A star width takes an int argument whose index is given after the star.
527 if (str
[local_pos
] == '*') {
530 size_t width_index
= parse_index(&local_pos
);
531 set_type_desc(width_index
, type_desc_from_type
<int>());
532 if (width_index
== index
)
533 return type_desc_from_type
<int>();
535 } else if (internal::isdigit(str
[local_pos
])) {
536 while (internal::isdigit(str
[local_pos
]))
// A star precision is handled the same way as a star width.
541 if (str
[local_pos
] == '.') {
543 if (str
[local_pos
] == '*') {
546 size_t precision_index
= parse_index(&local_pos
);
547 set_type_desc(precision_index
, type_desc_from_type
<int>());
548 if (precision_index
== index
)
549 return type_desc_from_type
<int>();
551 } else if (internal::isdigit(str
[local_pos
])) {
552 while (internal::isdigit(str
[local_pos
]))
557 auto [lm
, bw
] = parse_length_modifier(&local_pos
);
559 // if we don't have an index for this conversion, then its position is
560 // unknown and all this information is irrelevant. The rest of this
561 // logic has been for skipping past this conversion properly to avoid
562 // weirdness with %%.
563 if (conv_index
== 0) {
564 if (str
[local_pos
] != '\0')
// Determine the argument type of this conversion; void means unknown.
569 TypeDesc conv_size
= type_desc_from_type
<void>();
570 switch (str
[local_pos
]) {
572 conv_size
= type_desc_from_type
<void>();
575 conv_size
= type_desc_from_type
<int>();
// Integer conversions: the type depends on the length modifier.
586 case (LengthModifier::hh
):
587 case (LengthModifier::h
):
588 case (LengthModifier::none
):
589 conv_size
= type_desc_from_type
<int>();
591 case (LengthModifier::l
):
592 conv_size
= type_desc_from_type
<long>();
594 case (LengthModifier::ll
):
595 case (LengthModifier::L
): // This isn't in the standard, but is in
596 // other libc implementations.
597 conv_size
= type_desc_from_type
<long long>();
599 case (LengthModifier::j
):
600 conv_size
= type_desc_from_type
<intmax_t>();
602 case (LengthModifier::z
):
603 conv_size
= type_desc_from_type
<size_t>();
605 case (LengthModifier::t
):
606 conv_size
= type_desc_from_type
<ptrdiff_t>();
// w/wf: pick the smallest of int, long, long long whose unsigned digit count
// covers the requested bit width, falling back to intmax_t.
608 case (LengthModifier::w
):
609 case (LengthModifier::wf
):
610 if (bw
<= cpp::numeric_limits
<unsigned int>::digits
) {
611 conv_size
= type_desc_from_type
<int>();
612 } else if (bw
<= cpp::numeric_limits
<unsigned long>::digits
) {
613 conv_size
= type_desc_from_type
<long>();
614 } else if (bw
<= cpp::numeric_limits
<unsigned long long>::digits
) {
615 conv_size
= type_desc_from_type
<long long>();
617 conv_size
= type_desc_from_type
<intmax_t>();
622 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
// Floating point conversions: long double only with the L modifier, double
// otherwise.
631 if (lm
!= LengthModifier::L
)
632 conv_size
= type_desc_from_type
<double>();
634 conv_size
= type_desc_from_type
<long double>();
636 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
637 #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
638 // Capitalization represents sign, but we only need to get the right
639 // bitwidth here so we ignore that.
642 conv_size
= type_desc_from_type
<uint32_t>();
646 if (lm
== LengthModifier::l
) {
647 conv_size
= type_desc_from_type
<uint64_t>();
649 conv_size
= type_desc_from_type
<uint32_t>();
652 #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
653 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
655 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
658 conv_size
= type_desc_from_type
<void *>();
661 conv_size
= type_desc_from_type
<int>();
// Cache the type for this conversion, then check whether it is the one that
// was requested.
665 set_type_desc(conv_index
, conv_size
);
666 if (conv_index
== index
)
669 // If the format section ends on the '\0', then local_pos must not be
670 // advanced.
671 if (str
[local_pos
] != '\0')
675 // If there is no size for the requested index, then it's unknown. Return
677 return type_desc_from_type
<void>();
680 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
683 } // namespace printf_core
684 } // namespace LIBC_NAMESPACE_DECL
686 #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H