1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // #define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
13 #include "src/__support/arg_list.h"
15 #include "src/__support/CPP/bit.h"
16 #include "src/__support/CPP/optional.h"
17 #include "src/__support/CPP/string_view.h"
18 #include "src/__support/CPP/type_traits.h"
19 #include "src/__support/FPUtil/FPBits.h"
20 #include "src/__support/ctype_utils.h"
21 #include "src/__support/str_to_integer.h"
22 #include "src/stdio/printf_core/core_structs.h"
24 namespace __llvm_libc
{
25 namespace printf_core
{
// int_type_of<T> names the type used when an argument of type T is stored in
// raw (bit-cast) form; it is consumed through the int_type_of_v alias below.
// NOTE(review): the body of the primary template is not visible in this
// listing — presumably it maps T to itself; confirm against the full file.
27 template <typename T
> struct int_type_of
{
// double is represented by the unsigned integer type exposed by
// fputil::FPBits<double>.
30 template <> struct int_type_of
<double> {
31 using type
= fputil::FPBits
<double>::UIntType
;
// long double is represented by the unsigned integer type exposed by
// fputil::FPBits<long double>.
33 template <> struct int_type_of
<long double> {
34 using type
= fputil::FPBits
<long double>::UIntType
;
// Convenience alias for int_type_of<T>::type.
36 template <typename T
> using int_type_of_v
= typename int_type_of
<T
>::type
;
// WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) reads one argument of type
// arg_type and stores it in dst, bit-cast to int_type_of_v<arg_type>.
//
// Index mode (LIBC_COPT_PRINTF_DISABLE_INDEX_MODE not defined): the argument
// is fetched with get_arg_value<arg_type>(index). If that call yields no
// value, section.has_conv is cleared; the macro therefore relies on a local
// named `section` being in scope at the expansion site.
//
// Sequential mode: the third parameter is ignored and the argument comes from
// get_next_arg_value<arg_type>().
//
// NOTE(review): the tail of the first definition and the directive separating
// the two definitions are not visible in this listing — confirm against the
// full file.
38 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
39 #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) \
41 auto temp = get_arg_value<arg_type>(index); \
42 if (!temp.has_value()) { \
43 section.has_conv = false; \
45 dst = cpp::bit_cast<int_type_of_v<arg_type>>(temp.value()); \
49 #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _) \
50 dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>())
51 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
// Parses the section of the format string that starts at cur_pos and returns
// it as a FormatSection.
//
// If the section starts with '%', it is treated as a conversion: the optional
// index (index mode only), flags, minimum width, precision and length modifier
// are parsed in that order, then the conversion character selects which
// argument type is read into the section. Otherwise the section is raw text
// running up to the next '%' or the terminating '\0'.
//
// In both cases section.raw_string is set to the text between the starting
// position and the final cur_pos.
//
// NOTE(review): several lines of this function (case labels, closing braces,
// position increments, the return) are not visible in this listing; the
// comments below describe only what the visible statements do.
53 FormatSection
Parser::get_next_section() {
54 FormatSection section
;
// Remember where this section began so raw_string can be built at the end.
55 size_t starting_pos
= cur_pos
;
56 if (str
[cur_pos
] == '%') {
58 section
.has_conv
= true;
// conv_index stays 0 unless index mode is enabled and parse_index supplies a
// value; [[maybe_unused]] covers the build where index mode is disabled.
61 [[maybe_unused
]] size_t conv_index
= 0;
63 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
64 conv_index
= parse_index(&cur_pos
);
65 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
67 section
.flags
= parse_flags(&cur_pos
);
// Minimum width: either '*' (taken from an int argument) or a decimal literal.
70 section
.min_width
= 0;
71 if (str
[cur_pos
] == '*') {
74 WRITE_ARG_VAL_SIMPLEST(section
.min_width
, int, parse_index(&cur_pos
));
75 } else if (internal::isdigit(str
[cur_pos
])) {
76 auto result
= internal::strtointeger
<int>(str
+ cur_pos
, 10);
77 section
.min_width
= result
.value
;
78 cur_pos
= cur_pos
+ result
.parsed_len
;
// A negative width is stored as its magnitude and combined with the
// LEFT_JUSTIFIED flag.
// NOTE(review): the assignment target of the static_cast expression below is
// not visible in this listing — confirm the result is stored back into
// section.flags, otherwise the expression has no effect.
80 if (section
.min_width
< 0) {
81 section
.min_width
= -section
.min_width
;
83 static_cast<FormatFlags
>(section
.flags
| FormatFlags::LEFT_JUSTIFIED
);
87 section
.precision
= -1; // negative precisions are ignored.
88 if (str
[cur_pos
] == '.') {
90 section
.precision
= 0; // if there's a . but no specified precision, the
91 // precision is implicitly 0.
92 if (str
[cur_pos
] == '*') {
95 WRITE_ARG_VAL_SIMPLEST(section
.precision
, int, parse_index(&cur_pos
));
97 } else if (internal::isdigit(str
[cur_pos
])) {
98 auto result
= internal::strtointeger
<int>(str
+ cur_pos
, 10);
99 section
.precision
= result
.value
;
100 cur_pos
= cur_pos
+ result
.parsed_len
;
104 LengthModifier lm
= parse_length_modifier(&cur_pos
);
106 section
.length_modifier
= lm
;
// The character at cur_pos is recorded as the conversion name and then used
// to decide which argument type to read.
107 section
.conv_name
= str
[cur_pos
];
108 switch (str
[cur_pos
]) {
110 // Regardless of options, a % conversion is always safe. The standard says
111 // that "The complete conversion specification shall be %%" but it also
112 // says that "If a conversion specification is invalid, the behavior is
113 // undefined." Based on that we define that any conversion specification
114 // ending in '%' shall display as '%' regardless of any valid or invalid
// options.
116 section
.has_conv
= true;
119 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, int, conv_index
);
// Integer conversions: the argument type read depends on the length modifier.
// hh, h and no modifier all read an int.
128 case (LengthModifier::hh
):
129 case (LengthModifier::h
):
130 case (LengthModifier::none
):
131 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, int, conv_index
);
133 case (LengthModifier::l
):
134 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long, conv_index
);
136 case (LengthModifier::ll
):
137 case (LengthModifier::L
): // This isn't in the standard, but is in other
138 // libc implementations.
140 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long long, conv_index
);
142 case (LengthModifier::j
):
144 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, intmax_t, conv_index
);
146 case (LengthModifier::z
):
148 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, size_t, conv_index
);
150 case (LengthModifier::t
):
152 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, ptrdiff_t, conv_index
);
// Floating point conversions (compiled out when
// LIBC_COPT_PRINTF_DISABLE_FLOAT is defined): a double is read unless the
// length modifier is L, in which case a long double is read.
156 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
165 if (lm
!= LengthModifier::L
) {
166 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, double, conv_index
);
168 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_raw
, long double, conv_index
);
171 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
172 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
174 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
// Pointer-valued arguments are stored in conv_val_ptr instead of conv_val_raw.
177 WRITE_ARG_VAL_SIMPLEST(section
.conv_val_ptr
, void *, conv_index
);
180 // if the conversion is undefined, change this to a raw section.
181 section
.has_conv
= false;
184 // If the format section ends on the '\0', cur_pos must not be advanced
185 // past it.
186 if (str
[cur_pos
] != '\0')
// Raw text section: no conversion, and the section extends until the next '%'
// or the end of the string.
191 section
.has_conv
= false;
192 while (str
[cur_pos
] != '%' && str
[cur_pos
] != '\0')
// The raw text of the section is everything consumed since starting_pos.
195 section
.raw_string
= {str
+ starting_pos
, cur_pos
- starting_pos
};
// Reads flag characters starting at str[*local_pos] and accumulates them into
// a FormatFlags bitmask that starts out empty. The visible cases OR in
// LEFT_JUSTIFIED, FORCE_SIGN, SPACE_PREFIX, ALTERNATE_FORM and LEADING_ZEROES.
//
// local_pos: pointer to the current position in str; taken by pointer so the
//            same routine can serve both get_next_section (cur_pos) and
//            get_type_desc (its own local position).
//
// NOTE(review): the loop header, the case labels selecting each flag, the
// position update and the return statement are not visible in this listing —
// confirm against the full file.
199 FormatFlags
Parser::parse_flags(size_t *local_pos
) {
200 bool found_flag
= true;
201 FormatFlags flags
= FormatFlags(0);
203 switch (str
[*local_pos
]) {
205 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::LEFT_JUSTIFIED
);
208 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::FORCE_SIGN
);
211 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::SPACE_PREFIX
);
214 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::ALTERNATE_FORM
);
217 flags
= static_cast<FormatFlags
>(flags
| FormatFlags::LEADING_ZEROES
);
// Inspects the character at str[*local_pos] and returns the matching
// LengthModifier; LengthModifier::none is the final return when nothing else
// was selected.
//
// Two of the branches look one character ahead: when the following character
// is 'l' the result is ll (otherwise l), and when it is 'h' the result is hh
// (otherwise h).
//
// local_pos: pointer to the current position in str.
//
// NOTE(review): the case labels and any updates to *local_pos are not visible
// in this listing — confirm against the full file.
228 LengthModifier
Parser::parse_length_modifier(size_t *local_pos
) {
229 switch (str
[*local_pos
]) {
231 if (str
[*local_pos
+ 1] == 'l') {
233 return LengthModifier::ll
;
236 return LengthModifier::l
;
239 if (str
[*local_pos
+ 1] == 'h') {
241 return LengthModifier::hh
;
244 return LengthModifier::h
;
248 return LengthModifier::L
;
251 return LengthModifier::j
;
254 return LengthModifier::z
;
257 return LengthModifier::t
;
259 return LengthModifier::none
;
263 //----------------------------------------------------
264 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
265 //----------------------------------------------------
267 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
// Attempts to read a positional argument index of the form "<digits>$" at
// str[*local_pos].
//
// When the current character is a digit, a base-10 integer is parsed and the
// character immediately after it is compared against '$'. On the path where
// the '$' is present, *local_pos is advanced past both the digits and the '$'
// (parsed_len + 1 characters).
//
// local_pos: pointer to the current position in str; only modified on the
//            visible path described above.
//
// NOTE(review): the return statements (including what is returned when there
// is no digit or no '$') are not visible in this listing. Callers in this file
// treat a result of 0 as "no index" — confirm against the full file.
269 size_t Parser::parse_index(size_t *local_pos
) {
270 if (internal::isdigit(str
[*local_pos
])) {
271 auto result
= internal::strtointeger
<int>(str
+ *local_pos
, 10);
272 size_t index
= result
.value
;
273 if (str
[*local_pos
+ result
.parsed_len
] != '$')
275 *local_pos
= 1 + result
.parsed_len
+ *local_pos
;
// Scans the format string from the beginning to find the type descriptor of
// the argument at the given positional index.
//
// Each '%' conversion is walked in the same order as get_next_section (index,
// flags, width, precision, length modifier, conversion character). Every
// indexed argument met along the way — '*' widths, '*' precisions and the
// conversion itself — is recorded with set_type_desc. For '*' widths and
// precisions the function returns as soon as the recorded index equals the
// requested one.
//
// index: the positional argument index being looked up.
// Returns type_desc_from_type<void>() at the end of the function when the
// index was not found.
//
// NOTE(review): several lines (case labels, position increments, the early
// return for the conversion itself) are not visible in this listing; the
// comments describe only the visible statements.
281 TypeDesc
Parser::get_type_desc(size_t index
) {
282 // index mode is assumed, and the indices start at 1, so an index
// of 0 is treated below as "this conversion has no index".
284 size_t local_pos
= 0;
286 while (str
[local_pos
]) {
287 if (str
[local_pos
] == '%') {
290 size_t conv_index
= parse_index(&local_pos
);
292 // the flags aren't relevant for this situation, but I need to skip past
293 // them so they're parsed but the result is discarded.
294 parse_flags(&local_pos
);
// A '*' width consumes an int argument at its own index.
297 if (str
[local_pos
] == '*') {
300 size_t width_index
= parse_index(&local_pos
);
301 set_type_desc(width_index
, type_desc_from_type
<int>());
302 if (width_index
== index
)
303 return type_desc_from_type
<int>();
305 } else if (internal::isdigit(str
[local_pos
])) {
306 while (internal::isdigit(str
[local_pos
]))
// A '*' precision likewise consumes an int argument at its own index.
311 if (str
[local_pos
] == '.') {
313 if (str
[local_pos
] == '*') {
316 size_t precision_index
= parse_index(&local_pos
);
317 set_type_desc(precision_index
, type_desc_from_type
<int>());
318 if (precision_index
== index
)
319 return type_desc_from_type
<int>();
321 } else if (internal::isdigit(str
[local_pos
])) {
322 while (internal::isdigit(str
[local_pos
]))
327 LengthModifier lm
= parse_length_modifier(&local_pos
);
329 // if we don't have an index for this conversion, then its position is
330 // unknown and all this information is irrelevant. The rest of this logic
331 // has been for skipping past this conversion properly to avoid
332 // weirdness with %%.
333 if (conv_index
== 0) {
334 if (str
[local_pos
] != '\0')
// conv_size starts as the void descriptor and is replaced according to the
// conversion character and length modifier.
339 TypeDesc conv_size
= type_desc_from_type
<void>();
340 switch (str
[local_pos
]) {
342 conv_size
= type_desc_from_type
<void>();
345 conv_size
= type_desc_from_type
<int>();
// Integer conversions: the descriptor depends on the length modifier, with
// hh, h and no modifier all mapping to int.
354 case (LengthModifier::hh
):
355 case (LengthModifier::h
):
356 case (LengthModifier::none
):
357 conv_size
= type_desc_from_type
<int>();
359 case (LengthModifier::l
):
360 conv_size
= type_desc_from_type
<long>();
362 case (LengthModifier::ll
):
363 case (LengthModifier::L
): // This isn't in the standard, but is in other
364 // libc implementations.
365 conv_size
= type_desc_from_type
<long long>();
367 case (LengthModifier::j
):
368 conv_size
= type_desc_from_type
<intmax_t>();
370 case (LengthModifier::z
):
371 conv_size
= type_desc_from_type
<size_t>();
373 case (LengthModifier::t
):
374 conv_size
= type_desc_from_type
<ptrdiff_t>();
// Floating point conversions: double unless the length modifier is L, in
// which case long double.
378 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
387 if (lm
!= LengthModifier::L
)
388 conv_size
= type_desc_from_type
<double>();
390 conv_size
= type_desc_from_type
<long double>();
392 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
393 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
395 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
398 conv_size
= type_desc_from_type
<void *>();
401 conv_size
= type_desc_from_type
<int>();
// Record the descriptor for this conversion's index, then check whether it is
// the one that was requested.
405 set_type_desc(conv_index
, conv_size
);
406 if (conv_index
== index
)
409 // If the format section ends on the '\0', local_pos must not be advanced
410 // past it.
411 if (str
[local_pos
] != '\0')
415 // If there is no size for the requested index, then it's unknown. Return
// the void type descriptor.
417 return type_desc_from_type
<void>();
// Advances the argument cursor (args_cur) one argument at a time until
// args_index reaches the requested index.
//
// If args_index is already past the requested index, args_cur is first reset
// to args_start. For each argument skipped, its type descriptor is taken from
// desc_arr when args_index is within DESC_ARR_LEN; if that yields the void
// descriptor (or the index is out of the array's range), get_type_desc is
// used to find it by parsing the format string. The descriptor then selects
// which next_var<T>() call consumes the argument.
//
// index: the positional argument index to move to.
//
// NOTE(review): the return statements, the args_index updates and the body of
// the "type still unknown" check are not visible in this listing — confirm
// against the full file.
420 bool Parser::args_to_index(size_t index
) {
421 if (args_index
> index
) {
423 args_cur
= args_start
;
426 while (args_index
< index
) {
427 TypeDesc cur_type_desc
= type_desc_from_type
<void>();
// desc_arr is indexed from 0 while argument indices start at 1.
428 if (args_index
<= DESC_ARR_LEN
)
429 cur_type_desc
= desc_arr
[args_index
- 1];
431 if (cur_type_desc
== type_desc_from_type
<void>())
432 cur_type_desc
= get_type_desc(args_index
);
434 // A type of void represents the type being unknown. If the type for the
435 // requested index isn't in the desc_arr and isn't found by parsing the
436 // string, then advancing to the requested index is impossible. In that
437 // case the function returns false.
438 if (cur_type_desc
== type_desc_from_type
<void>())
// Consume one argument of the width indicated by the descriptor.
441 if (cur_type_desc
== type_desc_from_type
<uint32_t>())
442 args_cur
.next_var
<uint32_t>();
443 else if (cur_type_desc
== type_desc_from_type
<uint64_t>())
444 args_cur
.next_var
<uint64_t>();
445 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
446 // Floating point numbers are stored separately from the other arguments.
447 else if (cur_type_desc
== type_desc_from_type
<double>())
448 args_cur
.next_var
<double>();
449 else if (cur_type_desc
== type_desc_from_type
<long double>())
450 args_cur
.next_var
<long double>();
451 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
452 // pointers may be stored separately from normal values.
453 else if (cur_type_desc
== type_desc_from_type
<void *>())
454 args_cur
.next_var
<void *>();
// Any descriptor not matched above is consumed as a uint32_t.
456 args_cur
.next_var
<uint32_t>();
463 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
465 } // namespace printf_core
466 } // namespace __llvm_libc