[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / libc / src / stdio / scanf_core / parser.h
blob7f3a53be3570088611ced4f154526d874aee5185
1 //===-- Format string parser for scanf -------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
10 #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
12 #include "src/__support/arg_list.h"
13 #include "src/__support/common.h"
14 #include "src/__support/ctype_utils.h"
15 #include "src/__support/str_to_integer.h"
16 #include "src/stdio/scanf_core/core_structs.h"
17 #include "src/stdio/scanf_core/scanf_config.h"
19 #include <stddef.h>
21 namespace LIBC_NAMESPACE {
22 namespace scanf_core {
// GET_ARG_VAL_SIMPLEST(arg_type, index) expands to the Parser member call used
// to fetch a conversion's argument. When index mode is enabled (the default,
// i.e. LIBC_COPT_SCANF_DISABLE_INDEX_MODE is not defined) it forwards the index
// to get_arg_value. When index mode is disabled the second macro parameter is
// ignored and the next argument is read with get_next_arg_value.
24 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
25 #define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
26 #else
27 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
28 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
30 template <typename ArgProvider> class Parser {
31 const char *__restrict str;
33 size_t cur_pos = 0;
34 ArgProvider args_cur;
36 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
37 // args_start stores the start of the va_args, which is used when a previous
38 // argument is needed. In that case, we have to read the arguments from the
39 // beginning since they don't support reading backwards.
40 ArgProvider args_start;
41 size_t args_index = 1;
42 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
44 public:
45 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
46 LIBC_INLINE Parser(const char *__restrict new_str, internal::ArgList &args)
47 : str(new_str), args_cur(args), args_start(args) {}
48 #else
49 LIBC_INLINE Parser(const char *__restrict new_str, internal::ArgList &args)
50 : str(new_str), args_cur(args) {}
51 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
53 // get_next_section will parse the format string until it has a fully
54 // specified format section. This can either be a raw format section with no
55 // conversion, or a format section with a conversion that has all of its
56 // variables stored in the format section.
57 LIBC_INLINE FormatSection get_next_section() {
58 FormatSection section;
59 size_t starting_pos = cur_pos;
60 if (str[cur_pos] == '%') {
61 // format section
62 section.has_conv = true;
64 ++cur_pos;
65 [[maybe_unused]] size_t conv_index = 0;
67 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
68 conv_index = parse_index(&cur_pos);
69 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
71 if (str[cur_pos] == '*') {
72 ++cur_pos;
73 section.flags = FormatFlags::NO_WRITE;
76 // handle width
77 section.max_width = -1;
78 if (internal::isdigit(str[cur_pos])) {
79 auto result = internal::strtointeger<int>(str + cur_pos, 10);
80 section.max_width = result.value;
81 cur_pos = cur_pos + result.parsed_len;
84 // TODO(michaelrj): add posix allocate flag support.
85 // if (str[cur_pos] == 'm') {
86 // ++cur_pos;
87 // section.flags = FormatFlags::ALLOCATE;
88 // }
90 LengthModifier lm = parse_length_modifier(&cur_pos);
91 section.length_modifier = lm;
93 section.conv_name = str[cur_pos];
95 // If NO_WRITE is not set, then read the next arg as the output pointer.
96 if ((section.flags & FormatFlags::NO_WRITE) == 0) {
97 // Since all outputs are pointers, there's no need to distinguish when
98 // reading from va_args. They're all the same size and stored the same.
99 section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
102 // If the end of the format section is on the '\0'. This means we need to
103 // not advance the cur_pos and we should not count this has having a
104 // conversion.
105 if (str[cur_pos] != '\0') {
106 ++cur_pos;
107 } else {
108 section.has_conv = false;
111 // If the format is a bracketed one, then we need to parse out the insides
112 // of the brackets.
113 if (section.conv_name == '[') {
114 constexpr char CLOSING_BRACKET = ']';
115 constexpr char INVERT_FLAG = '^';
116 constexpr char RANGE_OPERATOR = '-';
118 cpp::bitset<256> scan_set;
119 bool invert = false;
121 // The circumflex in the first position represents the inversion flag,
122 // but it's easier to apply that at the end so we just store it for now.
123 if (str[cur_pos] == INVERT_FLAG) {
124 invert = true;
125 ++cur_pos;
128 // This is used to determine if a hyphen is being used as a literal or
129 // as a range operator.
130 size_t set_start_pos = cur_pos;
132 // Normally the right bracket closes the set, but if it's the first
133 // character (possibly after the inversion flag) then it's instead
134 // included as a character in the set and the second right bracket
135 // closes the set.
136 if (str[cur_pos] == CLOSING_BRACKET) {
137 scan_set.set(CLOSING_BRACKET);
138 ++cur_pos;
141 while (str[cur_pos] != '\0' && str[cur_pos] != CLOSING_BRACKET) {
142 // If a hyphen is being used as a range operator, since it's neither
143 // at the beginning nor end of the set.
144 if (str[cur_pos] == RANGE_OPERATOR && cur_pos != set_start_pos &&
145 str[cur_pos + 1] != CLOSING_BRACKET && str[cur_pos + 1] != '\0') {
146 // Technically there is no requirement to correct the ordering of
147 // the range, but since the range operator is entirely
148 // implementation defined it seems like a good convenience.
149 char a = str[cur_pos - 1];
150 char b = str[cur_pos + 1];
151 char start = (a < b ? a : b);
152 char end = (a < b ? b : a);
153 scan_set.set_range(start, end);
154 cur_pos += 2;
155 } else {
156 scan_set.set(str[cur_pos]);
157 ++cur_pos;
160 if (invert)
161 scan_set.flip();
163 if (str[cur_pos] == CLOSING_BRACKET) {
164 ++cur_pos;
165 section.scan_set = scan_set;
166 } else {
167 // if the end of the string was encountered, this is not a valid set.
168 section.has_conv = false;
171 } else {
172 // raw section
173 section.has_conv = false;
174 while (str[cur_pos] != '%' && str[cur_pos] != '\0')
175 ++cur_pos;
177 section.raw_string = {str + starting_pos, cur_pos - starting_pos};
178 return section;
181 private:
182 // parse_length_modifier parses the length modifier inside a format string. It
183 // assumes that str[*local_pos] is inside a format specifier. It returns a
184 // LengthModifier with the length modifier it found. It will advance local_pos
185 // after the format specifier if one is found.
186 LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
187 switch (str[*local_pos]) {
188 case ('l'):
189 if (str[*local_pos + 1] == 'l') {
190 *local_pos += 2;
191 return LengthModifier::ll;
192 } else {
193 ++*local_pos;
194 return LengthModifier::l;
196 case ('h'):
197 if (str[*local_pos + 1] == 'h') {
198 *local_pos += 2;
199 return LengthModifier::hh;
200 } else {
201 ++*local_pos;
202 return LengthModifier::h;
204 case ('L'):
205 ++*local_pos;
206 return LengthModifier::L;
207 case ('j'):
208 ++*local_pos;
209 return LengthModifier::j;
210 case ('z'):
211 ++*local_pos;
212 return LengthModifier::z;
213 case ('t'):
214 ++*local_pos;
215 return LengthModifier::t;
216 default:
217 return LengthModifier::NONE;
221 // get_next_arg_value gets the next value from the arg list as type T.
222 template <class T> LIBC_INLINE T get_next_arg_value() {
223 return args_cur.template next_var<T>();
226 //----------------------------------------------------
227 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
228 //----------------------------------------------------
230 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
232 // parse_index parses the index of a value inside a format string. It
233 // assumes that str[*local_pos] points to character after a '%' or '*', and
234 // returns 0 if there is no closing $, or if it finds no number. If it finds a
235 // number, it will move local_pos past the end of the $, else it will not move
236 // local_pos.
237 LIBC_INLINE size_t parse_index(size_t *local_pos) {
238 if (internal::isdigit(str[*local_pos])) {
239 auto result = internal::strtointeger<int>(str + *local_pos, 10);
240 size_t index = result.value;
241 if (str[*local_pos + result.parsed_len] != '$')
242 return 0;
243 *local_pos = 1 + result.parsed_len + *local_pos;
244 return index;
246 return 0;
249 // get_arg_value gets the value from the arg list at index (starting at 1).
250 // This may require parsing the format string. An index of 0 is interpreted as
251 // the next value.
252 template <class T> LIBC_INLINE T get_arg_value(size_t index) {
253 if (!(index == 0 || index == args_index))
254 args_to_index(index);
256 ++args_index;
257 return get_next_arg_value<T>();
260 // the ArgList can only return the next item in the list. This function is
261 // used in index mode when the item that needs to be read is not the next one.
262 // It moves cur_args to the index requested so the appropriate value may
263 // be read. This may involve parsing the format string, and is in the worst
264 // case an O(n^2) operation.
265 LIBC_INLINE void args_to_index(size_t index) {
266 if (args_index > index) {
267 args_index = 1;
268 args_cur = args_start;
271 while (args_index < index) {
272 // Since all arguments must be pointers, we can just read all of them as
273 // void * and not worry about type issues.
274 args_cur.template next_var<void *>();
275 ++args_index;
279 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
282 } // namespace scanf_core
283 } // namespace LIBC_NAMESPACE
285 #endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H