[libc][NFC] Move aligned access implementations to separate header
[llvm-project.git] / libc / src / stdio / printf_core / parser.cpp
blob6b2c174c3f233294747b895791b04da846bea56c
1 //===-- Format string parser implementation for printf ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 // #define LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
#include "parser.h"

#include "src/__support/arg_list.h"

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/optional.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/str_to_integer.h"
#include "src/stdio/printf_core/core_structs.h"

#include <limits.h> // INT_MIN, INT_MAX
24 namespace __llvm_libc {
25 namespace printf_core {
27 template <typename T> struct int_type_of {
28 using type = T;
30 template <> struct int_type_of<double> {
31 using type = fputil::FPBits<double>::UIntType;
33 template <> struct int_type_of<long double> {
34 using type = fputil::FPBits<long double>::UIntType;
36 template <typename T> using int_type_of_v = typename int_type_of<T>::type;
// WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) reads one argument of type
// arg_type and stores its bits in dst, bit_cast to int_type_of_v<arg_type>.
//
// In index mode the argument is fetched with get_arg_value<arg_type>(index);
// if that returns an empty optional, dst is left untouched and
// section.has_conv is cleared. With index mode disabled the third parameter
// is ignored (it is never expanded, so it is never evaluated) and the value
// comes from get_next_arg_value<arg_type>().
//
// Both variants are wrapped in do { ... } while (0) so that an invocation
// followed by ';' is exactly one statement in either mode. This keeps the
// macro safe inside an unbraced if/else and keeps the two modes
// interchangeable at every call site.
//
// The macro expects a variable named `section` to be in scope, and must be
// used inside a Parser member function.
#ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index)                           \
  do {                                                                         \
    auto temp = get_arg_value<arg_type>(index);                                \
    if (!temp.has_value()) {                                                   \
      section.has_conv = false;                                                \
    } else {                                                                   \
      dst = cpp::bit_cast<int_type_of_v<arg_type>>(temp.value());              \
    }                                                                          \
  } while (0)
#else
#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _)                               \
  do {                                                                         \
    dst = cpp::bit_cast<int_type_of_v<arg_type>>(                              \
        get_next_arg_value<arg_type>());                                       \
  } while (0)
#endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
53 FormatSection Parser::get_next_section() {
54 FormatSection section;
55 size_t starting_pos = cur_pos;
56 if (str[cur_pos] == '%') {
57 // format section
58 section.has_conv = true;
60 ++cur_pos;
61 [[maybe_unused]] size_t conv_index = 0;
63 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
64 conv_index = parse_index(&cur_pos);
65 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
67 section.flags = parse_flags(&cur_pos);
69 // handle width
70 section.min_width = 0;
71 if (str[cur_pos] == '*') {
72 ++cur_pos;
74 WRITE_ARG_VAL_SIMPLEST(section.min_width, int, parse_index(&cur_pos));
75 } else if (internal::isdigit(str[cur_pos])) {
76 auto result = internal::strtointeger<int>(str + cur_pos, 10);
77 section.min_width = result.value;
78 cur_pos = cur_pos + result.parsed_len;
80 if (section.min_width < 0) {
81 section.min_width = -section.min_width;
82 section.flags =
83 static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
86 // handle precision
87 section.precision = -1; // negative precisions are ignored.
88 if (str[cur_pos] == '.') {
89 ++cur_pos;
90 section.precision = 0; // if there's a . but no specified precision, the
91 // precision is implicitly 0.
92 if (str[cur_pos] == '*') {
93 ++cur_pos;
95 WRITE_ARG_VAL_SIMPLEST(section.precision, int, parse_index(&cur_pos));
97 } else if (internal::isdigit(str[cur_pos])) {
98 auto result = internal::strtointeger<int>(str + cur_pos, 10);
99 section.precision = result.value;
100 cur_pos = cur_pos + result.parsed_len;
104 LengthModifier lm = parse_length_modifier(&cur_pos);
106 section.length_modifier = lm;
107 section.conv_name = str[cur_pos];
108 switch (str[cur_pos]) {
109 case ('%'):
110 // Regardless of options, a % conversion is always safe. The standard says
111 // that "The complete conversion specification shall be %%" but it also
112 // says that "If a conversion specification is invalid, the behavior is
113 // undefined." Based on that we define that any conversion specification
114 // ending in '%' shall display as '%' regardless of any valid or invalid
115 // options.
116 section.has_conv = true;
117 break;
118 case ('c'):
119 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
120 break;
121 case ('d'):
122 case ('i'):
123 case ('o'):
124 case ('x'):
125 case ('X'):
126 case ('u'):
127 switch (lm) {
128 case (LengthModifier::hh):
129 case (LengthModifier::h):
130 case (LengthModifier::none):
131 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
132 break;
133 case (LengthModifier::l):
134 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
135 break;
136 case (LengthModifier::ll):
137 case (LengthModifier::L): // This isn't in the standard, but is in other
138 // libc implementations.
140 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
141 break;
142 case (LengthModifier::j):
144 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
145 break;
146 case (LengthModifier::z):
148 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, size_t, conv_index);
149 break;
150 case (LengthModifier::t):
152 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
153 break;
155 break;
156 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
157 case ('f'):
158 case ('F'):
159 case ('e'):
160 case ('E'):
161 case ('a'):
162 case ('A'):
163 case ('g'):
164 case ('G'):
165 if (lm != LengthModifier::L) {
166 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index);
167 } else {
168 WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index);
170 break;
171 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
172 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
173 case ('n'):
174 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
175 case ('p'):
176 case ('s'):
177 WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
178 break;
179 default:
180 // if the conversion is undefined, change this to a raw section.
181 section.has_conv = false;
182 break;
184 // If the end of the format section is on the '\0'. This means we need to
185 // not advance the cur_pos.
186 if (str[cur_pos] != '\0')
187 ++cur_pos;
189 } else {
190 // raw section
191 section.has_conv = false;
192 while (str[cur_pos] != '%' && str[cur_pos] != '\0')
193 ++cur_pos;
195 section.raw_string = {str + starting_pos, cur_pos - starting_pos};
196 return section;
199 FormatFlags Parser::parse_flags(size_t *local_pos) {
200 bool found_flag = true;
201 FormatFlags flags = FormatFlags(0);
202 while (found_flag) {
203 switch (str[*local_pos]) {
204 case '-':
205 flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
206 break;
207 case '+':
208 flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
209 break;
210 case ' ':
211 flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
212 break;
213 case '#':
214 flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
215 break;
216 case '0':
217 flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
218 break;
219 default:
220 found_flag = false;
222 if (found_flag)
223 ++*local_pos;
225 return flags;
228 LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
229 switch (str[*local_pos]) {
230 case ('l'):
231 if (str[*local_pos + 1] == 'l') {
232 *local_pos += 2;
233 return LengthModifier::ll;
234 } else {
235 ++*local_pos;
236 return LengthModifier::l;
238 case ('h'):
239 if (str[*local_pos + 1] == 'h') {
240 *local_pos += 2;
241 return LengthModifier::hh;
242 } else {
243 ++*local_pos;
244 return LengthModifier::h;
246 case ('L'):
247 ++*local_pos;
248 return LengthModifier::L;
249 case ('j'):
250 ++*local_pos;
251 return LengthModifier::j;
252 case ('z'):
253 ++*local_pos;
254 return LengthModifier::z;
255 case ('t'):
256 ++*local_pos;
257 return LengthModifier::t;
258 default:
259 return LengthModifier::none;
263 //----------------------------------------------------
264 // INDEX MODE ONLY FUNCTIONS AFTER HERE:
265 //----------------------------------------------------
267 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
269 size_t Parser::parse_index(size_t *local_pos) {
270 if (internal::isdigit(str[*local_pos])) {
271 auto result = internal::strtointeger<int>(str + *local_pos, 10);
272 size_t index = result.value;
273 if (str[*local_pos + result.parsed_len] != '$')
274 return 0;
275 *local_pos = 1 + result.parsed_len + *local_pos;
276 return index;
278 return 0;
281 TypeDesc Parser::get_type_desc(size_t index) {
282 // index mode is assumed, and the indicies start at 1, so an index
283 // of 0 is invalid.
284 size_t local_pos = 0;
286 while (str[local_pos]) {
287 if (str[local_pos] == '%') {
288 ++local_pos;
290 size_t conv_index = parse_index(&local_pos);
292 // the flags aren't relevant for this situation, but I need to skip past
293 // them so they're parsed but the result is discarded.
294 parse_flags(&local_pos);
296 // handle width
297 if (str[local_pos] == '*') {
298 ++local_pos;
300 size_t width_index = parse_index(&local_pos);
301 set_type_desc(width_index, type_desc_from_type<int>());
302 if (width_index == index)
303 return type_desc_from_type<int>();
305 } else if (internal::isdigit(str[local_pos])) {
306 while (internal::isdigit(str[local_pos]))
307 ++local_pos;
310 // handle precision
311 if (str[local_pos] == '.') {
312 ++local_pos;
313 if (str[local_pos] == '*') {
314 ++local_pos;
316 size_t precision_index = parse_index(&local_pos);
317 set_type_desc(precision_index, type_desc_from_type<int>());
318 if (precision_index == index)
319 return type_desc_from_type<int>();
321 } else if (internal::isdigit(str[local_pos])) {
322 while (internal::isdigit(str[local_pos]))
323 ++local_pos;
327 LengthModifier lm = parse_length_modifier(&local_pos);
329 // if we don't have an index for this conversion, then its position is
330 // unknown and all this information is irrelevant. The rest of this logic
331 // has been for skipping past this conversion properly to avoid
332 // weirdness with %%.
333 if (conv_index == 0) {
334 if (str[local_pos] != '\0')
335 ++local_pos;
336 continue;
339 TypeDesc conv_size = type_desc_from_type<void>();
340 switch (str[local_pos]) {
341 case ('%'):
342 conv_size = type_desc_from_type<void>();
343 break;
344 case ('c'):
345 conv_size = type_desc_from_type<int>();
346 break;
347 case ('d'):
348 case ('i'):
349 case ('o'):
350 case ('x'):
351 case ('X'):
352 case ('u'):
353 switch (lm) {
354 case (LengthModifier::hh):
355 case (LengthModifier::h):
356 case (LengthModifier::none):
357 conv_size = type_desc_from_type<int>();
358 break;
359 case (LengthModifier::l):
360 conv_size = type_desc_from_type<long>();
361 break;
362 case (LengthModifier::ll):
363 case (LengthModifier::L): // This isn't in the standard, but is in other
364 // libc implementations.
365 conv_size = type_desc_from_type<long long>();
366 break;
367 case (LengthModifier::j):
368 conv_size = type_desc_from_type<intmax_t>();
369 break;
370 case (LengthModifier::z):
371 conv_size = type_desc_from_type<size_t>();
372 break;
373 case (LengthModifier::t):
374 conv_size = type_desc_from_type<ptrdiff_t>();
375 break;
377 break;
378 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
379 case ('f'):
380 case ('F'):
381 case ('e'):
382 case ('E'):
383 case ('a'):
384 case ('A'):
385 case ('g'):
386 case ('G'):
387 if (lm != LengthModifier::L)
388 conv_size = type_desc_from_type<double>();
389 else
390 conv_size = type_desc_from_type<long double>();
391 break;
392 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
393 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
394 case ('n'):
395 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
396 case ('p'):
397 case ('s'):
398 conv_size = type_desc_from_type<void *>();
399 break;
400 default:
401 conv_size = type_desc_from_type<int>();
402 break;
405 set_type_desc(conv_index, conv_size);
406 if (conv_index == index)
407 return conv_size;
409 // If the end of the format section is on the '\0'. This means we need to
410 // not advance the local_pos.
411 if (str[local_pos] != '\0')
412 ++local_pos;
415 // If there is no size for the requested index, then it's unknown. Return
416 // void.
417 return type_desc_from_type<void>();
420 bool Parser::args_to_index(size_t index) {
421 if (args_index > index) {
422 args_index = 1;
423 args_cur = args_start;
426 while (args_index < index) {
427 TypeDesc cur_type_desc = type_desc_from_type<void>();
428 if (args_index <= DESC_ARR_LEN)
429 cur_type_desc = desc_arr[args_index - 1];
431 if (cur_type_desc == type_desc_from_type<void>())
432 cur_type_desc = get_type_desc(args_index);
434 // A type of void represents the type being unknown. If the type for the
435 // requested index isn't in the desc_arr and isn't found by parsing the
436 // string, then then advancing to the requested index is impossible. In that
437 // case the function returns false.
438 if (cur_type_desc == type_desc_from_type<void>())
439 return false;
441 if (cur_type_desc == type_desc_from_type<uint32_t>())
442 args_cur.next_var<uint32_t>();
443 else if (cur_type_desc == type_desc_from_type<uint64_t>())
444 args_cur.next_var<uint64_t>();
445 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
446 // Floating point numbers are stored separately from the other arguments.
447 else if (cur_type_desc == type_desc_from_type<double>())
448 args_cur.next_var<double>();
449 else if (cur_type_desc == type_desc_from_type<long double>())
450 args_cur.next_var<long double>();
451 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
452 // pointers may be stored separately from normal values.
453 else if (cur_type_desc == type_desc_from_type<void *>())
454 args_cur.next_var<void *>();
455 else
456 args_cur.next_var<uint32_t>();
458 ++args_index;
460 return true;
463 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
465 } // namespace printf_core
466 } // namespace __llvm_libc