1 //===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "src/stdio/scanf_core/int_converter.h"
11 #include "src/__support/CPP/limits.h"
12 #include "src/__support/ctype_utils.h"
13 #include "src/__support/macros/config.h"
14 #include "src/stdio/scanf_core/converter_utils.h"
15 #include "src/stdio/scanf_core/core_structs.h"
16 #include "src/stdio/scanf_core/reader.h"
20 namespace LIBC_NAMESPACE_DECL
{
21 namespace scanf_core
{
23 // This code is very similar to the code in __support/str_to_integer.h but is
24 // not quite the same. Here is the list of differences and why they exist:
25 // 1) This takes a reader and a format section instead of a char* and the base.
26 // This should be fairly self explanatory. While the char* could be adapted
27 // to a reader and the base could be calculated ahead of time, the
28 // semantics are slightly different, specifically a char* can be indexed
29 // freely (I can read str[2] and then str[0]) whereas a File (which the
30 // reader may contain) cannot.
31 // 2) Because this uses a Reader, this function can only unget once.
32 // This is relevant because scanf specifies it reads the "longest sequence
33 // of input characters which does not exceed any specified field width and
34 // which is, or is a prefix of, a matching input sequence." Whereas the
35 // strtol function accepts "the longest initial subsequence of the input
36 // string (...) that is of the expected form." This is demonstrated by the
37 // differences in how they deal with the string "0xZZZ" when parsing as
38 // hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
39 // since it reads the first 'Z', sees that it's not a valid hex digit, and
40 // reverses one character. The strtol function on the other hand only
41 // accepts the "0" since that's the longest valid hexadecimal sequence. It
42 // sees the 'Z' after the "0x" and determines that this is not the prefix
43 // to a valid hex string.
44 // 3) This conversion may have a maximum width.
45 // If a maximum width is specified, this conversion is only allowed to
46 // accept a certain number of characters. Strtol doesn't have any such
48 int convert_int(Reader
*reader
, const FormatSection
&to_conv
) {
49 // %d "Matches an optionally signed decimal integer [...] with the value 10
50 // for the base argument. The corresponding argument shall be a pointer to
53 // %i "Matches an optionally signed integer [...] with the value 0 for the
54 // base argument. The corresponding argument shall be a pointer to signed
57 // %u "Matches an optionally signed decimal integer [...] with the value 10
58 // for the base argument. The corresponding argument shall be a pointer to
61 // %o "Matches an optionally signed octal integer [...] with the value 8 for
62 // the base argument. The corresponding argument shall be a pointer to
65 // %x/X "Matches an optionally signed hexadecimal integer [...] with the value
66 // 16 for the base argument. The corresponding argument shall be a pointer to
69 size_t max_width
= cpp::numeric_limits
<size_t>::max();
70 if (to_conv
.max_width
> 0) {
71 max_width
= to_conv
.max_width
;
75 bool is_number
= false;
76 bool is_signed
= false;
78 if (to_conv
.conv_name
== 'i') {
81 } else if (to_conv
.conv_name
== 'o') {
83 } else if (internal::tolower(to_conv
.conv_name
) == 'x' ||
84 to_conv
.conv_name
== 'p') {
86 } else if (to_conv
.conv_name
== 'd') {
89 } else { // conv_name must be 'u'
93 char cur_char
= reader
->getc();
95 char result_sign
= '+';
96 if (cur_char
== '+' || cur_char
== '-') {
97 result_sign
= cur_char
;
100 cur_char
= reader
->getc();
102 // If the max width has been hit already, then the return value must be 0
103 // since no actual digits of the number have been parsed yet.
104 write_int_with_length(0, to_conv
);
105 return MATCHING_FAILURE
;
108 const bool is_negative
= result_sign
== '-';
110 // Base of 0 means automatically determine the base. Base of 16 may have a
112 if (base
== 0 || base
== 16) {
113 // If the first character is 0, then it could be octal or hex.
114 if (cur_char
== '0') {
117 // Read the next character to check.
120 cur_char
= reader
->getc();
122 write_int_with_length(0, to_conv
);
126 if (internal::tolower(cur_char
) == 'x') {
127 // This is a valid hex prefix.
130 // A valid hex prefix is not necessarily a valid number. For the
131 // conversion to be valid it needs to use all of the characters it
132 // consumes. From the standard:
133 // 7.23.6.2 paragraph 9: "An input item is defined as the longest
134 // sequence of input characters which does not exceed any specified
135 // field width and which is, or is a prefix of, a matching input
137 // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
138 // the execution of the directive fails: this condition is a matching
143 cur_char
= reader
->getc();
145 return MATCHING_FAILURE
;
153 } else if (base
== 0) {
154 if (internal::isdigit(cur_char
)) {
155 // If the first character is a different number, then it's 10.
158 // If the first character isn't a valid digit, then there are no valid
159 // digits at all. The number is 0.
160 reader
->ungetc(cur_char
);
161 write_int_with_length(0, to_conv
);
162 return MATCHING_FAILURE
;
167 constexpr uintmax_t UNSIGNED_MAX
= cpp::numeric_limits
<uintmax_t>::max();
168 constexpr uintmax_t SIGNED_MAX
=
169 static_cast<uintmax_t>(cpp::numeric_limits
<intmax_t>::max());
170 constexpr uintmax_t NEGATIVE_SIGNED_MAX
=
171 static_cast<uintmax_t>(cpp::numeric_limits
<intmax_t>::max()) + 1;
173 const uintmax_t MAX
=
174 (is_signed
? (is_negative
? NEGATIVE_SIGNED_MAX
: SIGNED_MAX
)
177 const uintmax_t max_div_by_base
= MAX
/ base
;
179 if (internal::isalnum(cur_char
) &&
180 internal::b36_char_to_int(cur_char
) < base
) {
184 bool has_overflow
= false;
186 for (; i
< max_width
&& internal::isalnum(cur_char
) &&
187 internal::b36_char_to_int(cur_char
) < base
;
188 ++i
, cur_char
= reader
->getc()) {
190 uintmax_t cur_digit
= internal::b36_char_to_int(cur_char
);
195 } else if (result
> max_div_by_base
) {
199 result
= result
* base
;
202 if (result
> MAX
- cur_digit
) {
206 result
= result
+ cur_digit
;
210 // We always read one more character than will be used, so we have to put the
212 reader
->ungetc(cur_char
);
215 return MATCHING_FAILURE
;
218 write_int_with_length(MAX
, to_conv
);
223 write_int_with_length(result
, to_conv
);
229 } // namespace scanf_core
230 } // namespace LIBC_NAMESPACE_DECL