//===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "src/stdio/scanf_core/float_converter.h"
11 #include "src/__support/CPP/limits.h"
12 #include "src/__support/char_vector.h"
13 #include "src/__support/ctype_utils.h"
14 #include "src/stdio/scanf_core/converter_utils.h"
15 #include "src/stdio/scanf_core/core_structs.h"
16 #include "src/stdio/scanf_core/reader.h"
20 namespace LIBC_NAMESPACE
{
21 namespace scanf_core
{
23 // All of the floating point conversions are the same for scanf, every name will
24 // accept every style.
25 int convert_float(Reader
*reader
, const FormatSection
&to_conv
) {
26 // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
27 // infinity, or NaN, whose format is the same as expected for the subject
28 // sequence of the strtod function. The corresponding argument shall be a
29 // pointer to floating."
31 CharVector out_str
= CharVector();
32 bool is_number
= false;
34 size_t max_width
= cpp::numeric_limits
<size_t>::max();
35 if (to_conv
.max_width
> 0) {
36 max_width
= to_conv
.max_width
;
39 char cur_char
= reader
->getc();
41 if (cur_char
== '+' || cur_char
== '-') {
42 if (!out_str
.append(cur_char
)) {
43 return ALLOCATION_FAILURE
;
45 if (out_str
.length() == max_width
) {
46 return MATCHING_FAILURE
;
48 cur_char
= reader
->getc();
52 static constexpr char DECIMAL_POINT
= '.';
53 static const char inf_string
[] = "infinity";
57 if (to_lower(cur_char
) == inf_string
[0]) {
60 for (; to_lower(cur_char
) == inf_string
[inf_index
] &&
61 inf_index
< sizeof(inf_string
) && out_str
.length() < max_width
;
63 if (!out_str
.append(cur_char
)) {
64 return ALLOCATION_FAILURE
;
66 cur_char
= reader
->getc();
69 if (inf_index
== 3 || inf_index
== sizeof(inf_string
) - 1) {
70 write_float_with_length(out_str
.c_str(), to_conv
);
73 return MATCHING_FAILURE
;
77 static const char nan_string
[] = "nan";
80 if (to_lower(cur_char
) == nan_string
[0]) {
83 for (; to_lower(cur_char
) == nan_string
[nan_index
] &&
84 nan_index
< sizeof(nan_string
) && out_str
.length() < max_width
;
86 if (!out_str
.append(cur_char
)) {
87 return ALLOCATION_FAILURE
;
89 cur_char
= reader
->getc();
92 if (nan_index
== sizeof(nan_string
) - 1) {
93 write_float_with_length(out_str
.c_str(), to_conv
);
96 return MATCHING_FAILURE
;
100 // Assume base of 10 by default but check if it is actually base 16.
103 // If the string starts with 0 it might be in hex.
104 if (cur_char
== '0') {
106 // Read the next character to check.
107 if (!out_str
.append(cur_char
)) {
108 return ALLOCATION_FAILURE
;
110 // If we've hit the end, then this is "0", which is valid.
111 if (out_str
.length() == max_width
) {
112 write_float_with_length(out_str
.c_str(), to_conv
);
115 cur_char
= reader
->getc();
118 // If that next character is an 'x' then this is a hexadecimal number.
119 if (to_lower(cur_char
) == 'x') {
122 if (!out_str
.append(cur_char
)) {
123 return ALLOCATION_FAILURE
;
125 // If we've hit the end here, we have "0x" which is a valid prefix to a
126 // floating point number, and will be evaluated to 0.
127 if (out_str
.length() == max_width
) {
128 write_float_with_length(out_str
.c_str(), to_conv
);
131 cur_char
= reader
->getc();
136 const char exponent_mark
= ((base
== 10) ? 'e' : 'p');
137 bool after_decimal
= false;
139 // The format for the remaining characters at this point is DD.DDe+/-DD for
140 // base 10 and XX.XXp+/-DD for base 16
142 // This handles the digits before and after the decimal point, but not the
144 while (out_str
.length() < max_width
) {
145 if (internal::isalnum(cur_char
) &&
146 internal::b36_char_to_int(cur_char
) < base
) {
148 if (!out_str
.append(cur_char
)) {
149 return ALLOCATION_FAILURE
;
151 cur_char
= reader
->getc();
152 } else if (cur_char
== DECIMAL_POINT
&& !after_decimal
) {
153 after_decimal
= true;
154 if (!out_str
.append(cur_char
)) {
155 return ALLOCATION_FAILURE
;
157 cur_char
= reader
->getc();
163 // Handle the exponent, which has an exponent mark, an optional sign, and
165 if (to_lower(cur_char
) == exponent_mark
) {
166 if (!out_str
.append(cur_char
)) {
167 return ALLOCATION_FAILURE
;
169 if (out_str
.length() == max_width
) {
170 // This is laid out in the standard as being a matching error (100e is not
171 // a valid float) but may conflict with existing implementations.
172 return MATCHING_FAILURE
;
174 cur_char
= reader
->getc();
177 if (cur_char
== '+' || cur_char
== '-') {
178 if (!out_str
.append(cur_char
)) {
179 return ALLOCATION_FAILURE
;
181 if (out_str
.length() == max_width
) {
182 return MATCHING_FAILURE
;
184 cur_char
= reader
->getc();
188 // It is specified by the standard that "100er" is a matching failure since
189 // the longest prefix of a possibly valid floating-point number (which is
190 // "100e") is not a valid floating-point number. If there is an exponent
191 // mark then there must be a digit after it else the number is not valid.
192 // Some implementations will roll back two characters (to just "100") and
193 // accept that since the prefix is not valid, and some will interpret an
194 // exponent mark followed by no digits as an additional exponent of 0
195 // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
196 // by the standard, but they may be used in real code, see Hyrum's law. This
197 // code follows the standard, but may be incompatible due to code expecting
199 if (!internal::isdigit(cur_char
)) {
200 return MATCHING_FAILURE
;
203 while (internal::isdigit(cur_char
) && out_str
.length() < max_width
) {
204 if (!out_str
.append(cur_char
)) {
205 return ALLOCATION_FAILURE
;
207 cur_char
= reader
->getc();
211 // We always read one more character than will be used, so we have to put the
213 reader
->ungetc(cur_char
);
215 // If we haven't actually found any digits, this is a matching failure (this
216 // catches cases like "+.")
218 return MATCHING_FAILURE
;
220 write_float_with_length(out_str
.c_str(), to_conv
);
225 } // namespace scanf_core
226 } // namespace LIBC_NAMESPACE