//===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/stdio/scanf_core/float_converter.h"

#include "src/__support/CPP/limits.h"
#include "src/__support/char_vector.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"

#include <stddef.h>
21 namespace LIBC_NAMESPACE_DECL
{
22 namespace scanf_core
{
24 // All of the floating point conversions are the same for scanf, every name will
25 // accept every style.
26 int convert_float(Reader
*reader
, const FormatSection
&to_conv
) {
27 // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
28 // infinity, or NaN, whose format is the same as expected for the subject
29 // sequence of the strtod function. The corresponding argument shall be a
30 // pointer to floating."
32 CharVector out_str
= CharVector();
33 bool is_number
= false;
35 size_t max_width
= cpp::numeric_limits
<size_t>::max();
36 if (to_conv
.max_width
> 0) {
37 max_width
= to_conv
.max_width
;
40 char cur_char
= reader
->getc();
42 if (cur_char
== '+' || cur_char
== '-') {
43 if (!out_str
.append(cur_char
)) {
44 return ALLOCATION_FAILURE
;
46 if (out_str
.length() == max_width
) {
47 return MATCHING_FAILURE
;
49 cur_char
= reader
->getc();
53 static constexpr char DECIMAL_POINT
= '.';
54 static const char inf_string
[] = "infinity";
58 if (internal::tolower(cur_char
) == inf_string
[0]) {
62 inf_index
< (sizeof(inf_string
) - 1) && out_str
.length() < max_width
&&
63 internal::tolower(cur_char
) == inf_string
[inf_index
];
65 if (!out_str
.append(cur_char
)) {
66 return ALLOCATION_FAILURE
;
68 cur_char
= reader
->getc();
71 if (inf_index
== 3 || inf_index
== sizeof(inf_string
) - 1) {
72 write_float_with_length(out_str
.c_str(), to_conv
);
75 return MATCHING_FAILURE
;
79 static const char nan_string
[] = "nan";
82 if (internal::tolower(cur_char
) == nan_string
[0]) {
86 nan_index
< (sizeof(nan_string
) - 1) && out_str
.length() < max_width
&&
87 internal::tolower(cur_char
) == nan_string
[nan_index
];
89 if (!out_str
.append(cur_char
)) {
90 return ALLOCATION_FAILURE
;
92 cur_char
= reader
->getc();
95 if (nan_index
== sizeof(nan_string
) - 1) {
96 write_float_with_length(out_str
.c_str(), to_conv
);
99 return MATCHING_FAILURE
;
103 // Assume base of 10 by default but check if it is actually base 16.
106 // If the string starts with 0 it might be in hex.
107 if (cur_char
== '0') {
109 // Read the next character to check.
110 if (!out_str
.append(cur_char
)) {
111 return ALLOCATION_FAILURE
;
113 // If we've hit the end, then this is "0", which is valid.
114 if (out_str
.length() == max_width
) {
115 write_float_with_length(out_str
.c_str(), to_conv
);
118 cur_char
= reader
->getc();
121 // If that next character is an 'x' then this is a hexadecimal number.
122 if (internal::tolower(cur_char
) == 'x') {
125 if (!out_str
.append(cur_char
)) {
126 return ALLOCATION_FAILURE
;
128 // If we've hit the end here, we have "0x" which is a valid prefix to a
129 // floating point number, and will be evaluated to 0.
130 if (out_str
.length() == max_width
) {
131 write_float_with_length(out_str
.c_str(), to_conv
);
134 cur_char
= reader
->getc();
139 const char exponent_mark
= ((base
== 10) ? 'e' : 'p');
140 bool after_decimal
= false;
142 // The format for the remaining characters at this point is DD.DDe+/-DD for
143 // base 10 and XX.XXp+/-DD for base 16
145 // This handles the digits before and after the decimal point, but not the
147 while (out_str
.length() < max_width
) {
148 if (internal::isalnum(cur_char
) &&
149 internal::b36_char_to_int(cur_char
) < base
) {
151 if (!out_str
.append(cur_char
)) {
152 return ALLOCATION_FAILURE
;
154 cur_char
= reader
->getc();
155 } else if (cur_char
== DECIMAL_POINT
&& !after_decimal
) {
156 after_decimal
= true;
157 if (!out_str
.append(cur_char
)) {
158 return ALLOCATION_FAILURE
;
160 cur_char
= reader
->getc();
166 // Handle the exponent, which has an exponent mark, an optional sign, and
168 if (internal::tolower(cur_char
) == exponent_mark
) {
169 if (!out_str
.append(cur_char
)) {
170 return ALLOCATION_FAILURE
;
172 if (out_str
.length() == max_width
) {
173 // This is laid out in the standard as being a matching error (100e is not
174 // a valid float) but may conflict with existing implementations.
175 return MATCHING_FAILURE
;
177 cur_char
= reader
->getc();
180 if (cur_char
== '+' || cur_char
== '-') {
181 if (!out_str
.append(cur_char
)) {
182 return ALLOCATION_FAILURE
;
184 if (out_str
.length() == max_width
) {
185 return MATCHING_FAILURE
;
187 cur_char
= reader
->getc();
191 // It is specified by the standard that "100er" is a matching failure since
192 // the longest prefix of a possibly valid floating-point number (which is
193 // "100e") is not a valid floating-point number. If there is an exponent
194 // mark then there must be a digit after it else the number is not valid.
195 // Some implementations will roll back two characters (to just "100") and
196 // accept that since the prefix is not valid, and some will interpret an
197 // exponent mark followed by no digits as an additional exponent of 0
198 // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
199 // by the standard, but they may be used in real code, see Hyrum's law. This
200 // code follows the standard, but may be incompatible due to code expecting
202 if (!internal::isdigit(cur_char
)) {
203 return MATCHING_FAILURE
;
206 while (internal::isdigit(cur_char
) && out_str
.length() < max_width
) {
207 if (!out_str
.append(cur_char
)) {
208 return ALLOCATION_FAILURE
;
210 cur_char
= reader
->getc();
214 // We always read one more character than will be used, so we have to put the
216 reader
->ungetc(cur_char
);
218 // If we haven't actually found any digits, this is a matching failure (this
219 // catches cases like "+.")
221 return MATCHING_FAILURE
;
223 write_float_with_length(out_str
.c_str(), to_conv
);
228 } // namespace scanf_core
229 } // namespace LIBC_NAMESPACE_DECL