//===-- String to integer conversion utils ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// -----------------------------------------------------------------------------
// This file is shared with libc++. You should also be careful when adding
// dependencies to this file, since it needs to build for all libc++ targets.
// -----------------------------------------------------------------------------

15 #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
16 #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
18 #include "src/__support/CPP/limits.h"
19 #include "src/__support/CPP/type_traits.h"
20 #include "src/__support/CPP/type_traits/make_unsigned.h"
21 #include "src/__support/big_int.h"
22 #include "src/__support/common.h"
23 #include "src/__support/ctype_utils.h"
24 #include "src/__support/macros/config.h"
25 #include "src/__support/str_to_num_result.h"
26 #include "src/__support/uint128.h"
27 #include "src/errno/libc_errno.h" // For ERANGE
29 namespace LIBC_NAMESPACE_DECL
{
32 // Returns a pointer to the first character in src that is not a whitespace
33 // character (as determined by isspace())
34 // TODO: Change from returning a pointer to returning a length.
35 LIBC_INLINE
const char *
36 first_non_whitespace(const char *__restrict src
,
37 size_t src_len
= cpp::numeric_limits
<size_t>::max()) {
39 while (src_cur
< src_len
&& internal::isspace(src
[src_cur
])) {
45 LIBC_INLINE
int b36_char_to_int(char input
) {
49 return (input
| 32) + 10 - 'a';
53 // checks if the next 3 characters of the string pointer are the start of a
54 // hexadecimal number. Does not advance the string pointer.
56 is_hex_start(const char *__restrict src
,
57 size_t src_len
= cpp::numeric_limits
<size_t>::max()) {
60 return *src
== '0' && (*(src
+ 1) | 32) == 'x' && isalnum(*(src
+ 2)) &&
61 b36_char_to_int(*(src
+ 2)) < 16;
64 // Takes the address of the string pointer and parses the base from the start of
66 LIBC_INLINE
int infer_base(const char *__restrict src
, size_t src_len
) {
67 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
68 // sequence of the decimal digits and the letters a (or A) through f (or F)
69 // with values 10 through 15 respectively." (C standard 6.4.4.1)
70 if (is_hex_start(src
, src_len
))
72 // An octal number is defined as "the prefix 0 optionally followed by a
73 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
74 // number that starts with 0, including just 0, is an octal number.
75 if (src_len
> 0 && src
[0] == '0')
77 // A decimal number is defined as beginning "with a nonzero digit and
78 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
82 // -----------------------------------------------------------------------------
84 // This interface is shared with libc++, if you change this interface you need
85 // to update it in both libc and libc++.
86 // -----------------------------------------------------------------------------
87 // Takes a pointer to a string and the base to convert to. This function is used
88 // as the backend for all of the string to int functions.
90 LIBC_INLINE StrToNumResult
<T
>
91 strtointeger(const char *__restrict src
, int base
,
92 const size_t src_len
= cpp::numeric_limits
<size_t>::max()) {
93 using ResultType
= make_integral_or_big_int_unsigned_t
<T
>;
95 ResultType result
= 0;
97 bool is_number
= false;
104 if (base
< 0 || base
== 1 || base
> 36)
105 return {0, 0, EINVAL
};
107 src_cur
= first_non_whitespace(src
, src_len
) - src
;
109 char result_sign
= '+';
110 if (src
[src_cur
] == '+' || src
[src_cur
] == '-') {
111 result_sign
= src
[src_cur
];
116 base
= infer_base(src
+ src_cur
, src_len
- src_cur
);
118 if (base
== 16 && is_hex_start(src
+ src_cur
, src_len
- src_cur
))
119 src_cur
= src_cur
+ 2;
121 constexpr bool IS_UNSIGNED
= cpp::is_unsigned_v
<T
>;
122 const bool is_positive
= (result_sign
== '+');
124 ResultType
constexpr NEGATIVE_MAX
=
125 !IS_UNSIGNED
? static_cast<ResultType
>(cpp::numeric_limits
<T
>::max()) + 1
126 : cpp::numeric_limits
<T
>::max();
127 ResultType
const abs_max
=
128 (is_positive
? cpp::numeric_limits
<T
>::max() : NEGATIVE_MAX
);
129 ResultType
const abs_max_div_by_base
=
130 static_cast<ResultType
>(abs_max
/ base
);
132 while (src_cur
< src_len
&& isalnum(src
[src_cur
])) {
133 int cur_digit
= b36_char_to_int(src
[src_cur
]);
134 if (cur_digit
>= base
)
140 // If the number has already hit the maximum value for the current type then
141 // the result cannot change, but we still need to advance src to the end of
143 if (result
== abs_max
) {
148 if (result
> abs_max_div_by_base
) {
152 result
= static_cast<ResultType
>(result
* base
);
154 if (result
> abs_max
- cur_digit
) {
158 result
= static_cast<ResultType
>(result
+ cur_digit
);
162 ptrdiff_t str_len
= is_number
? (src_cur
) : 0;
164 if (error_val
== ERANGE
) {
165 if (is_positive
|| IS_UNSIGNED
)
166 return {cpp::numeric_limits
<T
>::max(), str_len
, error_val
};
167 else // T is signed and there is a negative overflow
168 return {cpp::numeric_limits
<T
>::min(), str_len
, error_val
};
171 return {static_cast<T
>(is_positive
? result
: -result
), str_len
, error_val
};
174 } // namespace internal
175 } // namespace LIBC_NAMESPACE_DECL
177 #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H