1 //===-- Utilities to convert integral values to string ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Converts an integer to a string.
11 // By default, the string is written as decimal to an internal buffer and
12 // accessed via the 'view' method.
14 // IntegerToString<int> buffer(42);
15 // cpp::string_view view = buffer.view();
17 // The buffer is allocated on the stack and its size is chosen so that the conversion is guaranteed to fit.
20 // It is also possible to write the data to a preallocated buffer, but this may fail, in which case no view is returned.
24 // if (auto maybe_view = IntegerToString<int>::format_to(buffer, 42)) {
25 // cpp::string_view view = *maybe_view;
28 // The first template parameter is the type of the integer.
29 // The second template parameter defines how the integer is formatted.
30 // Available defaults are 'radix::Bin', 'radix::Oct', 'radix::Dec' and 'radix::Hex'.
33 // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
34 // interpreted as a positive type but 'radix::Dec' will honor negative values.
37 // IntegerToString<int8_t>(-1) // "-1"
38 // IntegerToString<int8_t, radix::Dec>(-1) // "-1"
39 // IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
40 // IntegerToString<int8_t, radix::Oct>(-1) // "377"
41 // IntegerToString<int8_t, radix::Hex>(-1) // "ff"
43 // Additionally, the format can be changed by navigating the subtypes:
44 // - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal
45 // - WithWidth<XX> : Pad string to XX characters filling leading digits with 0
46 // - Uppercase : Use uppercase letters (only for radix > 10)
47 // - WithSign : Prepend '+' for non-negative values (only for radix::Dec)
51 // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0) : "+00"
52 // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1) : "-01"
53 // IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255) : "0xFF"
54 // IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
55 //===----------------------------------------------------------------------===//
57 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
58 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
62 #include "src/__support/CPP/algorithm.h" // max
63 #include "src/__support/CPP/array.h"
64 #include "src/__support/CPP/bit.h"
65 #include "src/__support/CPP/limits.h"
66 #include "src/__support/CPP/optional.h"
67 #include "src/__support/CPP/span.h"
68 #include "src/__support/CPP/string_view.h"
69 #include "src/__support/CPP/type_traits.h"
70 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
71 #include "src/__support/common.h"
72 #include "src/__support/macros/config.h"
74 namespace LIBC_NAMESPACE_DECL
{
// Compile-time description of how an integer is rendered as text.
//
// Template parameters:
//   base         - radix of the output; must be in [2, 36].
//   prefix       - request a radix prefix (radix 2, 8 and 16 only).
//   force_sign   - request an explicit sign (radix 10 only).
//   is_uppercase - use uppercase letters for digits above 9 (radix > 10 only).
//   min_digits   - minimum number of digits to produce.
//
// The nested aliases return a copy of the format with one option changed, so
// options can be chained, e.g. Fmt<16>::WithPrefix::Uppercase.
template <uint8_t base, bool prefix = false, bool force_sign = false,
          bool is_uppercase = false, size_t min_digits = 1>
struct Fmt {
  static constexpr uint8_t BASE = base;
  static constexpr size_t MIN_DIGITS = min_digits;
  static constexpr bool IS_UPPERCASE = is_uppercase;
  static constexpr bool PREFIX = prefix;
  // Declared as bool, like the template parameter it mirrors and like the
  // other flags; it is only ever used as a boolean.
  static constexpr bool FORCE_SIGN = force_sign;

  using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
  using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
  using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
  template <size_t value>
  using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;

  // Invariants: ten numerical digits plus twenty-six letters bound the radix,
  // and each option is restricted to the radices it is meaningful for.
  static constexpr uint8_t NUMERICAL_DIGITS = 10;
  static constexpr uint8_t ALPHA_DIGITS = 26;
  static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
  static_assert(BASE > 1 && BASE <= MAX_DIGIT);
  static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10");
  static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10");
  static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16),
                "WithPrefix is only for radix == 2, 8 or 16");
};
104 // Move this to a separate header since it might be useful elsewhere.
105 template <bool forward
> class StringBufferWriterImpl
{
106 cpp::span
<char> buffer
;
108 bool out_of_range
= false;
110 LIBC_INLINE
size_t location() const {
111 return forward
? index
: buffer
.size() - 1 - index
;
115 StringBufferWriterImpl(const StringBufferWriterImpl
&) = delete;
116 StringBufferWriterImpl(cpp::span
<char> buffer
) : buffer(buffer
) {}
118 LIBC_INLINE
size_t size() const { return index
; }
119 LIBC_INLINE
size_t remainder_size() const { return buffer
.size() - size(); }
120 LIBC_INLINE
bool empty() const { return size() == 0; }
121 LIBC_INLINE
bool full() const { return size() == buffer
.size(); }
122 LIBC_INLINE
bool ok() const { return !out_of_range
; }
// Stores `c` in the slot returned by location(). The declared return type is
// a reference to the writer type.
// NOTE(review): only the store is visible in this view. The statements
// around it (presumably the ok()/full() checks, the `index` update, the
// `out_of_range` flagging and the return) are missing here — confirm against
// the complete file before relying on this description.
124 LIBC_INLINE StringBufferWriterImpl
&push(char c
) {
127 buffer
[location()] = c
;
136 LIBC_INLINE
cpp::span
<char> remainder_span() const {
137 return forward
? buffer
.last(remainder_size())
138 : buffer
.first(remainder_size());
141 LIBC_INLINE
cpp::span
<char> buffer_span() const {
142 return forward
? buffer
.first(size()) : buffer
.last(size());
145 LIBC_INLINE
cpp::string_view
buffer_view() const {
146 const auto s
= buffer_span();
147 return {s
.data(), s
.size()};
151 using StringBufferWriter
= StringBufferWriterImpl
<true>;
152 using BackwardStringBufferWriter
= StringBufferWriterImpl
<false>;
154 } // namespace details
158 using Bin
= details::Fmt
<2>;
159 using Oct
= details::Fmt
<8>;
160 using Dec
= details::Fmt
<10>;
161 using Hex
= details::Fmt
<16>;
162 template <size_t radix
> using Custom
= details::Fmt
<radix
>;
166 // See file header for documentation.
167 template <typename T
, typename Fmt
= radix::Dec
> class IntegerToString
{
168 static_assert(cpp::is_integral_v
<T
> || is_big_int_v
<T
>);
// Computes, at compile time, the number of characters reserved for a
// formatted value of type T: the digit count (never less than
// Fmt::MIN_DIGITS), plus one character for a sign when the radix is 10, plus
// two characters when Fmt::PREFIX is set.
// NOTE(review): several short lines of this function (the body of the
// FLOOR_LOG_2 loop and the terminators of both lambdas) are not visible in
// this view — confirm the details against the complete file.
170 LIBC_INLINE
static constexpr size_t compute_buffer_size() {
// Upper bound on the number of digits needed for any value of type T.
171 constexpr auto MAX_DIGITS
= []() -> size_t {
172 // We size the string buffer for base 10 using an approximation algorithm:
174 // size = ceil(sizeof(T) * 5 / 2)
176 // If sizeof(T) is 1, then size is 3 (actually need 3)
177 // If sizeof(T) is 2, then size is 5 (actually need 5)
178 // If sizeof(T) is 4, then size is 10 (actually need 10)
179 // If sizeof(T) is 8, then size is 20 (actually need 20)
180 // If sizeof(T) is 16, then size is 40 (actually need 39)
182 // NOTE: The ceil operation is actually implemented as
183 // floor(((sizeof(T) * 5) + 1) / 2)
184 // where floor operation is just integer division.
186 // This estimation grows slightly faster than the actual value, but the
187 // overhead is small enough to tolerate.
188 if constexpr (Fmt::BASE
== 10)
189 return ((sizeof(T
) * 5) + 1) / 2;
190 // For other bases, we approximate by rounding down to the nearest power
191 // of two base, since the space needed is easy to calculate and it won't
192 // overestimate by too much.
// Presumably counts how many times `num` can be halved before reaching 1;
// the loop body and return are not visible here.
193 constexpr auto FLOOR_LOG_2
= [](size_t num
) -> size_t {
195 for (; num
> 1; num
/= 2)
// Digits needed = bits in T divided by bits per digit, rounded up.
199 constexpr size_t BITS_PER_DIGIT
= FLOOR_LOG_2(Fmt::BASE
);
200 return ((sizeof(T
) * 8 + (BITS_PER_DIGIT
- 1)) / BITS_PER_DIGIT
);
202 constexpr size_t DIGIT_SIZE
= cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS
);
// One character is reserved for the sign whenever the radix is 10.
203 constexpr size_t SIGN_SIZE
= Fmt::BASE
== 10 ? 1 : 0;
// Two characters are reserved whenever a prefix is requested.
204 constexpr size_t PREFIX_SIZE
= Fmt::PREFIX
? 2 : 0;
205 return DIGIT_SIZE
+ SIGN_SIZE
+ PREFIX_SIZE
;
208 static constexpr size_t BUFFER_SIZE
= compute_buffer_size();
209 static_assert(BUFFER_SIZE
> 0);
211 // An internal stateless structure that handles the number formatting logic.
212 struct IntegerWriter
{
213 static_assert(cpp::is_integral_v
<T
> || is_big_int_v
<T
>);
214 using UNSIGNED_T
= make_integral_or_big_int_unsigned_t
<T
>;
// Converts a digit value to its character. Two forms are visible: '0' plus
// the digit, and a letter ('A' when Fmt::IS_UPPERCASE, otherwise 'a') plus
// the digit minus ten.
// NOTE(review): the condition selecting between the two returns is not
// visible in this view — presumably `digit < 10`; confirm against the
// complete file.
216 LIBC_INLINE
static char digit_char(uint8_t digit
) {
218 return '0' + static_cast<char>(digit
);
219 return (Fmt::IS_UPPERCASE
? 'A' : 'a') + static_cast<char>(digit
- 10);
222 LIBC_INLINE
static void
223 write_unsigned_number(UNSIGNED_T value
,
224 details::BackwardStringBufferWriter
&sink
) {
225 for (; sink
.ok() && value
!= 0; value
/= Fmt::BASE
) {
226 const uint8_t digit(static_cast<uint8_t>(value
% Fmt::BASE
));
227 sink
.push(digit_char(digit
));
231 // Returns the absolute value of 'value' as 'UNSIGNED_T'.
232 LIBC_INLINE
static UNSIGNED_T
abs(T value
) {
233 if (cpp::is_unsigned_v
<T
> || value
>= 0)
234 return value
; // already of the right sign.
236 // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
237 // Thus negating the type's minimum value would overflow.
238 // From C++20 on, signed types are guaranteed to be represented as 2's
239 // complement. We take advantage of this representation and negate the
240 // value by using the exact same bit representation, e.g.,
241 // binary : 0b1000'0000
245 // Note: the compiler can completely optimize out the two branches and
246 // replace them by a simple negate instruction.
247 // https://godbolt.org/z/hE7zahT9W
248 if (value
== cpp::numeric_limits
<T
>::min()) {
249 return cpp::bit_cast
<UNSIGNED_T
>(value
);
251 return -value
; // legal and representable both as T and UNSIGNED_T.`
// Formats `value` into `sink` according to Fmt: digits first, then padding
// up to Fmt::MIN_DIGITS, then the sign handling for radix 10, then the
// prefix handling when Fmt::PREFIX is set.
// NOTE(review): most of the statements executed inside the branches below
// (the characters actually pushed, the `else` lines and the closing braces)
// are not visible in this view — confirm against the complete file.
255 LIBC_INLINE
static void write(T value
,
256 details::BackwardStringBufferWriter
&sink
) {
// Radix 10 writes the magnitude of `value`; the other visible call writes
// `value` converted to UNSIGNED_T.
257 if constexpr (Fmt::BASE
== 10) {
258 write_unsigned_number(abs(value
), sink
);
260 write_unsigned_number(static_cast<UNSIGNED_T
>(value
), sink
);
// Runs while fewer than Fmt::MIN_DIGITS characters are held; the loop body
// is not visible here (presumably pushes a padding character).
263 while (sink
.ok() && sink
.size() < Fmt::MIN_DIGITS
)
// Sign handling for radix 10; the pushed characters are not visible here.
266 if constexpr (Fmt::BASE
== 10) {
269 else if (Fmt::FORCE_SIGN
)
// Prefix handling, one branch per supported radix; the pushed characters
// are not visible here.
273 if constexpr (Fmt::PREFIX
) {
274 if constexpr (Fmt::BASE
== 2) {
278 if constexpr (Fmt::BASE
== 16) {
// Radix 8 inspects what is already held and acts only when it is empty or
// does not start with '0'.
282 if constexpr (Fmt::BASE
== 8) {
283 const cpp::string_view written
= sink
.buffer_view();
284 if (written
.empty() || written
.front() != '0')
291 cpp::array
<char, BUFFER_SIZE
> array
;
295 IntegerToString(const IntegerToString
&) = delete;
296 IntegerToString(T value
) {
297 details::BackwardStringBufferWriter
writer(array
);
298 IntegerWriter::write(value
, writer
);
299 written
= writer
.size();
// Formats `value` into the caller-provided `buffer` through a backward
// writer and returns an optional view; the visible return builds a view that
// starts `writer.size()` characters before the end of `buffer`. The result
// must not be discarded.
// NOTE(review): the statement between the write and the return (presumably
// the failure path yielding an empty optional) and the end of the return
// expression are not visible in this view — confirm against the complete
// file.
302 [[nodiscard
]] LIBC_INLINE
static cpp::optional
<cpp::string_view
>
303 format_to(cpp::span
<char> buffer
, T value
) {
304 details::BackwardStringBufferWriter
writer(buffer
);
305 IntegerWriter::write(value
, writer
);
307 return cpp::string_view(buffer
.data() + buffer
.size() - writer
.size(),
312 LIBC_INLINE
static constexpr size_t buffer_size() { return BUFFER_SIZE
; }
314 LIBC_INLINE
size_t size() const { return written
; }
315 LIBC_INLINE
cpp::string_view
view() && = delete;
316 LIBC_INLINE
cpp::string_view
view() const & {
317 return cpp::string_view(array
.data() + array
.size() - size(), size());
321 } // namespace LIBC_NAMESPACE_DECL
323 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H