[libc] Use best-fit binary trie to make malloc logarithmic (#106259)
[llvm-project.git] / libc / src / stdio / printf_core / int_converter.h
blobf345e86b97a69116a47767637ca314c1748784a6
1 //===-- Integer Converter for printf ----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
10 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
12 #include "src/__support/CPP/span.h"
13 #include "src/__support/CPP/string_view.h"
14 #include "src/__support/integer_to_string.h"
15 #include "src/__support/macros/config.h"
16 #include "src/stdio/printf_core/converter_utils.h"
17 #include "src/stdio/printf_core/core_structs.h"
18 #include "src/stdio/printf_core/writer.h"
20 #include <inttypes.h>
21 #include <stddef.h>
23 namespace LIBC_NAMESPACE_DECL {
24 namespace printf_core {
26 // These functions only work on characters that are already known to be in the
27 // alphabet. Their behavior is undefined otherwise.
28 LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
29 LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; }
31 namespace details {
33 using HexFmt = IntegerToString<uintmax_t, radix::Hex>;
34 using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>;
35 using OctFmt = IntegerToString<uintmax_t, radix::Oct>;
36 using DecFmt = IntegerToString<uintmax_t>;
37 using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
39 LIBC_INLINE constexpr size_t num_buf_size() {
40 cpp::array<size_t, 5> sizes{
41 HexFmt::buffer_size(), HexFmtUppercase::buffer_size(),
42 OctFmt::buffer_size(), DecFmt::buffer_size(), BinFmt::buffer_size()};
44 auto result = sizes[0];
45 for (size_t i = 1; i < sizes.size(); i++)
46 result = cpp::max(result, sizes[i]);
47 return result;
50 LIBC_INLINE cpp::optional<cpp::string_view>
51 num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
52 if (to_lower(conv_name) == 'x') {
53 if (is_lower(conv_name))
54 return HexFmt::format_to(bufref, num);
55 else
56 return HexFmtUppercase::format_to(bufref, num);
57 } else if (conv_name == 'o') {
58 return OctFmt::format_to(bufref, num);
59 } else if (to_lower(conv_name) == 'b') {
60 return BinFmt::format_to(bufref, num);
61 } else {
62 return DecFmt::format_to(bufref, num);
66 } // namespace details
68 LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
69 static constexpr size_t BITS_IN_BYTE = 8;
70 static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
72 uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
73 bool is_negative = false;
74 FormatFlags flags = to_conv.flags;
75 const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
77 // If the conversion is signed, then handle negative values.
78 if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
79 // Check if the number is negative by checking the high bit. This works even
80 // for smaller numbers because they're sign extended by default.
81 if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
82 is_negative = true;
83 num = -num;
85 } else {
86 // These flags are only for signed conversions, so this removes them if the
87 // conversion is unsigned.
88 flags = FormatFlags(flags &
89 ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
92 num =
93 apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
94 cpp::array<char, details::num_buf_size()> buf;
95 auto str = details::num_to_strview(num, buf, to_conv.conv_name);
96 if (!str)
97 return INT_CONVERSION_ERROR;
99 size_t digits_written = str->size();
101 char sign_char = 0;
103 if (is_negative)
104 sign_char = '-';
105 else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
106 sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
107 else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
108 sign_char = ' ';
110 // These are signed to prevent underflow due to negative values. The eventual
111 // values will always be non-negative.
112 int zeroes;
113 int spaces;
115 // prefix is "0x" for hexadecimal, or the sign character for signed
116 // conversions. Since hexadecimal is unsigned these will never conflict.
117 size_t prefix_len;
118 char prefix[2];
119 if ((to_lower(to_conv.conv_name) == 'x') &&
120 ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
121 prefix_len = 2;
122 prefix[0] = '0';
123 prefix[1] = a + ('x' - 'a');
124 } else if ((to_lower(to_conv.conv_name) == 'b') &&
125 ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
126 prefix_len = 2;
127 prefix[0] = '0';
128 prefix[1] = a + ('b' - 'a');
129 } else {
130 prefix_len = (sign_char == 0 ? 0 : 1);
131 prefix[0] = sign_char;
134 // Negative precision indicates that it was not specified.
135 if (to_conv.precision < 0) {
136 if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
137 FormatFlags::LEADING_ZEROES) {
138 // If this conv has flag 0 but not - and no specified precision, it's
139 // padded with 0's instead of spaces identically to if precision =
140 // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
141 zeroes =
142 static_cast<int>(to_conv.min_width - digits_written - prefix_len);
143 spaces = 0;
144 } else {
145 // If there are enough digits to pass over the precision, just write the
146 // number, padded by spaces.
147 zeroes = 0;
148 spaces =
149 static_cast<int>(to_conv.min_width - digits_written - prefix_len);
151 } else {
152 // If precision was specified, possibly write zeroes, and possibly write
153 // spaces. Example: ("%5.4d", 10000) -> "10000"
154 // If the check for if zeroes is negative was not there, spaces would be
155 // incorrectly evaluated as 1.
157 // The standard treats the case when num and precision are both zeroes as
158 // special - it requires that no characters are produced. So, we adjust for
159 // that special case first.
160 if (num == 0 && to_conv.precision == 0)
161 digits_written = 0;
162 zeroes = static_cast<int>(to_conv.precision -
163 digits_written); // a negative value means 0
164 if (zeroes < 0)
165 zeroes = 0;
166 spaces = static_cast<int>(to_conv.min_width - zeroes - digits_written -
167 prefix_len);
170 // The standard says that alternate form for the o conversion "increases
171 // the precision, if and only if necessary, to force the first digit of the
172 // result to be a zero (if the value and precision are both 0, a single 0 is
173 // printed)"
174 // This if checks the following conditions:
175 // 1) is this an o conversion in alternate form?
176 // 2) does this number has a leading zero?
177 // 2a) ... because there are additional leading zeroes?
178 // 2b) ... because it is just "0", unless it will not write any digits.
179 const bool has_leading_zero =
180 (zeroes > 0) || ((num == 0) && (digits_written != 0));
181 if ((to_conv.conv_name == 'o') &&
182 ((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0) &&
183 !has_leading_zero) {
184 zeroes = 1;
185 --spaces;
188 if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
189 // If left justified it goes prefix zeroes digits spaces
190 if (prefix_len != 0)
191 RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len}));
192 if (zeroes > 0)
193 RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes));
194 if (digits_written > 0)
195 RET_IF_RESULT_NEGATIVE(writer->write(*str));
196 if (spaces > 0)
197 RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces));
198 } else {
199 // Else it goes spaces prefix zeroes digits
200 if (spaces > 0)
201 RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces));
202 if (prefix_len != 0)
203 RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len}));
204 if (zeroes > 0)
205 RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes));
206 if (digits_written > 0)
207 RET_IF_RESULT_NEGATIVE(writer->write(*str));
209 return WRITE_OK;
212 } // namespace printf_core
213 } // namespace LIBC_NAMESPACE_DECL
215 #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H