libc/src/__support/integer_to_string.h

   1 //===-- Utilities to convert integral values to string ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Converts an integer to a string.
  10 //
  11 // By default, the string is written as decimal to an internal buffer and
  12 // accessed via the 'view' method.
  13 //
  14 //   IntegerToString<int> buffer(42);
  15 //   cpp::string_view view = buffer.view();
  16 //
  17 // The buffer is allocated on the stack and its size is so that the conversion
  18 // always succeeds.
  19 //
  20 // It is also possible to write the data to a preallocated buffer, but this may
  21 // fail.
  22 //
  23 //   char buffer[8];
  24 //   if (auto maybe_view = IntegerToString<int>::format_to(buffer, 42)) {
  25 //     cpp::string_view view = *maybe_view;
  26 //   }
  27 //
  28 // The first template parameter is the type of the integer.
  29 // The second template parameter defines how the integer is formatted.
  30 // The available defaults are 'radix::Bin', 'radix::Oct', 'radix::Dec' and
  31 // 'radix::Hex'.
  32 //
  33 // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
  34 // interpreted as a positive type but 'radix::Dec' will honor negative values.
  35 // e.g.,
  36 //
  37 //   IntegerToString<int8_t>(-1)             // "-1"
  38 //   IntegerToString<int8_t, radix::Dec>(-1) // "-1"
  39 //   IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
  40 //   IntegerToString<int8_t, radix::Oct>(-1) // "377"
  41 //   IntegerToString<int8_t, radix::Hex>(-1) // "ff"
  42 //
  43 // Additionally, the format can be changed by navigating the subtypes:
  44 //  - WithPrefix    : Adds "0b", "0", "0x" for binary, octal and hexadecimal
  45 //  - WithWidth<XX> : Pad to at least XX digits, filling leading digits with 0
  46 //                    (the sign and the prefix are not counted)
  47 //  - Uppercase     : Use uppercase letters (only for radix > 10, e.g. Hex)
  48 //  - WithSign      : Prepend '+' for non-negative values (only for radix::Dec)
  48 //
  49 // Examples
  50 // --------
  51 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0)     : "+00"
  52 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1)    : "-01"
  53 //   IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255)   : "0xFF"
  54 //   IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
  55 //===----------------------------------------------------------------------===//
  56
  57 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
  58 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
  59
  60 #include <stdint.h>
  61
  62 #include "src/__support/CPP/algorithm.h" // max
  63 #include "src/__support/CPP/array.h"
  64 #include "src/__support/CPP/bit.h"
  65 #include "src/__support/CPP/limits.h"
  66 #include "src/__support/CPP/optional.h"
  67 #include "src/__support/CPP/span.h"
  68 #include "src/__support/CPP/string_view.h"
  69 #include "src/__support/CPP/type_traits.h"
  70 #include "src/__support/common.h"
  71
  72 namespace LIBC_NAMESPACE {
  73
  74 namespace details {
  75
  76 template <uint8_t base, bool prefix = false, bool force_sign = false,
  77           bool is_uppercase = false, size_t min_digits = 1>
  78 struct Fmt {
  79   static constexpr uint8_t BASE = base;
  80   static constexpr size_t MIN_DIGITS = min_digits;
  81   static constexpr bool IS_UPPERCASE = is_uppercase;
  82   static constexpr bool PREFIX = prefix;
  83   static constexpr char FORCE_SIGN = force_sign;
  84
  85   using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
  86   using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
  87   using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
  88   template <size_t value>
  89   using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;
  90
  91   // Invariants
  92   static constexpr uint8_t NUMERICAL_DIGITS = 10;
  93   static constexpr uint8_t ALPHA_DIGITS = 26;
  94   static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
  95   static_assert(BASE > 1 && BASE <= MAX_DIGIT);
  96   static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10");
  97   static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10");
  98   static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16),
  99                 "WithPrefix is only for radix == 2, 8 or 16");
 100 };
 101
 102 // Move this to a separate header since it might be useful elsewhere.
 103 template <bool forward> class StringBufferWriterImpl {
 104   cpp::span<char> buffer;
 105   size_t index = 0;
 106   bool out_of_range = false;
 107
 108   LIBC_INLINE size_t location() const {
 109     return forward ? index : buffer.size() - 1 - index;
 110   }
 111
 112 public:
 113   StringBufferWriterImpl(const StringBufferWriterImpl &) = delete;
 114   StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {}
 115
 116   LIBC_INLINE size_t size() const { return index; }
 117   LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); }
 118   LIBC_INLINE bool empty() const { return size() == 0; }
 119   LIBC_INLINE bool full() const { return size() == buffer.size(); }
 120   LIBC_INLINE bool ok() const { return !out_of_range; }
 121
 122   LIBC_INLINE StringBufferWriterImpl &push(char c) {
 123     if (ok()) {
 124       if (!full()) {
 125         buffer[location()] = c;
 126         ++index;
 127       } else {
 128         out_of_range = true;
 129       }
 130     }
 131     return *this;
 132   }
 133
 134   LIBC_INLINE cpp::span<char> remainder_span() const {
 135     return forward ? buffer.last(remainder_size())
 136                    : buffer.first(remainder_size());
 137   }
 138
 139   LIBC_INLINE cpp::span<char> buffer_span() const {
 140     return forward ? buffer.first(size()) : buffer.last(size());
 141   }
 142
 143   LIBC_INLINE cpp::string_view buffer_view() const {
 144     const auto s = buffer_span();
 145     return {s.data(), s.size()};
 146   }
 147 };
 148
 149 using StringBufferWriter = StringBufferWriterImpl<true>;
 150 using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
 151
 152 } // namespace details
 153
 154 namespace radix {
 155
 156 using Bin = details::Fmt<2>;
 157 using Oct = details::Fmt<8>;
 158 using Dec = details::Fmt<10>;
 159 using Hex = details::Fmt<16>;
 160 template <size_t radix> using Custom = details::Fmt<radix>;
 161
 162 } // namespace radix
 163
 164 // See file header for documentation.
 165 template <typename T, typename Fmt = radix::Dec> class IntegerToString {
 166   static_assert(cpp::is_integral_v<T>);
 167
 168   LIBC_INLINE static constexpr size_t compute_buffer_size() {
 169     constexpr auto max_digits = []() -> size_t {
 170       // We size the string buffer for base 10 using an approximation algorithm:
 171       //
 172       //   size = ceil(sizeof(T) * 5 / 2)
 173       //
 174       // If sizeof(T) is 1, then size is 3 (actually need 3)
 175       // If sizeof(T) is 2, then size is 5 (actually need 5)
 176       // If sizeof(T) is 4, then size is 10 (actually need 10)
 177       // If sizeof(T) is 8, then size is 20 (actually need 20)
 178       // If sizeof(T) is 16, then size is 40 (actually need 39)
 179       //
 180       // NOTE: The ceil operation is actually implemented as
 181       //     floor(((sizeof(T) * 5) + 1) / 2)
 182       // where floor operation is just integer division.
 183       //
 184       // This estimation grows slightly faster than the actual value, but the
 185       // overhead is small enough to tolerate.
 186       if constexpr (Fmt::BASE == 10)
 187         return ((sizeof(T) * 5) + 1) / 2;
 188       // For other bases, we approximate by rounding down to the nearest power
 189       // of two base, since the space needed is easy to calculate and it won't
 190       // overestimate by too much.
 191       constexpr auto floor_log_2 = [](size_t num) -> size_t {
 192         size_t i = 0;
 193         for (; num > 1; num /= 2)
 194           ++i;
 195         return i;
 196       };
 197       constexpr size_t BITS_PER_DIGIT = floor_log_2(Fmt::BASE);
 198       return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
 199     };
 200     constexpr size_t digit_size = cpp::max(max_digits(), Fmt::MIN_DIGITS);
 201     constexpr size_t sign_size = Fmt::BASE == 10 ? 1 : 0;
 202     constexpr size_t prefix_size = Fmt::PREFIX ? 2 : 0;
 203     return digit_size + sign_size + prefix_size;
 204   }
 205
 206   static constexpr size_t BUFFER_SIZE = compute_buffer_size();
 207   static_assert(BUFFER_SIZE > 0);
 208
 209   // An internal stateless structure that handles the number formatting logic.
 210   struct IntegerWriter {
 211     static_assert(cpp::is_integral_v<T>);
 212     using UNSIGNED_T = cpp::make_unsigned_t<T>;
 213
 214     LIBC_INLINE static char digit_char(uint8_t digit) {
 215       if (digit < 10)
 216         return '0' + static_cast<char>(digit);
 217       return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
 218     }
 219
 220     LIBC_INLINE static void
 221     write_unsigned_number(UNSIGNED_T value,
 222                           details::BackwardStringBufferWriter &sink) {
 223       for (; sink.ok() && value != 0; value /= Fmt::BASE) {
 224         const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE));
 225         sink.push(digit_char(digit));
 226       }
 227     }
 228
 229     // Returns the absolute value of 'value' as 'UNSIGNED_T'.
 230     LIBC_INLINE static UNSIGNED_T abs(T value) {
 231       if (cpp::is_unsigned_v<T> || value >= 0)
 232         return value; // already of the right sign.
 233
 234       // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
 235       // Thus negating the type's minimum value would overflow.
 236       // From C++20 on, signed types are guaranteed to be represented as 2's
 237       // complement. We take advantage of this representation and negate the
 238       // value by using the exact same bit representation, e.g.,
 239       // binary : 0b1000'0000
 240       // int8_t : -128
 241       // uint8_t:  128
 242
 243       // Note: the compiler can completely optimize out the two branches and
 244       // replace them by a simple negate instruction.
 245       // https://godbolt.org/z/hE7zahT9W
 246       if (value == cpp::numeric_limits<T>::min()) {
 247         return cpp::bit_cast<UNSIGNED_T>(value);
 248       } else {
 249         return -value; // legal and representable both as T and UNSIGNED_T.`
 250       }
 251     }
 252
 253     LIBC_INLINE static void write(T value,
 254                                   details::BackwardStringBufferWriter &sink) {
 255       if constexpr (Fmt::BASE == 10) {
 256         write_unsigned_number(abs(value), sink);
 257       } else {
 258         write_unsigned_number(cpp::bit_cast<UNSIGNED_T>(value), sink);
 259       }
 260       // width
 261       while (sink.ok() && sink.size() < Fmt::MIN_DIGITS)
 262         sink.push('0');
 263       // sign
 264       if constexpr (Fmt::BASE == 10) {
 265         if (value < 0)
 266           sink.push('-');
 267         else if (Fmt::FORCE_SIGN)
 268           sink.push('+');
 269       }
 270       // prefix
 271       if constexpr (Fmt::PREFIX) {
 272         if constexpr (Fmt::BASE == 2) {
 273           sink.push('b');
 274           sink.push('0');
 275         }
 276         if constexpr (Fmt::BASE == 16) {
 277           sink.push('x');
 278           sink.push('0');
 279         }
 280         if constexpr (Fmt::BASE == 8) {
 281           const cpp::string_view written = sink.buffer_view();
 282           if (written.empty() || written.front() != '0')
 283             sink.push('0');
 284         }
 285       }
 286     }
 287   };
 288
 289   cpp::array<char, BUFFER_SIZE> array;
 290   size_t written = 0;
 291
 292 public:
 293   IntegerToString(const IntegerToString &) = delete;
 294   IntegerToString(T value) {
 295     details::BackwardStringBufferWriter writer(array);
 296     IntegerWriter::write(value, writer);
 297     written = writer.size();
 298   }
 299
 300   [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view>
 301   format_to(cpp::span<char> buffer, T value) {
 302     details::BackwardStringBufferWriter writer(buffer);
 303     IntegerWriter::write(value, writer);
 304     if (writer.ok())
 305       return cpp::string_view(buffer.data() + buffer.size() - writer.size(),
 306                               writer.size());
 307     return cpp::nullopt;
 308   }
 309
 310   LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; }
 311
 312   LIBC_INLINE size_t size() const { return written; }
 313   LIBC_INLINE cpp::string_view view() && = delete;
 314   LIBC_INLINE cpp::string_view view() const & {
 315     return cpp::string_view(array.data() + array.size() - size(), size());
 316   }
 317 };
 318
 319 } // namespace LIBC_NAMESPACE
 320
 321 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H