libc/src/__support/integer_to_string.h

   1 //===-- Utilities to convert integral values to string ----------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // Converts an integer to a string.
  10 //
  11 // By default, the string is written as decimal to an internal buffer and
  12 // accessed via the 'view' method.
  13 //
  14 //   IntegerToString<int> buffer(42);
  15 //   cpp::string_view view = buffer.view();
  16 //
  17 // The buffer is allocated on the stack and its size is so that the conversion
  18 // always succeeds.
  19 //
  20 // It is also possible to write the data to a preallocated buffer, but this may
  21 // fail.
  22 //
  23 //   char buffer[8];
  24 //   if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) {
  25 //     cpp::string_view view = *maybe_view;
  26 //   }
  27 //
  28 // The first template parameter is the type of the integer.
  29 // The second template parameter defines how the integer is formatted.
  30 // Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and
  31 // 'radix::Hex'.
  32 //
  33 // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
  34 // interpreted as a positive type but 'radix::Dec' will honor negative values.
  35 // e.g.,
  36 //
  37 //   IntegerToString<int8_t>(-1)             // "-1"
  38 //   IntegerToString<int8_t, radix::Dec>(-1) // "-1"
  39 //   IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
  40 //   IntegerToString<int8_t, radix::Oct>(-1) // "377"
  41 //   IntegerToString<int8_t, radix::Hex>(-1) // "ff"
  42 //
  43 // Additionnally, the format can be changed by navigating the subtypes:
  44 //  - WithPrefix    : Adds "0b", "0", "0x" for binary, octal and hexadecimal
  45 //  - WithWidth<XX> : Pad string to XX characters filling leading digits with 0
  46 //  - Uppercase     : Use uppercase letters (only for HexString)
  47 //  - WithSign      : Prepend '+' for positive values (only for DecString)
  48 //
  49 // Examples
  50 // --------
  51 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0)     : "+00"
  52 //   IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1)    : "-01"
  53 //   IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255)   : "0xFF"
  54 //   IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
  55 //===----------------------------------------------------------------------===//
  56
  57 #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
  58 #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
  59
  60 #include <stdint.h>
  61
  62 #include "src/__support/CPP/algorithm.h" // max
  63 #include "src/__support/CPP/array.h"
  64 #include "src/__support/CPP/bit.h"
  65 #include "src/__support/CPP/limits.h"
  66 #include "src/__support/CPP/optional.h"
  67 #include "src/__support/CPP/span.h"
  68 #include "src/__support/CPP/string_view.h"
  69 #include "src/__support/CPP/type_traits.h"
  70 #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
  71 #include "src/__support/common.h"
  72 #include "src/__support/macros/config.h"
  73
  74 namespace LIBC_NAMESPACE_DECL {
  75
  76 namespace details {
  77
  78 template <uint8_t base, bool prefix = false, bool force_sign = false,
  79           bool is_uppercase = false, size_t min_digits = 1>
  80 struct Fmt {
  81   static constexpr uint8_t BASE = base;
  82   static constexpr size_t MIN_DIGITS = min_digits;
  83   static constexpr bool IS_UPPERCASE = is_uppercase;
  84   static constexpr bool PREFIX = prefix;
  85   static constexpr char FORCE_SIGN = force_sign;
  86
  87   using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
  88   using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
  89   using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
  90   template <size_t value>
  91   using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;
  92
  93   // Invariants
  94   static constexpr uint8_t NUMERICAL_DIGITS = 10;
  95   static constexpr uint8_t ALPHA_DIGITS = 26;
  96   static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
  97   static_assert(BASE > 1 && BASE <= MAX_DIGIT);
  98   static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10");
  99   static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10");
 100   static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16),
 101                 "WithPrefix is only for radix == 2, 8 or 16");
 102 };
 103
 104 // Move this to a separate header since it might be useful elsewhere.
 105 template <bool forward> class StringBufferWriterImpl {
 106   cpp::span<char> buffer;
 107   size_t index = 0;
 108   bool out_of_range = false;
 109
 110   LIBC_INLINE size_t location() const {
 111     return forward ? index : buffer.size() - 1 - index;
 112   }
 113
 114 public:
 115   StringBufferWriterImpl(const StringBufferWriterImpl &) = delete;
 116   StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {}
 117
 118   LIBC_INLINE size_t size() const { return index; }
 119   LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); }
 120   LIBC_INLINE bool empty() const { return size() == 0; }
 121   LIBC_INLINE bool full() const { return size() == buffer.size(); }
 122   LIBC_INLINE bool ok() const { return !out_of_range; }
 123
 124   LIBC_INLINE StringBufferWriterImpl &push(char c) {
 125     if (ok()) {
 126       if (!full()) {
 127         buffer[location()] = c;
 128         ++index;
 129       } else {
 130         out_of_range = true;
 131       }
 132     }
 133     return *this;
 134   }
 135
 136   LIBC_INLINE cpp::span<char> remainder_span() const {
 137     return forward ? buffer.last(remainder_size())
 138                    : buffer.first(remainder_size());
 139   }
 140
 141   LIBC_INLINE cpp::span<char> buffer_span() const {
 142     return forward ? buffer.first(size()) : buffer.last(size());
 143   }
 144
 145   LIBC_INLINE cpp::string_view buffer_view() const {
 146     const auto s = buffer_span();
 147     return {s.data(), s.size()};
 148   }
 149 };
 150
 151 using StringBufferWriter = StringBufferWriterImpl<true>;
 152 using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
 153
 154 } // namespace details
 155
 156 namespace radix {
 157
 158 using Bin = details::Fmt<2>;
 159 using Oct = details::Fmt<8>;
 160 using Dec = details::Fmt<10>;
 161 using Hex = details::Fmt<16>;
 162 template <size_t radix> using Custom = details::Fmt<radix>;
 163
 164 } // namespace radix
 165
 166 // See file header for documentation.
 167 template <typename T, typename Fmt = radix::Dec> class IntegerToString {
 168   static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
 169
 170   LIBC_INLINE static constexpr size_t compute_buffer_size() {
 171     constexpr auto MAX_DIGITS = []() -> size_t {
 172       // We size the string buffer for base 10 using an approximation algorithm:
 173       //
 174       //   size = ceil(sizeof(T) * 5 / 2)
 175       //
 176       // If sizeof(T) is 1, then size is 3 (actually need 3)
 177       // If sizeof(T) is 2, then size is 5 (actually need 5)
 178       // If sizeof(T) is 4, then size is 10 (actually need 10)
 179       // If sizeof(T) is 8, then size is 20 (actually need 20)
 180       // If sizeof(T) is 16, then size is 40 (actually need 39)
 181       //
 182       // NOTE: The ceil operation is actually implemented as
 183       //     floor(((sizeof(T) * 5) + 1) / 2)
 184       // where floor operation is just integer division.
 185       //
 186       // This estimation grows slightly faster than the actual value, but the
 187       // overhead is small enough to tolerate.
 188       if constexpr (Fmt::BASE == 10)
 189         return ((sizeof(T) * 5) + 1) / 2;
 190       // For other bases, we approximate by rounding down to the nearest power
 191       // of two base, since the space needed is easy to calculate and it won't
 192       // overestimate by too much.
 193       constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t {
 194         size_t i = 0;
 195         for (; num > 1; num /= 2)
 196           ++i;
 197         return i;
 198       };
 199       constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE);
 200       return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
 201     };
 202     constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS);
 203     constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0;
 204     constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0;
 205     return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE;
 206   }
 207
 208   static constexpr size_t BUFFER_SIZE = compute_buffer_size();
 209   static_assert(BUFFER_SIZE > 0);
 210
 211   // An internal stateless structure that handles the number formatting logic.
 212   struct IntegerWriter {
 213     static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
 214     using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
 215
 216     LIBC_INLINE static char digit_char(uint8_t digit) {
 217       if (digit < 10)
 218         return '0' + static_cast<char>(digit);
 219       return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
 220     }
 221
 222     LIBC_INLINE static void
 223     write_unsigned_number(UNSIGNED_T value,
 224                           details::BackwardStringBufferWriter &sink) {
 225       for (; sink.ok() && value != 0; value /= Fmt::BASE) {
 226         const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE));
 227         sink.push(digit_char(digit));
 228       }
 229     }
 230
 231     // Returns the absolute value of 'value' as 'UNSIGNED_T'.
 232     LIBC_INLINE static UNSIGNED_T abs(T value) {
 233       if (cpp::is_unsigned_v<T> || value >= 0)
 234         return value; // already of the right sign.
 235
 236       // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
 237       // Thus negating the type's minimum value would overflow.
 238       // From C++20 on, signed types are guaranteed to be represented as 2's
 239       // complement. We take advantage of this representation and negate the
 240       // value by using the exact same bit representation, e.g.,
 241       // binary : 0b1000'0000
 242       // int8_t : -128
 243       // uint8_t:  128
 244
 245       // Note: the compiler can completely optimize out the two branches and
 246       // replace them by a simple negate instruction.
 247       // https://godbolt.org/z/hE7zahT9W
 248       if (value == cpp::numeric_limits<T>::min()) {
 249         return cpp::bit_cast<UNSIGNED_T>(value);
 250       } else {
 251         return -value; // legal and representable both as T and UNSIGNED_T.`
 252       }
 253     }
 254
 255     LIBC_INLINE static void write(T value,
 256                                   details::BackwardStringBufferWriter &sink) {
 257       if constexpr (Fmt::BASE == 10) {
 258         write_unsigned_number(abs(value), sink);
 259       } else {
 260         write_unsigned_number(static_cast<UNSIGNED_T>(value), sink);
 261       }
 262       // width
 263       while (sink.ok() && sink.size() < Fmt::MIN_DIGITS)
 264         sink.push('0');
 265       // sign
 266       if constexpr (Fmt::BASE == 10) {
 267         if (value < 0)
 268           sink.push('-');
 269         else if (Fmt::FORCE_SIGN)
 270           sink.push('+');
 271       }
 272       // prefix
 273       if constexpr (Fmt::PREFIX) {
 274         if constexpr (Fmt::BASE == 2) {
 275           sink.push('b');
 276           sink.push('0');
 277         }
 278         if constexpr (Fmt::BASE == 16) {
 279           sink.push('x');
 280           sink.push('0');
 281         }
 282         if constexpr (Fmt::BASE == 8) {
 283           const cpp::string_view written = sink.buffer_view();
 284           if (written.empty() || written.front() != '0')
 285             sink.push('0');
 286         }
 287       }
 288     }
 289   };
 290
 291   cpp::array<char, BUFFER_SIZE> array;
 292   size_t written = 0;
 293
 294 public:
 295   IntegerToString(const IntegerToString &) = delete;
 296   IntegerToString(T value) {
 297     details::BackwardStringBufferWriter writer(array);
 298     IntegerWriter::write(value, writer);
 299     written = writer.size();
 300   }
 301
 302   [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view>
 303   format_to(cpp::span<char> buffer, T value) {
 304     details::BackwardStringBufferWriter writer(buffer);
 305     IntegerWriter::write(value, writer);
 306     if (writer.ok())
 307       return cpp::string_view(buffer.data() + buffer.size() - writer.size(),
 308                               writer.size());
 309     return cpp::nullopt;
 310   }
 311
 312   LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; }
 313
 314   LIBC_INLINE size_t size() const { return written; }
 315   LIBC_INLINE cpp::string_view view() && = delete;
 316   LIBC_INLINE cpp::string_view view() const & {
 317     return cpp::string_view(array.data() + array.size() - size(), size());
 318   }
 319 };
 320
 321 } // namespace LIBC_NAMESPACE_DECL
 322
 323 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H