libc/src/stdio/printf_core/int_converter.h

   1 //===-- Integer Converter for printf ----------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
  10 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
  11
  12 #include "src/__support/CPP/span.h"
  13 #include "src/__support/CPP/string_view.h"
  14 #include "src/__support/integer_to_string.h"
  15 #include "src/__support/macros/config.h"
  16 #include "src/stdio/printf_core/converter_utils.h"
  17 #include "src/stdio/printf_core/core_structs.h"
  18 #include "src/stdio/printf_core/writer.h"
  19
  20 #include <inttypes.h>
  21 #include <stddef.h>
  22
  23 namespace LIBC_NAMESPACE_DECL {
  24 namespace printf_core {
  25
  26 // These functions only work on characters that are already known to be in the
  27 // alphabet. Their behavior is undefined otherwise.
  28 LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
  29 LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; }
  30
  31 namespace details {
  32
  33 using HexFmt = IntegerToString<uintmax_t, radix::Hex>;
  34 using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>;
  35 using OctFmt = IntegerToString<uintmax_t, radix::Oct>;
  36 using DecFmt = IntegerToString<uintmax_t>;
  37 using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
  38
  39 LIBC_INLINE constexpr size_t num_buf_size() {
  40   cpp::array<size_t, 5> sizes{
  41       HexFmt::buffer_size(), HexFmtUppercase::buffer_size(),
  42       OctFmt::buffer_size(), DecFmt::buffer_size(), BinFmt::buffer_size()};
  43
  44   auto result = sizes[0];
  45   for (size_t i = 1; i < sizes.size(); i++)
  46     result = cpp::max(result, sizes[i]);
  47   return result;
  48 }
  49
  50 LIBC_INLINE cpp::optional<cpp::string_view>
  51 num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
  52   if (to_lower(conv_name) == 'x') {
  53     if (is_lower(conv_name))
  54       return HexFmt::format_to(bufref, num);
  55     else
  56       return HexFmtUppercase::format_to(bufref, num);
  57   } else if (conv_name == 'o') {
  58     return OctFmt::format_to(bufref, num);
  59   } else if (to_lower(conv_name) == 'b') {
  60     return BinFmt::format_to(bufref, num);
  61   } else {
  62     return DecFmt::format_to(bufref, num);
  63   }
  64 }
  65
  66 } // namespace details
  67
  68 LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
  69   static constexpr size_t BITS_IN_BYTE = 8;
  70   static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
  71
  72   uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
  73   bool is_negative = false;
  74   FormatFlags flags = to_conv.flags;
  75   const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
  76
  77   // If the conversion is signed, then handle negative values.
  78   if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
  79     // Check if the number is negative by checking the high bit. This works even
  80     // for smaller numbers because they're sign extended by default.
  81     if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
  82       is_negative = true;
  83       num = -num;
  84     }
  85   } else {
  86     // These flags are only for signed conversions, so this removes them if the
  87     // conversion is unsigned.
  88     flags = FormatFlags(flags &
  89                         ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
  90   }
  91
  92   num =
  93       apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
  94   cpp::array<char, details::num_buf_size()> buf;
  95   auto str = details::num_to_strview(num, buf, to_conv.conv_name);
  96   if (!str)
  97     return INT_CONVERSION_ERROR;
  98
  99   size_t digits_written = str->size();
 100
 101   char sign_char = 0;
 102
 103   if (is_negative)
 104     sign_char = '-';
 105   else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
 106     sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
 107   else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
 108     sign_char = ' ';
 109
 110   // These are signed to prevent underflow due to negative values. The eventual
 111   // values will always be non-negative.
 112   int zeroes;
 113   int spaces;
 114
 115   // prefix is "0x" for hexadecimal, or the sign character for signed
 116   // conversions. Since hexadecimal is unsigned these will never conflict.
 117   size_t prefix_len;
 118   char prefix[2];
 119   if ((to_lower(to_conv.conv_name) == 'x') &&
 120       ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
 121     prefix_len = 2;
 122     prefix[0] = '0';
 123     prefix[1] = a + ('x' - 'a');
 124   } else if ((to_lower(to_conv.conv_name) == 'b') &&
 125              ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
 126     prefix_len = 2;
 127     prefix[0] = '0';
 128     prefix[1] = a + ('b' - 'a');
 129   } else {
 130     prefix_len = (sign_char == 0 ? 0 : 1);
 131     prefix[0] = sign_char;
 132   }
 133
 134   // Negative precision indicates that it was not specified.
 135   if (to_conv.precision < 0) {
 136     if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
 137         FormatFlags::LEADING_ZEROES) {
 138       // If this conv has flag 0 but not - and no specified precision, it's
 139       // padded with 0's instead of spaces identically to if precision =
 140       // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
 141       zeroes =
 142           static_cast<int>(to_conv.min_width - digits_written - prefix_len);
 143       spaces = 0;
 144     } else {
 145       // If there are enough digits to pass over the precision, just write the
 146       // number, padded by spaces.
 147       zeroes = 0;
 148       spaces =
 149           static_cast<int>(to_conv.min_width - digits_written - prefix_len);
 150     }
 151   } else {
 152     // If precision was specified, possibly write zeroes, and possibly write
 153     // spaces. Example: ("%5.4d", 10000) -> "10000"
 154     // If the check for if zeroes is negative was not there, spaces would be
 155     // incorrectly evaluated as 1.
 156     //
 157     // The standard treats the case when num and precision are both zeroes as
 158     // special - it requires that no characters are produced. So, we adjust for
 159     // that special case first.
 160     if (num == 0 && to_conv.precision == 0)
 161       digits_written = 0;
 162     zeroes = static_cast<int>(to_conv.precision -
 163                               digits_written); // a negative value means 0
 164     if (zeroes < 0)
 165       zeroes = 0;
 166     spaces = static_cast<int>(to_conv.min_width - zeroes - digits_written -
 167                               prefix_len);
 168   }
 169
 170   // The standard says that alternate form for the o conversion "increases
 171   // the precision, if and only if necessary, to force the first digit of the
 172   // result to be a zero (if the value and precision are both 0, a single 0 is
 173   // printed)"
 174   // This if checks the following conditions:
 175   // 1) is this an o conversion in alternate form?
 176   // 2) does this number has a leading zero?
 177   //    2a) ... because there are additional leading zeroes?
 178   //    2b) ... because it is just "0", unless it will not write any digits.
 179   const bool has_leading_zero =
 180       (zeroes > 0) || ((num == 0) && (digits_written != 0));
 181   if ((to_conv.conv_name == 'o') &&
 182       ((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0) &&
 183       !has_leading_zero) {
 184     zeroes = 1;
 185     --spaces;
 186   }
 187
 188   if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
 189     // If left justified it goes prefix zeroes digits spaces
 190     if (prefix_len != 0)
 191       RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len}));
 192     if (zeroes > 0)
 193       RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes));
 194     if (digits_written > 0)
 195       RET_IF_RESULT_NEGATIVE(writer->write(*str));
 196     if (spaces > 0)
 197       RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces));
 198   } else {
 199     // Else it goes spaces prefix zeroes digits
 200     if (spaces > 0)
 201       RET_IF_RESULT_NEGATIVE(writer->write(' ', spaces));
 202     if (prefix_len != 0)
 203       RET_IF_RESULT_NEGATIVE(writer->write({prefix, prefix_len}));
 204     if (zeroes > 0)
 205       RET_IF_RESULT_NEGATIVE(writer->write('0', zeroes));
 206     if (digits_written > 0)
 207       RET_IF_RESULT_NEGATIVE(writer->write(*str));
 208   }
 209   return WRITE_OK;
 210 }
 211
 212 } // namespace printf_core
 213 } // namespace LIBC_NAMESPACE_DECL
 214
 215 #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H