libc/src/stdio/scanf_core/float_converter.cpp

   1 //===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "src/stdio/scanf_core/float_converter.h"
  10
  11 #include "src/__support/CPP/limits.h"
  12 #include "src/__support/char_vector.h"
  13 #include "src/__support/ctype_utils.h"
  14 #include "src/stdio/scanf_core/converter_utils.h"
  15 #include "src/stdio/scanf_core/core_structs.h"
  16 #include "src/stdio/scanf_core/reader.h"
  17
  18 #include <stddef.h>
  19
  20 namespace LIBC_NAMESPACE {
  21 namespace scanf_core {
  22
  23 // All of the floating point conversions are the same for scanf, every name will
  24 // accept every style.
  25 int convert_float(Reader *reader, const FormatSection &to_conv) {
  26   // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
  27   // infinity, or NaN, whose format is the same as expected for the subject
  28   // sequence of the strtod function. The corresponding argument shall be a
  29   // pointer to floating."
  30
  31   CharVector out_str = CharVector();
  32   bool is_number = false;
  33
  34   size_t max_width = cpp::numeric_limits<size_t>::max();
  35   if (to_conv.max_width > 0) {
  36     max_width = to_conv.max_width;
  37   }
  38
  39   char cur_char = reader->getc();
  40   // Handle the sign.
  41   if (cur_char == '+' || cur_char == '-') {
  42     if (!out_str.append(cur_char)) {
  43       return ALLOCATION_FAILURE;
  44     }
  45     if (out_str.length() == max_width) {
  46       return MATCHING_FAILURE;
  47     } else {
  48       cur_char = reader->getc();
  49     }
  50   }
  51
  52   static constexpr char DECIMAL_POINT = '.';
  53   static const char inf_string[] = "infinity";
  54
  55   // Handle inf
  56
  57   if (to_lower(cur_char) == inf_string[0]) {
  58     size_t inf_index = 0;
  59
  60     for (; to_lower(cur_char) == inf_string[inf_index] &&
  61            inf_index < sizeof(inf_string) && out_str.length() < max_width;
  62          ++inf_index) {
  63       if (!out_str.append(cur_char)) {
  64         return ALLOCATION_FAILURE;
  65       }
  66       cur_char = reader->getc();
  67     }
  68
  69     if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
  70       write_float_with_length(out_str.c_str(), to_conv);
  71       return READ_OK;
  72     } else {
  73       return MATCHING_FAILURE;
  74     }
  75   }
  76
  77   static const char nan_string[] = "nan";
  78
  79   // Handle nan
  80   if (to_lower(cur_char) == nan_string[0]) {
  81     size_t nan_index = 0;
  82
  83     for (; to_lower(cur_char) == nan_string[nan_index] &&
  84            nan_index < sizeof(nan_string) && out_str.length() < max_width;
  85          ++nan_index) {
  86       if (!out_str.append(cur_char)) {
  87         return ALLOCATION_FAILURE;
  88       }
  89       cur_char = reader->getc();
  90     }
  91
  92     if (nan_index == sizeof(nan_string) - 1) {
  93       write_float_with_length(out_str.c_str(), to_conv);
  94       return READ_OK;
  95     } else {
  96       return MATCHING_FAILURE;
  97     }
  98   }
  99
 100   // Assume base of 10 by default but check if it is actually base 16.
 101   int base = 10;
 102
 103   // If the string starts with 0 it might be in hex.
 104   if (cur_char == '0') {
 105     is_number = true;
 106     // Read the next character to check.
 107     if (!out_str.append(cur_char)) {
 108       return ALLOCATION_FAILURE;
 109     }
 110     // If we've hit the end, then this is "0", which is valid.
 111     if (out_str.length() == max_width) {
 112       write_float_with_length(out_str.c_str(), to_conv);
 113       return READ_OK;
 114     } else {
 115       cur_char = reader->getc();
 116     }
 117
 118     // If that next character is an 'x' then this is a hexadecimal number.
 119     if (to_lower(cur_char) == 'x') {
 120       base = 16;
 121
 122       if (!out_str.append(cur_char)) {
 123         return ALLOCATION_FAILURE;
 124       }
 125       // If we've hit the end here, we have "0x" which is a valid prefix to a
 126       // floating point number, and will be evaluated to 0.
 127       if (out_str.length() == max_width) {
 128         write_float_with_length(out_str.c_str(), to_conv);
 129         return READ_OK;
 130       } else {
 131         cur_char = reader->getc();
 132       }
 133     }
 134   }
 135
 136   const char exponent_mark = ((base == 10) ? 'e' : 'p');
 137   bool after_decimal = false;
 138
 139   // The format for the remaining characters at this point is DD.DDe+/-DD for
 140   // base 10 and XX.XXp+/-DD for base 16
 141
 142   // This handles the digits before and after the decimal point, but not the
 143   // exponent.
 144   while (out_str.length() < max_width) {
 145     if (internal::isalnum(cur_char) &&
 146         internal::b36_char_to_int(cur_char) < base) {
 147       is_number = true;
 148       if (!out_str.append(cur_char)) {
 149         return ALLOCATION_FAILURE;
 150       }
 151       cur_char = reader->getc();
 152     } else if (cur_char == DECIMAL_POINT && !after_decimal) {
 153       after_decimal = true;
 154       if (!out_str.append(cur_char)) {
 155         return ALLOCATION_FAILURE;
 156       }
 157       cur_char = reader->getc();
 158     } else {
 159       break;
 160     }
 161   }
 162
 163   // Handle the exponent, which has an exponent mark, an optional sign, and
 164   // decimal digits.
 165   if (to_lower(cur_char) == exponent_mark) {
 166     if (!out_str.append(cur_char)) {
 167       return ALLOCATION_FAILURE;
 168     }
 169     if (out_str.length() == max_width) {
 170       // This is laid out in the standard as being a matching error (100e is not
 171       // a valid float) but may conflict with existing implementations.
 172       return MATCHING_FAILURE;
 173     } else {
 174       cur_char = reader->getc();
 175     }
 176
 177     if (cur_char == '+' || cur_char == '-') {
 178       if (!out_str.append(cur_char)) {
 179         return ALLOCATION_FAILURE;
 180       }
 181       if (out_str.length() == max_width) {
 182         return MATCHING_FAILURE;
 183       } else {
 184         cur_char = reader->getc();
 185       }
 186     }
 187
 188     // It is specified by the standard that "100er" is a matching failure since
 189     // the longest prefix of a possibly valid floating-point number (which is
 190     // "100e") is not a valid floating-point number. If there is an exponent
 191     // mark then there must be a digit after it else the number is not valid.
 192     // Some implementations will roll back two characters (to just "100") and
 193     // accept that since the prefix is not valid, and some will interpret an
 194     // exponent mark followed by no digits as an additional exponent of 0
 195     // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
 196     // by the standard, but they may be used in real code, see Hyrum's law. This
 197     // code follows the standard, but may be incompatible due to code expecting
 198     // these bugs.
 199     if (!internal::isdigit(cur_char)) {
 200       return MATCHING_FAILURE;
 201     }
 202
 203     while (internal::isdigit(cur_char) && out_str.length() < max_width) {
 204       if (!out_str.append(cur_char)) {
 205         return ALLOCATION_FAILURE;
 206       }
 207       cur_char = reader->getc();
 208     }
 209   }
 210
 211   // We always read one more character than will be used, so we have to put the
 212   // last one back.
 213   reader->ungetc(cur_char);
 214
 215   // If we haven't actually found any digits, this is a matching failure (this
 216   // catches cases like "+.")
 217   if (!is_number) {
 218     return MATCHING_FAILURE;
 219   }
 220   write_float_with_length(out_str.c_str(), to_conv);
 221
 222   return READ_OK;
 223 }
 224
 225 } // namespace scanf_core
 226 } // namespace LIBC_NAMESPACE