sprintf.c

   1 /**********************************************************************
   2
   3   sprintf.c -
   4
   5   $Author$
   6   created at: Fri Oct 15 10:39:26 JST 1993
   7
   8   Copyright (C) 1993-2007 Yukihiro Matsumoto
   9   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
  10   Copyright (C) 2000  Information-technology Promotion Agency, Japan
  11
  12 **********************************************************************/
  13
#include "ruby/ruby.h"
#include "ruby/re.h"
#include "ruby/encoding.h"
#include <limits.h>
#include <math.h>
#include <stdarg.h>

#ifdef HAVE_IEEEFP_H
#include <ieeefp.h>
#endif
  23
  24 #define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log2(10) =~ 146/485 */
  25 #define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT)
  26 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
  27
  28 static void fmt_setup(char*,int,int,int,int);
  29
  30 static char*
  31 remove_sign_bits(char *str, int base)
  32 {
  33     char *s, *t;
  34
  35     s = t = str;
  36
  37     if (base == 16) {
  38         while (*t == 'f') {
  39             t++;
  40         }
  41     }
  42     else if (base == 8) {
  43         *t |= EXTENDSIGN(3, strlen(t));
  44         while (*t == '7') {
  45             t++;
  46         }
  47     }
  48     else if (base == 2) {
  49         while (*t == '1') {
  50             t++;
  51         }
  52     }
  53
  54     return t;
  55 }
  56
  57 static char
  58 sign_bits(int base, const char *p)
  59 {
  60     char c = '.';
  61
  62     switch (base) {
  63       case 16:
  64         if (*p == 'X') c = 'F';
  65         else c = 'f';
  66         break;
  67       case 8:
  68         c = '7'; break;
  69       case 2:
  70         c = '1'; break;
  71     }
  72     return c;
  73 }
  74
  75 #define FNONE  0
  76 #define FSHARP 1
  77 #define FMINUS 2
  78 #define FPLUS  4
  79 #define FZERO  8
  80 #define FSPACE 16
  81 #define FWIDTH 32
  82 #define FPREC  64
  83 #define FPREC0 128
  84
  85 #define CHECK(l) do {\
  86     while (blen + (l) >= bsiz) {\
  87         bsiz*=2;\
  88     }\
  89     rb_str_resize(result, bsiz);\
  90     buf = RSTRING_PTR(result);\
  91 } while (0)
  92
  93 #define PUSH(s, l) do { \
  94     CHECK(l);\
  95     memcpy(&buf[blen], s, l);\
  96     blen += (l);\
  97 } while (0)
  98
  99 #define FILL(c, l) do { \
 100     CHECK(l);\
 101     memset(&buf[blen], c, l);\
 102     blen += (l);\
 103 } while (0)
 104
 105 #define GETARG() (nextvalue != Qundef ? nextvalue : \
 106     posarg < 0 ? \
 107     (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \
 108     (posarg = nextarg++, GETNTHARG(posarg)))
 109
 110 #define GETPOSARG(n) (posarg > 0 ? \
 111     (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \
 112     ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \
 113                (posarg = -1, GETNTHARG(n))))
 114
 115 #define GETNTHARG(nth) \
 116     ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
 117
 118 #define GETNUM(n, val) \
 119     for (; p < end && rb_enc_isdigit(*p, enc); p++) {   \
 120         int next_n = 10 * n + (*p - '0'); \
 121         if (next_n / 10 != n) {\
 122             rb_raise(rb_eArgError, #val " too big"); \
 123         } \
 124         n = next_n; \
 125     } \
 126     if (p >= end) { \
 127         rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \
 128     }
 129
 130 #define GETASTER(val) do { \
 131     t = p++; \
 132     n = 0; \
 133     GETNUM(n, val); \
 134     if (*p == '$') { \
 135         tmp = GETPOSARG(n); \
 136     } \
 137     else { \
 138         tmp = GETARG(); \
 139         p = t; \
 140     } \
 141     val = NUM2INT(tmp); \
 142 } while (0)
 143
 144
 145 /*
 146  *  call-seq:
 147  *     format(format_string [, arguments...] )   => string
 148  *     sprintf(format_string [, arguments...] )  => string
 149  *
 150  *  Returns the string resulting from applying <i>format_string</i> to
 151  *  any additional arguments.  Within the format string, any characters
 152  *  other than format sequences are copied to the result.
 153  *
  154  *  The syntax of a format sequence is as follows.
 155  *
 156  *    %[flags][width][.precision]type
 157  *
 158  *  A format
 159  *  sequence consists of a percent sign, followed by optional flags,
 160  *  width, and precision indicators, then terminated with a field type
 161  *  character.  The field type controls how the corresponding
 162  *  <code>sprintf</code> argument is to be interpreted, while the flags
 163  *  modify that interpretation.
 164  *
 165  *  The field type characters are:
 166  *
 167  *      Field |  Integer Format
 168  *      ------+--------------------------------------------------------------
 169  *        b   | Convert argument as a binary number.
 170  *            | Negative numbers will be displayed as a two's complement
 171  *            | prefixed with `..1'.
 172  *        B   | Equivalent to `b', but uses an uppercase 0B for prefix
 173  *            | in the alternative format by #.
 174  *        d   | Convert argument as a decimal number.
 175  *        i   | Identical to `d'.
 176  *        o   | Convert argument as an octal number.
 177  *            | Negative numbers will be displayed as a two's complement
 178  *            | prefixed with `..7'.
 179  *        u   | Identical to `d'.
 180  *        x   | Convert argument as a hexadecimal number.
 181  *            | Negative numbers will be displayed as a two's complement
 182  *            | prefixed with `..f' (representing an infinite string of
 183  *            | leading 'ff's).
 184  *        X   | Equivalent to `x', but uses uppercase letters.
 185  *
 186  *      Field |  Float Format
 187  *      ------+--------------------------------------------------------------
 188  *        e   | Convert floating point argument into exponential notation
 189  *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
 190  *            | The precision specifies the number of digits after the decimal
 191  *            | point (defaulting to six).
 192  *        E   | Equivalent to `e', but uses an uppercase E to indicate
 193  *            | the exponent.
 194  *        f   | Convert floating point argument as [-]ddd.dddddd,
 195  *            | where the precision specifies the number of digits after
 196  *            | the decimal point.
 197  *        g   | Convert a floating point number using exponential form
 198  *            | if the exponent is less than -4 or greater than or
 199  *            | equal to the precision, or in dd.dddd form otherwise.
 200  *            | The precision specifies the number of significant digits.
  201  *        G   | Equivalent to `g', but uses an uppercase `E' in exponent form.
 202  *
 203  *      Field |  Other Format
 204  *      ------+--------------------------------------------------------------
 205  *        c   | Argument is the numeric code for a single character or
 206  *            | a single character string itself.
  207  *        p   | The value of argument.inspect.
 208  *        s   | Argument is a string to be substituted.  If the format
 209  *            | sequence contains a precision, at most that many characters
 210  *            | will be copied.
 211  *        %   | A percent sign itself will be displayed.  No argument taken.
 212  *
  213  *  The flags modify the behavior of the formats.
 214  *  The flag characters are:
 215  *
 216  *    Flag     | Applies to    | Meaning
 217  *    ---------+---------------+-----------------------------------------
 218  *    space    | bBdiouxX      | Leave a space at the start of
 219  *             | eEfgG         | non-negative numbers.
 220  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 221  *             |               | a minus sign with absolute value for
 222  *             |               | negative values.
 223  *    ---------+---------------+-----------------------------------------
 224  *    (digit)$ | all           | Specifies the absolute argument number
 225  *             |               | for this field.  Absolute and relative
 226  *             |               | argument numbers cannot be mixed in a
 227  *             |               | sprintf string.
 228  *    ---------+---------------+-----------------------------------------
 229  *     #       | bBoxX         | Use an alternative format.
 230  *             | eEfgG         | For the conversions `o', increase the precision
 231  *             |               | until the first digit will be `0' if
 232  *             |               | it is not formatted as complements.
 233  *             |               | For the conversions `x', `X', `b' and `B'
 234  *             |               | on non-zero, prefix the result with ``0x'',
 235  *             |               | ``0X'', ``0b'' and ``0B'', respectively.
 236  *             |               | For `e', `E', `f', `g', and 'G',
 237  *             |               | force a decimal point to be added,
 238  *             |               | even if no digits follow.
 239  *             |               | For `g' and 'G', do not remove trailing zeros.
 240  *    ---------+---------------+-----------------------------------------
 241  *    +        | bBdiouxX      | Add a leading plus sign to non-negative
 242  *             | eEfgG         | numbers.
 243  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 244  *             |               | a minus sign with absolute value for
 245  *             |               | negative values.
 246  *    ---------+---------------+-----------------------------------------
 247  *    -        | all           | Left-justify the result of this conversion.
 248  *    ---------+---------------+-----------------------------------------
 249  *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
 250  *             | eEfgG         | For `o', `x', `X', `b' and `B', radix-1
 251  *             | (numeric fmt) | is used for negative numbers formatted as
 252  *             |               | complements.
 253  *    ---------+---------------+-----------------------------------------
 254  *    *        | all           | Use the next argument as the field width.
 255  *             |               | If negative, left-justify the result. If the
 256  *             |               | asterisk is followed by a number and a dollar
 257  *             |               | sign, use the indicated argument as the width.
 258  *
 259  *  Examples of flags:
 260  *
  261  *   # `+' and space flags specify the sign of non-negative numbers.
 262  *   sprintf("%d", 123)  #=> "123"
 263  *   sprintf("%+d", 123) #=> "+123"
 264  *   sprintf("% d", 123) #=> " 123"
 265  *
 266  *   # `#' flag for `o' increases number of digits to show `0'.
 267  *   # `+' and space flag changes format of negative numbers.
 268  *   sprintf("%o", 123)   #=> "173"
 269  *   sprintf("%#o", 123)  #=> "0173"
 270  *   sprintf("%+o", -123) #=> "-173"
 271  *   sprintf("%o", -123)  #=> "..7605"
 272  *   sprintf("%#o", -123) #=> "..7605"
 273  *
 274  *   # `#' flag for `x' add a prefix `0x' for non-zero numbers.
 275  *   # `+' and space flag disables complements for negative numbers.
 276  *   sprintf("%x", 123)   #=> "7b"
 277  *   sprintf("%#x", 123)  #=> "0x7b"
 278  *   sprintf("%+x", -123) #=> "-7b"
 279  *   sprintf("%x", -123)  #=> "..f85"
 280  *   sprintf("%#x", -123) #=> "0x..f85"
 281  *   sprintf("%#x", 0)    #=> "0"
 282  *
 283  *   # `#' for `X' uses the prefix `0X'.
 284  *   sprintf("%X", 123)  #=> "7B"
 285  *   sprintf("%#X", 123) #=> "0X7B"
 286  *
 287  *   # `#' flag for `b' add a prefix `0b' for non-zero numbers.
 288  *   # `+' and space flag disables complements for negative numbers.
 289  *   sprintf("%b", 123)   #=> "1111011"
 290  *   sprintf("%#b", 123)  #=> "0b1111011"
 291  *   sprintf("%+b", -123) #=> "-1111011"
 292  *   sprintf("%b", -123)  #=> "..10000101"
 293  *   sprintf("%#b", -123) #=> "0b..10000101"
 294  *   sprintf("%#b", 0)    #=> "0"
 295  *
 296  *   # `#' for `B' uses the prefix `0B'.
 297  *   sprintf("%B", 123)  #=> "1111011"
 298  *   sprintf("%#B", 123) #=> "0B1111011"
 299  *
 300  *   # `#' for `e' forces to show the decimal point.
 301  *   sprintf("%.0e", 1)  #=> "1e+00"
 302  *   sprintf("%#.0e", 1) #=> "1.e+00"
 303  *
 304  *   # `#' for `f' forces to show the decimal point.
 305  *   sprintf("%.0f", 1234)  #=> "1234"
 306  *   sprintf("%#.0f", 1234) #=> "1234."
 307  *
 308  *   # `#' for `g' forces to show the decimal point.
 309  *   # It also disables stripping lowest zeros.
 310  *   sprintf("%g", 123.4)   #=> "123.4"
 311  *   sprintf("%#g", 123.4)  #=> "123.400"
 312  *   sprintf("%g", 123456)  #=> "123456"
 313  *   sprintf("%#g", 123456) #=> "123456."
 314  *
 315  *  The field width is an optional integer, followed optionally by a
 316  *  period and a precision.  The width specifies the minimum number of
 317  *  characters that will be written to the result for this field.
 318  *
 319  *  Examples of width:
 320  *
 321  *   # padding is done by spaces,       width=20
 322  *   # 0 or radix-1.             <------------------>
 323  *   sprintf("%20d", 123)   #=> "                 123"
 324  *   sprintf("%+20d", 123)  #=> "                +123"
 325  *   sprintf("%020d", 123)  #=> "00000000000000000123"
 326  *   sprintf("%+020d", 123) #=> "+0000000000000000123"
 327  *   sprintf("% 020d", 123) #=> " 0000000000000000123"
 328  *   sprintf("%-20d", 123)  #=> "123                 "
 329  *   sprintf("%-+20d", 123) #=> "+123                "
 330  *   sprintf("%- 20d", 123) #=> " 123                "
 331  *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
 332  *
 333  *  For
 334  *  numeric fields, the precision controls the number of decimal places
 335  *  displayed.  For string fields, the precision determines the maximum
 336  *  number of characters to be copied from the string.  (Thus, the format
 337  *  sequence <code>%10.10s</code> will always contribute exactly ten
 338  *  characters to the result.)
 339  *
 340  *  Examples of precisions:
 341  *
 342  *   # precision for `d', 'o', 'x' and 'b' is
 343  *   # minimum number of digits               <------>
 344  *   sprintf("%20.8d", 123)  #=> "            00000123"
 345  *   sprintf("%20.8o", 123)  #=> "            00000173"
 346  *   sprintf("%20.8x", 123)  #=> "            0000007b"
 347  *   sprintf("%20.8b", 123)  #=> "            01111011"
 348  *   sprintf("%20.8d", -123) #=> "           -00000123"
 349  *   sprintf("%20.8o", -123) #=> "            ..777605"
 350  *   sprintf("%20.8x", -123) #=> "            ..ffff85"
 351  *   sprintf("%20.8b", -11)  #=> "            ..110101"
 352  *
  353  *   # "0x" and "0b" for `#x' and `#b' are not counted for
 354  *   # precision but "0" for `#o' is counted.  <------>
 355  *   sprintf("%#20.8d", 123)  #=> "            00000123"
 356  *   sprintf("%#20.8o", 123)  #=> "            00000173"
 357  *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
 358  *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
 359  *   sprintf("%#20.8d", -123) #=> "           -00000123"
 360  *   sprintf("%#20.8o", -123) #=> "            ..777605"
 361  *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
 362  *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
 363  *
 364  *   # precision for `e' is number of
 365  *   # digits after the decimal point           <------>
 366  *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
 367  *
 368  *   # precision for `f' is number of
 369  *   # digits after the decimal point               <------>
 370  *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
 371  *
 372  *   # precision for `g' is number of
 373  *   # significant digits                          <------->
 374  *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
 375  *
 376  *   #                                         <------->
 377  *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
 378  *
 379  *   # precision for `s' is
 380  *   # maximum number of characters                    <------>
 381  *   sprintf("%20.8s", "string test") #=> "            string t"
 382  *
 383  *  Examples:
 384  *
 385  *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
 386  *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
 387  *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
 388  *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
 389  *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
 390  *     sprintf("%u", -123)                        #=> "-123"
 391  */
 392
 393 VALUE
 394 rb_f_sprintf(int argc, const VALUE *argv)
 395 {
 396     return rb_str_format(argc - 1, argv + 1, GETNTHARG(0));
 397 }
 398
 399 VALUE
 400 rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 401 {
 402     rb_encoding *enc;
 403     const char *p, *end;
 404     char *buf;
 405     int blen, bsiz;
 406     VALUE result;
 407
 408     int width, prec, flags = FNONE;
 409     int nextarg = 1;
 410     int posarg = 0;
 411     int tainted = 0;
 412     VALUE nextvalue;
 413     VALUE tmp;
 414     VALUE str;
 415
 416 #define CHECK_FOR_WIDTH(f)                               \
 417     if ((f) & FWIDTH) {                                  \
 418         rb_raise(rb_eArgError, "width given twice");     \
 419     }                                                    \
 420     if ((f) & FPREC0) {                                  \
 421         rb_raise(rb_eArgError, "width after precision"); \
 422     }
 423 #define CHECK_FOR_FLAGS(f)                               \
 424     if ((f) & FWIDTH) {                                  \
 425         rb_raise(rb_eArgError, "flag after width");      \
 426     }                                                    \
 427     if ((f) & FPREC0) {                                  \
 428         rb_raise(rb_eArgError, "flag after precision"); \
 429     }
 430
 431     ++argc;
 432     --argv;
 433     if (OBJ_TAINTED(fmt)) tainted = 1;
 434     StringValue(fmt);
 435     enc = rb_enc_get(fmt);
 436     fmt = rb_str_new4(fmt);
 437     p = RSTRING_PTR(fmt);
 438     end = p + RSTRING_LEN(fmt);
 439     blen = 0;
 440     bsiz = 120;
 441     result = rb_str_buf_new(bsiz);
 442     rb_enc_copy(result, fmt);
 443     buf = RSTRING_PTR(result);
 444     memset(buf, 0, bsiz);
 445
 446     for (; p < end; p++) {
 447         const char *t;
 448         int n;
 449
 450         for (t = p; t < end && *t != '%'; t++) ;
 451         PUSH(p, t - p);
 452         if (t >= end) {
 453             /* end of fmt string */
 454             goto sprint_exit;
 455         }
 456         p = t + 1;              /* skip `%' */
 457
 458         width = prec = -1;
 459         nextvalue = Qundef;
 460       retry:
 461         switch (*p) {
 462           default:
 463             if (rb_enc_isprint(*p, enc))
 464                 rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
 465             else
 466                 rb_raise(rb_eArgError, "malformed format string");
 467             break;
 468
 469           case ' ':
 470             CHECK_FOR_FLAGS(flags);
 471             flags |= FSPACE;
 472             p++;
 473             goto retry;
 474
 475           case '#':
 476             CHECK_FOR_FLAGS(flags);
 477             flags |= FSHARP;
 478             p++;
 479             goto retry;
 480
 481           case '+':
 482             CHECK_FOR_FLAGS(flags);
 483             flags |= FPLUS;
 484             p++;
 485             goto retry;
 486
 487           case '-':
 488             CHECK_FOR_FLAGS(flags);
 489             flags |= FMINUS;
 490             p++;
 491             goto retry;
 492
 493           case '0':
 494             CHECK_FOR_FLAGS(flags);
 495             flags |= FZERO;
 496             p++;
 497             goto retry;
 498
 499           case '1': case '2': case '3': case '4':
 500           case '5': case '6': case '7': case '8': case '9':
 501             n = 0;
 502             GETNUM(n, width);
 503             if (*p == '$') {
 504                 if (nextvalue != Qundef) {
 505                     rb_raise(rb_eArgError, "value given twice - %d$", n);
 506                 }
 507                 nextvalue = GETPOSARG(n);
 508                 p++;
 509                 goto retry;
 510             }
 511             CHECK_FOR_WIDTH(flags);
 512             width = n;
 513             flags |= FWIDTH;
 514             goto retry;
 515
 516           case '*':
 517             CHECK_FOR_WIDTH(flags);
 518             flags |= FWIDTH;
 519             GETASTER(width);
 520             if (width < 0) {
 521                 flags |= FMINUS;
 522                 width = -width;
 523             }
 524             p++;
 525             goto retry;
 526
 527           case '.':
 528             if (flags & FPREC0) {
 529                 rb_raise(rb_eArgError, "precision given twice");
 530             }
 531             flags |= FPREC|FPREC0;
 532
 533             prec = 0;
 534             p++;
 535             if (*p == '*') {
 536                 GETASTER(prec);
 537                 if (prec < 0) { /* ignore negative precision */
 538                     flags &= ~FPREC;
 539                 }
 540                 p++;
 541                 goto retry;
 542             }
 543
 544             GETNUM(prec, precision);
 545             goto retry;
 546
 547           case '\n':
 548           case '\0':
 549             p--;
 550           case '%':
 551             if (flags != FNONE) {
 552                 rb_raise(rb_eArgError, "invalid format character - %%");
 553             }
 554             PUSH("%", 1);
 555             break;
 556
 557           case 'c':
 558             {
 559                 VALUE val = GETARG();
 560                 VALUE tmp;
 561                 int c, n;
 562
 563                 tmp = rb_check_string_type(val);
 564                 if (!NIL_P(tmp)) {
 565                     if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
 566                         rb_raise(rb_eArgError, "%%c requires a character");
 567                     }
 568                     c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc);
 569                 }
 570                 else {
 571                     c = NUM2INT(val);
 572                 }
 573                 n = rb_enc_codelen(c, enc);
 574                 if (n == 0) {
 575                     rb_raise(rb_eArgError, "invalid character");
 576                 }
 577                 if (!(flags & FWIDTH)) {
 578                     CHECK(n);
 579                     rb_enc_mbcput(c, &buf[blen], enc);
 580                     blen += n;
 581                 }
 582                 else if ((flags & FMINUS)) {
 583                     CHECK(n);
 584                     rb_enc_mbcput(c, &buf[blen], enc);
 585                     blen += n;
 586                     FILL(' ', width-1);
 587                 }
 588                 else {
 589                     FILL(' ', width-1);
 590                     CHECK(n);
 591                     rb_enc_mbcput(c, &buf[blen], enc);
 592                     blen += n;
 593                 }
 594             }
 595             break;
 596
 597           case 's':
 598           case 'p':
 599             {
 600                 VALUE arg = GETARG();
 601                 long len, slen;
 602
 603                 if (*p == 'p') arg = rb_inspect(arg);
 604                 str = rb_obj_as_string(arg);
 605                 if (OBJ_TAINTED(str)) tainted = 1;
 606                 len = RSTRING_LEN(str);
 607                 enc = rb_enc_check(result, str);
 608                 if (flags&(FPREC|FWIDTH)) {
 609                     slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
 610                     if (slen < 0) {
 611                         rb_raise(rb_eArgError, "invalid mbstring sequence");
 612                     }
 613                     if ((flags&FPREC) && (prec < slen)) {
 614                         char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
 615                                              prec, enc);
 616                         slen = prec;
 617                         len = p - RSTRING_PTR(str);
 618                     }
 619                     /* need to adjust multi-byte string pos */
 620                     if ((flags&FWIDTH) && (width > slen)) {
 621                         width -= slen;
 622                         if (!(flags&FMINUS)) {
 623                             CHECK(width);
 624                             while (width--) {
 625                                 buf[blen++] = ' ';
 626                             }
 627                         }
 628                         CHECK(len);
 629                         memcpy(&buf[blen], RSTRING_PTR(str), len);
 630                         blen += len;
 631                         if (flags&FMINUS) {
 632                             CHECK(width);
 633                             while (width--) {
 634                                 buf[blen++] = ' ';
 635                             }
 636                         }
 637                         rb_enc_associate(result, enc);
 638                         break;
 639                     }
 640                 }
 641                 PUSH(RSTRING_PTR(str), len);
 642                 rb_enc_associate(result, enc);
 643             }
 644             break;
 645
 646           case 'd':
 647           case 'i':
 648           case 'o':
 649           case 'x':
 650           case 'X':
 651           case 'b':
 652           case 'B':
 653           case 'u':
 654             {
 655                 volatile VALUE tmp1;
 656                 volatile VALUE val = GETARG();
 657                 char fbuf[32], nbuf[64], *s;
 658                 const char *prefix = 0;
 659                 int sign = 0, dots = 0;
 660                 char sc = 0;
 661                 long v = 0;
 662                 int base, bignum = 0;
 663                 int len, pos;
 664
 665                 switch (*p) {
 666                   case 'd':
 667                   case 'i':
 668                   case 'u':
 669                     sign = 1; break;
 670                   case 'o':
 671                   case 'x':
 672                   case 'X':
 673                   case 'b':
 674                   case 'B':
 675                     if (flags&(FPLUS|FSPACE)) sign = 1;
 676                     break;
 677                 }
 678                 if (flags & FSHARP) {
 679                     switch (*p) {
 680                       case 'o':
 681                         prefix = "0"; break;
 682                       case 'x':
 683                         prefix = "0x"; break;
 684                       case 'X':
 685                         prefix = "0X"; break;
 686                       case 'b':
 687                         prefix = "0b"; break;
 688                       case 'B':
 689                         prefix = "0B"; break;
 690                     }
 691                 }
 692
 693               bin_retry:
 694                 switch (TYPE(val)) {
 695                   case T_FLOAT:
 696                     if (FIXABLE(RFLOAT_VALUE(val))) {
 697                         val = LONG2FIX((long)RFLOAT_VALUE(val));
 698                         goto bin_retry;
 699                     }
 700                     val = rb_dbl2big(RFLOAT_VALUE(val));
 701                     if (FIXNUM_P(val)) goto bin_retry;
 702                     bignum = 1;
 703                     break;
 704                   case T_STRING:
 705                     val = rb_str_to_inum(val, 0, Qtrue);
 706                     goto bin_retry;
 707                   case T_BIGNUM:
 708                     bignum = 1;
 709                     break;
 710                   case T_FIXNUM:
 711                     v = FIX2LONG(val);
 712                     break;
 713                   default:
 714                     val = rb_Integer(val);
 715                     goto bin_retry;
 716                 }
 717
 718                 switch (*p) {
 719                   case 'o':
 720                     base = 8; break;
 721                   case 'x':
 722                   case 'X':
 723                     base = 16; break;
 724                   case 'b':
 725                   case 'B':
 726                     base = 2; break;
 727                   case 'u':
 728                   case 'd':
 729                   case 'i':
 730                   default:
 731                     base = 10; break;
 732                 }
 733
 734                 if (!bignum) {
 735                     if (base == 2) {
 736                         val = rb_int2big(v);
 737                         goto bin_retry;
 738                     }
 739                     if (sign) {
 740                         char c = *p;
 741                         if (c == 'i') c = 'd'; /* %d and %i are identical */
 742                         if (v < 0) {
 743                             v = -v;
 744                             sc = '-';
 745                             width--;
 746                         }
 747                         else if (flags & FPLUS) {
 748                             sc = '+';
 749                             width--;
 750                         }
 751                         else if (flags & FSPACE) {
 752                             sc = ' ';
 753                             width--;
 754                         }
 755                         sprintf(fbuf, "%%l%c", c);
 756                         sprintf(nbuf, fbuf, v);
 757                         s = nbuf;
 758                     }
 759                     else {
 760                         s = nbuf;
 761                         if (v < 0) {
 762                             dots = 1;
 763                         }
 764                         sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p);
 765                         sprintf(++s, fbuf, v);
 766                         if (v < 0) {
 767                             char d = 0;
 768
 769                             s = remove_sign_bits(s, base);
 770                             switch (base) {
 771                               case 16:
 772                                 d = 'f'; break;
 773                               case 8:
 774                                 d = '7'; break;
 775                             }
 776                             if (d && *s != d) {
 777                                 *--s = d;
 778                             }
 779                         }
 780                     }
 781                 }
 782                 else {
 783                     if (sign) {
 784                         tmp = rb_big2str(val, base);
 785                         s = RSTRING_PTR(tmp);
 786                         if (s[0] == '-') {
 787                             s++;
 788                             sc = '-';
 789                             width--;
 790                         }
 791                         else if (flags & FPLUS) {
 792                             sc = '+';
 793                             width--;
 794                         }
 795                         else if (flags & FSPACE) {
 796                             sc = ' ';
 797                             width--;
 798                         }
 799                     }
 800                     else {
 801                         if (!RBIGNUM_SIGN(val)) {
 802                             val = rb_big_clone(val);
 803                             rb_big_2comp(val);
 804                         }
 805                         tmp1 = tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
 806                         s = RSTRING_PTR(tmp);
 807                         if (*s == '-') {
 808                             dots = 1;
 809                             if (base == 10) {
 810                                 rb_warning("negative number for %%u specifier");
 811                             }
 812                             s = remove_sign_bits(++s, base);
 813                             switch (base) {
 814                               case 16:
 815                                 if (s[0] != 'f') *--s = 'f'; break;
 816                               case 8:
 817                                 if (s[0] != '7') *--s = '7'; break;
 818                               case 2:
 819                                 if (s[0] != '1') *--s = '1'; break;
 820                             }
 821                         }
 822                     }
 823                 }
 824
 825                 pos = -1;
 826                 len = strlen(s);
 827                 if (dots) {
 828                     prec -= 2;
 829                     width -= 2;
 830                 }
 831
 832                 if (*p == 'X') {
 833                     char *pp = s;
 834                     int c;
 835                     while ((c = (int)(unsigned char)*pp) != 0) {
 836                         *pp = rb_enc_toupper(c, enc);
 837                         pp++;
 838                     }
 839                 }
 840                 if (prefix && !prefix[1]) { /* octal */
 841                     if (dots) {
 842                         prefix = 0;
 843                     }
 844                     else if (len == 1 && *s == '0') {
 845                         len = 0;
 846                         if (flags & FPREC) prec--;
 847                     }
 848                     else if ((flags & FPREC) && (prec > len)) {
 849                         prefix = 0;
 850                     }
 851                 }
 852                 else if (len == 1 && *s == '0') {
 853                     prefix = 0;
 854                 }
 855                 if (prefix) {
 856                     width -= strlen(prefix);
 857                 }
 858                 if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
 859                     prec = width;
 860                     width = 0;
 861                 }
 862                 else {
 863                     if (prec < len) {
 864                         if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
 865                         prec = len;
 866                     }
 867                     width -= prec;
 868                 }
 869                 if (!(flags&FMINUS)) {
 870                     CHECK(width);
 871                     while (width-- > 0) {
 872                         buf[blen++] = ' ';
 873                     }
 874                 }
 875                 if (sc) PUSH(&sc, 1);
 876                 if (prefix) {
 877                     int plen = strlen(prefix);
 878                     PUSH(prefix, plen);
 879                 }
 880                 CHECK(prec - len);
 881                 if (dots) PUSH("..", 2);
 882                 if (!bignum && v < 0) {
 883                     char c = sign_bits(base, p);
 884                     while (len < prec--) {
 885                         buf[blen++] = c;
 886                     }
 887                 }
 888                 else if ((flags & (FMINUS|FPREC)) != FMINUS) {
 889                     char c;
 890
 891                     if (!sign && bignum && !RBIGNUM_SIGN(val))
 892                         c = sign_bits(base, p);
 893                     else
 894                         c = '0';
 895                     while (len < prec--) {
 896                         buf[blen++] = c;
 897                     }
 898                 }
 899                 PUSH(s, len);
 900                 CHECK(width);
 901                 while (width-- > 0) {
 902                     buf[blen++] = ' ';
 903                 }
 904             }
 905             break;
 906
 907           case 'f':
 908           case 'g':
 909           case 'G':
 910           case 'e':
 911           case 'E':
 912             {
 913                 VALUE val = GETARG();
 914                 double fval;
 915                 int i, need = 6;
 916                 char fbuf[32];
 917
 918                 fval = RFLOAT_VALUE(rb_Float(val));
 919                 if (isnan(fval) || isinf(fval)) {
 920                     const char *expr;
 921
 922                     if (isnan(fval)) {
 923                         expr = "NaN";
 924                     }
 925                     else {
 926                         expr = "Inf";
 927                     }
 928                     need = strlen(expr);
 929                     if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
 930                         need++;
 931                     if ((flags & FWIDTH) && need < width)
 932                         need = width;
 933
 934                     CHECK(need);
 935                     sprintf(&buf[blen], "%*s", need, "");
 936                     if (flags & FMINUS) {
 937                         if (!isnan(fval) && fval < 0.0)
 938                             buf[blen++] = '-';
 939                         else if (flags & FPLUS)
 940                             buf[blen++] = '+';
 941                         else if (flags & FSPACE)
 942                             blen++;
 943                         strncpy(&buf[blen], expr, strlen(expr));
 944                     }
 945                     else {
 946                         if (!isnan(fval) && fval < 0.0)
 947                             buf[blen + need - strlen(expr) - 1] = '-';
 948                         else if (flags & FPLUS)
 949                             buf[blen + need - strlen(expr) - 1] = '+';
 950                         else if ((flags & FSPACE) && need > width)
 951                             blen++;
 952                         strncpy(&buf[blen + need - strlen(expr)], expr,
 953                                 strlen(expr));
 954                     }
 955                     blen += strlen(&buf[blen]);
 956                     break;
 957                 }
 958
 959                 fmt_setup(fbuf, *p, flags, width, prec);
 960                 need = 0;
 961                 if (*p != 'e' && *p != 'E') {
 962                     i = INT_MIN;
 963                     frexp(fval, &i);
 964                     if (i > 0)
 965                         need = BIT_DIGITS(i);
 966                 }
 967                 need += (flags&FPREC) ? prec : 6;
 968                 if ((flags&FWIDTH) && need < width)
 969                     need = width;
 970                 need += 20;
 971
 972                 CHECK(need);
 973                 sprintf(&buf[blen], fbuf, fval);
 974                 blen += strlen(&buf[blen]);
 975             }
 976             break;
 977         }
 978         flags = FNONE;
 979     }
 980
 981   sprint_exit:
 982     /* XXX - We cannot validate the number of arguments if (digit)$ style used.
 983      */
 984     if (posarg >= 0 && nextarg < argc) {
 985         const char *mesg = "too many arguments for format string";
 986         if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
 987         if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
 988     }
 989     rb_str_resize(result, blen);
 990
 991     if (tainted) OBJ_TAINT(result);
 992     return result;
 993 }
 994
 995 static void
 996 fmt_setup(char *buf, int c, int flags, int width, int prec)
 997 {
 998     *buf++ = '%';
 999     if (flags & FSHARP) *buf++ = '#';
1000     if (flags & FPLUS)  *buf++ = '+';
1001     if (flags & FMINUS) *buf++ = '-';
1002     if (flags & FZERO)  *buf++ = '0';
1003     if (flags & FSPACE) *buf++ = ' ';
1004
1005     if (flags & FWIDTH) {
1006         sprintf(buf, "%d", width);
1007         buf += strlen(buf);
1008     }
1009
1010     if (flags & FPREC) {
1011         sprintf(buf, ".%d", prec);
1012         buf += strlen(buf);
1013     }
1014
1015     *buf++ = c;
1016     *buf = '\0';
1017 }
1018
/*
 * Preprocessor setup for the textual inclusion of missing/vsnprintf.c at
 * the end of this group.  Every macro defined or removed here is in
 * effect while that file is compiled as part of this translation unit.
 */

/* Replace FILE, __sbuf and __sFILE with rb_printf_* names. */
#undef FILE
#define FILE rb_printf_buffer
#define __sbuf rb_printf_sbuf
#define __sFILE rb_printf_sfile
/* Drop any macro definitions of these stdio names. */
#undef feof
#undef ferror
#undef clearerr
#undef fileno
/*
 * When long is narrower than a pointer and LONG_LONG has pointer size,
 * define quad_t/u_quad_t in terms of LONG_LONG and set the two _HAVE_*
 * markers.  NOTE(review): the markers are presumably consumed by
 * missing/vsnprintf.c -- confirm there.
 */
#if SIZEOF_LONG < SIZEOF_VOIDP
# if  SIZEOF_LONG_LONG == SIZEOF_VOIDP
#  define _HAVE_SANE_QUAD_
#  define _HAVE_LLP64_
#  define quad_t LONG_LONG
#  define u_quad_t unsigned LONG_LONG
# endif
#endif
/* Remove any macro versions of vsnprintf/snprintf before the include. */
#undef vsnprintf
#undef snprintf
/* NOTE(review): presumably enables floating-point conversions in the
 * included file -- confirm in missing/vsnprintf.c. */
#define FLOATING_POINT 1
/* References to BSD__dtoa are rewritten to dtoa. */
#define BSD__dtoa dtoa
#include "missing/vsnprintf.c"
1040
/*
 * Write callback installed as rb_printf_buffer.vwrite by rb_enc_vsprintf()
 * and rb_str_vcatf().  Copies the chunks described by uio into the Ruby
 * string whose VALUE is stored in fp->_bf._base, starting at fp->_p.
 *
 * fp  - buffer descriptor; _bf._base holds the target string, _p the
 *       current write position, _w the buffer size used by CHECK.
 * uio - chunk list; uio_resid is the total number of bytes to copy and
 *       uio_iov the array of (iov_base, iov_len) pieces.
 *
 * Always returns 0.  Raises RuntimeError ("rb_vsprintf reentered") when
 * the target string has a non-zero klass.
 */
static int
ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
{
    struct __siov *iov;
    VALUE result = (VALUE)fp->_bf._base;
    char *buf = (char*)fp->_p;
    size_t len, n;
    /* blen: bytes already in the string before the write position. */
    size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w;

    /* Both callers zero klass before formatting and restore it afterwards;
     * a non-zero klass here is reported as re-entry. */
    if (RBASIC(result)->klass) {
        rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
    }
    /* Nothing to copy. */
    if ((len = uio->uio_resid) == 0)
        return 0;
    /* NOTE(review): CHECK is a macro defined elsewhere in this file; it
     * appears to use the locals result, buf, blen and bsiz implicitly --
     * presumably growing result to hold len more bytes and re-pointing
     * buf at the start of its storage.  Confirm against the macro. */
    CHECK(len);
    /* Advance buf by the existing length to reach the write position. */
    buf += blen;
    /* Store the size back, as CHECK may have changed bsiz. */
    fp->_w = bsiz;
    /* Copy one iov entry per iteration until len bytes are consumed. */
    for (iov = uio->uio_iov; len > 0; ++iov) {
        MEMCPY(buf, iov->iov_base, char, n = iov->iov_len);
        buf += n;
        len -= n;
    }
    /* Publish the new write position. */
    fp->_p = (unsigned char *)buf;
    return 0;
}
1066
1067 VALUE
1068 rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap)
1069 {
1070     rb_printf_buffer f;
1071     VALUE result;
1072
1073     f._flags = __SWR | __SSTR;
1074     f._bf._size = 0;
1075     f._w = 120;
1076     result = rb_str_buf_new(f._w);
1077     if (enc) rb_enc_associate(result, enc);
1078     f._bf._base = (unsigned char *)result;
1079     f._p = (unsigned char *)RSTRING_PTR(result);
1080     RBASIC(result)->klass = 0;
1081     f.vwrite = ruby__sfvwrite;
1082     BSD_vfprintf(&f, fmt, ap);
1083     RBASIC(result)->klass = rb_cString;
1084     rb_str_resize(result, (char *)f._p - RSTRING_PTR(result));
1085
1086     return result;
1087 }
1088
1089 VALUE
1090 rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
1091 {
1092     VALUE result;
1093     va_list ap;
1094
1095     va_start(ap, format);
1096     result = rb_enc_vsprintf(enc, format, ap);
1097     va_end(ap);
1098
1099     return result;
1100 }
1101
1102 VALUE
1103 rb_vsprintf(const char *fmt, va_list ap)
1104 {
1105     return rb_enc_vsprintf(NULL, fmt, ap);
1106 }
1107
1108 VALUE
1109 rb_sprintf(const char *format, ...)
1110 {
1111     VALUE result;
1112     va_list ap;
1113
1114     va_start(ap, format);
1115     result = rb_vsprintf(format, ap);
1116     va_end(ap);
1117
1118     return result;
1119 }
1120
1121 VALUE
1122 rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
1123 {
1124     rb_printf_buffer f;
1125     VALUE klass;
1126
1127     StringValue(str);
1128     rb_str_modify(str);
1129     f._flags = __SWR | __SSTR;
1130     f._bf._size = 0;
1131     f._w = rb_str_capacity(str);
1132     f._bf._base = (unsigned char *)str;
1133     f._p = (unsigned char *)RSTRING_END(str);
1134     klass = RBASIC(str)->klass;
1135     RBASIC(str)->klass = 0;
1136     f.vwrite = ruby__sfvwrite;
1137     BSD_vfprintf(&f, fmt, ap);
1138     RBASIC(str)->klass = klass;
1139     rb_str_resize(str, (char *)f._p - RSTRING_PTR(str));
1140
1141     return str;
1142 }
1143
1144 VALUE
1145 rb_str_catf(VALUE str, const char *format, ...)
1146 {
1147     va_list ap;
1148
1149     va_start(ap, format);
1150     str = rb_str_vcatf(str, format, ap);
1151     va_end(ap);
1152
1153     return str;
1154 }