sprintf.c

   1 /**********************************************************************
   2
   3   sprintf.c -
   4
   5   $Author$
   6   created at: Fri Oct 15 10:39:26 JST 1993
   7
   8   Copyright (C) 1993-2007 Yukihiro Matsumoto
   9   Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
  10   Copyright (C) 2000  Information-technology Promotion Agency, Japan
  11
  12 **********************************************************************/
  13
  14 #include "ruby/ruby.h"
  15 #include "ruby/re.h"
  16 #include "ruby/encoding.h"
  17 #include <math.h>
  18 #include <stdarg.h>
  19
   /* N*log10(2) + 1: an estimate of the decimal digits needed for an N-bit value. */
   20 #define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log10(2) =~ 146/485 */
   /* Bits per bignum digit; SIZEOF_BDIGITS is not defined in the visible part of
      this file (presumably it comes from the Ruby headers). */
   21 #define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT)
   /*
    * EXTENDSIGN(n, l): takes ~0 << n, shifts it right by (n*l) % BITSPERDIG and
    * keeps only the low n bits of the result.  remove_sign_bits() ORs
    * EXTENDSIGN(3, strlen(digits)) into the leading octal digit.
    *
    * NOTE(review): left-shifting the negative value ~0 is undefined behaviour in
    * ISO C, and right-shifting a negative value is implementation-defined; the
    * macro relies on two's-complement arithmetic shifts.
    */
   22 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
  23
   /* Forward declaration of a file-local helper.  Its definition and callers are
      not in the visible part of this file, so the meaning of the five parameters
      cannot be stated here -- TODO confirm against the definition. */
   24 static void fmt_setup(char*,int,int,int,int);
  25
  26 static char*
  27 remove_sign_bits(char *str, int base)
  28 {
  29     char *s, *t;
  30
  31     s = t = str;
  32
  33     if (base == 16) {
  34         while (*t == 'f') {
  35             t++;
  36         }
  37     }
  38     else if (base == 8) {
  39         *t |= EXTENDSIGN(3, strlen(t));
  40         while (*t == '7') {
  41             t++;
  42         }
  43     }
  44     else if (base == 2) {
  45         while (*t == '1') {
  46             t++;
  47         }
  48     }
  49
  50     return t;
  51 }
  52
  53 static char
  54 sign_bits(int base, const char *p)
  55 {
  56     char c = '.';
  57
  58     switch (base) {
  59       case 16:
  60         if (*p == 'X') c = 'F';
  61         else c = 'f';
  62         break;
  63       case 8:
  64         c = '7'; break;
  65       case 2:
  66         c = '1'; break;
  67     }
  68     return c;
  69 }
  70
  71 #define FNONE  0
  72 #define FSHARP 1
  73 #define FMINUS 2
  74 #define FPLUS  4
  75 #define FZERO  8
  76 #define FSPACE 16
  77 #define FWIDTH 32
  78 #define FPREC  64
  79 #define FPREC0 128
  80
   /*
    * CHECK(l): make room for l more bytes in the output.  Doubles bsiz until
    * blen + l is strictly below it, then resizes `result' to bsiz and re-reads
    * buf from it (presumably because rb_str_resize may move the storage).
    * rb_str_resize is called on every use, even when bsiz did not change.
    * Uses the locals blen, bsiz, result and buf of the expanding function.
    */
   81 #define CHECK(l) do {\
   82     while (blen + (l) >= bsiz) {\
   83         bsiz*=2;\
   84     }\
   85     rb_str_resize(result, bsiz);\
   86     buf = RSTRING_PTR(result);\
   87 } while (0)
   88
   /*
    * PUSH(s, l): append l bytes from s to the output and advance blen.
    * l is evaluated more than once; pass an expression without side effects.
    */
   89 #define PUSH(s, l) do { \
   90     CHECK(l);\
   91     memcpy(&buf[blen], s, l);\
   92     blen += (l);\
   93 } while (0)
   94
   /*
    * FILL(c, l): append l copies of the byte c to the output and advance blen.
    * l is evaluated more than once; pass an expression without side effects.
    */
   95 #define FILL(c, l) do { \
   96     CHECK(l);\
   97     memset(&buf[blen], c, l);\
   98     blen += (l);\
   99 } while (0)
 100
  /*
   * GETARG(): value for the current directive.
   *   - If nextvalue is not Qundef (an `N$' argument was already selected),
   *     that value is used.
   *   - Otherwise, if posarg < 0 (numbered arguments are in use), raises
   *     rb_eArgError.
   *   - Otherwise takes argument number nextarg, records it in posarg and
   *     advances nextarg.
   * The `, 0' after each rb_raise only gives that arm of the conditional a value.
   */
  101 #define GETARG() (nextvalue != Qundef ? nextvalue : \
  102     posarg < 0 ? \
  103     (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \
  104     (posarg = nextarg++, GETNTHARG(posarg)))
  105
  /*
   * GETPOSARG(n): value of the explicitly numbered argument n (`n$').
   * Raises rb_eArgError when posarg > 0 (an unnumbered argument was already
   * taken) or when n < 1.  Otherwise sets posarg to -1, which makes later
   * GETARG() uses without a preselected value raise.
   */
  106 #define GETPOSARG(n) (posarg > 0 ? \
  107     (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", n, posarg), 0) : \
  108     ((n < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", n), 0) : \
  109                (posarg = -1, GETNTHARG(n))))
  110
  /*
   * GETNTHARG(nth): argv[nth], or raises rb_eArgError when nth >= argc.
   * nth is not parenthesised in the expansion, so pass a simple expression.
   * In rb_str_format() argc/argv are shifted on entry (++argc, --argv) and
   * nextarg starts at 1, so index 1 there is the first argument.
   */
  111 #define GETNTHARG(nth) \
  112     ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
 113
 114 #define GETNUM(n, val) \
 115     for (; p < end && rb_enc_isdigit(*p, enc); p++) {   \
 116         int next_n = 10 * n + (*p - '0'); \
 117         if (next_n / 10 != n) {\
 118             rb_raise(rb_eArgError, #val " too big"); \
 119         } \
 120         n = next_n; \
 121     } \
 122     if (p >= end) { \
 123         rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \
 124     }
 125
  /*
   * GETASTER(val): handle a `*' width or precision; on entry p points at the `*'.
   *
   * The position of `*' is saved in t, p steps past it and any digits that
   * follow are read into n.
   *   - If the next character is `$', the numbered argument n is used and p is
   *     left on the `$'.
   *   - Otherwise the next sequential argument is used and p is rewound to
   *     the `*'.
   * The visible callers in rb_str_format() do p++ right after the macro.
   * The argument is converted with NUM2INT and stored in val; val is also
   * stringized into GETNUM's "too big" message.
   *
   * Uses t, n, tmp, p, end and enc from the expanding scope.
   */
  126 #define GETASTER(val) do { \
  127     t = p++; \
  128     n = 0; \
  129     GETNUM(n, val); \
  130     if (*p == '$') { \
  131         tmp = GETPOSARG(n); \
  132     } \
  133     else { \
  134         tmp = GETARG(); \
  135         p = t; \
  136     } \
  137     val = NUM2INT(tmp); \
  138 } while (0)
 139
 140
 141 /*
 142  *  call-seq:
 143  *     format(format_string [, arguments...] )   => string
 144  *     sprintf(format_string [, arguments...] )  => string
 145  *
 146  *  Returns the string resulting from applying <i>format_string</i> to
 147  *  any additional arguments.  Within the format string, any characters
 148  *  other than format sequences are copied to the result.
 149  *
  150  *  The syntax of a format sequence is as follows.
 151  *
 152  *    %[flags][width][.precision]type
 153  *
 154  *  A format
 155  *  sequence consists of a percent sign, followed by optional flags,
 156  *  width, and precision indicators, then terminated with a field type
 157  *  character.  The field type controls how the corresponding
 158  *  <code>sprintf</code> argument is to be interpreted, while the flags
 159  *  modify that interpretation.
 160  *
 161  *  The field type characters are:
 162  *
 163  *      Field |  Integer Format
 164  *      ------+--------------------------------------------------------------
 165  *        b   | Convert argument as a binary number.
 166  *            | Negative numbers will be displayed as a two's complement
 167  *            | prefixed with `..1'.
 168  *        B   | Equivalent to `b', but uses an uppercase 0B for prefix
 169  *            | in the alternative format by #.
 170  *        d   | Convert argument as a decimal number.
 171  *        i   | Identical to `d'.
 172  *        o   | Convert argument as an octal number.
 173  *            | Negative numbers will be displayed as a two's complement
 174  *            | prefixed with `..7'.
 175  *        u   | Identical to `d'.
 176  *        x   | Convert argument as a hexadecimal number.
 177  *            | Negative numbers will be displayed as a two's complement
 178  *            | prefixed with `..f' (representing an infinite string of
 179  *            | leading 'ff's).
 180  *        X   | Equivalent to `x', but uses uppercase letters.
 181  *
 182  *      Field |  Float Format
 183  *      ------+--------------------------------------------------------------
 184  *        e   | Convert floating point argument into exponential notation
 185  *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
 186  *            | The precision specifies the number of digits after the decimal
 187  *            | point (defaulting to six).
 188  *        E   | Equivalent to `e', but uses an uppercase E to indicate
 189  *            | the exponent.
 190  *        f   | Convert floating point argument as [-]ddd.dddddd,
 191  *            | where the precision specifies the number of digits after
 192  *            | the decimal point.
 193  *        g   | Convert a floating point number using exponential form
 194  *            | if the exponent is less than -4 or greater than or
 195  *            | equal to the precision, or in dd.dddd form otherwise.
 196  *            | The precision specifies the number of significant digits.
  197  *        G   | Equivalent to `g', but uses an uppercase `E' in exponent form.
 198  *
 199  *      Field |  Other Format
 200  *      ------+--------------------------------------------------------------
 201  *        c   | Argument is the numeric code for a single character or
 202  *            | a single character string itself.
  203  *        p   | The value of argument.inspect.
 204  *        s   | Argument is a string to be substituted.  If the format
 205  *            | sequence contains a precision, at most that many characters
 206  *            | will be copied.
 207  *
  208  *  The flags modify the behavior of the formats.
 209  *  The flag characters are:
 210  *
 211  *    Flag     | Applies to    | Meaning
 212  *    ---------+---------------+-----------------------------------------
 213  *    space    | bBdiouxX      | Leave a space at the start of
 214  *             | eEfgG         | non-negative numbers.
 215  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 216  *             |               | a minus sign with absolute value for
 217  *             |               | negative values.
 218  *    ---------+---------------+-----------------------------------------
 219  *    (digit)$ | all           | Specifies the absolute argument number
 220  *             |               | for this field.  Absolute and relative
 221  *             |               | argument numbers cannot be mixed in a
 222  *             |               | sprintf string.
 223  *    ---------+---------------+-----------------------------------------
 224  *     #       | bBoxX         | Use an alternative format.
 225  *             | eEfgG         | For the conversions `o', increase the precision
 226  *             |               | until the first digit will be `0' if
 227  *             |               | it is not formatted as complements.
 228  *             |               | For the conversions `x', `X', `b' and `B'
 229  *             |               | on non-zero, prefix the result with ``0x'',
 230  *             |               | ``0X'', ``0b'' and ``0B'', respectively.
 231  *             |               | For `e', `E', `f', `g', and 'G',
 232  *             |               | force a decimal point to be added,
 233  *             |               | even if no digits follow.
 234  *             |               | For `g' and 'G', do not remove trailing zeros.
 235  *    ---------+---------------+-----------------------------------------
 236  *    +        | bBdiouxX      | Add a leading plus sign to non-negative
 237  *             | eEfgG         | numbers.
 238  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
 239  *             |               | a minus sign with absolute value for
 240  *             |               | negative values.
 241  *    ---------+---------------+-----------------------------------------
 242  *    -        | all           | Left-justify the result of this conversion.
 243  *    ---------+---------------+-----------------------------------------
 244  *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
 245  *             | eEfgG         | For `o', `x', `X', `b' and `B', radix-1
 246  *             | (numeric fmt) | is used for negative numbers formatted as
 247  *             |               | complements.
 248  *    ---------+---------------+-----------------------------------------
 249  *    *        | all           | Use the next argument as the field width.
 250  *             |               | If negative, left-justify the result. If the
 251  *             |               | asterisk is followed by a number and a dollar
 252  *             |               | sign, use the indicated argument as the width.
 253  *
 254  *  Examples of flags:
 255  *
  256  *   # `+' and space flags specify the sign of non-negative numbers.
 257  *   sprintf("%d", 123)  #=> "123"
 258  *   sprintf("%+d", 123) #=> "+123"
 259  *   sprintf("% d", 123) #=> " 123"
 260  *
 261  *   # `#' flag for `o' increases number of digits to show `0'.
  262  *   # `+' and space flags change the format of negative numbers.
 263  *   sprintf("%o", 123)   #=> "173"
 264  *   sprintf("%#o", 123)  #=> "0173"
 265  *   sprintf("%+o", -123) #=> "-173"
 266  *   sprintf("%o", -123)  #=> "..7605"
 267  *   sprintf("%#o", -123) #=> "..7605"
 268  *
  269  *   # `#' flag for `x' adds a prefix `0x' for non-zero numbers.
  270  *   # `+' and space flags disable complements for negative numbers.
 271  *   sprintf("%x", 123)   #=> "7b"
 272  *   sprintf("%#x", 123)  #=> "0x7b"
 273  *   sprintf("%+x", -123) #=> "-7b"
 274  *   sprintf("%x", -123)  #=> "..f85"
 275  *   sprintf("%#x", -123) #=> "0x..f85"
 276  *   sprintf("%#x", 0)    #=> "0"
 277  *
 278  *   # `#' for `X' uses the prefix `0X'.
 279  *   sprintf("%X", 123)  #=> "7B"
 280  *   sprintf("%#X", 123) #=> "0X7B"
 281  *
  282  *   # `#' flag for `b' adds a prefix `0b' for non-zero numbers.
  283  *   # `+' and space flags disable complements for negative numbers.
 284  *   sprintf("%b", 123)   #=> "1111011"
 285  *   sprintf("%#b", 123)  #=> "0b1111011"
 286  *   sprintf("%+b", -123) #=> "-1111011"
 287  *   sprintf("%b", -123)  #=> "..10000101"
 288  *   sprintf("%#b", -123) #=> "0b..10000101"
 289  *   sprintf("%#b", 0)    #=> "0"
 290  *
 291  *   # `#' for `B' uses the prefix `0B'.
 292  *   sprintf("%B", 123)  #=> "1111011"
 293  *   sprintf("%#B", 123) #=> "0B1111011"
 294  *
  295  *   # `#' for `e' forces the decimal point to be shown.
 296  *   sprintf("%.0e", 1)  #=> "1e+00"
 297  *   sprintf("%#.0e", 1) #=> "1.e+00"
 298  *
  299  *   # `#' for `f' forces the decimal point to be shown.
 300  *   sprintf("%.0f", 1234)  #=> "1234"
 301  *   sprintf("%#.0f", 1234) #=> "1234."
 302  *
  303  *   # `#' for `g' forces the decimal point to be shown.
 304  *   # It also disables stripping lowest zeros.
 305  *   sprintf("%g", 123.4)   #=> "123.4"
 306  *   sprintf("%#g", 123.4)  #=> "123.400"
 307  *   sprintf("%g", 123456)  #=> "123456"
 308  *   sprintf("%#g", 123456) #=> "123456."
 309  *
 310  *  The field width is an optional integer, followed optionally by a
 311  *  period and a precision.  The width specifies the minimum number of
 312  *  characters that will be written to the result for this field.
 313  *
 314  *  Examples of width:
 315  *
 316  *   # padding is done by spaces,       width=20
 317  *   # 0 or radix-1.             <------------------>
 318  *   sprintf("%20d", 123)   #=> "                 123"
 319  *   sprintf("%+20d", 123)  #=> "                +123"
 320  *   sprintf("%020d", 123)  #=> "00000000000000000123"
 321  *   sprintf("%+020d", 123) #=> "+0000000000000000123"
 322  *   sprintf("% 020d", 123) #=> " 0000000000000000123"
 323  *   sprintf("%-20d", 123)  #=> "123                 "
 324  *   sprintf("%-+20d", 123) #=> "+123                "
 325  *   sprintf("%- 20d", 123) #=> " 123                "
 326  *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
 327  *
 328  *  For
 329  *  numeric fields, the precision controls the number of decimal places
 330  *  displayed.  For string fields, the precision determines the maximum
 331  *  number of characters to be copied from the string.  (Thus, the format
 332  *  sequence <code>%10.10s</code> will always contribute exactly ten
 333  *  characters to the result.)
 334  *
 335  *  Examples of precisions:
 336  *
 337  *   # precision for `d', 'o', 'x' and 'b' is
 338  *   # minimum number of digits               <------>
 339  *   sprintf("%20.8d", 123)  #=> "            00000123"
 340  *   sprintf("%20.8o", 123)  #=> "            00000173"
 341  *   sprintf("%20.8x", 123)  #=> "            0000007b"
 342  *   sprintf("%20.8b", 123)  #=> "            01111011"
 343  *   sprintf("%20.8d", -123) #=> "           -00000123"
 344  *   sprintf("%20.8o", -123) #=> "            ..777605"
 345  *   sprintf("%20.8x", -123) #=> "            ..ffff85"
 346  *   sprintf("%20.8b", -11)  #=> "            ..110101"
 347  *
  348  *   # "0x" and "0b" for `#x' and `#b' are not counted for
 349  *   # precision but "0" for `#o' is counted.  <------>
 350  *   sprintf("%#20.8d", 123)  #=> "            00000123"
 351  *   sprintf("%#20.8o", 123)  #=> "            00000173"
 352  *   sprintf("%#20.8x", 123)  #=> "          0x0000007b"
 353  *   sprintf("%#20.8b", 123)  #=> "          0b01111011"
 354  *   sprintf("%#20.8d", -123) #=> "           -00000123"
 355  *   sprintf("%#20.8o", -123) #=> "            ..777605"
 356  *   sprintf("%#20.8x", -123) #=> "          0x..ffff85"
 357  *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
 358  *
 359  *   # precision for `e' is number of
 360  *   # digits after the decimal point           <------>
 361  *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
 362  *
 363  *   # precision for `f' is number of
 364  *   # digits after the decimal point               <------>
 365  *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
 366  *
 367  *   # precision for `g' is number of
 368  *   # significant digits                          <------->
 369  *   sprintf("%20.8g", 1234.56789) #=> "           1234.5679"
 370  *
 371  *   #                                         <------->
 372  *   sprintf("%20.8g", 123456789)  #=> "       1.2345679e+08"
 373  *
 374  *   # precision for `s' is
 375  *   # maximum number of characters                    <------>
 376  *   sprintf("%20.8s", "string test") #=> "            string t"
 377  *
 378  *  Examples:
 379  *
 380  *     sprintf("%d %04x", 123, 123)               #=> "123 007b"
 381  *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
 382  *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
 383  *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
 384  *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
 385  *     sprintf("%u", -123)                        #=> "-123"
 386  */
 387
 388 VALUE
 389 rb_f_sprintf(int argc, const VALUE *argv)
 390 {
 391     return rb_str_format(argc - 1, argv + 1, GETNTHARG(0));
 392 }
 393
 394 VALUE
 395 rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 396 {
 397     rb_encoding *enc;
 398     const char *p, *end;
 399     char *buf;
 400     int blen, bsiz;
 401     VALUE result;
 402
 403     int width, prec, flags = FNONE;
 404     int nextarg = 1;
 405     int posarg = 0;
 406     int tainted = 0;
 407     VALUE nextvalue;
 408     VALUE tmp;
 409     VALUE str;
 410
 411 #define CHECK_FOR_WIDTH(f)                               \
 412     if ((f) & FWIDTH) {                                  \
 413         rb_raise(rb_eArgError, "width given twice");     \
 414     }                                                    \
 415     if ((f) & FPREC0) {                                  \
 416         rb_raise(rb_eArgError, "width after precision"); \
 417     }
 418 #define CHECK_FOR_FLAGS(f)                               \
 419     if ((f) & FWIDTH) {                                  \
 420         rb_raise(rb_eArgError, "flag after width");      \
 421     }                                                    \
 422     if ((f) & FPREC0) {                                  \
 423         rb_raise(rb_eArgError, "flag after precision"); \
 424     }
 425
 426     ++argc;
 427     --argv;
 428     if (OBJ_TAINTED(fmt)) tainted = 1;
 429     StringValue(fmt);
 430     enc = rb_enc_get(fmt);
 431     fmt = rb_str_new4(fmt);
 432     p = RSTRING_PTR(fmt);
 433     end = p + RSTRING_LEN(fmt);
 434     blen = 0;
 435     bsiz = 120;
 436     result = rb_str_buf_new(bsiz);
 437     rb_enc_copy(result, fmt);
 438     buf = RSTRING_PTR(result);
 439     memset(buf, 0, bsiz);
 440
 441     for (; p < end; p++) {
 442         const char *t;
 443         int n;
 444
 445         for (t = p; t < end && *t != '%'; t++) ;
 446         PUSH(p, t - p);
 447         if (t >= end) {
 448             /* end of fmt string */
 449             goto sprint_exit;
 450         }
 451         p = t + 1;              /* skip `%' */
 452
 453         width = prec = -1;
 454         nextvalue = Qundef;
 455       retry:
 456         switch (*p) {
 457           default:
 458             if (rb_enc_isprint(*p, enc))
 459                 rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
 460             else
 461                 rb_raise(rb_eArgError, "malformed format string");
 462             break;
 463
 464           case ' ':
 465             CHECK_FOR_FLAGS(flags);
 466             flags |= FSPACE;
 467             p++;
 468             goto retry;
 469
 470           case '#':
 471             CHECK_FOR_FLAGS(flags);
 472             flags |= FSHARP;
 473             p++;
 474             goto retry;
 475
 476           case '+':
 477             CHECK_FOR_FLAGS(flags);
 478             flags |= FPLUS;
 479             p++;
 480             goto retry;
 481
 482           case '-':
 483             CHECK_FOR_FLAGS(flags);
 484             flags |= FMINUS;
 485             p++;
 486             goto retry;
 487
 488           case '0':
 489             CHECK_FOR_FLAGS(flags);
 490             flags |= FZERO;
 491             p++;
 492             goto retry;
 493
 494           case '1': case '2': case '3': case '4':
 495           case '5': case '6': case '7': case '8': case '9':
 496             n = 0;
 497             GETNUM(n, width);
 498             if (*p == '$') {
 499                 if (nextvalue != Qundef) {
 500                     rb_raise(rb_eArgError, "value given twice - %d$", n);
 501                 }
 502                 nextvalue = GETPOSARG(n);
 503                 p++;
 504                 goto retry;
 505             }
 506             CHECK_FOR_WIDTH(flags);
 507             width = n;
 508             flags |= FWIDTH;
 509             goto retry;
 510
 511           case '*':
 512             CHECK_FOR_WIDTH(flags);
 513             flags |= FWIDTH;
 514             GETASTER(width);
 515             if (width < 0) {
 516                 flags |= FMINUS;
 517                 width = -width;
 518             }
 519             p++;
 520             goto retry;
 521
 522           case '.':
 523             if (flags & FPREC0) {
 524                 rb_raise(rb_eArgError, "precision given twice");
 525             }
 526             flags |= FPREC|FPREC0;
 527
 528             prec = 0;
 529             p++;
 530             if (*p == '*') {
 531                 GETASTER(prec);
 532                 if (prec < 0) { /* ignore negative precision */
 533                     flags &= ~FPREC;
 534                 }
 535                 p++;
 536                 goto retry;
 537             }
 538
 539             GETNUM(prec, precision);
 540             goto retry;
 541
 542           case '\n':
 543           case '\0':
 544             p--;
 545           case '%':
 546             if (flags != FNONE) {
 547                 rb_raise(rb_eArgError, "invalid format character - %%");
 548             }
 549             PUSH("%", 1);
 550             break;
 551
 552           case 'c':
 553             {
 554                 VALUE val = GETARG();
 555                 VALUE tmp;
 556                 int c, n;
 557
 558                 tmp = rb_check_string_type(val);
 559                 if (!NIL_P(tmp)) {
 560                     if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
 561                         rb_raise(rb_eArgError, "%%c requires a character");
 562                     }
 563                     c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc);
 564                 }
 565                 else {
 566                     c = NUM2INT(val);
 567                 }
 568                 n = rb_enc_codelen(c, enc);
 569                 if (n == 0) {
 570                     rb_raise(rb_eArgError, "invalid character");
 571                 }
 572                 if (!(flags & FWIDTH)) {
 573                     CHECK(n);
 574                     rb_enc_mbcput(c, &buf[blen], enc);
 575                     blen += n;
 576                 }
 577                 else if ((flags & FMINUS)) {
 578                     CHECK(n);
 579                     rb_enc_mbcput(c, &buf[blen], enc);
 580                     blen += n;
 581                     FILL(' ', width-1);
 582                 }
 583                 else {
 584                     FILL(' ', width-1);
 585                     CHECK(n);
 586                     rb_enc_mbcput(c, &buf[blen], enc);
 587                     blen += n;
 588                 }
 589             }
 590             break;
 591
 592           case 's':
 593           case 'p':
 594             {
 595                 VALUE arg = GETARG();
 596                 long len, slen;
 597
 598                 if (*p == 'p') arg = rb_inspect(arg);
 599                 str = rb_obj_as_string(arg);
 600                 if (OBJ_TAINTED(str)) tainted = 1;
 601                 len = RSTRING_LEN(str);
 602                 enc = rb_enc_check(result, str);
 603                 if (flags&(FPREC|FWIDTH)) {
 604                     slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
 605                     if (slen < 0) {
 606                         rb_raise(rb_eArgError, "invalid mbstring sequence");
 607                     }
 608                     if ((flags&FPREC) && (prec < slen)) {
 609                         char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
 610                                              prec, enc);
 611                         slen = prec;
 612                         len = p - RSTRING_PTR(str);
 613                     }
 614                     /* need to adjust multi-byte string pos */
 615                     if ((flags&FWIDTH) && (width > slen)) {
 616                         width -= slen;
 617                         if (!(flags&FMINUS)) {
 618                             CHECK(width);
 619                             while (width--) {
 620                                 buf[blen++] = ' ';
 621                             }
 622                         }
 623                         CHECK(len);
 624                         memcpy(&buf[blen], RSTRING_PTR(str), len);
 625                         blen += len;
 626                         if (flags&FMINUS) {
 627                             CHECK(width);
 628                             while (width--) {
 629                                 buf[blen++] = ' ';
 630                             }
 631                         }
 632                         rb_enc_associate(result, enc);
 633                         break;
 634                     }
 635                 }
 636                 PUSH(RSTRING_PTR(str), len);
 637                 rb_enc_associate(result, enc);
 638             }
 639             break;
 640
 641           case 'd':
 642           case 'i':
 643           case 'o':
 644           case 'x':
 645           case 'X':
 646           case 'b':
 647           case 'B':
 648           case 'u':
 649             {
 650                 volatile VALUE tmp1;
 651                 volatile VALUE val = GETARG();
 652                 char fbuf[32], nbuf[64], *s;
 653                 const char *prefix = 0;
 654                 int sign = 0, dots = 0;
 655                 char sc = 0;
 656                 long v = 0;
 657                 int base, bignum = 0;
 658                 int len, pos;
 659
 660                 switch (*p) {
 661                   case 'd':
 662                   case 'i':
 663                   case 'u':
 664                     sign = 1; break;
 665                   case 'o':
 666                   case 'x':
 667                   case 'X':
 668                   case 'b':
 669                   case 'B':
 670                     if (flags&(FPLUS|FSPACE)) sign = 1;
 671                     break;
 672                 }
 673                 if (flags & FSHARP) {
 674                     switch (*p) {
 675                       case 'o':
 676                         prefix = "0"; break;
 677                       case 'x':
 678                         prefix = "0x"; break;
 679                       case 'X':
 680                         prefix = "0X"; break;
 681                       case 'b':
 682                         prefix = "0b"; break;
 683                       case 'B':
 684                         prefix = "0B"; break;
 685                     }
 686                 }
 687
 688               bin_retry:
 689                 switch (TYPE(val)) {
 690                   case T_FLOAT:
 691                     if (FIXABLE(RFLOAT_VALUE(val))) {
 692                         val = LONG2FIX((long)RFLOAT_VALUE(val));
 693                         goto bin_retry;
 694                     }
 695                     val = rb_dbl2big(RFLOAT_VALUE(val));
 696                     if (FIXNUM_P(val)) goto bin_retry;
 697                     bignum = 1;
 698                     break;
 699                   case T_STRING:
 700                     val = rb_str_to_inum(val, 0, Qtrue);
 701                     goto bin_retry;
 702                   case T_BIGNUM:
 703                     bignum = 1;
 704                     break;
 705                   case T_FIXNUM:
 706                     v = FIX2LONG(val);
 707                     break;
 708                   default:
 709                     val = rb_Integer(val);
 710                     goto bin_retry;
 711                 }
 712
 713                 switch (*p) {
 714                   case 'o':
 715                     base = 8; break;
 716                   case 'x':
 717                   case 'X':
 718                     base = 16; break;
 719                   case 'b':
 720                   case 'B':
 721                     base = 2; break;
 722                   case 'u':
 723                   case 'd':
 724                   case 'i':
 725                   default:
 726                     base = 10; break;
 727                 }
 728
 729                 if (!bignum) {
 730                     if (base == 2) {
 731                         val = rb_int2big(v);
 732                         goto bin_retry;
 733                     }
 734                     if (sign) {
 735                         char c = *p;
 736                         if (c == 'i') c = 'd'; /* %d and %i are identical */
 737                         if (v < 0) {
 738                             v = -v;
 739                             sc = '-';
 740                             width--;
 741                         }
 742                         else if (flags & FPLUS) {
 743                             sc = '+';
 744                             width--;
 745                         }
 746                         else if (flags & FSPACE) {
 747                             sc = ' ';
 748                             width--;
 749                         }
 750                         sprintf(fbuf, "%%l%c", c);
 751                         sprintf(nbuf, fbuf, v);
 752                         s = nbuf;
 753                     }
 754                     else {
 755                         s = nbuf;
 756                         if (v < 0) {
 757                             dots = 1;
 758                         }
 759                         sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p);
 760                         sprintf(++s, fbuf, v);
 761                         if (v < 0) {
 762                             char d = 0;
 763
 764                             s = remove_sign_bits(s, base);
 765                             switch (base) {
 766                               case 16:
 767                                 d = 'f'; break;
 768                               case 8:
 769                                 d = '7'; break;
 770                             }
 771                             if (d && *s != d) {
 772                                 *--s = d;
 773                             }
 774                         }
 775                     }
 776                 }
 777                 else {
 778                     if (sign) {
 779                         tmp = rb_big2str(val, base);
 780                         s = RSTRING_PTR(tmp);
 781                         if (s[0] == '-') {
 782                             s++;
 783                             sc = '-';
 784                             width--;
 785                         }
 786                         else if (flags & FPLUS) {
 787                             sc = '+';
 788                             width--;
 789                         }
 790                         else if (flags & FSPACE) {
 791                             sc = ' ';
 792                             width--;
 793                         }
 794                     }
 795                     else {
 796                         if (!RBIGNUM_SIGN(val)) {
 797                             val = rb_big_clone(val);
 798                             rb_big_2comp(val);
 799                         }
 800                         tmp1 = tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
 801                         s = RSTRING_PTR(tmp);
 802                         if (*s == '-') {
 803                             dots = 1;
 804                             if (base == 10) {
 805                                 rb_warning("negative number for %%u specifier");
 806                             }
 807                             s = remove_sign_bits(++s, base);
 808                             switch (base) {
 809                               case 16:
 810                                 if (s[0] != 'f') *--s = 'f'; break;
 811                               case 8:
 812                                 if (s[0] != '7') *--s = '7'; break;
 813                               case 2:
 814                                 if (s[0] != '1') *--s = '1'; break;
 815                             }
 816                         }
 817                     }
 818                 }
 819
 820                 pos = -1;
 821                 len = strlen(s);
 822                 if (dots) {
 823                     prec -= 2;
 824                     width -= 2;
 825                 }
 826
 827                 if (*p == 'X') {
 828                     char *pp = s;
 829                     int c;
 830                     while ((c = (int)(unsigned char)*pp) != 0) {
 831                         *pp = rb_enc_toupper(c, enc);
 832                         pp++;
 833                     }
 834                 }
 835                 if (prefix && !prefix[1]) { /* octal */
 836                     if (dots) {
 837                         prefix = 0;
 838                     }
 839                     else if (len == 1 && *s == '0') {
 840                         len = 0;
 841                         if (flags & FPREC) prec--;
 842                     }
 843                     else if ((flags & FPREC) && (prec > len)) {
 844                         prefix = 0;
 845                     }
 846                 }
 847                 else if (len == 1 && *s == '0') {
 848                     prefix = 0;
 849                 }
 850                 if (prefix) {
 851                     width -= strlen(prefix);
 852                 }
 853                 if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
 854                     prec = width;
 855                     width = 0;
 856                 }
 857                 else {
 858                     if (prec < len) {
 859                         if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
 860                         prec = len;
 861                     }
 862                     width -= prec;
 863                 }
 864                 if (!(flags&FMINUS)) {
 865                     CHECK(width);
 866                     while (width-- > 0) {
 867                         buf[blen++] = ' ';
 868                     }
 869                 }
 870                 if (sc) PUSH(&sc, 1);
 871                 if (prefix) {
 872                     int plen = strlen(prefix);
 873                     PUSH(prefix, plen);
 874                 }
 875                 CHECK(prec - len);
 876                 if (dots) PUSH("..", 2);
 877                 if (!bignum && v < 0) {
 878                     char c = sign_bits(base, p);
 879                     while (len < prec--) {
 880                         buf[blen++] = c;
 881                     }
 882                 }
 883                 else if ((flags & (FMINUS|FPREC)) != FMINUS) {
 884                     char c;
 885
 886                     if (!sign && bignum && !RBIGNUM_SIGN(val))
 887                         c = sign_bits(base, p);
 888                     else
 889                         c = '0';
 890                     while (len < prec--) {
 891                         buf[blen++] = c;
 892                     }
 893                 }
 894                 PUSH(s, len);
 895                 CHECK(width);
 896                 while (width-- > 0) {
 897                     buf[blen++] = ' ';
 898                 }
 899             }
 900             break;
 901
 902           case 'f':
 903           case 'g':
 904           case 'G':
 905           case 'e':
 906           case 'E':
 907             {
 908                 VALUE val = GETARG();
 909                 double fval;
 910                 int i, need = 6;
 911                 char fbuf[32];
 912
 913                 fval = RFLOAT_VALUE(rb_Float(val));
 914                 if (isnan(fval) || isinf(fval)) {
 915                     const char *expr;
 916
 917                     if (isnan(fval)) {
 918                         expr = "NaN";
 919                     }
 920                     else {
 921                         expr = "Inf";
 922                     }
 923                     need = strlen(expr);
 924                     if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
 925                         need++;
 926                     if ((flags & FWIDTH) && need < width)
 927                         need = width;
 928
 929                     CHECK(need);
 930                     sprintf(&buf[blen], "%*s", need, "");
 931                     if (flags & FMINUS) {
 932                         if (!isnan(fval) && fval < 0.0)
 933                             buf[blen++] = '-';
 934                         else if (flags & FPLUS)
 935                             buf[blen++] = '+';
 936                         else if (flags & FSPACE)
 937                             blen++;
 938                         strncpy(&buf[blen], expr, strlen(expr));
 939                     }
 940                     else {
 941                         if (!isnan(fval) && fval < 0.0)
 942                             buf[blen + need - strlen(expr) - 1] = '-';
 943                         else if (flags & FPLUS)
 944                             buf[blen + need - strlen(expr) - 1] = '+';
 945                         else if ((flags & FSPACE) && need > width)
 946                             blen++;
 947                         strncpy(&buf[blen + need - strlen(expr)], expr,
 948                                 strlen(expr));
 949                     }
 950                     blen += strlen(&buf[blen]);
 951                     break;
 952                 }
 953
 954                 fmt_setup(fbuf, *p, flags, width, prec);
 955                 need = 0;
 956                 if (*p != 'e' && *p != 'E') {
 957                     i = INT_MIN;
 958                     frexp(fval, &i);
 959                     if (i > 0)
 960                         need = BIT_DIGITS(i);
 961                 }
 962                 need += (flags&FPREC) ? prec : 6;
 963                 if ((flags&FWIDTH) && need < width)
 964                     need = width;
 965                 need += 20;
 966
 967                 CHECK(need);
 968                 sprintf(&buf[blen], fbuf, fval);
 969                 blen += strlen(&buf[blen]);
 970             }
 971             break;
 972         }
 973         flags = FNONE;
 974     }
 975
 976   sprint_exit:
 977     /* XXX - We cannot validate the number of arguments if (digit)$ style used.
 978      */
 979     if (posarg >= 0 && nextarg < argc) {
 980         const char *mesg = "too many arguments for format string";
 981         if (RTEST(ruby_debug)) rb_raise(rb_eArgError, mesg);
 982         if (RTEST(ruby_verbose)) rb_warn(mesg);
 983     }
 984     rb_str_resize(result, blen);
 985
 986     if (tainted) OBJ_TAINT(result);
 987     return result;
 988 }
 989
 990 static void
 991 fmt_setup(char *buf, int c, int flags, int width, int prec)
 992 {
 993     *buf++ = '%';
 994     if (flags & FSHARP) *buf++ = '#';
 995     if (flags & FPLUS)  *buf++ = '+';
 996     if (flags & FMINUS) *buf++ = '-';
 997     if (flags & FZERO)  *buf++ = '0';
 998     if (flags & FSPACE) *buf++ = ' ';
 999
1000     if (flags & FWIDTH) {
1001         sprintf(buf, "%d", width);
1002         buf += strlen(buf);
1003     }
1004
1005     if (flags & FPREC) {
1006         sprintf(buf, ".%d", prec);
1007         buf += strlen(buf);
1008     }
1009
1010     *buf++ = c;
1011     *buf = '\0';
1012 }
1013
1014 #undef FILE
1015 #define FILE rb_printf_buffer
1016 #define __sbuf rb_printf_sbuf
1017 #define __sFILE rb_printf_sfile
1018 #undef feof
1019 #undef ferror
1020 #undef clearerr
1021 #undef fileno
1022 #if SIZEOF_LONG < SIZEOF_VOIDP
1023 # if  SIZEOF_LONG_LONG == SIZEOF_VOIDP
1024 #  define _HAVE_SANE_QUAD_
1025 #  define _HAVE_LLP64_
1026 #  define quad_t LONG_LONG
1027 #  define u_quad_t unsigned LONG_LONG
1028 # endif
1029 #endif
1030 #undef vsnprintf
1031 #undef snprintf
1032 #include "missing/vsnprintf.c"
1033
1034 static int
1035 ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
1036 {
1037     struct __siov *iov;
1038     VALUE result = (VALUE)fp->_bf._base;
1039     char *buf = (char*)fp->_p;
1040     size_t len, n;
1041     int blen = buf - RSTRING_PTR(result), bsiz = fp->_w;
1042
1043     if (RBASIC(result)->klass) {
1044         rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
1045     }
1046     if ((len = uio->uio_resid) == 0)
1047         return 0;
1048     CHECK(len);
1049     buf += blen;
1050     fp->_w = bsiz;
1051     for (iov = uio->uio_iov; len > 0; ++iov) {
1052         MEMCPY(buf, iov->iov_base, char, n = iov->iov_len);
1053         buf += n;
1054         len -= n;
1055     }
1056     fp->_p = (unsigned char *)buf;
1057     return 0;
1058 }
1059
1060 VALUE
1061 rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap)
1062 {
1063     rb_printf_buffer f;
1064     VALUE result;
1065
1066     f._flags = __SWR | __SSTR;
1067     f._bf._size = 0;
1068     f._w = 120;
1069     result = rb_str_buf_new(f._w);
1070     if (enc) rb_enc_associate(result, enc);
1071     f._bf._base = (unsigned char *)result;
1072     f._p = (unsigned char *)RSTRING_PTR(result);
1073     RBASIC(result)->klass = 0;
1074     f.vwrite = ruby__sfvwrite;
1075     BSD_vfprintf(&f, fmt, ap);
1076     RBASIC(result)->klass = rb_cString;
1077     rb_str_resize(result, (char *)f._p - RSTRING_PTR(result));
1078
1079     return result;
1080 }
1081
1082 VALUE
1083 rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
1084 {
1085     VALUE result;
1086     va_list ap;
1087
1088     va_start(ap, format);
1089     result = rb_enc_vsprintf(enc, format, ap);
1090     va_end(ap);
1091
1092     return result;
1093 }
1094
1095 VALUE
1096 rb_vsprintf(const char *fmt, va_list ap)
1097 {
1098     return rb_enc_vsprintf(NULL, fmt, ap);
1099 }
1100
1101 VALUE
1102 rb_sprintf(const char *format, ...)
1103 {
1104     VALUE result;
1105     va_list ap;
1106
1107     va_start(ap, format);
1108     result = rb_vsprintf(format, ap);
1109     va_end(ap);
1110
1111     return result;
1112 }