1 /* printf - format and print data
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
19 #include <sys/types.h>
24 #include "cl-strtod.h"
25 #include "octhexdigits.h"
27 #include "unicodeio.h"
30 /* The official name of this program (e.g., no 'g' prefix). */
31 #define PROGRAM_NAME "printf"
/* Author credit handed to version_etc when --version is requested.  */
33 #define AUTHORS proper_name ("David MacKenzie")
35 /* The value to return to the calling program. */
/* main resets this to EXIT_SUCCESS; verify_numeric sets it to
   EXIT_FAILURE each time it reports a conversion problem.  */
36 static int exit_status
;
38 /* True if the POSIXLY_CORRECT environment variable is set. */
/* Assigned in main from getenv ("POSIXLY_CORRECT"); when true, the
   STRTOX converters do not warn about text that follows a character
   constant.  */
39 static bool posixly_correct
;
41 /* This message appears in N_() here rather than just in _() below because
42 the sole use would have been in a #define. */
/* The #define in question is STRTOX, which passes _(cfcc_msg) to error.  */
43 static char const *const cfcc_msg
=
44 N_("warning: %s: character(s) following character constant have been ignored");
49 if (status
!= EXIT_SUCCESS
)
54 Usage: %s FORMAT [ARGUMENT]...\n\
57 program_name
, program_name
);
59 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
62 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
63 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
66 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
74 \\c produce no further output\n\
78 \\r carriage return\n\
83 \\NNN byte with octal value NNN (1 to 3 digits)\n\
84 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
85 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
86 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
90 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
91 except that octal escapes should have a leading 0 like \\0NNN\n\
92 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
93 escaping non-printable characters with the POSIX $'' syntax\
95 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
96 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
98 printf (USAGE_BUILTIN_WARNING
, PROGRAM_NAME
);
99 emit_ancillary_info (PROGRAM_NAME
);
/* Report problems found after converting the numeric argument S.
   END is handed over by the STRTOX converters right after the
   conversion call; it is presumably the end pointer set by that call
   -- TODO confirm, the tests on END and errno are not visible in this
   excerpt.
   Three diagnostics can be issued: "expected a numeric value", an
   errno-based message, and "value not completely converted".  Each is
   printed through error with a status of 0 and is followed by setting
   exit_status to EXIT_FAILURE.  */
105 verify_numeric (char const *s
, char const *end
)
/* Diagnostic 1: S was expected to be a number.  */
109 error (0, 0, _("%s: expected a numeric value"), quote (s
));
110 exit_status
= EXIT_FAILURE
;
/* Diagnostic 2: let error describe errno for S.  */
114 error (0, errno
, "%s", quote (s
));
115 exit_status
= EXIT_FAILURE
;
/* Diagnostic 3: the conversion did not cover the whole value.  */
119 error (0, 0, _("%s: value not completely converted"), quote (s
));
120 exit_status
= EXIT_FAILURE
;
/* Define a converter FUNC_NAME (char const *s) for the result type TYPE.
   Character-constant form: when S starts with a double or single quote
   and at least one more byte follows, the value is taken from the
   character after the quote.  That byte is read as an unsigned char;
   if MB_CUR_MAX > 1 and further bytes follow, mbrtowc is called on the
   rest of the string with a zeroed mbstate_t.  If anything remains
   after the character and posixly_correct is false, the cfcc_msg
   warning is issued.
   Numeric form: LIB_FUNC_EXPR is evaluated into val (the expression may
   use S and END) and verify_numeric (s, end) reports any conversion
   problem.
   NOTE(review): the declarations of val/end/wc/bytes and the return
   statement are not visible in this excerpt.  */
124 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
126 FUNC_NAME (char const *s) \
131 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
133 unsigned char ch = *++s; \
136 if (MB_CUR_MAX > 1 && *(s + 1)) \
138 mbstate_t mbstate; mbszero (&mbstate); \
140 size_t slen = strlen (s); \
142 /* Use mbrtowc not mbrtoc32, as per POSIX. */ \
143 bytes = mbrtowc (&wc, s, slen, &mbstate); \
151 /* If POSIXLY_CORRECT is not set, then give a warning that there \
152 are characters following the character constant and that GNU \
153 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
154 set, then don't give the warning. */ \
155 if (*++s != 0 && !posixly_correct) \
156 error (0, 0, _(cfcc_msg), s); \
161 val = (LIB_FUNC_EXPR); \
162 verify_numeric (s, end); \
/* Instantiate the three argument converters used by print_direc and
   print_formatted:
     vstrtoimax - intmax_t, via strtoimax with base 0;
     vstrtoumax - uintmax_t, via strtoumax with base 0;
     vstrtold   - long double, via cl_strtold.  */
167 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
168 STRTOX (uintmax_t, vstrtoumax
, strtoumax (s
, &end
, 0))
169 STRTOX (long double, vstrtold
, cl_strtold (s
, &end
))
171 /* Output a single-character \ escape. */
/* C is the escape letter that followed the backslash; print_esc calls
   this for the letters it accepts as single-character escapes.  The
   cases visible here are a, b, c, e, f, n, r, t and v.
   NOTE(review): the statements under each case are not visible in this
   excerpt, so what is emitted per letter must be checked in the full
   file.  */
174 print_esc_char (char c
)
178 case 'a': /* Alert. */
181 case 'b': /* Backspace. */
184 case 'c': /* Cancel the rest of the output. */
187 case 'e': /* Escape. */
190 case 'f': /* Form feed. */
193 case 'n': /* New line. */
196 case 'r': /* Carriage return. */
199 case 't': /* Horizontal tab. */
202 case 'v': /* Vertical tab. */
211 /* Print a \ escape sequence starting at ESCSTART.
212 Return the number of characters in the escape sequence
213 besides the backslash.
214 If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o
215 is an octal digit; otherwise they are of the form \ooo. */
218 print_esc (char const *escstart
, bool octal_0
)
/* P starts at the character right after the backslash.  */
220 char const *p
= escstart
+ 1;
221 int esc_value
= 0; /* Value of \nnn escape. */
222 int esc_length
; /* Length of \nnn escape. */
226 /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */
/* The ++p in the initializer presumably steps over the 'x' (the test
   selecting this branch is not visible here); each digit then shifts
   esc_value by one hex place.  */
227 for (esc_length
= 0, ++p
;
228 esc_length
< 2 && c_isxdigit (*p
);
230 esc_value
= esc_value
* 16 + fromhex (*p
);
/* Fatal (exits with EXIT_FAILURE); presumably guarded by a test for
   zero digits read -- TODO confirm, the condition is not visible.  */
232 error (EXIT_FAILURE
, 0, _("missing hexadecimal number in escape"));
237 /* Parse \0ooo (if octal_0 && *p == '0') or \ooo (otherwise).
238 Allow \ooo if octal_0 && *p != '0'; this is an undocumented
239 extension to POSIX that is compatible with Bash 2.05b. */
/* The boolean (octal_0 && *p == '0') is added to P, so the leading 0
   is skipped only in \0ooo mode; at most three octal digits follow.  */
240 for (esc_length
= 0, p
+= octal_0
&& *p
== '0';
241 esc_length
< 3 && isoct (*p
);
243 esc_value
= esc_value
* 8 + fromoct (*p
);
/* Single-character escapes go to print_esc_char.  The *p test keeps
   strchr from matching the string's terminating NUL.  */
246 else if (*p
&& strchr ("\"\\abcefnrtv", *p
))
247 print_esc_char (*p
++);
248 else if (*p
== 'u' || *p
== 'U')
251 unsigned int uni_value
;
/* esc_length starts at the number of hex digits required: 4 when
   esc_char is 'u', otherwise 8.  ++p steps over the letter.  */
254 for (esc_length
= (esc_char
== 'u' ? 4 : 8), ++p
;
/* A non-hex character where a digit is required is fatal.  */
258 if (! c_isxdigit (*p
))
259 error (EXIT_FAILURE
, 0, _("missing hexadecimal number in escape"));
260 uni_value
= uni_value
* 16 + fromhex (*p
);
263 /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
264 Note print_unicode_char() would print the literal \u.. in this case. */
265 if (uni_value
>= 0xd800 && uni_value
<= 0xdfff)
/* The message re-creates the escape as written: the letter, then the
   value zero-padded to 4 or 8 hex digits via %0*x.  */
266 error (EXIT_FAILURE
, 0, _("invalid universal character name \\%c%0*x"),
267 esc_char
, (esc_char
== 'u' ? 4 : 8), uni_value
);
269 print_unicode_char (stdout
, uni_value
, 0);
/* The - 1 leaves the backslash itself out of the returned length.  */
280 return p
- escstart
- 1;
283 /* Print string STR, evaluating \ escapes. */
/* print_formatted uses this for the %b directive.  Escapes are expanded
   by print_esc with octal_0 set to true, i.e. octal escapes use the
   \0ooo form, and STR is advanced by the length print_esc returns.
   NOTE(review): the enclosing loop and the handling of ordinary
   characters are not visible in this excerpt.  */
286 print_esc_string (char const *str
)
290 str
+= print_esc (str
, true);
295 /* Evaluate a printf conversion specification. START is the start of
296 the directive, and CONVERSION specifies the type of conversion.
297 FIELD_WIDTH and PRECISION are the field width and precision for '*'
298 values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
299 ARGUMENT is the argument to be formatted. */
/* ARGUMENT may be a null pointer (print_formatted passes nullptr when
   the arguments have run out); the numeric and %c branches below test
   for that and substitute 0 or NUL.  */
302 print_direc (char const *start
, char conversion
,
303 bool have_field_width
, int field_width
,
304 bool have_precision
, int precision
,
305 char const *argument
)
307 char *p
; /* Null-terminated copy of % directive. */
309 /* Create a null-terminated copy of the % directive, with an
310 intmax_t-wide length modifier substituted for any existing
311 integer length modifier. */
314 char const *length_modifier
;
315 size_t length_modifier_len
;
/* Integer conversions get the "j" modifier, matching the
   intmax_t/uintmax_t values produced by vstrtoimax/vstrtoumax.  */
319 case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
320 length_modifier
= "j";
321 length_modifier_len
= 1;
/* Floating-point conversions get "L", matching the long double
   produced by vstrtold.  */
324 case 'a': case 'e': case 'f': case 'g':
325 case 'A': case 'E': case 'F': case 'G':
326 length_modifier
= "L";
327 length_modifier_len
= 1;
/* Remaining case: no modifier is inserted (length 0), so the pointer
   only has to be valid for a zero-byte copy.  */
331 length_modifier
= start
; /* Any valid pointer will do. */
332 length_modifier_len
= 0;
/* The buffer holds START, the modifier, and 2 extra bytes --
   presumably the conversion character and the terminating NUL;
   TODO confirm, the stores of those bytes are not visible here.  */
336 size_t length
= strlen (start
);
337 p
= xmalloc (length
+ length_modifier_len
+ 2);
338 q
= mempcpy (p
, start
, length
);
339 q
= mempcpy (q
, length_modifier
, length_modifier_len
);
/* Signed integer branch: a missing argument formats as 0.  The xprintf
   calls differ only in whether a '*' width and/or '*' precision value
   is passed ahead of the converted argument.  */
349 intmax_t arg
= argument
? vstrtoimax (argument
) : 0;
350 if (!have_field_width
)
355 xprintf (p
, precision
, arg
);
360 xprintf (p
, field_width
, arg
);
362 xprintf (p
, field_width
, precision
, arg
);
/* Unsigned integer branch: same layout, using vstrtoumax.  */
372 uintmax_t arg
= argument
? vstrtoumax (argument
) : 0;
373 if (!have_field_width
)
378 xprintf (p
, precision
, arg
);
383 xprintf (p
, field_width
, arg
);
385 xprintf (p
, field_width
, precision
, arg
);
/* Floating-point branch: same layout, using vstrtold.  */
399 long double arg
= argument
? vstrtold (argument
) : 0;
400 if (!have_field_width
)
405 xprintf (p
, precision
, arg
);
410 xprintf (p
, field_width
, arg
);
412 xprintf (p
, field_width
, precision
, arg
);
/* Character branch: use the first byte of ARGUMENT, or NUL when there
   is no argument.  */
419 char c
= argument
? *argument
: '\0';
420 if (!have_field_width
)
423 xprintf (p
, field_width
, c
);
/* String branch: ARGUMENT is passed to xprintf as is.
   NOTE(review): no null check on ARGUMENT is visible in this branch;
   confirm in the full file how a missing argument is handled.  */
430 if (!have_field_width
)
433 xprintf (p
, argument
);
435 xprintf (p
, precision
, argument
);
440 xprintf (p
, field_width
, argument
);
442 xprintf (p
, field_width
, precision
, argument
);
450 /* Set curr_arg from indexed %i$ or otherwise next in sequence.
451 POS can be 0,1,2,3 corresponding to
452 [%][width][.precision][conversion] respectively. */
456 char const *f
; /* Pointer into 'format'. */
457 int curr_arg
; /* Current offset. */
458 int curr_s_arg
; /* Current sequential offset. */
459 int end_arg
; /* End arg processed. */
460 int direc_arg
; /* Arg for main directive. */
/* Work out which argument the directive slot POS refers to, given the
   cursor AC (passed by value; the declared return type is also struct
   arg_cursor).  POS is 0 for the '%' itself, 1 for the width, 2 for the
   precision and 3 for the conversion.  Callers in print_formatted
   assign the result back to their own cursor.  */
462 ATTRIBUTE_PURE
static struct arg_cursor
463 get_curr_arg (int pos
, struct arg_cursor ac
)
465 /* Convert sequences like "123$" by hand to avoid problems with strtol,
466 which might treat "$" as part of the number in some locales. */
468 char const *f
= ac
.f
;
/* Digits are only examined for slots 0 to 2.  */
469 if (pos
< 3 && c_isdigit (*f
))
/* Accumulate the decimal value in A; V collects any overflow reported
   by ckd_mul/ckd_add.  */
473 for (; c_isdigit (*f
); f
++)
475 v
|= ckd_mul (&a
, a
, 10);
476 v
|= ckd_add (&a
, a
, *f
- '0');
/* On overflow the index saturates to INT_MAX.  */
479 arg
= v
? INT_MAX
: a
;
484 /* Process indexed %i$ format. */
492 /* Process sequential arg. */
/* For slot 0, direc_arg is reset to -1 and that value becomes ARG.
   Otherwise, for slots below 3 or when direc_arg is negative, the
   sequential counter curr_s_arg is advanced.  The remaining
   alternative of this conditional is not visible in this excerpt.  */
493 arg
= (pos
== 0 ? (ac
.direc_arg
= -1)
494 : pos
< 3 || ac
.direc_arg
< 0 ? ++ac
.curr_s_arg
/* Remember the highest argument index seen so far.  */
501 ac
.end_arg
= MAX (ac
.end_arg
, arg
);
506 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
507 arguments to any '%' directives.
508 Return the number of elements of ARGV used. */
511 print_formatted (char const *format
, int argc
, char **argv
)
513 struct arg_cursor ac
;
/* Every cursor index starts at -1.  */
514 ac
.curr_arg
= ac
.curr_s_arg
= ac
.end_arg
= ac
.direc_arg
= -1;
515 char const *direc_start
; /* Start of % directive. */
516 char *direc
; /* Generated % directive. */
517 char *pdirec
; /* Pointer to current end of directive. */
518 bool have_field_width
; /* True if FIELD_WIDTH is valid. */
519 int field_width
= 0; /* Arg to first '*'. */
520 bool have_precision
; /* True if PRECISION is valid. */
521 int precision
= 0; /* Arg to second '*'. */
522 char ok
[UCHAR_MAX
+ 1]; /* ok['x'] is true if %x is allowed. */
/* DIREC gets a buffer as large as FORMAT, including its NUL.  */
524 direc
= xmalloc (strlen (format
) + 1);
/* Walk FORMAT one character at a time through the cursor's F.  */
526 for (ac
.f
= format
; *ac
.f
; ac
.f
++)
534 have_field_width
= have_precision
= false;
/* Slot 0: the position right after '%'.  */
541 ac
= get_curr_arg (0, ac
);
545 /* FIXME: Field width and precision are not supported
546 for %b, even though POSIX requires them. */
/* The argument is printed with its escapes expanded; nothing is
   printed when the selected index is past the end of ARGV.  */
547 ac
= get_curr_arg (3, ac
);
548 if (ac
.curr_arg
< argc
)
549 print_esc_string (argv
[ac
.curr_arg
]);
/* Here the argument is written to stdout after quoting it with
   shell_escape_quoting_style; again nothing is printed when the
   arguments have run out.  */
555 ac
= get_curr_arg (3, ac
);
556 if (ac
.curr_arg
< argc
)
558 fputs (quotearg_style (shell_escape_quoting_style
,
559 argv
[ac
.curr_arg
]), stdout
);
/* Start with every supported conversion allowed; the flag handling
   below clears the entries a flag cannot be combined with.  */
564 memset (ok
, 0, sizeof ok
);
565 ok
['a'] = ok
['A'] = ok
['c'] = ok
['d'] = ok
['e'] = ok
['E'] =
566 ok
['f'] = ok
['F'] = ok
['g'] = ok
['G'] = ok
['i'] = ok
['o'] =
567 ok
['s'] = ok
['u'] = ok
['x'] = ok
['X'] = 1;
/* NOTE(review): several case labels of the flag switch are not visible
   in this excerpt, so which flag triggers each of the following
   assignments must be checked against the full file.  */
573 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
577 ok
['a'] = ok
['A'] = ok
['c'] = ok
['e'] = ok
['E'] =
578 ok
['o'] = ok
['s'] = ok
['x'] = ok
['X'] = 0;
580 case '-': case '+': case ' ':
583 ok
['c'] = ok
['d'] = ok
['i'] = ok
['s'] = ok
['u'] = 0;
586 ok
['c'] = ok
['s'] = 0;
/* Leave the flag-scanning code.  */
589 goto no_more_flag_characters
;
593 no_more_flag_characters
:
/* Slot 1: field width taken from an argument.  The value must fit in
   an int, otherwise the program exits with "invalid field width".  */
599 ac
= get_curr_arg (1, ac
);
601 if (ac
.curr_arg
< argc
)
603 intmax_t width
= vstrtoimax (argv
[ac
.curr_arg
]);
604 if (INT_MIN
<= width
&& width
<= INT_MAX
)
607 error (EXIT_FAILURE
, 0, _("invalid field width: %s"),
608 quote (argv
[ac
.curr_arg
]));
612 have_field_width
= true;
/* Otherwise a literal run of digits in FORMAT is scanned.  */
615 while (ISDIGIT (*ac
.f
))
/* Slot 2: precision taken from an argument.  Values above INT_MAX are
   fatal; negative values are handled as described in the comment
   below.  */
625 ac
= get_curr_arg (2, ac
);
627 if (ac
.curr_arg
< argc
)
629 intmax_t prec
= vstrtoimax (argv
[ac
.curr_arg
]);
632 /* A negative precision is taken as if the
633 precision were omitted, so -1 is safe
634 here even if prec < INT_MIN. */
637 else if (INT_MAX
< prec
)
638 error (EXIT_FAILURE
, 0, _("invalid precision: %s"),
639 quote (argv
[ac
.curr_arg
]));
645 have_precision
= true;
/* Otherwise a literal run of digits in FORMAT is scanned.  */
648 while (ISDIGIT (*ac
.f
))
/* Scan the length modifiers l, L, h, j, t and z.  print_direc's own
   comment says it substitutes its own length modifier.
   NOTE(review): the loop body is not visible in this excerpt.  */
654 while (*ac
.f
== 'l' || *ac
.f
== 'L' || *ac
.f
== 'h'
655 || *ac
.f
== 'j' || *ac
.f
== 't' || *ac
.f
== 'z')
/* Index OK with an unsigned char so that any byte value is a valid
   subscript.  SPECLEN is clamped to INT_MAX because it is passed as
   the int precision of %.*s in the diagnostic.  */
659 unsigned char conversion
= *ac
.f
;
660 int speclen
= MIN (ac
.f
+ 1 - direc_start
, INT_MAX
);
661 if (! ok
[conversion
])
662 error (EXIT_FAILURE
, 0,
663 _("%.*s: invalid conversion specification"),
664 speclen
, direc_start
);
/* Slot 3: the conversion's own argument.  A null pointer is passed to
   print_direc when the index is past the end of ARGV.  */
667 ac
= get_curr_arg (3, ac
);
669 print_direc (direc
, *ac
.f
,
670 have_field_width
, field_width
,
671 have_precision
, precision
,
672 ac
.curr_arg
< argc
? argv
[ac
.curr_arg
] : nullptr);
/* Escapes in FORMAT itself use the \ooo octal form (octal_0 is false);
   the cursor is advanced by the length print_esc returns.  */
677 ac
.f
+= print_esc (ac
.f
, false);
/* end_arg is the highest index used, so end_arg + 1 is a count; it is
   capped at ARGC.  */
686 return MIN (argc
, ac
.end_arg
+ 1);
690 main (int argc
, char **argv
)
695 initialize_main (&argc
, &argv
);
696 set_program_name (argv
[0]);
697 setlocale (LC_ALL
, "");
698 bindtextdomain (PACKAGE
, LOCALEDIR
);
699 textdomain (PACKAGE
);
701 atexit (close_stdout
);
703 exit_status
= EXIT_SUCCESS
;
705 posixly_correct
= (getenv ("POSIXLY_CORRECT") != nullptr);
707 /* We directly parse options, rather than use parse_long_options, in
708 order to avoid accepting abbreviations. */
711 if (STREQ (argv
[1], "--help"))
712 usage (EXIT_SUCCESS
);
714 if (STREQ (argv
[1], "--version"))
716 version_etc (stdout
, PROGRAM_NAME
, PACKAGE_NAME
, Version
, AUTHORS
,
722 /* The above handles --help and --version.
723 Since there is no other invocation of getopt, handle '--' here. */
724 if (1 < argc
&& STREQ (argv
[1], "--"))
732 error (0, 0, _("missing operand"));
733 usage (EXIT_FAILURE
);
742 args_used
= print_formatted (format
, argc
, argv
);
746 while (args_used
> 0 && argc
> 0);
750 _("warning: ignoring excess arguments, starting with %s"),