maint: distinguish EOVERFLOW vs ERANGE better
[coreutils.git] / src / printf.c
blob7ac7a76ceee462e281ea01d9338b6578e2faae69
1 /* printf - format and print data
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Usage: printf format [argument...]
19 A front end to the printf function that lets it be used from the shell.
21 Backslash escapes:
23 \" = double quote
24 \\ = backslash
25 \a = alert (bell)
26 \b = backspace
27 \c = produce no further output
28 \e = escape
29 \f = form feed
30 \n = new line
31 \r = carriage return
32 \t = horizontal tab
33 \v = vertical tab
34 \ooo = octal number (ooo is 1 to 3 digits)
35 \xhh = hexadecimal number (hh is 1 to 2 digits)
36 \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
37 \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
39 Additional directive:
41 %b = print an argument string, interpreting backslash escapes,
42 except that octal escapes are of the form \0 or \0ooo.
44 %q = print an argument string in a format that can be
45 reused as shell input. Escaped characters use the
46 POSIX $'' syntax supported by most shells.
48 The 'format' argument is re-used as many times as necessary
49 to convert all of the given arguments.
51 David MacKenzie <djm@gnu.ai.mit.edu> */
53 #include <config.h>
54 #include <stdio.h>
55 #include <sys/types.h>
56 #include <wchar.h>
58 #include "system.h"
59 #include "c-ctype.h"
60 #include "cl-strtod.h"
61 #include "octhexdigits.h"
62 #include "quote.h"
63 #include "unicodeio.h"
64 #include "xprintf.h"
66 /* The official name of this program (e.g., no 'g' prefix). */
67 #define PROGRAM_NAME "printf"
/* Author credit; main passes it to version_etc for --version. */
69 #define AUTHORS proper_name ("David MacKenzie")
71 /* The value to return to the calling program.
   main resets it to EXIT_SUCCESS at startup; verify_numeric sets it to
   EXIT_FAILURE when an argument cannot be converted cleanly. */
72 static int exit_status;
74 /* True if the POSIXLY_CORRECT environment variable is set.
   When true, the STRTOX-generated converters do not warn about
   characters that follow a character constant. */
75 static bool posixly_correct;
77 /* This message appears in N_() here rather than just in _() below because
78 the sole use would have been in a #define.
   It is the format of the warning issued by the STRTOX-generated
   converters; its %s receives the text that follows the character
   constant. */
79 static char const *const cfcc_msg =
80 N_("warning: %s: character(s) following character constant have been ignored");
/* Print usage information, then terminate the program with STATUS.

   If STATUS is not EXIT_SUCCESS, only emit_try_help () is called.
   Otherwise the full help text is written to standard output: the
   synopsis, the --help/--version option descriptions, the list of
   backslash escapes, the additional %% / %b / %q directives, the
   shell-builtin warning, and the ancillary information for
   PROGRAM_NAME.

   This function does not return; it always ends by calling
   exit (STATUS). */
82 void
83 usage (int status)
85 if (status != EXIT_SUCCESS)
86 emit_try_help ();
87 else
89 printf (_("\
90 Usage: %s FORMAT [ARGUMENT]...\n\
91 or: %s OPTION\n\
92 "),
93 program_name, program_name);
94 fputs (_("\
95 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
96 \n\
97 "), stdout);
98 fputs (HELP_OPTION_DESCRIPTION, stdout);
99 fputs (VERSION_OPTION_DESCRIPTION, stdout);
100 fputs (_("\
102 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
104 \\\" double quote\n\
105 "), stdout);
106 fputs (_("\
107 \\\\ backslash\n\
108 \\a alert (BEL)\n\
109 \\b backspace\n\
110 \\c produce no further output\n\
111 \\e escape\n\
112 \\f form feed\n\
113 \\n new line\n\
114 \\r carriage return\n\
115 \\t horizontal tab\n\
116 \\v vertical tab\n\
117 "), stdout);
118 fputs (_("\
119 \\NNN byte with octal value NNN (1 to 3 digits)\n\
120 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
121 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
122 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
123 "), stdout);
124 fputs (_("\
125 %% a single %\n\
126 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
127 except that octal escapes are of the form \\0 or \\0NNN\n\
128 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
129 escaping non-printable characters with the POSIX $'' syntax.\
130 \n\n\
131 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
132 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
133 "), stdout);
134 printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
135 emit_ancillary_info (PROGRAM_NAME);
/* Reached on both paths: terminate with the caller-supplied status. */
137 exit (status);
140 static void
141 verify_numeric (char const *s, char const *end)
143 if (errno)
145 error (0, errno, "%s", quote (s));
146 exit_status = EXIT_FAILURE;
148 else if (*end)
150 if (s == end)
151 error (0, 0, _("%s: expected a numeric value"), quote (s));
152 else
153 error (0, 0, _("%s: value not completely converted"), quote (s));
154 exit_status = EXIT_FAILURE;
158 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
159 static TYPE \
160 FUNC_NAME (char const *s) \
162 char *end; \
163 TYPE val; \
165 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
167 unsigned char ch = *++s; \
168 val = ch; \
170 if (MB_CUR_MAX > 1 && *(s + 1)) \
172 mbstate_t mbstate; mbszero (&mbstate); \
173 wchar_t wc; \
174 size_t slen = strlen (s); \
175 ssize_t bytes; \
176 /* Use mbrtowc not mbrtoc32, as per POSIX. */ \
177 bytes = mbrtowc (&wc, s, slen, &mbstate); \
178 if (0 < bytes) \
180 val = wc; \
181 s += bytes - 1; \
185 /* If POSIXLY_CORRECT is not set, then give a warning that there \
186 are characters following the character constant and that GNU \
187 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
188 set, then don't give the warning. */ \
189 if (*++s != 0 && !posixly_correct) \
190 error (0, 0, _(cfcc_msg), s); \
192 else \
194 errno = 0; \
195 val = (LIB_FUNC_EXPR); \
196 verify_numeric (s, end); \
198 return val; \
201 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
202 STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
203 STRTOX (long double, vstrtold, cl_strtold (s, &end))
205 /* Output a single-character \ escape. */
207 static void
208 print_esc_char (char c)
210 switch (c)
212 case 'a': /* Alert. */
213 putchar ('\a');
214 break;
215 case 'b': /* Backspace. */
216 putchar ('\b');
217 break;
218 case 'c': /* Cancel the rest of the output. */
219 exit (EXIT_SUCCESS);
220 break;
221 case 'e': /* Escape. */
222 putchar ('\x1B');
223 break;
224 case 'f': /* Form feed. */
225 putchar ('\f');
226 break;
227 case 'n': /* New line. */
228 putchar ('\n');
229 break;
230 case 'r': /* Carriage return. */
231 putchar ('\r');
232 break;
233 case 't': /* Horizontal tab. */
234 putchar ('\t');
235 break;
236 case 'v': /* Vertical tab. */
237 putchar ('\v');
238 break;
239 default:
240 putchar (c);
241 break;
/* Print the backslash escape sequence that begins at ESCSTART, which
   points at the backslash itself.
   Return the number of characters in the sequence, not counting the
   backslash.

   Recognized forms:
     \xH, \xHH      byte given by one or two hexadecimal digits;
     \ooo           byte given by up to three octal digits; if OCTAL_0
                    is true, a leading '0' is skipped first, so the
                    form is \0ooo;
     \" \\ \a \b \c \e \f \n \r \t \v
                    handled by print_esc_char;
     \uHHHH, \UHHHHHHHH
                    Unicode character given by exactly 4 or 8
                    hexadecimal digits.
   Any other sequence is output literally: the backslash, followed by
   the next character if there is one.

   A missing hexadecimal digit, or a code point in the range
   D800..DFFF, is reported with error (EXIT_FAILURE, ...).  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *cur = escstart + 1;

  if (*cur == 'x')
    {
      /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits.  */
      int byte = 0;
      int ndigits = 0;

      ++cur;
      while (ndigits < 2 && c_isxdigit (to_uchar (*cur)))
        {
          byte = byte * 16 + fromhex (*cur);
          ++ndigits;
          ++cur;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (byte);
    }
  else if (isoct (*cur))
    {
      /* Parse \0ooo (if octal_0 && *cur == '0') or \ooo (otherwise).
         Allow \ooo if octal_0 && *cur != '0'; this is an undocumented
         extension to POSIX that is compatible with Bash 2.05b.  */
      int byte = 0;

      if (octal_0 && *cur == '0')
        ++cur;
      for (int ndigits = 0; ndigits < 3 && isoct (*cur); ++ndigits, ++cur)
        byte = byte * 8 + fromoct (*cur);
      putchar (byte);
    }
  else if (*cur && strchr ("\"\\abcefnrtv", *cur))
    {
      char simple = *cur++;
      print_esc_char (simple);
    }
  else if (*cur == 'u' || *cur == 'U')
    {
      char const esc_char = *cur++;
      int const ndigits = esc_char == 'u' ? 4 : 8;
      unsigned int code_point = 0;

      /* Every one of the NDIGITS positions must hold a hex digit.  */
      for (int remaining = ndigits; remaining > 0; --remaining, ++cur)
        {
          if (! c_isxdigit (to_uchar (*cur)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          code_point = code_point * 16 + fromhex (*cur);
        }

      /* Error for invalid code points 0000D800 through 0000DFFF inclusive.
         Note print_unicode_char() would print the literal \u.. in this
         case.  */
      if (0xd800 <= code_point && code_point <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, ndigits, code_point);

      print_unicode_char (stdout, code_point, 0);
    }
  else
    {
      /* Unknown escape: pass it through untouched.  */
      putchar ('\\');
      if (*cur)
        {
          putchar (*cur);
          cur++;
        }
    }

  return cur - escstart - 1;
}
/* Print the string STR, expanding each backslash escape with
   print_esc in its OCTAL_0 mode (octal escapes of the form \0ooo);
   every other character is copied to standard output unchanged.  */

static void
print_esc_string (char const *str)
{
  char const *cur = str;

  while (*cur)
    {
      if (*cur == '\\')
        /* Skip the body of the escape; the backslash itself is
           stepped over by the increment below.  */
        cur += print_esc (cur, true);
      else
        putchar (*cur);
      cur++;
    }
}
/* Evaluate one printf conversion specification and print the result.

   START points at the '%' of the directive and LENGTH is the number
   of characters of it to copy; LENGTH does not include any length
   modifier or the conversion specifier itself.  CONVERSION is the
   conversion specifier character.  FIELD_WIDTH and PRECISION are the
   values for '*' in the directive, and are used only if
   HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively.
   ARGUMENT is the operand string to be formatted.

   Integer conversions are formatted as intmax_t/uintmax_t and
   floating-point conversions as long double, so the directive is
   rebuilt with the matching length modifier ("j" or "L").  */

static void
print_direc (char const *start, size_t length, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Length modifier that matches the C type used for CONVERSION.  */
  char const *modifier = "";
  size_t modifier_len = 0;

  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";
      modifier_len = 1;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      modifier_len = 1;
      break;

    default:
      break;
    }

  /* Null-terminated copy of the % directive: the copied prefix, the
     modifier, the conversion character, and the terminating NUL.  */
  char *spec = xmalloc (length + modifier_len + 2);
  char *tail = mempcpy (spec, start, length);
  tail = mempcpy (tail, modifier, modifier_len);
  tail[0] = conversion;
  tail[1] = '\0';

  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t arg = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t arg = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double arg = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (spec, field_width, precision, arg);
        else if (have_field_width)
          xprintf (spec, field_width, arg);
        else if (have_precision)
          xprintf (spec, precision, arg);
        else
          xprintf (spec, arg);
      }
      break;

    case 'c':
      /* Only the first byte of ARGUMENT is printed; PRECISION is
         never consulted for %c.  */
      if (have_field_width)
        xprintf (spec, field_width, *argument);
      else
        xprintf (spec, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (spec, field_width, precision, argument);
      else if (have_field_width)
        xprintf (spec, field_width, argument);
      else if (have_precision)
        xprintf (spec, precision, argument);
      else
        xprintf (spec, argument);
      break;

    default:
      break;
    }

  free (spec);
}
480 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
481 arguments to any '%' directives.
482 Return the number of elements of ARGV used. */
484 static int
485 print_formatted (char const *format, int argc, char **argv)
487 int save_argc = argc; /* Preserve original value. */
488 char const *f; /* Pointer into 'format'. */
489 char const *direc_start; /* Start of % directive. */
490 size_t direc_length; /* Length of % directive. */
491 bool have_field_width; /* True if FIELD_WIDTH is valid. */
492 int field_width = 0; /* Arg to first '*'. */
493 bool have_precision; /* True if PRECISION is valid. */
494 int precision = 0; /* Arg to second '*'. */
495 char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
497 for (f = format; *f; ++f)
499 switch (*f)
501 case '%':
502 direc_start = f++;
503 direc_length = 1;
504 have_field_width = have_precision = false;
505 if (*f == '%')
507 putchar ('%');
508 break;
510 if (*f == 'b')
512 /* FIXME: Field width and precision are not supported
513 for %b, even though POSIX requires it. */
514 if (argc > 0)
516 print_esc_string (*argv);
517 ++argv;
518 --argc;
520 break;
523 if (*f == 'q')
525 if (argc > 0)
527 fputs (quotearg_style (shell_escape_quoting_style, *argv),
528 stdout);
529 ++argv;
530 --argc;
532 break;
535 memset (ok, 0, sizeof ok);
536 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
537 ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
538 ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
540 for (;; f++, direc_length++)
541 switch (*f)
543 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
544 case 'I':
545 #endif
546 case '\'':
547 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
548 ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
549 break;
550 case '-': case '+': case ' ':
551 break;
552 case '#':
553 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
554 break;
555 case '0':
556 ok['c'] = ok['s'] = 0;
557 break;
558 default:
559 goto no_more_flag_characters;
561 no_more_flag_characters:
563 if (*f == '*')
565 ++f;
566 ++direc_length;
567 if (argc > 0)
569 intmax_t width = vstrtoimax (*argv);
570 if (INT_MIN <= width && width <= INT_MAX)
571 field_width = width;
572 else
573 error (EXIT_FAILURE, 0, _("invalid field width: %s"),
574 quote (*argv));
575 ++argv;
576 --argc;
578 else
579 field_width = 0;
580 have_field_width = true;
582 else
583 while (ISDIGIT (*f))
585 ++f;
586 ++direc_length;
588 if (*f == '.')
590 ++f;
591 ++direc_length;
592 ok['c'] = 0;
593 if (*f == '*')
595 ++f;
596 ++direc_length;
597 if (argc > 0)
599 intmax_t prec = vstrtoimax (*argv);
600 if (prec < 0)
602 /* A negative precision is taken as if the
603 precision were omitted, so -1 is safe
604 here even if prec < INT_MIN. */
605 precision = -1;
607 else if (INT_MAX < prec)
608 error (EXIT_FAILURE, 0, _("invalid precision: %s"),
609 quote (*argv));
610 else
611 precision = prec;
612 ++argv;
613 --argc;
615 else
616 precision = 0;
617 have_precision = true;
619 else
620 while (ISDIGIT (*f))
622 ++f;
623 ++direc_length;
627 while (*f == 'l' || *f == 'L' || *f == 'h'
628 || *f == 'j' || *f == 't' || *f == 'z')
629 ++f;
632 unsigned char conversion = *f;
633 int speclen = MIN (f + 1 - direc_start, INT_MAX);
634 if (! ok[conversion])
635 error (EXIT_FAILURE, 0,
636 _("%.*s: invalid conversion specification"),
637 speclen, direc_start);
640 print_direc (direc_start, direc_length, *f,
641 have_field_width, field_width,
642 have_precision, precision,
643 (argc <= 0 ? "" : (argc--, *argv++)));
644 break;
646 case '\\':
647 f += print_esc (f, false);
648 break;
650 default:
651 putchar (*f);
655 return save_argc - argc;
659 main (int argc, char **argv)
661 char *format;
662 int args_used;
664 initialize_main (&argc, &argv);
665 set_program_name (argv[0]);
666 setlocale (LC_ALL, "");
667 bindtextdomain (PACKAGE, LOCALEDIR);
668 textdomain (PACKAGE);
670 atexit (close_stdout);
672 exit_status = EXIT_SUCCESS;
674 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
676 /* We directly parse options, rather than use parse_long_options, in
677 order to avoid accepting abbreviations. */
678 if (argc == 2)
680 if (STREQ (argv[1], "--help"))
681 usage (EXIT_SUCCESS);
683 if (STREQ (argv[1], "--version"))
685 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
686 (char *) nullptr);
687 return EXIT_SUCCESS;
691 /* The above handles --help and --version.
692 Since there is no other invocation of getopt, handle '--' here. */
693 if (1 < argc && STREQ (argv[1], "--"))
695 --argc;
696 ++argv;
699 if (argc <= 1)
701 error (0, 0, _("missing operand"));
702 usage (EXIT_FAILURE);
705 format = argv[1];
706 argc -= 2;
707 argv += 2;
711 args_used = print_formatted (format, argc, argv);
712 argc -= args_used;
713 argv += args_used;
715 while (args_used > 0 && argc > 0);
717 if (argc > 0)
718 error (0, 0,
719 _("warning: ignoring excess arguments, starting with %s"),
720 quote (argv[0]));
722 return exit_status;