printf: add indexed argument support
[coreutils.git] / src / printf.c
blobfb11f84a94bcfd9f56d98eac34a6c35047465704
1 /* printf - format and print data
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 #include <config.h>
18 #include <stdio.h>
19 #include <sys/types.h>
20 #include <wchar.h>
22 #include "system.h"
23 #include "c-ctype.h"
24 #include "cl-strtod.h"
25 #include "octhexdigits.h"
26 #include "quote.h"
27 #include "unicodeio.h"
28 #include "xprintf.h"
30 /* The official name of this program (e.g., no 'g' prefix). */
31 #define PROGRAM_NAME "printf"
33 #define AUTHORS proper_name ("David MacKenzie")
35 /* The value to return to the calling program. */
36 static int exit_status;
38 /* True if the POSIXLY_CORRECT environment variable is set. */
39 static bool posixly_correct;
41 /* This message appears in N_() here rather than just in _() below because
42 the sole use would have been in a #define. */
43 static char const *const cfcc_msg =
44 N_("warning: %s: character(s) following character constant have been ignored");
46 void
47 usage (int status)
49 if (status != EXIT_SUCCESS)
50 emit_try_help ();
51 else
53 printf (_("\
54 Usage: %s FORMAT [ARGUMENT]...\n\
55 or: %s OPTION\n\
56 "),
57 program_name, program_name);
58 fputs (_("\
59 Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\
60 \n\
61 "), stdout);
62 fputs (HELP_OPTION_DESCRIPTION, stdout);
63 fputs (VERSION_OPTION_DESCRIPTION, stdout);
64 fputs (_("\
65 \n\
66 FORMAT controls the output as in C printf. Interpreted sequences are:\n\
67 \n\
68 \\\" double quote\n\
69 "), stdout);
70 fputs (_("\
71 \\\\ backslash\n\
72 \\a alert (BEL)\n\
73 \\b backspace\n\
74 \\c produce no further output\n\
75 \\e escape\n\
76 \\f form feed\n\
77 \\n new line\n\
78 \\r carriage return\n\
79 \\t horizontal tab\n\
80 \\v vertical tab\n\
81 "), stdout);
82 fputs (_("\
83 \\NNN byte with octal value NNN (1 to 3 digits)\n\
84 \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\
85 \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\
86 \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\
87 "), stdout);
88 fputs (_("\
89 %% a single %\n\
90 %b ARGUMENT as a string with '\\' escapes interpreted,\n\
91 except that octal escapes should have a leading 0 like \\0NNN\n\
92 %q ARGUMENT is printed in a format that can be reused as shell input,\n\
93 escaping non-printable characters with the POSIX $'' syntax\
94 \n\n\
95 and all C format specifications ending with one of diouxXfeEgGcs, with\n\
96 ARGUMENTs converted to proper type first. Variable widths are handled.\n\
97 "), stdout);
98 printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
99 emit_ancillary_info (PROGRAM_NAME);
101 exit (status);
104 static void
105 verify_numeric (char const *s, char const *end)
107 if (errno)
109 error (0, errno, "%s", quote (s));
110 exit_status = EXIT_FAILURE;
112 else if (*end)
114 if (s == end)
115 error (0, 0, _("%s: expected a numeric value"), quote (s));
116 else
117 error (0, 0, _("%s: value not completely converted"), quote (s));
118 exit_status = EXIT_FAILURE;
122 #define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \
123 static TYPE \
124 FUNC_NAME (char const *s) \
126 char *end; \
127 TYPE val; \
129 if ((*s == '\"' || *s == '\'') && *(s + 1)) \
131 unsigned char ch = *++s; \
132 val = ch; \
134 if (MB_CUR_MAX > 1 && *(s + 1)) \
136 mbstate_t mbstate; mbszero (&mbstate); \
137 wchar_t wc; \
138 size_t slen = strlen (s); \
139 ssize_t bytes; \
140 /* Use mbrtowc not mbrtoc32, as per POSIX. */ \
141 bytes = mbrtowc (&wc, s, slen, &mbstate); \
142 if (0 < bytes) \
144 val = wc; \
145 s += bytes - 1; \
149 /* If POSIXLY_CORRECT is not set, then give a warning that there \
150 are characters following the character constant and that GNU \
151 printf is ignoring those characters. If POSIXLY_CORRECT *is* \
152 set, then don't give the warning. */ \
153 if (*++s != 0 && !posixly_correct) \
154 error (0, 0, _(cfcc_msg), s); \
156 else \
158 errno = 0; \
159 val = (LIB_FUNC_EXPR); \
160 verify_numeric (s, end); \
162 return val; \
165 STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0))
166 STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0))
167 STRTOX (long double, vstrtold, cl_strtold (s, &end))
169 /* Output a single-character \ escape. */
171 static void
172 print_esc_char (char c)
174 switch (c)
176 case 'a': /* Alert. */
177 putchar ('\a');
178 break;
179 case 'b': /* Backspace. */
180 putchar ('\b');
181 break;
182 case 'c': /* Cancel the rest of the output. */
183 exit (EXIT_SUCCESS);
184 break;
185 case 'e': /* Escape. */
186 putchar ('\x1B');
187 break;
188 case 'f': /* Form feed. */
189 putchar ('\f');
190 break;
191 case 'n': /* New line. */
192 putchar ('\n');
193 break;
194 case 'r': /* Carriage return. */
195 putchar ('\r');
196 break;
197 case 't': /* Horizontal tab. */
198 putchar ('\t');
199 break;
200 case 'v': /* Vertical tab. */
201 putchar ('\v');
202 break;
203 default:
204 putchar (c);
205 break;
/* Print the \ escape sequence that begins at ESCSTART (which points at
   the backslash itself).  Return how many characters of the sequence
   follow the backslash.
   With OCTAL_0 true, octal escapes take the form \0ooo; with it false
   they take the form \ooo (o being an octal digit).  */

static int
print_esc (char const *escstart, bool octal_0)
{
  char const *p = escstart + 1;

  if (*p == 'x')
    {
      /* \xhh: one or two hexadecimal digits are required.  */
      int value = 0;
      int ndigits = 0;

      p++;
      while (ndigits < 2 && c_isxdigit (to_uchar (*p)))
        {
          value = value * 16 + fromhex (*p);
          ndigits++;
          p++;
        }
      if (ndigits == 0)
        error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
      putchar (value);
    }
  else if (isoct (*p))
    {
      /* \0ooo when OCTAL_0 and the first digit is '0', else \ooo.
         Accepting \ooo under OCTAL_0 when the first digit is not '0'
         is an undocumented extension to POSIX, compatible with
         Bash 2.05b.  */
      int value = 0;
      int ndigits = 0;

      if (octal_0 && *p == '0')
        p++;
      while (ndigits < 3 && isoct (*p))
        {
          value = value * 8 + fromoct (*p);
          ndigits++;
          p++;
        }
      putchar (value);
    }
  else if (*p && strchr ("\"\\abcefnrtv", *p))
    print_esc_char (*p++);
  else if (*p == 'u' || *p == 'U')
    {
      char esc_char = *p;
      int ndigits = esc_char == 'u' ? 4 : 8;
      unsigned int uni_value = 0;

      p++;
      /* Exactly NDIGITS hexadecimal digits must follow.  */
      for (int remaining = ndigits; remaining > 0; remaining--, p++)
        {
          if (! c_isxdigit (to_uchar (*p)))
            error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape"));
          uni_value = uni_value * 16 + fromhex (*p);
        }

      /* Code points 0000D800 through 0000DFFF inclusive are invalid.
         Fail here, since print_unicode_char() would otherwise print
         the literal \u.. for them.  */
      if (0xd800 <= uni_value && uni_value <= 0xdfff)
        error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"),
               esc_char, ndigits, uni_value);

      print_unicode_char (stdout, uni_value, 0);
    }
  else
    {
      /* Not a recognized escape: emit the backslash and the following
         character (if there is one) verbatim.  */
      putchar ('\\');
      if (*p)
        {
          putchar (*p);
          p++;
        }
    }

  return p - escstart - 1;
}
/* Write STR to standard output, expanding each \ escape on the way.
   Octal escapes are parsed in the \0ooo form.  */

static void
print_esc_string (char const *str)
{
  char const *s = str;

  while (*s)
    {
      if (*s == '\\')
        /* print_esc reports how many characters followed the
           backslash; the increment below then skips the backslash.  */
        s += print_esc (s, true);
      else
        putchar (*s);
      s++;
    }
}
/* Format and print one conversion.  START is the directive text
   (everything up to, but excluding, length modifiers and the
   conversion character) and CONVERSION is the conversion character.
   When HAVE_FIELD_WIDTH is true, FIELD_WIDTH supplies the value for a
   '*' field width; likewise HAVE_PRECISION and PRECISION for a '*'
   precision.  ARGUMENT is the operand text, converted here to the type
   that CONVERSION requires.  */

static void
print_direc (char const *start, char conversion,
             bool have_field_width, int field_width,
             bool have_precision, int precision,
             char const *argument)
{
  /* Pick the length modifier to splice in: the widest integer type for
     integer conversions, long double for floating ones, none
     otherwise.  */
  char const *modifier = start;	/* Any valid pointer will do. */
  size_t modifier_len = 0;

  switch (conversion)
    {
    case 'd': case 'i': case 'o': case 'u': case 'x': case 'X':
      modifier = "j";
      modifier_len = 1;
      break;

    case 'a': case 'e': case 'f': case 'g':
    case 'A': case 'E': case 'F': case 'G':
      modifier = "L";
      modifier_len = 1;
      break;

    default:
      break;
    }

  /* Build the null-terminated directive:
     START + modifier + CONVERSION.  */
  size_t start_len = strlen (start);
  char *p = xmalloc (start_len + modifier_len + 2);
  char *tail = mempcpy (p, start, start_len);
  tail = mempcpy (tail, modifier, modifier_len);
  *tail++ = conversion;
  *tail = '\0';

  /* Each '*' in the directive consumes one int argument ahead of the
     value, so the call shape depends on which of them are present.  */
  switch (conversion)
    {
    case 'd':
    case 'i':
      {
        intmax_t arg = vstrtoimax (argument);
        if (have_field_width && have_precision)
          xprintf (p, field_width, precision, arg);
        else if (have_field_width)
          xprintf (p, field_width, arg);
        else if (have_precision)
          xprintf (p, precision, arg);
        else
          xprintf (p, arg);
      }
      break;

    case 'o':
    case 'u':
    case 'x':
    case 'X':
      {
        uintmax_t arg = vstrtoumax (argument);
        if (have_field_width && have_precision)
          xprintf (p, field_width, precision, arg);
        else if (have_field_width)
          xprintf (p, field_width, arg);
        else if (have_precision)
          xprintf (p, precision, arg);
        else
          xprintf (p, arg);
      }
      break;

    case 'a':
    case 'A':
    case 'e':
    case 'E':
    case 'f':
    case 'F':
    case 'g':
    case 'G':
      {
        long double arg = vstrtold (argument);
        if (have_field_width && have_precision)
          xprintf (p, field_width, precision, arg);
        else if (have_field_width)
          xprintf (p, field_width, arg);
        else if (have_precision)
          xprintf (p, precision, arg);
        else
          xprintf (p, arg);
      }
      break;

    case 'c':
      /* Only a field width is handled for %c.  */
      if (have_field_width)
        xprintf (p, field_width, *argument);
      else
        xprintf (p, *argument);
      break;

    case 's':
      if (have_field_width && have_precision)
        xprintf (p, field_width, precision, argument);
      else if (have_field_width)
        xprintf (p, field_width, argument);
      else if (have_precision)
        xprintf (p, precision, argument);
      else
        xprintf (p, argument);
      break;
    }

  free (p);
}
443 /* Print the text in FORMAT, using ARGV (with ARGC elements) for
444 arguments to any '%' directives.
445 Return the number of elements of ARGV used. */
447 static int
448 print_formatted (char const *format, int argc, char **argv)
451 /* Set curr_arg from indexed %i$ or otherwise next in sequence.
452 POS can be 0,1,2,3 corresponding to
453 [%][width][.precision][conversion] respectively. */
455 #define GET_CURR_ARG(POS) \
456 do { \
457 intmax_t arg = 0; \
458 size_t argl; \
459 /* Check with strspn() first to avoid spaces etc. \
460 This also avoids any locale ambiguities, \
461 and simplifies strtoimax errno checking. */ \
462 if (POS != 3 && (argl = strspn (f, "0123456789")) \
463 && f[argl] == '$') \
464 arg = MIN (strtoimax (f, nullptr, 10), INT_MAX); \
465 if (1 <= arg && arg <= INT_MAX) \
466 /* Process indexed %i$ format. */ \
468 SET_CURR_ARG (arg - 1); \
469 f += argl + 1; \
470 if (POS == 0) \
471 direc_arg = arg - 1; \
473 else \
474 /* Sequential arg processing. */ \
476 if (POS == 0) \
477 direc_arg = -1; \
478 else if (POS < 3 || direc_arg == -1) \
479 SET_CURR_ARG (++curr_s_arg); \
480 else \
481 SET_CURR_ARG (direc_arg); \
483 } while (0) \
485 #define SET_CURR_ARG(ARG) \
486 do { \
487 curr_arg = ARG; \
488 end_arg = MAX (curr_arg, end_arg); \
489 } while (0) \
491 int curr_arg = -1; /* Current offset. */
492 int curr_s_arg = -1; /* Current sequential offset. */
493 int end_arg = -1; /* End arg processed. */
494 int direc_arg = -1; /* Arg for main directive. */
495 char const *f; /* Pointer into 'format'. */
496 char const *direc_start; /* Start of % directive. */
497 char *direc; /* Generated % directive. */
498 char *pdirec; /* Pointer to current end of directive. */
499 bool have_field_width; /* True if FIELD_WIDTH is valid. */
500 int field_width = 0; /* Arg to first '*'. */
501 bool have_precision; /* True if PRECISION is valid. */
502 int precision = 0; /* Arg to second '*'. */
503 char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */
505 direc = xmalloc (strlen (format) + 1);
507 for (f = format; *f; ++f)
509 switch (*f)
511 case '%':
512 direc_start = f;
513 pdirec = direc;
514 *pdirec++ = *f++;
515 have_field_width = have_precision = false;
516 if (*f == '%')
518 putchar ('%');
519 break;
522 GET_CURR_ARG (0);
524 if (*f == 'b')
526 /* FIXME: Field width and precision are not supported
527 for %b, even though POSIX requires it. */
528 GET_CURR_ARG (3);
529 if (curr_arg < argc)
530 print_esc_string (argv[curr_arg]);
531 break;
534 if (*f == 'q')
536 GET_CURR_ARG (3);
537 if (curr_arg < argc)
539 fputs (quotearg_style (shell_escape_quoting_style,
540 argv[curr_arg]), stdout);
542 break;
545 memset (ok, 0, sizeof ok);
546 ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] =
547 ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] =
548 ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1;
550 for (;; f++)
552 switch (*f)
554 #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__
555 case 'I':
556 #endif
557 case '\'':
558 ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] =
559 ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0;
560 break;
561 case '-': case '+': case ' ':
562 break;
563 case '#':
564 ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0;
565 break;
566 case '0':
567 ok['c'] = ok['s'] = 0;
568 break;
569 default:
570 goto no_more_flag_characters;
572 *pdirec++ = *f;
574 no_more_flag_characters:
576 if (*f == '*')
578 *pdirec++ = *f++;
580 GET_CURR_ARG (1);
582 if (curr_arg < argc)
584 intmax_t width = vstrtoimax (argv[curr_arg]);
585 if (INT_MIN <= width && width <= INT_MAX)
586 field_width = width;
587 else
588 error (EXIT_FAILURE, 0, _("invalid field width: %s"),
589 quote (argv[curr_arg]));
591 else
592 field_width = 0;
593 have_field_width = true;
595 else
596 while (ISDIGIT (*f))
597 *pdirec++ = *f++;
598 if (*f == '.')
600 *pdirec++ = *f++;
601 ok['c'] = 0;
602 if (*f == '*')
604 *pdirec++ = *f++;
606 GET_CURR_ARG (2);
608 if (curr_arg < argc)
610 intmax_t prec = vstrtoimax (argv[curr_arg]);
611 if (prec < 0)
613 /* A negative precision is taken as if the
614 precision were omitted, so -1 is safe
615 here even if prec < INT_MIN. */
616 precision = -1;
618 else if (INT_MAX < prec)
619 error (EXIT_FAILURE, 0, _("invalid precision: %s"),
620 quote (argv[curr_arg]));
621 else
622 precision = prec;
624 else
625 precision = 0;
626 have_precision = true;
628 else
629 while (ISDIGIT (*f))
630 *pdirec++ = *f++;
633 *pdirec++ = '\0';
635 while (*f == 'l' || *f == 'L' || *f == 'h'
636 || *f == 'j' || *f == 't' || *f == 'z')
637 ++f;
640 unsigned char conversion = *f;
641 int speclen = MIN (f + 1 - direc_start, INT_MAX);
642 if (! ok[conversion])
643 error (EXIT_FAILURE, 0,
644 _("%.*s: invalid conversion specification"),
645 speclen, direc_start);
648 GET_CURR_ARG (3);
650 print_direc (direc, *f,
651 have_field_width, field_width,
652 have_precision, precision,
653 (argc <= curr_arg ? "" : argv[curr_arg]));
655 break;
657 case '\\':
658 f += print_esc (f, false);
659 break;
661 default:
662 putchar (*f);
666 free (direc);
667 return MIN (argc, end_arg + 1);
671 main (int argc, char **argv)
673 char *format;
674 int args_used;
676 initialize_main (&argc, &argv);
677 set_program_name (argv[0]);
678 setlocale (LC_ALL, "");
679 bindtextdomain (PACKAGE, LOCALEDIR);
680 textdomain (PACKAGE);
682 atexit (close_stdout);
684 exit_status = EXIT_SUCCESS;
686 posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr);
688 /* We directly parse options, rather than use parse_long_options, in
689 order to avoid accepting abbreviations. */
690 if (argc == 2)
692 if (STREQ (argv[1], "--help"))
693 usage (EXIT_SUCCESS);
695 if (STREQ (argv[1], "--version"))
697 version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS,
698 (char *) nullptr);
699 return EXIT_SUCCESS;
703 /* The above handles --help and --version.
704 Since there is no other invocation of getopt, handle '--' here. */
705 if (1 < argc && STREQ (argv[1], "--"))
707 --argc;
708 ++argv;
711 if (argc <= 1)
713 error (0, 0, _("missing operand"));
714 usage (EXIT_FAILURE);
717 format = argv[1];
718 argc -= 2;
719 argv += 2;
723 args_used = print_formatted (format, argc, argv);
724 argc -= args_used;
725 argv += args_used;
727 while (args_used > 0 && argc > 0);
729 if (argc > 0)
730 error (0, 0,
731 _("warning: ignoring excess arguments, starting with %s"),
732 quote (argv[0]));
734 return exit_status;