From 5cecd703e57b2e1301767d82cbe5bb01cae88472 Mon Sep 17 00:00:00 2001 From: =?utf8?q?P=C3=A1draig=20Brady?= Date: Wed, 11 Sep 2024 16:07:48 +0100 Subject: [PATCH] printf: add indexed argument support * src/printf.c (print_formatted): Add support for %i$ indexed args. * tests/printf/printf-indexed.sh: Add a new file of test cases. * tests/local.mk: Reference the new test file. * doc/coreutils.texi (printf invocation): Mention how mixed processing of indexed and sequential references are supported, unlike the printf(2) library function. * NEWS: Mention the new (POSIX:2024) feature. Addresses https://bugs.gnu.org/73068 --- NEWS | 4 + doc/coreutils.texi | 9 ++ src/printf.c | 184 ++++++++++++++++++++++++++--------------- tests/local.mk | 1 + tests/printf/printf-indexed.sh | 103 +++++++++++++++++++++++ 5 files changed, 233 insertions(+), 68 deletions(-) create mode 100755 tests/printf/printf-indexed.sh diff --git a/NEWS b/NEWS index e1d3f82d1..184e4dfcc 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,10 @@ GNU coreutils NEWS -*- outline -*- ls now supports the --sort=name option, to explicitly select the default operation of sorting by file name. + printf now supports indexed arguments, using the POSIX:2024 specified + %i$ format, where 'i' is an integer referencing a particular argument, + thus allowing repetition or reordering of printf arguments. + ** Improvements 'head -c NUM', 'head -n NUM', 'nl -l NUM', 'nproc --ignore NUM', diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 58b425779..03dfd0011 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -13430,6 +13430,15 @@ depending on whether the context expects a string or a number. For example, the command @samp{printf %sx%d} prints @samp{x0}. @item +Indexed arguments referenced with @samp{%...$} formats, can be +mixed with standard sequential argument references, +in which case both index types are independent. +For example, the command @samp{printf '%1$s%s' A} prints @samp{AA}. 
+Also the highest referenced argument, either indexed or sequential, +is considered to be the last one referenced, when determining +unused arguments for reprocessing with the @var{format} argument. + +@item @kindex \c An additional escape, @samp{\c}, causes @command{printf} to produce no further output. For example, the command @samp{printf 'A%sC\cD%sF' B diff --git a/src/printf.c b/src/printf.c index de3507925..fb11f84a9 100644 --- a/src/printf.c +++ b/src/printf.c @@ -291,15 +291,13 @@ print_esc_string (char const *str) } /* Evaluate a printf conversion specification. START is the start of - the directive, LENGTH is its length, and CONVERSION specifies the - type of conversion. LENGTH does not include any length modifier or - the conversion specifier itself. FIELD_WIDTH and PRECISION are the - field width and precision for '*' values, if HAVE_FIELD_WIDTH and - HAVE_PRECISION are true, respectively. ARGUMENT is the argument to - be formatted. */ + the directive, and CONVERSION specifies the type of conversion. + FIELD_WIDTH and PRECISION are the field width and precision for '*' + values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively. + ARGUMENT is the argument to be formatted. */ static void -print_direc (char const *start, size_t length, char conversion, +print_direc (char const *start, char conversion, bool have_field_width, int field_width, bool have_precision, int precision, char const *argument) @@ -333,6 +331,7 @@ print_direc (char const *start, size_t length, char conversion, break; } + size_t length = strlen (start); p = xmalloc (length + length_modifier_len + 2); q = mempcpy (p, start, length); q = mempcpy (q, length_modifier, length_modifier_len); @@ -448,50 +447,97 @@ print_direc (char const *start, size_t length, char conversion, static int print_formatted (char const *format, int argc, char **argv) { - int save_argc = argc; /* Preserve original value. */ + +/* Set curr_arg from indexed %i$ or otherwise next in sequence. 
+ POS can be 0,1,2,3 corresponding to + [%][width][.precision][conversion] respectively. */ + +#define GET_CURR_ARG(POS) \ +do { \ + intmax_t arg = 0; \ + size_t argl; \ + /* Check with strspn() first to avoid spaces etc. \ + This also avoids any locale ambiguities, \ + and simplifies strtoimax errno checking. */ \ + if (POS != 3 && (argl = strspn (f, "0123456789")) \ + && f[argl] == '$') \ + arg = MIN (strtoimax (f, nullptr, 10), INT_MAX); \ + if (1 <= arg && arg <= INT_MAX) \ + /* Process indexed %i$ format. */ \ + { \ + SET_CURR_ARG (arg - 1); \ + f += argl + 1; \ + if (POS == 0) \ + direc_arg = arg - 1; \ + } \ + else \ + /* Sequential arg processing. */ \ + { \ + if (POS == 0) \ + direc_arg = -1; \ + else if (POS < 3 || direc_arg == -1) \ + SET_CURR_ARG (++curr_s_arg); \ + else \ + SET_CURR_ARG (direc_arg); \ + } \ +} while (0) \ + +#define SET_CURR_ARG(ARG) \ +do { \ + curr_arg = ARG; \ + end_arg = MAX (curr_arg, end_arg); \ +} while (0) \ + + int curr_arg = -1; /* Current offset. */ + int curr_s_arg = -1; /* Current sequential offset. */ + int end_arg = -1; /* End arg processed. */ + int direc_arg = -1; /* Arg for main directive. */ char const *f; /* Pointer into 'format'. */ char const *direc_start; /* Start of % directive. */ - size_t direc_length; /* Length of % directive. */ + char *direc; /* Generated % directive. */ + char *pdirec; /* Pointer to current end of directive. */ bool have_field_width; /* True if FIELD_WIDTH is valid. */ int field_width = 0; /* Arg to first '*'. */ bool have_precision; /* True if PRECISION is valid. */ int precision = 0; /* Arg to second '*'. */ char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. 
*/ + direc = xmalloc (strlen (format) + 1); + for (f = format; *f; ++f) { switch (*f) { case '%': - direc_start = f++; - direc_length = 1; + direc_start = f; + pdirec = direc; + *pdirec++ = *f++; have_field_width = have_precision = false; if (*f == '%') { putchar ('%'); break; } + + GET_CURR_ARG (0); + if (*f == 'b') { /* FIXME: Field width and precision are not supported for %b, even though POSIX requires it. */ - if (argc > 0) - { - print_esc_string (*argv); - ++argv; - --argc; - } + GET_CURR_ARG (3); + if (curr_arg < argc) + print_esc_string (argv[curr_arg]); break; } if (*f == 'q') { - if (argc > 0) + GET_CURR_ARG (3); + if (curr_arg < argc) { - fputs (quotearg_style (shell_escape_quoting_style, *argv), - stdout); - ++argv; - --argc; + fputs (quotearg_style (shell_escape_quoting_style, + argv[curr_arg]), stdout); } break; } @@ -501,43 +547,46 @@ print_formatted (char const *format, int argc, char **argv) ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] = ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1; - for (;; f++, direc_length++) - switch (*f) - { + for (;; f++) + { + switch (*f) + { #if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__ - case 'I': + case 'I': #endif - case '\'': - ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = - ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; - break; - case '-': case '+': case ' ': - break; - case '#': - ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; - break; - case '0': - ok['c'] = ok['s'] = 0; - break; - default: - goto no_more_flag_characters; - } + case '\'': + ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = + ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; + break; + case '-': case '+': case ' ': + break; + case '#': + ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; + break; + case '0': + ok['c'] = ok['s'] = 0; + break; + default: + goto no_more_flag_characters; + } + *pdirec++ = *f; + } no_more_flag_characters: if (*f == '*') { - ++f; - ++direc_length; - if (argc > 0) + *pdirec++ = *f++; + + 
GET_CURR_ARG (1); + + if (curr_arg < argc) { - intmax_t width = vstrtoimax (*argv); + intmax_t width = vstrtoimax (argv[curr_arg]); if (INT_MIN <= width && width <= INT_MAX) field_width = width; else error (EXIT_FAILURE, 0, _("invalid field width: %s"), - quote (*argv)); - ++argv; - --argc; + quote (argv[curr_arg])); } else field_width = 0; @@ -545,22 +594,20 @@ print_formatted (char const *format, int argc, char **argv) } else while (ISDIGIT (*f)) - { - ++f; - ++direc_length; - } + *pdirec++ = *f++; if (*f == '.') { - ++f; - ++direc_length; + *pdirec++ = *f++; ok['c'] = 0; if (*f == '*') { - ++f; - ++direc_length; - if (argc > 0) + *pdirec++ = *f++; + + GET_CURR_ARG (2); + + if (curr_arg < argc) { - intmax_t prec = vstrtoimax (*argv); + intmax_t prec = vstrtoimax (argv[curr_arg]); if (prec < 0) { /* A negative precision is taken as if the @@ -570,11 +617,9 @@ print_formatted (char const *format, int argc, char **argv) } else if (INT_MAX < prec) error (EXIT_FAILURE, 0, _("invalid precision: %s"), - quote (*argv)); + quote (argv[curr_arg])); else precision = prec; - ++argv; - --argc; } else precision = 0; @@ -582,12 +627,11 @@ print_formatted (char const *format, int argc, char **argv) } else while (ISDIGIT (*f)) - { - ++f; - ++direc_length; - } + *pdirec++ = *f++; } + *pdirec++ = '\0'; + while (*f == 'l' || *f == 'L' || *f == 'h' || *f == 'j' || *f == 't' || *f == 'z') ++f; @@ -601,10 +645,13 @@ print_formatted (char const *format, int argc, char **argv) speclen, direc_start); } - print_direc (direc_start, direc_length, *f, + GET_CURR_ARG (3); + + print_direc (direc, *f, have_field_width, field_width, have_precision, precision, - (argc <= 0 ? "" : (argc--, *argv++))); + (argc <= curr_arg ? 
"" : argv[curr_arg])); + break; case '\\': @@ -616,7 +663,8 @@ print_formatted (char const *format, int argc, char **argv) } } - return save_argc - argc; + free (direc); + return MIN (argc, end_arg + 1); } int diff --git a/tests/local.mk b/tests/local.mk index fdbf36946..f72353862 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -353,6 +353,7 @@ all_tests = \ tests/printf/printf.sh \ tests/printf/printf-cov.pl \ tests/printf/printf-hex.sh \ + tests/printf/printf-indexed.sh \ tests/printf/printf-mb.sh \ tests/printf/printf-surprise.sh \ tests/printf/printf-quote.sh \ diff --git a/tests/printf/printf-indexed.sh b/tests/printf/printf-indexed.sh new file mode 100755 index 000000000..7a7744436 --- /dev/null +++ b/tests/printf/printf-indexed.sh @@ -0,0 +1,103 @@ +#!/bin/sh +# tests for printf %i$ indexed format processing + +# Copyright (C) 2024 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ printf +i +getlimits_ + +prog='env printf' + +printf_check() { + cat < exp || framework_failure_ +$1 +EOF + + shift + + $prog "$@" > out || fail=1 + compare exp out || fail=1 +} + +printf_check_err() { + cat < exp || framework_failure_ +$1 +EOF + + shift + + returns_ 1 $prog "$@" 2> out || fail=1 + compare exp out || fail=1 +} + +NL=" +" + +# Reordering +printf_check '21' '%2$s%1$s\n' 1 2 + +# Repetition +printf_check "11${NL}22" '%1$s%1$s\n' 1 2 + +# Multiple uses of format +printf_check "A C B${NL}D " '%s %3$s %s\n' A B C D +printf_check " 4${NL}1" '%1$*d\n' 4 1 + +# Mixed indexed and sequential main arg +printf_check "A B A" '%s %s %1$s\n' A B +printf_check ' 0 1 ' '%100$*d %s %s %s\n' 4 1 + +# indexed arg, width, and precision +printf_check ' 01' '%1$*2$.*3$d\n' 1 3 2 +# indexed arg, sequential width, and precision +printf_check ' 01' '%3$*.*d\n' 3 2 1 +# indexed arg, width, and sequential precision +printf_check ' 01' '%3$*2$.*d\n' 2 3 1 +# indexed arg, precision, and sequential width +printf_check ' 01' '%3$*.*2$d\n' 3 2 1 +# Indexed arg, width +printf_check ' 1' '%2$*1$d\n' 4 1 +# Indexed arg, and sequential width +printf_check ' 1' '%2$*d\n' 4 1 + +# Flags come after $ (0 is not a flag here but allowed): +printf_check ' 1' '%01$4d\n' 1 +# Flags come after $ (0 is a flag here): +printf_check '0001' '%1$0*2$d\n' 1 4 +# Flags come after $ (-2 not taken as a valid index here): +printf_check_err 'printf: %-2$: invalid conversion specification' \ + '%-2$s %1$s\n' A B +# Flags come after $ (' ' is not allowed as part of number here) +printf_check_err 'printf: % 2$: invalid conversion specification' \ + '% 2$s %1$s\n' A B + +# Ensure only base 10 numbers are accepted +printf_check_err "printf: 'A': expected a numeric value" \ + '%0x2$s %2$s\n' A B +# Ensure empty numbers are rejected +printf_check_err 'printf: %$: invalid conversion specification' \ + '%$d\n' 1 +# Verify int limits are clamped 
appropriately (to INT_MAX) +# (Large indexes are useful to ensure a single pass with the format arg) +# Note you can't have more than INT_MAX - 2 args anyway as argc is an int, +# and that also includes the command name and format at least. +for i in 999 $INT_MAX $INT_OFLOW $INTMAX_MAX $INTMAX_OFLOW; do + printf_check 'empty' "empty%$i\$s\n" 'foo' +done + +Exit $fail -- 2.11.4.GIT