From 7d8deb89c892f79892526706b421b8680ea26cef Mon Sep 17 00:00:00 2001 From: Andrew Borodin Date: Sun, 1 Sep 2024 13:56:08 +0300 Subject: [PATCH] tar: prefer stoint to strtoul and variants. When parsing numbers prefer using strtosysint() (renamed stoint) to using strtoul() and its variants. This is simpler and faster and likely more reliable than relying on quirks of the system strtoul() etc, and it standardizes how tar deals with parsing integers. Among other things, the C standard and POSIX don't specify what strtol() does to errno when conversions cannot be performed, and it requires strtoul() to support "-" before unsigned numbers. * (stoint): rename from strtosysint, move to tar-internal.c and add a gboollean * argument for reporting overflow. All callers changed. * (decode_num): prefer stoint() to strtol() etc. Don't rely on errno == EINVAL as the standards don't guarantee it. * (decode_signed_num): likewise. * (decode_record): likewise. * (sparse_map_decoder): likewise. * (decode_timespec): Simplify by using ckd_sub() rather than checking for overflow by hand. * tar-sparse.c: remove include of errno.h, it's no lomger used. * tar-xheader.c: likewise Sync with GNU tar d1e72a536f26188230a147d948b9057714fd0b6b. Signed-off-by: Andrew Borodin --- src/vfs/tar/tar-internal.c | 99 ++++++++++++++++++++++++++++++ src/vfs/tar/tar-internal.h | 2 + src/vfs/tar/tar-sparse.c | 17 +----- src/vfs/tar/tar-xheader.c | 147 +++++++++------------------------------------ 4 files changed, 132 insertions(+), 133 deletions(-) diff --git a/src/vfs/tar/tar-internal.c b/src/vfs/tar/tar-internal.c index 9471f1dec..8ac3efb54 100644 --- a/src/vfs/tar/tar-internal.c +++ b/src/vfs/tar/tar-internal.c @@ -32,6 +32,7 @@ #include +#include /* isdigit() */ #include /* uintmax_t */ #include "lib/global.h" @@ -232,6 +233,104 @@ tar_assign_string_dup_n (char **string, const char *value, size_t n) /* --------------------------------------------------------------------------------------------- */ +/* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point + to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether + the input is out of @minval..@maxval range. If the input is out of that range, return an extreme + value. @minval must not be positive. + + If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1 + are assumed to be represented using leading '-' in the usual way. If the represented value + exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented + value. + + On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE; + then return 0. + + Sample call to this function: + + char *s_end; + gboolean overflow; + idx_t i; + + i = stoint (s, &s_end, &overflow, 0, IDX_MAX); + if ((s_end == s) | (s_end == '\0') | overflow) + diagnose_invalid (s); + + This example uses "|" instead of "||" for fewer branches at runtime, + which tends to be more efficient on modern processors. + + This function is named "stoint" instead of "strtoint" because + reserves names beginning with "str". + */ +#if ! (INTMAX_MAX <= UINTMAX_MAX) +#error "strtosysint: nonnegative intmax_t does not fit in uintmax_t" +#endif +intmax_t +stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval) +{ + char const *p = arg; + intmax_t i; + int v = 0; + + if (isdigit (*p)) + { + if (minval <= 0) + { + i = *p - '0'; + + while (isdigit (*++p) != 0) + { + v |= ckd_mul (&i, i, 10) ? 1 : 0; + v |= ckd_add (&i, i, *p - '0') ? 1 : 0; + } + + v |= maxval < i ? 1 : 0; + if (v != 0) + i = maxval; + } + else + { + uintmax_t u = *p - '0'; + + while (isdigit (*++p) != 0) + { + v |= ckd_mul (&u, u, 10) ? 1 : 0; + v |= ckd_add (&u, u, *p - '0') ? 1 : 0; + } + + v |= maxval < u ? 1 : 0; + if (v != 0) + u = maxval; + i = tar_represent_uintmax (u); + } + } + else if (minval < 0 && *p == '-' && isdigit (p[1])) + { + p++; + i = -(*p - '0'); + + while (isdigit (*++p) != 0) + { + v |= ckd_mul (&i, i, 10) ? 1 : 0; + v |= ckd_sub (&i, i, *p - '0') ? 1 : 0; + } + + v |= i < minval ? 1 : 0; + if (v != 0) + i = minval; + } + else + i = 0; + + if (arglim != NULL) + *arglim = (char *) p; + if (overflow != NULL) + *overflow = v != 0; + return i; +} + +/* --------------------------------------------------------------------------------------------- */ + /** * Convert buffer at @where0 of size @digs from external format to intmax_t. * @digs must be positive. diff --git a/src/vfs/tar/tar-internal.h b/src/vfs/tar/tar-internal.h index 4a8491215..a9d8961a5 100644 --- a/src/vfs/tar/tar-internal.h +++ b/src/vfs/tar/tar-internal.h @@ -312,6 +312,8 @@ void tar_base64_init (void); void tar_assign_string (char **string, char *value); void tar_assign_string_dup (char **string, const char *value); void tar_assign_string_dup_n (char **string, const char *value, size_t n); +intmax_t stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, + uintmax_t maxval); intmax_t tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval, uintmax_t maxval, gboolean octal_only); off_t off_from_header (const char *p, size_t s); diff --git a/src/vfs/tar/tar-sparse.c b/src/vfs/tar/tar-sparse.c index 344b15d14..7b0d3b6d7 100644 --- a/src/vfs/tar/tar-sparse.c +++ b/src/vfs/tar/tar-sparse.c @@ -41,8 +41,6 @@ #include -#include /* isdigit() */ -#include #include /* uintmax_t */ #include "lib/global.h" @@ -268,20 +266,11 @@ static struct tar_sparse_optab const pax_optab = { static gboolean decode_num (uintmax_t *num, const char *arg, uintmax_t maxval) { - uintmax_t u; char *arg_lim; + gboolean overflow; - if (!isdigit (*arg)) - return FALSE; - - errno = 0; - u = (uintmax_t) g_ascii_strtoll (arg, &arg_lim, 10); - - if (!(u <= maxval && errno != ERANGE) || *arg_lim != '\0') - return FALSE; - - *num = u; - return TRUE; + *num = stoint (arg, &arg_lim, &overflow, 0, maxval); + return (((arg_lim == arg ? 1 : 0) | (*arg_lim != '\0') | (overflow ? 1 : 0)) == 0); } /* --------------------------------------------------------------------------------------------- */ diff --git a/src/vfs/tar/tar-xheader.c b/src/vfs/tar/tar-xheader.c index ae7774e57..766124a77 100644 --- a/src/vfs/tar/tar-xheader.c +++ b/src/vfs/tar/tar-xheader.c @@ -31,7 +31,6 @@ #include #include /* isdigit() */ -#include #include #include @@ -180,61 +179,6 @@ static GSList *global_header_override_list = NULL; /*** file scope functions ************************************************************************/ /* --------------------------------------------------------------------------------------------- */ -/* Convert a prefix of the string @arg to a system integer type whose minimum value is @minval - and maximum @maxval. If @minval is negative, negative integers @minval .. -1 are assumed - to be represented using leading '-' in the usual way. If the represented value exceeds INTMAX_MAX, - return a negative integer V such that (uintmax_t) V yields the represented value. If @arglim is - nonnull, store into *@arglim a pointer to the first character after the prefix. - - This is the inverse of sysinttostr. - - On a normal return, set errno = 0. - On conversion error, return 0 and set errno = EINVAL. - On overflow, return an extreme value and set errno = ERANGE. - */ -#if ! (INTMAX_MAX <= UINTMAX_MAX) -#error "strtosysint: nonnegative intmax_t does not fit in uintmax_t" -#endif -static intmax_t -strtosysint (const char *arg, char **arglim, intmax_t minval, uintmax_t maxval) -{ - errno = 0; - - if (maxval <= INTMAX_MAX) - { - if (isdigit (arg[*arg == '-' ? 1 : 0])) - { - gint64 i; - - i = g_ascii_strtoll (arg, arglim, 10); - if ((gint64) minval <= i && i <= (gint64) maxval) - return (intmax_t) i; - - errno = ERANGE; - return i < (gint64) minval ? minval : (intmax_t) maxval; - } - } - else - { - if (isdigit (*arg)) - { - guint64 i; - - i = g_ascii_strtoull (arg, arglim, 10); - if (i <= (guint64) maxval) - return tar_represent_uintmax ((uintmax_t) i); - - errno = ERANGE; - return maxval; - } - } - - errno = EINVAL; - return 0; -} - -/* --------------------------------------------------------------------------------------------- */ - static struct xhdr_tab * locate_handler (const char *keyword) { @@ -310,40 +254,16 @@ run_override_list (GSList *kp, struct tar_stat_info *st) static struct timespec decode_timespec (const char *arg, char **arg_lim, gboolean parse_fraction) { - time_t s = TYPE_MINIMUM (time_t); int ns = -1; - const char *p = arg; - gboolean negative = *arg == '-'; + gboolean overflow; + time_t s; + char const *p = *arg_lim; struct timespec r; - if (!isdigit (arg[negative])) - errno = EINVAL; - else - { - errno = 0; - - if (negative) - { - gint64 i; - - i = g_ascii_strtoll (arg, arg_lim, 10); - if (TYPE_SIGNED (time_t) ? TYPE_MINIMUM (time_t) <= i : 0 <= i) - s = (intmax_t) i; - else - errno = ERANGE; - } - else - { - guint64 i; - - i = g_ascii_strtoull (arg, arg_lim, 10); - if (i <= TYPE_MAXIMUM (time_t)) - s = (uintmax_t) i; - else - errno = ERANGE; - } + s = stoint (arg, arg_lim, &overflow, TYPE_MINIMUM (time_t), TYPE_MAXIMUM (time_t)); - p = *arg_lim; + if (p != arg) + { ns = 0; if (parse_fraction && *p == '.') @@ -360,13 +280,15 @@ decode_timespec (const char *arg, char **arg_lim, gboolean parse_fraction) else if (*p != '0') trailing_nonzero = TRUE; + *arg_lim = (char *) p; + while (digits < LOG10_BILLION) { digits++; ns *= 10; } - if (negative) + if (*arg == '-') { /* Convert "-1.10000000000001" to s == -2, ns == 89999999. I.e., truncate time stamps towards minus infinity while @@ -374,23 +296,14 @@ decode_timespec (const char *arg, char **arg_lim, gboolean parse_fraction) if (trailing_nonzero) ns++; if (ns != 0) - { - if (s == TYPE_MINIMUM (time_t)) - ns = -1; - else - { - s--; - ns = BILLION - ns; - } - } + ns = ckd_sub (&s, s, 1) ? -1 : BILLION - ns; } } - if (errno == ERANGE) + if (overflow) ns = -1; } - *arg_lim = (char *) p; r.tv_sec = s; r.tv_nsec = ns; return r; @@ -428,19 +341,17 @@ decode_signed_num (intmax_t *num, const char *arg, intmax_t minval, uintmax_t ma const char *keyword) { char *arg_lim; + gboolean overflow; intmax_t u; (void) keyword; - if (!isdigit (*arg)) - return FALSE; /* malformed extended header */ + u = stoint (arg, &arg_lim, &overflow, minval, maxval); - u = strtosysint (arg, &arg_lim, minval, maxval); - - if (errno == EINVAL || *arg_lim != '\0') + if (arg_lim == arg || *arg_lim != '\0') return FALSE; /* malformed extended header */ - if (errno == ERANGE) + if (overflow) return FALSE; /* out of range */ *num = u; @@ -670,11 +581,11 @@ decode_record (struct xheader *xhdr, char **ptr, { char *start = *ptr; char *p = start; - size_t len; + idx_t len; char *len_lim; const char *keyword; char *nextp; - size_t len_max; + idx_t len_max; gboolean ret; len_max = xhdr->buffer + xhdr->size - start; @@ -682,10 +593,12 @@ decode_record (struct xheader *xhdr, char **ptr, while (*p == ' ' || *p == '\t') p++; - if (!isdigit (*p)) + len = stoint (p, &len_lim, NULL, 0, IDX_MAX); + + if (len_lim == p) + /* Is the length missing? */ return (*p != '\0' ? decode_record_fail : decode_record_finish); - len = (uintmax_t) g_ascii_strtoull (p, &len_lim, 10); if (len_max < len) return decode_record_fail; @@ -887,26 +800,22 @@ sparse_map_decoder (struct tar_stat_info *st, const char *keyword, const char *a while (TRUE) { - gint64 u; + off_t u; char *delim; + gboolean overflow; + + u = stoint (arg, &delim, &overflow, 0, TYPE_MAXIMUM (off_t)); - if (!isdigit (*arg)) + if (delim == arg) { /* malformed extended header */ return FALSE; } - errno = 0; - u = g_ascii_strtoll (arg, &delim, 10); - if (TYPE_MAXIMUM (off_t) < u) - { - u = TYPE_MAXIMUM (off_t); - errno = ERANGE; - } if (offset) { e.offset = u; - if (errno == ERANGE) + if (overflow) { /* out of range */ return FALSE; @@ -915,7 +824,7 @@ sparse_map_decoder (struct tar_stat_info *st, const char *keyword, const char *a else { e.numbytes = u; - if (errno == ERANGE) + if (overflow) { /* out of range */ return FALSE; -- 2.11.4.GIT