src/vfs/tar/tar-internal.c

   1 /*
   2    Virtual File System: GNU Tar file system.
   3
   4    Copyright (C) 2023-2024
   5    Free Software Foundation, Inc.
   6
   7    Written by:
   8    Andrew Borodin <aborodin@vmail.ru>, 2023
   9
  10    This file is part of the Midnight Commander.
  11
  12    The Midnight Commander is free software: you can redistribute it
  13    and/or modify it under the terms of the GNU General Public License as
  14    published by the Free Software Foundation, either version 3 of the License,
  15    or (at your option) any later version.
  16
  17    The Midnight Commander is distributed in the hope that it will be useful,
  18    but WITHOUT ANY WARRANTY; without even the implied warranty of
  19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20    GNU General Public License for more details.
  21
  22    You should have received a copy of the GNU General Public License
  23    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24  */
  25
  26 /**
  27  * \file
  28  * \brief Source: Virtual File System: GNU Tar file system
  29  * \author Andrew Borodin
  30  * \date 2022
  31  */
  32
  33 #include <config.h>
  34
  35 #include <ctype.h>              /* isdigit() */
  36 #include <inttypes.h>           /* uintmax_t */
  37
  38 #include "lib/global.h"
  39 #include "lib/widget.h"         /* message() */
  40 #include "lib/vfs/vfs.h"        /* mc_read() */
  41
  42 #include "tar-internal.h"
  43
  44 /*** global variables ****************************************************************************/
  45
  46 /*** file scope macro definitions ****************************************************************/
  47
/* Width of uintmax_t in bits. Fallback used only when the included headers do not already
   define UINTMAX_WIDTH; relies on CHAR_BIT being in scope. */
#ifndef UINTMAX_WIDTH
#define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
#endif

/* Log base 2 of common values. */
/* log2 (8): number of bits in one octal digit. */
#define LG_8 3
/* log2 (256): number of bits in one base-256 digit. */
#define LG_256 8
  55
  56 /*** file scope type declarations ****************************************************************/
  57
  58 /*** forward declarations (file scope functions) *************************************************/
  59
  60 /*** file scope variables ************************************************************************/
  61
  62 /* Table of base-64 digit values + 1, indexed by unsigned chars.
  63    See Internet RFC 2045 Table 1.
  64    Zero entries are for unsigned chars that are not base-64 digits.  */
  65 /* *INDENT-OFF* */
  66 static char const base64_map[UCHAR_MAX + 1] =
  67 {
  68     ['A'] =  0 + 1, ['B'] =  1 + 1, ['C'] =  2 + 1, ['D'] =  3 + 1,
  69     ['E'] =  4 + 1, ['F'] =  5 + 1, ['G'] =  6 + 1, ['H'] =  7 + 1,
  70     ['I'] =  8 + 1, ['J'] =  9 + 1, ['K'] = 10 + 1, ['L'] = 11 + 1,
  71     ['M'] = 12 + 1, ['N'] = 13 + 1, ['O'] = 14 + 1, ['P'] = 15 + 1,
  72     ['Q'] = 16 + 1, ['R'] = 17 + 1, ['S'] = 18 + 1, ['T'] = 19 + 1,
  73     ['U'] = 20 + 1, ['V'] = 21 + 1, ['W'] = 22 + 1, ['X'] = 23 + 1,
  74     ['Y'] = 24 + 1, ['Z'] = 25 + 1,
  75     ['a'] = 26 + 1, ['b'] = 27 + 1, ['c'] = 28 + 1, ['d'] = 29 + 1,
  76     ['e'] = 30 + 1, ['f'] = 31 + 1, ['g'] = 32 + 1, ['h'] = 33 + 1,
  77     ['i'] = 34 + 1, ['j'] = 35 + 1, ['k'] = 36 + 1, ['l'] = 37 + 1,
  78     ['m'] = 38 + 1, ['n'] = 39 + 1, ['o'] = 40 + 1, ['p'] = 41 + 1,
  79     ['q'] = 42 + 1, ['r'] = 43 + 1, ['s'] = 44 + 1, ['t'] = 45 + 1,
  80     ['u'] = 46 + 1, ['v'] = 47 + 1, ['w'] = 48 + 1, ['x'] = 49 + 1,
  81     ['y'] = 50 + 1, ['z'] = 51 + 1,
  82     ['0'] = 52 + 1, ['1'] = 53 + 1, ['2'] = 54 + 1, ['3'] = 55 + 1,
  83     ['4'] = 56 + 1, ['5'] = 57 + 1, ['6'] = 58 + 1, ['7'] = 59 + 1,
  84     ['8'] = 60 + 1, ['9'] = 61 + 1,
  85     ['+'] = 62 + 1, ['/'] = 63 + 1,
  86 };
  87 /* *INDENT-ON* */
  88
  89 /* --------------------------------------------------------------------------------------------- */
  90 /*** file scope functions ************************************************************************/
  91 /* --------------------------------------------------------------------------------------------- */
  92
  93 static gboolean
  94 tar_short_read (size_t status, tar_super_t *archive)
  95 {
  96     size_t left;                /* bytes left */
  97     char *more;                 /* pointer to next byte to read */
  98
  99     more = archive->record_start->buffer + status;
 100     left = record_size - status;
 101
 102     while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
 103     {
 104         if (status != 0)
 105         {
 106             ssize_t r;
 107
 108             r = mc_read (archive->fd, more, left);
 109             if (r == -1)
 110                 return FALSE;
 111
 112             status = (size_t) r;
 113         }
 114
 115         if (status == 0)
 116             break;
 117
 118         left -= status;
 119         more += status;
 120     }
 121
 122     record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
 123
 124     return TRUE;
 125 }
 126
 127 /* --------------------------------------------------------------------------------------------- */
 128
 129 static gboolean
 130 tar_flush_read (tar_super_t *archive)
 131 {
 132     size_t status;
 133
 134     status = mc_read (archive->fd, archive->record_start->buffer, record_size);
 135     if ((idx_t) status == record_size)
 136         return TRUE;
 137
 138     return tar_short_read (status, archive);
 139 }
 140
 141 /* --------------------------------------------------------------------------------------------- */
 142
 143 /**  Flush the current buffer from the archive.
 144  */
 145 static gboolean
 146 tar_flush_archive (tar_super_t *archive)
 147 {
 148     record_start_block += record_end - archive->record_start;
 149     current_block = archive->record_start;
 150     record_end = archive->record_start + blocking_factor;
 151
 152     return tar_flush_read (archive);
 153 }
 154
 155 /* --------------------------------------------------------------------------------------------- */
 156
 157 static off_t
 158 tar_seek_archive (tar_super_t *archive, off_t size)
 159 {
 160     off_t start, offset;
 161     off_t nrec, nblk;
 162     off_t skipped;
 163
 164     /* If low level I/O is already at EOF, do not try to seek further. */
 165     if (record_end < archive->record_start + blocking_factor)
 166         return 0;
 167
 168     skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
 169     if (size <= skipped)
 170         return 0;
 171
 172     /* Compute number of records to skip */
 173     nrec = (size - skipped) / record_size;
 174     if (nrec == 0)
 175         return 0;
 176
 177     start = tar_current_block_ordinal (archive);
 178
 179     offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
 180     if (offset < 0)
 181         return offset;
 182
 183 #if 0
 184     if ((offset % record_size) != 0)
 185     {
 186         message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
 187         return -1;
 188     }
 189 #endif
 190
 191     /* Convert to number of records */
 192     offset /= BLOCKSIZE;
 193     /* Compute number of skipped blocks */
 194     nblk = offset - start;
 195
 196     /* Update buffering info */
 197     record_start_block = offset - blocking_factor;
 198     current_block = record_end;
 199
 200     return nblk;
 201 }
 202
 203 /* --------------------------------------------------------------------------------------------- */
 204 /*** public functions ****************************************************************************/
 205 /* --------------------------------------------------------------------------------------------- */
 206
 207 gboolean
 208 is_octal_digit (char c)
 209 {
 210     return '0' <= c && c <= '7';
 211 }
 212
 213 /* --------------------------------------------------------------------------------------------- */
 214
 215 void
 216 tar_assign_string (char **string, char *value)
 217 {
 218     g_free (*string);
 219     *string = value;
 220 }
 221
 222 /* --------------------------------------------------------------------------------------------- */
 223
 224 void
 225 tar_assign_string_dup (char **string, const char *value)
 226 {
 227     g_free (*string);
 228     *string = g_strdup (value);
 229 }
 230
 231 /* --------------------------------------------------------------------------------------------- */
 232
 233 void
 234 tar_assign_string_dup_n (char **string, const char *value, size_t n)
 235 {
 236     g_free (*string);
 237     *string = g_strndup (value, n);
 238 }
 239
 240 /* --------------------------------------------------------------------------------------------- */
 241
 242 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
 243    to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
 244    the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
 245    value. @minval must not be positive.
 246
 247    If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
 248    are assumed to be represented using leading '-' in the usual way. If the represented value
 249    exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
 250    value.
 251
 252    On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
 253    then return 0.
 254
 255    Sample call to this function:
 256
 257    char *s_end;
 258    gboolean overflow;
 259    idx_t i;
 260
 261    i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
 262    if ((s_end == s) | (s_end == '\0') | overflow)
 263    diagnose_invalid (s);
 264
 265    This example uses "|" instead of "||" for fewer branches at runtime,
 266    which tends to be more efficient on modern processors.
 267
 268    This function is named "stoint" instead of "strtoint" because
 269    <string.h> reserves names beginning with "str".
 270  */
 271 #if ! (INTMAX_MAX <= UINTMAX_MAX)
 272 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
 273 #endif
 274 intmax_t
 275 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
 276 {
 277     char const *p = arg;
 278     intmax_t i;
 279     int v = 0;
 280
 281     if (isdigit (*p))
 282     {
 283         if (minval <= 0)
 284         {
 285             i = *p - '0';
 286
 287             while (isdigit (*++p) != 0)
 288             {
 289                 v |= ckd_mul (&i, i, 10) ? 1 : 0;
 290                 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
 291             }
 292
 293             v |= maxval < (uintmax_t) i ? 1 : 0;
 294             if (v != 0)
 295                 i = maxval;
 296         }
 297         else
 298         {
 299             uintmax_t u = *p - '0';
 300
 301             while (isdigit (*++p) != 0)
 302             {
 303                 v |= ckd_mul (&u, u, 10) ? 1 : 0;
 304                 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
 305             }
 306
 307             v |= maxval < u ? 1 : 0;
 308             if (v != 0)
 309                 u = maxval;
 310             i = tar_represent_uintmax (u);
 311         }
 312     }
 313     else if (minval < 0 && *p == '-' && isdigit (p[1]))
 314     {
 315         p++;
 316         i = -(*p - '0');
 317
 318         while (isdigit (*++p) != 0)
 319         {
 320             v |= ckd_mul (&i, i, 10) ? 1 : 0;
 321             v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
 322         }
 323
 324         v |= i < minval ? 1 : 0;
 325         if (v != 0)
 326             i = minval;
 327     }
 328     else
 329         i = 0;
 330
 331     if (arglim != NULL)
 332         *arglim = (char *) p;
 333     if (overflow != NULL)
 334         *overflow = v != 0;
 335     return i;
 336 }
 337
 338 /* --------------------------------------------------------------------------------------------- */
 339
 340 /**
 341  * Convert buffer at @where0 of size @digs from external format to intmax_t.
 342  * @digs must be positive.
 343  * If @type is non-NULL, data are of type @type.
 344  * The buffer must represent a value in the range -@minval through @maxval;
 345  * if the mathematically correct result V would be greater than INTMAX_MAX,
 346  * return a negative integer V such that (uintmax_t) V yields the correct result.
 347  * If @octal_only, allow only octal numbers instead of the other GNU extensions.
 348  *
 349  * Result is -1 if the field is invalid.
 350  */
 351 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
 352 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
 353 #endif
 354 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
 355 #error "tar_from_header() returns intmax_t to represent uintmax_t"
 356 #endif
 357 intmax_t
 358 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
 359                  uintmax_t maxval, gboolean octal_only)
 360 {
 361     uintmax_t value = 0;
 362     uintmax_t uminval = minval;
 363     uintmax_t minus_minval = -uminval;
 364     const char *where = where0;
 365     char const *lim = where + digs;
 366     gboolean negative = FALSE;
 367
 368     /* Accommodate buggy tar of unknown vintage, which outputs leading
 369        NUL if the previous field overflows. */
 370     if (*where == '\0')
 371         where++;
 372
 373     /* Accommodate older tars, which output leading spaces. */
 374     while (TRUE)
 375     {
 376         if (where == lim)
 377             return (-1);
 378
 379         if (!g_ascii_isspace (*where))
 380             break;
 381
 382         where++;
 383     }
 384
 385     if (is_octal_digit (*where))
 386     {
 387         char const *where1 = where;
 388         gboolean overflow = FALSE;
 389
 390         while (TRUE)
 391         {
 392             value += *where++ - '0';
 393             if (where == lim || !is_octal_digit (*where))
 394                 break;
 395             overflow |= ckd_mul (&value, value, 8);
 396         }
 397
 398         /* Parse the output of older, unportable tars, which generate
 399            negative values in two's complement octal. If the leading
 400            nonzero digit is 1, we can't recover the original value
 401            reliably; so do this only if the digit is 2 or more. This
 402            catches the common case of 32-bit negative time stamps. */
 403         if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
 404         {
 405             /* Compute the negative of the input value, assuming two's complement. */
 406             int digit;
 407
 408             digit = (*where1 - '0') | 4;
 409             overflow = FALSE;
 410             value = 0;
 411             where = where1;
 412
 413             while (TRUE)
 414             {
 415                 value += 7 - digit;
 416                 where++;
 417                 if (where == lim || !is_octal_digit (*where))
 418                     break;
 419                 digit = *where - '0';
 420                 overflow |= ckd_mul (&value, value, 8);
 421             }
 422
 423             overflow |= ckd_add (&value, value, 1);
 424
 425             if (!overflow && value <= minus_minval)
 426                 negative = TRUE;
 427         }
 428
 429         if (overflow)
 430             return (-1);
 431     }
 432     else if (octal_only)
 433     {
 434         /* Suppress the following extensions. */
 435     }
 436     else if (*where == '-' || *where == '+')
 437     {
 438         /* Parse base-64 output produced only by tar test versions
 439            1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
 440            Support for this will be withdrawn in future tar releases. */
 441
 442         negative = *where++ == '-';
 443
 444         while (where != lim)
 445         {
 446             unsigned char uc = *where;
 447             char dig;
 448
 449             dig = base64_map[uc];
 450             if (dig <= 0)
 451                 break;
 452
 453             if (ckd_mul (&value, value, 64))
 454                 return (-1);
 455             value |= dig - 1;
 456             where++;
 457         }
 458     }
 459     else if (where <= lim - 2 && (*where == '\200'      /* positive base-256 */
 460                                   || *where == '\377' /* negative base-256 */ ))
 461     {
 462         /* Parse base-256 output.  A nonnegative number N is
 463            represented as (256**DIGS)/2 + N; a negative number -N is
 464            represented as (256**DIGS) - N, i.e. as two's complement.
 465            The representation guarantees that the leading bit is
 466            always on, so that we don't confuse this format with the
 467            others (assuming ASCII bytes of 8 bits or more). */
 468
 469         int signbit;
 470         uintmax_t topbits;
 471
 472         signbit = *where & (1 << (LG_256 - 2));
 473         topbits = ((uintmax_t) - signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
 474
 475         value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
 476
 477         while (TRUE)
 478         {
 479             unsigned char uc;
 480
 481             uc = *where++;
 482             value = (value << LG_256) + uc;
 483             if (where == lim)
 484                 break;
 485
 486             if (((value << LG_256 >> LG_256) | topbits) != value)
 487                 return (-1);
 488         }
 489
 490         negative = signbit != 0;
 491         if (negative)
 492             value = -value;
 493     }
 494
 495     if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
 496         return (-1);
 497
 498     if (value <= (negative ? minus_minval : maxval))
 499         return tar_represent_uintmax (negative ? -value : value);
 500
 501     return (-1);
 502 }
 503
 504 /* --------------------------------------------------------------------------------------------- */
 505
 506 off_t
 507 off_from_header (const char *p, size_t s)
 508 {
 509     /* Negative offsets are not allowed in tar files, so invoke
 510        from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
 511     return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
 512 }
 513
 514 /* --------------------------------------------------------------------------------------------- */
 515
 516 /**
 517  * Return the location of the next available input or output block.
 518  * Return NULL for EOF.
 519  */
 520 union block *
 521 tar_find_next_block (tar_super_t *archive)
 522 {
 523     if (current_block == record_end)
 524     {
 525         if (hit_eof)
 526             return NULL;
 527
 528         if (!tar_flush_archive (archive))
 529         {
 530             message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
 531             return NULL;
 532         }
 533
 534         if (current_block == record_end)
 535         {
 536             hit_eof = TRUE;
 537             return NULL;
 538         }
 539     }
 540
 541     return current_block;
 542 }
 543
 544 /* --------------------------------------------------------------------------------------------- */
 545
 546 /**
 547  * Indicate that we have used all blocks up thru @block.
 548  */
 549 gboolean
 550 tar_set_next_block_after (union block *block)
 551 {
 552     while (block >= current_block)
 553         current_block++;
 554
 555     /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
 556        could mean the next block (if the input record is exactly one block long), which is not
 557        what is intended.  */
 558
 559     return !(current_block > record_end);
 560 }
 561
 562 /* --------------------------------------------------------------------------------------------- */
 563
 564 /**
 565  * Compute and return the block ordinal at current_block.
 566  */
 567 off_t
 568 tar_current_block_ordinal (const tar_super_t *archive)
 569 {
 570     return record_start_block + (current_block - archive->record_start);
 571 }
 572
 573 /* --------------------------------------------------------------------------------------------- */
 574
 575 /**
 576  * Skip over @size bytes of data in blocks in the archive.
 577  */
 578 gboolean
 579 tar_skip_file (tar_super_t *archive, off_t size)
 580 {
 581     union block *x;
 582     off_t nblk;
 583
 584     nblk = tar_seek_archive (archive, size);
 585     if (nblk >= 0)
 586         size -= nblk * BLOCKSIZE;
 587
 588     while (size > 0)
 589     {
 590         x = tar_find_next_block (archive);
 591         if (x == NULL)
 592             return FALSE;
 593
 594         tar_set_next_block_after (x);
 595         size -= BLOCKSIZE;
 596     }
 597
 598     return TRUE;
 599 }
 600
 601 /* --------------------------------------------------------------------------------------------- */