2 Virtual File System: GNU Tar file system.
4 Copyright (C) 2023-2024
5 Free Software Foundation, Inc.
8 Andrew Borodin <aborodin@vmail.ru>, 2023
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
28 * \brief Source: Virtual File System: GNU Tar file system
29 * \author Andrew Borodin
35 #include <ctype.h> /* isdigit() */
36 #include <inttypes.h> /* uintmax_t */
38 #include "lib/global.h"
39 #include "lib/widget.h" /* message() */
40 #include "lib/vfs/vfs.h" /* mc_read() */
42 #include "tar-internal.h"
44 /*** global variables ****************************************************************************/
46 /*** file scope macro definitions ****************************************************************/
49 #define UINTMAX_WIDTH (sizeof (uintmax_t) * CHAR_BIT)
52 /* Log base 2 of common values. */
56 /*** file scope type declarations ****************************************************************/
58 /*** forward declarations (file scope functions) *************************************************/
60 /*** file scope variables ************************************************************************/
62 /* Table of base-64 digit values + 1, indexed by unsigned chars.
63 See Internet RFC 2045 Table 1.
64 Zero entries are for unsigned chars that are not base-64 digits. */
66 static char const base64_map
[UCHAR_MAX
+ 1] =
68 ['A'] = 0 + 1, ['B'] = 1 + 1, ['C'] = 2 + 1, ['D'] = 3 + 1,
69 ['E'] = 4 + 1, ['F'] = 5 + 1, ['G'] = 6 + 1, ['H'] = 7 + 1,
70 ['I'] = 8 + 1, ['J'] = 9 + 1, ['K'] = 10 + 1, ['L'] = 11 + 1,
71 ['M'] = 12 + 1, ['N'] = 13 + 1, ['O'] = 14 + 1, ['P'] = 15 + 1,
72 ['Q'] = 16 + 1, ['R'] = 17 + 1, ['S'] = 18 + 1, ['T'] = 19 + 1,
73 ['U'] = 20 + 1, ['V'] = 21 + 1, ['W'] = 22 + 1, ['X'] = 23 + 1,
74 ['Y'] = 24 + 1, ['Z'] = 25 + 1,
75 ['a'] = 26 + 1, ['b'] = 27 + 1, ['c'] = 28 + 1, ['d'] = 29 + 1,
76 ['e'] = 30 + 1, ['f'] = 31 + 1, ['g'] = 32 + 1, ['h'] = 33 + 1,
77 ['i'] = 34 + 1, ['j'] = 35 + 1, ['k'] = 36 + 1, ['l'] = 37 + 1,
78 ['m'] = 38 + 1, ['n'] = 39 + 1, ['o'] = 40 + 1, ['p'] = 41 + 1,
79 ['q'] = 42 + 1, ['r'] = 43 + 1, ['s'] = 44 + 1, ['t'] = 45 + 1,
80 ['u'] = 46 + 1, ['v'] = 47 + 1, ['w'] = 48 + 1, ['x'] = 49 + 1,
81 ['y'] = 50 + 1, ['z'] = 51 + 1,
82 ['0'] = 52 + 1, ['1'] = 53 + 1, ['2'] = 54 + 1, ['3'] = 55 + 1,
83 ['4'] = 56 + 1, ['5'] = 57 + 1, ['6'] = 58 + 1, ['7'] = 59 + 1,
84 ['8'] = 60 + 1, ['9'] = 61 + 1,
85 ['+'] = 62 + 1, ['/'] = 63 + 1,
89 /* --------------------------------------------------------------------------------------------- */
90 /*** file scope functions ************************************************************************/
91 /* --------------------------------------------------------------------------------------------- */
94 tar_short_read (size_t status
, tar_super_t
*archive
)
96 size_t left
; /* bytes left */
97 char *more
; /* pointer to next byte to read */
99 more
= archive
->record_start
->buffer
+ status
;
100 left
= record_size
- status
;
102 while (left
% BLOCKSIZE
!= 0 || (left
!= 0 && status
!= 0))
108 r
= mc_read (archive
->fd
, more
, left
);
122 record_end
= archive
->record_start
+ (record_size
- left
) / BLOCKSIZE
;
127 /* --------------------------------------------------------------------------------------------- */
130 tar_flush_read (tar_super_t
*archive
)
134 status
= mc_read (archive
->fd
, archive
->record_start
->buffer
, record_size
);
135 if ((idx_t
) status
== record_size
)
138 return tar_short_read (status
, archive
);
141 /* --------------------------------------------------------------------------------------------- */
143 /** Flush the current buffer from the archive.
146 tar_flush_archive (tar_super_t
*archive
)
148 record_start_block
+= record_end
- archive
->record_start
;
149 current_block
= archive
->record_start
;
150 record_end
= archive
->record_start
+ blocking_factor
;
152 return tar_flush_read (archive
);
155 /* --------------------------------------------------------------------------------------------- */
158 tar_seek_archive (tar_super_t
*archive
, off_t size
)
164 /* If low level I/O is already at EOF, do not try to seek further. */
165 if (record_end
< archive
->record_start
+ blocking_factor
)
168 skipped
= (blocking_factor
- (current_block
- archive
->record_start
)) * BLOCKSIZE
;
172 /* Compute number of records to skip */
173 nrec
= (size
- skipped
) / record_size
;
177 start
= tar_current_block_ordinal (archive
);
179 offset
= mc_lseek (archive
->fd
, nrec
* record_size
, SEEK_CUR
);
184 if ((offset
% record_size
) != 0)
186 message (D_ERROR
, MSG_ERROR
, _("tar: mc_lseek not stopped at a record boundary"));
191 /* Convert to number of records */
193 /* Compute number of skipped blocks */
194 nblk
= offset
- start
;
196 /* Update buffering info */
197 record_start_block
= offset
- blocking_factor
;
198 current_block
= record_end
;
203 /* --------------------------------------------------------------------------------------------- */
204 /*** public functions ****************************************************************************/
205 /* --------------------------------------------------------------------------------------------- */
208 is_octal_digit (char c
)
210 return '0' <= c
&& c
<= '7';
213 /* --------------------------------------------------------------------------------------------- */
216 tar_assign_string (char **string
, char *value
)
222 /* --------------------------------------------------------------------------------------------- */
225 tar_assign_string_dup (char **string
, const char *value
)
228 *string
= g_strdup (value
);
231 /* --------------------------------------------------------------------------------------------- */
234 tar_assign_string_dup_n (char **string
, const char *value
, size_t n
)
237 *string
= g_strndup (value
, n
);
240 /* --------------------------------------------------------------------------------------------- */
242 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
243 to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
244 the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
245 value. @minval must not be positive.
247 If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
248 are assumed to be represented using leading '-' in the usual way. If the represented value
249 exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented value.
252 On conversion error: if @arglim, set *@arglim = @arg; if @overflow, set *@overflow = FALSE;
255 Sample call to this function:
261 i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
262 if ((s_end == s) | (*s_end != '\0') | overflow)
263 diagnose_invalid (s);
265 This example uses "|" instead of "||" for fewer branches at runtime,
266 which tends to be more efficient on modern processors.
268 This function is named "stoint" instead of "strtoint" because
269 <string.h> reserves names beginning with "str".
271 #if ! (INTMAX_MAX <= UINTMAX_MAX)
272 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
275 stoint (const char *arg
, char **arglim
, gboolean
*overflow
, intmax_t minval
, uintmax_t maxval
)
287 while (isdigit (*++p
) != 0)
289 v
|= ckd_mul (&i
, i
, 10) ? 1 : 0;
290 v
|= ckd_add (&i
, i
, *p
- '0') ? 1 : 0;
293 v
|= maxval
< (uintmax_t) i
? 1 : 0;
299 uintmax_t u
= *p
- '0';
301 while (isdigit (*++p
) != 0)
303 v
|= ckd_mul (&u
, u
, 10) ? 1 : 0;
304 v
|= ckd_add (&u
, u
, *p
- '0') ? 1 : 0;
307 v
|= maxval
< u
? 1 : 0;
310 i
= tar_represent_uintmax (u
);
313 else if (minval
< 0 && *p
== '-' && isdigit (p
[1]))
318 while (isdigit (*++p
) != 0)
320 v
|= ckd_mul (&i
, i
, 10) ? 1 : 0;
321 v
|= ckd_sub (&i
, i
, *p
- '0') ? 1 : 0;
324 v
|= i
< minval
? 1 : 0;
332 *arglim
= (char *) p
;
333 if (overflow
!= NULL
)
338 /* --------------------------------------------------------------------------------------------- */
341 * Convert buffer at @where0 of size @digs from external format to intmax_t.
342 * @digs must be positive.
343 * If @type is non-NULL, data are of type @type.
344 * The buffer must represent a value in the range @minval through @maxval;
345 * if the mathematically correct result V would be greater than INTMAX_MAX,
346 * return a negative integer V such that (uintmax_t) V yields the correct result.
347 * If @octal_only, allow only octal numbers instead of the other GNU extensions.
349 * Result is -1 if the field is invalid.
351 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
352 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
354 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
355 #error "tar_from_header() returns intmax_t to represent uintmax_t"
358 tar_from_header (const char *where0
, size_t digs
, char const *type
, intmax_t minval
,
359 uintmax_t maxval
, gboolean octal_only
)
362 uintmax_t uminval
= minval
;
363 uintmax_t minus_minval
= -uminval
;
364 const char *where
= where0
;
365 char const *lim
= where
+ digs
;
366 gboolean negative
= FALSE
;
368 /* Accommodate buggy tar of unknown vintage, which outputs leading
369 NUL if the previous field overflows. */
373 /* Accommodate older tars, which output leading spaces. */
379 if (!g_ascii_isspace (*where
))
385 if (is_octal_digit (*where
))
387 char const *where1
= where
;
388 gboolean overflow
= FALSE
;
392 value
+= *where
++ - '0';
393 if (where
== lim
|| !is_octal_digit (*where
))
395 overflow
|= ckd_mul (&value
, value
, 8);
398 /* Parse the output of older, unportable tars, which generate
399 negative values in two's complement octal. If the leading
400 nonzero digit is 1, we can't recover the original value
401 reliably; so do this only if the digit is 2 or more. This
402 catches the common case of 32-bit negative time stamps. */
403 if ((overflow
|| maxval
< value
) && *where1
>= 2 && type
!= NULL
)
405 /* Compute the negative of the input value, assuming two's complement. */
408 digit
= (*where1
- '0') | 4;
417 if (where
== lim
|| !is_octal_digit (*where
))
419 digit
= *where
- '0';
420 overflow
|= ckd_mul (&value
, value
, 8);
423 overflow
|= ckd_add (&value
, value
, 1);
425 if (!overflow
&& value
<= minus_minval
)
434 /* Suppress the following extensions. */
436 else if (*where
== '-' || *where
== '+')
438 /* Parse base-64 output produced only by tar test versions
439 1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
440 Support for this will be withdrawn in future tar releases. */
442 negative
= *where
++ == '-';
446 unsigned char uc
= *where
;
449 dig
= base64_map
[uc
];
453 if (ckd_mul (&value
, value
, 64))
459 else if (where
<= lim
- 2 && (*where
== '\200' /* positive base-256 */
460 || *where
== '\377' /* negative base-256 */ ))
462 /* Parse base-256 output. A nonnegative number N is
463 represented as (256**DIGS)/2 + N; a negative number -N is
464 represented as (256**DIGS) - N, i.e. as two's complement.
465 The representation guarantees that the leading bit is
466 always on, so that we don't confuse this format with the
467 others (assuming ASCII bytes of 8 bits or more). */
472 signbit
= *where
& (1 << (LG_256
- 2));
473 topbits
= ((uintmax_t) - signbit
) << (UINTMAX_WIDTH
- LG_256
- (LG_256
- 2));
475 value
= (*where
++ & ((1 << (LG_256
- 2)) - 1)) - signbit
;
482 value
= (value
<< LG_256
) + uc
;
486 if (((value
<< LG_256
>> LG_256
) | topbits
) != value
)
490 negative
= signbit
!= 0;
495 if (where
!= lim
&& *where
!= '\0' && !g_ascii_isspace (*where
))
498 if (value
<= (negative
? minus_minval
: maxval
))
499 return tar_represent_uintmax (negative
? -value
: value
);
504 /* --------------------------------------------------------------------------------------------- */
507 off_from_header (const char *p
, size_t s
)
509 /* Negative offsets are not allowed in tar files, so invoke
510 tar_from_header() with minimum value 0, not TYPE_MINIMUM (off_t). */
511 return tar_from_header (p
, s
, "off_t", 0, TYPE_MAXIMUM (off_t
), FALSE
);
514 /* --------------------------------------------------------------------------------------------- */
517 * Return the location of the next available input or output block.
518 * Return NULL for EOF.
521 tar_find_next_block (tar_super_t
*archive
)
523 if (current_block
== record_end
)
528 if (!tar_flush_archive (archive
))
530 message (D_ERROR
, MSG_ERROR
, _("Inconsistent tar archive"));
534 if (current_block
== record_end
)
541 return current_block
;
544 /* --------------------------------------------------------------------------------------------- */
547 * Indicate that we have used all blocks up through @block.
550 tar_set_next_block_after (union block
*block
)
552 while (block
>= current_block
)
555 /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
556 could mean the next block (if the input record is exactly one block long), which is not what is intended. */
559 return !(current_block
> record_end
);
562 /* --------------------------------------------------------------------------------------------- */
565 * Compute and return the block ordinal at current_block.
568 tar_current_block_ordinal (const tar_super_t
*archive
)
570 return record_start_block
+ (current_block
- archive
->record_start
);
573 /* --------------------------------------------------------------------------------------------- */
576 * Skip over @size bytes of data in blocks in the archive.
579 tar_skip_file (tar_super_t
*archive
, off_t size
)
584 nblk
= tar_seek_archive (archive
, size
);
586 size
-= nblk
* BLOCKSIZE
;
590 x
= tar_find_next_block (archive
);
594 tar_set_next_block_after (x
);
601 /* --------------------------------------------------------------------------------------------- */