src/vfs/tar/tar-internal.c: define UINTMAX_WIDTH.
[midnight-commander.git] / src / vfs / tar / tar-internal.c
blob1afb1369b164667d3479e563b8c7135d155a0dc7
1 /*
2 Virtual File System: GNU Tar file system.
4 Copyright (C) 2023-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Andrew Borodin <aborodin@vmail.ru>, 2023
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /**
27 * \file
28 * \brief Source: Virtual File System: GNU Tar file system
29 * \author Andrew Borodin
30 * \date 2022
33 #include <config.h>
35 #include <ctype.h> /* isdigit() */
36 #include <inttypes.h> /* uintmax_t */
38 #include "lib/global.h"
39 #include "lib/widget.h" /* message() */
40 #include "lib/vfs/vfs.h" /* mc_read() */
42 #include "tar-internal.h"
44 /*** global variables ****************************************************************************/
46 /*** file scope macro definitions ****************************************************************/
/* Number of bits in uintmax_t; fallback for systems whose headers do not define it. */
#ifndef UINTMAX_WIDTH
#define UINTMAX_WIDTH (CHAR_BIT * sizeof (uintmax_t))
#endif

/* Log base 2 of common values. */
#define LG_8 3                  /* 8 == 1 << 3 */
#define LG_256 8                /* 256 == 1 << 8 */
56 /*** file scope type declarations ****************************************************************/
58 /*** forward declarations (file scope functions) *************************************************/
60 /*** file scope variables ************************************************************************/
/* Decoding table for base-64 digits (see Internet RFC 2045, Table 1).
   The table is indexed by unsigned char. A base-64 digit maps to its value plus one;
   every character that is not a base-64 digit maps to zero. */
/* *INDENT-OFF* */
static char const base64_map[UCHAR_MAX + 1] = {
    /* 'A'..'Z': digit values 0..25 */
    ['A'] = 1,  ['B'] = 2,  ['C'] = 3,  ['D'] = 4,  ['E'] = 5,  ['F'] = 6,  ['G'] = 7,
    ['H'] = 8,  ['I'] = 9,  ['J'] = 10, ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14,
    ['O'] = 15, ['P'] = 16, ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20, ['U'] = 21,
    ['V'] = 22, ['W'] = 23, ['X'] = 24, ['Y'] = 25, ['Z'] = 26,
    /* 'a'..'z': digit values 26..51 */
    ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31, ['f'] = 32, ['g'] = 33,
    ['h'] = 34, ['i'] = 35, ['j'] = 36, ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40,
    ['o'] = 41, ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46, ['u'] = 47,
    ['v'] = 48, ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52,
    /* '0'..'9': digit values 52..61 */
    ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56, ['4'] = 57,
    ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62,
    /* '+' and '/': digit values 62 and 63 */
    ['+'] = 63, ['/'] = 64
};
/* *INDENT-ON* */
89 /* --------------------------------------------------------------------------------------------- */
90 /*** file scope functions ************************************************************************/
91 /* --------------------------------------------------------------------------------------------- */
93 static gboolean
94 tar_short_read (size_t status, tar_super_t *archive)
96 size_t left; /* bytes left */
97 char *more; /* pointer to next byte to read */
99 more = archive->record_start->buffer + status;
100 left = record_size - status;
102 while (left % BLOCKSIZE != 0 || (left != 0 && status != 0))
104 if (status != 0)
106 ssize_t r;
108 r = mc_read (archive->fd, more, left);
109 if (r == -1)
110 return FALSE;
112 status = (size_t) r;
115 if (status == 0)
116 break;
118 left -= status;
119 more += status;
122 record_end = archive->record_start + (record_size - left) / BLOCKSIZE;
124 return TRUE;
127 /* --------------------------------------------------------------------------------------------- */
129 static gboolean
130 tar_flush_read (tar_super_t *archive)
132 size_t status;
134 status = mc_read (archive->fd, archive->record_start->buffer, record_size);
135 if ((idx_t) status == record_size)
136 return TRUE;
138 return tar_short_read (status, archive);
141 /* --------------------------------------------------------------------------------------------- */
143 /** Flush the current buffer from the archive.
145 static gboolean
146 tar_flush_archive (tar_super_t *archive)
148 record_start_block += record_end - archive->record_start;
149 current_block = archive->record_start;
150 record_end = archive->record_start + blocking_factor;
152 return tar_flush_read (archive);
155 /* --------------------------------------------------------------------------------------------- */
157 static off_t
158 tar_seek_archive (tar_super_t *archive, off_t size)
160 off_t start, offset;
161 off_t nrec, nblk;
162 off_t skipped;
164 /* If low level I/O is already at EOF, do not try to seek further. */
165 if (record_end < archive->record_start + blocking_factor)
166 return 0;
168 skipped = (blocking_factor - (current_block - archive->record_start)) * BLOCKSIZE;
169 if (size <= skipped)
170 return 0;
172 /* Compute number of records to skip */
173 nrec = (size - skipped) / record_size;
174 if (nrec == 0)
175 return 0;
177 start = tar_current_block_ordinal (archive);
179 offset = mc_lseek (archive->fd, nrec * record_size, SEEK_CUR);
180 if (offset < 0)
181 return offset;
183 #if 0
184 if ((offset % record_size) != 0)
186 message (D_ERROR, MSG_ERROR, _("tar: mc_lseek not stopped at a record boundary"));
187 return -1;
189 #endif
191 /* Convert to number of records */
192 offset /= BLOCKSIZE;
193 /* Compute number of skipped blocks */
194 nblk = offset - start;
196 /* Update buffering info */
197 record_start_block = offset - blocking_factor;
198 current_block = record_end;
200 return nblk;
203 /* --------------------------------------------------------------------------------------------- */
204 /*** public functions ****************************************************************************/
205 /* --------------------------------------------------------------------------------------------- */
207 gboolean
208 is_octal_digit (char c)
210 return '0' <= c && c <= '7';
213 /* --------------------------------------------------------------------------------------------- */
/**
 * Replace the string held in *@string with @value.
 *
 * The previous string is released with g_free() and @value itself is stored
 * (no copy is made).
 *
 * @param string location of the string pointer to update
 * @param value new string, may be NULL
 */
void
tar_assign_string (char **string, char *value)
{
    /* Self-assignment: freeing here would leave *string dangling. */
    if (*string == value)
        return;

    g_free (*string);
    *string = value;
}
222 /* --------------------------------------------------------------------------------------------- */
/**
 * Replace the string held in *@string with a newly allocated copy of @value.
 *
 * @param string location of the string pointer to update; the old string is g_free()'d
 * @param value string to duplicate with g_strdup()
 */
void
tar_assign_string_dup (char **string, const char *value)
{
    char *copy;

    /* Duplicate before freeing so that @value may point into the old *string. */
    copy = g_strdup (value);
    g_free (*string);
    *string = copy;
}
231 /* --------------------------------------------------------------------------------------------- */
/**
 * Replace the string held in *@string with a newly allocated copy of at most @n
 * bytes of @value.
 *
 * @param string location of the string pointer to update; the old string is g_free()'d
 * @param value string to duplicate with g_strndup()
 * @param n maximum number of bytes to copy from @value
 */
void
tar_assign_string_dup_n (char **string, const char *value, size_t n)
{
    char *copy;

    /* Duplicate before freeing so that @value may point into the old *string. */
    copy = g_strndup (value, n);
    g_free (*string);
    *string = copy;
}
240 /* --------------------------------------------------------------------------------------------- */
242 /* Convert a prefix of the string @arg to a system integer type. If @arglim, set *@arglim to point
243 to just after the prefix. If @overflow, set *@overflow to TRUE or FALSE depending on whether
244 the input is out of @minval..@maxval range. If the input is out of that range, return an extreme
245 value. @minval must not be positive.
247 If @minval is negative, @maxval can be at most INTMAX_MAX, and negative integers @minval .. -1
248 are assumed to be represented using leading '-' in the usual way. If the represented value
249 exceeds INTMAX_MAX, return a negative integer V such that (uintmax_t) V yields the represented
250 value.
252 On conversion error: if @arglim set *@arglim = @arg if @overflow set *@overflow = FALSE;
253 then return 0.
255 Sample call to this function:
257 char *s_end;
258 gboolean overflow;
259 idx_t i;
261 i = stoint (s, &s_end, &overflow, 0, IDX_MAX);
262 if ((s_end == s) | (s_end == '\0') | overflow)
263 diagnose_invalid (s);
265 This example uses "|" instead of "||" for fewer branches at runtime,
266 which tends to be more efficient on modern processors.
268 This function is named "stoint" instead of "strtoint" because
269 <string.h> reserves names beginning with "str".
271 #if ! (INTMAX_MAX <= UINTMAX_MAX)
272 #error "strtosysint: nonnegative intmax_t does not fit in uintmax_t"
273 #endif
274 intmax_t
275 stoint (const char *arg, char **arglim, gboolean *overflow, intmax_t minval, uintmax_t maxval)
277 char const *p = arg;
278 intmax_t i;
279 int v = 0;
281 if (isdigit (*p))
283 if (minval <= 0)
285 i = *p - '0';
287 while (isdigit (*++p) != 0)
289 v |= ckd_mul (&i, i, 10) ? 1 : 0;
290 v |= ckd_add (&i, i, *p - '0') ? 1 : 0;
293 v |= maxval < (uintmax_t) i ? 1 : 0;
294 if (v != 0)
295 i = maxval;
297 else
299 uintmax_t u = *p - '0';
301 while (isdigit (*++p) != 0)
303 v |= ckd_mul (&u, u, 10) ? 1 : 0;
304 v |= ckd_add (&u, u, *p - '0') ? 1 : 0;
307 v |= maxval < u ? 1 : 0;
308 if (v != 0)
309 u = maxval;
310 i = tar_represent_uintmax (u);
313 else if (minval < 0 && *p == '-' && isdigit (p[1]))
315 p++;
316 i = -(*p - '0');
318 while (isdigit (*++p) != 0)
320 v |= ckd_mul (&i, i, 10) ? 1 : 0;
321 v |= ckd_sub (&i, i, *p - '0') ? 1 : 0;
324 v |= i < minval ? 1 : 0;
325 if (v != 0)
326 i = minval;
328 else
329 i = 0;
331 if (arglim != NULL)
332 *arglim = (char *) p;
333 if (overflow != NULL)
334 *overflow = v != 0;
335 return i;
338 /* --------------------------------------------------------------------------------------------- */
341 * Convert buffer at @where0 of size @digs from external format to intmax_t.
342 * @digs must be positive.
343 * If @type is non-NULL, data are of type @type.
344 * The buffer must represent a value in the range -@minval through @maxval;
345 * if the mathematically correct result V would be greater than INTMAX_MAX,
346 * return a negative integer V such that (uintmax_t) V yields the correct result.
347 * If @octal_only, allow only octal numbers instead of the other GNU extensions.
349 * Result is -1 if the field is invalid.
351 #if !(INTMAX_MAX <= UINTMAX_MAX && - (INTMAX_MIN + 1) <= UINTMAX_MAX)
352 #error "tar_from_header() internally represents intmax_t as uintmax_t + sign"
353 #endif
354 #if !(UINTMAX_MAX / 2 <= INTMAX_MAX)
355 #error "tar_from_header() returns intmax_t to represent uintmax_t"
356 #endif
357 intmax_t
358 tar_from_header (const char *where0, size_t digs, char const *type, intmax_t minval,
359 uintmax_t maxval, gboolean octal_only)
361 uintmax_t value = 0;
362 uintmax_t uminval = minval;
363 uintmax_t minus_minval = -uminval;
364 const char *where = where0;
365 char const *lim = where + digs;
366 gboolean negative = FALSE;
368 /* Accommodate buggy tar of unknown vintage, which outputs leading
369 NUL if the previous field overflows. */
370 if (*where == '\0')
371 where++;
373 /* Accommodate older tars, which output leading spaces. */
374 while (TRUE)
376 if (where == lim)
377 return (-1);
379 if (!g_ascii_isspace (*where))
380 break;
382 where++;
385 if (is_octal_digit (*where))
387 char const *where1 = where;
388 gboolean overflow = FALSE;
390 while (TRUE)
392 value += *where++ - '0';
393 if (where == lim || !is_octal_digit (*where))
394 break;
395 overflow |= ckd_mul (&value, value, 8);
398 /* Parse the output of older, unportable tars, which generate
399 negative values in two's complement octal. If the leading
400 nonzero digit is 1, we can't recover the original value
401 reliably; so do this only if the digit is 2 or more. This
402 catches the common case of 32-bit negative time stamps. */
403 if ((overflow || maxval < value) && *where1 >= 2 && type != NULL)
405 /* Compute the negative of the input value, assuming two's complement. */
406 int digit;
408 digit = (*where1 - '0') | 4;
409 overflow = FALSE;
410 value = 0;
411 where = where1;
413 while (TRUE)
415 value += 7 - digit;
416 where++;
417 if (where == lim || !is_octal_digit (*where))
418 break;
419 digit = *where - '0';
420 overflow |= ckd_mul (&value, value, 8);
423 overflow |= ckd_add (&value, value, 1);
425 if (!overflow && value <= minus_minval)
426 negative = TRUE;
429 if (overflow)
430 return (-1);
432 else if (octal_only)
434 /* Suppress the following extensions. */
436 else if (*where == '-' || *where == '+')
438 /* Parse base-64 output produced only by tar test versions
439 1.13.6 (1999-08-11) through 1.13.11 (1999-08-23).
440 Support for this will be withdrawn in future tar releases. */
442 negative = *where++ == '-';
444 while (where != lim)
446 unsigned char uc = *where;
447 char dig;
449 dig = base64_map[uc];
450 if (dig <= 0)
451 break;
453 if (ckd_mul (&value, value, 64))
454 return (-1);
455 value |= dig - 1;
456 where++;
459 else if (where <= lim - 2 && (*where == '\200' /* positive base-256 */
460 || *where == '\377' /* negative base-256 */ ))
462 /* Parse base-256 output. A nonnegative number N is
463 represented as (256**DIGS)/2 + N; a negative number -N is
464 represented as (256**DIGS) - N, i.e. as two's complement.
465 The representation guarantees that the leading bit is
466 always on, so that we don't confuse this format with the
467 others (assuming ASCII bytes of 8 bits or more). */
469 int signbit;
470 uintmax_t topbits;
472 signbit = *where & (1 << (LG_256 - 2));
473 topbits = ((uintmax_t) - signbit) << (UINTMAX_WIDTH - LG_256 - (LG_256 - 2));
475 value = (*where++ & ((1 << (LG_256 - 2)) - 1)) - signbit;
477 while (TRUE)
479 unsigned char uc;
481 uc = *where++;
482 value = (value << LG_256) + uc;
483 if (where == lim)
484 break;
486 if (((value << LG_256 >> LG_256) | topbits) != value)
487 return (-1);
490 negative = signbit != 0;
491 if (negative)
492 value = -value;
495 if (where != lim && *where != '\0' && !g_ascii_isspace (*where))
496 return (-1);
498 if (value <= (negative ? minus_minval : maxval))
499 return tar_represent_uintmax (negative ? -value : value);
501 return (-1);
504 /* --------------------------------------------------------------------------------------------- */
506 off_t
507 off_from_header (const char *p, size_t s)
509 /* Negative offsets are not allowed in tar files, so invoke
510 from_header with minimum value 0, not TYPE_MINIMUM (off_t). */
511 return tar_from_header (p, s, "off_t", 0, TYPE_MAXIMUM (off_t), FALSE);
514 /* --------------------------------------------------------------------------------------------- */
517 * Return the location of the next available input or output block.
518 * Return NULL for EOF.
520 union block *
521 tar_find_next_block (tar_super_t *archive)
523 if (current_block == record_end)
525 if (hit_eof)
526 return NULL;
528 if (!tar_flush_archive (archive))
530 message (D_ERROR, MSG_ERROR, _("Inconsistent tar archive"));
531 return NULL;
534 if (current_block == record_end)
536 hit_eof = TRUE;
537 return NULL;
541 return current_block;
544 /* --------------------------------------------------------------------------------------------- */
547 * Indicate that we have used all blocks up thru @block.
549 gboolean
550 tar_set_next_block_after (union block *block)
552 while (block >= current_block)
553 current_block++;
555 /* Do *not* flush the archive here. If we do, the same argument to tar_set_next_block_after()
556 could mean the next block (if the input record is exactly one block long), which is not
557 what is intended. */
559 return !(current_block > record_end);
562 /* --------------------------------------------------------------------------------------------- */
565 * Compute and return the block ordinal at current_block.
567 off_t
568 tar_current_block_ordinal (const tar_super_t *archive)
570 return record_start_block + (current_block - archive->record_start);
573 /* --------------------------------------------------------------------------------------------- */
576 * Skip over @size bytes of data in blocks in the archive.
578 gboolean
579 tar_skip_file (tar_super_t *archive, off_t size)
581 union block *x;
582 off_t nblk;
584 nblk = tar_seek_archive (archive, size);
585 if (nblk >= 0)
586 size -= nblk * BLOCKSIZE;
588 while (size > 0)
590 x = tar_find_next_block (archive);
591 if (x == NULL)
592 return FALSE;
594 tar_set_next_block_after (x);
595 size -= BLOCKSIZE;
598 return TRUE;
601 /* --------------------------------------------------------------------------------------------- */