tar: avoid need for base64_init and extra table.
[midnight-commander.git] / src / vfs / tar / tar-sparse.c
blob7b0d3b6d7324d5cc8306e338c86077c33c1f0d7c
1 /*
2 Virtual File System: GNU Tar file system.
4 Copyright (C) 2003-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Andrew Borodin <aborodin@vmail.ru>, 2023
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /**
27 * \file
28 * \brief Source: Virtual File System: GNU Tar file system
32 * Avoid following error:
33 * comparison of unsigned expression < 0 is always false [-Werror=type-limits]
35 * https://www.boost.org/doc/libs/1_55_0/libs/integer/test/cstdint_test.cpp
36 * We can't suppress this warning on the command line as not all GCC versions support -Wno-type-limits
38 #if defined(__GNUC__) && (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4))
39 #pragma GCC diagnostic ignored "-Wtype-limits"
40 #endif
42 #include <config.h>
44 #include <inttypes.h> /* uintmax_t */
46 #include "lib/global.h"
48 #include "tar-internal.h"
50 /* Old GNU Format.
51 The sparse file information is stored in the oldgnu_header in the following manner:
53 The header is marked with type 'S'. Its 'size' field contains the cumulative size
54 of all non-empty blocks of the file. The actual file size is stored in `realsize'
55 member of oldgnu_header.
57 The map of the file is stored in a list of 'struct sparse'. Each struct contains
58 offset to the block of data and its size (both as octal numbers). The first file
59 header contains at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
60 contains more structs, then the field 'isextended' of the main header is set to
61 1 (binary) and the 'struct sparse_header' header follows, containing at most
62 21 following structs (SPARSES_IN_SPARSE_HEADER). If more structs follow, 'isextended'
63 field of the extended header is set and the next extension header follows, etc...
66 /*** global variables ****************************************************************************/
68 /*** file scope macro definitions ****************************************************************/
70 /* Bound on length of the string representing an unsigned integer
71 value representable in B bits. log10 (2.0) < 146/485. The
72 smallest value of B where this bound is not tight is 2621. */
/* Worked example: B = 64 gives (64 * 146 + 484) / 485 = 20, the number of
   decimal digits of 2^64 - 1. */
73 #define INT_BITS_STRLEN_BOUND(b) (((b) * 146 + 484) / 485)
75 /* Bound on length of the string representing an integer type or expression T.
76 T must not be a bit-field expression.
78 Subtract 1 for the sign bit if T is signed, and then add 1 more for
79 a minus sign if needed.
81 Because _GL_SIGNED_TYPE_OR_EXPR sometimes returns 1 when its argument is
82 unsigned, this macro may overestimate the true bound by one byte when
83 applied to unsigned types of size 2, 4, 16, ... bytes. */
/* NOTE(review): _GL_TYPE_WIDTH and _GL_SIGNED_TYPE_OR_EXPR are not defined in
   this file; they are expected to come from an included header -- confirm. */
84 #define INT_STRLEN_BOUND(t) \
85 (INT_BITS_STRLEN_BOUND (_GL_TYPE_WIDTH (t) - _GL_SIGNED_TYPE_OR_EXPR (t)) \
86 + _GL_SIGNED_TYPE_OR_EXPR (t))
88 /* Bound on buffer size needed to represent an integer type or expression T,
89 including the terminating null. T must not be a bit-field expression. */
90 #define INT_BUFSIZE_BOUND(t) (INT_STRLEN_BOUND (t) + 1)
/* Size of a buffer able to hold the text of any uintmax_t value plus the
   terminating null. Used for the number buffer in pax_decode_header () and as
   the copy limit in COPY_BUF. */
92 #define UINTMAX_STRSIZE_BOUND INT_BUFSIZE_BOUND (uintmax_t)
/* Alias of SPARSES_IN_SPARSE_HEADER. Not referenced by the code visible in
   this file. */
94 #define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
/* Copy one newline-terminated field from the archive into BUF as a C string.

   ARCH  archive handle passed to tar_find_next_block ().
   B     lvalue holding the current block; replaced by the next block when the
         field continues past the end of the current one.
   BUF   destination; must have room for UINTMAX_STRSIZE_BOUND bytes.
         Evaluated more than once, so it must be free of side effects.
   SRC   lvalue holding the read position inside B->buffer; left pointing just
         past the consumed '\n'.

   The '\n' that ends the field is stored and then overwritten with '\0'.

   WARNING: this macro contains hidden control flow. It executes `return FALSE'
   in the CALLING function when the field does not fit into BUF or when no
   further block can be obtained, so it may be used only inside functions that
   return gboolean.

   All parameters are parenthesized and the macro-local variables carry a
   `copy_buf_' prefix so that caller expressions and caller variables named
   `dst' or `endp' cannot be captured by the expansion. */
#define COPY_BUF(arch,b,buf,src) \
do \
{ \
    char *copy_buf_endp_ = (b)->buffer + BLOCKSIZE; \
    char *copy_buf_dst_ = (buf); \
    do \
    { \
        if (copy_buf_dst_ == (buf) + UINTMAX_STRSIZE_BOUND - 1) \
            /* numeric overflow in sparse archive member */ \
            return FALSE; \
        if ((src) == copy_buf_endp_) \
        { \
            tar_set_next_block_after (b); \
            (b) = tar_find_next_block (arch); \
            if ((b) == NULL) \
                /* unexpected EOF in archive */ \
                return FALSE; \
            (src) = (b)->buffer; \
            copy_buf_endp_ = (b)->buffer + BLOCKSIZE; \
        } \
        *copy_buf_dst_ = *(src)++; \
    } \
    while (*copy_buf_dst_++ != '\n'); \
    copy_buf_dst_[-1] = '\0'; \
} \
while (FALSE)
123 /*** file scope type declarations ****************************************************************/
125 struct tar_sparse_file;
127 struct tar_sparse_optab
129 gboolean (*init) (struct tar_sparse_file * file);
130 gboolean (*done) (struct tar_sparse_file * file);
131 gboolean (*sparse_member_p) (struct tar_sparse_file * file);
132 gboolean (*fixup_header) (struct tar_sparse_file * file);
133 gboolean (*decode_header) (tar_super_t * archive, struct tar_sparse_file * file);
/* State shared by the sparse_* helpers while one archive member is processed.
   sparse_init () zeroes the whole structure before selecting `optab'. */
struct tar_sparse_file
{
    int fd;                     /**< File descriptor */
    off_t dumped_size;          /**< Number of bytes actually written to the archive */
    struct tar_stat_info *stat_info;    /**< Information about the file */
    struct tar_sparse_optab const *optab;       /**< Handlers of the selected sparse format */
    void *closure;              /**< Any additional data optab calls might require */
};
/* Result of oldgnu_add_sparse (). */
enum oldgnu_add_status
{
    add_ok,                     /* entry was valid and has been appended to the sparse map */
    add_finish,                 /* entry has an empty `numbytes' field: nothing added, stop reading */
    add_fail                    /* entry was rejected as invalid */
};
152 /*** forward declarations (file scope functions) *************************************************/
154 static gboolean oldgnu_sparse_member_p (struct tar_sparse_file *file);
155 static gboolean oldgnu_fixup_header (struct tar_sparse_file *file);
156 static gboolean oldgnu_get_sparse_info (tar_super_t * archive, struct tar_sparse_file *file);
158 static gboolean star_sparse_member_p (struct tar_sparse_file *file);
159 static gboolean star_fixup_header (struct tar_sparse_file *file);
160 static gboolean star_get_sparse_info (tar_super_t * archive, struct tar_sparse_file *file);
162 static gboolean pax_sparse_member_p (struct tar_sparse_file *file);
163 static gboolean pax_decode_header (tar_super_t * archive, struct tar_sparse_file *file);
165 /*** file scope variables ************************************************************************/
167 /* *INDENT-OFF* */
168 static struct tar_sparse_optab const oldgnu_optab =
170 .init = NULL, /* No init function */
171 .done = NULL, /* No done function */
172 .sparse_member_p = oldgnu_sparse_member_p,
173 .fixup_header = oldgnu_fixup_header,
174 .decode_header = oldgnu_get_sparse_info
176 /* *INDENT-ON* */
178 /* *INDENT-OFF* */
179 static struct tar_sparse_optab const star_optab =
181 .init = NULL, /* No init function */
182 .done = NULL, /* No done function */
183 .sparse_member_p = star_sparse_member_p,
184 .fixup_header = star_fixup_header,
185 .decode_header = star_get_sparse_info
187 /* *INDENT-ON* */
189 /* GNU PAX sparse file format. There are several versions:
190 * 0.0
192 The initial version of sparse format used by tar 1.14-1.15.1.
193 The sparse file map is stored in x header:
195 GNU.sparse.size Real size of the stored file
196 GNU.sparse.numblocks Number of blocks in the sparse map repeat numblocks time
197 GNU.sparse.offset Offset of the next data block
198 GNU.sparse.numbytes Size of the next data block end repeat
200 This has been reported as conflicting with the POSIX specs. The reason is
201 that offsets and sizes of non-zero data blocks were stored in multiple instances
202 of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas POSIX requires the
203 latest occurrence of the variable to override all previous occurrences.
205 To avoid this incompatibility two following versions were introduced.
207 * 0.1
209 Used by tar 1.15.2 -- 1.15.91 (alpha releases).
211 The sparse file map is stored in x header:
213 GNU.sparse.size Real size of the stored file
214 GNU.sparse.numblocks Number of blocks in the sparse map
215 GNU.sparse.map Map of non-null data chunks. A string consisting of comma-separated
216 values "offset,size[,offset,size]..."
218 The resulting GNU.sparse.map string can be *very* long. While POSIX does not impose
219 any limit on the length of a x header variable, this can confuse some tars.
221 * 1.0
223 Starting from this version, the exact sparse format version is specified explicitly
224 in the header using the following variables:
226 GNU.sparse.major Major version
227 GNU.sparse.minor Minor version
229 X header keeps the following variables:
231 GNU.sparse.name Real file name of the sparse file
232 GNU.sparse.realsize Real size of the stored file (corresponds to the old GNU.sparse.size
233 variable)
235 The name field of the ustar header is constructed using the pattern "%d/GNUSparseFile.%p/%f".
237 The sparse map itself is stored in the file data block, preceding the actual file data.
238 It consists of a series of octal numbers of arbitrary length, delimited by newlines.
239 The map is padded with nulls to the nearest block boundary.
241 The first number gives the number of entries in the map. Following are map entries, each one
242 consisting of two numbers giving the offset and size of the data block it describes.
244 The format is designed in such a way that non-posix aware tars and tars not supporting
245 GNU.sparse.* keywords will extract each sparse file in its condensed form with the file map
246 attached and will place it into a separate directory. Then, using a simple program it would be
247 possible to expand the file to its original form even without GNU tar.
249 By default, v.1.0 archives are created. To use other formats, --sparse-version option is provided.
250 Additionally, v.0.0 can be obtained by deleting GNU.sparse.map from 0.1 format:
251 --sparse-version 0.1 --pax-option delete=GNU.sparse.map
254 static struct tar_sparse_optab const pax_optab = {
255 .init = NULL, /* No init function */
256 .done = NULL, /* No done function */
257 .sparse_member_p = pax_sparse_member_p,
258 .fixup_header = NULL, /* No fixup_header function */
259 .decode_header = pax_decode_header
262 /* --------------------------------------------------------------------------------------------- */
263 /*** file scope functions ************************************************************************/
264 /* --------------------------------------------------------------------------------------------- */
266 static gboolean
267 decode_num (uintmax_t *num, const char *arg, uintmax_t maxval)
269 char *arg_lim;
270 gboolean overflow;
272 *num = stoint (arg, &arg_lim, &overflow, 0, maxval);
273 return (((arg_lim == arg ? 1 : 0) | (*arg_lim != '\0') | (overflow ? 1 : 0)) == 0);
276 /* --------------------------------------------------------------------------------------------- */
278 static gboolean
279 sparse_select_optab (const tar_super_t *archive, struct tar_sparse_file *file)
281 switch (archive->type)
283 case TAR_V7:
284 case TAR_USTAR:
285 return FALSE;
287 case TAR_OLDGNU:
288 case TAR_GNU: /* FIXME: This one should disappear? */
289 file->optab = &oldgnu_optab;
290 break;
292 case TAR_POSIX:
293 file->optab = &pax_optab;
294 break;
296 case TAR_STAR:
297 file->optab = &star_optab;
298 break;
300 default:
301 return FALSE;
304 return TRUE;
307 /* --------------------------------------------------------------------------------------------- */
309 static gboolean
310 sparse_init (tar_super_t *archive, struct tar_sparse_file *file)
312 memset (file, 0, sizeof (*file));
314 if (!sparse_select_optab (archive, file))
315 return FALSE;
317 if (file->optab->init != NULL)
318 return file->optab->init (file);
320 return TRUE;
323 /* --------------------------------------------------------------------------------------------- */
325 static gboolean
326 sparse_done (struct tar_sparse_file *file)
328 if (file->optab->done != NULL)
329 return file->optab->done (file);
331 return TRUE;
334 /* --------------------------------------------------------------------------------------------- */
336 static gboolean
337 sparse_member_p (struct tar_sparse_file *file)
339 if (file->optab->sparse_member_p != NULL)
340 return file->optab->sparse_member_p (file);
342 return FALSE;
345 /* --------------------------------------------------------------------------------------------- */
347 static gboolean
348 sparse_fixup_header (struct tar_sparse_file *file)
350 if (file->optab->fixup_header != NULL)
351 return file->optab->fixup_header (file);
353 return TRUE;
356 /* --------------------------------------------------------------------------------------------- */
358 static gboolean
359 sparse_decode_header (tar_super_t *archive, struct tar_sparse_file *file)
361 if (file->optab->decode_header != NULL)
362 return file->optab->decode_header (archive, file);
364 return TRUE;
367 /* --------------------------------------------------------------------------------------------- */
369 static inline void
370 sparse_add_map (struct tar_stat_info *st, struct sp_array *sp)
372 if (st->sparse_map == NULL)
373 st->sparse_map = g_array_sized_new (FALSE, FALSE, sizeof (struct sp_array), 1);
374 g_array_append_val (st->sparse_map, *sp);
377 /* --------------------------------------------------------------------------------------------- */
380 * Add a sparse item to the sparse file
382 static enum oldgnu_add_status
383 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
385 struct sp_array sp;
386 off_t size;
388 if (s->numbytes[0] == '\0')
389 return add_finish;
391 sp.offset = OFF_FROM_HEADER (s->offset);
392 sp.numbytes = OFF_FROM_HEADER (s->numbytes);
394 if (sp.offset < 0 || sp.numbytes < 0 || ckd_add (&size, sp.offset, sp.numbytes)
395 || file->stat_info->stat.st_size < size || file->stat_info->archive_file_size < 0)
396 return add_fail;
398 sparse_add_map (file->stat_info, &sp);
400 return add_ok;
403 /* --------------------------------------------------------------------------------------------- */
405 static gboolean
406 oldgnu_sparse_member_p (struct tar_sparse_file *file)
408 (void) file;
410 return current_header->header.typeflag == GNUTYPE_SPARSE;
413 /* --------------------------------------------------------------------------------------------- */
415 static gboolean
416 oldgnu_fixup_header (struct tar_sparse_file *file)
418 /* NOTE! st_size was initialized from the header which actually contains archived size.
419 The following fixes it */
420 off_t realsize;
422 realsize = OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
423 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
424 file->stat_info->stat.st_size = MAX (0, realsize);
426 return (realsize >= 0);
429 /* --------------------------------------------------------------------------------------------- */
432 * Convert old GNU format sparse data to internal representation.
434 static gboolean
435 oldgnu_get_sparse_info (tar_super_t *archive, struct tar_sparse_file *file)
437 size_t i;
438 union block *h = current_header;
439 gboolean ext_p;
440 enum oldgnu_add_status rc = add_fail;
442 if (file->stat_info->sparse_map != NULL)
443 g_array_set_size (file->stat_info->sparse_map, 0);
445 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
447 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
448 if (rc != add_ok)
449 break;
452 for (ext_p = h->oldgnu_header.isextended != 0; rc == add_ok && ext_p;
453 ext_p = h->sparse_header.isextended != 0)
455 h = tar_find_next_block (archive);
456 if (h == NULL)
457 return FALSE;
459 tar_set_next_block_after (h);
461 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
462 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
465 return (rc != add_fail);
468 /* --------------------------------------------------------------------------------------------- */
470 static gboolean
471 star_sparse_member_p (struct tar_sparse_file *file)
473 (void) file;
475 return current_header->header.typeflag == GNUTYPE_SPARSE;
478 /* --------------------------------------------------------------------------------------------- */
480 static gboolean
481 star_fixup_header (struct tar_sparse_file *file)
483 /* NOTE! st_size was initialized from the header which actually contains archived size.
484 The following fixes it */
485 off_t realsize;
487 realsize = OFF_FROM_HEADER (current_header->star_in_header.realsize);
488 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
489 file->stat_info->stat.st_size = MAX (0, realsize);
491 return (realsize >= 0);
494 /* --------------------------------------------------------------------------------------------- */
497 * Convert STAR format sparse data to internal representation
499 static gboolean
500 star_get_sparse_info (tar_super_t *archive, struct tar_sparse_file *file)
502 size_t i;
503 union block *h = current_header;
504 gboolean ext_p = TRUE;
505 enum oldgnu_add_status rc = add_ok;
507 if (file->stat_info->sparse_map != NULL)
508 g_array_set_size (file->stat_info->sparse_map, 0);
510 if (h->star_in_header.prefix[0] == '\0' && h->star_in_header.sp[0].offset[10] != '\0')
512 /* Old star format */
513 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
515 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
516 if (rc != add_ok)
517 break;
520 ext_p = h->star_in_header.isextended != 0;
523 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended != 0)
525 h = tar_find_next_block (archive);
526 if (h == NULL)
527 return FALSE;
529 tar_set_next_block_after (h);
531 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
532 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
534 file->dumped_size += BLOCKSIZE;
537 return (rc != add_fail);
540 /* --------------------------------------------------------------------------------------------- */
542 static gboolean
543 pax_sparse_member_p (struct tar_sparse_file *file)
545 return file->stat_info->sparse_map != NULL && file->stat_info->sparse_map->len > 0
546 && file->stat_info->sparse_major > 0;
549 /* --------------------------------------------------------------------------------------------- */
551 static gboolean
552 pax_decode_header (tar_super_t *archive, struct tar_sparse_file *file)
554 if (file->stat_info->sparse_major > 0)
556 uintmax_t u;
557 char nbuf[UINTMAX_STRSIZE_BOUND];
558 union block *blk;
559 char *p;
560 size_t sparse_map_len;
561 size_t i;
562 off_t start;
564 start = tar_current_block_ordinal (archive);
565 tar_set_next_block_after (current_header);
566 blk = tar_find_next_block (archive);
567 if (blk == NULL)
568 /* unexpected EOF in archive */
569 return FALSE;
570 p = blk->buffer;
571 COPY_BUF (archive, blk, nbuf, p);
573 if (!decode_num (&u, nbuf, SIZE_MAX))
575 /* malformed sparse archive member */
576 return FALSE;
579 if (file->stat_info->sparse_map == NULL)
580 file->stat_info->sparse_map =
581 g_array_sized_new (FALSE, FALSE, sizeof (struct sp_array), u);
582 else
583 g_array_set_size (file->stat_info->sparse_map, u);
585 sparse_map_len = u;
587 for (i = 0; i < sparse_map_len; i++)
589 struct sp_array sp;
590 off_t size;
592 COPY_BUF (archive, blk, nbuf, p);
593 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
595 /* malformed sparse archive member */
596 return FALSE;
598 sp.offset = u;
599 COPY_BUF (archive, blk, nbuf, p);
600 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)) || ckd_add (&size, sp.offset, u)
601 || file->stat_info->stat.st_size < size)
603 /* malformed sparse archive member */
604 return FALSE;
606 sp.numbytes = u;
607 sparse_add_map (file->stat_info, &sp);
610 tar_set_next_block_after (blk);
612 file->dumped_size += BLOCKSIZE * (tar_current_block_ordinal (archive) - start);
615 return TRUE;
618 /* --------------------------------------------------------------------------------------------- */
619 /*** public functions ****************************************************************************/
620 /* --------------------------------------------------------------------------------------------- */
622 gboolean
623 tar_sparse_member_p (tar_super_t *archive, struct tar_stat_info *st)
625 struct tar_sparse_file file;
627 if (!sparse_init (archive, &file))
628 return FALSE;
630 file.stat_info = st;
631 return sparse_member_p (&file);
634 /* --------------------------------------------------------------------------------------------- */
636 gboolean
637 tar_sparse_fixup_header (tar_super_t *archive, struct tar_stat_info *st)
639 struct tar_sparse_file file;
641 if (!sparse_init (archive, &file))
642 return FALSE;
644 file.stat_info = st;
645 return sparse_fixup_header (&file);
648 /* --------------------------------------------------------------------------------------------- */
650 enum dump_status
651 tar_sparse_skip_file (tar_super_t *archive, struct tar_stat_info *st)
653 gboolean rc = TRUE;
654 struct tar_sparse_file file;
656 if (!sparse_init (archive, &file))
657 return dump_status_not_implemented;
659 file.stat_info = st;
660 file.fd = -1;
662 rc = sparse_decode_header (archive, &file);
663 (void) tar_skip_file (archive, file.stat_info->archive_file_size - file.dumped_size);
664 return (sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
667 /* --------------------------------------------------------------------------------------------- */