Fix incremental archiving of renamed directories.
[tar/ericb.git] / src / sparse.c
blob9fde5080d5304a32253fd67d72e1a99a0c69bf56
1 /* Functions for dealing with sparse files
3 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any later
8 version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13 Public License for more details.
15 You should have received a copy of the GNU General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 #include <system.h>
20 #include <inttostr.h>
21 #include <quotearg.h>
22 #include "common.h"
24 struct tar_sparse_file;
25 static bool sparse_select_optab (struct tar_sparse_file *file);
/* Phase passed to the optab scan_block hook by sparse_scan_file.  */
enum sparse_scan_state
  {
    scan_begin,		/* sent once, before any block is read */
    scan_block,		/* sent while blocks are being analyzed */
    scan_end		/* sent once, after the read loop finishes */
  };
/* Table of format-specific operations.  Any member may be NULL; the
   tar_sparse_* wrappers below supply a fixed result in that case.  */
struct tar_sparse_optab
{
  bool (*init) (struct tar_sparse_file *file);
  bool (*done) (struct tar_sparse_file *file);
  bool (*sparse_member_p) (struct tar_sparse_file *file);
  bool (*dump_header) (struct tar_sparse_file *file);
  bool (*fixup_header) (struct tar_sparse_file *file);
  bool (*decode_header) (struct tar_sparse_file *file);
  bool (*scan_block) (struct tar_sparse_file *file,
		      enum sparse_scan_state state,
		      void *block);
  bool (*dump_region) (struct tar_sparse_file *file, size_t index);
  bool (*extract_region) (struct tar_sparse_file *file, size_t index);
};
/* State shared by all sparse-file operations on one archive member.  */
struct tar_sparse_file
{
  /* File descriptor.  */
  int fd;

  /* Is fd seekable?  */
  bool seekable;

  /* Current offset in fd if seekable==false.  Otherwise unused.  */
  off_t offset;

  /* Number of bytes actually written to the archive.  */
  off_t dumped_size;

  /* Information about the file.  */
  struct tar_stat_info *stat_info;

  /* Operation table.  */
  struct tar_sparse_optab const *optab;

  /* Any additional data optab calls might require.  */
  void *closure;
};
62 /* Dump zeros to file->fd until offset is reached. It is used instead of
63 lseek if the output file is not seekable */
64 static bool
65 dump_zeros (struct tar_sparse_file *file, off_t offset)
67 static char const zero_buf[BLOCKSIZE];
69 if (offset < file->offset)
71 errno = EINVAL;
72 return false;
75 while (file->offset < offset)
77 size_t size = (BLOCKSIZE < offset - file->offset
78 ? BLOCKSIZE
79 : offset - file->offset);
80 ssize_t wrbytes;
82 wrbytes = write (file->fd, zero_buf, size);
83 if (wrbytes <= 0)
85 if (wrbytes == 0)
86 errno = EINVAL;
87 return false;
89 file->offset += wrbytes;
92 return true;
95 static bool
96 tar_sparse_member_p (struct tar_sparse_file *file)
98 if (file->optab->sparse_member_p)
99 return file->optab->sparse_member_p (file);
100 return false;
103 static bool
104 tar_sparse_init (struct tar_sparse_file *file)
106 memset (file, 0, sizeof *file);
108 if (!sparse_select_optab (file))
109 return false;
111 if (file->optab->init)
112 return file->optab->init (file);
114 return true;
117 static bool
118 tar_sparse_done (struct tar_sparse_file *file)
120 if (file->optab->done)
121 return file->optab->done (file);
122 return true;
125 static bool
126 tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
127 void *block)
129 if (file->optab->scan_block)
130 return file->optab->scan_block (file, state, block);
131 return true;
134 static bool
135 tar_sparse_dump_region (struct tar_sparse_file *file, size_t i)
137 if (file->optab->dump_region)
138 return file->optab->dump_region (file, i);
139 return false;
142 static bool
143 tar_sparse_extract_region (struct tar_sparse_file *file, size_t i)
145 if (file->optab->extract_region)
146 return file->optab->extract_region (file, i);
147 return false;
150 static bool
151 tar_sparse_dump_header (struct tar_sparse_file *file)
153 if (file->optab->dump_header)
154 return file->optab->dump_header (file);
155 return false;
158 static bool
159 tar_sparse_decode_header (struct tar_sparse_file *file)
161 if (file->optab->decode_header)
162 return file->optab->decode_header (file);
163 return true;
166 static bool
167 tar_sparse_fixup_header (struct tar_sparse_file *file)
169 if (file->optab->fixup_header)
170 return file->optab->fixup_header (file);
171 return true;
175 static bool
176 lseek_or_error (struct tar_sparse_file *file, off_t offset)
178 if (file->seekable
179 ? lseek (file->fd, offset, SEEK_SET) < 0
180 : ! dump_zeros (file, offset))
182 seek_diag_details (file->stat_info->orig_file_name, offset);
183 return false;
185 return true;
/* Return true if the SIZE bytes starting at BUFFER are all zero (which
   includes the case SIZE == 0).  Return false as soon as a nonzero
   byte, i.e. useful data, is found.  */
static bool
zero_block_p (char const *buffer, size_t size)
{
  char const *limit = buffer + size;
  char const *p;

  for (p = buffer; p != limit; p++)
    if (*p != 0)
      return false;
  return true;
}
200 static void
201 sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
203 struct sp_array *sparse_map = st->sparse_map;
204 size_t avail = st->sparse_map_avail;
205 if (avail == st->sparse_map_size)
206 st->sparse_map = sparse_map =
207 x2nrealloc (sparse_map, &st->sparse_map_size, sizeof *sparse_map);
208 sparse_map[avail] = *sp;
209 st->sparse_map_avail = avail + 1;
212 /* Scan the sparse file and create its map */
213 static bool
214 sparse_scan_file (struct tar_sparse_file *file)
216 struct tar_stat_info *st = file->stat_info;
217 int fd = file->fd;
218 char buffer[BLOCKSIZE];
219 size_t count;
220 off_t offset = 0;
221 struct sp_array sp = {0, 0};
223 if (!lseek_or_error (file, 0))
224 return false;
226 st->archive_file_size = 0;
228 if (!tar_sparse_scan (file, scan_begin, NULL))
229 return false;
231 while ((count = safe_read (fd, buffer, sizeof buffer)) != 0
232 && count != SAFE_READ_ERROR)
234 /* Analyze the block. */
235 if (zero_block_p (buffer, count))
237 if (sp.numbytes)
239 sparse_add_map (st, &sp);
240 sp.numbytes = 0;
241 if (!tar_sparse_scan (file, scan_block, NULL))
242 return false;
245 else
247 if (sp.numbytes == 0)
248 sp.offset = offset;
249 sp.numbytes += count;
250 st->archive_file_size += count;
251 if (!tar_sparse_scan (file, scan_block, buffer))
252 return false;
255 offset += count;
258 if (sp.numbytes == 0)
259 sp.offset = offset;
261 sparse_add_map (st, &sp);
262 st->archive_file_size += count;
263 return tar_sparse_scan (file, scan_end, NULL);
266 static struct tar_sparse_optab const oldgnu_optab;
267 static struct tar_sparse_optab const star_optab;
268 static struct tar_sparse_optab const pax_optab;
270 static bool
271 sparse_select_optab (struct tar_sparse_file *file)
273 switch (current_format == DEFAULT_FORMAT ? archive_format : current_format)
275 case V7_FORMAT:
276 case USTAR_FORMAT:
277 return false;
279 case OLDGNU_FORMAT:
280 case GNU_FORMAT: /*FIXME: This one should disappear? */
281 file->optab = &oldgnu_optab;
282 break;
284 case POSIX_FORMAT:
285 file->optab = &pax_optab;
286 break;
288 case STAR_FORMAT:
289 file->optab = &star_optab;
290 break;
292 default:
293 return false;
295 return true;
298 static bool
299 sparse_dump_region (struct tar_sparse_file *file, size_t i)
301 union block *blk;
302 off_t bytes_left = file->stat_info->sparse_map[i].numbytes;
304 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
305 return false;
307 while (bytes_left > 0)
309 size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
310 size_t bytes_read;
312 blk = find_next_block ();
313 bytes_read = safe_read (file->fd, blk->buffer, bufsize);
314 if (bytes_read == SAFE_READ_ERROR)
316 read_diag_details (file->stat_info->orig_file_name,
317 (file->stat_info->sparse_map[i].offset
318 + file->stat_info->sparse_map[i].numbytes
319 - bytes_left),
320 bufsize);
321 return false;
324 memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
325 bytes_left -= bytes_read;
326 file->dumped_size += bytes_read;
327 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
328 set_next_block_after (blk);
331 return true;
334 static bool
335 sparse_extract_region (struct tar_sparse_file *file, size_t i)
337 size_t write_size;
339 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
340 return false;
342 write_size = file->stat_info->sparse_map[i].numbytes;
344 if (write_size == 0)
346 /* Last block of the file is a hole */
347 if (file->seekable && sys_truncate (file->fd))
348 truncate_warn (file->stat_info->orig_file_name);
350 else while (write_size > 0)
352 size_t count;
353 size_t wrbytes = (write_size > BLOCKSIZE) ? BLOCKSIZE : write_size;
354 union block *blk = find_next_block ();
355 if (!blk)
357 ERROR ((0, 0, _("Unexpected EOF in archive")));
358 return false;
360 set_next_block_after (blk);
361 count = full_write (file->fd, blk->buffer, wrbytes);
362 write_size -= count;
363 file->dumped_size += count;
364 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
365 file->offset += count;
366 if (count != wrbytes)
368 write_error_details (file->stat_info->orig_file_name,
369 count, wrbytes);
370 return false;
373 return true;
378 /* Interface functions */
379 enum dump_status
380 sparse_dump_file (int fd, struct tar_stat_info *st)
382 bool rc;
383 struct tar_sparse_file file;
385 if (!tar_sparse_init (&file))
386 return dump_status_not_implemented;
388 file.stat_info = st;
389 file.fd = fd;
390 file.seekable = true; /* File *must* be seekable for dump to work */
392 rc = sparse_scan_file (&file);
393 if (rc && file.optab->dump_region)
395 tar_sparse_dump_header (&file);
397 if (fd >= 0)
399 size_t i;
401 mv_begin (file.stat_info);
402 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
403 rc = tar_sparse_dump_region (&file, i);
404 mv_end ();
408 pad_archive (file.stat_info->archive_file_size - file.dumped_size);
409 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
412 bool
413 sparse_member_p (struct tar_stat_info *st)
415 struct tar_sparse_file file;
417 if (!tar_sparse_init (&file))
418 return false;
419 file.stat_info = st;
420 return tar_sparse_member_p (&file);
423 bool
424 sparse_fixup_header (struct tar_stat_info *st)
426 struct tar_sparse_file file;
428 if (!tar_sparse_init (&file))
429 return false;
430 file.stat_info = st;
431 return tar_sparse_fixup_header (&file);
434 enum dump_status
435 sparse_extract_file (int fd, struct tar_stat_info *st, off_t *size)
437 bool rc = true;
438 struct tar_sparse_file file;
439 size_t i;
441 if (!tar_sparse_init (&file))
442 return dump_status_not_implemented;
444 file.stat_info = st;
445 file.fd = fd;
446 file.seekable = lseek (fd, 0, SEEK_SET) == 0;
447 file.offset = 0;
449 rc = tar_sparse_decode_header (&file);
450 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
451 rc = tar_sparse_extract_region (&file, i);
452 *size = file.stat_info->archive_file_size - file.dumped_size;
453 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
456 enum dump_status
457 sparse_skip_file (struct tar_stat_info *st)
459 bool rc = true;
460 struct tar_sparse_file file;
462 if (!tar_sparse_init (&file))
463 return dump_status_not_implemented;
465 file.stat_info = st;
466 file.fd = -1;
468 rc = tar_sparse_decode_header (&file);
469 skip_file (file.stat_info->archive_file_size - file.dumped_size);
470 return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
474 static bool
475 check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
477 if (!lseek_or_error (file, beg))
478 return false;
480 while (beg < end)
482 size_t bytes_read;
483 size_t rdsize = BLOCKSIZE < end - beg ? BLOCKSIZE : end - beg;
484 char diff_buffer[BLOCKSIZE];
486 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
487 if (bytes_read == SAFE_READ_ERROR)
489 read_diag_details (file->stat_info->orig_file_name,
490 beg,
491 rdsize);
492 return false;
494 if (!zero_block_p (diff_buffer, bytes_read))
496 char begbuf[INT_BUFSIZE_BOUND (off_t)];
497 report_difference (file->stat_info,
498 _("File fragment at %s is not a hole"),
499 offtostr (beg, begbuf));
500 return false;
503 beg += bytes_read;
505 return true;
508 static bool
509 check_data_region (struct tar_sparse_file *file, size_t i)
511 size_t size_left;
513 if (!lseek_or_error (file, file->stat_info->sparse_map[i].offset))
514 return false;
515 size_left = file->stat_info->sparse_map[i].numbytes;
516 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
518 while (size_left > 0)
520 size_t bytes_read;
521 size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
522 char diff_buffer[BLOCKSIZE];
524 union block *blk = find_next_block ();
525 if (!blk)
527 ERROR ((0, 0, _("Unexpected EOF in archive")));
528 return false;
530 set_next_block_after (blk);
531 bytes_read = safe_read (file->fd, diff_buffer, rdsize);
532 if (bytes_read == SAFE_READ_ERROR)
534 read_diag_details (file->stat_info->orig_file_name,
535 (file->stat_info->sparse_map[i].offset
536 + file->stat_info->sparse_map[i].numbytes
537 - size_left),
538 rdsize);
539 return false;
541 file->dumped_size += bytes_read;
542 size_left -= bytes_read;
543 mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
544 if (memcmp (blk->buffer, diff_buffer, rdsize))
546 report_difference (file->stat_info, _("Contents differ"));
547 return false;
550 return true;
553 bool
554 sparse_diff_file (int fd, struct tar_stat_info *st)
556 bool rc = true;
557 struct tar_sparse_file file;
558 size_t i;
559 off_t offset = 0;
561 if (!tar_sparse_init (&file))
562 return dump_status_not_implemented;
564 file.stat_info = st;
565 file.fd = fd;
566 file.seekable = true; /* File *must* be seekable for compare to work */
568 rc = tar_sparse_decode_header (&file);
569 mv_begin (st);
570 for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
572 rc = check_sparse_region (&file,
573 offset, file.stat_info->sparse_map[i].offset)
574 && check_data_region (&file, i);
575 offset = file.stat_info->sparse_map[i].offset
576 + file.stat_info->sparse_map[i].numbytes;
579 if (!rc)
580 skip_file (file.stat_info->archive_file_size - file.dumped_size);
581 mv_end ();
583 tar_sparse_done (&file);
584 return rc;
588 /* Old GNU Format. The sparse file information is stored in the
589 oldgnu_header in the following manner:
591 The header is marked with type 'S'. Its `size' field contains
592 the cumulative size of all non-empty blocks of the file. The
593 actual file size is stored in `realsize' member of oldgnu_header.
595 The map of the file is stored in a list of `struct sparse'.
596 Each struct contains offset to the block of data and its
597 size (both as octal numbers). The first file header contains
598 at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
599 contains more structs, then the field `isextended' of the main
600 header is set to 1 (binary) and the `struct sparse_header'
601 header follows, containing at most 21 following structs
602 (SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
603 field of the extended header is set and the next extension header
604 follows, etc... */
/* Result of feeding one on-disk sparse descriptor to
   oldgnu_add_sparse.  */
enum oldgnu_add_status
  {
    add_ok,		/* descriptor accepted and stored in the map */
    add_finish,		/* empty descriptor: end of the list */
    add_fail		/* descriptor is invalid */
  };
613 static bool
614 oldgnu_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
616 return current_header->header.typeflag == GNUTYPE_SPARSE;
619 /* Add a sparse item to the sparse file and its obstack */
620 static enum oldgnu_add_status
621 oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
623 struct sp_array sp;
625 if (s->numbytes[0] == '\0')
626 return add_finish;
627 sp.offset = OFF_FROM_HEADER (s->offset);
628 sp.numbytes = SIZE_FROM_HEADER (s->numbytes);
629 if (sp.offset < 0
630 || file->stat_info->stat.st_size < sp.offset + sp.numbytes
631 || file->stat_info->archive_file_size < 0)
632 return add_fail;
634 sparse_add_map (file->stat_info, &sp);
635 return add_ok;
638 static bool
639 oldgnu_fixup_header (struct tar_sparse_file *file)
641 /* NOTE! st_size was initialized from the header
642 which actually contains archived size. The following fixes it */
643 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
644 file->stat_info->stat.st_size =
645 OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
646 return true;
649 /* Convert old GNU format sparse data to internal representation */
650 static bool
651 oldgnu_get_sparse_info (struct tar_sparse_file *file)
653 size_t i;
654 union block *h = current_header;
655 int ext_p;
656 enum oldgnu_add_status rc;
658 file->stat_info->sparse_map_avail = 0;
659 for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
661 rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
662 if (rc != add_ok)
663 break;
666 for (ext_p = h->oldgnu_header.isextended;
667 rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
669 h = find_next_block ();
670 if (!h)
672 ERROR ((0, 0, _("Unexpected EOF in archive")));
673 return false;
675 set_next_block_after (h);
676 for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
677 rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
680 if (rc == add_fail)
682 ERROR ((0, 0, _("%s: invalid sparse archive member"),
683 file->stat_info->orig_file_name));
684 return false;
686 return true;
689 static void
690 oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
691 struct sparse *sp, size_t sparse_size)
693 for (; *pindex < file->stat_info->sparse_map_avail
694 && sparse_size > 0; sparse_size--, sp++, ++*pindex)
696 OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
697 sp->offset);
698 SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
699 sp->numbytes);
703 static bool
704 oldgnu_dump_header (struct tar_sparse_file *file)
706 off_t block_ordinal = current_block_ordinal ();
707 union block *blk;
708 size_t i;
710 blk = start_header (file->stat_info);
711 blk->header.typeflag = GNUTYPE_SPARSE;
712 if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
713 blk->oldgnu_header.isextended = 1;
715 /* Store the real file size */
716 OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
717 /* Store the effective (shrunken) file size */
718 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
720 i = 0;
721 oldgnu_store_sparse_info (file, &i,
722 blk->oldgnu_header.sp,
723 SPARSES_IN_OLDGNU_HEADER);
724 blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
725 finish_header (file->stat_info, blk, block_ordinal);
727 while (i < file->stat_info->sparse_map_avail)
729 blk = find_next_block ();
730 memset (blk->buffer, 0, BLOCKSIZE);
731 oldgnu_store_sparse_info (file, &i,
732 blk->sparse_header.sp,
733 SPARSES_IN_SPARSE_HEADER);
734 if (i < file->stat_info->sparse_map_avail)
735 blk->sparse_header.isextended = 1;
736 set_next_block_after (blk);
738 return true;
741 static struct tar_sparse_optab const oldgnu_optab = {
742 NULL, /* No init function */
743 NULL, /* No done function */
744 oldgnu_sparse_member_p,
745 oldgnu_dump_header,
746 oldgnu_fixup_header,
747 oldgnu_get_sparse_info,
748 NULL, /* No scan_block function */
749 sparse_dump_region,
750 sparse_extract_region,
754 /* Star */
756 static bool
757 star_sparse_member_p (struct tar_sparse_file *file __attribute__ ((unused)))
759 return current_header->header.typeflag == GNUTYPE_SPARSE;
762 static bool
763 star_fixup_header (struct tar_sparse_file *file)
765 /* NOTE! st_size was initialized from the header
766 which actually contains archived size. The following fixes it */
767 file->stat_info->archive_file_size = file->stat_info->stat.st_size;
768 file->stat_info->stat.st_size =
769 OFF_FROM_HEADER (current_header->star_in_header.realsize);
770 return true;
773 /* Convert STAR format sparse data to internal representation */
774 static bool
775 star_get_sparse_info (struct tar_sparse_file *file)
777 size_t i;
778 union block *h = current_header;
779 int ext_p;
780 enum oldgnu_add_status rc = add_ok;
782 file->stat_info->sparse_map_avail = 0;
784 if (h->star_in_header.prefix[0] == '\0'
785 && h->star_in_header.sp[0].offset[10] != '\0')
787 /* Old star format */
788 for (i = 0; i < SPARSES_IN_STAR_HEADER; i++)
790 rc = oldgnu_add_sparse (file, &h->star_in_header.sp[i]);
791 if (rc != add_ok)
792 break;
794 ext_p = h->star_in_header.isextended;
796 else
797 ext_p = 1;
799 for (; rc == add_ok && ext_p; ext_p = h->star_ext_header.isextended)
801 h = find_next_block ();
802 if (!h)
804 ERROR ((0, 0, _("Unexpected EOF in archive")));
805 return false;
807 set_next_block_after (h);
808 for (i = 0; i < SPARSES_IN_STAR_EXT_HEADER && rc == add_ok; i++)
809 rc = oldgnu_add_sparse (file, &h->star_ext_header.sp[i]);
812 if (rc == add_fail)
814 ERROR ((0, 0, _("%s: invalid sparse archive member"),
815 file->stat_info->orig_file_name));
816 return false;
818 return true;
822 static struct tar_sparse_optab const star_optab = {
823 NULL, /* No init function */
824 NULL, /* No done function */
825 star_sparse_member_p,
826 NULL,
827 star_fixup_header,
828 star_get_sparse_info,
829 NULL, /* No scan_block function */
830 NULL, /* No dump region function */
831 sparse_extract_region,
835 /* GNU PAX sparse file format. There are several versions:
837 * 0.0
839 The initial version of sparse format used by tar 1.14-1.15.1.
840 The sparse file map is stored in x header:
842 GNU.sparse.size Real size of the stored file
843 GNU.sparse.numblocks Number of blocks in the sparse map
844 repeat numblocks times
845 GNU.sparse.offset Offset of the next data block
846 GNU.sparse.numbytes Size of the next data block
847 end repeat
849 This has been reported as conflicting with the POSIX specs. The reason is
850 that offsets and sizes of non-zero data blocks were stored in multiple
851 instances of GNU.sparse.offset/GNU.sparse.numbytes variables, whereas
852 POSIX requires the latest occurrence of the variable to override all
853 previous occurrences.
855 To avoid this incompatibility two following versions were introduced.
857 * 0.1
859 Used by tar 1.15.2 -- 1.15.91 (alpha releases).
861 The sparse file map is stored in
862 x header:
864 GNU.sparse.size Real size of the stored file
865 GNU.sparse.numblocks Number of blocks in the sparse map
866 GNU.sparse.map Map of non-null data chunks. A string consisting
867 of comma-separated values "offset,size[,offset,size]..."
869 The resulting GNU.sparse.map string can be *very* long. While POSIX does not
870 impose any limit on the length of a x header variable, this can confuse some
871 tars.
873 * 1.0
875 Starting from this version, the exact sparse format version is specified
876 explicitly in the header using the following variables:
878 GNU.sparse.major Major version
879 GNU.sparse.minor Minor version
881 X header keeps the following variables:
883 GNU.sparse.name Real file name of the sparse file
884 GNU.sparse.realsize Real size of the stored file (corresponds to the old
885 GNU.sparse.size variable)
887 The name field of the ustar header is constructed using the pattern
888 "%d/GNUSparseFile.%p/%f".
890 The sparse map itself is stored in the file data block, preceding the actual
891 file data. It consists of a series of octal numbers of arbitrary length,
892 delimited by newlines. The map is padded with nulls to the nearest block
893 boundary.
895 The first number gives the number of entries in the map. Following are map
896 entries, each one consisting of two numbers giving the offset and size of
897 the data block it describes.
899 The format is designed in such a way that non-posix aware tars and tars not
900 supporting GNU.sparse.* keywords will extract each sparse file in its
901 condensed form with the file map attached and will place it into a separate
902 directory. Then, using a simple program it would be possible to expand the
903 file to its original form even without GNU tar.
905 By default, v.1.0 archives are created. To use other formats,
906 --sparse-version option is provided. Additionally, v.0.0 can be obtained
907 by deleting GNU.sparse.map from 0.1 format: --sparse-version 0.1
908 --pax-option delete=GNU.sparse.map
911 static bool
912 pax_sparse_member_p (struct tar_sparse_file *file)
914 return file->stat_info->sparse_map_avail > 0
915 || file->stat_info->sparse_major > 0;
918 static bool
919 pax_dump_header_0 (struct tar_sparse_file *file)
921 off_t block_ordinal = current_block_ordinal ();
922 union block *blk;
923 size_t i;
924 char nbuf[UINTMAX_STRSIZE_BOUND];
925 struct sp_array *map = file->stat_info->sparse_map;
926 char *save_file_name = NULL;
928 /* Store the real file size */
929 xheader_store ("GNU.sparse.size", file->stat_info, NULL);
930 xheader_store ("GNU.sparse.numblocks", file->stat_info, NULL);
932 if (xheader_keyword_deleted_p ("GNU.sparse.map")
933 || tar_sparse_minor == 0)
935 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
937 xheader_store ("GNU.sparse.offset", file->stat_info, &i);
938 xheader_store ("GNU.sparse.numbytes", file->stat_info, &i);
941 else
943 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
944 save_file_name = file->stat_info->file_name;
945 file->stat_info->file_name = xheader_format_name (file->stat_info,
946 "%d/GNUSparseFile.%p/%f", 0);
948 xheader_string_begin (&file->stat_info->xhdr);
949 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
951 if (i)
952 xheader_string_add (&file->stat_info->xhdr, ",");
953 xheader_string_add (&file->stat_info->xhdr,
954 umaxtostr (map[i].offset, nbuf));
955 xheader_string_add (&file->stat_info->xhdr, ",");
956 xheader_string_add (&file->stat_info->xhdr,
957 umaxtostr (map[i].numbytes, nbuf));
959 if (!xheader_string_end (&file->stat_info->xhdr,
960 "GNU.sparse.map"))
962 free (file->stat_info->file_name);
963 file->stat_info->file_name = save_file_name;
964 return false;
967 blk = start_header (file->stat_info);
968 /* Store the effective (shrunken) file size */
969 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
970 finish_header (file->stat_info, blk, block_ordinal);
971 if (save_file_name)
973 free (file->stat_info->file_name);
974 file->stat_info->file_name = save_file_name;
976 return true;
979 static bool
980 pax_dump_header_1 (struct tar_sparse_file *file)
982 off_t block_ordinal = current_block_ordinal ();
983 union block *blk;
984 char *p, *q;
985 size_t i;
986 char nbuf[UINTMAX_STRSIZE_BOUND];
987 off_t size = 0;
988 struct sp_array *map = file->stat_info->sparse_map;
989 char *save_file_name = file->stat_info->file_name;
991 #define COPY_STRING(b,dst,src) do \
993 char *endp = b->buffer + BLOCKSIZE; \
994 char *srcp = src; \
995 while (*srcp) \
997 if (dst == endp) \
999 set_next_block_after (b); \
1000 b = find_next_block (); \
1001 dst = b->buffer; \
1002 endp = b->buffer + BLOCKSIZE; \
1004 *dst++ = *srcp++; \
1006 } while (0)
1008 /* Compute stored file size */
1009 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1010 size += strlen (p) + 1;
1011 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1013 p = umaxtostr (map[i].offset, nbuf);
1014 size += strlen (p) + 1;
1015 p = umaxtostr (map[i].numbytes, nbuf);
1016 size += strlen (p) + 1;
1018 size = (size + BLOCKSIZE - 1) / BLOCKSIZE;
1019 file->stat_info->archive_file_size += size * BLOCKSIZE;
1020 file->dumped_size += size * BLOCKSIZE;
1022 /* Store sparse file identification */
1023 xheader_store ("GNU.sparse.major", file->stat_info, NULL);
1024 xheader_store ("GNU.sparse.minor", file->stat_info, NULL);
1025 xheader_store ("GNU.sparse.name", file->stat_info, NULL);
1026 xheader_store ("GNU.sparse.realsize", file->stat_info, NULL);
1028 file->stat_info->file_name = xheader_format_name (file->stat_info,
1029 "%d/GNUSparseFile.%p/%f", 0);
1031 blk = start_header (file->stat_info);
1032 /* Store the effective (shrunken) file size */
1033 OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);
1034 finish_header (file->stat_info, blk, block_ordinal);
1035 free (file->stat_info->file_name);
1036 file->stat_info->file_name = save_file_name;
1038 blk = find_next_block ();
1039 q = blk->buffer;
1040 p = umaxtostr (file->stat_info->sparse_map_avail, nbuf);
1041 COPY_STRING (blk, q, p);
1042 COPY_STRING (blk, q, "\n");
1043 for (i = 0; i < file->stat_info->sparse_map_avail; i++)
1045 p = umaxtostr (map[i].offset, nbuf);
1046 COPY_STRING (blk, q, p);
1047 COPY_STRING (blk, q, "\n");
1048 p = umaxtostr (map[i].numbytes, nbuf);
1049 COPY_STRING (blk, q, p);
1050 COPY_STRING (blk, q, "\n");
1052 memset (q, 0, BLOCKSIZE - (q - blk->buffer));
1053 set_next_block_after (blk);
1054 return true;
1057 static bool
1058 pax_dump_header (struct tar_sparse_file *file)
1060 file->stat_info->sparse_major = tar_sparse_major;
1061 file->stat_info->sparse_minor = tar_sparse_minor;
1063 return (file->stat_info->sparse_major == 0) ?
1064 pax_dump_header_0 (file) : pax_dump_header_1 (file);
/* Parse ARG as an unsigned decimal number not exceeding MAXVAL.

   ARG must start with a decimal digit and consist of the number only:
   leading white space, a sign, trailing characters and the empty
   string are all rejected.  On success store the value in *NUM and
   return true; otherwise return false and leave *NUM untouched.  */
static bool
decode_num (uintmax_t *num, char const *arg, uintmax_t maxval)
{
  uintmax_t u;
  char *arg_lim;

  /* strtoumax itself would accept leading white space and a sign, so
     insist on a digit up front.  */
  if (! ('0' <= *arg && *arg <= '9'))
    return false;

  /* strtoumax sets errno only on failure; clear it first so that a
     stale ERANGE left by an earlier call is not taken for overflow.  */
  errno = 0;
  u = strtoumax (arg, &arg_lim, 10);

  if (! (u <= maxval && errno != ERANGE) || *arg_lim)
    return false;

  *num = u;
  return true;
}
1085 static bool
1086 pax_decode_header (struct tar_sparse_file *file)
1088 if (file->stat_info->sparse_major > 0)
1090 uintmax_t u;
1091 char nbuf[UINTMAX_STRSIZE_BOUND];
1092 union block *blk;
1093 char *p;
1094 size_t i;
1096 #define COPY_BUF(b,buf,src) do \
1098 char *endp = b->buffer + BLOCKSIZE; \
1099 char *dst = buf; \
1100 do \
1102 if (dst == buf + UINTMAX_STRSIZE_BOUND -1) \
1104 ERROR ((0, 0, _("%s: numeric overflow in sparse archive member"), \
1105 file->stat_info->orig_file_name)); \
1106 return false; \
1108 if (src == endp) \
1110 set_next_block_after (b); \
1111 file->dumped_size += BLOCKSIZE; \
1112 b = find_next_block (); \
1113 src = b->buffer; \
1114 endp = b->buffer + BLOCKSIZE; \
1116 *dst = *src++; \
1118 while (*dst++ != '\n'); \
1119 dst[-1] = 0; \
1120 } while (0)
1122 set_next_block_after (current_header);
1123 file->dumped_size += BLOCKSIZE;
1124 blk = find_next_block ();
1125 p = blk->buffer;
1126 COPY_BUF (blk,nbuf,p);
1127 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1129 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1130 file->stat_info->orig_file_name));
1131 return false;
1133 file->stat_info->sparse_map_size = u;
1134 file->stat_info->sparse_map = xcalloc (file->stat_info->sparse_map_size,
1135 sizeof (*file->stat_info->sparse_map));
1136 file->stat_info->sparse_map_avail = 0;
1137 for (i = 0; i < file->stat_info->sparse_map_size; i++)
1139 struct sp_array sp;
1141 COPY_BUF (blk,nbuf,p);
1142 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (off_t)))
1144 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1145 file->stat_info->orig_file_name));
1146 return false;
1148 sp.offset = u;
1149 COPY_BUF (blk,nbuf,p);
1150 if (!decode_num (&u, nbuf, TYPE_MAXIMUM (size_t)))
1152 ERROR ((0, 0, _("%s: malformed sparse archive member"),
1153 file->stat_info->orig_file_name));
1154 return false;
1156 sp.numbytes = u;
1157 sparse_add_map (file->stat_info, &sp);
1159 set_next_block_after (blk);
1162 return true;
1165 static struct tar_sparse_optab const pax_optab = {
1166 NULL, /* No init function */
1167 NULL, /* No done function */
1168 pax_sparse_member_p,
1169 pax_dump_header,
1170 NULL,
1171 pax_decode_header,
1172 NULL, /* No scan_block function */
1173 sparse_dump_region,
1174 sparse_extract_region,