1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
// Per-timestamp bookkeeping for one member file.
// NOTE(review): the end of this struct and the headers of the structs that
// follow are not visible in this view. Judging by how the fields are used
// later in the file, exthdr_num_data_blocks .. tsdata are reached through a
// 'struct tar_md *', and tar_filename / checksum_calc through a
// 'struct tar_ctx *'.
11 struct timestamp_data
{
12 struct de_timestamp timestamp
;
13 i64 timestamp_unix
; // Same time as .timestamp, for convenience
// Number of 512-byte blocks reserved for the extended-header data.
26 i64 exthdr_num_data_blocks
;
// Running estimate of how many bytes of extended-header data will be needed.
27 i64 extdata_nbytes_needed
;
28 i64 extdata_nbytes_used
;
// One entry per timestamp kind, indexed by the DE_TIMESTAMPIDX_* constants.
30 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
// Name of the tar file being written (shown in the "Creating %s" message).
34 const char *tar_filename
;
36 i64 checksum_calc
; // for temporary use
38 // Data associated with current member file
// Creates the tar context and opens the output tar file, unless that has
// already been done. Returns 1 immediately when c->tar_data is already set.
// NOTE(review): several lines of this function (including its other return
// paths and the else/closing-brace structure) are not visible in this view.
42 int de_tar_create_file(deark
*c
)
44 struct tar_ctx
*tctx
= NULL
;
// Already initialized: nothing more to do.
47 if(c
->tar_data
) return 1;
// Allocate the context and attach it to the deark object.
49 tctx
= de_malloc(c
, sizeof(struct tar_ctx
));
50 c
->tar_data
= (void*)tctx
;
// Writing the archive to stdout is not supported; an error is reported.
52 if(c
->archive_to_stdout
) {
53 tctx
->tar_filename
= "[stdout]";
54 de_err(c
, "TAR to stdout is not implemented");
// Use the caller-supplied archive filename when there is one ...
60 if(c
->output_archive_filename
) {
61 tctx
->tar_filename
= c
->output_archive_filename
;
// ... presumably falling back to this default name otherwise.
64 tctx
->tar_filename
= "output.tar";
67 de_info(c
, "Creating %s", tctx
->tar_filename
);
68 tctx
->outf
= dbuf_create_unmanaged_file(c
, tctx
->tar_filename
,
69 c
->overwrite_mode
, 0);
// NOTE(review): a DBUF_TYPE_NULL dbuf presumably means the file could not be
// created -- confirm against dbuf_create_unmanaged_file().
71 if(tctx
->outf
->btype
==DBUF_TYPE_NULL
) {
// Releases a tar_md object. The visible code frees md->filename.
// NOTE(review): any NULL check on md, and the release of md itself, are not
// visible in this view. Callers pass tctx->md without checking it first.
82 static void destroy_md(deark
*c
, struct tar_md
*md
)
85 de_free(c
, md
->filename
);
// Finishes the tar file: appends 1024 zero bytes (two 512-byte blocks),
// closes the output dbuf, and destroys the per-member metadata.
// NOTE(review): any guard for a missing tar context is not visible here.
89 void de_tar_close_file(deark
*c
)
91 struct tar_ctx
*tctx
= (struct tar_ctx
*)c
->tar_data
;
// Two all-zero 512-byte blocks; tar uses these as its end-of-archive marker.
95 dbuf_write_zeroes(tctx
->outf
, 512*2);
96 dbuf_close(tctx
->outf
);
98 destroy_md(c
, tctx
->md
);
// Decides whether the timestamp with index 'tsidx' needs an extended-header
// record, formats its value into tsd->exthdr_sz, and adds the record's
// estimated size to md->extdata_nbytes_needed.
// Does nothing when the timestamp is not valid.
// NOTE(review): the declarations of unix_time/subsec and the branch structure
// around the two de_snprintf() calls are not visible in this view.
103 static void prepare_timestamp_exthdr(deark
*c
, struct tar_md
*md
, int tsidx
)
107 int is_high_prec
= 0;
108 struct timestamp_data
*tsd
= &md
->tsdata
[tsidx
];
110 if(!tsd
->timestamp
.is_valid
) return;
112 unix_time
= tsd
->timestamp_unix
;
// The sub-second part is only looked at for non-negative times whose
// precision is greater than DE_TSPREC_1SEC (presumably: finer than 1 second).
114 if(unix_time
>=0 && tsd
->timestamp
.precision
>DE_TSPREC_1SEC
) {
115 subsec
= de_timestamp_get_subsec(&tsd
->timestamp
);
116 if(subsec
!=0) is_high_prec
= 1;
// An extended-header record is wanted for every timestamp other than the
// modification time, and for a modification time that has a sub-second part,
// is negative, or exceeds 0x1ffffffff (the largest value that fits in
// 11 octal digits).
119 if(tsidx
!=DE_TIMESTAMPIDX_MODIFY
|| is_high_prec
|| unix_time
<0 || unix_time
>0x1ffffffffLL
) {
120 tsd
->need_exthdr
= 1;
// Value with a 7-digit fractional part (presumably the high-precision case).
127 de_snprintf(tsd
->exthdr_sz
, sizeof(tsd
->exthdr_sz
),
128 "%"I64_FMT
".%07"I64_FMT
, unix_time
, subsec
);
// Value as whole seconds only.
131 de_snprintf(tsd
->exthdr_sz
, sizeof(tsd
->exthdr_sz
),
132 "%"I64_FMT
, unix_time
);
135 // Max length for this item is around 29, so we allow 2 bytes for the
// (The rest of the sentence above is missing from this view; presumably the
// 2 bytes are for the leading decimal length, as in the example below.)
137 // E.g. "28 mtime=1222333444.5555555\n"
// Terms: 2 (length digits) + 1 (space) + 5 (keyword) + 1 ('=') + value
// + 1 (newline).
// NOTE(review): 5 matches "mtime"/"atime"/"ctime", but make_exthdrs() writes
// the creation time under the 23-character keyword "LIBARCHIVE.creationtime",
// so this may under-estimate that record. Confirm the slack elsewhere (e.g.
// the bytes reserved for "size") always covers the difference.
138 md
->extdata_nbytes_needed
+= 2 + 1 + 5 + 1 + (i64
)de_strlen(tsd
->exthdr_sz
) + 1;
141 // f is type DBUF_TYPE_ODBUF, in the process of being created.
142 // We are responsible for setting f->parent_dbuf and
143 // f->offset_into_parent_dbuf.
// Begins a new member file in the tar archive: makes sure the tar file
// exists, builds fresh per-member metadata (timestamps, name, extended-header
// size estimate), reserves zero-filled space for the headers, and records
// where the member's data starts.
// NOTE(review): a number of lines (declaration of tsidx, the assignment of
// 'md', else branches, closing braces) are not visible in this view.
144 void de_tar_start_member_file(deark
*c
, dbuf
*f
)
146 struct tar_ctx
*tctx
= NULL
;
147 struct tar_md
*md
= NULL
;
// Creates the tar file on first use; returns early if it already exists.
151 de_tar_create_file(c
);
153 tctx
= (struct tar_ctx
*)c
->tar_data
;
// Discard the previous member's metadata and start a new one.
155 destroy_md(c
, tctx
->md
);
156 tctx
->md
= de_malloc(c
, sizeof(struct tar_md
));
159 f
->parent_dbuf
= tctx
->outf
;
// The headers for this member will begin at the current end of the tar file.
161 md
->headers_pos
= tctx
->outf
->len
;
// Collect the member's timestamps, when timestamp preservation is enabled.
163 if(c
->preserve_file_times_archives
&& f
->fi_copy
) {
164 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
165 //if(tsidx != DE_TIMESTAMPIDX_MODIFY) continue;
167 if(f
->fi_copy
->timestamp
[tsidx
].is_valid
) {
168 md
->tsdata
[tsidx
].timestamp
= f
->fi_copy
->timestamp
[tsidx
];
170 else if(tsidx
== DE_TIMESTAMPIDX_MODIFY
) {
171 // Special handling if we don't have a mod time.
172 if(c
->reproducible_output
) {
173 de_get_reproducible_timestamp(c
, &md
->tsdata
[tsidx
].timestamp
);
176 de_cached_current_time_to_timestamp(c
, &md
->tsdata
[tsidx
].timestamp
);
177 // Although c->current_time is probably high precision, we treat it as
178 // low precision, so as not to write an "mtime" extended header.
179 // TODO: If we write "mtime" for some other reason, it can be high prec.
180 md
->tsdata
[tsidx
].timestamp
.precision
= DE_TSPREC_1SEC
;
184 // Unavailable timestamp that isn't the mod time.
// Cache the Unix-time form of the timestamp next to the timestamp itself.
188 md
->tsdata
[tsidx
].timestamp_unix
= de_timestamp_to_unix_time(&md
->tsdata
[tsidx
].timestamp
);
// Build md->filename; directories get a trailing '/'.
192 if(f
->fi_copy
&& f
->fi_copy
->is_directory
) {
196 md
->namelen
= de_strlen(f
->name
);
198 // Append a '/' to directory names
// +2 leaves room for the added '/' and the terminating NUL.
199 md
->filename
= de_malloc(c
, (i64
)md
->namelen
+2);
200 de_snprintf(md
->filename
, md
->namelen
+2, "%s/", f
->name
);
201 md
->namelen
= de_strlen(md
->filename
);
// Otherwise the name is copied unchanged.
204 md
->filename
= de_strdup(c
, f
->name
);
// A "path" extended-header record is needed when the name is longer than
// 100 bytes (the width used for the name field in the main header) or is
// not pure ASCII.
207 if(md
->namelen
>100) {
208 md
->need_exthdr_path
= 1;
210 else if(!de_is_ascii((const u8
*)md
->filename
, md
->namelen
)) {
211 md
->need_exthdr_path
= 1;
// NOTE(review): in the visible code this is added before the '>0' test
// further down; if it is unconditional, that test is always true. Confirm
// against the lines not shown here.
214 md
->extdata_nbytes_needed
+= 23; // For "size"; this is enough for 10TB
216 if(md
->need_exthdr_path
) {
217 // Likely an overestimate: up to 6 bytes for the item size,
218 // 4 for the "path" string, 3 for field separators.
219 md
->extdata_nbytes_needed
+= (i64
)md
->namelen
+ 13;
// Add the estimated size of each timestamp record that will be needed.
222 prepare_timestamp_exthdr(c
, md
, DE_TIMESTAMPIDX_MODIFY
);
223 prepare_timestamp_exthdr(c
, md
, DE_TIMESTAMPIDX_ACCESS
);
224 prepare_timestamp_exthdr(c
, md
, DE_TIMESTAMPIDX_ATTRCHANGE
);
225 prepare_timestamp_exthdr(c
, md
, DE_TIMESTAMPIDX_CREATE
);
227 if(md
->extdata_nbytes_needed
>0) {
// Round the estimate up to whole 512-byte blocks. The headers then take one
// block for the extended header, the data blocks, and one block for the
// main header.
232 md
->exthdr_num_data_blocks
= (md
->extdata_nbytes_needed
+511)/512;
233 md
->headers_size
= (1 + md
->exthdr_num_data_blocks
+ 1) * 512;
// Without extended-header data, only the 512-byte main header is needed.
236 md
->exthdr_num_data_blocks
= 0;
237 md
->headers_size
= 512;
240 // Reserve space for the tar headers. We won't know the member file size
241 // until it has been completely written, so we can't write the headers
242 // yet. Instead we'll write them to headers_tmpdbuf, and seek back later
243 // and patch them into the main tar file.
244 dbuf_write_zeroes(tctx
->outf
, md
->headers_size
);
// The member's data begins right after the reserved header space.
246 f
->offset_into_parent_dbuf
= tctx
->outf
->len
;
249 // TODO: Maybe support "base-256" format.
// Formats 'val' as ASCII octal digits into buf2, a field of buf2len bytes.
// buf2 is zero-filled first. Returns 0 when buf2len is larger than 12.
// Three cases follow, depending on how the digit count compares to buf2len.
// NOTE(review): the loop bodies, the final else, and the remaining return
// statements are not visible in this view.
250 static int format_ascii_octal_field(deark
*c
, struct tar_ctx
*tctx
,
251 i64 val
, u8
*buf2
, size_t buf2len
)
253 char buf1
[32]; // The largest field we need to support is 12 bytes
257 de_zeromem(buf2
, buf2len
);
258 if(buf2len
>12) return 0;
// val is reinterpreted as unsigned before being printed in octal.
261 de_snprintf(buf1
, sizeof(buf1
), "%"U64_FMTo
, (u64
)val
);
262 len_in_octal
= de_strlen(buf1
);
// Case 1: more digits than the field can hold. Every byte of the field is
// visited (what is stored is not visible here).
263 if(len_in_octal
> buf2len
) {
264 for(k
=0; k
<buf2len
; k
++) {
// Case 2: the digits fill the field exactly, so no terminator is stored.
268 else if(len_in_octal
== buf2len
) {
269 de_memcpy(buf2
, buf1
, buf2len
);
// Case 3: the digits are right-aligned in the first buf2len-1 bytes, after
// num_leading_0s padding positions. The last byte is not written by the
// visible branches.
272 size_t num_leading_0s
= buf2len
- 1 - len_in_octal
;
274 for(k
=0; k
<buf2len
; k
++) {
275 if(k
< num_leading_0s
) {
278 else if(k
< buf2len
- 1) {
279 buf2
[k
] = buf1
[k
-num_leading_0s
];
// Callback passed to dbuf_buffered_read() by set_checksum_field(). Adds the
// bytes of a header block to tctx->checksum_calc, where tctx is taken from
// brctx->userdata.
// NOTE(review): the rest of the parameter list, the declaration of 'i', the
// 'else' between the two additions, and the return value are not visible.
290 static int cksum_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
293 struct tar_ctx
*tctx
= (struct tar_ctx
*)brctx
->userdata
;
296 for(i
=0; i
<buf_len
; i
++) {
// Bytes at absolute offsets 148..155 are counted as 32 (an ASCII space)
// regardless of what is stored there.
297 if((brctx
->offset
+i
) >=148 && (brctx
->offset
+i
)<156)
298 tctx
->checksum_calc
+= 32; // (The checksum field itself)
// Presumably the else branch: every other byte contributes its own value.
300 tctx
->checksum_calc
+= (i64
)buf
[i
];
306 // Set the checksum field for the header starting at 'pos'.
// NOTE(review): the visible code always reads the 512 bytes starting at
// offset 0 of 'hdr', and callers pass only (c, tctx, hdr), so the mention of
// 'pos' above may be out of date. The rest of the parameter list and the
// declaration of 'buf' are not visible in this view.
307 static void set_checksum_field(deark
*c
, struct tar_ctx
*tctx
,
// Sum the header block via cksum_cbfn into tctx->checksum_calc.
312 tctx
->checksum_calc
= 0;
313 dbuf_buffered_read(hdr
, 0, 512, cksum_cbfn
, (void*)tctx
);
// The checksum is formatted into the first 7 bytes of buf.
315 format_ascii_octal_field(c
, tctx
, tctx
->checksum_calc
, buf
, 7);
// 8 bytes are written at offset 148; the 8th byte of buf is presumably set
// on a line not visible here.
318 dbuf_write_at(hdr
, 148, buf
, 8);
// Writes the string val_sz into the 'fieldlen'-byte field at 'fieldpos' of
// 'hdrs'. At most fieldlen bytes are ever written.
// NOTE(review): the declaration of val_strlen and the final 'else' are not
// visible in this view.
321 static void format_and_write_ascii_field(deark
*c
, struct tar_ctx
*tctx
,
322 const char *val_sz
, size_t fieldlen
, dbuf
*hdrs
, i64 fieldpos
)
326 val_strlen
= de_strlen(val_sz
);
// Shorter than the field: only the string's own bytes are written.
327 if(val_strlen
< fieldlen
) {
328 dbuf_write_at(hdrs
, fieldpos
, (const u8
*)val_sz
, val_strlen
);
329 // (padding bytes will remain at 0)
// Exactly fills the field: written without a terminator.
331 else if(val_strlen
==fieldlen
) {
332 dbuf_write_at(hdrs
, fieldpos
, (const u8
*)val_sz
, fieldlen
);
// Presumably the too-long case: only the first fieldlen bytes are written.
335 dbuf_write_at(hdrs
, fieldpos
, (const u8
*)val_sz
, fieldlen
);
// Formats 'val' as ASCII octal with format_ascii_octal_field() and writes the
// resulting 'fieldlen' bytes at 'fieldpos' of 'hdrs'.
// Does nothing when fieldlen is larger than 12.
// NOTE(review): the declaration of 'buf' is not visible in this view, and the
// return value of format_ascii_octal_field() is not used here.
339 static void format_and_write_ascii_octal_field(deark
*c
, struct tar_ctx
*tctx
,
340 i64 val
, size_t fieldlen
, dbuf
*hdrs
, i64 fieldpos
)
344 if(fieldlen
>12) return;
345 format_ascii_octal_field(c
, tctx
, val
, buf
, fieldlen
);
346 dbuf_write_at(hdrs
, fieldpos
, buf
, fieldlen
);
349 // Set fields common to both the main header, and the POSIX extended (Pax)
// (The end of the sentence above is missing from this view. This function is
// called for both the main header block and the extended header block.)
// NOTE(review): the rest of the parameter list (which supplies 'hdr') is not
// visible. Field names below other than uname/gname follow the ustar header
// layout and are not stated in the code itself.
351 static void set_common_header_fields(deark
*c
, struct tar_ctx
*tctx
,
354 struct tar_md
*md
= tctx
->md
;
// 8-byte octal field at offset 108, set to 0 (uid in the ustar layout).
357 format_and_write_ascii_octal_field(c
, tctx
, 0, 8, hdr
, 108);
// 8-byte octal field at offset 116, set to 0 (gid in the ustar layout).
359 format_and_write_ascii_octal_field(c
, tctx
, 0, 8, hdr
, 116);
// 12-byte octal field at offset 136: the modification time as Unix time.
361 format_and_write_ascii_octal_field(c
, tctx
, md
->tsdata
[DE_TIMESTAMPIDX_MODIFY
].timestamp_unix
, 12, hdr
, 136);
// 8 bytes at offset 257: "ustar", a NUL, then "00".
363 dbuf_write_at(hdr
, 257, (const u8
*)"ustar\0" "00", 8);
364 format_and_write_ascii_field(c
, tctx
, "root", 32, hdr
, 265); // uname
365 format_and_write_ascii_field(c
, tctx
, "root", 32, hdr
, 297); // gname
// Builds the 512-byte main header for member file 'f' in 'mainhdr': common
// fields, name, mode, size, type flag, and finally the checksum.
// NOTE(review): the declarations and assignments of 'mode' and 'typeflag',
// and the first branch of the if/else chain, are not visible in this view.
368 static void make_main_header(deark
*c
, struct tar_ctx
*tctx
,
369 dbuf
*f
, dbuf
*mainhdr
)
371 struct tar_md
*md
= tctx
->md
;
379 else if(f
->fi_copy
&& (f
->fi_copy
->mode_flags
&DE_MODEFLAG_EXE
)) {
386 set_common_header_fields(c
, tctx
, mainhdr
);
// Name field: 100 bytes at offset 0. Longer names are cut to 100 bytes here.
389 format_and_write_ascii_field(c
, tctx
, md
->filename
, 100, mainhdr
, 0);
// Mode field: 8 bytes at offset 100.
392 format_and_write_ascii_octal_field(c
, tctx
, mode
, 8, mainhdr
, 100);
// Size field: 12 bytes at offset 124, holding the member's length.
395 format_and_write_ascii_octal_field(c
, tctx
, f
->len
, 12, mainhdr
, 124);
// Type flag: a single byte at offset 156.
398 dbuf_writebyte_at(mainhdr
, 156, typeflag
);
400 // Done populating main header, now set the checksum
// Force the header to exactly 512 bytes before it is summed.
402 dbuf_truncate(mainhdr
, 512);
403 set_checksum_field(c
, tctx
, mainhdr
);
406 // *ppos is the current offset into extdata. It will be updated.
// Writes one extended-header record of the form "<len> <name>=<val>\n" into
// 'extdata' at *ppos, where <len> is the decimal length of the whole record,
// including the length digits themselves.
// NOTE(review): the declarations, the final else of the length chain, the
// update of *ppos, and the release of 'tmps' are not visible in this view.
407 static void add_exthdr_item(deark
*c
, struct tar_ctx
*tctx
,
408 dbuf
*extdata
, const char *name
, const char *val
, i64
*ppos
)
// Everything except the length digits: name, value, plus 3 bytes for the
// space, '=' and newline.
414 len1
= (i64
)de_strlen(name
) + (i64
)de_strlen(val
) + 3;
415 // This size of the size field depends on itself. Ugh.
// Each threshold is the largest len1 for which the total still fits in that
// many decimal digits (e.g. 8+1 = 9, 97+2 = 99, 996+3 = 999).
416 if(len1
<=8) item_len
= len1
+1;
417 else if(len1
<=97) item_len
= len1
+2;
418 else if(len1
<=996) item_len
= len1
+3;
419 else if(len1
<=9995) item_len
= len1
+4;
420 else if(len1
<=99994) item_len
= len1
+5;
421 else if(len1
<=999993) item_len
= len1
+6;
// Format the record into a temporary buffer (+1 for the terminating NUL),
// then copy item_len bytes of it into extdata.
427 tmps
= de_malloc(c
, item_len
+1);
428 de_snprintf(tmps
, (size_t)(item_len
+1), "%"I64_FMT
" %s=%s\n", item_len
, name
, val
);
429 dbuf_write_at(extdata
, *ppos
, (const u8
*)tmps
, item_len
);
// Builds the extended header block ('exthdr', 512 bytes) and its data
// ('extdata') for member file 'f': common fields, a pseudo-filename, mode
// 0644, type flag 'x', then one record per needed item, padded out to the
// precalculated number of data blocks.
// NOTE(review): the declarations of namebuf, buf and extdata_len are not
// visible in this view.
436 static void make_exthdrs(deark
*c
, struct tar_ctx
*tctx
,
437 dbuf
*f
, dbuf
*exthdr
, dbuf
*extdata
)
439 struct tar_md
*md
= tctx
->md
;
444 set_common_header_fields(c
, tctx
, exthdr
);
447 // This pseudo-filename will be ignored by any decent untar program.
448 // The template used here is similar to what bsdtar does.
449 // (Using f->name here instead of md->filename, because we don't
450 // want directory names to have a '/' appended.)
451 de_snprintf(namebuf
, sizeof(namebuf
), "PaxHeader/%s", f
->name
);
452 format_and_write_ascii_field(c
, tctx
, namebuf
, 100, exthdr
, 0);
// Mode field: 8 bytes at offset 100, fixed at 0644.
455 format_and_write_ascii_octal_field(c
, tctx
, 0644, 8, exthdr
, 100);
// Type flag 'x' at offset 156 marks this block as an extended header.
458 dbuf_writebyte_at(exthdr
, 156, 'x');
// Emit the records that were flagged as needed; extdata_len tracks the
// running offset into extdata.
462 if(md
->need_exthdr_size
) {
463 de_snprintf(buf
, sizeof(buf
), "%"I64_FMT
, f
->len
);
464 add_exthdr_item(c
, tctx
, extdata
, "size", buf
, &extdata_len
);
467 if(md
->need_exthdr_path
) {
468 add_exthdr_item(c
, tctx
, extdata
, "path", md
->filename
, &extdata_len
);
471 if(md
->tsdata
[DE_TIMESTAMPIDX_MODIFY
].need_exthdr
) {
472 add_exthdr_item(c
, tctx
, extdata
, "mtime", md
->tsdata
[DE_TIMESTAMPIDX_MODIFY
].exthdr_sz
, &extdata_len
);
474 if(md
->tsdata
[DE_TIMESTAMPIDX_ACCESS
].need_exthdr
) {
475 add_exthdr_item(c
, tctx
, extdata
, "atime", md
->tsdata
[DE_TIMESTAMPIDX_ACCESS
].exthdr_sz
, &extdata_len
);
477 if(md
->tsdata
[DE_TIMESTAMPIDX_ATTRCHANGE
].need_exthdr
) {
478 add_exthdr_item(c
, tctx
, extdata
, "ctime", md
->tsdata
[DE_TIMESTAMPIDX_ATTRCHANGE
].exthdr_sz
, &extdata_len
);
480 if(md
->tsdata
[DE_TIMESTAMPIDX_CREATE
].need_exthdr
) {
481 add_exthdr_item(c
, tctx
, extdata
, "LIBARCHIVE.creationtime", md
->tsdata
[DE_TIMESTAMPIDX_CREATE
].exthdr_sz
, &extdata_len
);
484 // We have to use exactly the number of exthdr data blocks that we
485 // precalculated, no more and no fewer. But it is possible that we
486 // overestimated. If so, we have to pad the data somehow, and using
487 // empty "comment" items is one way to do that.
// Keep adding empty comment records until the data reaches into the last
// reserved block.
488 while(extdata_len
< (512*md
->exthdr_num_data_blocks
- 511)) {
489 add_exthdr_item(c
, tctx
, extdata
, "comment", "", &extdata_len
);
// Force extdata to exactly the reserved number of 512-byte blocks.
491 dbuf_truncate(extdata
, 512*md
->exthdr_num_data_blocks
);
// Size field of the extended header: the number of data bytes actually used.
494 format_and_write_ascii_octal_field(c
, tctx
, extdata_len
, 12, exthdr
, 124);
496 dbuf_truncate(exthdr
, 512);
497 set_checksum_field(c
, tctx
, exthdr
);
// Completes the member file 'f': pads its data to a multiple of 512 bytes,
// builds the headers in temporary in-memory dbufs, writes them into the
// space reserved by de_tar_start_member_file(), and destroys the per-member
// metadata.
// NOTE(review): several declarations (exthdr, padded_len, saved_pos,
// writepos), the condition guarding the extended-header construction, the
// first 'writepos' advance, and the cleanup of the temporary dbufs are not
// visible in this view.
500 void de_tar_end_member_file(deark
*c
, dbuf
*f
)
502 struct tar_ctx
*tctx
= (struct tar_ctx
*)c
->tar_data
;
503 struct tar_md
*md
= tctx
->md
;
507 dbuf
*mainhdr
= NULL
;
509 dbuf
*extdata
= NULL
;
511 // Write any needed padding to the main tar file.
512 padded_len
= de_pad_to_n(f
->len
, 512);
513 dbuf_write_zeroes(tctx
->outf
, padded_len
- f
->len
);
515 // Construct the headers, using temporary dbufs
518 mainhdr
= dbuf_create_membuf(c
, 512, 0);
519 make_main_header(c
, tctx
, f
, mainhdr
);
522 // Extended header & data
523 exthdr
= dbuf_create_membuf(c
, 512, 0);
524 extdata
= dbuf_create_membuf(c
, 512*md
->exthdr_num_data_blocks
, 0);
// A "size" record is needed when the length exceeds 0x1FFFFFFFF, the largest
// value that fits in 11 octal digits.
525 md
->need_exthdr_size
= (f
->len
> 0x1FFFFFFFFLL
)?1:0;
526 make_exthdrs(c
, tctx
, f
, exthdr
, extdata
);
529 // Seek back and write the headers to the main tar file.
530 // FIXME: This is a hack, sort of. A dbuf doesn't expect us to access its
531 // fp pointer, or to mix copy_at with other 'write' functions.
// Remember the current file position so it can be restored afterwards.
532 saved_pos
= de_ftell(tctx
->outf
->fp
);
533 writepos
= md
->headers_pos
;
// Order in the file: extended header block, its data blocks, main header.
534 if(md
->has_exthdr
&& exthdr
&& extdata
) {
535 dbuf_copy_at(exthdr
, 0, 512, tctx
->outf
, writepos
);
537 dbuf_copy_at(extdata
, 0, 512*md
->exthdr_num_data_blocks
, tctx
->outf
, writepos
);
538 writepos
+= 512*md
->exthdr_num_data_blocks
;
540 dbuf_copy_at(mainhdr
, 0, 512, tctx
->outf
, writepos
);
541 de_fseek(tctx
->outf
->fp
, saved_pos
, SEEK_SET
);
547 destroy_md(c
, tctx
->md
);