Minor refactoring, related to lzah
[deark.git] / src / deark-tar.c
blob5f58caee8cbc863e93735a4a22d3fb625cb4096b
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // TAR format output
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
11 struct timestamp_data {
12 struct de_timestamp timestamp;
13 i64 timestamp_unix; // Same time as .timestamp, for convenience
14 u8 need_exthdr;
15 char exthdr_sz[32];
18 struct tar_md {
19 u8 is_dir;
20 u8 has_exthdr;
21 u8 need_exthdr_size;
22 u8 need_exthdr_path;
23 size_t namelen;
24 i64 headers_pos;
25 i64 headers_size;
26 i64 exthdr_num_data_blocks;
27 i64 extdata_nbytes_needed;
28 i64 extdata_nbytes_used;
29 char *filename;
30 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
33 struct tar_ctx {
34 const char *tar_filename;
35 dbuf *outf;
36 i64 checksum_calc; // for temporary use
38 // Data associated with current member file
39 struct tar_md *md;
42 int de_tar_create_file(deark *c)
44 struct tar_ctx *tctx = NULL;
45 int retval = 0;
47 if(c->tar_data) return 1;
49 tctx = de_malloc(c, sizeof(struct tar_ctx));
50 c->tar_data = (void*)tctx;
52 if(c->archive_to_stdout) {
53 tctx->tar_filename = "[stdout]";
54 de_err(c, "TAR to stdout is not implemented");
55 de_fatalerror(c);
56 goto done;
60 if(c->output_archive_filename) {
61 tctx->tar_filename = c->output_archive_filename;
63 else {
64 tctx->tar_filename = "output.tar";
67 de_info(c, "Creating %s", tctx->tar_filename);
68 tctx->outf = dbuf_create_unmanaged_file(c, tctx->tar_filename,
69 c->overwrite_mode, 0);
71 if(tctx->outf->btype==DBUF_TYPE_NULL) {
72 de_fatalerror(c);
73 goto done;
76 retval = 1;
78 done:
79 return retval;
82 static void destroy_md(deark *c, struct tar_md *md)
84 if(!md) return;
85 de_free(c, md->filename);
86 de_free(c, md);
89 void de_tar_close_file(deark *c)
91 struct tar_ctx *tctx = (struct tar_ctx *)c->tar_data;
93 if(!tctx) return;
94 if(tctx->outf) {
95 dbuf_write_zeroes(tctx->outf, 512*2);
96 dbuf_close(tctx->outf);
98 destroy_md(c, tctx->md);
99 de_free(c, tctx);
100 c->tar_data = NULL;
103 static void prepare_timestamp_exthdr(deark *c, struct tar_md *md, int tsidx)
105 i64 unix_time;
106 i64 subsec = 0;
107 int is_high_prec = 0;
108 struct timestamp_data *tsd = &md->tsdata[tsidx];
110 if(!tsd->timestamp.is_valid) return;
112 unix_time = tsd->timestamp_unix;
114 if(unix_time>=0 && tsd->timestamp.precision>DE_TSPREC_1SEC) {
115 subsec = de_timestamp_get_subsec(&tsd->timestamp);
116 if(subsec!=0) is_high_prec = 1;
119 if(tsidx!=DE_TIMESTAMPIDX_MODIFY || is_high_prec || unix_time<0 || unix_time>0x1ffffffffLL) {
120 tsd->need_exthdr = 1;
122 else {
123 return;
126 if(is_high_prec) {
127 de_snprintf(tsd->exthdr_sz, sizeof(tsd->exthdr_sz),
128 "%"I64_FMT".%07"I64_FMT, unix_time, subsec);
130 else {
131 de_snprintf(tsd->exthdr_sz, sizeof(tsd->exthdr_sz),
132 "%"I64_FMT, unix_time);
135 // Max length for this item is around 29, so we allow 2 bytes for the
136 // length field.
137 // E.g. "28 mtime=1222333444.5555555\n"
138 md->extdata_nbytes_needed += 2 + 1 + 5 + 1 + (i64)de_strlen(tsd->exthdr_sz) + 1;
141 // f is type DBUF_TYPE_ODBUF, in the process of being created.
142 // We are responsible for setting f->parent_dbuf and
143 // f->offset_into_parent_dbuf.
144 void de_tar_start_member_file(deark *c, dbuf *f)
146 struct tar_ctx *tctx = NULL;
147 struct tar_md *md = NULL;
148 int tsidx;
150 if(!c->tar_data) {
151 de_tar_create_file(c);
153 tctx = (struct tar_ctx *)c->tar_data;
154 if(!tctx) return;
155 destroy_md(c, tctx->md);
156 tctx->md = de_malloc(c, sizeof(struct tar_md));
157 md = tctx->md;
159 f->parent_dbuf = tctx->outf;
161 md->headers_pos = tctx->outf->len;
163 if(c->preserve_file_times_archives && f->fi_copy) {
164 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
165 //if(tsidx != DE_TIMESTAMPIDX_MODIFY) continue;
167 if(f->fi_copy->timestamp[tsidx].is_valid) {
168 md->tsdata[tsidx].timestamp = f->fi_copy->timestamp[tsidx];
170 else if(tsidx == DE_TIMESTAMPIDX_MODIFY) {
171 // Special handling if we don't have a mod time.
172 if(c->reproducible_output) {
173 de_get_reproducible_timestamp(c, &md->tsdata[tsidx].timestamp);
175 else {
176 de_cached_current_time_to_timestamp(c, &md->tsdata[tsidx].timestamp);
177 // Although c->current_time is probably high precision, we treat it as
178 // low precision, so as not to write an "mtime" extended header.
179 // TODO: If we write "mtime" for some other reason, it can be high prec.
180 md->tsdata[tsidx].timestamp.precision = DE_TSPREC_1SEC;
183 else {
184 // Unavailable timestamp that isn't the mod time.
185 continue;
188 md->tsdata[tsidx].timestamp_unix = de_timestamp_to_unix_time(&md->tsdata[tsidx].timestamp);
192 if(f->fi_copy && f->fi_copy->is_directory) {
193 md->is_dir = 1;
196 md->namelen = de_strlen(f->name);
197 if(md->is_dir) {
198 // Append a '/' to directory names
199 md->filename = de_malloc(c, (i64)md->namelen+2);
200 de_snprintf(md->filename, md->namelen+2, "%s/", f->name);
201 md->namelen = de_strlen(md->filename);
203 else {
204 md->filename = de_strdup(c, f->name);
207 if(md->namelen>100) {
208 md->need_exthdr_path = 1;
210 else if(!de_is_ascii((const u8*)md->filename, md->namelen)) {
211 md->need_exthdr_path = 1;
214 md->extdata_nbytes_needed += 23; // For "size"; this is enough for 10TB
216 if(md->need_exthdr_path) {
217 // Likely an overestimate: up to 6 bytes for the item size,
218 // 4 for the "path" string, 3 for field separators.
219 md->extdata_nbytes_needed += (i64)md->namelen + 13;
222 prepare_timestamp_exthdr(c, md, DE_TIMESTAMPIDX_MODIFY);
223 prepare_timestamp_exthdr(c, md, DE_TIMESTAMPIDX_ACCESS);
224 prepare_timestamp_exthdr(c, md, DE_TIMESTAMPIDX_ATTRCHANGE);
225 prepare_timestamp_exthdr(c, md, DE_TIMESTAMPIDX_CREATE);
227 if(md->extdata_nbytes_needed>0) {
228 md->has_exthdr = 1;
231 if(md->has_exthdr) {
232 md->exthdr_num_data_blocks = (md->extdata_nbytes_needed+511)/512;
233 md->headers_size = (1 + md->exthdr_num_data_blocks + 1) * 512;
235 else {
236 md->exthdr_num_data_blocks = 0;
237 md->headers_size = 512;
240 // Reserve space for the tar headers. We won't know the member file size
241 // until it has been completely written, so we can't write the headers
242 // yet. Instead we'll write them to headers_tmpdbuf, and seek back later
243 // and patch them into the main tar file.
244 dbuf_write_zeroes(tctx->outf, md->headers_size);
246 f->offset_into_parent_dbuf = tctx->outf->len;
249 // TODO: Maybe support "base-256" format.
250 static int format_ascii_octal_field(deark *c, struct tar_ctx *tctx,
251 i64 val, u8 *buf2, size_t buf2len)
253 char buf1[32]; // The largest field we need to support is 12 bytes
254 size_t k;
255 size_t len_in_octal;
257 de_zeromem(buf2, buf2len);
258 if(buf2len>12) return 0;
259 if(val<0) val = 0;
261 de_snprintf(buf1, sizeof(buf1), "%"U64_FMTo, (u64)val);
262 len_in_octal = de_strlen(buf1);
263 if(len_in_octal > buf2len) {
264 for(k=0; k<buf2len; k++) {
265 buf2[k] = '7';
268 else if(len_in_octal == buf2len) {
269 de_memcpy(buf2, buf1, buf2len);
271 else {
272 size_t num_leading_0s = buf2len - 1 - len_in_octal;
274 for(k=0; k<buf2len; k++) {
275 if(k < num_leading_0s) {
276 buf2[k] = '0';
278 else if(k < buf2len - 1) {
279 buf2[k] = buf1[k-num_leading_0s];
281 else {
282 buf2[k] = '\0';
287 return 1;
290 static int cksum_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
291 i64 buf_len)
293 struct tar_ctx *tctx = (struct tar_ctx*)brctx->userdata;
294 i64 i;
296 for(i=0; i<buf_len; i++) {
297 if((brctx->offset+i) >=148 && (brctx->offset+i)<156)
298 tctx->checksum_calc += 32; // (The checksum field itself)
299 else
300 tctx->checksum_calc += (i64)buf[i];
303 return 1;
306 // Set the checksum field for the header starting at 'pos'.
307 static void set_checksum_field(deark *c, struct tar_ctx *tctx,
308 dbuf *hdr)
310 u8 buf[8];
312 tctx->checksum_calc = 0;
313 dbuf_buffered_read(hdr, 0, 512, cksum_cbfn, (void*)tctx);
315 format_ascii_octal_field(c, tctx, tctx->checksum_calc, buf, 7);
316 buf[6] = 0x00;
317 buf[7] = 0x20;
318 dbuf_write_at(hdr, 148, buf, 8);
321 static void format_and_write_ascii_field(deark *c, struct tar_ctx *tctx,
322 const char *val_sz, size_t fieldlen, dbuf *hdrs, i64 fieldpos)
324 size_t val_strlen;
326 val_strlen = de_strlen(val_sz);
327 if(val_strlen < fieldlen) {
328 dbuf_write_at(hdrs, fieldpos, (const u8*)val_sz, val_strlen);
329 // (padding bytes will remain at 0)
331 else if(val_strlen==fieldlen) {
332 dbuf_write_at(hdrs, fieldpos, (const u8*)val_sz, fieldlen);
334 else {
335 dbuf_write_at(hdrs, fieldpos, (const u8*)val_sz, fieldlen);
339 static void format_and_write_ascii_octal_field(deark *c, struct tar_ctx *tctx,
340 i64 val, size_t fieldlen, dbuf *hdrs, i64 fieldpos)
342 u8 buf[12];
344 if(fieldlen>12) return;
345 format_ascii_octal_field(c, tctx, val, buf, fieldlen);
346 dbuf_write_at(hdrs, fieldpos, buf, fieldlen);
349 // Set fields common to both the main header, and the POSIX extended (Pax)
350 // header.
351 static void set_common_header_fields(deark *c, struct tar_ctx *tctx,
352 dbuf *hdr)
354 struct tar_md *md = tctx->md;
356 // uid
357 format_and_write_ascii_octal_field(c, tctx, 0, 8, hdr, 108);
358 // gid
359 format_and_write_ascii_octal_field(c, tctx, 0, 8, hdr, 116);
360 // mtime
361 format_and_write_ascii_octal_field(c, tctx, md->tsdata[DE_TIMESTAMPIDX_MODIFY].timestamp_unix, 12, hdr, 136);
362 // magic/version
363 dbuf_write_at(hdr, 257, (const u8*)"ustar\0" "00", 8);
364 format_and_write_ascii_field(c, tctx, "root", 32, hdr, 265); // uname
365 format_and_write_ascii_field(c, tctx, "root", 32, hdr, 297); // gname
368 static void make_main_header(deark *c, struct tar_ctx *tctx,
369 dbuf *f, dbuf *mainhdr)
371 struct tar_md *md = tctx->md;
372 i64 mode;
373 u8 typeflag = '0';
375 if(md->is_dir) {
376 mode = 0755;
377 typeflag = '5';
379 else if(f->fi_copy && (f->fi_copy->mode_flags&DE_MODEFLAG_EXE)) {
380 mode = 0755;
382 else {
383 mode = 0644;
386 set_common_header_fields(c, tctx, mainhdr);
388 // "name"
389 format_and_write_ascii_field(c, tctx, md->filename, 100, mainhdr, 0);
391 // "mode"
392 format_and_write_ascii_octal_field(c, tctx, mode, 8, mainhdr, 100);
394 // "size"
395 format_and_write_ascii_octal_field(c, tctx, f->len, 12, mainhdr, 124);
397 // typeflag
398 dbuf_writebyte_at(mainhdr, 156, typeflag);
400 // Done populating main header, now set the checksum
402 dbuf_truncate(mainhdr, 512);
403 set_checksum_field(c, tctx, mainhdr);
406 // *ppos is the current offset into extdata. It will be updated.
407 static void add_exthdr_item(deark *c, struct tar_ctx *tctx,
408 dbuf *extdata, const char *name, const char *val, i64 *ppos)
410 i64 len1;
411 i64 item_len = 0;
412 char *tmps = NULL;
414 len1 = (i64)de_strlen(name) + (i64)de_strlen(val) + 3;
415 // This size of the size field depends on itself. Ugh.
416 if(len1<=8) item_len = len1+1;
417 else if(len1<=97) item_len = len1+2;
418 else if(len1<=996) item_len = len1+3;
419 else if(len1<=9995) item_len = len1+4;
420 else if(len1<=99994) item_len = len1+5;
421 else if(len1<=999993) item_len = len1+6;
422 else { // Error
423 (*ppos)++;
424 goto done;
427 tmps = de_malloc(c, item_len+1);
428 de_snprintf(tmps, (size_t)(item_len+1), "%"I64_FMT" %s=%s\n", item_len, name, val);
429 dbuf_write_at(extdata, *ppos, (const u8*)tmps, item_len);
430 (*ppos) += item_len;
432 done:
433 de_free(c, tmps);
436 static void make_exthdrs(deark *c, struct tar_ctx *tctx,
437 dbuf *f, dbuf *exthdr, dbuf *extdata)
439 struct tar_md *md = tctx->md;
440 i64 extdata_len = 0;
441 char namebuf[101];
442 char buf[80];
444 set_common_header_fields(c, tctx, exthdr);
446 // "name"
447 // This pseudo-filename will be ignored by any decent untar program.
448 // The template used here is similar to what bsdtar does.
449 // (Using f->name here instead of md->filename, because we don't
450 // want directory names to have a '/' appended.)
451 de_snprintf(namebuf, sizeof(namebuf), "PaxHeader/%s", f->name);
452 format_and_write_ascii_field(c, tctx, namebuf, 100, exthdr, 0);
454 // "mode"
455 format_and_write_ascii_octal_field(c, tctx, 0644, 8, exthdr, 100);
457 // typeflag
458 dbuf_writebyte_at(exthdr, 156, 'x');
460 // Extended data
462 if(md->need_exthdr_size) {
463 de_snprintf(buf, sizeof(buf), "%"I64_FMT, f->len);
464 add_exthdr_item(c, tctx, extdata, "size", buf, &extdata_len);
467 if(md->need_exthdr_path) {
468 add_exthdr_item(c, tctx, extdata, "path", md->filename, &extdata_len);
471 if(md->tsdata[DE_TIMESTAMPIDX_MODIFY].need_exthdr) {
472 add_exthdr_item(c, tctx, extdata, "mtime", md->tsdata[DE_TIMESTAMPIDX_MODIFY].exthdr_sz, &extdata_len);
474 if(md->tsdata[DE_TIMESTAMPIDX_ACCESS].need_exthdr) {
475 add_exthdr_item(c, tctx, extdata, "atime", md->tsdata[DE_TIMESTAMPIDX_ACCESS].exthdr_sz, &extdata_len);
477 if(md->tsdata[DE_TIMESTAMPIDX_ATTRCHANGE].need_exthdr) {
478 add_exthdr_item(c, tctx, extdata, "ctime", md->tsdata[DE_TIMESTAMPIDX_ATTRCHANGE].exthdr_sz, &extdata_len);
480 if(md->tsdata[DE_TIMESTAMPIDX_CREATE].need_exthdr) {
481 add_exthdr_item(c, tctx, extdata, "LIBARCHIVE.creationtime", md->tsdata[DE_TIMESTAMPIDX_CREATE].exthdr_sz, &extdata_len);
484 // We have to use exactly the number of exthdr data blocks that we
485 // precalculated, no more and no fewer. But it is possible that we
486 // overestimated. If so, we have to pad the data somehow, and using
487 // empty "comment" items is one way to do that.
488 while(extdata_len < (512*md->exthdr_num_data_blocks - 511)) {
489 add_exthdr_item(c, tctx, extdata, "comment", "", &extdata_len);
491 dbuf_truncate(extdata, 512*md->exthdr_num_data_blocks);
493 // "size"
494 format_and_write_ascii_octal_field(c, tctx, extdata_len, 12, exthdr, 124);
496 dbuf_truncate(exthdr, 512);
497 set_checksum_field(c, tctx, exthdr);
500 void de_tar_end_member_file(deark *c, dbuf *f)
502 struct tar_ctx *tctx = (struct tar_ctx *)c->tar_data;
503 struct tar_md *md = tctx->md;
504 i64 padded_len;
505 i64 saved_pos;
506 i64 writepos;
507 dbuf *mainhdr = NULL;
508 dbuf *exthdr = NULL;
509 dbuf *extdata = NULL;
511 // Write any needed padding to the main tar file.
512 padded_len = de_pad_to_n(f->len, 512);
513 dbuf_write_zeroes(tctx->outf, padded_len - f->len);
515 // Construct the headers, using temporary dbufs
517 // Main header
518 mainhdr = dbuf_create_membuf(c, 512, 0);
519 make_main_header(c, tctx, f, mainhdr);
521 if(md->has_exthdr) {
522 // Extended header & data
523 exthdr = dbuf_create_membuf(c, 512, 0);
524 extdata = dbuf_create_membuf(c, 512*md->exthdr_num_data_blocks, 0);
525 md->need_exthdr_size = (f->len > 0x1FFFFFFFFLL)?1:0;
526 make_exthdrs(c, tctx, f, exthdr, extdata);
529 // Seek back and write the headers to the main tar file.
530 // FIXME: This is a hack, sort of. A dbuf doesn't expect us to access its
531 // fp pointer, or to mix copy_at with other 'write' functions.
532 saved_pos = de_ftell(tctx->outf->fp);
533 writepos = md->headers_pos;
534 if(md->has_exthdr && exthdr && extdata) {
535 dbuf_copy_at(exthdr, 0, 512, tctx->outf, writepos);
536 writepos += 512;
537 dbuf_copy_at(extdata, 0, 512*md->exthdr_num_data_blocks, tctx->outf, writepos);
538 writepos += 512*md->exthdr_num_data_blocks;
540 dbuf_copy_at(mainhdr, 0, 512, tctx->outf, writepos);
541 de_fseek(tctx->outf->fp, saved_pos, SEEK_SET);
543 dbuf_close(mainhdr);
544 dbuf_close(exthdr);
545 dbuf_close(extdata);
547 destroy_md(c, tctx->md);
548 tctx->md = NULL;