1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_tar
);
11 // Represents a single physical header block, and the associated data that
13 // Sometimes, a logical member file is composed of multiple physical members.
14 struct phys_member_data
{
15 #define TARFMT_UNKNOWN 0
16 #define TARFMT_POSIX 1
27 struct de_stringreaderdata
*linkname
;
29 struct de_timestamp timestamps
[DE_TIMESTAMPIDX_COUNT
];
32 // A struct to collect various extended attributes for a logical member
33 // (or for global attributes).
35 de_ucstring
*alt_name
;
36 de_ucstring
*linkname
;
37 u8 main_file_is_special
;
40 struct de_timestamp alt_timestamps
[DE_TIMESTAMPIDX_COUNT
];
44 de_ucstring
*filename
;
46 int is_dir
, is_regular_file
, is_symlink
;
49 typedef struct localctx_struct
{
52 struct extattr_data
*global_ea
;
53 struct de_crcobj
*crco_cksum
;
56 static const char* get_fmt_name(int fmt
)
58 const char *n
= "unknown or old-style";
60 case TARFMT_POSIX
: n
= "POSIX"; break;
61 case TARFMT_GNU
: n
= "GNU"; break;
62 case TARFMT_STAR
: n
= "star"; break;
67 static int read_ascii_octal_number(dbuf
*f
, i64 pos
, i64 fieldsize
,
71 b1
= dbuf_getbyte(f
, pos
);
74 // The usual ASCII-octal format
75 return dbuf_read_ascii_number(f
, pos
, fieldsize
, 8, value
);
78 // "base-256" or some other special format
79 if(b1
==0x80) { // positive base-256 number
80 *value
= dbuf_getint_ext(f
, pos
+1, (unsigned int)(fieldsize
-1), 0, 0);
83 else if(b1
==0xff) { // negative base-256 number
84 *value
= dbuf_getint_ext(f
, pos
+1, (unsigned int)(fieldsize
-1), 0, 1);
92 static void read_12char_timestamp(deark
*c
, struct phys_member_data
*pmd
, i64 pos
,
93 int tsidx
, const char *name
)
97 char timestamp_buf
[64];
99 ret
= read_ascii_octal_number(c
->infile
, pos
, 12, ×tamp_unix
);
101 de_unix_time_to_timestamp(timestamp_unix
, &pmd
->timestamps
[tsidx
], 0x1);
102 de_dbg_timestamp_to_string(c
, &pmd
->timestamps
[tsidx
],
103 timestamp_buf
, sizeof(timestamp_buf
), 0);
104 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, timestamp_unix
, timestamp_buf
);
108 // Sets pmd->checksum_calc
109 static void calc_checksum(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
113 d
->crco_cksum
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
116 de_crcobj_reset(d
->crco_cksum
);
119 // A loose upper bound for the max possible checksum value is 512*255
121 de_crcobj_addbuf(d
->crco_cksum
, &hdrblock
[0], 148);
122 de_crcobj_addrun(d
->crco_cksum
, 32, 8); // (The checksum field itself)
123 de_crcobj_addbuf(d
->crco_cksum
, &hdrblock
[156], 512-156);
124 pmd
->checksum_calc
= (i64
)de_crcobj_getval(d
->crco_cksum
);
127 // Returns 1 if it was parsed successfully, and is not a trailer.
128 static int read_phys_member_header(deark
*c
, lctx
*d
,
129 struct phys_member_data
*pmd
, i64 pos1
)
134 de_ucstring
*tmpstr
= NULL
;
136 int saved_indent_level
;
139 de_dbg_indent_save(c
, &saved_indent_level
);
141 de_dbg(c
, "physical archive member header at %"I64_FMT
, pos1
);
144 // Look ahead to try to figure out some things about the format of this member.
146 de_read(hdrblock
, pos1
, 512);
147 calc_checksum(c
, d
, pmd
, hdrblock
);
149 if(pmd
->checksum_calc
==8*32 && de_is_all_zeroes(&hdrblock
[148], 8)) {
150 // "The end of the archive is indicated by two records consisting
151 // entirely of zero bytes."
152 // Most tar programs seem to stop at the first "zero block", so that's
154 de_dbg(c
, "[trailer record]");
155 d
->found_trailer
= 1;
159 pmd
->linkflag
= hdrblock
[156];
161 if(!de_memcmp(&hdrblock
[257], (const void*)"ustar \0", 8)) {
162 pmd
->fmt
= TARFMT_GNU
;
164 else if(!de_memcmp(&hdrblock
[257], (const void*)"ustar\0", 6)) {
165 pmd
->fmt
= TARFMT_POSIX
;
167 else if(!de_memcmp(&hdrblock
[508], (const void*)"tar\0", 4)) {
168 pmd
->fmt
= TARFMT_STAR
;
171 de_dbg(c
, "tar format: %s", get_fmt_name(pmd
->fmt
));
173 pmd
->name
= ucstring_create(c
);
174 dbuf_read_to_ucstring(c
->infile
, pos
, 100, pmd
->name
, DE_CONVFLAG_STOP_AT_NUL
,
177 de_dbg(c
, "name: \"%s\"", ucstring_getpsz_d(pmd
->name
));
179 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &pmd
->mode
);
181 de_dbg(c
, "mode: octal(%06o)", (unsigned int)pmd
->mode
);
185 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &n
);
187 de_dbg(c
, "uid: %"I64_FMT
, n
);
190 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &n
);
192 de_dbg(c
, "gid: %"I64_FMT
, n
);
196 ret
= read_ascii_octal_number(c
->infile
, pos
, 12, &pmd
->filesize
);
197 de_sanitize_length(&pmd
->filesize
);
200 de_dbg(c
, "size: %"I64_FMT
, pmd
->filesize
);
201 pmd
->file_data_pos
= pos1
+ 512;
203 read_12char_timestamp(c
, pmd
, pos
, DE_TIMESTAMPIDX_MODIFY
, "mtime");
206 (void)read_ascii_octal_number(c
->infile
, pos
, 8, &pmd
->checksum
);
207 de_dbg(c
, "header checksum (reported): %"I64_FMT
, pmd
->checksum
);
208 de_dbg(c
, "header checksum (calculated): %"I64_FMT
, pmd
->checksum_calc
);
209 if(pmd
->checksum
!= pmd
->checksum_calc
) {
210 de_err(c
, "%s: Header checksum failed: reported=%"I64_FMT
", calculated=%"I64_FMT
,
211 ucstring_getpsz_d(pmd
->name
), pmd
->checksum
, pmd
->checksum_calc
);
215 // linkflag already set, above
216 de_dbg(c
, "linkflag/typeflag: 0x%02x ('%c')", (unsigned int)pmd
->linkflag
,
217 de_byte_to_printable_char(pmd
->linkflag
));
220 if(de_getbyte(pos
)!=0) {
221 pmd
->linkname
= dbuf_read_string(c
->infile
, pos
, 100, 100, DE_CONVFLAG_STOP_AT_NUL
,
223 de_dbg(c
, "linkname: \"%s\"", ucstring_getpsz_d(pmd
->linkname
->str
));
227 tmpstr
= ucstring_create(c
);
229 if(c
->debug_level
>=2) {
230 ucstring_empty(tmpstr
);
231 dbuf_read_to_ucstring(c
->infile
, pos
, 8, tmpstr
, 0,
232 DE_EXTENC_MAKE(DE_ENCODING_ASCII
, DE_ENCSUBTYPE_PRINTABLE
));
233 de_dbg2(c
, "magic/version: \"%s\"", ucstring_getpsz_d(tmpstr
));
238 if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_GNU
) {
239 ucstring_empty(tmpstr
);
240 dbuf_read_to_ucstring(c
->infile
, pos
, 32, tmpstr
, DE_CONVFLAG_STOP_AT_NUL
,
242 de_dbg(c
, "uname: \"%s\"", ucstring_getpsz(tmpstr
));
246 if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_GNU
) {
247 ucstring_empty(tmpstr
);
248 dbuf_read_to_ucstring(c
->infile
, pos
, 32, tmpstr
, DE_CONVFLAG_STOP_AT_NUL
,
250 de_dbg(c
, "gname: \"%s\"", ucstring_getpsz(tmpstr
));
254 pos
+= 8; // devmajor
255 pos
+= 8; // devminor
257 // TODO?: There are various dialect-specific fields after this point, more of
258 // which might be worth supporting.
260 // TODO?: Some (rare?) GNU files have atime/ctime fields here, but is it
261 // worth trying to detect them?
262 // And "star" files can have atime/ctime fields at offset 476.
264 if((pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_STAR
) && (de_getbyte(pos
)!=0)) {
265 // This field might only be 131 bytes, instead of 155. Let's hope that
266 // it's NUL terminated in that case.
267 pmd
->prefix
= ucstring_create(c
);
268 dbuf_read_to_ucstring(c
->infile
, pos
, 155, pmd
->prefix
,
269 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
270 de_dbg(c
, "prefix: \"%s\"", ucstring_getpsz_d(pmd
->prefix
));
277 ucstring_destroy(tmpstr
);
279 de_dbg_indent_restore(c
, saved_indent_level
);
283 static void destroy_pmd(deark
*c
, struct phys_member_data
*pmd
)
286 ucstring_destroy(pmd
->name
);
287 de_destroy_stringreaderdata(c
, pmd
->linkname
);
288 ucstring_destroy(pmd
->prefix
);
292 static void destroy_extattr_data(deark
*c
, struct extattr_data
*ea
)
295 ucstring_destroy(ea
->alt_name
);
296 ucstring_destroy(ea
->linkname
);
299 static void read_gnu_longpath(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
300 struct extattr_data
*ea
)
302 i64 pos
= pmd
->file_data_pos
;
303 i64 ext_name_len
= pmd
->filesize
;
305 de_dbg(c
, "LongPath data at %"I64_FMT
, pos
);
307 if(ext_name_len
<1) goto done
;
309 if(pmd
->linkflag
=='K') {
311 ea
->linkname
= ucstring_create(c
);
313 ucstring_empty(ea
->linkname
);
314 // TODO: It's a little inconsistent that we convert a GNU extended linkname
315 // to a ucstring, while we keep the original bytes of old-style linknames.
316 dbuf_read_to_ucstring_n(c
->infile
, pos
, ext_name_len
-1, 32767, ea
->linkname
, 0,
318 de_dbg(c
, "ext. linkname: \"%s\"", ucstring_getpsz_d(ea
->linkname
));
320 else { // 'L', presumably
322 ea
->alt_name
= ucstring_create(c
);
324 ucstring_empty(ea
->alt_name
);
325 dbuf_read_to_ucstring_n(c
->infile
, pos
, ext_name_len
-1, 32767, ea
->alt_name
, 0,
327 de_dbg(c
, "ext. filename: \"%s\"", ucstring_getpsz_d(ea
->alt_name
));
331 de_dbg_indent(c
, -1);
343 struct de_stringreaderdata
*name
;
344 struct de_stringreaderdata
*value
;
347 static void do_exthdr_timestamp(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
348 struct exthdr_item
*ehi
, struct extattr_data
*ea
, int tsidx
, const char *name
)
353 char timestamp_buf
[64];
355 if(ehi
->val_len
<1) return;
357 // TODO: There is probably more roundoff error here than there needs to be.
358 val_dbl
= de_strtod(ehi
->value
->sz
, NULL
);
360 val_int
= (i64
)val_dbl
;
361 val_frac
= val_dbl
- (double)val_int
;
364 val_int
= (i64
)val_dbl
;
368 de_unix_time_to_timestamp(val_int
, &ea
->alt_timestamps
[tsidx
], 0x1);
370 de_timestamp_set_subsec(&ea
->alt_timestamps
[tsidx
], val_frac
);
373 de_dbg_timestamp_to_string(c
, &ea
->alt_timestamps
[tsidx
],
374 timestamp_buf
, sizeof(timestamp_buf
), 0);
375 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
378 static int read_exthdr_item(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
379 struct extattr_data
*ea
,
380 i64 pos1
, i64 max_len
, i64
*bytes_consumed
)
382 struct exthdr_item
*ehi
= NULL
;
385 int saved_indent_level
;
389 STATE_LOOKING_FOR_LEN
, STATE_READING_LEN
,
390 STATE_LOOKING_FOR_NAME
, STATE_READING_NAME
, STATE_DONE
393 state
= STATE_LOOKING_FOR_LEN
;
394 de_dbg_indent_save(c
, &saved_indent_level
);
395 de_dbg(c
, "extended header field at %"I64_FMT
, pos1
);
398 ehi
= de_malloc(c
, sizeof(struct exthdr_item
));
399 ehi
->base_pos
= pos1
;
405 // Parse one header item. We will read the initial "length" field
406 // immediately, because it is needed for proper parsing.
407 // For the name and value fields, we only record their location and size.
408 for(offs
=0; ; offs
++) {
412 if(state
==STATE_DONE
) break;
414 if(offs
>=max_len
) goto done
;
416 // If we know the reported length of this item, enforce it.
417 if(state
>STATE_READING_LEN
&& offs
>=ehi
->fieldlen
) goto done
;
419 ch
= de_getbyte(pos1
+offs
);
420 is_whitespace
= (ch
==' ' || ch
==0x09);
422 if(state
==STATE_LOOKING_FOR_LEN
) {
423 if(is_whitespace
) continue;
424 ehi
->fieldlen_offs
= offs
;
425 state
= STATE_READING_LEN
;
427 else if(state
==STATE_READING_LEN
) {
429 ehi
->fieldlen_len
= offs
- ehi
->fieldlen_offs
;
430 ret
= dbuf_read_ascii_number(c
->infile
,
431 pos1
+ehi
->fieldlen_offs
, ehi
->fieldlen_len
, 10, &ehi
->fieldlen
);
435 de_dbg(c
, "length: %d", (int)ehi
->fieldlen
);
436 if(ehi
->fieldlen
> max_len
) {
439 state
= STATE_LOOKING_FOR_NAME
;
442 else if(state
==STATE_LOOKING_FOR_NAME
) {
443 if(is_whitespace
) continue;
444 ehi
->name_offs
= offs
;
445 state
= STATE_READING_NAME
;
447 else if(state
==STATE_READING_NAME
) {
449 ehi
->name_len
= offs
- ehi
->name_offs
;
450 ehi
->val_offs
= offs
+1;
451 ehi
->val_len
= ehi
->fieldlen
- offs
- 2;
452 if(ehi
->val_len
<0) goto done
;
458 // Sanity check: The item must end with a newline
459 if(de_getbyte(pos1
+ehi
->fieldlen
-1) != 0x0a) {
463 n
= de_min_int(ehi
->name_len
, 256);
464 ehi
->name
= dbuf_read_string(c
->infile
, pos1
+ehi
->name_offs
,
465 n
, n
, 0, DE_ENCODING_UTF8
);
466 de_dbg(c
, "keyword: \"%s\"", ucstring_getpsz_d(ehi
->name
->str
));
468 n
= de_min_int(ehi
->val_len
, 65536);
469 ehi
->value
= dbuf_read_string(c
->infile
, pos1
+ehi
->val_offs
,
470 n
, n
, 0, DE_ENCODING_UTF8
);de_dbg(c
, "value: \"%s\"", ucstring_getpsz_d(ehi
->value
->str
));
472 if(!de_strncmp(ehi
->name
->sz
, "GNU.sparse.", 11)) {
473 ea
->main_file_is_special
= 1;
476 if(!de_strcmp(ehi
->name
->sz
, "path") ||
477 !de_strcmp(ehi
->name
->sz
, "GNU.sparse.name"))
479 if(!ea
->alt_name
) ea
->alt_name
= ucstring_create(c
);
480 ucstring_empty(ea
->alt_name
);
481 ucstring_append_ucstring(ea
->alt_name
, ehi
->value
->str
);
483 else if(!de_strcmp(ehi
->name
->sz
, "linkpath")) {
484 if(!ea
->linkname
) ea
->linkname
= ucstring_create(c
);
485 ucstring_empty(ea
->linkname
);
486 ucstring_append_ucstring(ea
->linkname
, ehi
->value
->str
);
488 else if(!de_strcmp(ehi
->name
->sz
, "mtime")) {
489 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_MODIFY
, "mod time");
491 else if(!de_strcmp(ehi
->name
->sz
, "atime")) {
492 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_ACCESS
, "access time");
494 else if(!de_strcmp(ehi
->name
->sz
, "ctime")) {
495 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_ATTRCHANGE
, "attrib-change time");
497 else if(!de_strcmp(ehi
->name
->sz
, "LIBARCHIVE.creationtime")) {
498 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_CREATE
, "create time");
500 else if(!de_strcmp(ehi
->name
->sz
, "size")) {
501 if(ehi
->val_len
==0) {
502 ea
->has_alt_size
= 0;
505 ea
->has_alt_size
= 1;
506 ea
->alt_size
= de_strtoll(ehi
->value
->sz
, NULL
, 10);
509 // TODO: "hdrcharset"
511 *bytes_consumed
= ehi
->fieldlen
;
516 de_warn(c
, "Failed to parse extended header at %"I64_FMT
, pos1
);
519 de_destroy_stringreaderdata(c
, ehi
->name
);
520 de_destroy_stringreaderdata(c
, ehi
->value
);
523 de_dbg_indent_restore(c
, saved_indent_level
);
527 static void read_exthdr(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
528 struct extattr_data
*ea
)
530 int saved_indent_level
;
531 i64 pos
= pmd
->file_data_pos
;
533 de_dbg_indent_save(c
, &saved_indent_level
);
534 de_dbg(c
, "POSIX extended header data at %"I64_FMT
, pmd
->file_data_pos
);
537 if(c
->debug_level
>=2) {
539 tmps
= ucstring_create(c
);
540 dbuf_read_to_ucstring_n(c
->infile
, pmd
->file_data_pos
, pmd
->filesize
,
541 32768, tmps
, 0, DE_ENCODING_UTF8
);
542 de_dbg(c
, "data: \"%s\"", ucstring_getpsz_d(tmps
));
543 ucstring_destroy(tmps
);
546 while(pos
< pmd
->file_data_pos
+ pmd
->filesize
) {
547 i64 bytes_consumed
= 0;
549 if(!read_exthdr_item(c
, d
, pmd
, ea
, pos
,
550 pmd
->file_data_pos
+pmd
->filesize
-pos
, &bytes_consumed
))
554 if(bytes_consumed
<1) break;
555 pos
+= bytes_consumed
;
558 de_dbg_indent_restore(c
, saved_indent_level
);
561 static int read_member(deark
*c
, lctx
*d
, i64 pos1
, i64
*bytes_consumed_member
)
563 int saved_indent_level
;
565 struct member_data
*md
= NULL
;
566 struct phys_member_data
*pmd
= NULL
;
567 struct extattr_data
*ea
= NULL
;
569 unsigned int snflags
;
573 de_dbg_indent_save(c
, &saved_indent_level
);
575 de_dbg(c
, "logical archive member at %"I64_FMT
, pos1
);
578 md
= de_malloc(c
, sizeof(struct member_data
));
579 md
->fi
= de_finfo_create(c
);
580 md
->fi
->detect_root_dot_dir
= 1;
581 md
->filename
= ucstring_create(c
);
583 ea
= de_malloc(c
, sizeof(struct extattr_data
));
586 int is_supplemental_item
= 0;
588 if(pos
>= c
->infile
->len
) goto done
;
592 pmd
= de_malloc(c
, sizeof(struct phys_member_data
));
594 if(!read_phys_member_header(c
, d
, pmd
, pos
)) {
599 if(pmd
->linkflag
=='L' || pmd
->linkflag
=='K') {
600 is_supplemental_item
= 1;
601 read_gnu_longpath(c
, d
, pmd
, ea
);
603 else if(pmd
->linkflag
== 'x' || pmd
->linkflag
== 'X') {
604 is_supplemental_item
= 1;
605 read_exthdr(c
, d
, pmd
, ea
);
607 else if(pmd
->linkflag
== 'g') {
608 read_exthdr(c
, d
, pmd
, d
->global_ea
);
610 // TODO: linkflag 'K'
612 if(!is_supplemental_item
) {
616 // Prepare to read the next physical member
617 pos
+= de_pad_to_n(pmd
->filesize
, 512);
621 // At this point, pmd is the main physical member for this logical file.
622 // Any other 'pmd's have been discarded, other than extended attributes
623 // that were recorded in ea.
625 if(ea
->has_alt_size
) {
626 pmd
->filesize
= ea
->alt_size
;
628 pos
+= de_pad_to_n(pmd
->filesize
, 512);
632 if((pmd
->checksum
!= pmd
->checksum_calc
) && c
->extract_level
<2) {
633 // TODO: This is little more than a hack, so that we don't extract so
634 // much garbage if the file is corrupt, or we go off the rails.
635 // There are more robust ways to deal with such issues.
636 de_dbg(c
, "[not extracting, due to bad checksum]");
640 // Decide on a filename
641 if(ucstring_isnonempty(ea
->alt_name
)) {
642 ucstring_append_ucstring(md
->filename
, ea
->alt_name
);
645 if(ucstring_isnonempty(pmd
->prefix
)) {
646 ucstring_append_ucstring(md
->filename
, pmd
->prefix
);
647 ucstring_append_char(md
->filename
, '/');
649 if(ucstring_isnonempty(pmd
->name
)) {
650 ucstring_append_ucstring(md
->filename
, pmd
->name
);
654 // Try to figure out what kind of "file" this is.
656 if(pmd
->linkflag
=='2') {
659 else if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_STAR
) {
660 if(pmd
->linkflag
=='0' || pmd
->linkflag
==0) {
661 md
->is_regular_file
= 1;
663 else if(pmd
->linkflag
=='5') {
667 else if(pmd
->fmt
==TARFMT_GNU
) {
668 if(pmd
->linkflag
=='0' || pmd
->linkflag
=='7' || pmd
->linkflag
==0) {
669 md
->is_regular_file
= 1;
671 else if(pmd
->linkflag
=='5') {
676 if(pmd
->name
->len
>=1 && pmd
->name
->str
[pmd
->name
->len
-1]=='/') {
679 else if(pmd
->linkflag
==0 || pmd
->linkflag
=='0') {
680 md
->is_regular_file
= 1;
684 if(ea
->main_file_is_special
) {
685 md
->is_regular_file
= 0;
688 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, pmd
->file_data_pos
,
691 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
692 if(ea
->alt_timestamps
[tsidx
].is_valid
) {
693 md
->fi
->timestamp
[tsidx
] = ea
->alt_timestamps
[tsidx
];
695 else if(pmd
->timestamps
[tsidx
].is_valid
) {
696 md
->fi
->timestamp
[tsidx
] = pmd
->timestamps
[tsidx
];
700 if(!md
->is_regular_file
&& !md
->is_dir
) {
701 de_warn(c
, "\"%s\" is a %s. It will not be extracted as such.",
702 ucstring_getpsz(md
->filename
),
703 md
->is_symlink
?"symlink":"special file");
706 snflags
= DE_SNFLAG_FULLPATH
;
708 md
->fi
->is_directory
= 1;
709 snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
711 else if(md
->is_regular_file
) {
712 if((pmd
->mode
& 0111)!=0) {
713 md
->fi
->mode_flags
|= DE_MODEFLAG_EXE
;
716 md
->fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
719 de_finfo_set_name_from_ucstring(c
, md
->fi
, md
->filename
, snflags
);
720 md
->fi
->original_filename_flag
= 1;
722 if(pmd
->file_data_pos
+ pmd
->filesize
> c
->infile
->len
) goto done
;
724 outf
= dbuf_create_output_file(c
, NULL
, md
->fi
, 0);
726 // If a symlink has no data, write the 'linkname' field instead.
727 if(md
->is_symlink
&& pmd
->filesize
==0) {
728 if(ucstring_isnonempty(ea
->linkname
)) {
729 ucstring_write_as_utf8(c
, ea
->linkname
, outf
, 0);
732 else if(pmd
->linkname
) {
733 dbuf_write(outf
, (const u8
*)pmd
->linkname
->sz
,
734 (i64
)pmd
->linkname
->sz_strlen
);
739 dbuf_copy(c
->infile
, pmd
->file_data_pos
, pmd
->filesize
, outf
);
743 *bytes_consumed_member
= pos
- pos1
;
745 destroy_extattr_data(c
, ea
);
747 ucstring_destroy(md
->filename
);
748 de_finfo_destroy(c
, md
->fi
);
751 de_dbg_indent_restore(c
, saved_indent_level
);
755 static void de_run_tar(deark
*c
, de_module_params
*mparams
)
762 d
= de_malloc(c
, sizeof(lctx
));
764 d
->global_ea
= de_malloc(c
, sizeof(struct extattr_data
));
766 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_UTF8
);
770 if(d
->found_trailer
) break;
771 if(pos
>= c
->infile
->len
) break;
772 if(pos
+512 > c
->infile
->len
) {
773 de_warn(c
, "Ignoring %d extra bytes at end of file", (int)(c
->infile
->len
- pos
));
777 ret
= read_member(c
, d
, pos
, &item_len
);
778 if(!ret
|| item_len
<1) break;
783 de_crcobj_destroy(d
->crco_cksum
);
784 destroy_extattr_data(c
, d
->global_ea
);
789 static int de_identify_tar(deark
*c
)
796 has_ext
= de_input_file_has_ext(c
, "tar");;
797 if(!dbuf_memcmp(c
->infile
, 257, "ustar", 5)) {
798 return has_ext
? 100 : 90;
802 if(!dbuf_memcmp(c
->infile
, 508, "tar\0", 4)) {
807 // Try to detect tar formats that don't have the "ustar" identifier.
808 if(!has_ext
) return 0;
810 // The 'checksum' field has a fairly distinctive format.
811 // "This field should be stored as six octal digits followed by a null and
812 // a space character."
814 de_read(buf
, 148, 8);
817 if(buf
[k
]>='0' && buf
[k
]<='7') {
820 else if(buf
[k
]!=' ') {
824 if(digit_count
<1) return 0;
825 if(buf
[6]==0x00 && buf
[7]==' ') return 60;
826 if(buf
[6]==' ' && buf
[7]==0x00) return 15;
830 void de_module_tar(deark
*c
, struct deark_module_info
*mi
)
833 mi
->desc
= "tar archive";
834 mi
->run_fn
= de_run_tar
;
835 mi
->identify_fn
= de_identify_tar
;