1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_tar
);
11 // Represents a single physical header block, and the associated data that
13 // Sometimes, a logical member file is composed of multiple physical members.
14 struct phys_member_data
{
15 #define TARFMT_UNKNOWN 0
16 #define TARFMT_POSIX 1
27 struct de_stringreaderdata
*linkname
;
29 struct de_timestamp timestamps
[DE_TIMESTAMPIDX_COUNT
];
32 // A struct to collect various extended attributes for a logical member
33 // (or for global attributes).
35 de_ucstring
*alt_name
;
36 de_ucstring
*linkname
;
37 u8 main_file_is_special
;
40 struct de_timestamp alt_timestamps
[DE_TIMESTAMPIDX_COUNT
];
44 de_ucstring
*filename
;
46 int is_dir
, is_regular_file
, is_symlink
;
49 typedef struct localctx_struct
{
52 struct extattr_data
*global_ea
;
// Maps a TARFMT_* format code to a short display name, used when logging the
// detected tar format. The name starts out as "unknown or old-style" and is
// replaced for the POSIX, GNU, and star codes.
55 static const char* get_fmt_name(int fmt
)
57 const char *n
= "unknown or old-style";
59 case TARFMT_POSIX
: n
= "POSIX"; break;
60 case TARFMT_GNU
: n
= "GNU"; break;
61 case TARFMT_STAR
: n
= "star"; break;
// Reads the numeric header field occupying 'fieldsize' bytes at 'pos' in 'f'
// and stores the result through 'value'. The field's first byte (b1) is
// inspected: 0x80 and 0xff mark the two "base-256" forms handled below, while
// the usual form is ASCII octal (parsed with base 8).
66 static int read_ascii_octal_number(dbuf
*f
, i64 pos
, i64 fieldsize
,
70 b1
= dbuf_getbyte(f
, pos
);
73 // The usual ASCII-octal format
74 return dbuf_read_ascii_number(f
, pos
, fieldsize
, 8, value
);
77 // "base-256" or some other special format
78 if(b1
==0x80) { // positive base-256 number
// The marker byte is skipped: the value is taken from the remaining
// fieldsize-1 bytes.
79 *value
= dbuf_getint_ext(f
, pos
+1, (unsigned int)(fieldsize
-1), 0, 0);
82 else if(b1
==0xff) { // negative base-256 number
// Same layout as above; only the last argument differs (presumably it
// selects signed interpretation -- confirm against dbuf_getint_ext).
83 *value
= dbuf_getint_ext(f
, pos
+1, (unsigned int)(fieldsize
-1), 0, 1);
// Reads the 12-byte numeric timestamp field at 'pos', converts it from Unix
// time into pmd->timestamps[tsidx], and prints both the raw value and its
// formatted form to the debug log, labeled with 'name'.
91 static void read_12char_timestamp(deark
*c
, struct phys_member_data
*pmd
, i64 pos
,
92 int tsidx
, const char *name
)
96 char timestamp_buf
[64];
// The parsed value is written through a pointer to timestamp_unix.
98 ret
= read_ascii_octal_number(c
->infile
, pos
, 12, &timestamp_unix
);
100 de_unix_time_to_timestamp(timestamp_unix
, &pmd
->timestamps
[tsidx
], 0x1);
101 de_dbg_timestamp_to_string(c
, &pmd
->timestamps
[tsidx
],
102 timestamp_buf
, sizeof(timestamp_buf
), 0);
103 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, timestamp_unix
, timestamp_buf
);
107 // Sets pmd->checksum_calc.
// The checksum is accumulated over the 512 bytes of 'hdrblock', starting
// from zero.
108 static void calc_checksum(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
113 pmd
->checksum_calc
= 0;
114 for(i
=0; i
<512; i
++) {
// Bytes belonging to the checksum field contribute 32 (the ASCII code of
// a space) instead of their stored value. NOTE(review): the field is
// presumably the 8 bytes at offset 148 -- confirm against the condition.
116 pmd
->checksum_calc
+= 32; // (The checksum field itself)
// Every other byte contributes its own value.
118 pmd
->checksum_calc
+= (i64
)hdrblock
[i
];
122 // Returns 1 if it was parsed successfully, and is not a trailer.
123 static int read_phys_member_header(deark
*c
, lctx
*d
,
124 struct phys_member_data
*pmd
, i64 pos1
)
129 de_ucstring
*tmpstr
= NULL
;
131 int saved_indent_level
;
134 de_dbg_indent_save(c
, &saved_indent_level
);
136 de_dbg(c
, "physical archive member header at %"I64_FMT
, pos1
);
139 // Look ahead to try to figure out some things about the format of this member.
141 de_read(hdrblock
, pos1
, 512);
142 calc_checksum(c
, d
, pmd
, hdrblock
);
144 if(pmd
->checksum_calc
==8*32 && de_is_all_zeroes(&hdrblock
[148], 8)) {
145 // "The end of the archive is indicated by two records consisting
146 // entirely of zero bytes."
147 // Most tar programs seem to stop at the first "zero block", so that's
149 de_dbg(c
, "[trailer record]");
150 d
->found_trailer
= 1;
154 pmd
->linkflag
= hdrblock
[156];
156 if(!de_memcmp(&hdrblock
[257], (const void*)"ustar \0", 8)) {
157 pmd
->fmt
= TARFMT_GNU
;
159 else if(!de_memcmp(&hdrblock
[257], (const void*)"ustar\0", 6)) {
160 pmd
->fmt
= TARFMT_POSIX
;
162 else if(!de_memcmp(&hdrblock
[508], (const void*)"tar\0", 4)) {
163 pmd
->fmt
= TARFMT_STAR
;
166 de_dbg(c
, "tar format: %s", get_fmt_name(pmd
->fmt
));
168 pmd
->name
= ucstring_create(c
);
169 dbuf_read_to_ucstring(c
->infile
, pos
, 100, pmd
->name
, DE_CONVFLAG_STOP_AT_NUL
,
172 de_dbg(c
, "name: \"%s\"", ucstring_getpsz_d(pmd
->name
));
174 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &pmd
->mode
);
176 de_dbg(c
, "mode: octal(%06o)", (unsigned int)pmd
->mode
);
180 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &n
);
182 de_dbg(c
, "uid: %"I64_FMT
, n
);
185 ret
= read_ascii_octal_number(c
->infile
, pos
, 8, &n
);
187 de_dbg(c
, "gid: %"I64_FMT
, n
);
191 ret
= read_ascii_octal_number(c
->infile
, pos
, 12, &pmd
->filesize
);
194 de_dbg(c
, "size: %"I64_FMT
, pmd
->filesize
);
195 pmd
->file_data_pos
= pos1
+ 512;
197 read_12char_timestamp(c
, pmd
, pos
, DE_TIMESTAMPIDX_MODIFY
, "mtime");
200 (void)read_ascii_octal_number(c
->infile
, pos
, 8, &pmd
->checksum
);
201 de_dbg(c
, "header checksum (reported): %"I64_FMT
, pmd
->checksum
);
202 de_dbg(c
, "header checksum (calculated): %"I64_FMT
, pmd
->checksum_calc
);
203 if(pmd
->checksum
!= pmd
->checksum_calc
) {
204 de_err(c
, "%s: Header checksum failed: reported=%"I64_FMT
", calculated=%"I64_FMT
,
205 ucstring_getpsz_d(pmd
->name
), pmd
->checksum
, pmd
->checksum_calc
);
209 // linkflag already set, above
210 de_dbg(c
, "linkflag/typeflag: 0x%02x ('%c')", (unsigned int)pmd
->linkflag
,
211 de_byte_to_printable_char(pmd
->linkflag
));
214 if(de_getbyte(pos
)!=0) {
215 pmd
->linkname
= dbuf_read_string(c
->infile
, pos
, 100, 100, DE_CONVFLAG_STOP_AT_NUL
,
217 de_dbg(c
, "linkname: \"%s\"", ucstring_getpsz_d(pmd
->linkname
->str
));
221 tmpstr
= ucstring_create(c
);
223 if(c
->debug_level
>=2) {
224 ucstring_empty(tmpstr
);
225 dbuf_read_to_ucstring(c
->infile
, pos
, 8, tmpstr
, 0,
226 DE_EXTENC_MAKE(DE_ENCODING_ASCII
, DE_ENCSUBTYPE_PRINTABLE
));
227 de_dbg2(c
, "magic/version: \"%s\"", ucstring_getpsz_d(tmpstr
));
232 if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_GNU
) {
233 ucstring_empty(tmpstr
);
234 dbuf_read_to_ucstring(c
->infile
, pos
, 32, tmpstr
, DE_CONVFLAG_STOP_AT_NUL
,
236 de_dbg(c
, "uname: \"%s\"", ucstring_getpsz(tmpstr
));
240 if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_GNU
) {
241 ucstring_empty(tmpstr
);
242 dbuf_read_to_ucstring(c
->infile
, pos
, 32, tmpstr
, DE_CONVFLAG_STOP_AT_NUL
,
244 de_dbg(c
, "gname: \"%s\"", ucstring_getpsz(tmpstr
));
248 pos
+= 8; // devmajor
249 pos
+= 8; // devminor
251 // TODO?: There are various dialect-specific fields after this point, more of
252 // which might be worth supporting.
254 // TODO?: Some (rare?) GNU files have atime/ctime fields here, but is it
255 // worth trying to detect them?
256 // And "star" files can have atime/ctime fields at offset 476.
258 if((pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_STAR
) && (de_getbyte(pos
)!=0)) {
259 // This field might only be 131 bytes, instead of 155. Let's hope that
260 // it's NUL terminated in that case.
261 pmd
->prefix
= ucstring_create(c
);
262 dbuf_read_to_ucstring(c
->infile
, pos
, 155, pmd
->prefix
,
263 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
264 de_dbg(c
, "prefix: \"%s\"", ucstring_getpsz_d(pmd
->prefix
));
271 ucstring_destroy(tmpstr
);
273 de_dbg_indent_restore(c
, saved_indent_level
);
// Releases the strings held by a phys_member_data: the 'name' and 'prefix'
// ucstrings and the 'linkname' string-reader data.
277 static void destroy_pmd(deark
*c
, struct phys_member_data
*pmd
)
280 ucstring_destroy(pmd
->name
);
281 de_destroy_stringreaderdata(c
, pmd
->linkname
);
282 ucstring_destroy(pmd
->prefix
);
// Releases the ucstrings held by an extattr_data: 'alt_name' and 'linkname'.
286 static void destroy_extattr_data(deark
*c
, struct extattr_data
*ea
)
289 ucstring_destroy(ea
->alt_name
);
290 ucstring_destroy(ea
->linkname
);
// Reads the payload of a GNU long-name member into 'ea'. The payload is the
// member's file data (pmd->filesize bytes at pmd->file_data_pos). For linkflag
// 'K' it is stored as ea->linkname; otherwise it is stored as ea->alt_name.
// Nothing is read when the payload is shorter than one byte.
293 static void read_gnu_longpath(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
294 struct extattr_data
*ea
)
296 i64 pos
= pmd
->file_data_pos
;
297 i64 ext_name_len
= pmd
->filesize
;
299 de_dbg(c
, "LongPath data at %"I64_FMT
, pos
);
301 if(ext_name_len
<1) goto done
;
303 if(pmd
->linkflag
=='K') {
305 ea
->linkname
= ucstring_create(c
);
307 ucstring_empty(ea
->linkname
);
308 // TODO: It's a little inconsistent that we convert a GNU extended linkname
309 // to a ucstring, while we keep the original bytes of old-style linknames.
// Only ext_name_len-1 bytes are requested (presumably to drop a trailing
// NUL -- confirm); 32767 is passed as the second size argument.
310 dbuf_read_to_ucstring_n(c
->infile
, pos
, ext_name_len
-1, 32767, ea
->linkname
, 0,
312 de_dbg(c
, "ext. linkname: \"%s\"", ucstring_getpsz_d(ea
->linkname
));
314 else { // 'L', presumably
316 ea
->alt_name
= ucstring_create(c
);
318 ucstring_empty(ea
->alt_name
);
// Same read as the linkname case, but targeting the alternate file name.
319 dbuf_read_to_ucstring_n(c
->infile
, pos
, ext_name_len
-1, 32767, ea
->alt_name
, 0,
321 de_dbg(c
, "ext. filename: \"%s\"", ucstring_getpsz_d(ea
->alt_name
));
325 de_dbg_indent(c
, -1);
337 struct de_stringreaderdata
*name
;
338 struct de_stringreaderdata
*value
;
341 static void do_exthdr_timestamp(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
342 struct exthdr_item
*ehi
, struct extattr_data
*ea
, int tsidx
, const char *name
)
347 char timestamp_buf
[64];
349 if(ehi
->val_len
<1) return;
351 // TODO: There is probably more roundoff error here than there needs to be.
352 val_dbl
= de_strtod(ehi
->value
->sz
, NULL
);
354 val_int
= (i64
)val_dbl
;
355 val_frac
= val_dbl
- (double)val_int
;
358 val_int
= (i64
)val_dbl
;
362 de_unix_time_to_timestamp(val_int
, &ea
->alt_timestamps
[tsidx
], 0x1);
364 de_timestamp_set_subsec(&ea
->alt_timestamps
[tsidx
], val_frac
);
367 de_dbg_timestamp_to_string(c
, &ea
->alt_timestamps
[tsidx
],
368 timestamp_buf
, sizeof(timestamp_buf
), 0);
369 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
372 static int read_exthdr_item(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
373 struct extattr_data
*ea
,
374 i64 pos1
, i64 max_len
, i64
*bytes_consumed
)
376 struct exthdr_item
*ehi
= NULL
;
379 int saved_indent_level
;
383 STATE_LOOKING_FOR_LEN
, STATE_READING_LEN
,
384 STATE_LOOKING_FOR_NAME
, STATE_READING_NAME
, STATE_DONE
387 state
= STATE_LOOKING_FOR_LEN
;
388 de_dbg_indent_save(c
, &saved_indent_level
);
389 de_dbg(c
, "extended header field at %"I64_FMT
, pos1
);
392 ehi
= de_malloc(c
, sizeof(struct exthdr_item
));
393 ehi
->base_pos
= pos1
;
399 // Parse one header item. We will read the initial "length" field
400 // immediately, because it is needed for proper parsing.
401 // For the name and value fields, we only record their location and size.
402 for(offs
=0; ; offs
++) {
406 if(state
==STATE_DONE
) break;
408 if(offs
>=max_len
) goto done
;
410 // If we know the reported length of this item, enforce it.
411 if(state
>STATE_READING_LEN
&& offs
>=ehi
->fieldlen
) goto done
;
413 ch
= de_getbyte(pos1
+offs
);
414 is_whitespace
= (ch
==' ' || ch
==0x09);
416 if(state
==STATE_LOOKING_FOR_LEN
) {
417 if(is_whitespace
) continue;
418 ehi
->fieldlen_offs
= offs
;
419 state
= STATE_READING_LEN
;
421 else if(state
==STATE_READING_LEN
) {
423 ehi
->fieldlen_len
= offs
- ehi
->fieldlen_offs
;
424 ret
= dbuf_read_ascii_number(c
->infile
,
425 pos1
+ehi
->fieldlen_offs
, ehi
->fieldlen_len
, 10, &ehi
->fieldlen
);
429 de_dbg(c
, "length: %d", (int)ehi
->fieldlen
);
430 if(ehi
->fieldlen
> max_len
) {
433 state
= STATE_LOOKING_FOR_NAME
;
436 else if(state
==STATE_LOOKING_FOR_NAME
) {
437 if(is_whitespace
) continue;
438 ehi
->name_offs
= offs
;
439 state
= STATE_READING_NAME
;
441 else if(state
==STATE_READING_NAME
) {
443 ehi
->name_len
= offs
- ehi
->name_offs
;
444 ehi
->val_offs
= offs
+1;
445 ehi
->val_len
= ehi
->fieldlen
- offs
- 2;
446 if(ehi
->val_len
<0) goto done
;
452 // Sanity check: The item must end with a newline
453 if(de_getbyte(pos1
+ehi
->fieldlen
-1) != 0x0a) {
457 n
= de_min_int(ehi
->name_len
, 256);
458 ehi
->name
= dbuf_read_string(c
->infile
, pos1
+ehi
->name_offs
,
459 n
, n
, 0, DE_ENCODING_UTF8
);
460 de_dbg(c
, "keyword: \"%s\"", ucstring_getpsz_d(ehi
->name
->str
));
462 n
= de_min_int(ehi
->val_len
, 65536);
463 ehi
->value
= dbuf_read_string(c
->infile
, pos1
+ehi
->val_offs
,
464 n
, n
, 0, DE_ENCODING_UTF8
);de_dbg(c
, "value: \"%s\"", ucstring_getpsz_d(ehi
->value
->str
));
466 if(!de_strncmp(ehi
->name
->sz
, "GNU.sparse.", 11)) {
467 ea
->main_file_is_special
= 1;
470 if(!de_strcmp(ehi
->name
->sz
, "path") ||
471 !de_strcmp(ehi
->name
->sz
, "GNU.sparse.name"))
473 if(!ea
->alt_name
) ea
->alt_name
= ucstring_create(c
);
474 ucstring_empty(ea
->alt_name
);
475 ucstring_append_ucstring(ea
->alt_name
, ehi
->value
->str
);
477 else if(!de_strcmp(ehi
->name
->sz
, "linkpath")) {
478 if(!ea
->linkname
) ea
->linkname
= ucstring_create(c
);
479 ucstring_empty(ea
->linkname
);
480 ucstring_append_ucstring(ea
->linkname
, ehi
->value
->str
);
482 else if(!de_strcmp(ehi
->name
->sz
, "mtime")) {
483 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_MODIFY
, "mod time");
485 else if(!de_strcmp(ehi
->name
->sz
, "atime")) {
486 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_ACCESS
, "access time");
488 else if(!de_strcmp(ehi
->name
->sz
, "ctime")) {
489 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_ATTRCHANGE
, "attrib-change time");
491 else if(!de_strcmp(ehi
->name
->sz
, "LIBARCHIVE.creationtime")) {
492 do_exthdr_timestamp(c
, d
, pmd
, ehi
, ea
, DE_TIMESTAMPIDX_CREATE
, "create time");
494 else if(!de_strcmp(ehi
->name
->sz
, "size")) {
495 if(ehi
->val_len
==0) {
496 ea
->has_alt_size
= 0;
499 ea
->has_alt_size
= 1;
500 ea
->alt_size
= de_strtoll(ehi
->value
->sz
, NULL
, 10);
503 // TODO: "hdrcharset"
505 *bytes_consumed
= ehi
->fieldlen
;
510 de_warn(c
, "Failed to parse extended header at %"I64_FMT
, pos1
);
513 de_destroy_stringreaderdata(c
, ehi
->name
);
514 de_destroy_stringreaderdata(c
, ehi
->value
);
517 de_dbg_indent_restore(c
, saved_indent_level
);
// Processes a POSIX extended header whose records occupy pmd->filesize bytes
// starting at pmd->file_data_pos. Each record is handed to read_exthdr_item(),
// which records recognized attributes in 'ea' and reports how many bytes it
// consumed.
521 static void read_exthdr(deark
*c
, lctx
*d
, struct phys_member_data
*pmd
,
522 struct extattr_data
*ea
)
524 int saved_indent_level
;
525 i64 pos
= pmd
->file_data_pos
;
527 de_dbg_indent_save(c
, &saved_indent_level
);
528 de_dbg(c
, "POSIX extended header data at %"I64_FMT
, pmd
->file_data_pos
);
// At debug level 2 and above, also dump the raw header data (decoded as
// UTF-8) to the debug log.
531 if(c
->debug_level
>=2) {
533 tmps
= ucstring_create(c
);
534 dbuf_read_to_ucstring_n(c
->infile
, pmd
->file_data_pos
, pmd
->filesize
,
535 32768, tmps
, 0, DE_ENCODING_UTF8
);
536 de_dbg(c
, "data: \"%s\"", ucstring_getpsz_d(tmps
));
537 ucstring_destroy(tmps
);
// Walk the records until the end of the header data is reached.
540 while(pos
< pmd
->file_data_pos
+ pmd
->filesize
) {
541 i64 bytes_consumed
= 0;
// The length limit passed for each record is the number of bytes left in
// the header data.
543 if(!read_exthdr_item(c
, d
, pmd
, ea
, pos
,
544 pmd
->file_data_pos
+pmd
->filesize
-pos
, &bytes_consumed
))
// Stop if no progress was made, so the loop cannot spin in place.
548 if(bytes_consumed
<1) break;
549 pos
+= bytes_consumed
;
552 de_dbg_indent_restore(c
, saved_indent_level
);
555 static int read_member(deark
*c
, lctx
*d
, i64 pos1
, i64
*bytes_consumed_member
)
557 int saved_indent_level
;
559 struct member_data
*md
= NULL
;
560 struct phys_member_data
*pmd
= NULL
;
561 struct extattr_data
*ea
= NULL
;
563 unsigned int snflags
;
567 de_dbg_indent_save(c
, &saved_indent_level
);
569 de_dbg(c
, "logical archive member at %"I64_FMT
, pos1
);
572 md
= de_malloc(c
, sizeof(struct member_data
));
573 md
->fi
= de_finfo_create(c
);
574 md
->fi
->detect_root_dot_dir
= 1;
575 md
->filename
= ucstring_create(c
);
577 ea
= de_malloc(c
, sizeof(struct extattr_data
));
580 int is_supplemental_item
= 0;
582 if(pos
>= c
->infile
->len
) goto done
;
586 pmd
= de_malloc(c
, sizeof(struct phys_member_data
));
588 if(!read_phys_member_header(c
, d
, pmd
, pos
)) {
593 if(pmd
->linkflag
=='L' || pmd
->linkflag
=='K') {
594 is_supplemental_item
= 1;
595 read_gnu_longpath(c
, d
, pmd
, ea
);
597 else if(pmd
->linkflag
== 'x' || pmd
->linkflag
== 'X') {
598 is_supplemental_item
= 1;
599 read_exthdr(c
, d
, pmd
, ea
);
601 else if(pmd
->linkflag
== 'g') {
602 read_exthdr(c
, d
, pmd
, d
->global_ea
);
604 // TODO: linkflag 'K'
606 if(!is_supplemental_item
) {
610 // Prepare to read the next physical member
611 pos
+= de_pad_to_n(pmd
->filesize
, 512);
615 // At this point, pmd is the main physical member for this logical file.
616 // Any other 'pmd's have been discarded, other than extended attributes
617 // that were recorded in ea.
619 if(ea
->has_alt_size
) {
620 pmd
->filesize
= ea
->alt_size
;
622 pos
+= de_pad_to_n(pmd
->filesize
, 512);
626 if((pmd
->checksum
!= pmd
->checksum_calc
) && c
->extract_level
<2) {
627 // TODO: This is little more than a hack, so that we don't extract so
628 // much garbage if the file is corrupt, or we go off the rails.
629 // There are more robust ways to deal with such issues.
630 de_dbg(c
, "[not extracting, due to bad checksum]");
634 // Decide on a filename
635 if(ucstring_isnonempty(ea
->alt_name
)) {
636 ucstring_append_ucstring(md
->filename
, ea
->alt_name
);
639 if(ucstring_isnonempty(pmd
->prefix
)) {
640 ucstring_append_ucstring(md
->filename
, pmd
->prefix
);
641 ucstring_append_char(md
->filename
, '/');
643 if(ucstring_isnonempty(pmd
->name
)) {
644 ucstring_append_ucstring(md
->filename
, pmd
->name
);
648 // Try to figure out what kind of "file" this is.
650 if(pmd
->linkflag
=='2') {
653 else if(pmd
->fmt
==TARFMT_POSIX
|| pmd
->fmt
==TARFMT_STAR
) {
654 if(pmd
->linkflag
=='0' || pmd
->linkflag
==0) {
655 md
->is_regular_file
= 1;
657 else if(pmd
->linkflag
=='5') {
661 else if(pmd
->fmt
==TARFMT_GNU
) {
662 if(pmd
->linkflag
=='0' || pmd
->linkflag
=='7' || pmd
->linkflag
==0) {
663 md
->is_regular_file
= 1;
665 else if(pmd
->linkflag
=='5') {
670 if(pmd
->name
->len
>=1 && pmd
->name
->str
[pmd
->name
->len
-1]=='/') {
673 else if(pmd
->linkflag
==0 || pmd
->linkflag
=='0') {
674 md
->is_regular_file
= 1;
678 if(ea
->main_file_is_special
) {
679 md
->is_regular_file
= 0;
682 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, pmd
->file_data_pos
,
685 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
686 if(ea
->alt_timestamps
[tsidx
].is_valid
) {
687 md
->fi
->timestamp
[tsidx
] = ea
->alt_timestamps
[tsidx
];
689 else if(pmd
->timestamps
[tsidx
].is_valid
) {
690 md
->fi
->timestamp
[tsidx
] = pmd
->timestamps
[tsidx
];
694 if(!md
->is_regular_file
&& !md
->is_dir
) {
695 de_warn(c
, "\"%s\" is a %s. It will not be extracted as such.",
696 ucstring_getpsz(md
->filename
),
697 md
->is_symlink
?"symlink":"special file");
700 snflags
= DE_SNFLAG_FULLPATH
;
702 md
->fi
->is_directory
= 1;
703 snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
705 else if(md
->is_regular_file
) {
706 if((pmd
->mode
& 0111)!=0) {
707 md
->fi
->mode_flags
|= DE_MODEFLAG_EXE
;
710 md
->fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
713 de_finfo_set_name_from_ucstring(c
, md
->fi
, md
->filename
, snflags
);
714 md
->fi
->original_filename_flag
= 1;
716 if(pmd
->file_data_pos
+ pmd
->filesize
> c
->infile
->len
) goto done
;
718 outf
= dbuf_create_output_file(c
, NULL
, md
->fi
, 0);
720 // If a symlink has no data, write the 'linkname' field instead.
721 if(md
->is_symlink
&& pmd
->filesize
==0) {
722 if(ucstring_isnonempty(ea
->linkname
)) {
723 ucstring_write_as_utf8(c
, ea
->linkname
, outf
, 0);
726 else if(pmd
->linkname
) {
727 dbuf_write(outf
, (const u8
*)pmd
->linkname
->sz
,
728 (i64
)pmd
->linkname
->sz_strlen
);
733 dbuf_copy(c
->infile
, pmd
->file_data_pos
, pmd
->filesize
, outf
);
737 *bytes_consumed_member
= pos
- pos1
;
739 destroy_extattr_data(c
, ea
);
741 ucstring_destroy(md
->filename
);
742 de_finfo_destroy(c
, md
->fi
);
745 de_dbg_indent_restore(c
, saved_indent_level
);
// Module "run" function for tar archives. Allocates the local context and the
// storage for global extended attributes, selects the input encoding (UTF-8
// by default), and then reads archive members one after another via
// read_member().
749 static void de_run_tar(deark
*c
, de_module_params
*mparams
)
756 d
= de_malloc(c
, sizeof(lctx
));
758 d
->global_ea
= de_malloc(c
, sizeof(struct extattr_data
));
760 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_UTF8
);
// Reading stops once a trailer record has been seen or the end of the
// input file is reached.
764 if(d
->found_trailer
) break;
765 if(pos
>= c
->infile
->len
) break;
// Fewer than 512 bytes remain: too short for another header block, so the
// leftover bytes are only reported.
766 if(pos
+512 > c
->infile
->len
) {
767 de_warn(c
, "Ignoring %d extra bytes at end of file", (int)(c
->infile
->len
- pos
));
// Also stop if the member could not be read, or if it reports a length
// below 1.
771 ret
= read_member(c
, d
, pos
, &item_len
);
772 if(!ret
|| item_len
<1) break;
777 destroy_extattr_data(c
, d
->global_ea
);
// Module "identify" function: returns a confidence score that the input file
// is a tar archive. A "ustar" signature at offset 257 scores 100 when the
// file also has a ".tar" extension and 90 otherwise. Files without a
// recognized signature are only considered when they have the extension, and
// are then scored from the layout of the 8-byte checksum field at offset 148.
782 static int de_identify_tar(deark
*c
)
789 has_ext
= de_input_file_has_ext(c
, "tar");
790 if(!dbuf_memcmp(c
->infile
, 257, "ustar", 5)) {
791 return has_ext
? 100 : 90;
795 if(!dbuf_memcmp(c
->infile
, 508, "tar\0", 4)) {
800 // Try to detect tar formats that don't have the "ustar" identifier.
801 if(!has_ext
) return 0;
803 // The 'checksum' field has a fairly distinctive format.
804 // "This field should be stored as six octal digits followed by a null and
805 // a space character."
807 de_read(buf
, 148, 8);
// Examine the field byte by byte: octal digits are distinguished from
// spaces and from any other byte.
810 if(buf
[k
]>='0' && buf
[k
]<='7') {
813 else if(buf
[k
]!=' ') {
// At least one octal digit is required.
817 if(digit_count
<1) return 0;
// NUL-then-space (the documented order) scores higher than the reversed
// space-then-NUL order.
818 if(buf
[6]==0x00 && buf
[7]==' ') return 60;
819 if(buf
[6]==' ' && buf
[7]==0x00) return 15;
// Module registration function: fills in the module's description and its
// run and identify callbacks in 'mi'.
823 void de_module_tar(deark
*c
, struct deark_module_info
*mi
)
826 mi
->desc
= "tar archive";
827 mi
->run_fn
= de_run_tar
;
828 mi
->identify_fn
= de_identify_tar
;