lzah: Improved some debug messages
[deark.git] / modules / stuffit.c
blob8c93b904733dddf61208dbf34c03bf74f004a553
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // StuffIt
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_stuffit);
11 #define MAX_NESTING_LEVEL 32
13 struct cmpr_meth_info;
15 struct fork_data {
16 u8 is_rsrc_fork;
17 u8 cmpr_meth_etc;
18 #define CMPR_NONE 0
19 #define CMPR_RLE 1
20 #define CMPR_LZW 2
21 #define CMPR_HUFFMAN 3
22 #define CMPR_LZAH 5
23 #define CMPR_FIXEDHUFF 6
24 #define CMPR_MW 8
25 #define CMPR_LZHUFF 13
26 u8 is_a_file;
27 u8 cmpr_meth;
28 u8 is_encrypted;
29 u32 crc_reported;
30 i64 unc_len;
31 i64 cmpr_pos;
32 i64 cmpr_len;
33 const char *forkname;
34 const struct cmpr_meth_info *cmi;
35 u8 decompress_succeeded;
38 struct member_data {
39 u8 is_folder;
40 unsigned int finder_flags;
41 struct de_advfile *advf;
42 struct de_stringreaderdata *fname;
43 de_ucstring *full_fname;
44 struct de_fourcc filetype;
45 struct de_fourcc creator;
46 struct de_timestamp mod_time;
47 struct de_timestamp create_time;
48 struct fork_data rfork;
49 struct fork_data dfork;
50 i64 v5_next_member_pos;
51 i64 v5_first_entry_pos; // valid if is_folder
52 i64 v5_num_files_in_folder; // valid if is_folder
53 u8 v5_need_strarray_pop;
56 typedef struct localctx_struct {
57 int file_fmt; // 1=old, 2=new
58 int input_encoding;
59 int nmembers;
60 int subdir_level;
61 u8 ver;
62 i64 archive_size;
63 struct de_strarray *curpath;
64 struct de_crcobj *crco_rfork;
65 struct de_crcobj *crco_dfork;
66 struct de_crcobj *crco_hdr;
67 u8 v5_archive_flags;
68 i64 v5_first_entry_pos; // for the root directory
69 struct de_inthashtable *v5_offsets_seen;
70 } lctx;
72 typedef void (*decompressor_fn)(deark *c, lctx *d, struct member_data *md,
73 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
74 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres);
76 struct cmpr_meth_info {
77 u8 id;
78 const char *name;
79 decompressor_fn decompressor;
82 static void do_decompr_uncompressed(deark *c, lctx *d, struct member_data *md,
83 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
84 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
86 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
89 static void do_decompr_rle(deark *c, lctx *d, struct member_data *md,
90 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
91 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
93 fmtutil_decompress_rle90_ex(c, dcmpri, dcmpro, dres, 0);
96 static void do_decompr_lzw(deark *c, lctx *d, struct member_data *md,
97 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
98 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
100 struct de_lzw_params delzwp;
102 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
103 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
104 // TODO: What are the right lzw settings?
105 delzwp.max_code_size = 14;
106 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
109 struct sit_huffctx {
110 deark *c;
111 const char *modname;
112 struct de_dfilter_in_params *dcmpri;
113 struct de_dfilter_out_params *dcmpro;
114 struct de_dfilter_results *dres;
115 struct fmtutil_huffman_decoder *ht;
116 int errflag;
117 struct de_bitreader bitrd;
120 // A recursive function to read the tree definition.
121 static void sit_huff_read_tree(struct sit_huffctx *hctx, u64 curr_code, UI curr_code_nbits)
123 u8 x;
125 if(curr_code_nbits>FMTUTIL_HUFFMAN_MAX_CODE_LENGTH) {
126 hctx->errflag = 1;
128 if(hctx->bitrd.eof_flag || hctx->errflag) return;
130 x = (u8)de_bitreader_getbits(&hctx->bitrd, 1);
131 if(hctx->bitrd.eof_flag) return;
133 if(x==0) {
134 sit_huff_read_tree(hctx, curr_code<<1, curr_code_nbits+1);
135 if(hctx->bitrd.eof_flag || hctx->errflag) return;
136 sit_huff_read_tree(hctx, (curr_code<<1) | 1, curr_code_nbits+1);
138 else {
139 int ret;
140 fmtutil_huffman_valtype val;
142 val = (fmtutil_huffman_valtype)de_bitreader_getbits(&hctx->bitrd, 8);
143 if(hctx->c->debug_level>=2) {
144 char b2buf[72];
146 de_dbg(hctx->c, "code: \"%s\" = %d",
147 de_print_base2_fixed(b2buf, sizeof(b2buf), curr_code, curr_code_nbits),
148 (int)val);
150 ret = fmtutil_huffman_add_code(hctx->c, hctx->ht->bk, curr_code, curr_code_nbits, val);
151 if(!ret) {
152 hctx->errflag = 1;
157 // While its code is no longer used by Deark, I credit:
158 // Unsit Version 1 (January 15, 1988), for StuffIt 1.31: unsit.c
159 // by Allan G. Weber
160 // for helping me understand the StuffIt type 3 (Huffman) compression format.
161 static void do_decompr_huffman(deark *c, lctx *d, struct member_data *md,
162 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
163 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
165 struct sit_huffctx *hctx = NULL;
166 i64 nbytes_written = 0;
167 char pos_descr[32];
169 hctx = de_malloc(c, sizeof(struct sit_huffctx));
170 hctx->c = c;
171 hctx->modname = "huffman";
172 hctx->dcmpri = dcmpri;
173 hctx->dcmpro = dcmpro;
174 hctx->dres = dres;
175 hctx->ht = fmtutil_huffman_create_decoder(c, 256, 512);
177 hctx->bitrd.f = dcmpri->f;
178 hctx->bitrd.curpos = dcmpri->pos;
179 hctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
181 // Read the tree definition
182 de_dbg2(c, "interpreted huffman codebook:");
183 de_dbg_indent(c, 1);
184 sit_huff_read_tree(hctx, 0, 0);
185 de_dbg_indent(c, -1);
186 if(hctx->errflag) goto done;
187 if(c->debug_level>=4) {
188 fmtutil_huffman_dump(c, hctx->ht);
190 if(fmtutil_huffman_get_max_bits(hctx->ht->bk)<1) {
191 goto done;
194 // Read the data section
195 de_bitreader_describe_curpos(&hctx->bitrd, pos_descr, sizeof(pos_descr));
196 de_dbg(c, "cmpr data codes at %s", pos_descr);
197 while(1) {
198 int ret;
199 fmtutil_huffman_valtype val = 0;
201 if(dcmpro->len_known) {
202 if(nbytes_written >= dcmpro->expected_len) break;
205 if(hctx->bitrd.eof_flag || hctx->errflag) break;
207 ret = fmtutil_huffman_read_next_value(hctx->ht->bk, &hctx->bitrd, &val, NULL);
208 if(!ret) {
209 if(hctx->bitrd.eof_flag) break;
210 hctx->errflag = 1;
211 break;
214 dbuf_writebyte(dcmpro->f, (u8)val);
215 nbytes_written++;
218 done:
219 if(hctx->errflag) {
220 de_dfilter_set_generic_error(c, dres, hctx->modname);
222 if(hctx) {
223 fmtutil_huffman_destroy_decoder(c, hctx->ht);
224 de_free(c, hctx);
228 // -------- LZAH (type 5) decompression --------
230 static void do_decompr_lzah(deark *c, lctx *d, struct member_data *md,
231 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
232 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
234 fmtutil_lh1_codectype1(c, dcmpri, dcmpro, dres, NULL);
237 // -------- "Fixed Huffman" (type 6) decompression --------
239 // There are FIXEDHUFF_NUMCODES Huffman codes, whose low-level decoded values
240 // are 0...(FIXEDHUFF_NUMCODES-1).
241 // The fixed Huffman encoding is not canonical. The codes are ordered by their
242 // low-level decoded value, not by their bit length.
243 // While the set of Huffman codes is fixed, the interpretation of those codes
244 // is different in each block. We don't actually change the Huffman "values",
245 // though -- instead we use a translation table (hctx->translation).
247 // This compression type doesn't seem to be very common. A sample file:
248 // http://cd.textfiles.com/thegreatunsorted/old_apps/archivers/zipit.sea
250 // Credit: I used the macunpack program from the macutil software as
251 // documentation for this format, though none of its source code is used here.
253 #define FIXEDHUFF_NUMCODES 257
255 struct sit_fixedhuffctx {
256 deark *c;
257 const char *modname;
258 struct de_dfilter_in_params *dcmpri;
259 struct de_dfilter_out_params *dcmpro;
260 struct de_dfilter_results *dres;
261 struct fmtutil_huffman_decoder *ht;
262 int errflag;
263 u8 translation[256];
266 static void sit_fixedhuff_init_tree(struct sit_fixedhuffctx *hctx)
268 deark *c = hctx->c;
269 size_t i, k;
270 size_t cdlen_curpos;
271 UI prev_code_bit_length = 0;
272 u64 prev_code = 0; // valid if prev_code_bit_length>0
273 int saved_indent_level;
274 char b2buf[72];
275 static const u8 cdlen_RLEcounts [13] = {1, 1, 4,12,32,16,49, 2,2,40,95, 2, 1};
276 static const u8 cdlen_RLElengths[13] = {3, 4, 5, 6, 7, 8, 9,10,9,10,11,13,12};
277 u8 code_lengths[FIXEDHUFF_NUMCODES];
279 de_dbg_indent_save(c, &saved_indent_level);
280 de_dbgx(c, 4, "standard huffman codebook:");
281 de_dbg_indent(c, 1);
283 // "Decompress" cdlen_RLE*[] to code_lengths[].
284 cdlen_curpos = 0;
285 for(i=0; i<DE_ARRAYCOUNT(cdlen_RLEcounts); i++) {
286 for(k=0; k<(size_t)cdlen_RLEcounts[i]; k++) {
287 if(cdlen_curpos>=FIXEDHUFF_NUMCODES) goto done;
288 code_lengths[cdlen_curpos++] = cdlen_RLElengths[i];
292 // This is similar to fmtutil_huffman_make_canonical_tree(), but different.
293 // Maybe it would be a useful library function.
294 for(i=0; i<FIXEDHUFF_NUMCODES; i++) {
295 u64 thiscode;
296 UI symlen;
297 int ret;
299 symlen = (UI)code_lengths[i];
301 if(prev_code_bit_length==0) { // this is the first code
302 thiscode = 0;
304 else if(symlen < prev_code_bit_length) {
305 thiscode = prev_code >> (prev_code_bit_length - symlen);
306 thiscode++;
308 else {
309 thiscode = prev_code + 1;
310 if(symlen > prev_code_bit_length) {
311 thiscode <<= (symlen - prev_code_bit_length);
315 prev_code_bit_length = symlen;
316 prev_code = thiscode;
318 if(c->debug_level>=4) {
319 de_dbg3(c, "code: \"%s\" = %d",
320 de_print_base2_fixed(b2buf, sizeof(b2buf), thiscode, symlen), (int)i);
322 ret = fmtutil_huffman_add_code(c, hctx->ht->bk, thiscode, symlen, (fmtutil_huffman_valtype)i);
323 if(!ret) {
324 hctx->errflag = 1;
325 goto done;
329 done:
330 de_dbg_indent_restore(c, saved_indent_level);
333 static void do_decompr_fixedhuff(deark *c, lctx *d, struct member_data *md,
334 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
335 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
337 struct sit_fixedhuffctx *hctx = NULL;
338 i64 i;
339 i64 pos, endpos;
340 i64 nbytes_written = 0;
341 int saved_indent_level;
342 struct de_dfilter_ctx *pb_dfctx = NULL;
343 struct de_dfilter_out_params pb_dcmpro;
344 struct de_dfilter_results pb_dres;
346 de_dbg_indent_save(c, &saved_indent_level);
347 hctx = de_malloc(c, sizeof(struct sit_fixedhuffctx));
348 hctx->c = c;
349 hctx->modname = "fixedhuffman";
350 hctx->dcmpri = dcmpri;
351 hctx->dcmpro = dcmpro;
352 hctx->dres = dres;
353 hctx->ht = fmtutil_huffman_create_decoder(c, FIXEDHUFF_NUMCODES, 0);
355 sit_fixedhuff_init_tree(hctx);
356 if(hctx->errflag) goto done;
358 if(c->debug_level>=4) {
359 fmtutil_huffman_dump(c, hctx->ht);
362 pos = dcmpri->pos;
363 endpos = dcmpri->pos + dcmpri->len;
365 while(1) { // For each block...
366 i64 blocksize_raw;
367 i64 blocksize;
368 i64 block_endpos;
369 i64 ndefs;
370 i64 prev_len;
371 i64 nbytes_written_this_block;
373 if(hctx->errflag) goto done;
374 if(dcmpro->len_known && (nbytes_written>=dcmpro->expected_len)) {
375 de_dbg2(c, "[stopping due to sufficient output]");
376 goto done;
378 if(pos + 4 > endpos) {
379 de_dbg2(c, "[stopping, no room for a block at %"I64_FMT"]", pos);
380 goto done;
382 de_dbg2(c, "block at %"I64_FMT, pos);
383 de_dbg_indent(c, 1);
385 blocksize_raw = dbuf_geti32be_p(dcmpri->f, &pos);
386 de_dbg2(c, "block size code: %"I64_FMT, blocksize_raw);
388 if(pb_dfctx) {
389 de_dfilter_destroy(pb_dfctx);
390 pb_dfctx = NULL;
392 de_dfilter_init_objects(c, NULL, &pb_dcmpro, &pb_dres);
393 pb_dcmpro.f = dcmpro->f;
394 if(dcmpro->len_known) {
395 // We apparently aren't told this block's decompressed size after PackBits
396 // decompression.
397 // Set the PackBits decoder's expected output len (really max len)
398 // to the maximum possible number of decompressed bytes still needed.
399 pb_dcmpro.expected_len = dcmpro->expected_len - nbytes_written;
400 pb_dcmpro.len_known = 1;
402 pb_dfctx = de_dfilter_create(c, dfilter_packbits_codec, NULL, &pb_dcmpro, &pb_dres);
404 prev_len = dcmpro->f->len;
406 if(blocksize_raw >= 0) { // PackBits + Huffman
407 i64 intermediate_len;
408 i64 nbytes_decoded_intermed = 0; // After Huffman decompression, before packbits
409 struct de_bitreader bitrd;
411 blocksize = blocksize_raw;
412 if(blocksize<10) {
413 goto done;
416 block_endpos = pos - 4 + blocksize;
417 if(block_endpos > endpos) {
418 hctx->errflag = 1;
419 goto done;
422 // This field seems to be the 'size in bytes' after Huffman decompression,
423 // as opposed to (say) the number of Huffman codes, which should be one
424 // larger (for the STOP code).
425 intermediate_len = dbuf_getu32be_p(dcmpri->f, &pos);
426 de_dbg2(c, "intermediate len: %"I64_FMT, intermediate_len);
427 if(intermediate_len > DE_MAX_SANE_OBJECT_SIZE) { // TODO what should the limit be?
428 hctx->errflag = 1;
429 goto done;
432 ndefs = dbuf_geti16be_p(dcmpri->f, &pos);
433 de_dbg2(c, "num code defs: %d", (int)ndefs);
435 if(ndefs<0 || ndefs>256) {
436 de_dfilter_set_errorf(c, dres, hctx->modname, "Can't handle num_defs=%d", (int)ndefs);
437 goto done;
440 for(i=0; i<ndefs; i++) {
441 hctx->translation[i] = dbuf_getbyte_p(dcmpri->f, &pos);
442 if(c->debug_level>=3) {
443 de_dbg3(c, "ll:%d = hl:%u", (int)i, (UI)hctx->translation[i]);
447 de_dbg2(c, "compressed data (PackBits+Huffman) at %"I64_FMT, pos);
448 de_zeromem(&bitrd, sizeof(struct de_bitreader));
449 bitrd.f = dcmpri->f;
450 bitrd.curpos = pos;
451 bitrd.endpos = block_endpos;
453 while(1) {
454 int ret;
455 fmtutil_huffman_valtype val = 0;
457 if(nbytes_decoded_intermed >= intermediate_len) break; // Have enough output data
459 ret = fmtutil_huffman_read_next_value(hctx->ht->bk, &bitrd, &val, NULL);
460 if(bitrd.eof_flag) break;
461 if(!ret) {
462 de_dfilter_set_errorf(c, dres, hctx->modname, "Error reading Huffman codes");
463 goto done;
465 if(val<0 || val>255) {
466 break; // "stop" code
469 de_dfilter_addbuf(pb_dfctx, &hctx->translation[(int)val], 1);
470 nbytes_decoded_intermed++;
473 else { // just PackBits
474 blocksize = -blocksize_raw;
476 if(blocksize<4) {
477 goto done;
480 block_endpos = pos - 4 + blocksize;
481 if(block_endpos > endpos) {
482 hctx->errflag = 1;
483 goto done;
486 de_dbg2(c, "compressed data (PackBits) at %"I64_FMT, pos);
487 de_dfilter_addslice(pb_dfctx, dcmpri->f, pos, blocksize-4);
490 // Note: I'm assuming that each block is compressed independently (with
491 // PackBits), but I'm not 100% sure. It could be that the whole file is
492 // first compressed with PackBits, and then split into segments. If so,
493 // this won't always work.
494 nbytes_written_this_block = dcmpro->f->len - prev_len;
495 de_dbg2(c, "decompressed to %"I64_FMT" bytes", nbytes_written_this_block);
496 nbytes_written += nbytes_written_this_block;
498 pos = block_endpos;
499 de_dbg_indent(c, -1);
502 done:
503 if(pb_dfctx) de_dfilter_destroy(pb_dfctx);
505 if(hctx) {
506 if(hctx->errflag) {
507 de_dfilter_set_generic_error(c, dres, hctx->modname);
510 fmtutil_huffman_destroy_decoder(c, hctx->ht);
511 de_free(c, hctx);
514 de_dbg_indent_restore(c, saved_indent_level);
517 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
518 { CMPR_NONE, "uncompressed", do_decompr_uncompressed },
519 { CMPR_RLE, "RLE", do_decompr_rle },
520 { CMPR_LZW, "LZW", do_decompr_lzw },
521 { CMPR_HUFFMAN, "Huffman", do_decompr_huffman },
522 { CMPR_LZAH, "LZAH", do_decompr_lzah },
523 { CMPR_FIXEDHUFF, "fixed Huffman", do_decompr_fixedhuff },
524 { CMPR_MW, "MW", NULL },
525 { CMPR_LZHUFF, "LZ+Huffman", NULL },
526 { 14, "installer", NULL },
527 { 15, "Arsenic", NULL }
530 static const struct cmpr_meth_info *find_cmpr_meth_info(deark *c, u8 id)
532 size_t k;
534 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
535 if(id == cmpr_meth_info_arr[k].id)
536 return &cmpr_meth_info_arr[k];
538 return NULL;
541 // Given a 'fork_data' fk with fk.cmpr_meth_etc set,
542 // - sets fk.is_a_file
543 // - sets fk.cmpr_meth
544 // - sets fk.is_encrypted
545 // - sets fk.cmi
546 // - writes a description to the 's' string
547 static void decode_cmpr_meth(deark *c, lctx *d, struct fork_data *fk,
548 de_ucstring *s)
550 const char *name = NULL;
551 u8 cmpr = fk->cmpr_meth_etc;
553 if(d->file_fmt==1 && cmpr<32 && (cmpr & 16)) {
554 fk->is_encrypted = 1;
555 cmpr -= 16;
558 if(d->file_fmt==2 || cmpr<16) {
559 fk->is_a_file = 1;
560 fk->cmpr_meth = cmpr;
563 if(fk->is_a_file) {
564 fk->cmi = find_cmpr_meth_info(c, fk->cmpr_meth);
567 if(fk->cmi) {
568 name = fk->cmi->name;
570 else if(d->file_fmt==1 && fk->cmpr_meth_etc==32) {
571 name = "folder";
573 else if(d->file_fmt==1 && fk->cmpr_meth_etc==33) {
574 name = "end of folder marker";
577 if(!name) name="?";
578 ucstring_append_flags_item(s, name);
579 if(d->file_fmt==1 && fk->is_encrypted) {
580 ucstring_append_flags_item(s, "encrypted");
584 static int do_member_header(deark *c, lctx *d, struct member_data *md, i64 pos1)
586 i64 pos = pos1;
587 i64 fnlen;
588 i64 n;
589 u32 hdr_crc_reported;
590 u32 hdr_crc_calc;
591 de_ucstring *descr = NULL;
592 int saved_indent_level;
593 char timestamp_buf[64];
595 de_dbg_indent_save(c, &saved_indent_level);
596 de_dbg(c, "member header at %"I64_FMT, pos1);
597 de_dbg_indent(c, 1);
599 md->rfork.cmpr_meth_etc = de_getbyte_p(&pos);
600 descr = ucstring_create(c);
601 decode_cmpr_meth(c, d, &md->rfork, descr);
602 de_dbg(c, "rsrc cmpr meth (etc.): %u (%s)", (unsigned int)md->rfork.cmpr_meth_etc,
603 ucstring_getpsz(descr));
605 md->dfork.cmpr_meth_etc = de_getbyte_p(&pos);
606 ucstring_empty(descr);
607 decode_cmpr_meth(c, d, &md->dfork, descr);
608 de_dbg(c, "data cmpr meth (etc.): %u (%s)", (unsigned int)md->dfork.cmpr_meth_etc,
609 ucstring_getpsz(descr));
611 fnlen = (i64)de_getbyte_p(&pos);
612 if(fnlen>63) fnlen=63;
613 md->fname = dbuf_read_string(c->infile, pos, fnlen, fnlen, 0, d->input_encoding);
614 de_dbg(c, "filename: \"%s\"", ucstring_getpsz(md->fname->str));
615 pos += 63;
617 if(md->dfork.is_a_file || md->rfork.is_a_file) {
618 dbuf_read_fourcc(c->infile, pos, &md->filetype, 4, 0x0);
619 de_dbg(c, "filetype: '%s'", md->filetype.id_dbgstr);
620 de_memcpy(md->advf->typecode, md->filetype.bytes, 4);
621 md->advf->has_typecode = 1;
622 pos += 4;
623 dbuf_read_fourcc(c->infile, pos, &md->creator, 4, 0x0);
624 de_dbg(c, "creator: '%s'", md->creator.id_dbgstr);
625 de_memcpy(md->advf->creatorcode, md->creator.bytes, 4);
626 md->advf->has_creatorcode = 1;
627 pos += 4;
629 md->finder_flags = (unsigned int)de_getu16be_p(&pos);
630 de_dbg(c, "finder flags: 0x%04x", md->finder_flags);
631 md->advf->finderflags = (u16)md->finder_flags;
632 md->advf->has_finderflags = 1;
634 else {
635 // Don't know if these fields mean anything for folders.
636 // Possibly they're the first 10 bytes of DInfo (Finder Info for
637 // folders), though that seems a little odd.
638 pos += 10;
641 n = de_getu32be_p(&pos);
642 de_mac_time_to_timestamp(n, &md->create_time);
643 de_timestamp_to_string(&md->create_time, timestamp_buf, sizeof(timestamp_buf), 0);
644 de_dbg(c, "create time: %"I64_FMT" (%s)", n, timestamp_buf);
645 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
647 n = de_getu32be_p(&pos);
648 de_mac_time_to_timestamp(n, &md->mod_time);
649 de_timestamp_to_string(&md->mod_time, timestamp_buf, sizeof(timestamp_buf), 0);
650 de_dbg(c, "mod time: %"I64_FMT" (%s)", n, timestamp_buf);
651 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
653 md->rfork.unc_len = de_getu32be_p(&pos);
654 md->dfork.unc_len = de_getu32be_p(&pos);
655 md->rfork.cmpr_len = de_getu32be_p(&pos);
656 md->dfork.cmpr_len = de_getu32be_p(&pos);
657 de_dbg(c, "rsrc uncmpr len: %"I64_FMT, md->rfork.unc_len);
658 de_dbg(c, "rsrc cmpr len: %"I64_FMT, md->rfork.cmpr_len);
659 de_dbg(c, "data uncmpr len: %"I64_FMT, md->dfork.unc_len);
660 de_dbg(c, "data cmpr len: %"I64_FMT, md->dfork.cmpr_len);
662 md->rfork.crc_reported = (u32)de_getu16be_p(&pos);
663 de_dbg(c, "rsrc crc (reported): 0x%04x", (UI)md->rfork.crc_reported);
664 md->dfork.crc_reported = (u32)de_getu16be_p(&pos);
665 de_dbg(c, "data crc (reported): 0x%04x", (UI)md->dfork.crc_reported);
667 pos += 6; // reserved, etc.
669 hdr_crc_reported = (u32)de_getu16be_p(&pos);
670 de_dbg(c, "header crc (reported): 0x%04x", (UI)hdr_crc_reported);
672 de_crcobj_reset(d->crco_hdr);
673 de_crcobj_addslice(d->crco_hdr, c->infile, pos1, 110);
674 hdr_crc_calc = de_crcobj_getval(d->crco_hdr);
675 de_dbg(c, "header crc (calculated): 0x%04x", (UI)hdr_crc_calc);
676 if(hdr_crc_reported != hdr_crc_calc) {
677 de_warn(c, "Bad header CRC (reported 0x%04x, calculated 0x%04x)", (UI)hdr_crc_reported,
678 (UI)hdr_crc_calc);
681 de_dbg_indent(c, -1);
683 de_dbg_indent_restore(c, saved_indent_level);
684 ucstring_destroy(descr);
685 return 1;
688 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
690 struct de_crcobj *crco = (struct de_crcobj*)userdata;
691 de_crcobj_addbuf(crco, buf, buf_len);
694 // Sets md->advf->*fork.fork_exists, according to whether we think we
695 // can decompress the fork.
696 static void do_pre_decompress_fork(deark *c, lctx *d, struct member_data *md,
697 struct fork_data *frk)
699 struct de_advfile_forkinfo *advfki;
700 int ok = 0;
702 if(frk->is_rsrc_fork) {
703 advfki = &md->advf->rsrcfork;
705 else {
706 advfki = &md->advf->mainfork;
709 if(!frk->is_a_file) {
710 goto done;
713 // TODO: What is the correct way to determine the nonexistence of a fork?
714 if(frk->unc_len==0 && frk->cmpr_len==0) {
715 goto done;
718 if(frk->cmpr_pos + frk->cmpr_len > c->infile->len) {
719 de_err(c, "Unexpected end of file");
720 goto done;
723 de_dbg(c, "cmpr method: %u (%s)", (unsigned int)frk->cmpr_meth,
724 frk->cmi?frk->cmi->name:"?");
726 if(!frk->cmi) {
727 de_err(c, "Unknown compression method: %u", (unsigned int)frk->cmpr_meth);
728 goto done;
731 if(!frk->cmi->decompressor) {
732 de_err(c, "%s[%s fork]: Unsupported compression method: %u (%s)",
733 ucstring_getpsz_d(md->full_fname), frk->forkname,
734 (unsigned int)frk->cmpr_meth, frk->cmi->name);
735 goto done;
738 if(frk->is_encrypted) {
739 de_err(c, "Encrypted files are not supported");
740 goto done;
743 ok = 1;
745 advfki->writelistener_cb = our_writelistener_cb;
746 if(frk->is_rsrc_fork) {
747 advfki->userdata_for_writelistener = (void*)d->crco_rfork;
748 de_crcobj_reset(d->crco_rfork);
750 else {
751 advfki->userdata_for_writelistener = (void*)d->crco_dfork;
752 de_crcobj_reset(d->crco_dfork);
755 done:
756 advfki->fork_exists = (ok)?1:0;
759 static void do_main_decompress_fork(deark *c, lctx *d, struct member_data *md,
760 struct fork_data *frk, dbuf *outf)
762 struct de_dfilter_in_params dcmpri;
763 struct de_dfilter_out_params dcmpro;
764 struct de_dfilter_results dres;
765 int saved_indent_level;
767 de_dbg_indent_save(c, &saved_indent_level);
768 if(!frk || !frk->cmi || !frk->cmi->decompressor) {
769 goto done;
772 de_dbg(c, "decompressing %s fork", frk->forkname);
773 de_dbg_indent(c, 1);
775 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
776 dcmpri.f = c->infile;
777 dcmpri.pos = frk->cmpr_pos;
778 dcmpri.len = frk->cmpr_len;
779 dcmpro.f = outf;
780 dcmpro.len_known = 1;
781 dcmpro.expected_len = frk->unc_len;
782 frk->cmi->decompressor(c, d, md, frk, &dcmpri, &dcmpro, &dres);
783 if(dres.errcode) {
784 de_err(c, "Decompression failed for file %s[%s fork]: %s", ucstring_getpsz_d(md->full_fname),
785 frk->forkname, de_dfilter_get_errmsg(c, &dres));
786 goto done;
788 frk->decompress_succeeded = 1;
790 done:
791 de_dbg_indent_restore(c, saved_indent_level);
794 static void do_post_decompress_fork(deark *c, lctx *d, struct member_data *md,
795 struct fork_data *frk)
797 u32 crc_calc;
799 if(!frk->decompress_succeeded) goto done;
801 if(frk->is_rsrc_fork) {
802 crc_calc = de_crcobj_getval(d->crco_rfork);
804 else {
805 crc_calc = de_crcobj_getval(d->crco_dfork);
807 de_dbg(c, "%s crc (calculated): 0x%04x", frk->forkname, (unsigned int)crc_calc);
808 if(crc_calc != frk->crc_reported) {
809 de_err(c, "CRC check failed for file %s[%s fork]", ucstring_getpsz_d(md->full_fname),
810 frk->forkname);
812 done:
816 static void do_extract_folder(deark *c, lctx *d, struct member_data *md)
818 dbuf *outf = NULL;
819 de_finfo *fi = NULL;
821 if(!md->is_folder) goto done;
822 fi = de_finfo_create(c);
823 fi->is_directory = 1;
824 de_finfo_set_name_from_ucstring(c, fi, md->full_fname, DE_SNFLAG_FULLPATH);
825 fi->original_filename_flag = 1;
826 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
827 fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
828 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
829 done:
830 dbuf_close(outf);
831 de_finfo_destroy(c, fi);
834 struct advfudata {
835 lctx *d;
836 struct member_data *md;
839 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
840 struct de_advfile_cbparams *afp)
842 struct advfudata *u = (struct advfudata*)advf->userdata;
844 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
845 do_main_decompress_fork(c, u->d, u->md, &u->md->dfork, afp->outf);
847 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
848 do_main_decompress_fork(c, u->d, u->md, &u->md->rfork, afp->outf);
851 return 1;
854 // This is for files only. Use do_extract_folder() for folders.
855 static void do_extract_member_file(deark *c, lctx *d, struct member_data *md)
857 struct advfudata u;
859 ucstring_append_ucstring(md->advf->filename, md->full_fname);
860 md->advf->original_filename_flag = 1;
861 md->advf->snflags = DE_SNFLAG_FULLPATH;
862 de_advfile_set_orig_filename(md->advf, md->fname->sz, md->fname->sz_strlen);
864 // resource fork
865 if(md->rfork.cmpr_len>0) {
866 de_dbg(c, "rsrc fork data at %"I64_FMT", len=%"I64_FMT,
867 md->rfork.cmpr_pos, md->rfork.cmpr_len);
868 md->advf->rsrcfork.fork_len = md->rfork.unc_len;
869 de_dbg_indent(c, 1);
870 do_pre_decompress_fork(c, d, md, &md->rfork);
871 de_dbg_indent(c, -1);
874 // data fork
875 if(md->dfork.cmpr_len>0) {
876 de_dbg(c, "data fork data at %"I64_FMT", len=%"I64_FMT,
877 md->dfork.cmpr_pos, md->dfork.cmpr_len);
878 md->advf->mainfork.fork_len = md->dfork.unc_len;
879 de_dbg_indent(c, 1);
880 do_pre_decompress_fork(c, d, md, &md->dfork);
881 de_dbg_indent(c, -1);
884 u.d = d;
885 u.md = md;
886 md->advf->userdata = (void*)&u;
887 md->advf->writefork_cbfn = my_advfile_cbfn;
888 de_advfile_run(md->advf);
890 if(md->advf->rsrcfork.fork_exists) {
891 do_post_decompress_fork(c, d, md, &md->rfork);
893 if(md->advf->mainfork.fork_exists) {
894 do_post_decompress_fork(c, d, md, &md->dfork);
898 // Returns:
899 // 0 if the member could not be parsed sufficiently to determine its size
900 // 1 normally
901 static int do_member(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
903 i64 pos = pos1;
904 struct member_data *md = NULL;
905 int saved_indent_level;
906 int retval = 0;
907 int curpath_need_pop = 0;
909 *bytes_consumed = 0;
910 de_dbg_indent_save(c, &saved_indent_level);
912 md = de_malloc(c, sizeof(struct member_data));
913 md->rfork.is_rsrc_fork = 1;
914 md->dfork.forkname = "data";
915 md->rfork.forkname = "resource";
917 de_dbg(c, "member at %"I64_FMT, pos1);
918 de_dbg_indent(c, 1);
920 md->advf = de_advfile_create(c);
922 if(!do_member_header(c, d, md, pos)) goto done;
924 *bytes_consumed = 112;
926 if(md->rfork.cmpr_meth_etc==32 || md->dfork.cmpr_meth_etc==32) {
927 md->is_folder = 1;
928 md->rfork.cmpr_len = 0;
929 md->dfork.cmpr_len = 0;
931 else if(md->rfork.cmpr_meth_etc==33 || md->dfork.cmpr_meth_etc==33) {
932 // end of folder marker
933 if(d->subdir_level>0) d->subdir_level--;
934 de_strarray_pop(d->curpath);
935 retval = 1;
936 goto done;
938 else if(md->rfork.cmpr_meth_etc>33 || md->dfork.cmpr_meth_etc>33) {
939 de_err(c, "Unknown member type. Cannot continue.");
940 goto done;
943 *bytes_consumed += md->rfork.cmpr_len + md->dfork.cmpr_len;
944 retval = 1;
946 pos += 112;
948 md->full_fname = ucstring_create(c);
949 de_strarray_push(d->curpath, md->fname->str);
950 curpath_need_pop = 1;
951 de_strarray_make_path(d->curpath, md->full_fname, DE_MPFLAG_NOTRAILINGSLASH);
952 de_dbg(c, "full name: \"%s\"", ucstring_getpsz_d(md->full_fname));
954 if(md->is_folder) {
955 if(d->subdir_level >= MAX_NESTING_LEVEL) {
956 de_err(c, "Directories nested too deeply");
957 retval = 0;
958 goto done;
960 d->subdir_level++;
961 curpath_need_pop = 0;
962 do_extract_folder(c, d, md);
963 goto done;
966 md->rfork.cmpr_pos = pos;
967 pos += md->rfork.cmpr_len;
968 md->dfork.cmpr_pos = pos;
969 //pos += md->dfork.cmpr_len;
971 do_extract_member_file(c, d, md);
973 done:
974 if(curpath_need_pop) {
975 de_strarray_pop(d->curpath);
977 if(md) {
978 de_destroy_stringreaderdata(c, md->fname);
979 ucstring_destroy(md->full_fname);
980 de_advfile_destroy(md->advf);
981 de_free(c, md);
983 de_dbg_indent_restore(c, saved_indent_level);
984 return retval;
987 static int do_master_header(deark *c, lctx *d, i64 pos1)
989 i64 pos = pos1;
991 de_dbg(c, "master header at %d", (int)pos1);
992 de_dbg_indent(c, 1);
993 pos += 4; // signature
995 d->nmembers = (int)de_getu16be_p(&pos);
996 de_dbg(c, "number of members: %d", d->nmembers);
998 d->archive_size = de_getu32be_p(&pos);
999 de_dbg(c, "reported archive file size: %"I64_FMT, d->archive_size);
1001 pos += 4; // expected to be "rLau"
1003 d->ver = de_getbyte_p(&pos);
1004 de_dbg(c, "version: %u", (unsigned int)d->ver);
1006 de_dbg_indent(c, -1);
1007 return 1;
1010 // If nmembers==-1, number of members is unknown
1011 static void do_sequence_of_members(deark *c, lctx *d, i64 pos1)
1013 int root_member_count = 0;
1014 i64 pos = pos1;
1016 while(1) {
1017 int ret;
1018 int is_root_member;
1019 i64 bytes_consumed = 0;
1021 if(pos+112 > c->infile->len) {
1022 if(d->subdir_level==0 && root_member_count!=d->nmembers) {
1023 de_warn(c, "Expected %d top-level member file(s), found %d",
1024 d->nmembers, root_member_count);
1026 break;
1029 // The "number of files" field appears to be untrustworthy, or its meaning
1030 // is not correctly understood.
1031 // FWIW, The Unarchiver also ignores it.
1032 //if((d->subdir_level==0) && (root_member_count >= d->nmembers)) break;
1034 is_root_member = (d->subdir_level==0);
1035 ret = do_member(c, d, pos, &bytes_consumed);
1036 if(ret==0) break;
1037 if(bytes_consumed<1) break;
1038 pos += bytes_consumed;
1039 if(is_root_member) root_member_count++;
1043 static void do_oldfmt(deark *c, lctx *d)
1045 i64 pos = 0;
1047 if(!do_master_header(c, d, pos)) goto done;
1048 pos += 22;
1049 do_sequence_of_members(c, d, pos);
1051 done:
1055 static void do_v5_comment(deark *c, lctx *d, struct member_data *md, i64 pos, i64 len)
1057 de_ucstring *s = NULL;
1059 s = ucstring_create(c);
1060 dbuf_read_to_ucstring_n(c->infile, pos, len, 4096, s, 0, d->input_encoding);
1061 de_dbg(c, "file comment: \"%s\"", ucstring_getpsz_d(s));
1062 ucstring_destroy(s);
1065 static void do_v5_list_of_members(deark *c, lctx *d, i64 first_member_pos,
1066 i64 num_members_expected);
1068 static int do_v5_member_header(deark *c, lctx *d, struct member_data *md, i64 pos1)
1070 i64 pos = pos1;
1071 i64 fnlen, fnlen_sanitized;
1072 i64 n;
1073 i64 hdrsize;
1074 i64 hdr_endpos;
1075 u32 hdr_crc_reported;
1076 u32 hdr_crc_calc;
1077 u8 flags;
1078 de_ucstring *descr = NULL;
1079 int saved_indent_level;
1080 int retval = 0;
1081 char timestamp_buf[64];
1083 de_dbg_indent_save(c, &saved_indent_level);
1084 if(pos1==0) goto done;
1086 de_dbg(c, "member header at %"I64_FMT, pos1);
1087 de_dbg_indent(c, 1);
1089 n = de_getu32be_p(&pos);
1090 if(n!=0xa5a5a5a5) {
1091 de_err(c, "Expected member not found at %"I64_FMT, pos1);
1092 goto done;
1095 descr = ucstring_create(c);
1097 pos++; // ver?
1098 pos++; // ?
1099 hdrsize = de_getu16be_p(&pos);
1100 hdr_endpos = pos1 + hdrsize;
1101 de_dbg(c, "base header at %"I64_FMT", len=%"I64_FMT, pos1, hdrsize);
1102 de_dbg_indent(c, 1);
1103 if(hdrsize<48 || hdrsize>2000) {
1104 de_err(c, "Bad header");
1105 goto done;
1108 // calculate actual header crc
1109 de_crcobj_reset(d->crco_hdr);
1110 de_crcobj_addslice(d->crco_hdr, c->infile, pos1, 32);
1111 de_crcobj_addzeroes(d->crco_hdr, 2);
1112 de_crcobj_addslice(d->crco_hdr, c->infile, pos1+34, hdrsize-34);
1113 hdr_crc_calc = de_crcobj_getval(d->crco_hdr);
1115 pos++; // ?
1116 flags = de_getbyte_p(&pos);
1117 ucstring_empty(descr);
1118 if(flags & 0x40) {
1119 md->is_folder = 1;
1120 ucstring_append_flags_item(descr, "folder");
1122 if(flags & 0x20) {
1123 md->dfork.is_encrypted = 1;
1124 md->rfork.is_encrypted = 1;
1125 ucstring_append_flags_item(descr, "encrypted");
1127 de_dbg(c, "flags: 0x%02x (%s)", (UI)flags, ucstring_getpsz_d(descr));
1129 n = de_getu32be_p(&pos);
1130 de_mac_time_to_timestamp(n, &md->create_time);
1131 de_timestamp_to_string(&md->create_time, timestamp_buf, sizeof(timestamp_buf), 0);
1132 de_dbg(c, "create time: %"I64_FMT" (%s)", n, timestamp_buf);
1133 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
1135 n = de_getu32be_p(&pos);
1136 de_mac_time_to_timestamp(n, &md->mod_time);
1137 de_timestamp_to_string(&md->mod_time, timestamp_buf, sizeof(timestamp_buf), 0);
1138 de_dbg(c, "mod time: %"I64_FMT" (%s)", n, timestamp_buf);
1139 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
1141 n = de_getu32be_p(&pos);
1142 de_dbg(c, "prev: %"I64_FMT, n);
1143 md->v5_next_member_pos = de_getu32be_p(&pos);
1144 de_dbg(c, "next: %"I64_FMT, md->v5_next_member_pos);
1145 retval = 1;
1147 // at offset 26
1148 n = de_getu32be_p(&pos);
1149 de_dbg(c, "parent: %"I64_FMT, n);
1151 fnlen = de_getu16be_p(&pos);
1152 de_dbg(c, "filename len: %u", (UI)fnlen);
1153 fnlen_sanitized = de_min_int(fnlen, 1024);
1155 hdr_crc_reported = (u32)de_getu16be_p(&pos);
1156 de_dbg(c, "header crc (reported): 0x%04x", (UI)hdr_crc_reported);
1157 de_dbg(c, "header crc (calculated): 0x%04x", (UI)hdr_crc_calc);
1158 if(hdr_crc_reported != hdr_crc_calc) {
1159 de_warn(c, "Bad header CRC (reported 0x%04x, calculated 0x%04x)", (UI)hdr_crc_reported,
1160 (UI)hdr_crc_calc);
1163 // at offset 34
1164 if(md->is_folder) {
1165 md->v5_first_entry_pos = de_getu32be_p(&pos);
1166 de_dbg(c, "offset of first entry: %"I64_FMT, md->v5_first_entry_pos);
1168 n = de_getu32be_p(&pos);
1169 de_dbg(c, "folder size: %"I64_FMT, n);
1171 pos += 2; // data fork old crc16
1172 pos += 2; // ?
1174 md->v5_num_files_in_folder = de_getu16be_p(&pos);
1175 de_dbg(c, "number of files: %"I64_FMT, md->v5_num_files_in_folder);
1177 else {
1178 md->dfork.unc_len = de_getu32be_p(&pos);
1179 de_dbg(c, "data fork uncmpr len: %"I64_FMT, md->dfork.unc_len);
1180 // at offset 38
1181 md->dfork.cmpr_len = de_getu32be_p(&pos);
1182 de_dbg(c, "data fork cmpr len: %"I64_FMT, md->dfork.cmpr_len);
1184 md->dfork.crc_reported = (u32)de_getu16be_p(&pos);
1185 de_dbg(c, "data fork old crc (reported): 0x%04x", (UI)md->dfork.crc_reported);
1187 pos += 2; // ?
1189 md->dfork.cmpr_meth_etc = de_getbyte_p(&pos);
1190 ucstring_empty(descr);
1191 decode_cmpr_meth(c, d, &md->dfork, descr);
1192 de_dbg(c, "data fork cmpr meth: %u (%s)", (unsigned int)md->dfork.cmpr_meth_etc,
1193 ucstring_getpsz(descr));
1195 // at offset 47
1196 n = (i64)de_getbyte_p(&pos);
1197 de_dbg(c, "data fork passwd len: %u", (UI)n);
1198 pos += n;
1201 md->fname = dbuf_read_string(c->infile, pos, fnlen_sanitized, fnlen_sanitized, 0, d->input_encoding);
1202 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fname->str));
1203 de_strarray_push(d->curpath, md->fname->str);
1204 md->v5_need_strarray_pop = 1;
1205 pos += fnlen;
1207 if(hdr_endpos-pos >= 5) {
1208 n = de_getu16be_p(&pos); // comment len
1209 pos += 2;
1210 if(pos + n <= hdr_endpos) {
1211 do_v5_comment(c, d, md, pos, n);
1215 de_dbg_indent(c, -1); // end of first part of header
1217 pos = hdr_endpos;
1219 if(!md->is_folder) {
1220 UI flags2;
1222 flags2 = (UI)de_getu16be_p(&pos);
1223 de_dbg(c, "flags2: 0x%04x", flags2);
1224 pos += 2; // ?
1226 dbuf_read_fourcc(c->infile, pos, &md->filetype, 4, 0x0);
1227 de_dbg(c, "filetype: '%s'", md->filetype.id_dbgstr);
1228 de_memcpy(md->advf->typecode, md->filetype.bytes, 4);
1229 md->advf->has_typecode = 1;
1230 pos += 4;
1231 dbuf_read_fourcc(c->infile, pos, &md->creator, 4, 0x0);
1232 de_dbg(c, "creator: '%s'", md->creator.id_dbgstr);
1233 de_memcpy(md->advf->creatorcode, md->creator.bytes, 4);
1234 md->advf->has_creatorcode = 1;
1235 pos += 4;
1237 md->finder_flags = (unsigned int)de_getu16be_p(&pos);
1238 de_dbg(c, "finder flags: 0x%04x", md->finder_flags);
1239 md->advf->finderflags = (u16)md->finder_flags;
1240 md->advf->has_finderflags = 1;
1242 pos += 22; // ?
1244 if(flags2 & 0x0001) {
1245 md->rfork.unc_len = de_getu32be_p(&pos);
1246 de_dbg(c, "rsrc fork uncmpr len: %"I64_FMT, md->rfork.unc_len);
1247 md->rfork.cmpr_len = de_getu32be_p(&pos);
1248 de_dbg(c, "rsrc fork cmpr len: %"I64_FMT, md->rfork.cmpr_len);
1250 md->rfork.crc_reported = (u32)de_getu16be_p(&pos);
1251 de_dbg(c, "rsrc fork old crc (reported): 0x%04x", (UI)md->rfork.crc_reported);
1253 pos += 2; // ?
1255 md->rfork.cmpr_meth_etc = de_getbyte_p(&pos);
1256 ucstring_empty(descr);
1257 decode_cmpr_meth(c, d, &md->rfork, descr);
1258 de_dbg(c, "rsrc fork cmpr meth: %u (%s)", (unsigned int)md->rfork.cmpr_meth_etc,
1259 ucstring_getpsz(descr));
1261 n = (i64)de_getbyte_p(&pos);
1262 de_dbg(c, "rsrc fork passwd len: %u", (UI)n);
1263 pos += n;
1267 if(!md->is_folder) {
1268 md->rfork.cmpr_pos = pos;
1269 pos += md->rfork.cmpr_len;
1271 md->dfork.cmpr_pos = pos;
1272 pos += md->dfork.cmpr_len;
1275 done:
1276 de_dbg_indent_restore(c, saved_indent_level);
1277 ucstring_destroy(descr);
1278 return retval;
1281 static int do_v5_member(deark *c, lctx *d, i64 member_idx,
1282 i64 pos1, i64 *pnext_member_pos)
1284 struct member_data *md = NULL;
1285 int saved_indent_level;
1286 int retval = 0;
1288 de_dbg_indent_save(c, &saved_indent_level);
1290 if(pos1==0) goto done;
1292 if(!de_inthashtable_add_item(c, d->v5_offsets_seen, pos1, NULL)) {
1293 de_err(c, "Loop detected");
1294 goto done;
1297 md = de_malloc(c, sizeof(struct member_data));
1298 md->rfork.is_rsrc_fork = 1;
1299 md->dfork.forkname = "data";
1300 md->rfork.forkname = "resource";
1302 de_dbg(c, "member[%d] at %"I64_FMT, (int)member_idx, pos1);
1303 de_dbg_indent(c, 1);
1305 if(pos1<0 || pos1>=c->infile->len) {
1306 de_err(c, "Bad file offset");
1307 goto done;
1310 md->advf = de_advfile_create(c);
1312 if(!do_v5_member_header(c, d, md, pos1)) goto done;
1313 *pnext_member_pos = md->v5_next_member_pos;
1315 if(!md->full_fname) {
1316 md->full_fname = ucstring_create(c);
1317 de_strarray_make_path(d->curpath, md->full_fname, DE_MPFLAG_NOTRAILINGSLASH);
1319 de_dbg(c, "full name: \"%s\"", ucstring_getpsz_d(md->full_fname));
1321 if(md->is_folder) {
1322 do_extract_folder(c, d, md);
1324 if(d->subdir_level >= MAX_NESTING_LEVEL) {
1325 de_err(c, "Directories nested too deeply");
1326 retval = 0;
1327 goto done;
1329 de_dbg(c, "[folder contents]");
1330 de_dbg_indent(c, 1);
1331 d->subdir_level++;
1332 do_v5_list_of_members(c, d, md->v5_first_entry_pos, md->v5_num_files_in_folder);
1333 d->subdir_level--;
1334 de_dbg_indent(c, -1);
1336 else {
1337 do_extract_member_file(c, d, md);
1340 retval = 1;
1342 done:
1343 if(md) {
1344 if(md->v5_need_strarray_pop) {
1345 de_strarray_pop(d->curpath);
1347 de_destroy_stringreaderdata(c, md->fname);
1348 ucstring_destroy(md->full_fname);
1349 de_advfile_destroy(md->advf);
1350 de_free(c, md);
1352 de_dbg_indent_restore(c, saved_indent_level);
1353 return retval;
1356 static void do_v5_list_of_members(deark *c, lctx *d, i64 first_member_pos,
1357 i64 num_members_expected)
1359 i64 member_count = 0;
1360 i64 pos = first_member_pos;
1362 while(1) {
1363 int ret;
1364 i64 next_pos = 0;
1366 if(pos==0) break;
1367 if(member_count >= num_members_expected) break;
1369 ret = do_v5_member(c, d, member_count, pos, &next_pos);
1370 if(!ret) break;
1371 if(next_pos==0) break;
1373 pos = next_pos;
1374 member_count++;
1378 static int do_v5_archivehdr(deark *c, lctx *d, i64 pos1)
1380 i64 n;
1381 i64 pos = pos1;
1382 int retval = 0;
1384 de_dbg(c, "archive header at %"I64_FMT, pos1);
1385 de_dbg_indent(c, 1);
1386 pos += 80; // text
1387 pos += 2; // ?
1388 n = de_getbyte_p(&pos);
1389 de_dbg(c, "archive version: %u", (UI)n);
1390 d->v5_archive_flags = de_getbyte_p(&pos);
1391 de_dbg(c, "archive flags: 0x%02x", (UI)d->v5_archive_flags);
1393 d->archive_size = de_getu32be_p(&pos);
1394 de_dbg(c, "reported archive file size: %"I64_FMT, d->archive_size);
1396 pos += 4; // ?
1398 d->nmembers = (int)de_getu16be_p(&pos);
1399 de_dbg(c, "number of root members: %d", d->nmembers);
1401 d->v5_first_entry_pos = de_getu32be_p(&pos);
1402 de_dbg(c, "pos of first root member: %"I64_FMT, d->v5_first_entry_pos);
1404 n = de_getu16be_p(&pos);
1405 de_dbg(c, "archive crc (reported): 0x%04x", (UI)n);
1407 //if(d->v5_archive_flags & 0x10) pos += 14; // reserved
1408 // TODO: Archive comment
1409 retval = 1;
1411 de_dbg_indent(c, -1);
1412 return retval;
1415 static void do_v5(deark *c, lctx *d)
1417 d->v5_offsets_seen = de_inthashtable_create(c);
1418 if(!do_v5_archivehdr(c, d, 0)) goto done;
1419 do_v5_list_of_members(c, d, d->v5_first_entry_pos, d->nmembers);
1420 done:
1424 static void de_run_stuffit(deark *c, de_module_params *mparams)
1426 lctx *d = NULL;
1428 d = de_malloc(c, sizeof(lctx));
1430 if(!dbuf_memcmp(c->infile, 0, "SIT!", 4)) {
1431 d->file_fmt = 1;
1433 else if(!dbuf_memcmp(c->infile, 0, "StuffIt ", 8)) {
1434 d->file_fmt = 2;
1436 else {
1437 de_err(c, "Not a StuffIt file, or unknown version.");
1438 goto done;
1441 if(d->file_fmt==2) {
1442 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UTF8);
1444 else {
1445 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
1448 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1449 d->crco_rfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1450 d->crco_dfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1451 d->crco_hdr = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1453 if(d->file_fmt==1) {
1454 de_declare_fmt(c, "StuffIt, old format");
1455 do_oldfmt(c, d);
1457 else if(d->file_fmt==2) {
1458 de_declare_fmt(c, "StuffIt, v5 format");
1459 do_v5(c, d);
1461 else {
1462 de_err(c, "This version of StuffIt format is not supported.");
1465 done:
1466 if(d) {
1467 de_crcobj_destroy(d->crco_rfork);
1468 de_crcobj_destroy(d->crco_dfork);
1469 de_crcobj_destroy(d->crco_hdr);
1470 de_strarray_destroy(d->curpath);
1471 if(d->v5_offsets_seen) de_inthashtable_destroy(c, d->v5_offsets_seen);
1472 de_free(c, d);
1476 static int de_identify_stuffit(deark *c)
1478 u8 buf[9];
1480 de_read(buf, 0, sizeof(buf));
1481 if(!de_memcmp(buf, "SIT!", 4)) {
1482 return 100;
1484 if(!de_memcmp(buf, "StuffIt (", 9)) {
1485 if(de_getbyte(82)==0x05) return 100;
1487 return 0;
1490 void de_module_stuffit(deark *c, struct deark_module_info *mi)
1492 mi->id = "stuffit";
1493 mi->desc = "StuffIt archive";
1494 mi->run_fn = de_run_stuffit;
1495 mi->identify_fn = de_identify_stuffit;