// Commit: Cleaned up some Huffman-related debug messages
// Path: [deark.git] / modules / stuffit.c
// Blob: 50cf55d8b03945f90185865bbb8eaad271176518
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // StuffIt
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_stuffit);
11 #define MAX_NESTING_LEVEL 32
13 struct cmpr_meth_info;
15 struct fork_data {
16 u8 is_rsrc_fork;
17 u8 cmpr_meth_etc;
18 #define CMPR_NONE 0
19 #define CMPR_RLE 1
20 #define CMPR_LZW 2
21 #define CMPR_HUFFMAN 3
22 #define CMPR_LZAH 5
23 #define CMPR_FIXEDHUFF 6
24 #define CMPR_MW 8
25 #define CMPR_LZHUFF 13
26 u8 is_a_file;
27 u8 cmpr_meth;
28 u8 is_encrypted;
29 u32 crc_reported;
30 i64 unc_len;
31 i64 cmpr_pos;
32 i64 cmpr_len;
33 const char *forkname;
34 const struct cmpr_meth_info *cmi;
37 struct member_data {
38 u8 is_folder;
39 unsigned int finder_flags;
40 struct de_advfile *advf;
41 struct de_stringreaderdata *fname;
42 de_ucstring *full_fname;
43 struct de_fourcc filetype;
44 struct de_fourcc creator;
45 struct de_timestamp mod_time;
46 struct de_timestamp create_time;
47 struct fork_data rfork;
48 struct fork_data dfork;
49 i64 v5_next_member_pos;
50 i64 v5_first_entry_pos; // valid if is_folder
51 i64 v5_num_files_in_folder; // valid if is_folder
52 u8 v5_need_strarray_pop;
55 typedef struct localctx_struct {
56 int file_fmt; // 1=old, 2=new
57 int input_encoding;
58 int nmembers;
59 int subdir_level;
60 u8 ver;
61 i64 archive_size;
62 struct de_strarray *curpath;
63 struct de_crcobj *crco_rfork;
64 struct de_crcobj *crco_dfork;
65 struct de_crcobj *crco_hdr;
66 u8 v5_archive_flags;
67 i64 v5_first_entry_pos; // for the root directory
68 struct de_inthashtable *v5_offsets_seen;
69 } lctx;
71 typedef void (*decompressor_fn)(deark *c, lctx *d, struct member_data *md,
72 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
73 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres);
75 struct cmpr_meth_info {
76 u8 id;
77 const char *name;
78 decompressor_fn decompressor;
81 static void do_decompr_uncompressed(deark *c, lctx *d, struct member_data *md,
82 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
83 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
85 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
88 static void do_decompr_rle(deark *c, lctx *d, struct member_data *md,
89 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
90 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
92 fmtutil_decompress_rle90_ex(c, dcmpri, dcmpro, dres, 0);
95 static void do_decompr_lzw(deark *c, lctx *d, struct member_data *md,
96 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
97 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
99 struct de_lzw_params delzwp;
101 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
102 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
103 // TODO: What are the right lzw settings?
104 delzwp.max_code_size = 14;
105 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
108 struct sit_huffctx {
109 deark *c;
110 const char *modname;
111 struct de_dfilter_in_params *dcmpri;
112 struct de_dfilter_out_params *dcmpro;
113 struct de_dfilter_results *dres;
114 struct fmtutil_huffman_tree *ht;
115 int errflag;
116 struct de_bitreader bitrd;
119 // A recursive function to read the tree definition.
120 static void sit_huff_read_tree(struct sit_huffctx *hctx, u64 curr_code, UI curr_code_nbits)
122 u8 x;
124 if(curr_code_nbits>FMTUTIL_HUFFMAN_MAX_CODE_LENGTH) {
125 hctx->errflag = 1;
127 if(hctx->bitrd.eof_flag || hctx->errflag) return;
129 x = (u8)de_bitreader_getbits(&hctx->bitrd, 1);
130 if(hctx->bitrd.eof_flag) return;
132 if(x==0) {
133 sit_huff_read_tree(hctx, curr_code<<1, curr_code_nbits+1);
134 if(hctx->bitrd.eof_flag || hctx->errflag) return;
135 sit_huff_read_tree(hctx, (curr_code<<1) | 1, curr_code_nbits+1);
137 else {
138 int ret;
139 fmtutil_huffman_valtype val;
141 val = (fmtutil_huffman_valtype)de_bitreader_getbits(&hctx->bitrd, 8);
142 if(hctx->c->debug_level>=2) {
143 char b2buf[72];
145 de_dbg(hctx->c, "code: \"%s\" = %d",
146 de_print_base2_fixed(b2buf, sizeof(b2buf), curr_code, curr_code_nbits),
147 (int)val);
149 ret = fmtutil_huffman_add_code(hctx->c, hctx->ht, curr_code, curr_code_nbits, val);
150 if(!ret) {
151 hctx->errflag = 1;
156 // While its code is no longer used by Deark, I credit:
157 // Unsit Version 1 (January 15, 1988), for StuffIt 1.31: unsit.c
158 // by Allan G. Weber
159 // for helping me understand the StuffIt type 3 (Huffman) compression format.
160 static void do_decompr_huffman(deark *c, lctx *d, struct member_data *md,
161 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
162 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
164 struct sit_huffctx *hctx = NULL;
165 i64 nbytes_written = 0;
166 char pos_descr[32];
168 hctx = de_malloc(c, sizeof(struct sit_huffctx));
169 hctx->c = c;
170 hctx->modname = "huffman";
171 hctx->dcmpri = dcmpri;
172 hctx->dcmpro = dcmpro;
173 hctx->dres = dres;
174 hctx->ht = fmtutil_huffman_create_tree(c, 256, 512);
176 hctx->bitrd.f = dcmpri->f;
177 hctx->bitrd.curpos = dcmpri->pos;
178 hctx->bitrd.endpos = dcmpri->pos + dcmpri->len;
180 // Read the tree definition
181 de_dbg2(c, "interpreted huffman codebook:");
182 de_dbg_indent(c, 1);
183 sit_huff_read_tree(hctx, 0, 0);
184 de_dbg_indent(c, -1);
185 if(hctx->errflag) goto done;
186 if(c->debug_level>=4) {
187 fmtutil_huffman_dump(c, hctx->ht);
189 if(fmtutil_huffman_get_max_bits(hctx->ht)<1) {
190 goto done;
193 // Read the data section
194 de_bitreader_describe_curpos(&hctx->bitrd, pos_descr, sizeof(pos_descr));
195 de_dbg(c, "cmpr data codes at %s", pos_descr);
196 while(1) {
197 int ret;
198 fmtutil_huffman_valtype val = 0;
200 if(dcmpro->len_known) {
201 if(nbytes_written >= dcmpro->expected_len) break;
204 if(hctx->bitrd.eof_flag || hctx->errflag) break;
206 ret = fmtutil_huffman_read_next_value(hctx->ht, &hctx->bitrd, &val, NULL);
207 if(!ret) {
208 if(hctx->bitrd.eof_flag) break;
209 hctx->errflag = 1;
210 break;
213 dbuf_writebyte(dcmpro->f, (u8)val);
214 nbytes_written++;
217 done:
218 if(hctx->errflag) {
219 de_dfilter_set_generic_error(c, dres, hctx->modname);
221 if(hctx) {
222 fmtutil_huffman_destroy_tree(c, hctx->ht);
223 de_free(c, hctx);
227 // -------- "Fixed Huffman" (type 6) decompression --------
229 // There are FIXEDHUFF_NUMCODES Huffman codes, whose low-level decoded values
230 // are 0...(FIXEDHUFF_NUMCODES-1).
231 // The fixed Huffman encoding is not canonical. The codes are ordered by their
232 // low-level decoded value, not by their bit length.
233 // While the set of Huffman codes is fixed, the interpretation of those codes
234 // is different in each block. We don't actually change the Huffman "values",
235 // though -- instead we use a translation table (hctx->translation).
237 // This compression type doesn't seem to be very common. A sample file:
238 // http://cd.textfiles.com/thegreatunsorted/old_apps/archivers/zipit.sea
240 // Credit: I used the macunpack program from the macutil software as
241 // documentation for this format, though none of its source code is used here.
243 #define FIXEDHUFF_NUMCODES 257
245 struct sit_fixedhuffctx {
246 deark *c;
247 const char *modname;
248 struct de_dfilter_in_params *dcmpri;
249 struct de_dfilter_out_params *dcmpro;
250 struct de_dfilter_results *dres;
251 struct fmtutil_huffman_tree *ht;
252 int errflag;
253 u8 translation[256];
256 static void sit_fixedhuff_init_tree(struct sit_fixedhuffctx *hctx)
258 deark *c = hctx->c;
259 size_t i, k;
260 size_t cdlen_curpos;
261 UI prev_code_bit_length = 0;
262 u64 prev_code = 0; // valid if prev_code_bit_length>0
263 int saved_indent_level;
264 char b2buf[72];
265 static const u8 cdlen_RLEcounts [13] = {1, 1, 4,12,32,16,49, 2,2,40,95, 2, 1};
266 static const u8 cdlen_RLElengths[13] = {3, 4, 5, 6, 7, 8, 9,10,9,10,11,13,12};
267 u8 code_lengths[FIXEDHUFF_NUMCODES];
269 de_dbg_indent_save(c, &saved_indent_level);
270 de_dbgx(c, 4, "standard huffman codebook:");
271 de_dbg_indent(c, 1);
273 // "Decompress" cdlen_RLE*[] to code_lengths[].
274 cdlen_curpos = 0;
275 for(i=0; i<DE_ARRAYCOUNT(cdlen_RLEcounts); i++) {
276 for(k=0; k<(size_t)cdlen_RLEcounts[i]; k++) {
277 if(cdlen_curpos>=FIXEDHUFF_NUMCODES) goto done;
278 code_lengths[cdlen_curpos++] = cdlen_RLElengths[i];
282 // This is similar to fmtutil_huffman_make_canonical_tree(), but different.
283 // Maybe it would be a useful library function.
284 for(i=0; i<FIXEDHUFF_NUMCODES; i++) {
285 u64 thiscode;
286 UI symlen;
287 int ret;
289 symlen = (UI)code_lengths[i];
291 if(prev_code_bit_length==0) { // this is the first code
292 thiscode = 0;
294 else if(symlen < prev_code_bit_length) {
295 thiscode = prev_code >> (prev_code_bit_length - symlen);
296 thiscode++;
298 else {
299 thiscode = prev_code + 1;
300 if(symlen > prev_code_bit_length) {
301 thiscode <<= (symlen - prev_code_bit_length);
305 prev_code_bit_length = symlen;
306 prev_code = thiscode;
308 if(c->debug_level>=4) {
309 de_dbg3(c, "code: \"%s\" = %d",
310 de_print_base2_fixed(b2buf, sizeof(b2buf), thiscode, symlen), (int)i);
312 ret = fmtutil_huffman_add_code(c, hctx->ht, thiscode, symlen, (fmtutil_huffman_valtype)i);
313 if(!ret) {
314 hctx->errflag = 1;
315 goto done;
319 done:
320 de_dbg_indent_restore(c, saved_indent_level);
323 static void do_decompr_fixedhuff(deark *c, lctx *d, struct member_data *md,
324 struct fork_data *frk, struct de_dfilter_in_params *dcmpri,
325 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
327 struct sit_fixedhuffctx *hctx = NULL;
328 i64 i;
329 i64 pos, endpos;
330 i64 nbytes_written = 0;
331 int saved_indent_level;
332 struct de_dfilter_ctx *pb_dfctx = NULL;
333 struct de_dfilter_out_params pb_dcmpro;
334 struct de_dfilter_results pb_dres;
336 de_dbg_indent_save(c, &saved_indent_level);
337 hctx = de_malloc(c, sizeof(struct sit_fixedhuffctx));
338 hctx->c = c;
339 hctx->modname = "fixedhuffman";
340 hctx->dcmpri = dcmpri;
341 hctx->dcmpro = dcmpro;
342 hctx->dres = dres;
343 hctx->ht = fmtutil_huffman_create_tree(c, FIXEDHUFF_NUMCODES, 0);
345 sit_fixedhuff_init_tree(hctx);
346 if(hctx->errflag) goto done;
348 if(c->debug_level>=4) {
349 fmtutil_huffman_dump(c, hctx->ht);
352 pos = dcmpri->pos;
353 endpos = dcmpri->pos + dcmpri->len;
355 while(1) { // For each block...
356 i64 blocksize_raw;
357 i64 blocksize;
358 i64 block_endpos;
359 i64 ndefs;
360 i64 prev_len;
361 i64 nbytes_written_this_block;
363 if(hctx->errflag) goto done;
364 if(dcmpro->len_known && (nbytes_written>=dcmpro->expected_len)) {
365 de_dbg2(c, "[stopping due to sufficient output]");
366 goto done;
368 if(pos + 4 > endpos) {
369 de_dbg2(c, "[stopping, no room for a block at %"I64_FMT"]", pos);
370 goto done;
372 de_dbg2(c, "block at %"I64_FMT, pos);
373 de_dbg_indent(c, 1);
375 blocksize_raw = dbuf_geti32be_p(dcmpri->f, &pos);
376 de_dbg2(c, "block size code: %"I64_FMT, blocksize_raw);
378 if(pb_dfctx) {
379 de_dfilter_destroy(pb_dfctx);
380 pb_dfctx = NULL;
382 de_dfilter_init_objects(c, NULL, &pb_dcmpro, &pb_dres);
383 pb_dcmpro.f = dcmpro->f;
384 if(dcmpro->len_known) {
385 // We apparently aren't told this block's decompressed size after PackBits
386 // decompression.
387 // Set the PackBits decoder's expected output len (really max len)
388 // to the maximum possible number of decompressed bytes still needed.
389 pb_dcmpro.expected_len = dcmpro->expected_len - nbytes_written;
390 pb_dcmpro.len_known = 1;
392 pb_dfctx = de_dfilter_create(c, dfilter_packbits_codec, NULL, &pb_dcmpro, &pb_dres);
394 prev_len = dcmpro->f->len;
396 if(blocksize_raw >= 0) { // PackBits + Huffman
397 i64 intermediate_len;
398 i64 nbytes_decoded_intermed = 0; // After Huffman decompression, before packbits
399 struct de_bitreader bitrd;
401 blocksize = blocksize_raw;
402 if(blocksize<10) {
403 goto done;
406 block_endpos = pos - 4 + blocksize;
407 if(block_endpos > endpos) {
408 hctx->errflag = 1;
409 goto done;
412 // This field seems to be the 'size in bytes' after Huffman decompression,
413 // as opposed to (say) the number of Huffman codes, which should be one
414 // larger (for the STOP code).
415 intermediate_len = dbuf_getu32be_p(dcmpri->f, &pos);
416 de_dbg2(c, "intermediate len: %"I64_FMT, intermediate_len);
417 if(intermediate_len > DE_MAX_SANE_OBJECT_SIZE) { // TODO what should the limit be?
418 hctx->errflag = 1;
419 goto done;
422 ndefs = dbuf_geti16be_p(dcmpri->f, &pos);
423 de_dbg2(c, "num code defs: %d", (int)ndefs);
425 if(ndefs<0 || ndefs>256) {
426 de_dfilter_set_errorf(c, dres, hctx->modname, "Can't handle num_defs=%d", (int)ndefs);
427 goto done;
430 for(i=0; i<ndefs; i++) {
431 hctx->translation[i] = dbuf_getbyte_p(dcmpri->f, &pos);
432 if(c->debug_level>=3) {
433 de_dbg3(c, "ll:%d = hl:%u", (int)i, (UI)hctx->translation[i]);
437 de_dbg2(c, "compressed data (PackBits+Huffman) at %"I64_FMT, pos);
438 de_zeromem(&bitrd, sizeof(struct de_bitreader));
439 bitrd.f = dcmpri->f;
440 bitrd.curpos = pos;
441 bitrd.endpos = block_endpos;
443 while(1) {
444 int ret;
445 fmtutil_huffman_valtype val = 0;
447 if(nbytes_decoded_intermed >= intermediate_len) break; // Have enough output data
449 ret = fmtutil_huffman_read_next_value(hctx->ht, &bitrd, &val, NULL);
450 if(bitrd.eof_flag) break;
451 if(!ret) {
452 de_dfilter_set_errorf(c, dres, hctx->modname, "Error reading Huffman codes");
453 goto done;
455 if(val<0 || val>255) {
456 break; // "stop" code
459 de_dfilter_addbuf(pb_dfctx, &hctx->translation[(int)val], 1);
460 nbytes_decoded_intermed++;
463 else { // just PackBits
464 blocksize = -blocksize_raw;
466 if(blocksize<4) {
467 goto done;
470 block_endpos = pos - 4 + blocksize;
471 if(block_endpos > endpos) {
472 hctx->errflag = 1;
473 goto done;
476 de_dbg2(c, "compressed data (PackBits) at %"I64_FMT, pos);
477 de_dfilter_addslice(pb_dfctx, dcmpri->f, pos, blocksize-4);
480 // Note: I'm assuming that each block is compressed independently (with
481 // PackBits), but I'm not 100% sure. It could be that the whole file is
482 // first compressed with PackBits, and then split into segments. If so,
483 // this won't always work.
484 nbytes_written_this_block = dcmpro->f->len - prev_len;
485 de_dbg2(c, "decompressed to %"I64_FMT" bytes", nbytes_written_this_block);
486 nbytes_written += nbytes_written_this_block;
488 pos = block_endpos;
489 de_dbg_indent(c, -1);
492 done:
493 if(pb_dfctx) de_dfilter_destroy(pb_dfctx);
495 if(hctx) {
496 if(hctx->errflag) {
497 de_dfilter_set_generic_error(c, dres, hctx->modname);
500 fmtutil_huffman_destroy_tree(c, hctx->ht);
501 de_free(c, hctx);
504 de_dbg_indent_restore(c, saved_indent_level);
507 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
508 { CMPR_NONE, "uncompressed", do_decompr_uncompressed },
509 { CMPR_RLE, "RLE", do_decompr_rle },
510 { CMPR_LZW, "LZW", do_decompr_lzw },
511 { CMPR_HUFFMAN, "Huffman", do_decompr_huffman },
512 { CMPR_LZAH, "LZAH", NULL },
513 { CMPR_FIXEDHUFF, "fixed Huffman", do_decompr_fixedhuff },
514 { CMPR_MW, "MW", NULL },
515 { CMPR_LZHUFF, "LZ+Huffman", NULL },
516 { 14, "installer", NULL },
517 { 15, "Arsenic", NULL }
520 static const struct cmpr_meth_info *find_cmpr_meth_info(deark *c, u8 id)
522 size_t k;
524 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
525 if(id == cmpr_meth_info_arr[k].id)
526 return &cmpr_meth_info_arr[k];
528 return NULL;
531 // Given a 'fork_data' fk with fk.cmpr_meth_etc set,
532 // - sets fk.is_a_file
533 // - sets fk.cmpr_meth
534 // - sets fk.is_encrypted
535 // - sets fk.cmi
536 // - writes a description to the 's' string
537 static void decode_cmpr_meth(deark *c, lctx *d, struct fork_data *fk,
538 de_ucstring *s)
540 const char *name = NULL;
541 u8 cmpr = fk->cmpr_meth_etc;
543 if(d->file_fmt==1 && cmpr<32 && (cmpr & 16)) {
544 fk->is_encrypted = 1;
545 cmpr -= 16;
548 if(d->file_fmt==2 || cmpr<16) {
549 fk->is_a_file = 1;
550 fk->cmpr_meth = cmpr;
553 if(fk->is_a_file) {
554 fk->cmi = find_cmpr_meth_info(c, fk->cmpr_meth);
557 if(fk->cmi) {
558 name = fk->cmi->name;
560 else if(d->file_fmt==1 && fk->cmpr_meth_etc==32) {
561 name = "folder";
563 else if(d->file_fmt==1 && fk->cmpr_meth_etc==33) {
564 name = "end of folder marker";
567 if(!name) name="?";
568 ucstring_append_flags_item(s, name);
569 if(d->file_fmt==1 && fk->is_encrypted) {
570 ucstring_append_flags_item(s, "encrypted");
574 static int do_member_header(deark *c, lctx *d, struct member_data *md, i64 pos1)
576 i64 pos = pos1;
577 i64 fnlen;
578 i64 n;
579 u32 hdr_crc_reported;
580 u32 hdr_crc_calc;
581 de_ucstring *descr = NULL;
582 int saved_indent_level;
583 char timestamp_buf[64];
585 de_dbg_indent_save(c, &saved_indent_level);
586 de_dbg(c, "member header at %"I64_FMT, pos1);
587 de_dbg_indent(c, 1);
589 md->rfork.cmpr_meth_etc = de_getbyte_p(&pos);
590 descr = ucstring_create(c);
591 decode_cmpr_meth(c, d, &md->rfork, descr);
592 de_dbg(c, "rsrc cmpr meth (etc.): %u (%s)", (unsigned int)md->rfork.cmpr_meth_etc,
593 ucstring_getpsz(descr));
595 md->dfork.cmpr_meth_etc = de_getbyte_p(&pos);
596 ucstring_empty(descr);
597 decode_cmpr_meth(c, d, &md->dfork, descr);
598 de_dbg(c, "data cmpr meth (etc.): %u (%s)", (unsigned int)md->dfork.cmpr_meth_etc,
599 ucstring_getpsz(descr));
601 fnlen = (i64)de_getbyte_p(&pos);
602 if(fnlen>63) fnlen=63;
603 md->fname = dbuf_read_string(c->infile, pos, fnlen, fnlen, 0, d->input_encoding);
604 de_dbg(c, "filename: \"%s\"", ucstring_getpsz(md->fname->str));
605 pos += 63;
607 if(md->dfork.is_a_file || md->rfork.is_a_file) {
608 dbuf_read_fourcc(c->infile, pos, &md->filetype, 4, 0x0);
609 de_dbg(c, "filetype: '%s'", md->filetype.id_dbgstr);
610 de_memcpy(md->advf->typecode, md->filetype.bytes, 4);
611 md->advf->has_typecode = 1;
612 pos += 4;
613 dbuf_read_fourcc(c->infile, pos, &md->creator, 4, 0x0);
614 de_dbg(c, "creator: '%s'", md->creator.id_dbgstr);
615 de_memcpy(md->advf->creatorcode, md->creator.bytes, 4);
616 md->advf->has_creatorcode = 1;
617 pos += 4;
619 md->finder_flags = (unsigned int)de_getu16be_p(&pos);
620 de_dbg(c, "finder flags: 0x%04x", md->finder_flags);
621 md->advf->finderflags = (u16)md->finder_flags;
622 md->advf->has_finderflags = 1;
624 else {
625 // Don't know if these fields mean anything for folders.
626 // Possibly they're the first 10 bytes of DInfo (Finder Info for
627 // folders), though that seems a little odd.
628 pos += 10;
631 n = de_getu32be_p(&pos);
632 de_mac_time_to_timestamp(n, &md->create_time);
633 de_timestamp_to_string(&md->create_time, timestamp_buf, sizeof(timestamp_buf), 0);
634 de_dbg(c, "create time: %"I64_FMT" (%s)", n, timestamp_buf);
635 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
637 n = de_getu32be_p(&pos);
638 de_mac_time_to_timestamp(n, &md->mod_time);
639 de_timestamp_to_string(&md->mod_time, timestamp_buf, sizeof(timestamp_buf), 0);
640 de_dbg(c, "mod time: %"I64_FMT" (%s)", n, timestamp_buf);
641 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
643 md->rfork.unc_len = de_getu32be_p(&pos);
644 md->dfork.unc_len = de_getu32be_p(&pos);
645 md->rfork.cmpr_len = de_getu32be_p(&pos);
646 md->dfork.cmpr_len = de_getu32be_p(&pos);
647 de_dbg(c, "rsrc uncmpr len: %"I64_FMT, md->rfork.unc_len);
648 de_dbg(c, "rsrc cmpr len: %"I64_FMT, md->rfork.cmpr_len);
649 de_dbg(c, "data uncmpr len: %"I64_FMT, md->dfork.unc_len);
650 de_dbg(c, "data cmpr len: %"I64_FMT, md->dfork.cmpr_len);
652 md->rfork.crc_reported = (u32)de_getu16be_p(&pos);
653 de_dbg(c, "rsrc crc (reported): 0x%04x", (UI)md->rfork.crc_reported);
654 md->dfork.crc_reported = (u32)de_getu16be_p(&pos);
655 de_dbg(c, "data crc (reported): 0x%04x", (UI)md->dfork.crc_reported);
657 pos += 6; // reserved, etc.
659 hdr_crc_reported = (u32)de_getu16be_p(&pos);
660 de_dbg(c, "header crc (reported): 0x%04x", (UI)hdr_crc_reported);
662 de_crcobj_reset(d->crco_hdr);
663 de_crcobj_addslice(d->crco_hdr, c->infile, pos1, 110);
664 hdr_crc_calc = de_crcobj_getval(d->crco_hdr);
665 de_dbg(c, "header crc (calculated): 0x%04x", (UI)hdr_crc_calc);
666 if(hdr_crc_reported != hdr_crc_calc) {
667 de_warn(c, "Bad header CRC (reported 0x%04x, calculated 0x%04x)", (UI)hdr_crc_reported,
668 (UI)hdr_crc_calc);
671 de_dbg_indent(c, -1);
673 de_dbg_indent_restore(c, saved_indent_level);
674 ucstring_destroy(descr);
675 return 1;
678 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
680 struct de_crcobj *crco = (struct de_crcobj*)userdata;
681 de_crcobj_addbuf(crco, buf, buf_len);
684 // Sets md->advf->*fork.fork_exists, according to whether we think we
685 // can decompress the fork.
686 static void do_pre_decompress_fork(deark *c, lctx *d, struct member_data *md,
687 struct fork_data *frk)
689 struct de_advfile_forkinfo *advfki;
690 int ok = 0;
692 if(frk->is_rsrc_fork) {
693 advfki = &md->advf->rsrcfork;
695 else {
696 advfki = &md->advf->mainfork;
699 if(!frk->is_a_file) {
700 goto done;
703 // TODO: What is the correct way to determine the nonexistence of a fork?
704 if(frk->unc_len==0 && frk->cmpr_len==0) {
705 goto done;
708 if(frk->cmpr_pos + frk->cmpr_len > c->infile->len) {
709 de_err(c, "Unexpected end of file");
710 goto done;
713 de_dbg(c, "cmpr method: %u (%s)", (unsigned int)frk->cmpr_meth,
714 frk->cmi?frk->cmi->name:"?");
716 if(!frk->cmi) {
717 de_err(c, "Unknown compression method: %u", (unsigned int)frk->cmpr_meth);
718 goto done;
721 if(!frk->cmi->decompressor) {
722 de_err(c, "%s[%s fork]: Unsupported compression method: %u (%s)",
723 ucstring_getpsz_d(md->full_fname), frk->forkname,
724 (unsigned int)frk->cmpr_meth, frk->cmi->name);
725 goto done;
728 if(frk->is_encrypted) {
729 de_err(c, "Encrypted files are not supported");
730 goto done;
733 ok = 1;
735 advfki->writelistener_cb = our_writelistener_cb;
736 if(frk->is_rsrc_fork) {
737 advfki->userdata_for_writelistener = (void*)d->crco_rfork;
738 de_crcobj_reset(d->crco_rfork);
740 else {
741 advfki->userdata_for_writelistener = (void*)d->crco_dfork;
742 de_crcobj_reset(d->crco_dfork);
745 done:
746 advfki->fork_exists = (ok)?1:0;
749 static void do_main_decompress_fork(deark *c, lctx *d, struct member_data *md,
750 struct fork_data *frk, dbuf *outf)
752 struct de_dfilter_in_params dcmpri;
753 struct de_dfilter_out_params dcmpro;
754 struct de_dfilter_results dres;
755 int saved_indent_level;
757 de_dbg_indent_save(c, &saved_indent_level);
758 if(!frk || !frk->cmi || !frk->cmi->decompressor) {
759 goto done;
762 de_dbg(c, "decompressing %s fork", frk->forkname);
763 de_dbg_indent(c, 1);
765 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
766 dcmpri.f = c->infile;
767 dcmpri.pos = frk->cmpr_pos;
768 dcmpri.len = frk->cmpr_len;
769 dcmpro.f = outf;
770 dcmpro.len_known = 1;
771 dcmpro.expected_len = frk->unc_len;
772 frk->cmi->decompressor(c, d, md, frk, &dcmpri, &dcmpro, &dres);
773 if(dres.errcode) {
774 de_err(c, "Decompression failed for file %s[%s fork]: %s", ucstring_getpsz_d(md->full_fname),
775 frk->forkname, de_dfilter_get_errmsg(c, &dres));
776 goto done;
779 done:
780 de_dbg_indent_restore(c, saved_indent_level);
783 static void do_post_decompress_fork(deark *c, lctx *d, struct member_data *md,
784 struct fork_data *frk)
786 u32 crc_calc;
788 if(frk->is_rsrc_fork) {
789 crc_calc = de_crcobj_getval(d->crco_rfork);
791 else {
792 crc_calc = de_crcobj_getval(d->crco_dfork);
794 de_dbg(c, "%s crc (calculated): 0x%04x", frk->forkname, (unsigned int)crc_calc);
795 if(crc_calc != frk->crc_reported) {
796 de_err(c, "CRC check failed for file %s[%s fork]", ucstring_getpsz_d(md->full_fname),
797 frk->forkname);
801 static void do_extract_folder(deark *c, lctx *d, struct member_data *md)
803 dbuf *outf = NULL;
804 de_finfo *fi = NULL;
806 if(!md->is_folder) goto done;
807 fi = de_finfo_create(c);
808 fi->is_directory = 1;
809 de_finfo_set_name_from_ucstring(c, fi, md->full_fname, DE_SNFLAG_FULLPATH);
810 fi->original_filename_flag = 1;
811 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
812 fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
813 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
814 done:
815 dbuf_close(outf);
816 de_finfo_destroy(c, fi);
819 struct advfudata {
820 lctx *d;
821 struct member_data *md;
824 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
825 struct de_advfile_cbparams *afp)
827 struct advfudata *u = (struct advfudata*)advf->userdata;
829 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
830 do_main_decompress_fork(c, u->d, u->md, &u->md->dfork, afp->outf);
832 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
833 do_main_decompress_fork(c, u->d, u->md, &u->md->rfork, afp->outf);
836 return 1;
839 // This is for files only. Use do_extract_folder() for folders.
840 static void do_extract_member_file(deark *c, lctx *d, struct member_data *md)
842 struct advfudata u;
844 ucstring_append_ucstring(md->advf->filename, md->full_fname);
845 md->advf->original_filename_flag = 1;
846 md->advf->snflags = DE_SNFLAG_FULLPATH;
847 de_advfile_set_orig_filename(md->advf, md->fname->sz, md->fname->sz_strlen);
849 // resource fork
850 if(md->rfork.cmpr_len>0) {
851 de_dbg(c, "rsrc fork data at %"I64_FMT", len=%"I64_FMT,
852 md->rfork.cmpr_pos, md->rfork.cmpr_len);
853 md->advf->rsrcfork.fork_len = md->rfork.unc_len;
854 de_dbg_indent(c, 1);
855 do_pre_decompress_fork(c, d, md, &md->rfork);
856 de_dbg_indent(c, -1);
859 // data fork
860 if(md->dfork.cmpr_len>0) {
861 de_dbg(c, "data fork data at %"I64_FMT", len=%"I64_FMT,
862 md->dfork.cmpr_pos, md->dfork.cmpr_len);
863 md->advf->mainfork.fork_len = md->dfork.unc_len;
864 de_dbg_indent(c, 1);
865 do_pre_decompress_fork(c, d, md, &md->dfork);
866 de_dbg_indent(c, -1);
869 u.d = d;
870 u.md = md;
871 md->advf->userdata = (void*)&u;
872 md->advf->writefork_cbfn = my_advfile_cbfn;
873 de_advfile_run(md->advf);
875 if(md->advf->rsrcfork.fork_exists) {
876 do_post_decompress_fork(c, d, md, &md->rfork);
878 if(md->advf->mainfork.fork_exists) {
879 do_post_decompress_fork(c, d, md, &md->dfork);
883 // Returns:
884 // 0 if the member could not be parsed sufficiently to determine its size
885 // 1 normally
886 static int do_member(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
888 i64 pos = pos1;
889 struct member_data *md = NULL;
890 int saved_indent_level;
891 int retval = 0;
892 int curpath_need_pop = 0;
894 *bytes_consumed = 0;
895 de_dbg_indent_save(c, &saved_indent_level);
897 md = de_malloc(c, sizeof(struct member_data));
898 md->rfork.is_rsrc_fork = 1;
899 md->dfork.forkname = "data";
900 md->rfork.forkname = "resource";
902 de_dbg(c, "member at %"I64_FMT, pos1);
903 de_dbg_indent(c, 1);
905 md->advf = de_advfile_create(c);
907 if(!do_member_header(c, d, md, pos)) goto done;
909 *bytes_consumed = 112;
911 if(md->rfork.cmpr_meth_etc==32 || md->dfork.cmpr_meth_etc==32) {
912 md->is_folder = 1;
913 md->rfork.cmpr_len = 0;
914 md->dfork.cmpr_len = 0;
916 else if(md->rfork.cmpr_meth_etc==33 || md->dfork.cmpr_meth_etc==33) {
917 // end of folder marker
918 if(d->subdir_level>0) d->subdir_level--;
919 de_strarray_pop(d->curpath);
920 retval = 1;
921 goto done;
923 else if(md->rfork.cmpr_meth_etc>33 || md->dfork.cmpr_meth_etc>33) {
924 de_err(c, "Unknown member type. Cannot continue.");
925 goto done;
928 *bytes_consumed += md->rfork.cmpr_len + md->dfork.cmpr_len;
929 retval = 1;
931 pos += 112;
933 md->full_fname = ucstring_create(c);
934 de_strarray_push(d->curpath, md->fname->str);
935 curpath_need_pop = 1;
936 de_strarray_make_path(d->curpath, md->full_fname, DE_MPFLAG_NOTRAILINGSLASH);
937 de_dbg(c, "full name: \"%s\"", ucstring_getpsz_d(md->full_fname));
939 if(md->is_folder) {
940 if(d->subdir_level >= MAX_NESTING_LEVEL) {
941 de_err(c, "Directories nested too deeply");
942 retval = 0;
943 goto done;
945 d->subdir_level++;
946 curpath_need_pop = 0;
947 do_extract_folder(c, d, md);
948 goto done;
951 md->rfork.cmpr_pos = pos;
952 pos += md->rfork.cmpr_len;
953 md->dfork.cmpr_pos = pos;
954 //pos += md->dfork.cmpr_len;
956 do_extract_member_file(c, d, md);
958 done:
959 if(curpath_need_pop) {
960 de_strarray_pop(d->curpath);
962 if(md) {
963 de_destroy_stringreaderdata(c, md->fname);
964 ucstring_destroy(md->full_fname);
965 de_advfile_destroy(md->advf);
966 de_free(c, md);
968 de_dbg_indent_restore(c, saved_indent_level);
969 return retval;
972 static int do_master_header(deark *c, lctx *d, i64 pos1)
974 i64 pos = pos1;
976 de_dbg(c, "master header at %d", (int)pos1);
977 de_dbg_indent(c, 1);
978 pos += 4; // signature
980 d->nmembers = (int)de_getu16be_p(&pos);
981 de_dbg(c, "number of members: %d", d->nmembers);
983 d->archive_size = de_getu32be_p(&pos);
984 de_dbg(c, "reported archive file size: %"I64_FMT, d->archive_size);
986 pos += 4; // expected to be "rLau"
988 d->ver = de_getbyte_p(&pos);
989 de_dbg(c, "version: %u", (unsigned int)d->ver);
991 de_dbg_indent(c, -1);
992 return 1;
995 // If nmembers==-1, number of members is unknown
996 static void do_sequence_of_members(deark *c, lctx *d, i64 pos1)
998 int root_member_count = 0;
999 i64 pos = pos1;
1001 while(1) {
1002 int ret;
1003 int is_root_member;
1004 i64 bytes_consumed = 0;
1006 if(pos+112 > c->infile->len) {
1007 if(d->subdir_level==0 && root_member_count!=d->nmembers) {
1008 de_warn(c, "Expected %d top-level member file(s), found %d",
1009 d->nmembers, root_member_count);
1011 break;
1014 // The "number of files" field appears to be untrustworthy, or its meaning
1015 // is not correctly understood.
1016 // FWIW, The Unarchiver also ignores it.
1017 //if((d->subdir_level==0) && (root_member_count >= d->nmembers)) break;
1019 is_root_member = (d->subdir_level==0);
1020 ret = do_member(c, d, pos, &bytes_consumed);
1021 if(ret==0) break;
1022 if(bytes_consumed<1) break;
1023 pos += bytes_consumed;
1024 if(is_root_member) root_member_count++;
1028 static void do_oldfmt(deark *c, lctx *d)
1030 i64 pos = 0;
1032 if(!do_master_header(c, d, pos)) goto done;
1033 pos += 22;
1034 do_sequence_of_members(c, d, pos);
1036 done:
1040 static void do_v5_comment(deark *c, lctx *d, struct member_data *md, i64 pos, i64 len)
1042 de_ucstring *s = NULL;
1044 s = ucstring_create(c);
1045 dbuf_read_to_ucstring_n(c->infile, pos, len, 4096, s, 0, d->input_encoding);
1046 de_dbg(c, "file comment: \"%s\"", ucstring_getpsz_d(s));
1047 ucstring_destroy(s);
1050 static void do_v5_list_of_members(deark *c, lctx *d, i64 first_member_pos,
1051 i64 num_members_expected);
1053 static int do_v5_member_header(deark *c, lctx *d, struct member_data *md, i64 pos1)
1055 i64 pos = pos1;
1056 i64 fnlen, fnlen_sanitized;
1057 i64 n;
1058 i64 hdrsize;
1059 i64 hdr_endpos;
1060 u32 hdr_crc_reported;
1061 u32 hdr_crc_calc;
1062 u8 flags;
1063 de_ucstring *descr = NULL;
1064 int saved_indent_level;
1065 int retval = 0;
1066 char timestamp_buf[64];
1068 de_dbg_indent_save(c, &saved_indent_level);
1069 if(pos1==0) goto done;
1071 de_dbg(c, "member header at %"I64_FMT, pos1);
1072 de_dbg_indent(c, 1);
1074 n = de_getu32be_p(&pos);
1075 if(n!=0xa5a5a5a5) {
1076 de_err(c, "Expected member not found at %"I64_FMT, pos1);
1077 goto done;
1080 descr = ucstring_create(c);
1082 pos++; // ver?
1083 pos++; // ?
1084 hdrsize = de_getu16be_p(&pos);
1085 hdr_endpos = pos1 + hdrsize;
1086 de_dbg(c, "base header at %"I64_FMT", len=%"I64_FMT, pos1, hdrsize);
1087 de_dbg_indent(c, 1);
1088 if(hdrsize<48 || hdrsize>2000) {
1089 de_err(c, "Bad header");
1090 goto done;
1093 // calculate actual header crc
1094 de_crcobj_reset(d->crco_hdr);
1095 de_crcobj_addslice(d->crco_hdr, c->infile, pos1, 32);
1096 de_crcobj_addzeroes(d->crco_hdr, 2);
1097 de_crcobj_addslice(d->crco_hdr, c->infile, pos1+34, hdrsize-34);
1098 hdr_crc_calc = de_crcobj_getval(d->crco_hdr);
1100 pos++; // ?
1101 flags = de_getbyte_p(&pos);
1102 ucstring_empty(descr);
1103 if(flags & 0x40) {
1104 md->is_folder = 1;
1105 ucstring_append_flags_item(descr, "folder");
1107 if(flags & 0x20) {
1108 md->dfork.is_encrypted = 1;
1109 md->rfork.is_encrypted = 1;
1110 ucstring_append_flags_item(descr, "encrypted");
1112 de_dbg(c, "flags: 0x%02x (%s)", (UI)flags, ucstring_getpsz_d(descr));
1114 n = de_getu32be_p(&pos);
1115 de_mac_time_to_timestamp(n, &md->create_time);
1116 de_timestamp_to_string(&md->create_time, timestamp_buf, sizeof(timestamp_buf), 0);
1117 de_dbg(c, "create time: %"I64_FMT" (%s)", n, timestamp_buf);
1118 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_time;
1120 n = de_getu32be_p(&pos);
1121 de_mac_time_to_timestamp(n, &md->mod_time);
1122 de_timestamp_to_string(&md->mod_time, timestamp_buf, sizeof(timestamp_buf), 0);
1123 de_dbg(c, "mod time: %"I64_FMT" (%s)", n, timestamp_buf);
1124 md->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
1126 n = de_getu32be_p(&pos);
1127 de_dbg(c, "prev: %"I64_FMT, n);
1128 md->v5_next_member_pos = de_getu32be_p(&pos);
1129 de_dbg(c, "next: %"I64_FMT, md->v5_next_member_pos);
1130 retval = 1;
1132 // at offset 26
1133 n = de_getu32be_p(&pos);
1134 de_dbg(c, "parent: %"I64_FMT, n);
1136 fnlen = de_getu16be_p(&pos);
1137 de_dbg(c, "filename len: %u", (UI)fnlen);
1138 fnlen_sanitized = de_min_int(fnlen, 1024);
1140 hdr_crc_reported = (u32)de_getu16be_p(&pos);
1141 de_dbg(c, "header crc (reported): 0x%04x", (UI)hdr_crc_reported);
1142 de_dbg(c, "header crc (calculated): 0x%04x", (UI)hdr_crc_calc);
1143 if(hdr_crc_reported != hdr_crc_calc) {
1144 de_warn(c, "Bad header CRC (reported 0x%04x, calculated 0x%04x)", (UI)hdr_crc_reported,
1145 (UI)hdr_crc_calc);
1148 // at offset 34
1149 if(md->is_folder) {
1150 md->v5_first_entry_pos = de_getu32be_p(&pos);
1151 de_dbg(c, "offset of first entry: %"I64_FMT, md->v5_first_entry_pos);
1153 n = de_getu32be_p(&pos);
1154 de_dbg(c, "folder size: %"I64_FMT, n);
1156 pos += 2; // data fork old crc16
1157 pos += 2; // ?
1159 md->v5_num_files_in_folder = de_getu16be_p(&pos);
1160 de_dbg(c, "number of files: %"I64_FMT, md->v5_num_files_in_folder);
1162 else {
1163 md->dfork.unc_len = de_getu32be_p(&pos);
1164 de_dbg(c, "data fork uncmpr len: %"I64_FMT, md->dfork.unc_len);
1165 // at offset 38
1166 md->dfork.cmpr_len = de_getu32be_p(&pos);
1167 de_dbg(c, "data fork cmpr len: %"I64_FMT, md->dfork.cmpr_len);
1169 md->dfork.crc_reported = (u32)de_getu16be_p(&pos);
1170 de_dbg(c, "data fork old crc (reported): 0x%04x", (UI)md->dfork.crc_reported);
1172 pos += 2; // ?
1174 md->dfork.cmpr_meth_etc = de_getbyte_p(&pos);
1175 ucstring_empty(descr);
1176 decode_cmpr_meth(c, d, &md->dfork, descr);
1177 de_dbg(c, "data fork cmpr meth: %u (%s)", (unsigned int)md->dfork.cmpr_meth_etc,
1178 ucstring_getpsz(descr));
1180 // at offset 47
1181 n = (i64)de_getbyte_p(&pos);
1182 de_dbg(c, "data fork passwd len: %u", (UI)n);
1183 pos += n;
1186 md->fname = dbuf_read_string(c->infile, pos, fnlen_sanitized, fnlen_sanitized, 0, d->input_encoding);
1187 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fname->str));
1188 de_strarray_push(d->curpath, md->fname->str);
1189 md->v5_need_strarray_pop = 1;
1190 pos += fnlen;
1192 if(hdr_endpos-pos >= 5) {
1193 n = de_getu16be_p(&pos); // comment len
1194 pos += 2;
1195 if(pos + n <= hdr_endpos) {
1196 do_v5_comment(c, d, md, pos, n);
1200 de_dbg_indent(c, -1); // end of first part of header
1202 pos = hdr_endpos;
1204 if(!md->is_folder) {
1205 UI flags2;
1207 flags2 = (UI)de_getu16be_p(&pos);
1208 de_dbg(c, "flags2: 0x%04x", flags2);
1209 pos += 2; // ?
1211 dbuf_read_fourcc(c->infile, pos, &md->filetype, 4, 0x0);
1212 de_dbg(c, "filetype: '%s'", md->filetype.id_dbgstr);
1213 de_memcpy(md->advf->typecode, md->filetype.bytes, 4);
1214 md->advf->has_typecode = 1;
1215 pos += 4;
1216 dbuf_read_fourcc(c->infile, pos, &md->creator, 4, 0x0);
1217 de_dbg(c, "creator: '%s'", md->creator.id_dbgstr);
1218 de_memcpy(md->advf->creatorcode, md->creator.bytes, 4);
1219 md->advf->has_creatorcode = 1;
1220 pos += 4;
1222 md->finder_flags = (unsigned int)de_getu16be_p(&pos);
1223 de_dbg(c, "finder flags: 0x%04x", md->finder_flags);
1224 md->advf->finderflags = (u16)md->finder_flags;
1225 md->advf->has_finderflags = 1;
1227 pos += 22; // ?
1229 if(flags2 & 0x0001) {
1230 md->rfork.unc_len = de_getu32be_p(&pos);
1231 de_dbg(c, "rsrc fork uncmpr len: %"I64_FMT, md->rfork.unc_len);
1232 md->rfork.cmpr_len = de_getu32be_p(&pos);
1233 de_dbg(c, "rsrc fork cmpr len: %"I64_FMT, md->rfork.cmpr_len);
1235 md->rfork.crc_reported = (u32)de_getu16be_p(&pos);
1236 de_dbg(c, "rsrc fork old crc (reported): 0x%04x", (UI)md->rfork.crc_reported);
1238 pos += 2; // ?
1240 md->rfork.cmpr_meth_etc = de_getbyte_p(&pos);
1241 ucstring_empty(descr);
1242 decode_cmpr_meth(c, d, &md->rfork, descr);
1243 de_dbg(c, "rsrc fork cmpr meth: %u (%s)", (unsigned int)md->rfork.cmpr_meth_etc,
1244 ucstring_getpsz(descr));
1246 n = (i64)de_getbyte_p(&pos);
1247 de_dbg(c, "rsrc fork passwd len: %u", (UI)n);
1248 pos += n;
1252 if(!md->is_folder) {
1253 md->rfork.cmpr_pos = pos;
1254 pos += md->rfork.cmpr_len;
1256 md->dfork.cmpr_pos = pos;
1257 pos += md->dfork.cmpr_len;
1260 done:
1261 de_dbg_indent_restore(c, saved_indent_level);
1262 ucstring_destroy(descr);
1263 return retval;
1266 static int do_v5_member(deark *c, lctx *d, i64 member_idx,
1267 i64 pos1, i64 *pnext_member_pos)
1269 struct member_data *md = NULL;
1270 int saved_indent_level;
1271 int retval = 0;
1273 de_dbg_indent_save(c, &saved_indent_level);
1275 if(pos1==0) goto done;
1277 if(!de_inthashtable_add_item(c, d->v5_offsets_seen, pos1, NULL)) {
1278 de_err(c, "Loop detected");
1279 goto done;
1282 md = de_malloc(c, sizeof(struct member_data));
1283 md->rfork.is_rsrc_fork = 1;
1284 md->dfork.forkname = "data";
1285 md->rfork.forkname = "resource";
1287 de_dbg(c, "member[%d] at %"I64_FMT, (int)member_idx, pos1);
1288 de_dbg_indent(c, 1);
1290 if(pos1<0 || pos1>=c->infile->len) {
1291 de_err(c, "Bad file offset");
1292 goto done;
1295 md->advf = de_advfile_create(c);
1297 if(!do_v5_member_header(c, d, md, pos1)) goto done;
1298 *pnext_member_pos = md->v5_next_member_pos;
1300 if(!md->full_fname) {
1301 md->full_fname = ucstring_create(c);
1302 de_strarray_make_path(d->curpath, md->full_fname, DE_MPFLAG_NOTRAILINGSLASH);
1304 de_dbg(c, "full name: \"%s\"", ucstring_getpsz_d(md->full_fname));
1306 if(md->is_folder) {
1307 do_extract_folder(c, d, md);
1309 if(d->subdir_level >= MAX_NESTING_LEVEL) {
1310 de_err(c, "Directories nested too deeply");
1311 retval = 0;
1312 goto done;
1314 de_dbg(c, "[folder contents]");
1315 de_dbg_indent(c, 1);
1316 d->subdir_level++;
1317 do_v5_list_of_members(c, d, md->v5_first_entry_pos, md->v5_num_files_in_folder);
1318 d->subdir_level--;
1319 de_dbg_indent(c, -1);
1321 else {
1322 do_extract_member_file(c, d, md);
1325 retval = 1;
1327 done:
1328 if(md) {
1329 if(md->v5_need_strarray_pop) {
1330 de_strarray_pop(d->curpath);
1332 de_destroy_stringreaderdata(c, md->fname);
1333 ucstring_destroy(md->full_fname);
1334 de_advfile_destroy(md->advf);
1335 de_free(c, md);
1337 de_dbg_indent_restore(c, saved_indent_level);
1338 return retval;
1341 static void do_v5_list_of_members(deark *c, lctx *d, i64 first_member_pos,
1342 i64 num_members_expected)
1344 i64 member_count = 0;
1345 i64 pos = first_member_pos;
1347 while(1) {
1348 int ret;
1349 i64 next_pos = 0;
1351 if(pos==0) break;
1352 if(member_count >= num_members_expected) break;
1354 ret = do_v5_member(c, d, member_count, pos, &next_pos);
1355 if(!ret) break;
1356 if(next_pos==0) break;
1358 pos = next_pos;
1359 member_count++;
1363 static int do_v5_archivehdr(deark *c, lctx *d, i64 pos1)
1365 i64 n;
1366 i64 pos = pos1;
1367 int retval = 0;
1369 de_dbg(c, "archive header at %"I64_FMT, pos1);
1370 de_dbg_indent(c, 1);
1371 pos += 80; // text
1372 pos += 2; // ?
1373 n = de_getbyte_p(&pos);
1374 de_dbg(c, "archive version: %u", (UI)n);
1375 d->v5_archive_flags = de_getbyte_p(&pos);
1376 de_dbg(c, "archive flags: 0x%02x", (UI)d->v5_archive_flags);
1378 d->archive_size = de_getu32be_p(&pos);
1379 de_dbg(c, "reported archive file size: %"I64_FMT, d->archive_size);
1381 pos += 4; // ?
1383 d->nmembers = (int)de_getu16be_p(&pos);
1384 de_dbg(c, "number of root members: %d", d->nmembers);
1386 d->v5_first_entry_pos = de_getu32be_p(&pos);
1387 de_dbg(c, "pos of first root member: %"I64_FMT, d->v5_first_entry_pos);
1389 n = de_getu16be_p(&pos);
1390 de_dbg(c, "archive crc (reported): 0x%04x", (UI)n);
1392 //if(d->v5_archive_flags & 0x10) pos += 14; // reserved
1393 // TODO: Archive comment
1394 retval = 1;
1396 de_dbg_indent(c, -1);
1397 return retval;
1400 static void do_v5(deark *c, lctx *d)
1402 d->v5_offsets_seen = de_inthashtable_create(c);
1403 if(!do_v5_archivehdr(c, d, 0)) goto done;
1404 do_v5_list_of_members(c, d, d->v5_first_entry_pos, d->nmembers);
1405 done:
1409 static void de_run_stuffit(deark *c, de_module_params *mparams)
1411 lctx *d = NULL;
1413 d = de_malloc(c, sizeof(lctx));
1415 if(!dbuf_memcmp(c->infile, 0, "SIT!", 4)) {
1416 d->file_fmt = 1;
1418 else if(!dbuf_memcmp(c->infile, 0, "StuffIt ", 8)) {
1419 d->file_fmt = 2;
1421 else {
1422 de_err(c, "Not a StuffIt file, or unknown version.");
1423 goto done;
1426 if(d->file_fmt==2) {
1427 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_UTF8);
1429 else {
1430 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
1433 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1434 d->crco_rfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1435 d->crco_dfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1436 d->crco_hdr = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1438 if(d->file_fmt==1) {
1439 de_declare_fmt(c, "StuffIt, old format");
1440 do_oldfmt(c, d);
1442 else if(d->file_fmt==2) {
1443 de_declare_fmt(c, "StuffIt, v5 format");
1444 do_v5(c, d);
1446 else {
1447 de_err(c, "This version of StuffIt format is not supported.");
1450 done:
1451 if(d) {
1452 de_crcobj_destroy(d->crco_rfork);
1453 de_crcobj_destroy(d->crco_dfork);
1454 de_crcobj_destroy(d->crco_hdr);
1455 de_strarray_destroy(d->curpath);
1456 if(d->v5_offsets_seen) de_inthashtable_destroy(c, d->v5_offsets_seen);
1457 de_free(c, d);
1461 static int de_identify_stuffit(deark *c)
1463 u8 buf[9];
1465 de_read(buf, 0, sizeof(buf));
1466 if(!de_memcmp(buf, "SIT!", 4)) {
1467 return 100;
1469 if(!de_memcmp(buf, "StuffIt (", 9)) {
1470 if(de_getbyte(82)==0x05) return 100;
1472 return 0;
1475 void de_module_stuffit(deark *c, struct deark_module_info *mi)
1477 mi->id = "stuffit";
1478 mi->desc = "StuffIt archive";
1479 mi->run_fn = de_run_stuffit;
1480 mi->identify_fn = de_identify_stuffit;