fnt: Improved error handling, etc.
[deark.git] / modules / arc.c
blob375baf9c566e7da453733391890d8e1e493b5e8f
// This file is part of Deark.
// Copyright (C) 2019 Jason Summers
// See the file COPYING for terms of use.

// ARC compressed archive
// Spark
// ArcMac
9 #include <deark-config.h>
10 #include <deark-private.h>
11 #include <deark-fmtutil.h>
12 DE_DECLARE_MODULE(de_module_arc);
13 DE_DECLARE_MODULE(de_module_spark);
14 DE_DECLARE_MODULE(de_module_arcmac);
16 #define FMT_ARC 1
17 #define FMT_SPARK 2
18 #define FMT_ARCMAC 3
19 #define FMT_PAK16SFX 4
21 #define MAX_NESTING_LEVEL 24
23 struct localctx_struct;
24 typedef struct localctx_struct lctx;
25 struct member_data;
26 typedef void (*decompressor_fn)(deark *c, lctx *d, struct member_data *md,
27 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
28 struct de_dfilter_results *dres);
30 struct cmpr_meth_info {
31 u8 cmpr_meth;
32 unsigned int flags;
33 const char *name;
34 decompressor_fn decompressor;
37 struct persistent_member_data {
38 de_ucstring *comment;
39 de_ucstring *path;
42 struct member_data {
43 deark *c;
44 lctx *d;
46 u8 cmpr_meth;
47 u8 cmpr_meth_masked;
48 u8 has_spark_attribs;
49 const struct cmpr_meth_info *cmi;
50 const char *cmpr_meth_name;
51 i64 orig_size;
52 i64 cmpr_data_pos;
53 i64 cmpr_size;
54 u32 crc_reported;
55 u32 crc_calc;
56 de_ucstring *fn;
57 struct de_timestamp arc_timestamp;
58 struct de_riscos_file_attrs rfa;
59 int is_dir;
61 i64 arcmac_dforklen;
62 i64 arcmac_rforklen;
63 struct de_stringreaderdata *arcmac_fn;
64 struct de_advfile *arcmac_advf;
67 struct localctx_struct {
68 int fmt;
69 const char *fmtname;
70 de_ext_encoding input_encoding_for_filenames;
71 de_ext_encoding input_encoding_for_comments;
72 de_ext_encoding input_encoding_for_arcmac_fn;
73 u8 method10; // 1=trimmed, 2=crushed
74 int recurse_subdirs;
75 u8 sig_byte;
76 u8 prescan_found_eoa;
77 u8 has_trailer_data;
78 u8 has_pak_trailer;
79 u8 has_arc_extensions;
80 i64 prescan_pos_after_eoa;
81 i64 num_top_level_members; // Not including EOA marker
82 i64 end_of_known_data;
83 struct de_crcobj *crco;
84 struct de_strarray *curpath;
85 struct persistent_member_data *persistent_md; // optional array[num_top_level_members]
86 dbuf *pak16sfx_outf;
89 struct member_parser_data {
90 int nesting_level;
91 int member_idx;
92 u8 magic;
93 u8 cmpr_meth, cmpr_meth_masked;
94 i64 member_pos;
95 i64 member_len;
96 i64 cmpr_data_pos;
97 i64 cmpr_data_len;
100 typedef void (*member_cb_type)(deark *c, lctx *d, struct member_parser_data *mpd);
// Returns 1 if m is any recognized ARC compression method code (the
// end-of-archive marker, 0, counts as recognized), else 0.
// Not for Spark format.
static int is_known_cmpr_meth(u8 m)
{
	switch(m) {
	case 20:
	case 21:
	case 22:
	case 30:
		return 1;
	default:
		break;
	}
	return (m<=11) ? 1 : 0;
}
// Returns 1 if m is plausible as the compression method of the first
// member of an archive (the end-of-archive marker, 0, is accepted), else 0.
// (Not for Spark format.)
static int is_known_first_cmpr_meth(u8 m)
{
	if(m==20) return 1;
	if(m>11) return 0;
	return 1;
}
123 // For ARC, not for Spark or ArcMac.
124 // It's assumed that the byte at pos1 is known to be 0x1a.
125 // Will validate the first member, and (unless it is the end marker,
126 // or strictness==0), the first two bytes of the second member.
127 static int is_valid_file_at(dbuf *f, i64 pos1, i64 endpos, UI strictness)
129 u8 marker;
130 u8 cmpr_meth;
131 i64 cmpr_len;
132 i64 pos = pos1;
134 pos++;
136 cmpr_meth = dbuf_getbyte_p(f, &pos);
137 if(!is_known_first_cmpr_meth(cmpr_meth)) {
138 return 0;
140 if(cmpr_meth==0) return 1; // End marker
142 if(cmpr_meth<20 && strictness>=2) {
143 u8 f1;
145 // test 1st char of filename
146 f1 = dbuf_getbyte(f, pos);
147 if(f1<0x20) return 0;
149 pos += 13;
151 if(strictness==0) return 1;
153 cmpr_len = dbuf_getu32le_p(f, &pos);
154 pos += 6;
155 if(cmpr_meth!=1) pos += 4;
156 pos += cmpr_len;
157 if(pos==endpos) return 1;
158 if(pos>endpos-2) return 0;
160 marker = dbuf_getbyte_p(f, &pos);
161 if(marker!=0x1a) return 0;
162 cmpr_meth = dbuf_getbyte_p(f, &pos);
163 if(!is_known_cmpr_meth(cmpr_meth)) return 0;
164 return 1;
167 static void mark_end_of_known_data(lctx *d, i64 pos)
169 if(pos > d->end_of_known_data) {
170 d->end_of_known_data = pos;
174 // Calls the supplied callback function for each ARC member found.
175 // Also called for end-of-archive/directory markers.
176 // Also called if unexpected data is encountered (with mpd->magic != 0x1a).
177 static void parse_member_sequence(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level,
178 member_cb_type member_cbfn)
180 struct member_parser_data *mpd = NULL;
181 int member_idx = 0;
182 i64 pos = pos1;
184 mpd = de_malloc(c, sizeof(struct member_parser_data));
186 while(1) {
187 if(pos+2 > pos1+len) break;
188 de_zeromem(mpd, sizeof(struct member_parser_data));
189 mpd->nesting_level = nesting_level;
190 mpd->member_idx = member_idx++;
191 mpd->member_pos = pos;
193 mpd->magic = de_getbyte_p(&pos);
195 if(mpd->magic==0xfe && d->fmt==FMT_PAK16SFX) {
196 // Some hacks here. We're not setting all the fields correctly,
197 // just the ones we need.
198 mpd->cmpr_data_len = (i64)de_getbyte_p(&pos);
199 mpd->cmpr_data_pos = pos;
200 mpd->member_len = 2+mpd->cmpr_data_len;
201 member_cbfn(c, d, mpd);
202 pos = mpd->member_pos + mpd->member_len;
203 continue;
205 else if(mpd->magic!=d->sig_byte) {
206 mpd->member_len = 1;
207 mpd->cmpr_data_pos = mpd->member_pos; // dummy value
208 member_cbfn(c, d, mpd);
209 break;
211 mpd->cmpr_meth = de_getbyte_p(&pos);
212 mpd->cmpr_meth_masked = mpd->cmpr_meth & 0x7f;
214 if(mpd->cmpr_meth_masked==0x00 || mpd->cmpr_meth==0x1f) { // end of archive/dir
215 mpd->member_len = 2;
216 mpd->cmpr_data_pos = mpd->member_pos+2; // dummy value
217 member_cbfn(c, d, mpd);
218 break;
221 if(d->fmt==FMT_ARCMAC) {
222 u8 magic2;
224 // TODO: Check for EOF?
225 pos += 57; // Skip remainder of 59-byte ArcMac preheader
226 magic2 = de_getbyte_p(&pos);
227 if(magic2 != 0x1a) { // Error
228 // TODO: Call member_cbfn()?
229 break;
232 // Read the "real" compression method field (should be the same?).
233 mpd->cmpr_meth = de_getbyte_p(&pos);
234 mpd->cmpr_meth_masked = mpd->cmpr_meth & 0x7f;
237 pos += 13;
238 mpd->cmpr_data_len = de_getu32le_p(&pos);
239 pos += 2+2+2;
240 if(mpd->cmpr_meth_masked!=0x01) {
241 pos += 4; // original size
243 if(mpd->cmpr_meth & 0x80) {
244 pos += 12; // Spark-specific data
247 mpd->cmpr_data_pos = pos;
248 mpd->member_len = mpd->cmpr_data_pos + mpd->cmpr_data_len - mpd->member_pos;
249 member_cbfn(c, d, mpd);
251 pos = mpd->member_pos + mpd->member_len;
254 de_free(c, mpd);
257 static void decompressor_stored(deark *c, lctx *d, struct member_data *md,
258 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
259 struct de_dfilter_results *dres)
261 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
264 static void decompressor_spark_compressed(deark *c, lctx *d, struct member_data *md,
265 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
266 struct de_dfilter_results *dres)
268 struct de_lzw_params delzwp;
270 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
271 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
272 delzwp.flags |= DE_LZWFLAG_HAS1BYTEHEADER;
273 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
276 static void decompressor_squashed(deark *c, lctx *d, struct member_data *md,
277 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
278 struct de_dfilter_results *dres)
280 struct de_lzw_params delzwp;
282 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
283 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
284 delzwp.max_code_size = 13;
285 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
288 static void decompressor_packed(deark *c, lctx *d, struct member_data *md,
289 struct de_dfilter_in_params *dcmpri,
290 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
292 fmtutil_decompress_rle90_ex(c, dcmpri, dcmpro, dres, 0);
295 static void decompressor_squeezed(deark *c, lctx *d, struct member_data *md,
296 struct de_dfilter_in_params *dcmpri,
297 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
299 struct de_dcmpr_two_layer_params tlp;
301 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
302 tlp.codec1_type1 = fmtutil_huff_squeeze_codectype1;
303 tlp.codec2 = dfilter_rle90_codec;
304 tlp.dcmpri = dcmpri;
305 tlp.dcmpro = dcmpro;
306 tlp.dres = dres;
307 de_dfilter_decompress_two_layer(c, &tlp);
310 static void decompressor_crunched5(deark *c, lctx *d, struct member_data *md,
311 struct de_dfilter_in_params *dcmpri,
312 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
314 struct de_lzw_params delzwp;
316 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
317 delzwp.fmt = DE_LZWFMT_ARC5;
318 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
321 static void decompressor_crunched6(deark *c, lctx *d, struct member_data *md,
322 struct de_dfilter_in_params *dcmpri,
323 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
325 struct de_dcmpr_two_layer_params tlp;
326 struct de_lzw_params delzwp;
328 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
329 delzwp.fmt = DE_LZWFMT_ARC5;
331 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
332 tlp.codec1_pushable = dfilter_lzw_codec;
333 tlp.codec1_private_params = (void*)&delzwp;
335 tlp.codec2 = dfilter_rle90_codec;
337 tlp.dcmpri = dcmpri;
338 tlp.dcmpro = dcmpro;
339 tlp.dres = dres;
341 de_dfilter_decompress_two_layer(c, &tlp);
344 static void decompressor_crunched8(deark *c, lctx *d, struct member_data *md,
345 struct de_dfilter_in_params *dcmpri,
346 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
348 struct de_dcmpr_two_layer_params tlp;
349 struct de_lzw_params delzwp;
351 // "Crunched" means "packed", then "compressed".
352 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
354 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
355 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
356 delzwp.flags |= DE_LZWFLAG_HAS1BYTEHEADER;
358 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
359 tlp.codec1_pushable = dfilter_lzw_codec;
360 tlp.codec1_private_params = (void*)&delzwp;
362 tlp.codec2 = dfilter_rle90_codec;
364 tlp.dcmpri = dcmpri;
365 tlp.dcmpro = dcmpro;
366 tlp.dres = dres;
368 de_dfilter_decompress_two_layer(c, &tlp);
371 static void decompressor_trimmed(deark *c, lctx *d, struct member_data *md,
372 struct de_dfilter_in_params *dcmpri,
373 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
375 struct de_dcmpr_two_layer_params tlp;
376 struct de_lh1_params lh1p;
378 de_zeromem(&lh1p, sizeof(struct de_lh1_params));
379 lh1p.is_arc_trimmed = 1;
380 lh1p.history_fill_val = 0x00;
382 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
383 tlp.codec1_pushable = dfilter_lh1_codec;
384 tlp.codec1_private_params = (void*)&lh1p;
386 tlp.codec2 = dfilter_rle90_codec;
388 tlp.dcmpri = dcmpri;
389 tlp.dcmpro = dcmpro;
390 tlp.dres = dres;
392 de_dfilter_decompress_two_layer(c, &tlp);
395 static void decompressor_distilled(deark *c, lctx *d, struct member_data *md,
396 struct de_dfilter_in_params *dcmpri,
397 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
399 fmtutil_distilled_codectype1(c, dcmpri, dcmpro, dres, NULL);
402 // Flags:
403 // 0x01 = valid in ARC
404 // 0x02 = valid in Spark
405 // 0x80 = assume high bit of cmpr_meth is set for Spark format
406 // 0x100, 0x200 = special
407 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
408 { 0x00, 0x03, "end of archive marker", NULL },
409 { 0x01, 0x83, "stored (old format)", decompressor_stored },
410 { 0x02, 0x83, "stored", decompressor_stored },
411 { 0x03, 0x83, "packed (RLE)", decompressor_packed },
412 { 0x04, 0x83, "squeezed (RLE + Huffman)", decompressor_squeezed },
413 { 0x05, 0x83, "crunched5 (static LZW)", decompressor_crunched5 },
414 { 0x06, 0x83, "crunched6 (RLE + static LZW)", decompressor_crunched6 },
415 { 0x07, 0x83, "crunched7 (ARC 4.6)", NULL },
416 { 0x08, 0x83, "crunched8 (RLE + dynamic LZW)", decompressor_crunched8 },
417 { 0x09, 0x83, "squashed (dynamic LZW)", decompressor_squashed },
418 { 10, 0x101, "trimmed", decompressor_trimmed },
419 { 10, 0x201, "crushed", NULL },
420 { 10, 0x01, "trimmed or crushed", NULL },
421 { 0x0b, 0x01, "distilled", decompressor_distilled },
422 { 20, 0x01, "archive info", NULL },
423 { 21, 0x01, "extended file info", NULL },
424 { 22, 0x01, "OS info", NULL },
425 { 0x1e, 0x01, "subdir", NULL },
426 { 0x1f, 0x01, "end of subdir marker", NULL },
427 { 0x80, 0x02, "end of archive marker", NULL },
428 { 0xff, 0x02, "compressed", decompressor_spark_compressed }
431 static const struct cmpr_meth_info *get_cmpr_meth_info(lctx *d, u8 cmpr_meth)
433 size_t k;
434 const struct cmpr_meth_info *p;
436 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
437 u8 meth_adjusted;
439 p = &cmpr_meth_info_arr[k];
440 if(d->fmt==FMT_ARC && !(p->flags & 0x1)) continue;
441 if(d->fmt==FMT_SPARK && !(p->flags & 0x2)) continue;
442 meth_adjusted = p->cmpr_meth;
443 if(d->fmt==FMT_SPARK && (p->flags & 0x80)) {
444 meth_adjusted |= 0x80;
446 if(meth_adjusted != cmpr_meth) continue;
448 if(p->cmpr_meth==10) {
449 // Method 10 has a conflict -- it could be either Trimmed (ARC7)
450 // or Crushed (PAK).
451 if(p->flags&0x100) { // Skip this unless we're sure it's Trimmed
452 if(d->method10!=1) {
453 if(d->has_pak_trailer || !d->has_arc_extensions) continue;
456 else if(p->flags&0x200) { // Skip this unless we're sure it's Crushed
457 if(d->method10!=2) {
458 if(!d->has_pak_trailer || d->has_arc_extensions) continue;
462 return p;
464 return NULL;
467 static void read_one_pk_comment(deark *c, lctx *d, i64 pos, de_ucstring *s)
469 dbuf_read_to_ucstring(c->infile, pos, 32, s, 0, d->input_encoding_for_comments);
470 ucstring_strip_trailing_spaces(s);
473 static void init_trailer_data(deark *c, lctx *d)
475 d->has_trailer_data = 1;
476 if(!d->persistent_md) {
477 d->persistent_md = de_mallocarray(c, d->num_top_level_members,
478 sizeof(struct persistent_member_data));
482 static int is_pkcomment_sig_at(deark *c, lctx *d, i64 pos)
484 i64 comments_descr_pos;
486 if(!d->prescan_found_eoa) return 0;
487 if(pos < d->prescan_pos_after_eoa) return 0;
488 if(pos > c->infile->len-8) return 0;
489 if((UI)de_getu32be(pos) != 0x504baa55U) {
490 return 0;
492 comments_descr_pos = de_getu32le(pos+4);
493 if((comments_descr_pos < d->prescan_pos_after_eoa) ||
494 (comments_descr_pos > pos-4))
496 return 0;
498 return 1;
501 static void do_pk_comments(deark *c, lctx *d)
503 int found = 0;
504 i64 sig_pos;
505 i64 comments_descr_pos;
506 int has_file_comments = 0;
507 int has_archive_comment = 0;
508 i64 file_comments_pos = 0;
509 de_ucstring *archive_comment = NULL;
510 u8 dscr[4];
512 if(!d->prescan_found_eoa) return;
513 sig_pos = c->infile->len-8;
514 if(is_pkcomment_sig_at(c, d, sig_pos)){
515 found = 1;
518 if(!found) {
519 i64 count;
521 // The PK signature normally appears 8 bytes from the end of the file,
522 // but we'll scan for it, in case the file has padding appended to it.
523 // The assumption is that it appears after a whole number of 32-byte
524 // records. In case of multiple signatures, we use the earliest one that
525 // looks valid.
526 sig_pos = d->prescan_pos_after_eoa+32;
527 count = 0;
529 while(1) {
530 if(sig_pos >= c->infile->len-8) break;
531 if(count > d->num_top_level_members+10) break;
532 if(is_pkcomment_sig_at(c, d, sig_pos)){
533 found = 1;
534 break;
536 sig_pos += 32;
537 count++;
541 if(!found) return;
543 de_dbg(c, "PKARC/PKPAK comment block found, ID at %"I64_FMT, sig_pos);
544 de_dbg_indent(c, 1);
545 mark_end_of_known_data(d, sig_pos+8);
546 init_trailer_data(c, d);
548 // Note: This logic is based on reverse engineering, and could be wrong.
549 comments_descr_pos = de_getu32le(sig_pos+4);
550 de_dbg(c, "descriptor pos: %"I64_FMT, comments_descr_pos);
551 // comments_descr_pos has already been validated, by is_pkcomment_sig_at().
553 de_read(dscr, comments_descr_pos, 4);
554 if(dscr[0]==0x20 && dscr[1]==0x20 && dscr[2]==0x20 && dscr[3]==0x00) {
555 has_file_comments = 0;
556 has_archive_comment = 1;
558 else if(dscr[0]==0x01 && dscr[3]==0x20) {
559 has_file_comments = 1;
560 has_archive_comment = 0;
562 else if(dscr[0]==0x01 && dscr[3]==0x00) {
563 has_file_comments = 1;
564 has_archive_comment = 1;
566 else {
567 de_dbg(c, "[unrecognized comments descriptor]");
570 if(has_file_comments) {
571 file_comments_pos = comments_descr_pos + 32;
572 if(sig_pos - file_comments_pos < 32) {
573 has_file_comments = 0;
577 if(has_archive_comment) {
578 archive_comment = ucstring_create(c);
579 read_one_pk_comment(c, d, comments_descr_pos-32, archive_comment);
580 de_dbg(c, "archive comment: \"%s\"", ucstring_getpsz_d(archive_comment));
583 if(has_file_comments) {
584 i64 num_file_comments;
585 i64 i;
587 num_file_comments = (sig_pos - file_comments_pos)/32;
588 de_dbg(c, "apparent number of file comments: %d", (int)num_file_comments);
590 for(i=0; i<num_file_comments && i<d->num_top_level_members; i++) {
591 if(!d->persistent_md[i].comment) {
592 d->persistent_md[i].comment = ucstring_create(c);
594 if(ucstring_isnonempty(d->persistent_md[i].comment)) continue;
595 read_one_pk_comment(c, d,file_comments_pos + i*32, d->persistent_md[i].comment);
599 ucstring_destroy(archive_comment);
600 de_dbg_indent(c, -1);
603 // Always sets *pbytes_consumed.
604 // Returns 0 if there are no more records after this.
605 static int do_pak_ext_record(deark *c, lctx *d, i64 pos1, i64 *pbytes_consumed)
607 i64 pos = pos1;
608 u8 rectype;
609 const char *rtname = "?";
610 int retval = 0;
611 i64 filenum;
612 i64 filenum_adj = 0;
613 i64 dlen;
614 de_ucstring *archive_comment = NULL;
615 struct persistent_member_data *pmd = NULL;
616 int saved_indent_level;
618 de_dbg_indent_save(c, &saved_indent_level);
619 *pbytes_consumed = 0;
620 if(de_getbyte_p(&pos) != 0xfe) goto done;
621 de_dbg(c, "record at %"I64_FMT, pos1);
622 de_dbg_indent(c, 1);
624 rectype = de_getbyte_p(&pos);
625 switch(rectype) {
626 case 0: rtname = "end"; break;
627 case 1: rtname = "remark"; break;
628 case 2: rtname = "path"; break;
629 case 3: rtname = "security envelope"; break;
631 de_dbg(c, "rectype: %d (%s)", (int)rectype, rtname);
632 if(rectype==0) {
633 *pbytes_consumed = 2;
634 goto done;
637 filenum = de_getu16le_p(&pos);
638 de_dbg(c, "file num: %d", (int)filenum);
639 dlen = de_getu32le_p(&pos);
640 de_dbg(c, "dlen: %"I64_FMT, dlen);
641 if(pos+dlen > c->infile->len) goto done;
643 *pbytes_consumed = 8 + dlen;
644 retval = 1;
646 if(filenum > 0) {
647 filenum_adj = filenum - 1;
648 if(filenum_adj < d->num_top_level_members) {
649 pmd = &d->persistent_md[filenum_adj];
653 if(rectype==1) { // remark
654 if(filenum==0) { // archive comment
655 archive_comment = ucstring_create(c);
656 dbuf_read_to_ucstring_n(c->infile, pos, dlen, 16384, archive_comment,
657 0, d->input_encoding_for_comments);
658 de_dbg(c, "archive comment: \"%s\"", ucstring_getpsz_d(archive_comment));
660 else { // file comment
661 if(!pmd) goto done;
663 if(!pmd->comment) {
664 pmd->comment = ucstring_create(c);
666 if(ucstring_isnonempty(pmd->comment)) goto done;
667 dbuf_read_to_ucstring_n(c->infile, pos, dlen, 2048, pmd->comment,
668 0, d->input_encoding_for_comments);
671 else if(rectype==2) {
672 if(!pmd) goto done;
673 if(!pmd->path) {
674 pmd->path = ucstring_create(c);
676 if(ucstring_isnonempty(pmd->path)) goto done;
677 dbuf_read_to_ucstring_n(c->infile, pos, dlen, 512, pmd->path,
678 0, d->input_encoding_for_comments);
681 done:
682 if(archive_comment) ucstring_destroy(archive_comment);
683 de_dbg_indent_restore(c, saved_indent_level);
684 return retval;
687 static void do_pak_trailer(deark *c, lctx *d)
689 u8 b;
690 i64 pos;
692 if(!d->prescan_found_eoa) return;
693 if(c->infile->len - d->prescan_pos_after_eoa < 2) return;
694 if(de_getbyte(d->prescan_pos_after_eoa) != 0xfe) return;
695 b = de_getbyte(d->prescan_pos_after_eoa+1);
696 if(b>4) return;
698 pos = d->prescan_pos_after_eoa;
699 de_dbg(c, "PAK extended records at %"I64_FMT, pos);
700 de_dbg_indent(c, 1);
701 d->has_pak_trailer = 1;
702 init_trailer_data(c, d);
704 while(1) {
705 int ret;
706 i64 bytes_consumed = 0;
708 if(pos > c->infile->len-2) break;
709 ret = do_pak_ext_record(c, d, pos, &bytes_consumed);
710 pos += bytes_consumed;
711 mark_end_of_known_data(d, pos);
712 if(!ret || bytes_consumed<8) break;
715 de_dbg_indent(c, -1);
718 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
720 char timestamp_buf[64];
722 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
723 de_dbg(c, "%s: %s", name, timestamp_buf);
726 // Convert backslashes to slashes, and make sure the string ends with a /.
727 static void fixup_path(deark *c, lctx *d, de_ucstring *s)
729 i64 i;
731 if(s->len<1) return;
733 for(i=0; i<s->len; i++) {
734 if(s->str[i]=='\\') {
735 s->str[i] = '/';
739 if(s->str[s->len-1]!='/') {
740 ucstring_append_char(s, '/');
744 static void do_decompress_fork_arcmac(struct member_data *md,
745 dbuf *outf, const char *fork_name)
747 struct de_dfilter_in_params dcmpri;
748 struct de_dfilter_out_params dcmpro;
749 struct de_dfilter_results dres;
750 deark *c = md->c;
751 int saved_indent_level;
753 de_dbg_indent_save(c, &saved_indent_level);
754 if(md->orig_size==0) goto done;
756 de_dbg(c, "decompressing %s fork", fork_name);
757 de_dbg_indent(c, 1);
759 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
760 dcmpri.f = c->infile;
761 dcmpri.pos = md->cmpr_data_pos;
762 dcmpri.len = md->cmpr_size;
763 dcmpro.f = outf;
764 dcmpro.len_known = 1;
765 dcmpro.expected_len = md->orig_size;
767 if(dcmpri.pos + dcmpri.len > dcmpri.f->len) {
768 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(md->arcmac_fn->str));
769 goto done;
772 md->cmi->decompressor(c, md->d, md, &dcmpri, &dcmpro, &dres);
773 dbuf_flush(dcmpro.f);
774 if(dres.errcode) {
775 de_err(c, "Decompression failed for file %s[%s fork]: %s",
776 ucstring_getpsz_d(md->arcmac_fn->str),
777 fork_name, de_dfilter_get_errmsg(c, &dres));
778 goto done;
781 md->crc_calc = de_crcobj_getval(md->d->crco);
782 de_dbg(c, "crc (calculated): 0x%04x", (unsigned int)md->crc_calc);
784 if(md->crc_calc!=md->crc_reported) {
785 de_err(c, "%s: CRC check failed", ucstring_getpsz_d(md->arcmac_fn->str));
788 done:
789 de_dbg_indent_restore(c, saved_indent_level);
792 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
793 struct de_advfile_cbparams *afp)
795 struct member_data *md = (struct member_data*)advf->userdata;
797 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
798 do_decompress_fork_arcmac(md, afp->outf, "data");
800 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
801 do_decompress_fork_arcmac(md, afp->outf, "rsrc");
804 return 1;
807 // TODO: Reduce code duplication with do_extract_member_file(), etc.
808 // Retrofitting the arc module for arcmac format made some things messy.
809 // It could be made somewhat cleaner by using the "advfile" system unconditionally
810 // -- there are pros and cons of doing that.
811 static void do_extract_member_file_arcmac(deark *c, lctx *d, struct member_data *md,
812 de_finfo *fi)
814 int saved_indent_level;
816 de_dbg_indent_save(c, &saved_indent_level);
818 if(!md->cmi || !md->cmi->decompressor) {
819 de_err(c, "%s: Compression type 0x%02x (%s) is not supported.",
820 ucstring_getpsz_d(md->fn), (unsigned int)md->cmpr_meth, md->cmpr_meth_name);
821 goto done;
824 if(md->arcmac_dforklen && md->arcmac_rforklen) {
825 // This seems to be allowed, but I need sample files.
826 de_err(c, "Can't handle multi-fork ArcMac file");
827 goto done;
829 if(md->arcmac_dforklen + md->arcmac_rforklen != md->orig_size) {
830 de_err(c, "Inconsistent ArcMac fork size");
831 goto done;
834 if(md->arcmac_fn && ucstring_isnonempty(md->arcmac_fn->str)) {
835 ucstring_append_ucstring(md->arcmac_advf->filename, md->arcmac_fn->str);
837 else {
838 ucstring_append_ucstring(md->arcmac_advf->filename, md->fn);
840 md->arcmac_advf->original_filename_flag = 1;
842 md->arcmac_advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] =
843 fi->timestamp[DE_TIMESTAMPIDX_MODIFY];
845 md->arcmac_advf->userdata = (void*)md;
846 md->arcmac_advf->writefork_cbfn = my_advfile_cbfn;
848 md->arcmac_advf->mainfork.writelistener_cb = de_writelistener_for_crc;
849 md->arcmac_advf->mainfork.userdata_for_writelistener = (void*)d->crco;
850 md->arcmac_advf->rsrcfork.writelistener_cb = de_writelistener_for_crc;
851 md->arcmac_advf->rsrcfork.userdata_for_writelistener = (void*)d->crco;
852 de_crcobj_reset(d->crco);
854 de_advfile_run(md->arcmac_advf);
856 done:
857 de_dbg_indent_restore(c, saved_indent_level);
860 static void do_extract_member_file(deark *c, lctx *d, struct member_data *md,
861 struct persistent_member_data *pmd, de_finfo *fi, i64 pos)
863 de_ucstring *fullfn = NULL;
864 dbuf *outf = NULL;
865 int ignore_failed_crc = 0;
866 int saved_indent_level;
867 struct de_dfilter_in_params dcmpri;
868 struct de_dfilter_out_params dcmpro;
869 struct de_dfilter_results dres;
871 de_dbg_indent_save(c, &saved_indent_level);
872 fullfn = ucstring_create(c);
874 if(pmd && ucstring_isnonempty(pmd->path)) {
875 // For PAK-style paths.
876 // (Pretty useless, until we support cmpr. meth. #11.)
877 // Note that PAK-style paths, and directory recursion, are not expected to
878 // be possible in the same file.
879 ucstring_append_ucstring(fullfn, pmd->path);
880 fixup_path(c, d, fullfn);
883 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
885 if(md->rfa.file_type_known) {
886 fmtutil_riscos_append_type_to_filename(c, fi, fullfn, &md->rfa, md->is_dir, 0);
888 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
890 de_dbg_indent(c, 1);
892 if(!md->cmi || !md->cmi->decompressor) {
893 const char *mname;
895 // A quick hack because we don't want the error message claiming that
896 // "trimmed or crushed" isn't supported. We do support trimmed, so
897 // if we got here, we must have decided it's crushed.
898 mname = md->cmpr_meth==10 ? "crushed" : md->cmpr_meth_name;
900 de_err(c, "%s: Compression type 0x%02x (%s) is not supported.",
901 ucstring_getpsz_d(md->fn), (UI)md->cmpr_meth, mname);
902 goto done;
905 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
906 dbuf_enable_wbuffer(outf);
908 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)d->crco);
909 de_crcobj_reset(d->crco);
911 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
912 dcmpri.f = c->infile;
913 dcmpri.pos = pos;
914 dcmpri.len = md->cmpr_size;
915 dcmpro.f = outf;
916 dcmpro.len_known = 1;
917 dcmpro.expected_len = md->orig_size;
919 if(dcmpri.pos + dcmpri.len > dcmpri.f->len) {
920 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(md->fn));
921 goto done;
924 md->cmi->decompressor(c, d, md, &dcmpri, &dcmpro, &dres);
925 dbuf_flush(dcmpro.f);
926 if(dres.errcode) {
927 de_err(c, "%s: Decompression failed: %s", ucstring_getpsz_d(md->fn),
928 de_dfilter_get_errmsg(c, &dres));
929 goto done;
932 md->crc_calc = de_crcobj_getval(d->crco);
933 de_dbg(c, "crc (calculated): 0x%04x", (unsigned int)md->crc_calc);
934 if(md->crc_reported==0 && !d->recurse_subdirs && md->rfa.file_type_known &&
935 md->rfa.file_type==0xddc && md->cmpr_meth==0x82)
937 ignore_failed_crc = 1;
939 if((md->crc_calc!=md->crc_reported) && !ignore_failed_crc) {
940 de_err(c, "%s: CRC check failed", ucstring_getpsz_d(md->fn));
943 done:
944 dbuf_close(outf);
945 ucstring_destroy(fullfn);
946 de_dbg_indent_restore(c, saved_indent_level);
949 // "Extract" a directory entry
950 static void do_extract_member_dir(deark *c, lctx *d, struct member_data *md,
951 de_finfo *fi)
953 dbuf *outf = NULL;
954 de_ucstring *fullfn = NULL;
956 fullfn = ucstring_create(c);
957 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
959 fi->is_directory = 1;
960 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
962 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
963 dbuf_close(outf);
964 ucstring_destroy(fullfn);
967 struct extinfo_item_info {
968 u8 cmprmeth;
969 u8 rectype;
970 unsigned int flags; // 0x1 = string
971 const char *name;
972 void *reserved;
975 static void do_info_record_string(deark *c, lctx *d, i64 pos, i64 len, const char *name)
977 de_ucstring *s = NULL;
979 s = ucstring_create(c);
980 dbuf_read_to_ucstring_n(c->infile, pos, len, 2048, s, DE_CONVFLAG_STOP_AT_NUL,
981 d->input_encoding_for_comments);
982 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(s));
983 ucstring_destroy(s);
986 static const struct extinfo_item_info extinfo_arr[] = {
987 { 20, 0, 0x01, "archive description", NULL },
988 { 20, 1, 0x01, "archive created by", NULL },
989 { 20, 2, 0x01, "archive last modified by", NULL },
990 { 21, 0, 0x01, "file description", NULL },
991 { 21, 1, 0x01, "long name", NULL },
992 { 21, 2, 0x00, "timestamps", NULL },
993 { 21, 3, 0x00, "icon", NULL },
994 { 21, 4, 0x01, "attributes", NULL },
995 { 21, 5, 0x01, "full path", NULL }
998 static const struct extinfo_item_info *find_extinfo_item(u8 cmprmeth, u8 rectype)
1000 size_t k;
1002 for(k=0; k<DE_ARRAYCOUNT(extinfo_arr); k++) {
1003 if(extinfo_arr[k].cmprmeth==cmprmeth && extinfo_arr[k].rectype==rectype) {
1004 return &extinfo_arr[k];
1007 return NULL;
1010 static void do_info_item(deark *c, lctx *d, struct member_data *md)
1012 int saved_indent_level;
1013 i64 pos = md->cmpr_data_pos;
1014 i64 endpos = md->cmpr_data_pos+md->cmpr_size;
1016 de_dbg_indent_save(c, &saved_indent_level);
1017 de_dbg(c, "info item data (meth=%d) at %"I64_FMT" len=%"I64_FMT, (int)md->cmpr_meth,
1018 md->cmpr_data_pos, md->cmpr_size);
1019 de_dbg_indent(c, 1);
1021 while(1) {
1022 i64 reclen;
1023 i64 recpos;
1024 i64 dpos;
1025 i64 dlen;
1026 u8 rectype;
1027 const struct extinfo_item_info *ei;
1028 const char *ei_name;
1030 recpos = pos;
1031 if(pos+3 > endpos) goto done;
1032 reclen = de_getu16le_p(&pos);
1033 if(reclen<3 || recpos+reclen > endpos) goto done;
1034 rectype = de_getbyte_p(&pos);
1035 ei = find_extinfo_item(md->cmpr_meth, rectype);
1036 if(ei && ei->name) ei_name = ei->name;
1037 else ei_name = "?";
1039 dpos = recpos + 3;
1040 dlen = reclen - 3;
1041 de_dbg(c, "record type %d (%s) at %"I64_FMT", dpos=%"I64_FMT", dlen=%"I64_FMT,
1042 (int)rectype, ei_name, recpos, dpos, dlen);
1043 de_dbg_indent(c, 1);
1044 if(ei && (ei->flags & 0x01)) {
1045 do_info_record_string(c, d, dpos, dlen, ei_name);
1047 else {
1048 de_dbg_hexdump(c, c->infile, dpos, dlen, 256, NULL, 0x1);
1050 de_dbg_indent(c, -1);
1051 pos = recpos + reclen;
1054 done:
1055 de_dbg_indent_restore(c, saved_indent_level);
1058 static void do_sequence_of_members(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level);
1060 static void do_arcmac_preheader(deark *c, lctx *d, struct member_data *md, i64 pos1)
1062 i64 pos = pos1;
1063 u16 finder_flags;
1064 struct de_fourcc filetype;
1065 struct de_fourcc creator;
1067 if(md->arcmac_advf) return;
1068 md->arcmac_advf = de_advfile_create(c);
1069 md->arcmac_advf->enable_wbuffer = 1;
1071 pos += 2; // magic / cmprtype
1073 md->arcmac_fn = dbuf_read_string(c->infile, pos, 31, 31, DE_CONVFLAG_STOP_AT_NUL,
1074 d->input_encoding_for_arcmac_fn);
1075 de_dbg(c, "ArcMac filename: \"%s\"", ucstring_getpsz_d(md->arcmac_fn->str));
1076 if(md->arcmac_fn->sz_strlen>0) {
1077 md->arcmac_advf->original_filename_flag = 1;
1078 de_advfile_set_orig_filename(md->arcmac_advf, md->arcmac_fn->sz, md->arcmac_fn->sz_strlen);
1080 pos += 32;
1082 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
1083 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
1084 de_memcpy(md->arcmac_advf->typecode, filetype.bytes, 4);
1085 md->arcmac_advf->has_typecode = 1;
1086 pos += 4;
1088 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
1089 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
1090 de_memcpy(md->arcmac_advf->creatorcode, creator.bytes, 4);
1091 md->arcmac_advf->has_creatorcode = 1;
1092 pos += 4;
1094 finder_flags = (u16)de_getu16be_p(&pos);
1095 de_dbg(c, "finder flags: 0x%04x", finder_flags);
1096 md->arcmac_advf->finderflags = finder_flags;
1097 md->arcmac_advf->has_finderflags = 1;
1098 pos += 6; // remainder of finfo
1100 md->arcmac_dforklen = de_getu32le_p(&pos);
1101 de_dbg(c, "data fork len: %"I64_FMT, md->arcmac_dforklen);
1102 md->arcmac_rforklen = de_getu32le_p(&pos);
1103 de_dbg(c, "rsrc fork len: %"I64_FMT, md->arcmac_rforklen);
1105 md->arcmac_advf->mainfork.fork_exists = (md->arcmac_dforklen!=0);
1106 md->arcmac_advf->mainfork.fork_len = md->arcmac_dforklen;
1107 md->arcmac_advf->rsrcfork.fork_exists = (md->arcmac_rforklen!=0);
1108 md->arcmac_advf->rsrcfork.fork_len = md->arcmac_rforklen;
1111 // The main per-member processing function
1112 static void member_cb_main(deark *c, lctx *d, struct member_parser_data *mpd)
1114 int saved_indent_level;
1115 i64 pos1 = mpd->member_pos;
1116 i64 pos = pos1;
1117 i64 hdrsize;
1118 i64 mod_time_raw, mod_date_raw;
1119 de_finfo *fi = NULL;
1120 struct member_data *md = NULL;
1121 int need_curpath_pop = 0;
1122 struct persistent_member_data *pmd = NULL;
1124 de_dbg_indent_save(c, &saved_indent_level);
1125 de_dbg(c, "member at %"I64_FMT, pos1);
1126 de_dbg_indent(c, 1);
1127 md = de_malloc(c, sizeof(struct member_data));
1128 md->c = c;
1129 md->d = d;
1131 if(mpd->nesting_level==0 && d->persistent_md && (mpd->member_idx < d->num_top_level_members)) {
1132 pmd = &d->persistent_md[mpd->member_idx];
1133 if(ucstring_isnonempty(pmd->comment)) {
1134 de_dbg(c, "file comment: \"%s\"", ucstring_getpsz_d(pmd->comment));
1136 if(ucstring_isnonempty(pmd->path)) {
1137 de_dbg(c, "path: \"%s\"", ucstring_getpsz_d(pmd->path));
1141 pos++; // 'magic' byte, already read by the parser
1142 if(mpd->magic != d->sig_byte) {
1143 de_err(c, "Failed to find %s member at %"I64_FMT, d->fmtname, pos1);
1144 goto done;
1147 if(d->fmt==FMT_ARCMAC && mpd->cmpr_meth_masked!=0) {
1148 do_arcmac_preheader(c, d, md, mpd->member_pos);
1149 pos += 59;
1152 pos++; // compression ID, already read by the parser
1153 md->cmpr_meth = mpd->cmpr_meth;
1154 md->cmpr_meth_masked = mpd->cmpr_meth_masked;
1156 md->cmi = get_cmpr_meth_info(d, md->cmpr_meth);
1157 if(md->cmi && md->cmi->name) {
1158 md->cmpr_meth_name = md->cmi->name;
1160 else {
1161 md->cmpr_meth_name = "?";
1164 de_dbg(c, "cmpr meth: 0x%02x (%s)", (unsigned int)md->cmpr_meth, md->cmpr_meth_name);
1166 if(md->cmpr_meth_masked==0x00 || md->cmpr_meth==0x1f) {
1167 hdrsize = 2;
1169 else {
1170 if(md->cmpr_meth_masked==0x01) {
1171 hdrsize = 25;
1173 else {
1174 hdrsize = 29;
1176 if(md->cmpr_meth>=128) {
1177 hdrsize += 12;
1180 if(mpd->member_len<hdrsize) {
1181 de_err(c, "Insufficient data for archive member at %"I64_FMT, pos1);
1182 goto done;
1185 if(md->cmpr_meth_masked==0x00 || md->cmpr_meth==0x1f) { // end of archive/dir marker
1186 goto done;
1189 md->fn = ucstring_create(c);
1190 dbuf_read_to_ucstring(c->infile, pos, 13, md->fn, DE_CONVFLAG_STOP_AT_NUL,
1191 d->input_encoding_for_filenames);
1192 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
1193 pos += 13;
1195 pos += 4; // cmpr_size, already read by the parser
1196 md->cmpr_size = mpd->cmpr_data_len;
1197 de_dbg(c, "cmpr size: %"I64_FMT, md->cmpr_size);
1199 mod_date_raw = de_getu16le_p(&pos);
1200 mod_time_raw = de_getu16le_p(&pos);
1201 de_dos_datetime_to_timestamp(&md->arc_timestamp, mod_date_raw, mod_time_raw);
1202 md->arc_timestamp.tzcode = DE_TZCODE_LOCAL;
1203 dbg_timestamp(c, &md->arc_timestamp, ((d->fmt==FMT_SPARK) ? "timestamp (ARC)":"timestamp"));
1205 md->crc_reported = (u32)de_getu16le_p(&pos);
1206 de_dbg(c, "crc (reported): 0x%04x", (unsigned int)md->crc_reported);
1207 if((md->cmpr_meth_masked)==0x01) {
1208 md->orig_size = md->cmpr_size;
1210 else {
1211 md->orig_size = de_getu32le_p(&pos);
1212 de_dbg(c, "orig size: %"I64_FMT, md->orig_size);
1215 if(d->fmt == FMT_SPARK) {
1216 md->has_spark_attribs = 1;
1217 fmtutil_riscos_read_load_exec(c, c->infile, &md->rfa, pos);
1218 pos += 8;
1219 fmtutil_riscos_read_attribs_field(c, c->infile, &md->rfa, pos, 0);
1220 pos += 4;
1223 md->cmpr_data_pos = mpd->cmpr_data_pos;
1225 de_strarray_push(d->curpath, md->fn);
1226 need_curpath_pop = 1;
1228 // TODO: Is it possible to distinguish between a subdirectory, and a Spark
1229 // member file that should always be extracted? Does a nonzero CRC mean
1230 // we should not recurse?
1231 if(d->fmt==FMT_SPARK && d->recurse_subdirs && md->rfa.file_type_known &&
1232 (md->rfa.file_type==0xddc) && md->cmpr_meth==0x82)
1234 md->is_dir = 1;
1236 else if(d->fmt==FMT_ARC && d->recurse_subdirs && md->cmpr_meth==0x1e) {
1237 md->is_dir = 1;
1240 if(d->recurse_subdirs) {
1241 de_dbg(c, "is directory: %d", md->is_dir);
1244 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT, md->cmpr_data_pos, md->cmpr_size);
1246 // Extract...
1247 fi = de_finfo_create(c);
1248 fi->original_filename_flag = 1;
1250 if(md->rfa.mod_time.is_valid) {
1251 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->rfa.mod_time;
1253 else if(md->arc_timestamp.is_valid) {
1254 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->arc_timestamp;
1257 if(md->has_spark_attribs) {
1258 fi->has_riscos_data = 1;
1259 fi->riscos_attribs = md->rfa.attribs;
1260 fi->load_addr = md->rfa.load_addr;
1261 fi->exec_addr = md->rfa.exec_addr;
1264 if(md->is_dir) {
1265 fi->is_directory = 1;
1268 if(md->is_dir) {
1269 do_extract_member_dir(c, d, md, fi);
1271 // Nested subdirectory archives (ARC 6 "z" option, or Spark) have both a known
1272 // length (md->cmpr_size), and an end-of-archive marker. So there are two
1273 // ways to parse them:
1274 // 1) Recursively, meaning we trust the md->cmpr_size field (or maybe we should
1275 // use orig_size instead?).
1276 // 2) As a flat sequence of members, meaning we trust that a nested archive
1277 // will not have extra data after the end-of-archive marker.
1278 // Here, we use the recursive method.
1279 do_sequence_of_members(c, d, md->cmpr_data_pos, md->cmpr_size, mpd->nesting_level+1);
1281 else if(md->cmpr_meth>=30 && md->cmpr_meth<=39) {
1282 de_warn(c, "Unknown control item type %d at %"I64_FMT, (int)md->cmpr_meth, pos1);
1283 goto done;
1285 else if(md->cmpr_meth>=20 && md->cmpr_meth<=29) {
1286 do_info_item(c, d, md);
1288 else if(d->fmt==FMT_ARCMAC && md->arcmac_advf) {
1289 do_extract_member_file_arcmac(c, d, md, fi);
1291 else {
1292 do_extract_member_file(c, d, md, pmd, fi, md->cmpr_data_pos);
1295 done:
1296 if(need_curpath_pop) {
1297 de_strarray_pop(d->curpath);
1299 if(fi) de_finfo_destroy(c, fi);
1300 if(md) {
1301 ucstring_destroy(md->fn);
1302 if(md->arcmac_fn) de_destroy_stringreaderdata(c, md->arcmac_fn);
1303 if(md->arcmac_advf) de_advfile_destroy(md->arcmac_advf);
1304 de_free(c, md);
1306 de_dbg_indent_restore(c, saved_indent_level);
1309 static void do_sequence_of_members(deark *c, lctx *d, i64 pos1, i64 len, int nesting_level)
1311 if(nesting_level >= MAX_NESTING_LEVEL) {
1312 de_err(c, "Max subdir nesting level exceeded");
1313 return;
1316 de_dbg(c, "archive at %"I64_FMT, pos1);
1317 de_dbg_indent(c, 1);
1318 parse_member_sequence(c, d, pos1, len, nesting_level, member_cb_main);
1319 de_dbg_indent(c, -1);
1322 static void member_cb_for_prescan(deark *c, lctx *d, struct member_parser_data *mpd)
1324 if(mpd->magic!=d->sig_byte) return;
1325 if(mpd->cmpr_meth_masked==0x00) { // end of archive
1326 d->prescan_found_eoa = 1;
1327 d->prescan_pos_after_eoa = mpd->member_pos + mpd->member_len;
1328 de_dbg2(c, "end of member sequence at %"I64_FMT, d->prescan_pos_after_eoa);
1329 return;
1331 if(mpd->cmpr_meth==20 || mpd->cmpr_meth==21 || mpd->cmpr_meth==30) {
1332 // Features we're pretty sure aren't used by PAK.
1333 d->has_arc_extensions = 1;
1335 d->num_top_level_members++;
1336 de_dbg2(c, "member at %"I64_FMT, mpd->member_pos);
1339 // Unfortunately, a pre-pass is necessary for robust handling of some ARC format
1340 // extensions. The main issue is member-file comments, which we want to be
1341 // available when we process that member file, but can only be found after we've
1342 // read through the whole ARC file.
1343 static void do_prescan_file(deark *c, lctx *d, i64 startpos)
1345 de_dbg2(c, "prescan");
1346 d->num_top_level_members = 0;
1347 de_dbg_indent(c, 1);
1348 parse_member_sequence(c, d, startpos, c->infile->len-startpos, 0, member_cb_for_prescan);
1349 de_dbg2(c, "number of members: %"I64_FMT, d->num_top_level_members);
1350 de_dbg_indent(c, -1);
1353 static int find_arc_marker(deark *c, const u8 *buf, size_t buflen, i64 *ppos)
1355 size_t i;
1357 for(i=0; i<buflen; i++) {
1358 if(buf[i]==0x1a) {
1359 *ppos = (i64)i;
1360 return 1;
1363 return 0;
1366 static void destroy_lctx(deark *c, lctx *d)
1368 if(!d) return;
1369 de_crcobj_destroy(d->crco);
1370 de_strarray_destroy(d->curpath);
1371 if(d->pak16sfx_outf) {
1372 dbuf_close(d->pak16sfx_outf);
1374 if(d->persistent_md) {
1375 i64 i;
1377 for(i=0; i<d->num_top_level_members; i++) {
1378 ucstring_destroy(d->persistent_md[i].comment);
1379 ucstring_destroy(d->persistent_md[i].path);
1381 de_free(c, d->persistent_md);
1383 de_free(c, d);
1386 static int is_btspk(deark *c, lctx *d)
1388 if(d->fmt!=FMT_ARC && d->fmt!=FMT_ARCMAC) return 0;
1390 if(dbuf_memcmp(c->infile, c->infile->len-31,
1391 (const void*)"\x1b\x03" "MASTR\xff" "END!", 12))
1393 return 0;
1395 return 1;
1398 static void do_run_arc_spark_internal(deark *c, lctx *d)
1400 i64 members_endpos;
1401 i64 num_extra_bytes;
1402 i64 pos = 0;
1404 if(is_btspk(c, d)) {
1405 de_warn(c, "This looks like BTSPK format, which is not correctly supported.");
1408 d->sig_byte = (d->fmt==FMT_ARCMAC) ? 0x1b : 0x1a;
1410 if(d->sig_byte==0x1a) {
1411 u8 buf[33];
1413 // Tolerate up to sizeof(buf)-1 bytes of initial junk
1414 de_read(buf, 0, sizeof(buf));
1415 if(!find_arc_marker(c, buf, sizeof(buf), &pos)) {
1416 de_err(c, "Not a(n) %s file", d->fmtname);
1417 goto done;
1421 de_declare_fmt(c, d->fmtname);
1422 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1423 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1425 do_prescan_file(c, d, pos);
1426 if(d->prescan_found_eoa) {
1427 members_endpos = d->prescan_pos_after_eoa;
1429 else {
1430 members_endpos = c->infile->len;
1432 mark_end_of_known_data(d, members_endpos);
1434 if(d->fmt==FMT_ARC) {
1435 do_pk_comments(c, d);
1436 do_pak_trailer(c, d);
1439 do_sequence_of_members(c, d, pos, members_endpos, 0);
1441 num_extra_bytes = c->infile->len - d->end_of_known_data;
1442 if(num_extra_bytes>0) {
1443 de_dbg(c, "extra bytes at end of archive: %"I64_FMT" (at %"I64_FMT")",
1444 num_extra_bytes, d->end_of_known_data);
1447 done:
1451 ////////// Special converter for PAK v1.6 SFX archives
1453 static void member_cb_for_pak16sfx(deark *c, lctx *d, struct member_parser_data *mpd)
1455 de_dbg2(c, "pak16sfx member at %"I64_FMT", type=0x%02x", mpd->member_pos,
1456 (UI)mpd->magic);
1457 de_dbg_indent(c, 1);
1459 // This is not perfect. We throw away the extended info, instead of converting
1460 // it to extended records.
1461 if(mpd->magic==0x1a) {
1462 if(!d->pak16sfx_outf) {
1463 d->pak16sfx_outf = dbuf_create_output_file(c, "pak", NULL, 0);
1465 dbuf_copy(c->infile, mpd->member_pos, mpd->member_len, d->pak16sfx_outf);
1467 de_dbg_indent(c, -1);
1470 static void do_convert_pak16sfx(deark *c, lctx *d)
1472 d->fmt = FMT_PAK16SFX;
1473 d->recurse_subdirs = 0;
1474 d->sig_byte = 0x1a;
1475 parse_member_sequence(c, d, 0, c->infile->len, 0,member_cb_for_pak16sfx);
1476 if(d->pak16sfx_outf) {
1477 dbuf_writeu16be(d->pak16sfx_outf, 0xfe00);
1481 /////////////////////// ARC (core ARC-only functions)
1483 static void de_run_arc(deark *c, de_module_params *mparams)
1485 lctx *d = NULL;
1486 const char *s;
1488 d = de_malloc(c, sizeof(lctx));
1489 d->fmt = FMT_ARC;
1490 d->fmtname = "ARC";
1491 // TODO: Make 'recurse' configurable. Would require us to make the embedded
1492 // archives end with the correct marker.
1493 d->recurse_subdirs = 1;
1494 d->input_encoding_for_filenames = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1495 d->input_encoding_for_comments = DE_EXTENC_MAKE(d->input_encoding_for_filenames,
1496 DE_ENCSUBTYPE_HYBRID);
1498 // TODO: It would probably be worth it to have a separate module for PAK, so we
1499 // can take the .PAK file extension into account when guessing what method #10
1500 // is. It's complicated, though, and not very useful until we support Crushed
1501 // decompression.
1503 s = de_get_ext_option(c, "arc:method10");
1504 if(s) {
1505 if(!de_strcmp(s, "trimmed")) {
1506 d->method10 = 1;
1508 else if(!de_strcmp(s, "crushed")) {
1509 d->method10 = 2;
1513 if(de_havemodcode(c, mparams, '6')) {
1514 do_convert_pak16sfx(c, d);
1515 goto done;
1518 do_run_arc_spark_internal(c, d);
1520 done:
1521 destroy_lctx(c, d);
1524 static int de_identify_arc(deark *c)
1526 static const char *exts[] = {"arc", "ark", "pak", "spk", "sdn", "com"};
1527 u8 has_ext = 0;
1528 UI ext_idx = 0;
1529 UI strictness;
1530 int ends_with_trailer = 0;
1531 int ends_with_comments = 0;
1532 int starts_with_trailer = 0;
1533 i64 arc_start = 0;
1534 size_t k;
1535 u8 cmpr_meth;
1536 u8 buf[5];
1538 de_read(buf, 0, sizeof(buf));
1540 // Look for 0x1a in the first 4 bytes. Some .COM-style self-extracting
1541 // archives start with 1-3 bytes of code before the ARC marker.
1542 if(!find_arc_marker(c, buf, sizeof(buf)-1, &arc_start)) {
1543 return 0;
1546 cmpr_meth = buf[arc_start+1];
1548 if(cmpr_meth==0) {
1549 // Don't tolerate empty archives that don't start at the beginning of file.
1550 if(arc_start!=0) return 0;
1552 starts_with_trailer = 1;
1555 // Get info about file extension
1556 for(k=0; k<DE_ARRAYCOUNT(exts); k++) {
1557 if(de_input_file_has_ext(c, exts[k])) {
1558 has_ext = 1;
1559 ext_idx = (UI)k;
1560 break;
1564 // Only tolerate leading junk for a few file extensions
1565 if(arc_start>0) {
1566 if(ext_idx==0 || ext_idx==1 || ext_idx==5) { // .arc, .ark, .com
1569 else {
1570 return 0;
1574 // Look at some of the file, to see if it seems ok.
1575 if(has_ext && ext_idx<=1 && arc_start==0) {
1576 strictness = 0;
1578 else if(has_ext && ext_idx<=3 && arc_start==0) {
1579 strictness = 1;
1581 else {
1582 strictness = 2;
1584 if(!is_valid_file_at(c->infile, arc_start, c->infile->len, strictness)) {
1585 return 0;
1588 // Handle 2-byte files
1589 if(starts_with_trailer && c->infile->len==2) {
1590 if(!has_ext) return 0;
1591 if(ext_idx==0) return 100;
1592 if(ext_idx<=3) return 15;
1593 return 0;
1596 if(de_getu16be(c->infile->len-2) == 0x1a00) {
1597 // Standard ARC trailer
1598 ends_with_trailer = 1;
1601 if(de_getu32be(c->infile->len-8) == 0x504baa55) {
1602 // PKARC trailer, for files with comments
1603 ends_with_comments = 1;
1606 if(!ends_with_trailer && !ends_with_comments) {
1607 // PAK-style extensions
1608 if(de_getu16be(c->infile->len-2) == 0xfe00) {
1609 ends_with_comments = 1;
1613 if(starts_with_trailer) {
1614 if(ends_with_comments) return 25;
1615 else return 0;
1617 if(has_ext && (ends_with_trailer || ends_with_comments)) return 90;
1618 if(ends_with_trailer || ends_with_comments) return 35;
1619 if(has_ext) return 24;
1620 if(arc_start==0) return 19;
1621 return 0;
1624 static void de_help_arc(deark *c)
1626 de_msg(c, "-opt arc:method10=<trimmed|crushed|auto> : How to interpret compression "
1627 "method #10");
1630 void de_module_arc(deark *c, struct deark_module_info *mi)
1632 mi->id = "arc";
1633 mi->desc = "ARC compressed archive";
1634 mi->run_fn = de_run_arc;
1635 mi->identify_fn = de_identify_arc;
1636 mi->help_fn = de_help_arc;
1639 /////////////////////// Spark
1641 static void de_run_spark(deark *c, de_module_params *mparams)
1643 lctx *d = NULL;
1645 d = de_malloc(c, sizeof(lctx));
1646 d->fmt = FMT_SPARK;
1647 d->fmtname = "Spark";
1648 d->input_encoding_for_filenames = de_get_input_encoding(c, NULL, DE_ENCODING_RISCOS);
1649 d->input_encoding_for_comments = DE_EXTENC_MAKE(d->input_encoding_for_filenames,
1650 DE_ENCSUBTYPE_HYBRID);
1651 d->recurse_subdirs = de_get_ext_option_bool(c, "spark:recurse", 1);
1653 do_run_arc_spark_internal(c, d);
1654 destroy_lctx(c, d);
1657 static int de_identify_spark(deark *c)
1659 u8 b;
1660 u32 load_addr;
1661 int ldaddrcheck = 0;
1662 int has_trailer = 0;
1664 if(de_getbyte(0) != 0x1a) return 0;
1665 b = de_getbyte(1); // compression method
1666 if(b==0x82 || b==0x83 || b==0x88 || b==0x89 || b==0xff) {
1669 else if(b==0x81 || b==0x84 || b==0x85 || b==0x86) {
1670 ; // TODO: Verify that these are possible in Spark.
1672 else {
1673 return 0;
1676 load_addr = (u32)de_getu32le(29);
1677 if((load_addr & 0xfff00000) == 0xfff00000) {
1678 ldaddrcheck = 1;
1681 if(de_getu16be(c->infile->len-2) == 0x1a80) {
1682 has_trailer = 1;
1685 if(has_trailer && ldaddrcheck) return 85;
1686 if(ldaddrcheck) return 30;
1687 if(has_trailer) return 10;
1688 return 0;
1691 static void de_help_spark(deark *c)
1693 de_msg(c, "-opt spark:recurse=0 : Extract subdirs as Spark files");
1696 void de_module_spark(deark *c, struct deark_module_info *mi)
1698 mi->id = "spark";
1699 mi->desc = "Spark archive";
1700 mi->run_fn = de_run_spark;
1701 mi->identify_fn = de_identify_spark;
1702 mi->help_fn = de_help_spark;
1705 /////////////////////// ArcMac
1707 static void de_run_arcmac(deark *c, de_module_params *mparams)
1709 lctx *d = NULL;
1711 d = de_malloc(c, sizeof(lctx));
1712 d->fmt = FMT_ARCMAC;
1713 d->fmtname = "ArcMac";
1714 d->recurse_subdirs = 1;
1715 d->input_encoding_for_arcmac_fn = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
1716 d->input_encoding_for_filenames = DE_ENCODING_CP437;
1717 d->input_encoding_for_comments = DE_EXTENC_MAKE(d->input_encoding_for_filenames,
1718 DE_ENCSUBTYPE_HYBRID);
1720 do_run_arc_spark_internal(c, d);
1721 destroy_lctx(c, d);
1724 static int de_identify_arcmac(deark *c)
1726 u8 buf1[2];
1727 u8 buf2[2];
1729 de_read(buf1, 0, 2);
1730 if(buf1[0]!=0x1b) return 0;
1731 if(!(buf1[1]>=1 && buf1[1]<=9)) return 0;
1732 de_read(buf2, 59, 2);
1733 if(buf2[0]!=0x1a) return 0;
1734 if(buf2[1]!=buf1[1]) return 0;
1735 return 80;
1738 void de_module_arcmac(deark *c, struct deark_module_info *mi)
1740 mi->id = "arcmac";
1741 mi->desc = "ArcMac compressed archive";
1742 mi->run_fn = de_run_arcmac;
1743 mi->identify_fn = de_identify_arcmac;