1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // ARC compressed archive
9 #include <deark-config.h>
10 #include <deark-private.h>
11 #include <deark-fmtutil.h>
12 DE_DECLARE_MODULE(de_module_arc
);
13 DE_DECLARE_MODULE(de_module_spark
);
14 DE_DECLARE_MODULE(de_module_arcmac
);
19 #define FMT_PAK16SFX 4
21 #define MAX_NESTING_LEVEL 24
23 struct localctx_struct
;
24 typedef struct localctx_struct lctx
;
26 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct member_data
*md
,
27 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
28 struct de_dfilter_results
*dres
);
30 struct cmpr_meth_info
{
34 decompressor_fn decompressor
;
37 struct persistent_member_data
{
49 const struct cmpr_meth_info
*cmi
;
50 const char *cmpr_meth_name
;
57 struct de_timestamp arc_timestamp
;
58 struct de_riscos_file_attrs rfa
;
63 struct de_stringreaderdata
*arcmac_fn
;
64 struct de_advfile
*arcmac_advf
;
67 struct localctx_struct
{
70 de_ext_encoding input_encoding_for_filenames
;
71 de_ext_encoding input_encoding_for_comments
;
72 de_ext_encoding input_encoding_for_arcmac_fn
;
73 u8 method10
; // 1=trimmed, 2=crushed
79 u8 has_arc_extensions
;
80 i64 prescan_pos_after_eoa
;
81 i64 num_top_level_members
; // Not including EOA marker
82 i64 end_of_known_data
;
83 struct de_crcobj
*crco
;
84 struct de_strarray
*curpath
;
85 struct persistent_member_data
*persistent_md
; // optional array[num_top_level_members]
89 struct member_parser_data
{
93 u8 cmpr_meth
, cmpr_meth_masked
;
100 typedef void (*member_cb_type
)(deark
*c
, lctx
*d
, struct member_parser_data
*mpd
);
102 // Look for any known ARC cmpr meth, including end-of-archive marker.
103 // Not for Spark format
104 static int is_known_cmpr_meth(u8 m
)
106 if(m
<=11 || m
==20 || m
==21 || m
==22 || m
==30) {
112 // Is this a plausible cmpr meth for the first archive member
113 // (including end-of-archive marker)?
114 // (Not for Spark format.)
115 static int is_known_first_cmpr_meth(u8 m
)
117 if((m
<=11) || m
==20) {
123 // For ARC, not for Spark or ArcMac.
124 // It's assumed that the byte at pos1 is known to be 0x1a.
125 // Will validate the first member, and (unless it is the end marker,
126 // or strictness==0), the first two bytes of the second member.
127 static int is_valid_file_at(dbuf
*f
, i64 pos1
, i64 endpos
, UI strictness
)
136 cmpr_meth
= dbuf_getbyte_p(f
, &pos
);
137 if(!is_known_first_cmpr_meth(cmpr_meth
)) {
140 if(cmpr_meth
==0) return 1; // End marker
142 if(cmpr_meth
<20 && strictness
>=2) {
145 // test 1st char of filename
146 f1
= dbuf_getbyte(f
, pos
);
147 if(f1
<0x20) return 0;
151 if(strictness
==0) return 1;
153 cmpr_len
= dbuf_getu32le_p(f
, &pos
);
155 if(cmpr_meth
!=1) pos
+= 4;
157 if(pos
==endpos
) return 1;
158 if(pos
>endpos
-2) return 0;
160 marker
= dbuf_getbyte_p(f
, &pos
);
161 if(marker
!=0x1a) return 0;
162 cmpr_meth
= dbuf_getbyte_p(f
, &pos
);
163 if(!is_known_cmpr_meth(cmpr_meth
)) return 0;
167 static void mark_end_of_known_data(lctx
*d
, i64 pos
)
169 if(pos
> d
->end_of_known_data
) {
170 d
->end_of_known_data
= pos
;
174 // Calls the supplied callback function for each ARC member found.
175 // Also called for end-of-archive/directory markers.
176 // Also called if unexpected data is encountered (with mpd->magic != 0x1a).
177 static void parse_member_sequence(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, int nesting_level
,
178 member_cb_type member_cbfn
)
180 struct member_parser_data
*mpd
= NULL
;
184 mpd
= de_malloc(c
, sizeof(struct member_parser_data
));
187 if(pos
+2 > pos1
+len
) break;
188 de_zeromem(mpd
, sizeof(struct member_parser_data
));
189 mpd
->nesting_level
= nesting_level
;
190 mpd
->member_idx
= member_idx
++;
191 mpd
->member_pos
= pos
;
193 mpd
->magic
= de_getbyte_p(&pos
);
195 if(mpd
->magic
==0xfe && d
->fmt
==FMT_PAK16SFX
) {
196 // Some hacks here. We're not setting all the fields correctly,
197 // just the ones we need.
198 mpd
->cmpr_data_len
= (i64
)de_getbyte_p(&pos
);
199 mpd
->cmpr_data_pos
= pos
;
200 mpd
->member_len
= 2+mpd
->cmpr_data_len
;
201 member_cbfn(c
, d
, mpd
);
202 pos
= mpd
->member_pos
+ mpd
->member_len
;
205 else if(mpd
->magic
!=d
->sig_byte
) {
207 mpd
->cmpr_data_pos
= mpd
->member_pos
; // dummy value
208 member_cbfn(c
, d
, mpd
);
211 mpd
->cmpr_meth
= de_getbyte_p(&pos
);
212 mpd
->cmpr_meth_masked
= mpd
->cmpr_meth
& 0x7f;
214 if(mpd
->cmpr_meth_masked
==0x00 || mpd
->cmpr_meth
==0x1f) { // end of archive/dir
216 mpd
->cmpr_data_pos
= mpd
->member_pos
+2; // dummy value
217 member_cbfn(c
, d
, mpd
);
221 if(d
->fmt
==FMT_ARCMAC
) {
224 // TODO: Check for EOF?
225 pos
+= 57; // Skip remainder of 59-byte ArcMac preheader
226 magic2
= de_getbyte_p(&pos
);
227 if(magic2
!= 0x1a) { // Error
228 // TODO: Call member_cbfn()?
232 // Read the "real" compression method field (should be the same?).
233 mpd
->cmpr_meth
= de_getbyte_p(&pos
);
234 mpd
->cmpr_meth_masked
= mpd
->cmpr_meth
& 0x7f;
238 mpd
->cmpr_data_len
= de_getu32le_p(&pos
);
240 if(mpd
->cmpr_meth_masked
!=0x01) {
241 pos
+= 4; // original size
243 if(mpd
->cmpr_meth
& 0x80) {
244 pos
+= 12; // Spark-specific data
247 mpd
->cmpr_data_pos
= pos
;
248 mpd
->member_len
= mpd
->cmpr_data_pos
+ mpd
->cmpr_data_len
- mpd
->member_pos
;
249 member_cbfn(c
, d
, mpd
);
251 pos
= mpd
->member_pos
+ mpd
->member_len
;
257 static void decompressor_stored(deark
*c
, lctx
*d
, struct member_data
*md
,
258 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
259 struct de_dfilter_results
*dres
)
261 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
264 static void decompressor_spark_compressed(deark
*c
, lctx
*d
, struct member_data
*md
,
265 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
266 struct de_dfilter_results
*dres
)
268 struct de_lzw_params delzwp
;
270 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
271 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
272 delzwp
.flags
|= DE_LZWFLAG_HAS1BYTEHEADER
;
273 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
276 static void decompressor_squashed(deark
*c
, lctx
*d
, struct member_data
*md
,
277 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
278 struct de_dfilter_results
*dres
)
280 struct de_lzw_params delzwp
;
282 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
283 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
284 delzwp
.max_code_size
= 13;
285 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
288 static void decompressor_packed(deark
*c
, lctx
*d
, struct member_data
*md
,
289 struct de_dfilter_in_params
*dcmpri
,
290 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
292 fmtutil_decompress_rle90_ex(c
, dcmpri
, dcmpro
, dres
, 0);
295 static void decompressor_squeezed(deark
*c
, lctx
*d
, struct member_data
*md
,
296 struct de_dfilter_in_params
*dcmpri
,
297 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
299 struct de_dcmpr_two_layer_params tlp
;
301 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
302 tlp
.codec1_type1
= fmtutil_huff_squeeze_codectype1
;
303 tlp
.codec2
= dfilter_rle90_codec
;
307 de_dfilter_decompress_two_layer(c
, &tlp
);
310 static void decompressor_crunched5(deark
*c
, lctx
*d
, struct member_data
*md
,
311 struct de_dfilter_in_params
*dcmpri
,
312 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
314 struct de_lzw_params delzwp
;
316 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
317 delzwp
.fmt
= DE_LZWFMT_ARC5
;
318 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
321 static void decompressor_crunched6(deark
*c
, lctx
*d
, struct member_data
*md
,
322 struct de_dfilter_in_params
*dcmpri
,
323 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
325 struct de_dcmpr_two_layer_params tlp
;
326 struct de_lzw_params delzwp
;
328 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
329 delzwp
.fmt
= DE_LZWFMT_ARC5
;
331 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
332 tlp
.codec1_pushable
= dfilter_lzw_codec
;
333 tlp
.codec1_private_params
= (void*)&delzwp
;
335 tlp
.codec2
= dfilter_rle90_codec
;
341 de_dfilter_decompress_two_layer(c
, &tlp
);
344 static void decompressor_crunched8(deark
*c
, lctx
*d
, struct member_data
*md
,
345 struct de_dfilter_in_params
*dcmpri
,
346 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
348 struct de_dcmpr_two_layer_params tlp
;
349 struct de_lzw_params delzwp
;
351 // "Crunched" means "packed", then "compressed".
352 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
354 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
355 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
356 delzwp
.flags
|= DE_LZWFLAG_HAS1BYTEHEADER
;
358 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
359 tlp
.codec1_pushable
= dfilter_lzw_codec
;
360 tlp
.codec1_private_params
= (void*)&delzwp
;
362 tlp
.codec2
= dfilter_rle90_codec
;
368 de_dfilter_decompress_two_layer(c
, &tlp
);
371 static void decompressor_trimmed(deark
*c
, lctx
*d
, struct member_data
*md
,
372 struct de_dfilter_in_params
*dcmpri
,
373 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
375 struct de_dcmpr_two_layer_params tlp
;
376 struct de_lh1_params lh1p
;
378 de_zeromem(&lh1p
, sizeof(struct de_lh1_params
));
379 lh1p
.is_arc_trimmed
= 1;
380 lh1p
.history_fill_val
= 0x00;
382 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
383 tlp
.codec1_pushable
= dfilter_lh1_codec
;
384 tlp
.codec1_private_params
= (void*)&lh1p
;
386 tlp
.codec2
= dfilter_rle90_codec
;
392 de_dfilter_decompress_two_layer(c
, &tlp
);
395 static void decompressor_distilled(deark
*c
, lctx
*d
, struct member_data
*md
,
396 struct de_dfilter_in_params
*dcmpri
,
397 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
399 fmtutil_distilled_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
403 // 0x01 = valid in ARC
404 // 0x02 = valid in Spark
405 // 0x80 = assume high bit of cmpr_meth is set for Spark format
406 // 0x100, 0x200 = special
407 static const struct cmpr_meth_info cmpr_meth_info_arr
[] = {
408 { 0x00, 0x03, "end of archive marker", NULL
},
409 { 0x01, 0x83, "stored (old format)", decompressor_stored
},
410 { 0x02, 0x83, "stored", decompressor_stored
},
411 { 0x03, 0x83, "packed (RLE)", decompressor_packed
},
412 { 0x04, 0x83, "squeezed (RLE + Huffman)", decompressor_squeezed
},
413 { 0x05, 0x83, "crunched5 (static LZW)", decompressor_crunched5
},
414 { 0x06, 0x83, "crunched6 (RLE + static LZW)", decompressor_crunched6
},
415 { 0x07, 0x83, "crunched7 (ARC 4.6)", NULL
},
416 { 0x08, 0x83, "crunched8 (RLE + dynamic LZW)", decompressor_crunched8
},
417 { 0x09, 0x83, "squashed (dynamic LZW)", decompressor_squashed
},
418 { 10, 0x101, "trimmed", decompressor_trimmed
},
419 { 10, 0x201, "crushed", NULL
},
420 { 10, 0x01, "trimmed or crushed", NULL
},
421 { 0x0b, 0x01, "distilled", decompressor_distilled
},
422 { 20, 0x01, "archive info", NULL
},
423 { 21, 0x01, "extended file info", NULL
},
424 { 22, 0x01, "OS info", NULL
},
425 { 0x1e, 0x01, "subdir", NULL
},
426 { 0x1f, 0x01, "end of subdir marker", NULL
},
427 { 0x80, 0x02, "end of archive marker", NULL
},
428 { 0xff, 0x02, "compressed", decompressor_spark_compressed
}
431 static const struct cmpr_meth_info
*get_cmpr_meth_info(lctx
*d
, u8 cmpr_meth
)
434 const struct cmpr_meth_info
*p
;
436 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_info_arr
); k
++) {
439 p
= &cmpr_meth_info_arr
[k
];
440 if(d
->fmt
==FMT_ARC
&& !(p
->flags
& 0x1)) continue;
441 if(d
->fmt
==FMT_SPARK
&& !(p
->flags
& 0x2)) continue;
442 meth_adjusted
= p
->cmpr_meth
;
443 if(d
->fmt
==FMT_SPARK
&& (p
->flags
& 0x80)) {
444 meth_adjusted
|= 0x80;
446 if(meth_adjusted
!= cmpr_meth
) continue;
448 if(p
->cmpr_meth
==10) {
449 // Method 10 has a conflict -- it could be either Trimmed (ARC7)
451 if(p
->flags
&0x100) { // Skip this unless we're sure it's Trimmed
453 if(d
->has_pak_trailer
|| !d
->has_arc_extensions
) continue;
456 else if(p
->flags
&0x200) { // Skip this unless we're sure it's Crushed
458 if(!d
->has_pak_trailer
|| d
->has_arc_extensions
) continue;
467 static void read_one_pk_comment(deark
*c
, lctx
*d
, i64 pos
, de_ucstring
*s
)
469 dbuf_read_to_ucstring(c
->infile
, pos
, 32, s
, 0, d
->input_encoding_for_comments
);
470 ucstring_strip_trailing_spaces(s
);
473 static void init_trailer_data(deark
*c
, lctx
*d
)
475 d
->has_trailer_data
= 1;
476 if(!d
->persistent_md
) {
477 d
->persistent_md
= de_mallocarray(c
, d
->num_top_level_members
,
478 sizeof(struct persistent_member_data
));
482 static int is_pkcomment_sig_at(deark
*c
, lctx
*d
, i64 pos
)
484 i64 comments_descr_pos
;
486 if(!d
->prescan_found_eoa
) return 0;
487 if(pos
< d
->prescan_pos_after_eoa
) return 0;
488 if(pos
> c
->infile
->len
-8) return 0;
489 if((UI
)de_getu32be(pos
) != 0x504baa55U
) {
492 comments_descr_pos
= de_getu32le(pos
+4);
493 if((comments_descr_pos
< d
->prescan_pos_after_eoa
) ||
494 (comments_descr_pos
> pos
-4))
501 static void do_pk_comments(deark
*c
, lctx
*d
)
505 i64 comments_descr_pos
;
506 int has_file_comments
= 0;
507 int has_archive_comment
= 0;
508 i64 file_comments_pos
= 0;
509 de_ucstring
*archive_comment
= NULL
;
512 if(!d
->prescan_found_eoa
) return;
513 sig_pos
= c
->infile
->len
-8;
514 if(is_pkcomment_sig_at(c
, d
, sig_pos
)){
521 // The PK signature normally appears 8 bytes from the end of the file,
522 // but we'll scan for it, in case the file has padding appended to it.
523 // The assumption is that it appears after a whole number of 32-byte
524 // records. In case of multiple signatures, we use the earliest one that
526 sig_pos
= d
->prescan_pos_after_eoa
+32;
530 if(sig_pos
>= c
->infile
->len
-8) break;
531 if(count
> d
->num_top_level_members
+10) break;
532 if(is_pkcomment_sig_at(c
, d
, sig_pos
)){
543 de_dbg(c
, "PKARC/PKPAK comment block found, ID at %"I64_FMT
, sig_pos
);
545 mark_end_of_known_data(d
, sig_pos
+8);
546 init_trailer_data(c
, d
);
548 // Note: This logic is based on reverse engineering, and could be wrong.
549 comments_descr_pos
= de_getu32le(sig_pos
+4);
550 de_dbg(c
, "descriptor pos: %"I64_FMT
, comments_descr_pos
);
551 // comments_descr_pos has already been validated, by is_pkcomment_sig_at().
553 de_read(dscr
, comments_descr_pos
, 4);
554 if(dscr
[0]==0x20 && dscr
[1]==0x20 && dscr
[2]==0x20 && dscr
[3]==0x00) {
555 has_file_comments
= 0;
556 has_archive_comment
= 1;
558 else if(dscr
[0]==0x01 && dscr
[3]==0x20) {
559 has_file_comments
= 1;
560 has_archive_comment
= 0;
562 else if(dscr
[0]==0x01 && dscr
[3]==0x00) {
563 has_file_comments
= 1;
564 has_archive_comment
= 1;
567 de_dbg(c
, "[unrecognized comments descriptor]");
570 if(has_file_comments
) {
571 file_comments_pos
= comments_descr_pos
+ 32;
572 if(sig_pos
- file_comments_pos
< 32) {
573 has_file_comments
= 0;
577 if(has_archive_comment
) {
578 archive_comment
= ucstring_create(c
);
579 read_one_pk_comment(c
, d
, comments_descr_pos
-32, archive_comment
);
580 de_dbg(c
, "archive comment: \"%s\"", ucstring_getpsz_d(archive_comment
));
583 if(has_file_comments
) {
584 i64 num_file_comments
;
587 num_file_comments
= (sig_pos
- file_comments_pos
)/32;
588 de_dbg(c
, "apparent number of file comments: %d", (int)num_file_comments
);
590 for(i
=0; i
<num_file_comments
&& i
<d
->num_top_level_members
; i
++) {
591 if(!d
->persistent_md
[i
].comment
) {
592 d
->persistent_md
[i
].comment
= ucstring_create(c
);
594 if(ucstring_isnonempty(d
->persistent_md
[i
].comment
)) continue;
595 read_one_pk_comment(c
, d
,file_comments_pos
+ i
*32, d
->persistent_md
[i
].comment
);
599 ucstring_destroy(archive_comment
);
600 de_dbg_indent(c
, -1);
603 // Always sets *pbytes_consumed.
604 // Returns 0 if there are no more records after this.
605 static int do_pak_ext_record(deark
*c
, lctx
*d
, i64 pos1
, i64
*pbytes_consumed
)
609 const char *rtname
= "?";
614 de_ucstring
*archive_comment
= NULL
;
615 struct persistent_member_data
*pmd
= NULL
;
616 int saved_indent_level
;
618 de_dbg_indent_save(c
, &saved_indent_level
);
619 *pbytes_consumed
= 0;
620 if(de_getbyte_p(&pos
) != 0xfe) goto done
;
621 de_dbg(c
, "record at %"I64_FMT
, pos1
);
624 rectype
= de_getbyte_p(&pos
);
626 case 0: rtname
= "end"; break;
627 case 1: rtname
= "remark"; break;
628 case 2: rtname
= "path"; break;
629 case 3: rtname
= "security envelope"; break;
631 de_dbg(c
, "rectype: %d (%s)", (int)rectype
, rtname
);
633 *pbytes_consumed
= 2;
637 filenum
= de_getu16le_p(&pos
);
638 de_dbg(c
, "file num: %d", (int)filenum
);
639 dlen
= de_getu32le_p(&pos
);
640 de_dbg(c
, "dlen: %"I64_FMT
, dlen
);
641 if(pos
+dlen
> c
->infile
->len
) goto done
;
643 *pbytes_consumed
= 8 + dlen
;
647 filenum_adj
= filenum
- 1;
648 if(filenum_adj
< d
->num_top_level_members
) {
649 pmd
= &d
->persistent_md
[filenum_adj
];
653 if(rectype
==1) { // remark
654 if(filenum
==0) { // archive comment
655 archive_comment
= ucstring_create(c
);
656 dbuf_read_to_ucstring_n(c
->infile
, pos
, dlen
, 16384, archive_comment
,
657 0, d
->input_encoding_for_comments
);
658 de_dbg(c
, "archive comment: \"%s\"", ucstring_getpsz_d(archive_comment
));
660 else { // file comment
664 pmd
->comment
= ucstring_create(c
);
666 if(ucstring_isnonempty(pmd
->comment
)) goto done
;
667 dbuf_read_to_ucstring_n(c
->infile
, pos
, dlen
, 2048, pmd
->comment
,
668 0, d
->input_encoding_for_comments
);
671 else if(rectype
==2) {
674 pmd
->path
= ucstring_create(c
);
676 if(ucstring_isnonempty(pmd
->path
)) goto done
;
677 dbuf_read_to_ucstring_n(c
->infile
, pos
, dlen
, 512, pmd
->path
,
678 0, d
->input_encoding_for_comments
);
682 if(archive_comment
) ucstring_destroy(archive_comment
);
683 de_dbg_indent_restore(c
, saved_indent_level
);
687 static void do_pak_trailer(deark
*c
, lctx
*d
)
692 if(!d
->prescan_found_eoa
) return;
693 if(c
->infile
->len
- d
->prescan_pos_after_eoa
< 2) return;
694 if(de_getbyte(d
->prescan_pos_after_eoa
) != 0xfe) return;
695 b
= de_getbyte(d
->prescan_pos_after_eoa
+1);
698 pos
= d
->prescan_pos_after_eoa
;
699 de_dbg(c
, "PAK extended records at %"I64_FMT
, pos
);
701 d
->has_pak_trailer
= 1;
702 init_trailer_data(c
, d
);
706 i64 bytes_consumed
= 0;
708 if(pos
> c
->infile
->len
-2) break;
709 ret
= do_pak_ext_record(c
, d
, pos
, &bytes_consumed
);
710 pos
+= bytes_consumed
;
711 mark_end_of_known_data(d
, pos
);
712 if(!ret
|| bytes_consumed
<8) break;
715 de_dbg_indent(c
, -1);
718 static void dbg_timestamp(deark
*c
, struct de_timestamp
*ts
, const char *name
)
720 char timestamp_buf
[64];
722 de_timestamp_to_string(ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
723 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
726 // Convert backslashes to slashes, and make sure the string ends with a /.
727 static void fixup_path(deark
*c
, lctx
*d
, de_ucstring
*s
)
733 for(i
=0; i
<s
->len
; i
++) {
734 if(s
->str
[i
]=='\\') {
739 if(s
->str
[s
->len
-1]!='/') {
740 ucstring_append_char(s
, '/');
744 static void do_decompress_fork_arcmac(struct member_data
*md
,
745 dbuf
*outf
, const char *fork_name
)
747 struct de_dfilter_in_params dcmpri
;
748 struct de_dfilter_out_params dcmpro
;
749 struct de_dfilter_results dres
;
751 int saved_indent_level
;
753 de_dbg_indent_save(c
, &saved_indent_level
);
754 if(md
->orig_size
==0) goto done
;
756 de_dbg(c
, "decompressing %s fork", fork_name
);
759 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
760 dcmpri
.f
= c
->infile
;
761 dcmpri
.pos
= md
->cmpr_data_pos
;
762 dcmpri
.len
= md
->cmpr_size
;
764 dcmpro
.len_known
= 1;
765 dcmpro
.expected_len
= md
->orig_size
;
767 if(dcmpri
.pos
+ dcmpri
.len
> dcmpri
.f
->len
) {
768 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(md
->arcmac_fn
->str
));
772 md
->cmi
->decompressor(c
, md
->d
, md
, &dcmpri
, &dcmpro
, &dres
);
773 dbuf_flush(dcmpro
.f
);
775 de_err(c
, "Decompression failed for file %s[%s fork]: %s",
776 ucstring_getpsz_d(md
->arcmac_fn
->str
),
777 fork_name
, de_dfilter_get_errmsg(c
, &dres
));
781 md
->crc_calc
= de_crcobj_getval(md
->d
->crco
);
782 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)md
->crc_calc
);
784 if(md
->crc_calc
!=md
->crc_reported
) {
785 de_err(c
, "%s: CRC check failed", ucstring_getpsz_d(md
->arcmac_fn
->str
));
789 de_dbg_indent_restore(c
, saved_indent_level
);
792 static int my_advfile_cbfn(deark
*c
, struct de_advfile
*advf
,
793 struct de_advfile_cbparams
*afp
)
795 struct member_data
*md
= (struct member_data
*)advf
->userdata
;
797 if(afp
->whattodo
== DE_ADVFILE_WRITEMAIN
) {
798 do_decompress_fork_arcmac(md
, afp
->outf
, "data");
800 else if(afp
->whattodo
== DE_ADVFILE_WRITERSRC
) {
801 do_decompress_fork_arcmac(md
, afp
->outf
, "rsrc");
807 // TODO: Reduce code duplication with do_extract_member_file(), etc.
808 // Retrofitting the arc module for arcmac format made some things messy.
809 // It could be made somewhat cleaner by using the "advfile" system unconditionally
810 // -- there are pros and cons of doing that.
811 static void do_extract_member_file_arcmac(deark
*c
, lctx
*d
, struct member_data
*md
,
814 int saved_indent_level
;
816 de_dbg_indent_save(c
, &saved_indent_level
);
818 if(!md
->cmi
|| !md
->cmi
->decompressor
) {
819 de_err(c
, "%s: Compression type 0x%02x (%s) is not supported.",
820 ucstring_getpsz_d(md
->fn
), (unsigned int)md
->cmpr_meth
, md
->cmpr_meth_name
);
824 if(md
->arcmac_dforklen
&& md
->arcmac_rforklen
) {
825 // This seems to be allowed, but I need sample files.
826 de_err(c
, "Can't handle multi-fork ArcMac file");
829 if(md
->arcmac_dforklen
+ md
->arcmac_rforklen
!= md
->orig_size
) {
830 de_err(c
, "Inconsistent ArcMac fork size");
834 if(md
->arcmac_fn
&& ucstring_isnonempty(md
->arcmac_fn
->str
)) {
835 ucstring_append_ucstring(md
->arcmac_advf
->filename
, md
->arcmac_fn
->str
);
838 ucstring_append_ucstring(md
->arcmac_advf
->filename
, md
->fn
);
840 md
->arcmac_advf
->original_filename_flag
= 1;
842 md
->arcmac_advf
->mainfork
.fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] =
843 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
];
845 md
->arcmac_advf
->userdata
= (void*)md
;
846 md
->arcmac_advf
->writefork_cbfn
= my_advfile_cbfn
;
848 md
->arcmac_advf
->mainfork
.writelistener_cb
= de_writelistener_for_crc
;
849 md
->arcmac_advf
->mainfork
.userdata_for_writelistener
= (void*)d
->crco
;
850 md
->arcmac_advf
->rsrcfork
.writelistener_cb
= de_writelistener_for_crc
;
851 md
->arcmac_advf
->rsrcfork
.userdata_for_writelistener
= (void*)d
->crco
;
852 de_crcobj_reset(d
->crco
);
854 de_advfile_run(md
->arcmac_advf
);
857 de_dbg_indent_restore(c
, saved_indent_level
);
860 static void do_extract_member_file(deark
*c
, lctx
*d
, struct member_data
*md
,
861 struct persistent_member_data
*pmd
, de_finfo
*fi
, i64 pos
)
863 de_ucstring
*fullfn
= NULL
;
865 int ignore_failed_crc
= 0;
866 int saved_indent_level
;
867 struct de_dfilter_in_params dcmpri
;
868 struct de_dfilter_out_params dcmpro
;
869 struct de_dfilter_results dres
;
871 de_dbg_indent_save(c
, &saved_indent_level
);
872 fullfn
= ucstring_create(c
);
874 if(pmd
&& ucstring_isnonempty(pmd
->path
)) {
875 // For PAK-style paths.
876 // (Pretty useless, until we support cmpr. meth. #11.)
877 // Note that PAK-style paths, and directory recursion, are not expected to
878 // be possible in the same file.
879 ucstring_append_ucstring(fullfn
, pmd
->path
);
880 fixup_path(c
, d
, fullfn
);
883 de_strarray_make_path(d
->curpath
, fullfn
, DE_MPFLAG_NOTRAILINGSLASH
);
885 if(md
->rfa
.file_type_known
) {
886 fmtutil_riscos_append_type_to_filename(c
, fi
, fullfn
, &md
->rfa
, md
->is_dir
, 0);
888 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
892 if(!md
->cmi
|| !md
->cmi
->decompressor
) {
895 // A quick hack because we don't want the error message claiming that
896 // "trimmed or crushed" isn't supported. We do support trimmed, so
897 // if we got here, we must have decided it's crushed.
898 mname
= md
->cmpr_meth
==10 ? "crushed" : md
->cmpr_meth_name
;
900 de_err(c
, "%s: Compression type 0x%02x (%s) is not supported.",
901 ucstring_getpsz_d(md
->fn
), (UI
)md
->cmpr_meth
, mname
);
905 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
906 dbuf_enable_wbuffer(outf
);
908 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)d
->crco
);
909 de_crcobj_reset(d
->crco
);
911 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
912 dcmpri
.f
= c
->infile
;
914 dcmpri
.len
= md
->cmpr_size
;
916 dcmpro
.len_known
= 1;
917 dcmpro
.expected_len
= md
->orig_size
;
919 if(dcmpri
.pos
+ dcmpri
.len
> dcmpri
.f
->len
) {
920 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(md
->fn
));
924 md
->cmi
->decompressor(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
925 dbuf_flush(dcmpro
.f
);
927 de_err(c
, "%s: Decompression failed: %s", ucstring_getpsz_d(md
->fn
),
928 de_dfilter_get_errmsg(c
, &dres
));
932 md
->crc_calc
= de_crcobj_getval(d
->crco
);
933 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)md
->crc_calc
);
934 if(md
->crc_reported
==0 && !d
->recurse_subdirs
&& md
->rfa
.file_type_known
&&
935 md
->rfa
.file_type
==0xddc && md
->cmpr_meth
==0x82)
937 ignore_failed_crc
= 1;
939 if((md
->crc_calc
!=md
->crc_reported
) && !ignore_failed_crc
) {
940 de_err(c
, "%s: CRC check failed", ucstring_getpsz_d(md
->fn
));
945 ucstring_destroy(fullfn
);
946 de_dbg_indent_restore(c
, saved_indent_level
);
949 // "Extract" a directory entry
950 static void do_extract_member_dir(deark
*c
, lctx
*d
, struct member_data
*md
,
954 de_ucstring
*fullfn
= NULL
;
956 fullfn
= ucstring_create(c
);
957 de_strarray_make_path(d
->curpath
, fullfn
, DE_MPFLAG_NOTRAILINGSLASH
);
959 fi
->is_directory
= 1;
960 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
962 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
964 ucstring_destroy(fullfn
);
967 struct extinfo_item_info
{
970 unsigned int flags
; // 0x1 = string
975 static void do_info_record_string(deark
*c
, lctx
*d
, i64 pos
, i64 len
, const char *name
)
977 de_ucstring
*s
= NULL
;
979 s
= ucstring_create(c
);
980 dbuf_read_to_ucstring_n(c
->infile
, pos
, len
, 2048, s
, DE_CONVFLAG_STOP_AT_NUL
,
981 d
->input_encoding_for_comments
);
982 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
986 static const struct extinfo_item_info extinfo_arr
[] = {
987 { 20, 0, 0x01, "archive description", NULL
},
988 { 20, 1, 0x01, "archive created by", NULL
},
989 { 20, 2, 0x01, "archive last modified by", NULL
},
990 { 21, 0, 0x01, "file description", NULL
},
991 { 21, 1, 0x01, "long name", NULL
},
992 { 21, 2, 0x00, "timestamps", NULL
},
993 { 21, 3, 0x00, "icon", NULL
},
994 { 21, 4, 0x01, "attributes", NULL
},
995 { 21, 5, 0x01, "full path", NULL
}
998 static const struct extinfo_item_info
*find_extinfo_item(u8 cmprmeth
, u8 rectype
)
1002 for(k
=0; k
<DE_ARRAYCOUNT(extinfo_arr
); k
++) {
1003 if(extinfo_arr
[k
].cmprmeth
==cmprmeth
&& extinfo_arr
[k
].rectype
==rectype
) {
1004 return &extinfo_arr
[k
];
1010 static void do_info_item(deark
*c
, lctx
*d
, struct member_data
*md
)
1012 int saved_indent_level
;
1013 i64 pos
= md
->cmpr_data_pos
;
1014 i64 endpos
= md
->cmpr_data_pos
+md
->cmpr_size
;
1016 de_dbg_indent_save(c
, &saved_indent_level
);
1017 de_dbg(c
, "info item data (meth=%d) at %"I64_FMT
" len=%"I64_FMT
, (int)md
->cmpr_meth
,
1018 md
->cmpr_data_pos
, md
->cmpr_size
);
1019 de_dbg_indent(c
, 1);
1027 const struct extinfo_item_info
*ei
;
1028 const char *ei_name
;
1031 if(pos
+3 > endpos
) goto done
;
1032 reclen
= de_getu16le_p(&pos
);
1033 if(reclen
<3 || recpos
+reclen
> endpos
) goto done
;
1034 rectype
= de_getbyte_p(&pos
);
1035 ei
= find_extinfo_item(md
->cmpr_meth
, rectype
);
1036 if(ei
&& ei
->name
) ei_name
= ei
->name
;
1041 de_dbg(c
, "record type %d (%s) at %"I64_FMT
", dpos=%"I64_FMT
", dlen=%"I64_FMT
,
1042 (int)rectype
, ei_name
, recpos
, dpos
, dlen
);
1043 de_dbg_indent(c
, 1);
1044 if(ei
&& (ei
->flags
& 0x01)) {
1045 do_info_record_string(c
, d
, dpos
, dlen
, ei_name
);
1048 de_dbg_hexdump(c
, c
->infile
, dpos
, dlen
, 256, NULL
, 0x1);
1050 de_dbg_indent(c
, -1);
1051 pos
= recpos
+ reclen
;
1055 de_dbg_indent_restore(c
, saved_indent_level
);
1058 static void do_sequence_of_members(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, int nesting_level
);
1060 static void do_arcmac_preheader(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
)
1064 struct de_fourcc filetype
;
1065 struct de_fourcc creator
;
1067 if(md
->arcmac_advf
) return;
1068 md
->arcmac_advf
= de_advfile_create(c
);
1069 md
->arcmac_advf
->enable_wbuffer
= 1;
1071 pos
+= 2; // magic / cmprtype
1073 md
->arcmac_fn
= dbuf_read_string(c
->infile
, pos
, 31, 31, DE_CONVFLAG_STOP_AT_NUL
,
1074 d
->input_encoding_for_arcmac_fn
);
1075 de_dbg(c
, "ArcMac filename: \"%s\"", ucstring_getpsz_d(md
->arcmac_fn
->str
));
1076 if(md
->arcmac_fn
->sz_strlen
>0) {
1077 md
->arcmac_advf
->original_filename_flag
= 1;
1078 de_advfile_set_orig_filename(md
->arcmac_advf
, md
->arcmac_fn
->sz
, md
->arcmac_fn
->sz_strlen
);
1082 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
1083 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
1084 de_memcpy(md
->arcmac_advf
->typecode
, filetype
.bytes
, 4);
1085 md
->arcmac_advf
->has_typecode
= 1;
1088 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
1089 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
1090 de_memcpy(md
->arcmac_advf
->creatorcode
, creator
.bytes
, 4);
1091 md
->arcmac_advf
->has_creatorcode
= 1;
1094 finder_flags
= (u16
)de_getu16be_p(&pos
);
1095 de_dbg(c
, "finder flags: 0x%04x", finder_flags
);
1096 md
->arcmac_advf
->finderflags
= finder_flags
;
1097 md
->arcmac_advf
->has_finderflags
= 1;
1098 pos
+= 6; // remainder of finfo
1100 md
->arcmac_dforklen
= de_getu32le_p(&pos
);
1101 de_dbg(c
, "data fork len: %"I64_FMT
, md
->arcmac_dforklen
);
1102 md
->arcmac_rforklen
= de_getu32le_p(&pos
);
1103 de_dbg(c
, "rsrc fork len: %"I64_FMT
, md
->arcmac_rforklen
);
1105 md
->arcmac_advf
->mainfork
.fork_exists
= (md
->arcmac_dforklen
!=0);
1106 md
->arcmac_advf
->mainfork
.fork_len
= md
->arcmac_dforklen
;
1107 md
->arcmac_advf
->rsrcfork
.fork_exists
= (md
->arcmac_rforklen
!=0);
1108 md
->arcmac_advf
->rsrcfork
.fork_len
= md
->arcmac_rforklen
;
1111 // The main per-member processing function
1112 static void member_cb_main(deark
*c
, lctx
*d
, struct member_parser_data
*mpd
)
1114 int saved_indent_level
;
1115 i64 pos1
= mpd
->member_pos
;
1118 i64 mod_time_raw
, mod_date_raw
;
1119 de_finfo
*fi
= NULL
;
1120 struct member_data
*md
= NULL
;
1121 int need_curpath_pop
= 0;
1122 struct persistent_member_data
*pmd
= NULL
;
1124 de_dbg_indent_save(c
, &saved_indent_level
);
1125 de_dbg(c
, "member at %"I64_FMT
, pos1
);
1126 de_dbg_indent(c
, 1);
1127 md
= de_malloc(c
, sizeof(struct member_data
));
1131 if(mpd
->nesting_level
==0 && d
->persistent_md
&& (mpd
->member_idx
< d
->num_top_level_members
)) {
1132 pmd
= &d
->persistent_md
[mpd
->member_idx
];
1133 if(ucstring_isnonempty(pmd
->comment
)) {
1134 de_dbg(c
, "file comment: \"%s\"", ucstring_getpsz_d(pmd
->comment
));
1136 if(ucstring_isnonempty(pmd
->path
)) {
1137 de_dbg(c
, "path: \"%s\"", ucstring_getpsz_d(pmd
->path
));
1141 pos
++; // 'magic' byte, already read by the parser
1142 if(mpd
->magic
!= d
->sig_byte
) {
1143 de_err(c
, "Failed to find %s member at %"I64_FMT
, d
->fmtname
, pos1
);
1147 if(d
->fmt
==FMT_ARCMAC
&& mpd
->cmpr_meth_masked
!=0) {
1148 do_arcmac_preheader(c
, d
, md
, mpd
->member_pos
);
1152 pos
++; // compression ID, already read by the parser
1153 md
->cmpr_meth
= mpd
->cmpr_meth
;
1154 md
->cmpr_meth_masked
= mpd
->cmpr_meth_masked
;
1156 md
->cmi
= get_cmpr_meth_info(d
, md
->cmpr_meth
);
1157 if(md
->cmi
&& md
->cmi
->name
) {
1158 md
->cmpr_meth_name
= md
->cmi
->name
;
1161 md
->cmpr_meth_name
= "?";
1164 de_dbg(c
, "cmpr meth: 0x%02x (%s)", (unsigned int)md
->cmpr_meth
, md
->cmpr_meth_name
);
1166 if(md
->cmpr_meth_masked
==0x00 || md
->cmpr_meth
==0x1f) {
1170 if(md
->cmpr_meth_masked
==0x01) {
1176 if(md
->cmpr_meth
>=128) {
1180 if(mpd
->member_len
<hdrsize
) {
1181 de_err(c
, "Insufficient data for archive member at %"I64_FMT
, pos1
);
1185 if(md
->cmpr_meth_masked
==0x00 || md
->cmpr_meth
==0x1f) { // end of archive/dir marker
1189 md
->fn
= ucstring_create(c
);
1190 dbuf_read_to_ucstring(c
->infile
, pos
, 13, md
->fn
, DE_CONVFLAG_STOP_AT_NUL
,
1191 d
->input_encoding_for_filenames
);
1192 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->fn
));
1195 pos
+= 4; // cmpr_size, already read by the parser
1196 md
->cmpr_size
= mpd
->cmpr_data_len
;
1197 de_dbg(c
, "cmpr size: %"I64_FMT
, md
->cmpr_size
);
1199 mod_date_raw
= de_getu16le_p(&pos
);
1200 mod_time_raw
= de_getu16le_p(&pos
);
1201 de_dos_datetime_to_timestamp(&md
->arc_timestamp
, mod_date_raw
, mod_time_raw
);
1202 md
->arc_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
1203 dbg_timestamp(c
, &md
->arc_timestamp
, ((d
->fmt
==FMT_SPARK
) ? "timestamp (ARC)":"timestamp"));
1205 md
->crc_reported
= (u32
)de_getu16le_p(&pos
);
1206 de_dbg(c
, "crc (reported): 0x%04x", (unsigned int)md
->crc_reported
);
1207 if((md
->cmpr_meth_masked
)==0x01) {
1208 md
->orig_size
= md
->cmpr_size
;
1211 md
->orig_size
= de_getu32le_p(&pos
);
1212 de_dbg(c
, "orig size: %"I64_FMT
, md
->orig_size
);
1215 if(d
->fmt
== FMT_SPARK
) {
1216 md
->has_spark_attribs
= 1;
1217 fmtutil_riscos_read_load_exec(c
, c
->infile
, &md
->rfa
, pos
);
1219 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &md
->rfa
, pos
, 0);
1223 md
->cmpr_data_pos
= mpd
->cmpr_data_pos
;
1225 de_strarray_push(d
->curpath
, md
->fn
);
1226 need_curpath_pop
= 1;
1228 // TODO: Is it possible to distinguish between a subdirectory, and a Spark
1229 // member file that should always be extracted? Does a nonzero CRC mean
1230 // we should not recurse?
1231 if(d
->fmt
==FMT_SPARK
&& d
->recurse_subdirs
&& md
->rfa
.file_type_known
&&
1232 (md
->rfa
.file_type
==0xddc) && md
->cmpr_meth
==0x82)
1236 else if(d
->fmt
==FMT_ARC
&& d
->recurse_subdirs
&& md
->cmpr_meth
==0x1e) {
1240 if(d
->recurse_subdirs
) {
1241 de_dbg(c
, "is directory: %d", md
->is_dir
);
1244 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, md
->cmpr_data_pos
, md
->cmpr_size
);
1247 fi
= de_finfo_create(c
);
1248 fi
->original_filename_flag
= 1;
1250 if(md
->rfa
.mod_time
.is_valid
) {
1251 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->rfa
.mod_time
;
1253 else if(md
->arc_timestamp
.is_valid
) {
1254 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->arc_timestamp
;
1257 if(md
->has_spark_attribs
) {
1258 fi
->has_riscos_data
= 1;
1259 fi
->riscos_attribs
= md
->rfa
.attribs
;
1260 fi
->load_addr
= md
->rfa
.load_addr
;
1261 fi
->exec_addr
= md
->rfa
.exec_addr
;
1265 fi
->is_directory
= 1;
1269 do_extract_member_dir(c
, d
, md
, fi
);
1271 // Nested subdirectory archives (ARC 6 "z" option, or Spark) have both a known
1272 // length (md->cmpr_size), and an end-of-archive marker. So there are two
1273 // ways to parse them:
1274 // 1) Recursively, meaning we trust the md->cmpr_size field (or maybe we should
1275 // use orig_size instead?).
1276 // 2) As a flat sequence of members, meaning we trust that a nested archive
1277 // will not have extra data after the end-of-archive marker.
1278 // Here, we use the recursive method.
1279 do_sequence_of_members(c
, d
, md
->cmpr_data_pos
, md
->cmpr_size
, mpd
->nesting_level
+1);
1281 else if(md
->cmpr_meth
>=30 && md
->cmpr_meth
<=39) {
1282 de_warn(c
, "Unknown control item type %d at %"I64_FMT
, (int)md
->cmpr_meth
, pos1
);
1285 else if(md
->cmpr_meth
>=20 && md
->cmpr_meth
<=29) {
1286 do_info_item(c
, d
, md
);
1288 else if(d
->fmt
==FMT_ARCMAC
&& md
->arcmac_advf
) {
1289 do_extract_member_file_arcmac(c
, d
, md
, fi
);
1292 do_extract_member_file(c
, d
, md
, pmd
, fi
, md
->cmpr_data_pos
);
1296 if(need_curpath_pop
) {
1297 de_strarray_pop(d
->curpath
);
1299 if(fi
) de_finfo_destroy(c
, fi
);
1301 ucstring_destroy(md
->fn
);
1302 if(md
->arcmac_fn
) de_destroy_stringreaderdata(c
, md
->arcmac_fn
);
1303 if(md
->arcmac_advf
) de_advfile_destroy(md
->arcmac_advf
);
1306 de_dbg_indent_restore(c
, saved_indent_level
);
1309 static void do_sequence_of_members(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, int nesting_level
)
1311 if(nesting_level
>= MAX_NESTING_LEVEL
) {
1312 de_err(c
, "Max subdir nesting level exceeded");
1316 de_dbg(c
, "archive at %"I64_FMT
, pos1
);
1317 de_dbg_indent(c
, 1);
1318 parse_member_sequence(c
, d
, pos1
, len
, nesting_level
, member_cb_main
);
1319 de_dbg_indent(c
, -1);
1322 static void member_cb_for_prescan(deark
*c
, lctx
*d
, struct member_parser_data
*mpd
)
1324 if(mpd
->magic
!=d
->sig_byte
) return;
1325 if(mpd
->cmpr_meth_masked
==0x00) { // end of archive
1326 d
->prescan_found_eoa
= 1;
1327 d
->prescan_pos_after_eoa
= mpd
->member_pos
+ mpd
->member_len
;
1328 de_dbg2(c
, "end of member sequence at %"I64_FMT
, d
->prescan_pos_after_eoa
);
1331 if(mpd
->cmpr_meth
==20 || mpd
->cmpr_meth
==21 || mpd
->cmpr_meth
==30) {
1332 // Features we're pretty sure aren't used by PAK.
1333 d
->has_arc_extensions
= 1;
1335 d
->num_top_level_members
++;
1336 de_dbg2(c
, "member at %"I64_FMT
, mpd
->member_pos
);
1339 // Unfortunately, a pre-pass is necessary for robust handling of some ARC format
1340 // extensions. The main issue is member-file comments, which we want to be
1341 // available when we process that member file, but can only be found after we've
1342 // read through the whole ARC file.
1343 static void do_prescan_file(deark
*c
, lctx
*d
, i64 startpos
)
1345 de_dbg2(c
, "prescan");
1346 d
->num_top_level_members
= 0;
1347 de_dbg_indent(c
, 1);
1348 parse_member_sequence(c
, d
, startpos
, c
->infile
->len
-startpos
, 0, member_cb_for_prescan
);
1349 de_dbg2(c
, "number of members: %"I64_FMT
, d
->num_top_level_members
);
1350 de_dbg_indent(c
, -1);
1353 static int find_arc_marker(deark
*c
, const u8
*buf
, size_t buflen
, i64
*ppos
)
1357 for(i
=0; i
<buflen
; i
++) {
1366 static void destroy_lctx(deark
*c
, lctx
*d
)
1369 de_crcobj_destroy(d
->crco
);
1370 de_strarray_destroy(d
->curpath
);
1371 if(d
->pak16sfx_outf
) {
1372 dbuf_close(d
->pak16sfx_outf
);
1374 if(d
->persistent_md
) {
1377 for(i
=0; i
<d
->num_top_level_members
; i
++) {
1378 ucstring_destroy(d
->persistent_md
[i
].comment
);
1379 ucstring_destroy(d
->persistent_md
[i
].path
);
1381 de_free(c
, d
->persistent_md
);
1386 static int is_btspk(deark
*c
, lctx
*d
)
1388 if(d
->fmt
!=FMT_ARC
&& d
->fmt
!=FMT_ARCMAC
) return 0;
1390 if(dbuf_memcmp(c
->infile
, c
->infile
->len
-31,
1391 (const void*)"\x1b\x03" "MASTR\xff" "END!", 12))
1398 static void do_run_arc_spark_internal(deark
*c
, lctx
*d
)
1401 i64 num_extra_bytes
;
1404 if(is_btspk(c
, d
)) {
1405 de_warn(c
, "This looks like BTSPK format, which is not correctly supported.");
1408 d
->sig_byte
= (d
->fmt
==FMT_ARCMAC
) ? 0x1b : 0x1a;
1410 if(d
->sig_byte
==0x1a) {
1413 // Tolerate up to sizeof(buf)-1 bytes of initial junk
1414 de_read(buf
, 0, sizeof(buf
));
1415 if(!find_arc_marker(c
, buf
, sizeof(buf
), &pos
)) {
1416 de_err(c
, "Not a(n) %s file", d
->fmtname
);
1421 de_declare_fmt(c
, d
->fmtname
);
1422 d
->curpath
= de_strarray_create(c
, MAX_NESTING_LEVEL
+10);
1423 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
1425 do_prescan_file(c
, d
, pos
);
1426 if(d
->prescan_found_eoa
) {
1427 members_endpos
= d
->prescan_pos_after_eoa
;
1430 members_endpos
= c
->infile
->len
;
1432 mark_end_of_known_data(d
, members_endpos
);
1434 if(d
->fmt
==FMT_ARC
) {
1435 do_pk_comments(c
, d
);
1436 do_pak_trailer(c
, d
);
1439 do_sequence_of_members(c
, d
, pos
, members_endpos
, 0);
1441 num_extra_bytes
= c
->infile
->len
- d
->end_of_known_data
;
1442 if(num_extra_bytes
>0) {
1443 de_dbg(c
, "extra bytes at end of archive: %"I64_FMT
" (at %"I64_FMT
")",
1444 num_extra_bytes
, d
->end_of_known_data
);
1451 ////////// Special converter for PAK v1.6 SFX archives
1453 static void member_cb_for_pak16sfx(deark
*c
, lctx
*d
, struct member_parser_data
*mpd
)
1455 de_dbg2(c
, "pak16sfx member at %"I64_FMT
", type=0x%02x", mpd
->member_pos
,
1457 de_dbg_indent(c
, 1);
1459 // This is not perfect. We throw away the extended info, instead of converting
1460 // it to extended records.
1461 if(mpd
->magic
==0x1a) {
1462 if(!d
->pak16sfx_outf
) {
1463 d
->pak16sfx_outf
= dbuf_create_output_file(c
, "pak", NULL
, 0);
1465 dbuf_copy(c
->infile
, mpd
->member_pos
, mpd
->member_len
, d
->pak16sfx_outf
);
1467 de_dbg_indent(c
, -1);
1470 static void do_convert_pak16sfx(deark
*c
, lctx
*d
)
1472 d
->fmt
= FMT_PAK16SFX
;
1473 d
->recurse_subdirs
= 0;
1475 parse_member_sequence(c
, d
, 0, c
->infile
->len
, 0,member_cb_for_pak16sfx
);
1476 if(d
->pak16sfx_outf
) {
1477 dbuf_writeu16be(d
->pak16sfx_outf
, 0xfe00);
1481 /////////////////////// ARC (core ARC-only functions)
1483 static void de_run_arc(deark
*c
, de_module_params
*mparams
)
1488 d
= de_malloc(c
, sizeof(lctx
));
1491 // TODO: Make 'recurse' configurable. Would require us to make the embedded
1492 // archives end with the correct marker.
1493 d
->recurse_subdirs
= 1;
1494 d
->input_encoding_for_filenames
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1495 d
->input_encoding_for_comments
= DE_EXTENC_MAKE(d
->input_encoding_for_filenames
,
1496 DE_ENCSUBTYPE_HYBRID
);
1498 // TODO: It would probably be worth it to have a separate module for PAK, so we
1499 // can take the .PAK file extension into account when guessing what method #10
1500 // is. It's complicated, though, and not very useful until we support Crushed
1503 s
= de_get_ext_option(c
, "arc:method10");
1505 if(!de_strcmp(s
, "trimmed")) {
1508 else if(!de_strcmp(s
, "crushed")) {
1513 if(de_havemodcode(c
, mparams
, '6')) {
1514 do_convert_pak16sfx(c
, d
);
1518 do_run_arc_spark_internal(c
, d
);
1524 static int de_identify_arc(deark
*c
)
1526 static const char *exts
[] = {"arc", "ark", "pak", "spk", "sdn", "com"};
1530 int ends_with_trailer
= 0;
1531 int ends_with_comments
= 0;
1532 int starts_with_trailer
= 0;
1538 de_read(buf
, 0, sizeof(buf
));
1540 // Look for 0x1a in the first 4 bytes. Some .COM-style self-extracting
1541 // archives start with 1-3 bytes of code before the ARC marker.
1542 if(!find_arc_marker(c
, buf
, sizeof(buf
)-1, &arc_start
)) {
1546 cmpr_meth
= buf
[arc_start
+1];
1549 // Don't tolerate empty archives that don't start at the beginning of file.
1550 if(arc_start
!=0) return 0;
1552 starts_with_trailer
= 1;
1555 // Get info about file extension
1556 for(k
=0; k
<DE_ARRAYCOUNT(exts
); k
++) {
1557 if(de_input_file_has_ext(c
, exts
[k
])) {
1564 // Only tolerate leading junk for a few file extensions
1566 if(ext_idx
==0 || ext_idx
==1 || ext_idx
==5) { // .arc, .ark, .com
1574 // Look at some of the file, to see if it seems ok.
1575 if(has_ext
&& ext_idx
<=1 && arc_start
==0) {
1578 else if(has_ext
&& ext_idx
<=3 && arc_start
==0) {
1584 if(!is_valid_file_at(c
->infile
, arc_start
, c
->infile
->len
, strictness
)) {
1588 // Handle 2-byte files
1589 if(starts_with_trailer
&& c
->infile
->len
==2) {
1590 if(!has_ext
) return 0;
1591 if(ext_idx
==0) return 100;
1592 if(ext_idx
<=3) return 15;
1596 if(de_getu16be(c
->infile
->len
-2) == 0x1a00) {
1597 // Standard ARC trailer
1598 ends_with_trailer
= 1;
1601 if(de_getu32be(c
->infile
->len
-8) == 0x504baa55) {
1602 // PKARC trailer, for files with comments
1603 ends_with_comments
= 1;
1606 if(!ends_with_trailer
&& !ends_with_comments
) {
1607 // PAK-style extensions
1608 if(de_getu16be(c
->infile
->len
-2) == 0xfe00) {
1609 ends_with_comments
= 1;
1613 if(starts_with_trailer
) {
1614 if(ends_with_comments
) return 25;
1617 if(has_ext
&& (ends_with_trailer
|| ends_with_comments
)) return 90;
1618 if(ends_with_trailer
|| ends_with_comments
) return 35;
1619 if(has_ext
) return 24;
1620 if(arc_start
==0) return 19;
1624 static void de_help_arc(deark
*c
)
1626 de_msg(c
, "-opt arc:method10=<trimmed|crushed|auto> : How to interpret compression "
1630 void de_module_arc(deark
*c
, struct deark_module_info
*mi
)
1633 mi
->desc
= "ARC compressed archive";
1634 mi
->run_fn
= de_run_arc
;
1635 mi
->identify_fn
= de_identify_arc
;
1636 mi
->help_fn
= de_help_arc
;
1639 /////////////////////// Spark
1641 static void de_run_spark(deark
*c
, de_module_params
*mparams
)
1645 d
= de_malloc(c
, sizeof(lctx
));
1647 d
->fmtname
= "Spark";
1648 d
->input_encoding_for_filenames
= de_get_input_encoding(c
, NULL
, DE_ENCODING_RISCOS
);
1649 d
->input_encoding_for_comments
= DE_EXTENC_MAKE(d
->input_encoding_for_filenames
,
1650 DE_ENCSUBTYPE_HYBRID
);
1651 d
->recurse_subdirs
= de_get_ext_option_bool(c
, "spark:recurse", 1);
1653 do_run_arc_spark_internal(c
, d
);
1657 static int de_identify_spark(deark
*c
)
1661 int ldaddrcheck
= 0;
1662 int has_trailer
= 0;
1664 if(de_getbyte(0) != 0x1a) return 0;
1665 b
= de_getbyte(1); // compression method
1666 if(b
==0x82 || b
==0x83 || b
==0x88 || b
==0x89 || b
==0xff) {
1669 else if(b
==0x81 || b
==0x84 || b
==0x85 || b
==0x86) {
1670 ; // TODO: Verify that these are possible in Spark.
1676 load_addr
= (u32
)de_getu32le(29);
1677 if((load_addr
& 0xfff00000) == 0xfff00000) {
1681 if(de_getu16be(c
->infile
->len
-2) == 0x1a80) {
1685 if(has_trailer
&& ldaddrcheck
) return 85;
1686 if(ldaddrcheck
) return 30;
1687 if(has_trailer
) return 10;
1691 static void de_help_spark(deark
*c
)
1693 de_msg(c
, "-opt spark:recurse=0 : Extract subdirs as Spark files");
1696 void de_module_spark(deark
*c
, struct deark_module_info
*mi
)
1699 mi
->desc
= "Spark archive";
1700 mi
->run_fn
= de_run_spark
;
1701 mi
->identify_fn
= de_identify_spark
;
1702 mi
->help_fn
= de_help_spark
;
1705 /////////////////////// ArcMac
1707 static void de_run_arcmac(deark
*c
, de_module_params
*mparams
)
1711 d
= de_malloc(c
, sizeof(lctx
));
1712 d
->fmt
= FMT_ARCMAC
;
1713 d
->fmtname
= "ArcMac";
1714 d
->recurse_subdirs
= 1;
1715 d
->input_encoding_for_arcmac_fn
= de_get_input_encoding(c
, NULL
, DE_ENCODING_MACROMAN
);
1716 d
->input_encoding_for_filenames
= DE_ENCODING_CP437
;
1717 d
->input_encoding_for_comments
= DE_EXTENC_MAKE(d
->input_encoding_for_filenames
,
1718 DE_ENCSUBTYPE_HYBRID
);
1720 do_run_arc_spark_internal(c
, d
);
1724 static int de_identify_arcmac(deark
*c
)
1729 de_read(buf1
, 0, 2);
1730 if(buf1
[0]!=0x1b) return 0;
1731 if(!(buf1
[1]>=1 && buf1
[1]<=9)) return 0;
1732 de_read(buf2
, 59, 2);
1733 if(buf2
[0]!=0x1a) return 0;
1734 if(buf2
[1]!=buf1
[1]) return 0;
1738 void de_module_arcmac(deark
*c
, struct deark_module_info
*mi
)
1741 mi
->desc
= "ArcMac compressed archive";
1742 mi
->run_fn
= de_run_arcmac
;
1743 mi
->identify_fn
= de_identify_arcmac
;