exe: Support PAK v1.6 self-extracting archives
[deark.git] / modules / arcfs.c
blob1788f96f15dee6c178f620e3ec5076f7c90bb8eb
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // ArcFS
6 // Squash
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_arcfs);
12 DE_DECLARE_MODULE(de_module_squash);
14 #define MAX_NESTING_LEVEL 32
16 struct arcfs_member_data {
17 struct de_riscos_file_attrs rfa;
18 int is_dir;
19 int is_regular_file;
20 u8 cmpr_method;
21 i64 file_data_offs_rel;
22 i64 file_data_offs_abs;
23 i64 orig_len;
24 i64 cmpr_len;
25 const char *cmpr_meth_name;
26 de_ucstring *fn;
29 typedef struct localctx_struct {
30 int subdir_level;
31 i64 nmembers;
32 i64 data_offs;
33 struct de_crcobj *crco;
34 struct de_strarray *curpath;
35 } lctx;
37 static int do_arcfs_file_header(deark *c, lctx *d, i64 pos1)
39 i64 pos = pos1;
40 i64 hlen;
41 u32 ver_r, ver_rw;
42 u32 format_ver;
43 int retval = 0;
45 de_dbg(c, "file header at %d", (int)pos1);
46 de_dbg_indent(c, 1);
47 pos += 8; // Signature
49 hlen = de_getu32le_p(&pos);
50 d->nmembers = hlen/36;
51 de_dbg(c, "header len: %d (%d members)", (int)hlen, (int)d->nmembers);
53 d->data_offs = de_getu32le_p(&pos);
54 de_dbg(c, "data offset: %d", (int)d->data_offs);
56 ver_r = (u32)de_getu32le_p(&pos);
57 de_dbg(c, "version req'd for read: %u.%02u", (unsigned int)(ver_r/100),
58 (unsigned int)(ver_r%100));
59 ver_rw = (u32)de_getu32le_p(&pos);
60 de_dbg(c, "version req'd for read/write: %u.%02u", (unsigned int)(ver_rw/100),
61 (unsigned int)(ver_rw%100));
63 // ??
64 format_ver = (u32)de_getu32le_p(&pos);
65 de_dbg(c, "format version: %u", (unsigned int)format_ver);
66 if(format_ver!=0) {
67 de_err(c, "Unsupported format version: %u", (unsigned int)format_ver);
68 goto done;
71 // 68 reserved bytes here
73 retval = 1;
74 done:
75 de_dbg_indent(c, -1);
76 return retval;
79 static void do_arcfs_compressed(deark *c, lctx *d, struct arcfs_member_data *md,
80 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
81 struct de_dfilter_results *dres)
83 struct de_lzw_params delzwp;
85 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
86 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
87 delzwp.max_code_size = md->rfa.lzwmaxbits;
88 if(!dcmpro->len_known) {
89 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
91 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
94 static void do_arcfs_crunched(deark *c, lctx *d, struct arcfs_member_data *md,
95 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
96 struct de_dfilter_results *dres)
98 struct de_dcmpr_two_layer_params tlp;
99 struct de_lzw_params delzwp;
101 // "Crunched" means "packed", then "compressed".
102 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
104 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
105 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
106 delzwp.max_code_size = md->rfa.lzwmaxbits;
108 // This flag tells the LZW decompressor to stop, instead of reporting failure,
109 // if bad LZW compressed data is encountered.
110 // The problem is that some ArcFS files have garbage at the end of the
111 // compressed data.
112 // Apparently, we're expected to have a single decompression algorithm that
113 // handles both layers of compression simultaneously, without any buffering
114 // between them. That way, we could stop immediately when we've decompressed
115 // a sufficient number of bytes, and never encounter the garbage. But we
116 // don't have that.
117 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
119 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
120 tlp.codec1_pushable = dfilter_lzw_codec;
121 tlp.codec1_private_params = (void*)&delzwp;
123 tlp.codec2 = dfilter_rle90_codec;
125 tlp.dcmpri = dcmpri;
126 tlp.dcmpro = dcmpro;
127 tlp.dres = dres;
129 de_dfilter_decompress_two_layer(c, &tlp);
132 static void do_arcfs_extract_member_file(deark *c, lctx *d, struct arcfs_member_data *md,
133 de_finfo *fi)
135 dbuf *outf = NULL;
136 u32 crc_calc;
137 de_ucstring *fullfn = NULL;
138 struct de_dfilter_in_params dcmpri;
139 struct de_dfilter_out_params dcmpro;
140 struct de_dfilter_results dres;
141 int have_dres = 0;
143 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
144 if(md->file_data_offs_abs + md->cmpr_len > c->infile->len) goto done;
146 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT,
147 md->file_data_offs_abs, md->cmpr_len);
149 fullfn = ucstring_create(c);
150 de_strarray_make_path(d->curpath, fullfn, 0);
151 ucstring_append_ucstring(fullfn, md->fn);
152 fmtutil_riscos_append_type_to_filename(c, fi, fullfn, &md->rfa, md->is_dir, 0);
154 if(md->cmpr_method!=0x82 && md->cmpr_method!=0x83 && md->cmpr_method!=0x88 &&
155 md->cmpr_method!=0xff)
157 de_err(c, "Compression type 0x%02x (%s) is not supported.",
158 (unsigned int)md->cmpr_method, md->cmpr_meth_name);
159 goto done;
162 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
164 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
165 dbuf_enable_wbuffer(outf);
167 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)d->crco);
168 de_crcobj_reset(d->crco);
170 dcmpri.f = c->infile;
171 dcmpri.pos = md->file_data_offs_abs;
172 dcmpri.len = md->cmpr_len;
173 dcmpro.f = outf;
174 dcmpro.len_known = 1;
175 dcmpro.expected_len = md->orig_len;
177 if(md->cmpr_method==0x82) { // stored
178 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
180 else if(md->cmpr_method==0x83) {
181 fmtutil_decompress_rle90_ex(c, &dcmpri, &dcmpro, &dres, 0);
182 have_dres = 1;
184 else if(md->cmpr_method==0xff) {
185 do_arcfs_compressed(c, d, md, &dcmpri, &dcmpro, &dres);
186 have_dres = 1;
188 else if(md->cmpr_method==0x88) {
189 do_arcfs_crunched(c, d, md, &dcmpri, &dcmpro, &dres);
190 have_dres = 1;
192 dbuf_flush(dcmpro.f);
194 if(have_dres && dres.errcode!=0) {
195 de_err(c, "%s: Decompression failed: %s",
196 ucstring_getpsz_d(md->fn), de_dfilter_get_errmsg(c, &dres));
197 goto done;
200 if(outf->len != md->orig_len) {
201 de_err(c, "%s: Decompression failed: Expected size %"I64_FMT
202 ", got %"I64_FMT, ucstring_getpsz_d(md->fn), md->orig_len, outf->len);
203 goto done;
206 crc_calc = de_crcobj_getval(d->crco);
207 de_dbg(c, "crc (calculated): 0x%04x", (unsigned int)crc_calc);
208 if(crc_calc != md->rfa.crc_from_attribs) {
209 if(md->rfa.crc_from_attribs==0) {
210 de_warn(c, "CRC check not available for file %s", ucstring_getpsz_d(md->fn));
212 else {
213 de_err(c, "CRC check failed for file %s", ucstring_getpsz_d(md->fn));
217 done:
218 dbuf_close(outf);
219 ucstring_destroy(fullfn);
222 // "Extract" a directory entry
223 static void do_arcfs_extract_member_dir(deark *c, lctx *d, struct arcfs_member_data *md,
224 de_finfo *fi)
226 dbuf *outf = NULL;
227 de_ucstring *fullfn = NULL;
229 fullfn = ucstring_create(c);
230 // Note that md->fn has already been added to d->curpath
231 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
233 fi->is_directory = 1;
234 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
236 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
237 dbuf_close(outf);
238 ucstring_destroy(fullfn);
241 static void do_arcfs_extract_member(deark *c, lctx *d, struct arcfs_member_data *md)
243 de_finfo *fi = NULL;
245 fi = de_finfo_create(c);
246 fi->original_filename_flag = 1;
247 if(md->rfa.mod_time.is_valid) {
248 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->rfa.mod_time;
251 fi->has_riscos_data = 1;
252 fi->riscos_attribs = md->rfa.attribs;
253 fi->load_addr = md->rfa.load_addr;
254 fi->exec_addr = md->rfa.exec_addr;
256 if(md->is_regular_file) {
257 do_arcfs_extract_member_file(c, d, md, fi);
259 else if(md->is_dir) {
260 do_arcfs_extract_member_dir(c, d, md, fi);
263 de_finfo_destroy(c, fi);
266 static const char *get_info_byte_name(u8 t)
268 const char *name = NULL;
269 switch(t) {
270 case 0x00: name="end of dir marker"; break;
271 case 0x01: name="deleted object"; break;
272 case 0x82: name="stored"; break;
273 case 0x83: name="packed (RLE)"; break;
274 case 0x88: name="crunched"; break;
275 case 0x89: name="squashed"; break;
276 case 0xff: name="compressed"; break;
278 return name?name:"?";
281 static void destroy_arcfs_member_data(deark *c, struct arcfs_member_data *md)
283 if(!md) return;
284 ucstring_destroy(md->fn);
285 de_free(c, md);
288 // Returns 0 only if we should stop parsing the entire arcfs file.
289 static int do_arcfs_member(deark *c, lctx *d, i64 idx, i64 pos1)
291 i64 pos = pos1;
292 u32 info_word;
293 u8 info_byte;
294 unsigned int tmpflags;
295 int saved_indent_level;
296 struct arcfs_member_data *md;
297 int retval = 0;
299 de_dbg_indent_save(c, &saved_indent_level);
300 md = de_malloc(c, sizeof(struct arcfs_member_data));
301 de_dbg(c, "header at %"I64_FMT, pos1);
302 de_dbg_indent(c, 1);
304 retval = 1;
305 info_byte = de_getbyte_p(&pos);
306 md->cmpr_meth_name = get_info_byte_name(info_byte);
307 de_dbg(c, "info byte: 0x%02x (%s)", (unsigned int)info_byte, md->cmpr_meth_name);
308 if(info_byte==1) goto done; // deleted object
309 if(info_byte==0) { // end of directory marker
310 if(d->subdir_level>0) d->subdir_level--;
311 de_strarray_pop(d->curpath);
312 goto done;
314 md->cmpr_method = info_byte;
316 // Look ahead at the "information word".
317 // TODO: Is this the right way to check for a directory?
318 info_word = (u32)de_getu32le(pos1+32);
319 md->is_dir = (info_word&0x80000000U)?1:0;
320 md->is_regular_file = !md->is_dir;
322 md->fn = ucstring_create(c);
323 dbuf_read_to_ucstring(c->infile, pos, 11, md->fn, DE_CONVFLAG_STOP_AT_NUL,
324 DE_ENCODING_RISCOS);
325 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
326 if(md->is_dir) {
327 if(d->subdir_level >= MAX_NESTING_LEVEL) {
328 de_err(c, "Directories nested too deeply");
329 retval = 0;
330 goto done;
332 d->subdir_level++;
333 de_strarray_push(d->curpath, md->fn);
335 pos += 11;
337 md->orig_len = de_getu32le_p(&pos);
338 if(md->is_regular_file) {
339 de_dbg(c, "orig file length: %"I64_FMT, md->orig_len);
342 fmtutil_riscos_read_load_exec(c, c->infile, &md->rfa, pos);
343 pos += 8;
345 tmpflags = 0;
346 if(md->is_regular_file)
347 tmpflags |= DE_RISCOS_FLAG_HAS_CRC;
348 if(md->cmpr_method==0xff || md->cmpr_method==0x88)
349 tmpflags |= DE_RISCOS_FLAG_HAS_LZWMAXBITS;
350 fmtutil_riscos_read_attribs_field(c, c->infile, &md->rfa, pos, tmpflags);
351 pos += 4;
353 md->cmpr_len = de_getu32le_p(&pos);
354 if(md->is_regular_file) {
355 de_dbg(c, "compressed length: %"I64_FMT, md->cmpr_len);
358 de_dbg(c, "info word: 0x%08x", (unsigned int)info_word);
359 de_dbg_indent(c, 1);
360 de_dbg(c, "is directory: %d", md->is_dir);
361 if(md->is_regular_file) {
362 md->file_data_offs_rel = (i64)info_word;
363 md->file_data_offs_abs = d->data_offs+md->file_data_offs_rel;
364 de_dbg(c, "file data offset: (%"I64_FMT"+)%"I64_FMT,
365 d->data_offs, md->file_data_offs_rel);
367 de_dbg_indent(c, -1);
369 de_dbg_indent(c, -1);
371 do_arcfs_extract_member(c, d, md);
373 done:
374 destroy_arcfs_member_data(c, md);
375 de_dbg_indent_restore(c, saved_indent_level);
376 return retval;
379 static void do_arcfs_members(deark *c, lctx *d, i64 pos1)
381 i64 k;
382 i64 pos = pos1;
384 for(k=0; k<d->nmembers; k++) {
385 int ret;
387 if(pos>=c->infile->len) break;
388 de_dbg(c, "member[%d]", (int)k);
389 de_dbg_indent(c, 1);
390 ret = do_arcfs_member(c, d, k, pos);
391 de_dbg_indent(c, -1);
392 if(!ret) break;
393 pos += 36;
397 static void de_run_arcfs(deark *c, de_module_params *mparams)
399 lctx *d = NULL;
400 i64 pos;
402 d = de_malloc(c, sizeof(lctx));
403 pos = 0;
404 if(!do_arcfs_file_header(c, d, pos)) goto done;
405 pos += 96;
407 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
408 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
409 do_arcfs_members(c, d, pos);
411 done:
412 if(d) {
413 de_crcobj_destroy(d->crco);
414 de_strarray_destroy(d->curpath);
415 de_free(c, d);
419 static int de_identify_arcfs(deark *c)
421 if(!dbuf_memcmp(c->infile, 0, "Archive\x00", 8))
422 return 100;
423 return 0;
426 void de_module_arcfs(deark *c, struct deark_module_info *mi)
428 mi->id = "arcfs";
429 mi->desc = "ArcFS (RISC OS archive)";
430 mi->run_fn = de_run_arcfs;
431 mi->identify_fn = de_identify_arcfs;
434 ///////////////////////////////////////////////////////////////////////////
435 // Squash
437 typedef struct sqctx_struct {
438 i64 orig_len;
439 struct de_riscos_file_attrs rfa;
440 } sqctx;
442 static void do_squash_header(deark *c, sqctx *d, i64 pos1)
444 i64 pos = pos1;
446 de_dbg(c, "header at %d", (int)pos1);
448 de_dbg_indent(c, 1);
449 pos += 4; // signature
450 d->orig_len = de_getu32le_p(&pos);
451 de_dbg(c, "orig file length: %"I64_FMT, d->orig_len);
453 fmtutil_riscos_read_load_exec(c, c->infile, &d->rfa, pos);
454 pos += 8;
455 de_dbg_indent(c, -1);
458 static void do_squash_main(deark *c, sqctx *d)
460 dbuf *outf = NULL;
461 de_finfo *fi = NULL;
462 de_ucstring *fn = NULL;
463 struct de_dfilter_results dres;
464 struct de_dfilter_in_params dcmpri;
465 struct de_dfilter_out_params dcmpro;
466 struct de_lzw_params delzwp;
467 int saved_indent_level;
469 de_dbg_indent_save(c, &saved_indent_level);
470 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
472 dcmpri.f = c->infile;
473 dcmpri.pos = 20;
474 dcmpri.len = c->infile->len - dcmpri.pos;
475 de_dbg(c, "compressed data at %"I64_FMT, dcmpri.pos);
476 de_dbg_indent(c, 1);
478 fi = de_finfo_create(c);
480 fn = ucstring_create(c);
482 fi->has_riscos_data = 1;
483 fi->riscos_attribs = d->rfa.attribs;
484 fi->load_addr = d->rfa.load_addr;
485 fi->exec_addr = d->rfa.exec_addr;
487 ucstring_append_sz(fn, "bin", DE_ENCODING_LATIN1);
488 fmtutil_riscos_append_type_to_filename(c, fi, fn, &d->rfa, 0, 1);
489 de_finfo_set_name_from_ucstring(c, fi, fn, 0);
491 if(d->rfa.mod_time.is_valid) {
492 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = d->rfa.mod_time;
495 outf = dbuf_create_output_file(c, NULL, fi, 0);
496 dbuf_enable_wbuffer(outf);
497 dcmpro.f = outf;
498 dcmpro.len_known = 0;
500 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
501 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
502 delzwp.flags |= DE_LZWFLAG_HAS3BYTEHEADER;
504 fmtutil_decompress_lzw(c, &dcmpri, &dcmpro, &dres, &delzwp);
505 dbuf_flush(dcmpro.f);
507 if(dres.errcode) {
508 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
509 goto done;
512 if(outf->len != d->orig_len) {
513 de_err(c, "Decompression failed, expected size %"I64_FMT
514 ", got %"I64_FMT, d->orig_len, outf->len);
515 goto done;
518 done:
519 dbuf_close(outf);
520 de_finfo_destroy(c, fi);
521 ucstring_destroy(fn);
522 de_dbg_indent_restore(c, saved_indent_level);
525 static void de_run_squash(deark *c, de_module_params *mparams)
527 sqctx *d = NULL;
529 d = de_malloc(c, sizeof(sqctx));
531 do_squash_header(c, d, 0);
532 do_squash_main(c, d);
534 de_free(c, d);
537 static int de_identify_squash(deark *c)
539 if(!dbuf_memcmp(c->infile, 0, "SQSH", 4))
540 return 100;
541 return 0;
544 void de_module_squash(deark *c, struct deark_module_info *mi)
546 mi->id = "squash";
547 mi->desc = "Squash (RISC OS compressed file)";
548 mi->run_fn = de_run_squash;
549 mi->identify_fn = de_identify_squash;