riff: Basic support for extracting RDIB images
[deark.git] / modules / arcfs.c
blobfef935a4465de5374357f62454b2a7b94d671789
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // ArcFS
6 // Squash
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
// Modules implemented in this file.
DE_DECLARE_MODULE(de_module_arcfs);
DE_DECLARE_MODULE(de_module_squash);

// Deepest directory nesting accepted in an ArcFS archive. Also used to size
// the path-component array.
#define MAX_NESTING_LEVEL 32
16 struct arcfs_member_data {
17 struct de_riscos_file_attrs rfa;
18 int is_dir;
19 int is_regular_file;
20 u8 cmpr_method;
21 i64 file_data_offs_rel;
22 i64 file_data_offs_abs;
23 i64 orig_len;
24 i64 cmpr_len;
25 const char *cmpr_meth_name;
26 de_ucstring *fn;
29 typedef struct localctx_struct {
30 int subdir_level;
31 i64 nmembers;
32 i64 data_offs;
33 struct de_crcobj *crco;
34 struct de_strarray *curpath;
35 } lctx;
37 static int do_arcfs_file_header(deark *c, lctx *d, i64 pos1)
39 i64 pos = pos1;
40 i64 hlen;
41 u32 ver_r, ver_rw;
42 u32 format_ver;
43 int retval = 0;
45 de_dbg(c, "file header at %d", (int)pos1);
46 de_dbg_indent(c, 1);
47 pos += 8; // Signature
49 hlen = de_getu32le_p(&pos);
50 d->nmembers = hlen/36;
51 de_dbg(c, "header len: %d (%d members)", (int)hlen, (int)d->nmembers);
53 d->data_offs = de_getu32le_p(&pos);
54 de_dbg(c, "data offset: %d", (int)d->data_offs);
56 ver_r = (u32)de_getu32le_p(&pos);
57 de_dbg(c, "version req'd for read: %u.%02u", (unsigned int)(ver_r/100),
58 (unsigned int)(ver_r%100));
59 ver_rw = (u32)de_getu32le_p(&pos);
60 de_dbg(c, "version req'd for read/write: %u.%02u", (unsigned int)(ver_rw/100),
61 (unsigned int)(ver_rw%100));
63 // ??
64 format_ver = (u32)de_getu32le_p(&pos);
65 de_dbg(c, "format version: %u", (unsigned int)format_ver);
66 if(format_ver!=0) {
67 de_err(c, "Unsupported format version: %u", (unsigned int)format_ver);
68 goto done;
71 // 68 reserved bytes here
73 retval = 1;
74 done:
75 de_dbg_indent(c, -1);
76 return retval;
79 static void do_arcfs_compressed(deark *c, lctx *d, struct arcfs_member_data *md,
80 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
81 struct de_dfilter_results *dres)
83 struct de_lzw_params delzwp;
85 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
86 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
87 delzwp.max_code_size = md->rfa.lzwmaxbits;
88 if(!dcmpro->len_known) {
89 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
91 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
94 static void do_arcfs_crunched(deark *c, lctx *d, struct arcfs_member_data *md,
95 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
96 struct de_dfilter_results *dres)
98 struct de_dcmpr_two_layer_params tlp;
99 struct de_lzw_params delzwp;
101 // "Crunched" means "packed", then "compressed".
102 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
104 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
105 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
106 delzwp.max_code_size = md->rfa.lzwmaxbits;
108 // This flag tells the LZW decompressor to stop, instead of reporting failure,
109 // if bad LZW compressed data is encountered.
110 // The problem is that some ArcFS files have garbage at the end of the
111 // compressed data.
112 // Apparently, we're expected to have a single decompression algorithm that
113 // handles both layers of compression simultaneously, without any buffering
114 // between them. That way, we could stop immediately when we've decompressed
115 // a sufficient number of bytes, and never encounter the garbage. But we
116 // don't have that.
117 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
119 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
120 tlp.codec1_pushable = dfilter_lzw_codec;
121 tlp.codec1_private_params = (void*)&delzwp;
123 tlp.codec2 = dfilter_rle90_codec;
125 tlp.dcmpri = dcmpri;
126 tlp.dcmpro = dcmpro;
127 tlp.dres = dres;
129 de_dfilter_decompress_two_layer(c, &tlp);
132 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
134 struct de_crcobj *crco = (struct de_crcobj*)userdata;
135 de_crcobj_addbuf(crco, buf, buf_len);
138 static void do_arcfs_extract_member_file(deark *c, lctx *d, struct arcfs_member_data *md,
139 de_finfo *fi)
141 dbuf *outf = NULL;
142 u32 crc_calc;
143 de_ucstring *fullfn = NULL;
144 struct de_dfilter_in_params dcmpri;
145 struct de_dfilter_out_params dcmpro;
146 struct de_dfilter_results dres;
147 int have_dres = 0;
149 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
150 if(md->file_data_offs_abs + md->cmpr_len > c->infile->len) goto done;
152 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT,
153 md->file_data_offs_abs, md->cmpr_len);
155 fullfn = ucstring_create(c);
156 de_strarray_make_path(d->curpath, fullfn, 0);
157 ucstring_append_ucstring(fullfn, md->fn);
158 fmtutil_riscos_append_type_to_filename(c, fi, fullfn, &md->rfa, md->is_dir, 0);
160 if(md->cmpr_method!=0x82 && md->cmpr_method!=0x83 && md->cmpr_method!=0x88 &&
161 md->cmpr_method!=0xff)
163 de_err(c, "Compression type 0x%02x (%s) is not supported.",
164 (unsigned int)md->cmpr_method, md->cmpr_meth_name);
165 goto done;
168 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
170 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
172 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
173 de_crcobj_reset(d->crco);
175 dcmpri.f = c->infile;
176 dcmpri.pos = md->file_data_offs_abs;
177 dcmpri.len = md->cmpr_len;
178 dcmpro.f = outf;
179 dcmpro.len_known = 1;
180 dcmpro.expected_len = md->orig_len;
182 if(md->cmpr_method==0x82) { // stored
183 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
185 else if(md->cmpr_method==0x83) {
186 fmtutil_decompress_rle90_ex(c, &dcmpri, &dcmpro, &dres, 0);
187 have_dres = 1;
189 else if(md->cmpr_method==0xff) {
190 do_arcfs_compressed(c, d, md, &dcmpri, &dcmpro, &dres);
191 have_dres = 1;
193 else if(md->cmpr_method==0x88) {
194 do_arcfs_crunched(c, d, md, &dcmpri, &dcmpro, &dres);
195 have_dres = 1;
198 if(have_dres && dres.errcode!=0) {
199 de_err(c, "%s: Decompression failed: %s",
200 ucstring_getpsz_d(md->fn), de_dfilter_get_errmsg(c, &dres));
201 goto done;
204 if(outf->len != md->orig_len) {
205 de_err(c, "%s: Decompression failed: Expected size %"I64_FMT
206 ", got %"I64_FMT, ucstring_getpsz_d(md->fn), md->orig_len, outf->len);
207 goto done;
210 crc_calc = de_crcobj_getval(d->crco);
211 de_dbg(c, "crc (calculated): 0x%04x", (unsigned int)crc_calc);
212 if(crc_calc != md->rfa.crc_from_attribs) {
213 if(md->rfa.crc_from_attribs==0) {
214 de_warn(c, "CRC check not available for file %s", ucstring_getpsz_d(md->fn));
216 else {
217 de_err(c, "CRC check failed for file %s", ucstring_getpsz_d(md->fn));
221 done:
222 dbuf_close(outf);
223 ucstring_destroy(fullfn);
226 // "Extract" a directory entry
227 static void do_arcfs_extract_member_dir(deark *c, lctx *d, struct arcfs_member_data *md,
228 de_finfo *fi)
230 dbuf *outf = NULL;
231 de_ucstring *fullfn = NULL;
233 fullfn = ucstring_create(c);
234 // Note that md->fn has already been added to d->curpath
235 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
237 fi->is_directory = 1;
238 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
240 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
241 dbuf_close(outf);
242 ucstring_destroy(fullfn);
245 static void do_arcfs_extract_member(deark *c, lctx *d, struct arcfs_member_data *md)
247 de_finfo *fi = NULL;
249 fi = de_finfo_create(c);
250 fi->original_filename_flag = 1;
251 if(md->rfa.mod_time.is_valid) {
252 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->rfa.mod_time;
255 fi->has_riscos_data = 1;
256 fi->riscos_attribs = md->rfa.attribs;
257 fi->load_addr = md->rfa.load_addr;
258 fi->exec_addr = md->rfa.exec_addr;
260 if(md->is_regular_file) {
261 do_arcfs_extract_member_file(c, d, md, fi);
263 else if(md->is_dir) {
264 do_arcfs_extract_member_dir(c, d, md, fi);
267 de_finfo_destroy(c, fi);
270 static const char *get_info_byte_name(u8 t)
272 const char *name = NULL;
273 switch(t) {
274 case 0x00: name="end of dir marker"; break;
275 case 0x01: name="deleted object"; break;
276 case 0x82: name="stored"; break;
277 case 0x83: name="packed (RLE)"; break;
278 case 0x88: name="crunched"; break;
279 case 0x89: name="squashed"; break;
280 case 0xff: name="compressed"; break;
282 return name?name:"?";
285 static void destroy_arcfs_member_data(deark *c, struct arcfs_member_data *md)
287 if(!md) return;
288 ucstring_destroy(md->fn);
289 de_free(c, md);
292 // Returns 0 only if we should stop parsing the entire arcfs file.
293 static int do_arcfs_member(deark *c, lctx *d, i64 idx, i64 pos1)
295 i64 pos = pos1;
296 u32 info_word;
297 u8 info_byte;
298 unsigned int tmpflags;
299 int saved_indent_level;
300 struct arcfs_member_data *md;
301 int retval = 0;
303 de_dbg_indent_save(c, &saved_indent_level);
304 md = de_malloc(c, sizeof(struct arcfs_member_data));
305 de_dbg(c, "header at %"I64_FMT, pos1);
306 de_dbg_indent(c, 1);
308 retval = 1;
309 info_byte = de_getbyte_p(&pos);
310 md->cmpr_meth_name = get_info_byte_name(info_byte);
311 de_dbg(c, "info byte: 0x%02x (%s)", (unsigned int)info_byte, md->cmpr_meth_name);
312 if(info_byte==1) goto done; // deleted object
313 if(info_byte==0) { // end of directory marker
314 if(d->subdir_level>0) d->subdir_level--;
315 de_strarray_pop(d->curpath);
316 goto done;
318 md->cmpr_method = info_byte;
320 // Look ahead at the "information word".
321 // TODO: Is this the right way to check for a directory?
322 info_word = (u32)de_getu32le(pos1+32);
323 md->is_dir = (info_word&0x80000000U)?1:0;
324 md->is_regular_file = !md->is_dir;
326 md->fn = ucstring_create(c);
327 dbuf_read_to_ucstring(c->infile, pos, 11, md->fn, DE_CONVFLAG_STOP_AT_NUL,
328 DE_ENCODING_RISCOS);
329 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
330 if(md->is_dir) {
331 if(d->subdir_level >= MAX_NESTING_LEVEL) {
332 de_err(c, "Directories nested too deeply");
333 retval = 0;
334 goto done;
336 d->subdir_level++;
337 de_strarray_push(d->curpath, md->fn);
339 pos += 11;
341 md->orig_len = de_getu32le_p(&pos);
342 if(md->is_regular_file) {
343 de_dbg(c, "orig file length: %"I64_FMT, md->orig_len);
346 fmtutil_riscos_read_load_exec(c, c->infile, &md->rfa, pos);
347 pos += 8;
349 tmpflags = 0;
350 if(md->is_regular_file)
351 tmpflags |= DE_RISCOS_FLAG_HAS_CRC;
352 if(md->cmpr_method==0xff || md->cmpr_method==0x88)
353 tmpflags |= DE_RISCOS_FLAG_HAS_LZWMAXBITS;
354 fmtutil_riscos_read_attribs_field(c, c->infile, &md->rfa, pos, tmpflags);
355 pos += 4;
357 md->cmpr_len = de_getu32le_p(&pos);
358 if(md->is_regular_file) {
359 de_dbg(c, "compressed length: %"I64_FMT, md->cmpr_len);
362 de_dbg(c, "info word: 0x%08x", (unsigned int)info_word);
363 de_dbg_indent(c, 1);
364 de_dbg(c, "is directory: %d", md->is_dir);
365 if(md->is_regular_file) {
366 md->file_data_offs_rel = (i64)info_word;
367 md->file_data_offs_abs = d->data_offs+md->file_data_offs_rel;
368 de_dbg(c, "file data offset: (%"I64_FMT"+)%"I64_FMT,
369 d->data_offs, md->file_data_offs_rel);
371 de_dbg_indent(c, -1);
373 de_dbg_indent(c, -1);
375 do_arcfs_extract_member(c, d, md);
377 done:
378 destroy_arcfs_member_data(c, md);
379 de_dbg_indent_restore(c, saved_indent_level);
380 return retval;
383 static void do_arcfs_members(deark *c, lctx *d, i64 pos1)
385 i64 k;
386 i64 pos = pos1;
388 for(k=0; k<d->nmembers; k++) {
389 int ret;
391 if(pos>=c->infile->len) break;
392 de_dbg(c, "member[%d]", (int)k);
393 de_dbg_indent(c, 1);
394 ret = do_arcfs_member(c, d, k, pos);
395 de_dbg_indent(c, -1);
396 if(!ret) break;
397 pos += 36;
401 static void de_run_arcfs(deark *c, de_module_params *mparams)
403 lctx *d = NULL;
404 i64 pos;
406 d = de_malloc(c, sizeof(lctx));
407 pos = 0;
408 if(!do_arcfs_file_header(c, d, pos)) goto done;
409 pos += 96;
411 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
412 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
413 do_arcfs_members(c, d, pos);
415 done:
416 if(d) {
417 de_crcobj_destroy(d->crco);
418 de_strarray_destroy(d->curpath);
419 de_free(c, d);
423 static int de_identify_arcfs(deark *c)
425 if(!dbuf_memcmp(c->infile, 0, "Archive\x00", 8))
426 return 100;
427 return 0;
430 void de_module_arcfs(deark *c, struct deark_module_info *mi)
432 mi->id = "arcfs";
433 mi->desc = "ArcFS (RISC OS archive)";
434 mi->run_fn = de_run_arcfs;
435 mi->identify_fn = de_identify_arcfs;
438 ///////////////////////////////////////////////////////////////////////////
439 // Squash
441 typedef struct sqctx_struct {
442 i64 orig_len;
443 struct de_riscos_file_attrs rfa;
444 } sqctx;
446 static void do_squash_header(deark *c, sqctx *d, i64 pos1)
448 i64 pos = pos1;
450 de_dbg(c, "header at %d", (int)pos1);
452 de_dbg_indent(c, 1);
453 pos += 4; // signature
454 d->orig_len = de_getu32le_p(&pos);
455 de_dbg(c, "orig file length: %"I64_FMT, d->orig_len);
457 fmtutil_riscos_read_load_exec(c, c->infile, &d->rfa, pos);
458 pos += 8;
459 de_dbg_indent(c, -1);
462 static void do_squash_main(deark *c, sqctx *d)
464 dbuf *outf = NULL;
465 de_finfo *fi = NULL;
466 de_ucstring *fn = NULL;
467 struct de_dfilter_results dres;
468 struct de_dfilter_in_params dcmpri;
469 struct de_dfilter_out_params dcmpro;
470 struct de_lzw_params delzwp;
471 int saved_indent_level;
473 de_dbg_indent_save(c, &saved_indent_level);
474 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
476 dcmpri.f = c->infile;
477 dcmpri.pos = 20;
478 dcmpri.len = c->infile->len - dcmpri.pos;
479 de_dbg(c, "compressed data at %"I64_FMT, dcmpri.pos);
480 de_dbg_indent(c, 1);
482 fi = de_finfo_create(c);
484 fn = ucstring_create(c);
486 fi->has_riscos_data = 1;
487 fi->riscos_attribs = d->rfa.attribs;
488 fi->load_addr = d->rfa.load_addr;
489 fi->exec_addr = d->rfa.exec_addr;
491 ucstring_append_sz(fn, "bin", DE_ENCODING_LATIN1);
492 fmtutil_riscos_append_type_to_filename(c, fi, fn, &d->rfa, 0, 1);
493 de_finfo_set_name_from_ucstring(c, fi, fn, 0);
495 if(d->rfa.mod_time.is_valid) {
496 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = d->rfa.mod_time;
499 outf = dbuf_create_output_file(c, NULL, fi, 0);
500 dcmpro.f = outf;
501 dcmpro.len_known = 0;
503 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
504 delzwp.fmt = DE_LZWFMT_UNIXCOMPRESS;
505 delzwp.flags |= DE_LZWFLAG_HAS3BYTEHEADER;
507 fmtutil_decompress_lzw(c, &dcmpri, &dcmpro, &dres, &delzwp);
509 if(dres.errcode) {
510 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
511 goto done;
514 if(outf->len != d->orig_len) {
515 de_err(c, "Decompression failed, expected size %"I64_FMT
516 ", got %"I64_FMT, d->orig_len, outf->len);
517 goto done;
520 done:
521 dbuf_close(outf);
522 de_finfo_destroy(c, fi);
523 ucstring_destroy(fn);
524 de_dbg_indent_restore(c, saved_indent_level);
527 static void de_run_squash(deark *c, de_module_params *mparams)
529 sqctx *d = NULL;
531 d = de_malloc(c, sizeof(sqctx));
533 do_squash_header(c, d, 0);
534 do_squash_main(c, d);
536 de_free(c, d);
539 static int de_identify_squash(deark *c)
541 if(!dbuf_memcmp(c->infile, 0, "SQSH", 4))
542 return 100;
543 return 0;
546 void de_module_squash(deark *c, struct deark_module_info *mi)
548 mi->id = "squash";
549 mi->desc = "Squash (RISC OS compressed file)";
550 mi->run_fn = de_run_squash;
551 mi->identify_fn = de_identify_squash;