1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
// Declarations for the two module entry points defined later in this file
// (de_module_arcfs and de_module_squash).
11 DE_DECLARE_MODULE(de_module_arcfs
);
12 DE_DECLARE_MODULE(de_module_squash
);
// Directory nesting limit. do_arcfs_member() compares d->subdir_level against
// it, and de_run_arcfs() passes MAX_NESTING_LEVEL+10 to de_strarray_create().
14 #define MAX_NESTING_LEVEL 32
// State collected for one ArcFS member (a file or directory entry) while it
// is being parsed and extracted.
// NOTE(review): other fields used by the code (fn, cmpr_method, is_dir,
// is_regular_file, orig_len, cmpr_len) are not visible in this view.
16 struct arcfs_member_data
{
// RISC OS attributes; the code reads load_addr, exec_addr, attribs, mod_time,
// crc_from_attribs and lzwmaxbits from it.
17 struct de_riscos_file_attrs rfa
;
// Data offset taken from the member's "information word" (see do_arcfs_member).
21 i64 file_data_offs_rel
;
// d->data_offs + file_data_offs_rel; used as the read position in the input file.
22 i64 file_data_offs_abs
;
// Descriptive name of the info byte, as returned by get_info_byte_name().
25 const char *cmpr_meth_name
;
// Per-run context for the ArcFS module.
// NOTE(review): other fields used by the code (nmembers, data_offs,
// subdir_level) are not visible in this view.
29 typedef struct localctx_struct
{
// CRC object created with DE_CRCOBJ_CRC16_ARC in de_run_arcfs(); it is reset
// before each member file is written and fed through our_writelistener_cb().
33 struct de_crcobj
*crco
;
// Stack of directory names for the current position in the archive tree;
// pushed and popped in do_arcfs_member().
34 struct de_strarray
*curpath
;
// Reads the ArcFS file header that starts at pos1.
//
// After skipping the 8-byte signature, the visible code reads these
// little-endian 32-bit fields in order: header length, data offset,
// version required for read, version required for read/write, format version.
// It stores the member count in d->nmembers and the data offset in d->data_offs.
//
// The caller (de_run_arcfs) stops processing when this returns 0.
// NOTE(review): the local declarations, the condition guarding the
// "Unsupported format version" error, and the return statements are not
// visible in this view.
37 static int do_arcfs_file_header(deark
*c
, lctx
*d
, i64 pos1
)
45 de_dbg(c
, "file header at %d", (int)pos1
);
47 pos
+= 8; // Signature
49 hlen
= de_getu32le_p(&pos
);
// The member count is the header length divided by 36. Presumably 36 is the
// size of one member entry (do_arcfs_member reads a 32-bit word at offset 32).
50 d
->nmembers
= hlen
/36;
51 de_dbg(c
, "header len: %d (%d members)", (int)hlen
, (int)d
->nmembers
);
// Base offset that is added to each member's relative data offset.
53 d
->data_offs
= de_getu32le_p(&pos
);
54 de_dbg(c
, "data offset: %d", (int)d
->data_offs
);
// Version numbers are printed as value/100 "." value%100 (two-digit minor part).
56 ver_r
= (u32
)de_getu32le_p(&pos
);
57 de_dbg(c
, "version req'd for read: %u.%02u", (unsigned int)(ver_r
/100),
58 (unsigned int)(ver_r
%100));
59 ver_rw
= (u32
)de_getu32le_p(&pos
);
60 de_dbg(c
, "version req'd for read/write: %u.%02u", (unsigned int)(ver_rw
/100),
61 (unsigned int)(ver_rw
%100));
64 format_ver
= (u32
)de_getu32le_p(&pos
);
65 de_dbg(c
, "format version: %u", (unsigned int)format_ver
);
67 de_err(c
, "Unsupported format version: %u", (unsigned int)format_ver
);
71 // 68 reserved bytes here
// Decompresses a member stored with the "compressed" method (info byte 0xff).
//
// Builds an LZW parameter block (DE_LZWFMT_UNIXCOMPRESS, with the maximum code
// size taken from md->rfa.lzwmaxbits) and passes it with the caller's
// input/output/result objects to fmtutil_decompress_lzw().
// The d parameter is not referenced in the visible code.
79 static void do_arcfs_compressed(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
80 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
81 struct de_dfilter_results
*dres
)
83 struct de_lzw_params delzwp
;
85 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
86 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
87 delzwp
.max_code_size
= md
->rfa
.lzwmaxbits
;
// Only tolerate trailing junk when the caller has not said the output length
// is known.
88 if(!dcmpro
->len_known
) {
89 delzwp
.flags
|= DE_LZWFLAG_TOLERATETRAILINGJUNK
;
91 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
// Decompresses a member stored with the "crunched" method (info byte 0x88).
//
// Sets up a two-layer decompression: the first codec is LZW (same parameters
// as do_arcfs_compressed, but DE_LZWFLAG_TOLERATETRAILINGJUNK is always set),
// the second is RLE90. The work is done by de_dfilter_decompress_two_layer().
// NOTE(review): how dcmpri/dcmpro/dres are attached to tlp is not visible in
// this view.
94 static void do_arcfs_crunched(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
95 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
96 struct de_dfilter_results
*dres
)
98 struct de_dcmpr_two_layer_params tlp
;
99 struct de_lzw_params delzwp
;
101 // "Crunched" means "packed", then "compressed".
102 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
104 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
105 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
106 delzwp
.max_code_size
= md
->rfa
.lzwmaxbits
;
// NOTE(review): the explanatory comment below is missing lines in this view
// (two of its sentences break off).
108 // This flag tells the LZW decompressor to stop, instead of reporting failure,
109 // if bad LZW compressed data is encountered.
110 // The problem is that some ArcFS files have garbage at the end of the
112 // Apparently, we're expected to have a single decompression algorithm that
113 // handles both layers of compression simultaneously, without any buffering
114 // between them. That way, we could stop immediately when we've decompressed
115 // a sufficient number of bytes, and never encounter the garbage. But we
117 delzwp
.flags
|= DE_LZWFLAG_TOLERATETRAILINGJUNK
;
119 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
// Layer 1: LZW, configured through delzwp.
120 tlp
.codec1_pushable
= dfilter_lzw_codec
;
121 tlp
.codec1_private_params
= (void*)&delzwp
;
// Layer 2: RLE90 applied to the LZW output.
123 tlp
.codec2
= dfilter_rle90_codec
;
129 de_dfilter_decompress_two_layer(c
, &tlp
);
// Write-listener callback installed on the output dbuf with
// dbuf_set_writelistener(). userdata is the struct de_crcobj passed at
// registration; the buf_len bytes at buf are added to that CRC.
// The f parameter is not used.
132 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
134 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
135 de_crcobj_addbuf(crco
, buf
, buf_len
);
// Extracts one regular-file member to an output file and verifies it.
//
// Visible steps: bounds-check the compressed data, build the output filename
// from the current directory path plus the member name, reject unsupported
// compression methods, decompress through the matching routine, then check
// the decompressor status, the output size and the CRC.
// NOTE(review): the end of the parameter list (the code uses a finfo named
// "fi"), several local declarations, the "done" label and some else branches
// are not visible in this view.
138 static void do_arcfs_extract_member_file(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
143 de_ucstring
*fullfn
= NULL
;
144 struct de_dfilter_in_params dcmpri
;
145 struct de_dfilter_out_params dcmpro
;
146 struct de_dfilter_results dres
;
149 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
// Give up on this member if its compressed data would run past the end of the
// input file.
150 if(md
->file_data_offs_abs
+ md
->cmpr_len
> c
->infile
->len
) goto done
;
152 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
,
153 md
->file_data_offs_abs
, md
->cmpr_len
);
// Full name = current directory path (d->curpath) + member name, with the
// RISC OS file type appended by the fmtutil helper.
155 fullfn
= ucstring_create(c
);
156 de_strarray_make_path(d
->curpath
, fullfn
, 0);
157 ucstring_append_ucstring(fullfn
, md
->fn
);
158 fmtutil_riscos_append_type_to_filename(c
, fi
, fullfn
, &md
->rfa
, md
->is_dir
, 0);
// Only methods 0x82, 0x83, 0x88 and 0xff are handled below.
160 if(md
->cmpr_method
!=0x82 && md
->cmpr_method
!=0x83 && md
->cmpr_method
!=0x88 &&
161 md
->cmpr_method
!=0xff)
163 de_err(c
, "Compression type 0x%02x (%s) is not supported.",
164 (unsigned int)md
->cmpr_method
, md
->cmpr_meth_name
);
168 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
170 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
// Every write to outf is fed into d->crco through the listener; the CRC is
// reset first so it covers only this member.
172 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
173 de_crcobj_reset(d
->crco
);
// Input: cmpr_len bytes of the input file starting at the absolute data offset.
175 dcmpri
.f
= c
->infile
;
176 dcmpri
.pos
= md
->file_data_offs_abs
;
177 dcmpri
.len
= md
->cmpr_len
;
// Output: the expected length is the original (uncompressed) length.
179 dcmpro
.len_known
= 1;
180 dcmpro
.expected_len
= md
->orig_len
;
// Dispatch on the compression method.
182 if(md
->cmpr_method
==0x82) { // stored
183 fmtutil_decompress_uncompressed(c
, &dcmpri
, &dcmpro
, &dres
, 0);
// 0x83: "packed (RLE)" per get_info_byte_name().
185 else if(md
->cmpr_method
==0x83) {
186 fmtutil_decompress_rle90_ex(c
, &dcmpri
, &dcmpro
, &dres
, 0);
// 0xff: "compressed" per get_info_byte_name().
189 else if(md
->cmpr_method
==0xff) {
190 do_arcfs_compressed(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
// 0x88: "crunched" per get_info_byte_name().
193 else if(md
->cmpr_method
==0x88) {
194 do_arcfs_crunched(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
// Check 1: the decompressor reported an error.
// NOTE(review): where have_dres is set is not visible in this view.
198 if(have_dres
&& dres
.errcode
!=0) {
199 de_err(c
, "%s: Decompression failed: %s",
200 ucstring_getpsz_d(md
->fn
), de_dfilter_get_errmsg(c
, &dres
));
// Check 2: the number of bytes written must equal the original length.
204 if(outf
->len
!= md
->orig_len
) {
205 de_err(c
, "%s: Decompression failed: Expected size %"I64_FMT
206 ", got %"I64_FMT
, ucstring_getpsz_d(md
->fn
), md
->orig_len
, outf
->len
);
// Check 3: compare the CRC accumulated by the write listener with the one
// from the member's attributes. A stored CRC of 0 produces the "not available"
// warning; the "failed" error is presumably in an else branch on a line not
// shown here.
210 crc_calc
= de_crcobj_getval(d
->crco
);
211 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)crc_calc
);
212 if(crc_calc
!= md
->rfa
.crc_from_attribs
) {
213 if(md
->rfa
.crc_from_attribs
==0) {
214 de_warn(c
, "CRC check not available for file %s", ucstring_getpsz_d(md
->fn
));
217 de_err(c
, "CRC check failed for file %s", ucstring_getpsz_d(md
->fn
));
223 ucstring_destroy(fullfn
);
226 // "Extract" a directory entry
// Builds the directory's full path from d->curpath (no trailing slash), marks
// the finfo as a directory, and creates an output entry for it.
// NOTE(review): the end of the parameter list (the code uses a finfo named
// "fi"), the declaration of outf, and anything done with outf afterwards are
// not visible in this view.
227 static void do_arcfs_extract_member_dir(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
231 de_ucstring
*fullfn
= NULL
;
233 fullfn
= ucstring_create(c
);
234 // Note that md->fn has already been added to d->curpath
235 de_strarray_make_path(d
->curpath
, fullfn
, DE_MPFLAG_NOTRAILINGSLASH
);
237 fi
->is_directory
= 1;
238 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
240 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
242 ucstring_destroy(fullfn
);
// Creates a finfo for the member, copies the member's RISC OS metadata into
// it, and hands off to the file or directory extraction routine depending on
// md->is_regular_file / md->is_dir. The finfo is destroyed before returning.
// NOTE(review): the declaration of fi is not visible in this view.
245 static void do_arcfs_extract_member(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
)
249 fi
= de_finfo_create(c
);
250 fi
->original_filename_flag
= 1;
// Copy the modification time only when it was marked valid.
251 if(md
->rfa
.mod_time
.is_valid
) {
252 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->rfa
.mod_time
;
// RISC OS-specific metadata: attributes plus load and exec addresses.
255 fi
->has_riscos_data
= 1;
256 fi
->riscos_attribs
= md
->rfa
.attribs
;
257 fi
->load_addr
= md
->rfa
.load_addr
;
258 fi
->exec_addr
= md
->rfa
.exec_addr
;
260 if(md
->is_regular_file
) {
261 do_arcfs_extract_member_file(c
, d
, md
, fi
);
263 else if(md
->is_dir
) {
264 do_arcfs_extract_member_dir(c
, d
, md
, fi
);
267 de_finfo_destroy(c
, fi
);
// Maps a member's info byte to a short descriptive string for messages.
// Returns "?" when name is still NULL after the case list below.
// NOTE(review): the switch header and closing braces are not visible in this
// view.
270 static const char *get_info_byte_name(u8 t
)
272 const char *name
= NULL
;
274 case 0x00: name
="end of dir marker"; break;
275 case 0x01: name
="deleted object"; break;
276 case 0x82: name
="stored"; break;
277 case 0x83: name
="packed (RLE)"; break;
278 case 0x88: name
="crunched"; break;
279 case 0x89: name
="squashed"; break;
280 case 0xff: name
="compressed"; break;
282 return name
?name
:"?";
// Releases the resources owned by a member-data object. The visible code
// destroys the member's filename string.
// NOTE(review): any NULL check on md and the freeing of md itself are not
// visible in this view.
285 static void destroy_arcfs_member_data(deark
*c
, struct arcfs_member_data
*md
)
288 ucstring_destroy(md
->fn
);
// Parses one member entry starting at pos1 and extracts it.
//
// Field order in the visible code: info byte, filename (up to 11 bytes,
// NUL-terminated), original length, load/exec addresses, attributes,
// compressed length, and the "information word" (read ahead from pos1+32).
// An info byte of 1 marks a deleted object and is skipped; 0 marks the end of
// a directory and pops one level off d->curpath.
// The idx parameter is not referenced in the visible code.
// NOTE(review): several declarations, pos updates, conditions, return
// statements and the "done" label are not visible in this view.
292 // Returns 0 only if we should stop parsing the entire arcfs file.
293 static int do_arcfs_member(deark
*c
, lctx
*d
, i64 idx
, i64 pos1
)
298 unsigned int tmpflags
;
299 int saved_indent_level
;
300 struct arcfs_member_data
*md
;
303 de_dbg_indent_save(c
, &saved_indent_level
);
304 md
= de_malloc(c
, sizeof(struct arcfs_member_data
));
305 de_dbg(c
, "header at %"I64_FMT
, pos1
);
// The info byte is either a special marker (0, 1) or the compression method.
309 info_byte
= de_getbyte_p(&pos
);
310 md
->cmpr_meth_name
= get_info_byte_name(info_byte
);
311 de_dbg(c
, "info byte: 0x%02x (%s)", (unsigned int)info_byte
, md
->cmpr_meth_name
);
312 if(info_byte
==1) goto done
; // deleted object
313 if(info_byte
==0) { // end of directory marker
// Leave the current subdirectory; the level is never decremented below zero.
314 if(d
->subdir_level
>0) d
->subdir_level
--;
315 de_strarray_pop(d
->curpath
);
318 md
->cmpr_method
= info_byte
;
320 // Look ahead at the "information word".
321 // TODO: Is this the right way to check for a directory?
322 info_word
= (u32
)de_getu32le(pos1
+32);
// The top bit of the information word marks a directory; anything else is
// treated as a regular file.
323 md
->is_dir
= (info_word
&0x80000000U
)?1:0;
324 md
->is_regular_file
= !md
->is_dir
;
// NOTE(review): the final argument of dbuf_read_to_ucstring (presumably the
// encoding) is on a line not shown here.
326 md
->fn
= ucstring_create(c
);
327 dbuf_read_to_ucstring(c
->infile
, pos
, 11, md
->fn
, DE_CONVFLAG_STOP_AT_NUL
,
329 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->fn
));
// Refuse to descend past MAX_NESTING_LEVEL directory levels.
// NOTE(review): the enclosing condition (presumably "if md->is_dir") is not
// visible in this view.
331 if(d
->subdir_level
>= MAX_NESTING_LEVEL
) {
332 de_err(c
, "Directories nested too deeply");
// Add this name to the current path stack.
337 de_strarray_push(d
->curpath
, md
->fn
);
341 md
->orig_len
= de_getu32le_p(&pos
);
342 if(md
->is_regular_file
) {
343 de_dbg(c
, "orig file length: %"I64_FMT
, md
->orig_len
);
346 fmtutil_riscos_read_load_exec(c
, c
->infile
, &md
->rfa
, pos
);
// Select which sub-fields the attributes reader should decode: a CRC for
// regular files, and the LZW max-bits value for methods 0xff and 0x88.
// NOTE(review): the initialization of tmpflags is not visible in this view.
350 if(md
->is_regular_file
)
351 tmpflags
|= DE_RISCOS_FLAG_HAS_CRC
;
352 if(md
->cmpr_method
==0xff || md
->cmpr_method
==0x88)
353 tmpflags
|= DE_RISCOS_FLAG_HAS_LZWMAXBITS
;
354 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &md
->rfa
, pos
, tmpflags
);
357 md
->cmpr_len
= de_getu32le_p(&pos
);
358 if(md
->is_regular_file
) {
359 de_dbg(c
, "compressed length: %"I64_FMT
, md
->cmpr_len
);
362 de_dbg(c
, "info word: 0x%08x", (unsigned int)info_word
);
364 de_dbg(c
, "is directory: %d", md
->is_dir
);
// For regular files the information word is the data offset relative to
// d->data_offs.
365 if(md
->is_regular_file
) {
366 md
->file_data_offs_rel
= (i64
)info_word
;
367 md
->file_data_offs_abs
= d
->data_offs
+md
->file_data_offs_rel
;
368 de_dbg(c
, "file data offset: (%"I64_FMT
"+)%"I64_FMT
,
369 d
->data_offs
, md
->file_data_offs_rel
);
371 de_dbg_indent(c
, -1);
373 de_dbg_indent(c
, -1);
375 do_arcfs_extract_member(c
, d
, md
);
// Cleanup: release the member data and restore the debug indentation level
// saved at the top of the function.
378 destroy_arcfs_member_data(c
, md
);
379 de_dbg_indent_restore(c
, saved_indent_level
);
// Iterates over the d->nmembers member entries, calling do_arcfs_member() for
// each. The loop stops early when the position reaches the end of the input
// file.
// NOTE(review): the declarations, how pos is initialized and advanced, and
// how ret is used are not visible in this view.
383 static void do_arcfs_members(deark
*c
, lctx
*d
, i64 pos1
)
388 for(k
=0; k
<d
->nmembers
; k
++) {
391 if(pos
>=c
->infile
->len
) break;
392 de_dbg(c
, "member[%d]", (int)k
);
394 ret
= do_arcfs_member(c
, d
, k
, pos
);
395 de_dbg_indent(c
, -1);
// Module entry point for ArcFS archives: allocates the context, parses the
// file header (jumping to cleanup if that returns 0), creates the path stack
// and CRC object, processes all members, then destroys the helpers.
// mparams is not referenced in the visible code.
// NOTE(review): the declarations, the values assigned to pos, the "done"
// label and the freeing of d are not visible in this view.
401 static void de_run_arcfs(deark
*c
, de_module_params
*mparams
)
406 d
= de_malloc(c
, sizeof(lctx
));
408 if(!do_arcfs_file_header(c
, d
, pos
)) goto done
;
// The second argument is MAX_NESTING_LEVEL plus 10; presumably an upper bound
// on the number of path components (TODO confirm against de_strarray_create).
411 d
->curpath
= de_strarray_create(c
, MAX_NESTING_LEVEL
+10);
412 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
413 do_arcfs_members(c
, d
, pos
);
417 de_crcobj_destroy(d
->crco
);
418 de_strarray_destroy(d
->curpath
);
// Format identification for ArcFS: compares the first 8 bytes of the input
// file with "Archive" followed by a NUL byte.
// NOTE(review): the return statements (confidence values) are not visible in
// this view; dbuf_memcmp presumably returns 0 on a match.
423 static int de_identify_arcfs(deark
*c
)
425 if(!dbuf_memcmp(c
->infile
, 0, "Archive\x00", 8))
// Fills in the module info record for the ArcFS module: its description and
// the run/identify callbacks defined above.
// NOTE(review): other assignments (e.g. the module id) may be on lines not
// visible in this view.
430 void de_module_arcfs(deark
*c
, struct deark_module_info
*mi
)
433 mi
->desc
= "ArcFS (RISC OS archive)";
434 mi
->run_fn
= de_run_arcfs
;
435 mi
->identify_fn
= de_identify_arcfs
;
438 ///////////////////////////////////////////////////////////////////////////
// Per-run context for the Squash module.
// NOTE(review): other fields used by the code (orig_len) and the closing
// "} sqctx;" are not visible in this view.
441 typedef struct sqctx_struct
{
// RISC OS attributes; the code reads load_addr, exec_addr, attribs and
// mod_time from it.
443 struct de_riscos_file_attrs rfa
;
// Reads the Squash file header at pos1: skips the 4-byte signature, reads the
// original (uncompressed) length as a little-endian 32-bit value into
// d->orig_len, then reads the load/exec addresses into d->rfa.
// NOTE(review): the declaration of pos and any further header fields are not
// visible in this view.
446 static void do_squash_header(deark
*c
, sqctx
*d
, i64 pos1
)
450 de_dbg(c
, "header at %d", (int)pos1
);
453 pos
+= 4; // signature
454 d
->orig_len
= de_getu32le_p(&pos
);
455 de_dbg(c
, "orig file length: %"I64_FMT
, d
->orig_len
);
457 fmtutil_riscos_read_load_exec(c
, c
->infile
, &d
->rfa
, pos
);
459 de_dbg_indent(c
, -1);
// Decompresses the body of a Squash file to a single output file.
//
// The compressed data runs from dcmpri.pos to the end of the input file. The
// output is named "bin" with a RISC OS type suffix appended by the fmtutil
// helper. Decompression is LZW in DE_LZWFMT_UNIXCOMPRESS format with the
// DE_LZWFLAG_HAS3BYTEHEADER flag set. Afterwards the output size is compared
// with d->orig_len.
// NOTE(review): the declarations of fi and outf, the assignment of
// dcmpri.pos, the hookup of outf to dcmpro, the condition guarding the first
// de_err, and the closing of outf are not visible in this view.
462 static void do_squash_main(deark
*c
, sqctx
*d
)
466 de_ucstring
*fn
= NULL
;
467 struct de_dfilter_results dres
;
468 struct de_dfilter_in_params dcmpri
;
469 struct de_dfilter_out_params dcmpro
;
470 struct de_lzw_params delzwp
;
471 int saved_indent_level
;
473 de_dbg_indent_save(c
, &saved_indent_level
);
474 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
// Input: everything from dcmpri.pos to the end of the input file.
476 dcmpri
.f
= c
->infile
;
478 dcmpri
.len
= c
->infile
->len
- dcmpri
.pos
;
479 de_dbg(c
, "compressed data at %"I64_FMT
, dcmpri
.pos
);
482 fi
= de_finfo_create(c
);
484 fn
= ucstring_create(c
);
// Carry the RISC OS metadata from the header over to the output file info.
486 fi
->has_riscos_data
= 1;
487 fi
->riscos_attribs
= d
->rfa
.attribs
;
488 fi
->load_addr
= d
->rfa
.load_addr
;
489 fi
->exec_addr
= d
->rfa
.exec_addr
;
// The base name is the fixed string "bin"; no original filename is read in
// the visible code.
491 ucstring_append_sz(fn
, "bin", DE_ENCODING_LATIN1
);
492 fmtutil_riscos_append_type_to_filename(c
, fi
, fn
, &d
->rfa
, 0, 1);
493 de_finfo_set_name_from_ucstring(c
, fi
, fn
, 0);
495 if(d
->rfa
.mod_time
.is_valid
) {
496 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = d
->rfa
.mod_time
;
499 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
// The output length is not given to the decompressor; it is only checked
// after the fact (see the size comparison below).
501 dcmpro
.len_known
= 0;
503 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
504 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
505 delzwp
.flags
|= DE_LZWFLAG_HAS3BYTEHEADER
;
507 fmtutil_decompress_lzw(c
, &dcmpri
, &dcmpro
, &dres
, &delzwp
);
// Report the decompressor's error message.
510 de_err(c
, "%s", de_dfilter_get_errmsg(c
, &dres
));
// The decompressed size must match the length recorded in the header.
514 if(outf
->len
!= d
->orig_len
) {
515 de_err(c
, "Decompression failed, expected size %"I64_FMT
516 ", got %"I64_FMT
, d
->orig_len
, outf
->len
);
522 de_finfo_destroy(c
, fi
);
523 ucstring_destroy(fn
);
524 de_dbg_indent_restore(c
, saved_indent_level
);
// Module entry point for Squash files: allocates the context, reads the
// header at offset 0, then decompresses the file body.
// mparams is not referenced in the visible code.
// NOTE(review): the declaration of d and its release are not visible in this
// view.
527 static void de_run_squash(deark
*c
, de_module_params
*mparams
)
531 d
= de_malloc(c
, sizeof(sqctx
));
533 do_squash_header(c
, d
, 0);
534 do_squash_main(c
, d
);
// Format identification for Squash: compares the first 4 bytes of the input
// file with "SQSH".
// NOTE(review): the return statements (confidence values) are not visible in
// this view; dbuf_memcmp presumably returns 0 on a match.
539 static int de_identify_squash(deark
*c
)
541 if(!dbuf_memcmp(c
->infile
, 0, "SQSH", 4))
// Fills in the module info record for the Squash module: its description and
// the run/identify callbacks defined above.
// NOTE(review): other assignments (e.g. the module id) may be on lines not
// visible in this view.
546 void de_module_squash(deark
*c
, struct deark_module_info
*mi
)
549 mi
->desc
= "Squash (RISC OS compressed file)";
550 mi
->run_fn
= de_run_squash
;
551 mi
->identify_fn
= de_identify_squash
;