1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
// Declarations for the two modules implemented in this file (ArcFS and Squash).
// NOTE(review): DE_DECLARE_MODULE is defined elsewhere; presumably it emits the
// prototype of the named module-registration function -- confirm.
11 DE_DECLARE_MODULE(de_module_arcfs
);
12 DE_DECLARE_MODULE(de_module_squash
);
// Limit on ArcFS directory nesting. do_arcfs_member() reports "Directories
// nested too deeply" once d->subdir_level reaches this value, and
// de_run_arcfs() passes this value plus 10 to de_strarray_create().
14 #define MAX_NESTING_LEVEL 32
// State collected for one ArcFS member (directory entry) while it is parsed
// and extracted.
// NOTE(review): only some fields are visible in this view. Other code in this
// file also accesses fn, is_dir, is_regular_file, cmpr_method, orig_len and
// cmpr_len through this struct.
16 struct arcfs_member_data
{
// RISC OS attributes of the member; this file reads load_addr, exec_addr,
// attribs, mod_time, crc_from_attribs and lzwmaxbits from it.
17 struct de_riscos_file_attrs rfa
;
// Data offset taken from the entry's information word (relative value).
21 i64 file_data_offs_rel
;
// Absolute offset in the input file: d->data_offs + file_data_offs_rel.
22 i64 file_data_offs_abs
;
// Descriptive name for the info byte; set from get_info_byte_name(), which
// returns string constants.
25 const char *cmpr_meth_name
;
// Per-archive parsing context for the ArcFS module.
// NOTE(review): only two fields are visible in this view. Other code in this
// file also uses nmembers, data_offs and subdir_level through this struct.
29 typedef struct localctx_struct
{
// CRC object, created in de_run_arcfs() with DE_CRCOBJ_CRC16_ARC and reset
// before each member file is written.
33 struct de_crcobj
*crco
;
// Names of the directories currently being traversed; names are pushed and
// popped in do_arcfs_member() and joined with de_strarray_make_path().
34 struct de_strarray
*curpath
;
// Reads the ArcFS file header located at pos1.
// Sets d->nmembers and d->data_offs, logs the version fields, and reports an
// error for an unsupported format version.
// The caller (de_run_arcfs) stops processing when this returns 0.
// NOTE(review): the local declarations, the format-version condition and the
// return statements are not visible in this view.
37 static int do_arcfs_file_header(deark
*c
, lctx
*d
, i64 pos1
)
45 de_dbg(c
, "file header at %d", (int)pos1
);
47 pos
+= 8; // Signature
49 hlen
= de_getu32le_p(&pos
);
// The member count is the header length divided by 36.
50 d
->nmembers
= hlen
/36;
51 de_dbg(c
, "header len: %d (%d members)", (int)hlen
, (int)d
->nmembers
);
// Base offset to which each member's relative data offset is added.
53 d
->data_offs
= de_getu32le_p(&pos
);
54 de_dbg(c
, "data offset: %d", (int)d
->data_offs
);
// Both version fields are logged as "major.minor": value/100 and value%100.
56 ver_r
= (u32
)de_getu32le_p(&pos
);
57 de_dbg(c
, "version req'd for read: %u.%02u", (unsigned int)(ver_r
/100),
58 (unsigned int)(ver_r
%100));
59 ver_rw
= (u32
)de_getu32le_p(&pos
);
60 de_dbg(c
, "version req'd for read/write: %u.%02u", (unsigned int)(ver_rw
/100),
61 (unsigned int)(ver_rw
%100));
64 format_ver
= (u32
)de_getu32le_p(&pos
);
65 de_dbg(c
, "format version: %u", (unsigned int)format_ver
);
// NOTE(review): the condition guarding this error is not visible in this view.
67 de_err(c
, "Unsupported format version: %u", (unsigned int)format_ver
);
71 // 68 reserved bytes here
// Decompresses a member stored with the "compressed" method (info byte 0xff).
// The data is LZW in DE_LZWFMT_UNIXCOMPRESS format; the maximum code size
// comes from the member's attributes (md->rfa.lzwmaxbits).
// dcmpri/dcmpro/dres are the caller's input, output and result objects and are
// passed straight through to fmtutil_decompress_lzw().
79 static void do_arcfs_compressed(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
80 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
81 struct de_dfilter_results
*dres
)
83 struct de_lzw_params delzwp
;
85 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
86 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
87 delzwp
.max_code_size
= md
->rfa
.lzwmaxbits
;
// Without a known output length, ask the decoder to tolerate trailing junk.
88 if(!dcmpro
->len_known
) {
89 delzwp
.flags
|= DE_LZWFLAG_TOLERATETRAILINGJUNK
;
91 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
// Decompresses a member stored with the "crunched" method (info byte 0x88)
// using a two-layer decoder: LZW (DE_LZWFMT_UNIXCOMPRESS) as the first codec
// and RLE90 as the second.
// NOTE(review): the lines that attach dcmpri/dcmpro/dres to tlp are not
// visible in this view.
94 static void do_arcfs_crunched(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
95 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
96 struct de_dfilter_results
*dres
)
98 struct de_dcmpr_two_layer_params tlp
;
99 struct de_lzw_params delzwp
;
101 // "Crunched" means "packed", then "compressed".
102 // So we have to "uncompress" (LZW), then "unpack" (RLE90).
104 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
105 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
106 delzwp
.max_code_size
= md
->rfa
.lzwmaxbits
;
108 // This flag tells the LZW decompressor to stop, instead of reporting failure,
109 // if bad LZW compressed data is encountered.
110 // The problem is that some ArcFS files have garbage at the end of the
// NOTE(review): the end of the sentence above is missing from this view;
// presumably it reads "compressed data."
112 // Apparently, we're expected to have a single decompression algorithm that
113 // handles both layers of compression simultaneously, without any buffering
114 // between them. That way, we could stop immediately when we've decompressed
115 // a sufficient number of bytes, and never encounter the garbage. But we
// NOTE(review): the end of the sentence above is missing from this view;
// presumably no such combined decoder is available here, which is why the
// tolerate-trailing-junk flag is set below.
117 delzwp
.flags
|= DE_LZWFLAG_TOLERATETRAILINGJUNK
;
119 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
// First layer: LZW, configured through delzwp.
120 tlp
.codec1_pushable
= dfilter_lzw_codec
;
121 tlp
.codec1_private_params
= (void*)&delzwp
;
// Second layer: RLE90.
123 tlp
.codec2
= dfilter_rle90_codec
;
129 de_dfilter_decompress_two_layer(c
, &tlp
);
// Extracts one regular-file member: builds its full path name, decompresses
// its data to a new output file according to md->cmpr_method, then checks the
// output size and CRC.
// NOTE(review): the final parameter (used below as fi), the declarations of
// outf, crc_calc and have_dres, the "done" label, and several closing braces
// and else branches are not visible in this view.
132 static void do_arcfs_extract_member_file(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
137 de_ucstring
*fullfn
= NULL
;
138 struct de_dfilter_in_params dcmpri
;
139 struct de_dfilter_out_params dcmpro
;
140 struct de_dfilter_results dres
;
143 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
// Give up on this member if its data would extend past the end of the input file.
144 if(md
->file_data_offs_abs
+ md
->cmpr_len
> c
->infile
->len
) goto done
;
146 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
,
147 md
->file_data_offs_abs
, md
->cmpr_len
);
// Full name = current directory path + member name + RISC OS type suffix.
149 fullfn
= ucstring_create(c
);
150 de_strarray_make_path(d
->curpath
, fullfn
, 0);
151 ucstring_append_ucstring(fullfn
, md
->fn
);
152 fmtutil_riscos_append_type_to_filename(c
, fi
, fullfn
, &md
->rfa
, md
->is_dir
, 0);
// Only stored (0x82), packed (0x83), crunched (0x88) and compressed (0xff)
// are handled; anything else is reported as unsupported.
154 if(md
->cmpr_method
!=0x82 && md
->cmpr_method
!=0x83 && md
->cmpr_method
!=0x88 &&
155 md
->cmpr_method
!=0xff)
157 de_err(c
, "Compression type 0x%02x (%s) is not supported.",
158 (unsigned int)md
->cmpr_method
, md
->cmpr_meth_name
);
162 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
164 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
165 dbuf_enable_wbuffer(outf
);
// Register the CRC object as a write listener on the output file, and reset
// it so the CRC covers only this member.
167 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)d
->crco
);
168 de_crcobj_reset(d
->crco
);
// Input: the member's compressed bytes within the archive.
170 dcmpri
.f
= c
->infile
;
171 dcmpri
.pos
= md
->file_data_offs_abs
;
172 dcmpri
.len
= md
->cmpr_len
;
// Output: the expected length is the original size from the member header.
174 dcmpro
.len_known
= 1;
175 dcmpro
.expected_len
= md
->orig_len
;
// Dispatch on the compression method.
177 if(md
->cmpr_method
==0x82) { // stored
178 fmtutil_decompress_uncompressed(c
, &dcmpri
, &dcmpro
, &dres
, 0);
// 0x83: packed (RLE)
180 else if(md
->cmpr_method
==0x83) {
181 fmtutil_decompress_rle90_ex(c
, &dcmpri
, &dcmpro
, &dres
, 0);
// 0xff: compressed (LZW)
184 else if(md
->cmpr_method
==0xff) {
185 do_arcfs_compressed(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
// 0x88: crunched (RLE, then LZW)
188 else if(md
->cmpr_method
==0x88) {
189 do_arcfs_crunched(c
, d
, md
, &dcmpri
, &dcmpro
, &dres
);
192 dbuf_flush(dcmpro
.f
);
// Report a decoder-level failure, if one was recorded.
194 if(have_dres
&& dres
.errcode
!=0) {
195 de_err(c
, "%s: Decompression failed: %s",
196 ucstring_getpsz_d(md
->fn
), de_dfilter_get_errmsg(c
, &dres
));
// The output must be exactly as long as the header said it would be.
200 if(outf
->len
!= md
->orig_len
) {
201 de_err(c
, "%s: Decompression failed: Expected size %"I64_FMT
202 ", got %"I64_FMT
, ucstring_getpsz_d(md
->fn
), md
->orig_len
, outf
->len
);
// Compare the CRC of the written data with the one from the attributes.
// On a mismatch, a stored CRC of 0 only produces a "not available" warning.
206 crc_calc
= de_crcobj_getval(d
->crco
);
207 de_dbg(c
, "crc (calculated): 0x%04x", (unsigned int)crc_calc
);
208 if(crc_calc
!= md
->rfa
.crc_from_attribs
) {
209 if(md
->rfa
.crc_from_attribs
==0) {
210 de_warn(c
, "CRC check not available for file %s", ucstring_getpsz_d(md
->fn
));
213 de_err(c
, "CRC check failed for file %s", ucstring_getpsz_d(md
->fn
));
219 ucstring_destroy(fullfn
);
222 // "Extract" a directory entry
// Builds the directory's full path from d->curpath (without a trailing slash),
// marks fi as a directory, and creates the output object for it.
// NOTE(review): the final parameter (used below as fi) and the declaration and
// release of outf are not visible in this view.
223 static void do_arcfs_extract_member_dir(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
,
227 de_ucstring
*fullfn
= NULL
;
229 fullfn
= ucstring_create(c
);
230 // Note that md->fn has already been added to d->curpath
231 de_strarray_make_path(d
->curpath
, fullfn
, DE_MPFLAG_NOTRAILINGSLASH
);
233 fi
->is_directory
= 1;
234 de_finfo_set_name_from_ucstring(c
, fi
, fullfn
, DE_SNFLAG_FULLPATH
);
236 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
238 ucstring_destroy(fullfn
);
// Creates the file-info object for a member, copies the member's timestamp and
// RISC OS attributes into it, and hands off to the file or directory
// extractor. The file-info object is destroyed before returning.
// NOTE(review): the declaration of fi is not visible in this view.
241 static void do_arcfs_extract_member(deark
*c
, lctx
*d
, struct arcfs_member_data
*md
)
245 fi
= de_finfo_create(c
);
246 fi
->original_filename_flag
= 1;
// Only propagate the modification time if it was decoded successfully.
247 if(md
->rfa
.mod_time
.is_valid
) {
248 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->rfa
.mod_time
;
251 fi
->has_riscos_data
= 1;
252 fi
->riscos_attribs
= md
->rfa
.attribs
;
253 fi
->load_addr
= md
->rfa
.load_addr
;
254 fi
->exec_addr
= md
->rfa
.exec_addr
;
// A member is handled either as a regular file or as a directory.
256 if(md
->is_regular_file
) {
257 do_arcfs_extract_member_file(c
, d
, md
, fi
);
259 else if(md
->is_dir
) {
260 do_arcfs_extract_member_dir(c
, d
, md
, fi
);
263 de_finfo_destroy(c
, fi
);
// Maps an ArcFS info byte to a descriptive string constant for debug and error
// messages. Values without a name yield "?".
// NOTE(review): the switch statement enclosing the case labels is not visible
// in this view; presumably it switches on t.
266 static const char *get_info_byte_name(u8 t
)
268 const char *name
= NULL
;
// 0x00 and 0x01 are entry markers; the remaining values are compression methods.
270 case 0x00: name
="end of dir marker"; break;
271 case 0x01: name
="deleted object"; break;
272 case 0x82: name
="stored"; break;
273 case 0x83: name
="packed (RLE)"; break;
274 case 0x88: name
="crunched"; break;
275 case 0x89: name
="squashed"; break;
276 case 0xff: name
="compressed"; break;
// Fall back to "?" when no case matched.
278 return name
?name
:"?";
// Releases the resources held by a member-data object; the visible code
// destroys the filename string md->fn.
// NOTE(review): any NULL check on md and the release of md itself are not
// visible in this view.
281 static void destroy_arcfs_member_data(deark
*c
, struct arcfs_member_data
*md
)
284 ucstring_destroy(md
->fn
);
288 // Returns 0 only if we should stop parsing the entire arcfs file.
// Parses the member (directory entry) header at pos1.
// A "deleted object" entry (info byte 1) is skipped. An "end of directory"
// marker (info byte 0) decrements d->subdir_level (if positive) and pops one
// name off d->curpath. For other entries the remaining header fields are
// read and do_arcfs_extract_member() is called.
// NOTE(review): the declarations of pos, info_byte and info_word, the "done"
// label, the return statements and several closing braces are not visible in
// this view.
289 static int do_arcfs_member(deark
*c
, lctx
*d
, i64 idx
, i64 pos1
)
294 unsigned int tmpflags
;
295 int saved_indent_level
;
296 struct arcfs_member_data
*md
;
299 de_dbg_indent_save(c
, &saved_indent_level
);
300 md
= de_malloc(c
, sizeof(struct arcfs_member_data
));
301 de_dbg(c
, "header at %"I64_FMT
, pos1
);
// The info byte is either an entry marker (0, 1) or the compression method.
305 info_byte
= de_getbyte_p(&pos
);
306 md
->cmpr_meth_name
= get_info_byte_name(info_byte
);
307 de_dbg(c
, "info byte: 0x%02x (%s)", (unsigned int)info_byte
, md
->cmpr_meth_name
);
308 if(info_byte
==1) goto done
; // deleted object
309 if(info_byte
==0) { // end of directory marker
310 if(d
->subdir_level
>0) d
->subdir_level
--;
311 de_strarray_pop(d
->curpath
);
314 md
->cmpr_method
= info_byte
;
316 // Look ahead at the "information word".
317 // TODO: Is this the right way to check for a directory?
318 info_word
= (u32
)de_getu32le(pos1
+32);
// The top bit of the information word marks a directory; everything else is
// treated as a regular file.
319 md
->is_dir
= (info_word
&0x80000000U
)?1:0;
320 md
->is_regular_file
= !md
->is_dir
;
// Read up to 11 bytes of filename, stopping at the first NUL.
322 md
->fn
= ucstring_create(c
);
323 dbuf_read_to_ucstring(c
->infile
, pos
, 11, md
->fn
, DE_CONVFLAG_STOP_AT_NUL
,
325 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->fn
));
// NOTE(review): presumably the nesting check and the push below apply only to
// directory entries; the enclosing condition is not visible in this view.
327 if(d
->subdir_level
>= MAX_NESTING_LEVEL
) {
328 de_err(c
, "Directories nested too deeply");
333 de_strarray_push(d
->curpath
, md
->fn
);
337 md
->orig_len
= de_getu32le_p(&pos
);
338 if(md
->is_regular_file
) {
339 de_dbg(c
, "orig file length: %"I64_FMT
, md
->orig_len
);
342 fmtutil_riscos_read_load_exec(c
, c
->infile
, &md
->rfa
, pos
);
// Select which optional parts of the attributes field to decode: a CRC for
// regular files, and the LZW max-bits value for the two LZW-based methods.
346 if(md
->is_regular_file
)
347 tmpflags
|= DE_RISCOS_FLAG_HAS_CRC
;
348 if(md
->cmpr_method
==0xff || md
->cmpr_method
==0x88)
349 tmpflags
|= DE_RISCOS_FLAG_HAS_LZWMAXBITS
;
350 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &md
->rfa
, pos
, tmpflags
);
353 md
->cmpr_len
= de_getu32le_p(&pos
);
354 if(md
->is_regular_file
) {
355 de_dbg(c
, "compressed length: %"I64_FMT
, md
->cmpr_len
);
358 de_dbg(c
, "info word: 0x%08x", (unsigned int)info_word
);
360 de_dbg(c
, "is directory: %d", md
->is_dir
);
// For regular files the information word is the data offset, relative to
// d->data_offs.
361 if(md
->is_regular_file
) {
362 md
->file_data_offs_rel
= (i64
)info_word
;
363 md
->file_data_offs_abs
= d
->data_offs
+md
->file_data_offs_rel
;
364 de_dbg(c
, "file data offset: (%"I64_FMT
"+)%"I64_FMT
,
365 d
->data_offs
, md
->file_data_offs_rel
);
367 de_dbg_indent(c
, -1);
369 de_dbg_indent(c
, -1);
371 do_arcfs_extract_member(c
, d
, md
);
// Cleanup: free the member data and restore the debug indentation level.
374 destroy_arcfs_member_data(c
, md
);
375 de_dbg_indent_restore(c
, saved_indent_level
);
// Iterates over the d->nmembers member headers and processes each one with
// do_arcfs_member(). The loop ends early if the current position reaches the
// end of the input file.
// NOTE(review): the declarations of k, pos and ret, the position advance
// between members and the handling of ret are not visible in this view.
379 static void do_arcfs_members(deark
*c
, lctx
*d
, i64 pos1
)
384 for(k
=0; k
<d
->nmembers
; k
++) {
// Do not read member headers that would start at or beyond end of file.
387 if(pos
>=c
->infile
->len
) break;
388 de_dbg(c
, "member[%d]", (int)k
);
390 ret
= do_arcfs_member(c
, d
, k
, pos
);
391 de_dbg_indent(c
, -1);
// Entry point of the ArcFS module: allocates the context, reads the file
// header, creates the path stack and CRC object, processes all members, and
// releases the CRC object and path stack.
// NOTE(review): the declarations of d and pos, the "done" label and the
// release of d itself are not visible in this view.
397 static void de_run_arcfs(deark
*c
, de_module_params
*mparams
)
402 d
= de_malloc(c
, sizeof(lctx
));
// Stop if the header could not be processed.
404 if(!do_arcfs_file_header(c
, d
, pos
)) goto done
;
407 d
->curpath
= de_strarray_create(c
, MAX_NESTING_LEVEL
+10);
408 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
409 do_arcfs_members(c
, d
, pos
);
413 de_crcobj_destroy(d
->crco
);
414 de_strarray_destroy(d
->curpath
);
// Identification routine for ArcFS: compares the first 8 bytes of the input
// file with the signature "Archive" followed by a NUL byte.
// NOTE(review): the returned confidence values are not visible in this view;
// presumably dbuf_memcmp() returns 0 on a match, like memcmp -- confirm.
419 static int de_identify_arcfs(deark
*c
)
421 if(!dbuf_memcmp(c
->infile
, 0, "Archive\x00", 8))
// Fills in the module-info record for the ArcFS module: description, run
// function and identify function.
// NOTE(review): other assignments (for example the module id) may exist but
// are not visible in this view.
426 void de_module_arcfs(deark
*c
, struct deark_module_info
*mi
)
429 mi
->desc
= "ArcFS (RISC OS archive)";
430 mi
->run_fn
= de_run_arcfs
;
431 mi
->identify_fn
= de_identify_arcfs
;
434 ///////////////////////////////////////////////////////////////////////////
// Context for the Squash module.
// NOTE(review): only rfa is visible in this view; other code in this file also
// uses an orig_len field through this struct.
437 typedef struct sqctx_struct
{
// RISC OS attributes read from the Squash header (load/exec address etc.).
439 struct de_riscos_file_attrs rfa
;
// Reads the Squash header at pos1: skips the 4-byte signature, reads the
// original (uncompressed) file length into d->orig_len, then reads the RISC OS
// load/exec fields into d->rfa.
// NOTE(review): the declaration of pos and any further header fields are not
// visible in this view.
442 static void do_squash_header(deark
*c
, sqctx
*d
, i64 pos1
)
446 de_dbg(c
, "header at %d", (int)pos1
);
449 pos
+= 4; // signature
450 d
->orig_len
= de_getu32le_p(&pos
);
451 de_dbg(c
, "orig file length: %"I64_FMT
, d
->orig_len
);
453 fmtutil_riscos_read_load_exec(c
, c
->infile
, &d
->rfa
, pos
);
455 de_dbg_indent(c
, -1);
// Decompresses the body of a Squash file to a single output file.
// The data is LZW in DE_LZWFMT_UNIXCOMPRESS format with the
// DE_LZWFLAG_HAS3BYTEHEADER flag set. The output is named "bin" plus a RISC OS
// type suffix, and its final size is checked against d->orig_len.
// NOTE(review): the declarations of fi and outf, the assignment of dcmpri.pos,
// the attachment of outf to dcmpro, the condition guarding the first de_err()
// and the release of outf are not visible in this view.
458 static void do_squash_main(deark
*c
, sqctx
*d
)
462 de_ucstring
*fn
= NULL
;
463 struct de_dfilter_results dres
;
464 struct de_dfilter_in_params dcmpri
;
465 struct de_dfilter_out_params dcmpro
;
466 struct de_lzw_params delzwp
;
467 int saved_indent_level
;
469 de_dbg_indent_save(c
, &saved_indent_level
);
470 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
// Input: everything from dcmpri.pos to the end of the input file.
472 dcmpri
.f
= c
->infile
;
474 dcmpri
.len
= c
->infile
->len
- dcmpri
.pos
;
475 de_dbg(c
, "compressed data at %"I64_FMT
, dcmpri
.pos
);
478 fi
= de_finfo_create(c
);
480 fn
= ucstring_create(c
);
// Carry the RISC OS attributes from the header over to the output file.
482 fi
->has_riscos_data
= 1;
483 fi
->riscos_attribs
= d
->rfa
.attribs
;
484 fi
->load_addr
= d
->rfa
.load_addr
;
485 fi
->exec_addr
= d
->rfa
.exec_addr
;
// The output name starts as the fixed string "bin".
487 ucstring_append_sz(fn
, "bin", DE_ENCODING_LATIN1
);
488 fmtutil_riscos_append_type_to_filename(c
, fi
, fn
, &d
->rfa
, 0, 1);
489 de_finfo_set_name_from_ucstring(c
, fi
, fn
, 0);
491 if(d
->rfa
.mod_time
.is_valid
) {
492 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = d
->rfa
.mod_time
;
495 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
496 dbuf_enable_wbuffer(outf
);
// The decoder is not told an expected output length; the size is verified
// after decompression instead.
498 dcmpro
.len_known
= 0;
500 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
501 delzwp
.fmt
= DE_LZWFMT_UNIXCOMPRESS
;
502 delzwp
.flags
|= DE_LZWFLAG_HAS3BYTEHEADER
;
504 fmtutil_decompress_lzw(c
, &dcmpri
, &dcmpro
, &dres
, &delzwp
);
505 dbuf_flush(dcmpro
.f
);
// Report the decoder's error message.
508 de_err(c
, "%s", de_dfilter_get_errmsg(c
, &dres
));
// The output must be exactly as long as the header's original length.
512 if(outf
->len
!= d
->orig_len
) {
513 de_err(c
, "Decompression failed, expected size %"I64_FMT
514 ", got %"I64_FMT
, d
->orig_len
, outf
->len
);
// Cleanup.
520 de_finfo_destroy(c
, fi
);
521 ucstring_destroy(fn
);
522 de_dbg_indent_restore(c
, saved_indent_level
);
// Entry point of the Squash module: allocates the context, reads the header at
// offset 0, then decompresses the file body.
// NOTE(review): the declaration and release of d are not visible in this view.
525 static void de_run_squash(deark
*c
, de_module_params
*mparams
)
529 d
= de_malloc(c
, sizeof(sqctx
));
531 do_squash_header(c
, d
, 0);
532 do_squash_main(c
, d
);
// Identification routine for Squash: compares the first 4 bytes of the input
// file with the signature "SQSH".
// NOTE(review): the returned confidence values are not visible in this view;
// presumably dbuf_memcmp() returns 0 on a match, like memcmp -- confirm.
537 static int de_identify_squash(deark
*c
)
539 if(!dbuf_memcmp(c
->infile
, 0, "SQSH", 4))
// Fills in the module-info record for the Squash module: description, run
// function and identify function.
// NOTE(review): other assignments (for example the module id) may exist but
// are not visible in this view.
544 void de_module_squash(deark
*c
, struct deark_module_info
*mi
)
547 mi
->desc
= "Squash (RISC OS compressed file)";
548 mi
->run_fn
= de_run_squash
;
549 mi
->identify_fn
= de_identify_squash
;