1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZOO compressed archive format
7 // The ZOO parser in this file was originally derived from unzoo.c v4.4
8 // by Martin Schoenert.
9 // The original file had this notice:
12 *A unzoo.c Tools Martin Schoenert
14 *H @(#)$Id: unzoo.c,v 4.4 2000/05/29 08:56:57 sal Exp $
16 *Y This file is in the Public Domain.
19 // To be clear, the code in this file (Deark's zoo.c file) is covered by
20 // Deark's standard terms of use.
22 #include <deark-config.h>
23 #include <deark-private.h>
24 #include <deark-fmtutil.h>
26 DE_DECLARE_MODULE(de_module_zoo
);
27 DE_DECLARE_MODULE(de_module_zoo_filter
);
28 DE_DECLARE_MODULE(de_module_zoo_z
);
30 #define ZOO_SIGNATURE 0xfdc4a7dcU
32 #define ZOOCMPR_STORED 0
36 struct localctx_struct
;
37 typedef struct localctx_struct lctx
;
40 // Data associated with one Zoo member file
43 de_ucstring
*fullname
;
44 u8 type
; /* type of current member (1) */
45 u8 method
; /* packing method of member (0..2) */
47 i64 next_member_hdr_pos
;
52 i64 comment_len
; // 0 if no comment
53 unsigned int datdos
; /* date (in DOS format) */
54 unsigned int timdos
; /* time (in DOS format) */
58 u32 crc_hdr_calculated
;
59 u8 majver
; /* major version needed to extract */
60 u8 minver
; /* minor version needed to extract */
61 u8 is_deleted
; /* 1 if member is deleted, 0 else */
62 u8 timzon
; /* time zone */
63 unsigned int system
; /* system identifier */
64 u32 attribs
; /* file permissions */
65 u8 vflag
; /* gens. on, last gen., gen. limit */
66 unsigned int ver
; /* version number of member */
69 struct localctx_struct
{
71 int extract_comments_to_files
;
73 struct de_inthashtable
*offsets_seen
;
75 i64 first_member_hdr_pos
;
78 u8 type
; // archive header version
79 i64 archive_comment_pos
;
80 i64 archive_comment_len
; // 0 if no comment
81 u8 vdata
; /* gens. on, gen. limit */
83 int num_deleted_files_found
;
86 // Shared by all member files, so we don't have to recalculate the CRC table
87 // for each member file.
88 struct de_crcobj
*crco
;
91 // An offset is considered meaningful if len!=0.
92 static void on_offset_found(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
94 if(len
==0 || pos
<0) return;
95 if(pos
<d
->min_offset_found
) {
96 d
->min_offset_found
= pos
;
100 static const char *get_member_name_for_msg(deark
*c
, lctx
*d
, struct member_data
*md
)
102 if(md
&& ucstring_isnonempty(md
->fullname
)) {
103 return ucstring_getpsz_d(md
->fullname
);
108 static void do_extract_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, int is_main
)
110 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "comment.txt",
111 NULL
, DE_CREATEFLAG_IS_AUX
);
114 static void do_dbg_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, const char *name
,
117 de_ucstring
*s
= NULL
;
119 if(c
->debug_level
<1) return;
120 s
= ucstring_create(c
);
121 dbuf_read_to_ucstring_n(c
->infile
, pos
, len
, DE_DBG_MAX_STRLEN
, s
,
122 0, d
->input_encoding
);
123 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
127 static void do_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, const char *name
,
128 int is_main
, int extract_to_file
)
130 on_offset_found(c
, d
, pos
, len
);
132 if(pos
<0 || pos
+len
>c
->infile
->len
) return;
133 if(extract_to_file
) {
134 do_extract_comment(c
, d
, pos
, len
, is_main
);
137 do_dbg_comment(c
, d
, pos
, len
, name
, is_main
);
141 // Read the main file header
142 static int do_global_header(deark
*c
, lctx
*d
, i64 pos1
)
147 u32 zoo_minus
, zoo_minus_expected
;
149 de_ucstring
*txt
= NULL
;
151 de_dbg(c
, "archive header at %"I64_FMT
, pos1
);
154 // Intro text, e.g. "ZOO 2.10 Archive."
155 // Zoo source code (zoo.h) says "The contents of the text message are [...]
156 // not used by Zoo and they may be anything.".
157 txt
= ucstring_create(c
);
158 for(i
=0; i
<20; i
++) {
161 ch
= de_getbyte(pos
+i
);
162 if(ch
==26 || ch
==0) break;
163 if(ch
<32 || ch
>126) ch
= '_';
164 ucstring_append_char(txt
, (de_rune
)ch
);
166 de_dbg(c
, "header text: \"%s\"", ucstring_getpsz_d(txt
));
169 sig
= (unsigned int)de_getu32le_p(&pos
);
170 if (sig
!= ZOO_SIGNATURE
) goto done
;
172 d
->first_member_hdr_pos
= de_getu32le_p(&pos
);
173 de_dbg(c
, "first entry pos: %"I64_FMT
, d
->first_member_hdr_pos
);
175 zoo_minus
= (u32
)de_getu32le_p(&pos
);
176 de_dbg(c
, "consistency check: 0x%08x", (UI
)zoo_minus
);
177 zoo_minus_expected
= (u32
)((~(u32
)d
->first_member_hdr_pos
)+(u32
)1);
178 if(zoo_minus
!=zoo_minus_expected
) {
179 de_warn(c
, "Archive header failed consistency check (is 0x%08x, expected 0x%08x)",
180 (UI
)zoo_minus
, (UI
)zoo_minus_expected
);
183 // Note: The version number fields are sometimes erroneously documented as
184 // "version made by" and "version needed to extract [all files]".
185 d
->majver
= de_getbyte_p(&pos
);
186 d
->minver
= de_getbyte_p(&pos
);
187 de_dbg(c
, "version needed to manipulate archive: %d.%d", (int)d
->majver
, (int)d
->minver
);
189 // Fields that aren't present in old versions.
190 if(d
->first_member_hdr_pos
> 34) {
191 d
->type
= de_getbyte_p(&pos
);
192 de_dbg(c
, "archive header format version (\"type\"): %u", (unsigned int)d
->type
);
193 // 1 is the only value here with a known meaning, but we'll accept some slightly
194 // higher values, and assume they are backward-compatible.
195 if(d
->type
<1 || d
->type
>5) {
200 d
->archive_comment_pos
= de_getu32le_p(&pos
);
201 d
->archive_comment_len
= de_getu16le_p(&pos
);
202 de_dbg(c
, "archive comment pos: %"I64_FMT
", len=%d", d
->archive_comment_pos
,
203 (int)d
->archive_comment_len
);
204 do_comment(c
, d
, d
->archive_comment_pos
, d
->archive_comment_len
, "archive comment",
205 1, d
->extract_comments_to_files
);
207 d
->vdata
= de_getbyte_p(&pos
);
208 de_dbg(c
, "archive-level versioning settings (\"vdata\"): 0x%02x", (UI
)d
->vdata
);
215 ucstring_destroy(txt
);
216 de_dbg_indent(c
, -1);
220 static const char *get_cmpr_meth_name(u8 t
)
222 const char *name
= NULL
;
224 case 0: name
="stored"; break;
225 case 1: name
="lzd"; break;
226 case 2: name
="lzh"; break;
228 return name
?name
:"?";
231 // To be called after all mod_time-related fields have been read.
232 // Finish reporting the mod_time, and set md->fi->mod_time.
233 static void finish_modtime_decoding(deark
*c
, lctx
*d
, struct member_data
*md
)
235 i64 timestamp_offset
;
236 char timestamp_buf
[64];
238 timestamp_offset
= 0;
239 if ( md
->timzon
< 127 ) timestamp_offset
= 15*60*((i64
)md
->timzon
);
240 else if ( 127 < md
->timzon
) timestamp_offset
= 15*60*((i64
)md
->timzon
- 256);
242 de_dos_datetime_to_timestamp(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], (i64
)md
->datdos
, (i64
)md
->timdos
);
243 de_timestamp_to_string(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_buf
, sizeof(timestamp_buf
), 0);
244 de_dbg(c
, "mod time: %s", timestamp_buf
);
245 if(md
->timzon
== 127) {
246 md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
].tzcode
= DE_TZCODE_LOCAL
;
249 de_timestamp_cvt_to_utc(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_offset
);
250 de_timestamp_to_string(&md
->fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], timestamp_buf
, sizeof(timestamp_buf
), 0);
251 de_dbg(c
, "mod time (UTC): %s", timestamp_buf
);
255 static void calc_hdr_crc(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
, i64 lvar
)
257 de_crcobj_reset(d
->crco
);
258 de_crcobj_addslice(d
->crco
, c
->infile
, pos1
, 54);
259 de_crcobj_addzeroes(d
->crco
, 2);
260 de_crcobj_addslice(d
->crco
, c
->infile
, pos1
+56, lvar
);
261 md
->crc_hdr_calculated
= de_crcobj_getval(d
->crco
);
264 // Decode the trailer member. Only a few fields are potentially interesting; the
265 // rest are usually zeroed out.
266 // This code is duplicated in do_member_header(), but it's too much trouble to
268 static void do_member_eof(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
)
272 if(!md
->has_ext_header
) goto done
;
273 lvar
= de_getu16le(pos1
+51);
274 de_dbg(c
, "length of variable part: %d", (int)lvar
);
276 md
->crc_hdr_reported
= (u32
)de_getu16le(pos1
+54);
277 de_dbg(c
, "entry crc (reported): 0x%04x", (unsigned int)md
->crc_hdr_reported
);
278 calc_hdr_crc(c
, d
, md
, pos1
, lvar
);
279 de_dbg(c
, "entry crc (calculated): 0x%04x", (UI
)md
->crc_hdr_calculated
);
280 if(md
->crc_hdr_calculated
!= md
->crc_hdr_reported
) {
281 de_warn(c
, "Header CRC check failed");
288 static int do_member_header(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos1
)
290 de_ucstring
*shortname
= NULL
;
291 de_ucstring
*longname
= NULL
;
292 de_ucstring
*dirname
= NULL
;
296 i64 lvar
; /* length of variable part */
297 i64 lnamu
; /* length of long name */
298 i64 ldiru
; /* length of directory */
303 sig
= (unsigned int)de_getu32le_p(&pos
);
304 if(sig
!= ZOO_SIGNATURE
) {
305 de_err(c
, "Malformed Zoo file, bad magic number at %"I64_FMT
, pos1
);
309 /* read the fixed part of the directory entry */
310 md
->type
= de_getbyte_p(&pos
);
311 md
->has_ext_header
= (u8
)(md
->type
== 2);
312 md
->method
= de_getbyte_p(&pos
);
313 md
->next_member_hdr_pos
= de_getu32le_p(&pos
);
315 de_dbg(c
, "member header format version (\"type\"): %d", (int)md
->type
);
316 if(md
->next_member_hdr_pos
) {
317 de_dbg(c
, "compression method: %d (%s)", (int)md
->method
, get_cmpr_meth_name(md
->method
));
320 de_snprintf(descrbuf
, sizeof(descrbuf
), (md
->next_member_hdr_pos
?"":
321 " (none - This is the trailer record)"));
322 de_dbg(c
, "next entry pos: %"I64_FMT
"%s", md
->next_member_hdr_pos
, descrbuf
);
324 if(md
->next_member_hdr_pos
==0) {
325 do_member_eof(c
, d
, md
, pos1
);
330 md
->cmpr_pos
= de_getu32le_p(&pos
);
331 de_dbg(c
, "pos of file data: %"I64_FMT
, md
->cmpr_pos
);
333 md
->datdos
= (unsigned int)de_getu16le_p(&pos
);
334 md
->timdos
= (unsigned int)de_getu16le_p(&pos
);
335 de_dbg2(c
, "dos date,time: %u,%u", md
->datdos
, md
->timdos
);
336 if(!md
->has_ext_header
) {
338 finish_modtime_decoding(c
, d
, md
);
341 md
->crc_reported
= (u32
)de_getu16le_p(&pos
);
342 de_dbg(c
, "file data crc (reported): 0x%04x", (unsigned int)md
->crc_reported
);
343 md
->uncmpr_len
= de_getu32le_p(&pos
);
344 de_dbg(c
, "original size: %"I64_FMT
, md
->uncmpr_len
);
345 md
->cmpr_len
= de_getu32le_p(&pos
);
346 de_dbg(c
, "compressed size: %"I64_FMT
, md
->cmpr_len
);
348 // Note: The version number fields are sometimes erroneously documented as
349 // "version made by" and "version needed". But (according to Zoo 2.10),
350 // there is no "version made by" field.
351 md
->majver
= de_getbyte_p(&pos
);
352 md
->minver
= de_getbyte_p(&pos
);
353 de_dbg(c
, "version needed to extract: %d.%d", (int)md
->majver
, (int)md
->minver
);
355 md
->is_deleted
= de_getbyte_p(&pos
);
356 de_dbg(c
, "is deleted: %d", (int)md
->is_deleted
);
357 pos
++; // "file structure" (?)
358 md
->comment_pos
= de_getu32le_p(&pos
);
359 md
->comment_len
= de_getu16le_p(&pos
);
360 de_dbg(c
, "comment pos: %"I64_FMT
", len=%d", md
->comment_pos
, (int)md
->comment_len
);
361 do_comment(c
, d
, md
->comment_pos
, md
->comment_len
, "comment", 0,
362 (d
->extract_comments_to_files
) && (!md
->is_deleted
|| d
->undelete
));
364 // In "type 2" header format, the shortname field is a fixed 13 bytes, and is
365 // followed by other fields.
366 // In "type 1" header format, the shortname field is (allegedly) the last field
367 // in the header, and it's supposed to be NUL-terminated, so it's hard to be
368 // *sure* what size it is.
369 // Zoo 1.21 seems to leave room for 14 bytes, instead of the 13 that would be
370 // expected. And it seemingly allows up to 14-byte filenames with no NUL -- but
371 // this could well be a bug. Or perhaps the 13-byte filename field is followed
372 // by a 1-byte field of unknown purpose.
373 shortname
= ucstring_create(c
);
374 dbuf_read_to_ucstring(c
->infile
, pos
, 13, shortname
, DE_CONVFLAG_STOP_AT_NUL
,
376 de_dbg(c
, "short name: \"%s\"", ucstring_getpsz(shortname
));
379 if(!md
->has_ext_header
) {
380 goto done_with_header
;
383 // If has_ext_header, there are at least 3 more header fields:
384 // 2-byte length-of-variable-part
386 // 2-byte CRC of dir entry
388 lvar
= de_getu16le_p(&pos
);
389 de_dbg(c
, "length of variable part: %d", (int)lvar
);
391 md
->timzon
= de_getbyte_p(&pos
);
393 // Note: The timezone field is definitely a signed byte that is the
394 // number of 15-minute units from UTC, but it is unknown to me whether
395 // a positive number means west, or east. Under either interpretation,
396 // I have multiple sample files with highly implausible timezones. The
397 // interpretation used here is based on the preponderance of evidence.
398 if(md
->timzon
==127) {
399 de_strlcpy(descrbuf
, "unknown", sizeof(descrbuf
));
401 else if(md
->timzon
>127) {
402 de_snprintf(descrbuf
, sizeof(descrbuf
), "%.2f hours east of UTC",
403 ((double)md
->timzon
- 256.0)/-4.0);
406 de_snprintf(descrbuf
, sizeof(descrbuf
), "%.2f hours west of UTC",
407 ((double)md
->timzon
)/4.0);
409 de_dbg(c
, "time zone: %d (%s)", (int)md
->timzon
, descrbuf
);
410 finish_modtime_decoding(c
, d
, md
);
412 md
->crc_hdr_reported
= (u32
)de_getu16le_p(&pos
);
413 de_dbg(c
, "entry crc (reported): 0x%04x", (unsigned int)md
->crc_hdr_reported
);
414 calc_hdr_crc(c
, d
, md
, pos1
, lvar
);
415 de_dbg(c
, "entry crc (calculated): 0x%04x", (UI
)md
->crc_hdr_calculated
);
416 if(md
->crc_hdr_calculated
!= md
->crc_hdr_reported
) {
417 de_warn(c
, "Header CRC check failed");
420 // The "variable part" of the extended header begins here.
421 hdr_endpos
= pos
+ lvar
;
423 if(hdr_endpos
-pos
< 1) goto done_with_header
;
424 lnamu
= (i64
)de_getbyte_p(&pos
);
425 de_dbg2(c
, "long name len: %d", (int)lnamu
);
427 if(hdr_endpos
-pos
< 1) goto done_with_header
;
428 ldiru
= (i64
)de_getbyte_p(&pos
);
429 de_dbg2(c
, "dir name len: %d", (int)ldiru
);
431 if(hdr_endpos
-pos
< lnamu
) goto done_with_header
;
433 longname
= ucstring_create(c
);
434 dbuf_read_to_ucstring(c
->infile
, pos
, lnamu
, longname
,
435 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
436 de_dbg(c
, "long name: \"%s\"", ucstring_getpsz(longname
));
440 if(hdr_endpos
-pos
< ldiru
) goto done_with_header
;
442 dirname
= ucstring_create(c
);
443 dbuf_read_to_ucstring(c
->infile
, pos
, ldiru
, dirname
,
444 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
445 de_dbg(c
, "dir name: \"%s\"", ucstring_getpsz(dirname
));
449 if(hdr_endpos
-pos
< 2) goto done_with_header
;
450 md
->system
= (unsigned int)de_getu16le_p(&pos
);
451 de_dbg(c
, "system id: %u", md
->system
);
453 if(hdr_endpos
-pos
< 3) goto done_with_header
;
454 md
->attribs
= (u32
)dbuf_getint_ext(c
->infile
, pos
, 3, 1, 0);
456 de_dbg(c
, "attribs: 0x%06x", (UI
)md
->attribs
);
458 attribs_type
= (md
->attribs
>> 22);
459 de_dbg(c
, "attribs type: %u", attribs_type
);
460 if(attribs_type
== 1) {
461 de_dbg(c
, "perms: octal(%o)", (UI
)(md
->attribs
& 0x1ff));
462 if((md
->attribs
& 0111) != 0) {
463 md
->fi
->mode_flags
|= DE_MODEFLAG_EXE
;
466 md
->fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
469 de_dbg_indent(c
, -1);
471 if(hdr_endpos
-pos
< 1) goto done_with_header
;
472 md
->vflag
= de_getbyte_p(&pos
);
473 de_dbg(c
, "versioning settings (\"vflag\"): 0x%02x", (UI
)md
->vflag
);
475 if(hdr_endpos
-pos
< 2) goto done_with_header
;
476 md
->ver
= (unsigned int)de_getu16le_p(&pos
);
477 de_dbg(c
, "file version number: %u", md
->ver
);
480 // Note: Typically, there is a 5-byte "file leader" ("@)#(\0") here, between
481 // the member header and the member data, so pos is not
482 // expected to equal md->posdat.
484 // Figure out the best filename to use
485 if(ucstring_isnonempty(longname
) || ucstring_isnonempty(shortname
)) {
486 if(ucstring_isnonempty(dirname
)) {
487 ucstring_append_ucstring(md
->fullname
, dirname
);
488 ucstring_append_sz(md
->fullname
, "/", DE_ENCODING_LATIN1
);
490 if(ucstring_isnonempty(longname
)) {
491 ucstring_append_ucstring(md
->fullname
, longname
);
493 else if(ucstring_isnonempty(shortname
)) {
494 ucstring_append_ucstring(md
->fullname
, shortname
);
497 if(ucstring_isempty(md
->fullname
)) {
498 ucstring_append_sz(md
->fullname
, "_", DE_ENCODING_LATIN1
);
501 ucstring_printf(md
->fullname
, DE_ENCODING_LATIN1
, ".deleted%02d",
502 d
->num_deleted_files_found
);
505 de_finfo_set_name_from_ucstring(c
, md
->fi
, md
->fullname
, DE_SNFLAG_FULLPATH
);
506 md
->fi
->original_filename_flag
= 1;
512 ucstring_destroy(shortname
);
513 ucstring_destroy(longname
);
514 ucstring_destroy(dirname
);
518 static void decompress_lzd(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
519 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
521 struct de_lzw_params delzwp
;
523 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
524 delzwp
.fmt
= DE_LZWFMT_ZOOLZD
;
525 delzwp
.max_code_size
= 13;
526 fmtutil_decompress_lzw(c
, dcmpri
, dcmpro
, dres
, &delzwp
);
529 static void decompress_lzh(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
530 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
)
532 struct de_lh5x_params lzhparams
;
534 de_zeromem(&lzhparams
, sizeof(struct de_lh5x_params
));
535 lzhparams
.fmt
= DE_LH5X_FMT_LH5
;
536 lzhparams
.zero_codes_block_behavior
= DE_LH5X_ZCB_STOP
;
537 lzhparams
.warn_about_zero_codes_block
= 0;
539 // Zoo does not appear to allow LZ77 offsets that point to data before
540 // the beginning of the file, so it doesn't matter what we initialize the
541 // history buffer to.
542 lzhparams
.history_fill_val
= 0x00;
544 fmtutil_decompress_lh5x(c
, dcmpri
, dcmpro
, dres
, &lzhparams
);
547 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
549 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
551 de_crcobj_addbuf(crco
, buf
, buf_len
);
554 // Process a single member file (or "trailer" record).
555 // If there are more members after this, sets *next_member_hdr_pos to nonzero.
556 static void do_member(deark
*c
, lctx
*d
, i64 pos1
, i64
*next_member_hdr_pos
)
558 struct member_data
*md
= NULL
;
561 struct de_dfilter_in_params dcmpri
;
562 struct de_dfilter_out_params dcmpro
;
563 struct de_dfilter_results dres
;
564 int saved_indent_level
;
566 de_dbg_indent_save(c
, &saved_indent_level
);
567 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
568 on_offset_found(c
, d
, pos1
, 1);
570 md
= de_malloc(c
, sizeof(struct member_data
));
571 md
->fi
= de_finfo_create(c
);
572 md
->fullname
= ucstring_create(c
);
574 if (!do_member_header(c
, d
, md
, pos1
)) {
577 on_offset_found(c
, d
, md
->cmpr_pos
, md
->cmpr_len
);
579 *next_member_hdr_pos
= md
->next_member_hdr_pos
;
581 if ( ! md
->next_member_hdr_pos
) {
585 if(md
->is_deleted
&& !d
->undelete
) {
586 de_dbg(c
, "ignoring deleted entry");
590 if ( (md
->majver
>2) || (md
->majver
==2 && md
->minver
>1) ) {
591 de_err(c
, "%s: Unsupported format version: %d.%d",
592 get_member_name_for_msg(c
, d
, md
),
593 (int)md
->majver
, (int)md
->minver
);
597 if(md
->method
!=ZOOCMPR_STORED
&& md
->method
!=ZOOCMPR_LZD
&& md
->method
!=ZOOCMPR_LZH
) {
598 de_err(c
, "%s: Unsupported compression method: %d",
599 get_member_name_for_msg(c
, d
, md
), (int)md
->method
);
603 de_dbg(c
, "compressed data at %"I64_FMT
", len=%"I64_FMT
, md
->cmpr_pos
,
606 if(md
->cmpr_pos
+ md
->cmpr_len
> c
->infile
->len
) {
607 de_err(c
, "%s: Data goes beyond end of file", get_member_name_for_msg(c
, d
, md
));
611 // Ready to decompress. Set up the output file.
612 if(md
->fi
&& md
->fi
->original_filename_flag
) {
618 outf
= dbuf_create_output_file(c
, ext
, md
->fi
, 0);
619 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
620 de_crcobj_reset(d
->crco
);
622 dcmpri
.f
= c
->infile
;
623 dcmpri
.pos
= md
->cmpr_pos
;
624 dcmpri
.len
= md
->cmpr_len
;
627 dcmpro
.len_known
= 1;
628 dcmpro
.expected_len
= md
->uncmpr_len
;
633 fmtutil_decompress_uncompressed(c
, &dcmpri
, &dcmpro
, &dres
, 0);
636 decompress_lzd(c
, &dcmpri
, &dcmpro
, &dres
);
639 decompress_lzh(c
, &dcmpri
, &dcmpro
, &dres
);
642 goto done
; // Should be impossible
644 de_dbg_indent(c
, -1);
646 md
->crc_calculated
= de_crcobj_getval(d
->crco
);
648 de_dbg(c
, "file data crc (calculated): 0x%04x", (unsigned int)md
->crc_calculated
);
652 de_err(c
, "%s: %s", get_member_name_for_msg(c
, d
, md
),
653 de_dfilter_get_errmsg(c
, &dres
));
655 else if(outf
->len
!= md
->uncmpr_len
) {
656 de_err(c
, "%s: Expected %"I64_FMT
" uncompressed bytes, got %"I64_FMT
,
657 get_member_name_for_msg(c
, d
, md
), md
->uncmpr_len
, outf
->len
);
659 else if (md
->crc_calculated
!= md
->crc_reported
) {
660 de_err(c
, "%s: CRC check failed", get_member_name_for_msg(c
, d
, md
));
666 if(md
->is_deleted
) d
->num_deleted_files_found
++;
667 ucstring_destroy(md
->fullname
);
668 de_finfo_destroy(c
, md
->fi
);
671 de_dbg_indent_restore(c
, saved_indent_level
);
674 // The archive comment can be anywhere in the file, but Zoo normally
675 // puts it right after the archive header, at offset 42.
676 // I have a number of Zoo files in which a distributor has added their
677 // own comment at the end of the file, leaving the original comment
678 // intact but invisible.
679 static void check_for_orphaned_comment(deark
*c
, lctx
*d
)
684 if(d
->type
!= 1) return;
685 if(d
->archive_comment_pos
==0 || d
->archive_comment_len
==0) return;
687 if(d
->min_offset_found
<= ocpos
) return;
688 oclen
= d
->min_offset_found
- ocpos
;
689 if(oclen
<5 || oclen
>1000) return;
690 if(de_getbyte(ocpos
+oclen
-1) != 0x0a) return;
691 if(dbuf_search_byte(c
->infile
, 0x00, ocpos
, oclen
, &foundpos
)) return;
692 de_dbg(c
, "possible orphaned archive comment found at %"I64_FMT
", len=%"I64_FMT
,
694 do_comment(c
, d
, ocpos
, oclen
, "orphaned archive comment", 1, 0);
697 // The main function: process a Zoo file
698 static void de_run_zoo(deark
*c
, de_module_params
*mparams
)
702 int saved_indent_level
;
704 de_dbg_indent_save(c
, &saved_indent_level
);
705 d
= de_malloc(c
, sizeof(lctx
));
706 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
707 d
->undelete
= de_get_ext_option_bool(c
, "zoo:undelete", 0);
708 d
->extract_comments_to_files
= (c
->extract_level
>=2);
710 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
711 d
->min_offset_found
= c
->infile
->len
;
713 if(!do_global_header(c
, d
, pos
)) {
714 de_err(c
, "Bad global header");
718 /* loop over the members of the archive */
719 d
->offsets_seen
= de_inthashtable_create(c
); // For protection against infinite loops
720 pos
= d
->first_member_hdr_pos
;
722 i64 next_member_hdr_pos
;
724 de_dbg_indent_restore(c
, saved_indent_level
);
728 if(pos
>= c
->infile
->len
) {
729 de_err(c
, "Unexpected EOF, expected member header at %"I64_FMT
, pos
);
733 if(!de_inthashtable_add_item(c
, d
->offsets_seen
, pos
, NULL
)) {
734 de_err(c
, "Loop detected");
738 de_dbg(c
, "entry at %"I64_FMT
, pos
);
741 next_member_hdr_pos
= 0;
742 do_member(c
, d
, pos
, &next_member_hdr_pos
);
743 pos
= next_member_hdr_pos
;
747 check_for_orphaned_comment(c
, d
);
749 if(d
->num_deleted_files_found
>0 && !d
->undelete
) {
750 de_info(c
, "Note: %d deleted file(s) found. Use \"-opt zoo:undelete\" "
751 "to extract them.", d
->num_deleted_files_found
);
756 de_inthashtable_destroy(c
, d
->offsets_seen
);
757 de_crcobj_destroy(d
->crco
);
760 de_dbg_indent_restore(c
, saved_indent_level
);
763 static int de_identify_zoo(deark
*c
)
765 if(!dbuf_memcmp(c
->infile
, 20, "\xdc\xa7\xc4\xfd", 4))
770 static void de_help_zoo(deark
*c
)
772 de_msg(c
, "-opt zoo:undelete : Also extract deleted files");
775 void de_module_zoo(deark
*c
, struct deark_module_info
*mi
)
778 mi
->desc
= "Zoo compressed archive format";
779 mi
->run_fn
= de_run_zoo
;
780 mi
->identify_fn
= de_identify_zoo
;
781 mi
->help_fn
= de_help_zoo
;
784 /////////////////////
786 static void de_run_zoo_filter(deark
*c
, de_module_params
*mparams
)
789 struct de_crcobj
*crco
= NULL
;
793 struct de_dfilter_in_params dcmpri
;
794 struct de_dfilter_out_params dcmpro
;
795 struct de_dfilter_results dres
;
797 if(c
->infile
->len
<6) goto done
;
799 use_lzh
= de_get_ext_option_bool(c
, "zoo_filter:lzh", -1);
801 if(dbuf_is_all_zeroes(c
->infile
, c
->infile
->len
-4, 2)) {
809 de_declare_fmtf(c
, "Zoo filter, LZ%s", (use_lzh
?"H":"D"));
811 crc_reported
= (u32
)de_getu32le(c
->infile
->len
-2);
812 de_dbg(c
, "crc (reported): 0x%04x", (UI
)crc_reported
);
814 outf
= dbuf_create_output_file(c
, "bin", NULL
, 0);
815 crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
816 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)crco
);
818 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
819 dcmpri
.f
= c
->infile
;
821 dcmpri
.len
= c
->infile
->len
- 4;
824 dcmpro
.len_known
= 0;
827 decompress_lzh(c
, &dcmpri
, &dcmpro
, &dres
);
830 decompress_lzd(c
, &dcmpri
, &dcmpro
, &dres
);
834 de_err(c
, "%s", de_dfilter_get_errmsg(c
, &dres
));
838 crc_calculated
= de_crcobj_getval(crco
);
839 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)crc_calculated
);
840 if(crc_calculated
!= crc_reported
) {
841 de_err(c
, "CRC check failed");
847 de_crcobj_destroy(crco
);
850 static int de_identify_zoo_filter(deark
*c
)
854 if(c
->infile
->len
<6) return 0;
855 if(de_getu16le(0) != 0x5a32) return 0;
857 // LZH ends with 16 0 bits, followed by 0 to 7 bits of padding that we
858 // will hope are 0. So it must end with two 0x00 bytes.
859 // LZD ends with the EOF code: 257. By my calculation, one of the 1 bits
860 // from that code must occur in the second-to-last byte. And the last byte
861 // can have at most one '1' bit.
862 de_read(b
, c
->infile
->len
-4, 2);
864 if(b
[1]==0) return 45; // Possible LZH
867 if(b
[1]<=0x02 || b
[1]==0x04 || b
[1]==0x08 || b
[1]==0x10 ||
868 b
[1]==0x20 || b
[1]==0x40 || b
[1]==0x80)
870 return 45; // Possible LZD
876 void de_module_zoo_filter(deark
*c
, struct deark_module_info
*mi
)
878 mi
->id
= "zoo_filter";
879 mi
->desc
= "Zoo filter format";
880 mi
->run_fn
= de_run_zoo_filter
;
881 mi
->identify_fn
= de_identify_zoo_filter
;
884 /////////////////////
888 i64 outf_comment_pos
;
893 i64 outf_trailer_pos
;
898 // Convert Zoo Z format to Zoo format
899 // TODO?: Write to Zoo 2.x format instead of 1.20 format. But it's more trouble.
900 static void de_run_zoo_z(deark
*c
, de_module_params
*mparams
)
903 static const u8 archivehdr
[34] = {0x5a,0x4f,0x4f,0x20,0x31,0x2e,0x32,0x30,0x20,0x41,
904 0x72,0x63,0x68,0x69,0x76,0x65,0x2e,0x1a,0x00,0x00,0xdc,0xa7,0xc4,0xfd,0x22,0x00,
905 0x00,0x00,0xde,0xff,0xff,0xff,0x01,0x01};
906 struct zoo_z_ctx
*zctx
= NULL
;
909 de_declare_fmtf(c
, "Zoo Z, DOS-compatible");
911 zctx
= de_malloc(c
, sizeof(struct zoo_z_ctx
));
912 if(dbuf_memcmp(c
->infile
, 0, "\xfe\x07\x01", 3)) {
913 de_err(c
, "File not in Zoo Z format, or not a supported version");
918 zctx
->cmpr_len
= de_getu32le(14);
919 de_dbg(c
, "compressed size: %"I64_FMT
, zctx
->cmpr_len
);
920 zctx
->comment_len
= de_getu16le(20);
921 de_dbg(c
, "comment: size=%d", (int)zctx
->comment_len
);
923 // Figure out where everything will go.
924 zctx
->outf_member_pos
= 34;
925 zctx
->outf_leader_pos
= zctx
->outf_member_pos
+ 52;
926 zctx
->outf_cmpr_pos
= zctx
->outf_leader_pos
+ 5;
927 zctx
->outf_comment_pos
= zctx
->outf_cmpr_pos
+ zctx
->cmpr_len
;
928 zctx
->outf_trailer_pos
= zctx
->outf_comment_pos
+ zctx
->comment_len
;
929 zctx
->inf_comment_pos
= 36;
930 zctx
->inf_cmpr_pos
= zctx
->inf_comment_pos
+ zctx
->comment_len
;
932 if(zctx
->inf_comment_pos
+zctx
->comment_len
> c
->infile
->len
) goto done
;
933 if(zctx
->inf_cmpr_pos
+zctx
->cmpr_len
> c
->infile
->len
) goto done
;
935 outf
= dbuf_create_output_file(c
, "zoo", NULL
, 0);
938 dbuf_write(outf
, archivehdr
, 34);
940 // Main member header
941 dbuf_writeu32le(outf
, ZOO_SIGNATURE
);
942 dbuf_writebyte(outf
, 1); // "type"
943 dbuf_copy(c
->infile
, 3, 1, outf
); // packing method
945 dbuf_writeu32le(outf
, zctx
->outf_trailer_pos
);
946 dbuf_writeu32le(outf
, zctx
->outf_cmpr_pos
);
948 // date, time, crc, sizeorig, sizenow, maj ver, min ver
949 dbuf_copy(c
->infile
, 4, 16, outf
);
951 dbuf_writebyte(outf
, 0); // "deleted" flag
952 dbuf_writebyte(outf
, 0); // file structure / reserved
953 dbuf_writeu32le(outf
, zctx
->comment_len
?zctx
->outf_comment_pos
:0);
954 dbuf_writeu16le(outf
, zctx
->comment_len
);
955 dbuf_copy(c
->infile
, 22, 13, outf
); // filename
956 dbuf_writebyte(outf
, 0x4f); // ??? This seems to be what Zoo does
958 dbuf_write(outf
, (const u8
*)"@)#(\0", 5); // leader
959 dbuf_copy(c
->infile
, zctx
->inf_cmpr_pos
, zctx
->cmpr_len
, outf
); // cmpr data
961 if(zctx
->comment_len
) {
962 dbuf_copy(c
->infile
, zctx
->inf_comment_pos
, zctx
->comment_len
, outf
);
965 dbuf_writeu32le(outf
, ZOO_SIGNATURE
);
966 dbuf_write_zeroes(outf
, 48);
972 de_err(c
, "Conversion to Zoo format failed");
977 static int de_identify_zoo_z(deark
*c
)
979 if(dbuf_memcmp(c
->infile
, 0, "\xfe\x07\x01", 3)) return 0;
983 void de_module_zoo_z(deark
*c
, struct deark_module_info
*mi
)
986 mi
->desc
= "Zoo Z format";
987 mi
->run_fn
= de_run_zoo_z
;
988 mi
->identify_fn
= de_identify_zoo_z
;