New "videomaster" module
[deark.git] / modules / zoo.c
blob9d7b220de072f8d2586972c20b8d8c05cef7bfa0
1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZOO compressed archive format
7 // The ZOO parser in this file was originally derived from unzoo.c v4.4
8 // by Martin Schoenert.
9 // The original file had this notice:
/*
*A  unzoo.c                     Tools                        Martin Schoenert
*H  @(#)$Id: unzoo.c,v 4.4 2000/05/29 08:56:57 sal Exp $
*Y  This file is in the Public Domain.
*/
19 // To be clear, the code in this file (Deark's zoo.c file) is covered by
20 // Deark's standard terms of use.
22 #include <deark-config.h>
23 #include <deark-private.h>
24 #include <deark-fmtutil.h>
26 DE_DECLARE_MODULE(de_module_zoo);
27 DE_DECLARE_MODULE(de_module_zoo_filter);
28 DE_DECLARE_MODULE(de_module_zoo_z);
30 #define ZOO_SIGNATURE 0xfdc4a7dcU
32 #define ZOOCMPR_STORED 0
33 #define ZOOCMPR_LZD 1
34 #define ZOOCMPR_LZH 2
36 struct localctx_struct;
37 typedef struct localctx_struct lctx;
38 struct member_data;
40 // Data associated with one Zoo member file
41 struct member_data {
42 de_finfo *fi;
43 de_ucstring *fullname;
44 u8 type; /* type of current member (1) */
45 u8 method; /* packing method of member (0..2) */
46 u8 has_ext_header;
47 i64 next_member_hdr_pos;
48 i64 cmpr_pos;
49 i64 cmpr_len;
50 i64 uncmpr_len;
51 i64 comment_pos;
52 i64 comment_len; // 0 if no comment
53 unsigned int datdos; /* date (in DOS format) */
54 unsigned int timdos; /* time (in DOS format) */
55 u32 crc_reported;
56 u32 crc_calculated;
57 u32 crc_hdr_reported;
58 u32 crc_hdr_calculated;
59 u8 majver; /* major version needed to extract */
60 u8 minver; /* minor version needed to extract */
61 u8 is_deleted; /* 1 if member is deleted, 0 else */
62 u8 timzon; /* time zone */
63 unsigned int system; /* system identifier */
64 u32 attribs; /* file permissions */
65 u8 vflag; /* gens. on, last gen., gen. limit */
66 unsigned int ver; /* version number of member */
69 struct localctx_struct {
70 int input_encoding;
71 int extract_comments_to_files;
72 int undelete;
73 struct de_inthashtable *offsets_seen;
75 i64 first_member_hdr_pos;
76 u8 majver;
77 u8 minver;
78 u8 type; // archive header version
79 i64 archive_comment_pos;
80 i64 archive_comment_len; // 0 if no comment
81 u8 vdata; /* gens. on, gen. limit */
83 int num_deleted_files_found;
84 i64 min_offset_found;
86 // Shared by all member files, so we don't have to recalculate the CRC table
87 // for each member file.
88 struct de_crcobj *crco;
91 // An offset is considered meaningful if len!=0.
92 static void on_offset_found(deark *c, lctx *d, i64 pos, i64 len)
94 if(len==0 || pos<0) return;
95 if(pos<d->min_offset_found) {
96 d->min_offset_found = pos;
100 static const char *get_member_name_for_msg(deark *c, lctx *d, struct member_data *md)
102 if(md && ucstring_isnonempty(md->fullname)) {
103 return ucstring_getpsz_d(md->fullname);
105 return "(?)";
108 static void do_extract_comment(deark *c, lctx *d, i64 pos, i64 len, int is_main)
110 dbuf_create_file_from_slice(c->infile, pos, len, "comment.txt",
111 NULL, DE_CREATEFLAG_IS_AUX);
114 static void do_dbg_comment(deark *c, lctx *d, i64 pos, i64 len, const char *name,
115 int is_main)
117 de_ucstring *s = NULL;
119 if(c->debug_level<1) return;
120 s = ucstring_create(c);
121 dbuf_read_to_ucstring_n(c->infile, pos, len, DE_DBG_MAX_STRLEN, s,
122 0, d->input_encoding);
123 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(s));
124 ucstring_destroy(s);
127 static void do_comment(deark *c, lctx *d, i64 pos, i64 len, const char *name,
128 int is_main, int extract_to_file)
130 on_offset_found(c, d, pos, len);
131 if(len<1) return;
132 if(pos<0 || pos+len>c->infile->len) return;
133 if(extract_to_file) {
134 do_extract_comment(c, d, pos, len, is_main);
136 else {
137 do_dbg_comment(c, d, pos, len, name, is_main);
141 // Read the main file header
142 static int do_global_header(deark *c, lctx *d, i64 pos1)
144 i64 pos = pos1;
145 int retval = 0;
146 unsigned int sig;
147 u32 zoo_minus, zoo_minus_expected;
148 i64 i;
149 de_ucstring *txt = NULL;
151 de_dbg(c, "archive header at %"I64_FMT, pos1);
152 de_dbg_indent(c, 1);
154 // Intro text, e.g. "ZOO 2.10 Archive."
155 // Zoo source code (zoo.h) says "The contents of the text message are [...]
156 // not used by Zoo and they may be anything.".
157 txt = ucstring_create(c);
158 for(i=0; i<20; i++) {
159 u8 ch;
161 ch = de_getbyte(pos+i);
162 if(ch==26 || ch==0) break;
163 if(ch<32 || ch>126) ch = '_';
164 ucstring_append_char(txt, (de_rune)ch);
166 de_dbg(c, "header text: \"%s\"", ucstring_getpsz_d(txt));
167 pos += 20;
169 sig = (unsigned int)de_getu32le_p(&pos);
170 if (sig != ZOO_SIGNATURE) goto done;
172 d->first_member_hdr_pos = de_getu32le_p(&pos);
173 de_dbg(c, "first entry pos: %"I64_FMT, d->first_member_hdr_pos);
175 zoo_minus = (u32)de_getu32le_p(&pos);
176 de_dbg(c, "consistency check: 0x%08x", (UI)zoo_minus);
177 zoo_minus_expected = (u32)((~(u32)d->first_member_hdr_pos)+(u32)1);
178 if(zoo_minus!=zoo_minus_expected) {
179 de_warn(c, "Archive header failed consistency check (is 0x%08x, expected 0x%08x)",
180 (UI)zoo_minus, (UI)zoo_minus_expected);
183 // Note: The version number fields are sometimes erroneously documented as
184 // "version made by" and "version needed to extract [all files]".
185 d->majver = de_getbyte_p(&pos);
186 d->minver = de_getbyte_p(&pos);
187 de_dbg(c, "version needed to manipulate archive: %d.%d", (int)d->majver, (int)d->minver);
189 // Fields that aren't present in old versions.
190 if(d->first_member_hdr_pos > 34) {
191 d->type = de_getbyte_p(&pos);
192 de_dbg(c, "archive header format version (\"type\"): %u", (unsigned int)d->type);
193 // 1 is the only value here with a known meaning, but we'll accept some slightly
194 // higher values, and assume they are backward-compatible.
195 if(d->type<1 || d->type>5) {
196 d->type = 0;
197 goto after_ext_hdr;
200 d->archive_comment_pos = de_getu32le_p(&pos);
201 d->archive_comment_len = de_getu16le_p(&pos);
202 de_dbg(c, "archive comment pos: %"I64_FMT", len=%d", d->archive_comment_pos,
203 (int)d->archive_comment_len);
204 do_comment(c, d, d->archive_comment_pos, d->archive_comment_len, "archive comment",
205 1, d->extract_comments_to_files);
207 d->vdata = de_getbyte_p(&pos);
208 de_dbg(c, "archive-level versioning settings (\"vdata\"): 0x%02x", (UI)d->vdata);
210 after_ext_hdr:
212 retval = 1;
214 done:
215 ucstring_destroy(txt);
216 de_dbg_indent(c, -1);
217 return retval;
220 static const char *get_cmpr_meth_name(u8 t)
222 const char *name = NULL;
223 switch(t) {
224 case 0: name="stored"; break;
225 case 1: name="lzd"; break;
226 case 2: name="lzh"; break;
228 return name?name:"?";
231 // To be called after all mod_time-related fields have been read.
232 // Finish reporting the mod_time, and set md->fi->mod_time.
233 static void finish_modtime_decoding(deark *c, lctx *d, struct member_data *md)
235 i64 timestamp_offset;
236 char timestamp_buf[64];
238 timestamp_offset = 0;
239 if ( md->timzon < 127 ) timestamp_offset = 15*60*((i64)md->timzon );
240 else if ( 127 < md->timzon ) timestamp_offset = 15*60*((i64)md->timzon - 256);
242 de_dos_datetime_to_timestamp(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], (i64)md->datdos, (i64)md->timdos);
243 de_timestamp_to_string(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_buf, sizeof(timestamp_buf), 0);
244 de_dbg(c, "mod time: %s", timestamp_buf);
245 if(md->timzon == 127) {
246 md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY].tzcode = DE_TZCODE_LOCAL;
248 else {
249 de_timestamp_cvt_to_utc(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_offset);
250 de_timestamp_to_string(&md->fi->timestamp[DE_TIMESTAMPIDX_MODIFY], timestamp_buf, sizeof(timestamp_buf), 0);
251 de_dbg(c, "mod time (UTC): %s", timestamp_buf);
255 static void calc_hdr_crc(deark *c, lctx *d, struct member_data *md, i64 pos1, i64 lvar)
257 de_crcobj_reset(d->crco);
258 de_crcobj_addslice(d->crco, c->infile, pos1, 54);
259 de_crcobj_addzeroes(d->crco, 2);
260 de_crcobj_addslice(d->crco, c->infile, pos1+56, lvar);
261 md->crc_hdr_calculated = de_crcobj_getval(d->crco);
264 // Decode the trailer member. Only a few fields are potentially interesting; the
265 // rest are usually zeroed out.
266 // This code is duplicated in do_member_header(), but it's too much trouble to
267 // share it.
268 static void do_member_eof(deark *c, lctx *d, struct member_data *md, i64 pos1)
270 i64 lvar;
272 if(!md->has_ext_header) goto done;
273 lvar = de_getu16le(pos1+51);
274 de_dbg(c, "length of variable part: %d", (int)lvar);
276 md->crc_hdr_reported = (u32)de_getu16le(pos1+54);
277 de_dbg(c, "entry crc (reported): 0x%04x", (unsigned int)md->crc_hdr_reported);
278 calc_hdr_crc(c, d, md, pos1, lvar);
279 de_dbg(c, "entry crc (calculated): 0x%04x", (UI)md->crc_hdr_calculated);
280 if(md->crc_hdr_calculated != md->crc_hdr_reported) {
281 de_warn(c, "Header CRC check failed");
284 done:
288 static int do_member_header(deark *c, lctx *d, struct member_data *md, i64 pos1)
290 de_ucstring *shortname = NULL;
291 de_ucstring *longname = NULL;
292 de_ucstring *dirname = NULL;
293 int retval = 0;
294 i64 pos = pos1;
295 i64 hdr_endpos;
296 i64 lvar; /* length of variable part */
297 i64 lnamu; /* length of long name */
298 i64 ldiru; /* length of directory */
299 unsigned int sig;
300 UI attribs_type;
301 char descrbuf[80];
303 sig = (unsigned int)de_getu32le_p(&pos);
304 if(sig != ZOO_SIGNATURE) {
305 de_err(c, "Malformed Zoo file, bad magic number at %"I64_FMT, pos1);
306 goto done;
309 /* read the fixed part of the directory entry */
310 md->type = de_getbyte_p(&pos);
311 md->has_ext_header = (u8)(md->type == 2);
312 md->method = de_getbyte_p(&pos);
313 md->next_member_hdr_pos = de_getu32le_p(&pos);
315 de_dbg(c, "member header format version (\"type\"): %d", (int)md->type);
316 if(md->next_member_hdr_pos) {
317 de_dbg(c, "compression method: %d (%s)", (int)md->method, get_cmpr_meth_name(md->method));
320 de_snprintf(descrbuf, sizeof(descrbuf), (md->next_member_hdr_pos?"":
321 " (none - This is the trailer record)"));
322 de_dbg(c, "next entry pos: %"I64_FMT"%s", md->next_member_hdr_pos, descrbuf);
324 if(md->next_member_hdr_pos==0) {
325 do_member_eof(c, d, md, pos1);
326 retval = 1;
327 goto done;
330 md->cmpr_pos = de_getu32le_p(&pos);
331 de_dbg(c, "pos of file data: %"I64_FMT, md->cmpr_pos);
333 md->datdos = (unsigned int)de_getu16le_p(&pos);
334 md->timdos = (unsigned int)de_getu16le_p(&pos);
335 de_dbg2(c, "dos date,time: %u,%u", md->datdos, md->timdos);
336 if(!md->has_ext_header) {
337 md->timzon = 127;
338 finish_modtime_decoding(c, d, md);
341 md->crc_reported = (u32)de_getu16le_p(&pos);
342 de_dbg(c, "file data crc (reported): 0x%04x", (unsigned int)md->crc_reported);
343 md->uncmpr_len = de_getu32le_p(&pos);
344 de_dbg(c, "original size: %"I64_FMT, md->uncmpr_len);
345 md->cmpr_len = de_getu32le_p(&pos);
346 de_dbg(c, "compressed size: %"I64_FMT, md->cmpr_len);
348 // Note: The version number fields are sometimes erroneously documented as
349 // "version made by" and "version needed". But (according to Zoo 2.10),
350 // there is no "version made by" field.
351 md->majver = de_getbyte_p(&pos);
352 md->minver = de_getbyte_p(&pos);
353 de_dbg(c, "version needed to extract: %d.%d", (int)md->majver, (int)md->minver);
355 md->is_deleted = de_getbyte_p(&pos);
356 de_dbg(c, "is deleted: %d", (int)md->is_deleted);
357 pos++; // "file structure" (?)
358 md->comment_pos = de_getu32le_p(&pos);
359 md->comment_len = de_getu16le_p(&pos);
360 de_dbg(c, "comment pos: %"I64_FMT", len=%d", md->comment_pos, (int)md->comment_len);
361 do_comment(c, d, md->comment_pos, md->comment_len, "comment", 0,
362 (d->extract_comments_to_files) && (!md->is_deleted || d->undelete));
364 // In "type 2" header format, the shortname field is a fixed 13 bytes, and is
365 // followed by other fields.
366 // In "type 1" header format, the shortname field is (allegedly) the last field
367 // in the header, and it's supposed to be NUL-terminated, so it's hard to be
368 // *sure* what size it is.
369 // Zoo 1.21 seems to leave room for 14 bytes, instead of the 13 that would be
370 // expected. And it seemingly allows up to 14-byte filenames with no NUL -- but
371 // this could well be a bug. Or perhaps the 13-byte filename field is followed
372 // by a 1-byte field of unknown purpose.
373 shortname = ucstring_create(c);
374 dbuf_read_to_ucstring(c->infile, pos, 13, shortname, DE_CONVFLAG_STOP_AT_NUL,
375 d->input_encoding);
376 de_dbg(c, "short name: \"%s\"", ucstring_getpsz(shortname));
377 pos += 13;
379 if(!md->has_ext_header) {
380 goto done_with_header;
383 // If has_ext_header, there are at least 3 more header fields:
384 // 2-byte length-of-variable-part
385 // 1-byte timezone
386 // 2-byte CRC of dir entry
388 lvar = de_getu16le_p(&pos);
389 de_dbg(c, "length of variable part: %d", (int)lvar);
391 md->timzon = de_getbyte_p(&pos);
393 // Note: The timezone field is definitely a signed byte that is the
394 // number of 15-minute units from UTC, but it is unknown to me whether
395 // a positive number means west, or east. Under either interpretation,
396 // I have multiple sample files with highly implausible timezones. The
397 // interpretation used here is based on the preponderance of evidence.
398 if(md->timzon==127) {
399 de_strlcpy(descrbuf, "unknown", sizeof(descrbuf));
401 else if(md->timzon>127) {
402 de_snprintf(descrbuf, sizeof(descrbuf), "%.2f hours east of UTC",
403 ((double)md->timzon - 256.0)/-4.0);
405 else {
406 de_snprintf(descrbuf, sizeof(descrbuf), "%.2f hours west of UTC",
407 ((double)md->timzon)/4.0);
409 de_dbg(c, "time zone: %d (%s)", (int)md->timzon, descrbuf);
410 finish_modtime_decoding(c, d, md);
412 md->crc_hdr_reported = (u32)de_getu16le_p(&pos);
413 de_dbg(c, "entry crc (reported): 0x%04x", (unsigned int)md->crc_hdr_reported);
414 calc_hdr_crc(c, d, md, pos1, lvar);
415 de_dbg(c, "entry crc (calculated): 0x%04x", (UI)md->crc_hdr_calculated);
416 if(md->crc_hdr_calculated != md->crc_hdr_reported) {
417 de_warn(c, "Header CRC check failed");
420 // The "variable part" of the extended header begins here.
421 hdr_endpos = pos + lvar;
423 if(hdr_endpos-pos < 1) goto done_with_header;
424 lnamu = (i64)de_getbyte_p(&pos);
425 de_dbg2(c, "long name len: %d", (int)lnamu);
427 if(hdr_endpos-pos < 1) goto done_with_header;
428 ldiru = (i64)de_getbyte_p(&pos);
429 de_dbg2(c, "dir name len: %d", (int)ldiru);
431 if(hdr_endpos-pos < lnamu) goto done_with_header;
432 if(lnamu>0) {
433 longname = ucstring_create(c);
434 dbuf_read_to_ucstring(c->infile, pos, lnamu, longname,
435 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
436 de_dbg(c, "long name: \"%s\"", ucstring_getpsz(longname));
438 pos += lnamu;
440 if(hdr_endpos-pos < ldiru) goto done_with_header;
441 if(ldiru>0) {
442 dirname = ucstring_create(c);
443 dbuf_read_to_ucstring(c->infile, pos, ldiru, dirname,
444 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
445 de_dbg(c, "dir name: \"%s\"", ucstring_getpsz(dirname));
447 pos += ldiru;
449 if(hdr_endpos-pos < 2) goto done_with_header;
450 md->system = (unsigned int)de_getu16le_p(&pos);
451 de_dbg(c, "system id: %u", md->system);
453 if(hdr_endpos-pos < 3) goto done_with_header;
454 md->attribs = (u32)dbuf_getint_ext(c->infile, pos, 3, 1, 0);
455 pos += 3;
456 de_dbg(c, "attribs: 0x%06x", (UI)md->attribs);
457 de_dbg_indent(c, 1);
458 attribs_type = (md->attribs >> 22);
459 de_dbg(c, "attribs type: %u", attribs_type);
460 if(attribs_type == 1) {
461 de_dbg(c, "perms: octal(%o)", (UI)(md->attribs & 0x1ff));
462 if((md->attribs & 0111) != 0) {
463 md->fi->mode_flags |= DE_MODEFLAG_EXE;
465 else {
466 md->fi->mode_flags |= DE_MODEFLAG_NONEXE;
469 de_dbg_indent(c, -1);
471 if(hdr_endpos-pos < 1) goto done_with_header;
472 md->vflag = de_getbyte_p(&pos);
473 de_dbg(c, "versioning settings (\"vflag\"): 0x%02x", (UI)md->vflag);
475 if(hdr_endpos-pos < 2) goto done_with_header;
476 md->ver = (unsigned int)de_getu16le_p(&pos);
477 de_dbg(c, "file version number: %u", md->ver);
479 done_with_header:
480 // Note: Typically, there is a 5-byte "file leader" ("@)#(\0") here, between
481 // the member header and the member data, so pos is not
482 // expected to equal md->posdat.
484 // Figure out the best filename to use
485 if(ucstring_isnonempty(longname) || ucstring_isnonempty(shortname)) {
486 if(ucstring_isnonempty(dirname)) {
487 ucstring_append_ucstring(md->fullname, dirname);
488 ucstring_append_sz(md->fullname, "/", DE_ENCODING_LATIN1);
490 if(ucstring_isnonempty(longname)) {
491 ucstring_append_ucstring(md->fullname, longname);
493 else if(ucstring_isnonempty(shortname)) {
494 ucstring_append_ucstring(md->fullname, shortname);
497 if(ucstring_isempty(md->fullname)) {
498 ucstring_append_sz(md->fullname, "_", DE_ENCODING_LATIN1);
500 if(md->is_deleted) {
501 ucstring_printf(md->fullname, DE_ENCODING_LATIN1, ".deleted%02d",
502 d->num_deleted_files_found);
505 de_finfo_set_name_from_ucstring(c, md->fi, md->fullname, DE_SNFLAG_FULLPATH);
506 md->fi->original_filename_flag = 1;
509 retval = 1;
511 done:
512 ucstring_destroy(shortname);
513 ucstring_destroy(longname);
514 ucstring_destroy(dirname);
515 return retval;
518 static void decompress_lzd(deark *c, struct de_dfilter_in_params *dcmpri,
519 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
521 struct de_lzw_params delzwp;
523 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
524 delzwp.fmt = DE_LZWFMT_ZOOLZD;
525 delzwp.max_code_size = 13;
526 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
529 static void decompress_lzh(deark *c, struct de_dfilter_in_params *dcmpri,
530 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
532 struct de_lh5x_params lzhparams;
534 de_zeromem(&lzhparams, sizeof(struct de_lh5x_params));
535 lzhparams.fmt = DE_LH5X_FMT_LH5;
536 lzhparams.zero_codes_block_behavior = DE_LH5X_ZCB_STOP;
537 lzhparams.warn_about_zero_codes_block = 0;
539 // Zoo does not appear to allow LZ77 offsets that point to data before
540 // the beginning of the file, so it doesn't matter what we initialize the
541 // history buffer to.
542 lzhparams.history_fill_val = 0x00;
544 fmtutil_decompress_lh5x(c, dcmpri, dcmpro, dres, &lzhparams);
547 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
549 struct de_crcobj *crco = (struct de_crcobj *)userdata;
551 de_crcobj_addbuf(crco, buf, buf_len);
554 // Process a single member file (or "trailer" record).
555 // If there are more members after this, sets *next_member_hdr_pos to nonzero.
556 static void do_member(deark *c, lctx *d, i64 pos1, i64 *next_member_hdr_pos)
558 struct member_data *md = NULL;
559 dbuf *outf = NULL;
560 const char *ext;
561 struct de_dfilter_in_params dcmpri;
562 struct de_dfilter_out_params dcmpro;
563 struct de_dfilter_results dres;
564 int saved_indent_level;
566 de_dbg_indent_save(c, &saved_indent_level);
567 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
568 on_offset_found(c, d, pos1, 1);
570 md = de_malloc(c, sizeof(struct member_data));
571 md->fi = de_finfo_create(c);
572 md->fullname = ucstring_create(c);
574 if (!do_member_header(c, d, md, pos1)) {
575 goto done;
577 on_offset_found(c, d, md->cmpr_pos, md->cmpr_len);
579 *next_member_hdr_pos = md->next_member_hdr_pos;
581 if ( ! md->next_member_hdr_pos ) {
582 goto done;
585 if(md->is_deleted && !d->undelete) {
586 de_dbg(c, "ignoring deleted entry");
587 goto done;
590 if ( (md->majver>2) || (md->majver==2 && md->minver>1) ) {
591 de_err(c, "%s: Unsupported format version: %d.%d",
592 get_member_name_for_msg(c, d, md),
593 (int)md->majver, (int)md->minver);
594 goto done;
597 if(md->method!=ZOOCMPR_STORED && md->method!=ZOOCMPR_LZD && md->method!=ZOOCMPR_LZH) {
598 de_err(c, "%s: Unsupported compression method: %d",
599 get_member_name_for_msg(c, d, md), (int)md->method);
600 goto done;
603 de_dbg(c, "compressed data at %"I64_FMT", len=%"I64_FMT, md->cmpr_pos,
604 md->cmpr_len);
606 if(md->cmpr_pos + md->cmpr_len > c->infile->len) {
607 de_err(c, "%s: Data goes beyond end of file", get_member_name_for_msg(c, d, md));
608 goto done;
611 // Ready to decompress. Set up the output file.
612 if(md->fi && md->fi->original_filename_flag) {
613 ext = NULL;
615 else {
616 ext = "bin";
618 outf = dbuf_create_output_file(c, ext, md->fi, 0);
619 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
620 de_crcobj_reset(d->crco);
622 dcmpri.f = c->infile;
623 dcmpri.pos = md->cmpr_pos;
624 dcmpri.len = md->cmpr_len;
626 dcmpro.f = outf;
627 dcmpro.len_known = 1;
628 dcmpro.expected_len = md->uncmpr_len;
630 de_dbg_indent(c, 1);
631 switch(md->method) {
632 case ZOOCMPR_STORED:
633 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
634 break;
635 case ZOOCMPR_LZD:
636 decompress_lzd(c, &dcmpri, &dcmpro, &dres);
637 break;
638 case ZOOCMPR_LZH:
639 decompress_lzh(c, &dcmpri, &dcmpro, &dres);
640 break;
641 default:
642 goto done; // Should be impossible
644 de_dbg_indent(c, -1);
646 md->crc_calculated = de_crcobj_getval(d->crco);
647 if(!dres.errcode) {
648 de_dbg(c, "file data crc (calculated): 0x%04x", (unsigned int)md->crc_calculated);
651 if(dres.errcode) {
652 de_err(c, "%s: %s", get_member_name_for_msg(c, d, md),
653 de_dfilter_get_errmsg(c, &dres));
655 else if(outf->len != md->uncmpr_len) {
656 de_err(c, "%s: Expected %"I64_FMT" uncompressed bytes, got %"I64_FMT,
657 get_member_name_for_msg(c, d, md), md->uncmpr_len, outf->len);
659 else if (md->crc_calculated != md->crc_reported) {
660 de_err(c, "%s: CRC check failed", get_member_name_for_msg(c, d, md));
663 done:
664 dbuf_close(outf);
665 if(md) {
666 if(md->is_deleted) d->num_deleted_files_found++;
667 ucstring_destroy(md->fullname);
668 de_finfo_destroy(c, md->fi);
669 de_free(c, md);
671 de_dbg_indent_restore(c, saved_indent_level);
674 // The archive comment can be anywhere in the file, but Zoo normally
675 // puts it right after the archive header, at offset 42.
676 // I have a number of Zoo files in which a distributor has added their
677 // own comment at the end of the file, leaving the original comment
678 // intact but invisible.
679 static void check_for_orphaned_comment(deark *c, lctx *d)
681 i64 ocpos, oclen;
682 i64 foundpos = 0;
684 if(d->type != 1) return;
685 if(d->archive_comment_pos==0 || d->archive_comment_len==0) return;
686 ocpos = 42;
687 if(d->min_offset_found <= ocpos) return;
688 oclen = d->min_offset_found - ocpos;
689 if(oclen<5 || oclen>1000) return;
690 if(de_getbyte(ocpos+oclen-1) != 0x0a) return;
691 if(dbuf_search_byte(c->infile, 0x00, ocpos, oclen, &foundpos)) return;
692 de_dbg(c, "possible orphaned archive comment found at %"I64_FMT", len=%"I64_FMT,
693 ocpos, oclen);
694 do_comment(c, d, ocpos, oclen, "orphaned archive comment", 1, 0);
697 // The main function: process a Zoo file
698 static void de_run_zoo(deark *c, de_module_params *mparams)
700 lctx *d = NULL;
701 i64 pos = 0;
702 int saved_indent_level;
704 de_dbg_indent_save(c, &saved_indent_level);
705 d = de_malloc(c, sizeof(lctx));
706 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
707 d->undelete = de_get_ext_option_bool(c, "zoo:undelete", 0);
708 d->extract_comments_to_files = (c->extract_level>=2);
710 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
711 d->min_offset_found = c->infile->len;
713 if(!do_global_header(c, d, pos)) {
714 de_err(c, "Bad global header");
715 goto done;
718 /* loop over the members of the archive */
719 d->offsets_seen = de_inthashtable_create(c); // For protection against infinite loops
720 pos = d->first_member_hdr_pos;
721 while ( 1 ) {
722 i64 next_member_hdr_pos;
724 de_dbg_indent_restore(c, saved_indent_level);
726 if(pos==0) break;
728 if(pos >= c->infile->len) {
729 de_err(c, "Unexpected EOF, expected member header at %"I64_FMT, pos);
730 goto after_members;
733 if(!de_inthashtable_add_item(c, d->offsets_seen, pos, NULL)) {
734 de_err(c, "Loop detected");
735 goto after_members;
738 de_dbg(c, "entry at %"I64_FMT, pos);
739 de_dbg_indent(c, 1);
741 next_member_hdr_pos = 0;
742 do_member(c, d, pos, &next_member_hdr_pos);
743 pos = next_member_hdr_pos;
746 after_members:
747 check_for_orphaned_comment(c, d);
749 if(d->num_deleted_files_found>0 && !d->undelete) {
750 de_info(c, "Note: %d deleted file(s) found. Use \"-opt zoo:undelete\" "
751 "to extract them.", d->num_deleted_files_found);
754 done:
755 if(d) {
756 de_inthashtable_destroy(c, d->offsets_seen);
757 de_crcobj_destroy(d->crco);
758 de_free(c, d);
760 de_dbg_indent_restore(c, saved_indent_level);
763 static int de_identify_zoo(deark *c)
765 if(!dbuf_memcmp(c->infile, 20, "\xdc\xa7\xc4\xfd", 4))
766 return 100;
767 return 0;
770 static void de_help_zoo(deark *c)
772 de_msg(c, "-opt zoo:undelete : Also extract deleted files");
775 void de_module_zoo(deark *c, struct deark_module_info *mi)
777 mi->id = "zoo";
778 mi->desc = "Zoo compressed archive format";
779 mi->run_fn = de_run_zoo;
780 mi->identify_fn = de_identify_zoo;
781 mi->help_fn = de_help_zoo;
784 /////////////////////
786 static void de_run_zoo_filter(deark *c, de_module_params *mparams)
788 dbuf *outf = NULL;
789 struct de_crcobj *crco = NULL;
790 int use_lzh = 0;
791 u32 crc_reported;
792 u32 crc_calculated;
793 struct de_dfilter_in_params dcmpri;
794 struct de_dfilter_out_params dcmpro;
795 struct de_dfilter_results dres;
797 if(c->infile->len<6) goto done;
799 use_lzh = de_get_ext_option_bool(c, "zoo_filter:lzh", -1);
800 if(use_lzh<0) {
801 if(dbuf_is_all_zeroes(c->infile, c->infile->len-4, 2)) {
802 use_lzh = 1;
804 else {
805 use_lzh = 0;
809 de_declare_fmtf(c, "Zoo filter, LZ%s", (use_lzh?"H":"D"));
811 crc_reported = (u32)de_getu32le(c->infile->len-2);
812 de_dbg(c, "crc (reported): 0x%04x", (UI)crc_reported);
814 outf = dbuf_create_output_file(c, "bin", NULL, 0);
815 crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
816 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)crco);
818 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
819 dcmpri.f = c->infile;
820 dcmpri.pos = 2;
821 dcmpri.len = c->infile->len - 4;
823 dcmpro.f = outf;
824 dcmpro.len_known = 0;
826 if(use_lzh) {
827 decompress_lzh(c, &dcmpri, &dcmpro, &dres);
829 else {
830 decompress_lzd(c, &dcmpri, &dcmpro, &dres);
833 if(dres.errcode) {
834 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
835 goto done;
838 crc_calculated = de_crcobj_getval(crco);
839 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calculated);
840 if(crc_calculated != crc_reported) {
841 de_err(c, "CRC check failed");
842 goto done;
845 done:
846 dbuf_close(outf);
847 de_crcobj_destroy(crco);
850 static int de_identify_zoo_filter(deark *c)
852 u8 b[2];
854 if(c->infile->len<6) return 0;
855 if(de_getu16le(0) != 0x5a32) return 0;
857 // LZH ends with 16 0 bits, followed by 0 to 7 bits of padding that we
858 // will hope are 0. So it must end with two 0x00 bytes.
859 // LZD ends with the EOF code: 257. By my calculation, one of the 1 bits
860 // from that code must occur in the second-to-last byte. And the last byte
861 // can have at most one '1' bit.
862 de_read(b, c->infile->len-4, 2);
863 if(b[0]==0) {
864 if(b[1]==0) return 45; // Possible LZH
866 else {
867 if(b[1]<=0x02 || b[1]==0x04 || b[1]==0x08 || b[1]==0x10 ||
868 b[1]==0x20 || b[1]==0x40 || b[1]==0x80)
870 return 45; // Possible LZD
873 return 0;
876 void de_module_zoo_filter(deark *c, struct deark_module_info *mi)
878 mi->id = "zoo_filter";
879 mi->desc = "Zoo filter format";
880 mi->run_fn = de_run_zoo_filter;
881 mi->identify_fn = de_identify_zoo_filter;
884 /////////////////////
886 struct zoo_z_ctx {
887 i64 outf_member_pos;
888 i64 outf_comment_pos;
889 i64 comment_len;
890 i64 outf_leader_pos;
891 i64 outf_cmpr_pos;
892 i64 cmpr_len;
893 i64 outf_trailer_pos;
894 i64 inf_comment_pos;
895 i64 inf_cmpr_pos;
898 // Convert Zoo Z format to Zoo format
899 // TODO?: Write to Zoo 2.x format instead of 1.20 format. But it's more trouble.
900 static void de_run_zoo_z(deark *c, de_module_params *mparams)
902 dbuf *outf = NULL;
903 static const u8 archivehdr[34] = {0x5a,0x4f,0x4f,0x20,0x31,0x2e,0x32,0x30,0x20,0x41,
904 0x72,0x63,0x68,0x69,0x76,0x65,0x2e,0x1a,0x00,0x00,0xdc,0xa7,0xc4,0xfd,0x22,0x00,
905 0x00,0x00,0xde,0xff,0xff,0xff,0x01,0x01};
906 struct zoo_z_ctx *zctx = NULL;
907 int need_errmsg = 0;
909 de_declare_fmtf(c, "Zoo Z, DOS-compatible");
911 zctx = de_malloc(c, sizeof(struct zoo_z_ctx));
912 if(dbuf_memcmp(c->infile, 0, "\xfe\x07\x01", 3)) {
913 de_err(c, "File not in Zoo Z format, or not a supported version");
914 goto done;
917 need_errmsg = 1;
918 zctx->cmpr_len = de_getu32le(14);
919 de_dbg(c, "compressed size: %"I64_FMT, zctx->cmpr_len);
920 zctx->comment_len = de_getu16le(20);
921 de_dbg(c, "comment: size=%d", (int)zctx->comment_len);
923 // Figure out where everything will go.
924 zctx->outf_member_pos = 34;
925 zctx->outf_leader_pos = zctx->outf_member_pos + 52;
926 zctx->outf_cmpr_pos = zctx->outf_leader_pos + 5;
927 zctx->outf_comment_pos = zctx->outf_cmpr_pos + zctx->cmpr_len;
928 zctx->outf_trailer_pos = zctx->outf_comment_pos + zctx->comment_len;
929 zctx->inf_comment_pos = 36;
930 zctx->inf_cmpr_pos = zctx->inf_comment_pos + zctx->comment_len;
932 if(zctx->inf_comment_pos+zctx->comment_len > c->infile->len) goto done;
933 if(zctx->inf_cmpr_pos+zctx->cmpr_len > c->infile->len) goto done;
935 outf = dbuf_create_output_file(c, "zoo", NULL, 0);
937 // Archive header
938 dbuf_write(outf, archivehdr, 34);
940 // Main member header
941 dbuf_writeu32le(outf, ZOO_SIGNATURE);
942 dbuf_writebyte(outf, 1); // "type"
943 dbuf_copy(c->infile, 3, 1, outf); // packing method
945 dbuf_writeu32le(outf, zctx->outf_trailer_pos);
946 dbuf_writeu32le(outf, zctx->outf_cmpr_pos);
948 // date, time, crc, sizeorig, sizenow, maj ver, min ver
949 dbuf_copy(c->infile, 4, 16, outf);
951 dbuf_writebyte(outf, 0); // "deleted" flag
952 dbuf_writebyte(outf, 0); // file structure / reserved
953 dbuf_writeu32le(outf, zctx->comment_len?zctx->outf_comment_pos:0);
954 dbuf_writeu16le(outf, zctx->comment_len);
955 dbuf_copy(c->infile, 22, 13, outf); // filename
956 dbuf_writebyte(outf, 0x4f); // ??? This seems to be what Zoo does
958 dbuf_write(outf, (const u8*)"@)#(\0", 5); // leader
959 dbuf_copy(c->infile, zctx->inf_cmpr_pos, zctx->cmpr_len, outf); // cmpr data
961 if(zctx->comment_len) {
962 dbuf_copy(c->infile, zctx->inf_comment_pos, zctx->comment_len, outf);
965 dbuf_writeu32le(outf, ZOO_SIGNATURE);
966 dbuf_write_zeroes(outf, 48);
967 need_errmsg = 0;
969 done:
970 dbuf_close(outf);
971 if(need_errmsg) {
972 de_err(c, "Conversion to Zoo format failed");
974 de_free(c, zctx);
977 static int de_identify_zoo_z(deark *c)
979 if(dbuf_memcmp(c->infile, 0, "\xfe\x07\x01", 3)) return 0;
980 return 80;
983 void de_module_zoo_z(deark *c, struct deark_module_info *mi)
985 mi->id = "zoo_z";
986 mi->desc = "Zoo Z format";
987 mi->run_fn = de_run_zoo_z;
988 mi->identify_fn = de_identify_zoo_z;