Minor refactoring of the IFF and box-format parsers
[deark.git] / modules / lbr.c
blob5fac77e7d94ded7b920207e3086c9719b16e3a41
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // LBR - uncompressed CP/M archive format
6 // Squeeze compressed file
7 // Crunch v1 compressed file
8 // CRLZH compressed file
9 // ZSQ compressed file
10 // LZWCOM compressed file
12 #include <deark-private.h>
13 #include <deark-fmtutil.h>
14 DE_DECLARE_MODULE(de_module_lbr);
15 DE_DECLARE_MODULE(de_module_squeeze);
16 DE_DECLARE_MODULE(de_module_crunch);
17 DE_DECLARE_MODULE(de_module_crlzh);
18 DE_DECLARE_MODULE(de_module_zsq);
19 DE_DECLARE_MODULE(de_module_lzwcom);
// Size in bytes of one LBR directory entry.
21 #define LBR_DIRENT_SIZE 32
// Size in bytes of one sector; member offsets and lengths are stored in sectors.
22 #define LBR_SECTOR_SIZE 128
// Decoded fields of a single LBR directory entry, plus values derived from them.
24 struct member_data {
25 int is_dir; // nonzero if this is the entry that describes the directory itself
26 u8 status; // status byte: 0x00 = active, 0xff = unused (end of directory), other = deleted
27 u8 pad_count; // number of padding bytes at the end of the last sector
28 u32 crc_reported; // 16-bit CRC value stored in the entry
29 u32 crc_calc; // CRC computed while extracting
30 i64 pos_in_sectors; // data offset, in sectors
31 i64 pos_in_bytes; // pos_in_sectors * LBR_SECTOR_SIZE
32 i64 len_in_sectors; // data length, in sectors
33 i64 len_in_bytes_withpadding; // len_in_sectors * LBR_SECTOR_SIZE
34 i64 len_in_bytes_nopadding; // len_in_bytes_withpadding minus pad_count
35 de_ucstring *fn; // "NAME.EXT" built from the 8.3 fields; not filled in for the directory entry
36 struct de_timestamp create_timestamp; // set only if the entry's creation date is nonzero
37 struct de_timestamp change_timestamp; // set only if the entry's change date is nonzero
// Per-run state of the LBR module.
40 typedef struct localctx_struct {
41 de_encoding input_encoding; // encoding used when reading member filenames
42 i64 dir_len_in_bytes; // length of the directory, set when the directory's own entry is read
43 struct de_crcobj *crco; // CRC object, reset and reused for every entry
44 } lctx;
46 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
48 struct de_crcobj *crco = (struct de_crcobj*)userdata;
49 de_crcobj_addbuf(crco, buf, buf_len);
52 static void do_extract_member(deark *c, lctx *d, struct member_data *md)
54 de_finfo *fi = NULL;
55 dbuf *outf = NULL;
57 fi = de_finfo_create(c);
58 if(md->is_dir) {
59 fi->is_directory = 1;
60 fi->is_root_dir = 1;
62 else {
63 de_finfo_set_name_from_ucstring(c, fi, md->fn, 0);
64 fi->original_filename_flag = 1;
67 if(md->create_timestamp.is_valid) {
68 fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_timestamp;
70 if(md->change_timestamp.is_valid) {
71 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->change_timestamp;
74 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
76 de_crcobj_reset(d->crco);
77 if(md->is_dir) {
78 de_crcobj_addslice(d->crco, c->infile, md->pos_in_bytes, 16);
79 de_crcobj_addzeroes(d->crco, 2); // The 2-byte CRC field
80 de_crcobj_addslice(d->crco, c->infile, md->pos_in_bytes+18, md->len_in_bytes_withpadding-18);
82 else {
83 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
84 dbuf_copy(c->infile, md->pos_in_bytes, md->len_in_bytes_nopadding, outf);
85 // CRC calculation includes padding bytes:
86 de_crcobj_addslice(d->crco, c->infile,
87 md->pos_in_bytes + md->len_in_bytes_nopadding,
88 md->len_in_bytes_withpadding - md->len_in_bytes_nopadding);
90 md->crc_calc = de_crcobj_getval(d->crco);
91 de_dbg(c, "crc (calculated): 0x%04x", (UI)md->crc_calc);
93 de_finfo_destroy(c, fi);
94 dbuf_close(outf);
97 static void read_8_3_filename(deark *c, lctx *d, struct member_data *md, i64 pos)
99 de_ucstring *ext = NULL;
101 dbuf_read_to_ucstring(c->infile, pos, 8, md->fn, 0, d->input_encoding);
102 ucstring_strip_trailing_spaces(md->fn);
103 if(md->fn->len==0) {
104 ucstring_append_char(md->fn, '_');
107 ext = ucstring_create(c);
108 dbuf_read_to_ucstring(c->infile, pos+8, 3, ext, 0, d->input_encoding);
109 ucstring_strip_trailing_spaces(ext);
110 if(ext->len>0) {
111 ucstring_append_char(md->fn, '.');
112 ucstring_append_ucstring(md->fn, ext);
115 ucstring_destroy(ext);
118 static void handle_timestamp(deark *c, lctx *d, i64 date_raw, i64 time_raw,
119 struct de_timestamp *ts, const char *name)
121 i64 ut;
122 char timestamp_buf[64];
124 if(date_raw==0) {
125 de_dbg(c, "%s: [not set]", name);
126 return;
129 // Day 0 is Dec 31, 1977 (or it would be, if 0 weren't reserved).
130 // Difference from Unix time (Jan 1, 1970) =
131 // 365 days in 1970, 1971, 1973, 1974, 1975
132 // + 366 days in 1972, 1976
133 // + 364 days in 1977.
134 ut = 86400 * (date_raw + (365*5 + 366*2 + 364));
136 // Time of day is in DOS format.
137 ut += 3600*(time_raw>>11); // hours
138 ut += 60*(time_raw&0x07e0)>>5; // minutes
139 ut += 2*(time_raw&0x001f); // seconds
140 de_unix_time_to_timestamp(ut, ts, 0);
141 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
142 de_dbg(c, "%s: %s", name, timestamp_buf);
145 static void on_bad_dir(deark *c)
147 de_err(c, "Bad directory. This is probably not an LBR file.");
// Parses the 32-byte directory entry at pos1 and, if it is an active entry,
// extracts the item it describes.
150 // Returns nonzero if we can continue.
// Returns 0 on the unused-entry marker (0xff), on a bad directory entry, or if
// the directory itself extends past the end of the file.
151 // if is_dir, sets d->dir_len_in_bytes.
152 static int do_entry(deark *c, lctx *d, i64 pos1, int is_dir)
154 int retval = 0;
155 int saved_indent_level;
156 struct member_data *md = NULL;
157 i64 crdate, chdate, crtime, chtime;
159 de_dbg_indent_save(c, &saved_indent_level);
160 md = de_malloc(c, sizeof(struct member_data));
161 md->is_dir = is_dir;
163 de_dbg(c, "%s entry at %"I64_FMT, (md->is_dir?"dir":"file"), pos1);
164 de_dbg_indent(c, 1);
// Offset 0: status byte.
166 md->status = de_getbyte(pos1);
167 de_dbg(c, "status: 0x%02x", (UI)md->status);
// The directory's own entry must be active.
168 if(md->is_dir && md->status!=0x00) {
169 on_bad_dir(c);
170 goto done;
172 if(md->status==0xff) { // unused entry - marks end of directory
173 goto done;
175 if(md->status!=0x00) { // deleted entry (should be 0xfe)
176 de_dbg(c, "[deleted]");
177 retval = 1;
178 goto done;
// Offsets 1-11: 8.3 filename. It is not read for the directory entry.
181 md->fn = ucstring_create(c);
182 if(!md->is_dir) {
183 read_8_3_filename(c, d, md, pos1+1);
184 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
// Offset 12: data position, in sectors.
187 md->pos_in_sectors = de_getu16le(pos1+12);
188 md->pos_in_bytes = md->pos_in_sectors * LBR_SECTOR_SIZE;
189 de_dbg(c, "data offset: %"I64_FMT" (sector %"I64_FMT")", md->pos_in_bytes, md->pos_in_sectors);
// The directory entry must point at itself.
190 if(md->is_dir && md->pos_in_bytes!=pos1) {
191 on_bad_dir(c);
192 goto done;
// Offset 14: data length, in sectors.
195 md->len_in_sectors = de_getu16le(pos1+14);
196 de_dbg(c, "length in sectors: %"I64_FMT, md->len_in_sectors);
// Offset 16: stored CRC.
198 md->crc_reported = (u32)de_getu16le(pos1+16);
199 de_dbg(c, "crc (reported): 0x%04x", (UI)md->crc_reported);
201 // 18-25: timestamps (creation date, change date, creation time, change time)
202 crdate = de_getu16le(pos1+18);
203 chdate = de_getu16le(pos1+20);
204 crtime = de_getu16le(pos1+22);
205 chtime = de_getu16le(pos1+24);
206 handle_timestamp(c, d, crdate, crtime, &md->create_timestamp, "creation time");
207 handle_timestamp(c, d, chdate, chtime, &md->change_timestamp, "last changed time");
// Offset 26: number of padding bytes in the last sector. An out-of-range
// value, or any value on a zero-length item, is treated as 0.
209 md->pad_count = de_getbyte(pos1+26);
210 de_dbg(c, "pad count: %u", (UI)md->pad_count);
211 if(md->pad_count>=LBR_SECTOR_SIZE || md->len_in_sectors<1) {
212 md->pad_count = 0;
215 md->len_in_bytes_withpadding = md->len_in_sectors*LBR_SECTOR_SIZE;
216 md->len_in_bytes_nopadding = md->len_in_bytes_withpadding - (i64)md->pad_count;
217 de_dbg(c, "length in bytes: %"I64_FMT, md->len_in_bytes_nopadding);
// A truncated member is skipped, but processing continues with the next
// entry. A truncated directory stops processing (retval stays 0).
219 if(md->pos_in_bytes + md->len_in_bytes_nopadding > c->infile->len) {
220 de_err(c, "Unexpected end of file");
221 if(!md->is_dir) {
222 retval = 1;
224 goto done;
227 if(md->is_dir) {
228 d->dir_len_in_bytes = md->len_in_bytes_nopadding;
230 retval = 1;
232 do_extract_member(c, d, md);
234 done:
// md->fn may still be NULL here if we bailed out early.
235 if(md) {
236 ucstring_destroy(md->fn);
237 de_free(c, md);
239 de_dbg_indent_restore(c, saved_indent_level);
240 return retval;
243 static void de_run_lbr(deark *c, de_module_params *mparams)
245 lctx *d = NULL;
246 i64 pos = 0;
248 d = de_malloc(c, sizeof(lctx));
249 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
251 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_CCITT);
253 // Read directory
254 if(!do_entry(c, d, pos, 1)) goto done;
255 pos += LBR_DIRENT_SIZE;
257 // Read member files
258 while(pos+LBR_DIRENT_SIZE <= c->infile->len &&
259 pos+LBR_DIRENT_SIZE <= d->dir_len_in_bytes)
261 if(!do_entry(c, d, pos, 0)) goto done;
262 pos += LBR_DIRENT_SIZE;
265 done:
266 if(d) {
267 de_crcobj_destroy(d->crco);
268 de_free(c, d);
272 static int de_identify_lbr(deark *c)
274 // TODO: Better detection is possible
275 if(!dbuf_memcmp(c->infile, 0, "\x00\x20\x20\x20\x20\x20\x20\x20\x20"
276 "\x20\x20\x20\x00\x00", 14))
277 return 100;
278 return 0;
281 void de_module_lbr(deark *c, struct deark_module_info *mi)
283 mi->id = "lbr";
284 mi->desc = "LBR archive";
285 mi->run_fn = de_run_lbr;
286 mi->identify_fn = de_identify_lbr;
289 ///////////////////////////////////////////////
290 // Squeeze - CP/M compressed file format
292 // For Crunch/CRLZH(/Squeeze?) filename fields
// Result of parsing a NUL-terminated filename field.
293 struct crcr_filename_data {
294 de_ucstring *fn; // the filename, with trailing spaces removed
295 de_ucstring *comment; // text found between '[' and ']'; NULL if there was none
296 i64 size; // number of bytes the field occupies, including the NUL terminator
// Parses the NUL-terminated filename field that starts at pos1.
// The field may hold, besides the filename, a bracketed comment and a date
// section introduced by a 0x01 byte (the date is skipped, not decoded).
// On success, returns 1 and sets fnd->fn, fnd->comment (if any) and fnd->size.
// Returns 0 if no NUL terminator is found within about 300 bytes, or before
// the end of the file. fnd->fn is allocated in either case, so the caller
// still has to free the contents of fnd.
299 static int crcr_read_filename_etc(deark *c, i64 pos1, struct crcr_filename_data *fnd)
301 int retval = 0;
302 i64 pos = pos1;
303 enum crcrfnstate {
304 CRCRFNST_NEUTRAL, CRCRFNST_FILENAME, CRCRFNST_COMMENT, CRCRFNST_DATE
306 enum crcrfnstate state = CRCRFNST_FILENAME;
307 int found_dot = 0;
308 int extension_char_count = 0;
// One slot per extension byte; '.' means the attribute bit was not set.
309 char attr_str[4] = "...";
310 static const char attr_codes[3] = {'R', 'S', 'A'};
311 int found_attr = 0;
313 // Note: Only ASCII can really be supported, because the characters are 7-bit.
314 // Normally, we'd use ucstring_append_bytes_ex() for something like this, but
315 // it's pointless here.
316 fnd->fn = ucstring_create(c);
318 while(1) {
// b1 is the raw byte, b2 is the same byte with the high bit cleared.
319 u8 b1, b2;
321 // Note: CFX limits this entire field to about 80 bytes.
322 if(pos-pos1 > 300) goto done;
323 if(pos >= c->infile->len) goto done;
325 b1 = de_getbyte_p(&pos);
// A NUL byte ends the field. pos now points just past it.
326 if(b1==0) {
327 break;
329 b2 = b1 & 0x7f;
331 if(b2==0x01) {
332 state = CRCRFNST_DATE; // TODO: Figure this field out
334 else if(state==CRCRFNST_FILENAME && b2=='[') {
335 state = CRCRFNST_COMMENT;
// Anything that follows a complete 3-character extension is ignored.
337 else if(state==CRCRFNST_FILENAME && extension_char_count>=3) {
338 state = CRCRFNST_NEUTRAL;
340 else if(state==CRCRFNST_FILENAME) {
341 ucstring_append_char(fnd->fn, (de_rune)b2);
342 if(found_dot) {
343 if(extension_char_count<3 && (b1 & 0x80)) {
344 // The CP/M low-level directory structure uses the high bit of
345 // the file extension bytes to store attributes. Some Crunch/
346 // CRLZH files do the same thing.
347 // CP/M also uses the high bit of the *filename*, for less-common
348 // attributes, but that doesn't seem possible here, because all 8
349 // bytes are not always stored.
350 found_attr = 1;
351 attr_str[extension_char_count] = attr_codes[extension_char_count];
353 extension_char_count++;
355 else {
356 if(b2=='.') found_dot = 1;
359 else if(state==CRCRFNST_COMMENT && b2==']') {
360 state = CRCRFNST_NEUTRAL;
362 else if(state==CRCRFNST_COMMENT) {
// The comment string is created lazily, on its first character.
363 if(!fnd->comment) {
364 fnd->comment = ucstring_create(c);
366 ucstring_append_char(fnd->comment, (de_rune)b2);
370 ucstring_strip_trailing_spaces(fnd->fn);
// The size includes the NUL terminator.
371 fnd->size = pos - pos1;
372 retval = 1;
373 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(fnd->fn));
375 if(found_attr) {
376 de_dbg(c, "attribs: %s", attr_str);
379 if(fnd->comment) {
380 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(fnd->comment));
383 done:
384 return retval;
387 static void crcr_filename_data_freecontents(deark *c, struct crcr_filename_data *fnd)
389 ucstring_destroy(fnd->fn);
390 ucstring_destroy(fnd->comment);
// Per-run state of the Squeeze module.
393 struct squeeze_ctx {
394 u8 is_sq2; // 1 if the file has the SQ2 signature (0xfffa)
395 de_encoding input_encoding; // encoding used for the SQ2 header strings
396 struct crcr_filename_data fnd; // parsed filename field
397 struct de_stringreaderdata *sq2_timestamp_string; // SQ2 only
398 struct de_stringreaderdata *sq2_comment; // SQ2 only
399 UI checksum_reported; // 16-bit checksum read from the header
400 UI checksum_calc; // sum of the decompressed bytes; masked to 16 bits before comparing
401 i64 cmpr_data_pos; // offset at which the compressed data starts
402 struct de_timestamp timestamp; // from the optional record after the compressed data
405 static void squeeze_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
407 struct squeeze_ctx *sqctx = (struct squeeze_ctx*)userdata;
408 i64 i;
410 for(i=0; i<buf_len; i++) {
411 sqctx->checksum_calc += buf[i];
415 static void do_sqeeze_timestamp(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
417 UI cksum_calc = 0;
418 UI cksum_reported;
419 i64 pos = pos1;
420 i64 sig;
421 i64 dt_raw, tm_raw;
422 char timestamp_buf[64];
424 if(c->infile->len-pos1 < 8) return;
425 sig = de_getu16le_p(&pos);
426 if(sig != 0xff77) return;
427 dt_raw = de_getu16le_p(&pos);
428 tm_raw = de_getu16le_p(&pos);
429 cksum_reported = (UI)de_getu16le_p(&pos);
430 cksum_calc = ((UI)sig + (UI)dt_raw + (UI)tm_raw)&0xffff;
431 if(cksum_calc != cksum_reported) return; // Presumably a false positive signature
433 de_dbg(c, "timestamp at %"I64_FMT, pos1);
434 de_dbg_indent(c, 1);
435 de_dos_datetime_to_timestamp(&sqctx->timestamp, dt_raw, tm_raw);
437 sqctx->timestamp.tzcode = DE_TZCODE_LOCAL;
438 de_timestamp_to_string(&sqctx->timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
439 de_dbg(c, "timestamp: %s", timestamp_buf);
441 de_dbg(c, "timestamp checksum (calculated): 0x%04x", cksum_calc);
442 de_dbg(c, "timestamp checksum (reported): 0x%04x", cksum_reported);
443 de_dbg_indent(c, -1);
446 static void read_squeeze_checksum(deark *c, struct squeeze_ctx *sqctx, i64 pos)
448 sqctx->checksum_reported = (u32)de_getu16le_p(&pos);
449 de_dbg(c, "checksum (reported): %u", (UI)sqctx->checksum_reported);
452 static int read_squeeze_headers(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
454 i64 pos = pos1;
455 int retval = 0;
457 read_squeeze_checksum(c, sqctx, pos);
458 pos += 2;
460 // I don't know the correct way to interpret the Squeeze filename field, if
461 // there even is such a way.
462 // Some Unsqueeze utilities accept it as-is, some truncate it after the third
463 // filename extension byte, some interpret it the same as Crunch format
464 // (including ignoring the high bit of every byte, for some reason).
465 // Doing it the Crunch way is probably safe.
466 if(!crcr_read_filename_etc(c, pos, &sqctx->fnd)) goto done;
467 pos += sqctx->fnd.size;
469 sqctx->cmpr_data_pos = pos;
470 retval = 1;
471 done:
472 if(!retval) {
473 de_err(c, "Malformed header");
475 return retval;
478 static int read_sq2_headers(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
480 i64 pos = pos1;
481 u8 b;
482 int retval = 0;
484 if(!crcr_read_filename_etc(c, pos, &sqctx->fnd)) goto done;
485 pos += sqctx->fnd.size;
487 sqctx->sq2_timestamp_string = dbuf_read_string(c->infile, pos, 300, 300,
488 DE_CONVFLAG_STOP_AT_NUL, sqctx->input_encoding);
489 if(!sqctx->sq2_timestamp_string->found_nul) goto done;
490 de_dbg(c, "timestamp_string: \"%s\"", ucstring_getpsz_d(sqctx->sq2_timestamp_string->str));
491 pos += sqctx->sq2_timestamp_string->bytes_consumed;
493 sqctx->sq2_comment = dbuf_read_string(c->infile, pos, 300, 300,
494 DE_CONVFLAG_STOP_AT_NUL, sqctx->input_encoding);
495 if(!sqctx->sq2_comment->found_nul) goto done;
496 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(sqctx->sq2_comment->str));
497 pos += sqctx->sq2_comment->bytes_consumed;
499 b = de_getbyte_p(&pos);
500 if(b != 0x1a) goto done;
502 read_squeeze_checksum(c, sqctx, pos);
503 pos += 2;
505 pos += 4; // ?
507 sqctx->cmpr_data_pos = pos;
508 retval = 1;
510 done:
511 if(!retval) {
512 de_err(c, "Malformed header");
514 return retval;
// Module entry point for Squeeze and SQ2 files.
// Reads the header, decompresses the data (Squeeze Huffman layer followed by
// an RLE90 layer) into a memory buffer, looks for a timestamp record after
// the compressed data, writes the output file, and verifies the checksum.
517 static void de_run_squeeze(deark *c, de_module_params *mparams)
519 i64 pos = 0;
520 i64 n;
521 struct squeeze_ctx *sqctx = NULL;
522 de_finfo *fi = NULL;
523 dbuf *outf_tmp = NULL;
524 dbuf *outf_final = NULL;
525 int saved_indent_level;
526 struct de_dfilter_in_params dcmpri;
527 struct de_dfilter_out_params dcmpro;
528 struct de_dfilter_results dres;
529 struct de_dcmpr_two_layer_params tlp;
531 de_dbg_indent_save(c, &saved_indent_level);
532 sqctx = de_malloc(c, sizeof(struct squeeze_ctx));
533 sqctx->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
// The 16-bit signature selects the header layout.
535 n = de_getu16le_p(&pos);
536 if(n==0xff76) {
537 de_declare_fmt(c, "Squeezed");
539 else if(n==0xfffa) {
540 de_declare_fmt(c, "Squeeze v2 (SQ2)");
541 sqctx->is_sq2 = 1;
543 else {
// Note that this is only a debug message, not an error.
544 de_dbg(c, "Not a Squeezed file");
545 goto done;
548 if(sqctx->is_sq2) {
549 if(!read_sq2_headers(c, sqctx, pos)) goto done;
551 else {
552 if(!read_squeeze_headers(c, sqctx, pos)) goto done;
555 pos = sqctx->cmpr_data_pos;
557 fi = de_finfo_create(c);
558 de_finfo_set_name_from_ucstring(c, fi, sqctx->fnd.fn, 0);
559 fi->original_filename_flag = 1;
561 de_dbg(c, "squeeze-compressed data at %"I64_FMT, pos);
562 de_dbg_indent(c, 1);
564 // We have to decompress the file before we can find the timestamp. That's
565 // why we decompress to a membuf.
566 outf_tmp = dbuf_create_membuf(c, 0, 0);
568 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
569 dcmpri.f = c->infile;
570 dcmpri.pos = pos;
571 dcmpri.len = c->infile->len - pos;
572 dcmpro.f = outf_tmp;
// The listener accumulates the checksum of the decompressed bytes.
574 dbuf_set_writelistener(outf_tmp, squeeze_writelistener_cb, (void*)sqctx);
576 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
577 tlp.codec1_type1 = fmtutil_huff_squeeze_codectype1;
578 tlp.codec2 = dfilter_rle90_codec;
579 tlp.dcmpri = &dcmpri;
580 tlp.dcmpro = &dcmpro;
581 tlp.dres = &dres;
582 de_dfilter_decompress_two_layer(c, &tlp);
// The timestamp record can only be located if we know where the compressed
// data ended.
584 if(dres.bytes_consumed_valid) {
585 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
586 dcmpri.pos+dres.bytes_consumed);
588 do_sqeeze_timestamp(c, sqctx, dcmpri.pos+dres.bytes_consumed);
589 if(sqctx->timestamp.is_valid) {
590 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = sqctx->timestamp;
// The output file is written before the error check, so whatever was
// decompressed is saved even if decompression failed.
594 outf_final = dbuf_create_output_file(c, NULL, fi, 0);
595 dbuf_copy(outf_tmp, 0, outf_tmp->len, outf_final);
597 if(dres.errcode) {
598 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
599 goto done;
// The checksum is the low 16 bits of the sum of the decompressed bytes.
602 sqctx->checksum_calc &= 0xffff;
603 de_dbg(c, "checksum (calculated): %u", (UI)sqctx->checksum_calc);
604 if(sqctx->checksum_calc != sqctx->checksum_reported) {
605 de_err(c, "Checksum error. Decompression probably failed.");
606 goto done;
609 done:
610 if(sqctx) {
611 crcr_filename_data_freecontents(c, &sqctx->fnd);
612 de_destroy_stringreaderdata(c, sqctx->sq2_timestamp_string);
613 de_destroy_stringreaderdata(c, sqctx->sq2_comment);
614 de_free(c, sqctx);
616 dbuf_close(outf_final);
617 dbuf_close(outf_tmp);
618 de_finfo_destroy(c, fi);
619 de_dbg_indent_restore(c, saved_indent_level);
622 static int de_identify_squeeze(deark *c)
624 i64 id;
626 id = de_getu16le(0);
627 if(id==0xff76) return 70;
628 if(id==0xfffa) return 25; // SQ2
629 return 0;
632 void de_module_squeeze(deark *c, struct deark_module_info *mi)
634 mi->id = "squeeze";
635 mi->desc = "Squeeze (CP/M)";
636 mi->run_fn = de_run_squeeze;
637 mi->identify_fn = de_identify_squeeze;
640 ///////////////////////////////////////////////
641 // Crunch - CP/M compressed file format
// Per-run state of the Crunch module.
643 struct crunch_ctx {
644 struct crcr_filename_data fnd; // parsed filename field
645 u8 fmtver; // 1 or 2, 0 if unknown
646 u8 cksum_type; // checksum type byte from the header; 0 = standard
647 UI checksum_reported; // 16-bit checksum read after the compressed data
648 UI checksum_calc; // sum of the decompressed bytes; masked to 16 bits before comparing
651 static void crunch_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
653 struct crunch_ctx *crunchctx = (struct crunch_ctx*)userdata;
654 i64 i;
656 for(i=0; i<buf_len; i++) {
657 crunchctx->checksum_calc += buf[i];
661 static void decompress_crunch_v1(deark *c, struct crunch_ctx *crunchctx, i64 pos1)
663 de_finfo *fi = NULL;
664 dbuf *outf = NULL;
665 i64 pos = pos1;
666 struct de_dfilter_in_params dcmpri;
667 struct de_dfilter_out_params dcmpro;
668 struct de_dfilter_results dres;
669 struct de_lzw_params delzwp;
670 struct de_dcmpr_two_layer_params tlp;
672 de_dbg_indent(c, 1);
673 fi = de_finfo_create(c);
674 de_finfo_set_name_from_ucstring(c, fi, crunchctx->fnd.fn, 0);
675 fi->original_filename_flag = 1;
677 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
678 dbuf_set_writelistener(outf, crunch_writelistener_cb, (void*)crunchctx);
680 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
681 dcmpri.f = c->infile;
682 dcmpri.pos = pos;
683 dcmpri.len = c->infile->len - pos;
684 dcmpro.f = outf;
686 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
687 delzwp.fmt = DE_LZWFMT_ARC5;
688 delzwp.arc5_has_stop_code = 1;
690 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
691 tlp.codec1_pushable = dfilter_lzw_codec;
692 tlp.codec1_private_params = (void*)&delzwp;
693 tlp.codec2 = dfilter_rle90_codec;
694 tlp.dcmpri = &dcmpri;
695 tlp.dcmpro = &dcmpro;
696 tlp.dres = &dres;
697 de_dfilter_decompress_two_layer(c, &tlp);
699 if(dres.errcode) {
700 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
701 goto done;
704 if(dres.bytes_consumed_valid) {
705 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
706 dcmpri.pos+dres.bytes_consumed);
707 pos += dres.bytes_consumed;
709 if(crunchctx->cksum_type==0) {
710 crunchctx->checksum_calc &= 0xffff;
711 crunchctx->checksum_reported = (UI)de_getu16le_p(&pos);
712 de_dbg(c, "checksum (calculated): %u", crunchctx->checksum_calc);
713 de_dbg(c, "checksum (reported): %u", crunchctx->checksum_reported);
714 if(crunchctx->checksum_calc != crunchctx->checksum_reported) {
715 de_err(c, "Checksum error. Decompression probably failed.");
716 goto done;
721 done:
722 de_finfo_destroy(c, fi);
723 dbuf_close(outf);
724 de_dbg_indent(c, -1);
727 static void de_run_crunch(deark *c, de_module_params *mparams)
729 struct crunch_ctx *crunchctx = NULL;
730 i64 pos = 0;
731 u8 b;
732 u8 fmtver_raw;
733 const char *verstr;
735 crunchctx = de_malloc(c, sizeof(struct crunch_ctx));
737 pos += 2;
738 if(!crcr_read_filename_etc(c, pos, &crunchctx->fnd)) goto done;
739 pos += crunchctx->fnd.size;
741 b = de_getbyte_p(&pos);
742 de_dbg(c, "encoder version: 0x%02x", (UI)b);
744 fmtver_raw = de_getbyte_p(&pos);
745 if(fmtver_raw>=0x10 && fmtver_raw<=0x1f) {
746 crunchctx->fmtver = 1;
747 verstr = "old";
749 else if(fmtver_raw>=0x20 && fmtver_raw<=0x2f) {
750 crunchctx->fmtver = 2;
751 verstr = "new";
753 else {
754 verstr = "?";
756 de_dbg(c, "format version: 0x%02x (%s)", (UI)fmtver_raw, verstr);
757 if(crunchctx->fmtver!=0) {
758 de_declare_fmtf(c, "Crunch (v%d)", (int)crunchctx->fmtver);
761 crunchctx->cksum_type = de_getbyte_p(&pos);
762 de_dbg(c, "checksum type: 0x%02x (%s)", (UI)crunchctx->cksum_type,
763 (crunchctx->cksum_type==0?"standard":"?"));
765 b = de_getbyte_p(&pos);
766 de_dbg(c, "unused info byte: 0x%02x", (UI)b);
768 de_dbg(c, "compressed data at %"I64_FMT, pos);
769 if(crunchctx->fmtver==1) {
770 decompress_crunch_v1(c, crunchctx, pos);
772 else {
773 // v2 is by far the most common version, but it's not easy to support.
774 // We support v1, only because it's easy.
775 de_err(c, "This version of Crunch is not supported");
778 done:
779 if(crunchctx) {
780 crcr_filename_data_freecontents(c, &crunchctx->fnd);
781 de_free(c, crunchctx);
785 static int de_identify_crunch(deark *c)
787 i64 id;
789 id = de_getu16le(0);
790 if(id==0xfe76) return 70;
791 return 0;
794 void de_module_crunch(deark *c, struct deark_module_info *mi)
796 mi->id = "crunch";
797 mi->desc = "Crunch (CP/M)";
798 mi->run_fn = de_run_crunch;
799 mi->identify_fn = de_identify_crunch;
802 ///////////////////////////////////////////////
803 // CRLZH - CP/M compressed file format
// Per-run state of the CRLZH module.
805 struct crlzh_ctx {
806 struct crcr_filename_data fnd; // parsed filename field
807 u8 fmtver; // 1 or 2, 0 if unknown
808 u8 cksum_type; // checksum type byte from the header; 0 = standard
809 UI checksum_reported; // 16-bit checksum read after the compressed data
810 UI checksum_calc; // sum of the decompressed bytes; masked to 16 bits before comparing
813 static void crlzh_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
815 struct crlzh_ctx *crlzhctx = (struct crlzh_ctx*)userdata;
816 i64 i;
818 for(i=0; i<buf_len; i++) {
819 crlzhctx->checksum_calc += buf[i];
823 static void decompress_crlzh(deark *c, struct crlzh_ctx *crlzhctx, i64 pos1)
825 de_finfo *fi = NULL;
826 dbuf *outf = NULL;
827 i64 pos = pos1;
828 struct de_dfilter_in_params dcmpri;
829 struct de_dfilter_out_params dcmpro;
830 struct de_dfilter_results dres;
831 struct de_lh1_params lh1p;
833 de_dbg_indent(c, 1);
834 fi = de_finfo_create(c);
835 de_finfo_set_name_from_ucstring(c, fi, crlzhctx->fnd.fn, 0);
836 fi->original_filename_flag = 1;
838 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
839 dbuf_set_writelistener(outf, crlzh_writelistener_cb, (void*)crlzhctx);
841 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
842 dcmpri.f = c->infile;
843 dcmpri.pos = pos;
844 dcmpri.len = c->infile->len - pos;
845 dcmpro.f = outf;
847 de_zeromem(&lh1p, sizeof(struct de_lh1_params));
848 if(crlzhctx->fmtver==1) {
849 lh1p.is_crlzh11 = 1;
851 else {
852 lh1p.is_crlzh20 = 1;
854 lh1p.history_fill_val = 0x20;
856 fmtutil_lh1_codectype1(c, &dcmpri, &dcmpro, &dres, (void*)&lh1p);
858 if(dres.errcode) {
859 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
860 goto done;
863 if(dres.bytes_consumed_valid) {
864 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
865 dcmpri.pos+dres.bytes_consumed);
866 pos += dres.bytes_consumed;
868 if(crlzhctx->cksum_type==0) {
869 crlzhctx->checksum_calc &= 0xffff;
870 crlzhctx->checksum_reported = (UI)de_getu16le_p(&pos);
871 de_dbg(c, "checksum (calculated): %u", crlzhctx->checksum_calc);
872 de_dbg(c, "checksum (reported): %u", crlzhctx->checksum_reported);
873 if(crlzhctx->checksum_calc != crlzhctx->checksum_reported) {
874 de_err(c, "Checksum error. Decompression probably failed.");
875 goto done;
880 done:
881 de_finfo_destroy(c, fi);
882 dbuf_close(outf);
883 de_dbg_indent(c, -1);
886 static void de_run_crlzh(deark *c, de_module_params *mparams)
888 struct crlzh_ctx *crlzhctx = NULL;
889 i64 pos = 0;
890 u8 b;
891 u8 fmtver_raw;
892 const char *verstr;
894 crlzhctx = de_malloc(c, sizeof(struct crlzh_ctx));
896 pos += 2;
897 if(!crcr_read_filename_etc(c, pos, &crlzhctx->fnd)) goto done;
898 pos += crlzhctx->fnd.size;
899 b = de_getbyte_p(&pos);
900 de_dbg(c, "encoder version: 0x%02x", (UI)b);
902 fmtver_raw = de_getbyte_p(&pos);
903 if(fmtver_raw<=0x1f) {
904 crlzhctx->fmtver = 1;
905 verstr = "old";
907 else if(fmtver_raw>=0x20 && fmtver_raw<=0x2f) {
908 // Note: Alternatives are ==0x20 (CFX), and >=0x20 (lbrate).
909 crlzhctx->fmtver = 2;
910 verstr = "new";
912 else {
913 verstr = "?";
915 de_dbg(c, "format version: 0x%02x (%s)", (UI)fmtver_raw, verstr);
916 if(crlzhctx->fmtver!=0) {
917 de_declare_fmtf(c, "CRLZH (v%d)", (int)crlzhctx->fmtver);
920 crlzhctx->cksum_type = de_getbyte_p(&pos);
921 de_dbg(c, "checksum type: 0x%02x (%s)", (UI)crlzhctx->cksum_type,
922 (crlzhctx->cksum_type==0?"standard":"?"));
924 b = de_getbyte_p(&pos);
925 de_dbg(c, "unused info byte: 0x%02x", (UI)b);
927 de_dbg(c, "compressed data at %"I64_FMT, pos);
928 decompress_crlzh(c, crlzhctx, pos);
930 done:
931 if(crlzhctx) {
932 crcr_filename_data_freecontents(c, &crlzhctx->fnd);
933 de_free(c, crlzhctx);
937 static int de_identify_crlzh(deark *c)
939 i64 id;
941 id = de_getu16le(0);
942 if(id==0xfd76) return 70;
943 return 0;
946 void de_module_crlzh(deark *c, struct deark_module_info *mi)
948 mi->id = "crlzh";
949 mi->desc = "CRLZH (CP/M)";
950 mi->run_fn = de_run_crlzh;
951 mi->identify_fn = de_identify_crlzh;
954 ///////////////////////////////////////////////
955 // ZSQ (ZSQUSQ)
956 // LZW compression utility by W. Chin, A. Kumar.
957 // Format used by v1.0, 1985-10-26.
// ZSQ signature: the ASCII characters "WACK", read as a big-endian 32-bit integer.
959 #define CODE_WACK 0x5741434bU
// Per-run state of the ZSQ module.
961 struct zsq_ctx {
962 de_encoding input_encoding; // encoding used when reading the filename
963 de_ucstring *fn; // filename from the header
964 UI checksum_reported; // 16-bit checksum read from the header
965 UI checksum_calc; // sum of the decompressed bytes; masked to 16 bits before comparing
966 struct de_timestamp timestamp; // decoded from the DOS date/time in the header
969 static void zsq_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
971 struct zsq_ctx *zsqctx = (struct zsq_ctx*)userdata;
972 i64 i;
974 for(i=0; i<buf_len; i++) {
975 zsqctx->checksum_calc += buf[i];
979 static void do_zsq_decompress(deark *c, struct zsq_ctx *zsqctx, i64 pos, dbuf *outf)
981 struct de_dfilter_in_params dcmpri;
982 struct de_dfilter_out_params dcmpro;
983 struct de_dfilter_results dres;
984 struct de_lzw_params delzwp;
986 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
987 delzwp.fmt = DE_LZWFMT_ARC5;
989 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
990 dcmpri.f = c->infile;
991 dcmpri.pos = pos;
992 dcmpri.len = c->infile->len - pos;
993 dcmpro.f = outf;
995 dbuf_set_writelistener(outf, zsq_writelistener_cb, (void*)zsqctx);
997 fmtutil_decompress_lzw(c, &dcmpri, &dcmpro, &dres, &delzwp);
999 zsqctx->checksum_calc &= 0xffff;
1000 de_dbg(c, "checksum (calculated): %u", (UI)zsqctx->checksum_calc);
1001 if(zsqctx->checksum_calc != zsqctx->checksum_reported) {
1002 de_err(c, "Checksum error. Decompression probably failed.");
1006 static void zsq_read_timestamp(deark *c, struct zsq_ctx *zsqctx, i64 pos)
1008 i64 dt_raw, tm_raw;
1009 char timestamp_buf[64];
1011 dt_raw = de_getu16le(pos);
1012 tm_raw = de_getu16le(pos+2);
1013 de_dos_datetime_to_timestamp(&zsqctx->timestamp, dt_raw, tm_raw);
1014 de_timestamp_to_string(&zsqctx->timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
1015 de_dbg(c, "timestamp: %s", timestamp_buf);
1018 static void de_run_zsq(deark *c, de_module_params *mparams)
1020 struct zsq_ctx *zsqctx = NULL;
1021 i64 pos = 0;
1022 i64 hdr_len;
1023 i64 hdr_endpos;
1024 u32 id;
1025 dbuf *outf = NULL;
1026 de_finfo *fi = NULL;
1028 zsqctx = de_malloc(c, sizeof(struct zsq_ctx));
1029 zsqctx->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1031 id = (u32)de_getu32be_p(&pos);
1032 if(id != CODE_WACK) {
1033 de_err(c, "Not a ZSQ file");
1034 goto done;
1037 fi = de_finfo_create(c);
1039 zsqctx->checksum_reported = (u32)de_getu16le_p(&pos);
1040 de_dbg(c, "checksum (reported): %u", (UI)zsqctx->checksum_reported);
1042 hdr_len = de_getu16le_p(&pos);
1043 hdr_endpos = pos + hdr_len;
1044 if(hdr_endpos > c->infile->len) {
1045 de_err(c, "Bad header length");
1046 goto done;
1049 zsq_read_timestamp(c, zsqctx, pos);
1050 pos += 4;
1052 zsqctx->fn = ucstring_create(c);
1053 dbuf_read_to_ucstring_n(c->infile, pos, hdr_endpos-pos, 255, zsqctx->fn,
1054 DE_CONVFLAG_STOP_AT_NUL, zsqctx->input_encoding);
1055 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(zsqctx->fn));
1057 de_finfo_set_name_from_ucstring(c, fi, zsqctx->fn, 0);
1058 fi->original_filename_flag = 1;
1060 pos = hdr_endpos;
1061 de_dbg(c, "compressed data at %"I64_FMT, pos);
1063 outf = dbuf_create_output_file(c, NULL, fi, 0);
1065 do_zsq_decompress(c, zsqctx, pos, outf);
1067 done:
1068 dbuf_close(outf);
1069 de_finfo_destroy(c, fi);
1070 if(zsqctx) {
1071 ucstring_destroy(zsqctx->fn);
1072 de_free(c, zsqctx);
1076 static int de_identify_zsq(deark *c)
1078 if(de_getu32be(0)==CODE_WACK) {
1079 return 90;
1081 return 0;
1084 void de_module_zsq(deark *c, struct deark_module_info *mi)
1086 mi->id = "zsq";
1087 mi->desc = "ZSQ (ZSQUSQ, LZW-compressed file)";
1088 mi->run_fn = de_run_zsq;
1089 mi->identify_fn = de_identify_zsq;
1092 // **************************************************************************
1093 // LZWCOM
1094 // **************************************************************************
// Per-run state of the LZWCOM module.
1096 struct lzwcom_ctx {
1097 int ver; // 1, 2, or -1 if unknown
1098 struct de_crcobj *crco; // CRC object used for the v2 per-block CRCs
1101 static void lzwcom_detect_version(deark *c, struct lzwcom_ctx *d)
1103 u32 crc_reported, crc_calc;
1105 if(c->infile->len < 1026) {
1106 d->ver = -1;
1107 return;
1110 de_crcobj_reset(d->crco);
1111 de_crcobj_addslice(d->crco, c->infile, 0, 1024);
1112 crc_calc = de_crcobj_getval(d->crco); // Field only exists in v2 format
1113 crc_reported = (u32)de_getu16le(1024);
1114 if(crc_reported==crc_calc) {
1115 d->ver = 2;
1117 else {
1118 d->ver = 1;
// Module entry point for LZWCOM files.
// The format version comes from the "lzwcom:version" option if it is given,
// otherwise it is auto-detected. The input is fed to an LZW decompressor in
// blocks of up to 1024 bytes. In v2 format, each block is followed by a
// 2-byte CRC, which is checked and then skipped.
1122 static void de_run_lzwcom(deark *c, de_module_params *mparams)
1124 struct lzwcom_ctx *d = NULL;
1125 struct de_dfilter_ctx *dfctx = NULL;
1126 dbuf *outf = NULL;
1127 struct de_dfilter_out_params dcmpro;
1128 struct de_dfilter_results dres;
1129 struct de_lzw_params delzwp;
// Set after the first CRC failure, so that only one warning is printed.
1130 int errflag = 0;
1131 i64 pos = 0;
1132 const char *s;
1134 d = de_malloc(c, sizeof(struct lzwcom_ctx));
1135 d->ver = -1;
1136 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
// A requested version of 2 or more means 2. Anything other than 1 or 2
// means "unknown", which triggers auto-detection below.
1138 s = de_get_ext_option(c, "lzwcom:version");
1139 if(s) {
1140 d->ver = de_atoi(s);
1142 if(d->ver>=2) d->ver = 2;
1143 else if(d->ver!=1) d->ver = -1;
1145 if(d->ver == -1) {
1146 lzwcom_detect_version(c, d);
1148 if(d->ver != -1) {
1149 de_declare_fmtf(c, "LZWCOM v%d", d->ver);
1151 else {
1152 de_declare_fmt(c, "LZWCOM (unknown version)");
// The output file has no stored name; it is created with the "unc" extension.
1155 outf = dbuf_create_output_file(c, "unc", NULL, 0);
1156 de_dfilter_init_objects(c, NULL, &dcmpro, &dres);
1157 dcmpro.f = outf;
1159 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
1160 delzwp.fmt = DE_LZWFMT_ARC5;
1161 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
1162 dfctx = de_dfilter_create(c, dfilter_lzw_codec, (void*)&delzwp, &dcmpro, &dres);
1164 while(1) {
1165 i64 block_dlen;
1166 i64 block_pos = pos;
// Stop on a decompression error, when the decompressor says it is finished,
// or at the end of the input.
1168 if(dres.errcode) break;
1169 if(dfctx->finished_flag) break;
1170 if(pos >= c->infile->len) break;
1171 block_dlen = de_min_int(1024, c->infile->len - pos);
1173 if(d->ver==2) {
1174 de_dbg(c, "block at %"I64_FMT", dlen=%"I64_FMT, block_pos, block_dlen);
1177 de_dfilter_addslice(dfctx, c->infile, pos, block_dlen);
1179 // Oddly, this format includes CRCs of the *compressed* bytes, instead of
1180 // of the decompressed bytes. So it doesn't detect incorrect decompression.
1181 if(d->ver==2) {
1182 de_crcobj_reset(d->crco);
1183 de_crcobj_addslice(d->crco, c->infile, pos, block_dlen);
1186 pos += block_dlen;
1188 if(d->ver==2) {
1189 u32 crc_reported, crc_calc;
// If there is no room left for a CRC field, we're done.
1191 if(c->infile->len - pos < 2) break;
1192 crc_calc = de_crcobj_getval(d->crco);
1193 crc_reported = (u32)de_getu16le_p(&pos);
1194 de_dbg_indent(c, 1);
1195 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calc);
1196 de_dbg(c, "crc (reported): 0x%04x", (UI)crc_reported);
1197 de_dbg_indent(c,- 1);
// A CRC failure is only a warning; decompression continues.
1198 if(!errflag && crc_calc!=crc_reported) {
1199 de_warn(c, "CRC check failed at %"I64_FMT". This might not be an LZWCOM v2 file.", pos-2);
1200 errflag = 1;
1205 de_dfilter_finish(dfctx);
1206 if(dres.errcode) {
1207 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
1210 de_dfilter_destroy(dfctx);
1211 dbuf_close(outf);
1212 if(d) {
1213 de_crcobj_destroy(d->crco);
1214 de_free(c, d);
1218 static void de_help_lzwcom(deark *c)
1220 de_msg(c, "-opt lzwcom:version=<1|2> : The format version");
// Fills in the module descriptor for the "lzwcom" module.
1223 void de_module_lzwcom(deark *c, struct deark_module_info *mi)
1225 mi->id = "lzwcom";
1226 mi->desc = "LZWCOM compressed file";
1227 mi->run_fn = de_run_lzwcom;
// No identify function is provided for this format.
1228 mi->identify_fn = NULL;
1229 mi->help_fn = de_help_lzwcom;