exe: Support PAK v1.6 self-extracting archives
[deark.git] / modules / lbr.c
blobcdc45e0d3c759a9e3debefd4c22b0fe57422d231
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // LBR - uncompressed CP/M archive format
6 // Squeeze compressed file
7 // Crunch v1 compressed file
8 // CRLZH compressed file
9 // ZSQ compressed file
10 // LZWCOM compressed file
12 #include <deark-private.h>
13 #include <deark-fmtutil.h>
// Declarations of the module-registration functions defined in this file.
DE_DECLARE_MODULE(de_module_lbr);
DE_DECLARE_MODULE(de_module_squeeze);
DE_DECLARE_MODULE(de_module_crunch);
DE_DECLARE_MODULE(de_module_crlzh);
DE_DECLARE_MODULE(de_module_zsq);
DE_DECLARE_MODULE(de_module_lzwcom);
// LBR_DIRENT_SIZE: number of bytes occupied by one LBR directory entry.
// LBR_SECTOR_SIZE: number of bytes per sector; member offsets and lengths
// are stored in the directory as sector counts.
#define LBR_DIRENT_SIZE 32
#define LBR_SECTOR_SIZE 128
24 struct member_data {
25 int is_dir;
26 u8 status;
27 u8 pad_count;
28 u32 crc_reported;
29 u32 crc_calc;
30 i64 pos_in_sectors;
31 i64 pos_in_bytes;
32 i64 len_in_sectors;
33 i64 len_in_bytes_withpadding;
34 i64 len_in_bytes_nopadding;
35 de_ucstring *fn;
36 struct de_timestamp create_timestamp;
37 struct de_timestamp change_timestamp;
40 typedef struct localctx_struct {
41 de_encoding input_encoding;
42 i64 dir_len_in_bytes;
43 struct de_crcobj *crco;
44 } lctx;
46 static void do_extract_member(deark *c, lctx *d, struct member_data *md)
48 de_finfo *fi = NULL;
49 dbuf *outf = NULL;
51 fi = de_finfo_create(c);
52 if(md->is_dir) {
53 fi->is_directory = 1;
54 fi->is_root_dir = 1;
56 else {
57 de_finfo_set_name_from_ucstring(c, fi, md->fn, 0);
58 fi->original_filename_flag = 1;
61 if(md->create_timestamp.is_valid) {
62 fi->timestamp[DE_TIMESTAMPIDX_CREATE] = md->create_timestamp;
64 if(md->change_timestamp.is_valid) {
65 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->change_timestamp;
68 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
70 de_crcobj_reset(d->crco);
71 if(md->is_dir) {
72 de_crcobj_addslice(d->crco, c->infile, md->pos_in_bytes, 16);
73 de_crcobj_addzeroes(d->crco, 2); // The 2-byte CRC field
74 de_crcobj_addslice(d->crco, c->infile, md->pos_in_bytes+18, md->len_in_bytes_withpadding-18);
76 else {
77 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)d->crco);
78 dbuf_copy(c->infile, md->pos_in_bytes, md->len_in_bytes_nopadding, outf);
79 // CRC calculation includes padding bytes:
80 de_crcobj_addslice(d->crco, c->infile,
81 md->pos_in_bytes + md->len_in_bytes_nopadding,
82 md->len_in_bytes_withpadding - md->len_in_bytes_nopadding);
84 md->crc_calc = de_crcobj_getval(d->crco);
85 de_dbg(c, "crc (calculated): 0x%04x", (UI)md->crc_calc);
87 de_finfo_destroy(c, fi);
88 dbuf_close(outf);
91 static void read_8_3_filename(deark *c, lctx *d, struct member_data *md, i64 pos)
93 de_ucstring *ext = NULL;
95 dbuf_read_to_ucstring(c->infile, pos, 8, md->fn, 0, d->input_encoding);
96 ucstring_strip_trailing_spaces(md->fn);
97 if(md->fn->len==0) {
98 ucstring_append_char(md->fn, '_');
101 ext = ucstring_create(c);
102 dbuf_read_to_ucstring(c->infile, pos+8, 3, ext, 0, d->input_encoding);
103 ucstring_strip_trailing_spaces(ext);
104 if(ext->len>0) {
105 ucstring_append_char(md->fn, '.');
106 ucstring_append_ucstring(md->fn, ext);
109 ucstring_destroy(ext);
112 static void handle_timestamp(deark *c, lctx *d, i64 date_raw, i64 time_raw,
113 struct de_timestamp *ts, const char *name)
115 i64 ut;
116 char timestamp_buf[64];
118 if(date_raw==0) {
119 de_dbg(c, "%s: [not set]", name);
120 return;
123 // Day 0 is Dec 31, 1977 (or it would be, if 0 weren't reserved).
124 // Difference from Unix time (Jan 1, 1970) =
125 // 365 days in 1970, 1971, 1973, 1974, 1975
126 // + 366 days in 1972, 1976
127 // + 364 days in 1977.
128 ut = 86400 * (date_raw + (365*5 + 366*2 + 364));
130 // Time of day is in DOS format.
131 ut += 3600*(time_raw>>11); // hours
132 ut += 60*(time_raw&0x07e0)>>5; // minutes
133 ut += 2*(time_raw&0x001f); // seconds
134 de_unix_time_to_timestamp(ut, ts, 0);
135 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
136 de_dbg(c, "%s: %s", name, timestamp_buf);
139 static void on_bad_dir(deark *c)
141 de_err(c, "Bad directory. This is probably not an LBR file.");
144 // Returns nonzero if we can continue.
145 // if is_dir, sets d->dir_len_in_bytes.
146 static int do_entry(deark *c, lctx *d, i64 pos1, int is_dir)
148 int retval = 0;
149 int saved_indent_level;
150 struct member_data *md = NULL;
151 i64 crdate, chdate, crtime, chtime;
153 de_dbg_indent_save(c, &saved_indent_level);
154 md = de_malloc(c, sizeof(struct member_data));
155 md->is_dir = is_dir;
157 de_dbg(c, "%s entry at %"I64_FMT, (md->is_dir?"dir":"file"), pos1);
158 de_dbg_indent(c, 1);
160 md->status = de_getbyte(pos1);
161 de_dbg(c, "status: 0x%02x", (UI)md->status);
162 if(md->is_dir && md->status!=0x00) {
163 on_bad_dir(c);
164 goto done;
166 if(md->status==0xff) { // unused entry - marks end of directory
167 goto done;
169 if(md->status!=0x00) { // deleted entry (should be 0xfe)
170 de_dbg(c, "[deleted]");
171 retval = 1;
172 goto done;
175 md->fn = ucstring_create(c);
176 if(!md->is_dir) {
177 read_8_3_filename(c, d, md, pos1+1);
178 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->fn));
181 md->pos_in_sectors = de_getu16le(pos1+12);
182 md->pos_in_bytes = md->pos_in_sectors * LBR_SECTOR_SIZE;
183 de_dbg(c, "data offset: %"I64_FMT" (sector %"I64_FMT")", md->pos_in_bytes, md->pos_in_sectors);
184 if(md->is_dir && md->pos_in_bytes!=pos1) {
185 on_bad_dir(c);
186 goto done;
189 md->len_in_sectors = de_getu16le(pos1+14);
190 de_dbg(c, "length in sectors: %"I64_FMT, md->len_in_sectors);
192 md->crc_reported = (u32)de_getu16le(pos1+16);
193 de_dbg(c, "crc (reported): 0x%04x", (UI)md->crc_reported);
195 // 18-25: timestamps - TODO
196 crdate = de_getu16le(pos1+18);
197 chdate = de_getu16le(pos1+20);
198 crtime = de_getu16le(pos1+22);
199 chtime = de_getu16le(pos1+24);
200 handle_timestamp(c, d, crdate, crtime, &md->create_timestamp, "creation time");
201 handle_timestamp(c, d, chdate, chtime, &md->change_timestamp, "last changed time");
203 md->pad_count = de_getbyte(pos1+26);
204 de_dbg(c, "pad count: %u", (UI)md->pad_count);
205 if(md->pad_count>=LBR_SECTOR_SIZE || md->len_in_sectors<1) {
206 md->pad_count = 0;
209 md->len_in_bytes_withpadding = md->len_in_sectors*LBR_SECTOR_SIZE;
210 md->len_in_bytes_nopadding = md->len_in_bytes_withpadding - (i64)md->pad_count;
211 de_dbg(c, "length in bytes: %"I64_FMT, md->len_in_bytes_nopadding);
213 if(md->pos_in_bytes + md->len_in_bytes_nopadding > c->infile->len) {
214 de_err(c, "Unexpected end of file");
215 if(!md->is_dir) {
216 retval = 1;
218 goto done;
221 if(md->is_dir) {
222 d->dir_len_in_bytes = md->len_in_bytes_nopadding;
224 retval = 1;
226 do_extract_member(c, d, md);
228 done:
229 if(md) {
230 ucstring_destroy(md->fn);
231 de_free(c, md);
233 de_dbg_indent_restore(c, saved_indent_level);
234 return retval;
237 static void de_run_lbr(deark *c, de_module_params *mparams)
239 lctx *d = NULL;
240 i64 pos = 0;
242 d = de_malloc(c, sizeof(lctx));
243 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
245 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_XMODEM);
247 // Read directory
248 if(!do_entry(c, d, pos, 1)) goto done;
249 pos += LBR_DIRENT_SIZE;
251 // Read member files
252 while(pos+LBR_DIRENT_SIZE <= c->infile->len &&
253 pos+LBR_DIRENT_SIZE <= d->dir_len_in_bytes)
255 if(!do_entry(c, d, pos, 0)) goto done;
256 pos += LBR_DIRENT_SIZE;
259 done:
260 if(d) {
261 de_crcobj_destroy(d->crco);
262 de_free(c, d);
266 static int de_identify_lbr(deark *c)
268 // TODO: Better detection is possible
269 if(!dbuf_memcmp(c->infile, 0, "\x00\x20\x20\x20\x20\x20\x20\x20\x20"
270 "\x20\x20\x20\x00\x00", 14))
271 return 100;
272 return 0;
275 void de_module_lbr(deark *c, struct deark_module_info *mi)
277 mi->id = "lbr";
278 mi->desc = "LBR archive";
279 mi->run_fn = de_run_lbr;
280 mi->identify_fn = de_identify_lbr;
283 ///////////////////////////////////////////////
284 // Squeeze - CP/M compressed file format
286 // For Crunch/CRLZH(/Squeeze?) filename fields
287 struct crcr_filename_data {
288 de_ucstring *fn;
289 de_ucstring *comment;
290 i64 size;
293 static int crcr_read_filename_etc(deark *c, i64 pos1, struct crcr_filename_data *fnd)
295 int retval = 0;
296 i64 pos = pos1;
297 enum crcrfnstate {
298 CRCRFNST_NEUTRAL, CRCRFNST_FILENAME, CRCRFNST_COMMENT, CRCRFNST_DATE
300 enum crcrfnstate state = CRCRFNST_FILENAME;
301 int found_dot = 0;
302 int extension_char_count = 0;
303 char attr_str[4] = "...";
304 static const char attr_codes[3] = {'R', 'S', 'A'};
305 int found_attr = 0;
307 // Note: Only ASCII can really be supported, because the characters are 7-bit.
308 // Normally, we'd use ucstring_append_bytes_ex() for something like this, but
309 // it's pointless here.
310 fnd->fn = ucstring_create(c);
312 while(1) {
313 u8 b1, b2;
315 // Note: CFX limits this entire field to about 80 bytes.
316 if(pos-pos1 > 300) goto done;
317 if(pos >= c->infile->len) goto done;
319 b1 = de_getbyte_p(&pos);
320 if(b1==0) {
321 break;
323 b2 = b1 & 0x7f;
325 if(b2==0x01) {
326 state = CRCRFNST_DATE; // TODO: Figure this field out
328 else if(state==CRCRFNST_FILENAME && b2=='[') {
329 state = CRCRFNST_COMMENT;
331 else if(state==CRCRFNST_FILENAME && extension_char_count>=3) {
332 state = CRCRFNST_NEUTRAL;
334 else if(state==CRCRFNST_FILENAME) {
335 ucstring_append_char(fnd->fn, (de_rune)b2);
336 if(found_dot) {
337 if(extension_char_count<3 && (b1 & 0x80)) {
338 // The CP/M low-level directory structure uses the high bit of
339 // the file extension bytes to store attributes. Some Crunch/
340 // CRLZH files do the same thing.
341 // CP/M also uses the high bit of the *filename*, for less-common
342 // attributes, but that doesn't seem possible here, because all 8
343 // bytes are not always stored.
344 found_attr = 1;
345 attr_str[extension_char_count] = attr_codes[extension_char_count];
347 extension_char_count++;
349 else {
350 if(b2=='.') found_dot = 1;
353 else if(state==CRCRFNST_COMMENT && b2==']') {
354 state = CRCRFNST_NEUTRAL;
356 else if(state==CRCRFNST_COMMENT) {
357 if(!fnd->comment) {
358 fnd->comment = ucstring_create(c);
360 ucstring_append_char(fnd->comment, (de_rune)b2);
364 ucstring_strip_trailing_spaces(fnd->fn);
365 fnd->size = pos - pos1;
366 retval = 1;
367 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(fnd->fn));
369 if(found_attr) {
370 de_dbg(c, "attribs: %s", attr_str);
373 if(fnd->comment) {
374 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(fnd->comment));
377 done:
378 return retval;
381 static void crcr_filename_data_freecontents(deark *c, struct crcr_filename_data *fnd)
383 ucstring_destroy(fnd->fn);
384 ucstring_destroy(fnd->comment);
387 struct squeeze_ctx {
388 u8 is_sq2;
389 de_encoding input_encoding;
390 struct crcr_filename_data fnd;
391 struct de_stringreaderdata *sq2_timestamp_string;
392 struct de_stringreaderdata *sq2_comment;
393 UI checksum_reported;
394 UI checksum_calc;
395 i64 cmpr_data_pos;
396 struct de_timestamp timestamp;
399 static void do_sqeeze_timestamp(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
401 UI cksum_calc = 0;
402 UI cksum_reported;
403 i64 pos = pos1;
404 i64 sig;
405 i64 dt_raw, tm_raw;
406 char timestamp_buf[64];
408 if(c->infile->len-pos1 < 8) return;
409 sig = de_getu16le_p(&pos);
410 if(sig != 0xff77) return;
411 dt_raw = de_getu16le_p(&pos);
412 tm_raw = de_getu16le_p(&pos);
413 cksum_reported = (UI)de_getu16le_p(&pos);
414 cksum_calc = (UI)de_calccrc_oneshot(c->infile, pos1, 6, DE_CRCOBJ_SUM_U16LE);
415 cksum_calc &= 0xffff;
416 if(cksum_calc != cksum_reported) return; // Presumably a false positive signature
418 de_dbg(c, "timestamp at %"I64_FMT, pos1);
419 de_dbg_indent(c, 1);
420 de_dos_datetime_to_timestamp(&sqctx->timestamp, dt_raw, tm_raw);
422 sqctx->timestamp.tzcode = DE_TZCODE_LOCAL;
423 de_timestamp_to_string(&sqctx->timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
424 de_dbg(c, "timestamp: %s", timestamp_buf);
426 de_dbg(c, "timestamp checksum (calculated): 0x%04x", cksum_calc);
427 de_dbg(c, "timestamp checksum (reported): 0x%04x", cksum_reported);
428 de_dbg_indent(c, -1);
431 static void read_squeeze_checksum(deark *c, struct squeeze_ctx *sqctx, i64 pos)
433 sqctx->checksum_reported = (u32)de_getu16le_p(&pos);
434 de_dbg(c, "checksum (reported): %u", (UI)sqctx->checksum_reported);
437 static int read_squeeze_headers(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
439 i64 pos = pos1;
440 int retval = 0;
442 read_squeeze_checksum(c, sqctx, pos);
443 pos += 2;
445 // I don't know the correct way to interpret the Squeeze filename field, if
446 // there even is such a way.
447 // Some Unsqueeze utilities accept it as-is, some truncate it after the third
448 // filename extension byte, some interpret it the same as Crunch format
449 // (including ignoring the high bit of every byte, for some reason).
450 // Doing it the Crunch way is probably safe.
451 if(!crcr_read_filename_etc(c, pos, &sqctx->fnd)) goto done;
452 pos += sqctx->fnd.size;
454 sqctx->cmpr_data_pos = pos;
455 retval = 1;
456 done:
457 if(!retval) {
458 de_err(c, "Malformed header");
460 return retval;
463 static int read_sq2_headers(deark *c, struct squeeze_ctx *sqctx, i64 pos1)
465 i64 pos = pos1;
466 u8 b;
467 int retval = 0;
469 if(!crcr_read_filename_etc(c, pos, &sqctx->fnd)) goto done;
470 pos += sqctx->fnd.size;
472 sqctx->sq2_timestamp_string = dbuf_read_string(c->infile, pos, 300, 300,
473 DE_CONVFLAG_STOP_AT_NUL, sqctx->input_encoding);
474 if(!sqctx->sq2_timestamp_string->found_nul) goto done;
475 de_dbg(c, "timestamp_string: \"%s\"", ucstring_getpsz_d(sqctx->sq2_timestamp_string->str));
476 pos += sqctx->sq2_timestamp_string->bytes_consumed;
478 sqctx->sq2_comment = dbuf_read_string(c->infile, pos, 300, 300,
479 DE_CONVFLAG_STOP_AT_NUL, sqctx->input_encoding);
480 if(!sqctx->sq2_comment->found_nul) goto done;
481 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(sqctx->sq2_comment->str));
482 pos += sqctx->sq2_comment->bytes_consumed;
484 b = de_getbyte_p(&pos);
485 if(b != 0x1a) goto done;
487 read_squeeze_checksum(c, sqctx, pos);
488 pos += 2;
490 pos += 4; // ?
492 sqctx->cmpr_data_pos = pos;
493 retval = 1;
495 done:
496 if(!retval) {
497 de_err(c, "Malformed header");
499 return retval;
502 static void de_run_squeeze(deark *c, de_module_params *mparams)
504 i64 pos = 0;
505 i64 n;
506 struct squeeze_ctx *sqctx = NULL;
507 de_finfo *fi = NULL;
508 struct de_crcobj *crco = NULL;
509 dbuf *outf_tmp = NULL;
510 dbuf *outf_final = NULL;
511 int saved_indent_level;
512 struct de_dfilter_in_params dcmpri;
513 struct de_dfilter_out_params dcmpro;
514 struct de_dfilter_results dres;
515 struct de_dcmpr_two_layer_params tlp;
517 de_dbg_indent_save(c, &saved_indent_level);
518 sqctx = de_malloc(c, sizeof(struct squeeze_ctx));
519 sqctx->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
521 n = de_getu16le_p(&pos);
522 if(n==0xff76) {
523 de_declare_fmt(c, "Squeezed");
525 else if(n==0xfffa) {
526 de_declare_fmt(c, "Squeeze v2 (SQ2)");
527 sqctx->is_sq2 = 1;
529 else {
530 de_dbg(c, "Not a Squeezed file");
531 goto done;
534 if(sqctx->is_sq2) {
535 if(!read_sq2_headers(c, sqctx, pos)) goto done;
537 else {
538 if(!read_squeeze_headers(c, sqctx, pos)) goto done;
541 pos = sqctx->cmpr_data_pos;
543 fi = de_finfo_create(c);
544 de_finfo_set_name_from_ucstring(c, fi, sqctx->fnd.fn, 0);
545 fi->original_filename_flag = 1;
547 de_dbg(c, "squeeze-compressed data at %"I64_FMT, pos);
548 de_dbg_indent(c, 1);
550 // We have to decompress the file before we can find the timestamp. That's
551 // why we decompress to a membuf.
552 outf_tmp = dbuf_create_membuf(c, 0, 0);
553 dbuf_enable_wbuffer(outf_tmp);
555 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
556 dcmpri.f = c->infile;
557 dcmpri.pos = pos;
558 dcmpri.len = c->infile->len - pos;
559 dcmpro.f = outf_tmp;
561 crco = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
562 dbuf_set_writelistener(outf_tmp, de_writelistener_for_crc, (void*)crco);
564 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
565 tlp.codec1_type1 = fmtutil_huff_squeeze_codectype1;
566 tlp.codec2 = dfilter_rle90_codec;
567 tlp.dcmpri = &dcmpri;
568 tlp.dcmpro = &dcmpro;
569 tlp.dres = &dres;
570 de_dfilter_decompress_two_layer(c, &tlp);
571 dbuf_flush(dcmpro.f);
573 if(dres.bytes_consumed_valid) {
574 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
575 dcmpri.pos+dres.bytes_consumed);
577 do_sqeeze_timestamp(c, sqctx, dcmpri.pos+dres.bytes_consumed);
578 if(sqctx->timestamp.is_valid) {
579 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = sqctx->timestamp;
583 outf_final = dbuf_create_output_file(c, NULL, fi, 0);
584 dbuf_copy(outf_tmp, 0, outf_tmp->len, outf_final);
586 if(dres.errcode) {
587 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
588 goto done;
591 sqctx->checksum_calc = de_crcobj_getval(crco);
592 sqctx->checksum_calc &= 0xffff;
593 de_dbg(c, "checksum (calculated): %u", (UI)sqctx->checksum_calc);
594 if(sqctx->checksum_calc != sqctx->checksum_reported) {
595 de_err(c, "Checksum error. Decompression probably failed.");
596 goto done;
599 done:
600 if(sqctx) {
601 crcr_filename_data_freecontents(c, &sqctx->fnd);
602 de_destroy_stringreaderdata(c, sqctx->sq2_timestamp_string);
603 de_destroy_stringreaderdata(c, sqctx->sq2_comment);
604 de_free(c, sqctx);
606 dbuf_close(outf_final);
607 dbuf_close(outf_tmp);
608 de_finfo_destroy(c, fi);
609 de_crcobj_destroy(crco);
610 de_dbg_indent_restore(c, saved_indent_level);
613 static int de_identify_squeeze(deark *c)
615 i64 id;
617 id = de_getu16le(0);
618 if(id==0xff76) return 70;
619 if(id==0xfffa) return 25; // SQ2
620 return 0;
623 void de_module_squeeze(deark *c, struct deark_module_info *mi)
625 mi->id = "squeeze";
626 mi->desc = "Squeeze (CP/M)";
627 mi->run_fn = de_run_squeeze;
628 mi->identify_fn = de_identify_squeeze;
631 ///////////////////////////////////////////////
632 // Crunch - CP/M compressed file format
634 struct crunch_ctx {
635 struct crcr_filename_data fnd;
636 u8 fmtver; // 1 or 2, 0 if unknown
637 u8 cksum_type;
638 UI checksum_reported;
639 UI checksum_calc;
642 static void decompress_crunch_v1(deark *c, struct crunch_ctx *crunchctx, i64 pos1)
644 de_finfo *fi = NULL;
645 dbuf *outf = NULL;
646 i64 pos = pos1;
647 struct de_crcobj *crco = NULL;
648 struct de_dfilter_in_params dcmpri;
649 struct de_dfilter_out_params dcmpro;
650 struct de_dfilter_results dres;
651 struct de_lzw_params delzwp;
652 struct de_dcmpr_two_layer_params tlp;
654 de_dbg_indent(c, 1);
655 fi = de_finfo_create(c);
656 de_finfo_set_name_from_ucstring(c, fi, crunchctx->fnd.fn, 0);
657 fi->original_filename_flag = 1;
659 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
660 dbuf_enable_wbuffer(outf);
661 crco = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
662 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)crco);
664 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
665 dcmpri.f = c->infile;
666 dcmpri.pos = pos;
667 dcmpri.len = c->infile->len - pos;
668 dcmpro.f = outf;
670 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
671 delzwp.fmt = DE_LZWFMT_ARC5;
672 delzwp.arc5_has_stop_code = 1;
674 de_zeromem(&tlp, sizeof(struct de_dcmpr_two_layer_params));
675 tlp.codec1_pushable = dfilter_lzw_codec;
676 tlp.codec1_private_params = (void*)&delzwp;
677 tlp.codec2 = dfilter_rle90_codec;
678 tlp.dcmpri = &dcmpri;
679 tlp.dcmpro = &dcmpro;
680 tlp.dres = &dres;
681 de_dfilter_decompress_two_layer(c, &tlp);
682 dbuf_flush(dcmpro.f);
684 if(dres.errcode) {
685 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
686 goto done;
689 if(dres.bytes_consumed_valid) {
690 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
691 dcmpri.pos+dres.bytes_consumed);
692 pos += dres.bytes_consumed;
694 if(crunchctx->cksum_type==0) {
695 crunchctx->checksum_calc = de_crcobj_getval(crco);
696 crunchctx->checksum_calc &= 0xffff;
697 crunchctx->checksum_reported = (UI)de_getu16le_p(&pos);
698 de_dbg(c, "checksum (calculated): %u", crunchctx->checksum_calc);
699 de_dbg(c, "checksum (reported): %u", crunchctx->checksum_reported);
700 if(crunchctx->checksum_calc != crunchctx->checksum_reported) {
701 de_err(c, "Checksum error. Decompression probably failed.");
702 goto done;
707 done:
708 de_finfo_destroy(c, fi);
709 dbuf_close(outf);
710 de_crcobj_destroy(crco);
711 de_dbg_indent(c, -1);
714 static void de_run_crunch(deark *c, de_module_params *mparams)
716 struct crunch_ctx *crunchctx = NULL;
717 i64 pos = 0;
718 u8 b;
719 u8 fmtver_raw;
720 const char *verstr;
722 crunchctx = de_malloc(c, sizeof(struct crunch_ctx));
724 pos += 2;
725 if(!crcr_read_filename_etc(c, pos, &crunchctx->fnd)) goto done;
726 pos += crunchctx->fnd.size;
728 b = de_getbyte_p(&pos);
729 de_dbg(c, "encoder version: 0x%02x", (UI)b);
731 fmtver_raw = de_getbyte_p(&pos);
732 if(fmtver_raw>=0x10 && fmtver_raw<=0x1f) {
733 crunchctx->fmtver = 1;
734 verstr = "old";
736 else if(fmtver_raw>=0x20 && fmtver_raw<=0x2f) {
737 crunchctx->fmtver = 2;
738 verstr = "new";
740 else {
741 verstr = "?";
743 de_dbg(c, "format version: 0x%02x (%s)", (UI)fmtver_raw, verstr);
744 if(crunchctx->fmtver!=0) {
745 de_declare_fmtf(c, "Crunch (v%d)", (int)crunchctx->fmtver);
748 crunchctx->cksum_type = de_getbyte_p(&pos);
749 de_dbg(c, "checksum type: 0x%02x (%s)", (UI)crunchctx->cksum_type,
750 (crunchctx->cksum_type==0?"standard":"?"));
752 b = de_getbyte_p(&pos);
753 de_dbg(c, "unused info byte: 0x%02x", (UI)b);
755 de_dbg(c, "compressed data at %"I64_FMT, pos);
756 if(crunchctx->fmtver==1) {
757 decompress_crunch_v1(c, crunchctx, pos);
759 else {
760 // v2 is by far the most common version, but it's not easy to support.
761 // We support v1, only because it's easy.
762 de_err(c, "This version of Crunch is not supported");
765 done:
766 if(crunchctx) {
767 crcr_filename_data_freecontents(c, &crunchctx->fnd);
768 de_free(c, crunchctx);
772 static int de_identify_crunch(deark *c)
774 i64 id;
776 id = de_getu16le(0);
777 if(id==0xfe76) return 70;
778 return 0;
781 void de_module_crunch(deark *c, struct deark_module_info *mi)
783 mi->id = "crunch";
784 mi->desc = "Crunch (CP/M)";
785 mi->run_fn = de_run_crunch;
786 mi->identify_fn = de_identify_crunch;
789 ///////////////////////////////////////////////
790 // CRLZH - CP/M compressed file format
792 struct crlzh_ctx {
793 struct crcr_filename_data fnd;
794 u8 fmtver; // 1 or 2, 0 if unknown
795 u8 cksum_type;
796 UI checksum_reported;
797 UI checksum_calc;
800 static void decompress_crlzh(deark *c, struct crlzh_ctx *crlzhctx, i64 pos1)
802 de_finfo *fi = NULL;
803 dbuf *outf = NULL;
804 i64 pos = pos1;
805 struct de_crcobj *crco = NULL;
806 struct de_dfilter_in_params dcmpri;
807 struct de_dfilter_out_params dcmpro;
808 struct de_dfilter_results dres;
809 struct de_lh1_params lh1p;
811 de_dbg_indent(c, 1);
812 fi = de_finfo_create(c);
813 de_finfo_set_name_from_ucstring(c, fi, crlzhctx->fnd.fn, 0);
814 fi->original_filename_flag = 1;
816 outf = dbuf_create_output_file(c, NULL, fi, 0x0);
817 dbuf_enable_wbuffer(outf);
818 crco = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
819 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)crco);
821 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
822 dcmpri.f = c->infile;
823 dcmpri.pos = pos;
824 dcmpri.len = c->infile->len - pos;
825 dcmpro.f = outf;
827 de_zeromem(&lh1p, sizeof(struct de_lh1_params));
828 if(crlzhctx->fmtver==1) {
829 lh1p.is_crlzh11 = 1;
831 else {
832 lh1p.is_crlzh20 = 1;
834 lh1p.history_fill_val = 0x20;
836 fmtutil_lh1_codectype1(c, &dcmpri, &dcmpro, &dres, (void*)&lh1p);
837 dbuf_flush(dcmpro.f);
839 if(dres.errcode) {
840 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
841 goto done;
844 if(dres.bytes_consumed_valid) {
845 de_dbg(c, "compressed data size: %"I64_FMT", ends at %"I64_FMT, dres.bytes_consumed,
846 dcmpri.pos+dres.bytes_consumed);
847 pos += dres.bytes_consumed;
849 if(crlzhctx->cksum_type==0) {
850 crlzhctx->checksum_calc = de_crcobj_getval(crco);
851 crlzhctx->checksum_calc &= 0xffff;
852 crlzhctx->checksum_reported = (UI)de_getu16le_p(&pos);
853 de_dbg(c, "checksum (calculated): %u", crlzhctx->checksum_calc);
854 de_dbg(c, "checksum (reported): %u", crlzhctx->checksum_reported);
855 if(crlzhctx->checksum_calc != crlzhctx->checksum_reported) {
856 de_err(c, "Checksum error. Decompression probably failed.");
857 goto done;
862 done:
863 de_finfo_destroy(c, fi);
864 dbuf_close(outf);
865 de_crcobj_destroy(crco);
866 de_dbg_indent(c, -1);
869 static void de_run_crlzh(deark *c, de_module_params *mparams)
871 struct crlzh_ctx *crlzhctx = NULL;
872 i64 pos = 0;
873 u8 b;
874 u8 fmtver_raw;
875 const char *verstr;
877 crlzhctx = de_malloc(c, sizeof(struct crlzh_ctx));
879 pos += 2;
880 if(!crcr_read_filename_etc(c, pos, &crlzhctx->fnd)) goto done;
881 pos += crlzhctx->fnd.size;
882 b = de_getbyte_p(&pos);
883 de_dbg(c, "encoder version: 0x%02x", (UI)b);
885 fmtver_raw = de_getbyte_p(&pos);
886 if(fmtver_raw<=0x1f) {
887 crlzhctx->fmtver = 1;
888 verstr = "old";
890 else if(fmtver_raw>=0x20 && fmtver_raw<=0x2f) {
891 // Note: Alternatives are ==0x20 (CFX), and >=0x20 (lbrate).
892 crlzhctx->fmtver = 2;
893 verstr = "new";
895 else {
896 verstr = "?";
898 de_dbg(c, "format version: 0x%02x (%s)", (UI)fmtver_raw, verstr);
899 if(crlzhctx->fmtver!=0) {
900 de_declare_fmtf(c, "CRLZH (v%d)", (int)crlzhctx->fmtver);
903 crlzhctx->cksum_type = de_getbyte_p(&pos);
904 de_dbg(c, "checksum type: 0x%02x (%s)", (UI)crlzhctx->cksum_type,
905 (crlzhctx->cksum_type==0?"standard":"?"));
907 b = de_getbyte_p(&pos);
908 de_dbg(c, "unused info byte: 0x%02x", (UI)b);
910 de_dbg(c, "compressed data at %"I64_FMT, pos);
911 decompress_crlzh(c, crlzhctx, pos);
913 done:
914 if(crlzhctx) {
915 crcr_filename_data_freecontents(c, &crlzhctx->fnd);
916 de_free(c, crlzhctx);
920 static int de_identify_crlzh(deark *c)
922 i64 id;
924 id = de_getu16le(0);
925 if(id==0xfd76) return 70;
926 return 0;
929 void de_module_crlzh(deark *c, struct deark_module_info *mi)
931 mi->id = "crlzh";
932 mi->desc = "CRLZH (CP/M)";
933 mi->run_fn = de_run_crlzh;
934 mi->identify_fn = de_identify_crlzh;
937 ///////////////////////////////////////////////
938 // ZSQ (ZSQUSQ)
939 // LZW compression utility by W. Chin, A. Kumar.
940 // Format used by v1.0, 1985-10-26.
// ZSQ signature: the ASCII bytes "WACK", as a big-endian 32-bit integer.
#define CODE_WACK 0x5741434bU
944 struct zsq_ctx {
945 de_encoding input_encoding;
946 de_ucstring *fn;
947 UI checksum_reported;
948 UI checksum_calc;
949 struct de_timestamp timestamp;
952 static void do_zsq_decompress(deark *c, struct zsq_ctx *zsqctx, i64 pos, dbuf *outf)
954 struct de_crcobj *crco = NULL;
955 struct de_dfilter_in_params dcmpri;
956 struct de_dfilter_out_params dcmpro;
957 struct de_dfilter_results dres;
958 struct de_lzw_params delzwp;
960 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
961 delzwp.fmt = DE_LZWFMT_ARC5;
963 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
964 dcmpri.f = c->infile;
965 dcmpri.pos = pos;
966 dcmpri.len = c->infile->len - pos;
967 dcmpro.f = outf;
969 crco = de_crcobj_create(c, DE_CRCOBJ_SUM_BYTES);
970 dbuf_set_writelistener(outf, de_writelistener_for_crc, (void*)crco);
972 fmtutil_decompress_lzw(c, &dcmpri, &dcmpro, &dres, &delzwp);
973 dbuf_flush(outf);
975 zsqctx->checksum_calc = de_crcobj_getval(crco);
976 zsqctx->checksum_calc &= 0xffff;
977 de_dbg(c, "checksum (calculated): %u", (UI)zsqctx->checksum_calc);
978 if(zsqctx->checksum_calc != zsqctx->checksum_reported) {
979 de_err(c, "Checksum error. Decompression probably failed.");
981 de_crcobj_destroy(crco);
984 static void zsq_read_timestamp(deark *c, struct zsq_ctx *zsqctx, i64 pos)
986 i64 dt_raw, tm_raw;
987 char timestamp_buf[64];
989 dt_raw = de_getu16le(pos);
990 tm_raw = de_getu16le(pos+2);
991 de_dos_datetime_to_timestamp(&zsqctx->timestamp, dt_raw, tm_raw);
992 de_timestamp_to_string(&zsqctx->timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
993 de_dbg(c, "timestamp: %s", timestamp_buf);
996 static void de_run_zsq(deark *c, de_module_params *mparams)
998 struct zsq_ctx *zsqctx = NULL;
999 i64 pos = 0;
1000 i64 hdr_len;
1001 i64 hdr_endpos;
1002 u32 id;
1003 dbuf *outf = NULL;
1004 de_finfo *fi = NULL;
1006 zsqctx = de_malloc(c, sizeof(struct zsq_ctx));
1007 zsqctx->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1009 id = (u32)de_getu32be_p(&pos);
1010 if(id != CODE_WACK) {
1011 de_err(c, "Not a ZSQ file");
1012 goto done;
1015 fi = de_finfo_create(c);
1017 zsqctx->checksum_reported = (u32)de_getu16le_p(&pos);
1018 de_dbg(c, "checksum (reported): %u", (UI)zsqctx->checksum_reported);
1020 hdr_len = de_getu16le_p(&pos);
1021 hdr_endpos = pos + hdr_len;
1022 if(hdr_endpos > c->infile->len) {
1023 de_err(c, "Bad header length");
1024 goto done;
1027 zsq_read_timestamp(c, zsqctx, pos);
1028 pos += 4;
1030 zsqctx->fn = ucstring_create(c);
1031 dbuf_read_to_ucstring_n(c->infile, pos, hdr_endpos-pos, 255, zsqctx->fn,
1032 DE_CONVFLAG_STOP_AT_NUL, zsqctx->input_encoding);
1033 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(zsqctx->fn));
1035 de_finfo_set_name_from_ucstring(c, fi, zsqctx->fn, 0);
1036 fi->original_filename_flag = 1;
1038 pos = hdr_endpos;
1039 de_dbg(c, "compressed data at %"I64_FMT, pos);
1041 outf = dbuf_create_output_file(c, NULL, fi, 0);
1042 dbuf_enable_wbuffer(outf);
1044 do_zsq_decompress(c, zsqctx, pos, outf);
1046 done:
1047 dbuf_close(outf);
1048 de_finfo_destroy(c, fi);
1049 if(zsqctx) {
1050 ucstring_destroy(zsqctx->fn);
1051 de_free(c, zsqctx);
1055 static int de_identify_zsq(deark *c)
1057 if(de_getu32be(0)==CODE_WACK) {
1058 return 90;
1060 return 0;
1063 void de_module_zsq(deark *c, struct deark_module_info *mi)
1065 mi->id = "zsq";
1066 mi->desc = "ZSQ (ZSQUSQ, LZW-compressed file)";
1067 mi->run_fn = de_run_zsq;
1068 mi->identify_fn = de_identify_zsq;
1071 // **************************************************************************
1072 // LZWCOM
1073 // **************************************************************************
// State for processing one LZWCOM file.
struct lzwcom_ctx {
	int ver; // 1, 2, or -1 if unknown
	struct de_crcobj *crco; // Used for the per-block CRCs of the v2 format
};
1080 static void lzwcom_detect_version(deark *c, struct lzwcom_ctx *d)
1082 u32 crc_reported, crc_calc;
1084 if(c->infile->len < 1026) {
1085 d->ver = -1;
1086 return;
1089 de_crcobj_reset(d->crco);
1090 de_crcobj_addslice(d->crco, c->infile, 0, 1024);
1091 crc_calc = de_crcobj_getval(d->crco); // Field only exists in v2 format
1092 crc_reported = (u32)de_getu16le(1024);
1093 if(crc_reported==crc_calc) {
1094 d->ver = 2;
1096 else {
1097 d->ver = 1;
1101 static void de_run_lzwcom(deark *c, de_module_params *mparams)
1103 struct lzwcom_ctx *d = NULL;
1104 struct de_dfilter_ctx *dfctx = NULL;
1105 dbuf *outf = NULL;
1106 struct de_dfilter_out_params dcmpro;
1107 struct de_dfilter_results dres;
1108 struct de_lzw_params delzwp;
1109 int errflag = 0;
1110 i64 pos = 0;
1111 const char *s;
1112 u8 *rbuf = NULL;
1114 d = de_malloc(c, sizeof(struct lzwcom_ctx));
1115 d->ver = -1;
1116 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_ARC);
1118 s = de_get_ext_option(c, "lzwcom:version");
1119 if(s) {
1120 d->ver = de_atoi(s);
1122 if(d->ver>=2) d->ver = 2;
1123 else if(d->ver!=1) d->ver = -1;
1125 if(d->ver == -1) {
1126 lzwcom_detect_version(c, d);
1128 if(d->ver != -1) {
1129 de_declare_fmtf(c, "LZWCOM v%d", d->ver);
1131 else {
1132 de_declare_fmt(c, "LZWCOM (unknown version)");
1135 outf = dbuf_create_output_file(c, "unc", NULL, 0);
1136 dbuf_enable_wbuffer(outf);
1137 de_dfilter_init_objects(c, NULL, &dcmpro, &dres);
1138 dcmpro.f = outf;
1140 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
1141 delzwp.fmt = DE_LZWFMT_ARC5;
1142 delzwp.flags |= DE_LZWFLAG_TOLERATETRAILINGJUNK;
1143 dfctx = de_dfilter_create(c, dfilter_lzw_codec, (void*)&delzwp, &dcmpro, &dres);
1144 rbuf = de_malloc(c, 1024);
1146 while(1) {
1147 i64 block_dlen;
1148 i64 block_pos = pos;
1150 if(dres.errcode) break;
1151 if(dfctx->finished_flag) break;
1152 if(pos >= c->infile->len) break;
1153 block_dlen = de_min_int(1024, c->infile->len - pos);
1155 if(d->ver==2) {
1156 de_dbg(c, "block at %"I64_FMT", dlen=%"I64_FMT, block_pos, block_dlen);
1159 dbuf_read(c->infile, rbuf, pos, block_dlen);
1160 de_dfilter_addbuf(dfctx, rbuf, block_dlen);
1162 // Oddly, this format includes CRCs of the *compressed* bytes, instead of
1163 // of the decompressed bytes. So it doesn't detect incorrect decompression.
1164 if(d->ver==2) {
1165 de_crcobj_reset(d->crco);
1166 de_crcobj_addbuf(d->crco, rbuf, block_dlen);
1169 pos += block_dlen;
1171 if(d->ver==2) {
1172 u32 crc_reported, crc_calc;
1174 if(c->infile->len - pos < 2) break;
1175 crc_calc = de_crcobj_getval(d->crco);
1176 crc_reported = (u32)de_getu16le_p(&pos);
1177 de_dbg_indent(c, 1);
1178 de_dbg(c, "crc (calculated): 0x%04x", (UI)crc_calc);
1179 de_dbg(c, "crc (reported): 0x%04x", (UI)crc_reported);
1180 de_dbg_indent(c,- 1);
1181 if(!errflag && crc_calc!=crc_reported) {
1182 de_warn(c, "CRC check failed at %"I64_FMT". This might not be an LZWCOM v2 file.", pos-2);
1183 errflag = 1;
1188 de_dfilter_finish(dfctx);
1189 dbuf_flush(outf);
1190 if(dres.errcode) {
1191 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
1194 de_dfilter_destroy(dfctx);
1195 dbuf_close(outf);
1196 if(d) {
1197 de_crcobj_destroy(d->crco);
1198 de_free(c, d);
1200 de_free(c, rbuf);
1203 static void de_help_lzwcom(deark *c)
1205 de_msg(c, "-opt lzwcom:version=<1|2> : The format version");
1208 void de_module_lzwcom(deark *c, struct deark_module_info *mi)
1210 mi->id = "lzwcom";
1211 mi->desc = "LZWCOM compressed file";
1212 mi->run_fn = de_run_lzwcom;
1213 mi->identify_fn = NULL;
1214 mi->help_fn = de_help_lzwcom;