1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // LBR - uncompressed CP/M archive format
6 // Squeeze compressed file
7 // Crunch v1 compressed file
8 // CRLZH compressed file
10 // LZWCOM compressed file
12 #include <deark-private.h>
13 #include <deark-fmtutil.h>
// Announce (via the project's DE_DECLARE_MODULE macro) the module-registration
// functions that are defined further down in this file, one per format.
14 DE_DECLARE_MODULE(de_module_lbr
);
15 DE_DECLARE_MODULE(de_module_squeeze
);
16 DE_DECLARE_MODULE(de_module_crunch
);
17 DE_DECLARE_MODULE(de_module_crlzh
);
18 DE_DECLARE_MODULE(de_module_zsq
);
19 DE_DECLARE_MODULE(de_module_lzwcom
);
// Size in bytes of one LBR directory entry; the directory is walked in steps
// of this size.
21 #define LBR_DIRENT_SIZE 32
// LBR member offsets and lengths are stored as sector counts; multiplying by
// this value converts them to bytes.
22 #define LBR_SECTOR_SIZE 128
33 i64 len_in_bytes_withpadding
;
34 i64 len_in_bytes_nopadding
;
36 struct de_timestamp create_timestamp
;
37 struct de_timestamp change_timestamp
;
// State shared by the LBR-reading functions below, which receive it as
// `lctx *d`.
40 typedef struct localctx_struct
{
// Encoding used when reading member filenames.
41 de_encoding input_encoding
;
// CRC object created once per run and reset before each member's CRC.
43 struct de_crcobj
*crco
;
// Creates an output file for one member, named and timestamped from md, and
// stores the CRC computed with d->crco in md->crc_calc.
// NOTE(review): two CRC sequences appear below (one zeroing the CRC field, one
// using a write listener); the condition selecting between them is not shown
// here -- confirm against the full source.
46 static void do_extract_member(deark
*c
, lctx
*d
, struct member_data
*md
)
51 fi
= de_finfo_create(c
);
// Name the output file after the member's stored filename.
57 de_finfo_set_name_from_ucstring(c
, fi
, md
->fn
, 0);
58 fi
->original_filename_flag
= 1;
// Carry over whichever timestamps were decoded successfully.
61 if(md
->create_timestamp
.is_valid
) {
62 fi
->timestamp
[DE_TIMESTAMPIDX_CREATE
] = md
->create_timestamp
;
64 if(md
->change_timestamp
.is_valid
) {
65 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->change_timestamp
;
68 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
70 de_crcobj_reset(d
->crco
);
// CRC over the padded data with the 2-byte field at offset 16 replaced by
// zeroes: the first 16 bytes, two zero bytes, then everything from offset 18.
72 de_crcobj_addslice(d
->crco
, c
->infile
, md
->pos_in_bytes
, 16);
73 de_crcobj_addzeroes(d
->crco
, 2); // The 2-byte CRC field
74 de_crcobj_addslice(d
->crco
, c
->infile
, md
->pos_in_bytes
+18, md
->len_in_bytes_withpadding
-18);
// Copy the unpadded data to the output file with the CRC write listener
// attached, so the copied bytes are also added to d->crco.
77 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)d
->crco
);
78 dbuf_copy(c
->infile
, md
->pos_in_bytes
, md
->len_in_bytes_nopadding
, outf
);
79 // CRC calculation includes padding bytes:
80 de_crcobj_addslice(d
->crco
, c
->infile
,
81 md
->pos_in_bytes
+ md
->len_in_bytes_nopadding
,
82 md
->len_in_bytes_withpadding
- md
->len_in_bytes_nopadding
);
84 md
->crc_calc
= de_crcobj_getval(d
->crco
);
85 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)md
->crc_calc
);
87 de_finfo_destroy(c
, fi
);
// Builds md->fn from an 8-byte base name at pos and a 3-byte extension at
// pos+8, stripping trailing spaces from each part. md->fn must already exist.
91 static void read_8_3_filename(deark
*c
, lctx
*d
, struct member_data
*md
, i64 pos
)
93 de_ucstring
*ext
= NULL
;
95 dbuf_read_to_ucstring(c
->infile
, pos
, 8, md
->fn
, 0, d
->input_encoding
);
96 ucstring_strip_trailing_spaces(md
->fn
);
// NOTE(review): presumably a placeholder used only when the base name is
// empty -- the guarding condition is not shown here; confirm.
98 ucstring_append_char(md
->fn
, '_');
// The extension is read into a temporary string so it can be trimmed
// separately before being appended.
101 ext
= ucstring_create(c
);
102 dbuf_read_to_ucstring(c
->infile
, pos
+8, 3, ext
, 0, d
->input_encoding
);
103 ucstring_strip_trailing_spaces(ext
);
105 ucstring_append_char(md
->fn
, '.');
106 ucstring_append_ucstring(md
->fn
, ext
);
109 ucstring_destroy(ext
);
// Converts an LBR date/time pair to a timestamp in *ts and logs it under the
// label `name`. date_raw is a day count (see the epoch note below); time_raw
// is a DOS-format time of day.
112 static void handle_timestamp(deark
*c
, lctx
*d
, i64 date_raw
, i64 time_raw
,
113 struct de_timestamp
*ts
, const char *name
)
116 char timestamp_buf
[64];
119 de_dbg(c
, "%s: [not set]", name
);
123 // Day 0 is Dec 31, 1977 (or it would be, if 0 weren't reserved).
124 // Difference from Unix time (Jan 1, 1970) =
125 // 365 days in 1970, 1971, 1973, 1974, 1975
126 // + 366 days in 1972, 1976
127 // + 364 days in 1977.
128 ut
= 86400 * (date_raw
+ (365*5 + 366*2 + 364));
130 // Time of day is in DOS format.
131 ut
+= 3600*(time_raw
>>11); // hours
132 ut
+= 60*(time_raw
&0x07e0)>>5; // minutes: parses as (60*(time_raw&0x07e0))>>5, which still equals 60*minute because the masked value is a multiple of 32
133 ut
+= 2*(time_raw
&0x001f); // seconds
134 de_unix_time_to_timestamp(ut
, ts
, 0);
135 de_timestamp_to_string(ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
136 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
// Reports the error used when the directory entry fails validation.
139 static void on_bad_dir(deark
*c
)
141 de_err(c
, "Bad directory. This is probably not an LBR file.");
// Reads the directory entry at pos1, logs its fields, and hands the member to
// do_extract_member().
// Entry layout as read below (offsets relative to pos1): 0 status,
// 1-11 8.3 filename, 12 data offset in sectors, 14 length in sectors,
// 16 CRC, 18-25 timestamps, 26 pad count.
144 // Returns nonzero if we can continue.
145 // if is_dir, sets d->dir_len_in_bytes.
146 static int do_entry(deark
*c
, lctx
*d
, i64 pos1
, int is_dir
)
149 int saved_indent_level
;
150 struct member_data
*md
= NULL
;
151 i64 crdate
, chdate
, crtime
, chtime
;
153 de_dbg_indent_save(c
, &saved_indent_level
);
154 md
= de_malloc(c
, sizeof(struct member_data
));
157 de_dbg(c
, "%s entry at %"I64_FMT
, (md
->is_dir
?"dir":"file"), pos1
);
160 md
->status
= de_getbyte(pos1
);
161 de_dbg(c
, "status: 0x%02x", (UI
)md
->status
);
// The directory's own entry must be marked active (status 0).
162 if(md
->is_dir
&& md
->status
!=0x00) {
166 if(md
->status
==0xff) { // unused entry - marks end of directory
169 if(md
->status
!=0x00) { // deleted entry (should be 0xfe)
170 de_dbg(c
, "[deleted]");
175 md
->fn
= ucstring_create(c
);
177 read_8_3_filename(c
, d
, md
, pos1
+1);
178 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(md
->fn
));
181 md
->pos_in_sectors
= de_getu16le(pos1
+12);
182 md
->pos_in_bytes
= md
->pos_in_sectors
* LBR_SECTOR_SIZE
;
183 de_dbg(c
, "data offset: %"I64_FMT
" (sector %"I64_FMT
")", md
->pos_in_bytes
, md
->pos_in_sectors
);
// The directory entry describes the directory itself, so its data offset
// must equal its own position.
184 if(md
->is_dir
&& md
->pos_in_bytes
!=pos1
) {
189 md
->len_in_sectors
= de_getu16le(pos1
+14);
190 de_dbg(c
, "length in sectors: %"I64_FMT
, md
->len_in_sectors
);
192 md
->crc_reported
= (u32
)de_getu16le(pos1
+16);
193 de_dbg(c
, "crc (reported): 0x%04x", (UI
)md
->crc_reported
);
195 // Bytes 18-25: creation date, change date, creation time, change time (u16le each)
196 crdate
= de_getu16le(pos1
+18);
197 chdate
= de_getu16le(pos1
+20);
198 crtime
= de_getu16le(pos1
+22);
199 chtime
= de_getu16le(pos1
+24);
200 handle_timestamp(c
, d
, crdate
, crtime
, &md
->create_timestamp
, "creation time");
201 handle_timestamp(c
, d
, chdate
, chtime
, &md
->change_timestamp
, "last changed time");
// Pad count: the number of bytes subtracted from the sector-rounded length
// to get the real length (see len_in_bytes_nopadding below).
203 md
->pad_count
= de_getbyte(pos1
+26);
204 de_dbg(c
, "pad count: %u", (UI
)md
->pad_count
);
// A pad count of a full sector or more, or an empty member, is rejected
// here. NOTE(review): the action taken is not shown -- confirm.
205 if(md
->pad_count
>=LBR_SECTOR_SIZE
|| md
->len_in_sectors
<1) {
209 md
->len_in_bytes_withpadding
= md
->len_in_sectors
*LBR_SECTOR_SIZE
;
210 md
->len_in_bytes_nopadding
= md
->len_in_bytes_withpadding
- (i64
)md
->pad_count
;
211 de_dbg(c
, "length in bytes: %"I64_FMT
, md
->len_in_bytes_nopadding
);
// The member's unpadded data must lie entirely inside the input file.
213 if(md
->pos_in_bytes
+ md
->len_in_bytes_nopadding
> c
->infile
->len
) {
214 de_err(c
, "Unexpected end of file");
222 d
->dir_len_in_bytes
= md
->len_in_bytes_nopadding
;
226 do_extract_member(c
, d
, md
);
230 ucstring_destroy(md
->fn
);
233 de_dbg_indent_restore(c
, saved_indent_level
);
// Entry point of the LBR module. Processes the first directory entry (the
// directory's own entry), then each following 32-byte entry while it lies
// within both the file and the directory length recorded by the first entry.
237 static void de_run_lbr(deark
*c
, de_module_params
*mparams
)
242 d
= de_malloc(c
, sizeof(lctx
));
243 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_ASCII
);
245 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_XMODEM
);
// First entry: is_dir=1, which also sets d->dir_len_in_bytes.
248 if(!do_entry(c
, d
, pos
, 1)) goto done
;
249 pos
+= LBR_DIRENT_SIZE
;
252 while(pos
+LBR_DIRENT_SIZE
<= c
->infile
->len
&&
253 pos
+LBR_DIRENT_SIZE
<= d
->dir_len_in_bytes
)
255 if(!do_entry(c
, d
, pos
, 0)) goto done
;
256 pos
+= LBR_DIRENT_SIZE
;
261 de_crcobj_destroy(d
->crco
);
// Identification: compares the first 14 bytes of the file with the start of
// an expected directory entry -- status 0, an all-blank 8+3 name (11 spaces),
// and a zero 16-bit data-offset field.
266 static int de_identify_lbr(deark
*c
)
268 // TODO: Better detection is possible
269 if(!dbuf_memcmp(c
->infile
, 0, "\x00\x20\x20\x20\x20\x20\x20\x20\x20"
270 "\x20\x20\x20\x00\x00", 14))
// Registers the LBR module: fills in its description and its run and identify
// callbacks.
275 void de_module_lbr(deark
*c
, struct deark_module_info
*mi
)
278 mi
->desc
= "LBR archive";
279 mi
->run_fn
= de_run_lbr
;
280 mi
->identify_fn
= de_identify_lbr
;
283 ///////////////////////////////////////////////
284 // Squeeze - CP/M compressed file format
286 // For Crunch/CRLZH(/Squeeze?) filename fields
// Output of crcr_read_filename_etc(); released with
// crcr_filename_data_freecontents().
287 struct crcr_filename_data
{
// Text found between '[' and ']' in the filename field; stays NULL when the
// field has no comment.
289 de_ucstring
*comment
;
// Parses the filename field that starts at pos1, one byte at a time, using a
// small state machine (filename / comment / date / neutral).
// Fills fnd->fn, fnd->comment (only if a comment is present) and fnd->size
// (number of bytes consumed), and logs the filename, attributes and comment.
// Callers treat a zero return as failure.
// NOTE(review): b2 is presumably b1 with the high bit cleared (the characters
// are 7-bit); its assignment is not shown here -- confirm.
293 static int crcr_read_filename_etc(deark
*c
, i64 pos1
, struct crcr_filename_data
*fnd
)
298 CRCRFNST_NEUTRAL
, CRCRFNST_FILENAME
, CRCRFNST_COMMENT
, CRCRFNST_DATE
300 enum crcrfnstate state
= CRCRFNST_FILENAME
;
302 int extension_char_count
= 0;
// One slot per extension byte; '.' means the attribute bit was not set.
303 char attr_str
[4] = "...";
304 static const char attr_codes
[3] = {'R', 'S', 'A'};
307 // Note: Only ASCII can really be supported, because the characters are 7-bit.
308 // Normally, we'd use ucstring_append_bytes_ex() for something like this, but
309 // it's pointless here.
310 fnd
->fn
= ucstring_create(c
);
315 // Note: CFX limits this entire field to about 80 bytes.
// Give up if the field runs past 300 bytes or past the end of the file.
316 if(pos
-pos1
> 300) goto done
;
317 if(pos
>= c
->infile
->len
) goto done
;
319 b1
= de_getbyte_p(&pos
);
326 state
= CRCRFNST_DATE
; // TODO: Figure this field out
// '[' while reading the filename opens a comment.
328 else if(state
==CRCRFNST_FILENAME
&& b2
=='[') {
329 state
= CRCRFNST_COMMENT
;
// After three extension characters the filename is complete.
331 else if(state
==CRCRFNST_FILENAME
&& extension_char_count
>=3) {
332 state
= CRCRFNST_NEUTRAL
;
334 else if(state
==CRCRFNST_FILENAME
) {
335 ucstring_append_char(fnd
->fn
, (de_rune
)b2
);
337 if(extension_char_count
<3 && (b1
& 0x80)) {
338 // The CP/M low-level directory structure uses the high bit of
339 // the file extension bytes to store attributes. Some Crunch/
340 // CRLZH files do the same thing.
341 // CP/M also uses the high bit of the *filename*, for less-common
342 // attributes, but that doesn't seem possible here, because all 8
343 // bytes are not always stored.
345 attr_str
[extension_char_count
] = attr_codes
[extension_char_count
];
347 extension_char_count
++;
350 if(b2
=='.') found_dot
= 1;
// ']' closes the comment.
353 else if(state
==CRCRFNST_COMMENT
&& b2
==']') {
354 state
= CRCRFNST_NEUTRAL
;
356 else if(state
==CRCRFNST_COMMENT
) {
358 fnd
->comment
= ucstring_create(c
);
360 ucstring_append_char(fnd
->comment
, (de_rune
)b2
);
364 ucstring_strip_trailing_spaces(fnd
->fn
);
// Callers advance their read position by this many bytes.
365 fnd
->size
= pos
- pos1
;
367 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(fnd
->fn
));
370 de_dbg(c
, "attribs: %s", attr_str
);
374 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz_d(fnd
->comment
));
// Releases the strings owned by *fnd (the filename and the comment); the
// struct itself is not freed here.
381 static void crcr_filename_data_freecontents(deark
*c
, struct crcr_filename_data
*fnd
)
383 ucstring_destroy(fnd
->fn
);
384 ucstring_destroy(fnd
->comment
);
389 de_encoding input_encoding
;
390 struct crcr_filename_data fnd
;
391 struct de_stringreaderdata
*sq2_timestamp_string
;
392 struct de_stringreaderdata
*sq2_comment
;
393 UI checksum_reported
;
396 struct de_timestamp timestamp
;
// Looks for an optional 8-byte timestamp record at pos1 and, if it validates,
// stores the decoded time in sqctx->timestamp.
// Record layout (all u16le): signature 0xff77, DOS date, DOS time, checksum.
// The checksum is compared with a DE_CRCOBJ_SUM_U16LE sum over the first 6
// bytes; a mismatch is treated as "no timestamp present".
399 static void do_sqeeze_timestamp(deark
*c
, struct squeeze_ctx
*sqctx
, i64 pos1
)
406 char timestamp_buf
[64];
// Not enough bytes left for a full record.
408 if(c
->infile
->len
-pos1
< 8) return;
409 sig
= de_getu16le_p(&pos
);
410 if(sig
!= 0xff77) return;
411 dt_raw
= de_getu16le_p(&pos
);
412 tm_raw
= de_getu16le_p(&pos
);
413 cksum_reported
= (UI
)de_getu16le_p(&pos
);
414 cksum_calc
= (UI
)de_calccrc_oneshot(c
->infile
, pos1
, 6, DE_CRCOBJ_SUM_U16LE
);
415 cksum_calc
&= 0xffff;
416 if(cksum_calc
!= cksum_reported
) return; // Presumably a false positive signature
418 de_dbg(c
, "timestamp at %"I64_FMT
, pos1
);
420 de_dos_datetime_to_timestamp(&sqctx
->timestamp
, dt_raw
, tm_raw
);
// The stored time carries no time zone, so it is marked as local time.
422 sqctx
->timestamp
.tzcode
= DE_TZCODE_LOCAL
;
423 de_timestamp_to_string(&sqctx
->timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
424 de_dbg(c
, "timestamp: %s", timestamp_buf
);
426 de_dbg(c
, "timestamp checksum (calculated): 0x%04x", cksum_calc
);
427 de_dbg(c
, "timestamp checksum (reported): 0x%04x", cksum_reported
);
428 de_dbg_indent(c
, -1);
// Reads the 16-bit little-endian checksum stored at pos into
// sqctx->checksum_reported and logs it. pos is passed by value, so the
// caller's position is not advanced.
431 static void read_squeeze_checksum(deark
*c
, struct squeeze_ctx
*sqctx
, i64 pos
)
433 sqctx
->checksum_reported
= (u32
)de_getu16le_p(&pos
);
434 de_dbg(c
, "checksum (reported): %u", (UI
)sqctx
->checksum_reported
);
// Reads the original Squeeze header starting at pos1: the stored checksum,
// then the filename field. On success sqctx->cmpr_data_pos is the offset of
// the compressed data. The caller treats a zero return as failure.
437 static int read_squeeze_headers(deark
*c
, struct squeeze_ctx
*sqctx
, i64 pos1
)
442 read_squeeze_checksum(c
, sqctx
, pos
);
445 // I don't know the correct way to interpret the Squeeze filename field, if
446 // there even is such a way.
447 // Some Unsqueeze utilities accept it as-is, some truncate it after the third
448 // filename extension byte, some interpret it the same as Crunch format
449 // (including ignoring the high bit of every byte, for some reason).
450 // Doing it the Crunch way is probably safe.
451 if(!crcr_read_filename_etc(c
, pos
, &sqctx
->fnd
)) goto done
;
452 pos
+= sqctx
->fnd
.size
;
454 sqctx
->cmpr_data_pos
= pos
;
458 de_err(c
, "Malformed header");
// Reads the SQ2 header starting at pos1. Fields, in order: the filename
// field, a NUL-terminated timestamp string, a NUL-terminated comment, a 0x1a
// byte, and the stored checksum. On success sqctx->cmpr_data_pos is the offset
// of the compressed data. The caller treats a zero return as failure.
463 static int read_sq2_headers(deark
*c
, struct squeeze_ctx
*sqctx
, i64 pos1
)
469 if(!crcr_read_filename_etc(c
, pos
, &sqctx
->fnd
)) goto done
;
470 pos
+= sqctx
->fnd
.size
;
// Both strings are read with a limit of 300 and must end in a NUL.
472 sqctx
->sq2_timestamp_string
= dbuf_read_string(c
->infile
, pos
, 300, 300,
473 DE_CONVFLAG_STOP_AT_NUL
, sqctx
->input_encoding
);
474 if(!sqctx
->sq2_timestamp_string
->found_nul
) goto done
;
475 de_dbg(c
, "timestamp_string: \"%s\"", ucstring_getpsz_d(sqctx
->sq2_timestamp_string
->str
));
476 pos
+= sqctx
->sq2_timestamp_string
->bytes_consumed
;
478 sqctx
->sq2_comment
= dbuf_read_string(c
->infile
, pos
, 300, 300,
479 DE_CONVFLAG_STOP_AT_NUL
, sqctx
->input_encoding
);
480 if(!sqctx
->sq2_comment
->found_nul
) goto done
;
481 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz_d(sqctx
->sq2_comment
->str
));
482 pos
+= sqctx
->sq2_comment
->bytes_consumed
;
// The text part of the header must be followed by a 0x1a byte.
484 b
= de_getbyte_p(&pos
);
485 if(b
!= 0x1a) goto done
;
487 read_squeeze_checksum(c
, sqctx
, pos
);
492 sqctx
->cmpr_data_pos
= pos
;
497 de_err(c
, "Malformed header");
// Entry point of the Squeeze module. Reads the headers (original or SQ2
// layout), decompresses the data (Squeeze Huffman codec followed by RLE90)
// into a memory buffer, looks for a trailing timestamp record, writes the
// output file, and compares the byte-sum checksum with the stored one.
502 static void de_run_squeeze(deark
*c
, de_module_params
*mparams
)
506 struct squeeze_ctx
*sqctx
= NULL
;
508 struct de_crcobj
*crco
= NULL
;
509 dbuf
*outf_tmp
= NULL
;
510 dbuf
*outf_final
= NULL
;
511 int saved_indent_level
;
512 struct de_dfilter_in_params dcmpri
;
513 struct de_dfilter_out_params dcmpro
;
514 struct de_dfilter_results dres
;
515 struct de_dcmpr_two_layer_params tlp
;
517 de_dbg_indent_save(c
, &saved_indent_level
);
518 sqctx
= de_malloc(c
, sizeof(struct squeeze_ctx
));
519 sqctx
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
// NOTE(review): presumably the 16-bit signature that selects between the
// two header layouts -- the comparison lines are not shown; confirm.
521 n
= de_getu16le_p(&pos
);
523 de_declare_fmt(c
, "Squeezed");
526 de_declare_fmt(c
, "Squeeze v2 (SQ2)");
530 de_dbg(c
, "Not a Squeezed file");
535 if(!read_sq2_headers(c
, sqctx
, pos
)) goto done
;
538 if(!read_squeeze_headers(c
, sqctx
, pos
)) goto done
;
541 pos
= sqctx
->cmpr_data_pos
;
543 fi
= de_finfo_create(c
);
544 de_finfo_set_name_from_ucstring(c
, fi
, sqctx
->fnd
.fn
, 0);
545 fi
->original_filename_flag
= 1;
547 de_dbg(c
, "squeeze-compressed data at %"I64_FMT
, pos
);
550 // We have to decompress the file before we can find the timestamp. That's
551 // why we decompress to a membuf.
552 outf_tmp
= dbuf_create_membuf(c
, 0, 0);
553 dbuf_enable_wbuffer(outf_tmp
);
555 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
556 dcmpri
.f
= c
->infile
;
// The compressed size is not stored, so everything up to the end of the
// file is offered to the decompressor.
558 dcmpri
.len
= c
->infile
->len
- pos
;
// The checksum is a byte sum of the decompressed data, collected as it is
// written to the temporary buffer.
561 crco
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
562 dbuf_set_writelistener(outf_tmp
, de_writelistener_for_crc
, (void*)crco
);
// Two decompression layers: the Squeeze Huffman codec, then RLE90.
564 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
565 tlp
.codec1_type1
= fmtutil_huff_squeeze_codectype1
;
566 tlp
.codec2
= dfilter_rle90_codec
;
567 tlp
.dcmpri
= &dcmpri
;
568 tlp
.dcmpro
= &dcmpro
;
570 de_dfilter_decompress_two_layer(c
, &tlp
);
571 dbuf_flush(dcmpro
.f
);
// When the decompressor reports how many bytes it consumed, the optional
// timestamp record is looked for right after the compressed data.
573 if(dres
.bytes_consumed_valid
) {
574 de_dbg(c
, "compressed data size: %"I64_FMT
", ends at %"I64_FMT
, dres
.bytes_consumed
,
575 dcmpri
.pos
+dres
.bytes_consumed
);
577 do_sqeeze_timestamp(c
, sqctx
, dcmpri
.pos
+dres
.bytes_consumed
);
578 if(sqctx
->timestamp
.is_valid
) {
579 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = sqctx
->timestamp
;
// Create the real output file and copy the buffered data into it.
583 outf_final
= dbuf_create_output_file(c
, NULL
, fi
, 0);
584 dbuf_copy(outf_tmp
, 0, outf_tmp
->len
, outf_final
);
587 de_err(c
, "Decompression failed: %s", de_dfilter_get_errmsg(c
, &dres
));
// Only the low 16 bits of the byte sum are compared with the stored value.
591 sqctx
->checksum_calc
= de_crcobj_getval(crco
);
592 sqctx
->checksum_calc
&= 0xffff;
593 de_dbg(c
, "checksum (calculated): %u", (UI
)sqctx
->checksum_calc
);
594 if(sqctx
->checksum_calc
!= sqctx
->checksum_reported
) {
595 de_err(c
, "Checksum error. Decompression probably failed.");
601 crcr_filename_data_freecontents(c
, &sqctx
->fnd
);
602 de_destroy_stringreaderdata(c
, sqctx
->sq2_timestamp_string
);
603 de_destroy_stringreaderdata(c
, sqctx
->sq2_comment
);
606 dbuf_close(outf_final
);
607 dbuf_close(outf_tmp
);
608 de_finfo_destroy(c
, fi
);
609 de_crcobj_destroy(crco
);
610 de_dbg_indent_restore(c
, saved_indent_level
);
// Identification by signature value: 0xff76 returns confidence 70, and 0xfffa
// (SQ2) returns the lower confidence 25.
// NOTE(review): how `id` is read is not shown here -- confirm.
613 static int de_identify_squeeze(deark
*c
)
618 if(id
==0xff76) return 70;
619 if(id
==0xfffa) return 25; // SQ2
// Registers the Squeeze module: fills in its description and its run and
// identify callbacks.
623 void de_module_squeeze(deark
*c
, struct deark_module_info
*mi
)
626 mi
->desc
= "Squeeze (CP/M)";
627 mi
->run_fn
= de_run_squeeze
;
628 mi
->identify_fn
= de_identify_squeeze
;
631 ///////////////////////////////////////////////
632 // Crunch - CP/M compressed file format
635 struct crcr_filename_data fnd
;
636 u8 fmtver
; // 1 or 2, 0 if unknown
638 UI checksum_reported
;
// Decompresses Crunch v1 data starting at pos1 to an output file named from
// crunchctx->fnd.fn. The data passes through two layers: LZW (ARC5 variant
// with a stop code), then RLE90. When the checksum type is 0, the byte-sum
// checksum of the output is compared with the 16-bit value stored after the
// compressed data.
642 static void decompress_crunch_v1(deark
*c
, struct crunch_ctx
*crunchctx
, i64 pos1
)
647 struct de_crcobj
*crco
= NULL
;
648 struct de_dfilter_in_params dcmpri
;
649 struct de_dfilter_out_params dcmpro
;
650 struct de_dfilter_results dres
;
651 struct de_lzw_params delzwp
;
652 struct de_dcmpr_two_layer_params tlp
;
655 fi
= de_finfo_create(c
);
656 de_finfo_set_name_from_ucstring(c
, fi
, crunchctx
->fnd
.fn
, 0);
657 fi
->original_filename_flag
= 1;
659 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
660 dbuf_enable_wbuffer(outf
);
// Sum the decompressed bytes as they are written to the output file.
661 crco
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
662 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)crco
);
664 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
665 dcmpri
.f
= c
->infile
;
// Offer everything up to the end of the file to the decompressor.
667 dcmpri
.len
= c
->infile
->len
- pos
;
670 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
671 delzwp
.fmt
= DE_LZWFMT_ARC5
;
672 delzwp
.arc5_has_stop_code
= 1;
// First layer: LZW with the parameters above; second layer: RLE90.
674 de_zeromem(&tlp
, sizeof(struct de_dcmpr_two_layer_params
));
675 tlp
.codec1_pushable
= dfilter_lzw_codec
;
676 tlp
.codec1_private_params
= (void*)&delzwp
;
677 tlp
.codec2
= dfilter_rle90_codec
;
678 tlp
.dcmpri
= &dcmpri
;
679 tlp
.dcmpro
= &dcmpro
;
681 de_dfilter_decompress_two_layer(c
, &tlp
);
682 dbuf_flush(dcmpro
.f
);
685 de_err(c
, "Decompression failed: %s", de_dfilter_get_errmsg(c
, &dres
));
// The stored checksum follows the compressed data, so it can only be
// located when the decompressor reports how many bytes it consumed.
689 if(dres
.bytes_consumed_valid
) {
690 de_dbg(c
, "compressed data size: %"I64_FMT
", ends at %"I64_FMT
, dres
.bytes_consumed
,
691 dcmpri
.pos
+dres
.bytes_consumed
);
692 pos
+= dres
.bytes_consumed
;
694 if(crunchctx
->cksum_type
==0) {
695 crunchctx
->checksum_calc
= de_crcobj_getval(crco
);
696 crunchctx
->checksum_calc
&= 0xffff;
697 crunchctx
->checksum_reported
= (UI
)de_getu16le_p(&pos
);
698 de_dbg(c
, "checksum (calculated): %u", crunchctx
->checksum_calc
);
699 de_dbg(c
, "checksum (reported): %u", crunchctx
->checksum_reported
);
700 if(crunchctx
->checksum_calc
!= crunchctx
->checksum_reported
) {
701 de_err(c
, "Checksum error. Decompression probably failed.");
708 de_finfo_destroy(c
, fi
);
710 de_crcobj_destroy(crco
);
711 de_dbg_indent(c
, -1);
// Entry point of the Crunch module. Reads the filename field and the four
// header bytes that follow it (encoder version, format version, checksum
// type, unused byte), then decompresses v1 data. Other versions are reported
// as unsupported.
714 static void de_run_crunch(deark
*c
, de_module_params
*mparams
)
716 struct crunch_ctx
*crunchctx
= NULL
;
722 crunchctx
= de_malloc(c
, sizeof(struct crunch_ctx
));
725 if(!crcr_read_filename_etc(c
, pos
, &crunchctx
->fnd
)) goto done
;
726 pos
+= crunchctx
->fnd
.size
;
728 b
= de_getbyte_p(&pos
);
729 de_dbg(c
, "encoder version: 0x%02x", (UI
)b
);
// Raw format version 0x10-0x1f maps to v1 and 0x20-0x2f to v2.
731 fmtver_raw
= de_getbyte_p(&pos
);
732 if(fmtver_raw
>=0x10 && fmtver_raw
<=0x1f) {
733 crunchctx
->fmtver
= 1;
736 else if(fmtver_raw
>=0x20 && fmtver_raw
<=0x2f) {
737 crunchctx
->fmtver
= 2;
743 de_dbg(c
, "format version: 0x%02x (%s)", (UI
)fmtver_raw
, verstr
);
744 if(crunchctx
->fmtver
!=0) {
745 de_declare_fmtf(c
, "Crunch (v%d)", (int)crunchctx
->fmtver
);
// Checksum type 0 is the only one given a name ("standard").
748 crunchctx
->cksum_type
= de_getbyte_p(&pos
);
749 de_dbg(c
, "checksum type: 0x%02x (%s)", (UI
)crunchctx
->cksum_type
,
750 (crunchctx
->cksum_type
==0?"standard":"?"));
752 b
= de_getbyte_p(&pos
);
753 de_dbg(c
, "unused info byte: 0x%02x", (UI
)b
);
755 de_dbg(c
, "compressed data at %"I64_FMT
, pos
);
756 if(crunchctx
->fmtver
==1) {
757 decompress_crunch_v1(c
, crunchctx
, pos
);
760 // v2 is by far the most common version, but it's not easy to support.
761 // We support v1, only because it's easy.
762 de_err(c
, "This version of Crunch is not supported");
767 crcr_filename_data_freecontents(c
, &crunchctx
->fnd
);
768 de_free(c
, crunchctx
);
// Identification by signature value: 0xfe76 returns confidence 70.
// NOTE(review): how `id` is read is not shown here -- confirm.
772 static int de_identify_crunch(deark
*c
)
777 if(id
==0xfe76) return 70;
// Registers the Crunch module: fills in its description and its run and
// identify callbacks.
781 void de_module_crunch(deark
*c
, struct deark_module_info
*mi
)
784 mi
->desc
= "Crunch (CP/M)";
785 mi
->run_fn
= de_run_crunch
;
786 mi
->identify_fn
= de_identify_crunch
;
789 ///////////////////////////////////////////////
790 // CRLZH - CP/M compressed file format
793 struct crcr_filename_data fnd
;
794 u8 fmtver
; // 1 or 2, 0 if unknown
796 UI checksum_reported
;
// Decompresses CRLZH data starting at pos1 to an output file named from
// crlzhctx->fnd.fn, using the LH1 codec. When the checksum type is 0, the
// byte-sum checksum of the output is compared with the 16-bit value stored
// after the compressed data.
800 static void decompress_crlzh(deark
*c
, struct crlzh_ctx
*crlzhctx
, i64 pos1
)
805 struct de_crcobj
*crco
= NULL
;
806 struct de_dfilter_in_params dcmpri
;
807 struct de_dfilter_out_params dcmpro
;
808 struct de_dfilter_results dres
;
809 struct de_lh1_params lh1p
;
812 fi
= de_finfo_create(c
);
813 de_finfo_set_name_from_ucstring(c
, fi
, crlzhctx
->fnd
.fn
, 0);
814 fi
->original_filename_flag
= 1;
816 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0x0);
817 dbuf_enable_wbuffer(outf
);
// Sum the decompressed bytes as they are written to the output file.
818 crco
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
819 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)crco
);
821 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
822 dcmpri
.f
= c
->infile
;
// Offer everything up to the end of the file to the decompressor.
824 dcmpri
.len
= c
->infile
->len
- pos
;
// NOTE(review): the LH1 parameters set for each format version are only
// partly shown here -- confirm which settings apply to v1 and which to v2.
827 de_zeromem(&lh1p
, sizeof(struct de_lh1_params
));
828 if(crlzhctx
->fmtver
==1) {
834 lh1p
.history_fill_val
= 0x20;
836 fmtutil_lh1_codectype1(c
, &dcmpri
, &dcmpro
, &dres
, (void*)&lh1p
);
837 dbuf_flush(dcmpro
.f
);
840 de_err(c
, "Decompression failed: %s", de_dfilter_get_errmsg(c
, &dres
));
// The stored checksum follows the compressed data, so it can only be
// located when the decompressor reports how many bytes it consumed.
844 if(dres
.bytes_consumed_valid
) {
845 de_dbg(c
, "compressed data size: %"I64_FMT
", ends at %"I64_FMT
, dres
.bytes_consumed
,
846 dcmpri
.pos
+dres
.bytes_consumed
);
847 pos
+= dres
.bytes_consumed
;
849 if(crlzhctx
->cksum_type
==0) {
850 crlzhctx
->checksum_calc
= de_crcobj_getval(crco
);
851 crlzhctx
->checksum_calc
&= 0xffff;
852 crlzhctx
->checksum_reported
= (UI
)de_getu16le_p(&pos
);
853 de_dbg(c
, "checksum (calculated): %u", crlzhctx
->checksum_calc
);
854 de_dbg(c
, "checksum (reported): %u", crlzhctx
->checksum_reported
);
855 if(crlzhctx
->checksum_calc
!= crlzhctx
->checksum_reported
) {
856 de_err(c
, "Checksum error. Decompression probably failed.");
863 de_finfo_destroy(c
, fi
);
865 de_crcobj_destroy(crco
);
866 de_dbg_indent(c
, -1);
// Entry point of the CRLZH module. Reads the filename field and the four
// header bytes that follow it (encoder version, format version, checksum
// type, unused byte), then decompresses the data.
869 static void de_run_crlzh(deark
*c
, de_module_params
*mparams
)
871 struct crlzh_ctx
*crlzhctx
= NULL
;
877 crlzhctx
= de_malloc(c
, sizeof(struct crlzh_ctx
));
880 if(!crcr_read_filename_etc(c
, pos
, &crlzhctx
->fnd
)) goto done
;
881 pos
+= crlzhctx
->fnd
.size
;
882 b
= de_getbyte_p(&pos
);
883 de_dbg(c
, "encoder version: 0x%02x", (UI
)b
);
// Unlike Crunch, any raw version up to 0x1f (there is no lower bound) is
// treated as v1; 0x20-0x2f is v2.
885 fmtver_raw
= de_getbyte_p(&pos
);
886 if(fmtver_raw
<=0x1f) {
887 crlzhctx
->fmtver
= 1;
890 else if(fmtver_raw
>=0x20 && fmtver_raw
<=0x2f) {
891 // Note: Alternatives are ==0x20 (CFX), and >=0x20 (lbrate).
892 crlzhctx
->fmtver
= 2;
898 de_dbg(c
, "format version: 0x%02x (%s)", (UI
)fmtver_raw
, verstr
);
899 if(crlzhctx
->fmtver
!=0) {
900 de_declare_fmtf(c
, "CRLZH (v%d)", (int)crlzhctx
->fmtver
);
// Checksum type 0 is the only one given a name ("standard").
903 crlzhctx
->cksum_type
= de_getbyte_p(&pos
);
904 de_dbg(c
, "checksum type: 0x%02x (%s)", (UI
)crlzhctx
->cksum_type
,
905 (crlzhctx
->cksum_type
==0?"standard":"?"));
907 b
= de_getbyte_p(&pos
);
908 de_dbg(c
, "unused info byte: 0x%02x", (UI
)b
);
910 de_dbg(c
, "compressed data at %"I64_FMT
, pos
);
911 decompress_crlzh(c
, crlzhctx
, pos
);
915 crcr_filename_data_freecontents(c
, &crlzhctx
->fnd
);
916 de_free(c
, crlzhctx
);
// Identification by signature value: 0xfd76 returns confidence 70.
// NOTE(review): how `id` is read is not shown here -- confirm.
920 static int de_identify_crlzh(deark
*c
)
925 if(id
==0xfd76) return 70;
// Registers the CRLZH module: fills in its description and its run and
// identify callbacks.
929 void de_module_crlzh(deark
*c
, struct deark_module_info
*mi
)
932 mi
->desc
= "CRLZH (CP/M)";
933 mi
->run_fn
= de_run_crlzh
;
934 mi
->identify_fn
= de_identify_crlzh
;
937 ///////////////////////////////////////////////
939 // LZW compression utility by W. Chin, A. Kumar.
940 // Format used by v1.0, 1985-10-26.
942 #define CODE_WACK 0x5741434bU
945 de_encoding input_encoding
;
947 UI checksum_reported
;
949 struct de_timestamp timestamp
;
// Decompresses the ZSQ data that starts at pos (LZW, ARC5 variant) into outf
// and compares the low 16 bits of the output's byte sum with
// zsqctx->checksum_reported.
952 static void do_zsq_decompress(deark
*c
, struct zsq_ctx
*zsqctx
, i64 pos
, dbuf
*outf
)
954 struct de_crcobj
*crco
= NULL
;
955 struct de_dfilter_in_params dcmpri
;
956 struct de_dfilter_out_params dcmpro
;
957 struct de_dfilter_results dres
;
958 struct de_lzw_params delzwp
;
960 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
961 delzwp
.fmt
= DE_LZWFMT_ARC5
;
963 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
964 dcmpri
.f
= c
->infile
;
// Offer everything up to the end of the file to the decompressor.
966 dcmpri
.len
= c
->infile
->len
- pos
;
// Sum the decompressed bytes as they are written to outf.
969 crco
= de_crcobj_create(c
, DE_CRCOBJ_SUM_BYTES
);
970 dbuf_set_writelistener(outf
, de_writelistener_for_crc
, (void*)crco
);
972 fmtutil_decompress_lzw(c
, &dcmpri
, &dcmpro
, &dres
, &delzwp
);
975 zsqctx
->checksum_calc
= de_crcobj_getval(crco
);
976 zsqctx
->checksum_calc
&= 0xffff;
977 de_dbg(c
, "checksum (calculated): %u", (UI
)zsqctx
->checksum_calc
);
978 if(zsqctx
->checksum_calc
!= zsqctx
->checksum_reported
) {
979 de_err(c
, "Checksum error. Decompression probably failed.");
981 de_crcobj_destroy(crco
);
// Reads a DOS date (u16le at pos) and DOS time (u16le at pos+2), stores the
// result in zsqctx->timestamp, and logs it.
984 static void zsq_read_timestamp(deark
*c
, struct zsq_ctx
*zsqctx
, i64 pos
)
987 char timestamp_buf
[64];
989 dt_raw
= de_getu16le(pos
);
990 tm_raw
= de_getu16le(pos
+2);
991 de_dos_datetime_to_timestamp(&zsqctx
->timestamp
, dt_raw
, tm_raw
);
992 de_timestamp_to_string(&zsqctx
->timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
993 de_dbg(c
, "timestamp: %s", timestamp_buf
);
// Entry point of the ZSQ module. Header fields as read below: a 32-bit
// big-endian signature (CODE_WACK), a u16le checksum, a u16le header length,
// a timestamp, and a filename that extends at most to the end of the header.
// The compressed data is then decompressed into the output file.
996 static void de_run_zsq(deark
*c
, de_module_params
*mparams
)
998 struct zsq_ctx
*zsqctx
= NULL
;
1004 de_finfo
*fi
= NULL
;
1006 zsqctx
= de_malloc(c
, sizeof(struct zsq_ctx
));
1007 zsqctx
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1009 id
= (u32
)de_getu32be_p(&pos
);
1010 if(id
!= CODE_WACK
) {
1011 de_err(c
, "Not a ZSQ file");
1015 fi
= de_finfo_create(c
);
1017 zsqctx
->checksum_reported
= (u32
)de_getu16le_p(&pos
);
1018 de_dbg(c
, "checksum (reported): %u", (UI
)zsqctx
->checksum_reported
);
// The header length is counted from the position just after the length
// field itself.
1020 hdr_len
= de_getu16le_p(&pos
);
1021 hdr_endpos
= pos
+ hdr_len
;
1022 if(hdr_endpos
> c
->infile
->len
) {
1023 de_err(c
, "Bad header length");
1027 zsq_read_timestamp(c
, zsqctx
, pos
);
// The filename is NUL-terminated; the read is bounded by the end of the
// header and by a limit of 255.
1030 zsqctx
->fn
= ucstring_create(c
);
1031 dbuf_read_to_ucstring_n(c
->infile
, pos
, hdr_endpos
-pos
, 255, zsqctx
->fn
,
1032 DE_CONVFLAG_STOP_AT_NUL
, zsqctx
->input_encoding
);
1033 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(zsqctx
->fn
));
1035 de_finfo_set_name_from_ucstring(c
, fi
, zsqctx
->fn
, 0);
1036 fi
->original_filename_flag
= 1;
1039 de_dbg(c
, "compressed data at %"I64_FMT
, pos
);
1041 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
1042 dbuf_enable_wbuffer(outf
);
1044 do_zsq_decompress(c
, zsqctx
, pos
, outf
);
1048 de_finfo_destroy(c
, fi
);
1050 ucstring_destroy(zsqctx
->fn
);
// Identification: checks whether the first four bytes, read big-endian, equal
// CODE_WACK.
1055 static int de_identify_zsq(deark
*c
)
1057 if(de_getu32be(0)==CODE_WACK
) {
// Registers the ZSQ module: fills in its description and its run and identify
// callbacks.
1063 void de_module_zsq(deark
*c
, struct deark_module_info
*mi
)
1066 mi
->desc
= "ZSQ (ZSQUSQ, LZW-compressed file)";
1067 mi
->run_fn
= de_run_zsq
;
1068 mi
->identify_fn
= de_identify_zsq
;
1071 // **************************************************************************
1073 // **************************************************************************
1076 int ver
; // 1, 2, or -1 if unknown
1077 struct de_crcobj
*crco
;
// Heuristic for the LZWCOM format version: computes the CRC of the first 1024
// bytes and compares it with the u16le value stored at offset 1024.
// NOTE(review): a match presumably selects version 2; the assignments to
// d->ver are not shown here -- confirm.
1080 static void lzwcom_detect_version(deark
*c
, struct lzwcom_ctx
*d
)
1082 u32 crc_reported
, crc_calc
;
// A file shorter than one 1024-byte block plus its 2-byte CRC cannot be
// tested this way.
1084 if(c
->infile
->len
< 1026) {
1089 de_crcobj_reset(d
->crco
);
1090 de_crcobj_addslice(d
->crco
, c
->infile
, 0, 1024);
1091 crc_calc
= de_crcobj_getval(d
->crco
);
1092 crc_reported
= (u32
)de_getu16le(1024); // Field only exists in v2 format
1093 if(crc_reported
==crc_calc
) {
// Entry point of the LZWCOM module. The format version comes from the
// "lzwcom:version" option or from lzwcom_detect_version(). The input is then
// fed to an LZW (ARC5 variant) decompressor in blocks of up to 1024 bytes.
// A 16-bit CRC of each block's compressed bytes is read after the block and
// checked. NOTE(review): the CRC step presumably applies to v2 only; the
// guarding condition is not shown here -- confirm.
1101 static void de_run_lzwcom(deark
*c
, de_module_params
*mparams
)
1103 struct lzwcom_ctx
*d
= NULL
;
1104 struct de_dfilter_ctx
*dfctx
= NULL
;
1106 struct de_dfilter_out_params dcmpro
;
1107 struct de_dfilter_results dres
;
1108 struct de_lzw_params delzwp
;
1114 d
= de_malloc(c
, sizeof(struct lzwcom_ctx
));
1116 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_ARC
);
1118 s
= de_get_ext_option(c
, "lzwcom:version");
1120 d
->ver
= de_atoi(s
);
// Normalize the user-supplied value: 2 or more means v2, 1 means v1,
// anything else is "unknown" (-1).
1122 if(d
->ver
>=2) d
->ver
= 2;
1123 else if(d
->ver
!=1) d
->ver
= -1;
1126 lzwcom_detect_version(c
, d
);
1129 de_declare_fmtf(c
, "LZWCOM v%d", d
->ver
);
1132 de_declare_fmt(c
, "LZWCOM (unknown version)");
// No filename is available for the output, so "unc" is passed as the name
// argument.
1135 outf
= dbuf_create_output_file(c
, "unc", NULL
, 0);
1136 dbuf_enable_wbuffer(outf
);
1137 de_dfilter_init_objects(c
, NULL
, &dcmpro
, &dres
);
1140 de_zeromem(&delzwp
, sizeof(struct de_lzw_params
));
1141 delzwp
.fmt
= DE_LZWFMT_ARC5
;
1142 delzwp
.flags
|= DE_LZWFLAG_TOLERATETRAILINGJUNK
;
1143 dfctx
= de_dfilter_create(c
, dfilter_lzw_codec
, (void*)&delzwp
, &dcmpro
, &dres
);
// Scratch buffer for one input block.
1144 rbuf
= de_malloc(c
, 1024);
1148 i64 block_pos
= pos
;
// Stop on a decompression error, when the decoder reports it is finished,
// or at the end of the input.
1150 if(dres
.errcode
) break;
1151 if(dfctx
->finished_flag
) break;
1152 if(pos
>= c
->infile
->len
) break;
1153 block_dlen
= de_min_int(1024, c
->infile
->len
- pos
);
1156 de_dbg(c
, "block at %"I64_FMT
", dlen=%"I64_FMT
, block_pos
, block_dlen
);
1159 dbuf_read(c
->infile
, rbuf
, pos
, block_dlen
);
1160 de_dfilter_addbuf(dfctx
, rbuf
, block_dlen
);
1162 // Oddly, this format includes CRCs of the *compressed* bytes, instead of
1163 // the decompressed bytes. So it doesn't detect incorrect decompression.
1165 de_crcobj_reset(d
->crco
);
1166 de_crcobj_addbuf(d
->crco
, rbuf
, block_dlen
);
1172 u32 crc_reported
, crc_calc
;
// Stop if fewer than two bytes remain for the stored CRC.
1174 if(c
->infile
->len
- pos
< 2) break;
1175 crc_calc
= de_crcobj_getval(d
->crco
);
1176 crc_reported
= (u32
)de_getu16le_p(&pos
);
1177 de_dbg_indent(c
, 1);
1178 de_dbg(c
, "crc (calculated): 0x%04x", (UI
)crc_calc
);
1179 de_dbg(c
, "crc (reported): 0x%04x", (UI
)crc_reported
);
1180 de_dbg_indent(c
,- 1);
// A mismatch only produces a warning, and errflag suppresses the warning
// once it is set.
1181 if(!errflag
&& crc_calc
!=crc_reported
) {
1182 de_warn(c
, "CRC check failed at %"I64_FMT
". This might not be an LZWCOM v2 file.", pos
-2);
1188 de_dfilter_finish(dfctx
);
1191 de_err(c
, "Decompression failed: %s", de_dfilter_get_errmsg(c
, &dres
));
1194 de_dfilter_destroy(dfctx
);
1197 de_crcobj_destroy(d
->crco
);
// Prints the help text for the module's "lzwcom:version" option.
1203 static void de_help_lzwcom(deark
*c
)
1205 de_msg(c
, "-opt lzwcom:version=<1|2> : The format version");
// Registers the LZWCOM module: fills in its description and its run and help
// callbacks. identify_fn is NULL: this module provides no identification
// callback.
1208 void de_module_lzwcom(deark
*c
, struct deark_module_info
*mi
)
1211 mi
->desc
= "LZWCOM compressed file";
1212 mi
->run_fn
= de_run_lzwcom
;
1213 mi
->identify_fn
= NULL
;
1214 mi
->help_fn
= de_help_lzwcom
;