bmp: Rewrote the RLE decompressor
[deark.git] / modules / nufx.c
blobec9e8606ab7c73c6db16ae450c3f79e0709a381f
1 // This file is part of Deark.
2 // Copyright (C) 2023 Jason Summers
3 // See the file COPYING for terms of use.
5 // NuFX / ShrinkIt (Apple II format)
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_nufx);
11 #define MAX_THREADS_PER_RECORD 16
13 struct nufx_ctx;
15 struct nufx_thread {
16 UI idx;
17 UI thread_class;
18 UI cmpr_meth;
19 UI kind;
20 u32 crc_reported;
21 i64 thread_eof;
22 i64 orig_len;
23 i64 cmpr_len;
24 i64 cmpr_pos;
25 i64 block_size; // disk images only
26 i64 num_blocks; // disk images only
27 u8 dcmpr_ok_flag;
28 u8 respect_crc_field;
31 struct nufx_record {
32 struct nufx_ctx *d;
33 UI idx;
34 UI version;
35 i64 hdr_pos;
36 i64 hdr_len; // including thread hdrs, excluding data
37 i64 attrib_count;
38 u32 header_crc;
39 UI num_threads;
40 UI filesys_id;
41 UI filesys_info;
42 UI access_code;
43 u32 file_type;
44 u32 extra_type;
45 UI storage_type;
46 u8 is_disk_image;
47 i64 option_size;
48 i64 cur_data_pos;
49 struct de_timestamp create_time;
50 struct de_timestamp mod_time;
51 struct de_timestamp archived_time;
52 de_ucstring *filename_old;
53 de_ucstring *filename;
54 struct nufx_thread *threads; // array[rec->num_threads]
55 struct nufx_thread *filename_thread_ptr; // pointer to somewhere in ->threads, or NULL
56 struct nufx_thread *data_thread_ptr; // ...
57 struct nufx_thread *resource_thread_ptr; // ...
58 struct nufx_thread *disk_image_thread_ptr; // ...
61 struct nufx_ctx {
62 u8 fatalerrflag;
63 u8 need_errmsg;
64 u8 extract_comments;
65 de_encoding input_encoding;
66 UI master_ver;
67 u32 master_crc_reported;
68 i64 total_records;
69 i64 master_eof;
70 i64 next_record_pos;
71 struct de_timestamp archive_create_time;
72 struct de_timestamp archive_mod_time;
73 struct de_crcobj *crco_misc;
74 struct de_crcobj *crco_for_lzw_codec;
75 struct de_crcobj *crco_rfork;
76 struct de_crcobj *crco_dfork;
79 static const char *get_cmpr_meth_name(UI n)
81 const char *name = NULL;
83 switch(n) {
84 case 0: name="uncompressed"; break;
85 case 1: name="Squeeze"; break;
86 case 2: name="ShrinkIt LZW/1"; break;
87 case 3: name="ShrinkIt LZW/2"; break;
88 case 4: name="Unix compress 12-bit"; break;
89 case 5: name="Unix compress 16-bit"; break;
91 return name?name:"?";
94 static int cmpr_meth_is_supported(UI meth)
96 // TODO: There are more compression schemes to support.
97 if(meth==0 || meth==2 || meth==3) return 1;
98 return 0;
101 static const char *get_thread_type_name(UI cla, UI kind)
103 if(cla==0) { // "message"
104 if(kind==0) return "text (obsolete)";
105 if(kind==1) return "comment"; // (?)
106 if(kind==2) return "icon";
108 if(cla==1) { // "control"
109 if(kind==0) return "directory";
110 return "unknown 'control' thread";
112 if(cla==2) { // "data"
113 if(kind==0) return "data fork";
114 if(kind==1) return "disk image";
115 if(kind==2) return "resource fork";
116 return "unknown 'data' thread";
118 if(cla==3) { // "filename"
119 if(kind==0) return "filename";
121 return "?";
124 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
126 char timestamp_buf[64];
128 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
129 de_dbg(c, "%s: %s", name, timestamp_buf);
132 static void nufx_read_datetime_to_timestamp_p(dbuf *f, i64 *ppos,
133 struct de_timestamp *ts)
135 i64 yr, mo, da, hr, mi, se;
136 i64 pos;
138 pos = *ppos;
139 *ppos += 8;
141 se = (i64)dbuf_getbyte_p(f, &pos);
142 mi = (i64)dbuf_getbyte_p(f, &pos);
143 hr = (i64)dbuf_getbyte_p(f, &pos);
144 yr = 1900 + (i64)dbuf_getbyte_p(f, &pos);
145 da = 1 + (i64)dbuf_getbyte_p(f, &pos);
146 mo = 1 + (i64)dbuf_getbyte_p(f, &pos);
148 if(yr==1900) {
149 de_zeromem(ts, sizeof(struct de_timestamp));
150 ts->is_valid = 0;
151 return;
153 de_make_timestamp(ts, yr, mo, da, hr, mi, se);
154 ts->precision = DE_TSPREC_1SEC;
157 static void do_nufx_master_record(deark *c, struct nufx_ctx *d)
159 i64 pos1, pos;
160 u32 master_crc_calc;
161 int saved_indent_level;
163 de_dbg_indent_save(c, &saved_indent_level);
164 pos1 = 0;
165 de_dbg(c, "master record at %"I64_FMT, pos1);
166 de_dbg_indent(c, 1);
167 pos = pos1 + 6;
169 d->master_crc_reported = (u32)de_getu16le_p(&pos);
170 de_dbg(c, "master crc (reported): 0x%04x", (UI)d->master_crc_reported);
172 de_crcobj_reset(d->crco_misc);
173 de_crcobj_addslice(d->crco_misc, c->infile, pos, 40);
174 master_crc_calc = de_crcobj_getval(d->crco_misc);
175 de_dbg(c, "master crc (calculated): 0x%04x", (UI)master_crc_calc);
177 d->total_records = de_getu32le_p(&pos);
178 de_dbg(c, "total records: %"I64_FMT, d->total_records);
180 nufx_read_datetime_to_timestamp_p(c->infile, &pos, &d->archive_create_time);
181 dbg_timestamp(c, &d->archive_create_time, "archive create time");
182 nufx_read_datetime_to_timestamp_p(c->infile, &pos, &d->archive_mod_time);
183 dbg_timestamp(c, &d->archive_mod_time, "archive mod time");
185 d->master_ver = (UI)de_getu16le_p(&pos);
186 de_dbg(c, "fmt ver: %u", d->master_ver);
187 pos += 8; // reserved
189 if(d->master_ver >= 1) {
190 d->master_eof = de_getu32le_p(&pos);
191 de_dbg(c, "master eof: %"I64_FMT, d->master_eof);
193 else {
194 d->master_eof = c->infile->len;
197 pos = pos1 + 48; // Master record is always this size?
199 d->next_record_pos = pos;
201 if(d->master_eof > c->infile->len) {
202 d->fatalerrflag = 1;
203 d->need_errmsg = 1;
204 goto done;
207 done:
208 de_dbg_indent_restore(c, saved_indent_level);
211 // Updates rec->cur_data_pos
212 static void read_thread_header(deark *c,
213 struct nufx_ctx *d, struct nufx_record *rec,
214 struct nufx_thread *t,
215 i64 pos1)
217 i64 pos = pos1;
218 int saved_indent_level;
220 de_dbg_indent_save(c, &saved_indent_level);
222 de_dbg(c, "thread #%u header at %"I64_FMT, t->idx, pos1);
223 de_dbg_indent(c, 1);
225 t->thread_class = (UI)de_getu16le_p(&pos);
226 de_dbg(c, "thread class: 0x%04x", t->thread_class);
227 t->cmpr_meth = (UI)de_getu16le_p(&pos);
228 de_dbg(c, "cmpr meth: 0x%04x (%s)", t->cmpr_meth,
229 get_cmpr_meth_name(t->cmpr_meth));
230 t->kind = (UI)de_getu16le_p(&pos);
231 de_dbg(c, "thread kind: 0x%04x", t->kind);
233 de_dbg(c, "interpreted type: %s",
234 get_thread_type_name(t->thread_class, t->kind));
236 // If record_version==3, this crc should be present.
237 // If record_version==2, the spec. is confusing.
238 // If record_version==1, no crc is present here. (I guess this field is 0?)
239 t->crc_reported = (u32)de_getu16le_p(&pos);
241 if(rec->version>=2 && t->thread_class==2 && t->cmpr_meth!=0) {
242 t->respect_crc_field = 1;
244 else if(rec->version>=3 && t->thread_class==2) {
245 t->respect_crc_field = 1;
247 de_dbg(c, "thread crc (reported): 0x%04x%s", (UI)t->crc_reported,
248 (t->respect_crc_field ? "" : " [ignored]"));
250 t->thread_eof = de_getu32le_p(&pos);
251 de_dbg(c, "orig len: %"I64_FMT, t->thread_eof);
252 if(t->thread_class==2 && t->kind==1) {
253 t->num_blocks = (i64)rec->extra_type;
254 de_dbg(c, "num blocks: %"I64_FMT, t->num_blocks);
255 t->block_size = (i64)rec->storage_type;
256 de_dbg(c, "block size: %"I64_FMT, t->block_size);
257 t->orig_len = t->num_blocks * t->block_size;
258 de_dbg(c, "disk size (calculated): %"I64_FMT, t->orig_len);
260 else {
261 t->orig_len = t->thread_eof;
264 t->cmpr_len = de_getu32le_p(&pos);
265 de_dbg(c, "cmpr len: %"I64_FMT, t->cmpr_len);
267 t->cmpr_pos = rec->cur_data_pos;
268 de_dbg(c, "cmpr data pos: %"I64_FMT, t->cmpr_pos);
270 rec->cur_data_pos += t->cmpr_len;
272 if(rec->cur_data_pos > d->master_eof) {
273 d->fatalerrflag = 1;
274 d->need_errmsg = 1;
275 goto done;
278 // Track the threads we care about.
279 if(t->thread_class==3 && t->kind==0) {
280 rec->filename_thread_ptr = t;
282 else if(t->thread_class==2 && t->kind==0) {
283 rec->data_thread_ptr = t;
285 else if(t->thread_class==2 && t->kind==2) {
286 rec->resource_thread_ptr = t;
288 else if(t->thread_class==2 && t->kind==1) {
289 rec->disk_image_thread_ptr = t;
292 done:
293 de_dbg_indent_restore(c, saved_indent_level);
296 // Read the record header, including the thread headers
297 static void do_nufx_record_header(deark *c,
298 struct nufx_ctx *d, struct nufx_record *rec)
300 i64 pos;
301 i64 pos_of_fnlen_field;
302 i64 pos_after_fnlen_field;
303 i64 fnlen;
304 u32 rh_crc_calc;
305 UI tidx;
306 int saved_indent_level;
308 de_dbg_indent_save(c, &saved_indent_level);
309 de_dbg(c, "record header at %"I64_FMT, rec->hdr_pos);
310 de_dbg_indent(c, 1);
312 pos = rec->hdr_pos+4;
313 rec->header_crc = (u32)de_getu16le_p(&pos);
314 de_dbg(c, "record header crc (reported): 0x%04x", (UI)rec->header_crc);
316 rec->attrib_count = de_getu16le_p(&pos);
317 de_dbg(c, "attrib count: %"I64_FMT, rec->attrib_count);
318 pos_after_fnlen_field = rec->hdr_pos + rec->attrib_count;
319 pos_of_fnlen_field = pos_after_fnlen_field - 2;
321 rec->version = (UI)de_getu16le_p(&pos);
322 de_dbg(c, "record version: %u", (UI)rec->version);
324 rec->num_threads = (UI)de_getu32le_p(&pos);
325 de_dbg(c, "total threads: %u", rec->num_threads);
326 if(rec->num_threads > MAX_THREADS_PER_RECORD) {
327 d->fatalerrflag = 1;
328 d->need_errmsg = 1;
329 goto done;
332 rec->filesys_id = (UI)de_getu16le_p(&pos);
333 de_dbg(c, "filesys id: 0x%04x", rec->filesys_id);
334 rec->filesys_info = (UI)de_getu16le_p(&pos);
335 de_dbg(c, "filesys info: 0x%04x", rec->filesys_info);
336 rec->access_code = (UI)de_getu32le_p(&pos);
337 de_dbg(c, "access: 0x%08x", rec->access_code);
338 rec->file_type = (u32)de_getu32le_p(&pos);
339 de_dbg(c, "file type: 0x%08x", (UI)rec->file_type);
340 rec->extra_type = (u32)de_getu32le_p(&pos);
341 de_dbg(c, "extra type: 0x%08x", (UI)rec->extra_type);
342 rec->storage_type = (UI)de_getu16le_p(&pos);
343 de_dbg(c, "storage type: 0x%04x", rec->storage_type);
345 nufx_read_datetime_to_timestamp_p(c->infile, &pos, &rec->create_time);
346 dbg_timestamp(c, &rec->create_time, "create time");
347 nufx_read_datetime_to_timestamp_p(c->infile, &pos, &rec->mod_time);
348 dbg_timestamp(c, &rec->mod_time, "mod time");
349 nufx_read_datetime_to_timestamp_p(c->infile, &pos, &rec->archived_time);
350 dbg_timestamp(c, &rec->archived_time, "archived time");
352 if(rec->version<1) goto read_fnlen;
353 if(pos+2 > pos_of_fnlen_field) goto read_fnlen;
355 rec->option_size = de_getu16le_p(&pos);
356 if(pos+rec->option_size > pos_of_fnlen_field) {
357 rec->option_size = 0;
359 de_dbg(c, "option size: %"I64_FMT, rec->option_size);
360 if(c->debug_level>=2) {
361 de_dbg_hexdump(c, c->infile, pos, rec->option_size, 256, NULL, 0x1);
363 // Note: The spec. says something about padding option_size to an even
364 // number of bytes, but we don't do anything that would rely on that.
366 read_fnlen:
367 pos = pos_of_fnlen_field;
368 fnlen = de_getu16le_p(&pos);
369 if(fnlen>0 && !rec->filename_old) {
370 rec->filename_old = ucstring_create(c);
371 dbuf_read_to_ucstring_n(c->infile, pos, fnlen, 255, rec->filename_old,
372 0, d->input_encoding);
373 de_dbg(c, "filename (old style): \"%s\"",
374 ucstring_getpsz_d(rec->filename_old));
376 pos += fnlen;
378 rec->hdr_len = (pos + 16*(i64)rec->num_threads) - rec->hdr_pos;
380 de_crcobj_reset(d->crco_misc);
381 de_crcobj_addslice(d->crco_misc, c->infile, rec->hdr_pos+6,
382 rec->hdr_len-6);
383 rh_crc_calc = de_crcobj_getval(d->crco_misc);
384 de_dbg(c, "record header crc (calculated): 0x%04x", (UI)rh_crc_calc);
386 rec->threads = de_mallocarray(c, rec->num_threads, sizeof(struct nufx_thread));
388 rec->cur_data_pos = rec->hdr_pos + rec->hdr_len;
390 for(tidx=0; tidx<rec->num_threads; tidx++) {
391 struct nufx_thread *t;
393 t = &rec->threads[tidx];
394 t->idx = tidx;
395 read_thread_header(c, d, rec, t, pos);
396 if(d->fatalerrflag) goto done;
397 pos += 16;
400 d->next_record_pos = rec->cur_data_pos;
402 done:
403 de_dbg_indent_restore(c, saved_indent_level);
406 static void decompress_chunk_rle_layer(deark *c, struct de_dfilter_in_params *dcmpri,
407 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres, u8 rlechar)
409 i64 srcpos = dcmpri->pos;
410 i64 endpos;
411 i64 nbytes_written = 0;
412 const char *modname = "rle";
414 endpos = dcmpri->pos + dcmpri->len;
415 while(srcpos < endpos) {
416 u8 x;
418 if(nbytes_written >= dcmpro->expected_len) break;
419 x = dbuf_getbyte_p(dcmpri->f, &srcpos);
420 if(x==rlechar) {
421 i64 count;
422 u8 val;
424 val = dbuf_getbyte_p(dcmpri->f, &srcpos);
425 count = 1 + (i64)dbuf_getbyte_p(dcmpri->f, &srcpos);
426 if(nbytes_written+count > dcmpro->expected_len) {
427 goto done;
429 dbuf_write_run(dcmpro->f, val, count);
430 nbytes_written += count;
432 else {
433 dbuf_writebyte(dcmpro->f, x);
434 nbytes_written++;
438 done:
439 if(nbytes_written != dcmpro->expected_len) {
440 de_dfilter_set_generic_error(c, dres, modname);
442 dres->bytes_consumed = srcpos - dcmpri->pos;
443 dres->bytes_consumed_valid = 1;
446 static void decompress_chunk_lzw1_lzw_layer(deark *c,
447 struct de_dfilter_in_params *dcmpri,
448 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
450 struct de_lzw_params delzwp;
452 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
453 delzwp.fmt = DE_LZWFMT_SHRINKIT1;
454 delzwp.max_code_size = 12;
455 fmtutil_decompress_lzw(c, dcmpri, dcmpro, dres, &delzwp);
458 static void decompress_lzw_1(deark *c, struct nufx_ctx *d,
459 struct de_dfilter_in_params *dcmpri,
460 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
462 u32 lzwcrc_reported;
463 u32 lzwcrc_calc;
464 i64 pos = dcmpri->pos;
465 i64 nbytes_to_copy;
466 u8 volnum;
467 u8 rlechar;
468 i64 output_bytes_remaining;
469 dbuf *tmpdbuf_lzw = NULL;
470 dbuf *tmpdbuf_rle = NULL;
471 struct de_dfilter_in_params *dcmpri_lzw = NULL;
472 struct de_dfilter_out_params *dcmpro_lzw = NULL;
473 struct de_dfilter_results *dres_lzw = NULL;
474 struct de_dfilter_in_params *dcmpri_rle = NULL;
475 struct de_dfilter_out_params *dcmpro_rle = NULL;
476 struct de_dfilter_results *dres_rle = NULL;
477 const char *modname = "nufx_lzw1";
479 output_bytes_remaining = dcmpro->expected_len;
480 tmpdbuf_lzw = dbuf_create_membuf(c, 4096, 0);
481 tmpdbuf_rle = dbuf_create_membuf(c, 4096, 0);
482 dcmpri_lzw = de_malloc(c, sizeof(struct de_dfilter_in_params));
483 dcmpro_lzw = de_malloc(c, sizeof(struct de_dfilter_out_params));
484 dres_lzw = de_malloc(c, sizeof(struct de_dfilter_results));
485 dcmpri_rle = de_malloc(c, sizeof(struct de_dfilter_in_params));
486 dcmpro_rle = de_malloc(c, sizeof(struct de_dfilter_out_params));
487 dres_rle = de_malloc(c, sizeof(struct de_dfilter_results));
489 // The compressed data is chunked. There's an initial 4-byte header, then each
490 // chunk has a 3-byte header.
491 // Every chunk should decompress to exactly 4096 bytes. (The last chunk is padded,
492 // and the padding is included in the internal CRC computation).
493 // A chunk may use LZW, RLE, both, or neither.
494 // We can tell which methods are used, as well as the intermediate-decompressed size,
495 // from the chunk header.
496 // We need to do LZW decompression first (if applicable), then RLE decompression (if
497 // applicable).
498 // There's no easy way to figure out the size of a chunk of compressed data.
499 // We can only do it after we do the LZW decompression, and see how much source data
500 // was consumed (rounding up to the next whole byte) in order to produce the
501 // intermediate number of bytes.
502 // Our LZW decompressor tells us this info (though we'd rather not have to rely on it).
504 lzwcrc_reported = (u32)de_getu16le_p(&pos);
505 de_dbg(c, "lzwcodec crc (reported): 0x%04x", (UI)lzwcrc_reported);
506 volnum = dbuf_getbyte_p(dcmpri->f, &pos);
507 de_dbg(c, "lzwcodec vol num: %u", (UI)volnum);
508 rlechar = dbuf_getbyte_p(dcmpri->f, &pos);
509 de_dbg(c, "lzwcodec rle char: 0x%02x", (UI)rlechar);
511 de_crcobj_reset(d->crco_for_lzw_codec);
513 while(1) {
514 i64 chkpos;
515 i64 intermed_chunk_len; // size we expect after RLE decompression, before LZW decompression
516 u8 uses_rle;
517 u8 uses_lzw;
519 if(output_bytes_remaining<1) break;
520 if(pos+3 > dcmpri->pos + dcmpri->len) break;
521 chkpos = pos;
522 intermed_chunk_len = dbuf_getu16le_p(dcmpri->f, &pos); // if 4096, no RLE
523 uses_rle = (intermed_chunk_len != 4096);
524 uses_lzw = dbuf_getbyte_p(dcmpri->f, &pos); // if 0, no LZW
525 de_dbg(c, "chunk at %"I64_FMT", intermed_len=%"I64_FMT", lzw=%u, rle=%u",
526 chkpos, intermed_chunk_len, (UI)uses_lzw, (UI)uses_rle);
528 dbuf_empty(tmpdbuf_lzw);
529 if(uses_lzw) {
530 de_dfilter_init_objects(c, dcmpri_lzw, dcmpro_lzw, dres_lzw);
531 dcmpri_lzw->f = dcmpri->f;
532 dcmpri_lzw->pos = pos;
533 // ->len is just a maximum. We don't know the compressed data size yet.
534 dcmpri_lzw->len = dcmpri->len + dcmpri->pos - pos;
535 dcmpro_lzw->f = tmpdbuf_lzw;
536 dcmpro_lzw->len_known = 1;
537 dcmpro_lzw->expected_len = intermed_chunk_len;
538 decompress_chunk_lzw1_lzw_layer(c, dcmpri_lzw, dcmpro_lzw, dres_lzw);
539 if(dres_lzw->errcode) {
540 de_dfilter_transfer_error2(c, dres_lzw, dres, modname);
541 goto done;
543 if(!dres_lzw->bytes_consumed_valid) {
544 de_dfilter_set_generic_error(c, dres, modname);
545 goto done;
547 pos += dres_lzw->bytes_consumed;
549 else {
550 dbuf_copy(dcmpri->f, pos, intermed_chunk_len, tmpdbuf_lzw);
551 pos += intermed_chunk_len;
554 dbuf_empty(tmpdbuf_rle);
555 if(uses_rle) {
556 de_dfilter_init_objects(c, dcmpri_rle, dcmpro_rle, dres_rle);
557 dcmpri_rle->f = tmpdbuf_lzw;
558 dcmpri_rle->pos = 0;
559 dcmpri_rle->len = tmpdbuf_lzw->len;
560 dcmpro_rle->f = tmpdbuf_rle;
561 dcmpro_rle->len_known = 1;
562 dcmpro_rle->expected_len = 4096;
563 decompress_chunk_rle_layer(c, dcmpri_rle, dcmpro_rle, dres_rle, rlechar);
564 if(dres_rle->errcode) {
565 de_dfilter_transfer_error2(c, dres_rle, dres, modname);
566 goto done;
569 else {
570 dbuf_copy(tmpdbuf_lzw, 0, 4096, tmpdbuf_rle);
573 de_crcobj_addslice(d->crco_for_lzw_codec, tmpdbuf_rle, 0, 4096);
575 nbytes_to_copy = de_min_int(output_bytes_remaining, 4096);
576 dbuf_copy(tmpdbuf_rle, 0, nbytes_to_copy, dcmpro->f);
577 output_bytes_remaining -= nbytes_to_copy;
580 lzwcrc_calc = de_crcobj_getval(d->crco_for_lzw_codec);
581 de_dbg(c, "lzwcodec crc (calculated): 0x%04x", (UI)lzwcrc_calc);
582 if(lzwcrc_calc != lzwcrc_reported) {
583 de_dfilter_set_errorf(c, dres, modname, "Codec internal CRC check failed");
584 goto done;
587 done:
588 dbuf_close(tmpdbuf_lzw);
589 de_free(c, dcmpri_lzw);
590 de_free(c, dcmpro_lzw);
591 de_free(c, dres_lzw);
592 de_free(c, dcmpri_rle);
593 de_free(c, dcmpro_rle);
594 de_free(c, dres_rle);
597 // TODO? We could merge the lzw_1 and lzw_2 decompressors, but it'd be a bit messy.
598 static void decompress_lzw_2(deark *c, struct nufx_ctx *d,
599 struct de_dfilter_in_params *dcmpri,
600 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres)
602 i64 pos = dcmpri->pos;
603 i64 nbytes_to_copy;
604 u8 volnum;
605 u8 rlechar;
606 u8 need_lzw_clear = 0;
607 i64 output_bytes_remaining;
608 dbuf *tmpdbuf_lzw = NULL;
609 dbuf *tmpdbuf_rle = NULL;
610 struct de_dfilter_out_params *dcmpro_lzw = NULL;
611 struct de_dfilter_results *dres_lzw = NULL;
612 struct de_dfilter_in_params *dcmpri_rle = NULL;
613 struct de_dfilter_out_params *dcmpro_rle = NULL;
614 struct de_dfilter_results *dres_rle = NULL;
615 const char *modname = "nufx_lzw2";
616 struct de_dfilter_ctx *dfctx = NULL;
617 struct de_lzw_params delzwp;
619 output_bytes_remaining = dcmpro->expected_len;
621 tmpdbuf_lzw = dbuf_create_membuf(c, 4096, 0);
622 tmpdbuf_rle = dbuf_create_membuf(c, 4096, 0);
623 dcmpro_lzw = de_malloc(c, sizeof(struct de_dfilter_out_params));
624 dres_lzw = de_malloc(c, sizeof(struct de_dfilter_results));
625 dcmpri_rle = de_malloc(c, sizeof(struct de_dfilter_in_params));
626 dcmpro_rle = de_malloc(c, sizeof(struct de_dfilter_out_params));
627 dres_rle = de_malloc(c, sizeof(struct de_dfilter_results));
629 de_zeromem(&delzwp, sizeof(struct de_lzw_params));
630 delzwp.fmt = DE_LZWFMT_SHRINKIT2;
631 delzwp.max_code_size = 12;
633 // The compressed data is chunked. There's an initial 2-byte header, then each
634 // chunk has a 4-byte (if chunk uses LZW) or 2-byte (otherwise) header.
636 volnum = dbuf_getbyte_p(dcmpri->f, &pos);
637 de_dbg(c, "lzwcodec vol num: %u", (UI)volnum);
638 rlechar = dbuf_getbyte_p(dcmpri->f, &pos);
639 de_dbg(c, "lzwcodec rle char: 0x%02x", (UI)rlechar);
641 de_dfilter_init_objects(c, NULL, dcmpro_lzw, dres_lzw);
643 while(1) {
644 i64 chkpos;
645 i64 intermed_chunk_len; // size we expect after RLE decompression, before LZW decompression
646 i64 cmpr_len;
647 u8 uses_rle;
648 u8 uses_lzw;
650 if(output_bytes_remaining<1) break;
651 if(pos > dcmpri->pos + dcmpri->len) break;
652 chkpos = pos;
653 intermed_chunk_len = dbuf_getu16le_p(dcmpri->f, &pos); // if 4096, no RLE
654 uses_lzw = (intermed_chunk_len & 0x8000)?1:0;
655 intermed_chunk_len &= 0x1fff;
656 uses_rle = (intermed_chunk_len != 4096);
657 if(uses_lzw) {
658 cmpr_len = dbuf_getu16le_p(dcmpri->f, &pos);
659 cmpr_len -= 4; // Apparently, the length includes the chunk header
660 if(cmpr_len<0) cmpr_len = 0;
662 else {
663 cmpr_len = intermed_chunk_len;
666 de_dbg(c, "chunk at %"I64_FMT", intermed_len=%"I64_FMT", "
667 "cmpr_len=%"I64_FMT", lzw=%u, rle=%u",
668 chkpos, intermed_chunk_len, cmpr_len, (UI)uses_lzw, (UI)uses_rle);
670 dbuf_empty(tmpdbuf_lzw);
671 if(uses_lzw) {
672 if(dfctx) {
673 if(need_lzw_clear) {
674 de_dfilter_command(dfctx, DE_DFILTER_COMMAND_SOFTRESET, 0);
677 else {
678 dcmpro_lzw->f = tmpdbuf_lzw;
679 dcmpro_lzw->len_known = 0;
680 dfctx = de_dfilter_create(c, dfilter_lzw_codec, (void*)&delzwp, dcmpro_lzw, dres_lzw);
682 need_lzw_clear = 0;
684 de_dfilter_addslice(dfctx, dcmpri->f, pos, cmpr_len);
685 de_dfilter_command(dfctx, DE_DFILTER_COMMAND_FINISH_BLOCK, 0);
686 dbuf_flush(dcmpro_lzw->f);
688 // Hack, to cause the error message to be available. TODO: Improve this.
689 if(dfctx->finished_flag) {
690 de_dfilter_finish(dfctx);
693 if(dres_lzw->errcode) {
694 de_dfilter_transfer_error2(c, dres_lzw, dres, modname);
695 goto done;
698 if(dcmpro_lzw->f->len != intermed_chunk_len) {
699 de_dfilter_set_errorf(c, dres, modname, "LZW decompression failed "
700 "(expected %"I64_FMT" bytes, got %"I64_FMT")",
701 intermed_chunk_len, dcmpro_lzw->f->len);
702 goto done;
705 else {
706 dbuf_copy(dcmpri->f, pos, intermed_chunk_len, tmpdbuf_lzw);
707 if(dfctx) {
708 // A non-LZW chunk following an LZW chunk: We'll have to reset the
709 // LZW decompressor if/when we encounter another LZW chunk.
710 need_lzw_clear = 1;
713 pos += cmpr_len;
715 dbuf_empty(tmpdbuf_rle);
716 if(uses_rle) {
717 de_dfilter_init_objects(c, dcmpri_rle, dcmpro_rle, dres_rle);
718 dcmpri_rle->f = tmpdbuf_lzw;
719 dcmpri_rle->pos = 0;
720 dcmpri_rle->len = tmpdbuf_lzw->len;
721 dcmpro_rle->f = tmpdbuf_rle;
722 dcmpro_rle->len_known = 1;
723 dcmpro_rle->expected_len = 4096;
724 decompress_chunk_rle_layer(c, dcmpri_rle, dcmpro_rle, dres_rle, rlechar);
725 if(dres_rle->errcode) {
726 de_dfilter_transfer_error2(c, dres_rle, dres, modname);
727 goto done;
730 else {
731 dbuf_copy(tmpdbuf_lzw, 0, 4096, tmpdbuf_rle);
734 de_crcobj_addslice(d->crco_for_lzw_codec, tmpdbuf_rle, 0, 4096);
736 nbytes_to_copy = de_min_int(output_bytes_remaining, 4096);
737 dbuf_copy(tmpdbuf_rle, 0, nbytes_to_copy, dcmpro->f);
738 output_bytes_remaining -= nbytes_to_copy;
741 done:
742 de_dfilter_destroy(dfctx);
743 dbuf_close(tmpdbuf_lzw);
744 de_free(c, dcmpro_lzw);
745 de_free(c, dres_lzw);
746 de_free(c, dcmpri_rle);
747 de_free(c, dcmpro_rle);
748 de_free(c, dres_rle);
751 static int decompress_thread(deark *c,
752 struct nufx_ctx *d, struct nufx_record *rec,
753 struct nufx_thread *t, dbuf *outf)
755 int retval = 0;
756 struct de_dfilter_in_params dcmpri;
757 struct de_dfilter_out_params dcmpro;
758 struct de_dfilter_results dres;
760 de_dbg(c, "[reading thread #%u]", t->idx);
761 de_dbg_indent(c, 1);
763 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
764 if(!t) goto done;
765 if(t->cmpr_pos + t->cmpr_len > c->infile->len) goto done;
767 dcmpri.f = c->infile;
768 dcmpri.pos = t->cmpr_pos;
769 dcmpri.len = t->cmpr_len;
770 dcmpro.f = outf;
771 dcmpro.expected_len = t->orig_len;
772 dcmpro.len_known = 1;
774 if(t->cmpr_meth==0 || t->orig_len==0) {
775 fmtutil_decompress_uncompressed(c, &dcmpri, &dcmpro, &dres, 0);
777 else if(t->cmpr_meth==2) {
778 decompress_lzw_1(c, d, &dcmpri, &dcmpro, &dres);
780 else if(t->cmpr_meth==3) {
781 decompress_lzw_2(c, d, &dcmpri, &dcmpro, &dres);
783 else {
784 de_dfilter_set_errorf(c, &dres, NULL, "Unsupported compression method");
787 if(dres.errcode) {
788 de_err(c, "Decompression failed for record#%u thread#%u: %s",
789 rec->idx, t->idx, de_dfilter_get_errmsg(c, &dres));
790 goto done;
793 retval = 1;
794 t->dcmpr_ok_flag = 1;
795 goto done;
797 done:
798 de_dbg_indent(c, -1);
799 return retval;
802 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
803 struct de_advfile_cbparams *afp)
805 struct nufx_record *rec = (struct nufx_record*)advf->userdata;
806 struct nufx_ctx *d = rec->d;
808 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
809 decompress_thread(c, d, rec, rec->data_thread_ptr, afp->outf);
811 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
812 decompress_thread(c, d, rec, rec->resource_thread_ptr, afp->outf);
815 return 1;
818 static int my_advfile_cbfn_diskimage(deark *c, struct de_advfile *advf,
819 struct de_advfile_cbparams *afp)
821 struct nufx_record *rec = (struct nufx_record*)advf->userdata;
822 struct nufx_ctx *d = rec->d;
824 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
825 decompress_thread(c, d, rec, rec->disk_image_thread_ptr, afp->outf);
828 return 1;
831 // Extract either {data and resource forks}, or {disk image}.
832 static void extract_main_threads(deark *c, struct nufx_ctx *d,
833 struct nufx_record *rec, u8 disk_image_flag)
835 struct de_advfile *advf = NULL;
836 struct nufx_thread *t_d;
837 struct nufx_thread *t_r;
838 const char *dname;
839 u32 d_crc_calc, r_crc_calc;
840 u8 ok_cmpr;
842 if(disk_image_flag) {
843 dname = "disk image";
844 t_d = rec->disk_image_thread_ptr;
845 t_r = NULL;
847 else {
848 dname = "data fork";
849 t_d = rec->data_thread_ptr;
850 t_r = rec->resource_thread_ptr;
853 if(!t_d && !t_r) {
854 goto done;
857 ok_cmpr = 1;
858 if(t_d) {
859 if(t_d->orig_len>0 && !cmpr_meth_is_supported(t_d->cmpr_meth)) {
860 ok_cmpr = 0;
863 if(t_r) {
864 if(t_r->orig_len>0 && !cmpr_meth_is_supported(t_r->cmpr_meth)) {
865 ok_cmpr = 0;
868 // Continue only if we're pretty sure we can decompress both forks.
869 if(!ok_cmpr) {
870 de_err(c, "record #%u: Compression method not supported", rec->idx);
871 goto done;
874 advf = de_advfile_create(c);
875 advf->userdata = (void*)rec;
876 advf->writefork_cbfn = disk_image_flag ? my_advfile_cbfn_diskimage : my_advfile_cbfn;
877 advf->enable_wbuffer = 1;
879 advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = rec->mod_time;
880 advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE] = rec->create_time;
882 ucstring_append_ucstring(advf->filename, rec->filename);
883 advf->original_filename_flag = 1;
884 //de_advfile_set_orig_filename(rec->advf, ...); // TODO?
886 if(t_d) {
887 advf->mainfork.fork_exists = 1;
888 advf->mainfork.fork_len = t_d->orig_len;
889 advf->mainfork.writelistener_cb = de_writelistener_for_crc;
890 advf->mainfork.userdata_for_writelistener = (void*)d->crco_dfork;
891 de_crcobj_reset(d->crco_dfork);
893 if(t_r) {
894 advf->rsrcfork.fork_exists = 1;
895 advf->rsrcfork.fork_len = t_r->orig_len;
896 advf->rsrcfork.writelistener_cb = de_writelistener_for_crc;
897 advf->rsrcfork.userdata_for_writelistener = (void*)d->crco_rfork;
898 de_crcobj_reset(d->crco_rfork);
901 de_advfile_run(advf);
903 if(t_d && t_d->dcmpr_ok_flag && t_d->respect_crc_field) {
904 d_crc_calc = de_crcobj_getval(d->crco_dfork);
905 de_dbg(c, "%s crc (calculated): 0x%04x", dname, (UI)d_crc_calc);
906 if(d_crc_calc!=t_d->crc_reported) {
907 de_err(c, "CRC check failed for record #%u %s", rec->idx, dname);
910 if(t_r && t_r->dcmpr_ok_flag && t_r->respect_crc_field) {
911 r_crc_calc = de_crcobj_getval(d->crco_rfork);
912 de_dbg(c, "rsrc fork crc (calculated): 0x%04x", (UI)r_crc_calc);
913 if(r_crc_calc!=t_r->crc_reported) {
914 de_err(c, "CRC check failed for record #%u resource fork", rec->idx);
918 done:
919 de_advfile_destroy(advf);
922 static void do_extract_aux_thread(deark *c, struct nufx_ctx *d,
923 struct nufx_record *rec, struct nufx_thread *t, const char *name)
925 de_finfo *fi = NULL;
926 dbuf *outf = NULL;
928 fi = de_finfo_create(c);
929 // TODO: Better file naming
930 de_finfo_set_name_from_sz(c, fi, name, 0, DE_ENCODING_UTF8);
931 outf = dbuf_create_output_file(c, NULL, fi, DE_CREATEFLAG_IS_AUX);
932 decompress_thread(c, d, rec, t, outf);
933 dbuf_close(outf);
934 de_finfo_destroy(c, fi);
937 static void do_dbg_comment(deark *c, struct nufx_ctx *d,
938 struct nufx_record *rec, struct nufx_thread *t)
940 dbuf *tmpdbuf = NULL;
941 de_ucstring *s = NULL;
943 tmpdbuf = dbuf_create_membuf(c, DE_DBG_MAX_STRLEN, 0x1);
944 if(!decompress_thread(c, d, rec, t, tmpdbuf)) goto done;
946 s = ucstring_create(c);
947 dbuf_read_to_ucstring_n(tmpdbuf, 0, tmpdbuf->len, DE_DBG_MAX_STRLEN,
948 s, 0, d->input_encoding);
949 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(s));
951 done:
952 dbuf_close(tmpdbuf);
953 ucstring_destroy(s);
956 static void extract_from_record(deark *c,
957 struct nufx_ctx *d, struct nufx_record *rec)
959 dbuf *filename_dbuf = NULL;
960 UI tidx;
961 int ret;
963 if(rec->filename_thread_ptr) {
964 filename_dbuf = dbuf_create_membuf(c, 0, 0);
965 ret = decompress_thread(c, d, rec, rec->filename_thread_ptr,
966 filename_dbuf);
967 if(!ret || filename_dbuf->len<1) {
968 goto done;
970 dbuf_read_to_ucstring_n(filename_dbuf, 0, filename_dbuf->len, 255,
971 rec->filename, 0, d->input_encoding);
972 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(rec->filename));
974 if(ucstring_isempty(rec->filename) && ucstring_isnonempty(rec->filename_old)) {
975 ucstring_append_ucstring(rec->filename, rec->filename_old);
978 de_dbg(c, "[extracting files]");
979 de_dbg_indent(c, 1);
981 // A record shouldn't contain both a disk image, and a file, but for lack
982 // of something better to do, we tolerate it.
984 if(rec->disk_image_thread_ptr) {
985 extract_main_threads(c, d, rec, 1);
988 if(rec->data_thread_ptr || rec->resource_thread_ptr) {
989 extract_main_threads(c, d, rec, 0);
992 if(!rec->data_thread_ptr && !rec->resource_thread_ptr && !rec->disk_image_thread_ptr) {
993 de_warn(c, "record #%u: No supported content found", rec->idx);
996 // Handle ancillary threads, such as comments.
997 for(tidx=0; tidx<rec->num_threads; tidx++) {
998 struct nufx_thread *t;
1000 t = &rec->threads[tidx];
1001 if(t->orig_len>0 && t->thread_class==0 && t->kind==1) {
1002 if(d->extract_comments) {
1003 do_extract_aux_thread(c, d, rec, t, "comment");
1005 else {
1006 do_dbg_comment(c, d, rec, t);
1011 de_dbg_indent(c, -1);
1013 done:
1014 dbuf_close(filename_dbuf);
1017 static void do_nufx_record(deark *c,
1018 struct nufx_ctx *d, struct nufx_record *rec)
1020 int saved_indent_level;
1021 UI id;
1023 de_dbg_indent_save(c, &saved_indent_level);
1025 id = (UI)de_getu32be(rec->hdr_pos);
1026 if(id != 0x4ef546d8) {
1027 de_err(c, "Expected record not found at %"I64_FMT, rec->hdr_pos);
1028 d->fatalerrflag = 1;
1029 goto done;
1032 de_dbg(c, "record #%u at %"I64_FMT, rec->idx, rec->hdr_pos);
1033 de_dbg_indent(c, 1);
1035 do_nufx_record_header(c, d, rec);
1036 if(d->fatalerrflag) goto done;
1038 extract_from_record(c, d, rec);
1040 done:
1041 de_dbg_indent_restore(c, saved_indent_level);
1044 static void destroy_record(deark *c, struct nufx_record *rec)
1046 if(!rec) return;
1047 ucstring_destroy(rec->filename);
1048 ucstring_destroy(rec->filename_old);
1049 de_free(c, rec->threads);
1050 de_free(c, rec);
1053 static void de_run_nufx(deark *c, de_module_params *mparams)
1055 struct nufx_ctx *d = NULL;
1056 struct nufx_record *rec = NULL;
1057 i64 pos;
1058 i64 rec_idx = 0;
1060 d = de_malloc(c, sizeof(struct nufx_ctx));
1061 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_ASCII);
1062 d->extract_comments = (c->extract_level>=2);
1063 d->crco_misc = de_crcobj_create(c, DE_CRCOBJ_CRC16_XMODEM);
1064 d->crco_for_lzw_codec = de_crcobj_create(c, DE_CRCOBJ_CRC16_XMODEM);
1065 d->crco_dfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_IBM3740);
1066 d->crco_rfork = de_crcobj_create(c, DE_CRCOBJ_CRC16_IBM3740);
1068 do_nufx_master_record(c, d);
1069 if(d->fatalerrflag) goto done;
1070 pos = d->next_record_pos;
1072 for(rec_idx=0; rec_idx<d->total_records; rec_idx++) {
1073 if(pos >= d->master_eof) goto done;
1075 if(rec) {
1076 destroy_record(c, rec);
1077 rec = NULL;
1080 rec = de_malloc(c, sizeof(struct nufx_record));
1081 rec->d = d;
1082 rec->idx = (UI)rec_idx;
1083 rec->filename = ucstring_create(c);
1084 rec->hdr_pos = pos;
1085 do_nufx_record(c, d, rec);
1086 if(d->fatalerrflag) goto done;
1088 pos = d->next_record_pos;
1091 done:
1092 destroy_record(c, rec);
1093 if(d) {
1094 if(d->need_errmsg) {
1095 de_err(c, "Bad or unsupported NuFX file");
1097 de_crcobj_destroy(d->crco_misc);
1098 de_crcobj_destroy(d->crco_for_lzw_codec);
1099 de_crcobj_destroy(d->crco_dfork);
1100 de_crcobj_destroy(d->crco_rfork);
1101 de_free(c, d);
1105 static int de_identify_nufx(deark *c)
1107 if(dbuf_memcmp(c->infile, 0, (const void*)"\x4e\xf5\x46\xe9\x6c\xe5", 6)) {
1108 return 0;
1110 return 100;
1113 void de_module_nufx(deark *c, struct deark_module_info *mi)
1115 mi->id = "nufx";
1116 mi->desc = "NuFX / ShrinkIt";
1117 mi->run_fn = de_run_nufx;
1118 mi->identify_fn = de_identify_nufx;