New "ea_data" module
[deark.git] / modules / pack.c
blob71aa124a008a16f66c856e612fd04bdab0d2309b
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // Unix "pack" (.z) compressed format
7 #include <deark-private.h>
8 #include <deark-fmtutil.h>
9 DE_DECLARE_MODULE(de_module_pack);
11 #define PCK_EOF_CODE (-1)
13 typedef struct localctx_struct {
14 i64 unc_size;
15 i64 tree_def_size;
16 struct fmtutil_huffman_decoder *ht;
17 struct de_bitreader bitrd;
19 UI depth;
20 #define PCK_MAX_LEVELS FMTUTIL_HUFFMAN_MAX_CODE_LENGTH // Traditional unpack maxes out at ~24
21 UI leaves_per_level[PCK_MAX_LEVELS];
22 } lctx;
24 static int read_tree(deark *c, lctx *d, i64 pos1)
26 i64 pos = pos1;
27 UI lv;
28 int retval = 0;
30 d->depth = (UI)de_getbyte_p(&pos);
31 de_dbg(c, "depth: %u", d->depth);
32 if(d->depth>=PCK_MAX_LEVELS) goto done;
33 if(d->depth<1) goto done;
35 for(lv=0; lv<d->depth; lv++) {
36 d->leaves_per_level[lv] = (UI)de_getbyte_p(&pos);
37 if(lv==d->depth-1) {
38 // The count of leaves in the last level is biased, and the last leaf
39 // (for EOF) is virtual (not stored in the following table).
40 // The bias is 2 if you think it includes the virtual leaf, otherwise 1.
41 d->leaves_per_level[lv] += 2;
43 de_dbg2(c, "num length %u codes: %u", (UI)(lv+1), d->leaves_per_level[lv]);
46 for(lv=0; lv<d->depth; lv++) {
47 UI n_stored_leaves_this_level;
48 UI k;
50 if(lv==d->depth-1) {
51 n_stored_leaves_this_level = d->leaves_per_level[lv] - 1;
53 else {
54 n_stored_leaves_this_level = d->leaves_per_level[lv];
57 for(k=0; k<n_stored_leaves_this_level; k++) {
58 u8 ch;
60 ch = de_getbyte_p(&pos);
61 de_dbg3(c, "lv=%u ch=%u", lv, (UI)ch);
62 fmtutil_huffman_record_a_code_length(c, d->ht->builder, (fmtutil_huffman_valtype)ch, lv+1);
65 if(lv==d->depth-1) {
66 de_dbg3(c, "lv=%u EOF", lv);
67 fmtutil_huffman_record_a_code_length(c, d->ht->builder, (fmtutil_huffman_valtype)PCK_EOF_CODE, lv+1);
71 if(!fmtutil_huffman_make_canonical_code(c, d->ht->bk, d->ht->builder, FMTUTIL_MCTFLAG_LEFT_ALIGN_BRANCHES)) {
72 de_err(c, "Failed to decode Huffman tree");
75 retval = 1;
76 done:
77 d->tree_def_size = pos - pos1;
78 return retval;
81 static void decode_file_data(deark *c, lctx *d, i64 pos1, dbuf *outf)
83 i64 ncodes_expected;
84 i64 i;
86 de_dbg(c, "compressed data at %"I64_FMT, pos1);
88 d->bitrd.f = c->infile;
89 d->bitrd.curpos = pos1;
90 d->bitrd.endpos = c->infile->len;
91 d->bitrd.bbll.is_lsb = 0;
92 de_bitbuf_lowlevel_empty(&d->bitrd.bbll);
94 ncodes_expected = d->unc_size + 1;
96 for(i=0; i<ncodes_expected; i++) {
97 int ret;
98 fmtutil_huffman_valtype val = 0;
100 ret = fmtutil_huffman_read_next_value(d->ht->bk, &d->bitrd, &val, NULL);
101 if(ret && c->debug_level>=3) {
102 de_dbg3(c, "val: %d", (int)val);
105 if(i==ncodes_expected-1) { // Expecting the EOF code at this position
106 if(!ret || val!=PCK_EOF_CODE) {
107 de_warn(c, "EOF code not found. Decompression might have failed.");
109 goto done;
112 if(!ret) {
113 if(d->bitrd.eof_flag) {
114 de_err(c, "Unexpected end of file");
116 else {
117 de_err(c, "Huffman decode error");
119 goto done;
122 if(val==PCK_EOF_CODE) {
123 de_err(c, "Unexpected EOF code");
124 goto done;
127 dbuf_writebyte(outf, (u8)val);
130 done:
134 static void de_run_pack(deark *c, de_module_params *mparams)
136 lctx *d = NULL;
137 i64 pos = 0;
138 dbuf *outf = NULL;
140 d = de_malloc(c, sizeof(lctx));
142 pos += 2;
143 d->unc_size = de_getu32be_p(&pos);
144 de_dbg(c, "uncompressed size: %"I64_FMT, d->unc_size);
146 if(d->unc_size!=0) {
147 d->ht = fmtutil_huffman_create_decoder(c, 257, 257);
148 if(!read_tree(c, d, pos)) goto done;
149 pos += d->tree_def_size;
152 outf = dbuf_create_output_file(c, "bin", NULL, 0);
154 if(d->unc_size!=0) {
155 decode_file_data(c, d, pos, outf);
158 done:
159 dbuf_close(outf);
160 if(d) {
161 if(d->ht) {
162 fmtutil_huffman_destroy_decoder(c, d->ht);
164 de_free(c, d);
168 static int de_identify_pack(deark *c)
170 if(!dbuf_memcmp(c->infile, 0, "\x1f\x1e", 2)) {
171 if(de_input_file_has_ext(c, ".z")) {
172 return 100;
174 return 65;
176 return 0;
179 void de_module_pack(deark *c, struct deark_module_info *mi)
181 mi->id = "pack";
182 mi->desc = "Unix pack (.z)";
183 mi->run_fn = de_run_pack;
184 mi->identify_fn = de_identify_pack;