iccprofile: Refactoring
[deark.git] / modules / binhex.c
blob084f10a7639953b65641d052f7b09acfc6ba5993
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // BinHex (.hqx)
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_binhex);
12 struct binhex_forkinfo {
13 i64 pos; // position in d->decompressed
14 i64 len;
15 u32 crc_reported;
16 struct de_crcobj *crco;
17 const char *forkname;
20 typedef struct localctx_struct {
21 int input_encoding;
22 dbuf *decoded;
23 dbuf *decompressed;
24 struct de_advfile *advf;
25 struct binhex_forkinfo fki_data;
26 struct binhex_forkinfo fki_rsrc;
27 } lctx;
29 // Returns 0-63 if successful, 255 for invalid character.
30 static u8 get_char_value(u8 b)
32 int k;
33 static const u8 binhexchars[] =
34 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
36 for(k=0; k<64; k++) {
37 if(b==binhexchars[k]) return (u8)k;
39 return 255;
42 // Decode the base-64 data, and write to d->decoded.
43 // Returns 0 if there was an error.
44 static int do_decode_main(deark *c, lctx *d, i64 pos)
46 u8 b;
47 u8 x;
48 u8 pending_byte = 0;
49 unsigned int pending_bits_used = 0;
51 while(1) {
52 if(pos >= c->infile->len) return 0; // unexpected end of file
53 b = de_getbyte(pos);
54 pos++;
55 if(b==':') {
56 break;
58 else if(b=='\x0a' || b=='\x0d' || b==' ' || b=='\t') {
59 // Ignore whitespace
60 continue;
63 x = get_char_value(b);
64 if(x>=64) {
65 de_err(c, "Invalid BinHex data at %d", (int)(pos-1));
66 return 0;
69 // TODO: Simplify this code
70 if(pending_bits_used==0) {
71 pending_byte = x;
72 pending_bits_used = 6;
74 else if(pending_bits_used==2) {
75 pending_byte = (pending_byte<<(8-pending_bits_used))|x;
76 dbuf_writebyte(d->decoded, pending_byte);
77 pending_bits_used -= 2;
79 else if(pending_bits_used==4) {
80 pending_byte = (pending_byte<<(8-pending_bits_used))|(x>>(pending_bits_used-2));
81 dbuf_writebyte(d->decoded, pending_byte);
82 pending_byte = x&0x03;
83 pending_bits_used -= 2;
85 else if(pending_bits_used==6) {
86 pending_byte = (pending_byte<<(8-pending_bits_used))|(x>>(pending_bits_used-2));
87 dbuf_writebyte(d->decoded, pending_byte);
88 pending_byte = x&0x0f;
89 pending_bits_used -= 2;
93 de_dbg(c, "size after decoding: %d", (int)d->decoded->len);
94 return 1;
97 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
99 struct de_crcobj *crco = (struct de_crcobj*)userdata;
100 de_crcobj_addbuf(crco, buf, buf_len);
103 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
104 struct de_advfile_cbparams *afp)
106 lctx *d = (lctx*)advf->userdata;
108 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
109 dbuf_copy(d->decompressed, d->fki_data.pos, advf->mainfork.fork_len, afp->outf);
111 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
112 dbuf_copy(d->decompressed, d->fki_rsrc.pos, advf->rsrcfork.fork_len, afp->outf);
115 return 1;
118 // Returns 0 if there is a serious error with this fork.
119 static int do_pre_extract_fork(deark *c, lctx *d, dbuf *inf, struct binhex_forkinfo *fki,
120 struct de_advfile_forkinfo *advfki)
122 fki->crc_reported = (u32)dbuf_getu16be(inf, fki->pos + fki->len);
123 de_dbg(c, "%s fork crc (reported): 0x%04x", fki->forkname,
124 (unsigned int)fki->crc_reported);
126 advfki->writelistener_cb = our_writelistener_cb;
127 advfki->userdata_for_writelistener = (void*)fki->crco;
129 if((fki->pos + fki->len > inf->len) && fki->len!=0) {
130 de_err(c, "%s fork goes beyond end of file", fki->forkname);
131 fki->len = 0;
132 return 0;
134 return 1;
137 static void do_post_extract_fork(deark *c, lctx *d, struct binhex_forkinfo *fki)
139 u32 crc_calc;
141 // Here, the BinHex spec says we should feed two 0x00 bytes to the CRC
142 // calculation, to account for the CRC field itself. However, if I do
143 // that, none of files I've tested have the correct CRC. If I don't,
144 // all of them have the correct CRC.
145 //de_crcobj_addbuf(fki->crco, (const u8*)"\0\0", 2);
147 crc_calc = de_crcobj_getval(fki->crco);
148 de_dbg(c, "%s fork crc (calculated): 0x%04x", fki->forkname,
149 (unsigned int)crc_calc);
150 if(crc_calc != fki->crc_reported) {
151 de_err(c, "CRC check failed for %s fork", fki->forkname);
155 static void do_extract_forks(deark *c, lctx *d)
157 i64 name_len;
158 dbuf *inf;
159 i64 pos;
160 u32 hc; // Header CRC
161 struct de_stringreaderdata *fname = NULL;
162 struct de_fourcc filetype;
163 struct de_fourcc creator;
165 inf = d->decompressed;
166 pos = 0;
168 // Read the header
170 name_len = (i64)dbuf_getbyte(inf, pos);
171 pos+=1;
172 de_dbg(c, "name len: %d", (int)name_len);
174 if(name_len > 0) {
175 fname = dbuf_read_string(inf, pos, name_len, name_len, 0, d->input_encoding);
176 ucstring_append_ucstring(d->advf->filename, fname->str);
177 d->advf->original_filename_flag = 1;
178 de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(fname->str));
179 de_advfile_set_orig_filename(d->advf, fname->sz, fname->sz_strlen);
181 else {
182 ucstring_append_sz(d->advf->filename, "bin", DE_ENCODING_LATIN1);
185 pos+=name_len;
186 pos+=1; // Skip the 0x00 byte after the name.
188 dbuf_read_fourcc(inf, pos, &filetype, 4, 0x0);
189 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
190 de_memcpy(d->advf->typecode, filetype.bytes, 4);
191 d->advf->has_typecode = 1;
192 pos += 4;
193 dbuf_read_fourcc(inf, pos, &creator, 4, 0x0);
194 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
195 de_memcpy(d->advf->creatorcode, creator.bytes, 4);
196 d->advf->has_creatorcode = 1;
197 pos += 4;
199 d->advf->finderflags = (u16)dbuf_getu16be_p(inf, &pos);
200 d->advf->has_finderflags = 1;
201 de_dbg(c, "flags: 0x%04x", (unsigned int)d->advf->finderflags);
203 d->fki_data.len = dbuf_getu32be_p(inf, &pos);
204 de_dbg(c, "data fork len: %d", (int)d->fki_data.len);
205 d->fki_rsrc.len = dbuf_getu32be_p(inf, &pos);
206 de_dbg(c, "resource fork len: %d", (int)d->fki_rsrc.len);
208 hc = (u32)dbuf_getu16be_p(inf, &pos);
209 de_dbg(c, "header crc (reported): 0x%04x", (unsigned int)hc);
210 // TODO: Verify header CRC
212 d->fki_data.forkname = "data";
213 d->fki_rsrc.forkname = "rsrc";
215 // Walk through the file, and record some offsets
216 d->fki_data.pos = pos;
217 pos += d->fki_data.len;
218 pos += 2; // for the CRC
220 d->fki_rsrc.pos = pos;
221 // [d->fki_rsrc.len bytes here]
222 // [2 bytes here, for the CRC]
224 d->fki_data.crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_CCITT);
225 d->fki_rsrc.crco = de_crcobj_create(c, DE_CRCOBJ_CRC16_CCITT);
227 if(!do_pre_extract_fork(c, d, inf, &d->fki_data, &d->advf->mainfork)) {
228 goto done;
230 (void)do_pre_extract_fork(c, d, inf, &d->fki_rsrc, &d->advf->rsrcfork);
232 d->advf->mainfork.fork_exists = (d->fki_data.len > 0);
233 d->advf->mainfork.fork_len = d->fki_data.len;
234 d->advf->rsrcfork.fork_exists = (d->fki_rsrc.len > 0);
235 d->advf->rsrcfork.fork_len = d->fki_rsrc.len;
236 d->advf->userdata = (void*)d;
237 d->advf->writefork_cbfn = my_advfile_cbfn;
239 de_advfile_run(d->advf);
241 do_post_extract_fork(c, d, &d->fki_data);
242 do_post_extract_fork(c, d, &d->fki_rsrc);
244 done:
245 de_destroy_stringreaderdata(c, fname);
246 de_crcobj_destroy(d->fki_data.crco);
247 d->fki_data.crco = NULL;
248 de_crcobj_destroy(d->fki_rsrc.crco);
249 d->fki_rsrc.crco = NULL;
252 static void do_binhex(deark *c, lctx *d, i64 pos)
254 int ret;
255 struct de_dfilter_in_params dcmpri;
256 struct de_dfilter_out_params dcmpro;
257 struct de_dfilter_results dres;
259 de_dbg(c, "BinHex data starts at %d", (int)pos);
261 d->decoded = dbuf_create_membuf(c, 65536, 0);
262 d->decompressed = dbuf_create_membuf(c, 65536, 0);
264 ret = do_decode_main(c, d, pos);
265 if(!ret) goto done;
267 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
268 dcmpri.f = d->decoded;
269 dcmpri.pos = 0;
270 dcmpri.len = d->decoded->len;
271 dcmpro.f = d->decompressed;
272 fmtutil_decompress_rle90_ex(c, &dcmpri, &dcmpro, &dres, 0);
273 if(dres.errcode) {
274 de_err(c, "%s", de_dfilter_get_errmsg(c, &dres));
275 goto done;
277 de_dbg(c, "size after decompression: %d", (int)d->decompressed->len);
279 d->advf = de_advfile_create(c);
281 do_extract_forks(c, d);
283 done:
284 de_advfile_destroy(d->advf);
285 d->advf = NULL;
286 dbuf_close(d->decompressed);
287 d->decompressed = NULL;
288 dbuf_close(d->decoded);
289 d->decoded = NULL;
292 static int find_start(deark *c, i64 *foundpos)
294 i64 pos;
295 u8 b;
296 int ret;
298 *foundpos = 0;
300 ret = dbuf_search(c->infile,
301 (const u8*)"(This file must be converted with BinHex", 40,
302 0, 8192, &pos);
303 if(!ret) return 0;
305 pos += 40;
307 // Find the next CR/LF byte
308 while(1) {
309 b = de_getbyte(pos);
310 pos++;
311 if(b=='\x0a' || b=='\x0d') {
312 break;
316 // Skip any number of additional whitespace
317 while(1) {
318 b = de_getbyte(pos);
319 if(b=='\x0a' || b=='\x0d' || b==' ' || b=='\t') {
320 pos++;
322 else {
323 break;
327 // Current byte should be a colon (:)
328 b = de_getbyte(pos);
329 if(b==':') {
330 *foundpos = pos+1;
331 return 1;
334 return 0;
337 static void de_run_binhex(deark *c, de_module_params *mparams)
339 lctx *d = NULL;
340 i64 pos;
341 int ret;
343 d = de_malloc(c, sizeof(lctx));
344 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
346 ret = find_start(c, &pos);
347 if(!ret) {
348 de_err(c, "Not a BinHex file");
349 goto done;
352 do_binhex(c, d, pos);
354 done:
355 de_free(c, d);
358 static int de_identify_binhex(deark *c)
360 int ret;
361 i64 foundpos;
363 if(!dbuf_memcmp(c->infile, 0,
364 "(This file must be converted with BinHex", 40))
366 return 100;
369 if(!de_input_file_has_ext(c, "hqx")) return 0;
371 // File has .hqx extension. Try harder to identify it.
372 ret = find_start(c, &foundpos);
373 if(ret) return 100;
375 return 0;
378 void de_module_binhex(deark *c, struct deark_module_info *mi)
380 mi->id = "binhex";
381 mi->desc = "Macintosh BinHex (.hqx) archive";
382 mi->run_fn = de_run_binhex;
383 mi->identify_fn = de_identify_binhex;