fnt: Improved error handling, etc.
[deark.git] / modules / binscii.c
blob132bcdbf4a5ea3af7a90ca00d630a128b1b26ed6
1 // This file is part of Deark.
2 // Copyright (C) 2023 Jason Summers
3 // See the file COPYING for terms of use.
5 // BinSCII (Apple II format)
7 #include <deark-private.h>
8 DE_DECLARE_MODULE(de_module_binscii);
10 #define BINSCII_LINE_MAXLEN 128
11 #define BINSCII_ENCODED_UNITS_PER_LINE 16
12 #define BINSCII_DECODED_BYTES_PER_LINE (BINSCII_ENCODED_UNITS_PER_LINE*3)
13 #define BINSCII_ENCODED_BYTES_PER_LINE (BINSCII_ENCODED_UNITS_PER_LINE*4)
14 static const u8* g_binscii_seg_sig = (const u8*)"FiLeStArTfIlEsTaRt";
// Identifies what kind of line the parser expects to process next.
enum binscii_parse_state {
	// Not inside a segment; looking for a segment signature line.
	BSC_NEUTRAL = 0,
	// Signature seen; the next line is the "alphabet" line.
	BSC_READY_FOR_HEADER1,
	// Alphabet read; the next line is the encoded segment header.
	BSC_READY_FOR_HEADER2,
	// Header read; the next line is a line of encoded data.
	BSC_READY_FOR_DATA,
	// All data lines of the segment read; the next line is the data CRC.
	BSC_READY_FOR_CRC
};
24 struct binscii_segment {
25 i64 pos;
26 i64 fn_len;
27 i64 orig_len;
28 i64 offset;
29 u8 acmode;
30 u8 filetype;
31 UI auxtype;
32 u8 storetype;
33 i64 size_in_blocks;
34 UI crdate_raw;
35 UI crtime_raw;
36 UI moddate_raw;
37 UI modtime_raw;
38 i64 segment_len;
39 u32 hdr_crc_reported;
40 i64 nbytes_processed;
41 u8 bmap[256];
44 struct binscii_md {
45 UI seg_count; // Num segments encountered so far (maybe unused)
46 i64 orig_len;
47 i64 nbytes_written;
48 de_ucstring *fn;
49 dbuf *outf;
50 struct de_timestamp mod_time;
51 struct de_timestamp create_time;
54 struct binscii_ctx {
55 struct binscii_md *cur_md;
56 enum binscii_parse_state parse_state;
57 u8 errflag;
58 u8 need_errmsg;
59 UI seg_count_total;
60 i64 pos;
61 dbuf *tmpdbuf;
62 struct de_crcobj *crco_header;
63 struct de_crcobj *crco_segdata;
64 i64 linebuf_used;
65 u8 linebuf[BINSCII_LINE_MAXLEN];
66 struct binscii_segment cur_seg;
69 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
71 char timestamp_buf[64];
73 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
74 de_dbg(c, "%s: %s", name, timestamp_buf);
77 static void binscii_set_generic_error(deark *c, struct binscii_ctx *d)
79 if(d->errflag) return;
80 d->errflag = 1;
81 d->need_errmsg = 1;
84 // Destroys d->cur_md
85 static void binscii_close_cur_file(deark *c, struct binscii_ctx *d)
87 struct binscii_md *md;
89 md = d->cur_md;
90 if(!d->cur_md) return;
91 de_dbg(c, "closing file");
92 if(d->cur_md->orig_len != d->cur_md->nbytes_written) {
93 binscii_set_generic_error(c, d);
95 dbuf_close(md->outf);
96 ucstring_destroy(md->fn);
97 de_free(c, md);
98 d->cur_md = NULL;
101 static struct binscii_md *binscii_create_md(deark *c)
103 struct binscii_md *md;
105 md = de_malloc(c, sizeof(struct binscii_md));
106 return md;
109 // Decode some encoded "units", from memory to a dbuf.
110 // Each unit is 4 bytes encoded, 3 bytes decoded.
111 static void binscii_decode(deark *c, struct binscii_ctx *d, const u8 *src,
112 i64 num_units, dbuf *outf)
114 i64 i;
115 u8 ib[4];
117 for(i=0; i<num_units; i++) {
118 UI j;
120 for(j=0; j<4; j++) {
121 ib[j] = d->cur_seg.bmap[(UI)src[i*4+j]];
123 dbuf_writebyte(outf, (ib[3]<<2)|(ib[2]>>4));
124 dbuf_writebyte(outf, ((ib[2]&0x0f)<<4)|(ib[1]>>2));
125 dbuf_writebyte(outf, ((ib[1]&0x03)<<6)|ib[0]);
129 static void do_binscii_header1(deark *c, struct binscii_ctx *d)
131 i64 i;
133 // The "alphabet" line
134 for(i=0; i<64; i++) {
135 d->cur_seg.bmap[(UI)d->linebuf[i]] = (u8)i;
139 static void binscii_create_output_file(deark *c, struct binscii_ctx *d)
141 de_finfo *fi = NULL;
143 if(!d->cur_md) {
144 binscii_set_generic_error(c, d);
145 goto done;
147 if(d->cur_md->outf) {
148 binscii_set_generic_error(c, d);
149 goto done;
152 fi = de_finfo_create(c);
153 if(d->cur_md->fn) {
154 de_finfo_set_name_from_ucstring(c, fi, d->cur_md->fn, 0);
155 fi->original_filename_flag = 1;
157 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = d->cur_md->mod_time;
158 fi->timestamp[DE_TIMESTAMPIDX_CREATE] = d->cur_md->create_time;
160 d->cur_md->outf = dbuf_create_output_file(c, NULL, fi, 0);
161 dbuf_enable_wbuffer(d->cur_md->outf);
163 done:
164 de_finfo_destroy(c, fi);
167 static void do_binscii_header2(deark *c, struct binscii_ctx *d)
169 i64 pos;
170 u32 hdr_crc_calc;
171 u8 is_first_seg;
172 struct binscii_segment *seg = &d->cur_seg;
173 de_ucstring *fn = NULL;
175 // Some fields we process for all segments.
176 // Others we only process only for the first segment of a file
177 // (or we process them differently).
179 // TODO: Does the fn length use d->bmap, or is the coding fixed as
180 // 'A'=1, 'B'==2, ... ?
181 // (Some BinSCII decoders do it one way, some do it the other.)
182 if(d->linebuf[0]>=64+1 && d->linebuf[0]<=64+15) {
183 seg->fn_len = (i64)d->linebuf[0] - 64;
185 else {
186 binscii_set_generic_error(c, d);
187 goto done;
189 fn = ucstring_create(c);
190 ucstring_append_bytes(fn, &d->linebuf[1],
191 seg->fn_len, 0, DE_ENCODING_ASCII);
192 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(fn));
194 dbuf_empty(d->tmpdbuf);
195 binscii_decode(c, d, &d->linebuf[16], 9, d->tmpdbuf);
197 pos = 0;
198 seg->orig_len = dbuf_getint_ext(d->tmpdbuf, pos, 3, 1, 0);
199 de_dbg(c, "orig len: %"I64_FMT, seg->orig_len);
200 pos += 3;
202 seg->offset = dbuf_getint_ext(d->tmpdbuf, pos, 3, 1, 0);
203 de_dbg(c, "seg offset: %"I64_FMT, seg->offset);
204 pos += 3;
206 is_first_seg = (seg->offset==0);
208 if(is_first_seg) {
209 // If we're already in the middle of a file, close it.
210 if(d->cur_md) {
211 binscii_close_cur_file(c, d);
212 if(d->errflag) goto done;
215 // Open a new file
216 d->cur_md = binscii_create_md(c);
219 if(!d->cur_md) {
220 binscii_set_generic_error(c, d);
221 goto done;
224 // After this point, we can freely use both cur_md and cur_seg.
226 if(is_first_seg) {
227 d->cur_md->orig_len = seg->orig_len;
230 if(is_first_seg) {
231 // TODO: Better decoding & use of file attributes
232 seg->acmode = dbuf_getbyte_p(d->tmpdbuf, &pos);
233 de_dbg(c, "access mode: 0x%02x", (UI)seg->acmode);
234 seg->filetype = dbuf_getbyte_p(d->tmpdbuf, &pos);
235 de_dbg(c, "file type: 0x%02x", (UI)seg->filetype);
236 seg->auxtype = (UI)dbuf_getu16le_p(d->tmpdbuf, &pos);
237 de_dbg(c, "aux file type: 0x%04x", (UI)seg->auxtype);
238 seg->storetype = dbuf_getbyte_p(d->tmpdbuf, &pos);
239 de_dbg(c, "storage type: 0x%02x", (UI)seg->storetype);
240 seg->size_in_blocks = dbuf_getu16le_p(d->tmpdbuf, &pos);
241 de_dbg(c, "orig len in blocks: %"I64_FMT, seg->size_in_blocks);
243 else {
244 pos += 7;
247 if(is_first_seg) {
248 seg->crdate_raw = (UI)dbuf_getu16le_p(d->tmpdbuf, &pos);
249 seg->crtime_raw = (UI)dbuf_getu16le_p(d->tmpdbuf, &pos);
250 de_prodos_datetime_to_timestamp(&d->cur_md->create_time, seg->crdate_raw, seg->crtime_raw);
251 dbg_timestamp(c, &d->cur_md->create_time, "create time");
252 seg->moddate_raw = (UI)dbuf_getu16le_p(d->tmpdbuf, &pos);
253 seg->modtime_raw = (UI)dbuf_getu16le_p(d->tmpdbuf, &pos);
254 de_prodos_datetime_to_timestamp(&d->cur_md->mod_time, seg->moddate_raw, seg->modtime_raw);
255 dbg_timestamp(c, &d->cur_md->mod_time, "mod time");
257 else {
258 pos += 8;
261 seg->segment_len = dbuf_getint_ext(d->tmpdbuf, pos, 3, 1, 0);
262 de_dbg(c, "seg len: %"I64_FMT, seg->segment_len);
263 pos += 3;
265 seg->hdr_crc_reported = (u32)dbuf_getu16le_p(d->tmpdbuf, &pos);
266 de_dbg(c, "header crc (reported): 0x%04x", (UI)seg->hdr_crc_reported);
267 de_crcobj_reset(d->crco_header);
268 de_crcobj_addslice(d->crco_header, d->tmpdbuf, 0, 24);
269 hdr_crc_calc = de_crcobj_getval(d->crco_header);
270 de_dbg(c, "header crc (calculated): 0x%04x", (UI)hdr_crc_calc);
272 if(hdr_crc_calc!=seg->hdr_crc_reported) {
273 de_err(c, "Header CRC check failed for segment at %"I64_FMT, d->cur_seg.pos);
274 d->errflag = 1;
275 goto done;
278 if(seg->offset != d->cur_md->nbytes_written) {
279 binscii_set_generic_error(c, d);
280 goto done;
283 if(is_first_seg) {
284 if(!d->cur_md->fn) {
285 d->cur_md->fn = ucstring_clone(fn);
287 if(!d->cur_md->outf) {
288 binscii_create_output_file(c, d);
292 d->cur_md->seg_count++;
293 done:
294 ucstring_destroy(fn);
297 static void do_binscii_data_line(deark *c, struct binscii_ctx *d)
299 i64 amt_to_write;
301 if(!d->cur_md || !d->cur_md->outf) goto done;
303 dbuf_empty(d->tmpdbuf);
304 binscii_decode(c, d, d->linebuf, BINSCII_ENCODED_UNITS_PER_LINE, d->tmpdbuf);
306 // CRC calculation includes padding bytes.
307 de_crcobj_addslice(d->crco_segdata, d->tmpdbuf, 0, BINSCII_DECODED_BYTES_PER_LINE);
309 amt_to_write = d->cur_md->orig_len - d->cur_md->nbytes_written;
310 amt_to_write = de_min_int(amt_to_write, BINSCII_DECODED_BYTES_PER_LINE);
311 dbuf_copy(d->tmpdbuf, 0, amt_to_write, d->cur_md->outf);
313 d->cur_seg.nbytes_processed += BINSCII_DECODED_BYTES_PER_LINE;
314 if(d->cur_seg.nbytes_processed >= d->cur_seg.segment_len) {
315 d->parse_state = BSC_READY_FOR_CRC;
318 d->cur_md->nbytes_written += amt_to_write;
320 done:
324 static void do_binscii_crc_line(deark *c, struct binscii_ctx *d)
326 u32 crc_reported, crc_calc;
328 if(!d->cur_md) goto done;
330 // For a CRC line, we expect linebuf_used==4.
331 if(d->linebuf_used<4 || d->linebuf_used>=BINSCII_ENCODED_BYTES_PER_LINE) {
332 binscii_set_generic_error(c, d);
333 goto done;
336 dbuf_empty(d->tmpdbuf);
337 binscii_decode(c, d, d->linebuf, 1, d->tmpdbuf);
338 crc_reported = (u32)dbuf_getu16le(d->tmpdbuf, 0);
339 de_dbg(c, "segment data crc (reported): 0x%04x", (UI)crc_reported);
341 crc_calc = de_crcobj_getval(d->crco_segdata);
342 de_dbg(c, "segment data crc (calculated): 0x%04x", (UI)crc_calc);
344 if(crc_calc!=crc_reported) {
345 de_err(c, "Data CRC check failed for segment at %"I64_FMT, d->cur_seg.pos);
346 d->errflag = 1;
347 goto done;
350 if(d->cur_md->nbytes_written >= d->cur_md->orig_len) {
351 binscii_close_cur_file(c, d);
354 done:
358 // Caller sets d->linebuf, d->linebuf_used
359 static void do_binscii_line(deark *c, struct binscii_ctx *d)
361 switch(d->parse_state) {
362 case BSC_NEUTRAL:
363 if(!de_memcmp(d->linebuf, g_binscii_seg_sig, 18)) {
364 de_zeromem(&d->cur_seg, sizeof(struct binscii_segment));
365 d->cur_seg.pos = d->pos;
366 de_crcobj_reset(d->crco_segdata);
367 de_dbg(c, "segment at %"I64_FMT, d->cur_seg.pos);
368 de_dbg_indent(c, 1);
369 d->parse_state = BSC_READY_FOR_HEADER1;
370 d->seg_count_total++;
372 break;
373 case BSC_READY_FOR_HEADER1:
374 do_binscii_header1(c, d);
375 d->parse_state = BSC_READY_FOR_HEADER2;
376 break;
377 case BSC_READY_FOR_HEADER2:
378 do_binscii_header2(c, d);
379 d->parse_state = BSC_READY_FOR_DATA;
380 break;
381 case BSC_READY_FOR_DATA:
382 do_binscii_data_line(c, d);
383 break;
384 case BSC_READY_FOR_CRC:
385 do_binscii_crc_line(c, d);
386 d->parse_state = BSC_NEUTRAL;
387 de_dbg_indent(c, -1);
388 break;
392 // Reads d->linebuf_used bytes.
393 // May modify d->linebuf_used, to delete leading whitespace.
394 static void binscii_read_line_to_linebuf(deark *c, struct binscii_ctx *d)
396 UI num_leading_junk_bytes = 0;
397 i64 new_linebuf_used;
398 i64 k;
400 de_zeromem(d->linebuf, BINSCII_LINE_MAXLEN);
401 de_read(d->linebuf, d->pos, d->linebuf_used);
403 for(k=0; k<d->linebuf_used; k++) {
404 if(d->linebuf[k]<=0x20) {
405 num_leading_junk_bytes++;
407 else {
408 break;
412 if(num_leading_junk_bytes==0) return;
413 new_linebuf_used = d->linebuf_used - (i64)num_leading_junk_bytes;
414 de_memmove((void*)&d->linebuf[0], (const void*)&d->linebuf[num_leading_junk_bytes],
415 new_linebuf_used);
416 d->linebuf_used = new_linebuf_used;
419 static void de_run_binscii(deark *c, de_module_params *mparams)
421 struct binscii_ctx *d = NULL;
422 int saved_indent_level;
424 de_dbg_indent_save(c, &saved_indent_level);
425 d = de_malloc(c, sizeof(struct binscii_ctx));
426 d->tmpdbuf = dbuf_create_membuf(c, 128, 0);
427 d->crco_segdata = de_crcobj_create(c, DE_CRCOBJ_CRC16_XMODEM);
428 d->crco_header = de_crcobj_create(c, DE_CRCOBJ_CRC16_XMODEM);
430 while(1) {
431 int ret;
432 i64 content_len, total_len;
434 if(d->errflag) goto done;
435 ret = dbuf_find_line(c->infile, d->pos, &content_len, &total_len);
436 if(!ret) goto done;
438 d->linebuf_used = (content_len<=BINSCII_LINE_MAXLEN) ? content_len : BINSCII_LINE_MAXLEN;
439 binscii_read_line_to_linebuf(c, d);
440 do_binscii_line(c, d);
441 d->pos += total_len;
444 done:
445 de_dbg_indent_restore(c, saved_indent_level);
446 if(d) {
447 binscii_close_cur_file(c, d);
448 dbuf_close(d->tmpdbuf);
449 de_crcobj_destroy(d->crco_header);
450 de_crcobj_destroy(d->crco_segdata);
451 if(d->need_errmsg) {
452 de_err(c, "Failed to decode file");
454 else if(d->seg_count_total==0 && !d->errflag) {
455 de_err(c, "No BinSCII data found");
457 de_free(c, d);
461 static int de_identify_binscii(deark *c)
463 int has_ext;
464 int ret;
465 i64 foundpos;
467 has_ext = de_input_file_has_ext(c, "bsc") ||
468 de_input_file_has_ext(c, "bsq");
469 if(!dbuf_memcmp(c->infile, 0, g_binscii_seg_sig, 18)) {
470 return has_ext?100:90;
473 if(!has_ext) return 0;
475 ret = dbuf_search(c->infile, g_binscii_seg_sig, 18, 0, 4096, &foundpos);
476 if(ret) {
477 // TODO? We could do better, by making sure the string starts at the
478 // beginning of a line, etc.
479 return 35;
481 return 0;
484 void de_module_binscii(deark *c, struct deark_module_info *mi)
486 mi->id = "binscii";
487 mi->desc = "BinSCII";
488 mi->run_fn = de_run_binscii;
489 mi->identify_fn = de_identify_binscii;