1 // This file is part of Deark.
2 // Copyright (C) 2023 Jason Summers
3 // See the file COPYING for terms of use.
5 // BinSCII (Apple II format)
7 #include <deark-private.h>
// Declares de_module_binscii (defined later in this file) using the project's
// DE_DECLARE_MODULE macro.
8 DE_DECLARE_MODULE(de_module_binscii
);
// Capacity of the line buffer (linebuf). The caller of the line reader clamps
// longer input lines to this many bytes.
10 #define BINSCII_LINE_MAXLEN 128
// A "unit" is 4 encoded bytes that decode to 3 bytes. A full data line holds
// 16 units, i.e. 64 encoded bytes / 48 decoded bytes.
11 #define BINSCII_ENCODED_UNITS_PER_LINE 16
12 #define BINSCII_DECODED_BYTES_PER_LINE (BINSCII_ENCODED_UNITS_PER_LINE*3)
13 #define BINSCII_ENCODED_BYTES_PER_LINE (BINSCII_ENCODED_UNITS_PER_LINE*4)
// Marker text that introduces a segment. The code in this file always
// compares exactly 18 bytes of it.
14 static const u8
* g_binscii_seg_sig
= (const u8
*)"FiLeStArTfIlEsTaRt";
// States of the line-oriented parser. do_binscii_line() switches on the
// current state to decide how to treat each input line.
// NOTE(review): only two enumerators are visible in this excerpt; other
// states (BSC_NEUTRAL, BSC_READY_FOR_DATA, BSC_READY_FOR_CRC) are used later
// in the file.
16 enum binscii_parse_state
{
18 BSC_READY_FOR_HEADER1
,
19 BSC_READY_FOR_HEADER2
,
// Per-segment state. It is zeroed when a segment signature is found and then
// filled in from the segment's two header lines.
// NOTE(review): the member list of this struct, and the opening lines of
// struct binscii_md and struct binscii_ctx, are not visible in this excerpt.
// The fields below are attributed according to how the rest of the file
// accesses them.
24 struct binscii_segment
{
// Accessed as d->cur_md->seg_count, i.e. part of the per-file metadata.
45 UI seg_count
; // Num segments encountered so far (maybe unused)
// Per-file timestamps, filled by de_prodos_datetime_to_timestamp() from the
// raw date/time header fields and later copied to the output file's finfo.
50 struct de_timestamp mod_time
;
51 struct de_timestamp create_time
;
// The remaining fields are accessed directly through the
// struct binscii_ctx pointer (d).
// Metadata of the file currently being reconstructed; code checks it for
// NULL before use.
55 struct binscii_md
*cur_md
;
// What kind of line the parser expects next.
56 enum binscii_parse_state parse_state
;
// CRC objects (created as CRC-16/XMODEM in de_run_binscii): one for the
// decoded header bytes, one accumulated over a segment's decoded data.
62 struct de_crcobj
*crco_header
;
63 struct de_crcobj
*crco_segdata
;
// The current input line, after leading bytes <=0x20 have been removed.
65 u8 linebuf
[BINSCII_LINE_MAXLEN
];
// State of the segment currently being parsed.
66 struct binscii_segment cur_seg
;
// Debug helper: converts *ts to text in a 64-byte stack buffer using
// de_timestamp_to_string(), then prints it with de_dbg() in the form
// "<name>: <text>".
//  c    - the deark context, passed through to de_dbg()
//  ts   - timestamp to print
//  name - label printed before the timestamp
69 static void dbg_timestamp(deark
*c
, struct de_timestamp
*ts
, const char *name
)
71 char timestamp_buf
[64];
73 de_timestamp_to_string(ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
74 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
// Records a generic decoding failure on the context.
// Does nothing if d->errflag is already set, so repeated calls are harmless.
// NOTE(review): the rest of the body is not visible in this excerpt;
// presumably it reports an error and sets d->errflag - TODO confirm.
77 static void binscii_set_generic_error(deark
*c
, struct binscii_ctx
*d
)
79 if(d
->errflag
) return;
// Finishes the file currently being reconstructed (d->cur_md), if any.
// Flags a generic error when the number of bytes written does not match the
// file's declared original length, and destroys the metadata's filename
// string.
// NOTE(review): the lines that assign the local 'md' and release the other
// resources are not visible in this excerpt.
85 static void binscii_close_cur_file(deark
*c
, struct binscii_ctx
*d
)
87 struct binscii_md
*md
;
// Nothing to close.
90 if(!d
->cur_md
) return;
91 de_dbg(c
, "closing file");
// An incomplete (or over-long) file is treated as an error.
92 if(d
->cur_md
->orig_len
!= d
->cur_md
->nbytes_written
) {
93 binscii_set_generic_error(c
, d
);
96 ucstring_destroy(md
->fn
);
// Allocates a new per-file metadata object with de_malloc().
// NOTE(review): any further initialization and the return statement are not
// visible in this excerpt. Callers store the result in d->cur_md.
101 static struct binscii_md
*binscii_create_md(deark
*c
)
103 struct binscii_md
*md
;
105 md
= de_malloc(c
, sizeof(struct binscii_md
));
109 // Decode some encoded "units", from memory to a dbuf.
110 // Each unit is 4 bytes encoded, 3 bytes decoded.
//  src       - encoded input; num_units*4 bytes are read from it
//  num_units - number of 4-byte units to decode
//  outf      - destination; 3 bytes are appended per unit
// Each encoded byte is translated through the current segment's alphabet
// table (d->cur_seg.bmap) before the bits are recombined.
111 static void binscii_decode(deark
*c
, struct binscii_ctx
*d
, const u8
*src
,
112 i64 num_units
, dbuf
*outf
)
117 for(i
=0; i
<num_units
; i
++) {
// Map the j-th encoded byte of unit i to its table value.
121 ib
[j
] = d
->cur_seg
.bmap
[(UI
)src
[i
*4+j
]];
// Recombine the four table values into three output bytes. ib[3] supplies
// the high bits of the first byte written, ib[0] the low bits of the last.
// NOTE(review): this packing assumes each table value fits in 6 bits.
123 dbuf_writebyte(outf
, (ib
[3]<<2)|(ib
[2]>>4));
124 dbuf_writebyte(outf
, ((ib
[2]&0x0f)<<4)|(ib
[1]>>2));
125 dbuf_writebyte(outf
, ((ib
[1]&0x03)<<6)|ib
[0]);
// Processes the first header line of a segment, which carries the 64-symbol
// encoding alphabet. Builds the reverse lookup table used by
// binscii_decode(): the byte at position i of the line maps to value i.
// NOTE(review): no validation of the line's length or of duplicate symbols
// is visible in this excerpt.
129 static void do_binscii_header1(deark
*c
, struct binscii_ctx
*d
)
133 // The "alphabet" line
134 for(i
=0; i
<64; i
++) {
135 d
->cur_seg
.bmap
[(UI
)d
->linebuf
[i
]] = (u8
)i
;
// Creates the output file for the current file's metadata (d->cur_md).
// The output file takes its name and its modify/create timestamps from the
// metadata, and has write buffering enabled.
// NOTE(review): the conditions guarding the first error call, and the
// control flow after each error call, are not visible in this excerpt.
139 static void binscii_create_output_file(deark
*c
, struct binscii_ctx
*d
)
144 binscii_set_generic_error(c
, d
);
// An output file already exists for this metadata: treated as an error.
147 if(d
->cur_md
->outf
) {
148 binscii_set_generic_error(c
, d
);
152 fi
= de_finfo_create(c
);
// Use the filename from the segment header as the output name.
154 de_finfo_set_name_from_ucstring(c
, fi
, d
->cur_md
->fn
, 0);
155 fi
->original_filename_flag
= 1;
157 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = d
->cur_md
->mod_time
;
158 fi
->timestamp
[DE_TIMESTAMPIDX_CREATE
] = d
->cur_md
->create_time
;
160 d
->cur_md
->outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
161 dbuf_enable_wbuffer(d
->cur_md
->outf
);
// The finfo object is destroyed here, after the output file has been created.
164 de_finfo_destroy(c
, fi
);
// Processes the second header line of a segment (in d->linebuf).
// Visible steps:
//  - reads the filename length from byte 0 and the filename from byte 1 on;
//  - decodes 9 units (27 bytes) starting at byte 16 into d->tmpdbuf and
//    parses the file/segment fields from them;
//  - for what appears to be the first segment of a file, closes any file in
//    progress and creates new per-file metadata;
//  - checks the header CRC over the first 24 decoded bytes;
//  - requires the segment offset to equal the bytes written so far;
//  - creates the output file if it does not exist yet.
// NOTE(review): several locals (pos, is_first_seg, hdr_crc_calc), the
// branch structure, and the updates of 'pos' between the positional reads
// are not visible in this excerpt.
167 static void do_binscii_header2(deark
*c
, struct binscii_ctx
*d
)
172 struct binscii_segment
*seg
= &d
->cur_seg
;
173 de_ucstring
*fn
= NULL
;
175 // Some fields we process for all segments.
176 // Others we process only for the first segment of a file
177 // (or we process them differently).
179 // TODO: Does the fn length use d->bmap, or is the coding fixed as
180 // 'A'=1, 'B'==2, ... ?
181 // (Some BinSCII decoders do it one way, some do it the other.)
// Accept only length codes 65..79 ('A'..'O'), i.e. filename lengths 1..15.
182 if(d
->linebuf
[0]>=64+1 && d
->linebuf
[0]<=64+15) {
183 seg
->fn_len
= (i64
)d
->linebuf
[0] - 64;
186 binscii_set_generic_error(c
, d
);
// The filename bytes are interpreted as ASCII.
189 fn
= ucstring_create(c
);
190 ucstring_append_bytes(fn
, &d
->linebuf
[1],
191 seg
->fn_len
, 0, DE_ENCODING_ASCII
);
192 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(fn
));
// Decode the binary part of the header: 9 units = 36 encoded bytes starting
// at offset 16 of the line, yielding 27 decoded bytes.
194 dbuf_empty(d
->tmpdbuf
);
195 binscii_decode(c
, d
, &d
->linebuf
[16], 9, d
->tmpdbuf
);
// 3-byte integer fields. NOTE(review): the "1" argument presumably selects
// little-endian byte order - confirm against dbuf_getint_ext().
198 seg
->orig_len
= dbuf_getint_ext(d
->tmpdbuf
, pos
, 3, 1, 0);
199 de_dbg(c
, "orig len: %"I64_FMT
, seg
->orig_len
);
202 seg
->offset
= dbuf_getint_ext(d
->tmpdbuf
, pos
, 3, 1, 0);
203 de_dbg(c
, "seg offset: %"I64_FMT
, seg
->offset
);
// A segment that starts at offset 0 is taken to begin a new file.
206 is_first_seg
= (seg
->offset
==0);
209 // If we're already in the middle of a file, close it.
211 binscii_close_cur_file(c
, d
);
212 if(d
->errflag
) goto done
;
216 d
->cur_md
= binscii_create_md(c
);
220 binscii_set_generic_error(c
, d
);
224 // After this point, we can freely use both cur_md and cur_seg.
227 d
->cur_md
->orig_len
= seg
->orig_len
;
231 // TODO: Better decoding & use of file attributes
232 seg
->acmode
= dbuf_getbyte_p(d
->tmpdbuf
, &pos
);
233 de_dbg(c
, "access mode: 0x%02x", (UI
)seg
->acmode
);
234 seg
->filetype
= dbuf_getbyte_p(d
->tmpdbuf
, &pos
);
235 de_dbg(c
, "file type: 0x%02x", (UI
)seg
->filetype
);
236 seg
->auxtype
= (UI
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
237 de_dbg(c
, "aux file type: 0x%04x", (UI
)seg
->auxtype
);
238 seg
->storetype
= dbuf_getbyte_p(d
->tmpdbuf
, &pos
);
239 de_dbg(c
, "storage type: 0x%02x", (UI
)seg
->storetype
);
240 seg
->size_in_blocks
= dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
241 de_dbg(c
, "orig len in blocks: %"I64_FMT
, seg
->size_in_blocks
);
// Raw date and time words are read as 16-bit little-endian values and
// converted with the ProDOS date/time helper.
248 seg
->crdate_raw
= (UI
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
249 seg
->crtime_raw
= (UI
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
250 de_prodos_datetime_to_timestamp(&d
->cur_md
->create_time
, seg
->crdate_raw
, seg
->crtime_raw
);
251 dbg_timestamp(c
, &d
->cur_md
->create_time
, "create time");
252 seg
->moddate_raw
= (UI
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
253 seg
->modtime_raw
= (UI
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
254 de_prodos_datetime_to_timestamp(&d
->cur_md
->mod_time
, seg
->moddate_raw
, seg
->modtime_raw
);
255 dbg_timestamp(c
, &d
->cur_md
->mod_time
, "mod time");
261 seg
->segment_len
= dbuf_getint_ext(d
->tmpdbuf
, pos
, 3, 1, 0);
262 de_dbg(c
, "seg len: %"I64_FMT
, seg
->segment_len
);
// Compare the stored header CRC with one computed over the first 24 decoded
// header bytes.
265 seg
->hdr_crc_reported
= (u32
)dbuf_getu16le_p(d
->tmpdbuf
, &pos
);
266 de_dbg(c
, "header crc (reported): 0x%04x", (UI
)seg
->hdr_crc_reported
);
267 de_crcobj_reset(d
->crco_header
);
268 de_crcobj_addslice(d
->crco_header
, d
->tmpdbuf
, 0, 24);
269 hdr_crc_calc
= de_crcobj_getval(d
->crco_header
);
270 de_dbg(c
, "header crc (calculated): 0x%04x", (UI
)hdr_crc_calc
);
272 if(hdr_crc_calc
!=seg
->hdr_crc_reported
) {
273 de_err(c
, "Header CRC check failed for segment at %"I64_FMT
, d
->cur_seg
.pos
);
// Segments must arrive in order: this segment has to start exactly where the
// previously written data ended.
278 if(seg
->offset
!= d
->cur_md
->nbytes_written
) {
279 binscii_set_generic_error(c
, d
);
// The metadata keeps its own copy of the filename; the local 'fn' is
// destroyed below.
285 d
->cur_md
->fn
= ucstring_clone(fn
);
287 if(!d
->cur_md
->outf
) {
288 binscii_create_output_file(c
, d
);
292 d
->cur_md
->seg_count
++;
294 ucstring_destroy(fn
);
// Processes one data line of the current segment (in d->linebuf).
// Decodes a full line of 16 units into d->tmpdbuf, feeds all 48 decoded
// bytes to the segment-data CRC, and copies to the output file only as many
// bytes as the file still needs. When the segment's declared length has been
// consumed, the parser is switched to expect the CRC line.
// Skips the line if there is no current file or no output file.
297 static void do_binscii_data_line(deark
*c
, struct binscii_ctx
*d
)
301 if(!d
->cur_md
|| !d
->cur_md
->outf
) goto done
;
303 dbuf_empty(d
->tmpdbuf
);
304 binscii_decode(c
, d
, d
->linebuf
, BINSCII_ENCODED_UNITS_PER_LINE
, d
->tmpdbuf
);
306 // CRC calculation includes padding bytes.
307 de_crcobj_addslice(d
->crco_segdata
, d
->tmpdbuf
, 0, BINSCII_DECODED_BYTES_PER_LINE
);
// Write at most one line's worth, and never more than the bytes remaining
// in the file, so trailing padding is not written.
309 amt_to_write
= d
->cur_md
->orig_len
- d
->cur_md
->nbytes_written
;
310 amt_to_write
= de_min_int(amt_to_write
, BINSCII_DECODED_BYTES_PER_LINE
);
311 dbuf_copy(d
->tmpdbuf
, 0, amt_to_write
, d
->cur_md
->outf
);
// Segment progress counts whole lines (padding included), unlike
// nbytes_written below.
313 d
->cur_seg
.nbytes_processed
+= BINSCII_DECODED_BYTES_PER_LINE
;
314 if(d
->cur_seg
.nbytes_processed
>= d
->cur_seg
.segment_len
) {
315 d
->parse_state
= BSC_READY_FOR_CRC
;
318 d
->cur_md
->nbytes_written
+= amt_to_write
;
// Processes the CRC line that follows a segment's data lines.
// Decodes one unit from the line, reads the reported 16-bit little-endian
// CRC from it, and compares it with the CRC accumulated over the segment's
// decoded data. A mismatch is reported with de_err(). If the file's bytes
// have all been written, the current file is closed.
// Skips the line if there is no current file.
324 static void do_binscii_crc_line(deark
*c
, struct binscii_ctx
*d
)
326 u32 crc_reported
, crc_calc
;
328 if(!d
->cur_md
) goto done
;
330 // For a CRC line, we expect linebuf_used==4.
// Lines shorter than 4 bytes, or as long as a full data line, are rejected.
331 if(d
->linebuf_used
<4 || d
->linebuf_used
>=BINSCII_ENCODED_BYTES_PER_LINE
) {
332 binscii_set_generic_error(c
, d
);
336 dbuf_empty(d
->tmpdbuf
);
337 binscii_decode(c
, d
, d
->linebuf
, 1, d
->tmpdbuf
);
338 crc_reported
= (u32
)dbuf_getu16le(d
->tmpdbuf
, 0);
339 de_dbg(c
, "segment data crc (reported): 0x%04x", (UI
)crc_reported
);
341 crc_calc
= de_crcobj_getval(d
->crco_segdata
);
342 de_dbg(c
, "segment data crc (calculated): 0x%04x", (UI
)crc_calc
);
344 if(crc_calc
!=crc_reported
) {
345 de_err(c
, "Data CRC check failed for segment at %"I64_FMT
, d
->cur_seg
.pos
);
// This was the last segment of the file once all of its bytes are written.
350 if(d
->cur_md
->nbytes_written
>= d
->cur_md
->orig_len
) {
351 binscii_close_cur_file(c
, d
);
358 // Caller sets d->linebuf, d->linebuf_used
// Dispatches one input line according to d->parse_state:
//  - a line starting with the 18-byte segment signature resets the
//    per-segment state and the data CRC, then expects header line 1;
//  - header line 1, then header line 2, then data lines follow;
//  - the CRC line ends the segment and returns the parser to BSC_NEUTRAL.
// NOTE(review): the case label guarding the signature check, the 'break'
// statements, and the matching de_dbg_indent(c, 1) call are not visible in
// this excerpt.
359 static void do_binscii_line(deark
*c
, struct binscii_ctx
*d
)
361 switch(d
->parse_state
) {
363 if(!de_memcmp(d
->linebuf
, g_binscii_seg_sig
, 18)) {
// Start of a new segment: forget everything about the previous one.
364 de_zeromem(&d
->cur_seg
, sizeof(struct binscii_segment
));
365 d
->cur_seg
.pos
= d
->pos
;
366 de_crcobj_reset(d
->crco_segdata
);
367 de_dbg(c
, "segment at %"I64_FMT
, d
->cur_seg
.pos
);
369 d
->parse_state
= BSC_READY_FOR_HEADER1
;
370 d
->seg_count_total
++;
373 case BSC_READY_FOR_HEADER1
:
374 do_binscii_header1(c
, d
);
375 d
->parse_state
= BSC_READY_FOR_HEADER2
;
377 case BSC_READY_FOR_HEADER2
:
378 do_binscii_header2(c
, d
);
379 d
->parse_state
= BSC_READY_FOR_DATA
;
// do_binscii_data_line() itself advances the state to BSC_READY_FOR_CRC.
381 case BSC_READY_FOR_DATA
:
382 do_binscii_data_line(c
, d
);
384 case BSC_READY_FOR_CRC
:
385 do_binscii_crc_line(c
, d
);
386 d
->parse_state
= BSC_NEUTRAL
;
387 de_dbg_indent(c
, -1);
392 // Reads d->linebuf_used bytes.
393 // May modify d->linebuf_used, to delete leading whitespace.
// The buffer is zero-filled first, so bytes past the line's content read as 0.
// Bytes with value <=0x20 at the start of the line are counted as junk and
// removed by shifting the rest of the buffer down.
// NOTE(review): the loop's exit on the first non-junk byte and the length
// argument of de_memmove are not visible in this excerpt.
394 static void binscii_read_line_to_linebuf(deark
*c
, struct binscii_ctx
*d
)
396 UI num_leading_junk_bytes
= 0;
397 i64 new_linebuf_used
;
400 de_zeromem(d
->linebuf
, BINSCII_LINE_MAXLEN
);
401 de_read(d
->linebuf
, d
->pos
, d
->linebuf_used
);
403 for(k
=0; k
<d
->linebuf_used
; k
++) {
404 if(d
->linebuf
[k
]<=0x20) {
405 num_leading_junk_bytes
++;
// Common case: nothing to strip.
412 if(num_leading_junk_bytes
==0) return;
413 new_linebuf_used
= d
->linebuf_used
- (i64
)num_leading_junk_bytes
;
// Source and destination overlap, hence memmove.
414 de_memmove((void*)&d
->linebuf
[0], (const void*)&d
->linebuf
[num_leading_junk_bytes
],
416 d
->linebuf_used
= new_linebuf_used
;
// Module entry point: decodes a BinSCII input file.
// Allocates the context, a scratch membuf and two CRC-16/XMODEM objects,
// then processes the input one line at a time: each line is located with
// dbuf_find_line(), copied into d->linebuf (clamped to BINSCII_LINE_MAXLEN),
// and handed to do_binscii_line(). Afterwards any open file is closed, the
// resources are released, and a summary error is reported if decoding failed
// or no segment was found.
// NOTE(review): the loop header, the advance of d->pos, the handling of
// dbuf_find_line()'s return value, and the condition on the
// "Failed to decode file" message are not visible in this excerpt.
419 static void de_run_binscii(deark
*c
, de_module_params
*mparams
)
421 struct binscii_ctx
*d
= NULL
;
422 int saved_indent_level
;
424 de_dbg_indent_save(c
, &saved_indent_level
);
425 d
= de_malloc(c
, sizeof(struct binscii_ctx
));
426 d
->tmpdbuf
= dbuf_create_membuf(c
, 128, 0);
427 d
->crco_segdata
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_XMODEM
);
428 d
->crco_header
= de_crcobj_create(c
, DE_CRCOBJ_CRC16_XMODEM
);
432 i64 content_len
, total_len
;
// Stop at the first error.
434 if(d
->errflag
) goto done
;
435 ret
= dbuf_find_line(c
->infile
, d
->pos
, &content_len
, &total_len
);
// Over-long lines are truncated to the buffer size.
438 d
->linebuf_used
= (content_len
<=BINSCII_LINE_MAXLEN
) ? content_len
: BINSCII_LINE_MAXLEN
;
439 binscii_read_line_to_linebuf(c
, d
);
440 do_binscii_line(c
, d
);
445 de_dbg_indent_restore(c
, saved_indent_level
);
// Cleanup: close any file still in progress, then release the scratch
// buffer and CRC objects.
447 binscii_close_cur_file(c
, d
);
448 dbuf_close(d
->tmpdbuf
);
449 de_crcobj_destroy(d
->crco_header
);
450 de_crcobj_destroy(d
->crco_segdata
);
452 de_err(c
, "Failed to decode file");
// No segment signature was ever seen and no other error was raised.
454 else if(d
->seg_count_total
==0 && !d
->errflag
) {
455 de_err(c
, "No BinSCII data found");
// Format identification. Returns a confidence score:
//  - 100 if the file starts with the segment signature and has a ".bsc" or
//    ".bsq" extension, 90 if it starts with the signature without one;
//  - 0 if it neither starts with the signature nor has such an extension;
//  - otherwise the signature is searched for near the start of the file.
// NOTE(review): the score returned after the search is not visible in this
// excerpt; the 0 and 4096 arguments presumably are the search start and
// length - confirm against dbuf_search().
461 static int de_identify_binscii(deark
*c
)
467 has_ext
= de_input_file_has_ext(c
, "bsc") ||
468 de_input_file_has_ext(c
, "bsq");
469 if(!dbuf_memcmp(c
->infile
, 0, g_binscii_seg_sig
, 18)) {
470 return has_ext
?100:90;
// Without a matching extension, do not search beyond the very start.
473 if(!has_ext
) return 0;
475 ret
= dbuf_search(c
->infile
, g_binscii_seg_sig
, 18, 0, 4096, &foundpos
);
477 // TODO? We could do better, by making sure the string starts at the
478 // beginning of a line, etc.
// Module registration: fills in the module-info record with this module's
// description and its run/identify callbacks.
// NOTE(review): other assignments (such as the module id) are not visible in
// this excerpt.
484 void de_module_binscii(deark
*c
, struct deark_module_info
*mi
)
487 mi
->desc
= "BinSCII";
488 mi
->run_fn
= de_run_binscii
;
489 mi
->identify_fn
= de_identify_binscii
;