zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / bintext.c
blobc8689a715515572a87b0e5d7d6fb9407167e5a2e
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // XBIN character graphics
6 // "Binary Text" character graphics
7 // ArtWorx ADF character graphics
9 #include <deark-config.h>
10 #include <deark-private.h>
11 #include <deark-fmtutil.h>
// Forward declarations of the module registration functions defined
// in this file.
DE_DECLARE_MODULE(de_module_xbin);
DE_DECLARE_MODULE(de_module_bintext);
DE_DECLARE_MODULE(de_module_artworx_adf);
DE_DECLARE_MODULE(de_module_icedraw);
17 typedef struct localctx_struct {
18 i64 width_in_chars, height_in_chars;
19 i64 font_height;
20 u8 has_palette, has_font, compression, nonblink, has_512chars;
22 i64 font_data_len;
23 u8 *font_data;
24 int is_standard_font;
25 struct de_bitmap_font *font;
26 } lctx;
28 static void do_bin_main(deark *c, lctx *d, dbuf *unc_data, struct de_char_context *charctx)
30 i64 i, j;
31 u8 ccode, acode;
32 u8 fgcol, bgcol;
33 struct de_char_screen *screen;
34 struct de_encconv_state es;
36 charctx->nscreens = 1;
37 charctx->screens = de_mallocarray(c, charctx->nscreens, sizeof(struct de_char_screen*));
38 charctx->screens[0] = de_malloc(c, sizeof(struct de_char_screen));
39 screen = charctx->screens[0];
40 screen->width = d->width_in_chars;
41 screen->height = d->height_in_chars;
42 screen->cell_rows = de_mallocarray(c, d->height_in_chars, sizeof(struct de_char_cell*));
43 de_encconv_init(&es, DE_ENCODING_CP437_G);
45 for(j=0; j<d->height_in_chars; j++) {
46 screen->cell_rows[j] = de_mallocarray(c, d->width_in_chars, sizeof(struct de_char_cell));
48 for(i=0; i<d->width_in_chars; i++) {
49 ccode = dbuf_getbyte(unc_data, j*d->width_in_chars*2 + i*2);
50 acode = dbuf_getbyte(unc_data, j*d->width_in_chars*2 + i*2 + 1);
52 if((acode&0x80) && !d->nonblink) {
53 screen->cell_rows[j][i].blink = 1;
54 acode -= 0x80;
57 fgcol = (acode & 0x0f);
58 bgcol = (acode & 0xf0) >> 4;
60 screen->cell_rows[j][i].fgcol = (u32)fgcol;
61 screen->cell_rows[j][i].bgcol = (u32)bgcol;
62 screen->cell_rows[j][i].codepoint = (i32)ccode;
63 screen->cell_rows[j][i].codepoint_unicode = de_char_to_unicode_ex((i32)ccode, &es);
67 de_char_output_to_file(c, charctx);
70 static void do_uncompress_data(deark *c, lctx *d, i64 pos1, dbuf *unc_data)
72 i64 pos;
73 u8 cmprtype;
74 i64 count;
75 i64 xpos, ypos;
76 u8 b;
77 u8 b1, b2;
78 i64 k;
80 pos = pos1;
82 xpos = 0; ypos = 0;
84 while(pos < c->infile->len) {
85 if(xpos >= d->width_in_chars) {
86 ypos++;
87 xpos = 0;
89 if(ypos >= d->height_in_chars) {
90 break;
93 b = de_getbyte(pos);
94 pos++;
95 cmprtype = b>>6;
96 count = (i64)(b&0x3f) +1;
98 switch(cmprtype) {
99 case 0: // Uncompressed
100 dbuf_copy(c->infile, pos, count*2, unc_data);
101 pos += count*2;
102 break;
103 case 1: // Character compression
104 b1 = de_getbyte(pos++); // character code
105 for(k=0; k<count; k++) {
106 b2 = de_getbyte(pos++); // attribute code
107 dbuf_writebyte(unc_data, b1);
108 dbuf_writebyte(unc_data, b2);
110 break;
111 case 2: // Attribute compression
112 b2 = de_getbyte(pos++); // attribute code
113 for(k=0; k<count; k++) {
114 b1 = de_getbyte(pos++); // character code
115 dbuf_writebyte(unc_data, b1);
116 dbuf_writebyte(unc_data, b2);
118 break;
119 case 3: // Character/Attribute compression
120 b1 = de_getbyte(pos++); // character code
121 b2 = de_getbyte(pos++); // attribute code
122 for(k=0; k<count; k++) {
123 dbuf_writebyte(unc_data, b1);
124 dbuf_writebyte(unc_data, b2);
126 break;
129 xpos += count;
133 static void do_read_palette(deark *c, lctx *d,struct de_char_context *charctx,
134 i64 pos, int adf_style)
136 i64 k;
137 u8 cr1, cg1, cb1;
138 u8 cr2, cg2, cb2;
139 i64 cpos;
140 char tmps[64];
142 de_dbg(c, "palette at %d", (int)pos);
144 for(k=0; k<16; k++) {
145 i64 idx = k;
147 if(adf_style) {
148 if(k>=8) idx = 48+k;
149 else if(k==6) idx = 20;
151 cpos = pos + idx*3;
152 cr1 = de_getbyte(cpos);
153 cg1 = de_getbyte(cpos+1);
154 cb1 = de_getbyte(cpos+2);
155 cr2 = de_scale_63_to_255(cr1);
156 cg2 = de_scale_63_to_255(cg1);
157 cb2 = de_scale_63_to_255(cb1);
158 charctx->pal[k] = DE_MAKE_RGB(cr2, cg2, cb2);
159 de_snprintf(tmps, sizeof(tmps), "(%2d,%2d,%2d) "DE_CHAR_RIGHTARROW" ",
160 (int)cr1, (int)cg1, (int)cb1);
161 de_dbg_pal_entry2(c, k, charctx->pal[k], tmps, NULL, NULL);
165 static void do_default_palette(deark *c, lctx *d, struct de_char_context *charctx)
167 int k;
169 for(k=0; k<16; k++) {
170 charctx->pal[k] = de_palette_pc16(k);
174 static void do_extract_font(deark *c, lctx *d)
176 de_finfo *fi = NULL;
178 if(!d->has_font || !d->font) return;
179 fi = de_finfo_create(c);
180 de_finfo_set_name_from_sz(c, fi, "font", 0, DE_ENCODING_ASCII);
182 de_font_bitmap_font_to_image(c, d->font, fi, DE_CREATEFLAG_IS_AUX);
184 de_finfo_destroy(c, fi);
187 static void do_read_font_data(deark *c, lctx *d, i64 pos)
189 u32 crc;
190 struct de_crcobj *crco;
192 de_dbg(c, "font at %d, %d bytes", (int)pos, (int)d->font_data_len);
193 de_dbg_indent(c, 1);
194 d->font_data = de_malloc(c, d->font_data_len);
195 de_read(d->font_data, pos, d->font_data_len);
197 crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
198 de_crcobj_addbuf(crco, d->font_data, d->font_data_len);
199 crc = de_crcobj_getval(crco);
200 de_crcobj_destroy(crco);
202 d->is_standard_font = de_font_is_standard_vga_font(c, crc);
203 de_dbg(c, "font crc: 0x%08x (%s)", (unsigned int)crc,
204 d->is_standard_font?"known CP437 font":"unrecognized");
206 if(de_get_ext_option(c, "font:dumpvgafont")) {
207 dbuf *df;
208 df = dbuf_create_output_file(c, "font.dat", NULL, DE_CREATEFLAG_IS_AUX);
209 dbuf_write(df, d->font_data, d->font_data_len);
210 dbuf_close(df);
212 de_dbg_indent(c, -1);
215 // Finish populating the d->font struct.
216 static int do_generate_font(deark *c, lctx *d)
218 i64 i;
219 struct de_encconv_state es;
221 if(!d->font) return 0;
222 if(d->font->num_chars!=256) {
223 de_err(c, "Only 256-character fonts are supported");
224 return 0;
226 if(d->font_data_len!=d->font->num_chars*d->font_height) {
227 de_err(c, "Incorrect font data size");
228 return 0;
230 d->font->nominal_width = 8;
231 d->font->nominal_height = (int)d->font_height;
232 d->font->char_array = de_mallocarray(c, d->font->num_chars, sizeof(struct de_bitmap_font_char));
233 de_encconv_init(&es, DE_ENCODING_CP437_G);
235 for(i=0; i<d->font->num_chars; i++) {
236 d->font->char_array[i].codepoint_nonunicode = (i32)i;
237 d->font->char_array[i].codepoint_unicode = de_char_to_unicode_ex((i32)i, &es);
238 d->font->char_array[i].width = d->font->nominal_width;
239 d->font->char_array[i].height = d->font->nominal_height;
240 d->font->char_array[i].rowspan = 1;
241 d->font->char_array[i].bitmap = &d->font_data[i*d->font_height];
244 return 1;
247 static void free_lctx(deark *c, lctx *d)
249 if(d->font) {
250 de_free(c, d->font->char_array);
251 de_destroy_bitmap_font(c, d->font);
253 de_free(c, d->font_data);
254 de_free(c, d);
257 static void de_run_xbin(deark *c, de_module_params *mparams)
259 lctx *d = NULL;
260 struct de_char_context *charctx = NULL;
261 struct de_SAUCE_detection_data sdd;
262 struct de_SAUCE_info *si = NULL;
263 i64 pos = 0;
264 u8 flags;
265 dbuf *unc_data = NULL;
267 d = de_malloc(c, sizeof(lctx));
269 charctx = de_create_charctx(c, 0);
270 charctx->prefer_image_output = 1;
271 de_char_decide_output_format(c, charctx);
273 fmtutil_detect_SAUCE(c, c->infile, &sdd, 0x1);
274 if(sdd.has_SAUCE) {
275 si = fmtutil_create_SAUCE(c);
277 de_dbg_indent(c, 1);
278 fmtutil_handle_SAUCE(c, c->infile, si);
279 de_dbg_indent(c, -1);
281 charctx->title = si->title;
282 charctx->artist = si->artist;
283 charctx->organization = si->organization;
284 charctx->creation_date = si->creation_date;
285 charctx->comment = si->comment;
288 d->width_in_chars = de_getu16le(5);
289 d->height_in_chars = de_getu16le(7);
290 d->font_height = (i64)de_getbyte(9);
292 flags = de_getbyte(10);
293 de_dbg(c, "dimensions: %d"DE_CHAR_TIMES"%d characters", (int)d->width_in_chars, (int)d->height_in_chars);
294 de_dbg(c, "font height: %d", (int)d->font_height);
295 de_dbg(c, "flags: 0x%02x", (unsigned int)flags);
296 d->has_palette = (flags&0x01)?1:0;
297 d->has_font = (flags&0x02)?1:0;
298 d->compression = (flags&0x04)?1:0;
299 d->nonblink = (flags&0x08)?1:0;
300 d->has_512chars = (flags&0x10)?1:0;
301 de_dbg(c, " has palette: %d", (int)d->has_palette);
302 de_dbg(c, " has font: %d", (int)d->has_font);
303 de_dbg(c, " compression: %d", (int)d->compression);
304 de_dbg(c, " non-blink mode: %d", (int)d->nonblink);
305 de_dbg(c, " 512 character mode: %d", (int)d->has_512chars);
307 if(d->has_font && (d->font_height<1 || d->font_height>32)) {
308 de_err(c, "Invalid font height: %d", (int)d->font_height);
309 goto done;
311 pos = 11;
313 if(d->has_palette) {
314 do_read_palette(c, d, charctx, pos, 0);
315 pos += 48;
317 else {
318 de_dbg(c, "using default palette");
319 do_default_palette(c, d, charctx);
322 if(d->has_font) {
323 d->font = de_create_bitmap_font(c);
324 d->font->has_nonunicode_codepoints = 1;
325 d->font->has_unicode_codepoints = 1;
326 d->font->prefer_unicode = 0;
327 d->font->num_chars = d->has_512chars ? 512 : 256;
328 d->font_data_len = d->font->num_chars * d->font_height;
329 if(d->font->num_chars!=256) {
330 de_err(c, "%d-character mode is not supported", (int)d->font->num_chars);
331 goto done;
334 do_read_font_data(c, d, pos);
335 pos += d->font_data_len;
337 if(d->is_standard_font) {
338 charctx->suppress_custom_font_warning = 1;
341 if(!do_generate_font(c, d)) goto done;
343 if(c->extract_level>=2) {
344 do_extract_font(c, d);
347 charctx->font = d->font;
349 else {
350 // Use default font
352 if(d->has_512chars) {
353 de_err(c, "This type of XBIN file is not supported.");
354 goto done;
357 if(d->font_height==0) {
358 // Not really legal, but we'll let it mean "default".
360 else if(d->font_height!=16) {
361 if(charctx->outfmt==1) { // image output
362 de_warn(c, "Incompatible font height (%d), using 16 instead.", (int)d->font_height);
365 d->font_height = 16;
368 de_dbg(c, "image data at %d", (int)pos);
370 if(d->compression) {
371 unc_data = dbuf_create_membuf(c, d->width_in_chars * d->height_in_chars * 2, 1);
372 do_uncompress_data(c, d, pos, unc_data);
374 else {
375 unc_data = dbuf_open_input_subfile(c->infile, pos, c->infile->len-pos);
377 do_bin_main(c, d, unc_data, charctx);
379 done:
380 dbuf_close(unc_data);
381 de_free_charctx_screens(c, charctx);
382 de_destroy_charctx(c, charctx);
383 fmtutil_free_SAUCE(c, si);
384 free_lctx(c, d);
387 static int de_identify_xbin(deark *c)
389 if(!dbuf_memcmp(c->infile, 0, "XBIN\x1a", 5))
390 return 100;
391 return 0;
394 static void de_help_xbin(deark *c)
396 de_msg(c, "-opt char:output=html : Write HTML instead of an image file");
397 de_msg(c, "-opt char:charwidth=<8|9> : Width of a character cell");
400 void de_module_xbin(deark *c, struct deark_module_info *mi)
402 mi->id = "xbin";
403 mi->desc = "XBIN character graphics";
404 mi->run_fn = de_run_xbin;
405 mi->identify_fn = de_identify_xbin;
406 mi->help_fn = de_help_xbin;
409 ////////////////////// Binary Text //////////////////////
411 static void de_run_bintext(deark *c, de_module_params *mparams)
413 lctx *d = NULL;
414 struct de_char_context *charctx = NULL;
415 struct de_SAUCE_detection_data sdd;
416 struct de_SAUCE_info *si = NULL;
417 dbuf *unc_data = NULL;
418 i64 effective_file_size = 0;
419 int valid_sauce = 0;
420 const char *s;
421 i64 width_req = 0;
423 d = de_malloc(c, sizeof(lctx));
425 charctx = de_malloc(c, sizeof(struct de_char_context));
426 charctx->prefer_image_output = 0;
428 s=de_get_ext_option(c, "char:width");
429 if(s) {
430 width_req = de_atoi(s);
433 fmtutil_detect_SAUCE(c, c->infile, &sdd, 0x1);
434 if(sdd.has_SAUCE) {
435 si = fmtutil_create_SAUCE(c);
437 de_dbg_indent(c, 1);
438 fmtutil_handle_SAUCE(c, c->infile, si);
439 de_dbg_indent(c, -1);
441 charctx->title = si->title;
442 charctx->artist = si->artist;
443 charctx->organization = si->organization;
444 charctx->creation_date = si->creation_date;
445 charctx->comment = si->comment;
447 effective_file_size = si->original_file_size;
449 if(si->data_type==5) {
450 valid_sauce = 1;
452 if(si->file_type==1 && si->tinfo1>0) {
453 // Some files created by ACiDDraw do this.
454 d->width_in_chars = 2*(i64)si->tinfo1;
456 else {
457 // For BinText, the FileType field is inexplicably used for the width (usually).
458 d->width_in_chars = 2*(i64)si->file_type;
461 if(si->tflags & 0x01) {
462 d->nonblink = 1;
464 if((si->tflags & 0x18)>>3 == 0x02) {
465 // Square pixels requested
466 charctx->no_density = 1;
468 if((si->tflags & 0x06)>>1 == 0x02) {
469 charctx->prefer_9col_mode = 1;
474 if(!valid_sauce) {
475 d->width_in_chars = 160;
476 effective_file_size = c->infile->len;
479 if(width_req>0) d->width_in_chars = width_req;
481 if(d->width_in_chars<1) d->width_in_chars=160;
482 if(effective_file_size%(d->width_in_chars*2)) {
483 de_warn(c, "File does not contain a whole number of rows. The width may "
484 "be wrong. Try \"-opt char:width=...\".");
486 d->height_in_chars = effective_file_size / (d->width_in_chars*2);
488 de_dbg(c, "width: %d chars", (int)d->width_in_chars);
489 de_dbg(c, "calculated height: %d chars", (int)d->height_in_chars);
490 d->has_palette = 1;
491 d->has_font = 1;
492 d->compression = 0;
493 d->has_512chars = 0;
495 do_default_palette(c, d, charctx);
497 unc_data = dbuf_open_input_subfile(c->infile, 0, effective_file_size);
498 do_bin_main(c, d, unc_data, charctx);
500 dbuf_close(unc_data);
501 de_free_charctx(c, charctx);
502 fmtutil_free_SAUCE(c, si);
503 free_lctx(c, d);
506 static int de_identify_bintext(deark *c)
508 if(!c->detection_data->SAUCE_detection_attempted) {
509 // FIXME?: This is known to happen if "-disablemods sauce" was used.
510 de_err(c, "bintext detection requires sauce module");
511 return 0;
513 if(c->detection_data->sauce.has_SAUCE) {
514 if(c->detection_data->sauce.data_type==5)
516 return 100;
519 return 0;
522 static void de_help_bintext(deark *c)
524 de_msg(c, "-opt char:output=image : Write an image file instead of HTML");
525 de_msg(c, " -opt char:charwidth=<8|9> : Width of a character cell");
526 de_msg(c, "-opt char:width=<n> : Number of characters per row");
529 void de_module_bintext(deark *c, struct deark_module_info *mi)
531 mi->id = "bintext";
532 mi->desc = "Binary Text character graphics";
533 mi->run_fn = de_run_bintext;
534 mi->identify_fn = de_identify_bintext;
535 mi->help_fn = de_help_bintext;
538 ////////////////////// ArtWorx Data Format (ADF) //////////////////////
540 static void de_run_artworx_adf(deark *c, de_module_params *mparams)
542 lctx *d = NULL;
543 struct de_char_context *charctx = NULL;
544 dbuf *unc_data = NULL;
545 i64 data_start;
546 i64 data_len;
548 d = de_malloc(c, sizeof(lctx));
550 // TODO: ADF files can probably have SAUCE records, so we should read
551 // the SAUCE data if present. But there does not seem to be a defined
552 // SAUCE file type for ADF.
554 charctx = de_malloc(c, sizeof(struct de_char_context));
555 charctx->prefer_image_output = 1;
557 data_start = 1+192+4096;
558 data_len = c->infile->len - data_start;
559 if(data_len<0) goto done;
561 d->width_in_chars = 80;
562 d->height_in_chars = data_len / (d->width_in_chars*2);
564 de_dbg(c, "guessed width: %d chars", (int)d->width_in_chars);
565 de_dbg(c, "calculated height: %d chars", (int)d->height_in_chars);
566 if(d->height_in_chars<1) goto done;
567 d->has_palette = 0;
568 d->has_font = 1;
569 d->compression = 0;
570 d->has_512chars = 0;
571 d->nonblink = 1;
573 do_read_palette(c, d, charctx, 1, 1);
576 // TODO: This duplicates a lot of the xbin code.
578 d->font = de_create_bitmap_font(c);
579 d->font->has_nonunicode_codepoints = 1;
580 d->font->has_unicode_codepoints = 1;
581 d->font->prefer_unicode = 0;
582 d->font->num_chars = 256;
583 d->font_height = 16;
584 d->font_data_len = d->font->num_chars * d->font_height;
586 do_read_font_data(c, d, 1+192);
588 if(d->is_standard_font) {
589 charctx->suppress_custom_font_warning = 1;
592 if(!do_generate_font(c, d)) goto done;
594 if(c->extract_level>=2) {
595 do_extract_font(c, d);
598 charctx->font = d->font;
601 unc_data = dbuf_open_input_subfile(c->infile, data_start, data_len);
602 do_bin_main(c, d, unc_data, charctx);
604 done:
605 dbuf_close(unc_data);
606 de_free_charctx(c, charctx);
607 free_lctx(c, d);
610 static int de_identify_artworx_adf(deark *c)
612 u8 ver;
614 // TODO: This detection algorithm will fail if there is a SAUCE record.
616 if(c->infile->len < 1+192+4096+160) {
617 return 0;
619 if((c->infile->len - (1+192+4096))%160 != 0) {
620 return 0;
622 if(!de_input_file_has_ext(c, "adf")) return 0;
623 ver = de_getbyte(0);
624 // I don't know what version numbers are allowed, but I'll assume the
625 // version number should be small.
626 if(ver>4) return 0;
627 return 75;
630 static void de_help_artworx_adf(deark *c)
632 de_msg(c, "-opt char:output=html : Write HTML instead of an image file");
633 de_msg(c, "-opt char:charwidth=<8|9> : Width of a character cell");
634 de_msg(c, "-opt char:width=<n> : Number of characters per row");
637 void de_module_artworx_adf(deark *c, struct deark_module_info *mi)
639 mi->id = "artworx_adf";
640 mi->desc = "ArtWorx Data Format (ADF)";
641 mi->run_fn = de_run_artworx_adf;
642 mi->identify_fn = de_identify_artworx_adf;
643 mi->help_fn = de_help_artworx_adf;
646 ////////////////////// iCEDraw format (.idf) //////////////////////
648 // This module is not yet implemented. This stub exists because it seemed
649 // like the simplest way to accomplish multiple goals:
650 // * Avoid having iCEDraw mis-identified as ANSI Art.
651 // * Avoid an error message from the SAUCE module implying that ANSI
652 // Art is not a supported format.
653 // * Print debugging info about the SAUCE record, if present.
654 // * Print the same error message whether or not a SAUCE record is present.
656 static void de_run_icedraw(deark *c, de_module_params *mparams)
658 struct de_SAUCE_detection_data sdd;
660 fmtutil_detect_SAUCE(c, c->infile, &sdd, 0x1);
661 if(sdd.has_SAUCE) {
662 // Read the SAUCE record if present, just for the debugging info.
663 struct de_SAUCE_info *si = NULL;
664 si = fmtutil_create_SAUCE(c);
666 de_dbg_indent(c, 1);
667 fmtutil_handle_SAUCE(c, c->infile, si);
668 de_dbg_indent(c, -1);
670 fmtutil_free_SAUCE(c, si);
673 de_err(c, "iCEDraw format is not supported");
676 static int de_identify_icedraw(deark *c)
678 if(!dbuf_memcmp(c->infile, 0, "\x04\x31\x2e\x34", 4)) {
679 return 100;
681 return 0;
684 void de_module_icedraw(deark *c, struct deark_module_info *mi)
686 mi->id = "icedraw";
687 mi->desc = "iCEDraw character graphics format";
688 mi->run_fn = de_run_icedraw;
689 mi->identify_fn = de_identify_icedraw;
690 mi->flags |= DE_MODFLAG_NONWORKING;