Minor refactoring of the IFF and box-format parsers
[deark.git] / modules / pkfont.c
blobaa12f2115f1c8cba591c6192c7aeaf97febdefcd
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // PK font ("packed font")
7 #include <deark-config.h>
8 #include <deark-private.h>
DE_DECLARE_MODULE(de_module_pkfont);

// Flag-byte values of the "special" commands that carry a payload.
// XXX1..XXX4 are followed by a 1-, 2-, 3- or 4-byte length field and then
// that many payload bytes; YYY is followed by a fixed 4-byte payload.
#define PK_XXX1  240
#define PK_XXX2  241
#define PK_XXX3  242
#define PK_XXX4  243
#define PK_YYY   244
17 struct page_ctx {
18 i32 cc;
19 int w, h;
20 i64 tfm;
21 i64 dm;
22 i64 hoff, voff;
23 i64 dyn_f;
24 int start_with_black;
25 i64 raster_pos;
26 i64 raster_len;
28 i64 curpos_x, curpos_y;
29 i64 pixelcount;
32 typedef struct localctx_struct {
33 struct de_bitmap_font *font;
34 i64 char_array_alloc;
35 } lctx;
37 static void do_preamble(deark *c, lctx *d, i64 pos, i64 *bytesused)
39 i64 comment_len;
41 de_dbg(c, "preamble at %d", (int)pos);
42 de_dbg_indent(c, 1);
44 // (identification byte (should be 89) is at pos+1)
46 comment_len = (i64)de_getbyte(pos+2);
47 de_dbg(c, "comment length: %d", (int)comment_len);
49 *bytesused = 3+comment_len+16;
50 de_dbg_indent(c, -1);
53 static i64 do_getu24be(dbuf *f, i64 pos)
55 return dbuf_getint_ext(f, pos, 3, 0, 0);
58 static u8 get_nybble(dbuf *f, i64 abs_byte_pos, i64 nybble_offs)
60 u8 b;
61 b = dbuf_getbyte(f, abs_byte_pos + nybble_offs/2);
62 if(nybble_offs%2) {
63 return b&0x0f;
65 return b>>4;
68 static int get_packed_int(dbuf *f, i64 raster_pos, i64 *nybble_pos,
69 i64 initial_zero_count, i64 *result)
71 u8 v = 0;
72 i64 zero_count = initial_zero_count;
73 i64 val;
74 i64 i;
76 while(1) {
77 v = get_nybble(f, raster_pos, *nybble_pos);
78 (*nybble_pos)++;
80 if(v==0) {
81 zero_count++;
82 if(zero_count>16) { // Sanity check
83 de_err(f->c, "Bad packed int at %d", (int)raster_pos);
84 *result = 0;
85 return 0;
88 else {
89 break;
93 val = (i64)v;
94 // There are zero_count+1 data nybbles, but we've already read the first one,
95 // so we need to read zero_count more of them.
96 for(i=0; i<zero_count; i++) {
97 val = (val<<4) | get_nybble(f, raster_pos, *nybble_pos);
98 (*nybble_pos)++;
101 *result = val;
102 return 1;
105 static void set_bit_at_cur_pos(struct de_bitmap_font_char *ch, struct page_ctx *pg)
107 i64 bytepos;
108 i64 bitpos;
110 if(pg->curpos_x<0 || pg->curpos_x>=pg->w) return;
111 if(pg->curpos_y<0 || pg->curpos_y>=pg->h) return;
113 bytepos = pg->curpos_y*ch->rowspan + pg->curpos_x/8;
114 bitpos = pg->curpos_x%8;
115 ch->bitmap[bytepos] |= 1<<(7-bitpos);
118 // Copy row number pg->curpos_y-1 zero more more times, updating
119 // pg->curpos_y as appropriate.
120 static void repeat_row_as_needed(struct de_bitmap_font_char *ch, struct page_ctx *pg, i64 repeat_count)
122 i64 z;
123 i64 from_row, to_row;
125 from_row = pg->curpos_y-1;
126 if(from_row<0) return;
128 for(z=0; z<repeat_count; z++) {
129 to_row = pg->curpos_y;
130 if(to_row>=pg->h) return;
131 de_memcpy(&ch->bitmap[to_row*ch->rowspan], &ch->bitmap[from_row*ch->rowspan], (size_t)ch->rowspan);
132 pg->curpos_y++;
133 pg->pixelcount += pg->w;
137 static void do_read_raster(deark *c, lctx *d, struct page_ctx *pg)
139 i64 char_idx;
140 struct de_bitmap_font_char *ch;
141 u8 v, v1;
142 i64 nybble_pos;
143 i64 expected_num_pixels;
144 i64 j;
145 i64 k;
146 int parity;
147 int next_num_is_repeat_count;
148 i64 number;
149 i64 run_count;
150 i64 repeat_count;
152 de_dbg(c, "%scompressed character raster at %d, len=%d", pg->dyn_f==14?"un":"",
153 (int)pg->raster_pos, (int)pg->raster_len);
154 de_dbg_indent(c, 1);
156 expected_num_pixels = (i64)pg->w * (i64)pg->h;
157 if(expected_num_pixels<1) {
158 de_dbg(c, "ignoring zero-size character (cc=%d) at %d",
159 (int)pg->cc, (int)pg->raster_pos);
160 goto done;
163 // Make sure we have room for the new character
164 if(d->font->num_chars+1 > d->char_array_alloc) {
165 i64 new_numalloc;
166 new_numalloc = d->char_array_alloc*2;
167 if(new_numalloc<d->font->num_chars+1) new_numalloc=d->font->num_chars+1;
168 if(new_numalloc<37) new_numalloc=37;
169 d->font->char_array = de_reallocarray(c, d->font->char_array,
170 d->char_array_alloc, sizeof(struct de_bitmap_font_char),
171 new_numalloc);
172 d->char_array_alloc = new_numalloc;
175 // Create the new character
176 char_idx = d->font->num_chars++;
178 ch = &d->font->char_array[char_idx];
179 ch->width = pg->w;
180 ch->height = pg->h;
181 if(pg->hoff<0 && (pg->hoff >= -128)) {
182 // Not sure if this is the right way to handle horizontal spacing, but
183 // it looks about right. (At least in a relative way. With some fonts,
184 // the glyphs definitely would have to be rendered closer together than
185 // our presentation would imply.)
186 // Some characters have positive ->hoff values, but we don't have any
187 // way to deal with that.
188 ch->extraspace_l = (i16)-pg->hoff;
191 // The vertical offset will be normalized later, once we know the offsets
192 // of all the characters.
193 ch->v_offset = (int)-pg->voff;
195 ch->rowspan = (ch->width+7)/8;
196 ch->bitmap = de_malloc(c, ch->rowspan * ch->height);
197 ch->codepoint_nonunicode = pg->cc;
199 if(pg->dyn_f==14) {
200 u8 *srcbitmap;
201 i64 srcbitmap_size;
203 srcbitmap_size = (pg->w*pg->h+7)/8;
204 srcbitmap = de_malloc(c, srcbitmap_size);
205 de_read(srcbitmap, pg->raster_pos, srcbitmap_size);
206 for(j=0; j<pg->h; j++) {
207 de_copy_bits(srcbitmap, j*ch->width, ch->bitmap, j*ch->rowspan*8, ch->width);
210 de_free(c, srcbitmap);
211 goto done;
214 nybble_pos = 0;
215 number = 0;
216 parity = pg->start_with_black;
217 repeat_count = 0;
218 next_num_is_repeat_count = 0;
219 pg->curpos_x = 0;
220 pg->curpos_y = 0;
221 pg->pixelcount = 0;
223 while(1) {
224 double initial_abs_nybble_pos = (double)pg->raster_pos + (double)nybble_pos/2.0;
226 if(nybble_pos >= pg->raster_len*2) break; // out of source data
227 if(pg->curpos_y>=pg->h) break; // reached end of image
229 v = get_nybble(c->infile, pg->raster_pos, nybble_pos++);
231 // The compressed data is a sequence of tokens.
232 // A token consists of one or more nybbles.
233 // A token beginning with nybble value 0 through 13 represents a number.
234 // A number is either a "run count" or a "repeat count".
235 // 14 and 15 are special one-nybble tokens.
236 // 14 indicates that the next number is a repeat count (instead of a run count).
237 // 15 means to set the current repeat count to 1.
239 if(v==14) {
240 next_num_is_repeat_count = 1;
241 if(c->debug_level>=3) {
242 de_dbg3(c, "[%.1f] n=%d; repeat_count=...", initial_abs_nybble_pos, (int)v);
244 continue;
246 else if(v==15) { // v==15: repeat count = 1
247 if(c->debug_level>=3) {
248 de_dbg3(c, "[%.1f] n=%d; repeat_count=1", initial_abs_nybble_pos, (int)v);
250 repeat_count = 1;
251 continue;
254 // If we get here, then this nybble represents a number, or the start of a number.
256 if(v==0) { // large run count
257 if(!get_packed_int(c->infile, pg->raster_pos, &nybble_pos, 1, &number)) goto done;
258 number = number - 15 + (13-pg->dyn_f)*16 + pg->dyn_f;
260 else if(v<=pg->dyn_f) { // one-nybble run count
261 number = (i64)v;
263 else if(v<=13) { // two-nybble run count
264 v1 = get_nybble(c->infile, pg->raster_pos, nybble_pos++);
265 number = ((i64)v-pg->dyn_f-1)*16 + v1 + pg->dyn_f + 1;
268 if(next_num_is_repeat_count) {
269 if(c->debug_level>=3) {
270 de_dbg3(c, "[%.1f] ...%d", initial_abs_nybble_pos, (int)number);
272 repeat_count = number;
273 next_num_is_repeat_count = 0;
274 continue;
277 // If we get here, we have a number that represents a run count (not a
278 // repeat count).
279 // Apply it to the character bitmap.
281 run_count = number;
283 if(c->debug_level>=3) {
284 de_dbg3(c, "[%.1f] n=%d; run_count=%d %s", initial_abs_nybble_pos,
285 (int)v, (int)run_count, parity?"B":"W");
288 for(k=0; k<run_count; k++) {
289 pg->pixelcount++;
290 if(parity) {
291 set_bit_at_cur_pos(ch, pg);
293 pg->curpos_x++;
295 if(pg->curpos_x>=pg->w) {
296 pg->curpos_y++;
297 pg->curpos_x = 0;
299 // A repeat count applies to the "row on which the first pixel of
300 // the next run count will lie".
301 // This means that repeats should be applied immediately after the
302 // last pixel of a row has been emitted (as opposed to immediately
303 // before the first pixel of a row is emitted).
304 repeat_row_as_needed(ch, pg, repeat_count);
305 repeat_count = 0;
308 parity = !parity;
311 if(pg->pixelcount != expected_num_pixels) {
312 de_warn(c, "Expected %d pixels, got %d (codepoint %d)", (int)expected_num_pixels,
313 (int)pg->pixelcount, (int)pg->cc);
316 done:
317 de_dbg_indent(c, -1);
320 static int do_char_descr(deark *c, lctx *d, i64 pos, i64 *bytesused)
322 u8 flagbyte;
323 u8 lsb3;
324 #define CHAR_PREAMBLE_FORMAT_SHORT 1
325 #define CHAR_PREAMBLE_FORMAT_EXT_SHORT 2
326 #define CHAR_PREAMBLE_FORMAT_LONG 3
327 int char_preamble_format;
328 i64 pl;
329 i64 tfm_offs;
330 struct page_ctx *pg = NULL;
331 int retval = 0;
333 pg = de_malloc(c, sizeof(struct page_ctx));
335 de_dbg(c, "character descriptor at %d", (int)pos);
336 de_dbg_indent(c, 1);
338 flagbyte = de_getbyte(pos);
339 pg->dyn_f = ((i64)flagbyte)>>4;
340 de_dbg(c, "dyn_f: %d", (int)pg->dyn_f);
342 // Character preamble format: (lsb=...)
343 // 0-3: short format
344 // 4-6: extended short format
345 // 7: long form
346 lsb3 = flagbyte&0x7;
348 pg->start_with_black = (flagbyte&0x8)?1:0;
350 if(lsb3==7) {
351 char_preamble_format = CHAR_PREAMBLE_FORMAT_LONG;
353 else if(lsb3>=4) {
354 char_preamble_format = CHAR_PREAMBLE_FORMAT_EXT_SHORT;
356 else {
357 char_preamble_format = CHAR_PREAMBLE_FORMAT_SHORT;
360 if(char_preamble_format==CHAR_PREAMBLE_FORMAT_SHORT) {
361 pl = (i64)de_getbyte(pos+1);
362 pl |= ((i64)(flagbyte&0x03))<<8;
363 pg->cc = (i32)de_getbyte(pos+2);
364 tfm_offs = 3;
365 pg->tfm = do_getu24be(c->infile, pos+tfm_offs);
366 pg->dm = (i64)de_getbyte(pos+6);
367 pg->w = (int)de_getbyte(pos+7);
368 pg->h = (int)de_getbyte(pos+8);
369 pg->hoff = dbuf_geti8(c->infile, pos+9);
370 pg->voff = dbuf_geti8(c->infile, pos+10);
371 pg->raster_pos = pos + 11;
373 else if(char_preamble_format==CHAR_PREAMBLE_FORMAT_EXT_SHORT) {
374 pl = de_getu16be(pos+1);
375 pl |= ((i64)(flagbyte&0x03))<<16;
376 pg->cc = (i32)de_getbyte(pos+3);
377 tfm_offs = 4;
378 pg->tfm = do_getu24be(c->infile, pos+tfm_offs);
379 pg->dm = de_getu16be(pos+7);
380 pg->w = (int)de_getu16be(pos+9);
381 pg->h = (int)de_getu16be(pos+11);
382 pg->hoff = de_geti16be(pos+13);
383 pg->voff = de_geti16be(pos+15);
384 pg->raster_pos = pos + 17;
386 else {
387 de_err(c, "Unsupported character preamble format (%d)", (int)lsb3);
388 goto done;
391 de_dbg(c, "pl=%d cc=%d tfm=%d dm=%d w=%d h=%d hoff=%d voff=%d",
392 (int)pl, (int)pg->cc, (int)pg->tfm, (int)pg->dm, (int)pg->w, (int)pg->h,
393 (int)pg->hoff, (int)pg->voff);
395 pg->raster_len = (pos+tfm_offs+pl)-pg->raster_pos;
396 do_read_raster(c, d, pg);
398 *bytesused = tfm_offs + pl;
399 retval = 1;
401 done:
402 de_dbg_indent(c, -1);
403 de_free(c, pg);
404 return retval;
407 static const char *get_flagbyte_name(u8 flagbyte)
409 if(flagbyte<240) return "character descriptor";
410 switch(flagbyte) {
411 case PK_XXX1: return "special xxx1";
412 case PK_XXX2: return "special xxx2";
413 case PK_XXX3: return "special xxx3";
414 case PK_XXX4: return "special xxx4";
415 case PK_YYY: return "special yyy";
416 case 245: return "postamble";
417 case 246: return "no-op";
418 case 247: return "preamble";
420 return "?";
423 static void scan_and_fixup_font(deark *c, lctx *d)
425 struct de_bitmap_font_char *ch;
426 i64 i;
427 int min_v_pos = 1000000;
428 int max_v_pos = -1000000;
430 // Find the maximum character width, and the bounding box of the character heights.
431 for(i=0; i<d->font->num_chars; i++) {
432 ch = &d->font->char_array[i];
434 if(ch->width > d->font->nominal_width)
435 d->font->nominal_width = ch->width;
437 if(ch->v_offset < min_v_pos)
438 min_v_pos = ch->v_offset;
440 if(ch->v_offset + ch->height > max_v_pos)
441 max_v_pos = ch->v_offset + ch->height;
444 d->font->nominal_height = max_v_pos - min_v_pos;
446 // Another pass, to fixup the v_offsets so that the minimum one is 0.
447 for(i=0; i<d->font->num_chars; i++) {
448 ch = &d->font->char_array[i];
450 ch->v_offset -= min_v_pos;
454 static void de_run_pkfont(deark *c, de_module_params *mparams)
456 lctx *d = NULL;
457 i64 pos;
458 i64 bytesused;
459 i64 i;
460 u8 flagbyte;
461 i64 chars_in_file = 0;
463 d = de_malloc(c, sizeof(lctx));
464 d->font = de_create_bitmap_font(c);
465 d->font->has_nonunicode_codepoints = 1;
467 pos = 0;
468 while(pos < c->infile->len) {
469 flagbyte = de_getbyte(pos);
470 de_dbg(c, "flag byte at %d: 0x%02x (%s)", (int)pos, (unsigned int)flagbyte,
471 get_flagbyte_name(flagbyte));
472 bytesused = 0;
474 if(flagbyte >= 240) {
475 i64 dpos = 0;
476 i64 dlen = 0;
478 switch(flagbyte) {
479 case PK_XXX1:
480 dlen = (i64)de_getbyte(pos+1);
481 dpos = pos + 2;
482 bytesused = 2 + dlen;
483 break;
484 case PK_XXX2:
485 dlen = de_getu16be(pos+1);
486 dpos = pos + 3;
487 bytesused = 3 + dlen;
488 break;
489 case PK_XXX3:
490 dlen = dbuf_getint_ext(c->infile, pos+1, 3, 0, 0);
491 dpos = pos + 4;
492 bytesused = 4 + dlen;
493 break;
494 case PK_XXX4:
495 dlen = de_getu32be(pos+1);
496 dpos = pos + 5;
497 bytesused = 5 + dlen;
498 break;
499 case PK_YYY:
500 dlen = 4;
501 dpos = pos + 1;
502 bytesused = 5;
503 break;
504 case 245: // postamble
505 goto done_reading;
506 case 246: // no-op
507 bytesused = 1;
508 break;
509 case 247:
510 do_preamble(c, d, pos, &bytesused);
511 break;
512 default:
513 de_err(c, "Unsupported command: %d at %d", (int)flagbyte, (int)pos);
514 goto done;
517 if(dlen>0 && flagbyte>=240 && flagbyte<=244) {
518 de_dbg_indent(c, 1);
519 de_dbg_hexdump(c, c->infile, dpos, dlen, 256, NULL, 0x1);
520 de_dbg_indent(c, -1);
523 else {
524 chars_in_file++;
525 if(!do_char_descr(c, d, pos, &bytesused)) goto done;
528 if(bytesused<1) break;
529 pos += bytesused;
532 done_reading:
533 de_dbg(c, "number of characters: %d (%d processed)", (int)chars_in_file,
534 (int)d->font->num_chars);
536 scan_and_fixup_font(c, d);
537 de_font_bitmap_font_to_image(c, d->font, NULL, 0);
539 done:
540 if(d->font) {
541 if(d->font->char_array) {
542 for(i=0; i<d->font->num_chars; i++) {
543 de_free(c, d->font->char_array[i].bitmap);
545 de_free(c, d->font->char_array);
547 de_destroy_bitmap_font(c, d->font);
549 de_free(c, d);
552 static int de_identify_pkfont(deark *c)
554 if(!dbuf_memcmp(c->infile, 0, "\xf7\x59", 2))
555 return 75;
556 return 0;
559 void de_module_pkfont(deark *c, struct deark_module_info *mi)
561 mi->id = "pkfont";
562 mi->desc = "PK Font";
563 mi->run_fn = de_run_pkfont;
564 mi->identify_fn = de_identify_pkfont;