1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // PK font ("packed font")
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_pkfont
);
28 i64 curpos_x
, curpos_y
;
32 typedef struct localctx_struct
{
33 struct de_bitmap_font
*font
;
// Handle the preamble command located at file offset 'pos'.
// Logs the preamble position, reads the one-byte comment length stored at
// pos+2, and reports the total size of the command through *bytesused.
//   pos:       offset of the preamble's command byte
//   bytesused: receives the number of bytes occupied by the preamble
// NOTE(review): the 16 bytes counted after the comment are not read here;
// presumably they are the fixed-size preamble parameters - confirm against
// the PK format description.
37 static void do_preamble(deark
*c
, lctx
*d
, i64 pos
, i64
*bytesused
)
41 de_dbg(c
, "preamble at %d", (int)pos
);
44 // (identification byte (should be 89) is at pos+1)
46 comment_len
= (i64
)de_getbyte(pos
+2);
47 de_dbg(c
, "comment length: %d", (int)comment_len
);
// Total size: 3 leading bytes (command, id, comment length), the comment
// itself, then 16 more bytes.
49 *bytesused
= 3+comment_len
+16;
// Read a 3-byte integer from 'f' at offset 'pos', by delegating to
// dbuf_getint_ext() with a field size of 3.
// NOTE(review): going by this function's name, the trailing "0, 0" arguments
// presumably select big-endian and unsigned - confirm against the
// declaration of dbuf_getint_ext().
53 static i64
do_getu24be(dbuf
*f
, i64 pos
)
55 return dbuf_getint_ext(f
, pos
, 3, 0, 0);
// Fetch one nybble from 'f'.
//   abs_byte_pos: byte offset that nybble_offs is relative to
//   nybble_offs:  offset counted in nybbles (two per byte)
// The byte containing the requested nybble is at abs_byte_pos + nybble_offs/2.
// NOTE(review): how the high or low half of that byte is selected is not
// visible in this excerpt.
58 static u8
get_nybble(dbuf
*f
, i64 abs_byte_pos
, i64 nybble_offs
)
61 b
= dbuf_getbyte(f
, abs_byte_pos
+ nybble_offs
/2);
// Decode a multi-nybble "packed" integer from the raster data.
//   raster_pos:         byte offset of the start of the raster
//   nybble_pos:         in/out nybble index, relative to raster_pos, of the
//                       next nybble to read
//   initial_zero_count: starting value for the count of leading zero nybbles
//   result:             receives the decoded value
// The value is assembled 4 bits at a time, most significant nybble first.
// A zero count above 16 is treated as corrupt data and reported with de_err().
// The caller in do_read_raster() treats a zero return value as failure.
// NOTE(review): the statements that advance *nybble_pos and store *result are
// not visible in this excerpt.
68 static int get_packed_int(dbuf
*f
, i64 raster_pos
, i64
*nybble_pos
,
69 i64 initial_zero_count
, i64
*result
)
72 i64 zero_count
= initial_zero_count
;
77 v
= get_nybble(f
, raster_pos
, *nybble_pos
);
82 if(zero_count
>16) { // Sanity check
83 de_err(f
->c
, "Bad packed int at %d", (int)raster_pos
);
94 // There are zero_count+1 data nybbles, but we've already read the first one,
95 // so we need to read zero_count more of them.
96 for(i
=0; i
<zero_count
; i
++) {
97 val
= (val
<<4) | get_nybble(f
, raster_pos
, *nybble_pos
);
// Turn on the pixel at (pg->curpos_x, pg->curpos_y) in ch->bitmap.
// A position outside 0 <= x < pg->w, 0 <= y < pg->h is silently ignored, so
// a malformed raster cannot write outside the bitmap.
// Bitmap layout used here: ch->rowspan bytes per row, 8 pixels per byte, with
// the leftmost pixel of each byte in the most significant bit.
105 static void set_bit_at_cur_pos(struct de_bitmap_font_char
*ch
, struct page_ctx
*pg
)
110 if(pg
->curpos_x
<0 || pg
->curpos_x
>=pg
->w
) return;
111 if(pg
->curpos_y
<0 || pg
->curpos_y
>=pg
->h
) return;
113 bytepos
= pg
->curpos_y
*ch
->rowspan
+ pg
->curpos_x
/8;
114 bitpos
= pg
->curpos_x
%8;
// Bit 7 is the leftmost pixel of the byte.
115 ch
->bitmap
[bytepos
] |= 1<<(7-bitpos
);
118 // Copy row number pg->curpos_y-1 zero or more times, updating
119 // pg->curpos_y as appropriate.
// Does nothing when there is no previous row (pg->curpos_y-1 is negative).
// Copying stops early once the destination row would be at or past pg->h.
// pg->pixelcount is increased by pg->w to account for the copied pixels.
//   ch:           character whose bitmap rows are duplicated
//   pg:           decoding state (current position, dimensions, pixel count)
//   repeat_count: number of additional copies requested
120 static void repeat_row_as_needed(struct de_bitmap_font_char
*ch
, struct page_ctx
*pg
, i64 repeat_count
)
123 i64 from_row
, to_row
;
125 from_row
= pg
->curpos_y
-1;
126 if(from_row
<0) return;
128 for(z
=0; z
<repeat_count
; z
++) {
129 to_row
= pg
->curpos_y
;
130 if(to_row
>=pg
->h
) return;
131 de_memcpy(&ch
->bitmap
[to_row
*ch
->rowspan
], &ch
->bitmap
[from_row
*ch
->rowspan
], (size_t)ch
->rowspan
);
133 pg
->pixelcount
+= pg
->w
;
// Decode the raster of one character (described by 'pg') and append it to
// d->font->char_array.
//
// Overview of what the visible code does:
//  - A character with fewer than 1 pixel (w*h) is reported with de_dbg() as
//    a zero-size character to be ignored.
//  - The character array is grown when full: the new capacity is double the
//    old one, but at least num_chars+1 and at least 37 entries.
//  - The new character gets a byte-aligned bitmap of rowspan*height bytes,
//    where rowspan = (width+7)/8.
//  - Bitmap form: (w*h+7)/8 source bytes are read and copied one row at a
//    time with de_copy_bits(), from bit offset j*width in the source to the
//    start of row j in ch->bitmap (i.e. source rows are not byte-padded).
//    NOTE(review): the debug message labels dyn_f==14 as "uncompressed", so
//    this path is presumably taken when dyn_f is 14; the condition itself is
//    not visible in this excerpt.
//  - Run-length form: tokens are read nybble by nybble (see the comments at
//    the token loop below). Runs alternate colour, starting with
//    pg->start_with_black; black runs are drawn with set_bit_at_cur_pos(),
//    and repeat counts are applied via repeat_row_as_needed() when a row is
//    completed.
//  - A warning is issued if the number of pixels produced differs from w*h.
137 static void do_read_raster(deark
*c
, lctx
*d
, struct page_ctx
*pg
)
140 struct de_bitmap_font_char
*ch
;
143 i64 expected_num_pixels
;
147 int next_num_is_repeat_count
;
152 de_dbg(c
, "%scompressed character raster at %d, len=%d", pg
->dyn_f
==14?"un":"",
153 (int)pg
->raster_pos
, (int)pg
->raster_len
);
156 expected_num_pixels
= (i64
)pg
->w
* (i64
)pg
->h
;
157 if(expected_num_pixels
<1) {
158 de_dbg(c
, "ignoring zero-size character (cc=%d) at %d",
159 (int)pg
->cc
, (int)pg
->raster_pos
);
163 // Make sure we have room for the new character
164 if(d
->font
->num_chars
+1 > d
->char_array_alloc
) {
166 new_numalloc
= d
->char_array_alloc
*2;
167 if(new_numalloc
<d
->font
->num_chars
+1) new_numalloc
=d
->font
->num_chars
+1;
168 if(new_numalloc
<37) new_numalloc
=37;
169 d
->font
->char_array
= de_reallocarray(c
, d
->font
->char_array
,
170 d
->char_array_alloc
, sizeof(struct de_bitmap_font_char
),
172 d
->char_array_alloc
= new_numalloc
;
175 // Create the new character
176 char_idx
= d
->font
->num_chars
++;
178 ch
= &d
->font
->char_array
[char_idx
];
// Only negative horizontal offsets in the range -128..-1 are translated
// into extra space on the left.
181 if(pg
->hoff
<0 && (pg
->hoff
>= -128)) {
182 // Not sure if this is the right way to handle horizontal spacing, but
183 // it looks about right. (At least in a relative way. With some fonts,
184 // the glyphs definitely would have to be rendered closer together than
185 // our presentation would imply.)
186 // Some characters have positive ->hoff values, but we don't have any
187 // way to deal with that.
188 ch
->extraspace_l
= (i16
)-pg
->hoff
;
191 // The vertical offset will be normalized later, once we know the offsets
192 // of all the characters.
193 ch
->v_offset
= (int)-pg
->voff
;
195 ch
->rowspan
= (ch
->width
+7)/8;
196 ch
->bitmap
= de_malloc(c
, ch
->rowspan
* ch
->height
);
197 ch
->codepoint_nonunicode
= pg
->cc
;
// Bitmap form: the source rows are packed back to back with no padding, so
// each row is copied separately into the byte-aligned destination row.
203 srcbitmap_size
= (pg
->w
*pg
->h
+7)/8;
204 srcbitmap
= de_malloc(c
, srcbitmap_size
);
205 de_read(srcbitmap
, pg
->raster_pos
, srcbitmap_size
);
206 for(j
=0; j
<pg
->h
; j
++) {
207 de_copy_bits(srcbitmap
, j
*ch
->width
, ch
->bitmap
, j
*ch
->rowspan
*8, ch
->width
);
210 de_free(c
, srcbitmap
);
// Run-length form: 'parity' holds the colour of the current run.
216 parity
= pg
->start_with_black
;
218 next_num_is_repeat_count
= 0;
// Position of the current token in bytes (with a .5 fraction for the second
// nybble of a byte); used only in the debug messages below.
224 double initial_abs_nybble_pos
= (double)pg
->raster_pos
+ (double)nybble_pos
/2.0;
226 if(nybble_pos
>= pg
->raster_len
*2) break; // out of source data
227 if(pg
->curpos_y
>=pg
->h
) break; // reached end of image
229 v
= get_nybble(c
->infile
, pg
->raster_pos
, nybble_pos
++);
231 // The compressed data is a sequence of tokens.
232 // A token consists of one or more nybbles.
233 // A token beginning with nybble value 0 through 13 represents a number.
234 // A number is either a "run count" or a "repeat count".
235 // 14 and 15 are special one-nybble tokens.
236 // 14 indicates that the next number is a repeat count (instead of a run count).
237 // 15 means to set the current repeat count to 1.
240 next_num_is_repeat_count
= 1;
241 if(c
->debug_level
>=3) {
242 de_dbg3(c
, "[%.1f] n=%d; repeat_count=...", initial_abs_nybble_pos
, (int)v
);
246 else if(v
==15) { // v==15: repeat count = 1
247 if(c
->debug_level
>=3) {
248 de_dbg3(c
, "[%.1f] n=%d; repeat_count=1", initial_abs_nybble_pos
, (int)v
);
254 // If we get here, then this nybble represents a number, or the start of a number.
256 if(v
==0) { // large run count
257 if(!get_packed_int(c
->infile
, pg
->raster_pos
, &nybble_pos
, 1, &number
)) goto done
;
258 number
= number
- 15 + (13-pg
->dyn_f
)*16 + pg
->dyn_f
;
260 else if(v
<=pg
->dyn_f
) { // one-nybble run count
263 else if(v
<=13) { // two-nybble run count
264 v1
= get_nybble(c
->infile
, pg
->raster_pos
, nybble_pos
++);
265 number
= ((i64
)v
-pg
->dyn_f
-1)*16 + v1
+ pg
->dyn_f
+ 1;
268 if(next_num_is_repeat_count
) {
269 if(c
->debug_level
>=3) {
270 de_dbg3(c
, "[%.1f] ...%d", initial_abs_nybble_pos
, (int)number
);
272 repeat_count
= number
;
273 next_num_is_repeat_count
= 0;
277 // If we get here, we have a number that represents a run count (not a
279 // Apply it to the character bitmap.
283 if(c
->debug_level
>=3) {
284 de_dbg3(c
, "[%.1f] n=%d; run_count=%d %s", initial_abs_nybble_pos
,
285 (int)v
, (int)run_count
, parity
?"B":"W");
288 for(k
=0; k
<run_count
; k
++) {
291 set_bit_at_cur_pos(ch
, pg
);
295 if(pg
->curpos_x
>=pg
->w
) {
299 // A repeat count applies to the "row on which the first pixel of
300 // the next run count will lie".
301 // This means that repeats should be applied immediately after the
302 // last pixel of a row has been emitted (as opposed to immediately
303 // before the first pixel of a row is emitted).
304 repeat_row_as_needed(ch
, pg
, repeat_count
);
// A mismatch means the raster data did not describe exactly w*h pixels.
311 if(pg
->pixelcount
!= expected_num_pixels
) {
312 de_warn(c
, "Expected %d pixels, got %d (codepoint %d)", (int)expected_num_pixels
,
313 (int)pg
->pixelcount
, (int)pg
->cc
);
317 de_dbg_indent(c
, -1);
// Parse the character descriptor that starts at 'pos', decode its raster with
// do_read_raster(), and report the descriptor's total size via *bytesused.
//
// Flag byte layout, as used by the visible code:
//   high 4 bits : dyn_f
//   bit 0x08    : start_with_black (first run is black)
//   bits 0x03   : high-order bits of the packet length 'pl' in the short and
//                 extended short formats
//
// Short format: pl is the byte at pos+1 plus (flagbyte&0x03)<<8; cc, dm, w, h,
// hoff and voff are one byte each; the raster starts at pos+11.
// Extended short format: pl is the 16-bit big-endian value at pos+1 plus
// (flagbyte&0x03)<<16; dm, w, h, hoff and voff are 16 bits each; the raster
// starts at pos+17.
// Any other format is reported with de_err() as unsupported.
//
// The descriptor ends at pos+tfm_offs+pl, which gives both the raster length
// and *bytesused.
// The caller in de_run_pkfont() treats a zero return value as failure.
// NOTE(review): the assignments to tfm_offs and lsb3, and the release of
// 'pg', are not visible in this excerpt.
320 static int do_char_descr(deark
*c
, lctx
*d
, i64 pos
, i64
*bytesused
)
324 #define CHAR_PREAMBLE_FORMAT_SHORT 1
325 #define CHAR_PREAMBLE_FORMAT_EXT_SHORT 2
326 #define CHAR_PREAMBLE_FORMAT_LONG 3
327 int char_preamble_format
;
330 struct page_ctx
*pg
= NULL
;
333 pg
= de_malloc(c
, sizeof(struct page_ctx
));
335 de_dbg(c
, "character descriptor at %d", (int)pos
);
338 flagbyte
= de_getbyte(pos
);
339 pg
->dyn_f
= ((i64
)flagbyte
)>>4;
340 de_dbg(c
, "dyn_f: %d", (int)pg
->dyn_f
);
342 // Character preamble format: (lsb=...)
344 // 4-6: extended short format
348 pg
->start_with_black
= (flagbyte
&0x8)?1:0;
351 char_preamble_format
= CHAR_PREAMBLE_FORMAT_LONG
;
354 char_preamble_format
= CHAR_PREAMBLE_FORMAT_EXT_SHORT
;
357 char_preamble_format
= CHAR_PREAMBLE_FORMAT_SHORT
;
360 if(char_preamble_format
==CHAR_PREAMBLE_FORMAT_SHORT
) {
361 pl
= (i64
)de_getbyte(pos
+1);
362 pl
|= ((i64
)(flagbyte
&0x03))<<8;
363 pg
->cc
= (i32
)de_getbyte(pos
+2);
365 pg
->tfm
= do_getu24be(c
->infile
, pos
+tfm_offs
);
366 pg
->dm
= (i64
)de_getbyte(pos
+6);
367 pg
->w
= (int)de_getbyte(pos
+7);
368 pg
->h
= (int)de_getbyte(pos
+8);
369 pg
->hoff
= dbuf_geti8(c
->infile
, pos
+9);
370 pg
->voff
= dbuf_geti8(c
->infile
, pos
+10);
371 pg
->raster_pos
= pos
+ 11;
373 else if(char_preamble_format
==CHAR_PREAMBLE_FORMAT_EXT_SHORT
) {
374 pl
= de_getu16be(pos
+1);
375 pl
|= ((i64
)(flagbyte
&0x03))<<16;
376 pg
->cc
= (i32
)de_getbyte(pos
+3);
378 pg
->tfm
= do_getu24be(c
->infile
, pos
+tfm_offs
);
379 pg
->dm
= de_getu16be(pos
+7);
380 pg
->w
= (int)de_getu16be(pos
+9);
381 pg
->h
= (int)de_getu16be(pos
+11);
382 pg
->hoff
= de_geti16be(pos
+13);
383 pg
->voff
= de_geti16be(pos
+15);
384 pg
->raster_pos
= pos
+ 17;
387 de_err(c
, "Unsupported character preamble format (%d)", (int)lsb3
);
391 de_dbg(c
, "pl=%d cc=%d tfm=%d dm=%d w=%d h=%d hoff=%d voff=%d",
392 (int)pl
, (int)pg
->cc
, (int)pg
->tfm
, (int)pg
->dm
, (int)pg
->w
, (int)pg
->h
,
393 (int)pg
->hoff
, (int)pg
->voff
);
// The raster occupies everything from raster_pos to the end of the
// descriptor, which is at pos+tfm_offs+pl.
395 pg
->raster_len
= (pos
+tfm_offs
+pl
)-pg
->raster_pos
;
396 do_read_raster(c
, d
, pg
);
398 *bytesused
= tfm_offs
+ pl
;
402 de_dbg_indent(c
, -1);
// Return a short human-readable name for a flag (command) byte, for use in
// debug output. Every value below 240 is a character descriptor; the named
// cases that follow cover the special "xxx"/"yyy" commands, and 245
// (postamble), 246 (no-op) and 247 (preamble).
// NOTE(review): the value returned for bytes not matched by any case is not
// visible in this excerpt.
407 static const char *get_flagbyte_name(u8 flagbyte
)
409 if(flagbyte
<240) return "character descriptor";
411 case PK_XXX1
: return "special xxx1";
412 case PK_XXX2
: return "special xxx2";
413 case PK_XXX3
: return "special xxx3";
414 case PK_XXX4
: return "special xxx4";
415 case PK_YYY
: return "special yyy";
416 case 245: return "postamble";
417 case 246: return "no-op";
418 case 247: return "preamble";
// Compute font-wide metrics once all characters have been read, then
// normalize the per-character vertical offsets.
//  - nominal_width is raised to the width of the widest character (it is
//    never lowered below the value it already has).
//  - nominal_height is the distance from the smallest v_offset to the largest
//    v_offset+height over all characters.
//  - Every v_offset is then shifted so that the smallest one becomes 0.
// The +/-1000000 initial values act as sentinels, so the first character
// always replaces them.
423 static void scan_and_fixup_font(deark
*c
, lctx
*d
)
425 struct de_bitmap_font_char
*ch
;
427 int min_v_pos
= 1000000;
428 int max_v_pos
= -1000000;
430 // Find the maximum character width, and the bounding box of the character heights.
431 for(i
=0; i
<d
->font
->num_chars
; i
++) {
432 ch
= &d
->font
->char_array
[i
];
434 if(ch
->width
> d
->font
->nominal_width
)
435 d
->font
->nominal_width
= ch
->width
;
437 if(ch
->v_offset
< min_v_pos
)
438 min_v_pos
= ch
->v_offset
;
440 if(ch
->v_offset
+ ch
->height
> max_v_pos
)
441 max_v_pos
= ch
->v_offset
+ ch
->height
;
444 d
->font
->nominal_height
= max_v_pos
- min_v_pos
;
446 // Another pass, to fixup the v_offsets so that the minimum one is 0.
447 for(i
=0; i
<d
->font
->num_chars
; i
++) {
448 ch
= &d
->font
->char_array
[i
];
450 ch
->v_offset
-= min_v_pos
;
// Module entry point: convert a PK font file to an image.
//
// The file is processed as a sequence of commands, each introduced by a flag
// byte, until 'pos' reaches the end of the input file:
//  - Flag bytes >= 240 are special commands. The visible code reads a data
//    length 'dlen' stored in 1, 2, 3 or 4 bytes after the flag byte (giving a
//    command size of 2, 3, 4 or 5 bytes plus dlen), handles the postamble and
//    the preamble (via do_preamble()), and reports anything else with
//    de_err() as an unsupported command. For flag bytes 240..244 with a
//    nonzero dlen, the command's data is hex-dumped to the debug output.
//  - Every other flag byte starts a character descriptor, handled by
//    do_char_descr(); a failure there jumps to the cleanup code.
//  - The loop also stops if a command reports fewer than 1 byte used, which
//    guards against looping forever on the same position.
// Afterwards the font metrics are finalized with scan_and_fixup_font() and
// the font is rendered with de_font_bitmap_font_to_image().
// Cleanup frees each character's bitmap, the character array, and the font.
// NOTE(review): which case labels select which dlen width, and how 'pos' and
// chars_in_file are advanced, are not visible in this excerpt.
454 static void de_run_pkfont(deark
*c
, de_module_params
*mparams
)
461 i64 chars_in_file
= 0;
463 d
= de_malloc(c
, sizeof(lctx
));
464 d
->font
= de_create_bitmap_font(c
);
465 d
->font
->has_nonunicode_codepoints
= 1;
468 while(pos
< c
->infile
->len
) {
469 flagbyte
= de_getbyte(pos
);
470 de_dbg(c
, "flag byte at %d: 0x%02x (%s)", (int)pos
, (unsigned int)flagbyte
,
471 get_flagbyte_name(flagbyte
));
474 if(flagbyte
>= 240) {
480 dlen
= (i64
)de_getbyte(pos
+1);
482 bytesused
= 2 + dlen
;
485 dlen
= de_getu16be(pos
+1);
487 bytesused
= 3 + dlen
;
490 dlen
= dbuf_getint_ext(c
->infile
, pos
+1, 3, 0, 0);
492 bytesused
= 4 + dlen
;
495 dlen
= de_getu32be(pos
+1);
497 bytesused
= 5 + dlen
;
504 case 245: // postamble
510 do_preamble(c
, d
, pos
, &bytesused
);
513 de_err(c
, "Unsupported command: %d at %d", (int)flagbyte
, (int)pos
);
517 if(dlen
>0 && flagbyte
>=240 && flagbyte
<=244) {
519 de_dbg_hexdump(c
, c
->infile
, dpos
, dlen
, 256, NULL
, 0x1);
520 de_dbg_indent(c
, -1);
525 if(!do_char_descr(c
, d
, pos
, &bytesused
)) goto done
;
// A command that consumed nothing would never advance; stop instead.
528 if(bytesused
<1) break;
533 de_dbg(c
, "number of characters: %d (%d processed)", (int)chars_in_file
,
534 (int)d
->font
->num_chars
);
536 scan_and_fixup_font(c
, d
);
537 de_font_bitmap_font_to_image(c
, d
->font
, NULL
, 0);
// Release each character's bitmap before the array that holds them.
541 if(d
->font
->char_array
) {
542 for(i
=0; i
<d
->font
->num_chars
; i
++) {
543 de_free(c
, d
->font
->char_array
[i
].bitmap
);
545 de_free(c
, d
->font
->char_array
);
547 de_destroy_bitmap_font(c
, d
->font
);
// Identification callback: tests whether the input file begins with the two
// bytes 0xf7 0x59, i.e. the preamble command (247) followed by the
// identification byte (89).
// NOTE(review): the confidence values returned for a match and for a
// mismatch are not visible in this excerpt.
552 static int de_identify_pkfont(deark
*c
)
554 if(!dbuf_memcmp(c
->infile
, 0, "\xf7\x59", 2))
// Module registration: fills in the module's description and its run and
// identify callbacks in 'mi'.
// NOTE(review): any other fields set here (such as a module id) are not
// visible in this excerpt.
559 void de_module_pkfont(deark
*c
, struct deark_module_info
*mi
)
562 mi
->desc
= "PK Font";
563 mi
->run_fn
= de_run_pkfont
;
564 mi
->identify_fn
= de_identify_pkfont
;