zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / fnt.c
blobdf941f9a7b33b0dffc3ef45639944599723c38ef
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Windows FNT font format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_fnt);
12 struct char_table_entry {
13 int pixel_width;
14 u8 is_abs_space;
15 i64 v1_pixel_offset;
16 i64 v23_data_pos;
19 typedef struct localctx_struct {
20 unsigned int fnt_version;
21 unsigned int dfType;
22 int nominal_char_width;
23 i64 char_height;
24 i64 char_table_pos;
25 i64 char_entry_size; // can be 0
26 i64 char_table_size; // can be 0
27 i64 dfWidthBytes;
28 i64 dfBitsOffset;
30 u8 first_char;
31 u8 last_char;
32 i64 num_chars_stored;
33 struct char_table_entry *char_table; // [num_chars_stored]
35 i64 dfPixWidth;
36 i64 dfPixHeight;
37 int detected_max_width;
39 unsigned int dfPoints;
40 i64 dfFace; // Offset of font face name
41 u8 dfCharSet;
43 int is_vector;
44 int has_abs_space_char;
45 de_encoding encoding;
47 de_finfo *fi;
48 } lctx;
50 static void get_char_bitmap_v1(deark *c, lctx *d,
51 struct char_table_entry *cte, struct de_bitmap_font_char *ch)
53 i64 row;
55 ch->rowspan = ((i64)cte->pixel_width+7)/8;
56 if(d->char_height * ch->rowspan > 32768) return;
57 ch->bitmap = de_malloc(c, d->char_height * ch->rowspan);
59 for(row=0; row<d->char_height; row++) {
60 i64 k;
62 for(k=0; k<(i64)cte->pixel_width; k++){
63 u8 b;
64 b = de_get_bits_symbol(c->infile, 1, d->dfBitsOffset + row*d->dfWidthBytes,
65 cte->v1_pixel_offset+k);
66 if(b) {
67 ch->bitmap[row*ch->rowspan + k/8] |= 1<<(7-k%8);
73 static void get_char_bitmap_v23(deark *c, lctx *d,
74 struct char_table_entry *cte, struct de_bitmap_font_char *ch)
76 i64 num_tiles;
77 i64 tile;
78 i64 row;
80 num_tiles = ((i64)cte->pixel_width+7)/8;
81 ch->rowspan = num_tiles;
82 if(d->char_height * num_tiles > 32768) return;
83 ch->bitmap = de_malloc(c, d->char_height * num_tiles);
85 for(row=0; row<d->char_height; row++) {
86 for(tile=0; tile<num_tiles; tile++) {
87 ch->bitmap[row * ch->rowspan + tile] =
88 de_getbyte(cte->v23_data_pos + tile*d->char_height + row);
93 // create bitmap_font object
94 static void do_make_image(deark *c, lctx *d)
96 struct de_bitmap_font *font = NULL;
97 i64 i;
98 struct de_encconv_state es_main;
99 struct de_encconv_state es_dec_special_gr;
101 de_dbg(c, "reading bitmaps");
102 de_dbg_indent(c, 1);
104 font = de_create_bitmap_font(c);
106 font->has_nonunicode_codepoints = 1;
107 if(d->encoding!=DE_ENCODING_UNKNOWN)
108 font->has_unicode_codepoints = 1;
109 font->prefer_unicode = 0;
111 font->nominal_width = d->nominal_char_width;
112 font->nominal_height = (int)d->char_height;
113 font->num_chars = d->num_chars_stored;
114 font->char_array = de_mallocarray(c, font->num_chars, sizeof(struct de_bitmap_font_char));
115 de_encconv_init(&es_main, d->encoding);
116 de_encconv_init(&es_dec_special_gr, DE_ENCODING_DEC_SPECIAL_GRAPHICS);
118 for(i=0; i<d->num_chars_stored; i++) {
119 i32 char_index;
120 struct char_table_entry *cte = &d->char_table[i];
121 struct de_bitmap_font_char *ch = &font->char_array[i];
123 if(cte->is_abs_space) {
124 // Arbitrarily put the "absolute space" char at codepoint 256,
125 // and U+2002 EN SPACE (best I can do).
126 ch->codepoint_nonunicode = 256;
127 ch->codepoint_unicode = 0x2002;
129 else {
130 char_index = (i32)d->first_char + (i32)i;
132 ch->codepoint_nonunicode = char_index;
134 if(font->has_unicode_codepoints) {
135 if(char_index<32 && d->dfCharSet==0) {
136 // This kind of font usually doesn't have glyphs below 32.
137 // If it does, assume that they are VT100 line drawing characters.
138 ch->codepoint_unicode =
139 de_char_to_unicode_ex(95+char_index, &es_dec_special_gr);
141 else {
142 ch->codepoint_unicode = de_char_to_unicode_ex(char_index, &es_main);
147 ch->width = cte->pixel_width;
148 ch->height = (int)d->char_height;
150 if(d->fnt_version==0x100) {
151 get_char_bitmap_v1(c, d, cte, ch);
153 else {
154 get_char_bitmap_v23(c, d, cte, ch);
158 de_font_bitmap_font_to_image(c, font, d->fi, 0);
160 if(font) {
161 if(font->char_array) {
162 for(i=0; i<font->num_chars; i++) {
163 de_free(c, font->char_array[i].bitmap);
165 de_free(c, font->char_array);
167 de_destroy_bitmap_font(c, font);
170 de_dbg_indent(c, -1);
173 // Note that there is similar code in exe.c. Any changed made here should
174 // potentially be copied.
175 static void read_face_name(deark *c, lctx *d)
177 de_ucstring *s = NULL;
179 if(d->dfFace<1) return;
181 de_dbg(c, "face name at %"I64_FMT, d->dfFace);
182 de_dbg_indent(c, 1);
184 // The facename is terminated with a NUL byte.
185 // There seems to be no defined limit to its length, but Windows font face
186 // names traditionally have to be quite short.
187 s = ucstring_create(c);
188 dbuf_read_to_ucstring(c->infile, d->dfFace, 64, s, DE_CONVFLAG_STOP_AT_NUL,
189 DE_ENCODING_ASCII);
191 de_dbg(c, "face name: \"%s\"", ucstring_getpsz_d(s));
193 if(!c->filenames_from_file) goto done;
195 if(!d->fi) d->fi = de_finfo_create(c);
196 ucstring_printf(s, DE_ENCODING_LATIN1, "-%u", d->dfPoints);
197 de_finfo_set_name_from_ucstring(c, d->fi, s, 0);
199 done:
200 de_dbg_indent(c, -1);
201 ucstring_destroy(s);
204 static int do_read_header(deark *c, lctx *d)
206 i64 dfMaxWidth;
207 int retval = 0;
208 int saved_indent_level;
210 de_dbg_indent_save(c, &saved_indent_level);
212 de_dbg(c, "fixed header at %d", (int)0);
213 de_dbg_indent(c, 1);
215 d->fnt_version = (unsigned int)de_getu16le(0);
216 de_dbg(c, "dfVersion: 0x%04x", d->fnt_version);
217 if(d->fnt_version!=0x100 && d->fnt_version!=0x200 && d->fnt_version!=0x300) {
218 de_err(c, "This version of FNT (0x%04x) is not supported", d->fnt_version);
219 goto done;
222 d->dfType = (unsigned int)de_getu16le(66);
223 d->is_vector = (d->dfType&0x1)?1:0;
224 de_dbg(c, "dfType: 0x%04x (%s)", d->dfType, d->is_vector?"vector":"bitmap");
226 d->dfPoints = (unsigned int)de_getu16le(68);
227 de_dbg(c, "dfPoints: %u", d->dfPoints);
229 d->dfPixWidth = de_getu16le(86);
230 de_dbg(c, "dfPixWidth: %d", (int)d->dfPixWidth);
231 d->dfPixHeight = de_getu16le(88);
232 d->char_height = d->dfPixHeight;
233 de_dbg(c, "dfPixHeight: %d", (int)d->dfPixHeight);
235 d->dfCharSet = de_getbyte(85);
236 de_dbg(c, "charset: 0x%02x (%s)", (int)d->dfCharSet,
237 fmtutil_get_windows_charset_name(d->dfCharSet));
238 if(d->dfCharSet==0x00) { // "ANSI"
239 d->encoding = DE_ENCODING_WINDOWS1252; // Guess
241 else if(d->dfCharSet==0xff) { // "OEM"
242 d->encoding = DE_ENCODING_CP437_G; // Guess
244 else {
245 d->encoding = DE_ENCODING_UNKNOWN;
248 dfMaxWidth = de_getu16le(93);
249 de_dbg(c, "dfMaxWidth: %d", (int)dfMaxWidth);
251 if(d->dfPixWidth!=dfMaxWidth && d->dfPixWidth!=0) {
252 de_warn(c, "dfMaxWidth (%d) does not equal dfPixWidth (%d)",
253 (int)dfMaxWidth, (int)d->dfPixWidth);
256 d->first_char = de_getbyte(95);
257 de_dbg(c, "first char: %d", (int)d->first_char);
258 d->last_char = de_getbyte(96);
259 de_dbg(c, "last char: %d", (int)d->last_char);
261 // 97 = dfDefaultChar
262 // 98 = dfBreakChar
264 d->dfWidthBytes = de_getu16le(99);
265 de_dbg(c, "dfWidthBytes: %d%s", (int)d->dfWidthBytes,
266 ((d->fnt_version>=0x200 || d->is_vector) ? " [unused]":""));
267 // 101-104 = dfDevice
269 d->dfFace = de_getu32le(105);
270 de_dbg(c, "dfFace: %u", (unsigned int)d->dfFace);
272 // 109-112 = dfBitsPointer
273 d->dfBitsOffset = de_getu32le(113);
274 de_dbg(c, "dfBitsOffset: %"I64_FMT, d->dfBitsOffset);
276 // Apparently, the first 117 bytes (through the dfBitsOffset field) are
277 // common to all versions
279 retval = 1;
280 done:
281 de_dbg_indent_restore(c, saved_indent_level);
282 return retval;
285 // For v1 fixed-width raster fonts.
286 // There is no char table (well, its size is 0), so generate a fake table.
287 static void do_create_char_table_v1(deark *c, lctx *d)
289 i64 k;
291 for(k=0; k<d->num_chars_stored; k++) {
292 d->char_table[k].pixel_width = (int)d->dfPixWidth;
293 d->char_table[k].v1_pixel_offset = d->dfPixWidth * k;
297 // For v1 variable-width raster fonts
298 static void do_read_char_table_v1(deark *c, lctx *d)
300 i64 k;
301 i64 pos;
302 i64 next_char_offset;
304 for(k=0; k<d->num_chars_stored; k++) {
305 pos = d->char_table_pos + d->char_entry_size*k;
306 d->char_table[k].v1_pixel_offset = de_getu16le(pos);
307 next_char_offset = de_getu16le(pos+2);
308 d->char_table[k].pixel_width = (int)(next_char_offset - d->char_table[k].v1_pixel_offset);
309 if(d->char_table[k].pixel_width<0) d->char_table[k].pixel_width=0;
313 // For all v2 and v3 raster fonts
314 static void do_read_char_table_v23(deark *c, lctx *d)
316 i64 k;
317 i64 pos;
319 for(k=0; k<d->num_chars_stored; k++) {
320 pos = d->char_table_pos + d->char_entry_size*k;
321 d->char_table[k].pixel_width = (int)de_getu16le(pos);
322 if(d->char_entry_size==6) {
323 d->char_table[k].v23_data_pos = de_getu32le(pos+2);
325 else {
326 d->char_table[k].v23_data_pos = de_getu16le(pos+2);
331 // Print debug info for each char, find the max char width,
332 // and other tasks.
333 static int do_postprocess_char_table(deark *c, lctx *d)
335 i64 k;
336 int retval = 0;
338 d->detected_max_width = 0;
340 for(k=0; k<d->num_chars_stored; k++) {
341 int codepoint;
343 if(d->has_abs_space_char && (k==d->num_chars_stored-1)) {
344 d->char_table[k].is_abs_space = 1;
347 // TODO: Maybe codepoint should be a field in char_table_entry.
348 if(d->char_table[k].is_abs_space) {
349 codepoint = 256;
351 else {
352 codepoint = (int)((int)d->first_char + (int)k);
355 if(d->fnt_version==0x100) {
356 de_dbg2(c, "char[%d] codepoint=%d pixoffset=%d width=%d", (int)k, codepoint,
357 (int)d->char_table[k].v1_pixel_offset, d->char_table[k].pixel_width);
359 else {
360 de_dbg2(c, "char[%d] codepoint=%d bitmappos=%d width=%d", (int)k, codepoint,
361 (int)d->char_table[k].v23_data_pos, d->char_table[k].pixel_width);
364 if(d->char_table[k].pixel_width > d->detected_max_width) {
365 d->detected_max_width = d->char_table[k].pixel_width;
369 de_dbg(c, "detected max width: %d", d->detected_max_width);
371 if(d->detected_max_width<1) goto done;
372 d->nominal_char_width = d->detected_max_width;
374 retval = 1;
375 done:
376 return retval;
379 static int do_read_char_table(deark *c, lctx *d)
381 int retval = 0;
382 int saved_indent_level;
384 de_dbg_indent_save(c, &saved_indent_level);
385 if(d->is_vector) return 0;
387 // There is an extra character at the end of the table that is an
388 // "absolute-space" character, and is guaranteed to be blank.
389 d->has_abs_space_char = (d->fnt_version>=0x200);
391 d->num_chars_stored = (i64)d->last_char - d->first_char + 1;
392 if(d->has_abs_space_char) d->num_chars_stored++;
393 de_dbg(c, "number of characters: %d", (int)d->num_chars_stored);
395 if(d->fnt_version==0x100) {
396 d->char_table_pos = 117;
397 if(d->dfPixWidth==0) { // proportional raster font
398 d->char_entry_size = 2;
400 else { // fixed-width raster font
401 d->char_entry_size = 0;
404 else if(d->fnt_version==0x200) {
405 d->char_table_pos = 118;
406 d->char_entry_size = 4;
408 else { // version 0x300
409 d->char_table_pos = 148;
410 d->char_entry_size = 6;
413 d->char_table_size = d->char_entry_size * d->num_chars_stored;
414 de_dbg(c, "character table at %d, size %d, %d bytes/entry",
415 (int)d->char_table_pos, (int)d->char_table_size, (int)d->char_entry_size);
416 de_dbg_indent(c, 1);
418 d->char_table = de_mallocarray(c, d->num_chars_stored, sizeof(struct char_table_entry));
420 if(d->char_table_size==0) {
421 do_create_char_table_v1(c, d);
423 else if(d->fnt_version==0x100) {
424 do_read_char_table_v1(c, d);
426 else {
427 do_read_char_table_v23(c, d);
430 if(!do_postprocess_char_table(c, d)) goto done;
432 de_dbg_indent(c, -1);
434 retval = 1;
435 done:
436 de_dbg_indent_restore(c, saved_indent_level);
437 return retval;
440 static void de_run_fnt(deark *c, de_module_params *mparams)
442 lctx *d = NULL;
444 d = de_malloc(c, sizeof(lctx));
446 if(!do_read_header(c, d)) goto done;
447 read_face_name(c, d);
449 if(d->is_vector) {
450 de_err(c, "This is a vector font. Not supported.");
451 goto done;
454 if(d->dfType & 0x4) {
455 de_err(c, "This type of font is not supported (dfType=0x%04x)", d->dfType);
456 goto done;
459 if(!do_read_char_table(c, d)) goto done;
461 do_make_image(c, d);
462 done:
463 if(d) {
464 de_finfo_destroy(c, d->fi);
465 de_free(c, d->char_table);
466 de_free(c, d);
470 static int de_identify_fnt(deark *c)
472 i64 ver;
474 // TODO: Better format detection.
475 if(de_input_file_has_ext(c, "fnt")) {
476 ver = de_getu16le(0);
477 if(ver==0x0100 || ver==0x0200 || ver==0x0300)
478 return 10;
480 return 0;
483 void de_module_fnt(deark *c, struct deark_module_info *mi)
485 mi->id = "fnt";
486 mi->desc = "Windows FNT font";
487 mi->run_fn = de_run_fnt;
488 mi->identify_fn = de_identify_fnt;