bmp: Rewrote the RLE decompressor
[deark.git] / modules / psf.c
blob685b799fc3c4cdd5f5c3399dd4fb0cb3b2f1baf0
// This file is part of Deark.
// Copyright (C) 2016 Jason Summers
// See the file COPYING for terms of use.

// PSF font (PC Screen Font)
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_psf);
11 typedef struct localctx_struct {
12 int version;
13 u32 psf2_version;
14 u32 flags;
15 u8 mode;
16 i64 headersize;
17 i64 num_glyphs;
18 i64 glyph_width, glyph_height;
19 i64 bytes_per_glyph;
20 i64 font_data_size;
21 int has_unicode_table;
22 i64 unicode_table_pos;
24 #define MAX_EXTRA_CODEPOINTS 2000
25 int read_extra_codepoints;
26 i64 num_chars_alloc;
27 i64 index_of_first_extra_codepoint;
28 i64 num_extra_codepoints;
29 } lctx;
31 static void do_extra_codepoint(deark *c, lctx *d, struct de_bitmap_font *font,
32 i64 cur_idx, i32 n)
34 i64 extra_idx;
36 if(!d->read_extra_codepoints) return;
37 if(d->num_extra_codepoints >= MAX_EXTRA_CODEPOINTS) return;
39 extra_idx = d->index_of_first_extra_codepoint + d->num_extra_codepoints;
40 de_dbg2(c, "char[%d] alias [%d] = U+%04x", (int)cur_idx, (int)extra_idx,
41 (unsigned int)n);
42 if(n == font->char_array[cur_idx].codepoint_unicode) {
43 de_dbg2(c, "ignoring superfluous alias");
44 return;
46 font->char_array[extra_idx].codepoint_unicode = n;
47 font->char_array[extra_idx].bitmap = font->char_array[cur_idx].bitmap;
48 d->num_extra_codepoints++;
51 static void do_psf1_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
53 i64 cur_idx;
54 i64 pos;
55 int got_cp;
56 int found_fffe;
57 i32 n;
59 de_dbg(c, "Unicode table at %d", (int)d->unicode_table_pos);
60 de_dbg_indent(c, 1);
62 pos = d->unicode_table_pos;
63 cur_idx = 0;
64 got_cp = 0; // Have we set the codepoint for glyph[cur_idx]?
65 found_fffe = 0;
67 while(1) {
68 if(cur_idx >= d->num_glyphs) break;
69 if(pos+1 >= c->infile->len) break;
70 n = (i32)de_getu16le(pos);
71 pos+=2;
73 if(n==0xffff) {
74 if(!got_cp) {
75 de_warn(c, "Missing codepoint for char #%d", (int)cur_idx);
77 cur_idx++;
78 got_cp = 0;
79 found_fffe = 0;
80 continue;
82 else if(n==0xfffe) {
83 found_fffe = 1;
86 if(found_fffe) {
87 // Anything after 0xfffe is a multi-codepoint character, which we
88 // don't support.
89 continue;
92 if(!got_cp) {
93 de_dbg2(c, "char[%d] = U+%04x", (int)cur_idx, (unsigned int)n);
94 font->char_array[cur_idx].codepoint_unicode = n;
95 got_cp = 1;
96 continue;
99 // This is an "extra" codepoint for the current glyph.
100 do_extra_codepoint(c, d, font, cur_idx, n);
103 font->has_unicode_codepoints = 1;
104 font->prefer_unicode = 1;
106 de_dbg_indent(c, -1);
109 static void do_psf2_unicode_table(deark *c, lctx *d, struct de_bitmap_font *font)
111 i64 cur_idx;
112 i64 pos;
113 int ret;
114 i64 foundpos;
115 i64 char_data_len;
116 u8 char_data_buf[200];
117 i32 ch;
118 i64 utf8len;
120 de_dbg(c, "Unicode table at %d", (int)d->unicode_table_pos);
121 de_dbg_indent(c, 1);
123 pos = d->unicode_table_pos;
124 cur_idx = 0;
125 while(1) {
126 i64 pos_in_char_data;
127 i64 cp_idx;
129 if(cur_idx >= d->num_glyphs) break;
130 if(pos >= c->infile->len) break;
132 // Figure out the size of the data for this glyph
133 ret = dbuf_search_byte(c->infile, 0xff, pos,
134 c->infile->len - pos, &foundpos);
135 if(!ret) break;
136 char_data_len = foundpos - pos;
137 if(char_data_len<0) char_data_len=0;
138 else if(char_data_len>(i64)sizeof(char_data_buf)) char_data_len=(i64)sizeof(char_data_buf);
140 // Read all the data for this glyph
141 de_read(char_data_buf, pos, char_data_len);
143 // Read the codepoints for this glyph
144 cp_idx = 0;
145 pos_in_char_data = 0;
146 while(1) {
147 if(pos_in_char_data >= char_data_len) break;
148 if(cp_idx > MAX_EXTRA_CODEPOINTS) break; // sanity check
150 ret = de_utf8_to_uchar(&char_data_buf[pos_in_char_data], char_data_len-pos_in_char_data,
151 &ch, &utf8len);
152 if(!ret) {
153 // If there are any multi-codepoint aliases for this glyph, we
154 // expect de_utf8_to_uchar() to fail when it hits the 0xfe byte.
155 // So, this is not necessarily an error.
156 break;
159 if(cp_idx==0) {
160 // This is the primary Unicode codepoint for this glyph
161 de_dbg2(c, "char[%d] = U+%04x", (int)cur_idx, (unsigned int)ch);
162 font->char_array[cur_idx].codepoint_unicode = ch;
164 else {
165 do_extra_codepoint(c, d, font, cur_idx, ch);
168 cp_idx++;
169 pos_in_char_data += utf8len;
172 if(cp_idx==0) {
173 de_warn(c, "Missing codepoint for char #%d", (int)cur_idx);
176 // Advance to the next glyph
177 pos = foundpos+1;
178 cur_idx++;
181 font->has_unicode_codepoints = 1;
182 font->prefer_unicode = 1;
184 de_dbg_indent(c, -1);
187 static void do_glyphs(deark *c, lctx *d)
189 struct de_bitmap_font *font = NULL;
190 u8 *font_data = NULL;
191 i64 i;
192 i64 glyph_rowspan;
194 font = de_create_bitmap_font(c);
195 font->has_nonunicode_codepoints = 1;
196 font->nominal_width = (int)d->glyph_width;
197 font->nominal_height = (int)d->glyph_height;
198 font->num_chars = d->num_glyphs; // This may increase later
199 glyph_rowspan = (d->glyph_width+7)/8;
201 d->num_chars_alloc = d->num_glyphs;
202 if(d->read_extra_codepoints)
203 d->num_chars_alloc += MAX_EXTRA_CODEPOINTS;
205 d->index_of_first_extra_codepoint = d->num_glyphs;
206 d->num_extra_codepoints = 0;
208 font->char_array = de_mallocarray(c, d->num_chars_alloc, sizeof(struct de_bitmap_font_char));
210 font_data = de_malloc(c, d->font_data_size);
211 de_read(font_data, d->headersize, d->font_data_size);
213 for(i=0; i<d->num_chars_alloc; i++) {
214 font->char_array[i].width = font->nominal_width;
215 font->char_array[i].height = font->nominal_height;
216 font->char_array[i].rowspan = glyph_rowspan;
217 if(i<d->num_glyphs)
218 font->char_array[i].codepoint_nonunicode = (i32)i;
219 else
220 font->char_array[i].codepoint_nonunicode = DE_CODEPOINT_INVALID;
221 font->char_array[i].codepoint_unicode = DE_CODEPOINT_INVALID;
222 if(i<d->num_glyphs)
223 font->char_array[i].bitmap = &font_data[i*d->bytes_per_glyph];
226 if(d->has_unicode_table) {
227 if(d->version==2)
228 do_psf2_unicode_table(c, d, font);
229 else
230 do_psf1_unicode_table(c, d, font);
233 if(d->num_extra_codepoints>0) {
234 font->num_chars = d->index_of_first_extra_codepoint + d->num_extra_codepoints;
235 de_dbg(c, "codepoints aliases: %d", (int)d->num_extra_codepoints);
236 de_dbg(c, "total characters: %d", (int)font->num_chars);
239 de_font_bitmap_font_to_image(c, font, NULL, 0);
241 if(font) {
242 de_free(c, font->char_array);
243 de_destroy_bitmap_font(c, font);
245 de_free(c, font_data);
248 static void do_psf1_header(deark *c, lctx *d)
250 i64 pos = 0;
252 de_dbg(c, "PSFv1 header at %d", (int)pos);
253 de_dbg_indent(c, 1);
255 d->headersize = 4;
257 d->mode = de_getbyte(2);
258 de_dbg(c, "mode: 0x%02x", (unsigned int)d->mode);
259 de_dbg_indent(c, 1);
260 d->num_glyphs = (d->mode & 0x01) ? 512 : 256;
261 de_dbg(c, "number of glyphs: %d", (int)d->num_glyphs);
262 d->has_unicode_table = (d->mode & 0x02) ? 1 : 0;
263 de_dbg(c, "has Unicode table: %s", d->has_unicode_table?"yes":"no");
264 de_dbg_indent(c, -1);
266 d->bytes_per_glyph = (i64)de_getbyte(3);
267 d->glyph_height = d->bytes_per_glyph;
268 d->glyph_width = 8;
269 de_dbg(c, "glyph dimensions: %d"DE_CHAR_TIMES"%d", (int)d->glyph_width, (int)d->glyph_height);
271 de_dbg_indent(c, -1);
274 static void do_psf2_header(deark *c, lctx *d)
276 i64 pos = 0;
278 de_dbg(c, "PSFv2 header at %d", (int)pos);
279 de_dbg_indent(c, 1);
281 d->psf2_version = (u32)de_getu32le(pos+4);
282 de_dbg(c, "PSFv2 version number: %d", (int)d->psf2_version);
283 if(d->psf2_version!=0) {
284 de_warn(c, "Unknown PSFv2 version number: %d", (int)d->psf2_version);
287 d->headersize = de_getu32le(pos+8);
288 de_dbg(c, "header size: %d", (int)d->headersize);
290 d->flags = (u32)de_getu32le(pos+12);
291 de_dbg(c, "flags: 0x%08x", (unsigned int)d->flags);
292 de_dbg_indent(c, 1);
293 d->has_unicode_table = (d->flags & 0x01) ? 1 : 0;
294 de_dbg(c, "has Unicode table: %s", d->has_unicode_table?"yes":"no");
295 de_dbg_indent(c, -1);
297 d->num_glyphs = de_getu32le(pos+16);
298 de_dbg(c, "number of glyphs: %d", (int)d->num_glyphs);
300 d->bytes_per_glyph = de_getu32le(pos+20);
301 de_dbg(c, "bytes per glyph: %d", (int)d->bytes_per_glyph);
303 d->glyph_height = de_getu32le(pos+24);
304 d->glyph_width = de_getu32le(pos+28);
305 de_dbg(c, "glyph dimensions: %d"DE_CHAR_TIMES"%d", (int)d->glyph_width, (int)d->glyph_height);
307 de_dbg_indent(c, -1);
310 static void de_run_psf(deark *c, de_module_params *mparams)
312 lctx *d = NULL;
313 u8 b;
314 const char *s;
316 d = de_malloc(c, sizeof(lctx));
318 s = de_get_ext_option(c, "font:noaliases");
319 if(s)
320 d->read_extra_codepoints = 0;
321 else
322 d->read_extra_codepoints = 1;
324 b = de_getbyte(0);
325 if(b==0x36) {
326 d->version=1;
328 else if(b==0x72) {
329 d->version=2;
331 else {
332 de_err(c, "Not a PSF file");
333 goto done;
336 de_dbg(c, "PSF version: %d", (int)d->version);
338 if(d->version==2)
339 do_psf2_header(c, d);
340 else
341 do_psf1_header(c, d);
343 d->font_data_size = d->bytes_per_glyph * d->num_glyphs;
344 if(d->has_unicode_table) {
345 d->unicode_table_pos = d->headersize + d->font_data_size;
346 if(d->unicode_table_pos >= c->infile->len) {
347 d->has_unicode_table = 0;
351 if((d->headersize+d->font_data_size > c->infile->len) ||
352 d->bytes_per_glyph<1 ||
353 d->glyph_width<1 || d->glyph_width>256 ||
354 d->glyph_height<1 || d->glyph_height>256 ||
355 d->num_glyphs<1 || d->num_glyphs>2000000)
357 de_err(c, "Invalid or unsupported PSF file");
358 goto done;
361 do_glyphs(c, d);
363 done:
364 de_free(c, d);
367 static int de_identify_psf(deark *c)
369 if(!dbuf_memcmp(c->infile, 0, "\x72\xb5\x4a\x86", 4))
370 return 100;
371 if(!dbuf_memcmp(c->infile, 0, "\x36\x04", 2)) {
372 // TODO: Better PSFv1 detection.
373 return 65;
375 return 0;
378 static void de_help_psf(deark *c)
380 de_msg(c, "-opt font:noaliases : Restrict to one codepoint per glyph");
383 void de_module_psf(deark *c, struct deark_module_info *mi)
385 mi->id = "psf";
386 mi->desc = "PC Screen Font";
387 mi->run_fn = de_run_psf;
388 mi->identify_fn = de_identify_psf;
389 mi->help_fn = de_help_psf;