bmp: Rewrote the RLE decompressor
[deark.git] / modules / emf.c
blob1af7a9e042a60a973234c5e3519683c57d07bfc9
1 // This file is part of Deark.
2 // Copyright (C) 2017 Jason Summers
3 // See the file COPYING for terms of use.
5 // Enhanced Metafile (EMF)
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_emf);
12 #define CODE_EMFPLUS 0x2b464d45U
13 #define CODE_GDIC 0x43494447U
15 typedef struct localctx_struct {
16 int input_encoding;
17 int is_emfplus;
18 int emf_found_header;
19 i64 emf_version;
20 i64 emf_num_records;
21 } lctx;
23 struct decoder_params {
24 u32 rectype;
25 i64 recpos;
26 i64 recsize_bytes;
27 i64 dpos;
28 i64 dlen;
31 // Handler functions return 0 on fatal error, otherwise 1.
32 typedef int (*record_decoder_fn)(deark *c, lctx *d, struct decoder_params *dp);
34 struct emf_func_info {
35 u32 rectype;
36 const char *name;
37 record_decoder_fn fn;
40 struct emfplus_rec_info {
41 u32 rectype;
42 const char *name;
43 void *reserved1;
46 // Note: This is duplicated in wmf.c
47 static u32 colorref_to_color(u32 colorref)
49 u32 r,g,b;
50 r = DE_COLOR_B(colorref);
51 g = DE_COLOR_G(colorref);
52 b = DE_COLOR_R(colorref);
53 return DE_MAKE_RGB(r,g,b);
56 // Note: This is duplicated in wmf.c
57 static void do_dbg_colorref(deark *c, lctx *d, u32 colorref)
59 u32 clr;
60 char csamp[16];
62 clr = colorref_to_color(colorref);
63 de_get_colorsample_code(c, clr, csamp, sizeof(csamp));
64 de_dbg(c, "colorref: 0x%08x%s", (unsigned int)colorref, csamp);
67 static void ucstring_strip_trailing_NULs(de_ucstring *s)
69 while(s->len>=1 && s->str[s->len-1]==0x0000) {
70 ucstring_truncate(s, s->len-1);
74 // Header record
75 static int emf_handler_01(deark *c, lctx *d, struct decoder_params *dp)
77 i64 pos;
78 i64 file_size;
79 i64 handles;
80 i64 desc_len;
81 i64 desc_offs;
82 i64 num_pal_entries;
83 int retval = 0;
84 de_ucstring *desc = NULL;
86 if(d->emf_found_header) { retval = 1; goto done; }
87 d->emf_found_header = 1;
89 if(dp->recsize_bytes<88) {
90 de_err(c, "Invalid EMF header size (is %d, must be at least 88)", (int)dp->recsize_bytes);
91 goto done;
94 // 2.2.9 Header Object
95 pos = dp->recpos + 8;
96 d->emf_version = de_getu32le(pos+36);
97 de_dbg(c, "version: 0x%08x", (unsigned int)d->emf_version);
98 file_size = de_getu32le(pos+40);
99 de_dbg(c, "reported file size: %d", (int)file_size);
100 d->emf_num_records = de_getu32le(pos+44);
101 de_dbg(c, "number of records in file: %d", (int)d->emf_num_records);
102 handles = de_getu16le(pos+48);
103 de_dbg(c, "handles: %d", (int)handles);
104 desc_len = de_getu32le(pos+52);
105 desc_offs = de_getu32le(pos+56);
106 de_dbg(c, "description offset=%d, len=%d", (int)desc_offs, (int)desc_len);
107 num_pal_entries = de_getu32le(pos+60);
108 de_dbg(c, "num pal entries: %d", (int)num_pal_entries);
110 if((desc_len>0) && (desc_offs+desc_len*2 <= dp->recsize_bytes)) {
111 desc = ucstring_create(c);
112 dbuf_read_to_ucstring_n(c->infile, dp->recpos+desc_offs, desc_len*2, DE_DBG_MAX_STRLEN*2,
113 desc, 0, DE_ENCODING_UTF16LE);
114 ucstring_strip_trailing_NULs(desc);
115 de_dbg(c, "description: \"%s\"", ucstring_getpsz(desc));
118 retval = 1;
119 done:
120 ucstring_destroy(desc);
121 return retval;
124 static void do_identify_and_extract_compressed_bitmap(deark *c, lctx *d,
125 i64 pos, i64 len)
127 const char *ext = NULL;
128 u8 buf[4];
129 i64 nbytes_to_extract;
130 i64 foundpos;
132 if(len<=0) return;
133 if(pos+len > c->infile->len) return;
134 nbytes_to_extract = len; // default
136 // Having dived six layers of abstraction deep into EMF+ format,
137 // we finally come to an actual embedded image.
138 // And we *still* don't know what format it's in! We apparently have to
139 // sniff the data and make a guess.
141 de_dbg(c, "bitmap at %d, padded_len=%d", (int)pos, (int)len);
143 de_read(buf, pos, 4);
144 if(buf[0]==0x89 && buf[1]==0x50) {
145 ext = "png";
146 // The 'len' field includes 0 to 3 padding bytes, which we want to
147 // remove. All PNG files end with ae 42 60 82.
148 if(dbuf_search_byte(c->infile, '\x82', pos+len-4, 4, &foundpos)) {
149 nbytes_to_extract = foundpos + 1 - pos;
152 else if(buf[0]==0xff && buf[1]==0xd8) {
153 // TODO: Try to detect the true end of file.
154 ext = "jpg";
156 else if(buf[0]=='G' && buf[1]=='I') {
157 // TODO: Try to detect the true end of file.
158 ext = "gif";
160 else if((buf[0]=='I' && buf[1]=='I') || (buf[0]=='M' && buf[1]=='M')) {
161 ext = "tif";
163 else {
164 de_warn(c, "Unidentified bitmap format at %d", (int)pos);
165 return;
168 if(nbytes_to_extract<=0) return;
169 dbuf_create_file_from_slice(c->infile, pos, nbytes_to_extract, ext, NULL, 0);
172 // EmfPlusBitmap
173 static void do_emfplus_object_image_bitmap(deark *c, lctx *d, i64 pos, i64 len)
175 i64 w, h;
176 i64 ty;
177 i64 endpos;
178 const char *name;
180 if(len<=0) return;
181 endpos = pos + len;
183 w = de_getu32le(pos);
184 h = de_getu32le(pos+4);
185 de_dbg_dimensions(c, w, h);
187 // 8 stride
188 // 12 pixelformat
189 ty = de_getu32le(pos+16); // BitmapDataType
190 switch(ty) {
191 case 0: name="Pixel"; break;
192 case 1: name="Compressed"; break;
193 default: name="?"; break;
195 de_dbg(c, "type: %d (%s)", (int)ty, name);
197 if(ty==1) {
198 do_identify_and_extract_compressed_bitmap(c, d, pos+20, endpos - (pos+20));
202 // EmfPlusMetafile
203 static void do_emfplus_object_image_metafile(deark *c, lctx *d, i64 pos, i64 len)
205 i64 ty;
206 i64 dlen;
207 const char *name;
208 const char *ext = NULL;
210 if(len<8) return;
212 ty = de_getu32le(pos);
213 switch(ty) {
214 case 1: name="Wmf"; break;
215 case 2: name="WmfPlaceable"; break;
216 case 3: name="Emf"; break;
217 case 4: name="EmfPlusOnly"; break;
218 case 5: name="EmfPlusDual"; break;
219 default: name = "?";
221 de_dbg(c, "type: %d (%s)", (int)ty, name);
223 dlen = de_getu32le(pos+4);
224 de_dbg(c, "metafile data size: %d", (int)dlen);
226 if(dlen<1 || dlen>len-8) return;
228 if(ty==1 || ty==2) ext="wmf";
229 else if(ty==3 || ty==4 || ty==5) ext="emf";
230 else return;
232 dbuf_create_file_from_slice(c->infile, pos+8, dlen, ext, NULL, 0);
235 // EmfPlusImage
236 static void do_emfplus_object_image(deark *c, lctx *d, i64 pos1, i64 len)
238 i64 ver;
239 i64 datatype;
240 i64 pos = pos1;
241 const char *name;
243 ver = de_getu32le(pos);
244 datatype = de_getu32le(pos+4);
246 switch(datatype) { // ImageDataType
247 case 0: name="Unknown"; break;
248 case 1: name="Bitmap"; break; // EmfPlusBitmap
249 case 2: name="Metafile"; break; // EmfPlusMetafile
250 default: name="?"; break;
253 de_dbg(c, "Image osver=0x%08x, type=%d (%s)", (unsigned int)ver,
254 (int)datatype, name);
256 if(datatype==1) {
257 do_emfplus_object_image_bitmap(c, d, pos1+8, len-8);
259 else if(datatype==2) {
260 do_emfplus_object_image_metafile(c, d, pos1+8, len-8);
264 // 0x4008 EmfPlusObject
265 // pos is the beginning of the 'ObjectData' field
266 // len is the DataSize field.
267 static void do_emfplus_object(deark *c, lctx *d, i64 pos, i64 len,
268 u32 flags)
270 u32 object_id;
271 u32 object_type;
272 const char *name;
273 static const char *names[10] = { "Invalid", "Brush", "Pen", "Path",
274 "Region", "Image", "Font", "StringFormat", "ImageAttributes",
275 "CustomLineCap" };
277 object_type = (flags&0x7f00)>>8;
278 object_id = (flags&0x00ff);
280 if(object_type<=9)
281 name = names[object_type];
282 else
283 name = "?";
285 de_dbg(c, "EmfPlusObject type=%d (%s), id=%d", (int)object_type, name,
286 (int)object_id);
288 de_dbg_indent(c, 1);
289 if(object_type==5) {
290 do_emfplus_object_image(c, d, pos, len);
292 de_dbg_indent(c, -1);
295 // EMF+ Comment
296 static int emfplus_handler_4003(deark *c, lctx *d, i64 rectype, i64 pos, i64 len)
298 if(c->debug_level>=2) {
299 de_dbg_hexdump(c, c->infile, pos, len, 256, "comment", 0x1);
301 else {
302 de_dbg(c, "[%d comment bytes at %d]", (int)len, (int)pos);
304 return 1;
307 // EMF+ DrawString
308 static int emfplus_handler_401c(deark *c, lctx *d, i64 rectype, i64 pos1, i64 len)
310 i64 pos = pos1;
311 i64 nchars;
312 de_ucstring *s = NULL;
314 pos += 8; // brushid, formatid
315 nchars = de_getu32le(pos);
316 pos += 4;
317 pos += 16; // layoutrect
318 if(pos+nchars*2 > pos1+len) goto done;
319 s = ucstring_create(c);
320 dbuf_read_to_ucstring_n(c->infile, pos, nchars*2, DE_DBG_MAX_STRLEN*2,
321 s, 0, DE_ENCODING_UTF16LE);
322 de_dbg(c, "text: \"%s\"", ucstring_getpsz(s));
324 done:
325 ucstring_destroy(s);
326 return 1;
329 static const struct emfplus_rec_info emfplus_rec_info_arr[] = {
330 { 0x4001, "Header", NULL },
331 { 0x4002, "EndOfFile", NULL },
332 { 0x4003, "Comment", NULL },
333 { 0x4004, "GetDC", NULL },
334 { 0x4008, "Object", NULL },
335 { 0x4009, "Clear", NULL },
336 { 0x400a, "FillRects", NULL },
337 { 0x400b, "DrawRects", NULL },
338 { 0x400c, "FillPolygon", NULL },
339 { 0x400d, "DrawLines", NULL },
340 { 0x400e, "FillEllipse", NULL },
341 { 0x400f, "DrawEllipse", NULL },
342 { 0x4010, "FillPie", NULL },
343 { 0x4011, "DrawPie", NULL },
344 { 0x4012, "DrawArc", NULL },
345 { 0x4013, "FillRegion", NULL },
346 { 0x4014, "FillPath", NULL },
347 { 0x4015, "DrawPath", NULL },
348 { 0x4016, "FillClosedCurve", NULL },
349 { 0x4017, "DrawClosedCurve", NULL },
350 { 0x4018, "DrawCurve", NULL },
351 { 0x4019, "DrawBeziers", NULL },
352 { 0x401a, "DrawImage", NULL },
353 { 0x401b, "DrawImagePoints", NULL },
354 { 0x401c, "DrawString", NULL },
355 { 0x401e, "SetAntiAliasMode", NULL },
356 { 0x401f, "SetTextRenderingHint", NULL },
357 { 0x4020, "SetTextContrast", NULL },
358 { 0x4021, "SetInterpolationMode", NULL },
359 { 0x4022, "SetPixelOffsetMode", NULL },
360 { 0x4024, "SetCompositingQuality", NULL },
361 { 0x402a, "SetWorldTransform", NULL },
362 { 0x402b, "ResetWorldTransform", NULL },
363 { 0x402c, "MultiplyWorldTransform", NULL },
364 { 0x402d, "TranslateWorldTransform", NULL },
365 { 0x402f, "RotateWorldTransform", NULL },
366 { 0x4030, "SetPageTransform", NULL },
367 { 0x4031, "ResetClip", NULL },
368 { 0x4032, "SetClipRect", NULL },
369 { 0x4033, "SetClipPath", NULL },
370 { 0x4034, "SetClipRegion", NULL },
371 { 0x4035, "OffsetClip", NULL },
372 { 0x4038, "SerializableObject", NULL }
375 static void do_one_emfplus_record(deark *c, lctx *d, i64 pos, i64 len,
376 i64 *bytes_consumed, int *continuation_flag)
378 u32 rectype;
379 u32 flags;
380 i64 size, datasize;
381 i64 payload_pos;
382 const struct emfplus_rec_info *epinfo = NULL;
383 size_t k;
384 int is_continued = 0;
386 if(len<12) {
387 *bytes_consumed = len;
388 *continuation_flag = 0;
389 return;
392 rectype = (u32)de_getu16le(pos);
393 flags = (u32)de_getu16le(pos+2);
394 size = de_getu32le(pos+4);
396 is_continued = (rectype==0x4008) && (flags&0x8000);
398 // The documentation suggests that the datasize field is in a different
399 // place if the continuation flag is set. It also suggests the opposite
400 // (or maybe just that it's safe to behave as if it were in the same place).
401 // It doesn't really matter, since we don't support 'continued' records.
403 // I don't know why the datasize field is padded to the next multiple of 4.
404 // It seems clearly unnecessary, and counterproductive.
405 // There is already the 'size' field above, which is padded, so the
406 // only reason for the datasize field to exist at all would be if it
407 // told us the *non-padded* size. Yet it doesn't. So now I have to write
408 // code to try to detect where an embedded PNG or whatever file ends.
409 // (The existence of 'continued' records makes this issue more complicated,
410 // but they are already a special case, so that's no excuse.)
411 datasize = de_getu32le(pos+8);
412 payload_pos = pos+12;
414 // Find the name, etc. of this record type
415 for(k=0; k<DE_ARRAYCOUNT(emfplus_rec_info_arr); k++) {
416 if(emfplus_rec_info_arr[k].rectype == rectype) {
417 epinfo = &emfplus_rec_info_arr[k];
418 break;
422 de_dbg(c, "rectype 0x%04x (%s) at %d, flags=0x%04x, dpos=%d, dlen=%d",
423 (unsigned int)rectype, epinfo ? epinfo->name : "?",
424 (int)pos,
425 (unsigned int)flags,
426 (int)payload_pos, (int)datasize);
428 // If this record or the previous record had the continuation flag set,
429 // give up.
430 if(is_continued || *continuation_flag) {
431 goto done;
434 de_dbg_indent(c, 1);
435 // TODO: Use handler function via epinfo
436 if(rectype==0x4003) {
437 emfplus_handler_4003(c, d, rectype, payload_pos, datasize);
439 else if(rectype==0x4008) {
440 do_emfplus_object(c, d, payload_pos, datasize, flags);
442 else if(rectype==0x401c) {
443 emfplus_handler_401c(c, d, rectype, payload_pos, datasize);
445 de_dbg_indent(c, -1);
447 done:
448 if(size<12) size=12;
449 *bytes_consumed = size;
450 *continuation_flag = is_continued;
453 // Series of EMF+ records (from a single EMF comment)
454 static void do_comment_emfplus(deark *c, lctx *d, i64 pos1, i64 len)
456 i64 pos = pos1;
457 i64 bytes_consumed;
458 int continuation_flag = 0;
460 de_dbg(c, "EMF+ data at %d, len=%d", (int)pos1, (int)len);
461 de_dbg_indent(c, 1);
463 while(1) {
464 if(pos >= pos1+len) break;
465 do_one_emfplus_record(c, d, pos, pos1+len-pos, &bytes_consumed, &continuation_flag);
466 pos += bytes_consumed;
468 // EMFPlusRecords (one or more EMF+ records)
469 de_dbg_indent(c, -1);
472 // Series of EMF+ records (from a single EMF comment)
473 static void do_comment_public(deark *c, lctx *d, i64 pos1, i64 len)
475 u32 ty;
476 const char *name;
477 ty = (u32)de_getu32le(pos1);
478 switch(ty) {
479 case 0x80000001U: name = "WINDOWS_METAFILE"; break;
480 case 0x00000002U: name = "BEGINGROUP"; break;
481 case 0x00000003U: name = "ENDGROUP"; break;
482 case 0x40000004U: name = "MULTIFORMATS"; break;
483 case 0x00000040U: name = "UNICODE_STRING"; break;
484 case 0x00000080U: name = "UNICODE_END"; break;
485 default: name = "?";
487 de_dbg(c, "public comment record type: 0x%08x (%s)", (unsigned int)ty, name);
490 // Comment record
491 static int emf_handler_46(deark *c, lctx *d, struct decoder_params *dp)
493 const char *name;
494 i64 datasize;
495 int handled = 0;
496 enum cmtid_enum { CMTID_UNK, CMTID_EMFSPOOL, CMTID_EMFPLUS, CMTID_PUBLIC,
497 CMTID_INKSCAPESCREEN, CMTID_INKSCAPEDRAWING };
498 enum cmtid_enum cmtid;
500 if(dp->recsize_bytes<16) goto done;
502 // Datasize is measured from the beginning of the next field (CommentIdentifier).
503 datasize = de_getu32le(dp->recpos+8);
504 de_dbg(c, "datasize: %"I64_FMT, datasize);
505 if(12+datasize > dp->recsize_bytes) goto done;
507 cmtid = CMTID_UNK;
508 name="?";
510 if(datasize>=4) {
511 struct de_fourcc id4cc;
513 // The first 4 bytes of comment data might or might not be a signature.
514 // The spec expects these bytes to be read as a little-endian int, which is
515 // then interpreted as a FOURCC, most-significant byte first.
516 // The standard FOURCC codes are designed backwards, so that in the
517 // file they appear forward. E.g. the spec says a code is "+FME", but in
518 // the file the bytes "EMF+" appear in that order. Our messages respect the
519 // spec, though it looks strange.
520 dbuf_read_fourcc(c->infile, dp->recpos+12, &id4cc, 4, 0x1);
522 if(id4cc.id==0x00000000) {
523 cmtid = CMTID_EMFSPOOL;
524 name = "EMR_COMMENT_EMFSPOOL";
526 else if(id4cc.id==CODE_EMFPLUS) {
527 cmtid = CMTID_EMFPLUS;
528 name = "EMR_COMMENT_EMFPLUS";
530 else if(id4cc.id==CODE_GDIC) {
531 cmtid = CMTID_PUBLIC;
532 name = "EMR_COMMENT_PUBLIC";
535 de_dbg(c, "type: 0x%08x '%s' (%s)", (unsigned int)id4cc.id,
536 id4cc.id_dbgstr, name);
539 if(cmtid==CMTID_UNK) {
540 u8 buf[16];
542 // FOURCC not recognized; try other methods
543 de_read(buf, dp->recpos+12, 16);
544 if(datasize>=7 && !de_memcmp(buf, "Screen=", 7)) {
545 cmtid = CMTID_INKSCAPESCREEN;
546 name = "Inkscape canvas size";
548 else if(datasize>=8 && !de_memcmp(buf, "Drawing=", 8)) {
549 cmtid = CMTID_INKSCAPEDRAWING;
550 name = "Inkscape image size";
553 de_dbg(c, "identified as: %s", name);
556 if(cmtid==CMTID_EMFPLUS) {
557 do_comment_emfplus(c, d, dp->recpos+16, datasize-4);
558 handled = 1;
560 else if(cmtid==CMTID_PUBLIC) {
561 do_comment_public(c, d, dp->recpos+16, datasize-4);
562 handled = 1;
565 if(!handled) {
566 de_dbg_hexdump(c, c->infile, dp->recpos+12, datasize, 256, NULL, 0x1);
569 done:
570 return 1;
573 static void extract_dib(deark *c, lctx *d, i64 bmi_pos, i64 bmi_len,
574 i64 bits_pos, i64 bits_len)
576 struct de_bmpinfo bi;
577 dbuf *outf = NULL;
578 i64 real_height;
580 if(bmi_len<12 || bmi_len>2048) goto done;
581 if(bits_len<1 || bmi_len+bits_len>DE_MAX_SANE_OBJECT_SIZE) goto done;
583 if(!fmtutil_get_bmpinfo(c, c->infile, &bi, bmi_pos, bmi_len, 0)) {
584 de_warn(c, "Invalid bitmap");
585 goto done;
588 real_height = bi.height;
590 // Sometimes, only a portion of the image is present. In most cases, we
591 // can compensate for that.
592 if(bi.bitcount>0 && bi.rowspan>0) {
593 i64 nscanlines_present;
595 nscanlines_present = bits_len/bi.rowspan;
596 if(nscanlines_present>0 && nscanlines_present<bi.height && bi.infohdrsize>=16) {
597 real_height = nscanlines_present;
601 outf = dbuf_create_output_file(c, "bmp", NULL, 0);
603 fmtutil_generate_bmpfileheader(c, outf, &bi, 14 + bmi_len + bits_len);
605 if(real_height == bi.height) {
606 // Copy the BITMAPINFO (headers & palette)
607 dbuf_copy(c->infile, bmi_pos, bmi_len, outf);
609 else {
610 u8 *tmp_bmi;
612 // Make a copy of the BITMAPINFO data, for us to modify.
613 tmp_bmi = de_malloc(c, bmi_len);
614 de_read(tmp_bmi, bmi_pos, bmi_len);
616 de_writeu32le_direct(&tmp_bmi[8], real_height); // Correct the biHeight field
618 if(bmi_len>=24) {
619 // Correct (or set) the biSizeImage field
620 de_writeu32le_direct(&tmp_bmi[20], bits_len);
622 dbuf_write(outf, tmp_bmi, bmi_len);
623 de_free(c, tmp_bmi);
626 // Copy the bitmap bits
627 dbuf_copy(c->infile, bits_pos, bits_len, outf);
629 done:
630 dbuf_close(outf);
// Return the name of a stock object, given an object index.
// An index refers to a stock object if its high bit is set; the remaining
// bits then select the object. Returns "?" if n is not a stock object
// index, or does not correspond to a known name. Never returns NULL.
static const char *get_stock_obj_name(unsigned int n)
{
	// Static, so the table is not rebuilt on every call.
	// Index 9 has no name.
	static const char *const names[20] = { "WHITE_BRUSH", "LTGRAY_BRUSH", "GRAY_BRUSH",
		"DKGRAY_BRUSH", "BLACK_BRUSH", "NULL_BRUSH", "WHITE_PEN", "BLACK_PEN",
		"NULL_PEN", NULL, "OEM_FIXED_FONT", "ANSI_FIXED_FONT", "ANSI_VAR_FONT",
		"SYSTEM_FONT", "DEVICE_DEFAULT_FONT", "DEFAULT_PALETTE",
		"SYSTEM_FIXED_FONT", "DEFAULT_GUI_FONT", "DC_BRUSH", "DC_PEN" };
	unsigned int idx;

	if(!(n & 0x80000000U)) return "?";

	idx = n & 0x7fffffffU;
	if(idx >= sizeof(names)/sizeof(names[0])) return "?";

	return names[idx] ? names[idx] : "?";
}
650 static void read_object_index_p(deark *c, lctx *d, i64 *ppos)
652 unsigned int n;
653 n = (unsigned int)de_getu32le_p(ppos);
654 if(n & 0x80000000U) {
655 // A stock object
656 de_dbg(c, "object index: 0x%08x (%s)", n, get_stock_obj_name(n));
658 else {
659 de_dbg(c, "object index: %u", n);
663 static void read_LogPen(deark *c, lctx *d, i64 pos)
665 unsigned int style;
666 i64 n;
667 u32 colorref;
669 style = (unsigned int)de_getu32le_p(&pos);
670 de_dbg(c, "style: 0x%08x", style);
672 n = de_geti32le_p(&pos); // <PointL>.x = pen width
673 de_dbg(c, "width: %d", (int)n);
675 pos += 4; // <PointL>.y = unused
677 colorref = (u32)de_getu32le_p(&pos);
678 do_dbg_colorref(c, d, colorref);
681 static int handler_CREATEPEN(deark *c, lctx *d, struct decoder_params *dp)
683 i64 pos = dp->dpos;
685 if(dp->dlen<20) return 1;
686 read_object_index_p(c, d, &pos);
687 read_LogPen(c, d, pos);
688 return 1;
691 static void read_LogBrushEx(deark *c, lctx *d, i64 pos)
693 unsigned int style;
694 u32 colorref;
696 style = (unsigned int)de_getu32le_p(&pos);
697 de_dbg(c, "style: 0x%08x", style);
699 colorref = (u32)de_getu32le_p(&pos);
700 do_dbg_colorref(c, d, colorref);
702 // TODO: BrushHatch
705 static int handler_CREATEBRUSHINDIRECT(deark *c, lctx *d, struct decoder_params *dp)
707 i64 pos = dp->dpos;
709 if(dp->dlen<16) return 1;
710 read_object_index_p(c, d, &pos);
711 read_LogBrushEx(c, d, pos);
712 return 1;
715 static int handler_colorref(deark *c, lctx *d, struct decoder_params *dp)
717 u32 colorref;
718 colorref = (u32)de_getu32le(dp->dpos);
719 do_dbg_colorref(c, d, colorref);
720 return 1;
723 // Can handle any record that is, or begins with, and object index.
724 static int handler_object_index(deark *c, lctx *d, struct decoder_params *dp)
726 i64 pos = dp->dpos;
728 if(dp->dlen<4) return 1;
729 read_object_index_p(c, d, &pos);
730 return 1;
733 // BITBLT
734 static int emf_handler_4c(deark *c, lctx *d, struct decoder_params *dp)
736 i64 rop;
737 i64 bmi_offs;
738 i64 bmi_len;
739 i64 bits_offs;
740 i64 bits_len;
742 if(dp->recsize_bytes<100) return 1;
744 rop = de_getu32le(dp->recpos+40);
745 de_dbg(c, "raster operation: 0x%08x", (unsigned int)rop);
747 bmi_offs = de_getu32le(dp->recpos+84);
748 bmi_len = de_getu32le(dp->recpos+88);
749 de_dbg(c, "bmi offset=%d, len=%d", (int)bmi_offs, (int)bmi_len);
750 bits_offs = de_getu32le(dp->recpos+92);
751 bits_len = de_getu32le(dp->recpos+96);
752 de_dbg(c, "bits offset=%d, len=%d", (int)bits_offs, (int)bits_len);
754 if(bmi_len<12) return 1;
755 if(bmi_offs<100) return 1;
756 if(bmi_offs+bmi_len>dp->recsize_bytes) return 1;
757 if(bits_len<1) return 1;
758 if(bits_offs<100) return 1;
759 if(bits_offs+bits_len>dp->recsize_bytes) return 1;
760 extract_dib(c, d, dp->recpos+bmi_offs, bmi_len, dp->recpos+bits_offs, bits_len);
761 return 1;
764 // 0x50 = SetDIBitsToDevice
765 // 0x51 = StretchDIBits
766 static int emf_handler_50_51(deark *c, lctx *d, struct decoder_params *dp)
768 i64 rop;
769 i64 bmi_offs;
770 i64 bmi_len;
771 i64 bits_offs;
772 i64 bits_len;
773 i64 fixed_header_len;
774 i64 num_scans;
776 if(dp->rectype==0x50)
777 fixed_header_len = 76;
778 else
779 fixed_header_len = 80;
781 if(dp->recsize_bytes<fixed_header_len) return 1;
783 bmi_offs = de_getu32le(dp->recpos+48);
784 bmi_len = de_getu32le(dp->recpos+52);
785 de_dbg(c, "bmi offset=%d, len=%d", (int)bmi_offs, (int)bmi_len);
786 bits_offs = de_getu32le(dp->recpos+56);
787 bits_len = de_getu32le(dp->recpos+60);
788 de_dbg(c, "bits offset=%d, len=%d", (int)bits_offs, (int)bits_len);
790 if(dp->rectype==0x51) {
791 rop = de_getu32le(dp->recpos+68);
792 de_dbg(c, "raster operation: 0x%08x", (unsigned int)rop);
795 if(dp->rectype==0x50) {
796 num_scans = de_getu32le(dp->recpos+72);
797 de_dbg(c, "number of scanlines: %d", (int)num_scans);
800 if(bmi_len<12) return 1;
801 if(bmi_offs<fixed_header_len) return 1;
802 if(bmi_offs+bmi_len>dp->recsize_bytes) return 1;
803 if(bits_len<1) return 1;
804 if(bits_offs<fixed_header_len) return 1;
805 if(bits_offs+bits_len>dp->recsize_bytes) return 1;
806 extract_dib(c, d, dp->recpos+bmi_offs, bmi_len, dp->recpos+bits_offs, bits_len);
808 return 1;
811 static void do_emf_xEmrText(deark *c, lctx *d, i64 recpos, i64 pos1, i64 len,
812 i64 bytesperchar, de_encoding encoding)
814 i64 pos = pos1;
815 i64 nchars;
816 i64 offstring;
817 de_ucstring *s = NULL;
819 pos += 8; // Reference
820 nchars = de_getu32le(pos);
821 pos += 4;
822 offstring = de_getu32le(pos);
823 if(recpos+offstring+nchars*bytesperchar > pos1+len) goto done;
824 s = ucstring_create(c);
825 dbuf_read_to_ucstring_n(c->infile, recpos+offstring, nchars*bytesperchar,
826 DE_DBG_MAX_STRLEN*bytesperchar, s, 0, encoding);
827 ucstring_strip_trailing_NUL(s);
828 de_dbg(c, "text: \"%s\"", ucstring_getpsz(s));
830 done:
831 ucstring_destroy(s);
834 static void do_emf_aEmrText(deark *c, lctx *d, i64 recpos, i64 pos1, i64 len)
836 do_emf_xEmrText(c, d, recpos, pos1, len, 1, d->input_encoding);
839 static void do_emf_wEmrText(deark *c, lctx *d, i64 recpos, i64 pos1, i64 len)
841 do_emf_xEmrText(c, d, recpos, pos1, len, 2, DE_ENCODING_UTF16LE);
844 // 0x53 = EMR_EXTTEXTOUTA
845 static int emf_handler_53(deark *c, lctx *d, struct decoder_params *dp)
847 i64 pos = dp->recpos;
849 pos += 8; // type, size
850 pos += 16; // bounds
851 pos += 12; // iGraphicsMode, exScale, eyScale
852 do_emf_aEmrText(c, d, dp->recpos, pos, dp->recpos+dp->recsize_bytes - pos);
853 return 1;
856 // 0x54 = EMR_EXTTEXTOUTW
857 static int emf_handler_54(deark *c, lctx *d, struct decoder_params *dp)
859 i64 pos = dp->recpos;
861 pos += 8; // type, size
862 pos += 16; // bounds
863 pos += 12; // iGraphicsMode, exScale, eyScale
864 do_emf_wEmrText(c, d, dp->recpos, pos, dp->recpos+dp->recsize_bytes - pos);
865 return 1;
868 static void do_LogFont(deark *c, lctx *d, struct decoder_params *dp, i64 pos1, i64 len)
870 de_ucstring *facename = NULL;
871 i64 pos = pos1;
872 i64 n, n2;
873 u8 b;
875 if(len<92) goto done;
877 n = de_geti32le_p(&pos);
878 n2 = de_geti32le_p(&pos);
879 de_dbg(c, "height,width: %d,%d", (int)n, (int)n2);
880 pos += 15;
881 b = de_getbyte_p(&pos);
882 de_dbg(c, "charset: 0x%02x (%s)", (unsigned int)b,
883 fmtutil_get_windows_charset_name(b));
885 pos += 4;
886 facename = ucstring_create(c);
887 dbuf_read_to_ucstring(c->infile, pos, 32*2, facename, 0, DE_ENCODING_UTF16LE);
888 ucstring_truncate_at_NUL(facename);
889 de_dbg(c, "facename: \"%s\"", ucstring_getpsz_d(facename));
891 done:
892 ucstring_destroy(facename);
895 static void do_LogFontEx(deark *c, lctx *d, struct decoder_params *dp, i64 pos1, i64 len)
897 do_LogFont(c, d, dp, pos1, len);
898 // TODO: FullName, Style, Script
902 static void do_LogFontExDv(deark *c, lctx *d, struct decoder_params *dp, i64 pos1, i64 len)
904 do_LogFontEx(c, d, dp, pos1, len);
905 // TODO: DesignVector
908 static int handler_EXTCREATEFONTINDIRECTW(deark *c, lctx *d, struct decoder_params *dp)
910 i64 pos = dp->dpos;
911 i64 elw_size;
913 read_object_index_p(c, d, &pos); // ihFonts
915 // "If the size of the elw field is equal to or less than the size of a
916 // LogFontPanose object, elw MUST be treated as a fixed-length LogFont object.
917 // [Else LogFontExDv.] The size of a LogFontPanose object is 320 decimal."
918 elw_size = dp->dlen - 4;
920 if(elw_size<=320) {
921 do_LogFont(c, d, dp, pos, elw_size);
923 else {
924 do_LogFontExDv(c, d, dp, pos, elw_size);
927 return 1;
930 static const struct emf_func_info emf_func_info_arr[] = {
931 { 0x01, "HEADER", emf_handler_01 },
932 { 0x02, "POLYBEZIER", NULL },
933 { 0x03, "POLYGON", NULL },
934 { 0x04, "POLYLINE", NULL },
935 { 0x05, "POLYBEZIERTO", NULL },
936 { 0x06, "POLYLINETO", NULL },
937 { 0x07, "POLYPOLYLINE", NULL },
938 { 0x08, "POLYPOLYGON", NULL },
939 { 0x09, "SETWINDOWEXTEX", NULL },
940 { 0x0a, "SETWINDOWORGEX", NULL },
941 { 0x0b, "SETVIEWPORTEXTEX", NULL },
942 { 0x0c, "SETVIEWPORTORGEX", NULL },
943 { 0x0d, "SETBRUSHORGEX", NULL },
944 { 0x0e, "EOF", NULL },
945 { 0x0f, "SETPIXELV", NULL },
946 { 0x10, "SETMAPPERFLAGS", NULL },
947 { 0x11, "SETMAPMODE", NULL },
948 { 0x12, "SETBKMODE", NULL },
949 { 0x13, "SETPOLYFILLMODE", NULL },
950 { 0x14, "SETROP2", NULL },
951 { 0x15, "SETSTRETCHBLTMODE", NULL },
952 { 0x16, "SETTEXTALIGN", NULL },
953 { 0x17, "SETCOLORADJUSTMENT", NULL },
954 { 0x18, "SETTEXTCOLOR", handler_colorref },
955 { 0x19, "SETBKCOLOR", handler_colorref },
956 { 0x1a, "OFFSETCLIPRGN", NULL },
957 { 0x1b, "MOVETOEX", NULL },
958 { 0x1c, "SETMETARGN", NULL },
959 { 0x1d, "EXCLUDECLIPRECT", NULL },
960 { 0x1e, "INTERSECTCLIPRECT", NULL },
961 { 0x1f, "SCALEVIEWPORTEXTEX", NULL },
962 { 0x20, "SCALEWINDOWEXTEX", NULL },
963 { 0x21, "SAVEDC", NULL },
964 { 0x22, "RESTOREDC", NULL },
965 { 0x23, "SETWORLDTRANSFORM", NULL },
966 { 0x24, "MODIFYWORLDTRANSFORM", NULL },
967 { 0x25, "SELECTOBJECT", handler_object_index },
968 { 0x26, "CREATEPEN", handler_CREATEPEN },
969 { 0x27, "CREATEBRUSHINDIRECT", handler_CREATEBRUSHINDIRECT },
970 { 0x28, "DELETEOBJECT", handler_object_index },
971 { 0x29, "ANGLEARC", NULL },
972 { 0x2a, "ELLIPSE", NULL },
973 { 0x2b, "RECTANGLE", NULL },
974 { 0x2c, "ROUNDRECT", NULL },
975 { 0x2d, "ARC", NULL },
976 { 0x2e, "CHORD", NULL },
977 { 0x2f, "PIE", NULL },
978 { 0x30, "SELECTPALETTE", handler_object_index },
979 { 0x31, "CREATEPALETTE", handler_object_index }, // TODO: A better handler
980 { 0x32, "SETPALETTEENTRIES", NULL },
981 { 0x33, "RESIZEPALETTE", NULL },
982 { 0x34, "REALIZEPALETTE", NULL },
983 { 0x35, "EXTFLOODFILL", NULL },
984 { 0x36, "LINETO", NULL },
985 { 0x37, "ARCTO", NULL },
986 { 0x38, "POLYDRAW", NULL },
987 { 0x39, "SETARCDIRECTION", NULL },
988 { 0x3a, "SETMITERLIMIT", NULL },
989 { 0x3b, "BEGINPATH", NULL },
990 { 0x3c, "ENDPATH", NULL },
991 { 0x3d, "CLOSEFIGURE", NULL },
992 { 0x3e, "FILLPATH", NULL },
993 { 0x3f, "STROKEANDFILLPATH", NULL },
994 { 0x40, "STROKEPATH", NULL },
995 { 0x41, "FLATTENPATH", NULL },
996 { 0x42, "WIDENPATH", NULL },
997 { 0x43, "SELECTCLIPPATH", NULL },
998 { 0x44, "ABORTPATH", NULL },
999 { 0x46, "COMMENT", emf_handler_46 },
1000 { 0x47, "FILLRGN", NULL },
1001 { 0x48, "FRAMERGN", NULL },
1002 { 0x49, "INVERTRGN", NULL },
1003 { 0x4a, "PAINTRGN", NULL },
1004 { 0x4b, "EXTSELECTCLIPRGN", NULL },
1005 { 0x4c, "BITBLT", emf_handler_4c },
1006 { 0x4d, "STRETCHBLT", NULL },
1007 { 0x4e, "MASKBLT", NULL },
1008 { 0x4f, "PLGBLT", NULL },
1009 { 0x50, "SETDIBITSTODEVICE", emf_handler_50_51 },
1010 { 0x51, "STRETCHDIBITS", emf_handler_50_51 },
1011 { 0x52, "EXTCREATEFONTINDIRECTW", handler_EXTCREATEFONTINDIRECTW },
1012 { 0x53, "EXTTEXTOUTA", emf_handler_53 },
1013 { 0x54, "EXTTEXTOUTW", emf_handler_54 },
1014 { 0x55, "POLYBEZIER16", NULL },
1015 { 0x56, "POLYGON16", NULL },
1016 { 0x57, "POLYLINE16", NULL },
1017 { 0x58, "POLYBEZIERTO16", NULL },
1018 { 0x59, "POLYLINETO16", NULL },
1019 { 0x5a, "POLYPOLYLINE16", NULL },
1020 { 0x5b, "POLYPOLYGON16", NULL },
1021 { 0x5c, "POLYDRAW16", NULL },
1022 { 0x5d, "CREATEMONOBRUSH", handler_object_index }, // TODO: A better handler
1023 { 0x5e, "CREATEDIBPATTERNBRUSHPT", handler_object_index }, // TODO: A better handler
1024 { 0x5f, "EXTCREATEPEN", handler_object_index }, // TODO: A better handler
1025 { 0x60, "POLYTEXTOUTA", NULL },
1026 { 0x61, "POLYTEXTOUTW", NULL },
1027 { 0x62, "SETICMMODE", NULL },
1028 { 0x63, "CREATECOLORSPACE", handler_object_index }, // TODO: A better handler
1029 { 0x64, "SETCOLORSPACE", handler_object_index },
1030 { 0x65, "DELETECOLORSPACE", handler_object_index },
1031 { 0x66, "GLSRECORD", NULL },
1032 { 0x67, "GLSBOUNDEDRECORD", NULL },
1033 { 0x68, "PIXELFORMAT", NULL },
1034 { 0x69, "DRAWESCAPE", NULL },
1035 { 0x6a, "EXTESCAPE", NULL },
1036 { 0x6c, "SMALLTEXTOUT", NULL },
1037 { 0x6d, "FORCEUFIMAPPING", NULL },
1038 { 0x6e, "NAMEDESCAPE", NULL },
1039 { 0x6f, "COLORCORRECTPALETTE", NULL },
1040 { 0x70, "SETICMPROFILEA", NULL },
1041 { 0x71, "SETICMPROFILEW", NULL },
1042 { 0x72, "ALPHABLEND", NULL },
1043 { 0x73, "SETLAYOUT", NULL },
1044 { 0x74, "TRANSPARENTBLT", NULL },
1045 { 0x76, "GRADIENTFILL", NULL },
1046 { 0x77, "SETLINKEDUFIS", NULL },
1047 { 0x78, "ETTEXTJUSTIFICATION", NULL },
1048 { 0x79, "COLORMATCHTOTARGETW", NULL },
1049 { 0x7a, "CREATECOLORSPACEW", handler_object_index } // TODO: A better handler
1052 static const struct emf_func_info *find_emf_func_info(u32 rectype)
1054 size_t i;
1056 for(i=0; i<DE_ARRAYCOUNT(emf_func_info_arr); i++) {
1057 if(emf_func_info_arr[i].rectype == rectype) {
1058 return &emf_func_info_arr[i];
1061 return NULL;
1064 static int do_emf_record(deark *c, lctx *d, i64 recnum, i64 recpos,
1065 i64 recsize_bytes)
1067 int ret;
1068 const struct emf_func_info *fnci;
1069 struct decoder_params dp;
1071 de_zeromem(&dp, sizeof(struct decoder_params));
1072 dp.recpos = recpos;
1073 dp.recsize_bytes = recsize_bytes;
1074 dp.dpos = recpos+8;
1075 dp.dlen = recsize_bytes-8;
1076 if(dp.dlen<0) dp.dlen=0;
1078 dp.rectype = (u32)de_getu32le(recpos);
1080 fnci = find_emf_func_info(dp.rectype);
1082 de_dbg(c, "record #%d at %d, type=0x%02x (%s), dpos=%"I64_FMT", dlen=%"I64_FMT,
1083 (int)recnum, (int)recpos, (unsigned int)dp.rectype,
1084 fnci ? fnci->name : "?", dp.dpos, dp.dlen);
1086 if(fnci && fnci->fn) {
1087 de_dbg_indent(c, 1);
1088 ret = fnci->fn(c, d, &dp);
1089 de_dbg_indent(c, -1);
1090 if(!ret) return 0;
1093 return (dp.rectype==0x0e) ? 0 : 1; // 0x0e = EOF record
1096 static void do_emf_record_list(deark *c, lctx *d)
1098 i64 pos = 0;
1099 i64 recpos;
1100 i64 recsize_bytes;
1101 i64 count = 0;
1103 // The entire EMF file is a sequence of records. The header record
1104 // (type 0x01) is expected to appear first.
1106 while(1) {
1107 recpos = pos;
1109 if(recpos+8 > c->infile->len) {
1110 de_err(c, "Unexpected end of file (no EOF record found)");
1111 goto done;
1114 recsize_bytes = de_getu32le(recpos+4);
1115 if(recpos+recsize_bytes > c->infile->len) {
1116 de_err(c, "Unexpected end of file in record %d", (int)count);
1117 goto done;
1119 if(recsize_bytes<8) {
1120 de_err(c, "Bad record size (%d) at %d", (int)recsize_bytes, (int)recpos);
1121 goto done;
1124 if(!do_emf_record(c, d, count, recpos, recsize_bytes)) {
1125 break;
1128 pos += recsize_bytes;
1129 count++;
1132 done:
1136 // Look ahead to figure out if this seem to be an EMF+ file.
1137 // Sets d->is_emfplus.
1138 static void detect_emfplus(deark *c, lctx *d)
1140 i64 nextpos;
1141 nextpos = de_getu32le(4);
1142 if(de_getu32le(nextpos)==0x46 &&
1143 de_getu32le(nextpos+12)==CODE_EMFPLUS)
1145 d->is_emfplus = 1;
1149 static void de_run_emf(deark *c, de_module_params *mparams)
1151 lctx *d = NULL;
1153 d = de_malloc(c, sizeof(lctx));
1155 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_WINDOWS1252);
1157 detect_emfplus(c, d);
1159 if(d->is_emfplus)
1160 de_declare_fmt(c, "EMF+");
1161 else
1162 de_declare_fmt(c, "EMF");
1164 do_emf_record_list(c, d);
1166 de_free(c, d);
1169 static int de_identify_emf(deark *c)
1171 if(!dbuf_memcmp(c->infile, 0, "\x01\x00\x00\x00", 4) &&
1172 !dbuf_memcmp(c->infile, 40, " EMF", 4))
1174 return 100;
1176 return 0;
1179 void de_module_emf(deark *c, struct deark_module_info *mi)
1181 mi->id = "emf";
1182 mi->desc = "Enhanced Windows Metafile";
1183 mi->desc2 = "extract bitmaps only";
1184 mi->run_fn = de_run_emf;
1185 mi->identify_fn = de_identify_emf;