New "ea_data" module
[deark.git] / modules / officeart.c
blob343caed9363bd91bba07fcc7fd7293a570d47d7e
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Office Art / Office Drawing / "Escher" / "Blip"
6 // Refer to Microsoft's "[MS-ODRAW]" document.
7 // Found in some PowerPoint and Publisher files.
9 #include <deark-config.h>
10 #include <deark-private.h>
11 #include <deark-fmtutil.h>
12 DE_DECLARE_MODULE(de_module_officeart);
14 struct officeart_rectype {
15 u16 rectype;
16 u16 flags;
17 const char *name;
18 void *reserved;
21 static const struct officeart_rectype officeart_rectype_arr[] = {
22 { 0xf000, 0, "DggContainer", NULL },
23 { 0xf001, 0, "BStoreContainer", NULL },
24 { 0xf006, 0, "FDGGBlock", NULL },
25 { 0xf007, 0, "FBSE", NULL },
26 { 0xf00b, 0, "FOPT", NULL },
27 { 0xf01a, 0, "BlipEMF", NULL },
28 { 0xf01b, 0, "BlipWMF", NULL },
29 { 0xf01c, 0, "BlipPICT", NULL },
30 { 0xf01d, 0, "BlipJPEG", NULL },
31 { 0xf01e, 0, "BlipPNG", NULL },
32 { 0xf01f, 0, "BlipDIB", NULL },
33 { 0xf029, 0, "BlipTIFF", NULL },
34 { 0xf02a, 0, "BlipJPEG", NULL },
35 { 0xf11a, 0, "ColorMRUContainer", NULL },
36 { 0xf11e, 0, "SplitMenuColorContainer", NULL },
37 { 0xf122, 0, "TertiaryFOPT", NULL }
40 static const char *get_officeart_rectype_name(unsigned int t)
42 size_t k;
44 for(k=0; k<DE_ARRAYCOUNT(officeart_rectype_arr); k++) {
45 if((unsigned int)officeart_rectype_arr[k].rectype == t) {
46 return officeart_rectype_arr[k].name;
49 return "?";
52 struct officeartctx {
53 #define OACTX_STACKSIZE 10
54 i64 container_end_stack[OACTX_STACKSIZE];
55 size_t container_end_stackptr;
57 // Passed to do_OfficeArtStream_record():
58 i64 record_pos;
60 // Returned from do_OfficeArtStream_record():
61 i64 record_bytes_consumed;
62 int is_container;
63 i64 container_endpos; // valid if (is_container)
66 static int do_OfficeArtStream_record(deark *c, struct officeartctx *oactx,
67 dbuf *inf)
69 unsigned int rectype;
70 unsigned int recinstance;
71 unsigned int recver;
72 unsigned int n;
73 i64 reclen;
74 i64 extra_bytes = 0;
75 dbuf *outf = NULL;
76 const char *ext = "bin";
77 int has_metafileHeader = 0;
78 int has_zlib_cmpr = 0;
79 int is_dib = 0;
80 int is_pict = 0;
81 int retval = 0;
82 int is_blip = 0;
83 int saved_indent_level;
84 i64 pos1 = oactx->record_pos;
85 i64 pos = pos1;
87 oactx->record_bytes_consumed = 0;
88 oactx->is_container = 0;
89 oactx->container_endpos = 0;
91 de_dbg_indent_save(c, &saved_indent_level);
93 n = (unsigned int)dbuf_getu16le_p(inf, &pos);
94 recver = n&0x0f;
95 if(recver==0x0f) oactx->is_container = 1;
96 recinstance = n>>4;
98 rectype = (unsigned int)dbuf_getu16le_p(inf, &pos);
99 if((rectype&0xf000)!=0xf000) {
100 // Assume this is the end of data, not necessarily an error.
101 goto done;
104 reclen = dbuf_getu32le_p(inf, &pos);
106 de_dbg(c, "record at [%"I64_FMT"], ver=0x%x, inst=0x%03x, type=0x%04x (%s), dlen=%"I64_FMT,
107 pos1, recver, recinstance,
108 rectype, get_officeart_rectype_name(rectype), reclen);
109 de_dbg_indent(c, 1);
111 if(pos + reclen > inf->len) goto done;
112 if(oactx->is_container) {
113 // A container is described as *being* its header record. It does have
114 // a recLen, but it should be safe to ignore it if all we care about is
115 // reading the records at a low level.
116 oactx->record_bytes_consumed = 8;
117 oactx->container_endpos = oactx->record_pos + 8 + reclen;
119 else {
120 oactx->record_bytes_consumed = (pos-pos1) + reclen;
122 retval = 1;
124 if(rectype>=0xf018 && rectype<=0xf117) is_blip = 1;
125 if(!is_blip) goto done;
127 if(rectype==0xf01a) {
128 ext = "emf";
129 if(recinstance==0x3d4) extra_bytes=50;
130 else if(recinstance==0x3d5) extra_bytes=66;
131 if(extra_bytes) has_metafileHeader=1;
133 else if(rectype==0xf01b) {
134 ext = "wmf";
135 if(recinstance==0x216) extra_bytes=50;
136 else if(recinstance==0x217) extra_bytes=66;
137 if(extra_bytes) has_metafileHeader=1;
139 else if(rectype==0xf01c) {
140 ext = "pict";
141 if(recinstance==0x542) extra_bytes=50;
142 else if(recinstance==0x543) extra_bytes=66;
143 if(extra_bytes) has_metafileHeader=1;
144 is_pict = 1;
146 else if(rectype==0xf01d) {
147 ext = "jpg";
148 if(recinstance==0x46a || recinstance==0x6e2) extra_bytes = 17;
149 else if(recinstance==0x46b || recinstance==0x6e3) extra_bytes = 33;
151 else if(rectype==0xf01e) {
152 ext = "png";
153 if(recinstance==0x6e0) extra_bytes = 17;
154 else if(recinstance==0x6e1) extra_bytes = 33;
156 else if(rectype==0xf01f) {
157 ext = "dib";
158 if(recinstance==0x7a8) extra_bytes = 17;
159 else if(recinstance==0x7a9) extra_bytes = 33;
160 if(extra_bytes) is_dib=1;
162 else if(rectype==0xf029) {
163 ext = "tif";
164 if(recinstance==0x6e4) extra_bytes = 17;
165 else if(recinstance==0x6e5) extra_bytes = 33;
168 if(extra_bytes==0) {
169 de_warn(c, "Unsupported OfficeArtBlip format (recInstance=0x%03x, recType=0x%04x)",
170 recinstance, rectype);
171 goto done;
174 if(has_metafileHeader) {
175 // metafileHeader starts at pos+extra_bytes-34
176 u8 cmpr = dbuf_getbyte(inf, pos+extra_bytes-2);
177 // 0=DEFLATE, 0xfe=NONE
178 de_dbg(c, "compression type: %u", (unsigned int)cmpr);
179 has_zlib_cmpr = (cmpr==0);
182 pos += extra_bytes;
184 if(is_dib) {
185 de_run_module_by_id_on_slice2(c, "dib", "X", inf, pos, reclen-extra_bytes);
186 goto done;
189 outf = dbuf_create_output_file(c, ext, NULL, DE_CREATEFLAG_IS_AUX);
190 if(is_pict) {
191 dbuf_write_zeroes(outf, 512);
194 if(has_zlib_cmpr) {
195 i64 cmprlen;
197 cmprlen = reclen-extra_bytes;
198 fmtutil_decompress_deflate(inf, pos, cmprlen, outf, 0, NULL, DE_DEFLATEFLAG_ISZLIB);
199 de_dbg(c, "decompressed %"I64_FMT" to %"I64_FMT" bytes", cmprlen, outf->len);
201 else {
202 dbuf_copy(inf, pos, reclen-extra_bytes, outf);
205 done:
206 de_dbg_indent_restore(c, saved_indent_level);
207 dbuf_close(outf);
208 return retval;
211 static void de_run_officeart(deark *c, de_module_params *mparams)
213 struct officeartctx * oactx = NULL;
214 dbuf *inf = c->infile;
215 int saved_indent_level;
217 de_dbg_indent_save(c, &saved_indent_level);
218 oactx = de_malloc(c, sizeof(struct officeartctx));
220 oactx->record_pos = 0;
221 while(1) {
222 i64 ret;
224 if(oactx->record_pos >= inf->len-8) break;
226 // Have we reached the end of any containers?
227 while(oactx->container_end_stackptr>0 &&
228 oactx->record_pos>=oactx->container_end_stack[oactx->container_end_stackptr-1])
230 oactx->container_end_stackptr--;
231 de_dbg_indent(c, -1);
234 ret = do_OfficeArtStream_record(c, oactx, inf);
235 if(!ret || oactx->record_bytes_consumed<=0) break;
237 oactx->record_pos += oactx->record_bytes_consumed;
239 // Is a new container open?
240 if(oactx->is_container && oactx->container_end_stackptr<OACTX_STACKSIZE) {
241 oactx->container_end_stack[oactx->container_end_stackptr++] = oactx->container_endpos;
242 de_dbg_indent(c, 1);
246 de_free(c, oactx);
247 de_dbg_indent_restore(c, saved_indent_level);
250 void de_module_officeart(deark *c, struct deark_module_info *mi)
252 mi->id = "officeart";
253 mi->desc = "Office Art data";
254 mi->run_fn = de_run_officeart;
255 mi->flags |= DE_MODFLAG_HIDDEN;