riff: Basic support for extracting RDIB images
[deark.git] / modules / ole1.c
blobd6bc2115661d372c50ff64e4e2c4941625b4161e
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // OLE1.0 objects
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_ole1);
// Per-run state shared by the OLE1 parsing functions in this file.
typedef struct localctx_struct {
	int input_encoding; // Encoding used when reading the strings stored in the object
	int extract_all; // Nonzero: also extract embedded objects of unrecognized type
} lctx;
// Returns a short, static, human-readable name for an OLE1 FormatID value.
// Values not listed here yield "?". The returned pointer is never NULL.
static const char *get_FormatID_name(unsigned int t)
{
	switch(t) {
	case 0: return "none";
	case 1: return "linked";
	case 2: return "embedded";
	case 3: return "static";
	case 5: return "presentation";
	default: break;
	}
	return "?";
}
30 static void do_static_bitmap(deark *c, lctx *d, i64 pos1)
32 i64 dlen;
33 i64 pos = pos1;
35 pos += 8; // ??
36 dlen = de_getu32le_p(&pos);
37 de_dbg(c, "bitmap size: %d", (int)dlen);
39 de_dbg(c, "BITMAP16 at %"I64_FMT, pos);
40 de_dbg_indent(c, 1);
41 de_run_module_by_id_on_slice2(c, "ddb", "N", c->infile, pos,
42 c->infile->len-pos);
43 de_dbg_indent(c, -1);
46 // Presentation object, or WRI-static-"OLE" object.
47 // pos1 points to the first field after FormatID (classname/typename)
48 static int do_ole_object_presentation(deark *c, lctx *d,
49 i64 pos1, i64 len, unsigned int formatID, i64 *bytes_consumed)
51 i64 pos = pos1;
52 i64 stringlen;
53 struct de_stringreaderdata *classname_srd = NULL;
54 struct de_stringreaderdata *clipfmtname_srd = NULL;
55 const char *name;
56 int retval = 0;
58 name = (formatID==3)?"static":"presentation";
59 stringlen = de_getu32le_p(&pos);
60 classname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
61 d->input_encoding);
62 de_dbg(c, "%s ClassName: \"%s\"", name, ucstring_getpsz(classname_srd->str));
63 pos += stringlen;
65 // TODO: Better handle the fields between ClassName and PresentationData
66 // (and maybe after PresentationData?).
68 if(!de_strcmp(classname_srd->sz, "DIB")) {
69 pos += 12;
70 de_dbg_indent(c, 1);
71 de_run_module_by_id_on_slice(c, "dib", NULL, c->infile, pos,
72 pos1+len-pos);
73 de_dbg_indent(c, -1);
74 goto done; // FIXME, calculate length
76 else if(!de_strcmp(classname_srd->sz, "METAFILEPICT")) {
77 i64 dlen;
78 pos += 8; // ??
79 dlen = de_getu32le_p(&pos);
80 de_dbg(c, "metafile size: %d", (int)dlen); // Includes "mfp", apparently
81 pos += 8; // "mfp" struct
82 dbuf_create_file_from_slice(c->infile, pos, dlen-8, "wmf", NULL, 0);
83 pos += dlen-8;
85 else if(!de_strcmp(classname_srd->sz, "BITMAP")) {
86 do_static_bitmap(c, d, pos);
87 goto done; // FIXME, calculate length
89 else {
90 u32 clipfmt;
91 i64 clp_data_size;
92 u8 buf[16];
94 // This is a GenericPresentationObject, a.k.a. clipboard format,
95 // either a StandardClipboardFormatPresentationObject
96 // or a RegisteredClipboardFormatPresentationObject.
97 clipfmt = (u32)de_getu32le_p(&pos);
98 de_dbg(c, "clipboard fmt: %u", (unsigned int)clipfmt);
100 if(clipfmt==0) {
101 stringlen = de_getu32le_p(&pos);
102 clipfmtname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
103 d->input_encoding);
104 de_dbg(c, "clipboard fmt name: \"%s\"", ucstring_getpsz(clipfmtname_srd->str));
105 pos += stringlen;
108 clp_data_size = de_getu32le_p(&pos);
109 de_dbg(c, "clipboard data size: %"I64_FMT, clp_data_size);
111 de_read(buf, pos, de_min_int((i64)sizeof(buf), clp_data_size));
113 if(clipfmtname_srd) {
114 if(!de_strcmp(classname_srd->sz, "PBrush") &&
115 buf[0]=='B' && buf[1]=='M')
117 dbuf_create_file_from_slice(c->infile, pos, clp_data_size, "bmp", NULL, 0);
119 else {
120 de_warn(c, "OLE clipboard type (\"%s\"/\"%s\") is not supported",
121 ucstring_getpsz(classname_srd->str),
122 ucstring_getpsz(clipfmtname_srd->str));
125 else {
126 de_warn(c, "OLE clipboard type %u is not supported", (unsigned int)clipfmt);
129 pos += clp_data_size;
132 *bytes_consumed = pos-pos1;
133 retval = 1;
135 done:
136 de_destroy_stringreaderdata(c, classname_srd);
137 de_destroy_stringreaderdata(c, clipfmtname_srd);
138 return retval;
141 // Note: This function is based on reverse engineering, and may not be correct.
142 static int do_ole_package(deark *c, lctx *d, i64 pos1, i64 len)
144 i64 endpos = pos1+len;
145 i64 pos = pos1;
146 struct de_stringreaderdata *caption = NULL;
147 struct de_stringreaderdata *iconsrc = NULL;
148 de_ucstring *filename = NULL;
149 de_finfo *fi = NULL;
150 unsigned int type_code1, type_code2;
151 i64 n, fnlen, fsize;
152 int saved_indent_level;
153 int retval = 0;
155 de_dbg_indent_save(c, &saved_indent_level);
157 de_dbg(c, "package at %"I64_FMT", len=%"I64_FMT, pos, len);
158 de_dbg_indent(c, 1);
159 type_code1 = (unsigned int)de_getu16le_p(&pos);
160 de_dbg(c, "stream header code: %u", type_code1);
161 if(type_code1 != 2) {
162 de_dbg(c, "[unknown package format]");
163 goto done;
166 caption = dbuf_read_string(c->infile, pos, de_min_int(256, endpos-pos), 256,
167 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
168 if(!caption->found_nul) goto done;
169 de_dbg(c, "caption: \"%s\"", ucstring_getpsz_d(caption->str));
170 pos += caption->bytes_consumed;
172 iconsrc = dbuf_read_string(c->infile, pos, de_min_int(256, endpos-pos), 256,
173 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
174 if(!iconsrc->found_nul) goto done;
175 de_dbg(c, "icon source: \"%s\"", ucstring_getpsz_d(iconsrc->str));
176 pos += iconsrc->bytes_consumed;
178 n = de_getu16le_p(&pos);
179 de_dbg(c, "icon #: %d", (int)n);
181 type_code2 = (unsigned int)de_getu16le_p(&pos);
182 de_dbg(c, "package type: %u", type_code2);
184 if(type_code2!=3) {
185 // Code 1 apparently means "run a program".
186 de_dbg(c, "[not an embedded file]");
187 goto done;
190 // A package can contain an arbitrary embedded file, which we'll try to
191 // extract.
193 fnlen = de_getu32le_p(&pos);
194 if(pos+fnlen > endpos) goto done;
195 filename = ucstring_create(c);
196 dbuf_read_to_ucstring_n(c->infile, pos, fnlen, 256, filename, DE_CONVFLAG_STOP_AT_NUL,
197 d->input_encoding);
198 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(filename));
199 pos += fnlen;
201 fsize = de_getu32le_p(&pos);
202 de_dbg(c, "file size: %"I64_FMT, fsize);
203 if(pos+fsize > endpos) goto done;
205 fi = de_finfo_create(c);
206 de_finfo_set_name_from_ucstring(c, fi, filename, 0);
207 dbuf_create_file_from_slice(c->infile, pos, fsize, NULL, fi, 0);
208 retval = 1;
210 done:
211 de_destroy_stringreaderdata(c, caption);
212 de_destroy_stringreaderdata(c, iconsrc);
213 ucstring_destroy(filename);
214 de_finfo_destroy(c, fi);
215 de_dbg_indent_restore(c, saved_indent_level);
216 return retval;
219 static void extract_unknown_ole_obj(deark *c, lctx *d, i64 pos, i64 len,
220 struct de_stringreaderdata *classname_srd)
222 de_finfo *fi = NULL;
223 de_ucstring *s = NULL;
225 fi = de_finfo_create(c);
226 s = ucstring_create(c);
228 ucstring_append_sz(s, "oleobj", DE_ENCODING_LATIN1);
229 if(ucstring_isnonempty(classname_srd->str)) {
230 ucstring_append_sz(s, ".", DE_ENCODING_LATIN1);
231 ucstring_append_ucstring(s, classname_srd->str);
234 de_finfo_set_name_from_ucstring(c, fi, s, 0);
236 dbuf_create_file_from_slice(c->infile, pos, len, "bin", fi, 0);
238 ucstring_destroy(s);
239 de_finfo_destroy(c, fi);
242 static int do_ole_object(deark *c, lctx *d, i64 pos1, i64 len, int exact_size_known,
243 int is_presentation, i64 *bytes_consumed);
245 static int do_ole_object_linked(deark *c, lctx *d,
246 i64 pos1, i64 len, int exact_size_known, i64 *bytes_consumed)
248 i64 pos = pos1;
249 i64 stringlen;
250 i64 bytes_consumed2 = 0;
251 int ret;
252 struct de_stringreaderdata *classname_srd = NULL;
253 struct de_stringreaderdata *topicname_srd = NULL;
254 struct de_stringreaderdata *itemname_srd = NULL;
255 struct de_stringreaderdata *networkname_srd = NULL;
256 int retval = 0;
258 stringlen = de_getu32le_p(&pos);
259 classname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
260 d->input_encoding);
261 de_dbg(c, "embedded ClassName: \"%s\"", ucstring_getpsz(classname_srd->str));
262 pos += stringlen;
264 stringlen = de_getu32le_p(&pos);
265 topicname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
266 d->input_encoding);
267 de_dbg(c, "TopicName/filename: \"%s\"", ucstring_getpsz(topicname_srd->str));
268 pos += stringlen;
270 stringlen = de_getu32le_p(&pos);
271 itemname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
272 d->input_encoding);
273 de_dbg(c, "ItemName/params: \"%s\"", ucstring_getpsz(itemname_srd->str));
274 pos += stringlen;
276 stringlen = de_getu32le_p(&pos);
277 networkname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
278 d->input_encoding);
279 de_dbg(c, "NetworkName: \"%s\"", ucstring_getpsz(networkname_srd->str));
280 pos += stringlen;
282 pos += 4; // reserved
283 pos += 4; // LinkUpdateOption
285 // Nested "presentation" object
286 ret = do_ole_object(c, d, pos, pos1+len-pos, exact_size_known, 1,
287 &bytes_consumed2);
288 if(!ret) goto done;
289 pos += bytes_consumed2;
291 *bytes_consumed = pos-pos1;
292 retval = 1;
294 done:
295 de_destroy_stringreaderdata(c, classname_srd);
296 de_destroy_stringreaderdata(c, topicname_srd);
297 de_destroy_stringreaderdata(c, itemname_srd);
298 de_destroy_stringreaderdata(c, networkname_srd);
299 return retval;
302 // pos1 points to the first field after FormatID (classname/typename)
303 static int do_ole_object_embedded(deark *c, lctx *d,
304 i64 pos1, i64 len, int exact_size_known, i64 *bytes_consumed)
306 i64 pos = pos1;
307 i64 stringlen;
308 i64 data_len;
309 i64 bytes_consumed2 = 0;
310 int ret;
311 int recognized = 0;
312 const char *ext = NULL;
313 int handled = 0;
314 u8 buf[16];
315 struct de_stringreaderdata *classname_srd = NULL;
316 struct de_stringreaderdata *topicname_srd = NULL;
317 struct de_stringreaderdata *itemname_srd = NULL;
318 int retval = 0;
320 // TODO: This code (for the next 3 fields) is duplicated in the function for
321 // "linked" objects.
323 stringlen = de_getu32le_p(&pos);
324 classname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
325 d->input_encoding);
326 de_dbg(c, "embedded ClassName: \"%s\"", ucstring_getpsz(classname_srd->str));
327 pos += stringlen;
329 stringlen = de_getu32le_p(&pos);
330 topicname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
331 d->input_encoding);
332 de_dbg(c, "TopicName/filename: \"%s\"", ucstring_getpsz(topicname_srd->str));
333 pos += stringlen;
335 stringlen = de_getu32le_p(&pos);
336 itemname_srd = dbuf_read_string(c->infile, pos, stringlen, 260, DE_CONVFLAG_STOP_AT_NUL,
337 d->input_encoding);
338 de_dbg(c, "ItemName/params: \"%s\"", ucstring_getpsz(itemname_srd->str));
339 pos += stringlen;
341 data_len = de_getu32le_p(&pos);
342 de_dbg(c, "NativeData: pos=%"I64_FMT", len=%"I64_FMT, pos, data_len);
344 // TODO: I don't know the extent to which it's better to sniff the data, or
345 // rely on the typename.
346 de_read(buf, pos, sizeof(buf));
348 if(!de_strcmp(classname_srd->sz, "Package")) {
349 recognized = 1;
350 handled = do_ole_package(c, d, pos, data_len);
352 else if(!de_strncmp(classname_srd->sz, "Word.Document.", 14) ||
353 !de_strncmp(classname_srd->sz, "Word.Picture.", 13))
355 ext = "doc";
357 else if (!de_strncmp(classname_srd->sz, "Excel.Chart.", 12) ||
358 !de_strcmp(classname_srd->sz, "ExcelWorksheet"))
360 ext = "xls";
362 else if(!de_strcmp(classname_srd->sz, "CDraw") &&
363 !de_memcmp(&buf[0], (const void*)"RIFF", 4) &&
364 !de_memcmp(&buf[8], (const void*)"CDR", 3) )
366 ext = "cdr"; // Looks like CorelDRAW
368 else if (!de_strcmp(classname_srd->sz, "PaintShopPro") &&
369 !de_memcmp(&buf[0], (const void*)"\x28\0\0\0", 4))
371 de_run_module_by_id_on_slice(c, "dib", NULL, c->infile, pos, data_len);
372 handled = 1;
374 if(!de_strcmp(classname_srd->sz, "ShapewareVISIO20")) {
375 ext = "vsd";
377 else if(buf[0]=='B' && buf[1]=='M') {
378 // TODO: Detect true length of data?
379 // TODO: This detection may be too aggressive.
380 ext = "bmp";
383 if(ext && !handled) {
384 dbuf_create_file_from_slice(c->infile, pos, data_len, ext, NULL, 0);
385 handled = 1;
388 if(!handled) {
389 if(d->extract_all) {
390 extract_unknown_ole_obj(c, d, pos, data_len, classname_srd);
392 else if(!recognized) {
393 de_warn(c, "Unknown/unsupported type of OLE object (\"%s\") at %"I64_FMT,
394 ucstring_getpsz(classname_srd->str), pos1);
398 pos += data_len;
399 // Nested "presentation" object
400 ret = do_ole_object(c, d, pos, pos1+len-pos, exact_size_known, 1,
401 &bytes_consumed2);
402 if(!ret) goto done;
403 pos += bytes_consumed2;
405 *bytes_consumed = pos-pos1;
406 retval = 1;
407 done:
408 de_destroy_stringreaderdata(c, classname_srd);
409 de_destroy_stringreaderdata(c, topicname_srd);
410 de_destroy_stringreaderdata(c, itemname_srd);
411 return retval;
414 static int do_ole_object(deark *c, lctx *d, i64 pos1, i64 len, int exact_size_known,
415 int is_presentation, i64 *bytes_consumed)
417 int saved_indent_level;
418 i64 pos = pos1;
419 i64 nbytesleft;
420 i64 bytes_consumed2;
421 int ret;
422 unsigned int n;
423 unsigned int formatID;
424 int retval = 0;
426 de_dbg_indent_save(c, &saved_indent_level);
427 if(len<8) goto done;
428 de_dbg(c, "OLE object at %"I64_FMT", len%s%"I64_FMT, pos1,
429 (exact_size_known?"=":DE_CHAR_LEQ), len);
430 de_dbg_indent(c, 1);
432 n = (unsigned int)de_getu32le_p(&pos);
433 de_dbg(c, "OLEVersion: 0x%08x", n);
435 formatID = (unsigned int)de_getu32le_p(&pos);
436 de_dbg(c, "FormatID: %u (%s)", formatID, get_FormatID_name(formatID));
438 nbytesleft = pos1+len-pos;
439 if(formatID==1 && !is_presentation) {
440 ret = do_ole_object_linked(c, d, pos, nbytesleft, exact_size_known, &bytes_consumed2);
441 if(!ret) goto done;
442 pos += bytes_consumed2;
444 else if(formatID==2 && !is_presentation) {
445 ret = do_ole_object_embedded(c, d, pos, nbytesleft, exact_size_known, &bytes_consumed2);
446 if(!ret) goto done;
447 pos += bytes_consumed2;
449 else if(formatID==3) {
450 ret = do_ole_object_presentation(c, d, pos, nbytesleft, formatID, &bytes_consumed2);
451 if(!ret) goto done;
452 pos += bytes_consumed2;
454 else if(formatID==5 && is_presentation) {
455 ret = do_ole_object_presentation(c, d, pos, nbytesleft, formatID, &bytes_consumed2);
456 if(!ret) goto done;
457 pos += bytes_consumed2;
459 else if(formatID==0 && is_presentation) {
462 else {
463 de_dbg(c, "[unsupported OLE FormatID]");
464 goto done;
467 *bytes_consumed = pos-pos1;
468 retval = 1;
470 done:
471 de_dbg_indent_restore(c, saved_indent_level);
472 return retval;
475 static void de_run_ole1(deark *c, de_module_params *mparams)
477 lctx *d = NULL;
478 i64 bytes_consumed = 0;
479 int ret;
480 int u_flag;
482 if(mparams) {
483 mparams->out_params.flags = 0;
486 d = de_malloc(c, sizeof(lctx));
488 d->input_encoding = de_get_input_encoding(c, mparams, DE_ENCODING_WINDOWS1252);
489 // Use the "U" code if the exact size of the object is unknown. This will
490 // improve the debug messages.
491 u_flag = de_havemodcode(c, mparams, 'U');
492 d->extract_all = de_get_ext_option_bool(c, "ole1:extractall",
493 ((c->extract_level>=2)?1:0));
495 ret = do_ole_object(c, d, 0, c->infile->len, (u_flag?0:1),
496 0, &bytes_consumed);
497 if(ret) {
498 if(mparams) {
499 mparams->out_params.flags |= 0x1;
500 mparams->out_params.int64_1 = bytes_consumed;
502 de_dbg3(c, "ole1: calculated size=%"I64_FMT, bytes_consumed);
504 else {
505 de_dbg3(c, "ole1: failed to calculate object size");
508 de_free(c, d);
511 void de_module_ole1(deark *c, struct deark_module_info *mi)
513 mi->id = "ole1";
514 mi->desc = "OLE1.0 objects";
515 mi->run_fn = de_run_ole1;
516 mi->identify_fn = NULL;
517 mi->flags |= DE_MODFLAG_HIDDEN;