1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
// Declares the module registration function de_module_ole1, which is
// defined at the end of this file.
9 DE_DECLARE_MODULE(de_module_ole1
);
// Per-run context for this module. The members are not visible in this
// excerpt; code below accesses ->input_encoding and ->extract_all through
// pointers of type lctx (presumably the typedef name of this struct -- confirm).
11 typedef struct localctx_struct
{
// Selects a short, human-readable name for an OLE1 FormatID value, used in
// debug output by do_ole_object().
//   t: the FormatID value.
// Visible mapping: 0="none", 1="linked", 2="embedded", 3="static",
// 5="presentation"; every other value (including 4) gets "?".
16 static const char *get_FormatID_name(unsigned int t
)
20 case 0: name
="none"; break;
21 case 1: name
="linked"; break;
22 case 2: name
="embedded"; break;
23 case 3: name
="static"; break;
24 case 5: name
="presentation"; break;
// Fallback for any FormatID not listed above.
25 default: name
="?"; break;
// Handles the data of a "BITMAP"-class presentation object: reads a 32-bit
// little-endian size field, logs it, and passes the data that follows to the
// "ddb" module.
//   pos1: start offset in c->infile (passed by do_ole_object_presentation
//         as its current read position).
30 static void do_static_bitmap(deark
*c
, lctx
*d
, i64 pos1
)
// Size field preceding the bitmap. The "_p" getter takes &pos, so it
// presumably advances pos past the field -- confirm against its definition.
36 dlen
= de_getu32le_p(&pos
);
37 de_dbg(c
, "bitmap size: %d", (int)dlen
);
39 de_dbg(c
, "BITMAP16 at %"I64_FMT
, pos
);
// Delegate decoding to the "ddb" module with option code "N". The remaining
// arguments of this call (the slice length) are not visible in this excerpt.
41 de_run_module_by_id_on_slice2(c
, "ddb", "N", c
->infile
, pos
,
// Parses a presentation-type object and tries to extract or decode its data.
// Called from do_ole_object() for FormatID 3, and for FormatID 5 when
// is_presentation is set.
//   pos1:           offset of the ClassName length field.
//   len:            number of bytes available starting at pos1.
//   formatID:       the FormatID already read by the caller.
//   bytes_consumed: on the path that reaches the assignment near the end,
//                   receives pos-pos1.
// The return value is not visible in this excerpt.
46 // Presentation object, or WRI-static-"OLE" object.
47 // pos1 points to the first field after FormatID (classname/typename)
48 static int do_ole_object_presentation(deark
*c
, lctx
*d
,
49 i64 pos1
, i64 len
, unsigned int formatID
, i64
*bytes_consumed
)
53 struct de_stringreaderdata
*classname_srd
= NULL
;
54 struct de_stringreaderdata
*clipfmtname_srd
= NULL
;
// Label used only in the ClassName debug message below.
58 name
= (formatID
==3)?"static":"presentation";
// ClassName: a 32-bit length followed by the string bytes.
// NOTE(review): 260 is presumably the maximum number of bytes to read or
// store -- confirm against dbuf_read_string().
59 stringlen
= de_getu32le_p(&pos
);
60 classname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
62 de_dbg(c
, "%s ClassName: \"%s\"", name
, ucstring_getpsz(classname_srd
->str
));
65 // TODO: Better handle the fields between ClassName and PresentationData
66 // (and maybe after PresentationData?).
// Dispatch on the class name. "DIB": hand the data to the "dib" module.
68 if(!de_strcmp(classname_srd
->sz
, "DIB")) {
71 de_run_module_by_id_on_slice(c
, "dib", NULL
, c
->infile
, pos
,
74 goto done
; // FIXME, calculate length
// "METAFILEPICT": a 32-bit size, then an 8-byte "mfp" struct that is skipped;
// the remaining dlen-8 bytes are written out as a .wmf file.
76 else if(!de_strcmp(classname_srd
->sz
, "METAFILEPICT")) {
79 dlen
= de_getu32le_p(&pos
);
80 de_dbg(c
, "metafile size: %d", (int)dlen
); // Includes "mfp", apparently
81 pos
+= 8; // "mfp" struct
82 dbuf_create_file_from_slice(c
->infile
, pos
, dlen
-8, "wmf", NULL
, 0);
// "BITMAP": handled by do_static_bitmap().
85 else if(!de_strcmp(classname_srd
->sz
, "BITMAP")) {
86 do_static_bitmap(c
, d
, pos
);
87 goto done
; // FIXME, calculate length
94 // This is a GenericPresentationObject, a.k.a. clipboard format,
95 // either a StandardClipboardFormatPresentationObject
96 // or a RegisteredClipboardFormatPresentationObject.
97 clipfmt
= (u32
)de_getu32le_p(&pos
);
98 de_dbg(c
, "clipboard fmt: %u", (unsigned int)clipfmt
);
// Clipboard format name: 32-bit length followed by the string. The
// condition (if any) guarding this read is not visible in this excerpt.
101 stringlen
= de_getu32le_p(&pos
);
102 clipfmtname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
104 de_dbg(c
, "clipboard fmt name: \"%s\"", ucstring_getpsz(clipfmtname_srd
->str
));
108 clp_data_size
= de_getu32le_p(&pos
);
109 de_dbg(c
, "clipboard data size: %"I64_FMT
, clp_data_size
);
// Read the start of the data for signature sniffing, requesting no more than
// sizeof(buf) and no more than the declared data size.
111 de_read(buf
, pos
, de_min_int((i64
)sizeof(buf
), clp_data_size
));
// Named clipboard format: only class "PBrush" whose data starts with "BM" is
// extracted (as .bmp); the warning below names the class and format.
113 if(clipfmtname_srd
) {
114 if(!de_strcmp(classname_srd
->sz
, "PBrush") &&
115 buf
[0]=='B' && buf
[1]=='M')
117 dbuf_create_file_from_slice(c
->infile
, pos
, clp_data_size
, "bmp", NULL
, 0);
120 de_warn(c
, "OLE clipboard type (\"%s\"/\"%s\") is not supported",
121 ucstring_getpsz(classname_srd
->str
),
122 ucstring_getpsz(clipfmtname_srd
->str
));
// Warning that identifies the format only by its numeric ID.
126 de_warn(c
, "OLE clipboard type %u is not supported", (unsigned int)clipfmt
);
// Step over the clipboard data.
129 pos
+= clp_data_size
;
132 *bytes_consumed
= pos
-pos1
;
// Cleanup. clipfmtname_srd is initialized to NULL and assigned only in the
// generic clipboard code above, so it may still be NULL here; presumably
// de_destroy_stringreaderdata() tolerates NULL -- confirm.
136 de_destroy_stringreaderdata(c
, classname_srd
);
137 de_destroy_stringreaderdata(c
, clipfmtname_srd
);
// Parses the native data of a "Package" object and, when it holds an
// embedded file, extracts that file under its stored filename.
//   pos1: offset of the package data in c->infile.
//   len:  length of the package data; endpos = pos1+len bounds the reads.
// Fields read, in order: stream header code (u16), caption (NUL-terminated),
// icon source (NUL-terminated), icon number (u16), package type (u16),
// filename length (u32), filename, file size (u32), file data.
// The return value is not visible here; the caller stores it in "handled".
141 // Note: This function is based on reverse engineering, and may not be correct.
142 static int do_ole_package(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
144 i64 endpos
= pos1
+len
;
146 struct de_stringreaderdata
*caption
= NULL
;
147 struct de_stringreaderdata
*iconsrc
= NULL
;
148 de_ucstring
*filename
= NULL
;
150 unsigned int type_code1
, type_code2
;
152 int saved_indent_level
;
155 de_dbg_indent_save(c
, &saved_indent_level
);
157 de_dbg(c
, "package at %"I64_FMT
", len=%"I64_FMT
, pos
, len
);
// A header code other than 2 is reported as an unknown package format.
159 type_code1
= (unsigned int)de_getu16le_p(&pos
);
160 de_dbg(c
, "stream header code: %u", type_code1
);
161 if(type_code1
!= 2) {
162 de_dbg(c
, "[unknown package format]");
// Caption: read at most 256 bytes and never past endpos; give up if no NUL
// terminator was found.
166 caption
= dbuf_read_string(c
->infile
, pos
, de_min_int(256, endpos
-pos
), 256,
167 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
168 if(!caption
->found_nul
) goto done
;
169 de_dbg(c
, "caption: \"%s\"", ucstring_getpsz_d(caption
->str
));
170 pos
+= caption
->bytes_consumed
;
// Icon source: same reading rules as the caption.
172 iconsrc
= dbuf_read_string(c
->infile
, pos
, de_min_int(256, endpos
-pos
), 256,
173 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
174 if(!iconsrc
->found_nul
) goto done
;
175 de_dbg(c
, "icon source: \"%s\"", ucstring_getpsz_d(iconsrc
->str
));
176 pos
+= iconsrc
->bytes_consumed
;
178 n
= de_getu16le_p(&pos
);
179 de_dbg(c
, "icon #: %d", (int)n
);
181 type_code2
= (unsigned int)de_getu16le_p(&pos
);
182 de_dbg(c
, "package type: %u", type_code2
);
// The test on type_code2 that selects this message is not visible here.
185 // Code 1 apparently means "run a program".
186 de_dbg(c
, "[not an embedded file]");
190 // A package can contain an arbitrary embedded file, which we'll try to
// Filename: 32-bit length, rejected if it would extend past endpos.
193 fnlen
= de_getu32le_p(&pos
);
194 if(pos
+fnlen
> endpos
) goto done
;
195 filename
= ucstring_create(c
);
196 dbuf_read_to_ucstring_n(c
->infile
, pos
, fnlen
, 256, filename
, DE_CONVFLAG_STOP_AT_NUL
,
198 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(filename
));
// File size: 32-bit length, rejected if the data would extend past endpos.
201 fsize
= de_getu32le_p(&pos
);
202 de_dbg(c
, "file size: %"I64_FMT
, fsize
);
203 if(pos
+fsize
> endpos
) goto done
;
// Write the embedded file, named after the stored filename.
205 fi
= de_finfo_create(c
);
206 de_finfo_set_name_from_ucstring(c
, fi
, filename
, 0);
207 dbuf_create_file_from_slice(c
->infile
, pos
, fsize
, NULL
, fi
, 0);
// Cleanup. Early "goto done" paths can arrive with iconsrc and filename
// still NULL, so these destroy functions presumably accept NULL -- confirm.
211 de_destroy_stringreaderdata(c
, caption
);
212 de_destroy_stringreaderdata(c
, iconsrc
);
213 ucstring_destroy(filename
);
214 de_finfo_destroy(c
, fi
);
215 de_dbg_indent_restore(c
, saved_indent_level
);
// Writes the raw bytes of an unrecognized object to an output file.
//   pos, len:      slice of c->infile to write.
//   classname_srd: the object's ClassName; when non-empty it is appended to
//                  the output name.
// The name is "oleobj", or "oleobj.<ClassName>", and the file is created
// with "bin" as its extension argument.
219 static void extract_unknown_ole_obj(deark
*c
, lctx
*d
, i64 pos
, i64 len
,
220 struct de_stringreaderdata
*classname_srd
)
223 de_ucstring
*s
= NULL
;
225 fi
= de_finfo_create(c
);
226 s
= ucstring_create(c
);
// Build the output name in s.
228 ucstring_append_sz(s
, "oleobj", DE_ENCODING_LATIN1
);
229 if(ucstring_isnonempty(classname_srd
->str
)) {
230 ucstring_append_sz(s
, ".", DE_ENCODING_LATIN1
);
231 ucstring_append_ucstring(s
, classname_srd
->str
);
234 de_finfo_set_name_from_ucstring(c
, fi
, s
, 0);
236 dbuf_create_file_from_slice(c
->infile
, pos
, len
, "bin", fi
, 0);
// Release the file-info object. (Release of s is not visible in this excerpt.)
239 de_finfo_destroy(c
, fi
);
// Forward declaration. do_ole_object() is defined further down, but
// do_ole_object_linked() and do_ole_object_embedded() call it to parse their
// nested presentation object, and it in turn calls both of them.
242 static int do_ole_object(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, int exact_size_known
,
243 int is_presentation
, i64
*bytes_consumed
);
// Parses a linked object (called from do_ole_object() for FormatID 1 when
// is_presentation is not set).
//   pos1:             offset of the first field after FormatID.
//   len:              number of bytes available starting at pos1.
//   exact_size_known: passed through to the nested do_ole_object() call.
//   bytes_consumed:   on the path that reaches the assignment near the end,
//                     receives pos-pos1.
// Reads four length-prefixed strings (ClassName, TopicName, ItemName,
// NetworkName), skips two 4-byte fields, then parses a nested presentation
// object. The return value is not visible in this excerpt.
245 static int do_ole_object_linked(deark
*c
, lctx
*d
,
246 i64 pos1
, i64 len
, int exact_size_known
, i64
*bytes_consumed
)
250 i64 bytes_consumed2
= 0;
252 struct de_stringreaderdata
*classname_srd
= NULL
;
253 struct de_stringreaderdata
*topicname_srd
= NULL
;
254 struct de_stringreaderdata
*itemname_srd
= NULL
;
255 struct de_stringreaderdata
*networkname_srd
= NULL
;
// Each string is a 32-bit length followed by the bytes.
258 stringlen
= de_getu32le_p(&pos
);
259 classname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
// NOTE(review): the label says "embedded ClassName" although this is the
// linked-object path; possibly copied from do_ole_object_embedded().
261 de_dbg(c
, "embedded ClassName: \"%s\"", ucstring_getpsz(classname_srd
->str
));
264 stringlen
= de_getu32le_p(&pos
);
265 topicname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
267 de_dbg(c
, "TopicName/filename: \"%s\"", ucstring_getpsz(topicname_srd
->str
));
270 stringlen
= de_getu32le_p(&pos
);
271 itemname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
273 de_dbg(c
, "ItemName/params: \"%s\"", ucstring_getpsz(itemname_srd
->str
));
276 stringlen
= de_getu32le_p(&pos
);
277 networkname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
279 de_dbg(c
, "NetworkName: \"%s\"", ucstring_getpsz(networkname_srd
->str
));
// Two 4-byte fields are skipped without being interpreted.
282 pos
+= 4; // reserved
283 pos
+= 4; // LinkUpdateOption
// The nested call gets the bytes remaining in this object (pos1+len-pos) and
// is_presentation=1; its final argument is not visible in this excerpt.
285 // Nested "presentation" object
286 ret
= do_ole_object(c
, d
, pos
, pos1
+len
-pos
, exact_size_known
, 1,
289 pos
+= bytes_consumed2
;
291 *bytes_consumed
= pos
-pos1
;
// Cleanup of the four string readers.
295 de_destroy_stringreaderdata(c
, classname_srd
);
296 de_destroy_stringreaderdata(c
, topicname_srd
);
297 de_destroy_stringreaderdata(c
, itemname_srd
);
298 de_destroy_stringreaderdata(c
, networkname_srd
);
// Parses an embedded object (called from do_ole_object() for FormatID 2 when
// is_presentation is not set): reads ClassName, TopicName, ItemName and the
// NativeData length, tries to identify and extract or decode the native
// data, then parses the nested presentation object.
//   len:              number of bytes available starting at pos1.
//   exact_size_known: passed through to the nested do_ole_object() call.
//   bytes_consumed:   on the path that reaches the assignment near the end,
//                     receives pos-pos1.
// The return value is not visible in this excerpt.
302 // pos1 points to the first field after FormatID (classname/typename)
303 static int do_ole_object_embedded(deark
*c
, lctx
*d
,
304 i64 pos1
, i64 len
, int exact_size_known
, i64
*bytes_consumed
)
309 i64 bytes_consumed2
= 0;
// File extension chosen for the native data; stays NULL when none is chosen.
312 const char *ext
= NULL
;
315 struct de_stringreaderdata
*classname_srd
= NULL
;
316 struct de_stringreaderdata
*topicname_srd
= NULL
;
317 struct de_stringreaderdata
*itemname_srd
= NULL
;
320 // TODO: This code (for the next 3 fields) is duplicated in the function for
// Each string is a 32-bit length followed by the bytes.
323 stringlen
= de_getu32le_p(&pos
);
324 classname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
326 de_dbg(c
, "embedded ClassName: \"%s\"", ucstring_getpsz(classname_srd
->str
));
329 stringlen
= de_getu32le_p(&pos
);
330 topicname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
332 de_dbg(c
, "TopicName/filename: \"%s\"", ucstring_getpsz(topicname_srd
->str
));
335 stringlen
= de_getu32le_p(&pos
);
336 itemname_srd
= dbuf_read_string(c
->infile
, pos
, stringlen
, 260, DE_CONVFLAG_STOP_AT_NUL
,
338 de_dbg(c
, "ItemName/params: \"%s\"", ucstring_getpsz(itemname_srd
->str
));
341 data_len
= de_getu32le_p(&pos
);
342 de_dbg(c
, "NativeData: pos=%"I64_FMT
", len=%"I64_FMT
, pos
, data_len
);
// Read the start of the native data for the signature checks below.
// NOTE(review): this requests sizeof(buf) bytes regardless of data_len,
// whereas do_ole_object_presentation() clamps its equivalent read with
// de_min_int(); bytes beyond a short NativeData may be inspected here.
344 // TODO: I don't know the extent to which it's better to sniff the data, or
345 // rely on the typename.
346 de_read(buf
, pos
, sizeof(buf
));
// Class-name dispatch. "Package" objects go to do_ole_package().
348 if(!de_strcmp(classname_srd
->sz
, "Package")) {
350 handled
= do_ole_package(c
, d
, pos
, data_len
);
// Word and Excel classes: the branch bodies are not visible in this excerpt.
352 else if(!de_strncmp(classname_srd
->sz
, "Word.Document.", 14) ||
353 !de_strncmp(classname_srd
->sz
, "Word.Picture.", 13))
357 else if (!de_strncmp(classname_srd
->sz
, "Excel.Chart.", 12) ||
358 !de_strcmp(classname_srd
->sz
, "ExcelWorksheet"))
// "CDraw" needs "RIFF" at offset 0 and "CDR" at offset 8 before the
// extension is set to cdr.
362 else if(!de_strcmp(classname_srd
->sz
, "CDraw") &&
363 !de_memcmp(&buf
[0], (const void*)"RIFF", 4) &&
364 !de_memcmp(&buf
[8], (const void*)"CDR", 3) )
366 ext
= "cdr"; // Looks like CorelDRAW
// "PaintShopPro" data starting with the bytes 28 00 00 00 is handed to the
// "dib" module.
368 else if (!de_strcmp(classname_srd
->sz
, "PaintShopPro") &&
369 !de_memcmp(&buf
[0], (const void*)"\x28\0\0\0", 4))
371 de_run_module_by_id_on_slice(c
, "dib", NULL
, c
->infile
, pos
, data_len
);
// Further checks: class "ShapewareVISIO20", or a "BM" signature. The
// branch bodies are not visible in this excerpt.
374 if(!de_strcmp(classname_srd
->sz
, "ShapewareVISIO20")) {
377 else if(buf
[0]=='B' && buf
[1]=='M') {
378 // TODO: Detect true length of data?
379 // TODO: This detection may be too aggressive.
// If an extension was chosen and the data was not already handled, write
// the native data out with that extension.
383 if(ext
&& !handled
) {
384 dbuf_create_file_from_slice(c
->infile
, pos
, data_len
, ext
, NULL
, 0);
// Raw extraction as "oleobj[.ClassName]"; the condition selecting this
// branch is not visible here.
390 extract_unknown_ole_obj(c
, d
, pos
, data_len
, classname_srd
);
392 else if(!recognized
) {
393 de_warn(c
, "Unknown/unsupported type of OLE object (\"%s\") at %"I64_FMT
,
394 ucstring_getpsz(classname_srd
->str
), pos1
);
// The nested call gets the bytes remaining in this object (pos1+len-pos) and
// is_presentation=1; its final argument is not visible in this excerpt.
399 // Nested "presentation" object
400 ret
= do_ole_object(c
, d
, pos
, pos1
+len
-pos
, exact_size_known
, 1,
403 pos
+= bytes_consumed2
;
405 *bytes_consumed
= pos
-pos1
;
// Cleanup of the three string readers.
408 de_destroy_stringreaderdata(c
, classname_srd
);
409 de_destroy_stringreaderdata(c
, topicname_srd
);
410 de_destroy_stringreaderdata(c
, itemname_srd
);
// Parses one OLE1 object: reads the OLEVersion and FormatID fields, then
// dispatches on FormatID to the linked, embedded, or presentation handler.
//   pos1:             offset of the object in c->infile.
//   len:              number of bytes available for the object.
//   exact_size_known: only affects the debug message here ("len=" vs.
//                     "len<="), and is passed to the linked and embedded
//                     handlers.
//   is_presentation:  nonzero when parsing the nested presentation object of
//                     a linked or embedded object.
//   bytes_consumed:   on the path that reaches the assignment near the end,
//                     receives pos-pos1.
// The return value is not visible in this excerpt.
414 static int do_ole_object(deark
*c
, lctx
*d
, i64 pos1
, i64 len
, int exact_size_known
,
415 int is_presentation
, i64
*bytes_consumed
)
417 int saved_indent_level
;
423 unsigned int formatID
;
426 de_dbg_indent_save(c
, &saved_indent_level
);
428 de_dbg(c
, "OLE object at %"I64_FMT
", len%s%"I64_FMT
, pos1
,
429 (exact_size_known
?"=":DE_CHAR_LEQ
), len
);
432 n
= (unsigned int)de_getu32le_p(&pos
);
433 de_dbg(c
, "OLEVersion: 0x%08x", n
);
435 formatID
= (unsigned int)de_getu32le_p(&pos
);
436 de_dbg(c
, "FormatID: %u (%s)", formatID
, get_FormatID_name(formatID
));
// Bytes left for the type-specific part, after the two header fields.
438 nbytesleft
= pos1
+len
-pos
;
// FormatID 1 (linked) and 2 (embedded) are accepted only for a top-level
// object, i.e. when is_presentation is not set.
439 if(formatID
==1 && !is_presentation
) {
440 ret
= do_ole_object_linked(c
, d
, pos
, nbytesleft
, exact_size_known
, &bytes_consumed2
);
442 pos
+= bytes_consumed2
;
444 else if(formatID
==2 && !is_presentation
) {
445 ret
= do_ole_object_embedded(c
, d
, pos
, nbytesleft
, exact_size_known
, &bytes_consumed2
);
447 pos
+= bytes_consumed2
;
// FormatID 3 (static) is accepted whether or not is_presentation is set.
449 else if(formatID
==3) {
450 ret
= do_ole_object_presentation(c
, d
, pos
, nbytesleft
, formatID
, &bytes_consumed2
);
452 pos
+= bytes_consumed2
;
// FormatID 5 (presentation) is accepted only for a nested object.
454 else if(formatID
==5 && is_presentation
) {
455 ret
= do_ole_object_presentation(c
, d
, pos
, nbytesleft
, formatID
, &bytes_consumed2
);
457 pos
+= bytes_consumed2
;
// FormatID 0 for a nested object: the branch body is not visible here.
459 else if(formatID
==0 && is_presentation
) {
// Presumably the fallback for every other FormatID/is_presentation
// combination -- the enclosing "else" is not visible in this excerpt.
463 de_dbg(c
, "[unsupported OLE FormatID]");
467 *bytes_consumed
= pos
-pos1
;
471 de_dbg_indent_restore(c
, saved_indent_level
);
// Module run function (assigned to mi->run_fn in de_module_ole1()). Parses
// the whole input file as one OLE1 object starting at offset 0 and reports
// the calculated object size back through mparams->out_params.
//   mparams: module parameters. NOTE(review): it is dereferenced below;
//            whether a NULL check precedes that is not visible here.
475 static void de_run_ole1(deark
*c
, de_module_params
*mparams
)
478 i64 bytes_consumed
= 0;
// Clear the output flags before doing any work.
483 mparams
->out_params
.flags
= 0;
486 d
= de_malloc(c
, sizeof(lctx
));
// Input encoding, with Windows-1252 passed as the default.
488 d
->input_encoding
= de_get_input_encoding(c
, mparams
, DE_ENCODING_WINDOWS1252
);
489 // Use the "U" code if the exact size of the object is unknown. This will
490 // improve the debug messages.
491 u_flag
= de_havemodcode(c
, mparams
, 'U');
// "ole1:extractall" option; the default passed is 1 when
// c->extract_level >= 2, otherwise 0.
492 d
->extract_all
= de_get_ext_option_bool(c
, "ole1:extractall",
493 ((c
->extract_level
>=2)?1:0));
// exact_size_known is the inverse of the 'U' flag. The remaining arguments
// of this call are not visible in this excerpt.
495 ret
= do_ole_object(c
, d
, 0, c
->infile
->len
, (u_flag
?0:1),
// Report the size to the caller: set bit 0x1 of the output flags and store
// the size in int64_1. The condition guarding this is not visible here;
// presumably it tests ret, given the failure message below.
499 mparams
->out_params
.flags
|= 0x1;
500 mparams
->out_params
.int64_1
= bytes_consumed
;
502 de_dbg3(c
, "ole1: calculated size=%"I64_FMT
, bytes_consumed
);
505 de_dbg3(c
, "ole1: failed to calculate object size");
// Fills in the module-info record for this module: description text, run
// function, no identify function, and the HIDDEN module flag.
//   mi: the module-info record to populate.
511 void de_module_ole1(deark
*c
, struct deark_module_info
*mi
)
514 mi
->desc
= "OLE1.0 objects";
515 mi
->run_fn
= de_run_ole1
;
// No identify function is registered for this module.
516 mi
->identify_fn
= NULL
;
517 mi
->flags
|= DE_MODFLAG_HIDDEN
;