New "ea_data" module
[deark.git] / modules / cfb.c
blobb5e75853c6ddaa7748a57635351065c1e908c85b
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Compound File Binary File Format
6 // a.k.a. "OLE Compound Document Format", and a million other names
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_cfb);
// Directory entry object type codes.
// (Presumably the values held in dir_entry_info.entry_type -- TODO confirm.)
#define OBJTYPE_EMPTY        0x00
#define OBJTYPE_STORAGE      0x01
#define OBJTYPE_STREAM       0x02
#define OBJTYPE_ROOT_STORAGE 0x05
18 struct dir_entry_info {
19 // Relative order in which to process this entry
20 // 1 = For the root storage object (for the mini sector stream)
21 // 2 = Other high priority streams
22 // 3 = Normal dir entries
23 int pass;
25 u8 entry_type;
27 int is_mini_stream;
28 i64 stream_size;
29 i64 normal_sec_id; // First SecID, valid if is_mini_stream==0
30 i64 minisec_id; // First MiniSecID, valid if is_mini_stream==1
31 struct de_stringreaderdata *fname_srd;
32 u8 clsid[16];
33 struct de_timestamp mod_time;
35 const char *entry_type_name;
36 i64 name_len_raw;
37 u8 node_color;
39 i32 child_id;
40 i32 sibling_id[2];
41 i32 parent_id; // If parent_id==0, entry is in root dir.
42 de_ucstring *path; // Full dir path. Used by non-root STORAGE objects.
44 u8 is_thumbsdb_catalog;
47 struct thumbsdb_catalog_entry {
48 u32 id;
49 struct de_stringreaderdata *fname_srd;
50 struct de_timestamp mod_time;
53 typedef struct localctx_struct {
54 #define SUBFMT_AUTO 0
55 #define SUBFMT_RAW 1
56 #define SUBFMT_THUMBSDB 2
57 #define SUBFMT_TIFF37680 3
58 int subformat_req;
59 int subformat_final;
60 int thumbsdb_msrgba_mode;
61 u8 extract_raw_streams;
62 u8 decode_streams;
63 u8 dump_dir_structure;
64 i64 minor_ver, major_ver;
65 i64 sec_size;
66 //i64 num_dir_sectors;
67 i64 num_fat_sectors;
68 i64 first_dir_sec_id;
69 i64 std_stream_min_size;
70 i64 first_minifat_sec_id;
71 i64 num_minifat_sectors;
72 i64 mini_sector_size;
73 i64 first_difat_sec_id;
74 i64 num_difat_sectors;
75 i64 num_fat_entries;
76 i64 num_dir_entries;
78 // The DIFAT is an array of the secIDs that contain the FAT.
79 // It is stored in a linked list of sectors, except that the first
80 // 109 array entries are stored in the header.
81 // After that, the last 4 bytes of each sector are the SecID of the
82 // sector containing the next part of the DIFAT, and the remaining
83 // bytes are the payload data.
84 dbuf *difat;
86 // The FAT is an array of "next sectors". Given a SecID, it will tell you
87 // the "next" SecID in the stream that uses that sector, or it may have
88 // a special code that means "end of chain", etc.
89 // All the bytes of a FAT sector are used for payload data.
90 dbuf *fat;
92 dbuf *minifat; // mini sector allocation table
93 dbuf *dir;
94 struct dir_entry_info *dir_entry; // array[num_dir_entries]
95 dbuf *mini_sector_stream;
97 i64 thumbsdb_catalog_num_entries;
98 struct thumbsdb_catalog_entry *thumbsdb_catalog;
100 int could_be_thumbsdb;
101 int thumbsdb_old_names_found;
102 int thumbsdb_new_names_found;
103 int thumbsdb_catalog_found;
104 } lctx;
106 struct clsid_id_struct {
107 const u8 clsid[16];
108 u32 mask;
109 u32 flags;
110 const char *name;
112 static const struct clsid_id_struct known_clsids[] = {
113 {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 0xffff, 0, "n/a"}, // This must be first.
114 {{0x00,0x02,0x08,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "Excel?"},
115 {{0x00,0x02,0x09,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "MS Word?"},
116 {{0x00,0x02,0x0d,0x0b,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "Outlook item?"},
117 {{0x00,0x02,0x12,0x01,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MS Publisher?"},
118 {{0x00,0x02,0x13,0x03,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MS Works WDB?"},
119 {{0x00,0x02,0x1a,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "Visio?"},
120 {{0x00,0x06,0xf0,0x46,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "Outlook item?"},
121 {{0x00,0x0c,0x10,0x84,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MSI?"},
122 {{0x1c,0xdd,0x8c,0x7b,0x81,0xc0,0x45,0xa0,0x9f,0xed,0x04,0x14,0x31,0x44,0xcc,0x1e}, 0xffff, 0, "3ds Max?"},
123 {{0x56,0x61,0x67,0x00,0xc1,0x54,0x11,0xce,0x85,0x53,0x00,0xaa,0x00,0xa1,0xf9,0x5b}, 0xffff, 0, "FlashPix?"},
124 {{0x64,0x81,0x8d,0x10,0x4f,0x9b,0x11,0xcf,0x86,0xea,0x00,0xaa,0x00,0xb9,0x29,0xe8}, 0xffff, 0, "PowerPoint?"}
126 #define EMPTY_CLSID (known_clsids[0].clsid)
128 static i64 sec_id_to_offset(deark *c, lctx *d, i64 sec_id)
130 if(sec_id<0) return 0;
131 return d->sec_size + sec_id * d->sec_size;
134 static i64 get_next_sec_id(deark *c, lctx *d, i64 cur_sec_id)
136 i64 next_sec_id;
138 if(cur_sec_id < 0) return -2;
139 if(!d->fat) return -2;
140 next_sec_id = dbuf_geti32le(d->fat, cur_sec_id*4);
141 return next_sec_id;
144 static i64 get_next_minisec_id(deark *c, lctx *d, i64 cur_minisec_id)
146 i64 next_minisec_id;
148 if(cur_minisec_id < 0) return -2;
149 if(!d->minifat) return -2;
150 next_minisec_id = dbuf_geti32le(d->minifat, cur_minisec_id*4);
151 return next_minisec_id;
154 static void describe_sec_id(deark *c, lctx *d, i64 sec_id,
155 char *buf, size_t buf_len)
157 i64 sec_offset;
159 if(sec_id >= 0) {
160 sec_offset = sec_id_to_offset(c, d, sec_id);
161 de_snprintf(buf, buf_len, "offs=%d", (int)sec_offset);
163 else if(sec_id == -1) {
164 de_strlcpy(buf, "free", buf_len);
166 else if(sec_id == -2) {
167 de_strlcpy(buf, "end of chain", buf_len);
169 else if(sec_id == -3) {
170 de_strlcpy(buf, "FAT SecID", buf_len);
172 else if(sec_id == -4) {
173 de_strlcpy(buf, "DIFAT SecID", buf_len);
175 else {
176 de_strlcpy(buf, "?", buf_len);
180 // Copy a stream (with a known byte size) to a dbuf.
181 static void copy_normal_stream_to_dbuf(deark *c, lctx *d, i64 first_sec_id,
182 i64 stream_startpos, i64 stream_size,
183 dbuf *outf)
185 i64 sec_id;
186 i64 bytes_left_to_copy;
187 i64 bytes_left_to_skip;
189 if(stream_size<=0) return;
190 if(stream_startpos+stream_size > c->infile->len) {
191 // This is a not-too-strict emergency brake. If the file has been
192 // truncated, we might still be able to process some of the data
193 // that is there.
194 stream_size = c->infile->len - stream_startpos;
197 bytes_left_to_copy = stream_size;
198 bytes_left_to_skip = stream_startpos;
199 sec_id = first_sec_id;
200 while(bytes_left_to_copy > 0) {
201 i64 sec_offs;
202 i64 bytes_to_copy;
203 i64 bytes_to_skip;
205 if(sec_id<0) break;
206 sec_offs = sec_id_to_offset(c, d, sec_id);
208 bytes_to_skip = bytes_left_to_skip;
209 if(bytes_to_skip > d->sec_size) bytes_to_skip = d->sec_size;
211 bytes_to_copy = d->sec_size - bytes_to_skip;
212 if(bytes_to_copy > bytes_left_to_copy) bytes_to_copy = bytes_left_to_copy;
214 dbuf_copy(c->infile, sec_offs + bytes_to_skip, bytes_to_copy, outf);
216 bytes_left_to_copy -= bytes_to_copy;
217 bytes_left_to_skip -= bytes_to_skip;
218 sec_id = get_next_sec_id(c, d, sec_id);
222 // Same as copy_normal_stream_to_dbuf(), but for mini streams.
223 static void copy_mini_stream_to_dbuf(deark *c, lctx *d, i64 first_minisec_id,
224 i64 stream_startpos, i64 stream_size,
225 dbuf *outf)
227 i64 minisec_id;
228 i64 bytes_left_to_copy;
229 i64 bytes_left_to_skip;
231 if(!d->mini_sector_stream) return;
232 if(stream_size<=0 || stream_size>c->infile->len ||
233 stream_size>d->mini_sector_stream->len)
235 return;
238 bytes_left_to_copy = stream_size;
239 bytes_left_to_skip = stream_startpos;
240 minisec_id = first_minisec_id;
241 while(bytes_left_to_copy > 0) {
242 i64 minisec_offs;
243 i64 bytes_to_copy;
244 i64 bytes_to_skip;
246 if(minisec_id<0) break;
247 minisec_offs = minisec_id * d->mini_sector_size;
249 bytes_to_skip = bytes_left_to_skip;
250 if(bytes_to_skip > d->mini_sector_size) bytes_to_skip = d->mini_sector_size;
252 bytes_to_copy = d->mini_sector_size - bytes_to_skip;
253 if(bytes_to_copy > bytes_left_to_copy) bytes_to_copy = bytes_left_to_copy;
255 dbuf_copy(d->mini_sector_stream, minisec_offs + bytes_to_skip, bytes_to_copy, outf);
257 bytes_left_to_copy -= bytes_to_copy;
258 bytes_left_to_skip -= bytes_to_skip;
259 minisec_id = get_next_minisec_id(c, d, minisec_id);
263 static void copy_any_stream_to_dbuf(deark *c, lctx *d, struct dir_entry_info *dei,
264 i64 stream_startpos, i64 stream_size,
265 dbuf *outf)
267 if(dei->is_mini_stream) {
268 copy_mini_stream_to_dbuf(c, d, dei->minisec_id, stream_startpos, stream_size, outf);
270 else {
271 copy_normal_stream_to_dbuf(c, d, dei->normal_sec_id, stream_startpos, stream_size, outf);
275 static int do_header(deark *c, lctx *d)
277 i64 pos = 0;
278 i64 byte_order_code;
279 i64 sector_shift;
280 i64 mini_sector_shift;
281 char buf[80];
282 int retval = 0;
284 de_dbg(c, "header at %d", (int)pos);
285 de_dbg_indent(c, 1);
287 // offset 0-7: signature
288 // offset 8-23: CLSID
290 d->minor_ver = de_getu16le(pos+24);
291 d->major_ver = de_getu16le(pos+26);
292 de_dbg(c, "format version: %d.%d", (int)d->major_ver, (int)d->minor_ver);
293 if(d->major_ver!=3 && d->major_ver!=4) {
294 de_err(c, "Unsupported format version: %d", (int)d->major_ver);
295 goto done;
298 byte_order_code = de_getu16le(pos+28);
299 if(byte_order_code != 0xfffe) {
300 de_err(c, "Unsupported byte order code: 0x%04x", (unsigned int)byte_order_code);
301 goto done;
304 sector_shift = de_getu16le(pos+30); // aka ssz
305 d->sec_size = de_pow2(sector_shift);
306 de_dbg(c, "sector size: 2^%d (%d bytes)", (int)sector_shift,
307 (int)d->sec_size);
308 if(d->sec_size!=512 && d->sec_size!=4096) {
309 de_err(c, "Unsupported sector size: %d", (int)d->sec_size);
310 goto done;
313 mini_sector_shift = de_getu16le(pos+32); // aka sssz
314 d->mini_sector_size = de_pow2(mini_sector_shift);
315 de_dbg(c, "mini sector size: 2^%d (%d bytes)", (int)mini_sector_shift,
316 (int)d->mini_sector_size);
317 if(d->mini_sector_size!=64) {
318 de_err(c, "Unsupported mini sector size: %d", (int)d->mini_sector_size);
319 goto done;
322 // offset 34: 6 reserved bytes
324 //d->num_dir_sectors = de_getu32le(pos+40);
325 //de_dbg(c, "number of directory sectors: %u", (unsigned int)d->num_dir_sectors);
326 // Should be 0 if major_ver==3
328 // Number of sectors used by sector allocation table (FAT)
329 d->num_fat_sectors = de_getu32le(pos+44);
330 de_dbg(c, "number of FAT sectors: %d", (int)d->num_fat_sectors);
332 d->first_dir_sec_id = de_geti32le(pos+48);
333 describe_sec_id(c, d, d->first_dir_sec_id, buf, sizeof(buf));
334 de_dbg(c, "first directory sector: %d (%s)", (int)d->first_dir_sec_id, buf);
336 // offset 52, transaction signature number
338 d->std_stream_min_size = de_getu32le(pos+56);
339 de_dbg(c, "min size of a standard stream: %d", (int)d->std_stream_min_size);
341 // First sector of mini sector allocation table (MiniFAT)
342 d->first_minifat_sec_id = de_geti32le(pos+60);
343 describe_sec_id(c, d, d->first_minifat_sec_id, buf, sizeof(buf));
344 de_dbg(c, "first MiniFAT sector: %d (%s)", (int)d->first_minifat_sec_id, buf);
346 // Number of sectors used by MiniFAT
347 d->num_minifat_sectors = de_getu32le(pos+64);
348 de_dbg(c, "number of MiniFAT sectors: %d", (int)d->num_minifat_sectors);
350 // SecID of first (extra?) sector of the DIFAT
351 // (also called the Master Sector Allocation Table (MSAT))
352 d->first_difat_sec_id = de_geti32le(pos+68);
353 describe_sec_id(c, d, d->first_difat_sec_id, buf, sizeof(buf));
354 de_dbg(c, "first extended DIFAT sector: %d (%s)", (int)d->first_difat_sec_id, buf);
356 // Number of (extra?) sectors used by the DIFAT
357 d->num_difat_sectors = de_getu32le(pos+72);
358 de_dbg(c, "number of extended DIFAT sectors: %d", (int)d->num_difat_sectors);
360 // offset 76: 436 bytes of DIFAT data
361 retval = 1;
363 done:
364 de_dbg_indent(c, -1);
365 return retval;
368 // Read the locations of the FAT sectors
369 static void read_difat(deark *c, lctx *d)
371 i64 num_to_read;
372 i64 still_to_read;
373 i64 difat_sec_id;
374 i64 difat_sec_offs;
377 de_dbg(c, "reading DIFAT (total number of entries=%d)", (int)d->num_fat_sectors);
378 de_dbg_indent(c, 1);
380 if(d->num_fat_sectors > 1000000) {
381 // TODO: Decide what limits to enforce.
382 d->num_fat_sectors = 1000000;
385 // Expecting d->num_fat_sectors in the DIFAT table
386 d->difat = dbuf_create_membuf(c, d->num_fat_sectors * 4, 1);
388 still_to_read = d->num_fat_sectors;
390 // Copy the part of the DIFAT that is in the header
391 num_to_read = still_to_read;
392 if(num_to_read>109) num_to_read = 109;
393 de_dbg(c, "reading %d DIFAT entries from header, at 76", (int)num_to_read);
394 dbuf_copy(c->infile, 76, num_to_read*4, d->difat);
395 still_to_read -= num_to_read;
397 difat_sec_id = d->first_difat_sec_id;
398 while(still_to_read>0) {
399 if(difat_sec_id<0) break;
401 difat_sec_offs = sec_id_to_offset(c, d, difat_sec_id);
402 de_dbg(c, "reading DIFAT sector at %d", (int)difat_sec_offs);
403 num_to_read = (d->sec_size - 4)/4;
405 dbuf_copy(c->infile, difat_sec_offs, num_to_read*4, d->difat);
406 still_to_read -= num_to_read;
407 difat_sec_id = de_geti32le(difat_sec_offs + num_to_read*4);
410 de_dbg_indent(c, -1);
413 static void dump_fat(deark *c, lctx *d)
415 i64 i;
416 i64 sec_id;
417 char buf[80];
419 if(c->debug_level<2) return;
421 de_dbg2(c, "dumping FAT contents (%d entries)", (int)d->num_fat_entries);
423 de_dbg_indent(c, 1);
424 for(i=0; i<d->num_fat_entries; i++) {
425 sec_id = dbuf_geti32le(d->fat, i*4);
426 describe_sec_id(c, d, sec_id, buf, sizeof(buf));
427 de_dbg2(c, "FAT[%d]: next_SecID=%d (%s)", (int)i, (int)sec_id, buf);
429 de_dbg_indent(c, -1);
432 // Read the contents of the FAT sectors
433 static void read_fat(deark *c, lctx *d)
435 i64 i;
436 i64 sec_id;
437 i64 sec_offset;
438 char buf[80];
440 d->fat = dbuf_create_membuf(c, d->num_fat_sectors * d->sec_size, 1);
442 de_dbg(c, "reading FAT contents (%d sectors)", (int)d->num_fat_sectors);
443 de_dbg_indent(c, 1);
444 for(i=0; i<d->num_fat_sectors; i++) {
445 sec_id = dbuf_geti32le(d->difat, i*4);
446 sec_offset = sec_id_to_offset(c, d, sec_id);
447 describe_sec_id(c, d, sec_id, buf, sizeof(buf));
448 de_dbg(c, "reading sector: DIFAT_idx=%d, SecID=%d (%s)",
449 (int)i, (int)sec_id, buf);
450 dbuf_copy(c->infile, sec_offset, d->sec_size, d->fat);
452 de_dbg_indent(c, -1);
454 d->num_fat_entries = d->fat->len/4;
455 dump_fat(c, d);
458 static void dump_minifat(deark *c, lctx *d)
460 i64 i;
461 i64 sec_id;
462 i64 num_minifat_entries;
464 if(c->debug_level<2) return;
465 if(!d->minifat) return;
467 num_minifat_entries = d->minifat->len / 4;
468 de_dbg2(c, "dumping MiniFAT contents (%d entries)", (int)num_minifat_entries);
470 de_dbg_indent(c, 1);
471 for(i=0; i<num_minifat_entries; i++) {
472 sec_id = dbuf_geti32le(d->minifat, i*4);
473 de_dbg2(c, "MiniFAT[%d]: next_MiniSecID=%d", (int)i, (int)sec_id);
475 de_dbg_indent(c, -1);
478 // Read the contents of the MiniFAT sectors into d->minifat
479 static void read_minifat(deark *c, lctx *d)
481 i64 i;
482 i64 sec_id;
483 i64 sec_offset;
484 char buf[80];
486 if(d->num_minifat_sectors > 1000000) {
487 // TODO: Decide what limits to enforce.
488 d->num_minifat_sectors = 1000000;
491 d->minifat = dbuf_create_membuf(c, d->num_minifat_sectors * d->sec_size, 1);
493 // TODO: Use copy_normal_stream_to_dbuf
494 de_dbg(c, "reading MiniFAT contents (%d sectors)", (int)d->num_minifat_sectors);
495 de_dbg_indent(c, 1);
497 sec_id = d->first_minifat_sec_id;
499 for(i=0; i<d->num_minifat_sectors; i++) {
500 if(sec_id<0) break;
502 sec_offset = sec_id_to_offset(c, d, sec_id);
503 describe_sec_id(c, d, sec_id, buf, sizeof(buf));
504 de_dbg(c, "reading MiniFAT sector #%d, SecID=%d (%s), MiniSecIDs %d-%d",
505 (int)i, (int)sec_id, buf,
506 (int)(i*(d->sec_size/4)), (int)((i+1)*(d->sec_size/4)-1));
507 dbuf_copy(c->infile, sec_offset, d->sec_size, d->minifat);
509 sec_id = get_next_sec_id(c, d, sec_id);
511 de_dbg_indent(c, -1);
513 dump_minifat(c, d);
516 // Returns -1 if not a valid name
517 static i64 stream_name_to_catalog_id(deark *c, lctx *d, struct dir_entry_info *dei)
519 char buf[16];
520 size_t nlen;
521 size_t i;
523 nlen = dei->fname_srd->sz_utf8_strlen;
524 if(nlen>sizeof(buf)-1) return -1;
526 for(i=0; i<nlen; i++) {
527 // Name should contain only digits
528 if(dei->fname_srd->sz_utf8[i]<'0' || dei->fname_srd->sz_utf8[i]>'9') return -1;
530 // The stream name is the *reversed* string form of the ID number.
531 // (I assume this is to try to keep the directory tree structure balanced.)
532 buf[nlen-1-i] = dei->fname_srd->sz_utf8[i];
534 buf[nlen] = '\0';
536 return de_atoi64(buf);
539 // Returns an index into d->thumbsdb_catalog.
540 // Returns -1 if not found.
541 static i64 lookup_thumbsdb_catalog_entry(deark *c, lctx *d, struct dir_entry_info *dei)
543 i64 i;
544 i64 id;
546 if(d->thumbsdb_catalog_num_entries<1 || !d->thumbsdb_catalog) return -1;
547 if(!dei->fname_srd || !dei->fname_srd->str) return -1;
549 id = stream_name_to_catalog_id(c, d, dei);
550 if(id<0) return -1;
552 for(i=0; i<d->thumbsdb_catalog_num_entries; i++) {
553 if(d->thumbsdb_catalog[i].id == id)
554 return i;
556 return -1;
559 // This function tries to better handle a special nonstandard JPEG thumbnail format
560 // that I'm calling MSRGBA.
561 // We can't *really* handle it, because Deark doesn't decompress lossy formats, and
562 // AFAIK there is no standard format that we can losslessly convert it to.
563 // What we can do is add the missing quantization and Huffman tables, and add a
564 // custom segment to help identify the format.
565 // This should allow most JPEG viewers to decode the image, though most will guess
566 // it is CMYK, and display the colors all wrong (often all black).
567 // Note that the component ID numbers are ASCII 'R','G','B','A'.
568 // Based on my (possibly wrong) analysis, the 'R' channel is blue(!), 'G' is green,
569 // 'B' is red, and 'A' can be either opacity, or unused (dunno how to tell which).
571 // hdrsize is the length of just the first header.
572 // Returns 0 if nothing was extracted.
573 static int thumbsdb_msrgba_special_extract(deark *c, lctx *d, struct dir_entry_info *dei,
574 i64 hdrsize, dbuf *outf)
576 static const u8 qtable0[69] = {
577 0xff,0xdb,0x00,0x43,0x00,0x08,0x06,0x06,0x07,0x06,0x05,0x08,0x07,0x07,0x07,0x09,
578 0x09,0x08,0x0a,0x0c,0x14,0x0d,0x0c,0x0b,0x0b,0x0c,0x19,0x12,0x13,0x0f,0x14,0x1d,
579 0x1a,0x1f,0x1e,0x1d,0x1a,0x1c,0x1c,0x20,0x24,0x2e,0x27,0x20,0x22,0x2c,0x23,0x1c,
580 0x1c,0x28,0x37,0x29,0x2c,0x30,0x31,0x34,0x34,0x34,0x1f,0x27,0x39,0x3d,0x38,0x32,
581 0x3c,0x2e,0x33,0x34,0x32};
582 static const u8 htables[212] = {
583 0xff,0xc4,0x00,0xd2,0x00,0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,
585 0x0b,0x10,0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,
586 0x01,0x7d,0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,
587 0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,
588 0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,
589 0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,
590 0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,
591 0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,
592 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,
593 0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
594 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
595 0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,
596 0xf7,0xf8,0xf9,0xfa};
597 i64 inf_pos;
598 i64 idseg_len;
600 if(dei->stream_size<hdrsize+16+2+22) return 0;
601 inf_pos = hdrsize+16; // Also skip past the 16-byte extra header
603 // SOI
604 copy_any_stream_to_dbuf(c, d, dei, inf_pos, 2, outf);
605 inf_pos += 2;
607 // Special APP1 segment to record both headers, and identify the format.
608 dbuf_write(outf, (const u8*)"\xff\xe1", 2);
609 idseg_len = 2 + 12 + 1 + hdrsize + 16;
610 dbuf_writeu16be(outf, idseg_len);
611 dbuf_write(outf, (const u8*)"Deark_MSRGBA\0", 13);
612 copy_any_stream_to_dbuf(c, d, dei, 0, hdrsize+16, outf);
614 // DQT
615 dbuf_write(outf, qtable0, sizeof(qtable0));
616 // TODO: Do we ever need another quantization table?
618 // SOF0
619 // TODO?: This code is fragile. We could parse the JPEG data, instead of
620 // just hoping it is laid out like we expect.
621 copy_any_stream_to_dbuf(c, d, dei, inf_pos, 22, outf);
622 inf_pos += 22;
624 // DHT
625 dbuf_write(outf, htables, sizeof(htables));
627 // The rest of the file
628 copy_any_stream_to_dbuf(c, d, dei, inf_pos, dei->stream_size-inf_pos, outf);
630 return 1;
633 // Special handling of Thumbs.db files.
634 // Caller sets fi and tmpfn to default values. This function may modify them.
635 // firstpart = caller-supplied dbuf containing the first 256 or so bytes of the stream
636 static void do_extract_stream_to_file_thumbsdb(deark *c, lctx *d, struct dir_entry_info *dei,
637 de_finfo *fi, de_ucstring *tmpfn, dbuf *firstpart)
639 i64 hdrsize;
640 i64 catalog_idx;
641 const char *ext;
642 dbuf *outf = NULL;
643 i64 ver;
644 i64 reported_size;
645 i64 startpos;
646 i64 final_streamsize;
647 int is_msrgba = 0;
649 if(dei->is_thumbsdb_catalog) {
650 // We've already read the catalog.
651 goto done;
654 de_dbg(c, "reading Thumbs.db stream");
655 de_dbg_indent(c, 1);
657 startpos = 0;
658 final_streamsize = dei->stream_size;
660 // A Thumbs.db stream typically has a header, followed by an embedded JPEG
661 // (or something) file.
663 catalog_idx = lookup_thumbsdb_catalog_entry(c, d, dei);
665 if(catalog_idx>=0) {
666 if(d->thumbsdb_catalog[catalog_idx].mod_time.is_valid) {
667 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = d->thumbsdb_catalog[catalog_idx].mod_time; // struct copy
671 hdrsize = dbuf_getu32le(firstpart, 0);
672 de_dbg(c, "header size: %d", (int)hdrsize);
674 ver = dbuf_getu32le(firstpart, 4);
675 de_dbg(c, "version: %d", (int)ver);
677 // 0x0c = "Original format" Thumbs.db
678 // 0x18 = "Windows 7 format"
680 if((hdrsize==0x0c || hdrsize==0x18) && dei->stream_size>hdrsize) {
681 u8 sig1[4];
682 u8 sig2[4];
684 reported_size = dbuf_getu32le(firstpart, 8);
685 de_dbg(c, "reported size: %d", (int)reported_size);
687 startpos = hdrsize;
688 final_streamsize -= hdrsize;
689 de_dbg(c, "calculated size: %d", (int)final_streamsize);
691 if(catalog_idx>=0 && c->filenames_from_file) {
692 de_dbg(c, "name from catalog: \"%s\"",
693 ucstring_getpsz(d->thumbsdb_catalog[catalog_idx].fname_srd->str));
695 // Replace the default name with the name from the catalog.
696 ucstring_empty(tmpfn);
698 if(!de_strcasecmp(d->thumbsdb_catalog[catalog_idx].fname_srd->sz_utf8,
699 "{A42CD7B6-E9B9-4D02-B7A6-288B71AD28BA}"))
701 ucstring_append_sz(tmpfn, "_folder", DE_ENCODING_LATIN1);
703 else {
704 ucstring_append_ucstring(tmpfn, d->thumbsdb_catalog[catalog_idx].fname_srd->str);
708 dbuf_read(firstpart, sig1, hdrsize, 4);
709 dbuf_read(firstpart, sig2, hdrsize+16, 4);
711 if(sig1[0]==0xff && sig1[1]==0xd8) ext = "jpg";
712 else if(sig1[0]==0x89 && sig1[1]==0x50) ext = "png";
713 else if(sig1[0]==0x01 && sig1[1]==0x00 &&
714 sig2[0]==0xff && sig2[1]==0xd8)
716 // Looks like a nonstandard Microsoft RGBA JPEG.
717 ext = "msrgbajpg";
718 is_msrgba = 1;
720 else ext = "bin";
722 ucstring_printf(tmpfn, DE_ENCODING_LATIN1, ".thumb.%s", ext);
724 else {
725 de_warn(c, "Unidentified Thumbs.db stream \"%s\"",
726 ucstring_getpsz(dei->fname_srd->str));
729 de_dbg_indent(c, -1);
731 de_finfo_set_name_from_ucstring(c, fi, tmpfn, 0);
732 fi->original_filename_flag = 1;
734 outf = dbuf_create_output_file(c, NULL, fi, 0);
736 if(is_msrgba) {
737 if(d->thumbsdb_msrgba_mode) {
738 int ok = 0;
739 ok = thumbsdb_msrgba_special_extract(c, d, dei, hdrsize, outf);
740 if(ok) goto done;
743 // "MSRGBA" thumbnails seem to have an additional 16-byte header,
744 // before the JPEG data starts. In this mode, we just ignore it.
745 startpos += 16;
746 final_streamsize -= 16;
749 copy_any_stream_to_dbuf(c, d, dei, startpos, final_streamsize, outf);
751 done:
752 dbuf_close(outf);
755 static void do_OfficeArtStream(deark *c, lctx *d, struct dir_entry_info *dei)
757 dbuf *tmpstream = NULL;
759 de_dbg(c, "OfficeArt stream, len=%"I64_FMT, dei->stream_size);
760 de_dbg_indent(c, 1);
761 tmpstream = dbuf_create_membuf(c, dei->stream_size, 0x1);
762 copy_any_stream_to_dbuf(c, d, dei, 0, dei->stream_size, tmpstream);
763 if(tmpstream->len < dei->stream_size) {
764 de_warn(c, "OfficeArt stream might have been truncated");
767 de_run_module_by_id_on_slice2(c, "officeart", NULL, tmpstream, 0, tmpstream->len);
768 de_dbg_indent(c, -1);
769 dbuf_close(tmpstream);
772 static void do_Corel_simple_image(deark *c, lctx *d, struct dir_entry_info *dei,
773 dbuf *f, i64 pos1)
775 i64 pos = pos1;
776 i64 w, h;
777 i64 i, j;
778 u8 b;
779 de_bitmap *img = NULL;
781 w = dbuf_getu32le_p(f, &pos);
782 h = dbuf_getu32le_p(f, &pos);
783 de_dbg_dimensions(c, w, h);
784 if(!de_good_image_dimensions(c, w, h)) goto done;
786 img = de_bitmap_create(c, w, h, 1);
788 // TODO: I don't know whether this is the right way to interpret this
789 // image type.
790 for(j=0; j<img->height; j++) {
791 for(i=0; i<img->width; i++) {
792 b = dbuf_getbyte(f, pos + j*w + i);
793 de_bitmap_setpixel_gray(img, i, j, b);
797 de_bitmap_write_to_file(img, NULL, DE_CREATEFLAG_FLIP_IMAGE);
799 done:
800 de_bitmap_destroy(img);
803 // This is an object found in Corel Print House (.CPH) and similar files.
804 // This decoder is based on reverse engineering. It may be incorrect.
805 static void do_Corel_UIformat(deark *c, lctx *d, struct dir_entry_info *dei,
806 dbuf *f, i64 pos1, i64 len, int is_thumb)
808 i64 pos = pos1;
809 i64 hdr_len;
810 i64 ri_pos;
811 i64 ri_len;
812 i64 w, h;
813 i64 pal_offs, img_offs;
814 i64 rowspan;
815 i64 pixels_size;
816 i64 npalent;
817 int bpp;
818 int ok = 0;
819 int saved_indent_level;
820 de_bitmap *img = NULL;
821 u32 pal[256];
823 de_dbg_indent_save(c, &saved_indent_level);
825 if(dbuf_memcmp(f, pos, "UI\x00\x00", 4)) goto done;
827 de_dbg(c, "CorelUI at [%"I64_FMT"], len=%"I64_FMT, pos1, len);
828 de_dbg_indent(c, 1);
829 pos += 2; // "UI"
830 pos += 2; // ?
831 pos += 4; // The size of the "RI" segment? Redundant?
832 pos += 4; // ?
834 hdr_len = dbuf_getu32le_p(f, &pos);
835 // Apparently the size of the "UI" segment
836 if(hdr_len != 32) goto done;
837 // TODO: More fields here
839 ri_pos = pos1+hdr_len;
840 pos = ri_pos;
841 if(dbuf_memcmp(f, pos, "RI", 2)) goto done;
842 pos += 2;
843 ri_len = dbuf_getu32le_p(f, &pos);
844 if(ri_pos + ri_len > f->len) goto done;
846 pos += 16;
847 w = dbuf_getu32le_p(f, &pos);
848 h = dbuf_getu32le_p(f, &pos);
849 de_dbg_dimensions(c, w, h);
851 pos += 4; // ? (observed 1)
853 bpp = (int)dbuf_getu32le_p(f, &pos);
854 de_dbg(c, "bits/pixel?: %d", bpp);
855 rowspan = dbuf_getu32le_p(f, &pos);
856 de_dbg(c, "bytes/row?: %d", (int)rowspan);
857 pixels_size = dbuf_getu32le_p(f, &pos);
858 de_dbg(c, "pixels size: %"I64_FMT, pixels_size);
859 pos += 4; // ?
860 pos += 8; // ? (density?)
862 pal_offs = dbuf_getu32le_p(f, &pos);
863 de_dbg(c, "pal offs: %"I64_FMT, pal_offs);
865 img_offs = dbuf_getu32le_p(f, &pos);
866 de_dbg(c, "img offs: %"I64_FMT, img_offs);
868 pos += 12; // ?
870 if(bpp!=8 && bpp!=24) goto done;
872 // == palette ==
873 de_make_grayscale_palette(pal, 256, 0);
874 if(pal_offs!=0) {
875 // This formula doesn't make sense to me, but seems to work.
876 pos = ri_pos+14+pal_offs;
877 de_dbg(c, "palette at [%"I64_FMT"]", pos);
878 de_dbg_indent(c, 1);
879 pos += 2; // ? (observed 4, 5)
881 npalent = dbuf_getu16le_p(f, &pos);
882 de_dbg(c, "num pal entries: %d", (int)npalent);
883 if(npalent>256) goto done;
885 de_read_palette_rgb(f, pos, npalent, 3, pal, 256, DE_GETRGBFLAG_BGR);
886 de_dbg_indent(c, -1);
889 // == image ==
890 if(!de_good_image_dimensions(c, w, h)) goto done;
892 img = de_bitmap_create(c, w, h, (bpp<24 && pal_offs==0)?1:3);
893 pos = ri_pos+14+img_offs;
894 de_dbg(c, "bitmap at [%"I64_FMT"]", pos);
895 if(bpp<24) {
896 de_convert_image_paletted(f, pos, 8, rowspan, pal, img, 0);
898 else {
899 de_convert_image_rgb(f, pos, rowspan, 3, img, DE_GETRGBFLAG_BGR);
902 de_bitmap_write_to_file(img, is_thumb?"thumb":NULL, DE_CREATEFLAG_FLIP_IMAGE);
904 ok = 1;
906 done:
907 if(!ok) {
908 de_dbg(c, "[unsupported image type]");
910 de_bitmap_destroy(img);
911 de_dbg_indent_restore(c, saved_indent_level);
914 static void do_CorelImages_internal(deark *c, lctx *d, struct dir_entry_info *dei,
915 dbuf *f)
917 i64 pos = 0;
918 int saved_indent_level;
920 de_dbg_indent_save(c, &saved_indent_level);
922 while(1) {
923 unsigned int imgtype1_or_size;
924 unsigned int imgtype1;
925 i64 size1 = 0;
927 if(pos >= f->len-8) break;
929 de_dbg(c, "image at [%"I64_FMT"]", pos);
930 de_dbg_indent(c, 1);
932 // Seems like sometimes this first 'type' field is present, and
933 // sometimes it isn't (and we treat it like it's 0).
934 imgtype1_or_size = (unsigned int)dbuf_getu32le_p(f, &pos);
935 if(imgtype1_or_size<8) {
936 imgtype1 = imgtype1_or_size;
938 if(imgtype1==0) {
939 size1 = dbuf_getu32le_p(f, &pos);
942 else {
943 imgtype1 = 0;
944 size1 = (i64)imgtype1_or_size;
947 de_dbg(c, "low level imgtype: %u", imgtype1);
949 if(imgtype1==0) {
950 unsigned int imgtype2;
952 imgtype2 = (unsigned int)dbuf_getu32le_p(f, &pos);
953 de_dbg(c, "high level imgtype: %u", imgtype2);
954 de_dbg(c, "len: %"I64_FMT, size1);
956 if(pos+size1 > f->len) break;
958 if(imgtype2==0) { // "Uncompressed Image"?
959 do_Corel_UIformat(c, d, dei, f, pos, size1, 0);
961 else if(imgtype2==1) { // JPEG?
962 dbuf_create_file_from_slice(f, pos, size1, "jpg", NULL, 0);
964 else {
965 de_dbg(c, "[unsupported image type: %u:%u]", imgtype1, imgtype2);
968 else if(imgtype1==1) {
969 size1 = dbuf_getu32le(f, pos+8);
970 do_Corel_simple_image(c, d, dei, f, pos);
971 pos += 8 + 4;
973 else {
974 de_dbg(c, "[unsupported image type (%u), can't continue]", imgtype1);
975 goto done;
978 pos += size1;
979 de_dbg_indent(c, -1);
982 done:
983 de_dbg_indent_restore(c, saved_indent_level);
986 static void do_StreamNamedThumbnail(deark *c, lctx *d, struct dir_entry_info *dei)
988 dbuf *f = NULL;
989 i64 size1;
991 if(dei->stream_size<32 || dei->stream_size>DE_MAX_SANE_OBJECT_SIZE) {
992 goto done;
994 f = dbuf_create_membuf(c, 0, 0);
996 // Start by reading just a little, to figure out the data type
997 copy_any_stream_to_dbuf(c, d, dei, 0, 16, f);
998 size1 = dbuf_getu32le(f, 0);
999 if(size1+4 != dei->stream_size) goto done;
1000 if(dbuf_memcmp(f, 4, "UI\x00\x00", 4)) goto done;
1002 copy_any_stream_to_dbuf(c, d, dei, 16, dei->stream_size-16, f);
1003 do_Corel_UIformat(c, d, dei, f, 4, size1-4, 1);
1005 done:
1006 dbuf_close(f);
1009 static void do_StreamNamedImages(deark *c, lctx *d, struct dir_entry_info *dei)
1011 dbuf *f = NULL;
1013 if(dei->stream_size<32 || dei->stream_size>DE_MAX_SANE_OBJECT_SIZE) {
1014 goto done;
1016 f = dbuf_create_membuf(c, 0, 0);
1018 // Start by reading just a little, to figure out the data type
1019 copy_any_stream_to_dbuf(c, d, dei, 0, 16, f);
1020 if(dbuf_memcmp(f, 4, "\x01\x00\x00\x00\xff\xd8\xff", 7) &&
1021 dbuf_memcmp(f, 4, "\x00\x00\x00\x00\x55\x49\x00\x00", 8))
1023 // Not an "Images" stream we recognize.
1024 // TODO: Can we detect this if the first image is in "simple"
1025 // format?
1026 goto done;
1029 // This is an object found in Corel Print House (.CPH) and similar files.
1030 copy_any_stream_to_dbuf(c, d, dei, 16, dei->stream_size-16, f);
1031 do_CorelImages_internal(c, d, dei, f);
1033 done:
1034 dbuf_close(f);
1037 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *field_name)
1039 char timestamp_buf[64];
1041 if(ts->is_valid) {
1042 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
1043 de_dbg(c, "%s: %s", field_name, timestamp_buf);
1047 static void read_and_cvt_timestamp(deark *c, dbuf *f, i64 pos,
1048 struct de_timestamp *ts)
1050 i64 ts_as_FILETIME;
1052 de_zeromem(ts, sizeof(struct de_timestamp));
1053 ts_as_FILETIME = dbuf_geti64le(f, pos);
1054 if(ts_as_FILETIME!=0) {
1055 de_FILETIME_to_timestamp(ts_as_FILETIME, ts, 0x1);
1059 static int read_thumbsdb_catalog(deark *c, lctx *d, struct dir_entry_info *dei)
1061 i64 item_len;
1062 i64 n;
1063 i64 i;
1064 i64 pos;
1065 int retval = 0;
1066 dbuf *catf = NULL;
1068 if(d->thumbsdb_catalog) return 0; // Already read a catalog
1070 de_dbg(c, "reading thumbsdb catalog");
1071 de_dbg_indent(c, 1);
1073 catf = dbuf_create_membuf(c, dei->stream_size, 0);
1074 copy_any_stream_to_dbuf(c, d, dei, 0, dei->stream_size, catf);
1076 item_len = dbuf_getu16le(catf, 0);
1077 de_dbg(c, "header size: %d", (int)item_len); // (?)
1078 if(item_len!=16) goto done;
1080 n = dbuf_getu16le(catf, 2);
1081 de_dbg(c, "catalog version: %d", (int)n); // (?)
1082 if(n!=5 && n!=6 && n!=7) {
1083 de_warn(c, "Unsupported Catalog version: %d", (int)n);
1084 goto done;
1087 d->thumbsdb_catalog_num_entries = dbuf_getu16le(catf, 4); // This might really be a 4 byte int.
1088 de_dbg(c, "num entries: %d", (int)d->thumbsdb_catalog_num_entries);
1089 if(d->thumbsdb_catalog_num_entries>2048)
1090 d->thumbsdb_catalog_num_entries = 2048;
1092 d->thumbsdb_catalog = de_mallocarray(c, d->thumbsdb_catalog_num_entries,
1093 sizeof(struct thumbsdb_catalog_entry));
1095 pos = item_len;
1097 for(i=0; i<d->thumbsdb_catalog_num_entries; i++) {
1098 i64 name_len;
1100 if(pos >= catf->len) goto done;
1101 item_len = dbuf_getu32le(catf, pos);
1102 de_dbg(c, "catalog entry #%d, len=%d", (int)i, (int)item_len);
1103 if(item_len<20) goto done;
1104 if(pos+item_len > catf->len) goto done;
1106 de_dbg_indent(c, 1);
1108 d->thumbsdb_catalog[i].id = (u32)dbuf_getu32le(catf, pos+4);
1109 de_dbg(c, "id: %u", (unsigned int)d->thumbsdb_catalog[i].id);
1111 read_and_cvt_timestamp(c, catf, pos+8, &d->thumbsdb_catalog[i].mod_time);
1112 dbg_timestamp(c, &d->thumbsdb_catalog[i].mod_time, "timestamp");
1114 name_len = de_min_int(item_len-20, 65536);
1115 d->thumbsdb_catalog[i].fname_srd = dbuf_read_string(catf, pos+16, name_len, name_len,
1116 DE_CONVFLAG_WANT_UTF8, DE_ENCODING_UTF16LE);
1117 de_dbg(c, "name: \"%s\"", ucstring_getpsz(d->thumbsdb_catalog[i].fname_srd->str));
1119 de_dbg_indent(c, -1);
1121 pos += item_len;
1124 retval = 1;
1125 done:
1126 de_dbg_indent(c, -1);
1127 dbuf_close(catf);
1128 if(!retval) {
1129 d->thumbsdb_catalog_num_entries = 0; // Make sure we don't use a bad catalog.
1131 return retval;
1134 static void do_cfb_olepropertyset(deark *c, lctx *d, struct dir_entry_info *dei,
1135 int is_summaryinfo, int is_root)
1137 dbuf *f = NULL;
1138 int saved_indent_level;
1140 if(dei->stream_size>1000000) goto done;
1141 f = dbuf_create_membuf(c, dei->stream_size, 1);
1142 copy_any_stream_to_dbuf(c, d, dei, 0, dei->stream_size, f);
1144 de_dbg_indent_save(c, &saved_indent_level);
1145 if(is_summaryinfo) {
1146 de_dbg(c, "SummaryInformation (%s)", is_root?"root":"non-root");
1148 else {
1149 de_dbg(c, "property set stream");
1152 de_dbg_indent(c, 1);
1153 de_run_module_by_id_on_slice(c, "olepropset", NULL, f, 0, f->len);
1154 de_dbg_indent(c, -1);
1156 done:
1157 dbuf_close(f);
1160 static void read_mini_sector_stream(deark *c, lctx *d, i64 first_sec_id, i64 stream_size)
1162 if(d->mini_sector_stream) return; // Already done
1164 de_dbg(c, "reading mini sector stream (%d bytes)", (int)stream_size);
1165 d->mini_sector_stream = dbuf_create_membuf(c, 0, 0);
1166 copy_normal_stream_to_dbuf(c, d, first_sec_id, 0, stream_size, d->mini_sector_stream);
1169 // Reads the directory stream into d->dir, and sets d->num_dir_entries.
1170 static void read_directory_stream(deark *c, lctx *d)
1172 i64 dir_sec_id;
1173 i64 dir_sector_offs;
1174 i64 num_entries_per_sector;
1175 i64 dir_sector_count = 0;
1177 de_dbg(c, "reading directory stream");
1178 de_dbg_indent(c, 1);
1180 d->dir = dbuf_create_membuf(c, 0, 0);
1182 dir_sec_id = d->first_dir_sec_id;
1184 num_entries_per_sector = d->sec_size / 128;
1185 d->num_dir_entries = 0;
1187 // TODO: Use copy_normal_stream_to_dbuf
1188 while(1) {
1189 if(dir_sec_id<0) break;
1190 if(d->dir->len > c->infile->len) break;
1192 dir_sector_offs = sec_id_to_offset(c, d, dir_sec_id);
1194 de_dbg(c, "directory sector #%d SecID=%d (offs=%d), entries %d-%d",
1195 (int)dir_sector_count,
1196 (int)dir_sec_id, (int)dir_sector_offs,
1197 (int)d->num_dir_entries, (int)(d->num_dir_entries + num_entries_per_sector - 1));
1199 dbuf_copy(c->infile, dir_sector_offs, d->sec_size, d->dir);
1201 d->num_dir_entries += num_entries_per_sector;
1203 dir_sec_id = get_next_sec_id(c, d, dir_sec_id);
1204 dir_sector_count++;
1207 de_dbg(c, "number of directory entries: %d", (int)d->num_dir_entries);
1209 de_dbg_indent(c, -1);
1212 static void do_init_format_detection(deark *c, lctx *d)
1214 if(d->subformat_req!=SUBFMT_AUTO) return;
1215 d->could_be_thumbsdb = 1;
1218 static void do_finalize_format_detection(deark *c, lctx *d)
1220 d->subformat_final = SUBFMT_RAW; // default
1222 if(d->subformat_req!=SUBFMT_AUTO) {
1223 d->subformat_final = d->subformat_req;
1224 goto done;
1227 if(!d->could_be_thumbsdb) goto done;
1229 if(d->thumbsdb_old_names_found>0 && !d->thumbsdb_catalog_found)
1231 d->could_be_thumbsdb = 0;
1233 else if(d->thumbsdb_old_names_found + d->thumbsdb_new_names_found +
1234 d->thumbsdb_catalog_found < 1)
1236 d->could_be_thumbsdb = 0;
1239 if(d->could_be_thumbsdb) {
1240 d->subformat_final = SUBFMT_THUMBSDB;
1243 done:
1244 switch(d->subformat_final) {
1245 case SUBFMT_THUMBSDB:
1246 de_declare_fmt(c, "Thumbs.db");
1247 d->thumbsdb_msrgba_mode = de_get_ext_option_bool(c, "cfb:msrgbamode", 1);
1248 break;
1252 static void do_dump_dir_structure(deark *c, lctx *d)
1254 i64 i;
1256 de_dbg(c, "dir structure:");
1257 de_dbg_indent(c, 1);
1258 for(i=0; i<d->num_dir_entries; i++) {
1259 de_dbg(c, "[%d] t=%d p=%d c=%d s=%d,%d", (int)i,
1260 (int)d->dir_entry[i].entry_type,
1261 (int)d->dir_entry[i].parent_id,
1262 (int)d->dir_entry[i].child_id,
1263 (int)d->dir_entry[i].sibling_id[0],
1264 (int)d->dir_entry[i].sibling_id[1]);
1265 de_dbg_indent(c, 1);
1266 if(d->dir_entry[i].fname_srd && d->dir_entry[i].fname_srd->str) {
1267 de_dbg(c, "fname: \"%s\"",
1268 ucstring_getpsz(d->dir_entry[i].fname_srd->str));
1270 if(d->dir_entry[i].path) {
1271 de_dbg(c, "path: \"%s\"",
1272 ucstring_getpsz(d->dir_entry[i].path));
1274 de_dbg_indent(c, -1);
1276 de_dbg_indent(c, -1);
1279 static void do_mark_dir_entries_recursively(deark *c, lctx *d, i32 parent_id,
1280 i32 dir_entry_idx, int level)
1282 struct dir_entry_info *dei;
1283 int k;
1285 if(dir_entry_idx<0 || (i64)dir_entry_idx>=d->num_dir_entries) return;
1287 dei = &d->dir_entry[dir_entry_idx];
1289 if(dei->entry_type!=OBJTYPE_STORAGE && dei->entry_type!=OBJTYPE_STREAM) return;
1291 dei->parent_id = parent_id;
1293 if(dei->entry_type==OBJTYPE_STORAGE && dei->fname_srd && dei->fname_srd->str && !dei->path) {
1294 // Set the full pathname
1295 dei->path = ucstring_create(c);
1296 if(parent_id>0 && d->dir_entry[parent_id].path) {
1297 ucstring_append_ucstring(dei->path, d->dir_entry[parent_id].path);
1298 ucstring_append_sz(dei->path, "/", DE_ENCODING_LATIN1);
1300 ucstring_append_ucstring(dei->path, dei->fname_srd->str);
1303 if(level>50) return;
1304 for(k=0; k<2; k++) {
1305 do_mark_dir_entries_recursively(c, d, parent_id, dei->sibling_id[k], level+1);
1308 if(dei->entry_type==OBJTYPE_STORAGE) {
1309 // This is a "subdirectory" entry, so examine its children (starting with the
1310 // one that we know about).
1311 do_mark_dir_entries_recursively(c, d, dir_entry_idx, dei->child_id, level+1);
1315 // Figure out which entries are in the root directory.
1316 static void do_analyze_dir_structure(deark *c, lctx *d)
1318 de_dbg_indent(c, 1);
1320 if(d->dump_dir_structure) do_dump_dir_structure(c, d);
1322 if(d->num_dir_entries<1) goto done;
1324 // The first entry should be the root entry.
1325 if(d->dir_entry[0].entry_type!=OBJTYPE_ROOT_STORAGE) goto done;
1327 // Its child is one of the entries in the root directory. Start with it.
1328 do_mark_dir_entries_recursively(c, d, 0, d->dir_entry[0].child_id, 0);
1330 if(d->dump_dir_structure) do_dump_dir_structure(c, d);
1331 done:
1332 de_dbg_indent(c, -1);
1335 // Things to do after we've read the directory stream into memory, and
1336 // know how many entries there are.
1337 static void do_before_reading_directory_entries(deark *c, lctx *d)
1339 i64 i;
1341 // Stores some extra information for each directory entry, and a copy of
1342 // some information for convenience.
1343 // (The original entry is still available at d->dir[128*n].)
1344 d->dir_entry = de_mallocarray(c, d->num_dir_entries, sizeof(struct dir_entry_info));
1346 // Set defaults for each entry
1347 for(i=0; i<d->num_dir_entries; i++) {
1348 d->dir_entry[i].child_id = -1;
1349 d->dir_entry[i].sibling_id[0] = -1;
1350 d->dir_entry[i].sibling_id[1] = -1;
1354 static int is_thumbsdb_orig_name(deark *c, lctx *d, const char *name, size_t nlen)
1356 size_t i;
1358 if(nlen<1 || nlen>6) return 0;
1359 for(i=0; i<nlen; i++) {
1360 if(name[i]<'0' || name[i]>'9') return 0;
1362 return 1;
1365 static int is_thumbsdb_new_name(deark *c, lctx *d, const char *name, size_t nlen)
1367 size_t i;
1368 int count1 = 0;
1369 int found_underscore = 0;
1370 int count2 = 0;
1372 if(nlen<4 || nlen>22) return 0;
1373 for(i=0; i<nlen; i++) {
1374 if(!found_underscore && name[i]=='_') {
1375 found_underscore = 1;
1377 else if(!found_underscore) {
1378 // pre-underscore (pixel dimension)
1379 if(name[i]>='0' && name[i]<='9')
1380 count1++;
1381 else
1382 return 0;
1384 else {
1385 // post-underscore (hash?)
1386 if((name[i]>='0' && name[i]<='9') ||
1387 (name[i]>='a' && name[i]<='f'))
1389 count2++;
1391 else
1393 return 0;
1397 if(!found_underscore) return 0;
1398 if(count1<1 || count1>5) return 0;
1399 if(count2<1 || count2>16) return 0;
1400 return 1;
1403 static void do_per_dir_entry_format_detection(deark *c, lctx *d, struct dir_entry_info *dei)
1405 size_t nlen;
1407 if(dei->entry_type==OBJTYPE_EMPTY) return;
1408 if(d->subformat_req!=SUBFMT_AUTO) return;
1409 if(!d->could_be_thumbsdb) return;
1411 if(dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1412 if(de_memcmp(dei->clsid, EMPTY_CLSID, 16)) {
1413 d->could_be_thumbsdb = 0;
1414 return;
1418 if(dei->entry_type==OBJTYPE_STORAGE) {
1419 // Thumbs.db files aren't expected to have any Storage objects.
1420 d->could_be_thumbsdb = 0;
1421 return;
1423 if(dei->entry_type!=OBJTYPE_STREAM) {
1424 return;
1427 nlen = dei->fname_srd->sz_utf8_strlen;
1428 if(nlen<1 || nlen>21) {
1429 d->could_be_thumbsdb = 0;
1430 return;
1433 if(dei->is_thumbsdb_catalog) {
1434 d->thumbsdb_catalog_found++;
1435 return;
1438 if(is_thumbsdb_orig_name(c, d, dei->fname_srd->sz_utf8, nlen)) {
1439 d->thumbsdb_old_names_found++;
1440 return;
1443 if(is_thumbsdb_new_name(c, d, dei->fname_srd->sz_utf8, nlen)) {
1444 d->thumbsdb_new_names_found++;
1445 return;
1449 // Caller supplies and initializes buf
1450 static void identify_clsid(deark *c, lctx *d, const u8 *clsid, char *buf, size_t buflen)
1452 const char *name = "?";
1453 size_t i;
1455 for(i=0; i<DE_ARRAYCOUNT(known_clsids); i++) {
1456 u8 tmpclsid[16];
1457 unsigned int k;
1458 const struct clsid_id_struct *ci = &known_clsids[i];
1460 de_memcpy(tmpclsid, clsid, 16);
1461 for(k=0; k<16; k++) {
1462 if((ci->mask & (1<<(15-k)))==0) {
1463 tmpclsid[k] = 0x00;
1466 if(!de_memcmp(tmpclsid, ci->clsid, 16)) {
1467 name = ci->name;
1468 break;
1471 de_snprintf(buf, buflen, " (%s)", name);
1474 static void do_process_stream(deark *c, lctx *d, struct dir_entry_info *dei)
1476 int saved_indent_level;
1477 de_finfo *fi_raw = NULL; // Use this if we extract the raw stream
1478 de_finfo *fi_tmp = NULL; // Can be used by format-specific code
1479 de_ucstring *fn_raw = NULL; // Use this if we extract the raw stream
1480 de_ucstring *fn_tmp = NULL; // Can be used by format-specific code
1481 dbuf *firstpart = NULL;
1482 int is_thumbsdb_stream = 0;
1483 int is_OfficeArtStream = 0;
1484 int is_summaryinfo = 0;
1485 int is_propset = 0;
1486 int is_namedThumbnail = 0;
1487 int is_namedImages = 0;
1488 int is_root = (dei->parent_id==0);
1490 de_dbg_indent_save(c, &saved_indent_level);
1492 // By default, use the "stream name" as the filename.
1493 fn_raw = ucstring_create(c);
1495 if(dei->parent_id>0 && d->dir_entry[dei->parent_id].path) {
1496 ucstring_append_ucstring(fn_raw, d->dir_entry[dei->parent_id].path);
1497 ucstring_append_sz(fn_raw, "/", DE_ENCODING_LATIN1);
1500 ucstring_append_ucstring(fn_raw, dei->fname_srd->str);
1501 fn_tmp = ucstring_clone(fn_raw);
1503 fi_raw = de_finfo_create(c);
1504 fi_tmp = de_finfo_create(c);
1506 // By default, use the mod time from the directory entry.
1507 if(dei->mod_time.is_valid) {
1508 fi_raw->timestamp[DE_TIMESTAMPIDX_MODIFY] = dei->mod_time; // struct copy
1509 fi_tmp->timestamp[DE_TIMESTAMPIDX_MODIFY] = dei->mod_time; // struct copy
1512 if(d->extract_raw_streams) {
1513 dbuf *outf = NULL;
1515 de_finfo_set_name_from_ucstring(c, fi_raw, fn_raw, DE_SNFLAG_FULLPATH);
1516 fi_raw->original_filename_flag = 1;
1518 outf = dbuf_create_output_file(c, NULL, fi_raw, 0);
1519 copy_any_stream_to_dbuf(c, d, dei, 0, dei->stream_size, outf);
1520 dbuf_close(outf);
1523 if(!d->decode_streams) goto done;
1525 // Read the first part of the stream, to use for format detection.
1526 firstpart = dbuf_create_membuf(c, 256, 0x1);
1527 copy_any_stream_to_dbuf(c, d, dei, 0,
1528 (dei->stream_size>256)?256:dei->stream_size, firstpart);
1530 // Stream type detection
1532 // FIXME? The stream detection happens even if d->subformat_req==SUBFMT_RAW.
1533 // We probably should have different detection logic in that case.
1535 if(!de_strcasecmp(dei->fname_srd->sz_utf8, "\x05" "SummaryInformation")) {
1536 is_propset = 1;
1537 is_summaryinfo = 1;
1539 else if(!de_strncmp(dei->fname_srd->sz_utf8, "\x05", 1)) {
1540 // TODO: Is there a good way to tell whether a stream is a property set?
1541 is_propset = 1;
1543 else if(d->subformat_final==SUBFMT_TIFF37680 &&
1544 !de_strcasecmp(dei->fname_srd->sz_utf8, "CONTENTS"))
1546 // TODO: This is not the only place to find a "CONTENTS" stream.
1547 is_propset = 1;
1549 else if(d->subformat_final==SUBFMT_THUMBSDB) {
1550 is_thumbsdb_stream = 1;
1552 else if(!de_strcasecmp(dei->fname_srd->sz_utf8, "Pictures")) {
1553 // This stream often appears in PPT documents.
1554 is_OfficeArtStream = 1;
1556 else if(!de_strcasecmp(dei->fname_srd->sz_utf8, "EscherStm")) {
1557 is_OfficeArtStream = 1;
1559 else if(!de_strcasecmp(dei->fname_srd->sz_utf8, "EscherDelayStm")) {
1560 // Found in MS Publisher, and probably other formats.
1561 is_OfficeArtStream = 1;
1563 else if(!de_strcasecmp(dei->fname_srd->sz_utf8, "Thumbnail")) {
1564 is_namedThumbnail = 1;
1566 else if(!de_strcasecmp(dei->fname_srd->sz_utf8, "Images")) {
1567 is_namedImages = 1;
1570 if(is_OfficeArtStream) {
1571 unsigned int rectype;
1572 rectype = (unsigned int)dbuf_getu16le(firstpart, 2);
1573 if((rectype&0xf000)!=0xf000) {
1574 is_OfficeArtStream = 0;
1578 // End of stream type detection
1580 if(is_propset) {
1581 do_cfb_olepropertyset(c, d, dei, is_summaryinfo, is_root);
1583 else if(is_thumbsdb_stream) {
1584 do_extract_stream_to_file_thumbsdb(c, d, dei, fi_tmp, fn_tmp, firstpart);
1586 else if(is_OfficeArtStream) {
1587 do_OfficeArtStream(c, d, dei);
1589 else if(is_namedThumbnail) {
1590 do_StreamNamedThumbnail(c, d, dei);
1592 else if(is_namedImages) {
1593 do_StreamNamedImages(c, d, dei);
1596 done:
1597 de_dbg_indent_restore(c, saved_indent_level);
1598 dbuf_close(firstpart);
1599 ucstring_destroy(fn_raw);
1600 ucstring_destroy(fn_tmp);
1601 de_finfo_destroy(c, fi_raw);
1602 de_finfo_destroy(c, fi_tmp);
1606 // Read information about a directory entry. Do not print anything about it.
1607 static void do_read_dir_entry(deark *c, lctx *d, i64 dir_entry_idx, i64 dir_entry_offs)
1609 i64 name_len_bytes;
1610 i64 raw_sec_id;
1611 struct dir_entry_info *dei = NULL;
1613 if(!d->dir_entry) goto done; // error
1614 dei = &d->dir_entry[dir_entry_idx];
1616 dei->pass = 3; // Default pass in which to process this entry
1618 dei->entry_type = dbuf_getbyte(d->dir, dir_entry_offs+66);
1619 switch(dei->entry_type) {
1620 case OBJTYPE_EMPTY: dei->entry_type_name="empty"; break;
1621 case OBJTYPE_STORAGE: dei->entry_type_name="storage object"; break;
1622 case OBJTYPE_STREAM: dei->entry_type_name="stream"; break;
1623 case OBJTYPE_ROOT_STORAGE: dei->entry_type_name="root storage object"; break;
1624 default: dei->entry_type_name="?";
1627 if(dei->entry_type==OBJTYPE_EMPTY) goto done;
1629 dei->name_len_raw = dbuf_getu16le(d->dir, dir_entry_offs+64);
1631 name_len_bytes = dei->name_len_raw-2; // Ignore the trailing U+0000
1632 if(name_len_bytes<0) name_len_bytes = 0;
1634 dei->fname_srd = dbuf_read_string(d->dir, dir_entry_offs, name_len_bytes, name_len_bytes,
1635 DE_CONVFLAG_WANT_UTF8, DE_ENCODING_UTF16LE);
1637 dei->node_color = dbuf_getbyte(d->dir, dir_entry_offs+67);
1639 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_STREAM) {
1640 dei->sibling_id[0] = (i32)dbuf_geti32le(d->dir, dir_entry_offs+68);
1641 dei->sibling_id[1] = (i32)dbuf_geti32le(d->dir, dir_entry_offs+72);
1644 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1645 dei->child_id = (i32)dbuf_geti32le(d->dir, dir_entry_offs+76);
1648 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1649 dbuf_read(d->dir, dei->clsid, dir_entry_offs+80, 16);
1650 fmtutil_guid_to_uuid(dei->clsid);
1653 read_and_cvt_timestamp(c, d->dir, dir_entry_offs+108, &dei->mod_time);
1655 raw_sec_id = dbuf_geti32le(d->dir, dir_entry_offs+116);
1657 if(d->major_ver<=3) {
1658 dei->stream_size = dbuf_getu32le(d->dir, dir_entry_offs+120);
1660 else {
1661 dei->stream_size = dbuf_geti64le(d->dir, dir_entry_offs+120);
1664 dei->is_mini_stream = (dei->entry_type==OBJTYPE_STREAM) && (dei->stream_size < d->std_stream_min_size);
1666 if(dei->is_mini_stream) {
1667 dei->minisec_id = raw_sec_id;
1669 else {
1670 dei->normal_sec_id = raw_sec_id;
1673 if((d->subformat_req==SUBFMT_THUMBSDB || d->subformat_req==SUBFMT_AUTO) &&
1674 !de_strcmp(dei->fname_srd->sz_utf8, "Catalog"))
1676 dei->is_thumbsdb_catalog = 1;
1677 if(d->decode_streams) dei->pass = 2;
1680 if(dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1681 dei->pass = 1;
1684 do_per_dir_entry_format_detection(c, d, dei);
1686 done:
1690 // Process an directory entry from the d->dir stream, that has previously been
1691 // read into the d->dir_entry array.
1692 static void do_process_dir_entry(deark *c, lctx *d, i64 dir_entry_idx)
1694 struct dir_entry_info *dei = NULL;
1695 char clsid_string[50];
1696 char buf[80];
1698 if(!d->dir_entry) return; // error
1699 dei = &d->dir_entry[dir_entry_idx];
1701 de_dbg(c, "type: 0x%02x (%s)", (unsigned int)dei->entry_type, dei->entry_type_name);
1702 if(dei->entry_type==OBJTYPE_EMPTY) goto done;
1704 de_dbg2(c, "name len: %d bytes", (int)dei->name_len_raw);
1705 de_dbg(c, "name: \"%s\"", ucstring_getpsz(dei->fname_srd->str));
1706 de_dbg(c, "node color: %u", (unsigned int)dei->node_color);
1708 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_STREAM) {
1709 de_dbg(c, "sibling StreamIDs: %d, %d", (int)dei->sibling_id[0], (int)dei->sibling_id[1]);
1712 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1713 de_dbg(c, "child StreamID: %d", (int)dei->child_id);
1716 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_STREAM) {
1717 de_dbg(c, "parent: %d", (int)dei->parent_id);
1720 if(dei->entry_type==OBJTYPE_STORAGE || dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1721 buf[0] = '\0';
1722 if(dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1723 identify_clsid(c, d, dei->clsid, buf, sizeof(buf));
1726 fmtutil_render_uuid(c, dei->clsid, clsid_string, sizeof(clsid_string));
1727 de_dbg(c, "%sclsid: {%s}%s", (dei->entry_type==OBJTYPE_ROOT_STORAGE)?"root ":"",
1728 clsid_string, buf);
1731 dbg_timestamp(c, &dei->mod_time, "mod time");
1733 de_dbg(c, "stream size: %"I64_FMT, dei->stream_size);
1735 if(dei->is_mini_stream) {
1736 de_dbg(c, "first MiniSecID: %d", (int)dei->minisec_id);
1738 else {
1739 describe_sec_id(c, d, dei->normal_sec_id, buf, sizeof(buf));
1740 de_dbg(c, "first SecID: %d (%s)", (int)dei->normal_sec_id, buf);
1743 if(dei->entry_type==OBJTYPE_ROOT_STORAGE) {
1744 read_mini_sector_stream(c, d, dei->normal_sec_id, dei->stream_size);
1746 else if(dei->is_thumbsdb_catalog && d->decode_streams) {
1747 // TODO: Move this to do_process_stream()?
1748 read_thumbsdb_catalog(c, d, dei);
1750 else if(dei->entry_type==OBJTYPE_STREAM) {
1751 do_process_stream(c, d, dei);
1754 done:
1758 static void do_directory(deark *c, lctx *d)
1760 i64 i;
1761 int pass;
1762 int saved_indent_level;
1764 de_dbg_indent_save(c, &saved_indent_level);
1766 de_dbg(c, "reading directory entries");
1767 do_before_reading_directory_entries(c, d);
1768 for(i=0; i<d->num_dir_entries; i++) {
1769 i64 dir_entry_offs = 128*i;
1770 do_read_dir_entry(c, d, i, dir_entry_offs);
1773 de_dbg(c, "decoding directory structure");
1774 do_analyze_dir_structure(c, d);
1776 de_dbg(c, "detecting format");
1777 do_finalize_format_detection(c, d);
1779 de_dbg(c, "processing directory entries");
1780 de_dbg_indent(c, 1);
1781 for(pass=1; pass<=3; pass++) {
1782 de_dbg2(c, "[pass %d]", pass);
1783 for(i=0; i<d->num_dir_entries; i++) {
1784 if(d->dir_entry[i].pass == pass) {
1785 de_dbg(c, "directory entry, StreamID=%d", (int)i);
1786 de_dbg_indent(c, 1);
1787 do_process_dir_entry(c, d, i);
1788 de_dbg_indent(c, -1);
1793 de_dbg_indent_restore(c, saved_indent_level);
1796 static void de_run_cfb_internal(deark *c, lctx *d)
1798 do_init_format_detection(c, d);
1800 if(!do_header(c, d)) {
1801 goto done;
1804 read_difat(c, d);
1806 read_fat(c, d);
1808 read_minifat(c, d);
1810 read_directory_stream(c, d);
1812 do_directory(c, d);
1814 done:
1815 dbuf_close(d->difat);
1816 dbuf_close(d->fat);
1817 dbuf_close(d->minifat);
1818 dbuf_close(d->dir);
1819 if(d->dir_entry) {
1820 i64 k;
1821 for(k=0; k<d->num_dir_entries; k++) {
1822 de_destroy_stringreaderdata(c, d->dir_entry[k].fname_srd);
1823 ucstring_destroy(d->dir_entry[k].path);
1825 de_free(c, d->dir_entry);
1827 dbuf_close(d->mini_sector_stream);
1828 if(d->thumbsdb_catalog) {
1829 i64 k;
1830 for(k=0; k<d->thumbsdb_catalog_num_entries; k++) {
1831 de_destroy_stringreaderdata(c, d->thumbsdb_catalog[k].fname_srd);
1833 de_free(c, d->thumbsdb_catalog);
1834 d->thumbsdb_catalog = NULL;
1838 static void de_run_cfb(deark *c, de_module_params *mparams)
1840 lctx *d = NULL;
1841 const char *cfbfmt_opt;
1843 d = de_malloc(c, sizeof(lctx));
1844 d->decode_streams = 1;
1845 d->subformat_req = SUBFMT_AUTO;
1847 if(de_get_ext_option(c, "cfb:extractstreams")) {
1848 d->extract_raw_streams = 1;
1849 d->decode_streams = 0;
1851 if(de_get_ext_option(c, "cfb:dumpdir")) {
1852 d->dump_dir_structure = 1; // A low-level debugging feature
1855 if(de_havemodcode(c, mparams, 'T')) {
1856 // TIFF tag 37680 mode
1857 // TODO: Handle 'OLE Property Set Storage' more generally.
1858 d->subformat_req = SUBFMT_TIFF37680;
1861 if(d->subformat_req == SUBFMT_AUTO) {
1862 // If we haven't set subformat_req yet, look at the command-line option
1864 cfbfmt_opt = de_get_ext_option(c, "cfb:fmt");
1865 if(cfbfmt_opt) {
1866 if(!de_strcmp(cfbfmt_opt, "auto")) {
1867 d->subformat_req = SUBFMT_AUTO;
1869 else if(!de_strcmp(cfbfmt_opt, "raw")) {
1870 d->subformat_req = SUBFMT_RAW;
1872 else if(!de_strcmp(cfbfmt_opt, "thumbsdb")) {
1873 d->subformat_req = SUBFMT_THUMBSDB;
1878 de_run_cfb_internal(c, d);
1880 de_free(c, d);
1883 static int de_identify_cfb(deark *c)
1885 if(!dbuf_memcmp(c->infile, 0, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8))
1886 return 100;
1887 return 0;
1890 static void de_help_cfb(deark *c)
1892 de_msg(c, "-opt cfb:extractstreams : Extract raw streams, instead of decoding");
1893 de_msg(c, "-opt cfb:fmt=raw : Do not try to detect the document type");
1894 de_msg(c, "-opt cfb:fmt=thumbsdb : Assume Thumbs.db format");
1895 de_msg(c, "-opt cfb:msrgbamode=0 : Disable special processing of nonstandard-"
1896 "JPEG Thumbs.db thumbnails");
1899 void de_module_cfb(deark *c, struct deark_module_info *mi)
1901 mi->id = "cfb";
1902 mi->desc = "Microsoft Compound File Binary File";
1903 mi->run_fn = de_run_cfb;
1904 mi->identify_fn = de_identify_cfb;
1905 mi->help_fn = de_help_cfb;