1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Compound File Binary File Format
6 // a.k.a. "OLE Compound Document Format", and a million other names
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_cfb
);
13 #define OBJTYPE_EMPTY 0x00
14 #define OBJTYPE_STORAGE 0x01
15 #define OBJTYPE_STREAM 0x02
16 #define OBJTYPE_ROOT_STORAGE 0x05
18 struct dir_entry_info
{
19 // Relative order in which to process this entry
20 // 1 = For the root storage object (for the mini sector stream)
21 // 2 = Other high priority streams
22 // 3 = Normal dir entries
29 i64 normal_sec_id
; // First SecID, valid if is_mini_stream==0
30 i64 minisec_id
; // First MiniSecID, valid if is_mini_stream==1
31 struct de_stringreaderdata
*fname_srd
;
33 struct de_timestamp mod_time
;
35 const char *entry_type_name
;
41 i32 parent_id
; // If parent_id==0, entry is in root dir.
42 de_ucstring
*path
; // Full dir path. Used by non-root STORAGE objects.
44 u8 is_thumbsdb_catalog
;
47 struct thumbsdb_catalog_entry
{
49 struct de_stringreaderdata
*fname_srd
;
50 struct de_timestamp mod_time
;
53 typedef struct localctx_struct
{
56 #define SUBFMT_THUMBSDB 2
57 #define SUBFMT_TIFF37680 3
60 int thumbsdb_msrgba_mode
;
61 u8 extract_raw_streams
;
63 u8 dump_dir_structure
;
64 i64 minor_ver
, major_ver
;
66 //i64 num_dir_sectors;
69 i64 std_stream_min_size
;
70 i64 first_minifat_sec_id
;
71 i64 num_minifat_sectors
;
73 i64 first_difat_sec_id
;
74 i64 num_difat_sectors
;
78 // The DIFAT is an array of the secIDs that contain the FAT.
79 // It is stored in a linked list of sectors, except that the first
80 // 109 array entries are stored in the header.
81 // After that, the last 4 bytes of each sector are the SecID of the
82 // sector containing the next part of the DIFAT, and the remaining
83 // bytes are the payload data.
86 // The FAT is an array of "next sectors". Given a SecID, it will tell you
87 // the "next" SecID in the stream that uses that sector, or it may have
88 // a special code that means "end of chain", etc.
89 // All the bytes of a FAT sector are used for payload data.
92 dbuf
*minifat
; // mini sector allocation table
94 struct dir_entry_info
*dir_entry
; // array[num_dir_entries]
95 dbuf
*mini_sector_stream
;
97 i64 thumbsdb_catalog_num_entries
;
98 struct thumbsdb_catalog_entry
*thumbsdb_catalog
;
100 int could_be_thumbsdb
;
101 int thumbsdb_old_names_found
;
102 int thumbsdb_new_names_found
;
103 int thumbsdb_catalog_found
;
106 struct clsid_id_struct
{
112 static const struct clsid_id_struct known_clsids
[] = {
113 {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, 0xffff, 0, "n/a"}, // This must be first.
114 {{0x00,0x02,0x08,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "Excel?"},
115 {{0x00,0x02,0x09,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "MS Word?"},
116 {{0x00,0x02,0x0d,0x0b,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "Outlook item?"},
117 {{0x00,0x02,0x12,0x01,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MS Publisher?"},
118 {{0x00,0x02,0x13,0x03,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MS Works WDB?"},
119 {{0x00,0x02,0x1a,0x00,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xefff, 0, "Visio?"},
120 {{0x00,0x06,0xf0,0x46,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "Outlook item?"},
121 {{0x00,0x0c,0x10,0x84,0x00,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x46}, 0xffff, 0, "MSI?"},
122 {{0x1c,0xdd,0x8c,0x7b,0x81,0xc0,0x45,0xa0,0x9f,0xed,0x04,0x14,0x31,0x44,0xcc,0x1e}, 0xffff, 0, "3ds Max?"},
123 {{0x56,0x61,0x67,0x00,0xc1,0x54,0x11,0xce,0x85,0x53,0x00,0xaa,0x00,0xa1,0xf9,0x5b}, 0xffff, 0, "FlashPix?"},
124 {{0x64,0x81,0x8d,0x10,0x4f,0x9b,0x11,0xcf,0x86,0xea,0x00,0xaa,0x00,0xb9,0x29,0xe8}, 0xffff, 0, "PowerPoint?"}
126 #define EMPTY_CLSID (known_clsids[0].clsid)
128 static i64
sec_id_to_offset(deark
*c
, lctx
*d
, i64 sec_id
)
130 if(sec_id
<0) return 0;
131 return d
->sec_size
+ sec_id
* d
->sec_size
;
134 static i64
get_next_sec_id(deark
*c
, lctx
*d
, i64 cur_sec_id
)
138 if(cur_sec_id
< 0) return -2;
139 if(!d
->fat
) return -2;
140 next_sec_id
= dbuf_geti32le(d
->fat
, cur_sec_id
*4);
144 static i64
get_next_minisec_id(deark
*c
, lctx
*d
, i64 cur_minisec_id
)
148 if(cur_minisec_id
< 0) return -2;
149 if(!d
->minifat
) return -2;
150 next_minisec_id
= dbuf_geti32le(d
->minifat
, cur_minisec_id
*4);
151 return next_minisec_id
;
154 static void describe_sec_id(deark
*c
, lctx
*d
, i64 sec_id
,
155 char *buf
, size_t buf_len
)
160 sec_offset
= sec_id_to_offset(c
, d
, sec_id
);
161 de_snprintf(buf
, buf_len
, "offs=%d", (int)sec_offset
);
163 else if(sec_id
== -1) {
164 de_strlcpy(buf
, "free", buf_len
);
166 else if(sec_id
== -2) {
167 de_strlcpy(buf
, "end of chain", buf_len
);
169 else if(sec_id
== -3) {
170 de_strlcpy(buf
, "FAT SecID", buf_len
);
172 else if(sec_id
== -4) {
173 de_strlcpy(buf
, "DIFAT SecID", buf_len
);
176 de_strlcpy(buf
, "?", buf_len
);
180 // Copy a stream (with a known byte size) to a dbuf.
181 static void copy_normal_stream_to_dbuf(deark
*c
, lctx
*d
, i64 first_sec_id
,
182 i64 stream_startpos
, i64 stream_size
,
186 i64 bytes_left_to_copy
;
187 i64 bytes_left_to_skip
;
189 if(stream_size
<=0) return;
190 if(stream_startpos
+stream_size
> c
->infile
->len
) {
191 // This is a not-too-strict emergency brake. If the file has been
192 // truncated, we might still be able to process some of the data
194 stream_size
= c
->infile
->len
- stream_startpos
;
197 bytes_left_to_copy
= stream_size
;
198 bytes_left_to_skip
= stream_startpos
;
199 sec_id
= first_sec_id
;
200 while(bytes_left_to_copy
> 0) {
206 sec_offs
= sec_id_to_offset(c
, d
, sec_id
);
208 bytes_to_skip
= bytes_left_to_skip
;
209 if(bytes_to_skip
> d
->sec_size
) bytes_to_skip
= d
->sec_size
;
211 bytes_to_copy
= d
->sec_size
- bytes_to_skip
;
212 if(bytes_to_copy
> bytes_left_to_copy
) bytes_to_copy
= bytes_left_to_copy
;
214 dbuf_copy(c
->infile
, sec_offs
+ bytes_to_skip
, bytes_to_copy
, outf
);
216 bytes_left_to_copy
-= bytes_to_copy
;
217 bytes_left_to_skip
-= bytes_to_skip
;
218 sec_id
= get_next_sec_id(c
, d
, sec_id
);
222 // Same as copy_normal_stream_to_dbuf(), but for mini streams.
223 static void copy_mini_stream_to_dbuf(deark
*c
, lctx
*d
, i64 first_minisec_id
,
224 i64 stream_startpos
, i64 stream_size
,
228 i64 bytes_left_to_copy
;
229 i64 bytes_left_to_skip
;
231 if(!d
->mini_sector_stream
) return;
232 if(stream_size
<=0 || stream_size
>c
->infile
->len
||
233 stream_size
>d
->mini_sector_stream
->len
)
238 bytes_left_to_copy
= stream_size
;
239 bytes_left_to_skip
= stream_startpos
;
240 minisec_id
= first_minisec_id
;
241 while(bytes_left_to_copy
> 0) {
246 if(minisec_id
<0) break;
247 minisec_offs
= minisec_id
* d
->mini_sector_size
;
249 bytes_to_skip
= bytes_left_to_skip
;
250 if(bytes_to_skip
> d
->mini_sector_size
) bytes_to_skip
= d
->mini_sector_size
;
252 bytes_to_copy
= d
->mini_sector_size
- bytes_to_skip
;
253 if(bytes_to_copy
> bytes_left_to_copy
) bytes_to_copy
= bytes_left_to_copy
;
255 dbuf_copy(d
->mini_sector_stream
, minisec_offs
+ bytes_to_skip
, bytes_to_copy
, outf
);
257 bytes_left_to_copy
-= bytes_to_copy
;
258 bytes_left_to_skip
-= bytes_to_skip
;
259 minisec_id
= get_next_minisec_id(c
, d
, minisec_id
);
263 static void copy_any_stream_to_dbuf(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
264 i64 stream_startpos
, i64 stream_size
,
267 if(dei
->is_mini_stream
) {
268 copy_mini_stream_to_dbuf(c
, d
, dei
->minisec_id
, stream_startpos
, stream_size
, outf
);
271 copy_normal_stream_to_dbuf(c
, d
, dei
->normal_sec_id
, stream_startpos
, stream_size
, outf
);
275 static int do_header(deark
*c
, lctx
*d
)
280 i64 mini_sector_shift
;
284 de_dbg(c
, "header at %d", (int)pos
);
287 // offset 0-7: signature
288 // offset 8-23: CLSID
290 d
->minor_ver
= de_getu16le(pos
+24);
291 d
->major_ver
= de_getu16le(pos
+26);
292 de_dbg(c
, "format version: %d.%d", (int)d
->major_ver
, (int)d
->minor_ver
);
293 if(d
->major_ver
!=3 && d
->major_ver
!=4) {
294 de_err(c
, "Unsupported format version: %d", (int)d
->major_ver
);
298 byte_order_code
= de_getu16le(pos
+28);
299 if(byte_order_code
!= 0xfffe) {
300 de_err(c
, "Unsupported byte order code: 0x%04x", (unsigned int)byte_order_code
);
304 sector_shift
= de_getu16le(pos
+30); // aka ssz
305 d
->sec_size
= de_pow2(sector_shift
);
306 de_dbg(c
, "sector size: 2^%d (%d bytes)", (int)sector_shift
,
308 if(d
->sec_size
!=512 && d
->sec_size
!=4096) {
309 de_err(c
, "Unsupported sector size: %d", (int)d
->sec_size
);
313 mini_sector_shift
= de_getu16le(pos
+32); // aka sssz
314 d
->mini_sector_size
= de_pow2(mini_sector_shift
);
315 de_dbg(c
, "mini sector size: 2^%d (%d bytes)", (int)mini_sector_shift
,
316 (int)d
->mini_sector_size
);
317 if(d
->mini_sector_size
!=64) {
318 de_err(c
, "Unsupported mini sector size: %d", (int)d
->mini_sector_size
);
322 // offset 34: 6 reserved bytes
324 //d->num_dir_sectors = de_getu32le(pos+40);
325 //de_dbg(c, "number of directory sectors: %u", (unsigned int)d->num_dir_sectors);
326 // Should be 0 if major_ver==3
328 // Number of sectors used by sector allocation table (FAT)
329 d
->num_fat_sectors
= de_getu32le(pos
+44);
330 de_dbg(c
, "number of FAT sectors: %d", (int)d
->num_fat_sectors
);
332 d
->first_dir_sec_id
= de_geti32le(pos
+48);
333 describe_sec_id(c
, d
, d
->first_dir_sec_id
, buf
, sizeof(buf
));
334 de_dbg(c
, "first directory sector: %d (%s)", (int)d
->first_dir_sec_id
, buf
);
336 // offset 52, transaction signature number
338 d
->std_stream_min_size
= de_getu32le(pos
+56);
339 de_dbg(c
, "min size of a standard stream: %d", (int)d
->std_stream_min_size
);
341 // First sector of mini sector allocation table (MiniFAT)
342 d
->first_minifat_sec_id
= de_geti32le(pos
+60);
343 describe_sec_id(c
, d
, d
->first_minifat_sec_id
, buf
, sizeof(buf
));
344 de_dbg(c
, "first MiniFAT sector: %d (%s)", (int)d
->first_minifat_sec_id
, buf
);
346 // Number of sectors used by MiniFAT
347 d
->num_minifat_sectors
= de_getu32le(pos
+64);
348 de_dbg(c
, "number of MiniFAT sectors: %d", (int)d
->num_minifat_sectors
);
350 // SecID of first (extra?) sector of the DIFAT
351 // (also called the Master Sector Allocation Table (MSAT))
352 d
->first_difat_sec_id
= de_geti32le(pos
+68);
353 describe_sec_id(c
, d
, d
->first_difat_sec_id
, buf
, sizeof(buf
));
354 de_dbg(c
, "first extended DIFAT sector: %d (%s)", (int)d
->first_difat_sec_id
, buf
);
356 // Number of (extra?) sectors used by the DIFAT
357 d
->num_difat_sectors
= de_getu32le(pos
+72);
358 de_dbg(c
, "number of extended DIFAT sectors: %d", (int)d
->num_difat_sectors
);
360 // offset 76: 436 bytes of DIFAT data
364 de_dbg_indent(c
, -1);
368 // Read the locations of the FAT sectors
369 static void read_difat(deark
*c
, lctx
*d
)
377 de_dbg(c
, "reading DIFAT (total number of entries=%d)", (int)d
->num_fat_sectors
);
380 if(d
->num_fat_sectors
> 1000000) {
381 // TODO: Decide what limits to enforce.
382 d
->num_fat_sectors
= 1000000;
385 // Expecting d->num_fat_sectors in the DIFAT table
386 d
->difat
= dbuf_create_membuf(c
, d
->num_fat_sectors
* 4, 1);
388 still_to_read
= d
->num_fat_sectors
;
390 // Copy the part of the DIFAT that is in the header
391 num_to_read
= still_to_read
;
392 if(num_to_read
>109) num_to_read
= 109;
393 de_dbg(c
, "reading %d DIFAT entries from header, at 76", (int)num_to_read
);
394 dbuf_copy(c
->infile
, 76, num_to_read
*4, d
->difat
);
395 still_to_read
-= num_to_read
;
397 difat_sec_id
= d
->first_difat_sec_id
;
398 while(still_to_read
>0) {
399 if(difat_sec_id
<0) break;
401 difat_sec_offs
= sec_id_to_offset(c
, d
, difat_sec_id
);
402 de_dbg(c
, "reading DIFAT sector at %d", (int)difat_sec_offs
);
403 num_to_read
= (d
->sec_size
- 4)/4;
405 dbuf_copy(c
->infile
, difat_sec_offs
, num_to_read
*4, d
->difat
);
406 still_to_read
-= num_to_read
;
407 difat_sec_id
= de_geti32le(difat_sec_offs
+ num_to_read
*4);
410 de_dbg_indent(c
, -1);
413 static void dump_fat(deark
*c
, lctx
*d
)
419 if(c
->debug_level
<2) return;
421 de_dbg2(c
, "dumping FAT contents (%d entries)", (int)d
->num_fat_entries
);
424 for(i
=0; i
<d
->num_fat_entries
; i
++) {
425 sec_id
= dbuf_geti32le(d
->fat
, i
*4);
426 describe_sec_id(c
, d
, sec_id
, buf
, sizeof(buf
));
427 de_dbg2(c
, "FAT[%d]: next_SecID=%d (%s)", (int)i
, (int)sec_id
, buf
);
429 de_dbg_indent(c
, -1);
432 // Read the contents of the FAT sectors
433 static void read_fat(deark
*c
, lctx
*d
)
440 d
->fat
= dbuf_create_membuf(c
, d
->num_fat_sectors
* d
->sec_size
, 1);
442 de_dbg(c
, "reading FAT contents (%d sectors)", (int)d
->num_fat_sectors
);
444 for(i
=0; i
<d
->num_fat_sectors
; i
++) {
445 sec_id
= dbuf_geti32le(d
->difat
, i
*4);
446 sec_offset
= sec_id_to_offset(c
, d
, sec_id
);
447 describe_sec_id(c
, d
, sec_id
, buf
, sizeof(buf
));
448 de_dbg(c
, "reading sector: DIFAT_idx=%d, SecID=%d (%s)",
449 (int)i
, (int)sec_id
, buf
);
450 dbuf_copy(c
->infile
, sec_offset
, d
->sec_size
, d
->fat
);
452 de_dbg_indent(c
, -1);
454 d
->num_fat_entries
= d
->fat
->len
/4;
458 static void dump_minifat(deark
*c
, lctx
*d
)
462 i64 num_minifat_entries
;
464 if(c
->debug_level
<2) return;
465 if(!d
->minifat
) return;
467 num_minifat_entries
= d
->minifat
->len
/ 4;
468 de_dbg2(c
, "dumping MiniFAT contents (%d entries)", (int)num_minifat_entries
);
471 for(i
=0; i
<num_minifat_entries
; i
++) {
472 sec_id
= dbuf_geti32le(d
->minifat
, i
*4);
473 de_dbg2(c
, "MiniFAT[%d]: next_MiniSecID=%d", (int)i
, (int)sec_id
);
475 de_dbg_indent(c
, -1);
478 // Read the contents of the MiniFAT sectors into d->minifat
479 static void read_minifat(deark
*c
, lctx
*d
)
486 if(d
->num_minifat_sectors
> 1000000) {
487 // TODO: Decide what limits to enforce.
488 d
->num_minifat_sectors
= 1000000;
491 d
->minifat
= dbuf_create_membuf(c
, d
->num_minifat_sectors
* d
->sec_size
, 1);
493 // TODO: Use copy_normal_stream_to_dbuf
494 de_dbg(c
, "reading MiniFAT contents (%d sectors)", (int)d
->num_minifat_sectors
);
497 sec_id
= d
->first_minifat_sec_id
;
499 for(i
=0; i
<d
->num_minifat_sectors
; i
++) {
502 sec_offset
= sec_id_to_offset(c
, d
, sec_id
);
503 describe_sec_id(c
, d
, sec_id
, buf
, sizeof(buf
));
504 de_dbg(c
, "reading MiniFAT sector #%d, SecID=%d (%s), MiniSecIDs %d-%d",
505 (int)i
, (int)sec_id
, buf
,
506 (int)(i
*(d
->sec_size
/4)), (int)((i
+1)*(d
->sec_size
/4)-1));
507 dbuf_copy(c
->infile
, sec_offset
, d
->sec_size
, d
->minifat
);
509 sec_id
= get_next_sec_id(c
, d
, sec_id
);
511 de_dbg_indent(c
, -1);
// Convert the name of a Thumbs.db stream to the catalog ID number it encodes.
// The name (dei->fname_srd->sz_utf8) is expected to consist only of ASCII
// digits, which are the decimal form of the ID written in reverse order.
516 // Returns -1 if not a valid name
517 static i64
stream_name_to_catalog_id(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
// Reject a name too long to fit in buf along with a terminating NUL.
523 nlen
= dei
->fname_srd
->sz_utf8_strlen
;
524 if(nlen
>sizeof(buf
)-1) return -1;
526 for(i
=0; i
<nlen
; i
++) {
527 // Name should contain only digits
528 if(dei
->fname_srd
->sz_utf8
[i
]<'0' || dei
->fname_srd
->sz_utf8
[i
]>'9') return -1;
530 // The stream name is the *reversed* string form of the ID number.
531 // (I assume this is to try to keep the directory tree structure balanced.)
// So, character i of the name is stored at position nlen-1-i of buf.
532 buf
[nlen
-1-i
] = dei
->fname_srd
->sz_utf8
[i
];
// buf now holds the digits in their natural order; parse them as a number.
536 return de_atoi64(buf
);
539 // Returns an index into d->thumbsdb_catalog.
540 // Returns -1 if not found.
541 static i64
lookup_thumbsdb_catalog_entry(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
546 if(d
->thumbsdb_catalog_num_entries
<1 || !d
->thumbsdb_catalog
) return -1;
547 if(!dei
->fname_srd
|| !dei
->fname_srd
->str
) return -1;
549 id
= stream_name_to_catalog_id(c
, d
, dei
);
552 for(i
=0; i
<d
->thumbsdb_catalog_num_entries
; i
++) {
553 if(d
->thumbsdb_catalog
[i
].id
== id
)
559 // This function tries to better handle a special nonstandard JPEG thumbnail format
560 // that I'm calling MSRGBA.
561 // We can't *really* handle it, because Deark doesn't decompress lossy formats, and
562 // AFAIK there is no standard format that we can losslessly convert it to.
563 // What we can do is add the missing quantization and Huffman tables, and add a
564 // custom segment to help identify the format.
565 // This should allow most JPEG viewers to decode the image, though most will guess
566 // it is CMYK, and display the colors all wrong (often all black).
567 // Note that the component ID numbers are ASCII 'R','G','B','A'.
568 // Based on my (possibly wrong) analysis, the 'R' channel is blue(!), 'G' is green,
569 // 'B' is red, and 'A' can be either opacity, or unused (dunno how to tell which).
571 // hdrsize is the length of just the first header.
572 // Returns 0 if nothing was extracted.
573 static int thumbsdb_msrgba_special_extract(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
574 i64 hdrsize
, dbuf
*outf
)
576 static const u8 qtable0
[69] = {
577 0xff,0xdb,0x00,0x43,0x00,0x08,0x06,0x06,0x07,0x06,0x05,0x08,0x07,0x07,0x07,0x09,
578 0x09,0x08,0x0a,0x0c,0x14,0x0d,0x0c,0x0b,0x0b,0x0c,0x19,0x12,0x13,0x0f,0x14,0x1d,
579 0x1a,0x1f,0x1e,0x1d,0x1a,0x1c,0x1c,0x20,0x24,0x2e,0x27,0x20,0x22,0x2c,0x23,0x1c,
580 0x1c,0x28,0x37,0x29,0x2c,0x30,0x31,0x34,0x34,0x34,0x1f,0x27,0x39,0x3d,0x38,0x32,
581 0x3c,0x2e,0x33,0x34,0x32};
582 static const u8 htables
[212] = {
583 0xff,0xc4,0x00,0xd2,0x00,0x00,0x01,0x05,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00,
584 0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,
585 0x0b,0x10,0x00,0x02,0x01,0x03,0x03,0x02,0x04,0x03,0x05,0x05,0x04,0x04,0x00,0x00,
586 0x01,0x7d,0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,
587 0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,
588 0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,
589 0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,
590 0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,
591 0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,
592 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,
593 0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
594 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
595 0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,
596 0xf7,0xf8,0xf9,0xfa};
600 if(dei
->stream_size
<hdrsize
+16+2+22) return 0;
601 inf_pos
= hdrsize
+16; // Also skip past the 16-byte extra header
604 copy_any_stream_to_dbuf(c
, d
, dei
, inf_pos
, 2, outf
);
607 // Special APP1 segment to record both headers, and identify the format.
608 dbuf_write(outf
, (const u8
*)"\xff\xe1", 2);
609 idseg_len
= 2 + 12 + 1 + hdrsize
+ 16;
610 dbuf_writeu16be(outf
, idseg_len
);
611 dbuf_write(outf
, (const u8
*)"Deark_MSRGBA\0", 13);
612 copy_any_stream_to_dbuf(c
, d
, dei
, 0, hdrsize
+16, outf
);
615 dbuf_write(outf
, qtable0
, sizeof(qtable0
));
616 // TODO: Do we ever need another quantization table?
619 // TODO?: This code is fragile. We could parse the JPEG data, instead of
620 // just hoping it is laid out like we expect.
621 copy_any_stream_to_dbuf(c
, d
, dei
, inf_pos
, 22, outf
);
625 dbuf_write(outf
, htables
, sizeof(htables
));
627 // The rest of the file
628 copy_any_stream_to_dbuf(c
, d
, dei
, inf_pos
, dei
->stream_size
-inf_pos
, outf
);
633 // Special handling of Thumbs.db files.
634 // Caller sets fi and tmpfn to default values. This function may modify them.
635 // firstpart = caller-supplied dbuf containing the first 256 or so bytes of the stream
636 static void do_extract_stream_to_file_thumbsdb(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
637 de_finfo
*fi
, de_ucstring
*tmpfn
, dbuf
*firstpart
)
646 i64 final_streamsize
;
649 if(dei
->is_thumbsdb_catalog
) {
650 // We've already read the catalog.
654 de_dbg(c
, "reading Thumbs.db stream");
658 final_streamsize
= dei
->stream_size
;
660 // A Thumbs.db stream typically has a header, followed by an embedded JPEG
661 // (or something) file.
663 catalog_idx
= lookup_thumbsdb_catalog_entry(c
, d
, dei
);
666 if(d
->thumbsdb_catalog
[catalog_idx
].mod_time
.is_valid
) {
667 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = d
->thumbsdb_catalog
[catalog_idx
].mod_time
; // struct copy
671 hdrsize
= dbuf_getu32le(firstpart
, 0);
672 de_dbg(c
, "header size: %d", (int)hdrsize
);
674 ver
= dbuf_getu32le(firstpart
, 4);
675 de_dbg(c
, "version: %d", (int)ver
);
677 // 0x0c = "Original format" Thumbs.db
678 // 0x18 = "Windows 7 format"
680 if((hdrsize
==0x0c || hdrsize
==0x18) && dei
->stream_size
>hdrsize
) {
684 reported_size
= dbuf_getu32le(firstpart
, 8);
685 de_dbg(c
, "reported size: %d", (int)reported_size
);
688 final_streamsize
-= hdrsize
;
689 de_dbg(c
, "calculated size: %d", (int)final_streamsize
);
691 if(catalog_idx
>=0 && c
->filenames_from_file
) {
692 de_dbg(c
, "name from catalog: \"%s\"",
693 ucstring_getpsz(d
->thumbsdb_catalog
[catalog_idx
].fname_srd
->str
));
695 // Replace the default name with the name from the catalog.
696 ucstring_empty(tmpfn
);
698 if(!de_strcasecmp(d
->thumbsdb_catalog
[catalog_idx
].fname_srd
->sz_utf8
,
699 "{A42CD7B6-E9B9-4D02-B7A6-288B71AD28BA}"))
701 ucstring_append_sz(tmpfn
, "_folder", DE_ENCODING_LATIN1
);
704 ucstring_append_ucstring(tmpfn
, d
->thumbsdb_catalog
[catalog_idx
].fname_srd
->str
);
708 dbuf_read(firstpart
, sig1
, hdrsize
, 4);
709 dbuf_read(firstpart
, sig2
, hdrsize
+16, 4);
711 if(sig1
[0]==0xff && sig1
[1]==0xd8) ext
= "jpg";
712 else if(sig1
[0]==0x89 && sig1
[1]==0x50) ext
= "png";
713 else if(sig1
[0]==0x01 && sig1
[1]==0x00 &&
714 sig2
[0]==0xff && sig2
[1]==0xd8)
716 // Looks like a nonstandard Microsoft RGBA JPEG.
722 ucstring_printf(tmpfn
, DE_ENCODING_LATIN1
, ".thumb.%s", ext
);
725 de_warn(c
, "Unidentified Thumbs.db stream \"%s\"",
726 ucstring_getpsz(dei
->fname_srd
->str
));
729 de_dbg_indent(c
, -1);
731 de_finfo_set_name_from_ucstring(c
, fi
, tmpfn
, 0);
732 fi
->original_filename_flag
= 1;
734 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
737 if(d
->thumbsdb_msrgba_mode
) {
739 ok
= thumbsdb_msrgba_special_extract(c
, d
, dei
, hdrsize
, outf
);
743 // "MSRGBA" thumbnails seem to have an additional 16-byte header,
744 // before the JPEG data starts. In this mode, we just ignore it.
746 final_streamsize
-= 16;
749 copy_any_stream_to_dbuf(c
, d
, dei
, startpos
, final_streamsize
, outf
);
755 static void do_OfficeArtStream(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
757 dbuf
*tmpstream
= NULL
;
759 de_dbg(c
, "OfficeArt stream, len=%"I64_FMT
, dei
->stream_size
);
761 tmpstream
= dbuf_create_membuf(c
, dei
->stream_size
, 0x1);
762 copy_any_stream_to_dbuf(c
, d
, dei
, 0, dei
->stream_size
, tmpstream
);
763 if(tmpstream
->len
< dei
->stream_size
) {
764 de_warn(c
, "OfficeArt stream might have been truncated");
767 de_run_module_by_id_on_slice2(c
, "officeart", NULL
, tmpstream
, 0, tmpstream
->len
);
768 de_dbg_indent(c
, -1);
769 dbuf_close(tmpstream
);
772 static void do_Corel_simple_image(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
779 de_bitmap
*img
= NULL
;
781 w
= dbuf_getu32le_p(f
, &pos
);
782 h
= dbuf_getu32le_p(f
, &pos
);
783 de_dbg_dimensions(c
, w
, h
);
784 if(!de_good_image_dimensions(c
, w
, h
)) goto done
;
786 img
= de_bitmap_create(c
, w
, h
, 1);
788 // TODO: I don't know whether this is the right way to interpret this
790 for(j
=0; j
<img
->height
; j
++) {
791 for(i
=0; i
<img
->width
; i
++) {
792 b
= dbuf_getbyte(f
, pos
+ j
*w
+ i
);
793 de_bitmap_setpixel_gray(img
, i
, j
, b
);
797 de_bitmap_write_to_file(img
, NULL
, DE_CREATEFLAG_FLIP_IMAGE
);
800 de_bitmap_destroy(img
);
803 // This is an object found in Corel Print House (.CPH) and similar files.
804 // This decoder is based on reverse engineering. It may be incorrect.
805 static void do_Corel_UIformat(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
806 dbuf
*f
, i64 pos1
, i64 len
, int is_thumb
)
813 i64 pal_offs
, img_offs
;
819 int saved_indent_level
;
820 de_bitmap
*img
= NULL
;
823 de_dbg_indent_save(c
, &saved_indent_level
);
825 if(dbuf_memcmp(f
, pos
, "UI\x00\x00", 4)) goto done
;
827 de_dbg(c
, "CorelUI at [%"I64_FMT
"], len=%"I64_FMT
, pos1
, len
);
831 pos
+= 4; // The size of the "RI" segment? Redundant?
834 hdr_len
= dbuf_getu32le_p(f
, &pos
);
835 // Apparently the size of the "UI" segment
836 if(hdr_len
!= 32) goto done
;
837 // TODO: More fields here
839 ri_pos
= pos1
+hdr_len
;
841 if(dbuf_memcmp(f
, pos
, "RI", 2)) goto done
;
843 ri_len
= dbuf_getu32le_p(f
, &pos
);
844 if(ri_pos
+ ri_len
> f
->len
) goto done
;
847 w
= dbuf_getu32le_p(f
, &pos
);
848 h
= dbuf_getu32le_p(f
, &pos
);
849 de_dbg_dimensions(c
, w
, h
);
851 pos
+= 4; // ? (observed 1)
853 bpp
= (int)dbuf_getu32le_p(f
, &pos
);
854 de_dbg(c
, "bits/pixel?: %d", bpp
);
855 rowspan
= dbuf_getu32le_p(f
, &pos
);
856 de_dbg(c
, "bytes/row?: %d", (int)rowspan
);
857 pixels_size
= dbuf_getu32le_p(f
, &pos
);
858 de_dbg(c
, "pixels size: %"I64_FMT
, pixels_size
);
860 pos
+= 8; // ? (density?)
862 pal_offs
= dbuf_getu32le_p(f
, &pos
);
863 de_dbg(c
, "pal offs: %"I64_FMT
, pal_offs
);
865 img_offs
= dbuf_getu32le_p(f
, &pos
);
866 de_dbg(c
, "img offs: %"I64_FMT
, img_offs
);
870 if(bpp
!=8 && bpp
!=24) goto done
;
873 de_make_grayscale_palette(pal
, 256, 0);
875 // This formula doesn't make sense to me, but seems to work.
876 pos
= ri_pos
+14+pal_offs
;
877 de_dbg(c
, "palette at [%"I64_FMT
"]", pos
);
879 pos
+= 2; // ? (observed 4, 5)
881 npalent
= dbuf_getu16le_p(f
, &pos
);
882 de_dbg(c
, "num pal entries: %d", (int)npalent
);
883 if(npalent
>256) goto done
;
885 de_read_palette_rgb(f
, pos
, npalent
, 3, pal
, 256, DE_GETRGBFLAG_BGR
);
886 de_dbg_indent(c
, -1);
890 if(!de_good_image_dimensions(c
, w
, h
)) goto done
;
892 img
= de_bitmap_create(c
, w
, h
, (bpp
<24 && pal_offs
==0)?1:3);
893 pos
= ri_pos
+14+img_offs
;
894 de_dbg(c
, "bitmap at [%"I64_FMT
"]", pos
);
896 de_convert_image_paletted(f
, pos
, 8, rowspan
, pal
, img
, 0);
899 de_convert_image_rgb(f
, pos
, rowspan
, 3, img
, DE_GETRGBFLAG_BGR
);
902 de_bitmap_write_to_file(img
, is_thumb
?"thumb":NULL
, DE_CREATEFLAG_FLIP_IMAGE
);
908 de_dbg(c
, "[unsupported image type]");
910 de_bitmap_destroy(img
);
911 de_dbg_indent_restore(c
, saved_indent_level
);
914 static void do_CorelImages_internal(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
918 int saved_indent_level
;
920 de_dbg_indent_save(c
, &saved_indent_level
);
923 unsigned int imgtype1_or_size
;
924 unsigned int imgtype1
;
927 if(pos
>= f
->len
-8) break;
929 de_dbg(c
, "image at [%"I64_FMT
"]", pos
);
932 // Seems like sometimes this first 'type' field is present, and
933 // sometimes it isn't (and we treat it like it's 0).
934 imgtype1_or_size
= (unsigned int)dbuf_getu32le_p(f
, &pos
);
935 if(imgtype1_or_size
<8) {
936 imgtype1
= imgtype1_or_size
;
939 size1
= dbuf_getu32le_p(f
, &pos
);
944 size1
= (i64
)imgtype1_or_size
;
947 de_dbg(c
, "low level imgtype: %u", imgtype1
);
950 unsigned int imgtype2
;
952 imgtype2
= (unsigned int)dbuf_getu32le_p(f
, &pos
);
953 de_dbg(c
, "high level imgtype: %u", imgtype2
);
954 de_dbg(c
, "len: %"I64_FMT
, size1
);
956 if(pos
+size1
> f
->len
) break;
958 if(imgtype2
==0) { // "Uncompressed Image"?
959 do_Corel_UIformat(c
, d
, dei
, f
, pos
, size1
, 0);
961 else if(imgtype2
==1) { // JPEG?
962 dbuf_create_file_from_slice(f
, pos
, size1
, "jpg", NULL
, 0);
965 de_dbg(c
, "[unsupported image type: %u:%u]", imgtype1
, imgtype2
);
968 else if(imgtype1
==1) {
969 size1
= dbuf_getu32le(f
, pos
+8);
970 do_Corel_simple_image(c
, d
, dei
, f
, pos
);
974 de_dbg(c
, "[unsupported image type (%u), can't continue]", imgtype1
);
979 de_dbg_indent(c
, -1);
983 de_dbg_indent_restore(c
, saved_indent_level
);
986 static void do_StreamNamedThumbnail(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
991 if(dei
->stream_size
<32 || dei
->stream_size
>DE_MAX_SANE_OBJECT_SIZE
) {
994 f
= dbuf_create_membuf(c
, 0, 0);
996 // Start by reading just a little, to figure out the data type
997 copy_any_stream_to_dbuf(c
, d
, dei
, 0, 16, f
);
998 size1
= dbuf_getu32le(f
, 0);
999 if(size1
+4 != dei
->stream_size
) goto done
;
1000 if(dbuf_memcmp(f
, 4, "UI\x00\x00", 4)) goto done
;
1002 copy_any_stream_to_dbuf(c
, d
, dei
, 16, dei
->stream_size
-16, f
);
1003 do_Corel_UIformat(c
, d
, dei
, f
, 4, size1
-4, 1);
1009 static void do_StreamNamedImages(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
1013 if(dei
->stream_size
<32 || dei
->stream_size
>DE_MAX_SANE_OBJECT_SIZE
) {
1016 f
= dbuf_create_membuf(c
, 0, 0);
1018 // Start by reading just a little, to figure out the data type
1019 copy_any_stream_to_dbuf(c
, d
, dei
, 0, 16, f
);
1020 if(dbuf_memcmp(f
, 4, "\x01\x00\x00\x00\xff\xd8\xff", 7) &&
1021 dbuf_memcmp(f
, 4, "\x00\x00\x00\x00\x55\x49\x00\x00", 8))
1023 // Not an "Images" stream we recognize.
1024 // TODO: Can we detect this if the first image is in "simple"
1029 // This is an object found in Corel Print House (.CPH) and similar files.
1030 copy_any_stream_to_dbuf(c
, d
, dei
, 16, dei
->stream_size
-16, f
);
1031 do_CorelImages_internal(c
, d
, dei
, f
);
// Debugging aid: print the timestamp ts in the form "<field_name>: <timestamp>".
// field_name is the label to print before the timestamp.
1037 static void dbg_timestamp(deark
*c
, struct de_timestamp
*ts
, const char *field_name
)
// Scratch buffer for the string form of the timestamp
1039 char timestamp_buf
[64];
// Convert the timestamp to a string, then emit it as a debug message.
1042 de_timestamp_to_string(ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
1043 de_dbg(c
, "%s: %s", field_name
, timestamp_buf
);
1047 static void read_and_cvt_timestamp(deark
*c
, dbuf
*f
, i64 pos
,
1048 struct de_timestamp
*ts
)
1052 de_zeromem(ts
, sizeof(struct de_timestamp
));
1053 ts_as_FILETIME
= dbuf_geti64le(f
, pos
);
1054 if(ts_as_FILETIME
!=0) {
1055 de_FILETIME_to_timestamp(ts_as_FILETIME
, ts
, 0x1);
1059 static int read_thumbsdb_catalog(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
1068 if(d
->thumbsdb_catalog
) return 0; // Already read a catalog
1070 de_dbg(c
, "reading thumbsdb catalog");
1071 de_dbg_indent(c
, 1);
1073 catf
= dbuf_create_membuf(c
, dei
->stream_size
, 0);
1074 copy_any_stream_to_dbuf(c
, d
, dei
, 0, dei
->stream_size
, catf
);
1076 item_len
= dbuf_getu16le(catf
, 0);
1077 de_dbg(c
, "header size: %d", (int)item_len
); // (?)
1078 if(item_len
!=16) goto done
;
1080 n
= dbuf_getu16le(catf
, 2);
1081 de_dbg(c
, "catalog version: %d", (int)n
); // (?)
1082 if(n
!=5 && n
!=6 && n
!=7) {
1083 de_warn(c
, "Unsupported Catalog version: %d", (int)n
);
1087 d
->thumbsdb_catalog_num_entries
= dbuf_getu16le(catf
, 4); // This might really be a 4 byte int.
1088 de_dbg(c
, "num entries: %d", (int)d
->thumbsdb_catalog_num_entries
);
1089 if(d
->thumbsdb_catalog_num_entries
>2048)
1090 d
->thumbsdb_catalog_num_entries
= 2048;
1092 d
->thumbsdb_catalog
= de_mallocarray(c
, d
->thumbsdb_catalog_num_entries
,
1093 sizeof(struct thumbsdb_catalog_entry
));
1097 for(i
=0; i
<d
->thumbsdb_catalog_num_entries
; i
++) {
1100 if(pos
>= catf
->len
) goto done
;
1101 item_len
= dbuf_getu32le(catf
, pos
);
1102 de_dbg(c
, "catalog entry #%d, len=%d", (int)i
, (int)item_len
);
1103 if(item_len
<20) goto done
;
1104 if(pos
+item_len
> catf
->len
) goto done
;
1106 de_dbg_indent(c
, 1);
1108 d
->thumbsdb_catalog
[i
].id
= (u32
)dbuf_getu32le(catf
, pos
+4);
1109 de_dbg(c
, "id: %u", (unsigned int)d
->thumbsdb_catalog
[i
].id
);
1111 read_and_cvt_timestamp(c
, catf
, pos
+8, &d
->thumbsdb_catalog
[i
].mod_time
);
1112 dbg_timestamp(c
, &d
->thumbsdb_catalog
[i
].mod_time
, "timestamp");
1114 name_len
= de_min_int(item_len
-20, 65536);
1115 d
->thumbsdb_catalog
[i
].fname_srd
= dbuf_read_string(catf
, pos
+16, name_len
, name_len
,
1116 DE_CONVFLAG_WANT_UTF8
, DE_ENCODING_UTF16LE
);
1117 de_dbg(c
, "name: \"%s\"", ucstring_getpsz(d
->thumbsdb_catalog
[i
].fname_srd
->str
));
1119 de_dbg_indent(c
, -1);
1126 de_dbg_indent(c
, -1);
1129 d
->thumbsdb_catalog_num_entries
= 0; // Make sure we don't use a bad catalog.
1134 static void do_cfb_olepropertyset(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
,
1135 int is_summaryinfo
, int is_root
)
1138 int saved_indent_level
;
1140 if(dei
->stream_size
>1000000) goto done
;
1141 f
= dbuf_create_membuf(c
, dei
->stream_size
, 1);
1142 copy_any_stream_to_dbuf(c
, d
, dei
, 0, dei
->stream_size
, f
);
1144 de_dbg_indent_save(c
, &saved_indent_level
);
1145 if(is_summaryinfo
) {
1146 de_dbg(c
, "SummaryInformation (%s)", is_root
?"root":"non-root");
1149 de_dbg(c
, "property set stream");
1152 de_dbg_indent(c
, 1);
1153 de_run_module_by_id_on_slice(c
, "olepropset", NULL
, f
, 0, f
->len
);
1154 de_dbg_indent(c
, -1);
1160 static void read_mini_sector_stream(deark
*c
, lctx
*d
, i64 first_sec_id
, i64 stream_size
)
1162 if(d
->mini_sector_stream
) return; // Already done
1164 de_dbg(c
, "reading mini sector stream (%d bytes)", (int)stream_size
);
1165 d
->mini_sector_stream
= dbuf_create_membuf(c
, 0, 0);
1166 copy_normal_stream_to_dbuf(c
, d
, first_sec_id
, 0, stream_size
, d
->mini_sector_stream
);
1169 // Reads the directory stream into d->dir, and sets d->num_dir_entries.
1170 static void read_directory_stream(deark
*c
, lctx
*d
)
1173 i64 dir_sector_offs
;
1174 i64 num_entries_per_sector
;
1175 i64 dir_sector_count
= 0;
1177 de_dbg(c
, "reading directory stream");
1178 de_dbg_indent(c
, 1);
1180 d
->dir
= dbuf_create_membuf(c
, 0, 0);
1182 dir_sec_id
= d
->first_dir_sec_id
;
1184 num_entries_per_sector
= d
->sec_size
/ 128;
1185 d
->num_dir_entries
= 0;
1187 // TODO: Use copy_normal_stream_to_dbuf
1189 if(dir_sec_id
<0) break;
1190 if(d
->dir
->len
> c
->infile
->len
) break;
1192 dir_sector_offs
= sec_id_to_offset(c
, d
, dir_sec_id
);
1194 de_dbg(c
, "directory sector #%d SecID=%d (offs=%d), entries %d-%d",
1195 (int)dir_sector_count
,
1196 (int)dir_sec_id
, (int)dir_sector_offs
,
1197 (int)d
->num_dir_entries
, (int)(d
->num_dir_entries
+ num_entries_per_sector
- 1));
1199 dbuf_copy(c
->infile
, dir_sector_offs
, d
->sec_size
, d
->dir
);
1201 d
->num_dir_entries
+= num_entries_per_sector
;
1203 dir_sec_id
= get_next_sec_id(c
, d
, dir_sec_id
);
1207 de_dbg(c
, "number of directory entries: %d", (int)d
->num_dir_entries
);
1209 de_dbg_indent(c
, -1);
1212 static void do_init_format_detection(deark
*c
, lctx
*d
)
1214 if(d
->subformat_req
!=SUBFMT_AUTO
) return;
1215 d
->could_be_thumbsdb
= 1;
1218 static void do_finalize_format_detection(deark
*c
, lctx
*d
)
1220 d
->subformat_final
= SUBFMT_RAW
; // default
1222 if(d
->subformat_req
!=SUBFMT_AUTO
) {
1223 d
->subformat_final
= d
->subformat_req
;
1227 if(!d
->could_be_thumbsdb
) goto done
;
1229 if(d
->thumbsdb_old_names_found
>0 && !d
->thumbsdb_catalog_found
)
1231 d
->could_be_thumbsdb
= 0;
1233 else if(d
->thumbsdb_old_names_found
+ d
->thumbsdb_new_names_found
+
1234 d
->thumbsdb_catalog_found
< 1)
1236 d
->could_be_thumbsdb
= 0;
1239 if(d
->could_be_thumbsdb
) {
1240 d
->subformat_final
= SUBFMT_THUMBSDB
;
1244 switch(d
->subformat_final
) {
1245 case SUBFMT_THUMBSDB
:
1246 de_declare_fmt(c
, "Thumbs.db");
1247 d
->thumbsdb_msrgba_mode
= de_get_ext_option_bool(c
, "cfb:msrgbamode", 1);
1252 static void do_dump_dir_structure(deark
*c
, lctx
*d
)
1256 de_dbg(c
, "dir structure:");
1257 de_dbg_indent(c
, 1);
1258 for(i
=0; i
<d
->num_dir_entries
; i
++) {
1259 de_dbg(c
, "[%d] t=%d p=%d c=%d s=%d,%d", (int)i
,
1260 (int)d
->dir_entry
[i
].entry_type
,
1261 (int)d
->dir_entry
[i
].parent_id
,
1262 (int)d
->dir_entry
[i
].child_id
,
1263 (int)d
->dir_entry
[i
].sibling_id
[0],
1264 (int)d
->dir_entry
[i
].sibling_id
[1]);
1265 de_dbg_indent(c
, 1);
1266 if(d
->dir_entry
[i
].fname_srd
&& d
->dir_entry
[i
].fname_srd
->str
) {
1267 de_dbg(c
, "fname: \"%s\"",
1268 ucstring_getpsz(d
->dir_entry
[i
].fname_srd
->str
));
1270 if(d
->dir_entry
[i
].path
) {
1271 de_dbg(c
, "path: \"%s\"",
1272 ucstring_getpsz(d
->dir_entry
[i
].path
));
1274 de_dbg_indent(c
, -1);
1276 de_dbg_indent(c
, -1);
1279 static void do_mark_dir_entries_recursively(deark
*c
, lctx
*d
, i32 parent_id
,
1280 i32 dir_entry_idx
, int level
)
1282 struct dir_entry_info
*dei
;
1285 if(dir_entry_idx
<0 || (i64
)dir_entry_idx
>=d
->num_dir_entries
) return;
1287 dei
= &d
->dir_entry
[dir_entry_idx
];
1289 if(dei
->entry_type
!=OBJTYPE_STORAGE
&& dei
->entry_type
!=OBJTYPE_STREAM
) return;
1291 dei
->parent_id
= parent_id
;
1293 if(dei
->entry_type
==OBJTYPE_STORAGE
&& dei
->fname_srd
&& dei
->fname_srd
->str
&& !dei
->path
) {
1294 // Set the full pathname
1295 dei
->path
= ucstring_create(c
);
1296 if(parent_id
>0 && d
->dir_entry
[parent_id
].path
) {
1297 ucstring_append_ucstring(dei
->path
, d
->dir_entry
[parent_id
].path
);
1298 ucstring_append_sz(dei
->path
, "/", DE_ENCODING_LATIN1
);
1300 ucstring_append_ucstring(dei
->path
, dei
->fname_srd
->str
);
1303 if(level
>50) return;
1304 for(k
=0; k
<2; k
++) {
1305 do_mark_dir_entries_recursively(c
, d
, parent_id
, dei
->sibling_id
[k
], level
+1);
1308 if(dei
->entry_type
==OBJTYPE_STORAGE
) {
1309 // This is a "subdirectory" entry, so examine its children (starting with the
1310 // one that we know about).
1311 do_mark_dir_entries_recursively(c
, d
, dir_entry_idx
, dei
->child_id
, level
+1);
1315 // Figure out which entries are in the root directory.
1316 static void do_analyze_dir_structure(deark
*c
, lctx
*d
)
1318 de_dbg_indent(c
, 1);
1320 if(d
->dump_dir_structure
) do_dump_dir_structure(c
, d
);
1322 if(d
->num_dir_entries
<1) goto done
;
1324 // The first entry should be the root entry.
1325 if(d
->dir_entry
[0].entry_type
!=OBJTYPE_ROOT_STORAGE
) goto done
;
1327 // Its child is one of the entries in the root directory. Start with it.
1328 do_mark_dir_entries_recursively(c
, d
, 0, d
->dir_entry
[0].child_id
, 0);
1330 if(d
->dump_dir_structure
) do_dump_dir_structure(c
, d
);
1332 de_dbg_indent(c
, -1);
1335 // Things to do after we've read the directory stream into memory, and
1336 // know how many entries there are.
1337 static void do_before_reading_directory_entries(deark
*c
, lctx
*d
)
1341 // Stores some extra information for each directory entry, and a copy of
1342 // some information for convenience.
1343 // (The original entry is still available at d->dir[128*n].)
1344 d
->dir_entry
= de_mallocarray(c
, d
->num_dir_entries
, sizeof(struct dir_entry_info
));
1346 // Set defaults for each entry
1347 for(i
=0; i
<d
->num_dir_entries
; i
++) {
1348 d
->dir_entry
[i
].child_id
= -1;
1349 d
->dir_entry
[i
].sibling_id
[0] = -1;
1350 d
->dir_entry
[i
].sibling_id
[1] = -1;
1354 static int is_thumbsdb_orig_name(deark
*c
, lctx
*d
, const char *name
, size_t nlen
)
1358 if(nlen
<1 || nlen
>6) return 0;
1359 for(i
=0; i
<nlen
; i
++) {
1360 if(name
[i
]<'0' || name
[i
]>'9') return 0;
1365 static int is_thumbsdb_new_name(deark
*c
, lctx
*d
, const char *name
, size_t nlen
)
1369 int found_underscore
= 0;
1372 if(nlen
<4 || nlen
>22) return 0;
1373 for(i
=0; i
<nlen
; i
++) {
1374 if(!found_underscore
&& name
[i
]=='_') {
1375 found_underscore
= 1;
1377 else if(!found_underscore
) {
1378 // pre-underscore (pixel dimension)
1379 if(name
[i
]>='0' && name
[i
]<='9')
1385 // post-underscore (hash?)
1386 if((name
[i
]>='0' && name
[i
]<='9') ||
1387 (name
[i
]>='a' && name
[i
]<='f'))
1397 if(!found_underscore
) return 0;
1398 if(count1
<1 || count1
>5) return 0;
1399 if(count2
<1 || count2
>16) return 0;
1403 static void do_per_dir_entry_format_detection(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
1407 if(dei
->entry_type
==OBJTYPE_EMPTY
) return;
1408 if(d
->subformat_req
!=SUBFMT_AUTO
) return;
1409 if(!d
->could_be_thumbsdb
) return;
1411 if(dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1412 if(de_memcmp(dei
->clsid
, EMPTY_CLSID
, 16)) {
1413 d
->could_be_thumbsdb
= 0;
1418 if(dei
->entry_type
==OBJTYPE_STORAGE
) {
1419 // Thumbs.db files aren't expected to have any Storage objects.
1420 d
->could_be_thumbsdb
= 0;
1423 if(dei
->entry_type
!=OBJTYPE_STREAM
) {
1427 nlen
= dei
->fname_srd
->sz_utf8_strlen
;
1428 if(nlen
<1 || nlen
>21) {
1429 d
->could_be_thumbsdb
= 0;
1433 if(dei
->is_thumbsdb_catalog
) {
1434 d
->thumbsdb_catalog_found
++;
1438 if(is_thumbsdb_orig_name(c
, d
, dei
->fname_srd
->sz_utf8
, nlen
)) {
1439 d
->thumbsdb_old_names_found
++;
1443 if(is_thumbsdb_new_name(c
, d
, dei
->fname_srd
->sz_utf8
, nlen
)) {
1444 d
->thumbsdb_new_names_found
++;
// Looks up a 16-byte CLSID in the known_clsids table and writes a
// parenthesized name, formatted as " (%s)", into buf (at most buflen bytes).
// The name defaults to "?".
1449 // Caller supplies and initializes buf
1450 static void identify_clsid(deark
*c
, lctx
*d
, const u8
*clsid
, char *buf
, size_t buflen
)
1452 const char *name
= "?";
// Try each table entry in order.
1455 for(i
=0; i
<DE_ARRAYCOUNT(known_clsids
); i
++) {
1458 const struct clsid_id_struct
*ci
= &known_clsids
[i
];
// Work on a private copy of the caller's CLSID.
1460 de_memcpy(tmpclsid
, clsid
, 16);
// Bit (15-k) of ci->mask corresponds to byte k; bytes whose bit is clear get
// special treatment. NOTE(review): presumably they are excluded from the
// comparison - confirm against the full source.
1461 for(k
=0; k
<16; k
++) {
1462 if((ci
->mask
& (1<<(15-k
)))==0) {
// A table entry matches when all 16 bytes of the working copy equal ci->clsid.
1466 if(!de_memcmp(tmpclsid
, ci
->clsid
, 16)) {
1471 de_snprintf(buf
, buflen
, " (%s)", name
);
1474 static void do_process_stream(deark
*c
, lctx
*d
, struct dir_entry_info
*dei
)
1476 int saved_indent_level
;
1477 de_finfo
*fi_raw
= NULL
; // Use this if we extract the raw stream
1478 de_finfo
*fi_tmp
= NULL
; // Can be used by format-specific code
1479 de_ucstring
*fn_raw
= NULL
; // Use this if we extract the raw stream
1480 de_ucstring
*fn_tmp
= NULL
; // Can be used by format-specific code
1481 dbuf
*firstpart
= NULL
;
1482 int is_thumbsdb_stream
= 0;
1483 int is_OfficeArtStream
= 0;
1484 int is_summaryinfo
= 0;
1486 int is_namedThumbnail
= 0;
1487 int is_namedImages
= 0;
1488 int is_root
= (dei
->parent_id
==0);
1490 de_dbg_indent_save(c
, &saved_indent_level
);
1492 // By default, use the "stream name" as the filename.
1493 fn_raw
= ucstring_create(c
);
1495 if(dei
->parent_id
>0 && d
->dir_entry
[dei
->parent_id
].path
) {
1496 ucstring_append_ucstring(fn_raw
, d
->dir_entry
[dei
->parent_id
].path
);
1497 ucstring_append_sz(fn_raw
, "/", DE_ENCODING_LATIN1
);
1500 ucstring_append_ucstring(fn_raw
, dei
->fname_srd
->str
);
1501 fn_tmp
= ucstring_clone(fn_raw
);
1503 fi_raw
= de_finfo_create(c
);
1504 fi_tmp
= de_finfo_create(c
);
1506 // By default, use the mod time from the directory entry.
1507 if(dei
->mod_time
.is_valid
) {
1508 fi_raw
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = dei
->mod_time
; // struct copy
1509 fi_tmp
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = dei
->mod_time
; // struct copy
1512 if(d
->extract_raw_streams
) {
1515 de_finfo_set_name_from_ucstring(c
, fi_raw
, fn_raw
, DE_SNFLAG_FULLPATH
);
1516 fi_raw
->original_filename_flag
= 1;
1518 outf
= dbuf_create_output_file(c
, NULL
, fi_raw
, 0);
1519 copy_any_stream_to_dbuf(c
, d
, dei
, 0, dei
->stream_size
, outf
);
1523 if(!d
->decode_streams
) goto done
;
1525 // Read the first part of the stream, to use for format detection.
1526 firstpart
= dbuf_create_membuf(c
, 256, 0x1);
1527 copy_any_stream_to_dbuf(c
, d
, dei
, 0,
1528 (dei
->stream_size
>256)?256:dei
->stream_size
, firstpart
);
1530 // Stream type detection
1532 // FIXME? The stream detection happens even if d->subformat_req==SUBFMT_RAW.
1533 // We probably should have different detection logic in that case.
1535 if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "\x05" "SummaryInformation")) {
1539 else if(!de_strncmp(dei
->fname_srd
->sz_utf8
, "\x05", 1)) {
1540 // TODO: Is there a good way to tell whether a stream is a property set?
1543 else if(d
->subformat_final
==SUBFMT_TIFF37680
&&
1544 !de_strcasecmp(dei
->fname_srd
->sz_utf8
, "CONTENTS"))
1546 // TODO: This is not the only place to find a "CONTENTS" stream.
1549 else if(d
->subformat_final
==SUBFMT_THUMBSDB
) {
1550 is_thumbsdb_stream
= 1;
1552 else if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "Pictures")) {
1553 // This stream often appears in PPT documents.
1554 is_OfficeArtStream
= 1;
1556 else if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "EscherStm")) {
1557 is_OfficeArtStream
= 1;
1559 else if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "EscherDelayStm")) {
1560 // Found in MS Publisher, and probably other formats.
1561 is_OfficeArtStream
= 1;
1563 else if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "Thumbnail")) {
1564 is_namedThumbnail
= 1;
1566 else if(!de_strcasecmp(dei
->fname_srd
->sz_utf8
, "Images")) {
1570 if(is_OfficeArtStream
) {
1571 unsigned int rectype
;
1572 rectype
= (unsigned int)dbuf_getu16le(firstpart
, 2);
1573 if((rectype
&0xf000)!=0xf000) {
1574 is_OfficeArtStream
= 0;
1578 // End of stream type detection
1581 do_cfb_olepropertyset(c
, d
, dei
, is_summaryinfo
, is_root
);
1583 else if(is_thumbsdb_stream
) {
1584 do_extract_stream_to_file_thumbsdb(c
, d
, dei
, fi_tmp
, fn_tmp
, firstpart
);
1586 else if(is_OfficeArtStream
) {
1587 do_OfficeArtStream(c
, d
, dei
);
1589 else if(is_namedThumbnail
) {
1590 do_StreamNamedThumbnail(c
, d
, dei
);
1592 else if(is_namedImages
) {
1593 do_StreamNamedImages(c
, d
, dei
);
1597 de_dbg_indent_restore(c
, saved_indent_level
);
1598 dbuf_close(firstpart
);
1599 ucstring_destroy(fn_raw
);
1600 ucstring_destroy(fn_tmp
);
1601 de_finfo_destroy(c
, fi_raw
);
1602 de_finfo_destroy(c
, fi_tmp
);
1606 // Read information about a directory entry. Do not print anything about it.
1607 static void do_read_dir_entry(deark
*c
, lctx
*d
, i64 dir_entry_idx
, i64 dir_entry_offs
)
1611 struct dir_entry_info
*dei
= NULL
;
1613 if(!d
->dir_entry
) goto done
; // error
1614 dei
= &d
->dir_entry
[dir_entry_idx
];
1616 dei
->pass
= 3; // Default pass in which to process this entry
1618 dei
->entry_type
= dbuf_getbyte(d
->dir
, dir_entry_offs
+66);
1619 switch(dei
->entry_type
) {
1620 case OBJTYPE_EMPTY
: dei
->entry_type_name
="empty"; break;
1621 case OBJTYPE_STORAGE
: dei
->entry_type_name
="storage object"; break;
1622 case OBJTYPE_STREAM
: dei
->entry_type_name
="stream"; break;
1623 case OBJTYPE_ROOT_STORAGE
: dei
->entry_type_name
="root storage object"; break;
1624 default: dei
->entry_type_name
="?";
1627 if(dei
->entry_type
==OBJTYPE_EMPTY
) goto done
;
1629 dei
->name_len_raw
= dbuf_getu16le(d
->dir
, dir_entry_offs
+64);
1631 name_len_bytes
= dei
->name_len_raw
-2; // Ignore the trailing U+0000
1632 if(name_len_bytes
<0) name_len_bytes
= 0;
1634 dei
->fname_srd
= dbuf_read_string(d
->dir
, dir_entry_offs
, name_len_bytes
, name_len_bytes
,
1635 DE_CONVFLAG_WANT_UTF8
, DE_ENCODING_UTF16LE
);
1637 dei
->node_color
= dbuf_getbyte(d
->dir
, dir_entry_offs
+67);
1639 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_STREAM
) {
1640 dei
->sibling_id
[0] = (i32
)dbuf_geti32le(d
->dir
, dir_entry_offs
+68);
1641 dei
->sibling_id
[1] = (i32
)dbuf_geti32le(d
->dir
, dir_entry_offs
+72);
1644 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1645 dei
->child_id
= (i32
)dbuf_geti32le(d
->dir
, dir_entry_offs
+76);
1648 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1649 dbuf_read(d
->dir
, dei
->clsid
, dir_entry_offs
+80, 16);
1650 fmtutil_guid_to_uuid(dei
->clsid
);
1653 read_and_cvt_timestamp(c
, d
->dir
, dir_entry_offs
+108, &dei
->mod_time
);
1655 raw_sec_id
= dbuf_geti32le(d
->dir
, dir_entry_offs
+116);
1657 if(d
->major_ver
<=3) {
1658 dei
->stream_size
= dbuf_getu32le(d
->dir
, dir_entry_offs
+120);
1661 dei
->stream_size
= dbuf_geti64le(d
->dir
, dir_entry_offs
+120);
1664 dei
->is_mini_stream
= (dei
->entry_type
==OBJTYPE_STREAM
) && (dei
->stream_size
< d
->std_stream_min_size
);
1666 if(dei
->is_mini_stream
) {
1667 dei
->minisec_id
= raw_sec_id
;
1670 dei
->normal_sec_id
= raw_sec_id
;
1673 if((d
->subformat_req
==SUBFMT_THUMBSDB
|| d
->subformat_req
==SUBFMT_AUTO
) &&
1674 !de_strcmp(dei
->fname_srd
->sz_utf8
, "Catalog"))
1676 dei
->is_thumbsdb_catalog
= 1;
1677 if(d
->decode_streams
) dei
->pass
= 2;
1680 if(dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1684 do_per_dir_entry_format_detection(c
, d
, dei
);
1690 // Process an directory entry from the d->dir stream, that has previously been
1691 // read into the d->dir_entry array.
1692 static void do_process_dir_entry(deark
*c
, lctx
*d
, i64 dir_entry_idx
)
1694 struct dir_entry_info
*dei
= NULL
;
1695 char clsid_string
[50];
1698 if(!d
->dir_entry
) return; // error
1699 dei
= &d
->dir_entry
[dir_entry_idx
];
1701 de_dbg(c
, "type: 0x%02x (%s)", (unsigned int)dei
->entry_type
, dei
->entry_type_name
);
1702 if(dei
->entry_type
==OBJTYPE_EMPTY
) goto done
;
1704 de_dbg2(c
, "name len: %d bytes", (int)dei
->name_len_raw
);
1705 de_dbg(c
, "name: \"%s\"", ucstring_getpsz(dei
->fname_srd
->str
));
1706 de_dbg(c
, "node color: %u", (unsigned int)dei
->node_color
);
1708 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_STREAM
) {
1709 de_dbg(c
, "sibling StreamIDs: %d, %d", (int)dei
->sibling_id
[0], (int)dei
->sibling_id
[1]);
1712 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1713 de_dbg(c
, "child StreamID: %d", (int)dei
->child_id
);
1716 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_STREAM
) {
1717 de_dbg(c
, "parent: %d", (int)dei
->parent_id
);
1720 if(dei
->entry_type
==OBJTYPE_STORAGE
|| dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1722 if(dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1723 identify_clsid(c
, d
, dei
->clsid
, buf
, sizeof(buf
));
1726 fmtutil_render_uuid(c
, dei
->clsid
, clsid_string
, sizeof(clsid_string
));
1727 de_dbg(c
, "%sclsid: {%s}%s", (dei
->entry_type
==OBJTYPE_ROOT_STORAGE
)?"root ":"",
1731 dbg_timestamp(c
, &dei
->mod_time
, "mod time");
1733 de_dbg(c
, "stream size: %"I64_FMT
, dei
->stream_size
);
1735 if(dei
->is_mini_stream
) {
1736 de_dbg(c
, "first MiniSecID: %d", (int)dei
->minisec_id
);
1739 describe_sec_id(c
, d
, dei
->normal_sec_id
, buf
, sizeof(buf
));
1740 de_dbg(c
, "first SecID: %d (%s)", (int)dei
->normal_sec_id
, buf
);
1743 if(dei
->entry_type
==OBJTYPE_ROOT_STORAGE
) {
1744 read_mini_sector_stream(c
, d
, dei
->normal_sec_id
, dei
->stream_size
);
1746 else if(dei
->is_thumbsdb_catalog
&& d
->decode_streams
) {
1747 // TODO: Move this to do_process_stream()?
1748 read_thumbsdb_catalog(c
, d
, dei
);
1750 else if(dei
->entry_type
==OBJTYPE_STREAM
) {
1751 do_process_stream(c
, d
, dei
);
1758 static void do_directory(deark
*c
, lctx
*d
)
1762 int saved_indent_level
;
1764 de_dbg_indent_save(c
, &saved_indent_level
);
1766 de_dbg(c
, "reading directory entries");
1767 do_before_reading_directory_entries(c
, d
);
1768 for(i
=0; i
<d
->num_dir_entries
; i
++) {
1769 i64 dir_entry_offs
= 128*i
;
1770 do_read_dir_entry(c
, d
, i
, dir_entry_offs
);
1773 de_dbg(c
, "decoding directory structure");
1774 do_analyze_dir_structure(c
, d
);
1776 de_dbg(c
, "detecting format");
1777 do_finalize_format_detection(c
, d
);
1779 de_dbg(c
, "processing directory entries");
1780 de_dbg_indent(c
, 1);
1781 for(pass
=1; pass
<=3; pass
++) {
1782 de_dbg2(c
, "[pass %d]", pass
);
1783 for(i
=0; i
<d
->num_dir_entries
; i
++) {
1784 if(d
->dir_entry
[i
].pass
== pass
) {
1785 de_dbg(c
, "directory entry, StreamID=%d", (int)i
);
1786 de_dbg_indent(c
, 1);
1787 do_process_dir_entry(c
, d
, i
);
1788 de_dbg_indent(c
, -1);
1793 de_dbg_indent_restore(c
, saved_indent_level
);
// Runs the decoder on the input file using an already-configured context:
// initializes format detection, reads the header, reads the directory
// stream, and finally releases the resources held in the context.
1796 static void de_run_cfb_internal(deark
*c
, lctx
*d
)
1798 do_init_format_detection(c
, d
);
// The result of do_header() is tested before anything else is read.
1800 if(!do_header(c
, d
)) {
1810 read_directory_stream(c
, d
);
// Cleanup: release the buffers owned by the context.
1815 dbuf_close(d
->difat
);
1817 dbuf_close(d
->minifat
);
// Each directory entry owns its name and its path string.
1821 for(k
=0; k
<d
->num_dir_entries
; k
++) {
1822 de_destroy_stringreaderdata(c
, d
->dir_entry
[k
].fname_srd
);
1823 ucstring_destroy(d
->dir_entry
[k
].path
);
1825 de_free(c
, d
->dir_entry
);
1827 dbuf_close(d
->mini_sector_stream
);
// The Thumbs.db catalog is present only if one was read; each of its entries
// owns a name.
1828 if(d
->thumbsdb_catalog
) {
1830 for(k
=0; k
<d
->thumbsdb_catalog_num_entries
; k
++) {
1831 de_destroy_stringreaderdata(c
, d
->thumbsdb_catalog
[k
].fname_srd
);
1833 de_free(c
, d
->thumbsdb_catalog
);
1834 d
->thumbsdb_catalog
= NULL
;
1838 static void de_run_cfb(deark
*c
, de_module_params
*mparams
)
1841 const char *cfbfmt_opt
;
1843 d
= de_malloc(c
, sizeof(lctx
));
1844 d
->decode_streams
= 1;
1845 d
->subformat_req
= SUBFMT_AUTO
;
1847 if(de_get_ext_option(c
, "cfb:extractstreams")) {
1848 d
->extract_raw_streams
= 1;
1849 d
->decode_streams
= 0;
1851 if(de_get_ext_option(c
, "cfb:dumpdir")) {
1852 d
->dump_dir_structure
= 1; // A low-level debugging feature
1855 if(de_havemodcode(c
, mparams
, 'T')) {
1856 // TIFF tag 37680 mode
1857 // TODO: Handle 'OLE Property Set Storage' more generally.
1858 d
->subformat_req
= SUBFMT_TIFF37680
;
1861 if(d
->subformat_req
== SUBFMT_AUTO
) {
1862 // If we haven't set subformat_req yet, look at the command-line option
1864 cfbfmt_opt
= de_get_ext_option(c
, "cfb:fmt");
1866 if(!de_strcmp(cfbfmt_opt
, "auto")) {
1867 d
->subformat_req
= SUBFMT_AUTO
;
1869 else if(!de_strcmp(cfbfmt_opt
, "raw")) {
1870 d
->subformat_req
= SUBFMT_RAW
;
1872 else if(!de_strcmp(cfbfmt_opt
, "thumbsdb")) {
1873 d
->subformat_req
= SUBFMT_THUMBSDB
;
1878 de_run_cfb_internal(c
, d
);
// Format identification: compares the first 8 bytes of the input file with
// the signature D0 CF 11 E0 A1 B1 1A E1.
// NOTE(review): the values returned for match / no match are not visible in
// this excerpt - confirm against the full source.
1883 static int de_identify_cfb(deark
*c
)
1885 if(!dbuf_memcmp(c
->infile
, 0, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8))
1890 static void de_help_cfb(deark
*c
)
1892 de_msg(c
, "-opt cfb:extractstreams : Extract raw streams, instead of decoding");
1893 de_msg(c
, "-opt cfb:fmt=raw : Do not try to detect the document type");
1894 de_msg(c
, "-opt cfb:fmt=thumbsdb : Assume Thumbs.db format");
1895 de_msg(c
, "-opt cfb:msrgbamode=0 : Disable special processing of nonstandard-"
1896 "JPEG Thumbs.db thumbnails");
// Module registration: fills in the module's description and its run,
// identify and help callbacks in *mi.
1899 void de_module_cfb(deark
*c
, struct deark_module_info
*mi
)
1902 mi
->desc
= "Microsoft Compound File Binary File";
// Callbacks defined earlier in this file.
1903 mi
->run_fn
= de_run_cfb
;
1904 mi
->identify_fn
= de_identify_cfb
;
1905 mi
->help_fn
= de_help_cfb
;