1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Cabinet (CAB) format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_cab
);
15 unsigned int typeCompress_raw
;
16 unsigned int cmpr_type
;
19 typedef struct localctx_struct
{
20 u8 versionMinor
, versionMajor
;
21 unsigned int header_flags
;
26 i64 cbCFHeader
, cbCFFolder
, cbCFData
;
30 static const char *get_cmpr_type_name(unsigned int n
)
35 case 0: name
="none"; break;
36 case 1: name
="MSZIP"; break;
37 case 2: name
="Quantum"; break;
38 case 3: name
="LZX"; break;
39 default: name
="?"; break;
44 static int do_one_CFDATA(deark
*c
, lctx
*d
, struct folder_info
*fldi
, i64 pos1
,
52 csum
= (u32
)de_getu32le_p(&pos
);
53 de_dbg(c
, "csum: 0x%08x", (unsigned int)csum
);
55 cbData
= de_getu16le_p(&pos
);
56 de_dbg(c
, "cbData: %d", (int)cbData
);
58 cbUncomp
= de_getu16le_p(&pos
);
59 de_dbg(c
, "cbUncomp: %d", (int)cbUncomp
);
61 if((d
->header_flags
&0x0004) && (d
->cbCFData
>0)) {
62 de_dbg(c
, "[%d bytes of abReserve data at %d]", (int)d
->cbCFData
,
65 de_dbg_hexdump(c
, c
->infile
, pos
, d
->cbCFData
, 256, NULL
, 0x1);
70 de_dbg(c
, "[%d bytes of %scompressed data at %d]", (int)cbData
,
71 (fldi
->cmpr_type
==0)?"un":"", (int)pos
);
74 *bytes_consumed
= pos
- pos1
;
78 static void do_CFDATA_for_one_CFFOLDER(deark
*c
, lctx
*d
, struct folder_info
*fldi
)
81 int saved_indent_level
;
82 i64 pos
= fldi
->coffCabStart
;
84 de_dbg_indent_save(c
, &saved_indent_level
);
85 if(fldi
->cCFData
<1) goto done
;
86 de_dbg(c
, "CFDATA blocks for CFFOLDER[%d], at %d, #=%d", (int)fldi
->folder_idx
,
87 (int)fldi
->coffCabStart
, (int)fldi
->cCFData
);
90 for(i
=0; i
<fldi
->cCFData
; i
++) {
91 i64 bytes_consumed
= 0;
93 if(pos
>=c
->infile
->len
) goto done
;
94 de_dbg(c
, "CFDATA[%d] for CFFOLDER[%d], at %d", (int)i
,
95 (int)fldi
->folder_idx
, (int)pos
);
97 if(!do_one_CFDATA(c
, d
, fldi
, pos
, &bytes_consumed
)) {
100 de_dbg_indent(c
, -1);
101 pos
+= bytes_consumed
;
105 de_dbg_indent_restore(c
, saved_indent_level
);
108 static int do_one_CFFOLDER(deark
*c
, lctx
*d
, i64 folder_idx
,
109 i64 pos1
, i64
*bytes_consumed
)
112 struct folder_info
*fldi
= NULL
;
114 fldi
= de_malloc(c
, sizeof(struct folder_info
));
115 fldi
->folder_idx
= folder_idx
;
117 fldi
->coffCabStart
= de_getu32le_p(&pos
);
118 de_dbg(c
, "first CFDATA blk offset (coffCabStart): %"I64_FMT
, fldi
->coffCabStart
);
120 fldi
->cCFData
= de_getu16le_p(&pos
);
121 de_dbg(c
, "no. of CFDATA blks for this folder (cCFData): %d", (int)fldi
->cCFData
);
123 fldi
->typeCompress_raw
= (unsigned int)de_getu16le_p(&pos
);
124 fldi
->cmpr_type
= fldi
->typeCompress_raw
& 0x000f;
125 de_dbg(c
, "typeCompress field: 0x%04x", fldi
->typeCompress_raw
);
127 de_dbg(c
, "compression type: 0x%04x (%s)", fldi
->cmpr_type
,
128 get_cmpr_type_name(fldi
->cmpr_type
));
129 de_dbg_indent(c
, -1);
131 if((d
->header_flags
&0x0004) && (d
->cbCFFolder
>0)) {
132 de_dbg(c
, "[%d bytes of abReserve data at %d]", (int)d
->cbCFFolder
,
135 de_dbg_hexdump(c
, c
->infile
, pos
, d
->cbCFFolder
, 256, NULL
, 0x1);
136 de_dbg_indent(c
, -1);
137 pos
+= d
->cbCFFolder
;
140 *bytes_consumed
= pos
-pos1
;
142 do_CFDATA_for_one_CFFOLDER(c
, d
, fldi
);
148 static void do_CFFOLDERs(deark
*c
, lctx
*d
)
150 i64 pos
= d
->CFHEADER_len
;
152 int saved_indent_level
;
154 de_dbg_indent_save(c
, &saved_indent_level
);
155 if(d
->cFolders
<1) goto done
;
156 de_dbg(c
, "CFFOLDER section at %d, nfolders=%d", (int)pos
, (int)d
->cFolders
);
159 for(i
=0; i
<d
->cFolders
; i
++) {
160 i64 bytes_consumed
= 0;
162 if(pos
>=c
->infile
->len
) break;
163 de_dbg(c
, "CFFOLDER[%d] at %d", (int)i
, (int)pos
);
165 if(!do_one_CFFOLDER(c
, d
, i
, pos
, &bytes_consumed
)) {
168 de_dbg_indent(c
, -1);
169 pos
+= bytes_consumed
;
173 de_dbg_indent_restore(c
, saved_indent_level
);
176 static const char *get_special_folder_name(i64 n
)
180 case 0xfffd: name
="CONTINUED_FROM_PREV"; break;
181 case 0xfffe: name
="CONTINUED_TO_NEXT"; break;
182 case 0xffff: name
="CONTINUED_PREV_AND_NEXT"; break;
183 default: name
="?"; break;
188 static int do_one_CFFILE(deark
*c
, lctx
*d
, i64 pos1
, i64
*bytes_consumed
)
196 unsigned int attribs
;
198 struct de_stringreaderdata
*szName
= NULL
;
199 de_ucstring
*attribs_str
= NULL
;
200 struct de_timestamp ts
;
201 char timestamp_buf
[64];
204 cbFile
= de_getu32le_p(&pos
);
205 de_dbg(c
, "uncompressed file size (cbFile): %"I64_FMT
, cbFile
);
207 uoffFolderStart
= de_getu32le_p(&pos
);
208 de_dbg(c
, "offset in folder (uoffFolderStart): %"I64_FMT
, uoffFolderStart
);
210 iFolder
= de_getu16le_p(&pos
);
211 if(iFolder
>=0xfffd) {
212 de_snprintf(tmps
, sizeof(tmps
), "0x%04x (%s)", (unsigned int)iFolder
,
213 get_special_folder_name(iFolder
));
216 de_snprintf(tmps
, sizeof(tmps
), "%u", (unsigned int)iFolder
);
218 de_dbg(c
, "folder index (iFolder): %s", tmps
);
220 date_
= de_getu16le_p(&pos
);
221 time_
= de_getu16le_p(&pos
);
222 de_dos_datetime_to_timestamp(&ts
, date_
, time_
);
223 ts
.tzcode
= DE_TZCODE_LOCAL
;
224 de_timestamp_to_string(&ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
225 de_dbg(c
, "timestamp: %s", timestamp_buf
);
227 attribs
= (unsigned int)de_getu16le_p(&pos
);
228 attribs_str
= ucstring_create(c
);
229 de_describe_dos_attribs(c
, (attribs
& 0x3f), attribs_str
, 0);
230 if(attribs
&0x40) ucstring_append_flags_item(attribs_str
, "EXEC");
231 if(attribs
&0x80) ucstring_append_flags_item(attribs_str
, "NAME_IS_UTF8");
232 de_dbg(c
, "attribs: 0x%04x (%s)", attribs
, ucstring_getpsz(attribs_str
));
234 szName
= dbuf_read_string(c
->infile
, pos
, 257, 257,
235 DE_CONVFLAG_STOP_AT_NUL
,
236 (attribs
&0x80)?DE_ENCODING_UTF8
:DE_ENCODING_ASCII
);
237 de_dbg(c
, "szName: \"%s\"", ucstring_getpsz(szName
->str
));
238 if(!szName
->found_nul
) goto done
;
239 pos
+= szName
->bytes_consumed
;
241 *bytes_consumed
= pos
-pos1
;
244 de_destroy_stringreaderdata(c
, szName
);
245 ucstring_destroy(attribs_str
);
249 static void do_CFFILEs(deark
*c
, lctx
*d
)
251 i64 pos
= d
->coffFiles
;
253 int saved_indent_level
;
255 de_dbg_indent_save(c
, &saved_indent_level
);
256 if(d
->cFiles
<1) goto done
;
257 de_dbg(c
, "CFFILE section at %d, nfiles=%d", (int)pos
, (int)d
->cFiles
);
259 for(i
=0; i
<d
->cFiles
; i
++) {
260 i64 bytes_consumed
= 0;
262 if(pos
>=c
->infile
->len
) break;
263 de_dbg(c
, "CFFILE[%d] at %d", (int)i
, (int)pos
);
265 if(!do_one_CFFILE(c
, d
, pos
, &bytes_consumed
)) {
268 de_dbg_indent(c
, -1);
269 pos
+= bytes_consumed
;
273 de_dbg_indent_restore(c
, saved_indent_level
);
276 // On success, sets d->CFHEADER_len.
277 static int do_CFHEADER(deark
*c
, lctx
*d
)
281 de_ucstring
*flags_str
= NULL
;
282 struct de_stringreaderdata
*CabinetPrev
= NULL
;
283 struct de_stringreaderdata
*DiskPrev
= NULL
;
284 struct de_stringreaderdata
*CabinetNext
= NULL
;
285 struct de_stringreaderdata
*DiskNext
= NULL
;
286 int saved_indent_level
;
288 de_dbg_indent_save(c
, &saved_indent_level
);
289 de_dbg(c
, "CFHEADER at %d", (int)pos
);
291 pos
+= 8; // signature, reserved1
292 d
->cbCabinet
= de_getu32le_p(&pos
);
293 de_dbg(c
, "cbCabinet: %"I64_FMT
, d
->cbCabinet
);
294 pos
+= 4; // reserved2
295 d
->coffFiles
= de_getu32le_p(&pos
);
296 de_dbg(c
, "coffFiles: %"I64_FMT
, d
->coffFiles
);
297 pos
+= 4; // reserved3
298 d
->versionMinor
= de_getbyte_p(&pos
);
299 d
->versionMajor
= de_getbyte_p(&pos
);
300 de_dbg(c
, "file format version: %u.%u", (unsigned int)d
->versionMajor
,
301 (unsigned int)d
->versionMinor
);
303 d
->cFolders
= de_getu16le_p(&pos
);
304 de_dbg(c
, "cFolders: %d", (int)d
->cFolders
);
306 d
->cFiles
= de_getu16le_p(&pos
);
307 de_dbg(c
, "cFiles: %d", (int)d
->cFiles
);
309 d
->header_flags
= (unsigned int)de_getu16le_p(&pos
);
310 flags_str
= ucstring_create(c
);
311 // The specification has a diagram showing that PREV_CABINET is 0x2,
312 // NEXT_CABINET is 0x04, etc. But the text below it says that PREV_CABINET
313 // is 0x1, NEXT_CABINET is 0x02, etc. I'm sure it's the text that's correct.
314 if(d
->header_flags
&0x0001) ucstring_append_flags_item(flags_str
, "PREV_CABINET");
315 if(d
->header_flags
&0x0002) ucstring_append_flags_item(flags_str
, "NEXT_CABINET");
316 if(d
->header_flags
&0x0004) ucstring_append_flags_item(flags_str
, "RESERVE_PRESENT");
317 de_dbg(c
, "flags: 0x%04x (%s)", d
->header_flags
, ucstring_getpsz(flags_str
));
319 pos
+= 2; // setID (arbitrary ID for a collection of linked cab files)
320 pos
+= 2; // iCabinet (sequence number in a multi-cab file)
322 if(d
->header_flags
&0x0004) { // RESERVE_PRESENT
323 d
->cbCFHeader
= de_getu16le_p(&pos
);
324 de_dbg(c
, "cbCFHeader: %d", (int)d
->cbCFHeader
);
325 d
->cbCFFolder
= (i64
)de_getbyte_p(&pos
);
326 de_dbg(c
, "cbCFFolder: %d", (int)d
->cbCFFolder
);
327 d
->cbCFData
= (i64
)de_getbyte_p(&pos
);
328 de_dbg(c
, "cbCFData: %d", (int)d
->cbCFData
);
330 if(d
->cbCFHeader
!=0) {
331 de_dbg(c
, "[%d bytes of abReserve data at %d]", (int)d
->cbCFHeader
,
334 de_dbg_hexdump(c
, c
->infile
, pos
, d
->cbCFHeader
, 256, NULL
, 0x1);
335 de_dbg_indent(c
, -1);
336 pos
+= d
->cbCFHeader
;
340 if(d
->header_flags
&0x0001) { // PREV_CABINET
341 CabinetPrev
= dbuf_read_string(c
->infile
, pos
, 256, 256,
342 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
343 de_dbg(c
, "szCabinetPrev: \"%s\"", ucstring_getpsz(CabinetPrev
->str
));
344 if(!CabinetPrev
->found_nul
) goto done
;
345 pos
+= CabinetPrev
->bytes_consumed
;
347 DiskPrev
= dbuf_read_string(c
->infile
, pos
, 256, 256,
348 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
349 de_dbg(c
, "szDiskPrev: \"%s\"", ucstring_getpsz(DiskPrev
->str
));
350 if(!DiskPrev
->found_nul
) goto done
;
351 pos
+= DiskPrev
->bytes_consumed
;
354 if(d
->header_flags
&0x0002) { // NEXT_CABINET
355 CabinetNext
= dbuf_read_string(c
->infile
, pos
, 256, 256,
356 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
357 de_dbg(c
, "szCabinetNext: \"%s\"", ucstring_getpsz(CabinetNext
->str
));
358 if(!CabinetNext
->found_nul
) goto done
;
359 pos
+= CabinetNext
->bytes_consumed
;
361 DiskNext
= dbuf_read_string(c
->infile
, pos
, 256, 256,
362 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
363 de_dbg(c
, "szDiskNext: \"%s\"", ucstring_getpsz(DiskNext
->str
));
364 if(!DiskNext
->found_nul
) goto done
;
365 pos
+= DiskNext
->bytes_consumed
;
368 // TODO: Additional fields may be here
370 de_dbg_indent(c
, -1);
372 if(d
->versionMajor
!=1 || d
->versionMinor
!=3) {
373 de_err(c
, "Unsupported CAB format version: %u.%u",
374 (unsigned int)d
->versionMajor
, (unsigned int)d
->versionMinor
);
378 d
->CFHEADER_len
= pos
;
381 de_destroy_stringreaderdata(c
, CabinetPrev
);
382 de_destroy_stringreaderdata(c
, DiskPrev
);
383 de_destroy_stringreaderdata(c
, CabinetNext
);
384 de_destroy_stringreaderdata(c
, DiskNext
);
385 ucstring_destroy(flags_str
);
386 de_dbg_indent_restore(c
, saved_indent_level
);
390 static void de_run_cab(deark
*c
, de_module_params
*mparams
)
394 d
= de_malloc(c
, sizeof(lctx
));
395 if(!do_CFHEADER(c
, d
)) goto done
;
403 static int de_identify_cab(deark
*c
)
405 if(!dbuf_memcmp(c
->infile
, 0, "MSCF", 4))
410 void de_module_cab(deark
*c
, struct deark_module_info
*mi
)
413 mi
->desc
= "Microsoft Cabinet (CAB)";
414 mi
->run_fn
= de_run_cab
;
415 mi
->identify_fn
= de_identify_cab
;
416 mi
->flags
|= DE_MODFLAG_WARNPARSEONLY
;