fnt: Improved error handling, etc.
[deark.git] / modules / cab.c
blob77460fd0eec385cda522bdc7c9fb5029785053e2
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Microsoft Cabinet (CAB) format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_cab);
11 struct folder_info {
12 i64 folder_idx;
13 i64 coffCabStart;
14 i64 cCFData;
15 unsigned int typeCompress_raw;
16 unsigned int cmpr_type;
19 typedef struct localctx_struct {
20 u8 versionMinor, versionMajor;
21 unsigned int header_flags;
22 i64 cbCabinet;
23 i64 coffFiles;
24 i64 cFolders;
25 i64 cFiles;
26 i64 cbCFHeader, cbCFFolder, cbCFData;
27 i64 CFHEADER_len;
28 } lctx;
// Returns a printable name for a compression type code (the low 4 bits
// of a CFFOLDER's typeCompress field), or "?" if the code is not recognized.
static const char *get_cmpr_type_name(unsigned int n)
{
	static const char *const known_names[] = {
		"none", "MSZIP", "Quantum", "LZX"
	};

	if(n < sizeof(known_names)/sizeof(known_names[0])) {
		return known_names[n];
	}
	return "?";
}
44 static int do_one_CFDATA(deark *c, lctx *d, struct folder_info *fldi, i64 pos1,
45 i64 *bytes_consumed)
47 u32 csum;
48 i64 cbData;
49 i64 cbUncomp;
50 i64 pos = pos1;
52 csum = (u32)de_getu32le_p(&pos);
53 de_dbg(c, "csum: 0x%08x", (unsigned int)csum);
55 cbData = de_getu16le_p(&pos);
56 de_dbg(c, "cbData: %d", (int)cbData);
58 cbUncomp = de_getu16le_p(&pos);
59 de_dbg(c, "cbUncomp: %d", (int)cbUncomp);
61 if((d->header_flags&0x0004) && (d->cbCFData>0)) {
62 de_dbg(c, "[%d bytes of abReserve data at %d]", (int)d->cbCFData,
63 (int)pos);
64 de_dbg_indent(c, 1);
65 de_dbg_hexdump(c, c->infile, pos, d->cbCFData, 256, NULL, 0x1);
66 de_dbg_indent(c, -1);
67 pos += d->cbCFData;
70 de_dbg(c, "[%d bytes of %scompressed data at %d]", (int)cbData,
71 (fldi->cmpr_type==0)?"un":"", (int)pos);
72 pos += cbData;
74 *bytes_consumed = pos - pos1;
75 return 1;
78 static void do_CFDATA_for_one_CFFOLDER(deark *c, lctx *d, struct folder_info *fldi)
80 i64 i;
81 int saved_indent_level;
82 i64 pos = fldi->coffCabStart;
84 de_dbg_indent_save(c, &saved_indent_level);
85 if(fldi->cCFData<1) goto done;
86 de_dbg(c, "CFDATA blocks for CFFOLDER[%d], at %d, #=%d", (int)fldi->folder_idx,
87 (int)fldi->coffCabStart, (int)fldi->cCFData);
88 de_dbg_indent(c, 1);
90 for(i=0; i<fldi->cCFData; i++) {
91 i64 bytes_consumed = 0;
93 if(pos>=c->infile->len) goto done;
94 de_dbg(c, "CFDATA[%d] for CFFOLDER[%d], at %d", (int)i,
95 (int)fldi->folder_idx, (int)pos);
96 de_dbg_indent(c, 1);
97 if(!do_one_CFDATA(c, d, fldi, pos, &bytes_consumed)) {
98 goto done;
100 de_dbg_indent(c, -1);
101 pos += bytes_consumed;
104 done:
105 de_dbg_indent_restore(c, saved_indent_level);
108 static int do_one_CFFOLDER(deark *c, lctx *d, i64 folder_idx,
109 i64 pos1, i64 *bytes_consumed)
111 i64 pos = pos1;
112 struct folder_info *fldi = NULL;
114 fldi = de_malloc(c, sizeof(struct folder_info));
115 fldi->folder_idx = folder_idx;
117 fldi->coffCabStart = de_getu32le_p(&pos);
118 de_dbg(c, "first CFDATA blk offset (coffCabStart): %"I64_FMT, fldi->coffCabStart);
120 fldi->cCFData = de_getu16le_p(&pos);
121 de_dbg(c, "no. of CFDATA blks for this folder (cCFData): %d", (int)fldi->cCFData);
123 fldi->typeCompress_raw = (unsigned int)de_getu16le_p(&pos);
124 fldi->cmpr_type = fldi->typeCompress_raw & 0x000f;
125 de_dbg(c, "typeCompress field: 0x%04x", fldi->typeCompress_raw);
126 de_dbg_indent(c, 1);
127 de_dbg(c, "compression type: 0x%04x (%s)", fldi->cmpr_type,
128 get_cmpr_type_name(fldi->cmpr_type));
129 de_dbg_indent(c, -1);
131 if((d->header_flags&0x0004) && (d->cbCFFolder>0)) {
132 de_dbg(c, "[%d bytes of abReserve data at %d]", (int)d->cbCFFolder,
133 (int)pos);
134 de_dbg_indent(c, 1);
135 de_dbg_hexdump(c, c->infile, pos, d->cbCFFolder, 256, NULL, 0x1);
136 de_dbg_indent(c, -1);
137 pos += d->cbCFFolder;
140 *bytes_consumed = pos-pos1;
142 do_CFDATA_for_one_CFFOLDER(c, d, fldi);
144 de_free(c, fldi);
145 return 1;
148 static void do_CFFOLDERs(deark *c, lctx *d)
150 i64 pos = d->CFHEADER_len;
151 i64 i;
152 int saved_indent_level;
154 de_dbg_indent_save(c, &saved_indent_level);
155 if(d->cFolders<1) goto done;
156 de_dbg(c, "CFFOLDER section at %d, nfolders=%d", (int)pos, (int)d->cFolders);
158 de_dbg_indent(c, 1);
159 for(i=0; i<d->cFolders; i++) {
160 i64 bytes_consumed = 0;
162 if(pos>=c->infile->len) break;
163 de_dbg(c, "CFFOLDER[%d] at %d", (int)i, (int)pos);
164 de_dbg_indent(c, 1);
165 if(!do_one_CFFOLDER(c, d, i, pos, &bytes_consumed)) {
166 goto done;
168 de_dbg_indent(c, -1);
169 pos += bytes_consumed;
172 done:
173 de_dbg_indent_restore(c, saved_indent_level);
176 static const char *get_special_folder_name(i64 n)
178 const char *name;
179 switch(n) {
180 case 0xfffd: name="CONTINUED_FROM_PREV"; break;
181 case 0xfffe: name="CONTINUED_TO_NEXT"; break;
182 case 0xffff: name="CONTINUED_PREV_AND_NEXT"; break;
183 default: name="?"; break;
185 return name;
188 static int do_one_CFFILE(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
190 i64 cbFile;
191 i64 uoffFolderStart;
192 i64 iFolder;
193 i64 pos = pos1;
194 i64 date_;
195 i64 time_;
196 unsigned int attribs;
197 int retval = 0;
198 struct de_stringreaderdata *szName = NULL;
199 de_ucstring *attribs_str = NULL;
200 struct de_timestamp ts;
201 char timestamp_buf[64];
202 char tmps[80];
204 cbFile = de_getu32le_p(&pos);
205 de_dbg(c, "uncompressed file size (cbFile): %"I64_FMT, cbFile);
207 uoffFolderStart = de_getu32le_p(&pos);
208 de_dbg(c, "offset in folder (uoffFolderStart): %"I64_FMT, uoffFolderStart);
210 iFolder = de_getu16le_p(&pos);
211 if(iFolder>=0xfffd) {
212 de_snprintf(tmps, sizeof(tmps), "0x%04x (%s)", (unsigned int)iFolder,
213 get_special_folder_name(iFolder));
215 else {
216 de_snprintf(tmps, sizeof(tmps), "%u", (unsigned int)iFolder);
218 de_dbg(c, "folder index (iFolder): %s", tmps);
220 date_ = de_getu16le_p(&pos);
221 time_ = de_getu16le_p(&pos);
222 de_dos_datetime_to_timestamp(&ts, date_, time_);
223 ts.tzcode = DE_TZCODE_LOCAL;
224 de_timestamp_to_string(&ts, timestamp_buf, sizeof(timestamp_buf), 0);
225 de_dbg(c, "timestamp: %s", timestamp_buf);
227 attribs = (unsigned int)de_getu16le_p(&pos);
228 attribs_str = ucstring_create(c);
229 de_describe_dos_attribs(c, (attribs & 0x3f), attribs_str, 0);
230 if(attribs&0x40) ucstring_append_flags_item(attribs_str, "EXEC");
231 if(attribs&0x80) ucstring_append_flags_item(attribs_str, "NAME_IS_UTF8");
232 de_dbg(c, "attribs: 0x%04x (%s)", attribs, ucstring_getpsz(attribs_str));
234 szName = dbuf_read_string(c->infile, pos, 257, 257,
235 DE_CONVFLAG_STOP_AT_NUL,
236 (attribs&0x80)?DE_ENCODING_UTF8:DE_ENCODING_ASCII);
237 de_dbg(c, "szName: \"%s\"", ucstring_getpsz(szName->str));
238 if(!szName->found_nul) goto done;
239 pos += szName->bytes_consumed;
241 *bytes_consumed = pos-pos1;
242 retval = 1;
243 done:
244 de_destroy_stringreaderdata(c, szName);
245 ucstring_destroy(attribs_str);
246 return retval;
249 static void do_CFFILEs(deark *c, lctx *d)
251 i64 pos = d->coffFiles;
252 i64 i;
253 int saved_indent_level;
255 de_dbg_indent_save(c, &saved_indent_level);
256 if(d->cFiles<1) goto done;
257 de_dbg(c, "CFFILE section at %d, nfiles=%d", (int)pos, (int)d->cFiles);
258 de_dbg_indent(c, 1);
259 for(i=0; i<d->cFiles; i++) {
260 i64 bytes_consumed = 0;
262 if(pos>=c->infile->len) break;
263 de_dbg(c, "CFFILE[%d] at %d", (int)i, (int)pos);
264 de_dbg_indent(c, 1);
265 if(!do_one_CFFILE(c, d, pos, &bytes_consumed)) {
266 goto done;
268 de_dbg_indent(c, -1);
269 pos += bytes_consumed;
272 done:
273 de_dbg_indent_restore(c, saved_indent_level);
276 // On success, sets d->CFHEADER_len.
277 static int do_CFHEADER(deark *c, lctx *d)
279 int retval = 0;
280 i64 pos = 0;
281 de_ucstring *flags_str = NULL;
282 struct de_stringreaderdata *CabinetPrev = NULL;
283 struct de_stringreaderdata *DiskPrev = NULL;
284 struct de_stringreaderdata *CabinetNext = NULL;
285 struct de_stringreaderdata *DiskNext = NULL;
286 int saved_indent_level;
288 de_dbg_indent_save(c, &saved_indent_level);
289 de_dbg(c, "CFHEADER at %d", (int)pos);
290 de_dbg_indent(c, 1);
291 pos += 8; // signature, reserved1
292 d->cbCabinet = de_getu32le_p(&pos);
293 de_dbg(c, "cbCabinet: %"I64_FMT, d->cbCabinet);
294 pos += 4; // reserved2
295 d->coffFiles = de_getu32le_p(&pos);
296 de_dbg(c, "coffFiles: %"I64_FMT, d->coffFiles);
297 pos += 4; // reserved3
298 d->versionMinor = de_getbyte_p(&pos);
299 d->versionMajor = de_getbyte_p(&pos);
300 de_dbg(c, "file format version: %u.%u", (unsigned int)d->versionMajor,
301 (unsigned int)d->versionMinor);
303 d->cFolders = de_getu16le_p(&pos);
304 de_dbg(c, "cFolders: %d", (int)d->cFolders);
306 d->cFiles = de_getu16le_p(&pos);
307 de_dbg(c, "cFiles: %d", (int)d->cFiles);
309 d->header_flags = (unsigned int)de_getu16le_p(&pos);
310 flags_str = ucstring_create(c);
311 // The specification has a diagram showing that PREV_CABINET is 0x2,
312 // NEXT_CABINET is 0x04, etc. But the text below it says that PREV_CABINET
313 // is 0x1, NEXT_CABINET is 0x02, etc. I'm sure it's the text that's correct.
314 if(d->header_flags&0x0001) ucstring_append_flags_item(flags_str, "PREV_CABINET");
315 if(d->header_flags&0x0002) ucstring_append_flags_item(flags_str, "NEXT_CABINET");
316 if(d->header_flags&0x0004) ucstring_append_flags_item(flags_str, "RESERVE_PRESENT");
317 de_dbg(c, "flags: 0x%04x (%s)", d->header_flags, ucstring_getpsz(flags_str));
319 pos += 2; // setID (arbitrary ID for a collection of linked cab files)
320 pos += 2; // iCabinet (sequence number in a mult-cab file)
322 if(d->header_flags&0x0004) { // RESERVE_PRESENT
323 d->cbCFHeader = de_getu16le_p(&pos);
324 de_dbg(c, "cbCFHeader: %d", (int)d->cbCFHeader);
325 d->cbCFFolder = (i64)de_getbyte_p(&pos);
326 de_dbg(c, "cbCFFolder: %d", (int)d->cbCFFolder);
327 d->cbCFData = (i64)de_getbyte_p(&pos);
328 de_dbg(c, "cbCFData: %d", (int)d->cbCFData);
330 if(d->cbCFHeader!=0) {
331 de_dbg(c, "[%d bytes of abReserve data at %d]", (int)d->cbCFHeader,
332 (int)pos);
333 de_dbg_indent(c, 1);
334 de_dbg_hexdump(c, c->infile, pos, d->cbCFHeader, 256, NULL, 0x1);
335 de_dbg_indent(c, -1);
336 pos += d->cbCFHeader;
340 if(d->header_flags&0x0001) { // PREV_CABINET
341 CabinetPrev = dbuf_read_string(c->infile, pos, 256, 256,
342 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
343 de_dbg(c, "szCabinetPrev: \"%s\"", ucstring_getpsz(CabinetPrev->str));
344 if(!CabinetPrev->found_nul) goto done;
345 pos += CabinetPrev->bytes_consumed;
347 DiskPrev = dbuf_read_string(c->infile, pos, 256, 256,
348 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
349 de_dbg(c, "szDiskPrev: \"%s\"", ucstring_getpsz(DiskPrev->str));
350 if(!DiskPrev->found_nul) goto done;
351 pos += DiskPrev->bytes_consumed;
354 if(d->header_flags&0x0002) { // NEXT_CABINET
355 CabinetNext = dbuf_read_string(c->infile, pos, 256, 256,
356 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
357 de_dbg(c, "szCabinetNext: \"%s\"", ucstring_getpsz(CabinetNext->str));
358 if(!CabinetNext->found_nul) goto done;
359 pos += CabinetNext->bytes_consumed;
361 DiskNext = dbuf_read_string(c->infile, pos, 256, 256,
362 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
363 de_dbg(c, "szDiskNext: \"%s\"", ucstring_getpsz(DiskNext->str));
364 if(!DiskNext->found_nul) goto done;
365 pos += DiskNext->bytes_consumed;
368 // TODO: Additional fields may be here
370 de_dbg_indent(c, -1);
372 if(d->versionMajor!=1 || d->versionMinor!=3) {
373 de_err(c, "Unsupported CAB format version: %u.%u",
374 (unsigned int)d->versionMajor, (unsigned int)d->versionMinor);
375 goto done;
378 d->CFHEADER_len = pos;
379 retval = 1;
380 done:
381 de_destroy_stringreaderdata(c, CabinetPrev);
382 de_destroy_stringreaderdata(c, DiskPrev);
383 de_destroy_stringreaderdata(c, CabinetNext);
384 de_destroy_stringreaderdata(c, DiskNext);
385 ucstring_destroy(flags_str);
386 de_dbg_indent_restore(c, saved_indent_level);
387 return retval;
390 static void de_run_cab(deark *c, de_module_params *mparams)
392 lctx *d = NULL;
394 d = de_malloc(c, sizeof(lctx));
395 if(!do_CFHEADER(c, d)) goto done;
396 do_CFFOLDERs(c, d);
397 do_CFFILEs(c, d);
399 done:
400 de_free(c, d);
403 static int de_identify_cab(deark *c)
405 if(!dbuf_memcmp(c->infile, 0, "MSCF", 4))
406 return 100;
407 return 0;
410 void de_module_cab(deark *c, struct deark_module_info *mi)
412 mi->id = "cab";
413 mi->desc = "Microsoft Cabinet (CAB)";
414 mi->run_fn = de_run_cab;
415 mi->identify_fn = de_identify_cab;
416 mi->flags |= DE_MODFLAG_WARNPARSEONLY;