zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / fat.c
blob76f4aa4958442d445c4b102dd47fbc913364630b
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // FAT disk image
6 // LoadDskF/SaveDskF/SKF OS/2 disk image
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_fat);
11 DE_DECLARE_MODULE(de_module_loaddskf);
13 #include "../foreign/dskdcmps.h"
15 #define MAX_NESTING_LEVEL 16
// Decoded information about one (non-LFN) directory entry.
17 struct member_data {
// Raw 8-byte base name and 3-byte extension, as read from the directory entry.
// (The first byte of fn_base may be rewritten for deleted entries and for
// the 0x05 escape.)
18 u8 fn_base[8];
19 u8 fn_ext[3];
// Nonzero if the attributes identify a subdirectory.
20 u8 is_subdir;
// Nonzero for entries that are never extracted: volume labels, entries with
// invalid attributes, and subdirectories whose name starts with '.'.
21 u8 is_special;
// Attribute byte from offset 11 of the entry.
22 UI attribs;
// 16-bit value from offset 20 of the entry (only read when not in scan mode
// and the FAT is not 32-bit); 0 means "none".
23 UI ea_handle;
// Lengths of fn_base/fn_ext after stopping at NUL and trimming trailing spaces.
24 i64 fn_base_len, fn_ext_len;
// 32-bit size field from offset 28 of the entry.
25 i64 filesize;
// 16-bit first-cluster field from offset 26 of the entry.
26 i64 first_cluster;
// Decoded 8.3 name (or volume label); created while processing the entry.
27 de_ucstring *short_fn;
// Long filename from preceding VFAT entries, or NULL if there isn't a valid one.
28 de_ucstring *long_fn;
// Modification time decoded from the DOS date/time fields at offsets 24 and 22.
29 struct de_timestamp mod_time;
// Per-directory state, mainly for reassembling VFAT long filenames that are
// spread over several consecutive directory entries.
32 struct dirctx {
// Nonzero while the fragments collected in pending_lfn are believed to be
// consistent.
33 u8 lfn_valid;
// Sequence number of the entry that started the current long filename
// (the one with flag bit 0x40 set).
34 u8 first_seq_num;
// Sequence number of the most recently accepted LFN entry.
35 u8 prev_seq_num;
// Short-name checksum reported by the LFN entries.
36 u8 name_cksum;
// Number of directory entries processed so far (end-of-dir marker excluded).
37 i64 dir_entry_count;
// NOTE(review): not referenced in the portion of the file visible here.
38 i64 pending_lfn_bytesused;
39 #define LFN_CHARS_PER_FRAGMENT 13
40 #define LFN_MAX_FRAGMENTS 20
// Raw name bytes: 2 bytes per char, LFN_CHARS_PER_FRAGMENT chars per
// fragment; the fragment with sequence number N is stored at slot N-1.
41 u8 pending_lfn[LFN_CHARS_PER_FRAGMENT*2*LFN_MAX_FRAGMENTS];
// Module-wide state for one run of the FAT decoder.
44 typedef struct localctx_struct {
// Encoding used when decoding short filenames and volume labels.
45 de_encoding input_encoding;
// From the "fat:checkroot" option: validate the root directory before using it.
46 u8 opt_check_root_dir;
// From the "fat:scanroot" option: make a preliminary pass over the root
// directory (used to locate the OS/2 EA data file) before extracting.
47 u8 prescan_root_dir;
49 // TODO: Decide how to handle different variants of FAT.
50 #define FAT_SUBFMT_UNKNOWN 0
51 #define FAT_SUBFMT_PC 1
52 #define FAT_SUBFMT_ATARIST 2
// Subformat requested via the "fat:subfmt" option, and the one in effect.
53 int subfmt_req;
54 int subfmt;
55 #define FAT_PLATFORM_UNKNOWN 0
56 #define FAT_PLATFORM_PC 1
57 #define FAT_PLATFORM_ATARIST 2
// Platform guessed from the boot sector (Atari ST checksum or 55 AA signature).
58 int platform;
60 u8 num_fat_bits; // 12, 16, or 32. 0 if unknown.
// Set if the big-endian 16-bit words of the boot sector sum to 0x1234.
61 u8 has_atarist_checksum;
// Geometry, read or calculated from the boot sector.
62 i64 bytes_per_sector;
63 i64 sectors_per_cluster;
64 i64 bytes_per_cluster;
65 i64 num_sectors;
// First sector / byte offset of the data region (where cluster 2 starts).
66 i64 data_region_sector;
67 i64 data_region_pos;
68 i64 num_data_region_clusters;
69 i64 num_rsvd_sectors;
70 i64 num_fats;
71 i64 num_sectors_per_fat;
// Root directory capacity field from the boot sector (expected to be 0 for FAT32).
72 i64 max_root_dir_entries16;
// Sector at which the root directory is calculated to start.
73 i64 root_dir_sector;
// num_data_region_clusters + 2; valid cluster numbers are 2 .. this-1.
74 i64 num_cluster_identifiers;
// Path components of the directory entry currently being processed.
75 struct de_strarray *curpath;
77 i64 num_fat_entries;
78 u32 *fat_nextcluster; // array[num_fat_entries]
79 u8 *cluster_used_flags; // array[num_fat_entries]
80 u8 *cluster_used_flags_saved; // array[num_fat_entries] (or NULL)
81 dbuf *ea_data; // NULL if not available
82 } lctx;
84 static void ucstring_append_hexbytes(de_ucstring *s, const u8 *buf, i64 buflen)
86 i64 i;
88 for(i = 0; i<buflen; i++) {
89 if(i>0) ucstring_append_char(s, ' ');
90 ucstring_append_char(s, (de_rune)de_get_hexchar((int)(buf[i]/16)));
91 ucstring_append_char(s, (de_rune)de_get_hexchar((int)(buf[i]%16)));
95 static void dbg_hexbytes_oneline_mem(deark *c, const u8 *buf, i64 buflen, const char *label)
97 de_ucstring *s = NULL;
99 if(buflen<0) buflen = 0;
100 if(buflen>64) buflen = 64;
101 s = ucstring_create(c);
102 ucstring_append_hexbytes(s, buf, buflen);
103 de_dbg(c, "%s: %s", label, ucstring_getpsz_d(s));
104 ucstring_destroy(s);
107 static void dbg_hexbytes_oneline(deark *c, dbuf *f, i64 pos, i64 len, const char *label)
109 u8 buf[64];
111 if(len<0) len = 0;
112 if(len>(i64)sizeof(buf)) len = (i64)sizeof(buf);
113 dbuf_read(f, buf, pos, len);
114 dbg_hexbytes_oneline_mem(c, buf, len, label);
117 static void fat_save_cluster_use_flags(deark *c, lctx *d)
119 if(!d->cluster_used_flags) return;
120 if(!d->cluster_used_flags_saved) {
121 d->cluster_used_flags_saved = de_malloc(c, d->num_fat_entries);
123 de_memcpy(d->cluster_used_flags_saved, d->cluster_used_flags,
124 (size_t)d->num_fat_entries);
127 static void fat_restore_cluster_use_flags(deark *c, lctx *d)
129 if(!d->cluster_used_flags_saved || !d->cluster_used_flags) return;
130 de_memcpy(d->cluster_used_flags, d->cluster_used_flags_saved,
131 (size_t)d->num_fat_entries);
134 static i64 sectornum_to_offset(deark *c, lctx *d, i64 secnum)
136 return secnum * d->bytes_per_sector;
139 static int is_good_clusternum(lctx *d, i64 cnum)
141 if(cnum<2) return 0;
142 if(cnum >= d->num_cluster_identifiers) return 0;
143 return 1;
146 static i64 clusternum_to_offset(deark *c, lctx *d, i64 cnum)
148 return d->data_region_pos + (cnum-2) * d->bytes_per_cluster;
151 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
153 char timestamp_buf[64];
155 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
156 de_dbg(c, "%s: %s", name, timestamp_buf);
159 static i64 get_unpadded_len(const u8 *s, i64 len1)
161 i64 i;
162 i64 len = len1;
164 // Stop at NUL, I guess.
165 for(i=0; i<len1; i++) {
166 if(s[i]==0x00) {
167 len = i;
168 break;
172 for(i=len; i>0; i--) {
173 if(s[i-1]!=' ') {
174 return i;
177 return 0;
180 static int extract_file_lowlevel(deark *c, lctx *d, struct member_data *md, dbuf *outf)
182 int retval = 0;
183 i64 cur_cluster;
184 i64 nbytes_remaining;
186 cur_cluster = md->first_cluster;
187 if(md->is_subdir) {
188 nbytes_remaining = 0;
190 else {
191 nbytes_remaining = md->filesize;
194 while(1) {
195 i64 dpos;
196 i64 nbytes_to_copy;
198 if(nbytes_remaining <= 0) break;
199 if(!is_good_clusternum(d, cur_cluster)) break;
200 if(d->cluster_used_flags[cur_cluster]) break;
201 d->cluster_used_flags[cur_cluster] = 1;
202 if(c->debug_level>=3) de_dbg3(c, "cluster: %d", (int)cur_cluster);
203 dpos = clusternum_to_offset(c, d, cur_cluster);
204 nbytes_to_copy = de_min_int(d->bytes_per_cluster, nbytes_remaining);
205 dbuf_copy(c->infile, dpos, nbytes_to_copy, outf);
206 nbytes_remaining -= nbytes_to_copy;
207 cur_cluster = (i64)d->fat_nextcluster[cur_cluster];
210 if(nbytes_remaining>0) {
211 goto done;
214 retval = 1;
215 done:
216 return retval;
219 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
221 dbuf *outf = NULL;
222 de_finfo *fi = NULL;
223 de_ucstring *fullfn = NULL;
225 if(!md->is_subdir) {
226 if(md->filesize > d->num_data_region_clusters * d->bytes_per_cluster) {
227 de_err(c, "%s: Bad file size", ucstring_getpsz_d(md->short_fn));
228 goto done;
232 fi = de_finfo_create(c);
233 fullfn = ucstring_create(c);
234 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
235 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
236 fi->original_filename_flag = 1;
237 if(md->is_subdir) {
238 fi->is_directory = 1;
240 if(md->mod_time.is_valid) {
241 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
244 outf = dbuf_create_output_file(c, NULL, fi, 0);
246 if(!extract_file_lowlevel(c, d, md, outf)) {
247 de_err(c, "%s: File extraction failed", ucstring_getpsz_d(md->short_fn));
248 goto done;
251 done:
252 dbuf_close(outf);
253 ucstring_destroy(fullfn);
254 de_finfo_destroy(c, fi);
257 static void do_subdir(deark *c, lctx *d, struct member_data *md, int nesting_level);
// Process one VFAT long-filename (LFN) directory entry located at pos1.
// seq_num_raw is the first byte of the entry.
// If the entry is acceptable, its 26 bytes of name data are stored into
// dctx->pending_lfn at the slot selected by its sequence number, and the
// LFN-tracking fields of dctx are updated. If anything looks wrong, the
// pending long filename is marked invalid (dctx->lfn_valid = 0).
259 static void do_vfat_entry(deark *c, lctx *d, struct dirctx *dctx, i64 pos1, u8 seq_num_raw)
261 u8 seq_num;
262 u8 fn_cksum;
263 int is_first_entry = 0;
264 i64 startpos_in_lfn;
// A first byte of 0xe5 is treated as a deleted entry.
266 if(seq_num_raw==0xe5) {
267 de_dbg(c, "[deleted VFAT entry]");
268 dctx->lfn_valid = 0;
269 goto done;
272 de_dbg(c, "seq number: 0x%02x", (UI)seq_num_raw);
// Mask off bit 0x40 (a flag, tested below) to get the sequence number.
274 seq_num = seq_num_raw & 0xbf;
// The sequence number selects a slot in pending_lfn, so it must be in range.
276 if(seq_num<1 || seq_num>LFN_MAX_FRAGMENTS) {
277 de_warn(c, "Bad VFAT sequence number (%u)", (UI)seq_num);
278 dctx->lfn_valid = 0;
279 goto done;
// Bit 0x40 flags the entry that begins a new long filename: start over
// with an empty buffer.
282 if(seq_num_raw & 0x40) {
283 is_first_entry = 1;
284 de_zeromem(dctx->pending_lfn, sizeof(dctx->pending_lfn));
285 dctx->first_seq_num = seq_num;
286 dctx->lfn_valid = 1;
288 else {
// A continuation entry is only accepted if a long filename is in progress
// and its sequence number is exactly one less than the previous entry's.
289 if(!dctx->lfn_valid || (seq_num+1 != dctx->prev_seq_num)) {
290 de_dbg(c, "[stray VFAT entry]");
291 dctx->lfn_valid = 0;
292 goto done;
295 dctx->prev_seq_num = seq_num;
// Each fragment occupies LFN_CHARS_PER_FRAGMENT*2 bytes of pending_lfn.
297 startpos_in_lfn = LFN_CHARS_PER_FRAGMENT*2*((i64)seq_num-1);
// The name data is split into three pieces within the entry, with the
// checksum byte at offset 13.
299 de_read(&dctx->pending_lfn[startpos_in_lfn+ 0], pos1+ 1, 10); // 5 chars
300 fn_cksum = de_getbyte(pos1+13);
301 de_read(&dctx->pending_lfn[startpos_in_lfn+10], pos1+14, 12); // 6 more chars
302 de_read(&dctx->pending_lfn[startpos_in_lfn+22], pos1+28, 4); // 2 more chars
303 de_dbg(c, "filename checksum (reported): 0x%02x", (UI)fn_cksum);
// Every entry of a long filename is expected to report the same checksum
// as the entry before it.
304 if(!is_first_entry) {
305 if(fn_cksum != dctx->name_cksum) {
306 de_dbg(c, "[inconsistent VFAT checksums]");
307 dctx->lfn_valid = 0;
310 dctx->name_cksum = fn_cksum;
312 done:
316 static void vfat_cksum_update(const u8 *buf, size_t buflen, u8 *cksum)
318 size_t i;
320 for(i=0; i<buflen; i++) {
321 *cksum = (((*cksum) & 1) << 7) + ((*cksum) >> 1) + buf[i];
325 // If the long file name seems valid, sets it in md->long_fn for later use.
326 static void handle_vfat_lfn(deark *c, lctx *d, struct dirctx *dctx,
327 struct member_data *md)
329 u8 cksum_calc = 0;
330 i64 max_len_in_ucs2_chars;
331 i64 len_in_ucs2_chars = 0;
332 i64 i;
334 if(!dctx->lfn_valid) goto done;
335 if(dctx->prev_seq_num != 1) goto done;
336 if(md->long_fn) goto done;
338 vfat_cksum_update(md->fn_base, 8, &cksum_calc);
339 vfat_cksum_update(md->fn_ext, 3, &cksum_calc);
340 de_dbg(c, "filename checksum (calculated): 0x%02x", (UI)cksum_calc);
341 if(cksum_calc != dctx->name_cksum) goto done;
343 max_len_in_ucs2_chars = LFN_CHARS_PER_FRAGMENT * (i64)dctx->first_seq_num;
344 if(max_len_in_ucs2_chars > (i64)(sizeof(dctx->pending_lfn)/2)) goto done;
345 for(i=0; i<max_len_in_ucs2_chars; i++) {
346 if(dctx->pending_lfn[i*2]==0x00 && dctx->pending_lfn[i*2+1]==0x00) break;
347 if(dctx->pending_lfn[i*2]==0xff && dctx->pending_lfn[i*2+1]==0xff) break;
348 len_in_ucs2_chars++;
351 md->long_fn = ucstring_create(c);
352 ucstring_append_bytes(md->long_fn, dctx->pending_lfn, len_in_ucs2_chars*2,
353 0, DE_ENCODING_UTF16LE);
354 de_dbg(c, "long filename: \"%s\"", ucstring_getpsz_d(md->long_fn));
356 done:
360 // Reads from md->fn_base* and md->fn_ext*, writes to md->short_fn
361 static void decode_short_filename(deark *c, lctx *d, struct member_data *md)
363 if(md->fn_base_len>0) {
364 ucstring_append_bytes(md->short_fn, md->fn_base, md->fn_base_len, 0, d->input_encoding);
366 else {
367 ucstring_append_char(md->short_fn, '_');
369 if(md->fn_ext_len>0) {
370 ucstring_append_char(md->short_fn, '.');
371 ucstring_append_bytes(md->short_fn, md->fn_ext, md->fn_ext_len, 0, d->input_encoding);
375 static void decode_volume_label_name(deark *c, lctx *d, struct member_data *md)
377 if(md->fn_ext_len>0) {
378 ucstring_append_bytes(md->short_fn, md->fn_base, 8, 0, d->input_encoding);
379 ucstring_append_bytes(md->short_fn, md->fn_ext, md->fn_ext_len, 0, d->input_encoding);
381 else {
382 ucstring_append_bytes(md->short_fn, md->fn_base, md->fn_base_len, 0, d->input_encoding);
386 // md is that of the file whose EA data is being requested.
387 // Uses md->ea_handle.
388 static void do_fat_eadata_item(deark *c, lctx *d, struct member_data *md)
390 de_module_params *mparams = NULL;
392 if(!d->ea_data) goto done;
393 if(md->ea_handle==0) goto done;
394 mparams = de_malloc(c, sizeof(de_module_params));
395 mparams->in_params.input_encoding = d->input_encoding;
396 mparams->in_params.flags = 0x1;
397 mparams->in_params.uint1 = (u32)md->ea_handle;
398 de_dbg(c, "reading OS/2 extended attributes");
399 de_dbg_indent(c, 1);
400 // TODO: Better filenames for icons that may be extracted.
401 de_run_module_by_id_on_slice(c, "ea_data", mparams, d->ea_data, 0, d->ea_data->len);
402 de_dbg_indent(c, -1);
404 done:
405 de_free(c, mparams);
408 // md is that of the "EA DATA" file itself.
409 static void do_fat_eadata(deark *c, lctx *d, struct member_data *md)
411 int ret;
413 if(d->ea_data) goto done;
414 d->ea_data = dbuf_create_membuf(c, 0, 0);
415 dbuf_set_length_limit(d->ea_data, c->infile->len);
416 ret = extract_file_lowlevel(c, d, md, d->ea_data);
417 if(!ret) {
418 dbuf_close(d->ea_data);
419 d->ea_data = NULL;
420 goto done;
422 de_dbg(c, "[read EA data, len=%"I64_FMT"]", d->ea_data->len);
423 done:
427 // Returns 0 if this is the end-of-directory marker.
428 static int do_dir_entry(deark *c, lctx *d, struct dirctx *dctx,
429 i64 pos1, int nesting_level, int scanmode)
431 u8 firstbyte;
432 i64 ddate, dtime;
433 int retval = 0;
434 int is_deleted = 0;
435 int is_volume_label = 0;
436 int need_curpath_pop = 0;
437 de_ucstring *descr = NULL;
438 struct member_data *md = NULL;
440 md = de_malloc(c, sizeof(struct member_data));
442 de_dbg(c, "dir entry at %"I64_FMT, pos1);
443 de_dbg_indent(c, 1);
445 de_read(md->fn_base, pos1+0, 8);
446 de_read(md->fn_ext, pos1+8, 3);
447 firstbyte = md->fn_base[0];
449 if(firstbyte==0x00) {
450 de_dbg(c, "[end of dir marker]");
451 goto done;
453 retval = 1;
455 md->attribs = (UI)de_getbyte(pos1+11);
456 descr = ucstring_create(c);
457 de_describe_dos_attribs(c, md->attribs, descr, 0x1);
458 de_dbg(c, "attribs: 0x%02x (%s)", md->attribs, ucstring_getpsz_d(descr));
459 if((md->attribs & 0x3f)==0x0f) {
460 do_vfat_entry(c, d, dctx, pos1, firstbyte);
461 goto done;
464 if((md->attribs & 0x18) == 0x00) {
465 ; // Normal file
467 else if((md->attribs & 0x18) == 0x08) {
468 is_volume_label = 1;
469 md->is_special = 1;
471 else if((md->attribs & 0x18) == 0x10) {
472 md->is_subdir = 1;
474 else {
475 de_warn(c, "Invalid directory entry");
476 md->is_special = 1;
477 dctx->lfn_valid = 0;
478 goto done;
481 if(dctx->lfn_valid) {
482 handle_vfat_lfn(c, d, dctx, md);
483 dctx->lfn_valid = 0;
486 if(firstbyte==0xe5) {
487 de_dbg(c, "[deleted]");
488 is_deleted = 1;
489 md->fn_base[0] = '?';
491 else if(firstbyte==0x05) {
492 md->fn_base[0] = 0xe5;
495 md->fn_base_len = get_unpadded_len(md->fn_base, 8);
496 md->fn_ext_len = get_unpadded_len(md->fn_ext, 3);
498 if(md->is_subdir && md->fn_base_len>=1 && md->fn_base[0]=='.') {
499 // special "." and ".." dirs
500 md->is_special = 1;
503 md->short_fn = ucstring_create(c);
504 if(is_volume_label) {
505 decode_volume_label_name(c, d, md);
507 else {
508 decode_short_filename(c, d, md);
511 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->short_fn));
513 if(ucstring_isnonempty(md->long_fn)) {
514 de_strarray_push(d->curpath, md->long_fn);
516 else {
517 de_strarray_push(d->curpath, md->short_fn);
519 need_curpath_pop = 1;
521 if(!scanmode && d->num_fat_bits<32) {
522 md->ea_handle = (UI)de_getu16le(pos1+20);
523 if(md->ea_handle) {
524 de_dbg(c, "EA handle (if OS/2): %u", md->ea_handle);
528 dtime = de_getu16le(pos1+22);
529 ddate = de_getu16le(pos1+24);
530 de_dos_datetime_to_timestamp(&md->mod_time, ddate, dtime);
531 dbg_timestamp(c, &md->mod_time, "mod time");
533 // TODO: This is wrong for FAT32.
534 md->first_cluster = de_getu16le(pos1+26);
535 de_dbg(c, "first cluster: %"I64_FMT, md->first_cluster);
537 md->filesize = de_getu32le(pos1+28);
538 de_dbg(c, "file size: %"I64_FMT, md->filesize);
540 // (Done reading dir entry)
542 if(is_deleted) goto done;
544 if(scanmode) {
545 if(!md->is_subdir && !md->is_special && (md->attribs&0x04) &&
546 md->fn_base_len==7 && md->fn_ext_len==3 &&
547 !de_memcmp(md->fn_base, "EA DATA", 7) &&
548 !de_memcmp(md->fn_ext, " SF", 3) )
550 do_fat_eadata(c, d, md);
551 goto done;
553 de_dbg2(c, "[scan mode - not extracting]");
554 goto done;
557 if(md->ea_handle!=0 && d->ea_data) {
558 do_fat_eadata_item(c, d, md);
561 if(!md->is_subdir && !md->is_special) {
562 do_extract_file(c, d, md);
564 else if(md->is_subdir && !md->is_special) {
565 do_extract_file(c, d, md);
566 do_subdir(c, d, md, nesting_level+1);
569 done:
570 ucstring_destroy(descr);
571 if(md) {
572 ucstring_destroy(md->short_fn);
573 ucstring_destroy(md->long_fn);
575 if(need_curpath_pop) {
576 de_strarray_pop(d->curpath);
578 de_dbg_indent(c, -1);
579 return retval;
582 // Process a contiguous block of directory entries
583 // Returns 0 if an end-of-dir marker was found.
584 static int do_dir_entries(deark *c, lctx *d, struct dirctx *dctx,
585 i64 pos1, i64 len, int nesting_level, int scanmode)
587 i64 num_entries;
588 i64 i;
589 int retval = 0;
591 num_entries = len/32;
592 de_dbg(c, "num entries: %"I64_FMT, num_entries);
594 for(i=0; i<num_entries; i++) {
595 if(!do_dir_entry(c, d, dctx, pos1+32*i, nesting_level, scanmode)) {
596 goto done;
598 dctx->dir_entry_count++;
601 retval = 1;
602 done:
603 return retval;
606 static void destroy_dirctx(deark *c, struct dirctx *dctx)
608 if(!dctx) return;
609 de_free(c, dctx);
612 static void do_subdir(deark *c, lctx *d, struct member_data *md, int nesting_level)
614 int saved_indent_level;
615 i64 cur_cluster_num;
616 i64 cur_cluster_pos;
617 struct dirctx *dctx = NULL;
619 de_dbg_indent_save(c, &saved_indent_level);
621 if(nesting_level >= MAX_NESTING_LEVEL) {
622 de_err(c, "Directories nested too deeply");
623 goto done;
626 dctx = de_malloc(c, sizeof(struct dirctx));
628 cur_cluster_num = md->first_cluster;
629 if(!is_good_clusternum(d, cur_cluster_num)) {
630 de_err(c, "Bad subdirectory entry");
631 goto done;
633 cur_cluster_pos = clusternum_to_offset(c, d, cur_cluster_num);
634 de_dbg(c, "subdir starting at %"I64_FMT, cur_cluster_pos);
635 de_dbg_indent(c, 1);
637 while(1) {
638 if(!is_good_clusternum(d, cur_cluster_num)) {
639 break;
641 cur_cluster_pos = clusternum_to_offset(c, d, cur_cluster_num);
642 de_dbg(c, "[subdir cluster %"I64_FMT" at %"I64_FMT"]", cur_cluster_num, cur_cluster_pos);
644 if(d->cluster_used_flags[cur_cluster_num]) {
645 goto done;
647 d->cluster_used_flags[cur_cluster_num] = 1;
649 if(!do_dir_entries(c, d, dctx, cur_cluster_pos, d->bytes_per_cluster, nesting_level, 0)) {
650 break;
653 cur_cluster_num = d->fat_nextcluster[cur_cluster_num];
656 done:
657 destroy_dirctx(c, dctx);
658 de_dbg_indent_restore(c, saved_indent_level);
661 static void do_root_dir(deark *c, lctx *d)
663 i64 pos1;
664 struct dirctx *dctx = NULL;
666 dctx = de_malloc(c, sizeof(struct dirctx));
667 pos1 = sectornum_to_offset(c, d, d->root_dir_sector);
668 de_dbg(c, "dir at %"I64_FMT, pos1);
669 de_dbg_indent(c, 1);
670 if(pos1<d->bytes_per_sector) goto done;
671 if(d->prescan_root_dir) {
672 de_dbg(c, "[scanning root dir]");
673 // This feature causes us to intentionally read some clusters more than once,
674 // so we have to work around our protections against doing that.
675 fat_save_cluster_use_flags(c, d);
676 de_dbg_indent(c, 1);
677 (void)do_dir_entries(c, d, dctx, pos1, d->max_root_dir_entries16 * 32, 0, 1);
678 de_dbg_indent(c, -1);
679 fat_restore_cluster_use_flags(c, d);
680 de_dbg(c, "[done scanning root dir]");
682 (void)do_dir_entries(c, d, dctx, pos1, d->max_root_dir_entries16 * 32, 0, 0);
683 done:
684 destroy_dirctx(c, dctx);
685 de_dbg_indent(c, -1);
688 static int root_dir_seems_valid(deark *c, lctx *d)
690 i64 pos1;
691 i64 max_entries_to_check;
692 i64 i;
693 i64 entrycount = 0;
694 i64 errcount = 0;
696 if(d->num_fat_bits==32) return 1;
698 if(d->max_root_dir_entries16<=0) return 0;
699 pos1 = sectornum_to_offset(c, d, d->root_dir_sector);
700 if(pos1 + d->max_root_dir_entries16 * 32 > c->infile->len) {
701 return 0;
704 max_entries_to_check = de_max_int(d->max_root_dir_entries16, 10);
705 for(i=0; i<max_entries_to_check; i++) {
706 i64 entrypos;
707 u8 firstbyte;
708 u8 attribs;
710 entrypos = pos1 + 32*i;
711 firstbyte = de_getbyte(entrypos);
712 if(firstbyte==0x00) break;
713 if(firstbyte==0xe5) continue; // Don't validate deleted entries
714 entrycount++;
715 attribs = de_getbyte(entrypos+11);
716 if(attribs & 0xc0) {
717 errcount++;
719 else if((attribs & 0x3f) == 0x0f) {
720 ; // LFN; OK
722 else if((attribs & 0x18)==0x18) {
723 errcount++; // dir + vol.label not valid
726 // TODO: It's really lame to only validate the attribs field, when there's
727 // so much more we could be doing. But it's a hard problem. We don't want
728 // to be too sensitive to minor errors.
731 if(errcount>1 || (errcount==1 && entrycount<=1)) {
732 return 0;
734 return 1;
737 static void do_atarist_boot_checksum(deark *c, lctx *d, i64 pos1)
739 i64 i;
740 UI ck = 0;
742 for(i=0; i<256; i++) {
743 ck += (UI)de_getu16be(pos1+i*2);
744 ck &= 0xffff;
747 de_dbg(c, "Atari ST checksum: 0x%04x", ck);
748 if(ck==0x1234) {
749 d->has_atarist_checksum = 1;
753 static void do_oem_name(deark *c, lctx *d, i64 pos, i64 len)
755 struct de_stringreaderdata *srd;
756 i64 i;
758 srd = dbuf_read_string(c->infile, pos, len, len, 0, DE_ENCODING_ASCII);
760 // Require printable ASCII.
761 for(i=0; i<len; i++) {
762 if(srd->sz[i]<32 || srd->sz[i]>126) {
763 goto done;
767 de_dbg(c, "OEM name: \"%s\"", ucstring_getpsz_d(srd->str));
769 done:
770 de_destroy_stringreaderdata(c, srd);
// Parse the boot sector at pos1.
// Reads the BIOS parameter block fields into d, makes a guess at the
// platform (d->platform), calculates the positions of the root directory and
// data region, and decides the FAT type (d->num_fat_bits) based on the
// calculated number of clusters.
// Returns 1 on success. On failure, reports an error and returns 0.
773 static int do_boot_sector(deark *c, lctx *d, i64 pos1)
775 i64 pos;
776 i64 num_data_region_sectors;
777 i64 num_root_dir_sectors;
778 i64 num_sectors_per_fat16;
779 i64 num_sectors_per_fat32 = 0;
780 i64 num_sectors16;
781 i64 num_sectors32 = 0;
782 i64 num_sectors_per_track;
783 i64 num_heads;
784 i64 jmpinstrlen;
785 u8 b;
786 u8 cksum_sig[2];
787 int retval = 0;
789 de_dbg(c, "boot sector at %"I64_FMT, pos1);
790 de_dbg_indent(c, 1);
792 // BIOS parameter block
// The first 11 bytes are interpreted differently for the Atari ST subformat:
// a 2-byte jump, 6-byte OEM name, and 3-byte serial number, instead of
// a 3-byte jump and 8-byte OEM name.
793 jmpinstrlen = (d->subfmt==FAT_SUBFMT_ATARIST)?2:3;
794 dbg_hexbytes_oneline(c, c->infile, pos1, jmpinstrlen, "jump instr");
796 if(d->subfmt==FAT_SUBFMT_ATARIST) {
797 do_oem_name(c, d, pos1+2, 6);
798 dbg_hexbytes_oneline(c, c->infile, pos1+8, 3, "serial num");
800 else {
801 do_oem_name(c, d, pos1+3, 8);
// The fields from offset 11 onward are read sequentially, with pos advanced
// by each de_*_p() call.
804 pos = pos1+11;
805 d->bytes_per_sector = de_getu16le_p(&pos);
806 de_dbg(c, "bytes per sector: %d", (int)d->bytes_per_sector);
807 d->sectors_per_cluster = (i64)de_getbyte_p(&pos);
808 de_dbg(c, "sectors per cluster: %d", (int)d->sectors_per_cluster);
809 d->num_rsvd_sectors = de_getu16le_p(&pos);
811 de_dbg(c, "reserved sectors: %d", (int)d->num_rsvd_sectors);
812 if(d->num_rsvd_sectors==0) {
813 // This happens on some Atari ST disks. Don't know why.
814 d->num_rsvd_sectors = 1;
817 d->num_fats = (i64)de_getbyte_p(&pos);
818 de_dbg(c, "number of FATs: %d", (int)d->num_fats);
820 // This is expected to be 0 for FAT32.
821 d->max_root_dir_entries16 = de_getu16le_p(&pos);
822 de_dbg(c, "max number of root dir entries (if FAT12/16): %d", (int)d->max_root_dir_entries16);
824 num_sectors16 = de_getu16le_p(&pos);
825 de_dbg(c, "number of sectors (old 16-bit field): %d", (int)num_sectors16);
826 b = de_getbyte_p(&pos);
827 de_dbg(c, "media descriptor: 0x%02x", (UI)b);
828 num_sectors_per_fat16 = de_getu16le_p(&pos);
829 de_dbg(c, "sectors per FAT (if FAT12/16): %d", (int)num_sectors_per_fat16);
831 num_sectors_per_track = de_getu16le_p(&pos);
832 de_dbg(c, "sectors per track: %d", (int)num_sectors_per_track);
833 num_heads = de_getu16le_p(&pos);
834 de_dbg(c, "number of heads: %d", (int)num_heads);
// The last two bytes of a 512-byte boot sector.
836 pos = pos1+0x1fe;
837 de_read(cksum_sig, pos, 2);
838 dbg_hexbytes_oneline_mem(c, cksum_sig, 2, "boot sector signature");
// Platform detection: the Atari ST checksum takes precedence over the
// 55 AA signature.
840 do_atarist_boot_checksum(c, d, pos1);
841 if(d->has_atarist_checksum) {
842 d->platform = FAT_PLATFORM_ATARIST;
843 de_dbg(c, "[This is probably a bootable Atari ST disk.]");
845 else if(cksum_sig[0]==0x55 && cksum_sig[1]==0xaa) {
846 d->platform = FAT_PLATFORM_PC;
847 de_dbg(c, "[Disk has PC-compatible boot code.]");
// The 32-bit fields (at offsets 32 and 36) are only consulted when the
// corresponding 16-bit field is 0.
850 if(num_sectors16==0) {
851 num_sectors32 = de_getu32le(pos1+32);
852 de_dbg(c, "num sectors (new 32-bit field): %"I64_FMT, num_sectors32);
855 if(num_sectors_per_fat16==0) {
856 num_sectors_per_fat32 = de_getu32le(pos1+36);
857 de_dbg(c, "sectors per FAT (if FAT32): %u", (UI)num_sectors_per_fat32);
860 if(num_sectors_per_fat16==0) {
861 d->num_sectors_per_fat = num_sectors_per_fat32;
863 else {
864 d->num_sectors_per_fat = num_sectors_per_fat16;
867 if(num_sectors16==0) {
868 d->num_sectors = num_sectors32;
870 else {
871 d->num_sectors = num_sectors16;
// Sanity checks. These also make sure the divisions below can't divide by 0.
874 if(d->sectors_per_cluster<1) goto done;
875 if(d->bytes_per_sector<32) goto done;
876 d->bytes_per_cluster = d->bytes_per_sector * d->sectors_per_cluster;
// The root directory comes right after the reserved sectors and the FATs.
877 d->root_dir_sector = d->num_rsvd_sectors + d->num_sectors_per_fat * d->num_fats;
878 de_dbg(c, "root dir pos (calculated): %"I64_FMT" (sector %"I64_FMT")",
879 sectornum_to_offset(c, d, d->root_dir_sector), d->root_dir_sector);
881 // num_root_dir_sectors is expected to be 0 for FAT32.
// (Size of the root directory in bytes, rounded up to a whole number of sectors.)
882 num_root_dir_sectors = (d->max_root_dir_entries16*32 + d->bytes_per_sector - 1)/d->bytes_per_sector;
884 num_data_region_sectors = d->num_sectors - (d->root_dir_sector + num_root_dir_sectors);
885 if(num_data_region_sectors<0) goto done;
886 d->num_data_region_clusters = num_data_region_sectors / d->sectors_per_cluster;
887 de_dbg(c, "num clusters (calculated): %"I64_FMT, d->num_data_region_clusters);
// The data region comes right after the root directory.
889 d->data_region_sector = d->root_dir_sector + num_root_dir_sectors;
890 d->data_region_pos = d->data_region_sector * d->bytes_per_sector;
891 de_dbg(c, "data region pos (calculated): %"I64_FMT" (sector %"I64_FMT")", d->data_region_pos,
892 d->data_region_sector);
894 // (The first cluster is numbered "2".)
895 d->num_cluster_identifiers = d->num_data_region_clusters + 2;
// The FAT type is decided solely by the number of clusters.
897 if(d->num_data_region_clusters < 4085) {
898 d->num_fat_bits = 12;
900 else if(d->num_data_region_clusters < 65525) {
901 d->num_fat_bits = 16;
903 else {
904 d->num_fat_bits = 32;
907 de_dbg(c, "bits per cluster id (calculated): %u", (UI)d->num_fat_bits);
909 retval = 1;
911 done:
912 if(!retval) {
913 de_err(c, "Invalid or unsupported boot sector");
915 de_dbg_indent(c, -1);
916 return retval;
919 static int do_read_fat(deark *c, lctx *d)
921 i64 pos1;
922 i64 pos;
923 i64 fat_idx_to_read = 0;
924 int retval = 0;
925 i64 i;
927 pos1 = sectornum_to_offset(c, d, d->num_rsvd_sectors + fat_idx_to_read*d->num_sectors_per_fat);
928 de_dbg(c, "FAT#%d at %"I64_FMT, (int)fat_idx_to_read, pos1);
929 de_dbg_indent(c, 1);
931 if(d->num_cluster_identifiers > (i64)(DE_MAX_SANE_OBJECT_SIZE/sizeof(u32))) goto done;
932 d->num_fat_entries = d->num_cluster_identifiers;
933 d->fat_nextcluster = de_mallocarray(c, d->num_fat_entries, sizeof(u32));
934 d->cluster_used_flags = de_malloc(c, d->num_fat_entries);
936 pos = pos1;
937 if(d->num_fat_bits==12) {
938 for(i=0; i<d->num_fat_entries+1; i+=2) {
939 UI val;
941 val = (UI)dbuf_getint_ext(c->infile, pos, 3, 1, 0);
942 pos += 3;
943 if(i < d->num_fat_entries) {
944 d->fat_nextcluster[i] = (u32)(val & 0xfff);
946 if(i+1 < d->num_fat_entries) {
947 d->fat_nextcluster[i+1] = (u32)(val >> 12);
951 else if(d->num_fat_bits==16) {
952 for(i=0; i<d->num_fat_entries; i++) {
953 d->fat_nextcluster[i] = (u32)de_getu16le_p(&pos);
956 else {
957 de_err(c, "This type of FAT is not supported");
958 goto done;
961 if(c->debug_level>=3) {
962 for(i=0; i<d->num_fat_entries; i++) {
963 de_dbg3(c, "fat[%"I64_FMT"]: %"I64_FMT, i, (i64)d->fat_nextcluster[i]);
967 retval = 1;
968 done:
969 de_dbg_indent(c, -1);
970 return retval;
// Main function of the "fat" module.
// Reads the module options, parses the boot sector, loads the FAT,
// optionally sanity-checks the root directory, then processes the root
// directory (and, recursively, everything beneath it).
// If mparams is not NULL, out_params.flags bit 0x1 is set on return if
// processing never got as far as the root directory.
973 static void de_run_fat(deark *c, de_module_params *mparams)
975 lctx *d = NULL;
976 const char *s;
977 int got_root_dir = 0;
978 de_encoding default_encoding = DE_ENCODING_CP437_G;
980 if(mparams) {
981 // out_params.flags:
982 // 0x1 = No valid FAT directory structure found
983 mparams->out_params.flags = 0;
986 d = de_malloc(c, sizeof(lctx));
// Both of these options default to "on".
988 d->prescan_root_dir = (u8)de_get_ext_option_bool(c, "fat:scanroot", 1);
989 d->opt_check_root_dir = (u8)de_get_ext_option_bool(c, "fat:checkroot", 1);
// "fat:subfmt" may be "pc" or "atarist". Any other value leaves subfmt_req
// unchanged.
990 s = de_get_ext_option(c, "fat:subfmt");
991 if(s) {
992 if(!de_strcmp(s, "pc")) {
993 d->subfmt_req = FAT_SUBFMT_PC;
995 else if(!de_strcmp(s, "atarist")) {
996 d->subfmt_req = FAT_SUBFMT_ATARIST;
999 d->subfmt = d->subfmt_req;
// The Atari ST subformat has a different default filename encoding.
1000 if(d->subfmt==FAT_SUBFMT_ATARIST) {
1001 default_encoding = DE_ENCODING_ATARIST;
1004 d->input_encoding = de_get_input_encoding(c, mparams, default_encoding);
1006 // TODO: Detect MBR?
1007 if(!do_boot_sector(c, d, 0)) goto done;
1008 if(d->num_fat_bits==0) goto done;
// Report the detected format, based on the platform guessed from the boot
// sector.
1010 switch(d->platform) {
1011 case FAT_PLATFORM_PC:
1012 de_declare_fmtf(c, "FAT%d - PC", d->num_fat_bits);
1013 break;
1014 case FAT_PLATFORM_ATARIST:
1015 de_declare_fmtf(c, "FAT%d - Atari ST", d->num_fat_bits);
1016 break;
1017 default:
1018 de_declare_fmtf(c, "FAT%d - Unknown platform", d->num_fat_bits);
1019 break;
1022 if(!do_read_fat(c, d)) goto done;
1024 if(d->opt_check_root_dir) {
1025 if(!root_dir_seems_valid(c, d)) {
1026 de_warn(c, "This file does not appear to contain a valid FAT "
1027 "directory structure. (\"-opt fat:checkroot=0\" to try anyway)");
1028 goto done;
// The path array is sized with some room beyond the maximum nesting depth.
1032 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1033 got_root_dir = 1;
1034 do_root_dir(c, d);
1036 done:
1037 if(!got_root_dir) {
1038 // Inform the parent module that we failed to do anything.
1039 if(mparams) {
1040 mparams->out_params.flags |= 0x1;
// Release everything owned by the context, then the context itself.
1044 if(d) {
1045 de_free(c, d->fat_nextcluster);
1046 de_free(c, d->cluster_used_flags);
1047 de_free(c, d->cluster_used_flags_saved);
1048 if(d->curpath) de_strarray_destroy(d->curpath);
1049 if(d->ea_data) dbuf_close(d->ea_data);
1050 de_free(c, d);
// Format identification function for the "fat" module.
// Looks at a few fields of what would be the boot sector, and accumulates
// a confidence score from various pieces of evidence.
// Returns 0 if the file doesn't seem to be a FAT image, otherwise a
// confidence value (higher if the filename ends in .ima, .img, or .st).
1054 static int de_identify_fat(deark *c)
1056 i64 bytes_per_sector;
1057 i64 max_root_dir_entries;
1058 i64 num_rsvd_sectors;
1059 int confidence = 0;
1060 int has_pc_sig;
1061 int has_ext;
1062 u8 sectors_per_cluster;
1063 u8 num_fats;
1064 u8 media_descr;
1065 u8 b[32];
1067 // TODO: This needs a lot of work.
1068 // It's good enough for most FAT12 floppy disk images.
// Read the first 32 bytes of the file, and pull out the fields of interest.
1070 de_read(b, 0, sizeof(b));
1071 bytes_per_sector = de_getu16le_direct(&b[11]);
1072 sectors_per_cluster = b[13];
1073 num_rsvd_sectors = de_getu16le_direct(&b[14]);
1074 num_fats = b[16];
1075 max_root_dir_entries = de_getu16le_direct(&b[17]);
1076 media_descr = b[21];
// Hard requirements: 512-byte sectors, a power-of-2 cluster size
// (1 to 128 sectors), 1 or 2 FATs, and a media descriptor that is
// either 0 or at least 0xe5.
1078 if(bytes_per_sector!=512) return 0;
1079 switch(sectors_per_cluster) {
1080 case 1: case 2: case 4: case 8:
1081 case 16: case 32: case 64: case 128:
1082 break;
1083 default:
1084 return 0;
1086 if(num_fats!=1 && num_fats!=2) return 0;
1087 if(media_descr<0xe5 && media_descr!=0) return 0; // Media descriptor
// Soft evidence: each item that looks right adds to the score.
1089 confidence = 1;
// Certain values of the first byte(s) of the file score points.
1090 if(b[0]==0xeb && b[2]==0x90) confidence += 2;
1091 else if(b[0]==0xe9) confidence += 1;
1092 else if(b[0]==0x60) confidence += 1;
// The 55 AA signature at the end of the first 512 bytes.
1093 has_pc_sig = (de_getu16be(510)==0x55aa);
1094 if(has_pc_sig) confidence += 2;
1095 if(num_fats==2) confidence += 1;
1096 if(media_descr>=0xe5) confidence += 1;
1097 if(num_rsvd_sectors==1) confidence += 1;
1098 if(max_root_dir_entries==112 || max_root_dir_entries==224) confidence += 2;
1100 has_ext = de_input_file_has_ext(c, "ima") ||
1101 de_input_file_has_ext(c, "img") ||
1102 de_input_file_has_ext(c, "st");
// Convert the score to a return value. A suitable filename extension
// increases it.
1104 if(confidence>=6) return (has_ext?100:80);
1105 else if(confidence>=4) return (has_ext?60:9);
1106 else return 0;
1109 static void de_help_fat(deark *c)
1111 de_msg(c, "-opt fat:checkroot=0 : Read the directory structure, even if it "
1112 "seems invalid");
1113 de_msg(c, "-opt fat:scanroot=0 : Do not scan the root directory to look for "
1114 "special files");
1117 void de_module_fat(deark *c, struct deark_module_info *mi)
1119 mi->id = "fat";
1120 mi->desc = "FAT disk image";
1121 mi->run_fn = de_run_fat;
1122 mi->identify_fn = de_identify_fat;
1123 mi->help_fn = de_help_fat;
1126 ///////////////////////// LoadDskF/SaveDskF format (OS/2-centric)
// State for the LoadDskF/SaveDskF module.
1128 struct skf_ctx {
// When decompressing: nonzero to write a raw disk image (".ima"); zero to
// write an uncompressed file in LoadDskF format (".unc.dsk").
1129 int to_raw;
// NOTE(review): new_fmt and is_compressed are not set or used in the portion
// of the file visible here; their meaning is presumably as the names suggest.
1130 int new_fmt;
1131 int is_compressed;
// 32-bit value from offset 20 of the header.
1132 u32 checksum_reported;
// Size of the header that precedes the image data.
1133 i64 hdr_size;
1134 i64 expected_dcmpr_size; // 0 if unknown
1135 i64 padded_size; // 0 if unknown
1138 static void loaddskf_pad_ima_file(deark *c, struct skf_ctx *d, dbuf *outf)
1140 i64 num_padding_bytes;
1141 u8 padding_value;
1143 if(d->padded_size<=0) goto done;
1144 num_padding_bytes = d->padded_size - outf->len;
1145 if(num_padding_bytes<=0) goto done;
1146 de_dbg(c, "[adding padding]");
1148 // TODO: Does it matter what we pad with? Possibilities include 0x00, 0xe5, 0xf6.
1149 padding_value = 0x00;
1150 dbuf_write_run(outf, padding_value, num_padding_bytes);
1151 done:
1155 static void loaddskf_convert_noncmpr_to_ima(deark *c, struct skf_ctx *d)
1157 dbuf *outf = NULL;
1159 outf = dbuf_create_output_file(c, "ima", NULL, 0);
1160 de_dbg(c, "[copying]");
1161 dbuf_copy(c->infile, d->hdr_size, c->infile->len - d->hdr_size, outf);
1162 loaddskf_pad_ima_file(c, d, outf);
1163 dbuf_close(outf);
1166 static void loaddskf_decode_as_fat(deark *c, struct skf_ctx *d)
1168 i64 dlen = c->infile->len - d->hdr_size;
1170 de_dbg(c, "decoding as FAT, pos=%"I64_FMT", len=%"I64_FMT, d->hdr_size, dlen);
1171 if(dlen<=0) goto done;
1173 de_dbg_indent(c, 1);
1174 de_run_module_by_id_on_slice(c, "fat", NULL, c->infile, d->hdr_size, dlen);
1175 de_dbg_indent(c, -1);
1176 done:
1180 static void loaddskf_decompress(deark *c, struct skf_ctx *d)
1182 struct de_dfilter_in_params dcmpri;
1183 struct de_dfilter_out_params dcmpro;
1184 struct de_dfilter_results dres;
1185 dbuf *outf = NULL;
1187 if(d->to_raw) {
1188 outf = dbuf_create_output_file(c, "ima", NULL, 0);
1190 else {
1191 outf = dbuf_create_output_file(c, "unc.dsk", NULL, 0);
1192 dbuf_write(outf, (const u8*)"\xaa\x59", 2);
1193 dbuf_copy(c->infile, 2, d->hdr_size-2, outf);
1196 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
1197 dcmpri.f = c->infile;
1198 dcmpri.pos = d->hdr_size;
1199 dcmpri.len = c->infile->len - dcmpri.pos;
1200 dcmpro.f = outf;
1202 de_dbg(c, "[decompressing]");
1203 dskdcmps_run(c, &dcmpri, &dcmpro, &dres);
1204 if(dres.errcode) {
1205 de_err(c, "Decompression failed: %s", de_dfilter_get_errmsg(c, &dres));
1206 goto done;
1209 if(d->to_raw) {
1210 loaddskf_pad_ima_file(c, d, outf);
1212 // TODO: If !d->to_raw, maybe we should still ensure it decompressed to
1213 // the expected size.
1215 done:
1216 dbuf_close(outf);
1219 static int loaddskf_read_header(deark *c, struct skf_ctx *d)
1221 i64 bytes_per_sector;
1222 i64 num_sectors_per_track;
1223 i64 num_cylinders;
1224 i64 num_heads;
1225 i64 num_sectors_in_image;
1226 int retval = 0;
1228 de_dbg(c, "header");
1229 de_dbg_indent(c, 1);
1231 bytes_per_sector = de_getu16le(4);
1232 de_dbg(c, "bytes per sector: %u", (UI)bytes_per_sector);
1233 d->checksum_reported = (u32)de_getu32le(20); // TODO: What is this?
1234 de_dbg(c, "checksum (reported): 0x%08x", (UI)d->checksum_reported);
1235 num_cylinders = de_getu16le(24);
1236 de_dbg(c, "cylinders: %u", (UI)num_cylinders);
1237 num_heads = de_getu16le(26);
1238 de_dbg(c, "number of heads: %u", (UI)num_heads);
1239 num_sectors_per_track = de_getu16le(28);
1240 de_dbg(c, "sectors per track: %u", (UI)num_sectors_per_track);
1241 num_sectors_in_image = de_getu16le(34);
1242 de_dbg(c, "num sectors in image: %u", (UI)num_sectors_in_image);
1244 d->hdr_size = de_getu16le(38);
1245 if(d->hdr_size==0) d->hdr_size = 512;
1246 de_dbg(c, "header size: %"I64_FMT, d->hdr_size);
1247 if(d->hdr_size<40 || d->hdr_size>c->infile->len) {
1248 goto done;
1251 retval = 1;
1253 if(num_cylinders<20 || num_cylinders>200 ||
1254 num_heads<1 || num_heads>2 ||
1255 num_sectors_per_track<8 || num_sectors_per_track>200 ||
1256 bytes_per_sector<128 || bytes_per_sector>2048)
1258 de_warn(c, "Unexpected disk geometry. Something may have failed.");
1259 goto done;
1262 d->expected_dcmpr_size = num_sectors_in_image * bytes_per_sector;
1263 d->padded_size = num_cylinders * num_heads * num_sectors_per_track * bytes_per_sector;
1264 de_dbg(c, "expected uncmpr image size: %"I64_FMT", padded=%"I64_FMT,
1265 d->expected_dcmpr_size, d->padded_size);
1267 retval = 1;
1269 done:
1270 if(!retval) {
1271 de_err(c, "Failed to parse LoadDskF file");
1273 de_dbg_indent(c, -1);
1274 return retval;
1277 static void de_run_loaddskf(deark *c, de_module_params *mparams)
1279 struct skf_ctx *d = NULL;
1280 const char *subfmt_name;
1281 UI sig;
1283 d = de_malloc(c, sizeof(struct skf_ctx));
1284 d->to_raw = de_get_ext_option_bool(c, "loaddskf:toraw", 0);
1286 sig = (UI)de_getu16be(0);
1287 switch(sig) {
1288 case 0xaa58:
1289 subfmt_name = "old";
1290 break;
1291 case 0xaa59:
1292 subfmt_name = "new";
1293 d->new_fmt = 1;
1294 break;
1295 case 0xaa5a:
1296 subfmt_name = "new, compressed";
1297 d->new_fmt = 1;
1298 d->is_compressed = 1;
1299 break;
1300 default:
1301 de_err(c, "Not a LoadDskF file");
1302 goto done;
1305 de_declare_fmtf(c, "LoadDskF (%s)", subfmt_name);
1306 if(!loaddskf_read_header(c, d)) goto done;
1307 if(d->is_compressed) {
1308 loaddskf_decompress(c, d);
1310 else {
1311 if(d->to_raw) {
1312 loaddskf_convert_noncmpr_to_ima(c, d);
1314 else {
1315 loaddskf_decode_as_fat(c, d);
1319 done:
1320 de_free(c, d);
1323 static int de_identify_loaddskf(deark *c)
1325 UI sig;
1327 sig = (UI)de_getu16be(0);
1328 if(sig==0xaa58 || sig==0xaa59 || sig==0xaa5a) {
1329 if((UI)de_getu16be(2)==0xf000) {
1330 return 100;
1332 return 9;
1334 return 0;
1337 static void de_help_loaddskf(deark *c)
1339 de_msg(c, "-opt loaddskf:toraw : Convert to raw FAT/IMA format");
1342 void de_module_loaddskf(deark *c, struct deark_module_info *mi)
1344 mi->id = "loaddskf";
1345 mi->desc = "LoadDskF/SaveDskF disk image";
1346 mi->run_fn = de_run_loaddskf;
1347 mi->identify_fn = de_identify_loaddskf;
1348 mi->help_fn = de_help_loaddskf;