Moved ea_data module to its own file
[deark.git] / modules / fat.c
blob0211fa6d164cfbfba2d05ee3d821b14ee01dcda8
1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // FAT disk image
7 #include <deark-private.h>
8 DE_DECLARE_MODULE(de_module_fat);
9 DE_DECLARE_MODULE(de_module_loaddskf);
11 #define MAX_NESTING_LEVEL 16
13 struct member_data {
14 u8 fn_base[8];
15 u8 fn_ext[3];
16 u8 is_subdir;
17 u8 is_special;
18 UI attribs;
19 UI ea_handle;
20 i64 fn_base_len, fn_ext_len;
21 i64 filesize;
22 i64 first_cluster;
23 de_ucstring *short_fn;
24 de_ucstring *long_fn;
25 struct de_timestamp mod_time;
28 struct dirctx {
29 u8 lfn_valid;
30 u8 first_seq_num;
31 u8 prev_seq_num;
32 u8 name_cksum;
33 i64 dir_entry_count;
34 i64 pending_lfn_bytesused;
35 #define LFN_CHARS_PER_FRAGMENT 13
36 #define LFN_MAX_FRAGMENTS 20
37 u8 pending_lfn[LFN_CHARS_PER_FRAGMENT*2*LFN_MAX_FRAGMENTS];
40 typedef struct localctx_struct {
41 de_encoding input_encoding;
42 u8 opt_check_root_dir;
43 u8 prescan_root_dir;
45 // TODO: Decide how to handle different variants of FAT.
46 #define FAT_SUBFMT_UNKNOWN 0
47 #define FAT_SUBFMT_PC 1
48 #define FAT_SUBFMT_ATARIST 2
49 int subfmt_req;
50 int subfmt;
51 #define FAT_PLATFORM_UNKNOWN 0
52 #define FAT_PLATFORM_PC 1
53 #define FAT_PLATFORM_ATARIST 2
54 int platform;
56 u8 num_fat_bits; // 12, 16, or 32. 0 if unknown.
57 u8 has_atarist_checksum;
58 i64 bytes_per_sector;
59 i64 sectors_per_cluster;
60 i64 bytes_per_cluster;
61 i64 num_sectors;
62 i64 data_region_sector;
63 i64 data_region_pos;
64 i64 num_data_region_clusters;
65 i64 num_rsvd_sectors;
66 i64 num_fats;
67 i64 num_sectors_per_fat;
68 i64 max_root_dir_entries16;
69 i64 root_dir_sector;
70 i64 num_cluster_identifiers;
71 struct de_strarray *curpath;
73 i64 num_fat_entries;
74 u32 *fat_nextcluster; // array[num_fat_entries]
75 u8 *cluster_used_flags; // array[num_fat_entries]
76 u8 *cluster_used_flags_saved; // array[num_fat_entries] (or NULL)
77 dbuf *ea_data; // NULL if not available
78 } lctx;
80 static void ucstring_append_hexbytes(de_ucstring *s, const u8 *buf, i64 buflen)
82 i64 i;
84 for(i = 0; i<buflen; i++) {
85 if(i>0) ucstring_append_char(s, ' ');
86 ucstring_append_char(s, (de_rune)de_get_hexchar((int)(buf[i]/16)));
87 ucstring_append_char(s, (de_rune)de_get_hexchar((int)(buf[i]%16)));
91 static void dbg_hexbytes_oneline_mem(deark *c, const u8 *buf, i64 buflen, const char *label)
93 de_ucstring *s = NULL;
95 if(buflen<0) buflen = 0;
96 if(buflen>64) buflen = 64;
97 s = ucstring_create(c);
98 ucstring_append_hexbytes(s, buf, buflen);
99 de_dbg(c, "%s: %s", label, ucstring_getpsz_d(s));
100 ucstring_destroy(s);
103 static void dbg_hexbytes_oneline(deark *c, dbuf *f, i64 pos, i64 len, const char *label)
105 u8 buf[64];
107 if(len<0) len = 0;
108 if(len>(i64)sizeof(buf)) len = (i64)sizeof(buf);
109 dbuf_read(f, buf, pos, len);
110 dbg_hexbytes_oneline_mem(c, buf, len, label);
113 static void fat_save_cluster_use_flags(deark *c, lctx *d)
115 if(!d->cluster_used_flags) return;
116 if(!d->cluster_used_flags_saved) {
117 d->cluster_used_flags_saved = de_malloc(c, d->num_fat_entries);
119 de_memcpy(d->cluster_used_flags_saved, d->cluster_used_flags,
120 (size_t)d->num_fat_entries);
123 static void fat_restore_cluster_use_flags(deark *c, lctx *d)
125 if(!d->cluster_used_flags_saved || !d->cluster_used_flags) return;
126 de_memcpy(d->cluster_used_flags, d->cluster_used_flags_saved,
127 (size_t)d->num_fat_entries);
130 static i64 sectornum_to_offset(deark *c, lctx *d, i64 secnum)
132 return secnum * d->bytes_per_sector;
135 static int is_good_clusternum(lctx *d, i64 cnum)
137 if(cnum<2) return 0;
138 if(cnum >= d->num_cluster_identifiers) return 0;
139 return 1;
142 static i64 clusternum_to_offset(deark *c, lctx *d, i64 cnum)
144 return d->data_region_pos + (cnum-2) * d->bytes_per_cluster;
147 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
149 char timestamp_buf[64];
151 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
152 de_dbg(c, "%s: %s", name, timestamp_buf);
155 static i64 get_unpadded_len(const u8 *s, i64 len1)
157 i64 i;
158 i64 len = len1;
160 // Stop at NUL, I guess.
161 for(i=0; i<len1; i++) {
162 if(s[i]==0x00) {
163 len = i;
164 break;
168 for(i=len; i>0; i--) {
169 if(s[i-1]!=' ') {
170 return i;
173 return 0;
176 static int extract_file_lowlevel(deark *c, lctx *d, struct member_data *md, dbuf *outf)
178 int retval = 0;
179 i64 cur_cluster;
180 i64 nbytes_remaining;
182 cur_cluster = md->first_cluster;
183 if(md->is_subdir) {
184 nbytes_remaining = 0;
186 else {
187 nbytes_remaining = md->filesize;
190 while(1) {
191 i64 dpos;
192 i64 nbytes_to_copy;
194 if(nbytes_remaining <= 0) break;
195 if(!is_good_clusternum(d, cur_cluster)) break;
196 if(d->cluster_used_flags[cur_cluster]) break;
197 d->cluster_used_flags[cur_cluster] = 1;
198 if(c->debug_level>=3) de_dbg3(c, "cluster: %d", (int)cur_cluster);
199 dpos = clusternum_to_offset(c, d, cur_cluster);
200 nbytes_to_copy = de_min_int(d->bytes_per_cluster, nbytes_remaining);
201 dbuf_copy(c->infile, dpos, nbytes_to_copy, outf);
202 nbytes_remaining -= nbytes_to_copy;
203 cur_cluster = (i64)d->fat_nextcluster[cur_cluster];
206 if(nbytes_remaining>0) {
207 goto done;
210 retval = 1;
211 done:
212 return retval;
215 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
217 dbuf *outf = NULL;
218 de_finfo *fi = NULL;
219 de_ucstring *fullfn = NULL;
221 if(!md->is_subdir) {
222 if(md->filesize > d->num_data_region_clusters * d->bytes_per_cluster) {
223 de_err(c, "%s: Bad file size", ucstring_getpsz_d(md->short_fn));
224 goto done;
228 fi = de_finfo_create(c);
229 fullfn = ucstring_create(c);
230 de_strarray_make_path(d->curpath, fullfn, DE_MPFLAG_NOTRAILINGSLASH);
231 de_finfo_set_name_from_ucstring(c, fi, fullfn, DE_SNFLAG_FULLPATH);
232 fi->original_filename_flag = 1;
233 if(md->is_subdir) {
234 fi->is_directory = 1;
236 if(md->mod_time.is_valid) {
237 fi->timestamp[DE_TIMESTAMPIDX_MODIFY] = md->mod_time;
240 outf = dbuf_create_output_file(c, NULL, fi, 0);
242 if(!extract_file_lowlevel(c, d, md, outf)) {
243 de_err(c, "%s: File extraction failed", ucstring_getpsz_d(md->short_fn));
244 goto done;
247 done:
248 dbuf_close(outf);
249 ucstring_destroy(fullfn);
250 de_finfo_destroy(c, fi);
253 static void do_subdir(deark *c, lctx *d, struct member_data *md, int nesting_level);
255 static void do_vfat_entry(deark *c, lctx *d, struct dirctx *dctx, i64 pos1, u8 seq_num_raw)
257 u8 seq_num;
258 u8 fn_cksum;
259 int is_first_entry = 0;
260 i64 startpos_in_lfn;
262 if(seq_num_raw==0xe5) {
263 de_dbg(c, "[deleted VFAT entry]");
264 dctx->lfn_valid = 0;
265 goto done;
268 de_dbg(c, "seq number: 0x%02x", (UI)seq_num_raw);
270 seq_num = seq_num_raw & 0xbf;
272 if(seq_num<1 || seq_num>LFN_MAX_FRAGMENTS) {
273 de_warn(c, "Bad VFAT sequence number (%u)", (UI)seq_num);
274 dctx->lfn_valid = 0;
275 goto done;
278 if(seq_num_raw & 0x40) {
279 is_first_entry = 1;
280 de_zeromem(dctx->pending_lfn, sizeof(dctx->pending_lfn));
281 dctx->first_seq_num = seq_num;
282 dctx->lfn_valid = 1;
284 else {
285 if(!dctx->lfn_valid || (seq_num+1 != dctx->prev_seq_num)) {
286 de_dbg(c, "[stray VFAT entry]");
287 dctx->lfn_valid = 0;
288 goto done;
291 dctx->prev_seq_num = seq_num;
293 startpos_in_lfn = LFN_CHARS_PER_FRAGMENT*2*((i64)seq_num-1);
295 de_read(&dctx->pending_lfn[startpos_in_lfn+ 0], pos1+ 1, 10); // 5 chars
296 fn_cksum = de_getbyte(pos1+13);
297 de_read(&dctx->pending_lfn[startpos_in_lfn+10], pos1+14, 12); // 6 more chars
298 de_read(&dctx->pending_lfn[startpos_in_lfn+22], pos1+28, 4); // 2 more chars
299 de_dbg(c, "filename checksum (reported): 0x%02x", (UI)fn_cksum);
300 if(!is_first_entry) {
301 if(fn_cksum != dctx->name_cksum) {
302 de_dbg(c, "[inconsistent VFAT checksums]");
303 dctx->lfn_valid = 0;
306 dctx->name_cksum = fn_cksum;
308 done:
312 static void vfat_cksum_update(const u8 *buf, size_t buflen, u8 *cksum)
314 size_t i;
316 for(i=0; i<buflen; i++) {
317 *cksum = (((*cksum) & 1) << 7) + ((*cksum) >> 1) + buf[i];
321 // If the long file name seems valid, sets it in md->long_fn for later use.
322 static void handle_vfat_lfn(deark *c, lctx *d, struct dirctx *dctx,
323 struct member_data *md)
325 u8 cksum_calc = 0;
326 i64 max_len_in_ucs2_chars;
327 i64 len_in_ucs2_chars = 0;
328 i64 i;
330 if(!dctx->lfn_valid) goto done;
331 if(dctx->prev_seq_num != 1) goto done;
332 if(md->long_fn) goto done;
334 vfat_cksum_update(md->fn_base, 8, &cksum_calc);
335 vfat_cksum_update(md->fn_ext, 3, &cksum_calc);
336 de_dbg(c, "filename checksum (calculated): 0x%02x", (UI)cksum_calc);
337 if(cksum_calc != dctx->name_cksum) goto done;
339 max_len_in_ucs2_chars = LFN_CHARS_PER_FRAGMENT * (i64)dctx->first_seq_num;
340 if(max_len_in_ucs2_chars > (i64)(sizeof(dctx->pending_lfn)/2)) goto done;
341 for(i=0; i<max_len_in_ucs2_chars; i++) {
342 if(dctx->pending_lfn[i*2]==0x00 && dctx->pending_lfn[i*2+1]==0x00) break;
343 if(dctx->pending_lfn[i*2]==0xff && dctx->pending_lfn[i*2+1]==0xff) break;
344 len_in_ucs2_chars++;
347 md->long_fn = ucstring_create(c);
348 ucstring_append_bytes(md->long_fn, dctx->pending_lfn, len_in_ucs2_chars*2,
349 0, DE_ENCODING_UTF16LE);
350 de_dbg(c, "long filename: \"%s\"", ucstring_getpsz_d(md->long_fn));
352 done:
356 // Reads from md->fn_base* and md->fn_ext*, writes to md->short_fn
357 static void decode_short_filename(deark *c, lctx *d, struct member_data *md)
359 if(md->fn_base_len>0) {
360 ucstring_append_bytes(md->short_fn, md->fn_base, md->fn_base_len, 0, d->input_encoding);
362 else {
363 ucstring_append_char(md->short_fn, '_');
365 if(md->fn_ext_len>0) {
366 ucstring_append_char(md->short_fn, '.');
367 ucstring_append_bytes(md->short_fn, md->fn_ext, md->fn_ext_len, 0, d->input_encoding);
371 static void decode_volume_label_name(deark *c, lctx *d, struct member_data *md)
373 if(md->fn_ext_len>0) {
374 ucstring_append_bytes(md->short_fn, md->fn_base, 8, 0, d->input_encoding);
375 ucstring_append_bytes(md->short_fn, md->fn_ext, md->fn_ext_len, 0, d->input_encoding);
377 else {
378 ucstring_append_bytes(md->short_fn, md->fn_base, md->fn_base_len, 0, d->input_encoding);
382 // md is that of the file whose EA data is being requested.
383 // Uses md->ea_handle.
384 static void do_fat_eadata_item(deark *c, lctx *d, struct member_data *md)
386 de_module_params *mparams = NULL;
388 if(!d->ea_data) goto done;
389 if(md->ea_handle==0) goto done;
390 mparams = de_malloc(c, sizeof(de_module_params));
391 mparams->in_params.input_encoding = d->input_encoding;
392 mparams->in_params.flags = 0x1;
393 mparams->in_params.uint1 = (u32)md->ea_handle;
394 de_dbg(c, "reading OS/2 extended attributes");
395 de_dbg_indent(c, 1);
396 // TODO: Better filenames for icons that may be extracted.
397 de_run_module_by_id_on_slice(c, "ea_data", mparams, d->ea_data, 0, d->ea_data->len);
398 de_dbg_indent(c, -1);
400 done:
401 de_free(c, mparams);
404 // md is that of the "EA DATA" file itself.
405 static void do_fat_eadata(deark *c, lctx *d, struct member_data *md)
407 int ret;
409 if(d->ea_data) goto done;
410 d->ea_data = dbuf_create_membuf(c, 0, 0);
411 dbuf_set_length_limit(d->ea_data, c->infile->len);
412 ret = extract_file_lowlevel(c, d, md, d->ea_data);
413 if(!ret) {
414 dbuf_close(d->ea_data);
415 d->ea_data = NULL;
416 goto done;
418 de_dbg(c, "[read EA data, len=%"I64_FMT"]", d->ea_data->len);
419 done:
423 // Returns 0 if this is the end-of-directory marker.
424 static int do_dir_entry(deark *c, lctx *d, struct dirctx *dctx,
425 i64 pos1, int nesting_level, int scanmode)
427 u8 firstbyte;
428 i64 ddate, dtime;
429 int retval = 0;
430 int is_deleted = 0;
431 int is_volume_label = 0;
432 int need_curpath_pop = 0;
433 de_ucstring *descr = NULL;
434 struct member_data *md = NULL;
436 md = de_malloc(c, sizeof(struct member_data));
438 de_dbg(c, "dir entry at %"I64_FMT, pos1);
439 de_dbg_indent(c, 1);
441 de_read(md->fn_base, pos1+0, 8);
442 de_read(md->fn_ext, pos1+8, 3);
443 firstbyte = md->fn_base[0];
445 if(firstbyte==0x00) {
446 de_dbg(c, "[end of dir marker]");
447 goto done;
449 retval = 1;
451 md->attribs = (UI)de_getbyte(pos1+11);
452 descr = ucstring_create(c);
453 de_describe_dos_attribs(c, md->attribs, descr, 0x1);
454 de_dbg(c, "attribs: 0x%02x (%s)", md->attribs, ucstring_getpsz_d(descr));
455 if((md->attribs & 0x3f)==0x0f) {
456 do_vfat_entry(c, d, dctx, pos1, firstbyte);
457 goto done;
460 if((md->attribs & 0x18) == 0x00) {
461 ; // Normal file
463 else if((md->attribs & 0x18) == 0x08) {
464 is_volume_label = 1;
465 md->is_special = 1;
467 else if((md->attribs & 0x18) == 0x10) {
468 md->is_subdir = 1;
470 else {
471 de_warn(c, "Invalid directory entry");
472 md->is_special = 1;
473 dctx->lfn_valid = 0;
474 goto done;
477 if(dctx->lfn_valid) {
478 handle_vfat_lfn(c, d, dctx, md);
479 dctx->lfn_valid = 0;
482 if(firstbyte==0xe5) {
483 de_dbg(c, "[deleted]");
484 is_deleted = 1;
485 md->fn_base[0] = '?';
487 else if(firstbyte==0x05) {
488 md->fn_base[0] = 0xe5;
491 md->fn_base_len = get_unpadded_len(md->fn_base, 8);
492 md->fn_ext_len = get_unpadded_len(md->fn_ext, 3);
494 if(md->is_subdir && md->fn_base_len>=1 && md->fn_base[0]=='.') {
495 // special "." and ".." dirs
496 md->is_special = 1;
499 md->short_fn = ucstring_create(c);
500 if(is_volume_label) {
501 decode_volume_label_name(c, d, md);
503 else {
504 decode_short_filename(c, d, md);
507 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(md->short_fn));
509 if(ucstring_isnonempty(md->long_fn)) {
510 de_strarray_push(d->curpath, md->long_fn);
512 else {
513 de_strarray_push(d->curpath, md->short_fn);
515 need_curpath_pop = 1;
517 if(!scanmode && d->num_fat_bits<32) {
518 md->ea_handle = (UI)de_getu16le(pos1+20);
519 if(md->ea_handle) {
520 de_dbg(c, "EA handle (if OS/2): %u", md->ea_handle);
524 dtime = de_getu16le(pos1+22);
525 ddate = de_getu16le(pos1+24);
526 de_dos_datetime_to_timestamp(&md->mod_time, ddate, dtime);
527 dbg_timestamp(c, &md->mod_time, "mod time");
529 // TODO: This is wrong for FAT32.
530 md->first_cluster = de_getu16le(pos1+26);
531 de_dbg(c, "first cluster: %"I64_FMT, md->first_cluster);
533 md->filesize = de_getu32le(pos1+28);
534 de_dbg(c, "file size: %"I64_FMT, md->filesize);
536 // (Done reading dir entry)
538 if(is_deleted) goto done;
540 if(scanmode) {
541 if(!md->is_subdir && !md->is_special && (md->attribs&0x04) &&
542 md->fn_base_len==7 && md->fn_ext_len==3 &&
543 !de_memcmp(md->fn_base, "EA DATA", 7) &&
544 !de_memcmp(md->fn_ext, " SF", 3) )
546 do_fat_eadata(c, d, md);
547 goto done;
549 de_dbg2(c, "[scan mode - not extracting]");
550 goto done;
553 if(md->ea_handle!=0 && d->ea_data) {
554 do_fat_eadata_item(c, d, md);
557 if(!md->is_subdir && !md->is_special) {
558 do_extract_file(c, d, md);
560 else if(md->is_subdir && !md->is_special) {
561 do_extract_file(c, d, md);
562 do_subdir(c, d, md, nesting_level+1);
565 done:
566 ucstring_destroy(descr);
567 if(md) {
568 ucstring_destroy(md->short_fn);
569 ucstring_destroy(md->long_fn);
571 if(need_curpath_pop) {
572 de_strarray_pop(d->curpath);
574 de_dbg_indent(c, -1);
575 return retval;
578 // Process a contiguous block of directory entries
579 // Returns 0 if an end-of-dir marker was found.
580 static int do_dir_entries(deark *c, lctx *d, struct dirctx *dctx,
581 i64 pos1, i64 len, int nesting_level, int scanmode)
583 i64 num_entries;
584 i64 i;
585 int retval = 0;
587 num_entries = len/32;
588 de_dbg(c, "num entries: %"I64_FMT, num_entries);
590 for(i=0; i<num_entries; i++) {
591 if(!do_dir_entry(c, d, dctx, pos1+32*i, nesting_level, scanmode)) {
592 goto done;
594 dctx->dir_entry_count++;
597 retval = 1;
598 done:
599 return retval;
602 static void destroy_dirctx(deark *c, struct dirctx *dctx)
604 if(!dctx) return;
605 de_free(c, dctx);
608 static void do_subdir(deark *c, lctx *d, struct member_data *md, int nesting_level)
610 int saved_indent_level;
611 i64 cur_cluster_num;
612 i64 cur_cluster_pos;
613 struct dirctx *dctx = NULL;
615 de_dbg_indent_save(c, &saved_indent_level);
617 if(nesting_level >= MAX_NESTING_LEVEL) {
618 de_err(c, "Directories nested too deeply");
619 goto done;
622 dctx = de_malloc(c, sizeof(struct dirctx));
624 cur_cluster_num = md->first_cluster;
625 if(!is_good_clusternum(d, cur_cluster_num)) {
626 de_err(c, "Bad subdirectory entry");
627 goto done;
629 cur_cluster_pos = clusternum_to_offset(c, d, cur_cluster_num);
630 de_dbg(c, "subdir starting at %"I64_FMT, cur_cluster_pos);
631 de_dbg_indent(c, 1);
633 while(1) {
634 if(!is_good_clusternum(d, cur_cluster_num)) {
635 break;
637 cur_cluster_pos = clusternum_to_offset(c, d, cur_cluster_num);
638 de_dbg(c, "[subdir cluster %"I64_FMT" at %"I64_FMT"]", cur_cluster_num, cur_cluster_pos);
640 if(d->cluster_used_flags[cur_cluster_num]) {
641 goto done;
643 d->cluster_used_flags[cur_cluster_num] = 1;
645 if(!do_dir_entries(c, d, dctx, cur_cluster_pos, d->bytes_per_cluster, nesting_level, 0)) {
646 break;
649 cur_cluster_num = d->fat_nextcluster[cur_cluster_num];
652 done:
653 destroy_dirctx(c, dctx);
654 de_dbg_indent_restore(c, saved_indent_level);
657 static void do_root_dir(deark *c, lctx *d)
659 i64 pos1;
660 struct dirctx *dctx = NULL;
662 dctx = de_malloc(c, sizeof(struct dirctx));
663 pos1 = sectornum_to_offset(c, d, d->root_dir_sector);
664 de_dbg(c, "dir at %"I64_FMT, pos1);
665 de_dbg_indent(c, 1);
666 if(pos1<d->bytes_per_sector) goto done;
667 if(d->prescan_root_dir) {
668 de_dbg(c, "[scanning root dir]");
669 // This feature causes us to intentionally read some clusters more than once,
670 // so we have to work around our protections against doing that.
671 fat_save_cluster_use_flags(c, d);
672 de_dbg_indent(c, 1);
673 (void)do_dir_entries(c, d, dctx, pos1, d->max_root_dir_entries16 * 32, 0, 1);
674 de_dbg_indent(c, -1);
675 fat_restore_cluster_use_flags(c, d);
676 de_dbg(c, "[done scanning root dir]");
678 (void)do_dir_entries(c, d, dctx, pos1, d->max_root_dir_entries16 * 32, 0, 0);
679 done:
680 destroy_dirctx(c, dctx);
681 de_dbg_indent(c, -1);
684 static int root_dir_seems_valid(deark *c, lctx *d)
686 i64 pos1;
687 i64 max_entries_to_check;
688 i64 i;
689 i64 entrycount = 0;
690 i64 errcount = 0;
692 if(d->num_fat_bits==32) return 1;
694 if(d->max_root_dir_entries16<=0) return 0;
695 pos1 = sectornum_to_offset(c, d, d->root_dir_sector);
696 if(pos1 + d->max_root_dir_entries16 * 32 > c->infile->len) {
697 return 0;
700 max_entries_to_check = de_max_int(d->max_root_dir_entries16, 10);
701 for(i=0; i<max_entries_to_check; i++) {
702 i64 entrypos;
703 u8 firstbyte;
704 u8 attribs;
706 entrypos = pos1 + 32*i;
707 firstbyte = de_getbyte(entrypos);
708 if(firstbyte==0x00) break;
709 if(firstbyte==0xe5) continue; // Don't validate deleted entries
710 entrycount++;
711 attribs = de_getbyte(entrypos+11);
712 if(attribs & 0xc0) {
713 errcount++;
715 else if((attribs & 0x3f) == 0x0f) {
716 ; // LFN; OK
718 else if((attribs & 0x18)==0x18) {
719 errcount++; // dir + vol.label not valid
722 // TODO: It's really lame to only validate the attribs field, when there's
723 // so much more we could be doing. But it's a hard problem. We don't want
724 // to be too sensitive to minor errors.
727 if(errcount>1 || (errcount==1 && entrycount<=1)) {
728 return 0;
730 return 1;
733 static void do_atarist_boot_checksum(deark *c, lctx *d, i64 pos1)
735 i64 i;
736 UI ck = 0;
738 for(i=0; i<256; i++) {
739 ck += (UI)de_getu16be(pos1+i*2);
740 ck &= 0xffff;
743 de_dbg(c, "Atari ST checksum: 0x%04x", ck);
744 if(ck==0x1234) {
745 d->has_atarist_checksum = 1;
749 static void do_oem_name(deark *c, lctx *d, i64 pos, i64 len)
751 struct de_stringreaderdata *srd;
752 i64 i;
754 srd = dbuf_read_string(c->infile, pos, len, len, 0, DE_ENCODING_ASCII);
756 // Require printable ASCII.
757 for(i=0; i<len; i++) {
758 if(srd->sz[i]<32 || srd->sz[i]>126) {
759 goto done;
763 de_dbg(c, "OEM name: \"%s\"", ucstring_getpsz_d(srd->str));
765 done:
766 de_destroy_stringreaderdata(c, srd);
769 static int do_boot_sector(deark *c, lctx *d, i64 pos1)
771 i64 pos;
772 i64 num_data_region_sectors;
773 i64 num_root_dir_sectors;
774 i64 num_sectors_per_fat16;
775 i64 num_sectors_per_fat32 = 0;
776 i64 num_sectors16;
777 i64 num_sectors32 = 0;
778 i64 num_sectors_per_track;
779 i64 num_heads;
780 i64 jmpinstrlen;
781 u8 b;
782 u8 cksum_sig[2];
783 int retval = 0;
785 de_dbg(c, "boot sector at %"I64_FMT, pos1);
786 de_dbg_indent(c, 1);
788 // BIOS parameter block
789 jmpinstrlen = (d->subfmt==FAT_SUBFMT_ATARIST)?2:3;
790 dbg_hexbytes_oneline(c, c->infile, pos1, jmpinstrlen, "jump instr");
792 if(d->subfmt==FAT_SUBFMT_ATARIST) {
793 do_oem_name(c, d, pos1+2, 6);
794 dbg_hexbytes_oneline(c, c->infile, pos1+8, 3, "serial num");
796 else {
797 do_oem_name(c, d, pos1+3, 8);
800 pos = pos1+11;
801 d->bytes_per_sector = de_getu16le_p(&pos);
802 de_dbg(c, "bytes per sector: %d", (int)d->bytes_per_sector);
803 d->sectors_per_cluster = (i64)de_getbyte_p(&pos);
804 de_dbg(c, "sectors per cluster: %d", (int)d->sectors_per_cluster);
805 d->num_rsvd_sectors = de_getu16le_p(&pos);
807 de_dbg(c, "reserved sectors: %d", (int)d->num_rsvd_sectors);
808 if(d->num_rsvd_sectors==0) {
809 // This happens on some Atari ST disks. Don't know why.
810 d->num_rsvd_sectors = 1;
813 d->num_fats = (i64)de_getbyte_p(&pos);
814 de_dbg(c, "number of FATs: %d", (int)d->num_fats);
816 // This is expected to be 0 for FAT32.
817 d->max_root_dir_entries16 = de_getu16le_p(&pos);
818 de_dbg(c, "max number of root dir entries (if FAT12/16): %d", (int)d->max_root_dir_entries16);
820 num_sectors16 = de_getu16le_p(&pos);
821 de_dbg(c, "number of sectors (old 16-bit field): %d", (int)num_sectors16);
822 b = de_getbyte_p(&pos);
823 de_dbg(c, "media descriptor: 0x%02x", (UI)b);
824 num_sectors_per_fat16 = de_getu16le_p(&pos);
825 de_dbg(c, "sectors per FAT (if FAT12/16): %d", (int)num_sectors_per_fat16);
827 num_sectors_per_track = de_getu16le_p(&pos);
828 de_dbg(c, "sectors per track: %d", (int)num_sectors_per_track);
829 num_heads = de_getu16le_p(&pos);
830 de_dbg(c, "number of heads: %d", (int)num_heads);
832 pos = pos1+0x1fe;
833 de_read(cksum_sig, pos, 2);
834 dbg_hexbytes_oneline_mem(c, cksum_sig, 2, "boot sector signature");
836 do_atarist_boot_checksum(c, d, pos1);
837 if(d->has_atarist_checksum) {
838 d->platform = FAT_PLATFORM_ATARIST;
839 de_dbg(c, "[This is probably a bootable Atari ST disk.]");
841 else if(cksum_sig[0]==0x55 && cksum_sig[1]==0xaa) {
842 d->platform = FAT_PLATFORM_PC;
843 de_dbg(c, "[Disk has PC-compatible boot code.]");
846 if(num_sectors16==0) {
847 num_sectors32 = de_getu32le(pos1+32);
848 de_dbg(c, "num sectors (new 32-bit field): %"I64_FMT, num_sectors32);
851 if(num_sectors_per_fat16==0) {
852 num_sectors_per_fat32 = de_getu32le(pos1+36);
853 de_dbg(c, "sectors per FAT (if FAT32): %u", (UI)num_sectors_per_fat32);
856 if(num_sectors_per_fat16==0) {
857 d->num_sectors_per_fat = num_sectors_per_fat32;
859 else {
860 d->num_sectors_per_fat = num_sectors_per_fat16;
863 if(num_sectors16==0) {
864 d->num_sectors = num_sectors32;
866 else {
867 d->num_sectors = num_sectors16;
870 if(d->sectors_per_cluster<1) goto done;
871 if(d->bytes_per_sector<32) goto done;
872 d->bytes_per_cluster = d->bytes_per_sector * d->sectors_per_cluster;
873 d->root_dir_sector = d->num_rsvd_sectors + d->num_sectors_per_fat * d->num_fats;
874 de_dbg(c, "root dir pos (calculated): %"I64_FMT" (sector %"I64_FMT")",
875 sectornum_to_offset(c, d, d->root_dir_sector), d->root_dir_sector);
877 // num_root_dir_sectors is expected to be 0 for FAT32.
878 num_root_dir_sectors = (d->max_root_dir_entries16*32 + d->bytes_per_sector - 1)/d->bytes_per_sector;
880 num_data_region_sectors = d->num_sectors - (d->root_dir_sector + num_root_dir_sectors);
881 if(num_data_region_sectors<0) goto done;
882 d->num_data_region_clusters = num_data_region_sectors / d->sectors_per_cluster;
883 de_dbg(c, "num clusters (calculated): %"I64_FMT, d->num_data_region_clusters);
885 d->data_region_sector = d->root_dir_sector + num_root_dir_sectors;
886 d->data_region_pos = d->data_region_sector * d->bytes_per_sector;
887 de_dbg(c, "data region pos (calculated): %"I64_FMT" (sector %"I64_FMT")", d->data_region_pos,
888 d->data_region_sector);
890 // (The first cluster is numbered "2".)
891 d->num_cluster_identifiers = d->num_data_region_clusters + 2;
893 if(d->num_data_region_clusters < 4085) {
894 d->num_fat_bits = 12;
896 else if(d->num_data_region_clusters < 65525) {
897 d->num_fat_bits = 16;
899 else {
900 d->num_fat_bits = 32;
903 de_dbg(c, "bits per cluster id (calculated): %u", (UI)d->num_fat_bits);
905 retval = 1;
907 done:
908 if(!retval) {
909 de_err(c, "Invalid or unsupported boot sector");
911 de_dbg_indent(c, -1);
912 return retval;
915 static int do_read_fat(deark *c, lctx *d)
917 i64 pos1;
918 i64 pos;
919 i64 fat_idx_to_read = 0;
920 int retval = 0;
921 i64 i;
923 pos1 = sectornum_to_offset(c, d, d->num_rsvd_sectors + fat_idx_to_read*d->num_sectors_per_fat);
924 de_dbg(c, "FAT#%d at %"I64_FMT, (int)fat_idx_to_read, pos1);
925 de_dbg_indent(c, 1);
927 if(d->num_cluster_identifiers > (i64)(DE_MAX_SANE_OBJECT_SIZE/sizeof(u32))) goto done;
928 d->num_fat_entries = d->num_cluster_identifiers;
929 d->fat_nextcluster = de_mallocarray(c, d->num_fat_entries, sizeof(u32));
930 d->cluster_used_flags = de_malloc(c, d->num_fat_entries);
932 pos = pos1;
933 if(d->num_fat_bits==12) {
934 for(i=0; i<d->num_fat_entries+1; i+=2) {
935 UI val;
937 val = (UI)dbuf_getint_ext(c->infile, pos, 3, 1, 0);
938 pos += 3;
939 if(i < d->num_fat_entries) {
940 d->fat_nextcluster[i] = (u32)(val & 0xfff);
942 if(i+1 < d->num_fat_entries) {
943 d->fat_nextcluster[i+1] = (u32)(val >> 12);
947 else if(d->num_fat_bits==16) {
948 for(i=0; i<d->num_fat_entries; i++) {
949 d->fat_nextcluster[i] = (u32)de_getu16le_p(&pos);
952 else {
953 de_err(c, "This type of FAT is not supported");
954 goto done;
957 if(c->debug_level>=3) {
958 for(i=0; i<d->num_fat_entries; i++) {
959 de_dbg3(c, "fat[%"I64_FMT"]: %"I64_FMT, i, (i64)d->fat_nextcluster[i]);
963 retval = 1;
964 done:
965 de_dbg_indent(c, -1);
966 return retval;
969 static void de_run_fat(deark *c, de_module_params *mparams)
971 lctx *d = NULL;
972 const char *s;
973 int got_root_dir = 0;
974 de_encoding default_encoding = DE_ENCODING_CP437_G;
976 if(mparams) {
977 // out_params.flags:
978 // 0x1 = No valid FAT directory structure found
979 mparams->out_params.flags = 0;
982 d = de_malloc(c, sizeof(lctx));
984 d->prescan_root_dir = (u8)de_get_ext_option_bool(c, "fat:scanroot", 1);
985 d->opt_check_root_dir = (u8)de_get_ext_option_bool(c, "fat:checkroot", 1);
986 s = de_get_ext_option(c, "fat:subfmt");
987 if(s) {
988 if(!de_strcmp(s, "pc")) {
989 d->subfmt_req = FAT_SUBFMT_PC;
991 else if(!de_strcmp(s, "atarist")) {
992 d->subfmt_req = FAT_SUBFMT_ATARIST;
995 d->subfmt = d->subfmt_req;
996 if(d->subfmt==FAT_SUBFMT_ATARIST) {
997 default_encoding = DE_ENCODING_ATARIST;
1000 d->input_encoding = de_get_input_encoding(c, mparams, default_encoding);
1002 // TODO: Detect MBR?
1003 if(!do_boot_sector(c, d, 0)) goto done;
1004 if(d->num_fat_bits==0) goto done;
1006 switch(d->platform) {
1007 case FAT_PLATFORM_PC:
1008 de_declare_fmtf(c, "FAT%d - PC", d->num_fat_bits);
1009 break;
1010 case FAT_PLATFORM_ATARIST:
1011 de_declare_fmtf(c, "FAT%d - Atari ST", d->num_fat_bits);
1012 break;
1013 default:
1014 de_declare_fmtf(c, "FAT%d - Unknown platform", d->num_fat_bits);
1015 break;
1018 if(!do_read_fat(c, d)) goto done;
1020 if(d->opt_check_root_dir) {
1021 if(!root_dir_seems_valid(c, d)) {
1022 de_warn(c, "This file does not appear to contain a valid FAT "
1023 "directory structure. (\"-opt fat:checkroot=0\" to try anyway)");
1024 goto done;
1028 d->curpath = de_strarray_create(c, MAX_NESTING_LEVEL+10);
1029 got_root_dir = 1;
1030 do_root_dir(c, d);
1032 done:
1033 if(!got_root_dir) {
1034 // Inform the parent module that we failed to do anything.
1035 if(mparams) {
1036 mparams->out_params.flags |= 0x1;
1040 if(d) {
1041 de_free(c, d->fat_nextcluster);
1042 de_free(c, d->cluster_used_flags);
1043 de_free(c, d->cluster_used_flags_saved);
1044 if(d->curpath) de_strarray_destroy(d->curpath);
1045 if(d->ea_data) dbuf_close(d->ea_data);
1046 de_free(c, d);
1050 static int de_identify_fat(deark *c)
1052 i64 bytes_per_sector;
1053 i64 max_root_dir_entries;
1054 i64 num_rsvd_sectors;
1055 int confidence = 0;
1056 int has_pc_sig;
1057 int has_ext;
1058 u8 sectors_per_cluster;
1059 u8 num_fats;
1060 u8 media_descr;
1061 u8 b[32];
1063 // TODO: This needs a lot of work.
1064 // It's good enough for most FAT12 floppy disk images.
1066 de_read(b, 0, sizeof(b));
1067 bytes_per_sector = de_getu16le_direct(&b[11]);
1068 sectors_per_cluster = b[13];
1069 num_rsvd_sectors = de_getu16le_direct(&b[14]);
1070 num_fats = b[16];
1071 max_root_dir_entries = de_getu16le_direct(&b[17]);
1072 media_descr = b[21];
1074 if(bytes_per_sector!=512) return 0;
1075 switch(sectors_per_cluster) {
1076 case 1: case 2: case 4: case 8:
1077 case 16: case 32: case 64: case 128:
1078 break;
1079 default:
1080 return 0;
1082 if(num_fats!=1 && num_fats!=2) return 0;
1083 if(media_descr<0xe5 && media_descr!=0) return 0; // Media descriptor
1085 confidence = 1;
1086 if(b[0]==0xeb && b[2]==0x90) confidence += 2;
1087 else if(b[0]==0xe9) confidence += 1;
1088 else if(b[0]==0x60) confidence += 1;
1089 has_pc_sig = (de_getu16be(510)==0x55aa);
1090 if(has_pc_sig) confidence += 2;
1091 if(num_fats==2) confidence += 1;
1092 if(media_descr>=0xe5) confidence += 1;
1093 if(num_rsvd_sectors==1) confidence += 1;
1094 if(max_root_dir_entries==112 || max_root_dir_entries==224) confidence += 2;
1096 has_ext = de_input_file_has_ext(c, "ima") ||
1097 de_input_file_has_ext(c, "img") ||
1098 de_input_file_has_ext(c, "st");
1100 if(confidence>=6) return (has_ext?100:80);
1101 else if(confidence>=4) return (has_ext?60:9);
1102 else return 0;
1105 static void de_help_fat(deark *c)
1107 de_msg(c, "-opt fat:checkroot=0 : Read the directory structure, even if it "
1108 "seems invalid");
1109 de_msg(c, "-opt fat:scanroot=0 : Do not scan the root directory to look for "
1110 "special files");
1113 void de_module_fat(deark *c, struct deark_module_info *mi)
1115 mi->id = "fat";
1116 mi->desc = "FAT disk image";
1117 mi->run_fn = de_run_fat;
1118 mi->identify_fn = de_identify_fat;
1119 mi->help_fn = de_help_fat;
1122 ///////////////////////// LoadDskF/SaveDskF format (OS/2-centric)
1124 // We barely support this format, but if it's uncompressed, we'll try to skip
1125 // past the header, and pretend it's FAT.
1127 struct skf_ctx {
1128 int new_fmt;
1129 i64 hdr_size;
1132 static void loaddskf_decode_as_fat(deark *c, struct skf_ctx *d)
1134 i64 dlen = c->infile->len - d->hdr_size;
1136 de_dbg(c, "decoding as FAT, pos=%"I64_FMT", len=%"I64_FMT, d->hdr_size, dlen);
1137 if(dlen<=0) goto done;
1139 de_dbg_indent(c, 1);
1140 de_run_module_by_id_on_slice(c, "fat", NULL, c->infile, d->hdr_size, dlen);
1141 de_dbg_indent(c, -1);
1142 done:
1146 static int loaddskf_read_header(deark *c, struct skf_ctx *d)
1148 int retval = 0;
1150 d->hdr_size = de_getu16le(38);
1151 de_dbg(c, "header size: %"I64_FMT, d->hdr_size);
1152 if((UI)de_getu16be(d->hdr_size + 510) != 0x55aa) {
1153 goto done;
1155 retval = 1;
1157 done:
1158 if(!retval) {
1159 de_err(c, "Failed to parse LoadDskF file");
1161 return retval;
1164 static void de_run_loaddskf(deark *c, de_module_params *mparams)
1166 struct skf_ctx *d = NULL;
1167 UI sig;
1169 d = de_malloc(c, sizeof(struct skf_ctx));
1170 sig = (UI)de_getu16be(0);
1171 switch(sig) {
1172 case 0xaa58:
1173 break;
1174 case 0xaa59:
1175 d->new_fmt = 1;
1176 break;
1177 case 0xaa5a:
1178 de_err(c, "Compressed LoadDskF files are not supported");
1179 goto done;
1180 default:
1181 de_err(c, "Not a LoadDskF file");
1182 goto done;
1185 if(!loaddskf_read_header(c, d)) goto done;
1186 loaddskf_decode_as_fat(c, d);
1188 done:
1189 de_free(c, d);
1192 static int de_identify_loaddskf(deark *c)
1194 UI sig;
1196 sig = (UI)de_getu16be(0);
1197 if(sig==0xaa58 || sig==0xaa59 || sig==0xaa5a) {
1198 if((UI)de_getu16be(2)==0xf000) {
1199 return 100;
1201 return 9;
1203 return 0;
1206 void de_module_loaddskf(deark *c, struct deark_module_info *mi)
1208 mi->id = "loaddskf";
1209 mi->desc = "LoadDskF/SaveDskF disk image";
1210 mi->run_fn = de_run_loaddskf;
1211 mi->identify_fn = de_identify_loaddskf;