1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // HFS (Mac filesystem)
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_hfs
);
13 #define CDRTYPE_FILE 2
15 struct ExtDescriptor
{
21 struct dirid_item_struct
{
26 // Represents one record in a leaf node (one file or directory)
32 struct de_stringreaderdata
*name_srd
;
45 i64 bthFNode
; // Used if this is a header node
48 unsigned int *offsets
;
51 typedef struct localctx_struct
{
55 i64 num_files_in_root_dir
;
62 struct ExtDescriptor drXTExtRec
[3];
63 struct ExtDescriptor drCTExtRec
[3];
65 struct de_inthashtable
*nodes_seen
;
66 struct de_inthashtable
*dirid_hash
;
69 static i64
block_dpos(lctx
*d
, i64 blknum
)
71 return blknum
* d
->blocksize
;
74 static i64
allocation_blk_dpos(lctx
*d
, i64 ablknum
)
76 return (d
->blocksize
* d
->drAlBlSt
) + (d
->drAlBlkSiz
* ablknum
);
// Maps a catalog node number to a byte offset in the input file.
// 'n' is treated as an offset from the start of the catalog, and is
// resolved against the (up to) three catalog extents in d->drCTExtRec:
// each extent covers num_alloc_blks * drAlBlkSiz bytes.
// NOTE(review): the statement that initialises 'n' from 'nodenum' is not
// visible here; do_node() treats nodes as 512 bytes -- confirm.
79 static i64
node_dpos(lctx
*d
, i64 nodenum
)
83 // If the catalog were contiguous, this would be the offset we want, from the
84 // start of the catalog.
87 if(n
< d
->drCTExtRec
[0].num_alloc_blks
* d
->drAlBlkSiz
) {
88 // It's in the first extent.
89 return allocation_blk_dpos(d
, d
->drCTExtRec
[0].first_alloc_blk
) + n
;
91 // Not in first extent. Account for its size, and try the second extent.
92 n
-= d
->drCTExtRec
[0].num_alloc_blks
* d
->drAlBlkSiz
;
93 if(n
< d
->drCTExtRec
[1].num_alloc_blks
* d
->drAlBlkSiz
) {
94 // It's in the second extent.
95 return allocation_blk_dpos(d
, d
->drCTExtRec
[1].first_alloc_blk
) + n
;
97 // Not in second extent. Account for its size, and assume it's in the third extent.
// No bounds check is made against the third extent's size.
98 n
-= d
->drCTExtRec
[1].num_alloc_blks
* d
->drAlBlkSiz
;
99 return allocation_blk_dpos(d
, d
->drCTExtRec
[2].first_alloc_blk
) + n
;
102 // returned_ts can be NULL.
// Reads a 32-bit big-endian value at 'pos', converts it with
// de_mac_time_to_timestamp(), and writes a debug line of the form
// "<name>: <raw value> (<text>)".
// NOTE(review): the conditions guarding the conversion, the copy into
// returned_ts, and the choice between the two text forms are not visible
// here -- confirm against the full file.
103 static void read_one_timestamp(deark
*c
, lctx
*d
, i64 pos
, struct de_timestamp
*returned_ts
,
107 struct de_timestamp ts
;
108 char timestamp_buf
[64];
// Start from an all-zero timestamp structure.
110 de_zeromem(&ts
, sizeof(struct de_timestamp
));
112 ts_raw
= de_getu32be(pos
);
114 de_mac_time_to_timestamp(ts_raw
, &ts
);
// The text shown in the debug line is either the formatted timestamp...
122 de_timestamp_to_string(&ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
// ...or the fixed string "unknown".
125 de_strlcpy(timestamp_buf
, "unknown", sizeof(timestamp_buf
));
127 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, ts_raw
, timestamp_buf
);
130 static void read_ExtDataRecs(deark
*c
, lctx
*d
, i64 pos1
,
131 struct ExtDescriptor
*eds
, size_t num_eds
, const char *name
)
136 for(i
=0; i
<num_eds
; i
++) {
137 eds
[i
].first_alloc_blk
= de_getu16be_p(&pos
);
138 eds
[i
].num_alloc_blks
= de_getu16be_p(&pos
);
139 de_dbg(c
, "%s[%u]: first_blk=%u, num_blks=%u", name
, (unsigned int)i
,
140 (unsigned int)eds
[i
].first_alloc_blk
, (unsigned int)eds
[i
].num_alloc_blks
);
144 static int do_master_directory_blocks(deark
*c
, lctx
*d
, i64 blknum
)
148 i64 catalog_num_alloc_blocks
;
149 de_ucstring
*s
= NULL
;
152 pos
= block_dpos(d
, blknum
);
153 de_dbg(c
, "master directory blocks at %"I64_FMT
" (block %"I64_FMT
")", pos
, blknum
);
156 pos
+= 2; // signature
157 read_one_timestamp(c
, d
, pos
, NULL
, "vol. create date");
159 read_one_timestamp(c
, d
, pos
, NULL
, "vol. last mod date");
163 d
->num_files_in_root_dir
= de_getu16be_p(&pos
); // drNmFls
164 de_dbg(c
, "num. files in root dir: %d", (int)d
->num_files_in_root_dir
);
166 pos
+= 2; // first block of volume bitmap
167 pos
+= 2; // start of next allocation search
169 d
->drNmAlBlks
= de_getu16be_p(&pos
);
170 de_dbg(c
, "drNmAlBlks: %d", (int)d
->drNmAlBlks
);
171 d
->drAlBlkSiz
= de_getu32be_p(&pos
);
172 de_dbg(c
, "drAlBlkSiz: %u", (unsigned int)d
->drAlBlkSiz
);
173 d
->drClpSiz
= de_getu32be_p(&pos
);
174 de_dbg(c
, "drClpSiz: %u", (unsigned int)d
->drClpSiz
);
175 d
->drAlBlSt
= de_getu16be_p(&pos
);
176 de_dbg(c
, "drAlBlSt: %d", (int)d
->drAlBlSt
);
177 pos
+= 4; // drNxtCNID
178 pos
+= 2; // drFreeBks
180 nlen
= de_getbyte_p(&pos
);
181 s
= ucstring_create(c
);
182 dbuf_read_to_ucstring_n(c
->infile
, pos
, nlen
, 27, s
, 0, d
->input_encoding
);
183 de_dbg(c
, "volume name: \"%s\"", ucstring_getpsz_d(s
));
186 pos
+= 4; // drVolBkUp
187 pos
+= 2; // drVSeqNum
189 pos
+= 4; // drXTClpSiz
190 pos
+= 4; // drCTClpSiz
191 pos
+= 2; // drNmRtDirs
192 pos
+= 4; // drFilCnt
193 pos
+= 4; // drDirCnt
194 pos
+= 4*8; // drFndrInfo
195 pos
+= 2; // drVCSize
196 pos
+= 2; // drVBMCSize
197 pos
+= 2; // drCtlCSize
199 d
->drXTFlSize
= de_getu32be_p(&pos
);
200 de_dbg(c
, "drXTFlSize: %"I64_FMT
, d
->drXTFlSize
);
201 read_ExtDataRecs(c
, d
, pos
, d
->drCTExtRec
, 3, "drXTFlSize");
204 d
->drCTFlSize
= de_getu32be_p(&pos
);
205 de_dbg(c
, "drCTFlSize: %"I64_FMT
, d
->drCTFlSize
);
206 read_ExtDataRecs(c
, d
, pos
, d
->drCTExtRec
, 3, "drCTExtRec");
209 catalog_num_alloc_blocks
= d
->drCTExtRec
[0].num_alloc_blks
+
210 d
->drCTExtRec
[1].num_alloc_blks
+ d
->drCTExtRec
[2].num_alloc_blks
;
212 if(d
->drCTFlSize
> catalog_num_alloc_blocks
* d
->drAlBlkSiz
) {
213 // TODO: Support this
214 de_err(c
, "Catalog has more than 3 fragments, not supported");
220 de_dbg_indent(c
, -1);
// Returns a printable name for a B*-tree node type code, or "?" if the
// code is not one of the four known values.
static const char *get_node_type_name(int t)
{
	if(t==0) return "index";
	if(t==1) return "header";
	if(t==2) return "map";
	if(t==-1) return "leaf";
	return "?";
}
// Parses the B*-tree header record of a header node and logs its fields.
// The record is located at nd->dpos + nd->offsets[0]. The only value kept
// is bthFNode, which is stored in nd->bthFNode (do_all_leaf_nodes() starts
// its walk there).
// Nothing is parsed if the node has fewer than 3 records, or if
// pos+512 is beyond the end of the input file.
237 static void do_header_node(deark
*c
, lctx
*d
, struct nodedata
*nd
)
243 if(nd
->nrecs
<3) goto done
;
244 // offset[0] = B* tree header record
245 // offset[1] = not important
246 // offset[2] = B* tree map record, not important
247 pos
= nd
->dpos
+ nd
->offsets
[0];
248 if(pos
+512 > c
->infile
->len
) goto done
;
250 de_dbg(c
, "header node B*-tree header record at %"I64_FMT
, pos
);
// The fields below are read sequentially; each read advances pos.
253 n
= de_getu16be_p(&pos
);
254 de_dbg(c
, "bthDepth: %"I64_FMT
, n
);
255 bthRoot
= de_getu32be_p(&pos
);
256 de_dbg(c
, "bthRoot: %"I64_FMT
, bthRoot
);
257 n
= de_getu32be_p(&pos
);
258 de_dbg(c
, "bthNRecs: %"I64_FMT
, n
);
// First leaf node: the only header field retained for later use.
259 nd
->bthFNode
= de_getu32be_p(&pos
);
260 de_dbg(c
, "bthFNode: %"I64_FMT
, nd
->bthFNode
);
261 n
= de_getu32be_p(&pos
);
262 de_dbg(c
, "bthLNode: %"I64_FMT
, n
);
263 n
= de_getu16be_p(&pos
);
264 de_dbg(c
, "bthNodeSize: %"I64_FMT
, n
);
265 n
= de_getu16be_p(&pos
);
266 de_dbg(c
, "bthKeyLen: %"I64_FMT
, n
);
267 n
= de_getu32be_p(&pos
);
268 de_dbg(c
, "bthNNodes: %"I64_FMT
, n
);
269 n
= de_getu32be_p(&pos
);
270 de_dbg(c
, "bthFree: %"I64_FMT
, n
);
// Returns a printable name for a catalog data record type (cdrType), or
// "?" if the value is not one of the four known types.
static const char *get_cdrType_name(int n)
{
	if(n==1) return "directory";
	if(n==2) return "file";
	if(n==3) return "directory thread";
	if(n==4) return "file thread";
	return "?";
}
// Visits the characters of s from index pos1 up to (not including) s->len.
// Callers use it on a directory name (from index 0) and on the final path
// component of an output filename (from the length of the directory prefix).
// NOTE(review): the loop body is not visible here; from the name it
// presumably replaces '/' characters so they can't act as path separators
// -- confirm against the full file.
289 static void squash_slashes(de_ucstring
*s
, i64 pos1
)
293 for(i
=pos1
; i
<s
->len
; i
++) {
300 static void do_leaf_node_record_directory_pass1(deark
*c
, lctx
*d
, struct nodedata
*nd
,
301 struct recorddata
*rd
)
303 i64 pos
= rd
->datapos
;
305 struct dirid_item_struct
*dirid_item
= NULL
;
307 dirid_item
= de_malloc(c
, sizeof(struct dirid_item_struct
));
309 pos
+= 2; // common fields, already read
310 pos
+= 2; // dirFlags
312 dirID
= (u32
)de_getu32be_p(&pos
);
313 de_dbg(c
, "dirDirID: %u", (unsigned int)dirID
);
315 dirid_item
->name
= ucstring_clone(rd
->name_srd
->str
);
316 squash_slashes(dirid_item
->name
, 0);
317 dirid_item
->ParID
= rd
->ParID
;
319 de_inthashtable_add_item(c
, d
->dirid_hash
, (i64
)dirID
, (void*)dirid_item
);
323 static void get_full_path_from_dirid(deark
*c
, lctx
*d
, u32 dirid
, de_ucstring
*s
,
328 struct dirid_item_struct
*dirid_item
;
330 if(depth
>20) goto done
;
331 if(dirid
==0) goto done
;
332 ret
= de_inthashtable_get_item(c
, d
->dirid_hash
, (i64
)dirid
, &item
);
333 if(!ret
&& dirid
>1) {
334 de_warn(c
, "Unknown parent directory (ID %u)", (unsigned int)dirid
);
337 dirid_item
= (struct dirid_item_struct
*)item
;
339 if(dirid_item
->ParID
!=0 && dirid_item
->ParID
!=dirid
) {
340 get_full_path_from_dirid(c
, d
, dirid_item
->ParID
, s
, depth
+1);
343 if(!ucstring_isnonempty(dirid_item
->name
)) goto done
;
344 ucstring_append_ucstring(s
, dirid_item
->name
);
345 ucstring_append_sz(s
, "/", DE_ENCODING_LATIN1
);
349 static void read_timestamp_fields(deark
*c
, lctx
*d
, i64 pos1
,
354 read_one_timestamp(c
, d
, pos
, &fi1
->timestamp
[DE_TIMESTAMPIDX_CREATE
], "create date");
356 read_one_timestamp(c
, d
, pos
, &fi1
->timestamp
[DE_TIMESTAMPIDX_MODIFY
], "mod date");
358 read_one_timestamp(c
, d
, pos
, &fi1
->timestamp
[DE_TIMESTAMPIDX_BACKUP
], "backup date");
362 static void do_extract_dir(deark
*c
, lctx
*d
, struct nodedata
*nd
,
363 struct recorddata
*rd
, struct de_advfile
*advf
)
365 i64 pos
= rd
->datapos
;
367 pos
+= 2; // common fields, already read
368 pos
+= 2; // dirFlags
370 pos
+= 4; // dirDirID
372 read_timestamp_fields(c
, d
, pos
, advf
->mainfork
.fi
);
375 advf
->mainfork
.fi
->is_directory
= 1;
376 advf
->mainfork
.fork_exists
= 1;
377 advf
->mainfork
.fork_len
= 0;
379 // Note that we don't have to set a callback function for 0-length "files".
380 de_advfile_run(advf
);
386 u8 extract_error_flag
;
390 struct ExtDescriptor ExtRec
[3];
395 struct recorddata
*rd
;
396 struct fork_info
*fki_data
;
397 struct fork_info
*fki_rsrc
;
400 // Figure out whether we think we can extract the fork.
401 static void do_extract_fork_init(deark
*c
, lctx
*d
, struct recorddata
*rd
,
402 struct fork_info
*fki
)
406 len_avail
= d
->drAlBlkSiz
* (fki
->ExtRec
[0].num_alloc_blks
+
407 fki
->ExtRec
[1].num_alloc_blks
+ fki
->ExtRec
[2].num_alloc_blks
);
408 if(fki
->logical_eof
> len_avail
) {
409 // TODO: Need to be able to read the Extents Overflow tree.
410 de_err(c
, "%s: Files with more than 3 fragments are not supported",
411 rd
->name_srd
?ucstring_getpsz(rd
->name_srd
->str
):"");
412 fki
->extract_error_flag
= 1;
420 static void do_extract_fork_run(deark
*c
, lctx
*d
, struct recorddata
*rd
,
421 struct fork_info
*fki
, dbuf
*outf
)
423 i64 nbytes_still_to_write
;
426 nbytes_still_to_write
= fki
->logical_eof
;
430 i64 nbytes_to_write_this_time
;
432 if(nbytes_still_to_write
<=0) break;
434 fragment_dpos
= allocation_blk_dpos(d
, fki
->ExtRec
[k
].first_alloc_blk
);
435 nbytes_to_write_this_time
= d
->drAlBlkSiz
* fki
->ExtRec
[k
].num_alloc_blks
;
436 if(nbytes_to_write_this_time
> nbytes_still_to_write
) {
437 nbytes_to_write_this_time
= nbytes_still_to_write
;
440 if(fragment_dpos
+ nbytes_to_write_this_time
> c
->infile
->len
) {
441 de_err(c
, "Member file data goes beyond end of file");
445 dbuf_copy(c
->infile
, fragment_dpos
, nbytes_to_write_this_time
, outf
);
447 nbytes_still_to_write
-= nbytes_to_write_this_time
;
454 static void read_finder_info(deark
*c
, lctx
*d
, struct de_advfile
*advf
, i64 pos1
)
458 struct de_fourcc filetype
;
459 struct de_fourcc creator
;
461 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
462 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
463 de_memcpy(advf
->typecode
, filetype
.bytes
, 4);
464 advf
->has_typecode
= 1;
466 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
467 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
468 de_memcpy(advf
->creatorcode
, creator
.bytes
, 4);
469 advf
->has_creatorcode
= 1;
472 flags
= (unsigned int)de_getu16be(pos
);
473 de_dbg(c
, "finder flags: 0x%04x", flags
);
474 advf
->finderflags
= (u16
)flags
;
475 advf
->has_finderflags
= 1;
// Callback installed as advf->writefork_cbfn by do_extract_file().
// advf->userdata is the struct extract_ctx that do_extract_file() set up.
// Depending on afp->whattodo, writes either the data fork or the resource
// fork to afp->outf, using do_extract_fork_run().
// NOTE(review): the return statement is not visible here -- confirm the
// return value convention against the full file.
478 static int my_advfile_cbfn(deark
*c
, struct de_advfile
*advf
,
479 struct de_advfile_cbparams
*afp
)
481 struct extract_ctx
*ectx
= (struct extract_ctx
*)advf
->userdata
;
// Main (data) fork requested.
483 if(afp
->whattodo
== DE_ADVFILE_WRITEMAIN
) {
484 do_extract_fork_run(c
, ectx
->d
, ectx
->rd
, ectx
->fki_data
, afp
->outf
);
// Resource fork requested.
486 else if(afp
->whattodo
== DE_ADVFILE_WRITERSRC
) {
487 do_extract_fork_run(c
, ectx
->d
, ectx
->rd
, ectx
->fki_rsrc
, afp
->outf
);
493 static void do_extract_file(deark
*c
, lctx
*d
, struct nodedata
*nd
,
494 struct recorddata
*rd
, struct de_advfile
*advf
)
496 i64 pos
= rd
->datapos
;
498 struct extract_ctx
*ectx
= NULL
;
500 ectx
= de_malloc(c
, sizeof(struct extract_ctx
));
503 ectx
->fki_data
= de_malloc(c
, sizeof(struct fork_info
));
504 ectx
->fki_rsrc
= de_malloc(c
, sizeof(struct fork_info
));
505 ectx
->fki_rsrc
->is_rsrc
= 1;
507 pos
+= 2; // common fields, already read
509 n
= (i64
)de_getbyte_p(&pos
);
510 de_dbg(c
, "filFlags: %d", (int)n
);
512 n
= (i64
)de_getbyte_p(&pos
);
513 de_dbg(c
, "filTyp: %d", (int)n
);
515 read_finder_info(c
, d
, advf
, pos
);
516 pos
+= 16; // filUsrWds, Finder info
518 pos
+= 4; // filFlNum, file id
520 ectx
->fki_data
->first_alloc_blk
= de_getu16be_p(&pos
);
521 de_dbg(c
, "data fork first alloc blk: %d", (int)ectx
->fki_data
->first_alloc_blk
);
522 ectx
->fki_data
->logical_eof
= de_getu32be_p(&pos
);
523 de_dbg(c
, "data fork logical eof: %d", (int)ectx
->fki_data
->logical_eof
);
524 ectx
->fki_data
->physical_eof
= de_getu32be_p(&pos
);
525 de_dbg(c
, "data fork physical eof: %d", (int)ectx
->fki_data
->physical_eof
);
527 ectx
->fki_rsrc
->first_alloc_blk
= de_getu16be_p(&pos
);
528 de_dbg(c
, "rsrc fork first alloc blk: %d", (int)ectx
->fki_rsrc
->first_alloc_blk
);
529 ectx
->fki_rsrc
->logical_eof
= de_getu32be_p(&pos
);
530 de_dbg(c
, "rsrc fork logical eof: %d", (int)ectx
->fki_rsrc
->logical_eof
);
531 ectx
->fki_rsrc
->physical_eof
= de_getu32be_p(&pos
);
532 de_dbg(c
, "rsrc fork physical eof: %d", (int)ectx
->fki_rsrc
->physical_eof
);
534 read_timestamp_fields(c
, d
, pos
, advf
->mainfork
.fi
);
537 pos
+= 16; // filFndrInfo sizeof(FXInfo)
539 n
= de_getu16be_p(&pos
);
540 de_dbg(c
, "filClpSize: %d", (int)n
);
542 read_ExtDataRecs(c
, d
, pos
, ectx
->fki_data
->ExtRec
, 3, "filExtRec");
544 read_ExtDataRecs(c
, d
, pos
, ectx
->fki_rsrc
->ExtRec
, 3, "filRExtRec");
547 ectx
->fki_rsrc
->fork_exists
= (ectx
->fki_rsrc
->logical_eof
>0);
548 ectx
->fki_data
->fork_exists
= (ectx
->fki_data
->logical_eof
>0 || !ectx
->fki_rsrc
->fork_exists
);
550 if(ectx
->fki_data
->fork_exists
) {
551 do_extract_fork_init(c
, d
, rd
, ectx
->fki_data
);
552 if(!ectx
->fki_data
->extract_error_flag
) {
553 advf
->mainfork
.fork_len
= ectx
->fki_data
->logical_eof
;
554 advf
->mainfork
.fork_exists
= 1;
557 if(ectx
->fki_rsrc
->fork_exists
) {
558 do_extract_fork_init(c
, d
, rd
, ectx
->fki_rsrc
);
559 if(!ectx
->fki_rsrc
->extract_error_flag
) {
560 advf
->rsrcfork
.fork_len
= ectx
->fki_rsrc
->logical_eof
;
561 advf
->rsrcfork
.fork_exists
= 1;
565 advf
->userdata
= (void*)ectx
;
566 advf
->writefork_cbfn
= my_advfile_cbfn
;
569 de_advfile_set_orig_filename(advf
, rd
->name_srd
->sz
,
570 rd
->name_srd
->sz_strlen
);
573 de_advfile_run(advf
);
575 de_free(c
, ectx
->fki_data
);
576 de_free(c
, ectx
->fki_rsrc
);
580 static void do_leaf_node_record_extract_item(deark
*c
, lctx
*d
, struct nodedata
*nd
,
581 struct recorddata
*rd
)
583 struct de_advfile
*advf
= NULL
;
586 advf
= de_advfile_create(c
);
587 advf
->original_filename_flag
= 1;
589 // TODO: This is not very efficient. Maybe we should at least cache the
590 // previous file's path, since it's usually the same.
591 get_full_path_from_dirid(c
, d
, rd
->ParID
, advf
->filename
, 0);
593 de_dbg(c
, "path: \"%s\"", ucstring_getpsz_d(advf
->filename
));
594 oldlen
= advf
->filename
->len
;
595 if(rd
->name_srd
&& ucstring_isnonempty(rd
->name_srd
->str
)) {
596 ucstring_append_ucstring(advf
->filename
, rd
->name_srd
->str
);
599 ucstring_append_sz(advf
->filename
, "_", DE_ENCODING_LATIN1
);
602 squash_slashes(advf
->filename
, oldlen
);
604 advf
->snflags
= DE_SNFLAG_FULLPATH
;
606 if(rd
->cdrType
==CDRTYPE_DIR
) {
607 do_extract_dir(c
, d
, nd
, rd
, advf
);
609 else if(rd
->cdrType
==CDRTYPE_FILE
) {
610 do_extract_file(c
, d
, nd
, rd
, advf
);
613 de_advfile_destroy(advf
);
// Processes record number 'idx' of leaf node 'nd'.
// The record starts at nd->dpos + nd->offsets[idx]; its length is the
// distance to the next entry in the offset table.
// A record consists of a catalog key (length byte, parent directory ID,
// name) followed by a catalog data record whose first byte is cdrType.
// 'pass' selects what is done with the record: directory records are
// registered by do_leaf_node_record_directory_pass1(), and items are
// extracted by do_leaf_node_record_extract_item().
// NOTE(review): several short statements between the visible ones
// (declarations, the tests on 'pass', the case labels, the skip of the
// reserved key byte) are not visible here -- confirm against the full file.
616 static void do_leaf_node_record(deark
*c
, lctx
*d
, struct nodedata
*nd
, i64 idx
, int pass
)
622 struct recorddata
*rd
= NULL
;
624 rd
= de_malloc(c
, sizeof(struct recorddata
));
625 pos1_rel
= nd
->offsets
[idx
];
626 rd
->pos1
= nd
->dpos
+ pos1_rel
;
// offsets[] has nrecs+1 entries, so offsets[idx+1] exists for every record.
627 len
= nd
->offsets
[idx
+1] - nd
->offsets
[idx
];
628 de_dbg(c
, "leaf node record[%d] at %"I64_FMT
"+%"I64_FMT
", len=%"I64_FMT
,
629 (int)idx
, nd
->dpos
, pos1_rel
, len
);
632 // == Catalog File Key
634 ckrKeyLen
= (i64
)de_getbyte_p(&pos
);
635 de_dbg(c
, "ckrKeyLen: %d", (int)ckrKeyLen
);
637 de_dbg(c
, "[deleted record]");
// The data record follows the key: 1 length byte + ckrKeyLen key bytes...
641 rd
->datapos
= rd
->pos1
+ 1 + ckrKeyLen
;
// ...plus one byte when ckrKeyLen is even, so the data starts at an even
// offset from the start of the record.
642 if((ckrKeyLen
%2)==0) rd
->datapos
++; // padding
644 // Look ahead to get the cdrType
645 rd
->cdrType
= (int)dbuf_geti8(c
->infile
, rd
->datapos
);
646 de_dbg(c
, "cdrType: %d (%s)", rd
->cdrType
, get_cdrType_name(rd
->cdrType
));
// Filter on record type: directories only...
649 if(rd
->cdrType
!=CDRTYPE_DIR
) goto done
;
// ...or directories and files only.
652 if(rd
->cdrType
!=CDRTYPE_DIR
&& rd
->cdrType
!=CDRTYPE_FILE
) goto done
;
// Remaining key fields: parent directory ID, then the length-prefixed name.
659 rd
->ParID
= (u32
)de_getu32be_p(&pos
);
660 de_dbg(c
, "ckrParID: %u", (unsigned int)rd
->ParID
);
662 nlen
= (i64
)de_getbyte_p(&pos
);
663 de_dbg(c
, "name len: %d", (int)nlen
);
664 rd
->name_srd
= dbuf_read_string(c
->infile
, pos
, nlen
, nlen
, 0, d
->input_encoding
);
665 de_dbg(c
, "name: \"%s\"", ucstring_getpsz_d(rd
->name_srd
->str
));
667 // == Catalog File Data Record
669 switch(rd
->cdrType
) {
672 do_leaf_node_record_directory_pass1(c
, d
, nd
, rd
);
675 do_leaf_node_record_extract_item(c
, d
, nd
, rd
);
680 do_leaf_node_record_extract_item(c
, d
, nd
, rd
);
686 de_dbg_indent(c
, -1);
// The name read above is owned by this function.
688 de_destroy_stringreaderdata(c
, rd
->name_srd
);
693 static void do_leaf_node(deark
*c
, lctx
*d
, struct nodedata
*nd
, int pass
)
697 for(i
=0; i
<nd
->nrecs
; i
++) {
698 do_leaf_node_record(c
, d
, nd
, i
, pass
);
702 static void destroy_nodedata(deark
*c
, struct nodedata
*nd
)
705 de_free(c
, nd
->offsets
);
709 // Caller must allocate nd, set some fields in it, call this function,
710 // and is responsible for destroying nd.
711 // pass is relevant only for leaf nodes.
// Reads one B*-tree node: the node descriptor at the start of the node,
// and the record offset table at its end. A leaf node (type -1) is then
// passed to do_leaf_node(), and a header node (type 1) to do_header_node().
// Fields filled in here (dpos, f_link, b_link, node_type, node_level,
// nrecs, num_offsets, offsets) remain available to the caller afterward.
// Callers treat a zero return value as failure.
// NOTE(review): a number of short statements (declarations, nesting-level
// bookkeeping, any test on 'pass', setting of the return value) are not
// visible here -- confirm against the full file.
712 static int do_node(deark
*c
, lctx
*d
, struct nodedata
*nd
, int pass
)
716 int saved_indent_level
;
719 de_dbg_indent_save(c
, &saved_indent_level
);
721 if(d
->nesting_level
>20) goto done
;
// Node 0 is only acceptable when the caller is asking for the header node.
722 if(nd
->nodenum
==0 && !nd
->expecting_header
) goto done
;
// Record the node number; a failure to add it is reported as an invalid
// node list.
725 if(!de_inthashtable_add_item(c
, d
->nodes_seen
, nd
->nodenum
, NULL
)) {
726 de_err(c
, "Invalid node list");
732 nd
->dpos
= node_dpos(d
, nd
->nodenum
);
735 de_dbg(c
, "node #%"I64_FMT
" at %"I64_FMT
, nd
->nodenum
, nd
->dpos
);
738 // == 14-byte NodeDescriptor ==
739 nd
->f_link
= de_getu32be_p(&pos
);
740 de_dbg(c
, "fwd link: %"I64_FMT
, nd
->f_link
);
741 nd
->b_link
= de_getu32be_p(&pos
);
742 de_dbg(c
, "bwd link: %"I64_FMT
, nd
->b_link
);
// Type and level are read as signed bytes (a leaf node has type -1).
744 nd
->node_type
= (int)dbuf_geti8(c
->infile
, pos
++);
745 de_dbg(c
, "node type: %d (%s)", nd
->node_type
, get_node_type_name(nd
->node_type
));
746 nd
->node_level
= (int)dbuf_geti8(c
->infile
, pos
++);
747 de_dbg(c
, "node level: %d", nd
->node_level
);
748 nd
->nrecs
= (unsigned int)de_getu16be_p(&pos
);
749 de_dbg(c
, "number of records: %u", nd
->nrecs
);
// Nodes claiming more than 250 records are rejected.
750 if(nd
->nrecs
>250) goto done
;
753 // == The offset table at the end of the node ==
// One offset per record, plus one more (logged below as "free space").
754 nd
->num_offsets
= (i64
)nd
->nrecs
+1;
755 nd
->offsets
= de_mallocarray(c
, nd
->num_offsets
, sizeof(unsigned int));
// The table occupies the last 2*num_offsets bytes of the 512-byte node.
757 pos
= nd
->dpos
+512 - 2*nd
->num_offsets
;
758 for(i
=0; i
<nd
->num_offsets
; i
++) {
// The table is stored in reverse: the first value read belongs to the
// highest index.
760 i64 idx
= nd
->num_offsets
- 1 - i
;
761 nd
->offsets
[idx
] = (unsigned int)de_getu16be_p(&pos
);
762 if(i
==0) de_strlcpy(nbuf
, "free space", sizeof(nbuf
));
763 else de_snprintf(nbuf
, sizeof(nbuf
), "rec %u", (unsigned int)idx
);
764 de_dbg(c
, "offset to %s: %u", nbuf
, (unsigned int)nd
->offsets
[idx
]);
// Dispatch on node type. Other types (index, map) are not processed further.
767 if(nd
->node_type
== -1) {
768 do_leaf_node(c
, d
, nd
, pass
);
770 else if(nd
->node_type
==1) {
771 do_header_node(c
, d
, nd
);
775 de_dbg_indent_restore(c
, saved_indent_level
);
780 static int do_all_leaf_nodes(deark
*c
, lctx
*d
, struct nodedata
*hdr_node
, int pass
)
783 struct nodedata
*nd
= NULL
;
786 de_dbg(c
, "reading leaf nodes, pass %d", pass
);
789 // Read all leaf nodes, using the leaf-to-leaf links
790 curr_nodenum
= hdr_node
->bthFNode
;
792 while(curr_nodenum
!=0) {
793 nd
= de_malloc(c
, sizeof(struct nodedata
));
794 nd
->nodenum
= curr_nodenum
;
796 if(!do_node(c
, d
, nd
, pass
)) goto done
;
798 curr_nodenum
= nd
->f_link
;
799 destroy_nodedata(c
, nd
);
805 destroy_nodedata(c
, nd
);
806 de_dbg_indent(c
, -1);
810 static int do_catalog(deark
*c
, lctx
*d
)
813 struct nodedata
*hdr_node
= NULL
;
814 int saved_indent_level
;
817 de_dbg_indent_save(c
, &saved_indent_level
);
818 pos
= allocation_blk_dpos(d
, d
->drCTExtRec
[0].first_alloc_blk
);
819 de_dbg(c
, "catalog (first extent at %"I64_FMT
")", pos
);
821 hdr_node
= de_malloc(c
, sizeof(struct nodedata
));
822 hdr_node
->expecting_header
= 1;
823 hdr_node
->nodenum
= 0;
825 if(!do_node(c
, d
, hdr_node
, 0)) goto done
;
826 de_dbg_indent(c
, -1);
828 if(hdr_node
->node_type
!= 1) {
829 de_err(c
, "Expected header node not found");
833 // TODO: In the leaf list, is it possible/legal for a parent-dir-ID number to
834 // appear before the record for that dir-ID? I haven't seen it happen, but
835 // for all I know it is possible. If it doesn't happen, that would be good,
836 // because we wouldn't have to make an extra pass to collect directory info.
837 // But for now, we'll make two passes.
839 // Pass 1 to figure out the directory tree structure, and detect node loops
840 if(!do_all_leaf_nodes(c
, d
, hdr_node
, 1)) goto done
;
841 // Pass 2 to extract files
842 if(!do_all_leaf_nodes(c
, d
, hdr_node
, 2)) goto done
;
846 destroy_nodedata(c
, hdr_node
);
847 de_dbg_indent_restore(c
, saved_indent_level
);
851 static void destroy_dirid_hash(deark
*c
, lctx
*d
)
853 if(!d
->dirid_hash
) return;
857 void *removed_item
= NULL
;
858 struct dirid_item_struct
*dirid_item
;
859 if(!de_inthashtable_remove_any_item(c
, d
->dirid_hash
, &key
, &removed_item
)) {
862 dirid_item
= (struct dirid_item_struct
*)removed_item
;
863 ucstring_destroy(dirid_item
->name
);
864 de_free(c
, dirid_item
);
867 de_inthashtable_destroy(c
, d
->dirid_hash
);
// Module entry point: allocates the local context, reads the master
// directory blocks (starting at block 2), then processes the catalog.
// Processing stops at the first step that fails.
// NOTE(review): the remaining context initialisation (e.g. the block
// size) and the freeing of the context are not visible here -- confirm
// against the full file.
870 static void de_run_hfs(deark
*c
, de_module_params
*mparams
)
874 d
= de_malloc(c
, sizeof(lctx
));
// MacRoman is the encoding passed as the default choice.
876 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_MACROMAN
);
// nodes_seen: node numbers recorded by do_node().
// dirid_hash: directory ID -> name and parent ID.
879 d
->nodes_seen
= de_inthashtable_create(c
);
880 d
->dirid_hash
= de_inthashtable_create(c
);
882 if(!do_master_directory_blocks(c
, d
, 2)) goto done
;
884 if(!do_catalog(c
, d
)) goto done
;
// Cleanup. dirid_hash needs its own routine because its items own memory.
888 de_inthashtable_destroy(c
, d
->nodes_seen
);
889 destroy_dirid_hash(c
, d
);
894 static int de_identify_hfs(deark
*c
)
899 if(dbuf_memcmp(c
->infile
, 1024, "BD", 2)) return 0;
901 // Allocation block size must be a nonzero multiple of 512.
902 drAlBlkSiz
= de_getu32be(1024+20);
903 if(drAlBlkSiz
==0 || (drAlBlkSiz
%512)!=0) return 0;
905 has_ext
= de_input_file_has_ext(c
, "hfs");
906 return has_ext
?90:15;
// Fills in the module-info structure for the HFS module: its description,
// its run function, and its identify function.
// NOTE(review): any other assignments (such as the module's id) are not
// visible here -- confirm against the full file.
909 void de_module_hfs(deark
*c
, struct deark_module_info
*mi
)
912 mi
->desc
= "HFS filesystem image";
913 mi
->run_fn
= de_run_hfs
;
914 mi
->identify_fn
= de_identify_hfs
;