zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / hfs.c
blobc9aaf04d07ae29167e323cf8498b9ca7ac2cd63e
1 // This file is part of Deark.
2 // Copyright (C) 2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // HFS (Mac filesystem)
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
DE_DECLARE_MODULE(de_module_hfs);

// Catalog data record types (cdrType) that this module acts on.
// See get_cdrType_name() for the other known values.
#define CDRTYPE_DIR 1
#define CDRTYPE_FILE 2
15 struct ExtDescriptor {
16 i64 first_alloc_blk;
17 i64 num_alloc_blks;
20 // Used by dirid_hash
21 struct dirid_item_struct {
22 u32 ParID;
23 de_ucstring *name;
26 // Represents one record in a leaf node (one file or directory)
27 struct recorddata {
28 i64 pos1;
29 i64 datapos;
30 int cdrType;
31 u32 ParID;
32 struct de_stringreaderdata *name_srd;
35 struct nodedata {
36 int expecting_header;
37 i64 nodenum;
39 i64 dpos;
40 i64 f_link, b_link;
41 unsigned int nrecs;
42 int node_type;
43 int node_level;
45 i64 bthFNode; // Used if this is a header node
47 i64 num_offsets;
48 unsigned int *offsets;
51 typedef struct localctx_struct {
52 int input_encoding;
53 int nesting_level;
54 i64 blocksize;
55 i64 num_files_in_root_dir;
56 i64 drNmAlBlks;
57 i64 drAlBlkSiz;
58 i64 drClpSiz;
59 i64 drAlBlSt;
60 i64 drXTFlSize;
61 i64 drCTFlSize;
62 struct ExtDescriptor drXTExtRec[3];
63 struct ExtDescriptor drCTExtRec[3];
65 struct de_inthashtable *nodes_seen;
66 struct de_inthashtable *dirid_hash;
67 } lctx;
69 static i64 block_dpos(lctx *d, i64 blknum)
71 return blknum * d->blocksize;
74 static i64 allocation_blk_dpos(lctx *d, i64 ablknum)
76 return (d->blocksize * d->drAlBlSt) + (d->drAlBlkSiz * ablknum);
79 static i64 node_dpos(lctx *d, i64 nodenum)
81 i64 n;
83 // If the catalog were contiguous, this would be the offset we want, from the
84 // start of the catalog.
85 n = 512 * nodenum;
87 if(n < d->drCTExtRec[0].num_alloc_blks * d->drAlBlkSiz) {
88 // It's in the first extent.
89 return allocation_blk_dpos(d, d->drCTExtRec[0].first_alloc_blk) + n;
91 // Not in first extent. Account for its size, and try the second extent.
92 n -= d->drCTExtRec[0].num_alloc_blks * d->drAlBlkSiz;
93 if(n < d->drCTExtRec[1].num_alloc_blks * d->drAlBlkSiz) {
94 // It's in the second extent.
95 return allocation_blk_dpos(d, d->drCTExtRec[1].first_alloc_blk) + n;
97 // Not in second extent. Account for its size, and assume it's in the third extent.
98 n -= d->drCTExtRec[1].num_alloc_blks * d->drAlBlkSiz;
99 return allocation_blk_dpos(d, d->drCTExtRec[2].first_alloc_blk) + n;
102 // returned_ts can be NULL.
103 static void read_one_timestamp(deark *c, lctx *d, i64 pos, struct de_timestamp *returned_ts,
104 const char *name)
106 i64 ts_raw;
107 struct de_timestamp ts;
108 char timestamp_buf[64];
110 de_zeromem(&ts, sizeof(struct de_timestamp));
111 ts.is_valid = 0;
112 ts_raw = de_getu32be(pos);
113 if(ts_raw!=0) {
114 de_mac_time_to_timestamp(ts_raw, &ts);
117 if(returned_ts) {
118 *returned_ts = ts;
121 if(ts.is_valid) {
122 de_timestamp_to_string(&ts, timestamp_buf, sizeof(timestamp_buf), 0);
124 else {
125 de_strlcpy(timestamp_buf, "unknown", sizeof(timestamp_buf));
127 de_dbg(c, "%s: %"I64_FMT" (%s)", name, ts_raw, timestamp_buf);
130 static void read_ExtDataRecs(deark *c, lctx *d, i64 pos1,
131 struct ExtDescriptor *eds, size_t num_eds, const char *name)
133 size_t i;
134 i64 pos = pos1;
136 for(i=0; i<num_eds; i++) {
137 eds[i].first_alloc_blk = de_getu16be_p(&pos);
138 eds[i].num_alloc_blks = de_getu16be_p(&pos);
139 de_dbg(c, "%s[%u]: first_blk=%u, num_blks=%u", name, (unsigned int)i,
140 (unsigned int)eds[i].first_alloc_blk, (unsigned int)eds[i].num_alloc_blks);
144 static int do_master_directory_blocks(deark *c, lctx *d, i64 blknum)
146 i64 pos;
147 i64 nlen;
148 i64 catalog_num_alloc_blocks;
149 de_ucstring *s = NULL;
150 int retval = 0;
152 pos = block_dpos(d, blknum);
153 de_dbg(c, "master directory blocks at %"I64_FMT" (block %"I64_FMT")", pos, blknum);
154 de_dbg_indent(c, 1);
156 pos += 2; // signature
157 read_one_timestamp(c, d, pos, NULL, "vol. create date");
158 pos += 4;
159 read_one_timestamp(c, d, pos, NULL, "vol. last mod date");
160 pos += 4;
161 pos += 2; // attribs
163 d->num_files_in_root_dir = de_getu16be_p(&pos); // drNmFls
164 de_dbg(c, "num. files in root dir: %d", (int)d->num_files_in_root_dir);
166 pos += 2; // first block of volume bitmap
167 pos += 2; // start of next allocation search
169 d->drNmAlBlks = de_getu16be_p(&pos);
170 de_dbg(c, "drNmAlBlks: %d", (int)d->drNmAlBlks);
171 d->drAlBlkSiz = de_getu32be_p(&pos);
172 de_dbg(c, "drAlBlkSiz: %u", (unsigned int)d->drAlBlkSiz);
173 d->drClpSiz = de_getu32be_p(&pos);
174 de_dbg(c, "drClpSiz: %u", (unsigned int)d->drClpSiz);
175 d->drAlBlSt = de_getu16be_p(&pos);
176 de_dbg(c, "drAlBlSt: %d", (int)d->drAlBlSt);
177 pos += 4; // drNxtCNID
178 pos += 2; // drFreeBks
180 nlen = de_getbyte_p(&pos);
181 s = ucstring_create(c);
182 dbuf_read_to_ucstring_n(c->infile, pos, nlen, 27, s, 0, d->input_encoding);
183 de_dbg(c, "volume name: \"%s\"", ucstring_getpsz_d(s));
184 pos += 27;
186 pos += 4; // drVolBkUp
187 pos += 2; // drVSeqNum
188 pos += 4; // drWrCnt
189 pos += 4; // drXTClpSiz
190 pos += 4; // drCTClpSiz
191 pos += 2; // drNmRtDirs
192 pos += 4; // drFilCnt
193 pos += 4; // drDirCnt
194 pos += 4*8; // drFndrInfo
195 pos += 2; // drVCSize
196 pos += 2; // drVBMCSize
197 pos += 2; // drCtlCSize
199 d->drXTFlSize = de_getu32be_p(&pos);
200 de_dbg(c, "drXTFlSize: %"I64_FMT, d->drXTFlSize);
201 read_ExtDataRecs(c, d, pos, d->drCTExtRec, 3, "drXTFlSize");
202 pos += 12;
204 d->drCTFlSize = de_getu32be_p(&pos);
205 de_dbg(c, "drCTFlSize: %"I64_FMT, d->drCTFlSize);
206 read_ExtDataRecs(c, d, pos, d->drCTExtRec, 3, "drCTExtRec");
207 pos += 12;
209 catalog_num_alloc_blocks = d->drCTExtRec[0].num_alloc_blks +
210 d->drCTExtRec[1].num_alloc_blks + d->drCTExtRec[2].num_alloc_blks;
212 if(d->drCTFlSize > catalog_num_alloc_blocks * d->drAlBlkSiz) {
213 // TODO: Support this
214 de_err(c, "Catalog has more than 3 fragments, not supported");
215 goto done;
218 retval = 1;
219 done:
220 de_dbg_indent(c, -1);
221 ucstring_destroy(s);
222 return retval;
// Returns a printable name for a B*-tree node type code, or "?" if the
// code is not recognized. Never returns NULL.
static const char *get_node_type_name(int t)
{
	if(t==-1) return "leaf";
	if(t==0) return "index";
	if(t==1) return "header";
	if(t==2) return "map";
	return "?";
}
237 static void do_header_node(deark *c, lctx *d, struct nodedata *nd)
239 i64 pos;
240 i64 bthRoot;
241 i64 n;
243 if(nd->nrecs<3) goto done;
244 // offset[0] = B* tree header record
245 // offset[1] = not important
246 // offset[2] = B* tree map record, not important
247 pos = nd->dpos + nd->offsets[0];
248 if(pos+512 > c->infile->len) goto done;
250 de_dbg(c, "header node B*-tree header record at %"I64_FMT, pos);
251 de_dbg_indent(c, 1);
253 n = de_getu16be_p(&pos);
254 de_dbg(c, "bthDepth: %"I64_FMT, n);
255 bthRoot = de_getu32be_p(&pos);
256 de_dbg(c, "bthRoot: %"I64_FMT, bthRoot);
257 n = de_getu32be_p(&pos);
258 de_dbg(c, "bthNRecs: %"I64_FMT, n);
259 nd->bthFNode = de_getu32be_p(&pos);
260 de_dbg(c, "bthFNode: %"I64_FMT, nd->bthFNode);
261 n = de_getu32be_p(&pos);
262 de_dbg(c, "bthLNode: %"I64_FMT, n);
263 n = de_getu16be_p(&pos);
264 de_dbg(c, "bthNodeSize: %"I64_FMT, n);
265 n = de_getu16be_p(&pos);
266 de_dbg(c, "bthKeyLen: %"I64_FMT, n);
267 n = de_getu32be_p(&pos);
268 de_dbg(c, "bthNNodes: %"I64_FMT, n);
269 n = de_getu32be_p(&pos);
270 de_dbg(c, "bthFree: %"I64_FMT, n);
272 de_dbg_indent(c, 1);
273 done:
// Returns a printable name for a catalog data record type (cdrType), or
// "?" if the type is not recognized. Never returns NULL.
static const char *get_cdrType_name(int n)
{
	static const char *const names[] = {
		"directory", "file", "directory thread", "file thread"
	};

	if(n>=1 && n<=4) {
		return names[n-1];
	}
	return "?";
}
289 static void squash_slashes(de_ucstring *s, i64 pos1)
291 i64 i;
293 for(i=pos1; i<s->len; i++) {
294 if(s->str[i]=='/') {
295 s->str[i] = '_';
300 static void do_leaf_node_record_directory_pass1(deark *c, lctx *d, struct nodedata *nd,
301 struct recorddata *rd)
303 i64 pos = rd->datapos;
304 u32 dirID;
305 struct dirid_item_struct *dirid_item = NULL;
307 dirid_item = de_malloc(c, sizeof(struct dirid_item_struct));
309 pos += 2; // common fields, already read
310 pos += 2; // dirFlags
311 pos += 2; // valence
312 dirID = (u32)de_getu32be_p(&pos);
313 de_dbg(c, "dirDirID: %u", (unsigned int)dirID);
315 dirid_item->name = ucstring_clone(rd->name_srd->str);
316 squash_slashes(dirid_item->name, 0);
317 dirid_item->ParID = rd->ParID;
319 de_inthashtable_add_item(c, d->dirid_hash, (i64)dirID, (void*)dirid_item);
320 dirid_item = NULL;
323 static void get_full_path_from_dirid(deark *c, lctx *d, u32 dirid, de_ucstring *s,
324 int depth)
326 void *item;
327 int ret;
328 struct dirid_item_struct *dirid_item;
330 if(depth>20) goto done;
331 if(dirid==0) goto done;
332 ret = de_inthashtable_get_item(c, d->dirid_hash, (i64)dirid, &item);
333 if(!ret && dirid>1) {
334 de_warn(c, "Unknown parent directory (ID %u)", (unsigned int)dirid);
336 if(!ret) goto done;
337 dirid_item = (struct dirid_item_struct*)item;
339 if(dirid_item->ParID!=0 && dirid_item->ParID!=dirid) {
340 get_full_path_from_dirid(c, d, dirid_item->ParID, s, depth+1);
343 if(!ucstring_isnonempty(dirid_item->name)) goto done;
344 ucstring_append_ucstring(s, dirid_item->name);
345 ucstring_append_sz(s, "/", DE_ENCODING_LATIN1);
346 done:
349 static void read_timestamp_fields(deark *c, lctx *d, i64 pos1,
350 de_finfo *fi1)
352 i64 pos = pos1;
354 read_one_timestamp(c, d, pos, &fi1->timestamp[DE_TIMESTAMPIDX_CREATE], "create date");
355 pos += 4;
356 read_one_timestamp(c, d, pos, &fi1->timestamp[DE_TIMESTAMPIDX_MODIFY], "mod date");
357 pos += 4;
358 read_one_timestamp(c, d, pos, &fi1->timestamp[DE_TIMESTAMPIDX_BACKUP], "backup date");
359 //pos += 4;
362 static void do_extract_dir(deark *c, lctx *d, struct nodedata *nd,
363 struct recorddata *rd, struct de_advfile *advf)
365 i64 pos = rd->datapos;
367 pos += 2; // common fields, already read
368 pos += 2; // dirFlags
369 pos += 2; // dirVal
370 pos += 4; // dirDirID
372 read_timestamp_fields(c, d, pos, advf->mainfork.fi);
373 //pos += 12;
375 advf->mainfork.fi->is_directory = 1;
376 advf->mainfork.fork_exists = 1;
377 advf->mainfork.fork_len = 0;
379 // Note that we don't have to set a callback function for 0-length "files".
380 de_advfile_run(advf);
383 struct fork_info {
384 u8 is_rsrc;
385 u8 fork_exists;
386 u8 extract_error_flag;
387 i64 first_alloc_blk;
388 i64 logical_eof;
389 i64 physical_eof;
390 struct ExtDescriptor ExtRec[3];
393 struct extract_ctx {
394 lctx *d;
395 struct recorddata *rd;
396 struct fork_info *fki_data;
397 struct fork_info *fki_rsrc;
400 // Figure out whether we think we can extract the fork.
401 static void do_extract_fork_init(deark *c, lctx *d, struct recorddata *rd,
402 struct fork_info *fki)
404 i64 len_avail;
406 len_avail = d->drAlBlkSiz * (fki->ExtRec[0].num_alloc_blks +
407 fki->ExtRec[1].num_alloc_blks + fki->ExtRec[2].num_alloc_blks);
408 if(fki->logical_eof > len_avail) {
409 // TODO: Need to be able to read the Extents Overflow tree.
410 de_err(c, "%s: Files with more than 3 fragments are not supported",
411 rd->name_srd?ucstring_getpsz(rd->name_srd->str):"");
412 fki->extract_error_flag = 1;
413 goto done;
416 done:
420 static void do_extract_fork_run(deark *c, lctx *d, struct recorddata *rd,
421 struct fork_info *fki, dbuf *outf)
423 i64 nbytes_still_to_write;
424 size_t k;
426 nbytes_still_to_write = fki->logical_eof;
428 for(k=0; k<3; k++) {
429 i64 fragment_dpos;
430 i64 nbytes_to_write_this_time;
432 if(nbytes_still_to_write<=0) break;
434 fragment_dpos = allocation_blk_dpos(d, fki->ExtRec[k].first_alloc_blk);
435 nbytes_to_write_this_time = d->drAlBlkSiz * fki->ExtRec[k].num_alloc_blks;
436 if(nbytes_to_write_this_time > nbytes_still_to_write) {
437 nbytes_to_write_this_time = nbytes_still_to_write;
440 if(fragment_dpos + nbytes_to_write_this_time > c->infile->len) {
441 de_err(c, "Member file data goes beyond end of file");
442 goto done;
445 dbuf_copy(c->infile, fragment_dpos, nbytes_to_write_this_time, outf);
447 nbytes_still_to_write -= nbytes_to_write_this_time;
450 done:
454 static void read_finder_info(deark *c, lctx *d, struct de_advfile *advf, i64 pos1)
456 i64 pos = pos1;
457 unsigned int flags;
458 struct de_fourcc filetype;
459 struct de_fourcc creator;
461 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
462 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
463 de_memcpy(advf->typecode, filetype.bytes, 4);
464 advf->has_typecode = 1;
465 pos += 4;
466 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
467 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
468 de_memcpy(advf->creatorcode, creator.bytes, 4);
469 advf->has_creatorcode = 1;
470 pos += 4;
472 flags = (unsigned int)de_getu16be(pos);
473 de_dbg(c, "finder flags: 0x%04x", flags);
474 advf->finderflags = (u16)flags;
475 advf->has_finderflags = 1;
478 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
479 struct de_advfile_cbparams *afp)
481 struct extract_ctx *ectx = (struct extract_ctx*)advf->userdata;
483 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
484 do_extract_fork_run(c, ectx->d, ectx->rd, ectx->fki_data, afp->outf);
486 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
487 do_extract_fork_run(c, ectx->d, ectx->rd, ectx->fki_rsrc, afp->outf);
490 return 1;
493 static void do_extract_file(deark *c, lctx *d, struct nodedata *nd,
494 struct recorddata *rd, struct de_advfile *advf)
496 i64 pos = rd->datapos;
497 i64 n;
498 struct extract_ctx *ectx = NULL;
500 ectx = de_malloc(c, sizeof(struct extract_ctx));
501 ectx->d = d;
502 ectx->rd = rd;
503 ectx->fki_data = de_malloc(c, sizeof(struct fork_info));
504 ectx->fki_rsrc = de_malloc(c, sizeof(struct fork_info));
505 ectx->fki_rsrc->is_rsrc = 1;
507 pos += 2; // common fields, already read
509 n = (i64)de_getbyte_p(&pos);
510 de_dbg(c, "filFlags: %d", (int)n);
512 n = (i64)de_getbyte_p(&pos);
513 de_dbg(c, "filTyp: %d", (int)n);
515 read_finder_info(c, d, advf, pos);
516 pos += 16; // filUsrWds, Finder info
518 pos += 4; // filFlNum, file id
520 ectx->fki_data->first_alloc_blk = de_getu16be_p(&pos);
521 de_dbg(c, "data fork first alloc blk: %d", (int)ectx->fki_data->first_alloc_blk);
522 ectx->fki_data->logical_eof = de_getu32be_p(&pos);
523 de_dbg(c, "data fork logical eof: %d", (int)ectx->fki_data->logical_eof);
524 ectx->fki_data->physical_eof = de_getu32be_p(&pos);
525 de_dbg(c, "data fork physical eof: %d", (int)ectx->fki_data->physical_eof);
527 ectx->fki_rsrc->first_alloc_blk = de_getu16be_p(&pos);
528 de_dbg(c, "rsrc fork first alloc blk: %d", (int)ectx->fki_rsrc->first_alloc_blk);
529 ectx->fki_rsrc->logical_eof = de_getu32be_p(&pos);
530 de_dbg(c, "rsrc fork logical eof: %d", (int)ectx->fki_rsrc->logical_eof);
531 ectx->fki_rsrc->physical_eof = de_getu32be_p(&pos);
532 de_dbg(c, "rsrc fork physical eof: %d", (int)ectx->fki_rsrc->physical_eof);
534 read_timestamp_fields(c, d, pos, advf->mainfork.fi);
535 pos += 12;
537 pos += 16; // filFndrInfo sizeof(FXInfo)
539 n = de_getu16be_p(&pos);
540 de_dbg(c, "filClpSize: %d", (int)n);
542 read_ExtDataRecs(c, d, pos, ectx->fki_data->ExtRec, 3, "filExtRec");
543 pos += 12;
544 read_ExtDataRecs(c, d, pos, ectx->fki_rsrc->ExtRec, 3, "filRExtRec");
545 pos += 12;
547 ectx->fki_rsrc->fork_exists = (ectx->fki_rsrc->logical_eof>0);
548 ectx->fki_data->fork_exists = (ectx->fki_data->logical_eof>0 || !ectx->fki_rsrc->fork_exists);
550 if(ectx->fki_data->fork_exists) {
551 do_extract_fork_init(c, d, rd, ectx->fki_data);
552 if(!ectx->fki_data->extract_error_flag) {
553 advf->mainfork.fork_len = ectx->fki_data->logical_eof;
554 advf->mainfork.fork_exists = 1;
557 if(ectx->fki_rsrc->fork_exists) {
558 do_extract_fork_init(c, d, rd, ectx->fki_rsrc);
559 if(!ectx->fki_rsrc->extract_error_flag) {
560 advf->rsrcfork.fork_len = ectx->fki_rsrc->logical_eof;
561 advf->rsrcfork.fork_exists = 1;
565 advf->userdata = (void*)ectx;
566 advf->writefork_cbfn = my_advfile_cbfn;
568 if(rd->name_srd) {
569 de_advfile_set_orig_filename(advf, rd->name_srd->sz,
570 rd->name_srd->sz_strlen);
573 de_advfile_run(advf);
575 de_free(c, ectx->fki_data);
576 de_free(c, ectx->fki_rsrc);
577 de_free(c, ectx);
580 static void do_leaf_node_record_extract_item(deark *c, lctx *d, struct nodedata *nd,
581 struct recorddata *rd)
583 struct de_advfile *advf = NULL;
584 i64 oldlen;
586 advf = de_advfile_create(c);
587 advf->original_filename_flag = 1;
589 // TODO: This is not very efficient. Maybe we should at least cache the
590 // previous file's path, since it's usually the same.
591 get_full_path_from_dirid(c, d, rd->ParID, advf->filename, 0);
593 de_dbg(c, "path: \"%s\"", ucstring_getpsz_d(advf->filename));
594 oldlen = advf->filename->len;
595 if(rd->name_srd && ucstring_isnonempty(rd->name_srd->str)) {
596 ucstring_append_ucstring(advf->filename, rd->name_srd->str);
598 else {
599 ucstring_append_sz(advf->filename, "_", DE_ENCODING_LATIN1);
602 squash_slashes(advf->filename, oldlen);
604 advf->snflags = DE_SNFLAG_FULLPATH;
606 if(rd->cdrType==CDRTYPE_DIR) {
607 do_extract_dir(c, d, nd, rd, advf);
609 else if(rd->cdrType==CDRTYPE_FILE) {
610 do_extract_file(c, d, nd, rd, advf);
613 de_advfile_destroy(advf);
616 static void do_leaf_node_record(deark *c, lctx *d, struct nodedata *nd, i64 idx, int pass)
618 i64 pos1_rel, pos;
619 i64 len;
620 i64 ckrKeyLen;
621 i64 nlen;
622 struct recorddata *rd = NULL;
624 rd = de_malloc(c, sizeof(struct recorddata));
625 pos1_rel = nd->offsets[idx];
626 rd->pos1 = nd->dpos + pos1_rel;
627 len = nd->offsets[idx+1] - nd->offsets[idx];
628 de_dbg(c, "leaf node record[%d] at %"I64_FMT"+%"I64_FMT", len=%"I64_FMT,
629 (int)idx, nd->dpos, pos1_rel, len);
630 de_dbg_indent(c, 1);
632 // == Catalog File Key
633 pos = rd->pos1;
634 ckrKeyLen = (i64)de_getbyte_p(&pos);
635 de_dbg(c, "ckrKeyLen: %d", (int)ckrKeyLen);
636 if(ckrKeyLen==0) {
637 de_dbg(c, "[deleted record]");
638 goto done;
641 rd->datapos = rd->pos1 + 1 + ckrKeyLen;
642 if((ckrKeyLen%2)==0) rd->datapos++; // padding
644 // Look ahead to get the cdrType
645 rd->cdrType = (int)dbuf_geti8(c->infile, rd->datapos);
646 de_dbg(c, "cdrType: %d (%s)", rd->cdrType, get_cdrType_name(rd->cdrType));
648 if(pass==1) {
649 if(rd->cdrType!=CDRTYPE_DIR) goto done;
651 else if(pass==2) {
652 if(rd->cdrType!=CDRTYPE_DIR && rd->cdrType!=CDRTYPE_FILE) goto done;
654 else {
655 goto done;
658 pos++; // ckrResrv1
659 rd->ParID = (u32)de_getu32be_p(&pos);
660 de_dbg(c, "ckrParID: %u", (unsigned int)rd->ParID);
662 nlen = (i64)de_getbyte_p(&pos);
663 de_dbg(c, "name len: %d", (int)nlen);
664 rd->name_srd = dbuf_read_string(c->infile, pos, nlen, nlen, 0, d->input_encoding);
665 de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(rd->name_srd->str));
667 // == Catalog File Data Record
669 switch(rd->cdrType) {
670 case CDRTYPE_DIR:
671 if(pass==1) {
672 do_leaf_node_record_directory_pass1(c, d, nd, rd);
674 else if(pass==2) {
675 do_leaf_node_record_extract_item(c, d, nd, rd);
677 break;
678 case CDRTYPE_FILE:
679 if(pass==2) {
680 do_leaf_node_record_extract_item(c, d, nd, rd);
682 break;
685 done:
686 de_dbg_indent(c, -1);
687 if(rd) {
688 de_destroy_stringreaderdata(c, rd->name_srd);
689 de_free(c, rd);
693 static void do_leaf_node(deark *c, lctx *d, struct nodedata *nd, int pass)
695 i64 i;
697 for(i=0; i<nd->nrecs; i++) {
698 do_leaf_node_record(c, d, nd, i, pass);
702 static void destroy_nodedata(deark *c, struct nodedata *nd)
704 if(!nd) return;
705 de_free(c, nd->offsets);
706 de_free(c, nd);
709 // Caller must allocate nd, set some fields in it, call this function,
710 // and is responsible for destroying nd.
711 // pass is relevant only for leaf nodes.
712 static int do_node(deark *c, lctx *d, struct nodedata *nd, int pass)
714 i64 pos;
715 i64 i;
716 int saved_indent_level;
717 int retval = 0;
719 de_dbg_indent_save(c, &saved_indent_level);
720 d->nesting_level++;
721 if(d->nesting_level>20) goto done;
722 if(nd->nodenum==0 && !nd->expecting_header) goto done;
724 if(pass==1) {
725 if(!de_inthashtable_add_item(c, d->nodes_seen, nd->nodenum, NULL)) {
726 de_err(c, "Invalid node list");
727 goto done;
730 retval = 1;
732 nd->dpos = node_dpos(d, nd->nodenum);
733 pos = nd->dpos;
735 de_dbg(c, "node #%"I64_FMT" at %"I64_FMT, nd->nodenum, nd->dpos);
736 de_dbg_indent(c, 1);
738 // == 14-byte NodeDescriptor ==
739 nd->f_link = de_getu32be_p(&pos);
740 de_dbg(c, "fwd link: %"I64_FMT, nd->f_link);
741 nd->b_link = de_getu32be_p(&pos);
742 de_dbg(c, "bwd link: %"I64_FMT, nd->b_link);
744 nd->node_type = (int)dbuf_geti8(c->infile, pos++);
745 de_dbg(c, "node type: %d (%s)", nd->node_type, get_node_type_name(nd->node_type));
746 nd->node_level = (int)dbuf_geti8(c->infile, pos++);
747 de_dbg(c, "node level: %d", nd->node_level);
748 nd->nrecs = (unsigned int)de_getu16be_p(&pos);
749 de_dbg(c, "number of records: %u", nd->nrecs);
750 if(nd->nrecs>250) goto done;
751 pos += 2; // ndResv2
753 // == The offset table at the end of the node ==
754 nd->num_offsets = (i64)nd->nrecs+1;
755 nd->offsets = de_mallocarray(c, nd->num_offsets, sizeof(unsigned int));
757 pos = nd->dpos+512 - 2*nd->num_offsets;
758 for(i=0; i<nd->num_offsets; i++) {
759 char nbuf[32];
760 i64 idx = nd->num_offsets - 1 - i;
761 nd->offsets[idx] = (unsigned int)de_getu16be_p(&pos);
762 if(i==0) de_strlcpy(nbuf, "free space", sizeof(nbuf));
763 else de_snprintf(nbuf, sizeof(nbuf), "rec %u", (unsigned int)idx);
764 de_dbg(c, "offset to %s: %u", nbuf, (unsigned int)nd->offsets[idx]);
767 if(nd->node_type == -1) {
768 do_leaf_node(c, d, nd, pass);
770 else if(nd->node_type==1) {
771 do_header_node(c, d, nd);
774 done:
775 de_dbg_indent_restore(c, saved_indent_level);
776 d->nesting_level--;
777 return retval;
780 static int do_all_leaf_nodes(deark *c, lctx *d, struct nodedata *hdr_node, int pass)
782 i64 curr_nodenum;
783 struct nodedata *nd = NULL;
784 int retval = 0;
786 de_dbg(c, "reading leaf nodes, pass %d", pass);
787 de_dbg_indent(c, 1);
789 // Read all leaf nodes, using the leaf-to-leaf links
790 curr_nodenum = hdr_node->bthFNode;
792 while(curr_nodenum!=0) {
793 nd = de_malloc(c, sizeof(struct nodedata));
794 nd->nodenum = curr_nodenum;
796 if(!do_node(c, d, nd, pass)) goto done;
798 curr_nodenum = nd->f_link;
799 destroy_nodedata(c, nd);
800 nd = NULL;
802 retval = 1;
804 done:
805 destroy_nodedata(c, nd);
806 de_dbg_indent(c, -1);
807 return retval;
810 static int do_catalog(deark *c, lctx *d)
812 i64 pos;
813 struct nodedata *hdr_node = NULL;
814 int saved_indent_level;
815 int retval = 0;
817 de_dbg_indent_save(c, &saved_indent_level);
818 pos = allocation_blk_dpos(d, d->drCTExtRec[0].first_alloc_blk);
819 de_dbg(c, "catalog (first extent at %"I64_FMT")", pos);
821 hdr_node = de_malloc(c, sizeof(struct nodedata));
822 hdr_node->expecting_header = 1;
823 hdr_node->nodenum = 0;
824 de_dbg_indent(c, 1);
825 if(!do_node(c, d, hdr_node, 0)) goto done;
826 de_dbg_indent(c, -1);
828 if(hdr_node->node_type != 1) {
829 de_err(c, "Expected header node not found");
830 goto done;
833 // TODO: In the leaf list, is it possible/legal for a parent-dir-ID number to
834 // appear before the record for that dir-ID? I haven't seen it happen, but
835 // for all I know it is possible. If it doesn't happen, that would be good,
836 // because we wouldn't have to make an extra pass to collect directory info.
837 // But for now, we'll make two passes.
839 // Pass 1 to figure out the directory tree structure, and detect node loops
840 if(!do_all_leaf_nodes(c, d, hdr_node, 1)) goto done;
841 // Pass 2 to extract files
842 if(!do_all_leaf_nodes(c, d, hdr_node, 2)) goto done;
844 retval = 1;
845 done:
846 destroy_nodedata(c, hdr_node);
847 de_dbg_indent_restore(c, saved_indent_level);
848 return retval;
851 static void destroy_dirid_hash(deark *c, lctx *d)
853 if(!d->dirid_hash) return;
855 while(1) {
856 i64 key;
857 void *removed_item = NULL;
858 struct dirid_item_struct *dirid_item;
859 if(!de_inthashtable_remove_any_item(c, d->dirid_hash, &key, &removed_item)) {
860 break;
862 dirid_item = (struct dirid_item_struct *)removed_item;
863 ucstring_destroy(dirid_item->name);
864 de_free(c, dirid_item);
867 de_inthashtable_destroy(c, d->dirid_hash);
870 static void de_run_hfs(deark *c, de_module_params *mparams)
872 lctx *d = NULL;
874 d = de_malloc(c, sizeof(lctx));
876 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
878 d->blocksize = 512;
879 d->nodes_seen = de_inthashtable_create(c);
880 d->dirid_hash = de_inthashtable_create(c);
882 if(!do_master_directory_blocks(c, d, 2)) goto done;
884 if(!do_catalog(c, d)) goto done;
886 done:
887 if(d) {
888 de_inthashtable_destroy(c, d->nodes_seen);
889 destroy_dirid_hash(c, d);
890 de_free(c, d);
894 static int de_identify_hfs(deark *c)
896 i64 drAlBlkSiz;
897 int has_ext;
899 if(dbuf_memcmp(c->infile, 1024, "BD", 2)) return 0;
901 // Allocation block size must be a nonzero multiple of 512.
902 drAlBlkSiz = de_getu32be(1024+20);
903 if(drAlBlkSiz==0 || (drAlBlkSiz%512)!=0) return 0;
905 has_ext = de_input_file_has_ext(c, "hfs");
906 return has_ext?90:15;
909 void de_module_hfs(deark *c, struct deark_module_info *mi)
911 mi->id = "hfs";
912 mi->desc = "HFS filesystem image";
913 mi->run_fn = de_run_hfs;
914 mi->identify_fn = de_identify_hfs;