// This file is part of Deark.
// Copyright (C) 2018 Jason Summers
// See the file COPYING for terms of use.

// AppleSingle and AppleDouble
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_applesd
);
12 typedef struct localctx_struct
{
17 struct de_advfile
*advf
;
22 struct entry_id_struct
;
29 const struct entry_id_struct
*eid
;
32 typedef void (*handler_fn_type
)(deark
*c
, lctx
*d
, struct entry_struct
*e
);
34 struct entry_id_struct
{
40 // I'm about 60% sure that the standard elements that are presumably strings
41 // were intended to be raw ASCII-like characters. Too bad they didn't mention
42 // that in the spec. It's common to find files that contain "Pascal" strings,
43 // where the first byte is the length of the (rest of the) string.
44 // It's also common for string elements (whether Pascal or not) to have extra
45 // NUL bytes at the end of them, for no apparent reason.
46 static int is_pascal_string(deark
*c
, lctx
*d
, struct entry_struct
*e
, u8 firstbyte
)
48 if(e
->length
<1) return 0;
50 // Assume this field won't be larger than any Pascal string could need.
51 if(e
->length
> 256) return 0;
53 if(1+(i64
)firstbyte
> e
->length
) return 0; // A Pascal string wouldn't fit.
55 // This could be wrong, if a non-Pascal string starts with a nonprintable char.
56 if(firstbyte
<32) return 1;
58 // At this point, we could do more heuristics, such as testing whether the
59 // non-NUL bytes stop exactly where they should for a Pascal string.
60 // But perfection is impossible.
61 // For now, just assume it's not a Pascal string. Worst case, the decoded
62 // string will have a garbage character prepended.
63 // TODO: Maybe add a user option.
67 static void handler_string(deark
*c
, lctx
*d
, struct entry_struct
*e
)
69 struct de_stringreaderdata
*srd
= NULL
;
72 if(e
->length
<1) goto done
;
74 firstbyte
= de_getbyte(e
->offset
);
77 de_dbg(c
, "string is apparently empty");
80 else if(is_pascal_string(c
, d
, e
, firstbyte
)) {
81 i64 slen
= (i64
)firstbyte
;
83 de_dbg(c
, "guessing this is a Pascal string, len: %u", (unsigned int)slen
);
84 srd
= dbuf_read_string(c
->infile
, e
->offset
+1, slen
, slen
, 0, d
->input_encoding
);
87 srd
= dbuf_read_string(c
->infile
, e
->offset
, e
->length
, 1024,
88 DE_CONVFLAG_STOP_AT_NUL
, d
->input_encoding
);
91 de_dbg(c
, "%s: \"%s\"", e
->eid
->name
, ucstring_getpsz_d(srd
->str
));
93 if(e
->id
==3 && srd
->str
->len
>0) { // id 3 = real name
94 ucstring_empty(d
->advf
->filename
);
95 ucstring_append_ucstring(d
->advf
->filename
, srd
->str
);
96 d
->advf
->original_filename_flag
= 1;
97 de_advfile_set_orig_filename(d
->advf
, srd
->sz
, srd
->sz_strlen
);
101 de_destroy_stringreaderdata(c
, srd
);
104 // Read, debug, and store in caller-supplied returned_ts.
105 static void do_one_date(deark
*c
, lctx
*d
, i64 pos
, const char *name
,
106 struct de_timestamp
*returned_ts
)
109 struct de_timestamp ts
;
110 char timestamp_buf
[64];
112 de_zeromem(&ts
, sizeof(struct de_timestamp
));
113 dt
= de_geti32be(pos
);
114 if(dt
== -0x80000000LL
) {
115 de_strlcpy(timestamp_buf
, "unknown", sizeof(timestamp_buf
));
118 // Epoch is Jan 1, 2001. There are 30 years, with 7 leap days, between
119 // that and the Unix time epoch.
120 de_unix_time_to_timestamp(dt
+ ((365*30 + 7)*86400), &ts
, 0x1);
121 de_timestamp_to_string(&ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
123 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, dt
, timestamp_buf
);
129 static void handler_dates(deark
*c
, lctx
*d
, struct entry_struct
*e
)
131 if(e
->length
<16) return;
132 do_one_date(c
, d
, e
->offset
, "creation date", &d
->advf
->mainfork
.fi
->timestamp
[DE_TIMESTAMPIDX_CREATE
]);
133 do_one_date(c
, d
, e
->offset
+4, "mod date", &d
->advf
->mainfork
.fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
]);
134 do_one_date(c
, d
, e
->offset
+8, "backup date", &d
->advf
->mainfork
.fi
->timestamp
[DE_TIMESTAMPIDX_BACKUP
]);
135 do_one_date(c
, d
, e
->offset
+12, "access date", &d
->advf
->mainfork
.fi
->timestamp
[DE_TIMESTAMPIDX_ACCESS
]);
138 static void do_finder_orig(deark
*c
, lctx
*d
, struct entry_struct
*e
)
141 struct de_fourcc filetype
;
142 struct de_fourcc creator
;
144 // TODO: This entry has a different format if this is an AppleDouble file
145 // whose companion data "file" is a directory. But I don't know the proper
146 // way to tell if that is the case.
148 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
149 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
150 de_memcpy(d
->advf
->typecode
, filetype
.bytes
, 4);
151 d
->advf
->has_typecode
= 1;
153 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
154 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
155 de_memcpy(d
->advf
->creatorcode
, creator
.bytes
, 4);
156 d
->advf
->has_creatorcode
= 1;
159 d
->advf
->finderflags
= (u16
)dbuf_getu16be_p(c
->infile
, &pos
);
160 d
->advf
->has_finderflags
= 1;
161 de_dbg(c
, "flags: 0x%04x", (unsigned int)d
->advf
->finderflags
);
164 static void do_xattr_entry(deark
*c
, lctx
*d
, struct de_stringreaderdata
*name
,
167 if(pos1
+len
> c
->infile
->len
) return;
169 if(len
>=8 && !dbuf_memcmp(c
->infile
, pos1
, (const void*)"bplist00", 8)) {
170 de_dbg(c
, "binary plist");
172 fmtutil_handle_plist(c
, c
->infile
, pos1
, len
, NULL
, 0);
173 de_dbg_indent(c
, -1);
176 de_dbg_hexdump(c
, c
->infile
, pos1
, len
, 256, NULL
, 0x1);
180 static void do_finder_xattr(deark
*c
, lctx
*d
, struct entry_struct
*e
)
189 int saved_indent_level
;
190 struct de_stringreaderdata
*name
= NULL
;
192 de_dbg_indent_save(c
, &saved_indent_level
);
193 pos
+= 32; // original finder data
195 // At this point, we are most likely at file offset 82, and there are
196 // normally 2 padding bytes for alignment. (This is really a hybrid format
197 // that violates the AppleDouble conventions.)
198 // I don't know for sure what we should do if we're somehow not at an
199 // offset such that (offset mod 4)==2.
200 pos
= de_pad_to_4(pos
);
202 de_dbg(c
, "xattr table at %"I64_FMT
, pos
);
204 pos
+= 4; // magic "ATTR"
205 pos
+= 4; // debug_tag
206 total_size
= de_getu32be_p(&pos
);
207 de_dbg(c
, "total size: %"I64_FMT
, total_size
);
208 data_start
= de_getu32be_p(&pos
);
209 de_dbg(c
, "data start: %"I64_FMT
, data_start
);
210 data_length
= de_getu32be_p(&pos
);
211 de_dbg(c
, "data length: %"I64_FMT
, data_length
);
212 pos
+= 3*4; // reserved
213 flags
= (unsigned int)de_getu16be_p(&pos
);
214 de_dbg(c
, "flags: 0x%04x", flags
);
215 num_attrs
= de_getu16be_p(&pos
);
216 de_dbg(c
, "num attrs: %d", (int)num_attrs
);
218 for(k
=0; k
<num_attrs
; k
++) {
219 i64 entry_dpos
, entry_dlen
, entry_nlen
;
220 unsigned int entry_flags
;
222 // "Entries are aligned on 4 byte boundaries"
223 pos
= de_pad_to_4(pos
);
225 if(pos
>= c
->infile
->len
) goto done
;
227 // TODO: but I don't know
228 // what that means for the decoder.
230 de_dbg(c
, "xattr entry[%d] at %"I64_FMT
, (int)k
, pos
);
232 entry_dpos
= de_getu32be_p(&pos
);
233 de_dbg(c
, "dpos: %"I64_FMT
, entry_dpos
);
234 entry_dlen
= de_getu32be_p(&pos
);
235 de_dbg(c
, "dlen: %"I64_FMT
, entry_dlen
);
236 entry_flags
= (unsigned int)de_getu16be_p(&pos
);
237 de_dbg(c
, "flags: 0x%04x", entry_flags
);
238 entry_nlen
= (i64
)de_getbyte_p(&pos
);
241 de_destroy_stringreaderdata(c
, name
);
243 name
= dbuf_read_string(c
->infile
, pos
, entry_nlen
, entry_nlen
,
244 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_UTF8
);
245 de_dbg(c
, "name: \"%s\"", ucstring_getpsz_d(name
->str
));
247 do_xattr_entry(c
, d
, name
, entry_dpos
, entry_dlen
);
249 de_dbg_indent(c
, -1);
253 de_destroy_stringreaderdata(c
, name
);
254 de_dbg_indent_restore(c
, saved_indent_level
);
257 static void handler_finder(deark
*c
, lctx
*d
, struct entry_struct
*e
)
259 int has_orig_finder_info
= 0;
262 if(e
->length
>=32 && (de_getbyte(e
->offset
) || de_getbyte(e
->offset
+4))) {
263 has_orig_finder_info
= 1;
265 if(e
->length
>=62 && !dbuf_memcmp(c
->infile
, e
->offset
+34, (const void*)"ATTR", 4)) {
269 if(has_orig_finder_info
) {
270 do_finder_orig(c
, d
, e
);
273 do_finder_xattr(c
, d
, e
);
277 static void handler_data(deark
*c
, lctx
*d
, struct entry_struct
*e
)
279 if(d
->is_appledouble
) {
280 de_warn(c
, "AppleDouble header files should not have a data fork.");
283 d
->advf
->mainfork
.fork_exists
= 1;
284 d
->data_fork_pos
= e
->offset
;
285 d
->advf
->mainfork
.fork_len
= e
->length
;
288 static void do_extract_rsrc(deark
*c
, lctx
*d
, struct entry_struct
*e
)
291 de_ucstring
*fname
= NULL
;
293 if(e
->length
<1) goto done
;
295 d
->advf
->rsrcfork
.fork_exists
= 1;
296 d
->rsrc_fork_pos
= e
->offset
;
297 d
->advf
->rsrcfork
.fork_len
= e
->length
;
300 de_finfo_destroy(c
, fi
);
301 ucstring_destroy(fname
);
304 static void do_decode_rsrc(deark
*c
, lctx
*d
, struct entry_struct
*e
)
306 if(e
->length
<1) return;
307 de_dbg(c
, "decoding as resource format");
309 de_run_module_by_id_on_slice2(c
, "macrsrc", NULL
, c
->infile
,
310 e
->offset
, e
->length
);
311 de_dbg_indent(c
, -1);
314 static void handler_rsrc(deark
*c
, lctx
*d
, struct entry_struct
*e
)
316 if(d
->extract_rsrc
) {
317 do_extract_rsrc(c
, d
, e
);
320 do_decode_rsrc(c
, d
, e
);
324 static const struct entry_id_struct entry_id_arr
[] = {
325 {1, "data fork", handler_data
},
326 {2, "resource fork", handler_rsrc
},
327 {3, "real name", handler_string
},
328 {4, "comment", handler_string
},
329 {5, "b/w icon", NULL
},
330 {6, "color icon", NULL
},
331 {8, "file dates", handler_dates
},
332 {9, "Finder info", handler_finder
},
333 {10, "Macintosh file info", NULL
},
334 {11, "ProDOS file info", NULL
},
335 {12, "MS-DOS file info", NULL
},
336 {13, "short name", handler_string
},
337 {14, "AFP file info", NULL
},
338 {15, "directory ID", NULL
}
341 static const struct entry_id_struct
*find_entry_id_info(unsigned int id
)
345 for(k
=0; k
<DE_ARRAYCOUNT(entry_id_arr
); k
++) {
346 if(entry_id_arr
[k
].id
==id
) return &entry_id_arr
[k
];
351 static void do_sd_entry(deark
*c
, lctx
*d
, unsigned int idx
, i64 pos1
)
353 struct entry_struct e
;
354 const struct entry_id_struct
*eid
;
357 de_zeromem(&e
, sizeof(struct entry_struct
));
359 e
.id
= (unsigned int)de_getu32be_p(&pos
);
360 eid
= find_entry_id_info(e
.id
);
361 de_dbg(c
, "id: %u (%s)", e
.id
, eid
?eid
->name
:"?");
362 e
.offset
= de_getu32be_p(&pos
);
363 de_dbg(c
, "offset: %"I64_FMT
, e
.offset
);
364 e
.length
= de_getu32be_p(&pos
);
365 de_dbg(c
, "length: %"I64_FMT
, e
.length
);
367 if(e
.offset
> c
->infile
->len
) goto done
;
368 if(e
.offset
+e
.length
> c
->infile
->len
) {
369 de_warn(c
, "Entry %u goes beyond end of file. Reducing size from %"I64_FMT
370 " to %"I64_FMT
".", e
.idx
, e
.length
, c
->infile
->len
-e
.offset
);
371 e
.length
= c
->infile
->len
- e
.offset
;
374 if(eid
&& eid
->hfn
) {
383 static int my_advfile_cbfn(deark
*c
, struct de_advfile
*advf
,
384 struct de_advfile_cbparams
*afp
)
386 lctx
*d
= (lctx
*)advf
->userdata
;
388 if(afp
->whattodo
== DE_ADVFILE_WRITEMAIN
) {
389 dbuf_copy(c
->infile
, d
->data_fork_pos
, advf
->mainfork
.fork_len
, afp
->outf
);
391 else if(afp
->whattodo
== DE_ADVFILE_WRITERSRC
) {
392 dbuf_copy(c
->infile
, d
->rsrc_fork_pos
, advf
->rsrcfork
.fork_len
, afp
->outf
);
397 static void de_run_sd_internal(deark
*c
, lctx
*d
)
402 i64 entry_descriptors_pos
;
404 if(d
->is_appledouble
) {
405 de_declare_fmt(c
, "AppleDouble header file");
408 de_declare_fmt(c
, "AppleSingle");
411 d
->input_encoding
= de_get_input_encoding(c
, NULL
, DE_ENCODING_MACROMAN
);
413 d
->advf
= de_advfile_create(c
);
414 d
->advf
->userdata
= (void*)d
;
415 d
->advf
->writefork_cbfn
= my_advfile_cbfn
;
416 ucstring_append_sz(d
->advf
->filename
, "bin", DE_ENCODING_LATIN1
);
418 pos
+= 4; // signature
419 d
->version
= (u32
)de_getu32be_p(&pos
);
420 de_dbg(c
, "version: 0x%08x", (unsigned int)d
->version
);
422 // For v1, this field is "Home file system" (TODO: Decode this.)
423 // For v2, it is unused.
426 nentries
= de_getu16be_p(&pos
);
427 de_dbg(c
, "number of entries: %d", (int)nentries
);
429 entry_descriptors_pos
= pos
;
431 for(k
=0; k
<nentries
; k
++) {
432 if(pos
+12>c
->infile
->len
) break;
433 de_dbg(c
, "entry[%u]", (unsigned int)k
);
435 do_sd_entry(c
, d
, (unsigned int)k
, entry_descriptors_pos
+12*k
);
436 de_dbg_indent(c
, -1);
439 // There's no good reason to ever "convert" to AppleSingle. (We don't
440 // have a way to combine forks that start out in separate files.)
441 d
->advf
->no_applesingle
= 1;
443 if(!d
->advf
->mainfork
.fork_exists
|| !d
->advf
->rsrcfork
.fork_exists
) {
444 // If either fork does not exist, don't do anything fancy.
445 // (If both exist, we allow conversion to AppleDouble.)
446 d
->advf
->no_appledouble
= 1;
449 de_advfile_run(d
->advf
);
451 de_advfile_destroy(d
->advf
);
454 static void de_run_applesd(deark
*c
, de_module_params
*mparams
)
458 d
= de_malloc(c
, sizeof(lctx
));
459 if(de_getbyte(3)==0x00)
460 d
->is_appledouble
= 0;
462 d
->is_appledouble
= 1;
463 // AppleDouble default = decode resource fork
464 // AppleSingle default = extract resource fork
465 d
->extract_rsrc
= de_get_ext_option_bool(c
, "applesd:extractrsrc", d
->is_appledouble
?0:1);
466 de_run_sd_internal(c
, d
);
470 static int de_identify_applesd(deark
*c
)
475 if(n
==0x00051607) return 100; // AppleDouble
476 if(n
==0x00051600) return 100; // AppleSingle
480 static void de_help_applesd(deark
*c
)
482 de_msg(c
, "-opt applesd:extractrsrc=<0|1> : Decode (0) or extract (1) the "
484 de_msg(c
, "-opt macrsrc:extractraw : Extract all resources to files (if "
485 "decoding the resource fork)");
488 void de_module_applesd(deark
*c
, struct deark_module_info
*mi
)
491 mi
->id_alias
[0] = "applesingle";
492 mi
->id_alias
[1] = "appledouble";
493 mi
->desc
= "AppleSingle/AppleDouble";
494 mi
->run_fn
= de_run_applesd
;
495 mi
->identify_fn
= de_identify_applesd
;
496 mi
->help_fn
= de_help_applesd
;