Refactoring the iff decoder
[deark.git] / modules / applesd.c
blob52007e2c9561a2640a1726814315808f8fdabfbd
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // AppleSingle and AppleDouble
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_applesd);
12 typedef struct localctx_struct {
13 u32 version;
14 int is_appledouble;
15 int input_encoding;
16 int extract_rsrc;
17 struct de_advfile *advf;
18 i64 rsrc_fork_pos;
19 i64 data_fork_pos;
20 } lctx;
22 struct entry_id_struct;
24 struct entry_struct {
25 unsigned int idx;
26 unsigned int id;
27 i64 offset;
28 i64 length;
29 const struct entry_id_struct *eid;
32 typedef void (*handler_fn_type)(deark *c, lctx *d, struct entry_struct *e);
34 struct entry_id_struct {
35 unsigned int id;
36 const char *name;
37 handler_fn_type hfn;
40 // I'm about 60% sure that the standard elements that are presumably strings
41 // were intended to be raw ASCII-like characters. Too bad they didn't mention
42 // that in the spec. It's common to find files that contain "Pascal" strings,
43 // where the first byte is the length of the (rest of the) string.
44 // It's also common for string elements (whether Pascal or not) to have extra
45 // NUL bytes at the end of them, for no apparent reason.
46 static int is_pascal_string(deark *c, lctx *d, struct entry_struct *e, u8 firstbyte)
48 if(e->length<1) return 0;
50 // Assume this field won't be larger than any Pascal string could need.
51 if(e->length > 256) return 0;
53 if(1+(i64)firstbyte > e->length) return 0; // A Pascal string wouldn't fit.
55 // This could be wrong, if a non-Pascal string starts with a nonprintable char.
56 if(firstbyte<32) return 1;
58 // At this point, we could do more heuristics, such as testing whether the
59 // non-NUL bytes stop exactly where they should for a Pascal string.
60 // But perfection is impossible.
61 // For now, just assume it's not a Pascal string. Worst case, the decoded
62 // string will have a garbage character prepended.
63 // TODO: Maybe add a user option.
64 return 0;
67 static void handler_string(deark *c, lctx *d, struct entry_struct *e)
69 struct de_stringreaderdata *srd = NULL;
70 u8 firstbyte;
72 if(e->length<1) goto done;
74 firstbyte = de_getbyte(e->offset);
76 if(firstbyte==0x00) {
77 de_dbg(c, "string is apparently empty");
78 goto done;
80 else if(is_pascal_string(c, d, e, firstbyte)) {
81 i64 slen = (i64)firstbyte;
83 de_dbg(c, "guessing this is a Pascal string, len: %u", (unsigned int)slen);
84 srd = dbuf_read_string(c->infile, e->offset+1, slen, slen, 0, d->input_encoding);
86 else {
87 srd = dbuf_read_string(c->infile, e->offset, e->length, 1024,
88 DE_CONVFLAG_STOP_AT_NUL, d->input_encoding);
91 de_dbg(c, "%s: \"%s\"", e->eid->name, ucstring_getpsz_d(srd->str));
93 if(e->id==3 && srd->str->len>0) { // id 3 = real name
94 ucstring_empty(d->advf->filename);
95 ucstring_append_ucstring(d->advf->filename, srd->str);
96 d->advf->original_filename_flag = 1;
97 de_advfile_set_orig_filename(d->advf, srd->sz, srd->sz_strlen);
100 done:
101 de_destroy_stringreaderdata(c, srd);
104 // Read, debug, and store in caller-supplied returned_ts.
105 static void do_one_date(deark *c, lctx *d, i64 pos, const char *name,
106 struct de_timestamp *returned_ts)
108 i64 dt;
109 struct de_timestamp ts;
110 char timestamp_buf[64];
112 de_zeromem(&ts, sizeof(struct de_timestamp));
113 dt = de_geti32be(pos);
114 if(dt == -0x80000000LL) {
115 de_strlcpy(timestamp_buf, "unknown", sizeof(timestamp_buf));
117 else {
118 // Epoch is Jan 1, 2001. There are 30 years, with 7 leap days, between
119 // that and the Unix time epoch.
120 de_unix_time_to_timestamp(dt + ((365*30 + 7)*86400), &ts, 0x1);
121 de_timestamp_to_string(&ts, timestamp_buf, sizeof(timestamp_buf), 0);
123 de_dbg(c, "%s: %"I64_FMT" (%s)", name, dt, timestamp_buf);
124 if(returned_ts) {
125 *returned_ts = ts;
129 static void handler_dates(deark *c, lctx *d, struct entry_struct *e)
131 if(e->length<16) return;
132 do_one_date(c, d, e->offset, "creation date", &d->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_CREATE]);
133 do_one_date(c, d, e->offset+4, "mod date", &d->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_MODIFY]);
134 do_one_date(c, d, e->offset+8, "backup date", &d->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_BACKUP]);
135 do_one_date(c, d, e->offset+12, "access date", &d->advf->mainfork.fi->timestamp[DE_TIMESTAMPIDX_ACCESS]);
138 static void do_finder_orig(deark *c, lctx *d, struct entry_struct *e)
140 i64 pos = e->offset;
141 struct de_fourcc filetype;
142 struct de_fourcc creator;
144 // TODO: This entry has a different format if this is an AppleDouble file
145 // whose companion data "file" is a directory. But I don't know the proper
146 // way to tell if that is the case.
148 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
149 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
150 de_memcpy(d->advf->typecode, filetype.bytes, 4);
151 d->advf->has_typecode = 1;
152 pos += 4;
153 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
154 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
155 de_memcpy(d->advf->creatorcode, creator.bytes, 4);
156 d->advf->has_creatorcode = 1;
157 pos += 4;
159 d->advf->finderflags = (u16)dbuf_getu16be_p(c->infile, &pos);
160 d->advf->has_finderflags = 1;
161 de_dbg(c, "flags: 0x%04x", (unsigned int)d->advf->finderflags);
164 static void do_xattr_entry(deark *c, lctx *d, struct de_stringreaderdata *name,
165 i64 pos1, i64 len)
167 if(pos1+len > c->infile->len) return;
169 if(len>=8 && !dbuf_memcmp(c->infile, pos1, (const void*)"bplist00", 8)) {
170 de_dbg(c, "binary plist");
171 de_dbg_indent(c, 1);
172 fmtutil_handle_plist(c, c->infile, pos1, len, NULL, 0);
173 de_dbg_indent(c, -1);
175 else {
176 de_dbg_hexdump(c, c->infile, pos1, len, 256, NULL, 0x1);
180 static void do_finder_xattr(deark *c, lctx *d, struct entry_struct *e)
182 i64 total_size;
183 i64 data_start;
184 i64 data_length;
185 i64 num_attrs;
186 i64 k;
187 unsigned int flags;
188 i64 pos = e->offset;
189 int saved_indent_level;
190 struct de_stringreaderdata *name = NULL;
192 de_dbg_indent_save(c, &saved_indent_level);
193 pos += 32; // original finder data
195 // At this point, we are most likely at file offset 82, and there are
196 // normally 2 padding bytes for alignment. (This is really a hybrid format
197 // that violates the AppleDouble conventions.)
198 // I don't know for sure what we should do if we're somehow not at an
199 // offset such that (offset mod 4)==2.
200 pos = de_pad_to_4(pos);
202 de_dbg(c, "xattr table at %"I64_FMT, pos);
203 de_dbg_indent(c, 1);
204 pos += 4; // magic "ATTR"
205 pos += 4; // debug_tag
206 total_size = de_getu32be_p(&pos);
207 de_dbg(c, "total size: %"I64_FMT, total_size);
208 data_start = de_getu32be_p(&pos);
209 de_dbg(c, "data start: %"I64_FMT, data_start);
210 data_length = de_getu32be_p(&pos);
211 de_dbg(c, "data length: %"I64_FMT, data_length);
212 pos += 3*4; // reserved
213 flags = (unsigned int)de_getu16be_p(&pos);
214 de_dbg(c, "flags: 0x%04x", flags);
215 num_attrs = de_getu16be_p(&pos);
216 de_dbg(c, "num attrs: %d", (int)num_attrs);
218 for(k=0; k<num_attrs; k++) {
219 i64 entry_dpos, entry_dlen, entry_nlen;
220 unsigned int entry_flags;
222 // "Entries are aligned on 4 byte boundaries"
223 pos = de_pad_to_4(pos);
225 if(pos >= c->infile->len) goto done;
227 // TODO: but I don't know
228 // what that means for the decoder.
230 de_dbg(c, "xattr entry[%d] at %"I64_FMT, (int)k, pos);
231 de_dbg_indent(c, 1);
232 entry_dpos = de_getu32be_p(&pos);
233 de_dbg(c, "dpos: %"I64_FMT, entry_dpos);
234 entry_dlen = de_getu32be_p(&pos);
235 de_dbg(c, "dlen: %"I64_FMT, entry_dlen);
236 entry_flags = (unsigned int)de_getu16be_p(&pos);
237 de_dbg(c, "flags: 0x%04x", entry_flags);
238 entry_nlen = (i64)de_getbyte_p(&pos);
240 if(name) {
241 de_destroy_stringreaderdata(c, name);
243 name = dbuf_read_string(c->infile, pos, entry_nlen, entry_nlen,
244 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_UTF8);
245 de_dbg(c, "name: \"%s\"", ucstring_getpsz_d(name->str));
247 do_xattr_entry(c, d, name, entry_dpos, entry_dlen);
248 pos += entry_nlen;
249 de_dbg_indent(c, -1);
252 done:
253 de_destroy_stringreaderdata(c, name);
254 de_dbg_indent_restore(c, saved_indent_level);
257 static void handler_finder(deark *c, lctx *d, struct entry_struct *e)
259 int has_orig_finder_info = 0;
260 int has_xattr = 0;
262 if(e->length>=32 && (de_getbyte(e->offset) || de_getbyte(e->offset+4))) {
263 has_orig_finder_info = 1;
265 if(e->length>=62 && !dbuf_memcmp(c->infile, e->offset+34, (const void*)"ATTR", 4)) {
266 has_xattr = 1;
269 if(has_orig_finder_info) {
270 do_finder_orig(c, d, e);
272 if(has_xattr) {
273 do_finder_xattr(c, d, e);
277 static void handler_data(deark *c, lctx *d, struct entry_struct *e)
279 if(d->is_appledouble) {
280 de_warn(c, "AppleDouble header files should not have a data fork.");
283 d->advf->mainfork.fork_exists = 1;
284 d->data_fork_pos = e->offset;
285 d->advf->mainfork.fork_len = e->length;
288 static void do_extract_rsrc(deark *c, lctx *d, struct entry_struct *e)
290 de_finfo *fi = NULL;
291 de_ucstring *fname = NULL;
293 if(e->length<1) goto done;
295 d->advf->rsrcfork.fork_exists = 1;
296 d->rsrc_fork_pos = e->offset;
297 d->advf->rsrcfork.fork_len = e->length;
299 done:
300 de_finfo_destroy(c, fi);
301 ucstring_destroy(fname);
304 static void do_decode_rsrc(deark *c, lctx *d, struct entry_struct *e)
306 if(e->length<1) return;
307 de_dbg(c, "decoding as resource format");
308 de_dbg_indent(c, 1);
309 de_run_module_by_id_on_slice2(c, "macrsrc", NULL, c->infile,
310 e->offset, e->length);
311 de_dbg_indent(c, -1);
314 static void handler_rsrc(deark *c, lctx *d, struct entry_struct *e)
316 if(d->extract_rsrc) {
317 do_extract_rsrc(c, d, e);
319 else {
320 do_decode_rsrc(c, d, e);
324 static const struct entry_id_struct entry_id_arr[] = {
325 {1, "data fork", handler_data},
326 {2, "resource fork", handler_rsrc},
327 {3, "real name", handler_string},
328 {4, "comment", handler_string},
329 {5, "b/w icon", NULL},
330 {6, "color icon", NULL},
331 {8, "file dates", handler_dates},
332 {9, "Finder info", handler_finder},
333 {10, "Macintosh file info", NULL},
334 {11, "ProDOS file info", NULL},
335 {12, "MS-DOS file info", NULL},
336 {13, "short name", handler_string},
337 {14, "AFP file info", NULL},
338 {15, "directory ID", NULL}
341 static const struct entry_id_struct *find_entry_id_info(unsigned int id)
343 size_t k;
345 for(k=0; k<DE_ARRAYCOUNT(entry_id_arr); k++) {
346 if(entry_id_arr[k].id==id) return &entry_id_arr[k];
348 return NULL;
351 static void do_sd_entry(deark *c, lctx *d, unsigned int idx, i64 pos1)
353 struct entry_struct e;
354 const struct entry_id_struct *eid;
355 i64 pos = pos1;
357 de_zeromem(&e, sizeof(struct entry_struct));
358 e.idx = idx;
359 e.id = (unsigned int)de_getu32be_p(&pos);
360 eid = find_entry_id_info(e.id);
361 de_dbg(c, "id: %u (%s)", e.id, eid?eid->name:"?");
362 e.offset = de_getu32be_p(&pos);
363 de_dbg(c, "offset: %"I64_FMT, e.offset);
364 e.length = de_getu32be_p(&pos);
365 de_dbg(c, "length: %"I64_FMT, e.length);
367 if(e.offset > c->infile->len) goto done;
368 if(e.offset+e.length > c->infile->len) {
369 de_warn(c, "Entry %u goes beyond end of file. Reducing size from %"I64_FMT
370 " to %"I64_FMT".", e.idx, e.length, c->infile->len-e.offset);
371 e.length = c->infile->len - e.offset;
374 if(eid && eid->hfn) {
375 e.eid = eid;
376 eid->hfn(c, d, &e);
379 done:
383 static int my_advfile_cbfn(deark *c, struct de_advfile *advf,
384 struct de_advfile_cbparams *afp)
386 lctx *d = (lctx*)advf->userdata;
388 if(afp->whattodo == DE_ADVFILE_WRITEMAIN) {
389 dbuf_copy(c->infile, d->data_fork_pos, advf->mainfork.fork_len, afp->outf);
391 else if(afp->whattodo == DE_ADVFILE_WRITERSRC) {
392 dbuf_copy(c->infile, d->rsrc_fork_pos, advf->rsrcfork.fork_len, afp->outf);
394 return 1;
397 static void de_run_sd_internal(deark *c, lctx *d)
399 i64 pos = 0;
400 i64 nentries;
401 i64 k;
402 i64 entry_descriptors_pos;
404 if(d->is_appledouble) {
405 de_declare_fmt(c, "AppleDouble header file");
407 else {
408 de_declare_fmt(c, "AppleSingle");
411 d->input_encoding = de_get_input_encoding(c, NULL, DE_ENCODING_MACROMAN);
413 d->advf = de_advfile_create(c);
414 d->advf->userdata = (void*)d;
415 d->advf->writefork_cbfn = my_advfile_cbfn;
416 ucstring_append_sz(d->advf->filename, "bin", DE_ENCODING_LATIN1);
418 pos += 4; // signature
419 d->version = (u32)de_getu32be_p(&pos);
420 de_dbg(c, "version: 0x%08x", (unsigned int)d->version);
422 // For v1, this field is "Home file system" (TODO: Decode this.)
423 // For v2, it is unused.
424 pos += 16;
426 nentries = de_getu16be_p(&pos);
427 de_dbg(c, "number of entries: %d", (int)nentries);
429 entry_descriptors_pos = pos;
431 for(k=0; k<nentries; k++) {
432 if(pos+12>c->infile->len) break;
433 de_dbg(c, "entry[%u]", (unsigned int)k);
434 de_dbg_indent(c, 1);
435 do_sd_entry(c, d, (unsigned int)k, entry_descriptors_pos+12*k);
436 de_dbg_indent(c, -1);
439 // There's no good reason to ever "convert" to AppleSingle. (We don't
440 // have a way to combine forks that start out in separate files.)
441 d->advf->no_applesingle = 1;
443 if(!d->advf->mainfork.fork_exists || !d->advf->rsrcfork.fork_exists) {
444 // If either fork does not exist, don't do anything fancy.
445 // (If both exist, we allow conversion to AppleDouble.)
446 d->advf->no_appledouble = 1;
449 de_advfile_run(d->advf);
451 de_advfile_destroy(d->advf);
454 static void de_run_applesd(deark *c, de_module_params *mparams)
456 lctx *d = NULL;
458 d = de_malloc(c, sizeof(lctx));
459 if(de_getbyte(3)==0x00)
460 d->is_appledouble = 0;
461 else
462 d->is_appledouble = 1;
463 // AppleDouble default = decode resource fork
464 // AppleSingle default = extract resource fork
465 d->extract_rsrc = de_get_ext_option_bool(c, "applesd:extractrsrc", d->is_appledouble?0:1);
466 de_run_sd_internal(c, d);
467 de_free(c, d);
470 static int de_identify_applesd(deark *c)
472 i64 n;
474 n = de_getu32be(0);
475 if(n==0x00051607) return 100; // AppleDouble
476 if(n==0x00051600) return 100; // AppleSingle
477 return 0;
480 static void de_help_applesd(deark *c)
482 de_msg(c, "-opt applesd:extractrsrc=<0|1> : Decode (0) or extract (1) the "
483 "resource fork");
484 de_msg(c, "-opt macrsrc:extractraw : Extract all resources to files (if "
485 "decoding the resource fork)");
488 void de_module_applesd(deark *c, struct deark_module_info *mi)
490 mi->id = "applesd";
491 mi->id_alias[0] = "applesingle";
492 mi->id_alias[1] = "appledouble";
493 mi->desc = "AppleSingle/AppleDouble";
494 mi->run_fn = de_run_applesd;
495 mi->identify_fn = de_identify_applesd;
496 mi->help_fn = de_help_applesd;