New "ea_data" module
[deark.git] / modules / dsstore.c
blob8b1a11779d1b463549fc3c84926ae19268d43561
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // .DS_Store
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
10 DE_DECLARE_MODULE(de_module_dsstore);
// Added to offsets stored in the file to get absolute file positions.
#define HDRSIZE            4
// Upper limit on how deeply do_one_node() may recurse.
#define DSSTORE_MAX_DEPTH  16

// Data-type codes of records. Each is a four-character code: the ASCII
// bytes of the name, most significant byte first.
#define CODE_blob  0x626c6f62U
#define CODE_bool  0x626f6f6cU
#define CODE_comp  0x636f6d70U
#define CODE_dutc  0x64757463U
#define CODE_long  0x6c6f6e67U
#define CODE_shor  0x73686f72U
#define CODE_type  0x74797065U
#define CODE_ustr  0x75737472U
24 struct record_info {
25 i64 dpos;
26 i64 dlen;
27 de_ucstring *filename;
28 struct de_fourcc rtype;
29 struct de_fourcc dtype;
32 struct addr_table_entry {
33 u32 addr_code;
34 u8 decoded;
37 typedef struct localctx_struct {
38 i64 infoblk_offs;
39 i64 infoblk_size;
41 int found_dsdb;
42 u32 dsdb_block_id;
44 u32 root_node_block_id;
46 i64 blkcount;
47 struct addr_table_entry *block_addr_table; // 'blkcount' entries
48 int depth;
49 } lctx;
51 static void do_dir_entry(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
53 u32 blk_id;
54 i64 pos = pos1;
55 i64 nlen;
56 struct de_stringreaderdata *name_srd = NULL;
58 nlen = (i64)de_getbyte_p(&pos);
59 *bytes_consumed = 1 + nlen + 4;
60 name_srd = dbuf_read_string(c->infile, pos, nlen, nlen, 0, DE_ENCODING_MACROMAN);
61 pos += nlen;
62 de_dbg(c, "name: \"%s\"", ucstring_getpsz(name_srd->str));
63 blk_id = (u32)de_getu32be_p(&pos);
64 de_dbg(c, "block id: %u", (unsigned int)blk_id);
66 if(!de_strcmp(name_srd->sz, "DSDB")) {
67 d->found_dsdb = 1;
68 d->dsdb_block_id = blk_id;
71 de_destroy_stringreaderdata(c, name_srd);
74 static void do_info_block(deark *c, lctx *d)
76 i64 pos = d->infoblk_offs;
77 i64 dircount;
78 i64 blk_addr_array_start;
79 i64 blk_addr_array_size_padded;
80 i64 k;
81 int saved_indent_level;
83 de_dbg_indent_save(c, &saved_indent_level);
84 de_dbg(c, "info block at %"I64_FMT, d->infoblk_offs);
85 de_dbg_indent(c, 1);
86 d->blkcount = de_getu32be_p(&pos);
87 de_dbg(c, "block count: %u", (unsigned int)d->blkcount);
88 if(d->blkcount>1000000) goto done;
89 pos += 4; // unknown
91 blk_addr_array_start = pos;
92 blk_addr_array_size_padded = de_pad_to_n(d->blkcount*4, 1024);
94 de_dbg(c, "block address table at %d", (int)pos);
95 d->block_addr_table = de_mallocarray(c, d->blkcount, sizeof(struct addr_table_entry));
96 de_dbg_indent(c, 1);
97 for(k=0; k<d->blkcount; k++) {
98 d->block_addr_table[k].addr_code = (u32)de_getu32be_p(&pos);
99 if(d->block_addr_table[k].addr_code!=0) {
100 de_dbg(c, "addr[%d] = 0x%08x", (int)k,
101 (unsigned int)d->block_addr_table[k].addr_code);
104 de_dbg_indent(c, -1);
106 pos = blk_addr_array_start + blk_addr_array_size_padded;
107 dircount = de_getu32be_p(&pos);
108 de_dbg(c, "dir count: %u", (unsigned int)dircount);
109 if(dircount>1000000) goto done;
110 for(k=0; k<dircount; k++) {
111 i64 bytes_consumed;
113 de_dbg(c, "dir entry[%d] at %"I64_FMT, (int)k, pos);
114 de_dbg_indent(c, 1);
115 do_dir_entry(c, d, pos, &bytes_consumed);
116 pos += bytes_consumed;
117 de_dbg_indent(c, -1);
120 done:
121 de_dbg_indent_restore(c, saved_indent_level);
124 static int block_id_to_offset_and_size(deark *c, lctx *d, u32 blk_id,
125 i64 *poffs, i64 *psize)
127 int retval = 0;
128 u32 addr_code;
129 unsigned int size_indicator;
131 if((i64)blk_id>=d->blkcount) {
132 goto done;
135 addr_code = d->block_addr_table[blk_id].addr_code;
136 size_indicator = addr_code&0x1f;
137 *psize = de_pow2((i64)size_indicator);
138 *poffs = HDRSIZE+(i64)(addr_code-size_indicator);
140 retval = 1;
141 done:
142 if(!retval) {
143 *poffs = c->infile->len;
144 *psize = 0;
146 return retval;
149 static void do_blob(deark *c, lctx *d, struct record_info *ri)
151 i64 len;
152 i64 blobpos;
154 len = de_getu32be(ri->dpos);
155 de_dbg(c, "blob len: %d", (int)len);
156 ri->dlen = 4+len;
157 blobpos = ri->dpos+4;
159 if(len>=8 && !dbuf_memcmp(c->infile, blobpos, "bplist00", 8)) {
160 de_finfo *fi = NULL;
161 de_ucstring *fn = NULL;
163 de_dbg(c, "binary plist at %d", (int)blobpos);
164 de_dbg_indent(c, 1);
165 fn = ucstring_create(c);
166 if(c->filenames_from_file) {
167 ucstring_append_ucstring(fn, ri->filename);
168 ucstring_append_sz(fn, ".", DE_ENCODING_LATIN1);
170 ucstring_printf(fn, DE_ENCODING_LATIN1, "%s.plist", ri->rtype.id_sanitized_sz);
171 fi = de_finfo_create(c);
172 de_finfo_set_name_from_ucstring(c, fi, fn, 0);
173 fmtutil_handle_plist(c, c->infile, blobpos, len, fi, 0);
174 ucstring_destroy(fn);
175 de_finfo_destroy(c, fi);
176 de_dbg_indent(c, -1);
178 else {
179 de_dbg_hexdump(c, c->infile, blobpos, len, 256, NULL, 0x1);
183 static void do_ustr(deark *c, lctx *d, struct record_info *ri)
185 i64 len;
186 de_ucstring *s = NULL;
188 len = de_getu32be(ri->dpos);
189 ri->dlen = 4+len;
190 s = ucstring_create(c);
191 dbuf_read_to_ucstring_n(c->infile, ri->dpos+4, len*2, DE_DBG_MAX_STRLEN*2, s, 0,
192 DE_ENCODING_UTF16BE);
193 de_dbg(c, "value: \"%s\"", ucstring_getpsz_d(s));
194 ucstring_destroy(s);
197 static void do_record_int(deark *c, lctx *d, struct record_info *ri,
198 i64 dpos, i64 dlen)
200 i64 val;
202 val = dbuf_getint_ext(c->infile, dpos, (unsigned int)dlen, 0, 0);
203 de_dbg(c, "value: %"I64_FMT, val);
206 static void do_record_date(deark *c, lctx *d, struct record_info *ri)
208 u64 val1;
209 i64 val2;
210 struct de_timestamp ts;
211 char timestamp_buf[64];
213 val1 = dbuf_getu64be(c->infile, ri->dpos);
214 val2 = (i64)(val1>>16);
215 de_mac_time_to_timestamp(val2, &ts);
216 ts.tzcode = DE_TZCODE_UTC;
217 de_timestamp_to_string(&ts, timestamp_buf, sizeof(timestamp_buf), 0);
218 de_dbg(c, "date: %"U64_FMT" (%s)", val1, timestamp_buf);
221 // Returns 1 if we calculated the bytes_consumed.
222 static int do_record(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
224 i64 nlen;
225 i64 pos = pos1;
226 struct record_info ri;
227 int retval = 0;
229 de_zeromem(&ri, sizeof(struct record_info));
231 nlen = de_getu32be_p(&pos);
232 if(nlen>2048) goto done;
233 ri.filename = ucstring_create(c);
234 dbuf_read_to_ucstring(c->infile, pos, nlen*2, ri.filename, 0, DE_ENCODING_UTF16BE);
235 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(ri.filename));
236 pos += 2*nlen;
238 dbuf_read_fourcc(c->infile, pos, &ri.rtype, 4, 0x0);
239 de_dbg(c, "record type: '%s'", ri.rtype.id_dbgstr);
240 pos += 4;
242 dbuf_read_fourcc(c->infile, pos, &ri.dtype, 4, 0x0);
243 de_dbg(c, "data type: '%s'", ri.dtype.id_dbgstr);
244 pos += 4;
246 ri.dpos = pos;
248 switch(ri.dtype.id) {
249 case CODE_blob:
250 do_blob(c, d, &ri);
251 break;
252 case CODE_bool:
253 ri.dlen = 1;
254 do_record_int(c, d, &ri, ri.dpos, ri.dlen);
255 break;
256 case CODE_comp:
257 ri.dlen = 8;
258 do_record_int(c, d, &ri, ri.dpos, ri.dlen);
259 break;
260 case CODE_dutc:
261 ri.dlen = 8;
262 do_record_date(c, d, &ri);
263 break;
264 case CODE_long:
265 ri.dlen = 4;
266 do_record_int(c, d, &ri, ri.dpos, ri.dlen);
267 break;
268 case CODE_shor:
269 ri.dlen = 4;
270 do_record_int(c, d, &ri, ri.dpos+2, 2);
271 break;
272 case CODE_type:
273 ri.dlen = 4;
274 break;
275 case CODE_ustr:
276 do_ustr(c, d, &ri);
277 break;
278 default:
279 de_warn(c, "Unknown data type '%s'. Remaining records in this node cannot "
280 "be processed.", ri.dtype.id_sanitized_sz);
281 goto done;
283 if(ri.dlen<1) goto done;
284 pos += ri.dlen;
286 retval = 1;
287 done:
288 *bytes_consumed = pos-pos1;
289 ucstring_destroy(ri.filename);
290 return retval;
293 static void do_one_node(deark *c, lctx *d, u32 blk_id)
295 i64 node_offs, node_size;
296 unsigned int mode;
297 i64 count;
298 i64 pos;
299 i64 k;
300 int saved_indent_level;
302 de_dbg_indent_save(c, &saved_indent_level);
304 d->depth++;
305 if(d->depth > DSSTORE_MAX_DEPTH) goto done;
306 if(blk_id >= d->blkcount) goto done;
307 if(d->block_addr_table[blk_id].decoded) goto done;
308 d->block_addr_table[blk_id].decoded = 1;
310 if(!block_id_to_offset_and_size(c, d, blk_id, &node_offs,
311 &node_size))
313 goto done;
316 de_dbg(c, "node: id=%u, offs=%d, len=%d", (unsigned int)blk_id,
317 (int)node_offs, (int)node_size);
318 de_dbg_indent(c, 1);
319 pos = node_offs;
320 mode = (unsigned int)de_getu32be_p(&pos);
321 de_dbg(c, "mode: %u", mode);
322 count = de_getu32be_p(&pos);
323 de_dbg(c, "count: %d", (int)count);
326 // If 'mode' is 0, there are 'count' records here.
327 i64 bytes_consumed = 0;
329 if(mode!=0) {
330 u32 next_blk_id;
331 next_blk_id = (u32)de_getu32be_p(&pos);
332 de_dbg(c, "next block id: %u", (unsigned int)next_blk_id);
333 do_one_node(c, d, next_blk_id);
336 for(k=0; k<count; k++) {
337 de_dbg(c, "record[%d] at %d (for node@%d)", (int)k, (int)pos, (int)node_offs);
338 de_dbg_indent(c, 1);
339 if(!do_record(c, d, pos, &bytes_consumed)) break;
340 de_dbg_indent(c, -1);
341 pos += bytes_consumed;
345 done:
346 de_dbg_indent_restore(c, saved_indent_level);
347 d->depth--;
350 static int do_dsdb(deark *c, lctx *d)
352 i64 dsdb_offs, dsdb_size;
353 i64 pos;
354 i64 n;
355 int retval = 0;
357 if(!d->found_dsdb) goto done;
358 if(!block_id_to_offset_and_size(c, d, d->dsdb_block_id, &dsdb_offs,
359 &dsdb_size))
361 goto done;
364 de_dbg(c, "DSDB block: id=%u, offs=%d, len=%d", (unsigned int)d->dsdb_block_id,
365 (int)dsdb_offs, (int)dsdb_size);
367 de_dbg_indent(c, 1);
368 pos = dsdb_offs;
369 d->root_node_block_id = (u32)de_getu32be_p(&pos);
370 de_dbg(c, "root node block id: %u", (unsigned int)d->root_node_block_id);
372 n = de_getu32be_p(&pos);
373 de_dbg(c, "num levels: %d", (int)n);
374 n = de_getu32be_p(&pos);
375 de_dbg(c, "num records in tree: %d", (int)n);
376 n = de_getu32be_p(&pos);
377 de_dbg(c, "num blocks in tree: %d", (int)n);
378 de_dbg_indent(c, -1);
380 retval = 1;
381 done:
382 return retval;
385 static void de_run_dsstore(deark *c, de_module_params *mparams)
387 lctx *d = NULL;
388 i64 pos;
390 d = de_malloc(c, sizeof(lctx));
392 pos = 8;
393 d->infoblk_offs = de_getu32be_p(&pos);
394 de_dbg(c, "info block offset: (%d+)%"I64_FMT, HDRSIZE, d->infoblk_offs);
395 d->infoblk_offs += HDRSIZE;
396 d->infoblk_size = de_getu32be_p(&pos);
397 de_dbg(c, "info block size: %"I64_FMT, d->infoblk_size);
399 do_info_block(c, d);
401 if(!d->found_dsdb) {
402 de_err(c, "DSDB block not found. This file is probably corrupted, or "
403 "an unsupported version.");
404 goto done;
406 if(!do_dsdb(c, d)) goto done;
408 do_one_node(c, d, d->root_node_block_id);
410 done:
411 if(d) {
412 de_free(c, d->block_addr_table);
413 de_free(c, d);
417 static int de_identify_dsstore(deark *c)
419 if(!dbuf_memcmp(c->infile, 0, "\x00\x00\x00\x01" "Bud1", 8))
420 return 100;
421 return 0;
424 static void de_help_dsstore(deark *c)
426 de_msg(c, "-opt extractplist : Write plist records to files");
429 void de_module_dsstore(deark *c, struct deark_module_info *mi)
431 mi->id = "dsstore";
432 mi->desc = "Mac Finder .DS_Store format";
433 mi->run_fn = de_run_dsstore;
434 mi->identify_fn = de_identify_dsstore;
435 mi->help_fn = de_help_dsstore;