bmp: Rewrote the RLE decompressor
[deark.git] / modules / plist.c
blobbe8bb73be6c70061d8207b3776729f1784fa4d05
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // Binary PLIST (property list format used mainly by Apple)
7 #include <deark-config.h>
8 #include <deark-private.h>
DE_DECLARE_MODULE(de_module_plist);

// Aggregate objects (arrays, sets, dicts) nested more deeply than this are
// not decoded.
#define MAX_PLIST_NESTING_LEVEL 10

// Limit on the size of the objref table, and on the total number of objects
// we are willing to decode from one file.
#define MAX_PLIST_OBJECTS 1000000
14 struct objref_struct {
15 u32 offs;
18 typedef struct localctx_struct {
19 int nesting_level;
20 int exceeded_max_objects;
21 i64 object_count; // Number of objects we've decoded so far
23 unsigned int nbytes_per_objref_table_entry;
24 unsigned int nbytes_per_object_refnum;
25 i64 top_object_refnum;
26 i64 objref_table_start;
28 // objref_table maps object refnums to file offsets.
29 // It has .num_objrefs elements.
30 i64 num_objrefs;
31 struct objref_struct *objref_table;
32 } lctx;
34 static int do_header(deark *c, lctx *d, i64 pos)
36 int retval = 0;
38 de_dbg(c, "header at %d", (int)pos);
40 if(dbuf_memcmp(c->infile, pos, "bplist", 6)) {
41 de_err(c, "Not in binary PLIST format");
42 goto done;
44 if(dbuf_memcmp(c->infile, pos+6, "00", 2)) {
45 // TODO: Support other versions?
46 de_err(c, "Unsupported binary PLIST version");
47 goto done;
50 retval = 1;
51 done:
52 return retval;
55 static int do_trailer(deark *c, lctx *d, i64 pos1)
57 int retval = 0;
58 i64 pos = pos1;
60 de_dbg(c, "trailer at %d", (int)pos);
61 de_dbg_indent(c, 1);
62 pos += 5; // unused
63 pos++; // sort version
65 d->nbytes_per_objref_table_entry = (unsigned int)de_getbyte_p(&pos);
66 de_dbg(c, "bytes per objref table entry: %u", d->nbytes_per_objref_table_entry);
68 d->nbytes_per_object_refnum = (unsigned int)de_getbyte_p(&pos);
69 de_dbg(c, "bytes per object refnum: %u", d->nbytes_per_object_refnum);
71 d->num_objrefs = de_geti64be(pos);
72 de_dbg(c, "num objrefs: %d", (int)d->num_objrefs);
73 pos += 8;
75 d->top_object_refnum = de_geti64be(pos);
76 de_dbg(c, "root object refnum: %"I64_FMT, d->top_object_refnum);
77 pos += 8;
79 d->objref_table_start = de_geti64be(pos);
80 de_dbg(c, "objref table start: %"I64_FMT, d->objref_table_start);
81 dbuf_constrain_offset(c->infile, &d->objref_table_start);
82 pos += 8;
84 if(d->nbytes_per_objref_table_entry<1 || d->nbytes_per_objref_table_entry>8 ||
85 d->nbytes_per_object_refnum<1 || d->nbytes_per_object_refnum>8)
87 de_err(c, "Bad or unsupported PLIST format");
88 goto done;
91 if(d->num_objrefs<0 || d->num_objrefs>MAX_PLIST_OBJECTS) {
92 de_err(c, "Too many PLIST objects (%"I64_FMT")", d->num_objrefs);
93 goto done;
96 retval = 1;
97 done:
98 de_dbg_indent(c, -1);
99 return retval;
102 static int do_one_object_by_refnum(deark *c, lctx *d, i64 refnum);
104 static void report_nesting_level_exceeded(deark *c, lctx *d)
106 de_err(c, "Maximum nesting level exceeded");
109 static void do_object_array_or_set(deark *c, lctx *d, const char *tn,
110 i64 objpos, i64 pos1, i64 numitems)
112 i64 k;
113 i64 pos = pos1;
114 int saved_indent_level;
116 de_dbg_indent_save(c, &saved_indent_level);
117 d->nesting_level++;
118 if(d->nesting_level>MAX_PLIST_NESTING_LEVEL) {
119 report_nesting_level_exceeded(c, d);
120 goto done;
123 for(k=0; k<numitems; k++) {
124 i64 refnum;
126 if(d->exceeded_max_objects) goto done;
127 de_dbg(c, "item[%d] (for %s@%"I64_FMT")", (int)k, tn, objpos);
128 de_dbg_indent(c, 1);
130 refnum = dbuf_getint_ext(c->infile, pos, d->nbytes_per_object_refnum, 0, 0);
131 pos += (i64)d->nbytes_per_object_refnum;
133 de_dbg(c, "refnum: %u", (unsigned int)refnum);
134 if(!do_one_object_by_refnum(c, d, refnum)) goto done;
136 de_dbg_indent(c, -1);
139 done:
140 d->nesting_level--;
141 de_dbg_indent_restore(c, saved_indent_level);
144 static void do_object_dict(deark *c, lctx *d, i64 objpos, i64 pos1,
145 i64 dictsize)
147 i64 k;
148 int saved_indent_level;
150 de_dbg_indent_save(c, &saved_indent_level);
151 d->nesting_level++;
152 if(d->nesting_level>MAX_PLIST_NESTING_LEVEL) {
153 report_nesting_level_exceeded(c, d);
154 goto done;
157 for(k=0; k<dictsize; k++) {
158 i64 keyrefnum;
159 i64 valrefnum;
161 if(d->exceeded_max_objects) goto done;
162 de_dbg(c, "entry[%d] (for dict@%"I64_FMT")", (int)k, objpos);
163 de_dbg_indent(c, 1);
165 keyrefnum = dbuf_getint_ext(c->infile, pos1+k*(i64)d->nbytes_per_object_refnum,
166 d->nbytes_per_object_refnum, 0, 0);
167 de_dbg(c, "key objrefnum: %u", (unsigned int)keyrefnum);
168 de_dbg_indent(c, 1);
169 if(!do_one_object_by_refnum(c, d, keyrefnum)) goto done;
170 de_dbg_indent(c, -1);
172 valrefnum = dbuf_getint_ext(c->infile, pos1+(dictsize+k)*(i64)d->nbytes_per_object_refnum,
173 d->nbytes_per_object_refnum, 0, 0);
174 de_dbg(c, "val objrefnum: %u", (unsigned int)valrefnum);
175 de_dbg_indent(c, 1);
176 if(!do_one_object_by_refnum(c, d, valrefnum)) goto done;
177 de_dbg_indent(c, -1);
179 de_dbg_indent(c, -1);
182 done:
183 d->nesting_level--;
184 de_dbg_indent_restore(c, saved_indent_level);
187 // "ASCII" string
188 static void do_object_string(deark *c, lctx *d, i64 pos, i64 len)
190 de_ucstring *s = NULL;
192 s = ucstring_create(c);
193 dbuf_read_to_ucstring_n(c->infile, pos, len, DE_DBG_MAX_STRLEN, s, 0, DE_ENCODING_ASCII);
194 de_dbg(c, "value: \"%s\"", ucstring_getpsz_d(s));
195 ucstring_destroy(s);
198 static void do_object_utf16string(deark *c, lctx *d, i64 pos, i64 len)
200 de_ucstring *s = NULL;
202 s = ucstring_create(c);
203 dbuf_read_to_ucstring_n(c->infile, pos, len*2, DE_DBG_MAX_STRLEN*2, s, 0, DE_ENCODING_UTF16BE);
204 de_dbg(c, "value: \"%s\"", ucstring_getpsz_d(s));
205 ucstring_destroy(s);
208 static void do_object_real(deark *c, lctx *d, i64 pos, i64 dlen_raw)
210 double val;
212 if(dlen_raw==2) {
213 val = dbuf_getfloat32x(c->infile, pos, 0);
215 else if(dlen_raw==3) {
216 val = dbuf_getfloat64x(c->infile, pos, 0);
218 else {
219 return;
222 de_dbg(c, "value: %f", val);
225 static void do_object_int(deark *c, lctx *d, i64 pos, i64 dlen_raw)
227 unsigned int nbytes;
228 i64 n;
230 if(dlen_raw<0 || dlen_raw>3) return;
231 nbytes = 1U<<(unsigned int)dlen_raw;
232 n = dbuf_getint_ext(c->infile, pos, nbytes, 0, 1);
233 de_dbg(c, "value: %"I64_FMT, n);
236 static void do_object_date(deark *c, lctx *d, i64 pos)
238 double val_flt;
239 i64 val_int;
240 struct de_timestamp ts;
241 char timestamp_buf[64];
243 val_flt = dbuf_getfloat64x(c->infile, pos, 0);
244 val_int = (i64)val_flt;
245 // Epoch is Jan 1, 2001. There are 31 years, with 8 leap days, between
246 // that and the Unix time epoch.
247 de_unix_time_to_timestamp(val_int + ((365*31 + 8)*86400), &ts, 0x1);
248 de_timestamp_to_string(&ts, timestamp_buf, sizeof(timestamp_buf), 0);
249 de_dbg(c, "value: %f (%s)", val_flt, timestamp_buf);
252 // Returns 0 if we should stop processing the file
253 static int do_one_object_by_offset(deark *c, lctx *d, i64 pos1)
255 i64 pos = pos1;
256 u8 marker;
257 u8 m1, m2;
258 int has_size;
259 i64 dlen_raw;
260 const char *tn;
262 // In this format, it is easy for an aggregate object to contain itself, or an
263 // ancestor, making the number of objects infinite. We could detect this if we
264 // wanted, but doesn't really solve the problem. Even without recursion, the
265 // number of objects can grow exponentially, and a small file could easily
266 // contain trillions of objects. Instead, we'll enforce an arbitrary limit to
267 // the number of objects we decode.
268 // TODO: If we were to decode each aggregate object only once, would that be a
269 // good solution, or would it make the output less useful?
270 if(d->exceeded_max_objects) return 0;
271 if(d->object_count>=MAX_PLIST_OBJECTS) {
272 d->exceeded_max_objects = 1;
273 de_err(c, "Too many objects encountered (max=%d)", MAX_PLIST_OBJECTS);
274 return 0;
277 de_dbg(c, "object at %"I64_FMT, pos);
278 de_dbg_indent(c, 1);
280 d->object_count++;
282 if(pos<8 || pos>=c->infile->len-32) goto done;
284 marker = de_getbyte_p(&pos);
285 de_dbg(c, "marker: 0x%02x", (unsigned int)marker);
287 m1 = (marker&0xf0)>>4;
288 m2 = marker&0x0f;
290 tn = "?";
291 has_size = 0;
292 dlen_raw = 0;
294 switch(m1) {
295 case 0x0:
296 if(m2==0x8 || m2==0x9) {
297 tn = "bool";
299 break;
300 case 0x1:
301 tn = "int";
302 has_size = 1;
303 break;
304 case 0x2:
305 tn = "real";
306 has_size = 1;
307 break;
308 case 0x3:
309 if(m2==0x3) {
310 tn = "date";
312 break;
313 case 0x4:
314 tn = "binary data";
315 has_size = 1;
316 break;
317 case 0x5:
318 tn = "string";
319 has_size = 1;
320 break;
321 case 0x6:
322 tn = "UTF-16 string";
323 has_size = 1;
324 break;
325 case 0x8: // TODO
326 tn = "uid";
327 has_size = 1;
328 break;
329 case 0xa:
330 tn = "array";
331 has_size = 1;
332 break;
333 case 0xc:
334 tn = "set";
335 has_size = 1;
336 break;
337 case 0xd:
338 tn = "dict";
339 has_size = 1;
340 break;
343 de_dbg(c, "data type: %s", tn);
345 if(has_size) {
346 if(m2==0xf) {
347 u8 x;
348 unsigned int nbytes_in_len;
349 x = de_getbyte_p(&pos);
350 // 0x10 = size is a 1-byte int
351 // 0x11 = 2-byte int, 0x12 = 4-byte int, 0x13 = 8-byte int
352 if(x<0x10 || x>0x13) goto done;
353 nbytes_in_len = 1U<<(unsigned int)(x-0x10);
354 dlen_raw = dbuf_getint_ext(c->infile, pos, nbytes_in_len, 0, 0);
355 dbuf_constrain_length(c->infile, 0, &dlen_raw);
356 pos += (i64)nbytes_in_len;
358 else {
359 dlen_raw = (i64)m2;
361 de_dbg(c, "size (logical): %"I64_FMT, dlen_raw);
364 if(m1==0x0 && (m2==0x8 || m2==0x9)) {
365 de_dbg(c, "value: %s", (m2==0x8)?"false":"true");
367 else if(m1==0x1) {
368 do_object_int(c, d, pos, dlen_raw);
370 else if(m1==0x2) {
371 do_object_real(c, d, pos, dlen_raw);
373 else if(m1==0x3 && m2==0x3) {
374 do_object_date(c, d, pos);
376 else if(m1==0x4) {
377 de_dbg(c, "binary data at %"I64_FMT", len=%"I64_FMT, pos, dlen_raw);
378 de_dbg_indent(c, 1);
379 de_dbg_hexdump(c, c->infile, pos, dlen_raw, 256, NULL, 0x1);
380 de_dbg_indent(c, -1);
382 else if(m1==0x5) {
383 do_object_string(c, d, pos, dlen_raw);
385 else if(m1==0x6) {
386 do_object_utf16string(c, d, pos, dlen_raw);
388 else if(m1==0xa) {
389 do_object_array_or_set(c, d, tn, pos1, pos, dlen_raw);
391 else if(m1==0xc) {
392 do_object_array_or_set(c, d, tn, pos1, pos, dlen_raw);
394 else if(m1==0xd) {
395 do_object_dict(c, d, pos1, pos, dlen_raw);
397 else {
398 de_dbg(c, "[don't know how to decode this data type]");
401 done:
402 de_dbg_indent(c, -1);
403 return 1;
406 // Returns 0 if we should stop processing the file
407 static int do_one_object_by_refnum(deark *c, lctx *d, i64 refnum)
409 if(refnum<0 || refnum>=d->num_objrefs) return 1;
410 return do_one_object_by_offset(c, d, d->objref_table[refnum].offs);
413 static void read_offset_table(deark *c, lctx *d)
415 i64 k;
416 i64 pos = d->objref_table_start;
418 de_dbg(c, "objref table at %"I64_FMT, pos);
419 de_dbg_indent(c, 1);
421 d->objref_table = de_mallocarray(c, d->num_objrefs, sizeof(struct objref_struct));
423 for(k=0; k<d->num_objrefs; k++) {
424 i64 offs;
426 if(pos+(i64)d->nbytes_per_objref_table_entry > c->infile->len-32) break;
427 offs = dbuf_getint_ext(c->infile, pos, d->nbytes_per_objref_table_entry, 0, 0);
428 if(c->debug_level>=2)
429 de_dbg(c, "objref[%"I64_FMT"] offset: %"I64_FMT, k, offs);
430 dbuf_constrain_offset(c->infile, &offs);
431 d->objref_table[k].offs = (u32)offs;
432 pos += (i64)d->nbytes_per_objref_table_entry;
435 de_dbg_indent(c, -1);
438 static void de_run_plist(deark *c, de_module_params *mparams)
440 lctx *d = NULL;
442 d = de_malloc(c, sizeof(lctx));
444 if(c->infile->len>0xffffffffU) {
445 // We *could* support huge PLIST files, but until I learn that they
446 // are valid, for efficiency I'll make sure an offset can fit in
447 // 4 bytes.
448 de_err(c, "PLIST too large (%"I64_FMT")", c->infile->len);
449 goto done;
452 if(!do_header(c, d, 0)) goto done;
454 if(!do_trailer(c, d, c->infile->len-32)) goto done;
455 read_offset_table(c, d);
457 do_one_object_by_refnum(c, d, d->top_object_refnum);
459 done:
460 if(d) {
461 de_free(c, d->objref_table);
462 de_free(c, d);
466 static int de_identify_plist(deark *c)
468 if(!dbuf_memcmp(c->infile, 0, "bplist00", 8))
469 return 100;
470 return 0;
473 void de_module_plist(deark *c, struct deark_module_info *mi)
475 mi->id = "plist";
476 mi->desc = ".plist property list, binary format";
477 mi->run_fn = de_run_plist;
478 mi->identify_fn = de_identify_plist;