zip: Small change to attributes decompression
[deark.git] / src / fmtutil.c
blobab728413141eb6647b4604775d071496d8c2a924
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // This file is for format-specific functions that are used by multiple modules.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 void fmtutil_get_bmp_compression_name(u32 code, char *s, size_t s_len,
13 int is_os2v2)
15 const char *name1 = "?";
16 switch(code) {
17 case 0: name1 = "BI_RGB, uncompressed"; break;
18 case 1: name1 = "BI_RLE8"; break;
19 case 2: name1 = "BI_RLE4"; break;
20 case 3:
21 if(is_os2v2)
22 name1 = "Huffman 1D";
23 else
24 name1 = "BI_BITFIELDS, uncompressed";
25 break;
26 case 4:
27 if(is_os2v2)
28 name1 = "RLE24";
29 else
30 name1 = "BI_JPEG";
31 break;
32 case 5: name1 = "BI_PNG"; break;
34 de_strlcpy(s, name1, s_len);
37 // Gathers information about a DIB.
38 // If DE_BMPINFO_HAS_FILEHEADER flag is set, pos points to the BITMAPFILEHEADER.
39 // Otherwise, it points to the BITMAPINFOHEADER.
40 // Caller allocates bi.
41 // Returns 0 if BMP is invalid.
42 int fmtutil_get_bmpinfo(deark *c, dbuf *f, struct de_bmpinfo *bi, i64 pos,
43 i64 len, unsigned int flags)
45 i64 fhs; // file header size
46 i64 bmih_pos;
47 struct de_fourcc cmpr4cc;
48 char cmprname_dbgstr[80];
50 de_zeromem(bi, sizeof(struct de_bmpinfo));
51 de_zeromem(&cmpr4cc, sizeof(struct de_fourcc));
53 fhs = (flags & DE_BMPINFO_HAS_FILEHEADER) ? 14 : 0;
55 if(fhs+len < 16) return 0;
57 if(fhs) {
58 if(flags & DE_BMPINFO_HAS_HOTSPOT) {
59 bi->hotspot_x = (int)dbuf_getu16le(f, pos+6);
60 bi->hotspot_y = (int)dbuf_getu16le(f, pos+8);
61 de_dbg(c, "hotspot: (%d,%d)", bi->hotspot_x, bi->hotspot_y);
64 bi->bitsoffset = dbuf_getu32le(f, pos+10);
65 de_dbg(c, "bits offset: %d", (int)bi->bitsoffset);
68 bmih_pos = pos + fhs;
70 bi->infohdrsize = dbuf_getu32le(f, bmih_pos);
72 if(bi->infohdrsize==0x474e5089 && (flags & DE_BMPINFO_ICO_FORMAT)) {
73 // We don't examine PNG-formatted icons, but we can identify them.
74 bi->infohdrsize = 0;
75 bi->file_format = DE_BMPINFO_FMT_PNG;
76 return 1;
79 de_dbg(c, "info header size: %d", (int)bi->infohdrsize);
81 if(bi->infohdrsize==12) {
82 bi->bytes_per_pal_entry = 3;
83 bi->width = dbuf_getu16le(f, bmih_pos+4);
84 bi->height = dbuf_getu16le(f, bmih_pos+6);
85 bi->bitcount = dbuf_getu16le(f, bmih_pos+10);
87 else if(bi->infohdrsize>=16 && bi->infohdrsize<=124) {
88 bi->bytes_per_pal_entry = 4;
89 bi->width = dbuf_getu32le(f, bmih_pos+4);
90 bi->height = dbuf_geti32le(f, bmih_pos+8);
91 if(bi->height<0) {
92 bi->is_topdown = 1;
93 bi->height = -bi->height;
95 bi->bitcount = dbuf_getu16le(f, bmih_pos+14);
96 if(bi->infohdrsize>=20) {
97 bi->compression_field = (u32)dbuf_getu32le(f, bmih_pos+16);
98 if(flags & DE_BMPINFO_CMPR_IS_4CC) {
99 dbuf_read_fourcc(f, bmih_pos+16, &cmpr4cc, 4, 0x0);
102 if(bi->infohdrsize>=24) {
103 bi->sizeImage_field = dbuf_getu32le(f, bmih_pos+20);
105 if(bi->infohdrsize>=36) {
106 bi->pal_entries = dbuf_getu32le(f, bmih_pos+32);
109 else {
110 return 0;
113 if(flags & DE_BMPINFO_ICO_FORMAT) bi->height /= 2;
115 if(bi->bitcount>=1 && bi->bitcount<=8) {
116 if(bi->pal_entries==0) {
117 bi->pal_entries = de_pow2(bi->bitcount);
119 // I think the NumColors field (in icons) is supposed to be the maximum number of
120 // colors implied by the bit depth, not the number of colors in the palette.
121 bi->num_colors = de_pow2(bi->bitcount);
123 else {
124 // An arbitrary value. All that matters is that it's >=256.
125 bi->num_colors = 16777216;
128 de_dbg_dimensions(c, bi->width, bi->height);
129 de_dbg(c, "bit count: %d", (int)bi->bitcount);
131 if((flags & DE_BMPINFO_CMPR_IS_4CC) && (bi->compression_field>0xffff)) {
132 de_snprintf(cmprname_dbgstr, sizeof(cmprname_dbgstr), "'%s'", cmpr4cc.id_dbgstr);
134 else {
135 fmtutil_get_bmp_compression_name(bi->compression_field,
136 cmprname_dbgstr, sizeof(cmprname_dbgstr), 0);
138 de_dbg(c, "compression: %u (%s)", (unsigned int)bi->compression_field, cmprname_dbgstr);
140 if(bi->sizeImage_field!=0) {
141 de_dbg(c, "sizeImage: %u", (unsigned int)bi->sizeImage_field);
144 de_dbg(c, "palette entries: %u", (unsigned int)bi->pal_entries);
145 if(bi->pal_entries>256 && bi->bitcount>8) {
146 de_warn(c, "Ignoring bad palette size (%u entries)", (unsigned int)bi->pal_entries);
147 bi->pal_entries = 0;
150 bi->pal_bytes = bi->bytes_per_pal_entry*bi->pal_entries;
151 bi->size_of_headers_and_pal = fhs + bi->infohdrsize + bi->pal_bytes;
153 // FIXME: cmpr type 3 doesn't always mean BITFIELDS
154 if(bi->compression_field==3) {
155 bi->size_of_headers_and_pal += 12; // BITFIELDS
158 bi->is_compressed = !((bi->compression_field==0) ||
159 (bi->compression_field==3 && bi->bitcount>1));
161 if(!de_good_image_dimensions(c, bi->width, bi->height)) {
162 return 0;
165 // TODO: This needs work, to decide how to handle compressed images.
166 // TODO: What about BI_BITFIELDS images?
167 if(bi->compression_field==0) {
168 // Try to figure out the true size of the resource, minus any padding.
170 bi->rowspan = ((bi->bitcount*bi->width +31)/32)*4;
171 bi->foreground_size = bi->rowspan * bi->height;
172 de_dbg(c, "foreground size: %d", (int)bi->foreground_size);
174 if(flags & DE_BMPINFO_ICO_FORMAT) {
175 bi->mask_rowspan = ((bi->width +31)/32)*4;
176 bi->mask_size = bi->mask_rowspan * bi->height;
177 de_dbg(c, "mask size: %d", (int)bi->mask_size);
179 else {
180 bi->mask_size = 0;
183 bi->total_size = bi->size_of_headers_and_pal + bi->foreground_size + bi->mask_size;
185 else {
186 // Don't try to figure out the true size of compressed or other unusual images.
187 bi->total_size = len;
190 return 1;
193 // TODO: Document and review whether the bi->total_size and
194 // bi->size_of_headers_and_pal fields include the 14-byte fileheader.
195 void fmtutil_generate_bmpfileheader(deark *c, dbuf *outf, const struct de_bmpinfo *bi,
196 i64 file_size_override)
198 i64 file_size_to_write;
200 dbuf_write(outf, (const u8*)"BM", 2);
202 if(file_size_override)
203 file_size_to_write = file_size_override;
204 else
205 file_size_to_write = 14 + bi->total_size;
206 dbuf_writeu32le(outf, file_size_to_write);
208 dbuf_write_zeroes(outf, 4);
209 dbuf_writeu32le(outf, 14 + bi->size_of_headers_and_pal);
212 // Extracts Exif if extract_level>=2, or "extractexif" option is set.
213 // Otherwise decodes.
214 void fmtutil_handle_exif2(deark *c, i64 pos, i64 len,
215 u32 *returned_flags, u32 *orientation, u32 *exifversion)
217 int user_opt;
218 de_module_params *mparams = NULL;
220 if(returned_flags) {
221 *returned_flags = 0;
224 user_opt = de_get_ext_option_bool(c, "extractexif", -1);
225 if(user_opt==1 || (c->extract_level>=2 && user_opt!=0)) {
226 // Writing raw Exif data isn't very useful, but do so if requested.
227 dbuf_create_file_from_slice(c->infile, pos, len, "exif.tif", NULL, DE_CREATEFLAG_IS_AUX);
229 // Caller will have to reprocess the Exif file to extract anything from it.
230 return;
233 mparams = de_malloc(c, sizeof(de_module_params));
234 mparams->in_params.codes = "E";
236 de_run_module_by_id_on_slice(c, "tiff", mparams, c->infile, pos, len);
237 if(returned_flags) {
238 // FIXME: It's an unfortunate bug that returned_flags does not work if
239 // extract_level>=2, but for now there's no reasonable way to fix it.
240 // We have to process -- not extract -- the Exif chunk if we want to
241 // know what's in it.
242 *returned_flags = mparams->out_params.flags;
243 if((mparams->out_params.flags & 0x20) && orientation) {
244 *orientation = mparams->out_params.uint1;
247 if((mparams->out_params.flags & 0x40) && exifversion) {
248 *exifversion = mparams->out_params.uint2;
252 de_free(c, mparams);
255 void fmtutil_handle_exif(deark *c, i64 pos, i64 len)
257 fmtutil_handle_exif2(c, pos, len, NULL, NULL, NULL);
260 static void wrap_in_tiff(deark *c, dbuf *f, i64 dpos, i64 dlen,
261 const char *swstring, unsigned int tag, const char *ext, unsigned int createflags);
263 // Either extract IPTC-IIM data to a file, or drill down into it.
264 // flags:
265 // 0 = default behavior (currently: depends on c->extract_level and options)
266 // 2 = this came from our TIFF-encapsulated format
267 void fmtutil_handle_iptc(deark *c, dbuf *f, i64 pos, i64 len,
268 unsigned int flags)
270 int should_decode;
271 int should_extract;
272 int user_opt;
273 int extract_fmt = 1; // 0=raw, 1=TIFF-wrapped
275 if(len<1) return;
277 user_opt = de_get_ext_option_bool(c, "extractiptc", -1);
279 if(user_opt==1 || (c->extract_level>=2 && user_opt!=0)) {
280 should_decode = 0;
281 should_extract = 1;
282 if(flags&0x2) {
283 // Avoid "extracting" in a way that would just recreate the exact same file.
284 extract_fmt = 0;
287 else {
288 should_decode = 1;
289 should_extract = 0;
292 if(should_decode) {
293 de_run_module_by_id_on_slice(c, "iptc", NULL, f, pos, len);
296 if(should_extract && extract_fmt==0) {
297 dbuf_create_file_from_slice(f, pos, len, "iptc", NULL, DE_CREATEFLAG_IS_AUX);
299 else if(should_extract && extract_fmt==1) {
300 wrap_in_tiff(c, f, pos, len, "Deark extracted IPTC", 33723, "iptctiff",
301 DE_CREATEFLAG_IS_AUX);
305 // If oparams is not NULL, if must be initialized by the caller. If the data is
306 // decoded, oparams will be used by the submodule, and values may be returned in
307 // it.
308 // flags:
309 // 0 = default behavior (currently: always decode)
310 // 1 = always write to file
311 // 2 = this came from our TIFF-encapsulated format
312 void fmtutil_handle_photoshop_rsrc2(deark *c, dbuf *f, i64 pos, i64 len,
313 unsigned int flags, struct de_module_out_params *oparams)
315 int should_decode;
316 int should_extract;
317 int extract_fmt = 1; // 0=raw, 1=TIFF-wrapped
319 if(flags&0x1) {
320 should_decode = 0;
321 should_extract = 1;
323 else if(de_get_ext_option_bool(c, "extract8bim", 0)) {
324 should_extract = 1;
325 should_decode = 0;
326 if(flags&0x2) {
327 // Avoid "extracting" in a way that would just recreate the exact same file.
328 extract_fmt = 0;
331 else {
332 should_decode = 1;
333 should_extract = 0;
336 if(should_decode) {
337 de_module_params *mparams = NULL;
339 mparams = de_malloc(c, sizeof(de_module_params));
340 mparams->in_params.codes = "R";
341 if(oparams) {
342 // Since mparams->out_params is an embedded struct, not a pointer,
343 // we have to copy oparam's fields to and from it.
344 mparams->out_params = *oparams; // struct copy
346 de_run_module_by_id_on_slice(c, "psd", mparams, f, pos, len);
347 if(oparams) {
348 *oparams = mparams->out_params; // struct copy
350 de_free(c, mparams);
353 if(should_extract && extract_fmt==0) {
354 dbuf_create_file_from_slice(f, pos, len, "8bim", NULL, DE_CREATEFLAG_IS_AUX);
356 else if(should_extract && extract_fmt==1) {
357 wrap_in_tiff(c, f, pos, len, "Deark extracted 8BIM", 34377, "8bimtiff",
358 DE_CREATEFLAG_IS_AUX);
362 void fmtutil_handle_photoshop_rsrc(deark *c, dbuf *f, i64 pos, i64 len,
363 unsigned int flags)
365 fmtutil_handle_photoshop_rsrc2(c, f, pos, len, flags, NULL);
368 // flags:
369 // 0 = default behavior (currently: decode unless -opt extractplist was used)
370 void fmtutil_handle_plist(deark *c, dbuf *f, i64 pos, i64 len,
371 de_finfo *fi, unsigned int flags)
373 if(de_get_ext_option_bool(c, "extractplist", 0)) {
374 dbuf_create_file_from_slice(f, pos, len,
375 fi?NULL:"plist", fi, DE_CREATEFLAG_IS_AUX);
376 return;
379 de_run_module_by_id_on_slice(c, "plist", NULL, f, pos, len);
382 // Caller allocates sdd. It does not need to be initialized.
383 // flags: 0x1 = Print a debug message if signature is found.
384 int fmtutil_detect_SAUCE(deark *c, dbuf *f, struct de_SAUCE_detection_data *sdd,
385 unsigned int flags)
387 de_zeromem(sdd, sizeof(struct de_SAUCE_detection_data));
388 if(f->len<128) return 0;
389 if(dbuf_memcmp(f, f->len-128, "SAUCE00", 7)) return 0;
390 if(flags & 0x1) {
391 de_dbg(c, "SAUCE metadata, signature at %"I64_FMT, f->len-128);
393 sdd->has_SAUCE = 1;
394 sdd->data_type = dbuf_getbyte(f, f->len-128+94);
395 sdd->file_type = dbuf_getbyte(f, f->len-128+95);
396 return (int)sdd->has_SAUCE;
399 void fmtutil_handle_SAUCE(deark *c, dbuf *f, struct de_SAUCE_info *si)
401 de_module_params mparams;
403 de_zeromem(&mparams, sizeof(de_module_params));
404 mparams.out_params.obj1 = (void*)si;
405 de_run_module_by_id_on_slice(c, "sauce", &mparams, f, 0, f->len);
408 struct de_SAUCE_info *fmtutil_create_SAUCE(deark *c)
410 return de_malloc(c, sizeof(struct de_SAUCE_info));
413 void fmtutil_free_SAUCE(deark *c, struct de_SAUCE_info *si)
415 if(!si) return;
416 ucstring_destroy(si->title);
417 ucstring_destroy(si->artist);
418 ucstring_destroy(si->organization);
419 ucstring_destroy(si->comment);
420 de_free(c, si);
423 // Helper functions for the "boxes" (or "atoms") format used by MP4, JPEG 2000, etc.
425 double dbuf_fmtutil_read_fixed_16_16(dbuf *f, i64 pos)
427 i64 n;
428 n = dbuf_geti32be(f, pos);
429 return ((double)n)/65536.0;
432 static void do_box_sequence(deark *c, struct de_boxesctx *bctx,
433 i64 pos1, i64 len, i64 max_nboxes, int level);
435 // Make a printable version of a UUID (or a big-endian GUID).
436 // Caller supplies s.
437 void fmtutil_render_uuid(deark *c, const u8 *uuid, char *s, size_t s_len)
439 de_snprintf(s, s_len, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
440 uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7],
441 uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15]);
// Swap some bytes to convert a (little-endian) GUID to a UUID, in-place.
// The first field (4 bytes) and the next two fields (2 bytes each) are
// byte-reversed; bytes 8-15 are left alone.
void fmtutil_guid_to_uuid(u8 *id)
{
	u8 t;

	// 4-byte field
	t = id[0]; id[0] = id[3]; id[3] = t;
	t = id[1]; id[1] = id[2]; id[2] = t;
	// two 2-byte fields
	t = id[4]; id[4] = id[5]; id[5] = t;
	t = id[6]; id[6] = id[7]; id[7] = t;
}
454 #define DE_BOX_uuid 0x75756964U
456 static int do_box(deark *c, struct de_boxesctx *bctx, i64 pos, i64 len,
457 int level, i64 *pbytes_consumed)
459 i64 size32, size64;
460 i64 header_len; // Not including UUIDs
461 i64 payload_len; // Including UUIDs
462 i64 total_len;
463 struct de_fourcc box4cc;
464 char uuid_string[50];
465 int ret;
466 int retval = 0;
467 struct de_boxdata *parentbox;
468 struct de_boxdata *curbox;
470 parentbox = bctx->curbox;
471 bctx->curbox = de_malloc(c, sizeof(struct de_boxdata));
472 curbox = bctx->curbox;
473 curbox->parent = parentbox;
475 if(len<8) {
476 de_dbg(c, "(ignoring %d extra bytes at %"I64_FMT")", (int)len, pos);
477 goto done;
480 size32 = dbuf_getu32be(bctx->f, pos);
481 dbuf_read_fourcc(bctx->f, pos+4, &box4cc, 4, 0x0);
482 curbox->boxtype = box4cc.id;
484 if(size32>=8) {
485 header_len = 8;
486 payload_len = size32-8;
488 else if(size32==0) {
489 header_len = 8;
490 payload_len = len-8;
492 else if(size32==1) {
493 if(len<16) {
494 de_dbg(c, "(ignoring %d extra bytes at %"I64_FMT")", (int)len, pos);
495 goto done;
497 header_len = 16;
498 size64 = dbuf_geti64be(bctx->f, pos+8);
499 if(size64<16) goto done;
500 payload_len = size64-16;
502 else {
503 de_err(c, "Invalid or unsupported box format");
504 goto done;
507 total_len = header_len + payload_len;
509 if(curbox->boxtype==DE_BOX_uuid && payload_len>=16) {
510 curbox->is_uuid = 1;
511 dbuf_read(bctx->f, curbox->uuid, pos+header_len, 16);
514 curbox->level = level;
515 curbox->box_pos = pos;
516 curbox->box_len = total_len;
517 curbox->payload_pos = pos+header_len;
518 curbox->payload_len = payload_len;
519 if(curbox->is_uuid) {
520 curbox->payload_pos += 16;
521 curbox->payload_len -= 16;
524 if(bctx->identify_box_fn) {
525 bctx->identify_box_fn(c, bctx);
528 if(c->debug_level>0) {
529 char name_str[80];
531 if(curbox->box_name) {
532 de_snprintf(name_str, sizeof(name_str), " (%s)", curbox->box_name);
534 else {
535 name_str[0] = '\0';
538 if(curbox->is_uuid) {
539 fmtutil_render_uuid(c, curbox->uuid, uuid_string, sizeof(uuid_string));
540 de_dbg(c, "box '%s'{%s}%s at %"I64_FMT", len=%"I64_FMT,
541 box4cc.id_dbgstr, uuid_string, name_str,
542 pos, total_len);
544 else {
545 de_dbg(c, "box '%s'%s at %"I64_FMT", len=%"I64_FMT", dlen=%"I64_FMT,
546 box4cc.id_dbgstr, name_str, pos,
547 total_len, payload_len);
551 if(total_len > len) {
552 de_err(c, "Invalid oversized box, or unexpected end of file "
553 "(box at %"I64_FMT" ends at %"I64_FMT", "
554 "parent ends at %"I64_FMT")",
555 pos, pos+total_len, pos+len);
556 goto done;
559 de_dbg_indent(c, 1);
560 ret = bctx->handle_box_fn(c, bctx);
561 de_dbg_indent(c, -1);
562 if(!ret) goto done;
564 if(curbox->is_superbox) {
565 i64 children_pos, children_len;
566 i64 max_nchildren;
568 de_dbg_indent(c, 1);
569 children_pos = curbox->payload_pos + curbox->extra_bytes_before_children;
570 children_len = curbox->payload_len - curbox->extra_bytes_before_children;
571 max_nchildren = (curbox->num_children_is_known) ? curbox->num_children : -1;
572 do_box_sequence(c, bctx, children_pos, children_len, max_nchildren, level+1);
573 de_dbg_indent(c, -1);
576 *pbytes_consumed = total_len;
577 retval = 1;
579 done:
580 de_free(c, bctx->curbox);
581 bctx->curbox = parentbox; // Restore the curbox pointer
582 return retval;
585 // max_nboxes: -1 = no maximum
586 static void do_box_sequence(deark *c, struct de_boxesctx *bctx,
587 i64 pos1, i64 len, i64 max_nboxes, int level)
589 i64 pos;
590 i64 box_len;
591 i64 endpos;
592 int ret;
593 i64 box_count = 0;
595 if(level >= 32) { // An arbitrary recursion limit.
596 return;
599 pos = pos1;
600 endpos = pos1 + len;
602 while(pos < endpos) {
603 if(max_nboxes>=0 && box_count>=max_nboxes) break;
604 ret = do_box(c, bctx, pos, endpos-pos, level, &box_len);
605 if(!ret) break;
606 box_count++;
607 pos += box_len;
611 // Handle some box types that might be common to multiple formats.
612 // This function should be called as needed by the client's box handler function.
613 // TODO: A way to identify (name) the boxes that we handle here.
614 int fmtutil_default_box_handler(deark *c, struct de_boxesctx *bctx)
616 struct de_boxdata *curbox = bctx->curbox;
618 if(curbox->is_uuid) {
619 if(!de_memcmp(curbox->uuid, "\xb1\x4b\xf8\xbd\x08\x3d\x4b\x43\xa5\xae\x8c\xd7\xd5\xa6\xce\x03", 16)) {
620 de_dbg(c, "GeoTIFF data at %"I64_FMT", len=%"I64_FMT, curbox->payload_pos, curbox->payload_len);
621 dbuf_create_file_from_slice(bctx->f, curbox->payload_pos, curbox->payload_len, "geo.tif", NULL, DE_CREATEFLAG_IS_AUX);
623 else if(!de_memcmp(curbox->uuid, "\xbe\x7a\xcf\xcb\x97\xa9\x42\xe8\x9c\x71\x99\x94\x91\xe3\xaf\xac", 16)) {
624 de_dbg(c, "XMP data at %"I64_FMT", len=%"I64_FMT, curbox->payload_pos, curbox->payload_len);
625 dbuf_create_file_from_slice(bctx->f, curbox->payload_pos, curbox->payload_len, "xmp", NULL, DE_CREATEFLAG_IS_AUX);
627 else if(!de_memcmp(curbox->uuid, "\x2c\x4c\x01\x00\x85\x04\x40\xb9\xa0\x3e\x56\x21\x48\xd6\xdf\xeb", 16)) {
628 de_dbg(c, "Photoshop resources at %"I64_FMT", len=%"I64_FMT, curbox->payload_pos, curbox->payload_len);
629 de_dbg_indent(c, 1);
630 fmtutil_handle_photoshop_rsrc(c, bctx->f, curbox->payload_pos, curbox->payload_len, 0x0);
631 de_dbg_indent(c, -1);
633 else if(!de_memcmp(curbox->uuid, "\x05\x37\xcd\xab\x9d\x0c\x44\x31\xa7\x2a\xfa\x56\x1f\x2a\x11\x3e", 16) ||
634 !de_memcmp(curbox->uuid, "JpgTiffExif->JP2", 16))
636 de_dbg(c, "Exif data at %"I64_FMT", len=%"I64_FMT, curbox->payload_pos, curbox->payload_len);
637 de_dbg_indent(c, 1);
638 fmtutil_handle_exif(c, curbox->payload_pos, curbox->payload_len);
639 de_dbg_indent(c, -1);
642 return 1;
645 void fmtutil_read_boxes_format(deark *c, struct de_boxesctx *bctx)
647 if(!bctx->f || !bctx->handle_box_fn) return; // Internal error
648 if(bctx->curbox) return; // Internal error
649 do_box_sequence(c, bctx, 0, bctx->f->len, -1, 0);
// Scales a sample x from the range 0..7 to the range 0..255. The 0.5
// that is added makes the conversion to integer round to nearest.
static u8 scale_7_to_255(u8 x)
{
	double scaled = ((double)x)*(255.0/7.0);

	return (u8)(0.5+scaled);
}
// Scales a sample x from the range 0..15 to the range 0..255.
// (255/15 is exactly 17.)
static u8 scale_15_to_255(u8 x)
{
	u8 scaled = x*17;

	return scaled;
}
662 void fmtutil_read_atari_palette(deark *c, dbuf *f, i64 pos,
663 de_color *dstpal, i64 ncolors_to_read, i64 ncolors_used, unsigned int flags)
665 i64 i;
666 unsigned int n;
667 int pal_bits = 0; // 9, 12, or 15. 0 = not yet determined
668 u8 cr, cg, cb;
669 u8 cr1, cg1, cb1;
670 char cbuf[32];
671 char tmps[64];
672 const char *s;
674 s = de_get_ext_option(c, "atari:palbits");
675 if(s) {
676 pal_bits = de_atoi(s);
679 if(pal_bits==0 && (flags&DE_FLAG_ATARI_15BIT_PAL)) {
680 pal_bits = 15;
683 if(pal_bits==0) {
684 // Pre-scan the palette, and try to guess whether Atari STE-style 12-bit
685 // colors are used, instead of the usual 9-bit colors.
686 // I don't know the best way to do this. Sometimes the 4th bit in each
687 // nibble is used for extra color detail, and sometimes it just seems to
688 // contain garbage. Maybe the logic should also depend on the file
689 // format, or the number of colors.
690 int bit_3_used = 0;
691 int nibble_3_used = 0;
693 for(i=0; i<ncolors_to_read; i++) {
694 n = (unsigned int)dbuf_getu16be(f, pos + i*2);
695 if(n&0xf000) {
696 nibble_3_used = 1;
698 if(n&0x0888) {
699 bit_3_used = 1;
703 if(bit_3_used && !nibble_3_used) {
704 de_dbg(c, "12-bit palette colors detected");
705 pal_bits = 12;
709 if(pal_bits<12) { // Default to 9 if <12
710 pal_bits = 9;
712 else if(pal_bits<15) {
713 pal_bits = 12;
715 else {
716 pal_bits = 15;
719 for(i=0; i<ncolors_to_read; i++) {
720 n = (unsigned int)dbuf_getu16be(f, pos + 2*i);
722 if(pal_bits==15) {
723 cr1 = (u8)((n>>6)&0x1c);
724 if(n&0x0800) cr1+=2;
725 if(n&0x8000) cr1++;
726 cg1 = (u8)((n>>2)&0x1c);
727 if(n&0x0080) cg1+=2;
728 if(n&0x4000) cg1++;
729 cb1 = (u8)((n<<2)&0x1c);
730 if(n&0x0008) cb1+=2;
731 if(n&0x2000) cb1++;
732 cr = de_scale_n_to_255(31, cr1);
733 cg = de_scale_n_to_255(31, cg1);
734 cb = de_scale_n_to_255(31, cb1);
735 de_snprintf(cbuf, sizeof(cbuf), "%2d,%2d,%2d",
736 (int)cr1, (int)cg1, (int)cb1);
738 else if(pal_bits==12) {
739 cr1 = (u8)((n>>7)&0x0e);
740 if(n&0x800) cr1++;
741 cg1 = (u8)((n>>3)&0x0e);
742 if(n&0x080) cg1++;
743 cb1 = (u8)((n<<1)&0x0e);
744 if(n&0x008) cb1++;
745 cr = scale_15_to_255(cr1);
746 cg = scale_15_to_255(cg1);
747 cb = scale_15_to_255(cb1);
748 de_snprintf(cbuf, sizeof(cbuf), "%2d,%2d,%2d",
749 (int)cr1, (int)cg1, (int)cb1);
751 else {
752 cr1 = (u8)((n>>8)&0x07);
753 cg1 = (u8)((n>>4)&0x07);
754 cb1 = (u8)(n&0x07);
755 cr = scale_7_to_255(cr1);
756 cg = scale_7_to_255(cg1);
757 cb = scale_7_to_255(cb1);
758 de_snprintf(cbuf, sizeof(cbuf), "%d,%d,%d",
759 (int)cr1, (int)cg1, (int)cb1);
762 dstpal[i] = DE_MAKE_RGB(cr, cg, cb);
763 de_snprintf(tmps, sizeof(tmps), "0x%04x (%s) "DE_CHAR_RIGHTARROW" ", n, cbuf);
764 de_dbg_pal_entry2(c, i, dstpal[i], tmps, NULL,
765 (i>=ncolors_used)?" [unused]":"");
770 * Given an x-coordinate and a color index, returns the corresponding
771 * Spectrum palette index.
773 * by Steve Belczyk; placed in the public domain December, 1990.
774 * [Adapted for Deark.]
776 static unsigned int spectrum512_FindIndex(i64 x, unsigned int c)
778 i64 x1;
780 x1 = 10 * (i64)c;
782 if (c & 1) /* If c is odd */
783 x1 = x1 - 5;
784 else /* If c is even */
785 x1 = x1 + 1;
787 if (x >= x1 && x < x1+160)
788 c = c + 16;
789 else if (x >= x1+160)
790 c = c + 32;
792 return c;
795 static int decode_atari_image_paletted(deark *c, struct atari_img_decode_data *adata)
797 i64 i, j;
798 i64 plane;
799 i64 rowspan;
800 u8 b;
801 u32 v;
802 i64 planespan;
803 i64 ncolors;
805 planespan = 2*((adata->w+15)/16);
806 rowspan = planespan*adata->bpp;
807 if(adata->ncolors>0)
808 ncolors = adata->ncolors;
809 else
810 ncolors = ((i64)1)<<adata->bpp;
812 for(j=0; j<adata->h; j++) {
813 for(i=0; i<adata->w; i++) {
814 v = 0;
816 for(plane=0; plane<adata->bpp; plane++) {
817 if(adata->was_compressed==0) {
818 // TODO: Simplify this.
819 if(adata->bpp==1) {
820 b = de_get_bits_symbol(adata->unc_pixels, 1, j*rowspan, i);
822 else if(adata->bpp==2) {
823 b = de_get_bits_symbol(adata->unc_pixels, 1,
824 j*rowspan + 2*plane + (i/16)*2, i);
826 else if(adata->bpp==4) {
827 b = de_get_bits_symbol(adata->unc_pixels, 1,
828 j*rowspan + 2*plane + (i/2-(i/2)%16)+8*((i%32)/16), i%16);
830 else if(adata->bpp==8) {
831 b = de_get_bits_symbol(adata->unc_pixels, 1,
832 j*rowspan + 2*plane + (i-i%16), i%16);
834 else {
835 b = 0;
838 else {
839 b = de_get_bits_symbol(adata->unc_pixels, 1, j*rowspan + plane*planespan, i);
841 if(b) v |= 1<<plane;
844 if(adata->is_spectrum512) {
845 v = spectrum512_FindIndex(i, v);
846 if(j>0) {
847 v += (unsigned int)(48*(j));
850 if(v>=(unsigned int)ncolors) v=(unsigned int)(ncolors-1);
852 de_bitmap_setpixel_rgb(adata->img, i, j, adata->pal[v]);
855 return 1;
858 static int decode_atari_image_16(deark *c, struct atari_img_decode_data *adata)
860 i64 i, j;
861 i64 rowspan;
862 u32 v;
864 rowspan = adata->w * 2;
866 for(j=0; j<adata->h; j++) {
867 for(i=0; i<adata->w; i++) {
868 v = (u32)dbuf_getu16be(adata->unc_pixels, j*rowspan + 2*i);
869 v = de_rgb565_to_888(v);
870 de_bitmap_setpixel_rgb(adata->img, i, j,v);
873 return 1;
876 int fmtutil_atari_decode_image(deark *c, struct atari_img_decode_data *adata)
878 switch(adata->bpp) {
879 case 16:
880 return decode_atari_image_16(c, adata);
881 case 8: case 4: case 2: case 1:
882 return decode_atari_image_paletted(c, adata);
885 de_err(c, "Unsupported bits/pixel (%d)", (int)adata->bpp);
886 return 0;
889 void fmtutil_atari_set_standard_density(deark *c, struct atari_img_decode_data *adata,
890 de_finfo *fi)
892 switch(adata->bpp) {
893 case 4:
894 fi->density.code = DE_DENSITY_UNK_UNITS;
895 fi->density.xdens = 240.0;
896 fi->density.ydens = 200.0;
897 break;
898 case 2:
899 fi->density.code = DE_DENSITY_UNK_UNITS;
900 fi->density.xdens = 480.0;
901 fi->density.ydens = 200.0;
902 break;
903 case 1:
904 fi->density.code = DE_DENSITY_UNK_UNITS;
905 fi->density.xdens = 480.0;
906 fi->density.ydens = 400.0;
907 break;
911 void fmtutil_atari_help_palbits(deark *c)
913 de_msg(c, "-opt atari:palbits=<9|12|15> : Numer of significant bits "
914 "per palette color");
// Chunk IDs, as big-endian FOURCC values.
#define CODE__c_  0x28632920U // "(c) "
#define CODE_ANNO 0x414e4e4fU // "ANNO"
#define CODE_AUTH 0x41555448U // "AUTH"
#define CODE_NAME 0x4e414d45U // "NAME"
#define CODE_TEXT 0x54455854U // "TEXT"
#define CODE_RIFF 0x52494646U // "RIFF"
924 static void do_iff_text_chunk(deark *c, struct de_iffctx *ictx, i64 dpos, i64 dlen,
925 const char *name)
927 de_ucstring *s = NULL;
929 if(dlen<1) return;
930 s = ucstring_create(c);
931 dbuf_read_to_ucstring_n(ictx->f,
932 dpos, dlen, DE_DBG_MAX_STRLEN,
933 s, DE_CONVFLAG_STOP_AT_NUL, ictx->input_encoding);
934 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz(s));
935 ucstring_destroy(s);
938 static void do_iff_anno(deark *c, struct de_iffctx *ictx, i64 pos, i64 len)
940 i64 foundpos;
942 if(len<1) return;
944 // Some ANNO chunks seem to be padded with one or more NUL bytes. Probably
945 // best not to save them.
946 if(dbuf_search_byte(ictx->f, 0x00, pos, len, &foundpos)) {
947 len = foundpos - pos;
949 if(len<1) return;
950 if(c->extract_level>=2) {
951 dbuf_create_file_from_slice(ictx->f, pos, len, "anno.txt", NULL, DE_CREATEFLAG_IS_AUX);
953 else {
954 de_ucstring *s = NULL;
955 s = ucstring_create(c);
956 dbuf_read_to_ucstring_n(ictx->f, pos, len, DE_DBG_MAX_STRLEN, s, 0, ictx->input_encoding);
957 de_dbg(c, "annotation: \"%s\"", ucstring_getpsz(s));
958 ucstring_destroy(s);
962 void fmtutil_default_iff_chunk_identify(deark *c, struct de_iffctx *ictx)
964 const char *name = NULL;
966 switch(ictx->chunkctx->chunk4cc.id) {
967 case CODE__c_ : name="copyright"; break;
968 case CODE_ANNO: name="annotation"; break;
969 case CODE_AUTH: name="author"; break;
972 if(name) {
973 ictx->chunkctx->chunk_name = name;
977 // Note that some of these chunks are *not* defined in the generic IFF
978 // specification.
979 // They might be defined in the 8SVX specification. They seem to have
980 // become unofficial standard chunks.
981 static int de_fmtutil_default_iff_chunk_handler(deark *c, struct de_iffctx *ictx)
983 i64 dpos = ictx->chunkctx->dpos;
984 i64 dlen = ictx->chunkctx->dlen;
985 u32 chunktype = ictx->chunkctx->chunk4cc.id;
987 switch(chunktype) {
988 // Note that chunks appearing here should also be listed below,
989 // in de_fmtutil_is_standard_iff_chunk().
990 case CODE__c_:
991 do_iff_text_chunk(c, ictx, dpos, dlen, "copyright");
992 break;
993 case CODE_ANNO:
994 do_iff_anno(c, ictx, dpos, dlen);
995 break;
996 case CODE_AUTH:
997 do_iff_text_chunk(c, ictx, dpos, dlen, "author");
998 break;
999 case CODE_NAME:
1000 do_iff_text_chunk(c, ictx, dpos, dlen, "name");
1001 break;
1002 case CODE_TEXT:
1003 do_iff_text_chunk(c, ictx, dpos, dlen, "text");
1004 break;
1007 // Note we do not set ictx->handled. The caller is responsible for that.
1008 return 1;
1011 // ictx can be NULL
1012 int fmtutil_is_standard_iff_chunk(deark *c, struct de_iffctx *ictx,
1013 u32 ct)
1015 switch(ct) {
1016 case CODE__c_:
1017 case CODE_ANNO:
1018 case CODE_AUTH:
1019 case CODE_NAME:
1020 case CODE_TEXT:
1021 return 1;
1023 return 0;
1026 static void fourcc_clear(struct de_fourcc *fourcc)
1028 de_zeromem(fourcc, sizeof(struct de_fourcc));
1031 static int do_iff_chunk_sequence(deark *c, struct de_iffctx *ictx,
1032 i64 pos1, i64 len, int level);
1034 // Returns 0 if we can't continue
1035 static int do_iff_chunk(deark *c, struct de_iffctx *ictx, i64 pos, i64 bytes_avail,
1036 int level, i64 *pbytes_consumed)
1038 int ret;
1039 i64 chunk_dlen_raw;
1040 i64 chunk_dlen_padded;
1041 i64 data_bytes_avail;
1042 i64 hdrsize;
1043 struct de_iffchunkctx chunkctx;
1044 int saved_indent_level;
1045 int retval = 0;
1046 char name_str[80];
1048 de_zeromem(&chunkctx, sizeof(struct de_iffchunkctx));
1050 de_dbg_indent_save(c, &saved_indent_level);
1052 hdrsize = 4+ictx->sizeof_len;
1053 if(bytes_avail<hdrsize) {
1054 de_warn(c, "Ignoring %"I64_FMT" bytes at %"I64_FMT"; too small "
1055 "to be a chunk", bytes_avail, pos);
1056 goto done;
1058 data_bytes_avail = bytes_avail-hdrsize;
1060 dbuf_read_fourcc(ictx->f, pos, &chunkctx.chunk4cc, 4,
1061 ictx->reversed_4cc ? DE_4CCFLAG_REVERSED : 0x0);
1062 if(chunkctx.chunk4cc.id==0 && level==0) {
1063 de_warn(c, "Chunk ID not found at %"I64_FMT"; assuming the data ends "
1064 "here", pos);
1065 goto done;
1068 if(ictx->sizeof_len==2) {
1069 chunk_dlen_raw = dbuf_getu16x(ictx->f, pos+4, ictx->is_le);
1071 else {
1072 chunk_dlen_raw = dbuf_getu32x(ictx->f, pos+4, ictx->is_le);
1074 chunkctx.dlen = chunk_dlen_raw;
1075 chunkctx.dpos = pos+hdrsize;
1077 // TODO: Setting these fields (prior to the identify function) is enough
1078 // for now, but we should also set the other fields here if we can.
1079 ictx->level = level;
1080 ictx->chunkctx = &chunkctx;
1082 if(ictx->preprocess_chunk_fn) {
1083 ictx->preprocess_chunk_fn(c, ictx);
1086 if(chunkctx.chunk_name) {
1087 de_snprintf(name_str, sizeof(name_str), " (%s)", chunkctx.chunk_name);
1089 else {
1090 name_str[0] = '\0';
1093 de_dbg(c, "chunk '%s'%s at %"I64_FMT", dpos=%"I64_FMT", dlen=%"I64_FMT,
1094 chunkctx.chunk4cc.id_dbgstr, name_str, pos,
1095 chunkctx.dpos, chunkctx.dlen);
1096 de_dbg_indent(c, 1);
1098 if(chunkctx.dlen > data_bytes_avail) {
1099 int should_warn = 1;
1101 if(chunkctx.chunk4cc.id==CODE_RIFF && pos==0 && bytes_avail==ictx->f->len) {
1102 // Hack:
1103 // This apparent error, in which the RIFF chunk's length field gives the
1104 // length of the entire file, is too common (particularly in .ani files)
1105 // to warn about.
1106 should_warn = 0;
1109 if(should_warn) {
1110 de_warn(c, "Invalid oversized chunk, or unexpected end of file "
1111 "(chunk at %d ends at %" I64_FMT ", "
1112 "parent ends at %" I64_FMT ")",
1113 (int)pos, chunkctx.dlen+chunkctx.dpos, pos+bytes_avail);
1116 chunkctx.dlen = data_bytes_avail; // Try to continue
1117 de_dbg(c, "adjusting chunk data len to %"I64_FMT, chunkctx.dlen);
1120 chunk_dlen_padded = de_pad_to_n(chunkctx.dlen, ictx->alignment);
1121 *pbytes_consumed = hdrsize + chunk_dlen_padded;
1123 // We've set *pbytes_consumed, so we can return "success"
1124 retval = 1;
1126 // Set ictx fields, prior to calling the handler
1127 chunkctx.pos = pos;
1128 chunkctx.len = bytes_avail;
1129 ictx->handled = 0;
1130 ictx->is_std_container = 0;
1131 ictx->is_raw_container = 0;
1133 ret = ictx->handle_chunk_fn(c, ictx);
1134 if(!ret) {
1135 retval = 0;
1136 goto done;
1139 if(ictx->is_std_container || ictx->is_raw_container) {
1140 i64 contents_dpos, contents_dlen;
1142 ictx->chunkctx = NULL;
1143 ictx->curr_container_fmt4cc = chunkctx.chunk4cc;
1144 fourcc_clear(&ictx->curr_container_contentstype4cc);
1146 if(ictx->is_std_container) {
1147 contents_dpos = chunkctx.dpos+4;
1148 contents_dlen = chunkctx.dlen-4;
1150 // First 4 bytes of payload are the "contents type" or "FORM type"
1151 dbuf_read_fourcc(ictx->f, chunkctx.dpos, &ictx->curr_container_contentstype4cc, 4,
1152 ictx->reversed_4cc ? DE_4CCFLAG_REVERSED : 0);
1154 if(level==0) {
1155 ictx->main_fmt4cc = ictx->curr_container_fmt4cc;
1156 ictx->main_contentstype4cc = ictx->curr_container_contentstype4cc; // struct copy
1158 de_dbg(c, "contents type: '%s'", ictx->curr_container_contentstype4cc.id_dbgstr);
1160 if(ictx->on_std_container_start_fn) {
1161 // Call only for standard-format containers.
1162 ret = ictx->on_std_container_start_fn(c, ictx);
1163 if(!ret) goto done;
1166 else { // ictx->is_raw_container
1167 contents_dpos = chunkctx.dpos;
1168 contents_dlen = chunkctx.dlen;
1171 ret = do_iff_chunk_sequence(c, ictx, contents_dpos, contents_dlen, level+1);
1172 if(!ret) {
1173 retval = 0;
1174 goto done;
1177 if(ictx->on_container_end_fn) {
1178 // Call for all containers (not just standard-format containers).
1180 // TODO: Decide exactly what ictx->* fields to set here.
1181 ictx->level = level;
1183 ictx->chunkctx = NULL;
1184 ret = ictx->on_container_end_fn(c, ictx);
1185 if(!ret) {
1186 retval = 0;
1187 goto done;
1191 else if(!ictx->handled) {
1192 de_fmtutil_default_iff_chunk_handler(c, ictx);
1195 done:
1196 fourcc_clear(&ictx->curr_container_fmt4cc);
1197 fourcc_clear(&ictx->curr_container_contentstype4cc);
1199 de_dbg_indent_restore(c, saved_indent_level);
1200 return retval;
1203 static int do_iff_chunk_sequence(deark *c, struct de_iffctx *ictx,
1204 i64 pos1, i64 len, int level)
1206 i64 pos;
1207 i64 endpos;
1208 i64 chunk_len;
1209 struct de_fourcc saved_container_fmt4cc;
1210 struct de_fourcc saved_container_contentstype4cc;
1211 int ret;
1213 if(level >= 16) { // An arbitrary recursion limit.
1214 return 0;
1217 endpos = pos1+len;
1218 saved_container_fmt4cc = ictx->curr_container_fmt4cc;
1219 saved_container_contentstype4cc = ictx->curr_container_contentstype4cc;
1221 pos = pos1;
1222 while(pos < endpos) {
1223 ictx->curr_container_fmt4cc = saved_container_fmt4cc;
1224 ictx->curr_container_contentstype4cc = saved_container_contentstype4cc;
1226 if(ictx->handle_nonchunk_data_fn) {
1227 i64 skip_len = 0;
1228 ret = ictx->handle_nonchunk_data_fn(c, ictx, pos, &skip_len);
1229 if(ret && skip_len>0) {
1230 pos += de_pad_to_n(skip_len, ictx->alignment);
1231 continue;
1235 ret = do_iff_chunk(c, ictx, pos, endpos-pos, level, &chunk_len);
1236 if(!ret) return 0;
1237 pos += chunk_len;
1240 ictx->curr_container_fmt4cc = saved_container_fmt4cc;
1241 ictx->curr_container_contentstype4cc = saved_container_contentstype4cc;
1243 return 1;
1246 void fmtutil_read_iff_format(deark *c, struct de_iffctx *ictx,
1247 i64 pos, i64 len)
1249 if(!ictx->f || !ictx->handle_chunk_fn) return; // Internal error
1251 ictx->level = 0;
1252 fourcc_clear(&ictx->main_fmt4cc);
1253 fourcc_clear(&ictx->main_contentstype4cc);
1254 fourcc_clear(&ictx->curr_container_fmt4cc);
1255 fourcc_clear(&ictx->curr_container_contentstype4cc);
1256 if(ictx->alignment==0) {
1257 ictx->alignment = 2;
1259 if(ictx->sizeof_len==0) {
1260 ictx->sizeof_len = 4;
1263 if(ictx->input_encoding==DE_ENCODING_UNKNOWN) {
1264 ictx->input_encoding = DE_ENCODING_ASCII;
1267 do_iff_chunk_sequence(c, ictx, pos, len, 0);
1270 const char *fmtutil_tiff_orientation_name(i64 n)
1272 static const char *names[9] = {
1273 "?", "top-left", "top-right", "bottom-right", "bottom-left",
1274 "left-top", "right-top", "right-bottom", "left-bottom"
1276 if(n>=1 && n<=8) return names[n];
1277 return names[0];
1280 const char *fmtutil_get_windows_charset_name(u8 cs)
1282 struct csname_struct { u8 id; const char *name; };
1283 static const struct csname_struct csname_arr[] = {
1284 {0x00, "ANSI"},
1285 {0x01, "default"},
1286 {0x02, "symbol"},
1287 {0x4d, "Mac"},
1288 {0x80, "Shift-JIS"},
1289 {0x81, "Hangul"},
1290 {0x82, "Johab"},
1291 {0x86, "GB2312"},
1292 {0x88, "BIG5"},
1293 {0xa1, "Greek"},
1294 {0xa2, "Turkish"},
1295 {0xa3, "Vietnamese"},
1296 {0xb1, "Hebrew"},
1297 {0xb2, "Arabic"},
1298 {0xba, "Baltic"},
1299 {0xcc, "Russian"},
1300 {0xde, "Thai"},
1301 {0xee, "Eastern Europe"},
1302 {0xff, "OEM"}
1304 size_t i;
1306 for(i=0; i<DE_ARRAYCOUNT(csname_arr); i++) {
1307 if(cs==csname_arr[i].id) return csname_arr[i].name;
1309 return "?";
// Returns the CF_* name for the clipboard data type code ty,
// or "?" if the code is not one of those listed here.
// The returned string is static.
const char *fmtutil_get_windows_cb_data_type_name(unsigned int ty)
{
	switch(ty) {
	case 1: return "CF_TEXT";
	case 2: return "CF_BITMAP";
	case 3: return "CF_METAFILEPICT";
	case 6: return "CF_TIFF";
	case 7: return "CF_OEMTEXT";
	case 8: return "CF_DIB";
	case 11: return "CF_RIFF";
	case 12: return "CF_WAVE";
	case 13: return "CF_UNICODETEXT";
	case 14: return "CF_ENHMETAFILE";
	case 17: return "CF_DIBV5";
	default: break;
	}
	return "?";
}
1332 // Search for the ZIP "end of central directory" object.
1333 // Also useful for detecting hybrid ZIP files, such as self-extracting EXE.
1334 int fmtutil_find_zip_eocd(deark *c, dbuf *f, i64 *foundpos)
1336 u32 sig;
1337 u8 *buf = NULL;
1338 int retval = 0;
1339 i64 buf_offset;
1340 i64 buf_size;
1341 i64 i;
1343 *foundpos = 0;
1344 if(f->len < 22) goto done;
1346 // End-of-central-dir record usually starts 22 bytes from EOF. Try that first.
1347 sig = (u32)dbuf_getu32le(f, f->len - 22);
1348 if(sig == 0x06054b50U) {
1349 *foundpos = f->len - 22;
1350 retval = 1;
1351 goto done;
1354 // Search for the signature.
1355 // The end-of-central-directory record could theoretically appear anywhere
1356 // in the file. We'll follow Info-Zip/UnZip's lead and search the last 66000
1357 // bytes.
1358 #define MAX_ZIP_EOCD_SEARCH 66000
1359 buf_size = f->len;
1360 if(buf_size > MAX_ZIP_EOCD_SEARCH) buf_size = MAX_ZIP_EOCD_SEARCH;
1362 buf = de_malloc(c, buf_size);
1363 buf_offset = f->len - buf_size;
1364 dbuf_read(f, buf, buf_offset, buf_size);
1366 for(i=buf_size-22; i>=0; i--) {
1367 if(buf[i]=='P' && buf[i+1]=='K' && buf[i+2]==5 && buf[i+3]==6) {
1368 *foundpos = buf_offset + i;
1369 retval = 1;
1370 goto done;
1374 done:
1375 de_free(c, buf);
1376 return retval;
1379 // Quick & dirty encoder that can wrap some formats in a TIFF container.
1380 static void wrap_in_tiff(deark *c, dbuf *f, i64 dpos, i64 dlen,
1381 const char *swstring, unsigned int tag, const char *ext, unsigned int createflags)
1383 dbuf *outf = NULL;
1384 i64 ifdoffs;
1385 i64 sw_len, sw_len_padded;
1386 i64 data_len_padded;
1388 sw_len = 1+(i64)de_strlen(swstring);
1389 if(sw_len<=4) return;
1390 sw_len_padded = de_pad_to_2(sw_len);
1392 if(dlen>4) {
1393 data_len_padded = de_pad_to_2(dlen);
1395 else {
1396 data_len_padded = 0;
1399 outf = dbuf_create_output_file(c, ext, NULL, 0);
1400 dbuf_write(outf, (const u8*)"\x4d\x4d\x00\x2a", 4);
1401 ifdoffs = 8 + sw_len_padded + data_len_padded;
1402 dbuf_writeu32be(outf, ifdoffs);
1403 dbuf_write(outf, (const u8*)swstring, sw_len);
1404 if(sw_len%2) dbuf_writebyte(outf, 0);
1405 if(dlen>4) {
1406 dbuf_copy(f, dpos, dlen, outf);
1407 if(dlen%2) dbuf_writebyte(outf, 0);
1410 dbuf_writeu16be(outf, 2); // number of dir entries;
1412 dbuf_writeu16be(outf, 305); // Software tag
1413 dbuf_writeu16be(outf, 2); // type=ASCII
1414 dbuf_writeu32be(outf, sw_len);
1415 dbuf_writeu32be(outf, 8); // offset
1417 dbuf_writeu16be(outf, (i64)tag);
1418 dbuf_writeu16be(outf, 1);
1419 dbuf_writeu32be(outf, dlen);
1420 if(dlen>4) {
1421 dbuf_writeu32be(outf, 8+sw_len_padded);
1423 else {
1424 dbuf_copy(f, dpos, dlen, outf);
1425 dbuf_write_zeroes(outf, 4-dlen);
1428 dbuf_writeu32be(outf, 0); // end of IFD
1429 dbuf_close(outf);
1432 // Find ID3 tag data at the beginning and end of file, process it, and return
1433 // information about its location.
1434 // Caller allocates id3i.
1435 void fmtutil_handle_id3(deark *c, dbuf *f, struct de_id3info *id3i,
1436 unsigned int flags)
1438 i64 id3v1pos = 0;
1439 int look_for_id3v1;
1441 de_zeromem(id3i, sizeof(struct de_id3info));
1442 id3i->main_start = 0;
1443 id3i->main_end = f->len;
1445 id3i->has_id3v2 = !dbuf_memcmp(f, 0, "ID3", 3);
1446 if(id3i->has_id3v2) {
1447 de_module_params id3v2mparams;
1449 de_dbg(c, "ID3v2 data at %d", 0);
1450 de_dbg_indent(c, 1);
1451 de_zeromem(&id3v2mparams, sizeof(de_module_params));
1452 id3v2mparams.in_params.codes = "I";
1453 de_run_module_by_id_on_slice(c, "id3", &id3v2mparams, f, 0, f->len);
1454 de_dbg_indent(c, -1);
1455 id3i->main_start += id3v2mparams.out_params.int64_1;
1458 look_for_id3v1 = 1;
1459 if(look_for_id3v1) {
1460 id3v1pos = f->len-128;
1461 if(!dbuf_memcmp(f, id3v1pos, "TAG", 3)) {
1462 id3i->has_id3v1 = 1;
1466 if(id3i->has_id3v1) {
1467 de_module_params id3v1mparams;
1469 de_dbg(c, "ID3v1 data at %"I64_FMT, id3v1pos);
1470 de_dbg_indent(c, 1);
1471 de_zeromem(&id3v1mparams, sizeof(de_module_params));
1472 id3v1mparams.in_params.codes = "1";
1473 de_run_module_by_id_on_slice(c, "id3", &id3v1mparams, f, id3v1pos, 128);
1474 de_dbg_indent(c, -1);
1475 id3i->main_end = id3v1pos;
1479 static void dbg_timestamp(deark *c, struct de_timestamp *ts, const char *name)
1481 char timestamp_buf[64];
1483 de_timestamp_to_string(ts, timestamp_buf, sizeof(timestamp_buf), 0);
1484 de_dbg(c, "%s: %s", name, timestamp_buf);
1487 void fmtutil_riscos_read_load_exec(deark *c, dbuf *f, struct de_riscos_file_attrs *rfa, i64 pos1)
1489 i64 pos = pos1;
1491 rfa->load_addr = (u32)dbuf_getu32le_p(f, &pos);
1492 rfa->exec_addr = (u32)dbuf_getu32le_p(f, &pos);
1493 de_dbg(c, "load/exec addrs: 0x%08x, 0x%08x", (unsigned int)rfa->load_addr,
1494 (unsigned int)rfa->exec_addr);
1495 de_dbg_indent(c, 1);
1496 if((rfa->load_addr&0xfff00000U)==0xfff00000U) {
1497 rfa->file_type = (unsigned int)((rfa->load_addr&0xfff00)>>8);
1498 rfa->file_type_known = 1;
1499 de_dbg(c, "file type: %03X", rfa->file_type);
1501 de_riscos_loadexec_to_timestamp(rfa->load_addr, rfa->exec_addr, &rfa->mod_time);
1502 dbg_timestamp(c, &rfa->mod_time, "timestamp");
1504 de_dbg_indent(c, -1);
1507 void fmtutil_riscos_read_attribs_field(deark *c, dbuf *f, struct de_riscos_file_attrs *rfa,
1508 i64 pos, unsigned int flags)
1510 rfa->attribs = (u32)dbuf_getu32le(f, pos);
1511 de_dbg(c, "attribs: 0x%08x", (unsigned int)rfa->attribs);
1512 de_dbg_indent(c, 1);
1513 rfa->crc_from_attribs = rfa->attribs>>16;
1514 if(flags & DE_RISCOS_FLAG_HAS_CRC) {
1515 de_dbg(c, "crc (reported): 0x%04x", (unsigned int)rfa->crc_from_attribs);
1517 if(flags & DE_RISCOS_FLAG_HAS_LZWMAXBITS) {
1518 rfa->lzwmaxbits = (unsigned int)((rfa->attribs&0xff00)>>8);
1519 de_dbg(c, "lzw maxbits: %u", rfa->lzwmaxbits);
1521 de_dbg_indent(c, -1);
1524 struct pict_rect {
1525 i64 t, l, b, r;
1528 // Note: Code duplicated in pict.c
1529 static double pict_read_fixed(dbuf *f, i64 pos)
1531 i64 n;
1533 // I think QuickDraw's "Fixed point" numbers are signed, but I don't know
1534 // how negative numbers are handled.
1535 n = dbuf_geti32be(f, pos);
1536 return ((double)n)/65536.0;
1539 // Read a QuickDraw Rectangle. Caller supplies rect struct.
1540 // Note: Code duplicated in pict.c
1541 static void pict_read_rect(dbuf *f, i64 pos,
1542 struct pict_rect *rect, const char *dbgname)
1544 rect->t = dbuf_geti16be(f, pos);
1545 rect->l = dbuf_geti16be(f, pos+2);
1546 rect->b = dbuf_geti16be(f, pos+4);
1547 rect->r = dbuf_geti16be(f, pos+6);
1549 if(dbgname) {
1550 de_dbg(f->c, "%s: (%d,%d)-(%d,%d)", dbgname, (int)rect->l, (int)rect->t,
1551 (int)rect->r, (int)rect->b);
1555 // Sometimes-present baseAddr field (4 bytes)
1556 void fmtutil_macbitmap_read_baseaddr(deark *c, dbuf *f, struct fmtutil_macbitmap_info *bi, i64 pos)
1558 i64 n;
1559 de_dbg(c, "baseAddr part of PixMap, at %d", (int)pos);
1560 de_dbg_indent(c, 1);
1561 n = dbuf_getu32be(f, pos);
1562 de_dbg(c, "baseAddr: 0x%08x", (unsigned int)n);
1563 de_dbg_indent(c, -1);
1566 void fmtutil_macbitmap_read_rowbytes_and_bounds(deark *c, dbuf *f,
1567 struct fmtutil_macbitmap_info *bi, i64 pos)
1569 struct pict_rect tmprect;
1570 i64 rowbytes_code;
1572 de_dbg(c, "rowBytes/bounds part of bitmap/PixMap header, at %d", (int)pos);
1573 de_dbg_indent(c, 1);
1574 rowbytes_code = dbuf_getu16be(f, pos);
1575 bi->rowbytes = rowbytes_code & 0x7fff;
1576 bi->pixmap_flag = (rowbytes_code & 0x8000)?1:0;
1577 de_dbg(c, "rowBytes: %d", (int)bi->rowbytes);
1578 de_dbg(c, "pixmap flag: %d", bi->pixmap_flag);
1580 pict_read_rect(f, pos+2, &tmprect, "rect");
1581 bi->npwidth = tmprect.r - tmprect.l;
1582 bi->pdwidth = bi->npwidth; // default
1583 bi->height = tmprect.b - tmprect.t;
1585 de_dbg_indent(c, -1);
1588 // Pixmap fields that aren't read by read_baseaddr or read_rowbytes_and_bounds
1589 // (36 bytes)
1590 void fmtutil_macbitmap_read_pixmap_only_fields(deark *c, dbuf *f, struct fmtutil_macbitmap_info *bi,
1591 i64 pos)
1593 i64 pixmap_version;
1594 i64 pack_size;
1595 i64 plane_bytes;
1596 i64 n;
1598 de_dbg(c, "additional PixMap header fields, at %d", (int)pos);
1599 de_dbg_indent(c, 1);
1601 pixmap_version = dbuf_getu16be(f, pos+0);
1602 de_dbg(c, "pixmap version: %d", (int)pixmap_version);
1604 bi->packing_type = dbuf_getu16be(f, pos+2);
1605 de_dbg(c, "packing type: %d", (int)bi->packing_type);
1607 pack_size = dbuf_getu32be(f, pos+4);
1608 de_dbg(c, "pixel data length: %d", (int)pack_size);
1610 bi->hdpi = pict_read_fixed(f, pos+8);
1611 bi->vdpi = pict_read_fixed(f, pos+12);
1612 de_dbg(c, "dpi: %.2f"DE_CHAR_TIMES"%.2f", bi->hdpi, bi->vdpi);
1614 bi->pixeltype = dbuf_getu16be(f, pos+16);
1615 bi->pixelsize = dbuf_getu16be(f, pos+18);
1616 bi->cmpcount = dbuf_getu16be(f, pos+20);
1617 bi->cmpsize = dbuf_getu16be(f, pos+22);
1618 de_dbg(c, "pixel type=%d, bits/pixel=%d, components/pixel=%d, bits/comp=%d",
1619 (int)bi->pixeltype, (int)bi->pixelsize, (int)bi->cmpcount, (int)bi->cmpsize);
1621 if(bi->pixelsize>0) {
1622 bi->pdwidth = (bi->rowbytes*8)/bi->pixelsize;
1624 if(bi->pdwidth < bi->npwidth) {
1625 bi->pdwidth = bi->npwidth;
1628 plane_bytes = dbuf_getu32be(f, pos+24);
1629 de_dbg(c, "plane bytes: %d", (int)plane_bytes);
1631 bi->pmTable = (u32)dbuf_getu32be(f, pos+28);
1632 de_dbg(c, "pmTable: 0x%08x", (unsigned int)bi->pmTable);
1634 n = dbuf_getu32be(f, pos+32);
1635 de_dbg(c, "pmReserved: 0x%08x", (unsigned int)n);
1637 de_dbg_indent(c, -1);
1640 int fmtutil_macbitmap_read_colortable(deark *c, dbuf *f,
1641 struct fmtutil_macbitmap_info *bi, i64 pos, i64 *bytes_used)
1643 i64 ct_id;
1644 u32 ct_flags;
1645 i64 ct_size;
1646 i64 k, z;
1647 u32 s[4];
1648 u8 cr, cg, cb;
1649 u32 clr;
1650 char tmps[64];
1652 *bytes_used = 0;
1653 de_dbg(c, "color table at %"I64_FMT, pos);
1654 de_dbg_indent(c, 1);
1656 ct_id = dbuf_getu32be(f, pos);
1657 ct_flags = (u32)dbuf_getu16be(f, pos+4); // a.k.a. transIndex
1658 ct_size = dbuf_getu16be(f, pos+6);
1659 bi->num_pal_entries = ct_size+1;
1660 de_dbg(c, "color table id=0x%08x, flags=0x%04x, colors=%d", (unsigned int)ct_id,
1661 (unsigned int)ct_flags, (int)bi->num_pal_entries);
1663 for(k=0; k<bi->num_pal_entries; k++) {
1664 for(z=0; z<4; z++) {
1665 s[z] = (u32)dbuf_getu16be(f, pos+8+8*k+2*z);
1667 cr = (u8)(s[1]>>8);
1668 cg = (u8)(s[2]>>8);
1669 cb = (u8)(s[3]>>8);
1670 clr = DE_MAKE_RGB(cr,cg,cb);
1671 de_snprintf(tmps, sizeof(tmps), "(%5d,%5d,%5d,idx=%3d) "DE_CHAR_RIGHTARROW" ",
1672 (int)s[1], (int)s[2], (int)s[3], (int)s[0]);
1673 de_dbg_pal_entry2(c, k, clr, tmps, NULL, NULL);
1675 // Some files don't have the palette indices set. Most PICT decoders ignore
1676 // the indices if the "device" flag of ct_flags is set, and that seems to
1677 // work (though it's not clearly documented).
1678 if(ct_flags & 0x8000U) {
1679 s[0] = (u32)k;
1682 if(s[0]<=255) {
1683 bi->pal[s[0]] = clr;
1687 de_dbg_indent(c, -1);
1688 *bytes_used = 8 + 8*bi->num_pal_entries;
1689 return 1;
1692 // "compressed unsigned short" - a variable-length integer format
1693 // TODO: This is duplicated in shg.c
1694 i64 fmtutil_hlp_get_cus_p(dbuf *f, i64 *ppos)
1696 i64 x1, x2;
1698 x1 = (i64)dbuf_getbyte_p(f, ppos);
1699 if(x1%2 == 0) {
1700 // If it's even, divide by two.
1701 return x1>>1;
1703 // If it's odd, divide by two, and add 128 times the value of
1704 // the next byte.
1705 x2 = (i64)dbuf_getbyte_p(f, ppos);
1706 return (x1>>1) | (x2<<7);
1709 // "compressed signed short"
1710 i64 fmtutil_hlp_get_css_p(dbuf *f, i64 *ppos)
1712 i64 x1, x2;
1714 x1 = (i64)dbuf_getbyte_p(f, ppos);
1715 if(x1%2 == 0) {
1716 // If it's even, divide by two, and subtract 64
1717 return (x1>>1) - 64;
1719 // If it's odd, divide by two, add 128 times the value of
1720 // the next byte, and subtract 16384.
1721 x1 >>= 1;
1722 x2 = (i64)dbuf_getbyte_p(f, ppos);
1723 x1 += x2 * 128;
1724 x1 -= 16384;
1725 return x1;
1728 // "compressed unsigned long"
1729 i64 fmtutil_hlp_get_cul_p(dbuf *f, i64 *ppos)
1731 i64 x1, x2;
1732 x1 = dbuf_getu16le_p(f, ppos);
1733 if(x1%2 == 0) {
1734 // If it's even, divide by two.
1735 return x1>>1;
1737 // If it's odd, divide by two, and add 32768 times the value of
1738 // the next two bytes.
1739 x2 = dbuf_getu16le_p(f, ppos);
1740 return (x1>>1) | (x2<<15);
1743 // "compressed signed long"
1744 i64 fmtutil_hlp_get_csl_p(dbuf *f, i64 *ppos)
1746 i64 x1, x2;
1748 x1 = dbuf_getu16le_p(f, ppos);
1750 if(x1%2 == 0) {
1751 // If it's even, divide by two, and subtract 16384
1752 return (x1>>1) - 16384;
1754 // If it's odd, divide by two, add 32768 times the value of
1755 // the next two bytes, and subtract 67108864.
1756 x1 >>= 1;
1757 x2 = dbuf_getu16le_p(f, ppos);
1758 x1 += x2*32768;
1759 x1 -= 67108864;
1760 return x1;