nrg: Improved support for v2
[deark.git] / modules / jpeg.c
blob5f34acc23ca4c5fca660218356c16ec5dad8cffa
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Extract various things from JPEG & JPEG-LS files.
6 // Extract embedded JPEG files from arbitrary files.
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_jpeg);
12 DE_DECLARE_MODULE(de_module_jpegscan);
14 struct fpxr_entity_struct {
15 size_t index;
16 struct de_stringreaderdata *name_srd;
17 dbuf *stream;
18 i64 stream_size;
19 int is_storage;
20 int done_flag;
// The set of FPXR entities declared by a contents-list segment.
struct fpxr_data_struct {
	// Number of elements in the entities array.
	size_t num_entities;
	// Array of num_entities entries.
	struct fpxr_entity_struct *entities;
};
28 typedef struct localctx_struct {
29 u8 is_jpegls;
31 u8 has_jfif_seg, has_jfif_thumb, has_jfxx_seg;
32 u8 has_exif_seg, has_exif_gps, has_spiff_seg, has_mpf_seg, has_afcp;
33 u8 exif_before_jfif;
34 u8 has_psd, has_iptc, has_xmp, has_xmp_ext, has_iccprofile, has_flashpix;
35 u8 is_baseline, is_progressive, is_lossless, is_arithmetic, is_hierarchical;
36 u8 is_jpeghdr, is_jpegxt, is_mpo, is_jps;
37 u8 has_restart_markers;
38 u8 precision;
39 u8 has_adobeapp14;
40 u8 color_transform; // valid if(has_adobeapp14)
41 u8 has_revcolorxform;
42 u8 exif_cosited;
43 int scan_count;
45 u8 found_soi;
46 u8 found_sof;
47 i64 ncomp;
49 u8 jfif_ver_h, jfif_ver_l; // valid if(has_jfif_seg)
50 u32 exif_orientation; // valid if != 0, and(has_exif_seg)
51 u32 exif_version_as_uint32; // valid if != 0, and(has_exif_seg)
52 dbuf *iccprofile_file;
53 dbuf *hdr_residual_file;
55 int extxmp_found;
56 int extxmp_warned_flag; // Have we warned about multiple extxmp digests?
57 int extxmp_error_flag;
58 dbuf *extxmp_membuf;
59 u8 extxmp_digest[32];
60 i64 extxmp_total_len;
62 int is_subsampled;
63 de_ucstring *sampling_code;
65 struct fpxr_data_struct *fpxr_data;
66 } lctx;
68 struct marker_info;
70 typedef void (*handler_fn_type)(deark *c, lctx *d,
71 const struct marker_info *mi, i64 pos, i64 data_size);
73 #define FLAG_JPEG_COMPAT 0x0001
74 #define FLAG_JPEGLS_COMPAT 0x0002
75 #define FLAG_NO_DATA 0x0100
76 #define FLAG_IS_SOF 0x0200
78 struct marker_info {
79 u8 seg_type;
80 unsigned int flags;
81 char shortname[12];
82 char longname[80];
83 handler_fn_type hfn;
86 // Static info about markers/segments.
87 struct marker_info1 {
88 u8 seg_type;
89 unsigned int flags;
90 const char *shortname;
91 const char *longname;
92 handler_fn_type hfn;
95 static void do_icc_profile_segment(deark *c, lctx *d, i64 pos, i64 data_size)
97 u8 b1, b2;
99 if(data_size<2) return; // bogus data
100 b1 = de_getbyte(pos);
101 b2 = de_getbyte(pos+1);
102 de_dbg(c, "icc profile segment at %d datasize=%d part %d of %d", (int)pos, (int)(data_size-2), b1, b2);
104 if(!d->iccprofile_file) {
105 d->has_iccprofile = 1;
106 d->iccprofile_file = dbuf_create_output_file(c, "icc", NULL, DE_CREATEFLAG_IS_AUX);
108 dbuf_copy(c->infile, pos+2, data_size-2, d->iccprofile_file);
110 if(b1==b2) {
111 // If this is the final piece of the ICC profile, close the file.
112 // That way, if for some reason there's another profile in the file, we'll put
113 // it in a separate file.
114 dbuf_close(d->iccprofile_file);
115 d->iccprofile_file = NULL;
119 // Extract JPEG-HDR residual images.
120 // Note: This code is based on reverse engineering, and may not be correct.
121 static void do_jpeghdr_segment(deark *c, lctx *d, i64 pos1,
122 i64 data_size1, int is_ext)
124 int ret;
125 i64 pos = 0;
126 i64 data_size;
128 // Payload should begin after the first NUL byte. Search for it.
129 ret = dbuf_search_byte(c->infile, 0x00, pos1, data_size1, &pos);
130 if(!ret) {
131 de_warn(c, "Bad or unsupported JPEG-HDR data");
132 return;
134 pos++;
135 data_size = pos1+data_size1 - pos;
137 if(is_ext) {
138 de_dbg(c, "JPEG-HDR residual image continuation, pos=%d size=%d",
139 (int)pos, (int)data_size);
141 else {
142 de_dbg(c, "JPEG-HDR residual image start, pos=%d size=%d",
143 (int)pos, (int)data_size);
145 // Close any previous file
146 if(d->hdr_residual_file) {
147 dbuf_close(d->hdr_residual_file);
148 d->hdr_residual_file = NULL;
151 // Make sure it looks like an embedded JPEG file
152 if(dbuf_memcmp(c->infile, pos, "\xff\xd8", 2)) {
153 de_warn(c, "Bad or unsupported JPEG-HDR format");
154 return;
157 d->hdr_residual_file = dbuf_create_output_file(c, "residual.jpg", NULL, DE_CREATEFLAG_IS_AUX);
160 if(!d->hdr_residual_file) return;
161 dbuf_copy(c->infile, pos, data_size, d->hdr_residual_file);
164 static void do_jpegxt_segment(deark *c, lctx *d, i64 pos,
165 i64 data_size)
167 i64 n;
168 if(data_size<14) return;
169 n = de_getu16be(pos);
170 de_dbg(c, "enumerator: %u", (unsigned int)n);
171 n = de_getu32be(pos+2);
172 de_dbg(c, "seq number: %u", (unsigned int)n);
173 de_dbg_indent(c, 1);
174 de_run_module_by_id_on_slice2(c, "bmff", "T", c->infile, pos+6, data_size-6);
175 de_dbg_indent(c, -1);
178 // Decode an uncompressed JFIF thumbnail.
179 // This code has not been properly tested, because I can't find any files in
180 // the wild that have these kinds of thumbnails.
181 static void extract_unc_jfif_thumbnail(deark *c, lctx *d,
182 i64 pos1, i64 len, i64 w, i64 h, int has_pal,
183 const char *token)
185 i64 i, j;
186 i64 pos = pos1;
187 de_bitmap *img = NULL;
188 u32 clr;
189 i64 rowspan;
191 img = de_bitmap_create(c, w, h, 3);
193 if(has_pal) {
194 u32 pal[256];
195 de_read_palette_rgb(c->infile, pos, 256, 3, pal, 256, 0);
196 pos += 768;
197 de_convert_image_paletted(c->infile, pos, 8, w, pal, img, 0);
199 else {
200 rowspan = 3*w;
201 for(j=0; j<h; j++) {
202 for(i=0; i<w; i++) {
203 clr = dbuf_getRGB(c->infile, pos + j*rowspan + i*3, 0);
204 de_bitmap_setpixel_rgb(img, i, j, clr);
209 de_bitmap_write_to_file(img, token, DE_CREATEFLAG_IS_AUX);
211 de_bitmap_destroy(img);
214 static void do_jfif_segment(deark *c, lctx *d,
215 i64 pos, i64 data_size)
217 u8 units;
218 const char *units_name;
219 i64 xdens, ydens;
220 i64 tn_w, tn_h;
222 d->has_jfif_seg = 1;
223 if(data_size<9) return;
224 d->jfif_ver_h = de_getbyte(pos);
225 d->jfif_ver_l = de_getbyte(pos+1);
226 de_dbg(c, "JFIF version: %d.%02d", (int)d->jfif_ver_h, (int)d->jfif_ver_l);
227 units = de_getbyte(pos+2);
228 xdens = de_getu16be(pos+3);
229 ydens = de_getu16be(pos+5);
230 if(units==1) units_name="dpi";
231 else if(units==2) units_name="dots/cm";
232 else units_name="(unspecified)";
233 de_dbg(c, "density: %d"DE_CHAR_TIMES"%d, units=%s", (int)xdens, (int)ydens, units_name);
235 tn_w = (i64)de_getbyte(pos+7);
236 tn_h = (i64)de_getbyte(pos+8);
237 de_dbg(c, "thumbnail dimensions: %d"DE_CHAR_TIMES"%d", (int)tn_w, (int)tn_h);
238 if(tn_w>0 && tn_h>0 && data_size>9) {
239 d->has_jfif_thumb = 1;
240 if(tn_w*tn_h*3 != data_size-9) {
241 de_warn(c, "Expected %d bytes of JFIF thumbnail image data at %d, found %d",
242 (int)(tn_w*tn_h*3), (int)(pos+9), (int)(data_size-9));
244 extract_unc_jfif_thumbnail(c, d, pos+9, data_size-9, tn_w, tn_h,
245 0, "jfifthumb");
249 static void do_jfxx_segment(deark *c, lctx *d,
250 i64 pos, i64 data_size)
252 u8 t;
254 d->has_jfxx_seg = 1;
255 de_dbg(c, "JFXX segment at %d datasize=%d", (int)pos, (int)data_size);
256 if(data_size<1) return;
258 t = de_getbyte(pos);
259 de_dbg(c, "thumbnail type: 0x%02x", (unsigned int)t);
261 if(t==0x10) { // thumbnail coded using JPEG
262 // TODO: JPEG-formatted thumbnails are forbidden from containing JFIF segments.
263 // They essentially inherit them from their parent.
264 // So, maybe, when we extract a thumbnail, we should insert an artificial JFIF
265 // segment into it. We currently don't do that.
266 // (However, this is not at all important.)
267 dbuf_create_file_from_slice(c->infile, pos+1, data_size-1, "jfxxthumb.jpg", NULL, DE_CREATEFLAG_IS_AUX);
269 else if(t==0x11 || t==0x13) {
270 i64 tn_w, tn_h;
272 if(data_size<3) return;
273 tn_w = (i64)de_getbyte(pos+1);
274 tn_h = (i64)de_getbyte(pos+2);
275 de_dbg(c, "JFXX thumbnail dimensions: %d"DE_CHAR_TIMES"%d", (int)tn_w, (int)tn_h);
276 extract_unc_jfif_thumbnail(c, d, pos+3, data_size-3, tn_w, tn_h,
277 (t==0x11)?1:0, "jfxxthumb");
281 static void do_adobeapp14_segment(deark *c, lctx *d,
282 i64 pos, i64 data_size)
284 const char *tname;
286 if(data_size<7) return;
287 d->has_adobeapp14 = 1;
288 d->color_transform = de_getbyte(pos+6);
289 if(d->color_transform==0) tname="RGB or CMYK";
290 else if(d->color_transform==1) tname="YCbCr";
291 else if(d->color_transform==2) tname="YCCK";
292 else tname="unknown";
293 de_dbg(c, "color transform: %d (%s)", (int)d->color_transform, tname);
296 static void do_exif_segment(deark *c, lctx *d,
297 i64 pos, i64 data_size)
299 u32 exifflags = 0;
300 u32 exiforientation = 0;
301 u32 exifversion = 0;
303 if(data_size<8) return;
304 // Note that Exif has an additional padding byte after the APP ID NUL terminator.
305 de_dbg(c, "Exif data at %d, size=%d", (int)pos, (int)data_size);
306 d->has_exif_seg = 1;
307 if(!d->has_jfif_seg) {
308 d->exif_before_jfif = 1;
310 de_dbg_indent(c, 1);
311 fmtutil_handle_exif2(c, pos, data_size,
312 &exifflags, &exiforientation, &exifversion);
313 if(exifflags&0x08)
314 d->has_exif_gps = 1;
315 if(exifflags&0x10)
316 d->exif_cosited = 1;
317 if(exifflags&0x20)
318 d->exif_orientation = exiforientation;
319 if(exifflags&0x40)
320 d->exif_version_as_uint32 = exifversion;
321 de_dbg_indent(c, -1);
324 static void do_photoshop_segment(deark *c, lctx *d,
325 i64 pos, i64 data_size)
327 struct de_module_out_params oparams;
329 de_zeromem(&oparams, sizeof(struct de_module_out_params));
330 // TODO: Can Photoshop resources span multiple JPEG segments? I have
331 // a file in which that seems to be the case.
332 de_dbg(c, "photoshop data at %d, size=%d", (int)pos, (int)data_size);
333 d->has_psd = 1;
334 de_dbg_indent(c, 1);
335 fmtutil_handle_photoshop_rsrc2(c, c->infile, pos, data_size, 0x0, &oparams);
336 if(oparams.flags&0x02)
337 d->has_iptc = 1;
338 de_dbg_indent(c, -1);
341 static void do_mpf_segment(deark *c, lctx *d,
342 i64 pos, i64 data_size)
344 de_module_params *mparams = NULL;
346 d->has_mpf_seg = 1;
347 de_dbg(c, "MPF data at %d, size=%d", (int)pos, (int)data_size);
348 de_dbg_indent(c, 1);
350 mparams = de_malloc(c, sizeof(de_module_params));
352 mparams->in_params.codes = "M";
353 mparams->in_params.flags |= 0x01;
354 mparams->in_params.offset_in_parent = pos;
355 mparams->in_params.parent_dbuf = c->infile;
357 de_run_module_by_id_on_slice(c, "tiff", mparams, c->infile, pos, data_size);
359 if(mparams->out_params.flags & 0x80) {
360 if(mparams->out_params.uint3 > 1) {
361 // We want to set the is_mpo flag if there is an MPEntry tag which
362 // says there is more than one non-thumbnail image.
363 // This is so we can declare the format to be "JPEG/MPO".
364 d->is_mpo = 1;
368 de_free(c, mparams);
369 de_dbg_indent(c, -1);
// Decode a JPS segment: print the stereoscopic descriptor (as a hex value
// plus a list of flag names), then the comment string, if present.
// pos1 and len give the position and size of the data to decode.
// Both strings are destroyed at the "done" label, on every path.
372 static void do_jps_segment(deark *c, lctx *d, i64 pos1, i64 len)
374 i64 pos = pos1;
375 i64 blk_len;
376 u32 st_descr;
377 de_ucstring *flags_str = NULL;
378 de_ucstring *comment = NULL;
379 unsigned int mtype;
381 // Descriptor block
// A 16-bit block length, followed by a block that begins with the
// 32-bit descriptor.
382 if(len<8) goto done;
383 blk_len = de_getu16be_p(&pos);
384 if(blk_len<4) goto done;
// Read without advancing pos; the whole block is skipped further down.
385 st_descr = (u32)de_getu32be(pos);
387 flags_str = ucstring_create(c);
// The low byte of the descriptor is the media type.
388 mtype = (unsigned int)(st_descr&0x000000ff);
389 switch(mtype) {
390 case 0: ucstring_append_flags_item(flags_str, "MONOSCOPIC_IMAGE"); break;
391 case 1: ucstring_append_flags_item(flags_str, "STEREOSCOPIC_IMAGE"); break;
// The meaning of the second byte depends on the media type.
393 if(mtype==0) {
394 switch((st_descr&0x0000ff00)>>8) {
395 case 0: ucstring_append_flags_item(flags_str, "EYE_BOTH"); break;
396 case 1: ucstring_append_flags_item(flags_str, "EYE_LEFT"); break;
397 case 2: ucstring_append_flags_item(flags_str, "EYE_RIGHT"); break;
400 else if(mtype==1) {
401 switch((st_descr&0x0000ff00)>>8) {
402 case 1: ucstring_append_flags_item(flags_str, "LAYOUT_INTERLEAVED"); break;
403 case 2: ucstring_append_flags_item(flags_str, "LAYOUT_SIDEBYSIDE"); break;
404 case 3: ucstring_append_flags_item(flags_str, "LAYOUT_OVERUNDER"); break;
405 case 4: ucstring_append_flags_item(flags_str, "LAYOUT_ANAGLYPH"); break;
// Bits 16 and 17 select half vs. full height and width.
408 ucstring_append_flags_item(flags_str, (st_descr&0x00010000)?"half-height":"full-height");
409 ucstring_append_flags_item(flags_str, (st_descr&0x00020000)?"half-width":"full-width");
410 // TODO: FIELD ORDER BIT
411 // TODO: SEPARATION
413 de_dbg(c, "stereoscopic descriptor: 0x%08x (%s)", (unsigned int)st_descr,
414 ucstring_getpsz(flags_str));
415 pos += blk_len;
417 // Comment block
// A 16-bit length followed by that many bytes of ASCII text. Give up
// if the block would extend beyond the end of the segment data.
418 if(pos1+len-pos<2) goto done;
419 blk_len = de_getu16be_p(&pos);
420 if(pos+blk_len > pos1+len) goto done;
421 comment = ucstring_create(c);
422 dbuf_read_to_ucstring_n(c->infile, pos, blk_len, DE_DBG_MAX_STRLEN, comment,
423 0, DE_ENCODING_ASCII);
424 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(comment));
426 done:
427 ucstring_destroy(flags_str);
428 ucstring_destroy(comment);
431 static void do_arot_segment(deark *c, lctx *d, i64 pos, i64 len)
433 i64 nvals;
435 if(len<8) goto done;
436 nvals = de_getu32be(pos);
437 de_dbg(c, "number of values: %u", (unsigned int)nvals);
439 done:
443 static void do_xmp_extension_segment(deark *c, lctx *d,
444 i64 pos1, i64 data_size)
446 i64 thisseg_full_extxmp_len;
447 i64 segment_offset;
448 u8 thisseg_digest_raw[32];
449 de_ucstring *digest_str = NULL;
450 i64 pos = pos1;
451 i64 dlen;
452 int is_first_segment = 0;
454 de_dbg(c, "extended XMP segment, dpos=%d, dlen=%d", (int)pos1, (int)(data_size));
455 de_dbg_indent(c, 1);
456 if(d->extxmp_error_flag) goto done;
458 de_read(thisseg_digest_raw, pos, 32);
459 pos += 32;
460 digest_str = ucstring_create(c);
461 ucstring_append_bytes(digest_str, thisseg_digest_raw, 32, 0, DE_ENCODING_ASCII);
462 de_dbg(c, "digest: \"%s\"", ucstring_getpsz(digest_str));
464 if(d->extxmp_found && de_memcmp(thisseg_digest_raw, d->extxmp_digest, 32)) {
465 // We only care about the extended XMP segments whose digest matches that
466 // indicated in the main XMP segment. Unfortunately, we don't know what that
467 // is, because we don't parse XMP. We'll just hope that the first extended
468 // XMP segment has the correct digest.
469 if(!d->extxmp_warned_flag) {
470 de_warn(c, "Multiple extended XMP blocks found. All but the first will be ignored.");
471 d->extxmp_warned_flag = 1;
473 goto done;
476 if(!d->extxmp_found) {
477 is_first_segment = 1;
478 d->extxmp_found = 1;
479 de_memcpy(d->extxmp_digest, thisseg_digest_raw, 32);
482 thisseg_full_extxmp_len = de_getu32be_p(&pos);
483 if(is_first_segment) {
484 d->extxmp_total_len = thisseg_full_extxmp_len;
486 de_dbg(c, "full ext. XMP length: %d", (int)thisseg_full_extxmp_len);
487 if(thisseg_full_extxmp_len != d->extxmp_total_len) {
488 de_warn(c, "Inconsistent extended XMP block lengths");
489 d->extxmp_error_flag = 1;
490 goto done;
493 if(d->extxmp_total_len > 10000000) {
494 de_warn(c, "Extended XMP block too large");
495 d->extxmp_error_flag = 1;
496 goto done;
499 segment_offset = de_getu32be_p(&pos);
500 de_dbg(c, "offset of this segment: %d", (int)segment_offset);
502 dlen = data_size - (pos-pos1);
503 de_dbg(c, "[%d bytes of ext. XMP data at %d]", (int)dlen, (int)pos);
505 if(segment_offset + dlen > d->extxmp_total_len) {
506 de_warn(c, "Extended XMP segment too long");
507 d->extxmp_error_flag = 1;
508 goto done;
511 if(!d->extxmp_membuf) {
512 d->extxmp_membuf = dbuf_create_membuf(c, d->extxmp_total_len, 0x1);
514 dbuf_copy_at(c->infile, pos, dlen, d->extxmp_membuf, segment_offset);
516 done:
517 de_dbg_indent(c, -1);
518 ucstring_destroy(digest_str);
521 static void destroy_fpxr_data(deark *c, lctx *d)
523 size_t k;
524 if(!d->fpxr_data) return;
526 for(k=0; k<d->fpxr_data->num_entities; k++) {
527 if(d->fpxr_data->entities[k].name_srd) {
528 de_destroy_stringreaderdata(c, d->fpxr_data->entities[k].name_srd);
529 d->fpxr_data->entities[k].name_srd = NULL;
532 dbuf_close(d->fpxr_data->entities[k].stream);
533 d->fpxr_data->entities[k].stream = NULL;
536 de_free(c, d->fpxr_data->entities);
537 de_free(c, d->fpxr_data);
538 d->fpxr_data = NULL;
541 static void do_fpxr_olepropset_stream(deark *c, lctx *d, struct fpxr_entity_struct *fe)
543 de_dbg(c, "decoding Flashpix stream %d (OLE property set)", (int)fe->index);
544 de_dbg_indent(c, 1);
545 de_run_module_by_id_on_slice(c, "olepropset", NULL, fe->stream, 0, fe->stream->len);
546 de_dbg_indent(c, -1);
549 static void do_fpxr_fujifilm_preview(deark *c, lctx *d, struct fpxr_entity_struct *fe)
551 if(fe->stream->len < 100) return;
552 if(dbuf_memcmp(fe->stream, 47, "\xff\xd8\xff", 3)) return;
553 dbuf_create_file_from_slice(fe->stream, 47, fe->stream->len-47, "fujipreview.jpg",
554 NULL, DE_CREATEFLAG_IS_AUX);
557 static int ucstring_contains_char(de_ucstring *s, i32 ch)
559 i64 k;
561 if(!s) return 0;
562 for(k=0; k<s->len; k++) {
563 if(s->str[k]==ch) return 1;
565 return 0;
568 // Called after we've saved all of a stream's data.
569 static void finalize_fpxr_stream(deark *c, lctx *d, struct fpxr_entity_struct *fe)
571 de_finfo *fi = NULL;
572 dbuf *outf = NULL;
573 de_ucstring *name2 = NULL;
575 if(!fe || !fe->stream) goto done;
576 if(fe->done_flag || fe->is_storage) goto done;
578 if(fe->stream->len != fe->stream_size) {
579 de_warn(c, "Expected FPXR stream #%u to have %"I64_FMT" bytes, found %"I64_FMT,
580 (unsigned int)fe->index, fe->stream_size, fe->stream->len);
583 // Process some known streams
584 if(fe->name_srd) {
585 if(fe->name_srd->sz_utf8 && !de_strcmp(fe->name_srd->sz_utf8, "/FUJIFILM/Preview")) {
586 do_fpxr_fujifilm_preview(c, d, fe);
589 // The FlashPix spec says "Names in an IStorage that begin with the
590 // value '\0x05' are reserved exclusively for the storage of property
591 // sets."
593 // It probably means the last *component* of the name begins with 0x05.
594 // 0x05 shouldn't appear anywhere else, I think, so I'll just search
595 // the whole string for it.
596 if(ucstring_contains_char(fe->name_srd->str, 0x05)) {
597 do_fpxr_olepropset_stream(c, d, fe);
601 if(c->extract_level<2) goto done;
603 fi = de_finfo_create(c);
605 name2 = ucstring_create(c);
606 if(fe->name_srd) {
607 ucstring_append_ucstring(name2, fe->name_srd->str);
609 if(name2->len>0) {
610 ucstring_append_char(name2, '.');
612 ucstring_append_sz(name2, "fpxr.bin", DE_ENCODING_LATIN1);
613 de_finfo_set_name_from_ucstring(c, fi, name2, 0);
615 outf = dbuf_create_output_file(c, NULL, fi, DE_CREATEFLAG_IS_AUX);
616 dbuf_copy(fe->stream, 0, fe->stream->len, outf);
618 done:
619 if(fe && fe->stream) {
620 dbuf_close(fe->stream);
621 fe->stream = NULL;
623 if(fe) {
624 fe->done_flag = 1;
626 ucstring_destroy(name2);
627 dbuf_close(outf);
628 de_finfo_destroy(c, fi);
631 // Clean up incomplete FPXR streams.
632 // This function shouldn't be necessary, but I've seen some streams that don't
633 // have their full expected length, even though they seem to contain useful data.
634 // If we didn't do this, we would never process short streams at all.
635 static void finalize_all_fpxr_streams(deark *c, lctx *d)
637 size_t k;
639 if(!d->fpxr_data) return;
641 for(k=0; k<d->fpxr_data->num_entities; k++) {
642 struct fpxr_entity_struct *fe = &d->fpxr_data->entities[k];
643 if(fe->stream) {
644 finalize_fpxr_stream(c, d, fe);
647 destroy_fpxr_data(c, d);
650 static void append_fpxr_stream_data(deark *c, lctx *d, size_t stream_idx,
651 i64 pos, i64 len)
653 struct fpxr_entity_struct *fe = NULL;
655 if(!d->fpxr_data) return;
656 if(stream_idx > d->fpxr_data->num_entities) return;
657 fe = &d->fpxr_data->entities[stream_idx];
658 if(fe->done_flag) return;
660 // TODO: More validation could be done here.
661 // We're just assuming the FPXR chunks are correctly formed, and in the
662 // right order.
663 // Note that the chunk size (len) is a calculated value, and is constrained
664 // to the size of a JPEG segment (64KB). So it should be okay to trust it.
666 // If we haven't done it yet, create a membuf for this stream.
667 if(!fe->stream) {
668 fe->stream = dbuf_create_membuf(c, len, 0);
671 // Save the stream data to the membuf.
672 // We make a copy of the stream, because it could be split up into chunks,
673 // *and* we might want to parse it.
674 dbuf_copy(c->infile, pos, len, fe->stream);
676 if(fe->stream->len >= fe->stream_size) {
677 finalize_fpxr_stream(c, d, fe);
681 static void do_fpxr_segment(deark *c, lctx *d, i64 pos1, i64 len)
683 i64 pos = pos1;
684 size_t k;
685 i64 nbytesleft;
686 int saved_indent_level;
687 u8 ver;
688 u8 segtype;
689 const char *name;
691 de_dbg_indent_save(c, &saved_indent_level);
692 if(len<2) goto done;
693 ver = de_getbyte_p(&pos);
695 de_dbg(c, "version: %u", (unsigned int)ver);
696 segtype = de_getbyte_p(&pos);
697 switch(segtype) {
698 case 1: name = "contents list"; break;
699 case 2: name = "stream data"; break;
700 default: name = "?";
702 de_dbg(c, "segment type: %u (%s)", (unsigned int)segtype, name);
704 if(segtype==1) { // contents list
705 // Initialize our saved fpxr data
706 destroy_fpxr_data(c, d);
707 d->fpxr_data = de_malloc(c, sizeof(struct fpxr_data_struct));
709 if(len<4) goto done;
711 d->fpxr_data->num_entities = (size_t)de_getu16be_p(&pos);
712 de_dbg(c, "interoperability count: %u", (unsigned int)d->fpxr_data->num_entities);
713 d->fpxr_data->entities = de_mallocarray(c, d->fpxr_data->num_entities, sizeof(struct fpxr_entity_struct));
715 for(k=0; k<d->fpxr_data->num_entities; k++) {
716 i64 bytes_consumed = 0;
717 struct fpxr_entity_struct *fe;
718 i64 esize;
719 u8 defval;
720 u8 clsid_buf[16];
721 char clsid_string[50];
723 if(pos>=pos1+len) goto done;
724 fe = &d->fpxr_data->entities[k];
725 fe->index = k;
726 de_dbg(c, "entity[%d] at %d", (int)k, (int)pos);
727 de_dbg_indent(c, 1);
729 esize = de_getu32be_p(&pos);
730 if(esize==0xffffffffLL) {
731 fe->is_storage = 1;
733 de_dbg(c, "entity type: %s", fe->is_storage?"storage":"stream");
734 if(!fe->is_storage) {
735 de_dbg(c, "stream size: %u", (unsigned int)esize);
736 fe->stream_size = esize;
739 defval = de_getbyte_p(&pos);
740 de_dbg(c, "default value: 0x%02x", (unsigned int)defval);
742 nbytesleft = pos1+len-pos;
743 if(!dbuf_get_utf16_NULterm_len(c->infile, pos, nbytesleft, &bytes_consumed)) goto done;
744 fe->name_srd = dbuf_read_string(c->infile, pos, bytes_consumed-2, bytes_consumed-2,
745 DE_CONVFLAG_WANT_UTF8, DE_ENCODING_UTF16LE);
746 de_dbg(c, "entity name: \"%s\"", ucstring_getpsz_d(fe->name_srd->str));
747 pos += bytes_consumed;
749 if(fe->is_storage) { // read Entity class ID
750 de_read(clsid_buf, pos, 16);
751 pos += 16;
752 fmtutil_guid_to_uuid(clsid_buf);
753 fmtutil_render_uuid(c, clsid_buf, clsid_string, sizeof(clsid_string));
754 de_dbg(c, "class id: {%s}", clsid_string);
756 de_dbg_indent(c, -1);
759 else if(segtype==2) { // stream data
760 size_t stream_idx;
761 i64 stream_offset;
763 if(len<6) goto done;
765 stream_idx = (size_t)de_getu16be_p(&pos);
766 de_dbg(c, "index to contents list: %d", (int)stream_idx);
768 // The Exif spec (2.31) says this field is at offset 0x0C, but I'm
769 // assuming that's a clerical error that should be 0x0D.
770 stream_offset = de_getu32be_p(&pos);
771 de_dbg(c, "offset to flashpix stream: %u", (unsigned int)stream_offset);
773 nbytesleft = pos1+len-pos;
774 if(nbytesleft>0) {
775 de_dbg(c, "[%d bytes of flashpix stream data, at %d]", (int)nbytesleft, (int)pos);
777 append_fpxr_stream_data(c, d, stream_idx, pos, nbytesleft);
780 done:
781 de_dbg_indent_restore(c, saved_indent_level);
784 static void do_ducky_stringblock(deark *c, lctx *d,
785 i64 pos1, i64 len, const char *name)
787 i64 pos = pos1;
788 i64 nchars;
789 de_ucstring *s = NULL;
791 if(len<4) goto done;
792 nchars = de_getu32be_p(&pos);
793 if(nchars*2 > len-4) goto done;
795 s = ucstring_create(c);
796 dbuf_read_to_ucstring_n(c->infile, pos, nchars*2, DE_DBG_MAX_STRLEN, s,
797 0, DE_ENCODING_UTF16BE);
798 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz(s));
799 done:
800 ucstring_destroy(s);
803 static void do_ducky_segment(deark *c, lctx *d, i64 pos1, i64 len)
805 i64 pos = pos1;
806 u32 blktype;
807 i64 blklen;
808 i64 n;
810 while(1) {
811 blktype = (u32)de_getu16be_p(&pos);
812 if(blktype==0) break;
813 if(pos+2 > pos1+len) break;
814 blklen = de_getu16be_p(&pos);
815 if(pos+blklen > pos1+len) break;
816 switch(blktype) {
817 case 1:
818 if(blklen==4) {
819 n = de_getu32be(pos);
820 de_dbg(c, "quality: %d", (int)n);
822 break;
823 case 2:
824 do_ducky_stringblock(c, d, pos, blklen, "comment");
825 break;
826 case 3:
827 do_ducky_stringblock(c, d, pos, blklen, "copyright");
828 break;
830 pos += blklen;
834 static void do_meta_segment(deark *c, lctx *d, i64 pos1, i64 len)
836 if(len<1) return;
838 de_dbg(c, "\"Meta\" data at %d, size=%d", (int)(pos1+1), (int)(len-1));
839 de_dbg_indent(c, 1);
840 // TODO: The 3rd param below should probably represent some sort of TIFF
841 // tag "namespace".
842 de_run_module_by_id_on_slice2(c, "tiff", NULL, c->infile, pos1+1, len-1);
843 de_dbg_indent(c, -1);
847 // ITU-T Rec. T.86 says nothing about canonicalizing the APP ID, but in
848 // practice, some apps are sloppy about capitalization, and trailing spaces.
849 static void normalize_app_id(const char *app_id_orig, char *app_id_normalized,
850 size_t app_id_normalized_len)
852 i64 id_strlen;
853 i64 i;
855 de_strlcpy(app_id_normalized, app_id_orig, app_id_normalized_len);
856 id_strlen = (i64)de_strlen(app_id_normalized);
858 // Strip trailing spaces.
859 while(id_strlen>0 && app_id_normalized[id_strlen-1]==' ') {
860 app_id_normalized[id_strlen-1] = '\0';
861 id_strlen--;
864 for(i=0; i<id_strlen; i++) {
865 if(app_id_normalized[i]>='a' && app_id_normalized[i]<='z') {
866 app_id_normalized[i] -= 32;
// Application segment types, as stored in app_id_info_struct::appsegtype.
#define APPSEGTYPE_UNKNOWN        0
#define APPSEGTYPE_JFIF           2
#define APPSEGTYPE_JFXX           3
#define APPSEGTYPE_SPIFF          5
#define APPSEGTYPE_EXIF           6
#define APPSEGTYPE_FPXR           7
#define APPSEGTYPE_ADOBEAPP14     9
#define APPSEGTYPE_ICC_PROFILE    10
#define APPSEGTYPE_PHOTOSHOP      11
#define APPSEGTYPE_DUCKY          12
#define APPSEGTYPE_XMP            14
#define APPSEGTYPE_XMP_EXTENSION  15
#define APPSEGTYPE_JPEGXT         20
#define APPSEGTYPE_MPF            21
#define APPSEGTYPE_JPS            22
#define APPSEGTYPE_HDR_RI_VER     24
#define APPSEGTYPE_HDR_RI_EXT     25
#define APPSEGTYPE_META           26
#define APPSEGTYPE_AROT           27
#define APPSEGTYPE_MSRGBA         100
#define APPSEGTYPE_RBSWAP         101
893 struct app_id_info_struct {
894 int app_id_found;
895 int appsegtype;
896 i64 payload_pos;
897 de_ucstring *app_id_str; // valid if(app_id_found)
898 const char *app_type_name;
901 #define MAX_APP_ID_LEN 80
902 struct app_id_decode_struct {
903 // In:
904 u8 raw_bytes[MAX_APP_ID_LEN];
905 i64 nraw_bytes;
907 // Out:
908 char app_id_orig[MAX_APP_ID_LEN];
909 char app_id_normalized[MAX_APP_ID_LEN];
910 i64 app_id_orig_strlen;
911 int has_app_id;
914 // Caller allocates ad, and initializes the "In" fields.
915 static void decode_app_id(struct app_id_decode_struct *ad)
917 i64 k;
919 if(ad->nraw_bytes<2) return;
920 if(ad->raw_bytes[0]<32 || ad->raw_bytes[0]>126) return;
922 // Might have an app id.
923 for(k=0; k<ad->nraw_bytes; k++) {
924 if(ad->raw_bytes[k]==0) {
925 ad->has_app_id = 1;
926 ad->app_id_orig_strlen = k;
927 break;
931 if(ad->has_app_id) {
932 // We'll assume this is an app id
933 de_strlcpy(ad->app_id_orig, (const char*)ad->raw_bytes, sizeof(ad->app_id_orig));
934 normalize_app_id(ad->app_id_orig, ad->app_id_normalized, sizeof(ad->app_id_normalized));
938 // Caller allocates app_id_info, and initializes it to all 0.
939 // Caller must free ->app_id_str.
940 static void detect_app_seg_type(deark *c, lctx *d, const struct marker_info *mi,
941 i64 seg_data_pos, i64 seg_data_size, struct app_id_info_struct *app_id_info)
943 i64 sig_size = 0;
944 i64 payload_size;
945 u8 seg_type = mi->seg_type;
946 struct app_id_decode_struct ad;
948 de_zeromem(&ad, sizeof(struct app_id_decode_struct));
950 // defaults:
951 app_id_info->app_id_found = 0;
952 app_id_info->appsegtype = APPSEGTYPE_UNKNOWN;
953 app_id_info->app_type_name = "?";
955 ad.nraw_bytes = (i64)sizeof(ad.raw_bytes);
956 if(ad.nraw_bytes>seg_data_size)
957 ad.nraw_bytes = seg_data_size;
958 if(ad.nraw_bytes<2) goto done;
959 de_read(ad.raw_bytes, seg_data_pos, ad.nraw_bytes-1);
961 decode_app_id(&ad);
963 if(ad.has_app_id) {
964 app_id_info->app_id_str = ucstring_create(c);
965 ucstring_append_bytes(app_id_info->app_id_str, (const u8*)ad.app_id_orig, ad.app_id_orig_strlen, 0,
966 DE_ENCODING_ASCII);
969 if(seg_type==0xe1 && ad.nraw_bytes>20 && ad.has_app_id && !de_strcmp(ad.app_id_orig, "XMP")) {
970 // Ugly hack. I've seen a fair number of files in which the first four
971 // bytes of the "http://ns.adobe.com/xap/1.0/" signature seem to have
972 // been corrupted, and replaced with "XMP\0".
973 struct app_id_decode_struct ad2;
975 de_zeromem(&ad2, sizeof(struct app_id_decode_struct));
976 de_memcpy(ad2.raw_bytes, ad.raw_bytes, (size_t)ad.nraw_bytes);
977 ad2.nraw_bytes = ad.nraw_bytes;
978 // Try to patch the app ID, decode it, and see what happens.
979 de_memcpy(ad2.raw_bytes, (const u8*)"http", 4);
981 decode_app_id(&ad2);
983 // If that seemed to work, replace the old "normalized" ID with the patched one.
984 if(ad2.has_app_id) {
985 de_strlcpy(ad.app_id_normalized, ad2.app_id_normalized, sizeof(ad.app_id_normalized));
986 // Need to update orig_strlen, so we can find the payload data position.
987 // (ad.app_id_orig can stay the same.)
988 ad.app_id_orig_strlen = ad2.app_id_orig_strlen;
992 if(ad.has_app_id) {
993 app_id_info->app_id_found = 1;
994 sig_size = ad.app_id_orig_strlen + 1;
997 payload_size = seg_data_size - sig_size;
998 if(payload_size<0) goto done;
1000 if(seg_type==0xe0 && !de_strcmp(ad.app_id_normalized, "JFIF")) {
1001 app_id_info->appsegtype = APPSEGTYPE_JFIF;
1002 app_id_info->app_type_name = "JFIF";
1004 else if(seg_type==0xe0 && !de_strcmp(ad.app_id_normalized, "JFXX")) {
1005 app_id_info->appsegtype = APPSEGTYPE_JFXX;
1006 app_id_info->app_type_name = "JFIF-JFXX";
1008 else if(seg_type==0xee && ad.nraw_bytes>=5 && !de_strncmp((const char*)ad.raw_bytes, "Adobe", 5)) {
1009 app_id_info->appsegtype = APPSEGTYPE_ADOBEAPP14;
1010 app_id_info->app_type_name = "AdobeAPP14";
1011 sig_size = 5;
1013 else if(seg_type==0xec && ad.nraw_bytes>=5 && !de_strncmp((const char*)ad.raw_bytes, "Ducky", 5)) {
1014 app_id_info->appsegtype = APPSEGTYPE_DUCKY;
1015 app_id_info->app_type_name = "Ducky";
1016 sig_size = 5;
1018 else if(seg_type==0xe1 && seg_data_size>=6 && !de_strcmp(ad.app_id_normalized, "EXIF")) {
1019 app_id_info->appsegtype = APPSEGTYPE_EXIF;
1020 app_id_info->app_type_name = "Exif";
1021 // We arbitrarily consider the "padding byte" to be part of the signature.
1022 sig_size = 6;
1024 else if((seg_type==0xe1 || seg_type==0xe3) && ad.nraw_bytes>=14 &&
1025 !de_memcmp(ad.raw_bytes, "Meta\0\0", 6) &&
1026 (ad.raw_bytes[6]=='I' || ad.raw_bytes[6]=='M'))
1028 // This seems to be some Kodak imitation of an Exif segment.
1029 // ExifTool says APP3, but all I've seen is APP1.
1030 app_id_info->appsegtype = APPSEGTYPE_META;
1031 app_id_info->app_type_name = "Meta";
1033 else if(seg_type==0xe2 && !de_strcmp(ad.app_id_normalized, "ICC_PROFILE")) {
1034 app_id_info->appsegtype = APPSEGTYPE_ICC_PROFILE;
1035 app_id_info->app_type_name = "ICC profile";
1037 else if(seg_type==0xe2 && !de_strcmp(ad.app_id_normalized, "FPXR")) {
1038 app_id_info->appsegtype = APPSEGTYPE_FPXR;
1039 app_id_info->app_type_name = "Exif Flashpix Ready";
1041 else if(seg_type==0xe8 && !de_strcmp(ad.app_id_normalized, "SPIFF")) {
1042 app_id_info->appsegtype = APPSEGTYPE_SPIFF;
1043 app_id_info->app_type_name = "SPIFF";
1045 else if(seg_type==0xed && !de_strcmp(ad.app_id_normalized, "PHOTOSHOP 3.0")) {
1046 app_id_info->appsegtype = APPSEGTYPE_PHOTOSHOP;
1047 app_id_info->app_type_name = "Photoshop resources";
1049 else if(seg_type==0xe1 && !de_strcmp(ad.app_id_normalized, "HTTP://NS.ADOBE.COM/XAP/1.0/")) {
1050 app_id_info->appsegtype = APPSEGTYPE_XMP;
1051 app_id_info->app_type_name = "XMP";
1053 else if(seg_type==0xe1 && ad.nraw_bytes>=32 && !de_memcmp(ad.raw_bytes, "<?xpacket begin=", 16)) {
1054 // I have a few files like this, that are missing the XMP signature.
1055 app_id_info->appsegtype = APPSEGTYPE_XMP;
1056 app_id_info->app_type_name = "XMP";
1057 sig_size = 0;
1059 else if(seg_type==0xe1 && !de_strcmp(ad.app_id_normalized, "HTTP://NS.ADOBE.COM/XMP/EXTENSION/")) {
1060 app_id_info->appsegtype = APPSEGTYPE_XMP_EXTENSION;
1061 app_id_info->app_type_name = "XMP extension";
1063 else if(seg_type==0xeb && ad.nraw_bytes>=10 && !de_strncmp((const char*)ad.raw_bytes, "HDR_RI ver", 10)) {
1064 app_id_info->appsegtype = APPSEGTYPE_HDR_RI_VER;
1065 app_id_info->app_type_name = "JPEG-HDR Ver";
1067 else if(seg_type==0xeb && ad.nraw_bytes>=10 && !de_strncmp((const char*)ad.raw_bytes, "HDR_RI ext", 10)) {
1068 app_id_info->appsegtype = APPSEGTYPE_HDR_RI_EXT;
1069 app_id_info->app_type_name = "JPEG-HDR Ext";
1071 else if(seg_type==0xeb && ad.nraw_bytes>=2 && !de_strncmp((const char*)ad.raw_bytes, "JP", 2)) {
1072 app_id_info->appsegtype = APPSEGTYPE_JPEGXT;
1073 app_id_info->app_type_name = "JPEG XT";
1074 sig_size = 2;
1076 else if(seg_type==0xe2 && !de_strcmp(ad.app_id_normalized, "MPF")) {
1077 app_id_info->appsegtype = APPSEGTYPE_MPF;
1078 app_id_info->app_type_name = "Multi-Picture Format";
1080 else if(seg_type==0xe3 && ad.nraw_bytes>=8 && !de_strncmp((const char*)ad.raw_bytes, "_JPSJPS_", 8)) {
1081 // This signature is not NUL terminated.
1082 app_id_info->appsegtype = APPSEGTYPE_JPS;
1083 app_id_info->app_type_name = "JPS";
1084 sig_size = 8;
1086 else if(seg_type==0xea && seg_data_size>=6 && !de_strcmp(ad.app_id_normalized, "AROT")) {
1087 app_id_info->appsegtype = APPSEGTYPE_AROT;
1088 app_id_info->app_type_name = "Apple absolute rotational angle delta";
1089 // Guessing that there's a "padding byte" that's part of the signature.
1090 sig_size = 6;
1092 else if(seg_type==0xe1 && !de_strcmp(ad.app_id_orig, "Deark_MSRGBA")) {
1093 app_id_info->appsegtype = APPSEGTYPE_MSRGBA;
1094 app_id_info->app_type_name = "RGBA JPEG headers from Thumbs.db";
1096 else if(seg_type==0xe1 && !de_strcmp(ad.app_id_orig, "Deark_RB_swap")) {
1097 app_id_info->appsegtype = APPSEGTYPE_RBSWAP;
1098 app_id_info->app_type_name = "Flag for swapped red/blue";
1101 done:
1102 app_id_info->payload_pos = seg_data_pos + sig_size;
1105 // seg_size is the data size, excluding the marker and length fields.
1106 static void handler_app(deark *c, lctx *d,
1107 const struct marker_info *mi, i64 seg_data_pos, i64 seg_data_size)
1109 int appsegtype;
1110 i64 payload_pos;
1111 i64 payload_size;
1112 struct app_id_info_struct app_id_info;
1114 de_zeromem(&app_id_info, sizeof(struct app_id_info_struct));
1116 detect_app_seg_type(c, d, mi, seg_data_pos, seg_data_size, &app_id_info);
1117 appsegtype = app_id_info.appsegtype;
1118 payload_pos = app_id_info.payload_pos;
1119 if(app_id_info.app_id_found) {
1120 de_dbg(c, "app id: \"%s\", identified as: %s", ucstring_getpsz(app_id_info.app_id_str),
1121 app_id_info.app_type_name);
1123 else {
1124 de_dbg(c, "app id: (not found), identified as: %s", app_id_info.app_type_name);
1127 payload_size = seg_data_pos + seg_data_size - payload_pos;
1128 if(payload_size<0) goto done;
1130 switch(appsegtype) {
1131 case APPSEGTYPE_JFIF:
1132 do_jfif_segment(c, d, payload_pos, payload_size);
1133 break;
1134 case APPSEGTYPE_JFXX:
1135 do_jfxx_segment(c, d, payload_pos, payload_size);
1136 break;
1137 case APPSEGTYPE_ADOBEAPP14:
1138 do_adobeapp14_segment(c, d, payload_pos, payload_size);
1139 break;
1140 case APPSEGTYPE_EXIF:
1141 do_exif_segment(c, d, payload_pos, payload_size);
1142 break;
1143 case APPSEGTYPE_META:
1144 do_meta_segment(c, d, payload_pos, payload_size);
1145 break;
1146 case APPSEGTYPE_ICC_PROFILE:
1147 do_icc_profile_segment(c, d, payload_pos, payload_size);
1148 break;
1149 case APPSEGTYPE_FPXR:
1150 d->has_flashpix = 1;
1151 do_fpxr_segment(c, d, payload_pos, payload_size);
1152 break;
1153 case APPSEGTYPE_SPIFF:
1154 d->has_spiff_seg = 1;
1155 break;
1156 case APPSEGTYPE_PHOTOSHOP:
1157 do_photoshop_segment(c, d, payload_pos, payload_size);
1158 break;
1159 case APPSEGTYPE_DUCKY:
1160 do_ducky_segment(c, d, payload_pos, payload_size);
1161 break;
1162 case APPSEGTYPE_XMP:
1163 de_dbg(c, "XMP data at %d, size=%d", (int)(payload_pos), (int)(payload_size));
1164 d->has_xmp = 1;
1165 dbuf_create_file_from_slice(c->infile, payload_pos, payload_size, "xmp", NULL, DE_CREATEFLAG_IS_AUX);
1166 break;
1167 case APPSEGTYPE_XMP_EXTENSION:
1168 d->has_xmp_ext = 1;
1169 do_xmp_extension_segment(c, d, payload_pos, payload_size);
1170 break;
1171 case APPSEGTYPE_HDR_RI_VER:
1172 d->is_jpeghdr = 1;
1173 do_jpeghdr_segment(c, d, seg_data_pos, seg_data_size, 0);
1174 break;
1175 case APPSEGTYPE_HDR_RI_EXT:
1176 do_jpeghdr_segment(c, d, seg_data_pos, seg_data_size, 1);
1177 break;
1178 case APPSEGTYPE_JPEGXT:
1179 d->is_jpegxt = 1;
1180 do_jpegxt_segment(c, d, payload_pos, payload_size);
1181 break;
1182 case APPSEGTYPE_MPF:
1183 do_mpf_segment(c, d, payload_pos, payload_size);
1184 break;
1185 case APPSEGTYPE_JPS:
1186 d->is_jps = 1;
1187 do_jps_segment(c, d, payload_pos, payload_size);
1188 break;
1189 case APPSEGTYPE_AROT:
1190 do_arot_segment(c, d, payload_pos, payload_size);
1191 break;
1192 default:
1193 if(c->debug_level>=2) {
1194 de_dbg_hexdump(c, c->infile, seg_data_pos, seg_data_size, 256, "segment data", 0x1);
1196 break;
1199 done:
1200 if(app_id_info.app_id_str) {
1201 ucstring_destroy(app_id_info.app_id_str);
1205 static void handler_jpg8(deark *c, lctx *d,
1206 const struct marker_info *mi, i64 seg_data_pos, i64 seg_data_size)
1208 u8 id;
1209 const char *name = "?";
1211 if(seg_data_size<1) return;
1212 id = de_getbyte(seg_data_pos);
1213 if(id==0x0d) {
1214 d->has_revcolorxform = 1;
1215 name="inverse color transform specification";
1217 de_dbg(c, "id: 0x%02x (%s)", (unsigned int)id, name);
1220 static void declare_jpeg_fmt(deark *c, lctx *d, u8 seg_type)
1222 const char *name = "JPEG (other)";
1224 // The declared format is only an executive summary of the kind of JPEG.
1225 // It does not come close to covering all possible combinations of attributes.
1226 // (The "summary:" line goes a bit further.)
1227 if(d->is_jpegls) { name = "JPEG-LS"; }
1228 else if(d->is_mpo) { name = "JPEG/MPO"; }
1229 else if(d->is_jps) { name = "JPEG/JPS"; }
1230 else if(d->is_jpegxt) { name = "JPEG/JPEG_XT"; }
1231 else if(d->is_jpeghdr) { name = "JPEG-HDR"; }
1232 else if(d->is_lossless) { name = "JPEG/lossless"; }
1233 else if(d->has_jfif_seg && d->has_exif_seg) { name = "JPEG/JFIF+Exif"; }
1234 else if(d->has_jfif_seg) { name = "JPEG/JFIF"; }
1235 else if(d->has_exif_seg) { name = "JPEG/Exif"; }
1236 de_declare_fmt(c, name);
1239 static void handler_sof(deark *c, lctx *d,
1240 const struct marker_info *mi, i64 pos, i64 data_size)
1242 i64 w, h;
1243 u8 b;
1244 i64 i;
1245 const char *attr_lossy = "DCT";
1246 const char *attr_cmpr = "huffman";
1247 const char *attr_progr = "non-progr.";
1248 const char *attr_hier = "non-hier.";
1249 u8 seg_type = mi->seg_type;
1251 if(data_size<6) return;
1253 if(d->fpxr_data) {
1254 finalize_all_fpxr_streams(c, d);
1257 if(seg_type>=0xc1 && seg_type<=0xcf && (seg_type%4)!=0) {
1258 if((seg_type%4)==3) { d->is_lossless=1; attr_lossy="lossless"; }
1259 if(seg_type%16>=9) { d->is_arithmetic=1; attr_cmpr="arithmetic"; }
1260 if((seg_type%4)==2) { d->is_progressive=1; attr_progr="progressive"; }
1261 if((seg_type%8)>=5) { d->is_hierarchical=1; attr_hier="hierarchical"; }
1262 de_dbg(c, "image type: %s, %s, %s, %s",
1263 attr_lossy, attr_cmpr, attr_progr, attr_hier);
1265 else if(seg_type==0xc0) {
1266 d->is_baseline = 1;
1267 de_dbg(c, "image type: baseline (%s, %s, %s, %s)",
1268 attr_lossy, attr_cmpr, attr_progr, attr_hier);
1270 else if(seg_type==0xf7) {
1271 de_dbg(c, "image type: JPEG-LS");
1274 // By now we have hopefully collected the info we need to decide what JPEG
1275 // format we're dealing with.
1276 declare_jpeg_fmt(c, d, seg_type);
1278 d->precision = de_getbyte(pos);
1279 de_dbg(c, "precision: %d", (int)d->precision);
1280 h = de_getu16be(pos+1);
1281 w = de_getu16be(pos+3);
1282 de_dbg_dimensions(c, w, h);
1283 d->ncomp = (i64)de_getbyte(pos+5);
1284 de_dbg(c, "number of components: %d", (int)d->ncomp);
1286 // per-component data
1287 if(data_size<6+3*d->ncomp) goto done;
1288 for(i=0; i<d->ncomp; i++) {
1289 u8 comp_id;
1290 i64 sf1, sf2;
1291 u8 qtid;
1292 comp_id = de_getbyte(pos+6+3*i+0);
1293 b = de_getbyte(pos+6+3*i+1);
1294 sf1 = (i64)(b>>4);
1295 sf2 = (i64)(b&0x0f);
1296 if(sf1!=1 || sf2!=1) d->is_subsampled = 1;
1297 ucstring_printf(d->sampling_code, DE_ENCODING_LATIN1, "%d%d", (int)sf1, (int)sf2);
1298 qtid = de_getbyte(pos+6+3*i+2);
1299 de_dbg(c, "cmp #%d: id=%d sampling=%d"DE_CHAR_TIMES"%d quant_table=Q%d",
1300 (int)i, (int)comp_id, (int)sf1, (int)sf2, (int)qtid);
1303 done:
1307 static void handler_dri(deark *c, lctx *d,
1308 const struct marker_info *mi, i64 pos, i64 data_size)
1310 i64 ri;
1311 if(data_size!=2) return;
1312 ri = de_getu16be(pos);
1313 de_dbg(c, "restart interval: %d", (int)ri);
1314 if(ri!=0) d->has_restart_markers = 1;
1317 static void dump_htable_summary(deark *c, lctx *d, const u8 *codecounts)
1319 i64 k;
1320 de_ucstring *s = NULL;
1322 if(c->debug_level<2) return;
1324 s = ucstring_create(c);
1325 for(k=0; k<16; k++) {
1326 ucstring_printf(s, DE_ENCODING_LATIN1, " %3u",
1327 (unsigned int)codecounts[k]);
1328 if(k%8==7) { // end of a debug line
1329 de_dbg(c, "number of codes of len[%d-%2d]:%s",
1330 (int)(k-7+1), (int)(k+1),
1331 ucstring_getpsz(s));
1332 ucstring_empty(s);
1335 ucstring_destroy(s);
1338 // Just because we can, derive and display the Huffman code table (at
1339 // sufficiently high debug levels).
1340 static void dump_htable_details(deark *c, lctx *d, i64 pos1, const u8 *codecounts,
1341 i64 num_huff_codes)
1343 struct fmtutil_huffman_decoder *ht = NULL;
1344 UI symlen;
1345 i64 pos = pos1;
1347 // TODO: Is there any case (e.g. lossless) where >162 codes are allowed?
1348 if(c->debug_level>=3 && (num_huff_codes>=1 && num_huff_codes<=162)) {
1351 else {
1352 return;
1355 ht = fmtutil_huffman_create_decoder(c, 0, 0);
1357 // Note: Per the JPEG spec, "the all-1-bits code word of any length is
1358 // reserved as a prefix for longer code words". So we should not expect to
1359 // see such a code in the derived codebook.
1361 for(symlen=1; symlen<=16; symlen++) {
1362 UI num_syms_of_this_length;
1363 UI k;
1365 num_syms_of_this_length = (UI)codecounts[symlen-1];
1366 for(k=0; k<num_syms_of_this_length; k++) {
1367 u8 sym;
1369 sym = de_getbyte_p(&pos);
1370 fmtutil_huffman_record_a_code_length(c, ht->builder, (fmtutil_huffman_valtype)sym, symlen);
1374 // We do this only for the side effect of the debug messages.
1375 (void)fmtutil_huffman_make_canonical_code(c, ht->bk, ht->builder, 0);
1377 fmtutil_huffman_destroy_decoder(c, ht);
1380 static void handler_dht(deark *c, lctx *d,
1381 const struct marker_info *mi, i64 pos1, i64 data_size)
1383 i64 pos = pos1;
1384 u8 b;
1385 u8 table_class;
1386 u8 table_id;
1387 i64 num_huff_codes;
1388 i64 k;
1389 u8 codecounts[16];
1391 while(1) {
1392 if(pos >= pos1+data_size) goto done;
1394 b = de_getbyte(pos);
1395 table_class = b>>4;
1396 table_id = b&0x0f;
1397 de_dbg(c, "table: %s%d, at %d", table_class==0?"DC":"AC",
1398 (int)table_id, (int)pos);
1400 de_read(codecounts, pos+1, 16);
1401 num_huff_codes = 0;
1402 for(k=0; k<16; k++) {
1403 num_huff_codes += (i64)codecounts[k];
1405 de_dbg_indent(c, 1);
1406 dump_htable_summary(c, d, codecounts);
1407 de_dbg(c, "number of codes: %d", (int)num_huff_codes);
1408 pos += 1 + 16;
1409 dump_htable_details(c, d, pos, codecounts, num_huff_codes);
1410 de_dbg_indent(c, -1);
1411 pos += num_huff_codes;
1414 done:
1418 // DAC = Define arithmetic coding conditioning
1419 static void handler_dac(deark *c, lctx *d,
1420 const struct marker_info *mi, i64 pos1, i64 data_size)
1422 i64 ntables;
1423 i64 i;
1424 u8 b;
1425 u8 cs;
1426 u8 table_class;
1427 u8 table_id;
1429 ntables = data_size/2;
1430 for(i=0; i<ntables; i++) {
1431 b = de_getbyte(pos1+i*2);
1432 table_class = b>>4;
1433 table_id = b&0x0f;
1434 de_dbg(c, "table: %s%u", table_class==0?"DC":"AC",
1435 (unsigned int)table_id);
1436 cs = de_getbyte(pos1+i*2+1);
1437 de_dbg_indent(c, 1);
1438 de_dbg(c, "conditioning value: %d", (int)cs);
1439 de_dbg_indent(c, -1);
1443 static void dump_qtable_data(deark *c, lctx *d, i64 pos, u8 precision_code)
1445 u8 qbuf[64];
1446 i64 k;
1447 de_ucstring *s = NULL;
1448 static const u8 zigzag[64] = {
1449 0, 1, 5, 6,14,15,27,28,
1450 2, 4, 7,13,16,26,29,42,
1451 3, 8,12,17,25,30,41,43,
1452 9,11,18,24,31,40,44,53,
1453 10,19,23,32,39,45,52,54,
1454 20,22,33,38,46,51,55,60,
1455 21,34,37,47,50,56,59,61,
1456 35,36,48,49,57,58,62,63
1459 if(c->debug_level<2) return;
1460 if(precision_code!=0) return;
1462 de_read(qbuf, pos, 64);
1463 s = ucstring_create(c);
1464 for(k=0; k<64; k++) {
1465 ucstring_printf(s, DE_ENCODING_LATIN1, " %3u",
1466 (unsigned int)qbuf[(unsigned int)zigzag[k]]);
1467 if(k%8==7) { // end of a debug line
1468 de_dbg(c, "data:%s", ucstring_getpsz(s));
1469 ucstring_empty(s);
1472 ucstring_destroy(s);
1475 static void handler_dqt(deark *c, lctx *d,
1476 const struct marker_info *mi, i64 pos1, i64 data_size)
1478 i64 pos = pos1;
1479 u8 b;
1480 u8 precision_code;
1481 u8 table_id;
1482 i64 qsize;
1483 const char *s;
1485 while(1) {
1486 if(pos >= pos1+data_size) goto done;
1488 b = de_getbyte(pos);
1489 precision_code = b>>4;
1490 table_id = b&0x0f;
1491 if(precision_code==0) {
1492 s="8-bit";
1493 qsize = 64;
1495 else if(precision_code==1) {
1496 s="16-bit";
1497 qsize = 128;
1499 else {
1500 s="?";
1501 qsize = 0;
1503 de_dbg(c, "table: Q%d, at %d", table_id, (int)pos);
1505 de_dbg_indent(c, 1);
1506 de_dbg(c, "precision: %d (%s)", (int)precision_code, s);
1507 dump_qtable_data(c, d, pos+1, precision_code);
1508 de_dbg_indent(c, -1);
1510 if(qsize==0) goto done;
1512 pos += 1 + qsize;
1515 done:
1519 static void handle_comment(deark *c, lctx *d, i64 pos, i64 comment_size,
1520 de_encoding encoding_base)
1522 de_ucstring *s = NULL;
1523 de_ext_encoding encoding_ext;
1524 int write_to_file;
1526 // If c->extract_level>=2, write the comment to a file;
1527 // otherwise if we have debugging output, write (at least part of) it
1528 // to the debug output;
1529 // otherwise do nothing.
1531 if(c->extract_level<2 && c->debug_level<1) return;
1532 if(comment_size<1) return;
1534 write_to_file = (c->extract_level>=2);
1536 if(write_to_file && encoding_base==DE_ENCODING_UNKNOWN) {
1537 // If we don't know the encoding, dump the raw bytes to a file.
1538 dbuf_create_file_from_slice(c->infile, pos, comment_size, "comment.txt",
1539 NULL, DE_CREATEFLAG_IS_AUX);
1540 goto done;
1543 if(encoding_base==DE_ENCODING_UNKNOWN) {
1544 // In this case, we're printing the comment in the debug info.
1545 // If we don't know the encoding, pretend it's ASCII-like.
1546 encoding_ext = DE_EXTENC_MAKE(DE_ENCODING_ASCII, DE_ENCSUBTYPE_PRINTABLE);
1548 else {
1549 encoding_ext = encoding_base;
1552 s = ucstring_create(c);
1553 dbuf_read_to_ucstring(c->infile, pos, comment_size, s, 0, encoding_ext);
1555 if(write_to_file) {
1556 dbuf *outf = NULL;
1557 outf = dbuf_create_output_file(c, "comment.txt", NULL, DE_CREATEFLAG_IS_AUX);
1558 ucstring_write_as_utf8(c, s, outf, 1);
1559 dbuf_close(outf);
1561 else {
1562 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(s));
1565 done:
1566 ucstring_destroy(s);
1569 static void handler_com(deark *c, lctx *d,
1570 const struct marker_info *mi, i64 pos, i64 data_size)
1572 // Note that a JPEG COM-segment comment is an arbitrary sequence of bytes, so
1573 // there's no way to know what text encoding it uses, or even whether it is text.
1574 // We'll use the user's "-inenc" encoding, or DE_ENCODING_UNKNOWN by default.
1575 handle_comment(c, d, pos, data_size, c->input_encoding);
1578 static void handler_sos(deark *c, lctx *d,
1579 const struct marker_info *mi, i64 pos, i64 data_size)
1581 i64 ncomp;
1582 i64 i;
1583 u8 cs;
1584 u8 b;
1585 u8 ss, se, ax;
1586 u8 actable, dctable;
1588 if(data_size<1) goto done;
1590 d->scan_count++;
1591 ncomp = (i64)de_getbyte(pos);
1592 de_dbg(c, "number of components in scan: %d", (int)ncomp);
1593 if(data_size < 4 + 2*ncomp) goto done;
1595 for(i=0; i<ncomp; i++) {
1596 cs = de_getbyte(pos+1+i*2);
1597 de_dbg(c, "component #%d id: %d", (int)i, (int)cs);
1598 de_dbg_indent(c, 1);
1599 b = de_getbyte(pos+1+i*2+1);
1600 dctable = b>>4;
1601 actable = b&0x0f;
1602 de_dbg(c, "tables to use: DC%d, AC%d", (int)dctable, (int)actable);
1603 de_dbg_indent(c, -1);
1606 ss = de_getbyte(pos+1+ncomp*2);
1607 se = de_getbyte(pos+1+ncomp*2+1);
1608 ax = de_getbyte(pos+1+ncomp*2+2);
1609 de_dbg(c, "spectral selection start/end: %d, %d", (int)ss, (int)se);
1610 de_dbg(c, "successive approx. bit pos high/low: %u, %u",
1611 (unsigned int)(ax>>4), (unsigned int)(ax&0x0f));
1613 done:
1617 static const struct marker_info1 marker_info1_arr[] = {
1618 {0x01, 0x0101, "TEM", NULL, NULL},
1619 {0xc4, 0x0001, "DHT", "Define Huffman table", handler_dht},
1620 {0xc8, 0x0201, "JPG", NULL, handler_sof},
1621 {0xcc, 0x0001, "DAC", "Define arithmetic coding conditioning", handler_dac},
1622 {0xd8, 0x0103, "SOI", "Start of image", NULL},
1623 {0xd9, 0x0103, "EOI", "End of image", NULL},
1624 {0xda, 0x0003, "SOS", "Start of scan", handler_sos},
1625 {0xdb, 0x0001, "DQT", "Define quantization table", handler_dqt},
1626 {0xdc, 0x0001, "DNL", "Define number of lines", NULL},
1627 {0xdd, 0x0003, "DRI", "Define restart interval", handler_dri},
1628 {0xde, 0x0001, "DHP", "Define hierarchical progression", NULL},
1629 {0xdf, 0x0001, "EXP", "Expand reference component", NULL},
1630 {0xf7, 0x0202, "SOF55", "JPEG-LS start of frame", handler_sof},
1631 {0xf8, 0x0001, "JPG8", NULL, handler_jpg8},
1632 {0xf8, 0x0002, "LSE", "JPEG-LS preset parameters", NULL},
1633 {0xfe, 0x0003, "COM", "Comment", handler_com}
1636 // Caller allocates mi
1637 static int get_marker_info(deark *c, lctx *d, u8 seg_type,
1638 struct marker_info *mi)
1640 i64 k;
1642 de_zeromem(mi, sizeof(struct marker_info));
1643 mi->seg_type = seg_type;
1645 // First, try to find the segment type in the static marker info.
1646 for(k=0; k<(i64)DE_ARRAYCOUNT(marker_info1_arr); k++) {
1647 const struct marker_info1 *mi1 = &marker_info1_arr[k];
1649 if(!d->is_jpegls && !(mi1->flags&FLAG_JPEG_COMPAT)) continue;
1650 if(d->is_jpegls && !(mi1->flags&FLAG_JPEGLS_COMPAT)) continue;
1652 if(mi1->seg_type == seg_type) {
1653 mi->flags = mi1->flags;
1654 mi->hfn = mi1->hfn;
1655 de_strlcpy(mi->shortname, mi1->shortname, sizeof(mi->shortname));
1656 if(mi1->longname) {
1657 de_snprintf(mi->longname, sizeof(mi->longname), "%s: %s",
1658 mi1->shortname, mi1->longname);
1660 goto done;
1664 // Handle some pattern-based markers.
1666 if(seg_type>=0xe0 && seg_type<=0xef) {
1667 de_snprintf(mi->shortname, sizeof(mi->shortname), "APP%d", (int)(seg_type-0xe0));
1668 mi->hfn = handler_app;
1669 goto done;
1672 if(seg_type>=0xc0 && seg_type<=0xcf) {
1673 de_snprintf(mi->shortname, sizeof(mi->shortname), "SOF%d", (int)(seg_type-0xc0));
1674 de_snprintf(mi->longname, sizeof(mi->longname), "%s: Start of frame", mi->shortname);
1675 mi->flags |= FLAG_IS_SOF;
1676 mi->hfn = handler_sof;
1677 goto done;
1680 if(seg_type>=0xd0 && seg_type<=0xd7) {
1681 int rstn = (int)(seg_type-0xd0);
1682 de_snprintf(mi->shortname, sizeof(mi->shortname), "RST%d", rstn);
1683 de_snprintf(mi->longname, sizeof(mi->longname), "%s: Restart with mod 8 count %d",
1684 mi->shortname, rstn);
1685 mi->flags |= FLAG_NO_DATA;
1686 goto done;
1689 if(seg_type>=0xf0 && seg_type<=0xfd) {
1690 de_snprintf(mi->shortname, sizeof(mi->shortname), "JPG%d", (int)(seg_type-0xf0));
1691 goto done;
1694 de_strlcpy(mi->shortname, "???", sizeof(mi->shortname));
1695 de_strlcpy(mi->longname, "???", sizeof(mi->longname));
1696 return 0;
1698 done:
1699 if(!mi->longname[0]) {
1700 // If no longname was set, use the shortname
1701 de_strlcpy(mi->longname, mi->shortname, sizeof(mi->longname));
1703 return 1;
1706 static void do_segment(deark *c, lctx *d, const struct marker_info *mi,
1707 i64 payload_pos, i64 payload_size)
1709 de_dbg(c, "segment 0x%02x (%s) at %d, dpos=%d, dlen=%d",
1710 (unsigned int)mi->seg_type, mi->longname, (int)(payload_pos-4),
1711 (int)payload_pos, (int)payload_size);
1713 if(mi->hfn) {
1714 // If a handler function is available, use it.
1715 de_dbg_indent(c, 1);
1716 mi->hfn(c, d, mi, payload_pos, payload_size);
1717 de_dbg_indent(c, -1);
1721 // TODO: This is very similar to detect_jpeg_len().
1722 // Maybe they should be consolidated.
1723 static int do_read_scan_data(deark *c, lctx *d,
1724 i64 pos1, i64 *bytes_consumed)
1726 i64 pos = pos1;
1727 u8 b0, b1;
1728 struct marker_info mi;
1730 *bytes_consumed = c->infile->len - pos1; // default
1731 de_dbg(c, "scan data at %d", (int)pos1);
1733 de_dbg_indent(c, 1);
1735 while(1) {
1736 if(pos >= c->infile->len) goto done;
1737 b0 = de_getbyte_p(&pos);
1738 if(b0==0xff) {
1739 b1 = de_getbyte_p(&pos);
1740 if(b1==0x00) {
1741 ; // an escaped 0xff
1743 else if(d->is_jpegls && b1<0x80) {
1744 // In JPEG-LS, 0xff bytes are not escaped if they're followed by a
1745 // a byte less than 0x80.
1748 else if(b1>=0xd0 && b1<=0xd7) { // an RSTn marker
1749 if(c->debug_level>=2) {
1750 get_marker_info(c, d, b1, &mi);
1751 de_dbg2(c, "marker 0x%02x (%s) at %d", (unsigned int)b1,
1752 mi.longname, (int)(pos-2));
1755 else if(b1==0xff) { // a "fill byte" (are they allowed here?)
1756 pos--;
1758 else {
1759 // A marker that is not part of the scan.
1760 // Subtract the bytes consumed by it, and stop.
1761 pos -= 2;
1762 *bytes_consumed = pos - pos1;
1763 de_dbg(c, "end of scan data found at %d (len=%d)", (int)pos, (int)*bytes_consumed);
1764 break;
1769 done:
1770 de_dbg_indent(c, -1);
1771 return 1;
1774 // Caller supplies s[5].
1775 static void exif_version_to_string(u32 v, char *s)
1777 s[0] = de_byte_to_printable_char((u8)((v>>24)&0xff));
1778 s[1] = de_byte_to_printable_char((u8)((v>>16)&0xff));
1779 s[2] = de_byte_to_printable_char((u8)((v>>8)&0xff));
1780 s[3] = de_byte_to_printable_char((u8)(v&0xff));
1781 s[4] = '\0';
1784 // Print a summary line indicating the main characteristics of this image.
1785 static void print_summary(deark *c, lctx *d)
1787 de_ucstring *summary = NULL;
1789 if(d->is_jpegls) goto done;
1790 if(!d->found_sof) goto done;
1791 if(!d->found_soi) goto done;
1793 summary = ucstring_create(c);
1795 if(d->is_baseline) ucstring_append_sz(summary, " baseline", DE_ENCODING_LATIN1);
1796 if(d->is_lossless) ucstring_append_sz(summary, " lossless", DE_ENCODING_LATIN1);
1797 if(d->is_progressive) ucstring_append_sz(summary, " progressive", DE_ENCODING_LATIN1);
1798 if(d->is_arithmetic) ucstring_append_sz(summary, " arithmetic", DE_ENCODING_LATIN1);
1799 if(d->is_hierarchical) ucstring_append_sz(summary, " hierarchical", DE_ENCODING_LATIN1);
1800 ucstring_printf(summary, DE_ENCODING_LATIN1, " cmpts=%d", (int)d->ncomp);
1801 if(d->is_subsampled) {
1802 // The subsampling type code printed here is not the standard way to denote
1803 // subsampling, but the standard notation is incomprehensible, and doesn't
1804 // cover all the possible cases.
1805 ucstring_append_sz(summary, " subsampling=", DE_ENCODING_LATIN1);
1806 ucstring_append_ucstring(summary, d->sampling_code);
1808 ucstring_printf(summary, DE_ENCODING_LATIN1, " bits=%d", (int)d->precision);
1810 if(d->has_restart_markers) ucstring_append_sz(summary, " rst", DE_ENCODING_LATIN1);
1811 if(d->has_jfif_seg) {
1812 ucstring_printf(summary, DE_ENCODING_LATIN1, " JFIF=%u.%02u",
1813 (unsigned int)d->jfif_ver_h, (unsigned int)d->jfif_ver_l);
1815 if(d->has_spiff_seg) ucstring_append_sz(summary, " SPIFF", DE_ENCODING_LATIN1);
1816 if(d->has_exif_seg) {
1817 ucstring_append_sz(summary, " Exif", DE_ENCODING_LATIN1);
1818 if(d->exif_version_as_uint32!=0) {
1819 char tmps[5];
1820 exif_version_to_string(d->exif_version_as_uint32, tmps);
1821 ucstring_printf(summary, DE_ENCODING_LATIN1, "=%s", tmps);
1824 if(d->has_adobeapp14)
1825 ucstring_printf(summary, DE_ENCODING_LATIN1, " colorxform=%d", (int)d->color_transform);
1826 if(d->has_revcolorxform) ucstring_append_sz(summary, " rev-colorxform", DE_ENCODING_LATIN1);
1828 if(d->has_jfif_thumb) ucstring_append_sz(summary, " JFIFthumbnail", DE_ENCODING_LATIN1);
1829 if(d->has_jfxx_seg) ucstring_append_sz(summary, " JFXX", DE_ENCODING_LATIN1);
1830 if(d->has_flashpix) ucstring_append_sz(summary, " FlashPix", DE_ENCODING_LATIN1);
1831 if(d->is_jpeghdr) ucstring_append_sz(summary, " HDR", DE_ENCODING_LATIN1);
1832 if(d->is_jpegxt) ucstring_append_sz(summary, " XT", DE_ENCODING_LATIN1);
1833 if(d->has_mpf_seg) ucstring_append_sz(summary, " MPO", DE_ENCODING_LATIN1);
1834 if(d->is_jps) ucstring_append_sz(summary, " JPS", DE_ENCODING_LATIN1);
1835 if(d->has_iccprofile) ucstring_append_sz(summary, " ICC", DE_ENCODING_LATIN1);
1836 if(d->has_xmp) ucstring_append_sz(summary, " XMP", DE_ENCODING_LATIN1);
1837 if(d->has_xmp_ext) ucstring_append_sz(summary, " XMPext", DE_ENCODING_LATIN1);
1838 if(d->has_psd) ucstring_append_sz(summary, " PSD", DE_ENCODING_LATIN1);
1839 if(d->has_iptc) ucstring_append_sz(summary, " IPTC", DE_ENCODING_LATIN1);
1840 if(d->has_exif_gps) ucstring_append_sz(summary, " GPS", DE_ENCODING_LATIN1);
1841 if(d->has_afcp) ucstring_append_sz(summary, " AFCP", DE_ENCODING_LATIN1);
1843 if(d->scan_count!=1) ucstring_printf(summary, DE_ENCODING_LATIN1, " scans=%d", d->scan_count);
1845 de_dbg(c, "summary:%s", ucstring_getpsz(summary));
1847 done:
1848 ucstring_destroy(summary);
1851 static void do_post_sof_stuff(deark *c, lctx *d)
1853 if(d->is_jpegls) return;
1855 if(d->has_jfif_seg && d->has_exif_seg && !d->exif_before_jfif &&
1856 (d->jfif_ver_h==1 && (d->jfif_ver_l==1 || d->jfif_ver_l==2)))
1858 if(d->exif_orientation>1) {
1859 de_dbg(c, "Note: Image has an ambiguous orientation: JFIF says "
1860 "%s; Exif says %s",
1861 fmtutil_tiff_orientation_name(1),
1862 fmtutil_tiff_orientation_name((i64)d->exif_orientation));
1865 if(d->exif_cosited && d->is_subsampled && d->ncomp>1) {
1866 de_dbg(c, "Note: Image has an ambiguous subsampling position: JFIF says "
1867 "centered; Exif says cosited");
1870 // TODO: Another thing we could check for is a significant conflict in
1871 // the JFIF and Exif density settings.
1875 // Tasks to do at the end of normal JPEG data (after we've found the EOI marker,
1876 // or an unexpected end of file).
1877 // This does not handle data that might exist after the EOI; that might still
1878 // be read later.
1879 static void do_at_end_of_jpeg(deark *c, lctx *d)
1881 dbuf_close(d->iccprofile_file);
1882 d->iccprofile_file = NULL;
1883 dbuf_close(d->hdr_residual_file);
1884 d->hdr_residual_file = NULL;
1885 destroy_fpxr_data(c, d);
1887 if(d->extxmp_membuf && !d->extxmp_error_flag) {
1888 dbuf *tmpdbuf = NULL;
1889 tmpdbuf = dbuf_create_output_file(c, "xmp", NULL, DE_CREATEFLAG_IS_AUX);
1890 dbuf_copy(d->extxmp_membuf, 0, d->extxmp_total_len, tmpdbuf);
1891 dbuf_close(tmpdbuf);
1893 dbuf_close(d->extxmp_membuf);
1894 d->extxmp_membuf = NULL;
1897 static void do_destroy_lctx(deark *c, lctx *d)
1899 if(!d) return;
1900 ucstring_destroy(d->sampling_code);
1901 de_free(c, d);
1904 // Process a single JPEG image (through the EOI marker).
1905 // Returns nonzero if EOI was found.
1906 static int do_jpeg_stream(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
1908 u8 b;
1909 i64 pos = pos1;
1910 i64 seg_size;
1911 u8 seg_type;
1912 int found_marker;
1913 struct marker_info mi;
1914 i64 scan_byte_count;
1915 int sof_count = 0;
1916 int retval = 0;
1918 d->sampling_code = ucstring_create(c);
1920 found_marker = 0;
1921 while(1) {
1922 if(pos>=c->infile->len)
1923 break;
1924 b = de_getbyte_p(&pos);
1925 if(b==0xff) {
1926 found_marker = 1;
1927 continue;
1930 if(!found_marker) {
1931 // Not an 0xff byte, and not preceded by an 0xff byte. Just ignore it.
1932 continue;
1935 found_marker = 0; // Reset this flag.
1937 if(b==0x00) {
1938 continue; // Escaped 0xff
1941 seg_type = b;
1943 if(seg_type==0xf7 && !d->found_sof) {
1944 d->is_jpegls = 1;
1947 get_marker_info(c, d, seg_type, &mi);
1949 if(mi.flags & FLAG_IS_SOF) {
1950 d->found_sof = 1;
1953 if(mi.flags & FLAG_NO_DATA) {
1954 de_dbg(c, "marker 0x%02x (%s) at %d", (unsigned int)seg_type,
1955 mi.longname, (int)(pos-2));
1957 if(seg_type==0xd9) { // EOI
1958 retval = 1;
1959 goto done;
1962 if(seg_type==0xd8 && !d->found_soi) {
1963 d->found_soi = 1;
1966 continue;
1969 // If we get here, we're reading a segment that has a size field.
1970 seg_size = de_getu16be(pos);
1971 if(pos<2) break; // bogus size
1973 do_segment(c, d, &mi, pos+2, seg_size-2);
1975 pos += seg_size;
1977 if(mi.flags & FLAG_IS_SOF) {
1978 if(sof_count==0) {
1979 do_post_sof_stuff(c, d);
1981 sof_count++;
1984 if(seg_type==0xda) {
1985 // If we read an SOS segment, now read the untagged image data that
1986 // should follow it.
1987 if(!do_read_scan_data(c, d, pos, &scan_byte_count)) {
1988 break;
1990 pos += scan_byte_count;
1994 done:
1995 do_at_end_of_jpeg(c, d);
1996 *bytes_consumed = pos - pos1;
1997 return retval;
2000 static void do_afcp_segment(deark *c, lctx *d, i64 endpos)
2002 d->has_afcp = 1;
2003 de_dbg(c, "AFCP segment found at end of file");
2005 de_dbg_indent(c, 1);
2006 de_run_module_by_id_on_slice(c, "afcp", NULL, c->infile, 0, endpos);
2007 de_dbg_indent(c, -1);
2010 static void de_run_jpeg(deark *c, de_module_params *mparams)
2012 i64 pos;
2013 i64 bytes_consumed;
2014 int retval_stream = 0;
2015 i64 foundpos;
2016 i64 extra_bytes_at_eof;
2017 i64 nbytes_to_scan;
2018 lctx *d = NULL;
2020 d = de_malloc(c, sizeof(lctx));
2021 pos = 0;
2022 bytes_consumed = 0;
2023 retval_stream = do_jpeg_stream(c, d, pos, &bytes_consumed);
2024 if(!retval_stream) goto done;
2025 pos += bytes_consumed;
2027 if(bytes_consumed<1) goto done;
2028 if(pos >= c->infile->len) goto done;
2030 if(c->module_nesting_level>1) goto done;
2031 extra_bytes_at_eof = c->infile->len - pos;
2032 if(extra_bytes_at_eof<4) goto done;
2034 if(extra_bytes_at_eof>=24) {
2035 u8 tbuf[12];
2037 de_read(tbuf, c->infile->len-12, sizeof(tbuf));
2038 if(tbuf[0]=='A' && tbuf[1]=='X' && tbuf[2]=='S' &&
2039 (tbuf[3]=='!' || tbuf[3]=='*'))
2041 do_afcp_segment(c, d, c->infile->len);
2042 goto done;
2046 if(d->has_mpf_seg) {
2047 // In this case, it is normal for there to be multiple JPEG streams,
2048 // and we should have already extracted the extras.
2049 goto done;
2052 nbytes_to_scan = de_min_int(extra_bytes_at_eof, 512);
2053 if(dbuf_search(c->infile, (const u8*)"\xff\xd8\xff", 3, pos,
2054 nbytes_to_scan, &foundpos))
2056 de_info(c, "Note: This file might contain multiple JPEG images. "
2057 "Use \"-m jpegscan\" to extract them.");
2058 goto done;
2061 if(dbuf_is_all_zeroes(c->infile, pos, extra_bytes_at_eof)) goto done;
2063 de_info(c, "Note: %"I64_FMT" bytes of unidentified data found at end "
2064 "of file (starting at %"I64_FMT").", extra_bytes_at_eof, pos);
2066 done:
2067 if(d) {
2068 if(retval_stream) {
2069 print_summary(c, d);
2071 do_destroy_lctx(c, d);
2075 typedef struct scanctx_struct {
2076 i64 len;
2077 u8 is_jpegls;
2078 } scanctx;
2080 static int detect_jpeg_len(deark *c, scanctx *d, i64 pos1, i64 len)
2082 u8 b0, b1;
2083 i64 pos;
2084 i64 seg_size;
2085 int in_scan = 0;
2086 int found_sof = 0;
2087 int found_scan = 0;
2089 d->len = 0;
2090 d->is_jpegls = 0;
2091 pos = pos1;
2093 while(1) {
2094 if(pos>=pos1+len)
2095 break;
2096 b0 = de_getbyte(pos);
2098 if(b0!=0xff) {
2099 pos++;
2100 continue;
2103 // Peek at the next byte (after this 0xff byte).
2104 b1 = de_getbyte(pos+1);
2106 if(b1==0xff) {
2107 // A "fill byte", not a marker.
2108 pos++;
2109 continue;
2111 else if(b1==0x00 || (d->is_jpegls && b1<0x80 && in_scan)) {
2112 // An escape sequence, not a marker.
2113 pos+=2;
2114 continue;
2116 else if(b1==0xd9) { // EOI. That's what we're looking for.
2117 if(!found_sof || !found_scan) return 0;
2118 pos+=2;
2119 d->len = pos-pos1;
2120 return 1;
2122 else if(b1==0xf7) {
2123 de_dbg(c, "Looks like a JPEG-LS file.");
2124 found_sof = 1;
2125 d->is_jpegls = 1;
2127 else if(b1>=0xc0 && b1<=0xcf && b1!=0xc4 && b1!=0xc8 && b1!=0xcc) {
2128 found_sof = 1;
2131 if(b1==0xda) { // SOS - Start of scan
2132 if(!found_sof) return 0;
2133 found_scan = 1;
2134 in_scan = 1;
2136 else if(b1>=0xd0 && b1<=0xd7) {
2137 // RSTn markers don't change the in_scan state.
2140 else {
2141 in_scan = 0;
2144 if((b1>=0xd0 && b1<=0xda) || b1==0x01) {
2145 // Markers that have no content.
2146 pos+=2;
2147 continue;
2150 // Everything else should be a marker segment, with a length field.
2151 seg_size = de_getu16be(pos+2);
2152 if(seg_size<2) break; // bogus size
2154 pos += seg_size+2;
2157 return 0;
2160 static void de_run_jpegscan(deark *c, de_module_params *mparams)
2162 i64 pos = 0;
2163 i64 foundpos = 0;
2164 scanctx *d = NULL;
2165 int ret;
2167 d = de_malloc(c, sizeof(*d));
2169 while(1) {
2170 if(pos >= c->infile->len) break;
2172 ret = dbuf_search(c->infile, (const u8*)"\xff\xd8\xff", 3,
2173 pos, c->infile->len-pos, &foundpos);
2174 if(!ret) break; // No more JPEGs in file.
2176 de_dbg(c, "Found possible JPEG file at %d", (int)foundpos);
2178 pos = foundpos;
2180 if(detect_jpeg_len(c, d, pos, c->infile->len-pos)) {
2181 de_dbg(c, "length=%d", (int)d->len);
2182 dbuf_create_file_from_slice(c->infile, pos, d->len,
2183 d->is_jpegls ? "jls" : "jpg", NULL, 0);
2184 pos += d->len;
2186 else {
2187 de_dbg(c, "Doesn't seem to be a valid JPEG.");
2188 pos++;
2192 de_free(c, d);
2195 static int de_identify_jpeg(deark *c)
2197 if(!dbuf_memcmp(c->infile, 0, "\xff\xd8\xff", 3)) {
2198 return 100;
2200 return 0;
2203 void de_module_jpeg(deark *c, struct deark_module_info *mi)
2205 mi->id = "jpeg";
2206 mi->desc = "JPEG image";
2207 mi->desc2 = "resources only";
2208 mi->run_fn = de_run_jpeg;
2209 mi->identify_fn = de_identify_jpeg;
2212 void de_module_jpegscan(deark *c, struct deark_module_info *mi)
2214 mi->id = "jpegscan";
2215 mi->desc = "Extract embedded JPEG images from arbitrary files";
2216 mi->run_fn = de_run_jpegscan;