1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Extract various things from JPEG & JPEG-LS files.
6 // Extract embedded JPEG files from arbitrary files.
8 #include <deark-config.h>
9 #include <deark-private.h>
10 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_jpeg
);
12 DE_DECLARE_MODULE(de_module_jpegscan
);
14 struct fpxr_entity_struct
{
16 struct de_stringreaderdata
*name_srd
;
23 struct fpxr_data_struct
{
25 struct fpxr_entity_struct
*entities
;
28 typedef struct localctx_struct
{
31 u8 has_jfif_seg
, has_jfif_thumb
, has_jfxx_seg
;
32 u8 has_exif_seg
, has_exif_gps
, has_spiff_seg
, has_mpf_seg
, has_afcp
;
34 u8 has_psd
, has_iptc
, has_xmp
, has_xmp_ext
, has_iccprofile
, has_flashpix
;
35 u8 is_baseline
, is_progressive
, is_lossless
, is_arithmetic
, is_hierarchical
;
36 u8 is_jpeghdr
, is_jpegxt
, is_mpo
, is_jps
;
37 u8 has_restart_markers
;
40 u8 color_transform
; // valid if(has_adobeapp14)
49 u8 jfif_ver_h
, jfif_ver_l
; // valid if(has_jfif_seg)
50 u32 exif_orientation
; // valid if != 0, and(has_exif_seg)
51 u32 exif_version_as_uint32
; // valid if != 0, and(has_exif_seg)
52 dbuf
*iccprofile_file
;
53 dbuf
*hdr_residual_file
;
56 int extxmp_warned_flag
; // Have we warned about multiple extxmp digests?
57 int extxmp_error_flag
;
63 de_ucstring
*sampling_code
;
65 struct fpxr_data_struct
*fpxr_data
;
70 typedef void (*handler_fn_type
)(deark
*c
, lctx
*d
,
71 const struct marker_info
*mi
, i64 pos
, i64 data_size
);
73 #define FLAG_JPEG_COMPAT 0x0001
74 #define FLAG_JPEGLS_COMPAT 0x0002
75 #define FLAG_NO_DATA 0x0100
76 #define FLAG_IS_SOF 0x0200
86 // Static info about markers/segments.
90 const char *shortname
;
// Handle the payload of one ICC profile segment.
// pos/data_size describe the payload within c->infile.
// A payload shorter than 2 bytes is ignored. Otherwise the first two bytes
// are read as b1 and b2 (logged as "part b1 of b2"), and the remaining
// data_size-2 bytes are appended to an auxiliary "icc" output file. That
// file is created the first time it is needed, at which point
// d->has_iccprofile is also set.
95 static void do_icc_profile_segment(deark
*c
, lctx
*d
, i64 pos
, i64 data_size
)
99 if(data_size
<2) return; // bogus data
100 b1
= de_getbyte(pos
);
101 b2
= de_getbyte(pos
+1);
102 de_dbg(c
, "icc profile segment at %d datasize=%d part %d of %d", (int)pos
, (int)(data_size
-2), b1
, b2
);
// Lazily open the output file, so that consecutive segments accumulate into it.
104 if(!d
->iccprofile_file
) {
105 d
->has_iccprofile
= 1;
106 d
->iccprofile_file
= dbuf_create_output_file(c
, "icc", NULL
, DE_CREATEFLAG_IS_AUX
);
// Skip the two part-number bytes; copy only the profile data.
108 dbuf_copy(c
->infile
, pos
+2, data_size
-2, d
->iccprofile_file
);
111 // If this is the final piece of the ICC profile, close the file.
112 // That way, if for some reason there's another profile in the file, we'll put
113 // it in a separate file.
114 dbuf_close(d
->iccprofile_file
);
115 d
->iccprofile_file
= NULL
;
119 // Extract JPEG-HDR residual images.
120 // Note: This code is based on reverse engineering, and may not be correct.
121 static void do_jpeghdr_segment(deark
*c
, lctx
*d
, i64 pos1
,
122 i64 data_size1
, int is_ext
)
128 // Payload should begin after the first NUL byte. Search for it.
129 ret
= dbuf_search_byte(c
->infile
, 0x00, pos1
, data_size1
, &pos
);
131 de_warn(c
, "Bad or unsupported JPEG-HDR data");
135 data_size
= pos1
+data_size1
- pos
;
138 de_dbg(c
, "JPEG-HDR residual image continuation, pos=%d size=%d",
139 (int)pos
, (int)data_size
);
142 de_dbg(c
, "JPEG-HDR residual image start, pos=%d size=%d",
143 (int)pos
, (int)data_size
);
145 // Close any previous file
146 if(d
->hdr_residual_file
) {
147 dbuf_close(d
->hdr_residual_file
);
148 d
->hdr_residual_file
= NULL
;
151 // Make sure it looks like an embedded JPEG file
152 if(dbuf_memcmp(c
->infile
, pos
, "\xff\xd8", 2)) {
153 de_warn(c
, "Bad or unsupported JPEG-HDR format");
157 d
->hdr_residual_file
= dbuf_create_output_file(c
, "residual.jpg", NULL
, DE_CREATEFLAG_IS_AUX
);
160 if(!d
->hdr_residual_file
) return;
161 dbuf_copy(c
->infile
, pos
, data_size
, d
->hdr_residual_file
);
164 static void do_jpegxt_segment(deark
*c
, lctx
*d
, i64 pos
,
168 if(data_size
<14) return;
169 n
= de_getu16be(pos
);
170 de_dbg(c
, "enumerator: %u", (unsigned int)n
);
171 n
= de_getu32be(pos
+2);
172 de_dbg(c
, "seq number: %u", (unsigned int)n
);
174 de_run_module_by_id_on_slice2(c
, "bmff", "T", c
->infile
, pos
+6, data_size
-6);
175 de_dbg_indent(c
, -1);
178 // Decode an uncompressed JFIF thumbnail.
179 // This code has not been properly tested, because I can't find any files in
180 // the wild that have these kinds of thumbnails.
181 static void extract_unc_jfif_thumbnail(deark
*c
, lctx
*d
,
182 i64 pos1
, i64 len
, i64 w
, i64 h
, int has_pal
,
187 de_bitmap
*img
= NULL
;
191 img
= de_bitmap_create(c
, w
, h
, 3);
195 de_read_palette_rgb(c
->infile
, pos
, 256, 3, pal
, 256, 0);
197 de_convert_image_paletted(c
->infile
, pos
, 8, w
, pal
, img
, 0);
203 clr
= dbuf_getRGB(c
->infile
, pos
+ j
*rowspan
+ i
*3, 0);
204 de_bitmap_setpixel_rgb(img
, i
, j
, clr
);
209 de_bitmap_write_to_file(img
, token
, DE_CREATEFLAG_IS_AUX
);
211 de_bitmap_destroy(img
);
214 static void do_jfif_segment(deark
*c
, lctx
*d
,
215 i64 pos
, i64 data_size
)
218 const char *units_name
;
223 if(data_size
<9) return;
224 d
->jfif_ver_h
= de_getbyte(pos
);
225 d
->jfif_ver_l
= de_getbyte(pos
+1);
226 de_dbg(c
, "JFIF version: %d.%02d", (int)d
->jfif_ver_h
, (int)d
->jfif_ver_l
);
227 units
= de_getbyte(pos
+2);
228 xdens
= de_getu16be(pos
+3);
229 ydens
= de_getu16be(pos
+5);
230 if(units
==1) units_name
="dpi";
231 else if(units
==2) units_name
="dots/cm";
232 else units_name
="(unspecified)";
233 de_dbg(c
, "density: %d"DE_CHAR_TIMES
"%d, units=%s", (int)xdens
, (int)ydens
, units_name
);
235 tn_w
= (i64
)de_getbyte(pos
+7);
236 tn_h
= (i64
)de_getbyte(pos
+8);
237 de_dbg(c
, "thumbnail dimensions: %d"DE_CHAR_TIMES
"%d", (int)tn_w
, (int)tn_h
);
238 if(tn_w
>0 && tn_h
>0 && data_size
>9) {
239 d
->has_jfif_thumb
= 1;
240 if(tn_w
*tn_h
*3 != data_size
-9) {
241 de_warn(c
, "Expected %d bytes of JFIF thumbnail image data at %d, found %d",
242 (int)(tn_w
*tn_h
*3), (int)(pos
+9), (int)(data_size
-9));
244 extract_unc_jfif_thumbnail(c
, d
, pos
+9, data_size
-9, tn_w
, tn_h
,
249 static void do_jfxx_segment(deark
*c
, lctx
*d
,
250 i64 pos
, i64 data_size
)
255 de_dbg(c
, "JFXX segment at %d datasize=%d", (int)pos
, (int)data_size
);
256 if(data_size
<1) return;
259 de_dbg(c
, "thumbnail type: 0x%02x", (unsigned int)t
);
261 if(t
==0x10) { // thumbnail coded using JPEG
262 // TODO: JPEG-formatted thumbnails are forbidden from containing JFIF segments.
263 // They essentially inherit them from their parent.
264 // So, maybe, when we extract a thumbnail, we should insert an artificial JFIF
265 // segment into it. We currently don't do that.
266 // (However, this is not at all important.)
267 dbuf_create_file_from_slice(c
->infile
, pos
+1, data_size
-1, "jfxxthumb.jpg", NULL
, DE_CREATEFLAG_IS_AUX
);
269 else if(t
==0x11 || t
==0x13) {
272 if(data_size
<3) return;
273 tn_w
= (i64
)de_getbyte(pos
+1);
274 tn_h
= (i64
)de_getbyte(pos
+2);
275 de_dbg(c
, "JFXX thumbnail dimensions: %d"DE_CHAR_TIMES
"%d", (int)tn_w
, (int)tn_h
);
276 extract_unc_jfif_thumbnail(c
, d
, pos
+3, data_size
-3, tn_w
, tn_h
,
277 (t
==0x11)?1:0, "jfxxthumb");
// Handle the payload of an Adobe APP14 segment.
// pos/data_size describe the payload within the input file.
// A payload shorter than 7 bytes is ignored. Otherwise d->has_adobeapp14 is
// set, the byte at payload offset 6 is stored in d->color_transform, and the
// value is logged along with a descriptive name
// (0 = "RGB or CMYK", 1 = "YCbCr", 2 = "YCCK", anything else = "unknown").
281 static void do_adobeapp14_segment(deark
*c
, lctx
*d
,
282 i64 pos
, i64 data_size
)
286 if(data_size
<7) return;
287 d
->has_adobeapp14
= 1;
288 d
->color_transform
= de_getbyte(pos
+6);
289 if(d
->color_transform
==0) tname
="RGB or CMYK";
290 else if(d
->color_transform
==1) tname
="YCbCr";
291 else if(d
->color_transform
==2) tname
="YCCK";
292 else tname
="unknown";
293 de_dbg(c
, "color transform: %d (%s)", (int)d
->color_transform
, tname
);
296 static void do_exif_segment(deark
*c
, lctx
*d
,
297 i64 pos
, i64 data_size
)
300 u32 exiforientation
= 0;
303 if(data_size
<8) return;
304 // Note that Exif has an additional padding byte after the APP ID NUL terminator.
305 de_dbg(c
, "Exif data at %d, size=%d", (int)pos
, (int)data_size
);
307 if(!d
->has_jfif_seg
) {
308 d
->exif_before_jfif
= 1;
311 fmtutil_handle_exif2(c
, pos
, data_size
,
312 &exifflags
, &exiforientation
, &exifversion
);
318 d
->exif_orientation
= exiforientation
;
320 d
->exif_version_as_uint32
= exifversion
;
321 de_dbg_indent(c
, -1);
324 static void do_photoshop_segment(deark
*c
, lctx
*d
,
325 i64 pos
, i64 data_size
)
327 struct de_module_out_params oparams
;
329 de_zeromem(&oparams
, sizeof(struct de_module_out_params
));
330 // TODO: Can Photoshop resources span multiple JPEG segments? I have
331 // a file in which that seems to be the case.
332 de_dbg(c
, "photoshop data at %d, size=%d", (int)pos
, (int)data_size
);
335 fmtutil_handle_photoshop_rsrc2(c
, c
->infile
, pos
, data_size
, 0x0, &oparams
);
336 if(oparams
.flags
&0x02)
338 de_dbg_indent(c
, -1);
341 static void do_mpf_segment(deark
*c
, lctx
*d
,
342 i64 pos
, i64 data_size
)
344 de_module_params
*mparams
= NULL
;
347 de_dbg(c
, "MPF data at %d, size=%d", (int)pos
, (int)data_size
);
350 mparams
= de_malloc(c
, sizeof(de_module_params
));
352 mparams
->in_params
.codes
= "M";
353 mparams
->in_params
.flags
|= 0x01;
354 mparams
->in_params
.offset_in_parent
= pos
;
355 mparams
->in_params
.parent_dbuf
= c
->infile
;
357 de_run_module_by_id_on_slice(c
, "tiff", mparams
, c
->infile
, pos
, data_size
);
359 if(mparams
->out_params
.flags
& 0x80) {
360 if(mparams
->out_params
.uint3
> 1) {
361 // We want to set the is_mpo flag if there is an MPEntry tag which
362 // says there is more than one non-thumbnail image.
363 // This is so we can declare the format to be "JPEG/MPO".
369 de_dbg_indent(c
, -1);
372 static void do_jps_segment(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
377 de_ucstring
*flags_str
= NULL
;
378 de_ucstring
*comment
= NULL
;
383 blk_len
= de_getu16be_p(&pos
);
384 if(blk_len
<4) goto done
;
385 st_descr
= (u32
)de_getu32be(pos
);
387 flags_str
= ucstring_create(c
);
388 mtype
= (unsigned int)(st_descr
&0x000000ff);
390 case 0: ucstring_append_flags_item(flags_str
, "MONOSCOPIC_IMAGE"); break;
391 case 1: ucstring_append_flags_item(flags_str
, "STEREOSCOPIC_IMAGE"); break;
394 switch((st_descr
&0x0000ff00)>>8) {
395 case 0: ucstring_append_flags_item(flags_str
, "EYE_BOTH"); break;
396 case 1: ucstring_append_flags_item(flags_str
, "EYE_LEFT"); break;
397 case 2: ucstring_append_flags_item(flags_str
, "EYE_RIGHT"); break;
401 switch((st_descr
&0x0000ff00)>>8) {
402 case 1: ucstring_append_flags_item(flags_str
, "LAYOUT_INTERLEAVED"); break;
403 case 2: ucstring_append_flags_item(flags_str
, "LAYOUT_SIDEBYSIDE"); break;
404 case 3: ucstring_append_flags_item(flags_str
, "LAYOUT_OVERUNDER"); break;
405 case 4: ucstring_append_flags_item(flags_str
, "LAYOUT_ANAGLYPH"); break;
408 ucstring_append_flags_item(flags_str
, (st_descr
&0x00010000)?"half-height":"full-height");
409 ucstring_append_flags_item(flags_str
, (st_descr
&0x00020000)?"half-width":"full-width");
410 // TODO: FIELD ORDER BIT
413 de_dbg(c
, "stereoscopic descriptor: 0x%08x (%s)", (unsigned int)st_descr
,
414 ucstring_getpsz(flags_str
));
418 if(pos1
+len
-pos
<2) goto done
;
419 blk_len
= de_getu16be_p(&pos
);
420 if(pos
+blk_len
> pos1
+len
) goto done
;
421 comment
= ucstring_create(c
);
422 dbuf_read_to_ucstring_n(c
->infile
, pos
, blk_len
, DE_DBG_MAX_STRLEN
, comment
,
423 0, DE_ENCODING_ASCII
);
424 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(comment
));
427 ucstring_destroy(flags_str
);
428 ucstring_destroy(comment
);
// Handle the payload of an "AROT" segment.
// pos/len describe the payload within the input file.
// The code shown here reads a 32-bit big-endian integer at pos and logs it
// as the "number of values".
431 static void do_arot_segment(deark
*c
, lctx
*d
, i64 pos
, i64 len
)
436 nvals
= de_getu32be(pos
);
437 de_dbg(c
, "number of values: %u", (unsigned int)nvals
);
443 static void do_xmp_extension_segment(deark
*c
, lctx
*d
,
444 i64 pos1
, i64 data_size
)
446 i64 thisseg_full_extxmp_len
;
448 u8 thisseg_digest_raw
[32];
449 de_ucstring
*digest_str
= NULL
;
452 int is_first_segment
= 0;
454 de_dbg(c
, "extended XMP segment, dpos=%d, dlen=%d", (int)pos1
, (int)(data_size
));
456 if(d
->extxmp_error_flag
) goto done
;
458 de_read(thisseg_digest_raw
, pos
, 32);
460 digest_str
= ucstring_create(c
);
461 ucstring_append_bytes(digest_str
, thisseg_digest_raw
, 32, 0, DE_ENCODING_ASCII
);
462 de_dbg(c
, "digest: \"%s\"", ucstring_getpsz(digest_str
));
464 if(d
->extxmp_found
&& de_memcmp(thisseg_digest_raw
, d
->extxmp_digest
, 32)) {
465 // We only care about the extended XMP segments whose digest matches that
466 // indicated in the main XMP segment. Unfortunately, we don't know what that
467 // is, because we don't parse XMP. We'll just hope that the first extended
468 // XMP segment has the correct digest.
469 if(!d
->extxmp_warned_flag
) {
470 de_warn(c
, "Multiple extended XMP blocks found. All but the first will be ignored.");
471 d
->extxmp_warned_flag
= 1;
476 if(!d
->extxmp_found
) {
477 is_first_segment
= 1;
479 de_memcpy(d
->extxmp_digest
, thisseg_digest_raw
, 32);
482 thisseg_full_extxmp_len
= de_getu32be_p(&pos
);
483 if(is_first_segment
) {
484 d
->extxmp_total_len
= thisseg_full_extxmp_len
;
486 de_dbg(c
, "full ext. XMP length: %d", (int)thisseg_full_extxmp_len
);
487 if(thisseg_full_extxmp_len
!= d
->extxmp_total_len
) {
488 de_warn(c
, "Inconsistent extended XMP block lengths");
489 d
->extxmp_error_flag
= 1;
493 if(d
->extxmp_total_len
> 10000000) {
494 de_warn(c
, "Extended XMP block too large");
495 d
->extxmp_error_flag
= 1;
499 segment_offset
= de_getu32be_p(&pos
);
500 de_dbg(c
, "offset of this segment: %d", (int)segment_offset
);
502 dlen
= data_size
- (pos
-pos1
);
503 de_dbg(c
, "[%d bytes of ext. XMP data at %d]", (int)dlen
, (int)pos
);
505 if(segment_offset
+ dlen
> d
->extxmp_total_len
) {
506 de_warn(c
, "Extended XMP segment too long");
507 d
->extxmp_error_flag
= 1;
511 if(!d
->extxmp_membuf
) {
512 d
->extxmp_membuf
= dbuf_create_membuf(c
, d
->extxmp_total_len
, 0x1);
514 dbuf_copy_at(c
->infile
, pos
, dlen
, d
->extxmp_membuf
, segment_offset
);
517 de_dbg_indent(c
, -1);
518 ucstring_destroy(digest_str
);
521 static void destroy_fpxr_data(deark
*c
, lctx
*d
)
524 if(!d
->fpxr_data
) return;
526 for(k
=0; k
<d
->fpxr_data
->num_entities
; k
++) {
527 if(d
->fpxr_data
->entities
[k
].name_srd
) {
528 de_destroy_stringreaderdata(c
, d
->fpxr_data
->entities
[k
].name_srd
);
529 d
->fpxr_data
->entities
[k
].name_srd
= NULL
;
532 dbuf_close(d
->fpxr_data
->entities
[k
].stream
);
533 d
->fpxr_data
->entities
[k
].stream
= NULL
;
536 de_free(c
, d
->fpxr_data
->entities
);
537 de_free(c
, d
->fpxr_data
);
// Decode a buffered Flashpix stream as an OLE property set.
// fe is the contents-list entity whose data has been collected in fe->stream.
// The stream index is logged, and the "olepropset" module is run on the
// whole of fe->stream (offset 0, length fe->stream->len).
541 static void do_fpxr_olepropset_stream(deark
*c
, lctx
*d
, struct fpxr_entity_struct
*fe
)
543 de_dbg(c
, "decoding Flashpix stream %d (OLE property set)", (int)fe
->index
);
545 de_run_module_by_id_on_slice(c
, "olepropset", NULL
, fe
->stream
, 0, fe
->stream
->len
);
546 de_dbg_indent(c
, -1);
549 static void do_fpxr_fujifilm_preview(deark
*c
, lctx
*d
, struct fpxr_entity_struct
*fe
)
551 if(fe
->stream
->len
< 100) return;
552 if(dbuf_memcmp(fe
->stream
, 47, "\xff\xd8\xff", 3)) return;
553 dbuf_create_file_from_slice(fe
->stream
, 47, fe
->stream
->len
-47, "fujipreview.jpg",
554 NULL
, DE_CREATEFLAG_IS_AUX
);
557 static int ucstring_contains_char(de_ucstring
*s
, i32 ch
)
562 for(k
=0; k
<s
->len
; k
++) {
563 if(s
->str
[k
]==ch
) return 1;
568 // Called after we've saved all of a stream's data.
569 static void finalize_fpxr_stream(deark
*c
, lctx
*d
, struct fpxr_entity_struct
*fe
)
573 de_ucstring
*name2
= NULL
;
575 if(!fe
|| !fe
->stream
) goto done
;
576 if(fe
->done_flag
|| fe
->is_storage
) goto done
;
578 if(fe
->stream
->len
!= fe
->stream_size
) {
579 de_warn(c
, "Expected FPXR stream #%u to have %"I64_FMT
" bytes, found %"I64_FMT
,
580 (unsigned int)fe
->index
, fe
->stream_size
, fe
->stream
->len
);
583 // Process some known streams
585 if(fe
->name_srd
->sz_utf8
&& !de_strcmp(fe
->name_srd
->sz_utf8
, "/FUJIFILM/Preview")) {
586 do_fpxr_fujifilm_preview(c
, d
, fe
);
589 // The FlashPix spec says "Names in an IStorage that begin with the
590 // value '\0x05' are reserved exclusively for the storage of property
593 // It probably means the last *component* of the name begins with 0x05.
594 // 0x05 shouldn't appear anywhere else, I think, so I'll just search
595 // the whole string for it.
596 if(ucstring_contains_char(fe
->name_srd
->str
, 0x05)) {
597 do_fpxr_olepropset_stream(c
, d
, fe
);
601 if(c
->extract_level
<2) goto done
;
603 fi
= de_finfo_create(c
);
605 name2
= ucstring_create(c
);
607 ucstring_append_ucstring(name2
, fe
->name_srd
->str
);
610 ucstring_append_char(name2
, '.');
612 ucstring_append_sz(name2
, "fpxr.bin", DE_ENCODING_LATIN1
);
613 de_finfo_set_name_from_ucstring(c
, fi
, name2
, 0);
615 outf
= dbuf_create_output_file(c
, NULL
, fi
, DE_CREATEFLAG_IS_AUX
);
616 dbuf_copy(fe
->stream
, 0, fe
->stream
->len
, outf
);
619 if(fe
&& fe
->stream
) {
620 dbuf_close(fe
->stream
);
626 ucstring_destroy(name2
);
628 de_finfo_destroy(c
, fi
);
631 // Clean up incomplete FPXR streams.
632 // This function shouldn't be necessary, but I've seen some streams that don't
633 // have their full expected length, even though they seem to contain useful data.
634 // If we didn't do this, we would never process short streams at all.
//
// Does nothing if no FPXR data has been stored in d->fpxr_data.
// Otherwise it walks the d->fpxr_data->num_entities entries of the entity
// list, handing entities to finalize_fpxr_stream(), and then releases the
// stored FPXR data with destroy_fpxr_data().
635 static void finalize_all_fpxr_streams(deark
*c
, lctx
*d
)
639 if(!d
->fpxr_data
) return;
641 for(k
=0; k
<d
->fpxr_data
->num_entities
; k
++) {
642 struct fpxr_entity_struct
*fe
= &d
->fpxr_data
->entities
[k
];
644 finalize_fpxr_stream(c
, d
, fe
);
647 destroy_fpxr_data(c
, d
);
650 static void append_fpxr_stream_data(deark
*c
, lctx
*d
, size_t stream_idx
,
653 struct fpxr_entity_struct
*fe
= NULL
;
655 if(!d
->fpxr_data
) return;
656 if(stream_idx
> d
->fpxr_data
->num_entities
) return;
657 fe
= &d
->fpxr_data
->entities
[stream_idx
];
658 if(fe
->done_flag
) return;
660 // TODO: More validation could be done here.
661 // We're just assuming the FPXR chunks are correctly formed, and in the
663 // Note that the chunk size (len) is a calculated value, and is constrained
664 // to the size of a JPEG segment (64KB). So it should be okay to trust it.
666 // If we haven't done it yet, create a membuf for this stream.
668 fe
->stream
= dbuf_create_membuf(c
, len
, 0);
671 // Save the stream data to the membuf.
672 // We make a copy of the stream, because it could be split up into chunks,
673 // *and* we might want to parse it.
674 dbuf_copy(c
->infile
, pos
, len
, fe
->stream
);
676 if(fe
->stream
->len
>= fe
->stream_size
) {
677 finalize_fpxr_stream(c
, d
, fe
);
681 static void do_fpxr_segment(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
686 int saved_indent_level
;
691 de_dbg_indent_save(c
, &saved_indent_level
);
693 ver
= de_getbyte_p(&pos
);
695 de_dbg(c
, "version: %u", (unsigned int)ver
);
696 segtype
= de_getbyte_p(&pos
);
698 case 1: name
= "contents list"; break;
699 case 2: name
= "stream data"; break;
702 de_dbg(c
, "segment type: %u (%s)", (unsigned int)segtype
, name
);
704 if(segtype
==1) { // contents list
705 // Initialize our saved fpxr data
706 destroy_fpxr_data(c
, d
);
707 d
->fpxr_data
= de_malloc(c
, sizeof(struct fpxr_data_struct
));
711 d
->fpxr_data
->num_entities
= (size_t)de_getu16be_p(&pos
);
712 de_dbg(c
, "interoperability count: %u", (unsigned int)d
->fpxr_data
->num_entities
);
713 d
->fpxr_data
->entities
= de_mallocarray(c
, d
->fpxr_data
->num_entities
, sizeof(struct fpxr_entity_struct
));
715 for(k
=0; k
<d
->fpxr_data
->num_entities
; k
++) {
716 i64 bytes_consumed
= 0;
717 struct fpxr_entity_struct
*fe
;
721 char clsid_string
[50];
723 if(pos
>=pos1
+len
) goto done
;
724 fe
= &d
->fpxr_data
->entities
[k
];
726 de_dbg(c
, "entity[%d] at %d", (int)k
, (int)pos
);
729 esize
= de_getu32be_p(&pos
);
730 if(esize
==0xffffffffLL
) {
733 de_dbg(c
, "entity type: %s", fe
->is_storage
?"storage":"stream");
734 if(!fe
->is_storage
) {
735 de_dbg(c
, "stream size: %u", (unsigned int)esize
);
736 fe
->stream_size
= esize
;
739 defval
= de_getbyte_p(&pos
);
740 de_dbg(c
, "default value: 0x%02x", (unsigned int)defval
);
742 nbytesleft
= pos1
+len
-pos
;
743 if(!dbuf_get_utf16_NULterm_len(c
->infile
, pos
, nbytesleft
, &bytes_consumed
)) goto done
;
744 fe
->name_srd
= dbuf_read_string(c
->infile
, pos
, bytes_consumed
-2, bytes_consumed
-2,
745 DE_CONVFLAG_WANT_UTF8
, DE_ENCODING_UTF16LE
);
746 de_dbg(c
, "entity name: \"%s\"", ucstring_getpsz_d(fe
->name_srd
->str
));
747 pos
+= bytes_consumed
;
749 if(fe
->is_storage
) { // read Entity class ID
750 de_read(clsid_buf
, pos
, 16);
752 fmtutil_guid_to_uuid(clsid_buf
);
753 fmtutil_render_uuid(c
, clsid_buf
, clsid_string
, sizeof(clsid_string
));
754 de_dbg(c
, "class id: {%s}", clsid_string
);
756 de_dbg_indent(c
, -1);
759 else if(segtype
==2) { // stream data
765 stream_idx
= (size_t)de_getu16be_p(&pos
);
766 de_dbg(c
, "index to contents list: %d", (int)stream_idx
);
768 // The Exif spec (2.31) says this field is at offset 0x0C, but I'm
769 // assuming that's a clerical error that should be 0x0D.
770 stream_offset
= de_getu32be_p(&pos
);
771 de_dbg(c
, "offset to flashpix stream: %u", (unsigned int)stream_offset
);
773 nbytesleft
= pos1
+len
-pos
;
775 de_dbg(c
, "[%d bytes of flashpix stream data, at %d]", (int)nbytesleft
, (int)pos
);
777 append_fpxr_stream_data(c
, d
, stream_idx
, pos
, nbytesleft
);
781 de_dbg_indent_restore(c
, saved_indent_level
);
784 static void do_ducky_stringblock(deark
*c
, lctx
*d
,
785 i64 pos1
, i64 len
, const char *name
)
789 de_ucstring
*s
= NULL
;
792 nchars
= de_getu32be_p(&pos
);
793 if(nchars
*2 > len
-4) goto done
;
795 s
= ucstring_create(c
);
796 dbuf_read_to_ucstring_n(c
->infile
, pos
, nchars
*2, DE_DBG_MAX_STRLEN
, s
,
797 0, DE_ENCODING_UTF16BE
);
798 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz(s
));
803 static void do_ducky_segment(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
811 blktype
= (u32
)de_getu16be_p(&pos
);
812 if(blktype
==0) break;
813 if(pos
+2 > pos1
+len
) break;
814 blklen
= de_getu16be_p(&pos
);
815 if(pos
+blklen
> pos1
+len
) break;
819 n
= de_getu32be(pos
);
820 de_dbg(c
, "quality: %d", (int)n
);
824 do_ducky_stringblock(c
, d
, pos
, blklen
, "comment");
827 do_ducky_stringblock(c
, d
, pos
, blklen
, "copyright");
// Handle the payload of a "Meta" segment.
// pos1/len describe the payload within c->infile.
// The first payload byte is skipped: the remaining len-1 bytes, starting at
// pos1+1, are logged and then handed to the "tiff" module.
834 static void do_meta_segment(deark
*c
, lctx
*d
, i64 pos1
, i64 len
)
838 de_dbg(c
, "\"Meta\" data at %d, size=%d", (int)(pos1
+1), (int)(len
-1));
840 // TODO: The 3rd param below should probably represent some sort of TIFF
842 de_run_module_by_id_on_slice2(c
, "tiff", NULL
, c
->infile
, pos1
+1, len
-1);
843 de_dbg_indent(c
, -1);
847 // ITU-T Rec. T.86 says nothing about canonicalizing the APP ID, but in
848 // practice, some apps are sloppy about capitalization, and trailing spaces.
849 static void normalize_app_id(const char *app_id_orig
, char *app_id_normalized
,
850 size_t app_id_normalized_len
)
855 de_strlcpy(app_id_normalized
, app_id_orig
, app_id_normalized_len
);
856 id_strlen
= (i64
)de_strlen(app_id_normalized
);
858 // Strip trailing spaces.
859 while(id_strlen
>0 && app_id_normalized
[id_strlen
-1]==' ') {
860 app_id_normalized
[id_strlen
-1] = '\0';
864 for(i
=0; i
<id_strlen
; i
++) {
865 if(app_id_normalized
[i
]>='a' && app_id_normalized
[i
]<='z') {
866 app_id_normalized
[i
] -= 32;
871 #define APPSEGTYPE_UNKNOWN 0
872 #define APPSEGTYPE_JFIF 2
873 #define APPSEGTYPE_JFXX 3
874 #define APPSEGTYPE_SPIFF 5
875 #define APPSEGTYPE_EXIF 6
876 #define APPSEGTYPE_FPXR 7
877 #define APPSEGTYPE_ADOBEAPP14 9
878 #define APPSEGTYPE_ICC_PROFILE 10
879 #define APPSEGTYPE_PHOTOSHOP 11
880 #define APPSEGTYPE_DUCKY 12
881 #define APPSEGTYPE_XMP 14
882 #define APPSEGTYPE_XMP_EXTENSION 15
883 #define APPSEGTYPE_JPEGXT 20
884 #define APPSEGTYPE_MPF 21
885 #define APPSEGTYPE_JPS 22
886 #define APPSEGTYPE_HDR_RI_VER 24
887 #define APPSEGTYPE_HDR_RI_EXT 25
888 #define APPSEGTYPE_META 26
889 #define APPSEGTYPE_AROT 27
890 #define APPSEGTYPE_MSRGBA 100
891 #define APPSEGTYPE_RBSWAP 101
893 struct app_id_info_struct
{
897 de_ucstring
*app_id_str
; // valid if(app_id_found)
898 const char *app_type_name
;
901 #define MAX_APP_ID_LEN 80
902 struct app_id_decode_struct
{
904 u8 raw_bytes
[MAX_APP_ID_LEN
];
908 char app_id_orig
[MAX_APP_ID_LEN
];
909 char app_id_normalized
[MAX_APP_ID_LEN
];
910 i64 app_id_orig_strlen
;
914 // Caller allocates ad, and initializes the "In" fields.
915 static void decode_app_id(struct app_id_decode_struct
*ad
)
919 if(ad
->nraw_bytes
<2) return;
920 if(ad
->raw_bytes
[0]<32 || ad
->raw_bytes
[0]>126) return;
922 // Might have an app id.
923 for(k
=0; k
<ad
->nraw_bytes
; k
++) {
924 if(ad
->raw_bytes
[k
]==0) {
926 ad
->app_id_orig_strlen
= k
;
932 // We'll assume this is an app id
933 de_strlcpy(ad
->app_id_orig
, (const char*)ad
->raw_bytes
, sizeof(ad
->app_id_orig
));
934 normalize_app_id(ad
->app_id_orig
, ad
->app_id_normalized
, sizeof(ad
->app_id_normalized
));
938 // Caller allocates app_id_info, and initializes it to all 0.
939 // Caller must free ->app_id_str.
940 static void detect_app_seg_type(deark
*c
, lctx
*d
, const struct marker_info
*mi
,
941 i64 seg_data_pos
, i64 seg_data_size
, struct app_id_info_struct
*app_id_info
)
945 u8 seg_type
= mi
->seg_type
;
946 struct app_id_decode_struct ad
;
948 de_zeromem(&ad
, sizeof(struct app_id_decode_struct
));
951 app_id_info
->app_id_found
= 0;
952 app_id_info
->appsegtype
= APPSEGTYPE_UNKNOWN
;
953 app_id_info
->app_type_name
= "?";
955 ad
.nraw_bytes
= (i64
)sizeof(ad
.raw_bytes
);
956 if(ad
.nraw_bytes
>seg_data_size
)
957 ad
.nraw_bytes
= seg_data_size
;
958 if(ad
.nraw_bytes
<2) goto done
;
959 de_read(ad
.raw_bytes
, seg_data_pos
, ad
.nraw_bytes
-1);
964 app_id_info
->app_id_str
= ucstring_create(c
);
965 ucstring_append_bytes(app_id_info
->app_id_str
, (const u8
*)ad
.app_id_orig
, ad
.app_id_orig_strlen
, 0,
969 if(seg_type
==0xe1 && ad
.nraw_bytes
>20 && ad
.has_app_id
&& !de_strcmp(ad
.app_id_orig
, "XMP")) {
970 // Ugly hack. I've seen a fair number of files in which the first four
971 // bytes of the "http://ns.adobe.com/xap/1.0/" signature seem to have
972 // been corrupted, and replaced with "XMP\0".
973 struct app_id_decode_struct ad2
;
975 de_zeromem(&ad2
, sizeof(struct app_id_decode_struct
));
976 de_memcpy(ad2
.raw_bytes
, ad
.raw_bytes
, (size_t)ad
.nraw_bytes
);
977 ad2
.nraw_bytes
= ad
.nraw_bytes
;
978 // Try to patch the app ID, decode it, and see what happens.
979 de_memcpy(ad2
.raw_bytes
, (const u8
*)"http", 4);
983 // If that seemed to work, replace the old "normalized" ID with the patched one.
985 de_strlcpy(ad
.app_id_normalized
, ad2
.app_id_normalized
, sizeof(ad
.app_id_normalized
));
986 // Need to update orig_strlen, so we can find the payload data position.
987 // (ad.app_id_orig can stay the same.)
988 ad
.app_id_orig_strlen
= ad2
.app_id_orig_strlen
;
993 app_id_info
->app_id_found
= 1;
994 sig_size
= ad
.app_id_orig_strlen
+ 1;
997 payload_size
= seg_data_size
- sig_size
;
998 if(payload_size
<0) goto done
;
1000 if(seg_type
==0xe0 && !de_strcmp(ad
.app_id_normalized
, "JFIF")) {
1001 app_id_info
->appsegtype
= APPSEGTYPE_JFIF
;
1002 app_id_info
->app_type_name
= "JFIF";
1004 else if(seg_type
==0xe0 && !de_strcmp(ad
.app_id_normalized
, "JFXX")) {
1005 app_id_info
->appsegtype
= APPSEGTYPE_JFXX
;
1006 app_id_info
->app_type_name
= "JFIF-JFXX";
1008 else if(seg_type
==0xee && ad
.nraw_bytes
>=5 && !de_strncmp((const char*)ad
.raw_bytes
, "Adobe", 5)) {
1009 app_id_info
->appsegtype
= APPSEGTYPE_ADOBEAPP14
;
1010 app_id_info
->app_type_name
= "AdobeAPP14";
1013 else if(seg_type
==0xec && ad
.nraw_bytes
>=5 && !de_strncmp((const char*)ad
.raw_bytes
, "Ducky", 5)) {
1014 app_id_info
->appsegtype
= APPSEGTYPE_DUCKY
;
1015 app_id_info
->app_type_name
= "Ducky";
1018 else if(seg_type
==0xe1 && seg_data_size
>=6 && !de_strcmp(ad
.app_id_normalized
, "EXIF")) {
1019 app_id_info
->appsegtype
= APPSEGTYPE_EXIF
;
1020 app_id_info
->app_type_name
= "Exif";
1021 // We arbitrarily consider the "padding byte" to be part of the signature.
1024 else if((seg_type
==0xe1 || seg_type
==0xe3) && ad
.nraw_bytes
>=14 &&
1025 !de_memcmp(ad
.raw_bytes
, "Meta\0\0", 6) &&
1026 (ad
.raw_bytes
[6]=='I' || ad
.raw_bytes
[6]=='M'))
1028 // This seems to be some Kodak imitation of an Exif segment.
1029 // ExifTool says APP3, but all I've seen is APP1.
1030 app_id_info
->appsegtype
= APPSEGTYPE_META
;
1031 app_id_info
->app_type_name
= "Meta";
1033 else if(seg_type
==0xe2 && !de_strcmp(ad
.app_id_normalized
, "ICC_PROFILE")) {
1034 app_id_info
->appsegtype
= APPSEGTYPE_ICC_PROFILE
;
1035 app_id_info
->app_type_name
= "ICC profile";
1037 else if(seg_type
==0xe2 && !de_strcmp(ad
.app_id_normalized
, "FPXR")) {
1038 app_id_info
->appsegtype
= APPSEGTYPE_FPXR
;
1039 app_id_info
->app_type_name
= "Exif Flashpix Ready";
1041 else if(seg_type
==0xe8 && !de_strcmp(ad
.app_id_normalized
, "SPIFF")) {
1042 app_id_info
->appsegtype
= APPSEGTYPE_SPIFF
;
1043 app_id_info
->app_type_name
= "SPIFF";
1045 else if(seg_type
==0xed && !de_strcmp(ad
.app_id_normalized
, "PHOTOSHOP 3.0")) {
1046 app_id_info
->appsegtype
= APPSEGTYPE_PHOTOSHOP
;
1047 app_id_info
->app_type_name
= "Photoshop resources";
1049 else if(seg_type
==0xe1 && !de_strcmp(ad
.app_id_normalized
, "HTTP://NS.ADOBE.COM/XAP/1.0/")) {
1050 app_id_info
->appsegtype
= APPSEGTYPE_XMP
;
1051 app_id_info
->app_type_name
= "XMP";
1053 else if(seg_type
==0xe1 && ad
.nraw_bytes
>=32 && !de_memcmp(ad
.raw_bytes
, "<?xpacket begin=", 16)) {
1054 // I have a few files like this, that are missing the XMP signature.
1055 app_id_info
->appsegtype
= APPSEGTYPE_XMP
;
1056 app_id_info
->app_type_name
= "XMP";
1059 else if(seg_type
==0xe1 && !de_strcmp(ad
.app_id_normalized
, "HTTP://NS.ADOBE.COM/XMP/EXTENSION/")) {
1060 app_id_info
->appsegtype
= APPSEGTYPE_XMP_EXTENSION
;
1061 app_id_info
->app_type_name
= "XMP extension";
1063 else if(seg_type
==0xeb && ad
.nraw_bytes
>=10 && !de_strncmp((const char*)ad
.raw_bytes
, "HDR_RI ver", 10)) {
1064 app_id_info
->appsegtype
= APPSEGTYPE_HDR_RI_VER
;
1065 app_id_info
->app_type_name
= "JPEG-HDR Ver";
1067 else if(seg_type
==0xeb && ad
.nraw_bytes
>=10 && !de_strncmp((const char*)ad
.raw_bytes
, "HDR_RI ext", 10)) {
1068 app_id_info
->appsegtype
= APPSEGTYPE_HDR_RI_EXT
;
1069 app_id_info
->app_type_name
= "JPEG-HDR Ext";
1071 else if(seg_type
==0xeb && ad
.nraw_bytes
>=2 && !de_strncmp((const char*)ad
.raw_bytes
, "JP", 2)) {
1072 app_id_info
->appsegtype
= APPSEGTYPE_JPEGXT
;
1073 app_id_info
->app_type_name
= "JPEG XT";
1076 else if(seg_type
==0xe2 && !de_strcmp(ad
.app_id_normalized
, "MPF")) {
1077 app_id_info
->appsegtype
= APPSEGTYPE_MPF
;
1078 app_id_info
->app_type_name
= "Multi-Picture Format";
1080 else if(seg_type
==0xe3 && ad
.nraw_bytes
>=8 && !de_strncmp((const char*)ad
.raw_bytes
, "_JPSJPS_", 8)) {
1081 // This signature is not NUL terminated.
1082 app_id_info
->appsegtype
= APPSEGTYPE_JPS
;
1083 app_id_info
->app_type_name
= "JPS";
1086 else if(seg_type
==0xea && seg_data_size
>=6 && !de_strcmp(ad
.app_id_normalized
, "AROT")) {
1087 app_id_info
->appsegtype
= APPSEGTYPE_AROT
;
1088 app_id_info
->app_type_name
= "Apple absolute rotational angle delta";
1089 // Guessing that there's a "padding byte" that's part of the signature.
1092 else if(seg_type
==0xe1 && !de_strcmp(ad
.app_id_orig
, "Deark_MSRGBA")) {
1093 app_id_info
->appsegtype
= APPSEGTYPE_MSRGBA
;
1094 app_id_info
->app_type_name
= "RGBA JPEG headers from Thumbs.db";
1096 else if(seg_type
==0xe1 && !de_strcmp(ad
.app_id_orig
, "Deark_RB_swap")) {
1097 app_id_info
->appsegtype
= APPSEGTYPE_RBSWAP
;
1098 app_id_info
->app_type_name
= "Flag for swapped red/blue";
1102 app_id_info
->payload_pos
= seg_data_pos
+ sig_size
;
1105 // seg_size is the data size, excluding the marker and length fields.
// Handler for APPn segments.
// detect_app_seg_type() classifies the segment and reports where the payload
// (the data following the app id signature) starts. The app id and the
// detected type name are logged, then the segment is dispatched by type to
// the matching do_*_segment() routine.
// payload_size is whatever remains of the segment after payload_pos; if that
// is negative the dispatch is skipped (goto done).
// app_id_info.app_id_str, if set, is destroyed before returning.
// NOTE(review): this listing omits some lines of the function (e.g. the
// statements between several "case" labels and their calls) -- confirm
// against the complete file before relying on the per-case details.
1106 static void handler_app(deark
*c
, lctx
*d
,
1107 const struct marker_info
*mi
, i64 seg_data_pos
, i64 seg_data_size
)
1112 struct app_id_info_struct app_id_info
;
1114 de_zeromem(&app_id_info
, sizeof(struct app_id_info_struct
));
1116 detect_app_seg_type(c
, d
, mi
, seg_data_pos
, seg_data_size
, &app_id_info
);
1117 appsegtype
= app_id_info
.appsegtype
;
1118 payload_pos
= app_id_info
.payload_pos
;
1119 if(app_id_info
.app_id_found
) {
1120 de_dbg(c
, "app id: \"%s\", identified as: %s", ucstring_getpsz(app_id_info
.app_id_str
),
1121 app_id_info
.app_type_name
);
1124 de_dbg(c
, "app id: (not found), identified as: %s", app_id_info
.app_type_name
);
// Payload = everything in the segment from payload_pos to the segment's end.
1127 payload_size
= seg_data_pos
+ seg_data_size
- payload_pos
;
1128 if(payload_size
<0) goto done
;
1130 switch(appsegtype
) {
1131 case APPSEGTYPE_JFIF
:
1132 do_jfif_segment(c
, d
, payload_pos
, payload_size
);
1134 case APPSEGTYPE_JFXX
:
1135 do_jfxx_segment(c
, d
, payload_pos
, payload_size
);
1137 case APPSEGTYPE_ADOBEAPP14
:
1138 do_adobeapp14_segment(c
, d
, payload_pos
, payload_size
);
1140 case APPSEGTYPE_EXIF
:
1141 do_exif_segment(c
, d
, payload_pos
, payload_size
);
1143 case APPSEGTYPE_META
:
1144 do_meta_segment(c
, d
, payload_pos
, payload_size
);
1146 case APPSEGTYPE_ICC_PROFILE
:
1147 do_icc_profile_segment(c
, d
, payload_pos
, payload_size
);
1149 case APPSEGTYPE_FPXR
:
1150 d
->has_flashpix
= 1;
1151 do_fpxr_segment(c
, d
, payload_pos
, payload_size
);
// SPIFF segments are only noted; no handler is called for them here.
1153 case APPSEGTYPE_SPIFF
:
1154 d
->has_spiff_seg
= 1;
1156 case APPSEGTYPE_PHOTOSHOP
:
1157 do_photoshop_segment(c
, d
, payload_pos
, payload_size
);
1159 case APPSEGTYPE_DUCKY
:
1160 do_ducky_segment(c
, d
, payload_pos
, payload_size
);
// The XMP payload is written out as-is to an auxiliary ".xmp" file.
1162 case APPSEGTYPE_XMP
:
1163 de_dbg(c
, "XMP data at %d, size=%d", (int)(payload_pos
), (int)(payload_size
));
1165 dbuf_create_file_from_slice(c
->infile
, payload_pos
, payload_size
, "xmp", NULL
, DE_CREATEFLAG_IS_AUX
);
1167 case APPSEGTYPE_XMP_EXTENSION
:
1169 do_xmp_extension_segment(c
, d
, payload_pos
, payload_size
);
// Unlike the other cases, the two JPEG-HDR handlers are given the whole
// segment (seg_data_pos/seg_data_size), not just the payload. The last
// argument distinguishes the "VER" (0) and "EXT" (1) segment kinds.
1171 case APPSEGTYPE_HDR_RI_VER
:
1173 do_jpeghdr_segment(c
, d
, seg_data_pos
, seg_data_size
, 0);
1175 case APPSEGTYPE_HDR_RI_EXT
:
1176 do_jpeghdr_segment(c
, d
, seg_data_pos
, seg_data_size
, 1);
1178 case APPSEGTYPE_JPEGXT
:
1180 do_jpegxt_segment(c
, d
, payload_pos
, payload_size
);
1182 case APPSEGTYPE_MPF
:
1183 do_mpf_segment(c
, d
, payload_pos
, payload_size
);
1185 case APPSEGTYPE_JPS
:
1187 do_jps_segment(c
, d
, payload_pos
, payload_size
);
1189 case APPSEGTYPE_AROT
:
1190 do_arot_segment(c
, d
, payload_pos
, payload_size
);
// Hex dump of the whole segment at debug level 2 or higher.
// NOTE(review): presumably this is the default (unrecognized type) branch;
// the label itself is not visible in this listing.
1193 if(c
->debug_level
>=2) {
1194 de_dbg_hexdump(c
, c
->infile
, seg_data_pos
, seg_data_size
, 256, "segment data", 0x1);
1200 if(app_id_info
.app_id_str
) {
1201 ucstring_destroy(app_id_info
.app_id_str
);
// Handler for the JPG8 (0xf8) segment.
// Reads the first data byte as an id and logs it with a descriptive name
// ("?" unless recognized). A segment with no data is ignored.
// NOTE(review): the test that selects the "inverse color transform
// specification" name and sets d->has_revcolorxform is not visible in this
// listing -- confirm which id value it checks for.
1205 static void handler_jpg8(deark
*c
, lctx
*d
,
1206 const struct marker_info
*mi
, i64 seg_data_pos
, i64 seg_data_size
)
1209 const char *name
= "?";
1211 if(seg_data_size
<1) return;
1212 id
= de_getbyte(seg_data_pos
);
1214 d
->has_revcolorxform
= 1;
1215 name
="inverse color transform specification";
1217 de_dbg(c
, "id: 0x%02x (%s)", (unsigned int)id
, name
);
1220 static void declare_jpeg_fmt(deark
*c
, lctx
*d
, u8 seg_type
)
1222 const char *name
= "JPEG (other)";
1224 // The declared format is only an executive summary of the kind of JPEG.
1225 // It does not come close to covering all possible combinations of attributes.
1226 // (The "summary:" line goes a bit further.)
1227 if(d
->is_jpegls
) { name
= "JPEG-LS"; }
1228 else if(d
->is_mpo
) { name
= "JPEG/MPO"; }
1229 else if(d
->is_jps
) { name
= "JPEG/JPS"; }
1230 else if(d
->is_jpegxt
) { name
= "JPEG/JPEG_XT"; }
1231 else if(d
->is_jpeghdr
) { name
= "JPEG-HDR"; }
1232 else if(d
->is_lossless
) { name
= "JPEG/lossless"; }
1233 else if(d
->has_jfif_seg
&& d
->has_exif_seg
) { name
= "JPEG/JFIF+Exif"; }
1234 else if(d
->has_jfif_seg
) { name
= "JPEG/JFIF"; }
1235 else if(d
->has_exif_seg
) { name
= "JPEG/Exif"; }
1236 de_declare_fmt(c
, name
);
// Handler for SOF (start of frame) segments, including JPEG-LS SOF55 (0xf7).
// Segments shorter than 6 bytes are ignored.
// The marker code determines the coding attributes that are logged and
// recorded in d (lossless / arithmetic / progressive / hierarchical, or
// baseline for 0xc0). The file format is then declared, and the frame header
// is read: precision at pos, height at pos+1, width at pos+3, component
// count at pos+5, followed by 3 bytes per component (component id, sampling
// factor byte, quantization table id).
// For each component, two digits (sf1, sf2) are appended to
// d->sampling_code, and d->is_subsampled is set if either factor is not 1.
// NOTE(review): this listing omits some lines (e.g. the assignment of sf1,
// presumably the high nibble of the sampling byte, and whatever guards the
// finalize_all_fpxr_streams() call) -- confirm against the complete file.
1239 static void handler_sof(deark
*c
, lctx
*d
,
1240 const struct marker_info
*mi
, i64 pos
, i64 data_size
)
1245 const char *attr_lossy
= "DCT";
1246 const char *attr_cmpr
= "huffman";
1247 const char *attr_progr
= "non-progr.";
1248 const char *attr_hier
= "non-hier.";
1249 u8 seg_type
= mi
->seg_type
;
1251 if(data_size
<6) return;
1254 finalize_all_fpxr_streams(c
, d
);
// Markers 0xc1..0xcf, excluding the multiples of 4 (0xc4, 0xc8, 0xcc),
// encode the frame's attributes in the marker code itself.
1257 if(seg_type
>=0xc1 && seg_type
<=0xcf && (seg_type
%4)!=0) {
1258 if((seg_type
%4)==3) { d
->is_lossless
=1; attr_lossy
="lossless"; }
1259 if(seg_type
%16>=9) { d
->is_arithmetic
=1; attr_cmpr
="arithmetic"; }
1260 if((seg_type
%4)==2) { d
->is_progressive
=1; attr_progr
="progressive"; }
1261 if((seg_type
%8)>=5) { d
->is_hierarchical
=1; attr_hier
="hierarchical"; }
1262 de_dbg(c
, "image type: %s, %s, %s, %s",
1263 attr_lossy
, attr_cmpr
, attr_progr
, attr_hier
);
1265 else if(seg_type
==0xc0) {
1267 de_dbg(c
, "image type: baseline (%s, %s, %s, %s)",
1268 attr_lossy
, attr_cmpr
, attr_progr
, attr_hier
);
1270 else if(seg_type
==0xf7) {
1271 de_dbg(c
, "image type: JPEG-LS");
1274 // By now we have hopefully collected the info we need to decide what JPEG
1275 // format we're dealing with.
1276 declare_jpeg_fmt(c
, d
, seg_type
);
1278 d
->precision
= de_getbyte(pos
);
1279 de_dbg(c
, "precision: %d", (int)d
->precision
);
// Height is stored before width.
1280 h
= de_getu16be(pos
+1);
1281 w
= de_getu16be(pos
+3);
1282 de_dbg_dimensions(c
, w
, h
);
1283 d
->ncomp
= (i64
)de_getbyte(pos
+5);
1284 de_dbg(c
, "number of components: %d", (int)d
->ncomp
);
1286 // per-component data
// Stop if the segment is too short to hold 3 bytes for every component.
1287 if(data_size
<6+3*d
->ncomp
) goto done
;
1288 for(i
=0; i
<d
->ncomp
; i
++) {
1292 comp_id
= de_getbyte(pos
+6+3*i
+0);
1293 b
= de_getbyte(pos
+6+3*i
+1);
1295 sf2
= (i64
)(b
&0x0f);
1296 if(sf1
!=1 || sf2
!=1) d
->is_subsampled
= 1;
1297 ucstring_printf(d
->sampling_code
, DE_ENCODING_LATIN1
, "%d%d", (int)sf1
, (int)sf2
);
1298 qtid
= de_getbyte(pos
+6+3*i
+2);
1299 de_dbg(c
, "cmp #%d: id=%d sampling=%d"DE_CHAR_TIMES
"%d quant_table=Q%d",
1300 (int)i
, (int)comp_id
, (int)sf1
, (int)sf2
, (int)qtid
);
1307 static void handler_dri(deark
*c
, lctx
*d
,
1308 const struct marker_info
*mi
, i64 pos
, i64 data_size
)
1311 if(data_size
!=2) return;
1312 ri
= de_getu16be(pos
);
1313 de_dbg(c
, "restart interval: %d", (int)ri
);
1314 if(ri
!=0) d
->has_restart_markers
= 1;
// Print the 16 Huffman code counts (number of codes of each length 1..16)
// as two debug lines of 8 values each. Does nothing below debug level 2.
// codecounts must point to at least 16 bytes; codecounts[k] is the count for
// code length k+1.
// NOTE(review): no statement that clears s between the two debug lines is
// visible in this listing -- confirm the string is emptied after each line.
1317 static void dump_htable_summary(deark
*c
, lctx
*d
, const u8
*codecounts
)
1320 de_ucstring
*s
= NULL
;
1322 if(c
->debug_level
<2) return;
1324 s
= ucstring_create(c
);
1325 for(k
=0; k
<16; k
++) {
1326 ucstring_printf(s
, DE_ENCODING_LATIN1
, " %3u",
1327 (unsigned int)codecounts
[k
]);
1328 if(k
%8==7) { // end of a debug line
// The label gives the 1-based range of code lengths covered by this line.
1329 de_dbg(c
, "number of codes of len[%d-%2d]:%s",
1330 (int)(k
-7+1), (int)(k
+1),
1331 ucstring_getpsz(s
));
1335 ucstring_destroy(s
);
1338 // Just because we can, derive and display the Huffman code table (at
1339 // sufficiently high debug levels).
// pos1 is the position of the first symbol byte; codecounts[] holds the
// number of codes of each length 1..16.
// One symbol byte is read per code, in order of increasing code length, and
// recorded with its length in a temporary Huffman decoder object. Building
// the canonical code from that is what produces the debug output. The
// decoder is destroyed afterward.
// NOTE(review): the branch bodies of the debug_level/num_huff_codes test are
// not visible in this listing; presumably nothing is done unless the debug
// level is at least 3 and there are 1..162 codes -- confirm.
1340 static void dump_htable_details(deark
*c
, lctx
*d
, i64 pos1
, const u8
*codecounts
,
1343 struct fmtutil_huffman_decoder
*ht
= NULL
;
1347 // TODO: Is there any case (e.g. lossless) where >162 codes are allowed?
1348 if(c
->debug_level
>=3 && (num_huff_codes
>=1 && num_huff_codes
<=162)) {
1355 ht
= fmtutil_huffman_create_decoder(c
, 0, 0);
1357 // Note: Per the JPEG spec, "the all-1-bits code word of any length is
1358 // reserved as a prefix for longer code words". So we should not expect to
1359 // see such a code in the derived codebook.
1361 for(symlen
=1; symlen
<=16; symlen
++) {
1362 UI num_syms_of_this_length
;
1365 num_syms_of_this_length
= (UI
)codecounts
[symlen
-1];
1366 for(k
=0; k
<num_syms_of_this_length
; k
++) {
1369 sym
= de_getbyte_p(&pos
);
1370 fmtutil_huffman_record_a_code_length(c
, ht
->builder
, (fmtutil_huffman_valtype
)sym
, symlen
);
1374 // We do this only for the side effect of the debug messages.
1375 (void)fmtutil_huffman_make_canonical_code(c
, ht
->bk
, ht
->builder
, 0);
1377 fmtutil_huffman_destroy_decoder(c
, ht
);
// Handler for DHT (define Huffman table) segments.
// Processing stops when pos reaches the end of the segment data. For each
// table: the first byte gives the table class (DC/AC) and id, the next 16
// bytes are the code counts for lengths 1..16, and those are followed by one
// symbol byte per code (hence the final pos += num_huff_codes).
// The table is only reported in the debug output.
// NOTE(review): this listing omits some lines (the loop header, the decoding
// of b into table_class/table_id, the reset of num_huff_codes, and the
// advance past the 17 header bytes before dump_htable_details is called) --
// confirm against the complete file.
1380 static void handler_dht(deark
*c
, lctx
*d
,
1381 const struct marker_info
*mi
, i64 pos1
, i64 data_size
)
1392 if(pos
>= pos1
+data_size
) goto done
;
1394 b
= de_getbyte(pos
);
1397 de_dbg(c
, "table: %s%d, at %d", table_class
==0?"DC":"AC",
1398 (int)table_id
, (int)pos
);
1400 de_read(codecounts
, pos
+1, 16);
// The total number of codes is the sum of the 16 per-length counts.
1402 for(k
=0; k
<16; k
++) {
1403 num_huff_codes
+= (i64
)codecounts
[k
];
1405 de_dbg_indent(c
, 1);
1406 dump_htable_summary(c
, d
, codecounts
);
1407 de_dbg(c
, "number of codes: %d", (int)num_huff_codes
);
1409 dump_htable_details(c
, d
, pos
, codecounts
, num_huff_codes
);
1410 de_dbg_indent(c
, -1);
1411 pos
+= num_huff_codes
;
1418 // DAC = Define arithmetic coding conditioning
// The segment is treated as data_size/2 two-byte entries: a byte identifying
// the table (class DC/AC, and id) followed by a conditioning value. Each
// entry is only logged.
// NOTE(review): the decoding of b into table_class/table_id is not visible
// in this listing -- confirm against the complete file.
1419 static void handler_dac(deark
*c
, lctx
*d
,
1420 const struct marker_info
*mi
, i64 pos1
, i64 data_size
)
1429 ntables
= data_size
/2;
1430 for(i
=0; i
<ntables
; i
++) {
1431 b
= de_getbyte(pos1
+i
*2);
1434 de_dbg(c
, "table: %s%u", table_class
==0?"DC":"AC",
1435 (unsigned int)table_id
);
1436 cs
= de_getbyte(pos1
+i
*2+1);
1437 de_dbg_indent(c
, 1);
1438 de_dbg(c
, "conditioning value: %d", (int)cs
);
1439 de_dbg_indent(c
, -1);
// Print the 64 values of a quantization table as 8 debug lines of 8 values.
// pos is the position of the first table value. Nothing is printed below
// debug level 2, or for tables whose precision_code is not 0 (only one byte
// per value is read here).
// zigzag[k] gives, for the k'th position in row-by-row order, the index of
// that value within the 64 bytes read from the file, so the rows are printed
// in natural (row-by-row) order.
// NOTE(review): no statement that clears s after each debug line is visible
// in this listing -- confirm the string is emptied between lines.
1443 static void dump_qtable_data(deark
*c
, lctx
*d
, i64 pos
, u8 precision_code
)
1447 de_ucstring
*s
= NULL
;
1448 static const u8 zigzag
[64] = {
1449 0, 1, 5, 6,14,15,27,28,
1450 2, 4, 7,13,16,26,29,42,
1451 3, 8,12,17,25,30,41,43,
1452 9,11,18,24,31,40,44,53,
1453 10,19,23,32,39,45,52,54,
1454 20,22,33,38,46,51,55,60,
1455 21,34,37,47,50,56,59,61,
1456 35,36,48,49,57,58,62,63
1459 if(c
->debug_level
<2) return;
1460 if(precision_code
!=0) return;
1462 de_read(qbuf
, pos
, 64);
1463 s
= ucstring_create(c
);
1464 for(k
=0; k
<64; k
++) {
1465 ucstring_printf(s
, DE_ENCODING_LATIN1
, " %3u",
1466 (unsigned int)qbuf
[(unsigned int)zigzag
[k
]]);
1467 if(k
%8==7) { // end of a debug line
1468 de_dbg(c
, "data:%s", ucstring_getpsz(s
));
1472 ucstring_destroy(s
);
// Handler for DQT (define quantization table) segments.
// Processing stops when pos reaches the end of the segment data. Each table
// starts with a byte whose high nibble is the precision code (0 and 1 are
// the recognized values); the table values start at pos+1. The table is
// logged, and dumped via dump_qtable_data().
// A table size (qsize) of 0 ends processing, since the next table could not
// be located.
// NOTE(review): this listing omits some lines (the loop header, the
// derivation of table_id, the values assigned to s and qsize for each
// precision code, and the advance of pos) -- confirm against the complete
// file.
1475 static void handler_dqt(deark
*c
, lctx
*d
,
1476 const struct marker_info
*mi
, i64 pos1
, i64 data_size
)
1486 if(pos
>= pos1
+data_size
) goto done
;
1488 b
= de_getbyte(pos
);
1489 precision_code
= b
>>4;
1491 if(precision_code
==0) {
1495 else if(precision_code
==1) {
1503 de_dbg(c
, "table: Q%d, at %d", table_id
, (int)pos
);
1505 de_dbg_indent(c
, 1);
1506 de_dbg(c
, "precision: %d (%s)", (int)precision_code
, s
);
1507 dump_qtable_data(c
, d
, pos
+1, precision_code
);
1508 de_dbg_indent(c
, -1);
1510 if(qsize
==0) goto done
;
// Process a comment of comment_size bytes located at pos.
// encoding_base is the text encoding to assume, or DE_ENCODING_UNKNOWN.
// Depending on the extract level, the comment is either written to a
// "comment.txt" auxiliary file or shown in the debug output (see below).
// When writing to a file with an unknown encoding, the raw bytes are copied
// unchanged; otherwise the text is decoded and written as UTF-8.
// NOTE(review): this listing omits some lines (e.g. the branch structure
// around the file-writing and debug-printing code, and the close of outf) --
// confirm against the complete file.
1519 static void handle_comment(deark
*c
, lctx
*d
, i64 pos
, i64 comment_size
,
1520 de_encoding encoding_base
)
1522 de_ucstring
*s
= NULL
;
1523 de_ext_encoding encoding_ext
;
1526 // If c->extract_level>=2, write the comment to a file;
1527 // otherwise if we have debugging output, write (at least part of) it
1528 // to the debug output;
1529 // otherwise do nothing.
1531 if(c
->extract_level
<2 && c
->debug_level
<1) return;
1532 if(comment_size
<1) return;
1534 write_to_file
= (c
->extract_level
>=2);
1536 if(write_to_file
&& encoding_base
==DE_ENCODING_UNKNOWN
) {
1537 // If we don't know the encoding, dump the raw bytes to a file.
1538 dbuf_create_file_from_slice(c
->infile
, pos
, comment_size
, "comment.txt",
1539 NULL
, DE_CREATEFLAG_IS_AUX
);
1543 if(encoding_base
==DE_ENCODING_UNKNOWN
) {
1544 // In this case, we're printing the comment in the debug info.
1545 // If we don't know the encoding, pretend it's ASCII-like.
1546 encoding_ext
= DE_EXTENC_MAKE(DE_ENCODING_ASCII
, DE_ENCSUBTYPE_PRINTABLE
);
1549 encoding_ext
= encoding_base
;
1552 s
= ucstring_create(c
);
1553 dbuf_read_to_ucstring(c
->infile
, pos
, comment_size
, s
, 0, encoding_ext
);
1557 outf
= dbuf_create_output_file(c
, "comment.txt", NULL
, DE_CREATEFLAG_IS_AUX
);
1558 ucstring_write_as_utf8(c
, s
, outf
, 1);
1562 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz_d(s
));
1566 ucstring_destroy(s
);
1569 static void handler_com(deark
*c
, lctx
*d
,
1570 const struct marker_info
*mi
, i64 pos
, i64 data_size
)
1572 // Note that a JPEG COM-segment comment is an arbitrary sequence of bytes, so
1573 // there's no way to know what text encoding it uses, or even whether it is text.
1574 // We'll use the user's "-inenc" encoding, or DE_ENCODING_UNKNOWN by default.
1575 handle_comment(c
, d
, pos
, data_size
, c
->input_encoding
);
// Handler for SOS (start of scan) segments. The scan header is only logged.
// Layout as read here: a component count at pos, then 2 bytes per component
// (component id, then a byte selecting the DC and AC tables), then three
// bytes: spectral selection start, spectral selection end, and a byte whose
// high/low nibbles are the successive approximation bit positions.
// Nothing is read past the component count unless the segment holds at
// least 4 + 2*ncomp bytes.
// NOTE(review): this listing omits some lines (e.g. the decoding of b into
// dctable/actable, and any statement between the first size check and the
// read of ncomp) -- confirm against the complete file.
1578 static void handler_sos(deark
*c
, lctx
*d
,
1579 const struct marker_info
*mi
, i64 pos
, i64 data_size
)
1586 u8 actable
, dctable
;
1588 if(data_size
<1) goto done
;
1591 ncomp
= (i64
)de_getbyte(pos
);
1592 de_dbg(c
, "number of components in scan: %d", (int)ncomp
);
1593 if(data_size
< 4 + 2*ncomp
) goto done
;
1595 for(i
=0; i
<ncomp
; i
++) {
1596 cs
= de_getbyte(pos
+1+i
*2);
1597 de_dbg(c
, "component #%d id: %d", (int)i
, (int)cs
);
1598 de_dbg_indent(c
, 1);
1599 b
= de_getbyte(pos
+1+i
*2+1);
1602 de_dbg(c
, "tables to use: DC%d, AC%d", (int)dctable
, (int)actable
);
1603 de_dbg_indent(c
, -1);
// The three trailing bytes follow the per-component entries.
1606 ss
= de_getbyte(pos
+1+ncomp
*2);
1607 se
= de_getbyte(pos
+1+ncomp
*2+1);
1608 ax
= de_getbyte(pos
+1+ncomp
*2+2);
1609 de_dbg(c
, "spectral selection start/end: %d, %d", (int)ss
, (int)se
);
1610 de_dbg(c
, "successive approx. bit pos high/low: %u, %u",
1611 (unsigned int)(ax
>>4), (unsigned int)(ax
&0x0f));
1617 static const struct marker_info1 marker_info1_arr
[] = {
1618 {0x01, 0x0101, "TEM", NULL
, NULL
},
1619 {0xc4, 0x0001, "DHT", "Define Huffman table", handler_dht
},
1620 {0xc8, 0x0201, "JPG", NULL
, handler_sof
},
1621 {0xcc, 0x0001, "DAC", "Define arithmetic coding conditioning", handler_dac
},
1622 {0xd8, 0x0103, "SOI", "Start of image", NULL
},
1623 {0xd9, 0x0103, "EOI", "End of image", NULL
},
1624 {0xda, 0x0003, "SOS", "Start of scan", handler_sos
},
1625 {0xdb, 0x0001, "DQT", "Define quantization table", handler_dqt
},
1626 {0xdc, 0x0001, "DNL", "Define number of lines", NULL
},
1627 {0xdd, 0x0003, "DRI", "Define restart interval", handler_dri
},
1628 {0xde, 0x0001, "DHP", "Define hierarchical progression", NULL
},
1629 {0xdf, 0x0001, "EXP", "Expand reference component", NULL
},
1630 {0xf7, 0x0202, "SOF55", "JPEG-LS start of frame", handler_sof
},
1631 {0xf8, 0x0001, "JPG8", NULL
, handler_jpg8
},
1632 {0xf8, 0x0002, "LSE", "JPEG-LS preset parameters", NULL
},
1633 {0xfe, 0x0003, "COM", "Comment", handler_com
}
1636 // Caller allocates mi
// Fill in *mi (flags, names, handler) for the marker code seg_type.
// *mi is zeroed first. The static table is searched first, skipping entries
// that are not flagged as compatible with the current mode (JPEG-LS if
// d->is_jpegls is set, otherwise JPEG). If there is no table entry, the
// marker is matched against the numbered ranges: APPn (0xe0-0xef), SOFn
// (0xc0-0xcf), RSTn (0xd0-0xd7), JPGn (0xf0-0xfd). Anything else is named
// "???". If no long name was set, the short name is used for it.
// NOTE(review): this listing omits some lines (e.g. the copying of the
// handler from the table entry, the jumps after each match, and the return
// statements), so the meaning of the return value is not visible here --
// confirm against the complete file.
1637 static int get_marker_info(deark
*c
, lctx
*d
, u8 seg_type
,
1638 struct marker_info
*mi
)
1642 de_zeromem(mi
, sizeof(struct marker_info
));
1643 mi
->seg_type
= seg_type
;
1645 // First, try to find the segment type in the static marker info.
1646 for(k
=0; k
<(i64
)DE_ARRAYCOUNT(marker_info1_arr
); k
++) {
1647 const struct marker_info1
*mi1
= &marker_info1_arr
[k
];
// Skip table entries that don't apply to the kind of file being read.
1649 if(!d
->is_jpegls
&& !(mi1
->flags
&FLAG_JPEG_COMPAT
)) continue;
1650 if(d
->is_jpegls
&& !(mi1
->flags
&FLAG_JPEGLS_COMPAT
)) continue;
1652 if(mi1
->seg_type
== seg_type
) {
1653 mi
->flags
= mi1
->flags
;
1655 de_strlcpy(mi
->shortname
, mi1
->shortname
, sizeof(mi
->shortname
));
1657 de_snprintf(mi
->longname
, sizeof(mi
->longname
), "%s: %s",
1658 mi1
->shortname
, mi1
->longname
);
1664 // Handle some pattern-based markers.
1666 if(seg_type
>=0xe0 && seg_type
<=0xef) {
1667 de_snprintf(mi
->shortname
, sizeof(mi
->shortname
), "APP%d", (int)(seg_type
-0xe0));
1668 mi
->hfn
= handler_app
;
1672 if(seg_type
>=0xc0 && seg_type
<=0xcf) {
1673 de_snprintf(mi
->shortname
, sizeof(mi
->shortname
), "SOF%d", (int)(seg_type
-0xc0));
1674 de_snprintf(mi
->longname
, sizeof(mi
->longname
), "%s: Start of frame", mi
->shortname
);
1675 mi
->flags
|= FLAG_IS_SOF
;
1676 mi
->hfn
= handler_sof
;
1680 if(seg_type
>=0xd0 && seg_type
<=0xd7) {
1681 int rstn
= (int)(seg_type
-0xd0);
1682 de_snprintf(mi
->shortname
, sizeof(mi
->shortname
), "RST%d", rstn
);
1683 de_snprintf(mi
->longname
, sizeof(mi
->longname
), "%s: Restart with mod 8 count %d",
1684 mi
->shortname
, rstn
);
1685 mi
->flags
|= FLAG_NO_DATA
;
1689 if(seg_type
>=0xf0 && seg_type
<=0xfd) {
1690 de_snprintf(mi
->shortname
, sizeof(mi
->shortname
), "JPG%d", (int)(seg_type
-0xf0));
1694 de_strlcpy(mi
->shortname
, "???", sizeof(mi
->shortname
));
1695 de_strlcpy(mi
->longname
, "???", sizeof(mi
->longname
));
1699 if(!mi
->longname
[0]) {
1700 // If no longname was set, use the shortname
1701 de_strlcpy(mi
->longname
, mi
->shortname
, sizeof(mi
->longname
));
1706 static void do_segment(deark
*c
, lctx
*d
, const struct marker_info
*mi
,
1707 i64 payload_pos
, i64 payload_size
)
1709 de_dbg(c
, "segment 0x%02x (%s) at %d, dpos=%d, dlen=%d",
1710 (unsigned int)mi
->seg_type
, mi
->longname
, (int)(payload_pos
-4),
1711 (int)payload_pos
, (int)payload_size
);
1714 // If a handler function is available, use it.
1715 de_dbg_indent(c
, 1);
1716 mi
->hfn(c
, d
, mi
, payload_pos
, payload_size
);
1717 de_dbg_indent(c
, -1);
1721 // TODO: This is very similar to detect_jpeg_len().
1722 // Maybe they should be consolidated.
// Scan the entropy-coded data that starts at pos1, looking for the marker
// that ends it.
// *bytes_consumed defaults to everything up to the end of the file, and is
// replaced by the actual length of the scan data if a terminating marker is
// found.
// The following 0xff sequences are considered part of the scan data: an
// escaped 0xff; in JPEG-LS, an 0xff followed by a byte less than 0x80; RSTn
// markers (reported at debug level 2+); and 0xff "fill bytes". Any other
// marker ends the scan.
// NOTE(review): this listing omits some lines (the loop, some conditions,
// the adjustment of pos mentioned in the "Subtract..." comment, and the
// return statements), so the return value's meaning is not visible here --
// confirm against the complete file.
1723 static int do_read_scan_data(deark
*c
, lctx
*d
,
1724 i64 pos1
, i64
*bytes_consumed
)
1728 struct marker_info mi
;
1730 *bytes_consumed
= c
->infile
->len
- pos1
; // default
1731 de_dbg(c
, "scan data at %d", (int)pos1
);
1733 de_dbg_indent(c
, 1);
1736 if(pos
>= c
->infile
->len
) goto done
;
1737 b0
= de_getbyte_p(&pos
);
1739 b1
= de_getbyte_p(&pos
);
1741 ; // an escaped 0xff
1743 else if(d
->is_jpegls
&& b1
<0x80) {
1744 // In JPEG-LS, 0xff bytes are not escaped if they're followed by a
1745 // a byte less than 0x80.
1748 else if(b1
>=0xd0 && b1
<=0xd7) { // an RSTn marker
1749 if(c
->debug_level
>=2) {
1750 get_marker_info(c
, d
, b1
, &mi
);
1751 de_dbg2(c
, "marker 0x%02x (%s) at %d", (unsigned int)b1
,
1752 mi
.longname
, (int)(pos
-2));
1755 else if(b1
==0xff) { // a "fill byte" (are they allowed here?)
1759 // A marker that is not part of the scan.
1760 // Subtract the bytes consumed by it, and stop.
1762 *bytes_consumed
= pos
- pos1
;
1763 de_dbg(c
, "end of scan data found at %d (len=%d)", (int)pos
, (int)*bytes_consumed
);
1770 de_dbg_indent(c
, -1);
1774 // Caller supplies s[5].
1775 static void exif_version_to_string(u32 v
, char *s
)
1777 s
[0] = de_byte_to_printable_char((u8
)((v
>>24)&0xff));
1778 s
[1] = de_byte_to_printable_char((u8
)((v
>>16)&0xff));
1779 s
[2] = de_byte_to_printable_char((u8
)((v
>>8)&0xff));
1780 s
[3] = de_byte_to_printable_char((u8
)(v
&0xff));
1784 // Print a summary line indicating the main characteristics of this image.
1785 static void print_summary(deark
*c
, lctx
*d
)
1787 de_ucstring
*summary
= NULL
;
1789 if(d
->is_jpegls
) goto done
;
1790 if(!d
->found_sof
) goto done
;
1791 if(!d
->found_soi
) goto done
;
1793 summary
= ucstring_create(c
);
1795 if(d
->is_baseline
) ucstring_append_sz(summary
, " baseline", DE_ENCODING_LATIN1
);
1796 if(d
->is_lossless
) ucstring_append_sz(summary
, " lossless", DE_ENCODING_LATIN1
);
1797 if(d
->is_progressive
) ucstring_append_sz(summary
, " progressive", DE_ENCODING_LATIN1
);
1798 if(d
->is_arithmetic
) ucstring_append_sz(summary
, " arithmetic", DE_ENCODING_LATIN1
);
1799 if(d
->is_hierarchical
) ucstring_append_sz(summary
, " hierarchical", DE_ENCODING_LATIN1
);
1800 ucstring_printf(summary
, DE_ENCODING_LATIN1
, " cmpts=%d", (int)d
->ncomp
);
1801 if(d
->is_subsampled
) {
1802 // The subsampling type code printed here is not the standard way to denote
1803 // subsampling, but the standard notation is incomprehensible, and doesn't
1804 // cover all the possible cases.
1805 ucstring_append_sz(summary
, " subsampling=", DE_ENCODING_LATIN1
);
1806 ucstring_append_ucstring(summary
, d
->sampling_code
);
1808 ucstring_printf(summary
, DE_ENCODING_LATIN1
, " bits=%d", (int)d
->precision
);
1810 if(d
->has_restart_markers
) ucstring_append_sz(summary
, " rst", DE_ENCODING_LATIN1
);
1811 if(d
->has_jfif_seg
) {
1812 ucstring_printf(summary
, DE_ENCODING_LATIN1
, " JFIF=%u.%02u",
1813 (unsigned int)d
->jfif_ver_h
, (unsigned int)d
->jfif_ver_l
);
1815 if(d
->has_spiff_seg
) ucstring_append_sz(summary
, " SPIFF", DE_ENCODING_LATIN1
);
1816 if(d
->has_exif_seg
) {
1817 ucstring_append_sz(summary
, " Exif", DE_ENCODING_LATIN1
);
1818 if(d
->exif_version_as_uint32
!=0) {
1820 exif_version_to_string(d
->exif_version_as_uint32
, tmps
);
1821 ucstring_printf(summary
, DE_ENCODING_LATIN1
, "=%s", tmps
);
1824 if(d
->has_adobeapp14
)
1825 ucstring_printf(summary
, DE_ENCODING_LATIN1
, " colorxform=%d", (int)d
->color_transform
);
1826 if(d
->has_revcolorxform
) ucstring_append_sz(summary
, " rev-colorxform", DE_ENCODING_LATIN1
);
1828 if(d
->has_jfif_thumb
) ucstring_append_sz(summary
, " JFIFthumbnail", DE_ENCODING_LATIN1
);
1829 if(d
->has_jfxx_seg
) ucstring_append_sz(summary
, " JFXX", DE_ENCODING_LATIN1
);
1830 if(d
->has_flashpix
) ucstring_append_sz(summary
, " FlashPix", DE_ENCODING_LATIN1
);
1831 if(d
->is_jpeghdr
) ucstring_append_sz(summary
, " HDR", DE_ENCODING_LATIN1
);
1832 if(d
->is_jpegxt
) ucstring_append_sz(summary
, " XT", DE_ENCODING_LATIN1
);
1833 if(d
->has_mpf_seg
) ucstring_append_sz(summary
, " MPO", DE_ENCODING_LATIN1
);
1834 if(d
->is_jps
) ucstring_append_sz(summary
, " JPS", DE_ENCODING_LATIN1
);
1835 if(d
->has_iccprofile
) ucstring_append_sz(summary
, " ICC", DE_ENCODING_LATIN1
);
1836 if(d
->has_xmp
) ucstring_append_sz(summary
, " XMP", DE_ENCODING_LATIN1
);
1837 if(d
->has_xmp_ext
) ucstring_append_sz(summary
, " XMPext", DE_ENCODING_LATIN1
);
1838 if(d
->has_psd
) ucstring_append_sz(summary
, " PSD", DE_ENCODING_LATIN1
);
1839 if(d
->has_iptc
) ucstring_append_sz(summary
, " IPTC", DE_ENCODING_LATIN1
);
1840 if(d
->has_exif_gps
) ucstring_append_sz(summary
, " GPS", DE_ENCODING_LATIN1
);
1841 if(d
->has_afcp
) ucstring_append_sz(summary
, " AFCP", DE_ENCODING_LATIN1
);
1843 if(d
->scan_count
!=1) ucstring_printf(summary
, DE_ENCODING_LATIN1
, " scans=%d", d
->scan_count
);
1845 de_dbg(c
, "summary:%s", ucstring_getpsz(summary
));
1848 ucstring_destroy(summary
);
// Checks that are made once the frame header has been processed: report
// cases where the JFIF and Exif metadata imply different things.
// Does nothing for JPEG-LS. The checks apply only when the file has both a
// JFIF and an Exif segment, the Exif segment did not come before the JFIF
// segment, and the JFIF version is 1.01 or 1.02.
//  - Orientation: reported if the Exif orientation is greater than 1 (JFIF's
//    is taken to be orientation 1).
//  - Subsampling position: reported if Exif says "cosited", and the image is
//    subsampled and has more than one component.
// NOTE(review): part of the first message's format string is not visible in
// this listing -- confirm against the complete file.
1851 static void do_post_sof_stuff(deark
*c
, lctx
*d
)
1853 if(d
->is_jpegls
) return;
1855 if(d
->has_jfif_seg
&& d
->has_exif_seg
&& !d
->exif_before_jfif
&&
1856 (d
->jfif_ver_h
==1 && (d
->jfif_ver_l
==1 || d
->jfif_ver_l
==2)))
1858 if(d
->exif_orientation
>1) {
1859 de_dbg(c
, "Note: Image has an ambiguous orientation: JFIF says "
1861 fmtutil_tiff_orientation_name(1),
1862 fmtutil_tiff_orientation_name((i64
)d
->exif_orientation
));
1865 if(d
->exif_cosited
&& d
->is_subsampled
&& d
->ncomp
>1) {
1866 de_dbg(c
, "Note: Image has an ambiguous subsampling position: JFIF says "
1867 "centered; Exif says cosited");
1870 // TODO: Another thing we could check for is a significant conflict in
1871 // the JFIF and Exif density settings.
1875 // Tasks to do at the end of normal JPEG data (after we've found the EOI marker,
1876 // or an unexpected end of file).
1877 // This does not handle data that might exist after the EOI; that might still
1879 static void do_at_end_of_jpeg(deark
*c
, lctx
*d
)
1881 dbuf_close(d
->iccprofile_file
);
1882 d
->iccprofile_file
= NULL
;
1883 dbuf_close(d
->hdr_residual_file
);
1884 d
->hdr_residual_file
= NULL
;
1885 destroy_fpxr_data(c
, d
);
1887 if(d
->extxmp_membuf
&& !d
->extxmp_error_flag
) {
1888 dbuf
*tmpdbuf
= NULL
;
1889 tmpdbuf
= dbuf_create_output_file(c
, "xmp", NULL
, DE_CREATEFLAG_IS_AUX
);
1890 dbuf_copy(d
->extxmp_membuf
, 0, d
->extxmp_total_len
, tmpdbuf
);
1891 dbuf_close(tmpdbuf
);
1893 dbuf_close(d
->extxmp_membuf
);
1894 d
->extxmp_membuf
= NULL
;
// Release the resources owned by the module context d. The sampling code
// string is destroyed here.
// NOTE(review): other statements of this function (e.g. a NULL check, and
// the freeing of d itself) are not visible in this listing -- confirm
// against the complete file.
1897 static void do_destroy_lctx(deark
*c
, lctx
*d
)
1900 ucstring_destroy(d
->sampling_code
);
1904 // Process a single JPEG image (through the EOI marker).
1905 // Returns nonzero if EOI was found.
1906 static int do_jpeg_stream(deark
*c
, lctx
*d
, i64 pos1
, i64
*bytes_consumed
)
1913 struct marker_info mi
;
1914 i64 scan_byte_count
;
1918 d
->sampling_code
= ucstring_create(c
);
1922 if(pos
>=c
->infile
->len
)
1924 b
= de_getbyte_p(&pos
);
1931 // Not an 0xff byte, and not preceded by an 0xff byte. Just ignore it.
1935 found_marker
= 0; // Reset this flag.
1938 continue; // Escaped 0xff
1943 if(seg_type
==0xf7 && !d
->found_sof
) {
1947 get_marker_info(c
, d
, seg_type
, &mi
);
1949 if(mi
.flags
& FLAG_IS_SOF
) {
1953 if(mi
.flags
& FLAG_NO_DATA
) {
1954 de_dbg(c
, "marker 0x%02x (%s) at %d", (unsigned int)seg_type
,
1955 mi
.longname
, (int)(pos
-2));
1957 if(seg_type
==0xd9) { // EOI
1962 if(seg_type
==0xd8 && !d
->found_soi
) {
1969 // If we get here, we're reading a segment that has a size field.
1970 seg_size
= de_getu16be(pos
);
1971 if(pos
<2) break; // bogus size
1973 do_segment(c
, d
, &mi
, pos
+2, seg_size
-2);
1977 if(mi
.flags
& FLAG_IS_SOF
) {
1979 do_post_sof_stuff(c
, d
);
1984 if(seg_type
==0xda) {
1985 // If we read an SOS segment, now read the untagged image data that
1986 // should follow it.
1987 if(!do_read_scan_data(c
, d
, pos
, &scan_byte_count
)) {
1990 pos
+= scan_byte_count
;
1995 do_at_end_of_jpeg(c
, d
);
1996 *bytes_consumed
= pos
- pos1
;
// Process AFCP data found at the end of the file, by running the "afcp"
// module on the part of the input file from offset 0 to endpos.
// NOTE(review): a statement of this function is not visible in this listing
// (presumably the one recording that AFCP data was found) -- confirm
// against the complete file.
2000 static void do_afcp_segment(deark
*c
, lctx
*d
, i64 endpos
)
2003 de_dbg(c
, "AFCP segment found at end of file");
2005 de_dbg_indent(c
, 1);
2006 de_run_module_by_id_on_slice(c
, "afcp", NULL
, c
->infile
, 0, endpos
);
2007 de_dbg_indent(c
, -1);
// Main function of the "jpeg" module.
// Processes the JPEG stream, then looks at whatever follows it in the file.
// The trailing data is examined only if the stream was read successfully,
// this module was not invoked from deep inside another module
// (module_nesting_level<=1), and at least 4 bytes remain. In order:
//  - With 24 or more extra bytes, the bytes read from 12 before the end of
//    the file are tested for an "AXS!" or "AXS*" signature, which indicates
//    AFCP data.
//  - If there was an MPF segment, extra JPEG streams are expected.
//  - If a JPEG signature (ff d8 ff) occurs in the first (up to) 512 extra
//    bytes, the user is told about the "jpegscan" module.
//  - Extra data that is all zeroes is ignored.
//  - Otherwise, the amount and position of the unidentified data is
//    reported.
// Finally, the summary line is printed and the context is destroyed.
// NOTE(review): this listing omits some lines (declarations, braces, jumps
// and the done label), so the exact control flow between the steps above is
// not fully visible -- confirm against the complete file.
2010 static void de_run_jpeg(deark
*c
, de_module_params
*mparams
)
2014 int retval_stream
= 0;
2016 i64 extra_bytes_at_eof
;
2020 d
= de_malloc(c
, sizeof(lctx
));
2023 retval_stream
= do_jpeg_stream(c
, d
, pos
, &bytes_consumed
);
2024 if(!retval_stream
) goto done
;
2025 pos
+= bytes_consumed
;
2027 if(bytes_consumed
<1) goto done
;
2028 if(pos
>= c
->infile
->len
) goto done
;
2030 if(c
->module_nesting_level
>1) goto done
;
2031 extra_bytes_at_eof
= c
->infile
->len
- pos
;
2032 if(extra_bytes_at_eof
<4) goto done
;
2034 if(extra_bytes_at_eof
>=24) {
2037 de_read(tbuf
, c
->infile
->len
-12, sizeof(tbuf
));
2038 if(tbuf
[0]=='A' && tbuf
[1]=='X' && tbuf
[2]=='S' &&
2039 (tbuf
[3]=='!' || tbuf
[3]=='*'))
2041 do_afcp_segment(c
, d
, c
->infile
->len
);
2046 if(d
->has_mpf_seg
) {
2047 // In this case, it is normal for there to be multiple JPEG streams,
2048 // and we should have already extracted the extras.
2052 nbytes_to_scan
= de_min_int(extra_bytes_at_eof
, 512);
2053 if(dbuf_search(c
->infile
, (const u8
*)"\xff\xd8\xff", 3, pos
,
2054 nbytes_to_scan
, &foundpos
))
2056 de_info(c
, "Note: This file might contain multiple JPEG images. "
2057 "Use \"-m jpegscan\" to extract them.");
2061 if(dbuf_is_all_zeroes(c
->infile
, pos
, extra_bytes_at_eof
)) goto done
;
2063 de_info(c
, "Note: %"I64_FMT
" bytes of unidentified data found at end "
2064 "of file (starting at %"I64_FMT
").", extra_bytes_at_eof
, pos
);
2069 print_summary(c
, d
);
2071 do_destroy_lctx(c
, d
);
// Context for the "jpegscan" module (type name "scanctx").
// NOTE(review): the member list is not visible in this listing. The code
// that follows reads and writes members named "len" and "is_jpegls" --
// confirm against the complete file.
2075 typedef struct scanctx_struct
{
// Used by the "jpegscan" module to decide whether the data starting at pos1
// (at most len bytes) looks like a complete JPEG or JPEG-LS file.
// The data is walked a marker at a time, without interpreting the segments.
// Fill bytes and escape sequences are skipped over; the EOI marker ends the
// search. Returns 0 if EOI is reached before both an SOF marker and a scan
// were seen, or if an SOS marker appears before any SOF marker. A segment
// whose length field is less than 2 ends the search.
// NOTE(review): many lines of this function are not visible in this listing
// (the loop, most branch bodies, the remaining return statements, and the
// place where d->len, which the caller reads after a nonzero return, is
// set) -- confirm against the complete file.
2080 static int detect_jpeg_len(deark
*c
, scanctx
*d
, i64 pos1
, i64 len
)
2096 b0
= de_getbyte(pos
);
2103 // Peek at the next byte (after this 0xff byte).
2104 b1
= de_getbyte(pos
+1);
2107 // A "fill byte", not a marker.
2111 else if(b1
==0x00 || (d
->is_jpegls
&& b1
<0x80 && in_scan
)) {
2112 // An escape sequence, not a marker.
2116 else if(b1
==0xd9) { // EOI. That's what we're looking for.
2117 if(!found_sof
|| !found_scan
) return 0;
2123 de_dbg(c
, "Looks like a JPEG-LS file.");
// Codes in the SOF range, excluding DHT (0xc4), JPG (0xc8) and DAC (0xcc).
2127 else if(b1
>=0xc0 && b1
<=0xcf && b1
!=0xc4 && b1
!=0xc8 && b1
!=0xcc) {
2131 if(b1
==0xda) { // SOS - Start of scan
2132 if(!found_sof
) return 0;
2136 else if(b1
>=0xd0 && b1
<=0xd7) {
2137 // RSTn markers don't change the in_scan state.
2144 if((b1
>=0xd0 && b1
<=0xda) || b1
==0x01) {
2145 // Markers that have no content.
2150 // Everything else should be a marker segment, with a length field.
2151 seg_size
= de_getu16be(pos
+2);
2152 if(seg_size
<2) break; // bogus size
// Main function of the "jpegscan" module.
// Repeatedly searches the rest of the input file for a JPEG signature
// (ff d8 ff). Each candidate is checked with detect_jpeg_len(); if it is
// accepted, d->len bytes are extracted to a file with extension "jls" (if
// detected as JPEG-LS) or "jpg". The search ends when the end of the file is
// reached, or no further signature is found.
// NOTE(review): this listing omits some lines (the loop, the assignments to
// pos -- presumably from foundpos before the detect_jpeg_len() call, and
// past the candidate afterward -- and the cleanup of d) -- confirm against
// the complete file.
2160 static void de_run_jpegscan(deark
*c
, de_module_params
*mparams
)
2167 d
= de_malloc(c
, sizeof(*d
));
2170 if(pos
>= c
->infile
->len
) break;
2172 ret
= dbuf_search(c
->infile
, (const u8
*)"\xff\xd8\xff", 3,
2173 pos
, c
->infile
->len
-pos
, &foundpos
);
2174 if(!ret
) break; // No more JPEGs in file.
2176 de_dbg(c
, "Found possible JPEG file at %d", (int)foundpos
);
2180 if(detect_jpeg_len(c
, d
, pos
, c
->infile
->len
-pos
)) {
2181 de_dbg(c
, "length=%d", (int)d
->len
);
2182 dbuf_create_file_from_slice(c
->infile
, pos
, d
->len
,
2183 d
->is_jpegls
? "jls" : "jpg", NULL
, 0);
2187 de_dbg(c
, "Doesn't seem to be a valid JPEG.");
// Identification function for the "jpeg" module: tests whether the file
// starts with the bytes ff d8 ff.
// NOTE(review): the return statements are not visible in this listing, so
// the confidence value reported for a match can't be stated here -- confirm
// against the complete file.
2195 static int de_identify_jpeg(deark
*c
)
2197 if(!dbuf_memcmp(c
->infile
, 0, "\xff\xd8\xff", 3)) {
// Module definition for the JPEG module: fills in the module's descriptions
// and its run and identify functions.
// NOTE(review): the assignment of the module id is not visible in this
// listing -- confirm against the complete file.
2203 void de_module_jpeg(deark
*c
, struct deark_module_info
*mi
)
2206 mi
->desc
= "JPEG image";
2207 mi
->desc2
= "resources only";
2208 mi
->run_fn
= de_run_jpeg
;
2209 mi
->identify_fn
= de_identify_jpeg
;
// Module definition for the "jpegscan" module: fills in the module's id,
// description, and run function. No identify function is assigned in the
// lines visible here, so presumably this module is only run when requested
// by name (e.g. "-m jpegscan").
2212 void de_module_jpegscan(deark
*c
, struct deark_module_info
*mi
)
2214 mi
->id
= "jpegscan";
2215 mi
->desc
= "Extract embedded JPEG images from arbitrary files";
2216 mi
->run_fn
= de_run_jpegscan
;