fnt: Improved error handling, etc.
[deark.git] / modules / j2c.c
blob169d20cbd757eb485f10db3a2876699f2ca0c10d
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // J2C - JPEG 2000 codestream
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_j2c);
11 struct page_ctx {
12 i64 ncomp;
13 i64 j2c_sot_pos;
14 i64 j2c_sot_length;
// Module-level context handed to every function in this file.
// Its only member is a placeholder.
typedef struct localctx_struct {
	int reserved;
} lctx;
21 struct marker_info;
23 typedef void (*handler_fn_type)(deark *c, lctx *d, struct page_ctx *pg,
24 const struct marker_info *mi, i64 pos, i64 data_size);
26 #define FLAG_NO_DATA 0x0100
28 struct marker_info {
29 u8 seg_type;
30 unsigned int flags;
31 char shortname[12];
32 char longname[80];
33 handler_fn_type hfn;
36 // Static info about markers/segments.
37 struct marker_info1 {
38 u8 seg_type;
39 unsigned int flags;
40 const char *shortname;
41 const char *longname;
42 handler_fn_type hfn;
45 static void handle_comment(deark *c, lctx *d, i64 pos, i64 comment_size)
47 int write_to_file;
49 // If c->extract_level>=2, write the comment to a file;
50 // otherwise if we have debugging output, write (at least part of) it
51 // to the debug output;
52 // otherwise do nothing.
54 if(c->extract_level<2 && c->debug_level<1) return;
55 if(comment_size<1) return;
57 write_to_file = (c->extract_level>=2);
59 if(write_to_file) {
60 dbuf *outf = NULL;
62 outf = dbuf_create_output_file(c, "comment.txt", NULL, DE_CREATEFLAG_IS_AUX);
63 dbuf_copy_slice_convert_to_utf8(c->infile, pos, comment_size,
64 DE_ENCODING_LATIN1, outf, 0x2);
65 dbuf_close(outf);
67 else {
68 de_ucstring *s = NULL;
70 s = ucstring_create(c);
71 dbuf_read_to_ucstring_n(c->infile, pos, comment_size, DE_DBG_MAX_STRLEN,
72 s, 0, DE_ENCODING_LATIN1);
73 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(s));
74 ucstring_destroy(s);
78 static void handler_cme(deark *c, lctx *d, struct page_ctx *pg,
79 const struct marker_info *mi, i64 pos, i64 data_size)
81 i64 reg_val;
82 i64 comment_pos;
83 i64 comment_size;
84 const char *name;
86 if(data_size<2) goto done;
88 reg_val = de_getu16be(pos);
89 switch(reg_val) {
90 case 0: name="binary"; break;
91 case 1: name="text"; break;
92 default: name="?";
94 de_dbg(c, "comment/extension type: %d (%s)", (int)reg_val, name);
96 comment_pos = pos+2;
97 comment_size = data_size-2;
99 if(reg_val==1) {
100 handle_comment(c, d, comment_pos, comment_size);
102 else {
103 de_dbg_hexdump(c, c->infile, comment_pos, comment_size, 256, NULL, 0x1);
106 done:
110 static void handler_siz(deark *c, lctx *d, struct page_ctx *pg,
111 const struct marker_info *mi, i64 pos1, i64 len)
113 unsigned int capa;
114 i64 w, h;
115 i64 pos = pos1;
116 i64 ncomp;
117 i64 k;
119 capa = (unsigned int)de_getu16be_p(&pos);
120 de_dbg(c, "capabilities: 0x%04x", capa);
122 w = de_getu32be_p(&pos);
123 h = de_getu32be_p(&pos);
124 de_dbg(c, "dimensions of reference grid: %"I64_FMT DE_CHAR_TIMES "%"I64_FMT, w, h);
126 w = de_getu32be_p(&pos);
127 h = de_getu32be_p(&pos);
128 de_dbg(c, "offset to image area: %"I64_FMT",%"I64_FMT, w, h);
130 w = de_getu32be_p(&pos);
131 h = de_getu32be_p(&pos);
132 de_dbg(c, "dimensions of reference tile: %"I64_FMT DE_CHAR_TIMES "%"I64_FMT, w, h);
134 w = de_getu32be_p(&pos);
135 h = de_getu32be_p(&pos);
136 de_dbg(c, "offset to first tile: %"I64_FMT",%"I64_FMT, w, h);
138 ncomp = de_getu16be_p(&pos);
139 de_dbg(c, "number of components: %d", (int)ncomp);
141 for(k=0; k<ncomp; k++) {
142 u8 prec, xr, yr;
144 if(pos >= pos1+len) goto done;
145 de_dbg(c, "component[%d] info at %"I64_FMT, (int)k, pos);
146 de_dbg_indent(c, 1);
147 prec = de_getbyte_p(&pos);
148 de_dbg(c, "precision: %d", (int)prec);
149 xr = de_getbyte_p(&pos);
150 yr = de_getbyte_p(&pos);
151 de_dbg(c, "separation: %d,%d", (int)xr, (int)yr);
152 de_dbg_indent(c, -1);
155 done:
159 static void handler_tlm(deark *c, lctx *d, struct page_ctx *pg,
160 const struct marker_info *mi, i64 pos1, i64 len)
162 u8 b;
163 u8 item_size_code;
164 i64 item_size;
165 i64 pos = pos1;
166 u8 t_code, p_code;
167 i64 t_size, p_size;
168 i64 num_items;
169 i64 k;
171 if(len<2) goto done;
172 b = de_getbyte_p(&pos);
173 de_dbg(c, "index: %d", (int)b);
175 item_size_code = (i64)de_getbyte_p(&pos);
176 de_dbg(c, "item size code: 0x%02x", (unsigned int)item_size_code);
177 de_dbg_indent(c, 1);
178 t_code = (item_size_code & 0x30)>>4;
179 de_dbg(c, "size code for number field: %d", (int)t_code);
180 p_code = (item_size_code & 0x40)>>6;
181 de_dbg(c, "size code for length field: %d", (int)p_code);
182 de_dbg_indent(c, -1);
183 if(t_code==0) t_size=0;
184 else if(t_code==1) t_size = 1;
185 else if(t_code==2) t_size = 2;
186 else goto done;
187 if(p_code==0) p_size = 2;
188 else p_size = 4;
189 item_size = t_size + p_size;
191 num_items = (pos1 + len - pos)/item_size;
192 de_dbg(c, "calculated number of items: %d", (int)num_items);
194 for(k=0; k<num_items; k++) {
195 i64 x;
196 de_dbg(c, "item[%d] at %"I64_FMT, (int)k, pos);
197 de_dbg_indent(c, 1);
198 if(t_size>0) {
199 if(t_size==1) {
200 x = (i64)de_getbyte_p(&pos);
202 else {
203 x = de_getu16be_p(&pos);
205 de_dbg(c, "tile number: %u", (unsigned int)x);
208 if(p_size==2) {
209 x = de_getu16be_p(&pos);
211 else {
212 x = de_getu32be_p(&pos);
214 de_dbg(c, "tile length: %u", (unsigned int)x);
215 de_dbg_indent(c, -1);
218 done:
222 static void handler_sot(deark *c, lctx *d, struct page_ctx *pg,
223 const struct marker_info *mi, i64 pos1, i64 len)
225 i64 x;
226 i64 b;
227 i64 pos = pos1;
229 pg->j2c_sot_pos = 0;
230 pg->j2c_sot_length = 0;
231 if(len<8) return;
233 pg->j2c_sot_pos = pos1 - 4;
234 x = de_getu16be_p(&pos);
235 de_dbg(c, "tile number: %d", (int)x);
236 pg->j2c_sot_length = de_getu32be_p(&pos);
237 de_dbg(c, "length: %u", (unsigned int)pg->j2c_sot_length);
238 b = de_getbyte_p(&pos);
239 de_dbg(c, "tile-part instance: %d", (int)b);
240 b = de_getbyte_p(&pos);
241 de_dbg(c, "number of tile-parts: %d", (int)b);
244 static void handler_cod(deark *c, lctx *d, struct page_ctx *pg,
245 const struct marker_info *mi, i64 pos1, i64 len)
247 i64 pos = pos1;
248 u8 coding_style;
249 de_ucstring *s = NULL;
250 u8 b;
251 i64 n;
253 if(len<5) goto done;
254 coding_style = de_getbyte_p(&pos);
255 s = ucstring_create(c);
257 if((coding_style&0xf8)==0) {
258 switch(coding_style&0x01) {
259 case 0x0: ucstring_append_flags_item(s, "entropy coder, without partitions"); break;
260 case 0x1: ucstring_append_flags_item(s, "entropy coder, with partitions"); break;
262 switch((coding_style&0x02)>>1) {
263 case 0x0: ucstring_append_flags_item(s, "no SOP segments"); break;
264 case 0x1: ucstring_append_flags_item(s, "has SOP segments"); break;
266 switch((coding_style&0x04)>>2) {
267 case 0x0: ucstring_append_flags_item(s, "no EPH segments"); break;
268 case 0x1: ucstring_append_flags_item(s, "has EPH segments"); break;
271 else {
272 ucstring_append_flags_item(s, "?");
274 de_dbg(c, "coding style: 0x%02x (%s)", (unsigned int)coding_style,
275 ucstring_getpsz(s));
277 b = de_getbyte_p(&pos);
278 de_dbg(c, "progression order: %d", (int)b);
279 n = de_getu16be_p(&pos);
280 de_dbg(c, "number of layers: %d", (int)n);
281 (void)de_getbyte_p(&pos);
283 if(pos < pos1+len) {
284 // TODO
285 de_dbg2(c, "[not decoding the rest of this segment]");
288 done:
289 ucstring_destroy(s);
292 static void handler_qcd(deark *c, lctx *d, struct page_ctx *pg,
293 const struct marker_info *mi, i64 pos1, i64 len)
295 i64 pos = pos1;
296 u8 q_style;
298 if(len<1) goto done;
299 q_style = de_getbyte_p(&pos);
300 de_dbg(c, "quantization style: 0x%02x", (unsigned int)q_style);
302 if(pos < pos1+len) {
303 // TODO
304 de_dbg2(c, "[not decoding the rest of this segment]");
306 done:
310 static void handler_qcc(deark *c, lctx *d, struct page_ctx *pg,
311 const struct marker_info *mi, i64 pos1, i64 len)
313 i64 pos = pos1;
314 i64 compnum;
316 if(pg->ncomp<257) {
317 compnum = de_getbyte_p(&pos);
319 else {
320 compnum = de_getu16be_p(&pos);
322 de_dbg(c, "component number: %d", (int)compnum);
324 if(pos < pos1+len) {
325 // TODO
326 de_dbg2(c, "[not decoding the rest of this segment]");
330 static const struct marker_info1 marker_info1_arr[] = {
331 {0x4f, 0x0100, "SOC", "Start of codestream", NULL},
332 {0x51, 0x0000, "SIZ", "Image and tile size", handler_siz},
333 {0x52, 0x0000, "COD", "Coding style default", handler_cod},
334 {0x53, 0x0000, "COC", "Coding style component", NULL},
335 {0x55, 0x0000, "TLM", "Tile-part lengths, main header", handler_tlm},
336 {0x57, 0x0000, "PLM", "Packet length, main header", NULL},
337 {0x58, 0x0000, "PLT", "Packet length, tile-part header", NULL},
338 {0x5c, 0x0000, "QCD", "Quantization default", handler_qcd},
339 {0x5d, 0x0000, "QCC", "Quantization component", handler_qcc},
340 {0x5e, 0x0000, "RGN", "Region-of-interest", NULL},
341 {0x5f, 0x0000, "POD", "Progression order default", NULL},
342 {0x60, 0x0000, "PPM", "Packed packet headers, main header", NULL},
343 {0x61, 0x0000, "PPT", "Packed packet headers, tile-part header", NULL},
344 {0x64, 0x0000, "CME", "Comment and extension", handler_cme},
345 {0x90, 0x0000, "SOT", "Start of tile-part", handler_sot},
346 {0x91, 0x0000, "SOP", "Start of packet", NULL},
347 {0x92, 0x0100, "EPH", "End of packet header", NULL},
348 {0x93, 0x0100, "SOD", "Start of data", NULL},
349 {0xd9, 0x0100, "EOC", "End of codestream", NULL}
352 // Caller allocates mi
353 static int get_marker_info(deark *c, lctx *d, struct page_ctx *pg, u8 seg_type,
354 struct marker_info *mi)
356 i64 k;
358 de_zeromem(mi, sizeof(struct marker_info));
359 mi->seg_type = seg_type;
361 // First, try to find the segment type in the static marker info.
362 for(k=0; k<(i64)DE_ARRAYCOUNT(marker_info1_arr); k++) {
363 const struct marker_info1 *mi1 = &marker_info1_arr[k];
365 if(mi1->seg_type == seg_type) {
366 mi->flags = mi1->flags;
367 mi->hfn = mi1->hfn;
368 de_strlcpy(mi->shortname, mi1->shortname, sizeof(mi->shortname));
369 if(mi1->longname) {
370 de_snprintf(mi->longname, sizeof(mi->longname), "%s: %s",
371 mi1->shortname, mi1->longname);
373 goto done;
377 // Handle some pattern-based markers.
379 // fcd15444-1: "The marker range 0xFF30 - 0xFF3F is reserved [...] for markers
380 // without marker parameters."
381 if(seg_type>=0x30 && seg_type<=0x3f) {
382 mi->flags |= FLAG_NO_DATA;
385 de_strlcpy(mi->shortname, "???", sizeof(mi->shortname));
386 de_strlcpy(mi->longname, "???", sizeof(mi->longname));
387 return 0;
389 done:
390 if(!mi->longname[0]) {
391 // If no longname was set, use the shortname
392 de_strlcpy(mi->longname, mi->shortname, sizeof(mi->longname));
394 return 1;
397 static void do_segment(deark *c, lctx *d, struct page_ctx *pg, const struct marker_info *mi,
398 i64 payload_pos, i64 payload_size)
400 de_dbg(c, "segment 0x%02x (%s) at %d, dpos=%d, dlen=%d",
401 (unsigned int)mi->seg_type, mi->longname, (int)(payload_pos-4),
402 (int)payload_pos, (int)payload_size);
404 if(mi->hfn) {
405 // If a handler function is available, use it.
406 de_dbg_indent(c, 1);
407 mi->hfn(c, d, pg, mi, payload_pos, payload_size);
408 de_dbg_indent(c, -1);
412 static int do_read_scan_data(deark *c, lctx *d, struct page_ctx *pg,
413 i64 pos1, i64 *bytes_consumed)
415 i64 pos = pos1;
416 u8 b0, b1;
418 *bytes_consumed = c->infile->len - pos1; // default
419 de_dbg(c, "scan data at %d", (int)pos1);
421 de_dbg_indent(c, 1);
423 if(pg->j2c_sot_length>0) {
424 // The previous SOT segment may have told us where this scan data ends.
425 *bytes_consumed = pg->j2c_sot_pos + pg->j2c_sot_length - pos1;
426 if(*bytes_consumed < 0) *bytes_consumed = 0;
427 de_dbg(c, "[%"I64_FMT" bytes of scan data at %"I64_FMT"]",
428 *bytes_consumed, pos1);
429 pg->j2c_sot_pos = 0;
430 pg->j2c_sot_length = 0;
431 goto done;
434 while(1) {
435 if(pos >= c->infile->len) goto done;
436 b0 = de_getbyte_p(&pos);
437 if(b0==0xff) {
438 b1 = de_getbyte_p(&pos);
439 if(b1==0x00) {
440 ; // an escaped 0xff
442 else if(b1<0x90) {
443 // In J2C, 0xff bytes are not escaped if they're followed by a
444 // a byte less than 0x90.
447 else if(b1==0xff) { // a "fill byte" (TODO: Does J2C have these?)
448 pos--;
450 else {
451 // A marker that is not part of the scan.
452 // Subtract the bytes consumed by it, and stop.
453 pos -= 2;
454 *bytes_consumed = pos - pos1;
455 de_dbg(c, "end of scan data found at %d (len=%d)", (int)pos, (int)*bytes_consumed);
456 break;
461 done:
462 de_dbg_indent(c, -1);
463 return 1;
466 // Process a single JPEG codestream (through the EOC marker).
467 // Note: This module is structured like this because the code was split off
468 // from the jpeg module. Support for multiple codestreams is disabled, and
469 // might never need to be implemented.)
470 static int do_j2c_page(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
472 u8 b;
473 i64 pos = pos1;
474 i64 seg_size;
475 u8 seg_type;
476 int found_marker;
477 struct marker_info mi;
478 i64 scan_byte_count;
479 int retval = 0;
480 struct page_ctx *pg = NULL;
482 pg = de_malloc(c, sizeof(struct page_ctx));
484 found_marker = 0;
485 while(1) {
486 if(pos>=c->infile->len)
487 break;
488 b = de_getbyte_p(&pos);
489 if(b==0xff) {
490 found_marker = 1;
491 continue;
494 if(!found_marker) {
495 // Not an 0xff byte, and not preceded by an 0xff byte. Just ignore it.
496 continue;
499 found_marker = 0; // Reset this flag.
501 if(b==0x00) {
502 continue; // Escaped 0xff
505 seg_type = b;
507 get_marker_info(c, d, pg, seg_type, &mi);
509 if(mi.flags & FLAG_NO_DATA) {
510 de_dbg(c, "marker 0x%02x (%s) at %d", (unsigned int)seg_type,
511 mi.longname, (int)(pos-2));
513 if(seg_type==0xd9) { // EOC
514 retval = 1;
515 goto done;
518 if(seg_type==0x93) {
519 // SOD (JPEG 2000 marker sort of like SOS)
520 if(!do_read_scan_data(c, d, pg, pos, &scan_byte_count)) {
521 break;
523 pos += scan_byte_count;
526 continue;
529 // If we get here, we're reading a segment that has a size field.
530 seg_size = de_getu16be(pos);
531 if(pos<2) break; // bogus size
533 do_segment(c, d, pg, &mi, pos+2, seg_size-2);
535 pos += seg_size;
538 done:
539 if(pg) {
540 de_free(c, pg);
543 *bytes_consumed = pos - pos1;
544 return retval;
547 static void do_j2c_internal(deark *c, lctx *d)
549 i64 pos;
550 i64 bytes_consumed;
552 pos = 0;
553 if(pos >= c->infile->len) goto done;
554 bytes_consumed = 0;
555 do_j2c_page(c, d, pos, &bytes_consumed);
556 done:
560 static void de_run_j2c(deark *c, de_module_params *mparams)
562 lctx *d = NULL;
564 de_declare_fmt(c, "JPEG 2000 codestream");
565 d = de_malloc(c, sizeof(lctx));
566 do_j2c_internal(c, d);
567 de_free(c, d);
570 static int de_identify_j2c(deark *c)
572 if(!dbuf_memcmp(c->infile, 0, "\xff\x4f\xff\x51", 4))
573 return 100;
574 return 0;
577 void de_module_j2c(deark *c, struct deark_module_info *mi)
579 mi->id = "j2c";
580 mi->desc = "JPEG 2000 codestream";
581 mi->run_fn = de_run_j2c;
582 mi->identify_fn = de_identify_j2c;