Refactoring to use de_read_simple_palette
[deark.git] / modules / j2c.c
blobce6940b7e513cbbb1465325828e6ab9e94e8f16d
1 // This file is part of Deark.
2 // Copyright (C) 2018 Jason Summers
3 // See the file COPYING for terms of use.
5 // J2C - JPEG 2000 codestream
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_j2c);
11 struct page_ctx {
12 i64 ncomp;
13 i64 j2c_sot_pos;
14 i64 j2c_sot_length;
// Module-level context. It holds no real state at present; the single
// member only keeps the struct non-empty.
typedef struct localctx_struct {
	int reserved;
} lctx;
21 struct marker_info;
23 typedef void (*handler_fn_type)(deark *c, lctx *d, struct page_ctx *pg,
24 const struct marker_info *mi, i64 pos, i64 data_size);
26 #define FLAG_NO_DATA 0x0100
28 struct marker_info {
29 u8 seg_type;
30 unsigned int flags;
31 char shortname[12];
32 char longname[80];
33 handler_fn_type hfn;
36 // Static info about markers/segments.
37 struct marker_info1 {
38 u8 seg_type;
39 unsigned int flags;
40 const char *shortname;
41 const char *longname;
42 handler_fn_type hfn;
45 static void handle_comment(deark *c, lctx *d, i64 pos, i64 comment_size)
47 de_ucstring *s = NULL;
48 int write_to_file;
50 // If c->extract_level>=2, write the comment to a file;
51 // otherwise if we have debugging output, write (at least part of) it
52 // to the debug output;
53 // otherwise do nothing.
55 if(c->extract_level<2 && c->debug_level<1) return;
56 if(comment_size<1) return;
58 write_to_file = (c->extract_level>=2);
60 s = ucstring_create(c);
61 dbuf_read_to_ucstring(c->infile, pos, comment_size, s, 0, DE_ENCODING_LATIN1);
63 if(write_to_file) {
64 dbuf *outf = NULL;
65 outf = dbuf_create_output_file(c, "comment.txt", NULL, DE_CREATEFLAG_IS_AUX);
66 ucstring_write_as_utf8(c, s, outf, 1);
67 dbuf_close(outf);
69 else {
70 de_dbg(c, "comment: \"%s\"", ucstring_getpsz_d(s));
73 ucstring_destroy(s);
76 static void handler_cme(deark *c, lctx *d, struct page_ctx *pg,
77 const struct marker_info *mi, i64 pos, i64 data_size)
79 i64 reg_val;
80 i64 comment_pos;
81 i64 comment_size;
82 const char *name;
84 if(data_size<2) goto done;
86 reg_val = de_getu16be(pos);
87 switch(reg_val) {
88 case 0: name="binary"; break;
89 case 1: name="text"; break;
90 default: name="?";
92 de_dbg(c, "comment/extension type: %d (%s)", (int)reg_val, name);
94 comment_pos = pos+2;
95 comment_size = data_size-2;
97 if(reg_val==1) {
98 handle_comment(c, d, comment_pos, comment_size);
100 else {
101 de_dbg_hexdump(c, c->infile, comment_pos, comment_size, 256, NULL, 0x1);
104 done:
108 static void handler_siz(deark *c, lctx *d, struct page_ctx *pg,
109 const struct marker_info *mi, i64 pos1, i64 len)
111 unsigned int capa;
112 i64 w, h;
113 i64 pos = pos1;
114 i64 ncomp;
115 i64 k;
117 capa = (unsigned int)de_getu16be_p(&pos);
118 de_dbg(c, "capabilities: 0x%04x", capa);
120 w = de_getu32be_p(&pos);
121 h = de_getu32be_p(&pos);
122 de_dbg(c, "dimensions of reference grid: %"I64_FMT DE_CHAR_TIMES "%"I64_FMT, w, h);
124 w = de_getu32be_p(&pos);
125 h = de_getu32be_p(&pos);
126 de_dbg(c, "offset to image area: %"I64_FMT",%"I64_FMT, w, h);
128 w = de_getu32be_p(&pos);
129 h = de_getu32be_p(&pos);
130 de_dbg(c, "dimensions of reference tile: %"I64_FMT DE_CHAR_TIMES "%"I64_FMT, w, h);
132 w = de_getu32be_p(&pos);
133 h = de_getu32be_p(&pos);
134 de_dbg(c, "offset to first tile: %"I64_FMT",%"I64_FMT, w, h);
136 ncomp = de_getu16be_p(&pos);
137 de_dbg(c, "number of components: %d", (int)ncomp);
139 for(k=0; k<ncomp; k++) {
140 u8 prec, xr, yr;
142 if(pos >= pos1+len) goto done;
143 de_dbg(c, "component[%d] info at %"I64_FMT, (int)k, pos);
144 de_dbg_indent(c, 1);
145 prec = de_getbyte_p(&pos);
146 de_dbg(c, "precision: %d", (int)prec);
147 xr = de_getbyte_p(&pos);
148 yr = de_getbyte_p(&pos);
149 de_dbg(c, "separation: %d,%d", (int)xr, (int)yr);
150 de_dbg_indent(c, -1);
153 done:
157 static void handler_tlm(deark *c, lctx *d, struct page_ctx *pg,
158 const struct marker_info *mi, i64 pos1, i64 len)
160 u8 b;
161 u8 item_size_code;
162 i64 item_size;
163 i64 pos = pos1;
164 u8 t_code, p_code;
165 i64 t_size, p_size;
166 i64 num_items;
167 i64 k;
169 if(len<2) goto done;
170 b = de_getbyte_p(&pos);
171 de_dbg(c, "index: %d", (int)b);
173 item_size_code = (i64)de_getbyte_p(&pos);
174 de_dbg(c, "item size code: 0x%02x", (unsigned int)item_size_code);
175 de_dbg_indent(c, 1);
176 t_code = (item_size_code & 0x30)>>4;
177 de_dbg(c, "size code for number field: %d", (int)t_code);
178 p_code = (item_size_code & 0x40)>>6;
179 de_dbg(c, "size code for length field: %d", (int)p_code);
180 de_dbg_indent(c, -1);
181 if(t_code==0) t_size=0;
182 else if(t_code==1) t_size = 1;
183 else if(t_code==2) t_size = 2;
184 else goto done;
185 if(p_code==0) p_size = 2;
186 else p_size = 4;
187 item_size = t_size + p_size;
189 num_items = (pos1 + len - pos)/item_size;
190 de_dbg(c, "calculated number of items: %d", (int)num_items);
192 for(k=0; k<num_items; k++) {
193 i64 x;
194 de_dbg(c, "item[%d] at %"I64_FMT, (int)k, pos);
195 de_dbg_indent(c, 1);
196 if(t_size>0) {
197 if(t_size==1) {
198 x = (i64)de_getbyte_p(&pos);
200 else {
201 x = de_getu16be_p(&pos);
203 de_dbg(c, "tile number: %u", (unsigned int)x);
206 if(p_size==2) {
207 x = de_getu16be_p(&pos);
209 else {
210 x = de_getu32be_p(&pos);
212 de_dbg(c, "tile length: %u", (unsigned int)x);
213 de_dbg_indent(c, -1);
216 done:
220 static void handler_sot(deark *c, lctx *d, struct page_ctx *pg,
221 const struct marker_info *mi, i64 pos1, i64 len)
223 i64 x;
224 i64 b;
225 i64 pos = pos1;
227 pg->j2c_sot_pos = 0;
228 pg->j2c_sot_length = 0;
229 if(len<8) return;
231 pg->j2c_sot_pos = pos1 - 4;
232 x = de_getu16be_p(&pos);
233 de_dbg(c, "tile number: %d", (int)x);
234 pg->j2c_sot_length = de_getu32be_p(&pos);
235 de_dbg(c, "length: %u", (unsigned int)pg->j2c_sot_length);
236 b = de_getbyte_p(&pos);
237 de_dbg(c, "tile-part instance: %d", (int)b);
238 b = de_getbyte_p(&pos);
239 de_dbg(c, "number of tile-parts: %d", (int)b);
242 static void handler_cod(deark *c, lctx *d, struct page_ctx *pg,
243 const struct marker_info *mi, i64 pos1, i64 len)
245 i64 pos = pos1;
246 u8 coding_style;
247 de_ucstring *s = NULL;
248 u8 b;
249 i64 n;
251 if(len<5) goto done;
252 coding_style = de_getbyte_p(&pos);
253 s = ucstring_create(c);
255 if((coding_style&0xf8)==0) {
256 switch(coding_style&0x01) {
257 case 0x0: ucstring_append_flags_item(s, "entropy coder, without partitions"); break;
258 case 0x1: ucstring_append_flags_item(s, "entropy coder, with partitions"); break;
260 switch((coding_style&0x02)>>1) {
261 case 0x0: ucstring_append_flags_item(s, "no SOP segments"); break;
262 case 0x1: ucstring_append_flags_item(s, "has SOP segments"); break;
264 switch((coding_style&0x04)>>2) {
265 case 0x0: ucstring_append_flags_item(s, "no EPH segments"); break;
266 case 0x1: ucstring_append_flags_item(s, "has EPH segments"); break;
269 else {
270 ucstring_append_flags_item(s, "?");
272 de_dbg(c, "coding style: 0x%02x (%s)", (unsigned int)coding_style,
273 ucstring_getpsz(s));
275 b = de_getbyte_p(&pos);
276 de_dbg(c, "progression order: %d", (int)b);
277 n = de_getu16be_p(&pos);
278 de_dbg(c, "number of layers: %d", (int)n);
279 (void)de_getbyte_p(&pos);
281 if(pos < pos1+len) {
282 // TODO
283 de_dbg2(c, "[not decoding the rest of this segment]");
286 done:
287 ucstring_destroy(s);
290 static void handler_qcd(deark *c, lctx *d, struct page_ctx *pg,
291 const struct marker_info *mi, i64 pos1, i64 len)
293 i64 pos = pos1;
294 u8 q_style;
296 if(len<1) goto done;
297 q_style = de_getbyte_p(&pos);
298 de_dbg(c, "quantization style: 0x%02x", (unsigned int)q_style);
300 if(pos < pos1+len) {
301 // TODO
302 de_dbg2(c, "[not decoding the rest of this segment]");
304 done:
308 static void handler_qcc(deark *c, lctx *d, struct page_ctx *pg,
309 const struct marker_info *mi, i64 pos1, i64 len)
311 i64 pos = pos1;
312 i64 compnum;
314 if(pg->ncomp<257) {
315 compnum = de_getbyte_p(&pos);
317 else {
318 compnum = de_getu16be_p(&pos);
320 de_dbg(c, "component number: %d", (int)compnum);
322 if(pos < pos1+len) {
323 // TODO
324 de_dbg2(c, "[not decoding the rest of this segment]");
328 static const struct marker_info1 marker_info1_arr[] = {
329 {0x4f, 0x0100, "SOC", "Start of codestream", NULL},
330 {0x51, 0x0000, "SIZ", "Image and tile size", handler_siz},
331 {0x52, 0x0000, "COD", "Coding style default", handler_cod},
332 {0x53, 0x0000, "COC", "Coding style component", NULL},
333 {0x55, 0x0000, "TLM", "Tile-part lengths, main header", handler_tlm},
334 {0x57, 0x0000, "PLM", "Packet length, main header", NULL},
335 {0x58, 0x0000, "PLT", "Packet length, tile-part header", NULL},
336 {0x5c, 0x0000, "QCD", "Quantization default", handler_qcd},
337 {0x5d, 0x0000, "QCC", "Quantization component", handler_qcc},
338 {0x5e, 0x0000, "RGN", "Region-of-interest", NULL},
339 {0x5f, 0x0000, "POD", "Progression order default", NULL},
340 {0x60, 0x0000, "PPM", "Packed packet headers, main header", NULL},
341 {0x61, 0x0000, "PPT", "Packed packet headers, tile-part header", NULL},
342 {0x64, 0x0000, "CME", "Comment and extension", handler_cme},
343 {0x90, 0x0000, "SOT", "Start of tile-part", handler_sot},
344 {0x91, 0x0000, "SOP", "Start of packet", NULL},
345 {0x92, 0x0100, "EPH", "End of packet header", NULL},
346 {0x93, 0x0100, "SOD", "Start of data", NULL},
347 {0xd9, 0x0100, "EOC", "End of codestream", NULL}
350 // Caller allocates mi
351 static int get_marker_info(deark *c, lctx *d, struct page_ctx *pg, u8 seg_type,
352 struct marker_info *mi)
354 i64 k;
356 de_zeromem(mi, sizeof(struct marker_info));
357 mi->seg_type = seg_type;
359 // First, try to find the segment type in the static marker info.
360 for(k=0; k<(i64)DE_ARRAYCOUNT(marker_info1_arr); k++) {
361 const struct marker_info1 *mi1 = &marker_info1_arr[k];
363 if(mi1->seg_type == seg_type) {
364 mi->flags = mi1->flags;
365 mi->hfn = mi1->hfn;
366 de_strlcpy(mi->shortname, mi1->shortname, sizeof(mi->shortname));
367 if(mi1->longname) {
368 de_snprintf(mi->longname, sizeof(mi->longname), "%s: %s",
369 mi1->shortname, mi1->longname);
371 goto done;
375 // Handle some pattern-based markers.
377 // fcd15444-1: "The marker range 0xFF30 - 0xFF3F is reserved [...] for markers
378 // without marker parameters."
379 if(seg_type>=0x30 && seg_type<=0x3f) {
380 mi->flags |= FLAG_NO_DATA;
383 de_strlcpy(mi->shortname, "???", sizeof(mi->shortname));
384 de_strlcpy(mi->longname, "???", sizeof(mi->longname));
385 return 0;
387 done:
388 if(!mi->longname[0]) {
389 // If no longname was set, use the shortname
390 de_strlcpy(mi->longname, mi->shortname, sizeof(mi->longname));
392 return 1;
395 static void do_segment(deark *c, lctx *d, struct page_ctx *pg, const struct marker_info *mi,
396 i64 payload_pos, i64 payload_size)
398 de_dbg(c, "segment 0x%02x (%s) at %d, dpos=%d, dlen=%d",
399 (unsigned int)mi->seg_type, mi->longname, (int)(payload_pos-4),
400 (int)payload_pos, (int)payload_size);
402 if(mi->hfn) {
403 // If a handler function is available, use it.
404 de_dbg_indent(c, 1);
405 mi->hfn(c, d, pg, mi, payload_pos, payload_size);
406 de_dbg_indent(c, -1);
410 static int do_read_scan_data(deark *c, lctx *d, struct page_ctx *pg,
411 i64 pos1, i64 *bytes_consumed)
413 i64 pos = pos1;
414 u8 b0, b1;
416 *bytes_consumed = c->infile->len - pos1; // default
417 de_dbg(c, "scan data at %d", (int)pos1);
419 de_dbg_indent(c, 1);
421 if(pg->j2c_sot_length>0) {
422 // The previous SOT segment may have told us where this scan data ends.
423 *bytes_consumed = pg->j2c_sot_pos + pg->j2c_sot_length - pos1;
424 if(*bytes_consumed < 0) *bytes_consumed = 0;
425 de_dbg(c, "[%"I64_FMT" bytes of scan data at %"I64_FMT"]",
426 *bytes_consumed, pos1);
427 pg->j2c_sot_pos = 0;
428 pg->j2c_sot_length = 0;
429 goto done;
432 while(1) {
433 if(pos >= c->infile->len) goto done;
434 b0 = de_getbyte_p(&pos);
435 if(b0==0xff) {
436 b1 = de_getbyte_p(&pos);
437 if(b1==0x00) {
438 ; // an escaped 0xff
440 else if(b1<0x90) {
441 // In J2C, 0xff bytes are not escaped if they're followed by a
442 // a byte less than 0x90.
445 else if(b1==0xff) { // a "fill byte" (TODO: Does J2C have these?)
446 pos--;
448 else {
449 // A marker that is not part of the scan.
450 // Subtract the bytes consumed by it, and stop.
451 pos -= 2;
452 *bytes_consumed = pos - pos1;
453 de_dbg(c, "end of scan data found at %d (len=%d)", (int)pos, (int)*bytes_consumed);
454 break;
459 done:
460 de_dbg_indent(c, -1);
461 return 1;
464 // Process a single JPEG codestream (through the EOC marker).
465 // Note: This module is structured like this because the code was split off
466 // from the jpeg module. Support for multiple codestreams is disabled, and
467 // might never need to be implemented.)
468 static int do_j2c_page(deark *c, lctx *d, i64 pos1, i64 *bytes_consumed)
470 u8 b;
471 i64 pos = pos1;
472 i64 seg_size;
473 u8 seg_type;
474 int found_marker;
475 struct marker_info mi;
476 i64 scan_byte_count;
477 int retval = 0;
478 struct page_ctx *pg = NULL;
480 pg = de_malloc(c, sizeof(struct page_ctx));
482 found_marker = 0;
483 while(1) {
484 if(pos>=c->infile->len)
485 break;
486 b = de_getbyte_p(&pos);
487 if(b==0xff) {
488 found_marker = 1;
489 continue;
492 if(!found_marker) {
493 // Not an 0xff byte, and not preceded by an 0xff byte. Just ignore it.
494 continue;
497 found_marker = 0; // Reset this flag.
499 if(b==0x00) {
500 continue; // Escaped 0xff
503 seg_type = b;
505 get_marker_info(c, d, pg, seg_type, &mi);
507 if(mi.flags & FLAG_NO_DATA) {
508 de_dbg(c, "marker 0x%02x (%s) at %d", (unsigned int)seg_type,
509 mi.longname, (int)(pos-2));
511 if(seg_type==0xd9) { // EOC
512 retval = 1;
513 goto done;
516 if(seg_type==0x93) {
517 // SOD (JPEG 2000 marker sort of like SOS)
518 if(!do_read_scan_data(c, d, pg, pos, &scan_byte_count)) {
519 break;
521 pos += scan_byte_count;
524 continue;
527 // If we get here, we're reading a segment that has a size field.
528 seg_size = de_getu16be(pos);
529 if(pos<2) break; // bogus size
531 do_segment(c, d, pg, &mi, pos+2, seg_size-2);
533 pos += seg_size;
536 done:
537 if(pg) {
538 de_free(c, pg);
541 *bytes_consumed = pos - pos1;
542 return retval;
545 static void do_j2c_internal(deark *c, lctx *d)
547 i64 pos;
548 i64 bytes_consumed;
550 pos = 0;
551 if(pos >= c->infile->len) goto done;
552 bytes_consumed = 0;
553 do_j2c_page(c, d, pos, &bytes_consumed);
554 done:
558 static void de_run_j2c(deark *c, de_module_params *mparams)
560 lctx *d = NULL;
562 de_declare_fmt(c, "JPEG 2000 codestream");
563 d = de_malloc(c, sizeof(lctx));
564 do_j2c_internal(c, d);
565 de_free(c, d);
568 static int de_identify_j2c(deark *c)
570 if(!dbuf_memcmp(c->infile, 0, "\xff\x4f\xff\x51", 4))
571 return 100;
572 return 0;
575 void de_module_j2c(deark *c, struct deark_module_info *mi)
577 mi->id = "j2c";
578 mi->desc = "JPEG 2000 codestream";
579 mi->run_fn = de_run_j2c;
580 mi->identify_fn = de_identify_j2c;