Added support for cp932 (Shift-JIS) conversion
[deark.git] / modules / pnm.c
blob e2b502ff08f895c5cbd13051e8d713210178a524
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Netpbm formats
6 // PNM (PBM, PGM, PPM)
7 // PAM
9 #include <deark-config.h>
10 #include <deark-private.h>
11 DE_DECLARE_MODULE(de_module_pnm);
// Internal format codes.
// Codes 1 through 6 are assumed to equal the digit that follows 'P' in the
// file signature ("P1".."P6"). PAM gets its own code, 7.
#define FMT_PBM_ASCII    1
#define FMT_PGM_ASCII    2
#define FMT_PPM_ASCII    3
#define FMT_PBM_BINARY   4
#define FMT_PGM_BINARY   5
#define FMT_PPM_BINARY   6
#define FMT_PAM          7
22 struct page_ctx {
23 int fmt;
24 const char *fmt_name;
25 i64 width, height;
26 int maxval;
28 int pam_depth; // = samples per pixel
29 #define PAMSUBTYPE_GRAY 1
30 #define PAMSUBTYPE_RGB 2
31 int pam_subtype;
32 int has_alpha;
34 i64 hdr_parse_pos;
35 i64 image_data_len;
38 typedef struct localctx_struct {
39 int last_fmt;
40 i64 last_bytesused;
41 } lctx;
43 static int fmt_is_pbm(int fmt)
45 return (fmt==FMT_PBM_ASCII || fmt==FMT_PBM_BINARY);
48 static int fmt_is_ppm(int fmt)
50 return (fmt==FMT_PPM_ASCII || fmt==FMT_PPM_BINARY);
53 static int fmt_is_binary(int fmt)
55 return (fmt==FMT_PBM_BINARY || fmt==FMT_PGM_BINARY ||
56 fmt==FMT_PPM_BINARY || fmt==FMT_PAM);
59 static int is_pnm_whitespace(u8 b)
61 // Whitespace = space, CR, LF, TAB, VT, or FF
62 return (b==9 || b==10 || b==11 || b==12 || b==13 || b==32);
65 static int read_next_token(deark *c, lctx *d, struct page_ctx *pg,
66 char *tokenbuf, size_t tokenbuflen)
68 u8 b;
69 size_t token_len = 0;
70 int in_comment = 0;
72 token_len = 0;
73 while(1) {
74 if(pg->hdr_parse_pos >= c->infile->len) return 0;
76 if(token_len >= tokenbuflen) {
77 return 0; // Token too long.
80 b = de_getbyte_p(&pg->hdr_parse_pos);
82 if(in_comment) {
83 if(b==10 || b==13) {
84 in_comment = 0;
86 continue;
88 else if(b=='#') {
89 in_comment = 1;
90 continue;
92 else if(is_pnm_whitespace(b)) {
93 if(token_len>0) {
94 tokenbuf[token_len] = '\0';
95 return 1;
97 else {
98 continue; // Skip leading whitespace.
101 else {
102 // Append the character to the token.
103 tokenbuf[token_len] = (char)b;
104 token_len++;
108 return 0;
111 static int read_pnm_header(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
113 char tokenbuf[100];
114 int retval = 0;
116 de_dbg(c, "header at %"I64_FMT, pos1);
117 de_dbg_indent(c, 1);
119 de_dbg(c, "format: %s", pg->fmt_name);
120 pg->hdr_parse_pos = pos1+2; // Skip "P?"
122 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
123 pg->width = de_atoi64(tokenbuf);
124 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
125 pg->height = de_atoi64(tokenbuf);
126 de_dbg_dimensions(c, pg->width, pg->height);
128 if(fmt_is_pbm(pg->fmt)) {
129 pg->maxval = 1;
131 else {
132 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
133 pg->maxval = de_atoi(tokenbuf);
134 de_dbg(c, "maxval: %d", pg->maxval);
137 retval = 1;
138 done:
139 de_dbg_indent(c, -1);
140 return retval;
143 // Read a token from a NUL-terminated string.
144 static int read_next_pam_token(deark *c, lctx *d, struct page_ctx *pg,
145 const char *linebuf, size_t linebuflen,
146 char *tokenbuf, size_t tokenbuflen, i64 *curpos)
148 u8 b;
149 i64 token_len = 0;
150 i64 linepos;
152 token_len = 0;
154 linepos = *curpos;
155 while(1) {
156 if(token_len >= (i64)tokenbuflen) {
157 // Token too long.
158 return 0;
161 if(linepos >= (i64)linebuflen) {
162 return 0;
164 b = linebuf[linepos++];
165 if(b==0) break; // End of line
167 if(is_pnm_whitespace(b)) {
168 if(token_len>0) {
169 break;
171 else {
172 continue; // Skip leading whitespace.
175 else {
176 // Append the character to the token.
177 tokenbuf[token_len++] = b;
181 tokenbuf[token_len] = '\0';
182 *curpos = linepos;
183 return 1;
186 static int read_pam_header_line(deark *c, lctx *d, struct page_ctx *pg, i64 pos,
187 i64 *content_len, i64 *total_len,
188 char *linebuf, size_t linebuf_len)
190 int ret;
191 i64 amt_to_read;
193 linebuf[0]='\0';
195 ret = dbuf_find_line(c->infile, pos,
196 content_len, total_len);
198 if(!ret) return 0;
200 amt_to_read = *content_len;
201 if(amt_to_read > (i64)(linebuf_len-1)) amt_to_read = (i64)(linebuf_len-1);
203 de_read((u8*)linebuf, pos, amt_to_read);
205 *content_len = amt_to_read;
206 linebuf[amt_to_read] = '\0';
207 return 1;
210 static int read_pam_header(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
212 int ret;
213 i64 pos = pos1;
214 int retval = 0;
215 int tupltype_line_count = 0;
216 char linebuf[200];
217 char token1buf[200];
218 char token2buf[200];
220 de_dbg(c, "header at %"I64_FMT, pos1);
221 de_dbg_indent(c, 1);
223 pos += 3; // Skip "P7\n"
225 while(1) {
226 i64 content_len;
227 i64 total_len;
228 i64 curpos;
230 ret = read_pam_header_line(c, d, pg, pos, &content_len, &total_len,
231 linebuf, sizeof(linebuf));
232 pos += total_len;
234 if(!ret) {
235 de_err(c, "Invalid PAM header");
236 goto done;
239 if(content_len>0 && (de_getbyte(pos)=='#')) {
240 // comment line
241 pos += total_len;
242 continue;
245 curpos = 0;
246 if(!read_next_pam_token(c, d, pg, linebuf, sizeof(linebuf),
247 token1buf, sizeof(token1buf), &curpos))
249 goto done;
252 if(!de_strcmp(token1buf, "ENDHDR")) {
253 break;
256 // Other header lines have a param
257 if(!read_next_pam_token(c, d, pg, linebuf, sizeof(linebuf),
258 token2buf, sizeof(token2buf), &curpos))
260 goto done;
263 if(!de_strcmp(token1buf, "WIDTH")) {
264 pg->width = de_atoi64(token2buf);
266 else if(!de_strcmp(token1buf, "HEIGHT")) {
267 pg->height = de_atoi64(token2buf);
269 else if(!de_strcmp(token1buf, "DEPTH")) {
270 pg->pam_depth = de_atoi(token2buf);
272 else if(!de_strcmp(token1buf, "MAXVAL")) {
273 pg->maxval = de_atoi(token2buf);
275 else if(!de_strcmp(token1buf, "TUPLTYPE")) {
276 // FIXME: The "TUPLTYPE" line(s) is wacky, and seems underspecified.
277 // We do not support it correctly.
278 // But I doubt any real PAM encoders are pathological enough to
279 // require us to support its wackiness.
280 if(tupltype_line_count>0) {
281 de_err(c, "Multiple TUPLTYPE lines are not supported");
282 goto done;
284 tupltype_line_count++;
286 if(!de_strcmp(token2buf, "BLACKANDWHITE")) {
287 pg->pam_subtype = PAMSUBTYPE_GRAY;
288 pg->maxval = 1;
290 else if(!de_strcmp(token2buf, "BLACKANDWHITE_ALPHA")) {
291 pg->pam_subtype = PAMSUBTYPE_GRAY;
292 pg->has_alpha = 1;
293 pg->maxval = 1;
295 else if(!de_strcmp(token2buf, "GRAYSCALE")) {
296 pg->pam_subtype = PAMSUBTYPE_GRAY;
298 else if(!de_strcmp(token2buf, "GRAYSCALE_ALPHA")) {
299 pg->pam_subtype = PAMSUBTYPE_GRAY;
300 pg->has_alpha = 1;
302 else if(!de_strcmp(token2buf, "RGB")) {
303 pg->pam_subtype = PAMSUBTYPE_RGB;
305 else if(!de_strcmp(token2buf, "RGB_ALPHA")) {
306 pg->pam_subtype = PAMSUBTYPE_RGB;
307 pg->has_alpha = 1;
309 else {
310 de_err(c, "Unsupported color type");
311 goto done;
316 if(tupltype_line_count==0) {
317 // The TUPLTYPE field is technically optional, but the image is not
318 // portable without it.
319 switch(pg->pam_depth) {
320 case 1:
321 pg->pam_subtype = PAMSUBTYPE_GRAY;
322 break;
323 case 2:
324 pg->pam_subtype = PAMSUBTYPE_GRAY;
325 pg->has_alpha = 1;
326 break;
327 case 3:
328 pg->pam_subtype = PAMSUBTYPE_RGB;
329 break;
330 case 4:
331 pg->pam_subtype = PAMSUBTYPE_RGB;
332 pg->has_alpha = 1;
333 break;
336 if(pg->pam_subtype!=0) {
337 de_warn(c, "Color type not specified. Attempting to guess.");
341 pg->hdr_parse_pos = pos;
342 retval = 1;
343 done:
344 de_dbg_indent(c, -1);
345 return retval;
348 static int do_image_pbm_ascii(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
350 de_bitmap *img = NULL;
351 i64 xpos, ypos;
352 i64 pos = pos1;
353 u8 b;
354 u8 v;
356 img = de_bitmap_create(c, pg->width, pg->height, 1);
358 xpos=0; ypos=0;
359 while(1) {
360 if(pos >= c->infile->len) break; // end of file
361 if(ypos==(pg->height-1) && xpos>=pg->width) break; // end of image
362 if(ypos>=pg->height) break;
364 b = de_getbyte_p(&pos);
365 if(b=='1') v=0;
366 else if(b=='0') v=255;
367 else continue;
369 de_bitmap_setpixel_gray(img, xpos, ypos, v);
370 xpos++;
371 if(xpos>=pg->width) {
372 ypos++;
373 xpos=0;
377 de_bitmap_write_to_file_finfo(img, NULL, DE_CREATEFLAG_IS_BWIMG);
378 de_bitmap_destroy(img);
379 return 1;
382 static int do_image_pgm_ppm_pam_binary(deark *c, lctx *d, struct page_ctx *pg,
383 dbuf *inf, i64 pos1);
385 struct pgm_ppm_ascii_ctx {
386 u8 intermed_nbytes_per_sample;
387 i64 sample_count;
388 dbuf *intermed_img;
389 size_t samplebuf_used;
390 char samplebuf[32];
393 static void pgm_ppm_ascii_handle_sample(struct pgm_ppm_ascii_ctx *actx)
395 i64 v;
397 actx->samplebuf[actx->samplebuf_used] = '\0'; // NUL terminate for de_atoi64()
398 v = de_atoi64((const char*)actx->samplebuf);
399 actx->samplebuf_used = 0;
401 if(actx->intermed_nbytes_per_sample==1) {
402 dbuf_writebyte(actx->intermed_img, (u8)v);
404 else {
405 dbuf_writeu16be(actx->intermed_img, v);
408 actx->sample_count++;
411 // Convert the ASCII image data to binary, then call the function to process
412 // that binary data.
413 static int do_image_pgm_ppm_ascii(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
415 int nsamples_per_pixel;
416 i64 nsamples_per_image;
417 i64 intermed_nbytes_per_image;
418 i64 pos = pos1;
419 int retval = 0;
420 struct pgm_ppm_ascii_ctx actx;
422 de_zeromem(&actx, sizeof(struct pgm_ppm_ascii_ctx));
423 if(fmt_is_ppm(pg->fmt)) nsamples_per_pixel = 3;
424 else nsamples_per_pixel = 1;
426 nsamples_per_image = (i64)nsamples_per_pixel * pg->height * pg->width;
427 actx.intermed_nbytes_per_sample = (pg->maxval>255) ? 2 : 1;
428 intermed_nbytes_per_image = nsamples_per_image * (i64)actx.intermed_nbytes_per_sample;
430 actx.intermed_img = dbuf_create_membuf(c, intermed_nbytes_per_image, 0x1);
431 actx.samplebuf_used=0;
433 actx.sample_count = 0;
435 while(1) {
436 u8 b;
438 if(actx.sample_count >= nsamples_per_image) break;
439 if(pos >= c->infile->len) { // end of file
440 if(actx.samplebuf_used>0) {
441 pgm_ppm_ascii_handle_sample(&actx);
443 break;
446 b = de_getbyte_p(&pos);
447 if(is_pnm_whitespace(b)) {
448 if(actx.samplebuf_used>0) {
449 // Completed a sample
450 pgm_ppm_ascii_handle_sample(&actx);
452 else { // Skip extra whitespace
453 continue;
456 else {
457 // Non-whitespace. Save for later.
458 if(actx.samplebuf_used < sizeof(actx.samplebuf_used)-1) {
459 actx.samplebuf[actx.samplebuf_used++] = b;
464 retval = do_image_pgm_ppm_pam_binary(c, d, pg, actx.intermed_img, 0);
465 dbuf_close(actx.intermed_img);
466 return retval;
469 static int do_image_pbm_binary(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
471 i64 rowspan;
473 rowspan = (pg->width+7)/8;
474 pg->image_data_len = rowspan * pg->height;
476 de_convert_and_write_image_bilevel2(c->infile, pos1, pg->width, pg->height,
477 rowspan, DE_CVTF_WHITEISZERO, NULL, 0);
478 return 1;
481 static int do_image_pgm_ppm_pam_binary(deark *c, lctx *d, struct page_ctx *pg,
482 dbuf *inf, i64 pos1)
484 de_bitmap *img = NULL;
485 de_bitmap *imglo = NULL;
486 i64 rowspan;
487 int nsamples_per_pixel; // For both input and output
488 u8 nbytes_per_sample;
489 i64 i, j;
490 i64 pos = pos1;
491 UI samp_ori[4];
492 u8 samp_adj[4]; // most significant 8 bits
493 u8 samp_adj_lo[4];
494 u32 clr;
495 int retval = 0;
497 de_zeromem(samp_adj_lo, sizeof(samp_adj_lo));
499 if(pg->fmt==FMT_PAM) {
500 nsamples_per_pixel = pg->pam_depth;
502 if((pg->pam_subtype==PAMSUBTYPE_GRAY && !pg->has_alpha && nsamples_per_pixel==1) ||
503 (pg->pam_subtype==PAMSUBTYPE_GRAY && pg->has_alpha && nsamples_per_pixel==2) ||
504 (pg->pam_subtype==PAMSUBTYPE_RGB && !pg->has_alpha && nsamples_per_pixel==3) ||
505 (pg->pam_subtype==PAMSUBTYPE_RGB && pg->has_alpha && nsamples_per_pixel==4))
509 else {
510 de_err(c, "Unsupported PAM format");
511 goto done;
514 else if(fmt_is_ppm(pg->fmt)) {
515 nsamples_per_pixel = 3;
517 else {
518 nsamples_per_pixel = 1;
521 if(nsamples_per_pixel<1 || nsamples_per_pixel>4) {
522 de_err(c, "Unsupported samples/pixel: %d", nsamples_per_pixel);
525 if(pg->maxval<=255) nbytes_per_sample = 1;
526 else nbytes_per_sample = 2;
528 rowspan = pg->width * (i64)nsamples_per_pixel * (i64)nbytes_per_sample;
529 pg->image_data_len = rowspan * pg->height;
531 img = de_bitmap_create(c, pg->width, pg->height, nsamples_per_pixel);
532 if(nbytes_per_sample!=1) {
533 imglo = de_bitmap_create(c, pg->width, pg->height, nsamples_per_pixel);
536 for(j=0; j<pg->height; j++) {
537 for(i=0; i<pg->width; i++) {
538 int k;
540 for(k=0; k<nsamples_per_pixel; k++) {
541 if(nbytes_per_sample==1) {
542 samp_ori[k] = dbuf_getbyte_p(inf, &pos);
544 else {
545 samp_ori[k] = (UI)dbuf_getu16be_p(inf, &pos);
548 if(nbytes_per_sample==1) {
549 samp_adj[k] = de_scale_n_to_255(pg->maxval, samp_ori[k]);
551 else {
552 de_scale_n_to_16bit(pg->maxval, (int)samp_ori[k], &samp_adj[k], &samp_adj_lo[k]);
556 switch(nsamples_per_pixel) {
557 case 4:
558 clr = DE_MAKE_RGBA(samp_adj[0], samp_adj[1], samp_adj[2], samp_adj[3]);
559 de_bitmap_setpixel_rgba(img, i, j, clr);
560 if(imglo) {
561 clr = DE_MAKE_RGBA(samp_adj_lo[0], samp_adj_lo[1], samp_adj_lo[2], samp_adj_lo[3]);
562 de_bitmap_setpixel_rgba(imglo, i, j, clr);
564 break;
565 case 3:
566 clr = DE_MAKE_RGB(samp_adj[0], samp_adj[1], samp_adj[2]);
567 de_bitmap_setpixel_rgb(img, i, j, clr);
568 if(imglo) {
569 clr = DE_MAKE_RGB(samp_adj_lo[0], samp_adj_lo[1], samp_adj_lo[2]);
570 de_bitmap_setpixel_rgb(imglo, i, j, clr);
572 break;
573 case 2:
574 clr = DE_MAKE_RGBA(samp_adj[0], samp_adj[0], samp_adj[0], samp_adj[1]);
575 de_bitmap_setpixel_rgba(img, i, j, clr);
576 if(imglo) {
577 clr = DE_MAKE_RGBA(samp_adj_lo[0], samp_adj_lo[0], samp_adj_lo[0], samp_adj_lo[1]);
578 de_bitmap_setpixel_rgba(imglo, i, j, clr);
580 break;
581 default: // Assuming nsamples==1
582 de_bitmap_setpixel_gray(img, i, j, samp_adj[0]);
583 if(imglo) {
584 de_bitmap_setpixel_gray(imglo, i, j, samp_adj_lo[0]);
590 de_bitmap16_write_to_file_finfo(img, imglo, NULL, DE_CREATEFLAG_OPT_IMAGE);
591 retval = 1;
593 done:
594 de_bitmap_destroy(img);
595 de_bitmap_destroy(imglo);
596 return retval;
599 static int do_image(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
601 int retval = 0;
603 de_dbg(c, "image data at %"I64_FMT, pos1);
604 de_dbg_indent(c, 1);
606 if(pg->maxval<1 || pg->maxval>65535) {
607 de_err(c, "Invalid maxval: %d", pg->maxval);
608 goto done;
610 if(!de_good_image_dimensions(c, pg->width, pg->height)) goto done;
612 switch(pg->fmt) {
613 case FMT_PBM_ASCII:
614 if(!do_image_pbm_ascii(c, d, pg, pos1)) goto done;
615 break;
616 case FMT_PGM_ASCII:
617 case FMT_PPM_ASCII:
618 if(!do_image_pgm_ppm_ascii(c, d, pg, pos1)) goto done;
619 break;
620 case FMT_PBM_BINARY:
621 if(!do_image_pbm_binary(c, d, pg, pos1)) goto done;
622 break;
623 case FMT_PGM_BINARY:
624 case FMT_PPM_BINARY:
625 case FMT_PAM:
626 if(!do_image_pgm_ppm_pam_binary(c, d, pg, c->infile, pos1)) goto done;
627 break;
628 default:
629 de_err(c, "Unsupported PNM format");
630 goto done;
633 retval = 1;
635 done:
636 de_dbg_indent(c, -1);
637 return retval;
640 static int identify_fmt(deark *c, i64 pos)
642 u8 buf[3];
644 de_read(buf, pos, 3);
645 if(buf[0]!='P') return 0;
647 if(buf[1]=='7' && buf[2]==0x0a)
648 return FMT_PAM;
649 if(buf[1]>='1' && buf[1]<='6')
650 return buf[1] - '0';
651 return 0;
654 static const char *get_fmt_name(int fmt)
656 const char *name="unknown";
657 switch(fmt) {
658 case FMT_PBM_ASCII: name="PBM plain"; break;
659 case FMT_PGM_ASCII: name="PGM plain"; break;
660 case FMT_PPM_ASCII: name="PPM plain"; break;
661 case FMT_PBM_BINARY: name="PBM"; break;
662 case FMT_PGM_BINARY: name="PGM"; break;
663 case FMT_PPM_BINARY: name="PPM"; break;
664 case FMT_PAM: name="PAM"; break;
666 return name;
669 static int do_page(deark *c, lctx *d, int pagenum, i64 pos1)
671 struct page_ctx *pg = NULL;
672 int retval = 0;
674 de_dbg(c, "image at %"I64_FMT, pos1);
675 de_dbg_indent(c, 1);
677 pg = de_malloc(c, sizeof(struct page_ctx));
679 pg->fmt = identify_fmt(c, pos1);
680 d->last_fmt = pg->fmt;
681 pg->fmt_name = get_fmt_name(pg->fmt);
682 if(pg->fmt==0) {
683 de_err(c, "Not PNM/PAM format");
684 goto done;
687 if(pagenum==0) {
688 de_declare_fmt(c, pg->fmt_name);
691 if(pg->fmt==FMT_PAM) {
692 if(!read_pam_header(c, d, pg, pos1)) goto done;
694 else {
695 if(!read_pnm_header(c, d, pg, pos1)) goto done;
698 if(!do_image(c, d, pg, pg->hdr_parse_pos)) {
699 goto done;
702 d->last_bytesused = (pg->hdr_parse_pos + pg->image_data_len) - pos1;
704 retval = 1;
705 done:
706 de_dbg_indent(c, -1);
707 de_free(c, pg);
708 return retval;
711 static void de_run_pnm(deark *c, de_module_params *mparams)
713 lctx *d = NULL;
714 i64 pos;
715 int ret;
716 int pagenum = 0;
718 d = de_malloc(c, sizeof(lctx));
720 pos = 0;
721 while(1) {
722 if(c->infile->len - pos < 8) break;
723 d->last_fmt = 0;
724 d->last_bytesused = 0;
725 ret = do_page(c, d, pagenum, pos);
726 if(!ret) break;
727 if(d->last_bytesused<8) break;
729 if(!fmt_is_binary(d->last_fmt))
731 break; // ASCII formats don't support multiple images
734 pos += d->last_bytesused;
735 pagenum++;
738 de_free(c, d);
741 static int de_identify_pnm(deark *c)
743 int fmt;
745 fmt = identify_fmt(c, 0);
746 if(fmt!=0) return 40;
747 return 0;
750 void de_module_pnm(deark *c, struct deark_module_info *mi)
752 mi->id = "pnm";
753 mi->desc = "Netpbm formats (PNM, PBM, PGM, PPM, PAM)";
754 mi->run_fn = de_run_pnm;
755 mi->identify_fn = de_identify_pnm;