1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
9 #include <deark-config.h>
10 #include <deark-private.h>
11 DE_DECLARE_MODULE(de_module_pnm
);
13 // Numbers 1-6 are assumed to match the "Px" number in the file signature.
14 #define FMT_PBM_ASCII 1
15 #define FMT_PGM_ASCII 2
16 #define FMT_PPM_ASCII 3
17 #define FMT_PBM_BINARY 4
18 #define FMT_PGM_BINARY 5
19 #define FMT_PPM_BINARY 6
28 int pam_depth
; // = samples per pixel
29 #define PAMSUBTYPE_GRAY 1
30 #define PAMSUBTYPE_RGB 2
38 typedef struct localctx_struct
{
43 static int fmt_is_pbm(int fmt
)
45 return (fmt
==FMT_PBM_ASCII
|| fmt
==FMT_PBM_BINARY
);
48 static int fmt_is_ppm(int fmt
)
50 return (fmt
==FMT_PPM_ASCII
|| fmt
==FMT_PPM_BINARY
);
53 static int fmt_is_binary(int fmt
)
55 return (fmt
==FMT_PBM_BINARY
|| fmt
==FMT_PGM_BINARY
||
56 fmt
==FMT_PPM_BINARY
|| fmt
==FMT_PAM
);
59 static int is_pnm_whitespace(u8 b
)
61 // Whitespace = space, CR, LF, TAB, VT, or FF
62 return (b
==9 || b
==10 || b
==11 || b
==12 || b
==13 || b
==32);
65 static int read_next_token(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
66 char *tokenbuf
, size_t tokenbuflen
)
74 if(pg
->hdr_parse_pos
>= c
->infile
->len
) return 0;
76 if(token_len
>= tokenbuflen
) {
77 return 0; // Token too long.
80 b
= de_getbyte_p(&pg
->hdr_parse_pos
);
92 else if(is_pnm_whitespace(b
)) {
94 tokenbuf
[token_len
] = '\0';
98 continue; // Skip leading whitespace.
102 // Append the character to the token.
103 tokenbuf
[token_len
] = (char)b
;
111 static int read_pnm_header(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
116 de_dbg(c
, "header at %"I64_FMT
, pos1
);
119 de_dbg(c
, "format: %s", pg
->fmt_name
);
120 pg
->hdr_parse_pos
= pos1
+2; // Skip "P?"
122 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
123 pg
->width
= de_atoi64(tokenbuf
);
124 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
125 pg
->height
= de_atoi64(tokenbuf
);
126 de_dbg_dimensions(c
, pg
->width
, pg
->height
);
128 if(fmt_is_pbm(pg
->fmt
)) {
132 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
133 pg
->maxval
= de_atoi(tokenbuf
);
134 de_dbg(c
, "maxval: %d", pg
->maxval
);
139 de_dbg_indent(c
, -1);
143 // Read a token from a NUL-terminated string.
144 static int read_next_pam_token(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
145 const char *linebuf
, size_t linebuflen
,
146 char *tokenbuf
, size_t tokenbuflen
, i64
*curpos
)
156 if(token_len
>= (i64
)tokenbuflen
) {
161 if(linepos
>= (i64
)linebuflen
) {
164 b
= linebuf
[linepos
++];
165 if(b
==0) break; // End of line
167 if(is_pnm_whitespace(b
)) {
172 continue; // Skip leading whitespace.
176 // Append the character to the token.
177 tokenbuf
[token_len
++] = b
;
181 tokenbuf
[token_len
] = '\0';
186 static int read_pam_header_line(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos
,
187 i64
*content_len
, i64
*total_len
,
188 char *linebuf
, size_t linebuf_len
)
195 ret
= dbuf_find_line(c
->infile
, pos
,
196 content_len
, total_len
);
200 amt_to_read
= *content_len
;
201 if(amt_to_read
> (i64
)(linebuf_len
-1)) amt_to_read
= (i64
)(linebuf_len
-1);
203 de_read((u8
*)linebuf
, pos
, amt_to_read
);
205 *content_len
= amt_to_read
;
206 linebuf
[amt_to_read
] = '\0';
210 static int read_pam_header(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
215 int tupltype_line_count
= 0;
220 de_dbg(c
, "header at %"I64_FMT
, pos1
);
223 pos
+= 3; // Skip "P7\n"
230 ret
= read_pam_header_line(c
, d
, pg
, pos
, &content_len
, &total_len
,
231 linebuf
, sizeof(linebuf
));
235 de_err(c
, "Invalid PAM header");
239 if(content_len
>0 && (de_getbyte(pos
)=='#')) {
246 if(!read_next_pam_token(c
, d
, pg
, linebuf
, sizeof(linebuf
),
247 token1buf
, sizeof(token1buf
), &curpos
))
252 if(!de_strcmp(token1buf
, "ENDHDR")) {
256 // Other header lines have a param
257 if(!read_next_pam_token(c
, d
, pg
, linebuf
, sizeof(linebuf
),
258 token2buf
, sizeof(token2buf
), &curpos
))
263 if(!de_strcmp(token1buf
, "WIDTH")) {
264 pg
->width
= de_atoi64(token2buf
);
266 else if(!de_strcmp(token1buf
, "HEIGHT")) {
267 pg
->height
= de_atoi64(token2buf
);
269 else if(!de_strcmp(token1buf
, "DEPTH")) {
270 pg
->pam_depth
= de_atoi(token2buf
);
272 else if(!de_strcmp(token1buf
, "MAXVAL")) {
273 pg
->maxval
= de_atoi(token2buf
);
275 else if(!de_strcmp(token1buf
, "TUPLTYPE")) {
276 // FIXME: The "TUPLTYPE" line(s) is wacky, and seems underspecified.
277 // We do not support it correctly.
278 // But I doubt any real PAM encoders are pathological enough to
279 // require us to support its wackiness.
280 if(tupltype_line_count
>0) {
281 de_err(c
, "Multiple TUPLTYPE lines are not supported");
284 tupltype_line_count
++;
286 if(!de_strcmp(token2buf
, "BLACKANDWHITE")) {
287 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
290 else if(!de_strcmp(token2buf
, "BLACKANDWHITE_ALPHA")) {
291 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
295 else if(!de_strcmp(token2buf
, "GRAYSCALE")) {
296 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
298 else if(!de_strcmp(token2buf
, "GRAYSCALE_ALPHA")) {
299 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
302 else if(!de_strcmp(token2buf
, "RGB")) {
303 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
305 else if(!de_strcmp(token2buf
, "RGB_ALPHA")) {
306 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
310 de_err(c
, "Unsupported color type");
316 if(tupltype_line_count
==0) {
317 // The TUPLTYPE field is technically optional, but the image is not
318 // portable without it.
319 switch(pg
->pam_depth
) {
321 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
324 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
328 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
331 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
336 if(pg
->pam_subtype
!=0) {
337 de_warn(c
, "Color type not specified. Attempting to guess.");
341 pg
->hdr_parse_pos
= pos
;
344 de_dbg_indent(c
, -1);
348 static int do_image_pbm_ascii(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
350 de_bitmap
*img
= NULL
;
356 img
= de_bitmap_create(c
, pg
->width
, pg
->height
, 1);
360 if(pos
>= c
->infile
->len
) break; // end of file
361 if(ypos
==(pg
->height
-1) && xpos
>=pg
->width
) break; // end of image
362 if(ypos
>=pg
->height
) break;
364 b
= de_getbyte_p(&pos
);
366 else if(b
=='0') v
=255;
369 de_bitmap_setpixel_gray(img
, xpos
, ypos
, v
);
371 if(xpos
>=pg
->width
) {
377 de_bitmap_write_to_file_finfo(img
, NULL
, DE_CREATEFLAG_IS_BWIMG
);
378 de_bitmap_destroy(img
);
382 static int do_image_pgm_ppm_pam_binary(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
383 dbuf
*inf
, i64 pos1
);
385 struct pgm_ppm_ascii_ctx
{
386 u8 intermed_nbytes_per_sample
;
389 size_t samplebuf_used
;
393 static void pgm_ppm_ascii_handle_sample(struct pgm_ppm_ascii_ctx
*actx
)
397 actx
->samplebuf
[actx
->samplebuf_used
] = '\0'; // NUL terminate for de_atoi64()
398 v
= de_atoi64((const char*)actx
->samplebuf
);
399 actx
->samplebuf_used
= 0;
401 if(actx
->intermed_nbytes_per_sample
==1) {
402 dbuf_writebyte(actx
->intermed_img
, (u8
)v
);
405 dbuf_writeu16be(actx
->intermed_img
, v
);
408 actx
->sample_count
++;
411 // Convert the ASCII image data to binary, then call the function to process
413 static int do_image_pgm_ppm_ascii(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
415 int nsamples_per_pixel
;
416 i64 nsamples_per_image
;
417 i64 intermed_nbytes_per_image
;
420 struct pgm_ppm_ascii_ctx actx
;
422 de_zeromem(&actx
, sizeof(struct pgm_ppm_ascii_ctx
));
423 if(fmt_is_ppm(pg
->fmt
)) nsamples_per_pixel
= 3;
424 else nsamples_per_pixel
= 1;
426 nsamples_per_image
= (i64
)nsamples_per_pixel
* pg
->height
* pg
->width
;
427 actx
.intermed_nbytes_per_sample
= (pg
->maxval
>255) ? 2 : 1;
428 intermed_nbytes_per_image
= nsamples_per_image
* (i64
)actx
.intermed_nbytes_per_sample
;
430 actx
.intermed_img
= dbuf_create_membuf(c
, intermed_nbytes_per_image
, 0x1);
431 actx
.samplebuf_used
=0;
433 actx
.sample_count
= 0;
438 if(actx
.sample_count
>= nsamples_per_image
) break;
439 if(pos
>= c
->infile
->len
) { // end of file
440 if(actx
.samplebuf_used
>0) {
441 pgm_ppm_ascii_handle_sample(&actx
);
446 b
= de_getbyte_p(&pos
);
447 if(is_pnm_whitespace(b
)) {
448 if(actx
.samplebuf_used
>0) {
449 // Completed a sample
450 pgm_ppm_ascii_handle_sample(&actx
);
452 else { // Skip extra whitespace
457 // Non-whitespace. Save for later.
458 if(actx
.samplebuf_used
< sizeof(actx
.samplebuf_used
)-1) {
459 actx
.samplebuf
[actx
.samplebuf_used
++] = b
;
464 retval
= do_image_pgm_ppm_pam_binary(c
, d
, pg
, actx
.intermed_img
, 0);
465 dbuf_close(actx
.intermed_img
);
469 static int do_image_pbm_binary(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
473 rowspan
= (pg
->width
+7)/8;
474 pg
->image_data_len
= rowspan
* pg
->height
;
476 de_convert_and_write_image_bilevel2(c
->infile
, pos1
, pg
->width
, pg
->height
,
477 rowspan
, DE_CVTF_WHITEISZERO
, NULL
, 0);
481 static int do_image_pgm_ppm_pam_binary(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
484 de_bitmap
*img
= NULL
;
485 de_bitmap
*imglo
= NULL
;
487 int nsamples_per_pixel
; // For both input and output
488 u8 nbytes_per_sample
;
492 u8 samp_adj
[4]; // most significant 8 bits
497 de_zeromem(samp_adj_lo
, sizeof(samp_adj_lo
));
499 if(pg
->fmt
==FMT_PAM
) {
500 nsamples_per_pixel
= pg
->pam_depth
;
502 if((pg
->pam_subtype
==PAMSUBTYPE_GRAY
&& !pg
->has_alpha
&& nsamples_per_pixel
==1) ||
503 (pg
->pam_subtype
==PAMSUBTYPE_GRAY
&& pg
->has_alpha
&& nsamples_per_pixel
==2) ||
504 (pg
->pam_subtype
==PAMSUBTYPE_RGB
&& !pg
->has_alpha
&& nsamples_per_pixel
==3) ||
505 (pg
->pam_subtype
==PAMSUBTYPE_RGB
&& pg
->has_alpha
&& nsamples_per_pixel
==4))
510 de_err(c
, "Unsupported PAM format");
514 else if(fmt_is_ppm(pg
->fmt
)) {
515 nsamples_per_pixel
= 3;
518 nsamples_per_pixel
= 1;
521 if(nsamples_per_pixel
<1 || nsamples_per_pixel
>4) {
522 de_err(c
, "Unsupported samples/pixel: %d", nsamples_per_pixel
);
525 if(pg
->maxval
<=255) nbytes_per_sample
= 1;
526 else nbytes_per_sample
= 2;
528 rowspan
= pg
->width
* (i64
)nsamples_per_pixel
* (i64
)nbytes_per_sample
;
529 pg
->image_data_len
= rowspan
* pg
->height
;
531 img
= de_bitmap_create(c
, pg
->width
, pg
->height
, nsamples_per_pixel
);
532 if(nbytes_per_sample
!=1) {
533 imglo
= de_bitmap_create(c
, pg
->width
, pg
->height
, nsamples_per_pixel
);
536 for(j
=0; j
<pg
->height
; j
++) {
537 for(i
=0; i
<pg
->width
; i
++) {
540 for(k
=0; k
<nsamples_per_pixel
; k
++) {
541 if(nbytes_per_sample
==1) {
542 samp_ori
[k
] = dbuf_getbyte_p(inf
, &pos
);
545 samp_ori
[k
] = (UI
)dbuf_getu16be_p(inf
, &pos
);
548 if(nbytes_per_sample
==1) {
549 samp_adj
[k
] = de_scale_n_to_255(pg
->maxval
, samp_ori
[k
]);
552 de_scale_n_to_16bit(pg
->maxval
, (int)samp_ori
[k
], &samp_adj
[k
], &samp_adj_lo
[k
]);
556 switch(nsamples_per_pixel
) {
558 clr
= DE_MAKE_RGBA(samp_adj
[0], samp_adj
[1], samp_adj
[2], samp_adj
[3]);
559 de_bitmap_setpixel_rgba(img
, i
, j
, clr
);
561 clr
= DE_MAKE_RGBA(samp_adj_lo
[0], samp_adj_lo
[1], samp_adj_lo
[2], samp_adj_lo
[3]);
562 de_bitmap_setpixel_rgba(imglo
, i
, j
, clr
);
566 clr
= DE_MAKE_RGB(samp_adj
[0], samp_adj
[1], samp_adj
[2]);
567 de_bitmap_setpixel_rgb(img
, i
, j
, clr
);
569 clr
= DE_MAKE_RGB(samp_adj_lo
[0], samp_adj_lo
[1], samp_adj_lo
[2]);
570 de_bitmap_setpixel_rgb(imglo
, i
, j
, clr
);
574 clr
= DE_MAKE_RGBA(samp_adj
[0], samp_adj
[0], samp_adj
[0], samp_adj
[1]);
575 de_bitmap_setpixel_rgba(img
, i
, j
, clr
);
577 clr
= DE_MAKE_RGBA(samp_adj_lo
[0], samp_adj_lo
[0], samp_adj_lo
[0], samp_adj_lo
[1]);
578 de_bitmap_setpixel_rgba(imglo
, i
, j
, clr
);
581 default: // Assuming nsamples==1
582 de_bitmap_setpixel_gray(img
, i
, j
, samp_adj
[0]);
584 de_bitmap_setpixel_gray(imglo
, i
, j
, samp_adj_lo
[0]);
590 de_bitmap16_write_to_file_finfo(img
, imglo
, NULL
, DE_CREATEFLAG_OPT_IMAGE
);
594 de_bitmap_destroy(img
);
595 de_bitmap_destroy(imglo
);
599 static int do_image(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
603 de_dbg(c
, "image data at %"I64_FMT
, pos1
);
606 if(pg
->maxval
<1 || pg
->maxval
>65535) {
607 de_err(c
, "Invalid maxval: %d", pg
->maxval
);
610 if(!de_good_image_dimensions(c
, pg
->width
, pg
->height
)) goto done
;
614 if(!do_image_pbm_ascii(c
, d
, pg
, pos1
)) goto done
;
618 if(!do_image_pgm_ppm_ascii(c
, d
, pg
, pos1
)) goto done
;
621 if(!do_image_pbm_binary(c
, d
, pg
, pos1
)) goto done
;
626 if(!do_image_pgm_ppm_pam_binary(c
, d
, pg
, c
->infile
, pos1
)) goto done
;
629 de_err(c
, "Unsupported PNM format");
636 de_dbg_indent(c
, -1);
640 static int identify_fmt(deark
*c
, i64 pos
)
644 de_read(buf
, pos
, 3);
645 if(buf
[0]!='P') return 0;
647 if(buf
[1]=='7' && buf
[2]==0x0a)
649 if(buf
[1]>='1' && buf
[1]<='6')
654 static const char *get_fmt_name(int fmt
)
656 const char *name
="unknown";
658 case FMT_PBM_ASCII
: name
="PBM plain"; break;
659 case FMT_PGM_ASCII
: name
="PGM plain"; break;
660 case FMT_PPM_ASCII
: name
="PPM plain"; break;
661 case FMT_PBM_BINARY
: name
="PBM"; break;
662 case FMT_PGM_BINARY
: name
="PGM"; break;
663 case FMT_PPM_BINARY
: name
="PPM"; break;
664 case FMT_PAM
: name
="PAM"; break;
669 static int do_page(deark
*c
, lctx
*d
, int pagenum
, i64 pos1
)
671 struct page_ctx
*pg
= NULL
;
674 de_dbg(c
, "image at %"I64_FMT
, pos1
);
677 pg
= de_malloc(c
, sizeof(struct page_ctx
));
679 pg
->fmt
= identify_fmt(c
, pos1
);
680 d
->last_fmt
= pg
->fmt
;
681 pg
->fmt_name
= get_fmt_name(pg
->fmt
);
683 de_err(c
, "Not PNM/PAM format");
688 de_declare_fmt(c
, pg
->fmt_name
);
691 if(pg
->fmt
==FMT_PAM
) {
692 if(!read_pam_header(c
, d
, pg
, pos1
)) goto done
;
695 if(!read_pnm_header(c
, d
, pg
, pos1
)) goto done
;
698 if(!do_image(c
, d
, pg
, pg
->hdr_parse_pos
)) {
702 d
->last_bytesused
= (pg
->hdr_parse_pos
+ pg
->image_data_len
) - pos1
;
706 de_dbg_indent(c
, -1);
711 static void de_run_pnm(deark
*c
, de_module_params
*mparams
)
718 d
= de_malloc(c
, sizeof(lctx
));
722 if(c
->infile
->len
- pos
< 8) break;
724 d
->last_bytesused
= 0;
725 ret
= do_page(c
, d
, pagenum
, pos
);
727 if(d
->last_bytesused
<8) break;
729 if(!fmt_is_binary(d
->last_fmt
))
731 break; // ASCII formats don't support multiple images
734 pos
+= d
->last_bytesused
;
741 static int de_identify_pnm(deark
*c
)
745 fmt
= identify_fmt(c
, 0);
746 if(fmt
!=0) return 40;
750 void de_module_pnm(deark
*c
, struct deark_module_info
*mi
)
753 mi
->desc
= "Netpbm formats (PNM, PBM, PGM, PPM, PAM)";
754 mi
->run_fn
= de_run_pnm
;
755 mi
->identify_fn
= de_identify_pnm
;