1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
9 #include <deark-config.h>
10 #include <deark-private.h>
// Declaration of this module's registration function, de_module_pnm(),
// which is defined at the end of this file.
11 DE_DECLARE_MODULE(de_module_pnm
);
// Internal format codes. get_fmt_name() labels the *_ASCII codes as the
// "plain" variants; fmt_is_binary() accepts the *_BINARY codes (and FMT_PAM).
13 // Numbers 1-6 are assumed to match the "Px" number in the file signature.
14 #define FMT_PBM_ASCII 1
15 #define FMT_PGM_ASCII 2
16 #define FMT_PPM_ASCII 3
17 #define FMT_PBM_BINARY 4
18 #define FMT_PGM_BINARY 5
19 #define FMT_PPM_BINARY 6
// PAM color subtypes. read_pam_header() stores one of these in
// pg->pam_subtype; a value of 0 is tested there as "not determined".
29 #define PAMSUBTYPE_GRAY 1
30 #define PAMSUBTYPE_RGB 2
38 typedef struct localctx_struct
{
43 static int fmt_is_pbm(int fmt
)
45 return (fmt
==FMT_PBM_ASCII
|| fmt
==FMT_PBM_BINARY
);
48 static int fmt_is_ppm(int fmt
)
50 return (fmt
==FMT_PPM_ASCII
|| fmt
==FMT_PPM_BINARY
);
53 static int fmt_is_binary(int fmt
)
55 return (fmt
==FMT_PBM_BINARY
|| fmt
==FMT_PGM_BINARY
||
56 fmt
==FMT_PPM_BINARY
|| fmt
==FMT_PAM
);
59 static int is_pnm_whitespace(u8 b
)
61 // Whitspace = space, CR, LF, TAB, VT, or FF
62 return (b
==9 || b
==10 || b
==11 || b
==12 || b
==13 || b
==32);
// Reads the next whitespace-delimited header token into tokenbuf.
//
// Bytes are taken from the input file at pg->hdr_parse_pos, which is advanced
// past every byte consumed. Leading whitespace is skipped; the first
// whitespace byte after a non-empty token ends it, and tokenbuf is then
// NUL-terminated.
//
// Returns 0 if the end of the file is reached first, or if the token does not
// fit in tokenbuflen bytes. Callers treat a nonzero return as success.
65 static int read_next_token(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
66 char *tokenbuf
, size_t tokenbuflen
)
// Ran out of input before the token was completed.
74 if(pg
->hdr_parse_pos
>= c
->infile
->len
) return 0;
// Checked before storing, so there is always room for the next byte
// (or for the NUL terminator).
76 if(token_len
>= tokenbuflen
) {
77 return 0; // Token too long.
80 b
= de_getbyte(pg
->hdr_parse_pos
++);
92 else if(is_pnm_whitespace(b
)) {
94 tokenbuf
[token_len
] = '\0';
98 continue; // Skip leading whitespace.
102 // Append the character to the token.
103 tokenbuf
[token_len
] = (char)b
;
// Parses the header of a non-PAM ("P1".."P6") image that starts at pos1.
//
// The width and height tokens are read (after the two-byte signature) and
// stored in pg->width and pg->height. A maxval token is read into pg->maxval;
// NOTE(review): the body of the fmt_is_pbm() branch is not visible here --
// presumably PBM files skip the maxval token; confirm.
//
// pg->hdr_parse_pos is left just past the last byte consumed by
// read_next_token(). Callers treat a nonzero return as success.
111 static int read_pnm_header(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
116 de_dbg(c
, "header at %d", (int)pos1
);
119 de_dbg(c
, "format: %s", pg
->fmt_name
);
120 pg
->hdr_parse_pos
= pos1
+2; // Skip "P?"
// First token: image width.
122 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
123 pg
->width
= de_atoi64(tokenbuf
);
// Second token: image height.
124 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
125 pg
->height
= de_atoi64(tokenbuf
);
126 de_dbg_dimensions(c
, pg
->width
, pg
->height
);
128 if(fmt_is_pbm(pg
->fmt
)) {
// Maximum sample value token.
132 if(!read_next_token(c
, d
, pg
, tokenbuf
, sizeof(tokenbuf
))) goto done
;
133 pg
->maxval
= de_atoi64(tokenbuf
);
134 de_dbg(c
, "maxval: %d", (int)pg
->maxval
);
139 de_dbg_indent(c
, -1);
// Extracts the next whitespace-delimited token from linebuf into tokenbuf,
// which is NUL-terminated at the end.
//
// Scanning stops at a NUL byte in linebuf (end of line). Leading whitespace is
// skipped. Both the token length (against tokenbuflen) and the scan position
// (against linebuflen) are bounds-checked.
// NOTE(review): *curpos is presumably the in/out scan position within linebuf;
// the lines that read and update it are not visible here -- confirm.
// Callers treat a nonzero return as success.
143 // Read a token from a NUL-terminated string.
144 static int read_next_pam_token(deark
*c
, lctx
*d
, struct page_ctx
*pg
,
145 const char *linebuf
, size_t linebuflen
,
146 char *tokenbuf
, size_t tokenbuflen
, i64
*curpos
)
// Guard against overflowing the output buffer.
156 if(token_len
>= (i64
)tokenbuflen
) {
// Guard against reading past the end of the line buffer.
161 if(linepos
>= (i64
)linebuflen
) {
164 b
= linebuf
[linepos
++];
165 if(b
==0) break; // End of line
167 if(is_pnm_whitespace(b
)) {
172 continue; // Skip leading whitespace.
176 // Append the character to the token.
177 tokenbuf
[token_len
++] = b
;
181 tokenbuf
[token_len
] = '\0';
// Reads one PAM header line, starting at file offset pos, into linebuf as a
// NUL-terminated string.
//
// dbuf_find_line() locates the line and fills in *content_len and *total_len.
// If the line content is longer than linebuf_len-1 bytes, only that many bytes
// are copied; *content_len is then overwritten with the number of bytes
// actually copied.
// Callers treat a zero return as "invalid header".
186 static int read_pam_header_line(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos
,
187 i64
*content_len
, i64
*total_len
,
188 char *linebuf
, size_t linebuf_len
)
195 ret
= dbuf_find_line(c
->infile
, pos
,
196 content_len
, total_len
);
// Clamp so the copy plus the NUL terminator fits in linebuf.
200 amt_to_read
= *content_len
;
201 if(amt_to_read
> (i64
)(linebuf_len
-1)) amt_to_read
= (i64
)(linebuf_len
-1);
203 de_read((u8
*)linebuf
, pos
, amt_to_read
);
205 *content_len
= amt_to_read
;
206 linebuf
[amt_to_read
] = '\0';
// Parses a PAM ("P7") header that starts at pos1.
//
// After the 3-byte signature, header lines are read one at a time with
// read_pam_header_line(). Each line is split into a keyword (token1buf) and,
// for every keyword except "ENDHDR", a parameter (token2buf). Recognized
// keywords:
//   WIDTH, HEIGHT -> pg->width, pg->height
//   DEPTH         -> pg->pam_num_samples
//   MAXVAL        -> pg->maxval
//   TUPLTYPE      -> pg->pam_subtype (only a single TUPLTYPE line is accepted)
//
// If no TUPLTYPE line was present, the subtype is chosen from
// pg->pam_num_samples instead. On completion pg->hdr_parse_pos is set to the
// current position (pos). Callers treat a nonzero return as success.
210 static int read_pam_header(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
215 int tupltype_line_count
= 0;
220 de_dbg(c
, "header at %d", (int)pos1
);
223 pos
+= 3; // Skip "P7\n"
230 ret
= read_pam_header_line(c
, d
, pg
, pos
, &content_len
, &total_len
,
231 linebuf
, sizeof(linebuf
));
235 de_err(c
, "Invalid PAM header");
// Lines whose first byte is '#' get special handling.
// NOTE(review): presumably skipped as comments; the branch body is not visible.
239 if(content_len
>0 && (de_getbyte(pos
)=='#')) {
// First token on the line: the header keyword.
246 if(!read_next_pam_token(c
, d
, pg
, linebuf
, sizeof(linebuf
),
247 token1buf
, sizeof(token1buf
), &curpos
))
252 if(!de_strcmp(token1buf
, "ENDHDR")) {
256 // Other header lines have a param
257 if(!read_next_pam_token(c
, d
, pg
, linebuf
, sizeof(linebuf
),
258 token2buf
, sizeof(token2buf
), &curpos
))
263 if(!de_strcmp(token1buf
, "WIDTH")) {
264 pg
->width
= de_atoi64(token2buf
);
266 else if(!de_strcmp(token1buf
, "HEIGHT")) {
267 pg
->height
= de_atoi64(token2buf
);
269 else if(!de_strcmp(token1buf
, "DEPTH")) {
270 pg
->pam_num_samples
= de_atoi64(token2buf
);
272 else if(!de_strcmp(token1buf
, "MAXVAL")) {
273 pg
->maxval
= de_atoi64(token2buf
);
275 else if(!de_strcmp(token1buf
, "TUPLTYPE")) {
276 // FIXME: The "TUPLTYPE" line(s) is wacky, and seems underspecified.
277 // We do not support it correctly.
278 // But I doubt any real PAM encoders are pathological enough to
279 // require us to support its wackiness.
280 if(tupltype_line_count
>0) {
281 de_err(c
, "Multiple TUPLTYPE lines are not supported");
284 tupltype_line_count
++;
// Both the black-and-white and the grayscale tuple types map to the
// GRAY subtype.
286 if(!de_strcmp(token2buf
, "BLACKANDWHITE")) {
287 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
290 else if(!de_strcmp(token2buf
, "BLACKANDWHITE_ALPHA")) {
291 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
295 else if(!de_strcmp(token2buf
, "GRAYSCALE")) {
296 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
298 else if(!de_strcmp(token2buf
, "GRAYSCALE_ALPHA")) {
299 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
302 else if(!de_strcmp(token2buf
, "RGB")) {
303 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
305 else if(!de_strcmp(token2buf
, "RGB_ALPHA")) {
306 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
310 de_err(c
, "Unsupported color type");
316 if(tupltype_line_count
==0) {
317 // The TUPLTYPE field is technically optional, but the image is not
318 // portable without it.
319 switch(pg
->pam_num_samples
) {
321 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
324 pg
->pam_subtype
= PAMSUBTYPE_GRAY
;
328 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
331 pg
->pam_subtype
= PAMSUBTYPE_RGB
;
// Warn only when a subtype could actually be chosen from the sample count.
336 if(pg
->pam_subtype
!=0) {
337 de_warn(c
, "Color type not specified. Attempting to guess.");
341 pg
->hdr_parse_pos
= pos
;
344 de_dbg_indent(c
, -1);
// Decodes an ASCII PBM image whose pixel data starts at pos1 and writes it
// out as a bitmap.
//
// A 1-sample bitmap of pg->width x pg->height is created and filled one pixel
// per recognized input byte ('0' is stored as gray value 255). Decoding stops
// at the end of the file or once the last pixel of the last row has been set.
// Callers treat a nonzero return as success.
348 static int do_image_pbm_ascii(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
350 de_bitmap
*img
= NULL
;
356 img
= de_bitmap_create(c
, pg
->width
, pg
->height
, 1);
360 if(pos
>= c
->infile
->len
) break; // end of file
361 if(ypos
==(pg
->height
-1) && xpos
>=pg
->width
) break; // end of image
362 if(ypos
>=pg
->height
) break;
364 b
= de_getbyte(pos
++);
366 else if(b
=='0') v
=255;
369 de_bitmap_setpixel_gray(img
, xpos
, ypos
, v
);
// End of row reached.
371 if(xpos
>=pg
->width
) {
377 de_bitmap_write_to_file_finfo(img
, NULL
, 0);
378 de_bitmap_destroy(img
);
// Decodes an ASCII PGM or PPM image whose sample data starts at pos1 and
// writes it out as a bitmap.
//
// PPM uses 3 samples per pixel. Input bytes are accumulated in samplebuf until
// a whitespace byte is seen; the accumulated text is then converted with
// de_atoi64(), scaled from the 0..pg->maxval range to 0..255, and stored in
// the bitmap at the current (xpos, ypos, sampidx) position. Decoding stops at
// the end of the file or once the last row is complete.
// Callers treat a nonzero return as success.
382 static int do_image_pgm_ppm_ascii(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
384 de_bitmap
*img
= NULL
;
385 i64 nsamples
; // For both input and output
387 i64 xpos
, ypos
, sampidx
;
389 size_t samplebuf_used
;
392 if(fmt_is_ppm(pg
->fmt
)) nsamples
=3;
395 img
= de_bitmap_create(c
, pg
->width
, pg
->height
, (int)nsamples
);
402 if(pos
>= c
->infile
->len
) break; // end of file
403 if(ypos
==(pg
->height
-1) && xpos
>=pg
->width
) break; // end of image
404 if(ypos
>=pg
->height
) break;
406 b
= de_getbyte(pos
++);
407 if(is_pnm_whitespace(b
)) {
408 if(samplebuf_used
>0) {
412 // Completed a sample
413 samplebuf
[samplebuf_used
] = '\0'; // NUL terminate for de_atoi64()
414 v
= de_atoi64((const char*)samplebuf
);
415 v_adj
= de_scale_n_to_255(pg
->maxval
, v
);
419 de_bitmap_setsample(img
, xpos
, ypos
, sampidx
, v_adj
);
422 de_bitmap_setpixel_gray(img
, xpos
, ypos
, v_adj
);
// All samples of the current pixel have been stored.
426 if(sampidx
>=nsamples
) {
// End of row reached.
429 if(xpos
>=pg
->width
) {
436 else { // Skip extra whitespace
441 // Non-whitespace. Save for later.
// The limit must be the capacity of samplebuf itself (minus one byte for the
// NUL terminator written when the sample completes). Using the size of the
// samplebuf_used counter instead would tie the limit to the width of size_t
// and silently truncate longer samples.
// NOTE(review): assumes samplebuf is a fixed-size array declared in this
// function, so that sizeof yields its capacity -- confirm.
442 if(samplebuf_used
< sizeof(samplebuf
)-1) {
443 samplebuf
[samplebuf_used
++] = b
;
447 de_bitmap_write_to_file(img
, NULL
, 0);
449 de_bitmap_destroy(img
);
// Decodes a binary PBM image whose pixel data starts at pos1 and writes it
// out as a bitmap.
//
// Each row occupies (width+7)/8 bytes, i.e. 1 bit per pixel rounded up to a
// whole byte. pg->image_data_len is set to the total size of the pixel data
// (do_page() uses it to compute how many bytes the image consumed). The
// conversion itself is delegated to de_convert_and_write_image_bilevel2(),
// with the DE_CVTF_WHITEISZERO flag.
// Callers treat a nonzero return as success.
453 static int do_image_pbm_binary(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
457 rowspan
= (pg
->width
+7)/8;
458 pg
->image_data_len
= rowspan
* pg
->height
;
460 de_convert_and_write_image_bilevel2(c
->infile
, pos1
, pg
->width
, pg
->height
,
461 rowspan
, DE_CVTF_WHITEISZERO
, NULL
, 0);
// Decodes a binary PGM, PPM, or PAM image whose sample data starts at pos1
// and writes it out as a bitmap.
//
// For PAM the sample count comes from pg->pam_num_samples, and only these
// combinations of subtype / alpha / sample count are accepted:
//   GRAY, no alpha, 1    GRAY, alpha, 2    RGB, no alpha, 3    RGB, alpha, 4
// Any other sample count outside 1..4 is rejected with an error.
//
// Samples are 1 byte each when pg->maxval<=255, otherwise 2 bytes with the
// high-order byte first. Every sample is scaled from 0..pg->maxval to 0..255.
// pg->image_data_len is set to the total size of the sample data.
// Callers treat a nonzero return as success.
465 static int do_image_pgm_ppm_pam_binary(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
467 de_bitmap
*img
= NULL
;
469 i64 nsamples
; // For both input and output
470 i64 bytes_per_sample
;
473 unsigned int samp_ori
[4];
478 if(pg
->fmt
==FMT_PAM
) {
479 nsamples
= pg
->pam_num_samples
;
481 if((pg
->pam_subtype
==PAMSUBTYPE_GRAY
&& !pg
->has_alpha
&& nsamples
==1) ||
482 (pg
->pam_subtype
==PAMSUBTYPE_GRAY
&& pg
->has_alpha
&& nsamples
==2) ||
483 (pg
->pam_subtype
==PAMSUBTYPE_RGB
&& !pg
->has_alpha
&& nsamples
==3) ||
484 (pg
->pam_subtype
==PAMSUBTYPE_RGB
&& pg
->has_alpha
&& nsamples
==4))
489 de_err(c
, "Unsupported PAM format");
493 else if(fmt_is_ppm(pg
->fmt
)) {
// This range check also keeps the index k into samp_ori[4] in bounds below.
500 if(nsamples
<1 || nsamples
>4) {
501 de_err(c
, "Unsupported number of samples: %d", (int)nsamples
);
504 if(pg
->maxval
<=255) bytes_per_sample
=1;
505 else bytes_per_sample
=2;
507 rowspan
= pg
->width
* nsamples
* bytes_per_sample
;
508 pg
->image_data_len
= rowspan
* pg
->height
;
510 img
= de_bitmap_create(c
, pg
->width
, pg
->height
, (int)nsamples
);
// j = row, i = column, k = sample index within the pixel.
512 for(j
=0; j
<pg
->height
; j
++) {
513 for(i
=0; i
<pg
->width
; i
++) {
514 for(k
=0; k
<nsamples
; k
++) {
515 if(bytes_per_sample
==1) {
516 samp_ori
[k
] = de_getbyte(pos
++);
// Two-byte sample: the first byte is the high-order byte.
519 samp_ori
[k
] = (unsigned int)de_getbyte(pos
++) << 8 ;
520 samp_ori
[k
] |= (unsigned int)de_getbyte(pos
++);
523 samp_adj
[k
] = de_scale_n_to_255(pg
->maxval
, samp_ori
[k
]);
// Four samples: red, green, blue, alpha.
528 clr
= DE_MAKE_RGBA(samp_adj
[0], samp_adj
[1], samp_adj
[2], samp_adj
[3]);
529 de_bitmap_setpixel_rgba(img
, i
, j
, clr
);
// Three samples: red, green, blue.
532 clr
= DE_MAKE_RGB(samp_adj
[0], samp_adj
[1], samp_adj
[2]);
533 de_bitmap_setpixel_rgb(img
, i
, j
, clr
);
// Two samples: the gray value is replicated to all three color channels,
// and the second sample is the alpha value.
536 clr
= DE_MAKE_RGBA(samp_adj
[0], samp_adj
[0], samp_adj
[0], samp_adj
[1]);
537 de_bitmap_setpixel_rgba(img
, i
, j
, clr
);
539 default: // Assuming nsamples==1
540 de_bitmap_setpixel_gray(img
, i
, j
, samp_adj
[0]);
545 de_bitmap_write_to_file(img
, NULL
, 0);
549 de_bitmap_destroy(img
);
// Validates the parsed header fields and decodes the image data at pos1 with
// the decoder that matches the page's format.
//
// An error is reported if pg->maxval is outside 1..65535, and decoding is
// abandoned if de_good_image_dimensions() rejects the width/height. The four
// decoders are do_image_pbm_ascii(), do_image_pgm_ppm_ascii(),
// do_image_pbm_binary(), and do_image_pgm_ppm_pam_binary(); if none applies,
// "Unsupported PNM format" is reported.
// The caller (do_page) tests the return value for zero/nonzero.
553 static int do_image(deark
*c
, lctx
*d
, struct page_ctx
*pg
, i64 pos1
)
557 de_dbg(c
, "image data at %d", (int)pos1
);
560 if(pg
->maxval
<1 || pg
->maxval
>65535) {
561 de_err(c
, "Invalid maxval: %d", (int)pg
->maxval
);
564 if(!de_good_image_dimensions(c
, pg
->width
, pg
->height
)) goto done
;
568 if(!do_image_pbm_ascii(c
, d
, pg
, pos1
)) goto done
;
572 if(!do_image_pgm_ppm_ascii(c
, d
, pg
, pos1
)) goto done
;
575 if(!do_image_pbm_binary(c
, d
, pg
, pos1
)) goto done
;
580 if(!do_image_pgm_ppm_pam_binary(c
, d
, pg
, pos1
)) goto done
;
583 de_err(c
, "Unsupported PNM format");
590 de_dbg_indent(c
, -1);
// Inspects the 3 bytes at file offset pos and determines which PNM/PAM
// format, if any, starts there.
//
// Returns 0 if the first byte is not 'P'. A second byte of '7' followed by a
// newline (0x0a) is tested for first, then a second byte in '1'..'6'.
// NOTE(review): the return statements for those two cases are not visible
// here; presumably FMT_PAM and the matching FMT_* code (1-6) respectively.
// Callers treat 0 as "not recognized".
594 static int identify_fmt(deark
*c
, i64 pos
)
598 de_read(buf
, pos
, 3);
599 if(buf
[0]!='P') return 0;
601 if(buf
[1]=='7' && buf
[2]==0x0a)
603 if(buf
[1]>='1' && buf
[1]<='6')
608 static const char *get_fmt_name(int fmt
)
610 const char *name
="unknown";
612 case FMT_PBM_ASCII
: name
="PBM plain"; break;
613 case FMT_PGM_ASCII
: name
="PGM plain"; break;
614 case FMT_PPM_ASCII
: name
="PPM plain"; break;
615 case FMT_PBM_BINARY
: name
="PBM"; break;
616 case FMT_PGM_BINARY
: name
="PGM"; break;
617 case FMT_PPM_BINARY
: name
="PPM"; break;
618 case FMT_PAM
: name
="PAM"; break;
// Processes one image ("page") that starts at file offset pos1.
//
// Steps: allocate a page context, identify the format (also recorded in
// d->last_fmt for the caller), parse the header with read_pam_header() or
// read_pnm_header(), then decode the image data at pg->hdr_parse_pos with
// do_image().
//
// d->last_bytesused is set to the size of the header plus the image data,
// measured from pos1, so that de_run_pnm() can locate the next image.
623 static int do_page(deark
*c
, lctx
*d
, int pagenum
, i64 pos1
)
625 struct page_ctx
*pg
= NULL
;
628 de_dbg(c
, "image at %d", (int)pos1
);
631 pg
= de_malloc(c
, sizeof(struct page_ctx
));
633 pg
->fmt
= identify_fmt(c
, pos1
);
634 d
->last_fmt
= pg
->fmt
;
635 pg
->fmt_name
= get_fmt_name(pg
->fmt
);
637 de_err(c
, "Not PNM/PAM format");
642 de_declare_fmt(c
, pg
->fmt_name
);
// PAM has its own line-oriented header; the other formats share a
// token-oriented one.
645 if(pg
->fmt
==FMT_PAM
) {
646 if(!read_pam_header(c
, d
, pg
, pos1
)) goto done
;
649 if(!read_pnm_header(c
, d
, pg
, pos1
)) goto done
;
652 if(!do_image(c
, d
, pg
, pg
->hdr_parse_pos
)) {
// Header length plus image data length, relative to the start of the page.
656 d
->last_bytesused
= (pg
->hdr_parse_pos
+ pg
->image_data_len
) - pos1
;
660 de_dbg_indent(c
, -1);
// Module entry point: processes the input file as one or more consecutive
// PNM/PAM images.
//
// Each image is handled by do_page(), after which the position advances by
// d->last_bytesused. Processing stops when fewer than 8 bytes remain, when
// the last image reported fewer than 8 bytes used, or when the last image was
// not in a binary format.
665 static void de_run_pnm(deark
*c
, de_module_params
*mparams
)
672 d
= de_malloc(c
, sizeof(lctx
));
// Too little data left for another image.
676 if(c
->infile
->len
- pos
< 8) break;
678 d
->last_bytesused
= 0;
679 ret
= do_page(c
, d
, pagenum
, pos
);
681 if(d
->last_bytesused
<8) break;
683 if(!fmt_is_binary(d
->last_fmt
))
685 break; // ASCII formats don't support multiple images
688 pos
+= d
->last_bytesused
;
// Format-identification callback: returns 40 if identify_fmt() recognizes a
// PNM/PAM signature at the very start of the file.
// NOTE(review): 40 is presumably a confidence score, and the return value for
// an unrecognized file is not visible here (presumably 0) -- confirm.
695 static int de_identify_pnm(deark
*c
)
699 fmt
= identify_fmt(c
, 0);
700 if(fmt
!=0) return 40;
// Fills in the module-info record for this module: its description string,
// and the callbacks used to run it (de_run_pnm) and to identify files it
// handles (de_identify_pnm).
704 void de_module_pnm(deark
*c
, struct deark_module_info
*mi
)
707 mi
->desc
= "Netpbm formats (PNM, PBM, PGM, PPM, PAM)";
708 mi
->run_fn
= de_run_pnm
;
709 mi
->identify_fn
= de_identify_pnm
;