New "videomaster" module
[deark.git] / modules / pnm.c
blob55adf4fe4fbbcb996622e82e6fed0d711ea5c5e3
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // Netpbm formats
6 // PNM (PBM, PGM, PPM)
7 // PAM
9 #include <deark-config.h>
10 #include <deark-private.h>
// Declare this module's registration function, de_module_pnm() (defined at
// the end of this file). NOTE(review): presumably the macro expands to a
// prototype for that function -- confirm against the project headers.
DE_DECLARE_MODULE(de_module_pnm);
// Format codes used in page_ctx.fmt and lctx.last_fmt.
// Numbers 1-6 are assumed to match the "Px" number in the file signature
// (identify_fmt() returns the signature digit directly for those formats).
// FMT_PAM is used for the "P7" signature. 0 is used elsewhere in this file
// to mean "not a recognized format".
#define FMT_PBM_ASCII 1
#define FMT_PGM_ASCII 2
#define FMT_PPM_ASCII 3
#define FMT_PBM_BINARY 4
#define FMT_PGM_BINARY 5
#define FMT_PPM_BINARY 6
#define FMT_PAM 7
22 struct page_ctx {
23 int fmt;
24 const char *fmt_name;
25 i64 width, height;
26 i64 maxval;
28 i64 pam_num_samples;
29 #define PAMSUBTYPE_GRAY 1
30 #define PAMSUBTYPE_RGB 2
31 int pam_subtype;
32 int has_alpha;
34 i64 hdr_parse_pos;
35 i64 image_data_len;
// Module-level state. Both fields report the outcome of the most recent
// do_page() call so that de_run_pnm() can decide whether, and from where,
// to look for another image.
typedef struct localctx_struct {
	int last_fmt; // FMT_* code of the last image examined (0 if none/unrecognized)
	i64 last_bytesused; // Bytes (header + image data) consumed by the last image
} lctx;
43 static int fmt_is_pbm(int fmt)
45 return (fmt==FMT_PBM_ASCII || fmt==FMT_PBM_BINARY);
48 static int fmt_is_ppm(int fmt)
50 return (fmt==FMT_PPM_ASCII || fmt==FMT_PPM_BINARY);
53 static int fmt_is_binary(int fmt)
55 return (fmt==FMT_PBM_BINARY || fmt==FMT_PGM_BINARY ||
56 fmt==FMT_PPM_BINARY || fmt==FMT_PAM);
59 static int is_pnm_whitespace(u8 b)
61 // Whitspace = space, CR, LF, TAB, VT, or FF
62 return (b==9 || b==10 || b==11 || b==12 || b==13 || b==32);
65 static int read_next_token(deark *c, lctx *d, struct page_ctx *pg,
66 char *tokenbuf, size_t tokenbuflen)
68 u8 b;
69 size_t token_len = 0;
70 int in_comment = 0;
72 token_len = 0;
73 while(1) {
74 if(pg->hdr_parse_pos >= c->infile->len) return 0;
76 if(token_len >= tokenbuflen) {
77 return 0; // Token too long.
80 b = de_getbyte(pg->hdr_parse_pos++);
82 if(in_comment) {
83 if(b==10 || b==13) {
84 in_comment = 0;
86 continue;
88 else if(b=='#') {
89 in_comment = 1;
90 continue;
92 else if(is_pnm_whitespace(b)) {
93 if(token_len>0) {
94 tokenbuf[token_len] = '\0';
95 return 1;
97 else {
98 continue; // Skip leading whitespace.
101 else {
102 // Append the character to the token.
103 tokenbuf[token_len] = (char)b;
104 token_len++;
108 return 0;
111 static int read_pnm_header(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
113 char tokenbuf[100];
114 int retval = 0;
116 de_dbg(c, "header at %d", (int)pos1);
117 de_dbg_indent(c, 1);
119 de_dbg(c, "format: %s", pg->fmt_name);
120 pg->hdr_parse_pos = pos1+2; // Skip "P?"
122 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
123 pg->width = de_atoi64(tokenbuf);
124 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
125 pg->height = de_atoi64(tokenbuf);
126 de_dbg_dimensions(c, pg->width, pg->height);
128 if(fmt_is_pbm(pg->fmt)) {
129 pg->maxval = 1;
131 else {
132 if(!read_next_token(c, d, pg, tokenbuf, sizeof(tokenbuf))) goto done;
133 pg->maxval = de_atoi64(tokenbuf);
134 de_dbg(c, "maxval: %d", (int)pg->maxval);
137 retval = 1;
138 done:
139 de_dbg_indent(c, -1);
140 return retval;
143 // Read a token from a NUL-terminated string.
144 static int read_next_pam_token(deark *c, lctx *d, struct page_ctx *pg,
145 const char *linebuf, size_t linebuflen,
146 char *tokenbuf, size_t tokenbuflen, i64 *curpos)
148 u8 b;
149 i64 token_len = 0;
150 i64 linepos;
152 token_len = 0;
154 linepos = *curpos;
155 while(1) {
156 if(token_len >= (i64)tokenbuflen) {
157 // Token too long.
158 return 0;
161 if(linepos >= (i64)linebuflen) {
162 return 0;
164 b = linebuf[linepos++];
165 if(b==0) break; // End of line
167 if(is_pnm_whitespace(b)) {
168 if(token_len>0) {
169 break;
171 else {
172 continue; // Skip leading whitespace.
175 else {
176 // Append the character to the token.
177 tokenbuf[token_len++] = b;
181 tokenbuf[token_len] = '\0';
182 *curpos = linepos;
183 return 1;
186 static int read_pam_header_line(deark *c, lctx *d, struct page_ctx *pg, i64 pos,
187 i64 *content_len, i64 *total_len,
188 char *linebuf, size_t linebuf_len)
190 int ret;
191 i64 amt_to_read;
193 linebuf[0]='\0';
195 ret = dbuf_find_line(c->infile, pos,
196 content_len, total_len);
198 if(!ret) return 0;
200 amt_to_read = *content_len;
201 if(amt_to_read > (i64)(linebuf_len-1)) amt_to_read = (i64)(linebuf_len-1);
203 de_read((u8*)linebuf, pos, amt_to_read);
205 *content_len = amt_to_read;
206 linebuf[amt_to_read] = '\0';
207 return 1;
210 static int read_pam_header(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
212 int ret;
213 i64 pos = pos1;
214 int retval = 0;
215 int tupltype_line_count = 0;
216 char linebuf[200];
217 char token1buf[200];
218 char token2buf[200];
220 de_dbg(c, "header at %d", (int)pos1);
221 de_dbg_indent(c, 1);
223 pos += 3; // Skip "P7\n"
225 while(1) {
226 i64 content_len;
227 i64 total_len;
228 i64 curpos;
230 ret = read_pam_header_line(c, d, pg, pos, &content_len, &total_len,
231 linebuf, sizeof(linebuf));
232 pos += total_len;
234 if(!ret) {
235 de_err(c, "Invalid PAM header");
236 goto done;
239 if(content_len>0 && (de_getbyte(pos)=='#')) {
240 // comment line
241 pos += total_len;
242 continue;
245 curpos = 0;
246 if(!read_next_pam_token(c, d, pg, linebuf, sizeof(linebuf),
247 token1buf, sizeof(token1buf), &curpos))
249 goto done;
252 if(!de_strcmp(token1buf, "ENDHDR")) {
253 break;
256 // Other header lines have a param
257 if(!read_next_pam_token(c, d, pg, linebuf, sizeof(linebuf),
258 token2buf, sizeof(token2buf), &curpos))
260 goto done;
263 if(!de_strcmp(token1buf, "WIDTH")) {
264 pg->width = de_atoi64(token2buf);
266 else if(!de_strcmp(token1buf, "HEIGHT")) {
267 pg->height = de_atoi64(token2buf);
269 else if(!de_strcmp(token1buf, "DEPTH")) {
270 pg->pam_num_samples = de_atoi64(token2buf);
272 else if(!de_strcmp(token1buf, "MAXVAL")) {
273 pg->maxval = de_atoi64(token2buf);
275 else if(!de_strcmp(token1buf, "TUPLTYPE")) {
276 // FIXME: The "TUPLTYPE" line(s) is wacky, and seems underspecified.
277 // We do not support it correctly.
278 // But I doubt any real PAM encoders are pathological enough to
279 // require us to support its wackiness.
280 if(tupltype_line_count>0) {
281 de_err(c, "Multiple TUPLTYPE lines are not supported");
282 goto done;
284 tupltype_line_count++;
286 if(!de_strcmp(token2buf, "BLACKANDWHITE")) {
287 pg->pam_subtype = PAMSUBTYPE_GRAY;
288 pg->maxval = 1;
290 else if(!de_strcmp(token2buf, "BLACKANDWHITE_ALPHA")) {
291 pg->pam_subtype = PAMSUBTYPE_GRAY;
292 pg->has_alpha = 1;
293 pg->maxval = 1;
295 else if(!de_strcmp(token2buf, "GRAYSCALE")) {
296 pg->pam_subtype = PAMSUBTYPE_GRAY;
298 else if(!de_strcmp(token2buf, "GRAYSCALE_ALPHA")) {
299 pg->pam_subtype = PAMSUBTYPE_GRAY;
300 pg->has_alpha = 1;
302 else if(!de_strcmp(token2buf, "RGB")) {
303 pg->pam_subtype = PAMSUBTYPE_RGB;
305 else if(!de_strcmp(token2buf, "RGB_ALPHA")) {
306 pg->pam_subtype = PAMSUBTYPE_RGB;
307 pg->has_alpha = 1;
309 else {
310 de_err(c, "Unsupported color type");
311 goto done;
316 if(tupltype_line_count==0) {
317 // The TUPLTYPE field is technically optional, but the image is not
318 // portable without it.
319 switch(pg->pam_num_samples) {
320 case 1:
321 pg->pam_subtype = PAMSUBTYPE_GRAY;
322 break;
323 case 2:
324 pg->pam_subtype = PAMSUBTYPE_GRAY;
325 pg->has_alpha = 1;
326 break;
327 case 3:
328 pg->pam_subtype = PAMSUBTYPE_RGB;
329 break;
330 case 4:
331 pg->pam_subtype = PAMSUBTYPE_RGB;
332 pg->has_alpha = 1;
333 break;
336 if(pg->pam_subtype!=0) {
337 de_warn(c, "Color type not specified. Attempting to guess.");
341 pg->hdr_parse_pos = pos;
342 retval = 1;
343 done:
344 de_dbg_indent(c, -1);
345 return retval;
348 static int do_image_pbm_ascii(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
350 de_bitmap *img = NULL;
351 i64 xpos, ypos;
352 i64 pos = pos1;
353 u8 b;
354 u8 v;
356 img = de_bitmap_create(c, pg->width, pg->height, 1);
358 xpos=0; ypos=0;
359 while(1) {
360 if(pos >= c->infile->len) break; // end of file
361 if(ypos==(pg->height-1) && xpos>=pg->width) break; // end of image
362 if(ypos>=pg->height) break;
364 b = de_getbyte(pos++);
365 if(b=='1') v=0;
366 else if(b=='0') v=255;
367 else continue;
369 de_bitmap_setpixel_gray(img, xpos, ypos, v);
370 xpos++;
371 if(xpos>=pg->width) {
372 ypos++;
373 xpos=0;
377 de_bitmap_write_to_file_finfo(img, NULL, 0);
378 de_bitmap_destroy(img);
379 return 1;
382 static int do_image_pgm_ppm_ascii(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
384 de_bitmap *img = NULL;
385 i64 nsamples; // For both input and output
386 i64 pos = pos1;
387 i64 xpos, ypos, sampidx;
388 char samplebuf[32];
389 size_t samplebuf_used;
390 u8 b;
392 if(fmt_is_ppm(pg->fmt)) nsamples=3;
393 else nsamples=1;
395 img = de_bitmap_create(c, pg->width, pg->height, (int)nsamples);
397 xpos=0; ypos=0;
398 sampidx=0;
399 samplebuf_used=0;
401 while(1) {
402 if(pos >= c->infile->len) break; // end of file
403 if(ypos==(pg->height-1) && xpos>=pg->width) break; // end of image
404 if(ypos>=pg->height) break;
406 b = de_getbyte(pos++);
407 if(is_pnm_whitespace(b)) {
408 if(samplebuf_used>0) {
409 i64 v;
410 u8 v_adj;
412 // Completed a sample
413 samplebuf[samplebuf_used] = '\0'; // NUL terminate for de_atoi64()
414 v = de_atoi64((const char*)samplebuf);
415 v_adj = de_scale_n_to_255(pg->maxval, v);
416 samplebuf_used = 0;
418 if(nsamples>1) {
419 de_bitmap_setsample(img, xpos, ypos, sampidx, v_adj);
421 else {
422 de_bitmap_setpixel_gray(img, xpos, ypos, v_adj);
425 sampidx++;
426 if(sampidx>=nsamples) {
427 sampidx=0;
428 xpos++;
429 if(xpos>=pg->width) {
430 xpos=0;
431 ypos++;
436 else { // Skip extra whitespace
437 continue;
440 else {
441 // Non-whitespace. Save for later.
442 if(samplebuf_used < sizeof(samplebuf_used)-1) {
443 samplebuf[samplebuf_used++] = b;
447 de_bitmap_write_to_file(img, NULL, 0);
449 de_bitmap_destroy(img);
450 return 1;
453 static int do_image_pbm_binary(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
455 i64 rowspan;
457 rowspan = (pg->width+7)/8;
458 pg->image_data_len = rowspan * pg->height;
460 de_convert_and_write_image_bilevel2(c->infile, pos1, pg->width, pg->height,
461 rowspan, DE_CVTF_WHITEISZERO, NULL, 0);
462 return 1;
465 static int do_image_pgm_ppm_pam_binary(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
467 de_bitmap *img = NULL;
468 i64 rowspan;
469 i64 nsamples; // For both input and output
470 i64 bytes_per_sample;
471 i64 i, j, k;
472 i64 pos = pos1;
473 unsigned int samp_ori[4];
474 u8 samp_adj[4];
475 u32 clr;
476 int retval = 0;
478 if(pg->fmt==FMT_PAM) {
479 nsamples = pg->pam_num_samples;
481 if((pg->pam_subtype==PAMSUBTYPE_GRAY && !pg->has_alpha && nsamples==1) ||
482 (pg->pam_subtype==PAMSUBTYPE_GRAY && pg->has_alpha && nsamples==2) ||
483 (pg->pam_subtype==PAMSUBTYPE_RGB && !pg->has_alpha && nsamples==3) ||
484 (pg->pam_subtype==PAMSUBTYPE_RGB && pg->has_alpha && nsamples==4))
488 else {
489 de_err(c, "Unsupported PAM format");
490 goto done;
493 else if(fmt_is_ppm(pg->fmt)) {
494 nsamples=3;
496 else {
497 nsamples=1;
500 if(nsamples<1 || nsamples>4) {
501 de_err(c, "Unsupported number of samples: %d", (int)nsamples);
504 if(pg->maxval<=255) bytes_per_sample=1;
505 else bytes_per_sample=2;
507 rowspan = pg->width * nsamples * bytes_per_sample;
508 pg->image_data_len = rowspan * pg->height;
510 img = de_bitmap_create(c, pg->width, pg->height, (int)nsamples);
512 for(j=0; j<pg->height; j++) {
513 for(i=0; i<pg->width; i++) {
514 for(k=0; k<nsamples; k++) {
515 if(bytes_per_sample==1) {
516 samp_ori[k] = de_getbyte(pos++);
518 else {
519 samp_ori[k] = (unsigned int)de_getbyte(pos++) << 8 ;
520 samp_ori[k] |= (unsigned int)de_getbyte(pos++);
523 samp_adj[k] = de_scale_n_to_255(pg->maxval, samp_ori[k]);
526 switch(nsamples) {
527 case 4:
528 clr = DE_MAKE_RGBA(samp_adj[0], samp_adj[1], samp_adj[2], samp_adj[3]);
529 de_bitmap_setpixel_rgba(img, i, j, clr);
530 break;
531 case 3:
532 clr = DE_MAKE_RGB(samp_adj[0], samp_adj[1], samp_adj[2]);
533 de_bitmap_setpixel_rgb(img, i, j, clr);
534 break;
535 case 2:
536 clr = DE_MAKE_RGBA(samp_adj[0], samp_adj[0], samp_adj[0], samp_adj[1]);
537 de_bitmap_setpixel_rgba(img, i, j, clr);
538 break;
539 default: // Assuming nsamples==1
540 de_bitmap_setpixel_gray(img, i, j, samp_adj[0]);
545 de_bitmap_write_to_file(img, NULL, 0);
546 retval = 1;
548 done:
549 de_bitmap_destroy(img);
550 return retval;
553 static int do_image(deark *c, lctx *d, struct page_ctx *pg, i64 pos1)
555 int retval = 0;
557 de_dbg(c, "image data at %d", (int)pos1);
558 de_dbg_indent(c, 1);
560 if(pg->maxval<1 || pg->maxval>65535) {
561 de_err(c, "Invalid maxval: %d", (int)pg->maxval);
562 goto done;
564 if(!de_good_image_dimensions(c, pg->width, pg->height)) goto done;
566 switch(pg->fmt) {
567 case FMT_PBM_ASCII:
568 if(!do_image_pbm_ascii(c, d, pg, pos1)) goto done;
569 break;
570 case FMT_PGM_ASCII:
571 case FMT_PPM_ASCII:
572 if(!do_image_pgm_ppm_ascii(c, d, pg, pos1)) goto done;
573 break;
574 case FMT_PBM_BINARY:
575 if(!do_image_pbm_binary(c, d, pg, pos1)) goto done;
576 break;
577 case FMT_PGM_BINARY:
578 case FMT_PPM_BINARY:
579 case FMT_PAM:
580 if(!do_image_pgm_ppm_pam_binary(c, d, pg, pos1)) goto done;
581 break;
582 default:
583 de_err(c, "Unsupported PNM format");
584 goto done;
587 retval = 1;
589 done:
590 de_dbg_indent(c, -1);
591 return retval;
594 static int identify_fmt(deark *c, i64 pos)
596 u8 buf[3];
598 de_read(buf, pos, 3);
599 if(buf[0]!='P') return 0;
601 if(buf[1]=='7' && buf[2]==0x0a)
602 return FMT_PAM;
603 if(buf[1]>='1' && buf[1]<='6')
604 return buf[1] - '0';
605 return 0;
608 static const char *get_fmt_name(int fmt)
610 const char *name="unknown";
611 switch(fmt) {
612 case FMT_PBM_ASCII: name="PBM plain"; break;
613 case FMT_PGM_ASCII: name="PGM plain"; break;
614 case FMT_PPM_ASCII: name="PPM plain"; break;
615 case FMT_PBM_BINARY: name="PBM"; break;
616 case FMT_PGM_BINARY: name="PGM"; break;
617 case FMT_PPM_BINARY: name="PPM"; break;
618 case FMT_PAM: name="PAM"; break;
620 return name;
623 static int do_page(deark *c, lctx *d, int pagenum, i64 pos1)
625 struct page_ctx *pg = NULL;
626 int retval = 0;
628 de_dbg(c, "image at %d", (int)pos1);
629 de_dbg_indent(c, 1);
631 pg = de_malloc(c, sizeof(struct page_ctx));
633 pg->fmt = identify_fmt(c, pos1);
634 d->last_fmt = pg->fmt;
635 pg->fmt_name = get_fmt_name(pg->fmt);
636 if(pg->fmt==0) {
637 de_err(c, "Not PNM/PAM format");
638 goto done;
641 if(pagenum==0) {
642 de_declare_fmt(c, pg->fmt_name);
645 if(pg->fmt==FMT_PAM) {
646 if(!read_pam_header(c, d, pg, pos1)) goto done;
648 else {
649 if(!read_pnm_header(c, d, pg, pos1)) goto done;
652 if(!do_image(c, d, pg, pg->hdr_parse_pos)) {
653 goto done;
656 d->last_bytesused = (pg->hdr_parse_pos + pg->image_data_len) - pos1;
658 retval = 1;
659 done:
660 de_dbg_indent(c, -1);
661 de_free(c, pg);
662 return retval;
665 static void de_run_pnm(deark *c, de_module_params *mparams)
667 lctx *d = NULL;
668 i64 pos;
669 int ret;
670 int pagenum = 0;
672 d = de_malloc(c, sizeof(lctx));
674 pos = 0;
675 while(1) {
676 if(c->infile->len - pos < 8) break;
677 d->last_fmt = 0;
678 d->last_bytesused = 0;
679 ret = do_page(c, d, pagenum, pos);
680 if(!ret) break;
681 if(d->last_bytesused<8) break;
683 if(!fmt_is_binary(d->last_fmt))
685 break; // ASCII formats don't support multiple images
688 pos += d->last_bytesused;
689 pagenum++;
692 de_free(c, d);
695 static int de_identify_pnm(deark *c)
697 int fmt;
699 fmt = identify_fmt(c, 0);
700 if(fmt!=0) return 40;
701 return 0;
704 void de_module_pnm(deark *c, struct deark_module_info *mi)
706 mi->id = "pnm";
707 mi->desc = "Netpbm formats (PNM, PBM, PGM, PPM, PAM)";
708 mi->run_fn = de_run_pnm;
709 mi->identify_fn = de_identify_pnm;