1 // This file is part of Deark.
2 // Copyright (C) 2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // Fax3/Fax4/etc. decompressor
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 struct fax34_huffman_tree
{
13 struct fmtutil_huffman_decoder
*htwb
[2]; // [0]=white, [1]=black
14 struct fmtutil_huffman_decoder
*_2d_codes
;
18 struct de_dfilter_in_params
*dcmpri
;
19 struct de_dfilter_out_params
*dcmpro
;
20 struct de_dfilter_results
*dres
;
22 struct de_fax34_params
*fax34params
;
24 u8 rows_padded_to_next_byte
;
26 i64 image_width
, image_height
;
29 struct de_bitreader bitrd
;
31 u8
*curr_row
; // array[image_width]
32 u8
*prev_row
; // array[image_width]
35 u8
*tmp_row_packed
; // array[rowspan_final]
38 UI f2d_h_codes_remaining
;
40 i64 ypos
; // Row of the next pixel to be decoded
41 i64 a0
; // Next output pixel x-position. Can be -1; the 2-d decoder sometimes
42 // needs -1 and 0 to be distinct "reference pixel positions".
51 #define FAX2D_7ZEROES 0
54 #define FAX2D_EXTENSION 3
55 #define FAX2D_V_BIAS 100
57 static const u8 fax34_2dvals
[11] = {
58 FAX2D_7ZEROES
, // EOFB, etc...
59 FAX2D_EXTENSION
, // Extension...
60 FAX2D_V_BIAS
-3, // VL3
61 FAX2D_V_BIAS
+3, // VR3
62 FAX2D_V_BIAS
-2, // VL2
63 FAX2D_V_BIAS
+2, // VR2
66 FAX2D_V_BIAS
-1, // VL1
67 FAX2D_V_BIAS
+1, // VR1
71 static const u8 fax34_2dcodes
[11] = {
85 static const u8 fax34_2dcodelengths
[11] = {
86 7, 7, 7, 7, 6, 6, 4, 3, 3, 3, 1
89 static void create_fax34_huffman_tree2d(deark
*c
, struct fax34_huffman_tree
*f34ht
)
93 f34ht
->_2d_codes
= fmtutil_huffman_create_decoder(c
, 11, 11);
95 // Note that this tree could be constructed using
96 // fmtutil_huffman_make_canonical_tree(..., FMTUTIL_MCTFLAG_LEFT_ALIGN_BRANCHES),
97 // so we don't actually need the fax34_2dcodes table. But it's not worth it, to
98 // get rid of an 11-byte table.
100 for(i
=0; i
<DE_ARRAYCOUNT(fax34_2dcodes
); i
++) {
101 fmtutil_huffman_add_code(c
, f34ht
->_2d_codes
->bk
, (u64
)fax34_2dcodes
[i
],
102 (UI
)fax34_2dcodelengths
[i
], (fmtutil_huffman_valtype
)fax34_2dvals
[i
]);
106 // To make a well-formed Huffman tree, we need to account for all the codes
107 // beginning with 8 zeroes. Generally, this will be the start of an EOL or sync
108 // code, the remainder of which will be handled with special logic.
109 #define FAX1D_8ZEROES (-1)
111 // Same for both white & black codes. 0 <= i <= 104
112 static fmtutil_huffman_valtype
getfax34val(size_t i
)
115 return (fmtutil_huffman_valtype
)i
;
118 return (fmtutil_huffman_valtype
)((i
-63)*64);
120 return FAX1D_8ZEROES
; // i==104, presumably
123 // Some codes in the next two tables are up to 13 bits in size, but bits before the
124 // last 8 are always 0, so we can use an 8-bit integer type.
125 static const u8 fax34whitecodes
[105] = {
126 0x35,0x7,0x7,0x8,0xb,0xc,0xe,0xf,0x13,0x14,0x7,0x8,0x8,0x3,0x34,0x35,0x2a,0x2b,0x27,
127 0xc,0x8,0x17,0x3,0x4,0x28,0x2b,0x13,0x24,0x18,0x2,0x3,0x1a,0x1b,0x12,0x13,0x14,0x15,
128 0x16,0x17,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x4,0x5,0xa,0xb,0x52,0x53,0x54,0x55,0x24,0x25,
129 0x58,0x59,0x5a,0x5b,0x4a,0x4b,0x32,0x33,0x34,0x1b,0x12,0x17,0x37,0x36,0x37,0x64,0x65,
130 0x68,0x67,0xcc,0xcd,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0x98,0x99,0x9a,
131 0x18,0x9b,0x8,0xc,0xd,0x12,0x13,0x14,0x15,0x16,0x17,0x1c,0x1d,0x1e,0x1f,0
133 static const u8 fax34blackcodes
[105] = {
134 0x37,0x2,0x3,0x2,0x3,0x3,0x2,0x3,0x5,0x4,0x4,0x5,0x7,0x4,0x7,0x18,0x17,0x18,0x8,0x67,
135 0x68,0x6c,0x37,0x28,0x17,0x18,0xca,0xcb,0xcc,0xcd,0x68,0x69,0x6a,0x6b,0xd2,0xd3,0xd4,
136 0xd5,0xd6,0xd7,0x6c,0x6d,0xda,0xdb,0x54,0x55,0x56,0x57,0x64,0x65,0x52,0x53,0x24,0x37,
137 0x38,0x27,0x28,0x58,0x59,0x2b,0x2c,0x5a,0x66,0x67,0xf,0xc8,0xc9,0x5b,0x33,0x34,0x35,
138 0x6c,0x6d,0x4a,0x4b,0x4c,0x4d,0x72,0x73,0x74,0x75,0x76,0x77,0x52,0x53,0x54,0x55,0x5a,
139 0x5b,0x64,0x65,0x8,0xc,0xd,0x12,0x13,0x14,0x15,0x16,0x17,0x1c,0x1d,0x1e,0x1f,0
142 // High 4 bits is the length of the white code.
143 // Low 4 bits is the length of the black code.
144 static const u8 fax34codelengths
[105] = {
145 0x8a,0x63,0x42,0x42,0x43,0x44,0x44,0x45,0x56,0x56,0x57,0x57,0x67,0x68,0x68,0x69,
146 0x6a,0x6a,0x7a,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b,0x7b,0x7c,0x7c,0x7c,0x8c,0x8c,0x8c,
147 0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,
148 0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,0x8c,
149 0x5a,0x5c,0x6c,0x7c,0x8c,0x8c,0x8c,0x8d,0x8d,0x8d,0x9d,0x9d,0x9d,0x9d,0x9d,0x9d,
150 0x9d,0x9d,0x9d,0x9d,0x9d,0x9d,0x9d,0x9d,0x9d,0x6d,0x9d,0xbb,0xbb,0xbb,0xcc,0xcc,
151 0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0xcc,0x88
154 static UI
getfax34codelength(UI isblack
, size_t i
)
158 return (UI
)(fax34codelengths
[i
] & 0x0f);
159 return (UI
)(fax34codelengths
[i
] >> 4);
164 static struct fax34_huffman_tree
*create_fax34_huffman_tree(deark
*c
, int need_2d
)
166 struct fax34_huffman_tree
*f34ht
;
168 static const size_t num_white_codes
= DE_ARRAYCOUNT(fax34whitecodes
);
169 static const size_t num_black_codes
= DE_ARRAYCOUNT(fax34blackcodes
);
171 f34ht
= de_malloc(c
, sizeof(struct fax34_huffman_tree
));
172 f34ht
->htwb
[0] = fmtutil_huffman_create_decoder(c
, (i64
)num_white_codes
+10, 0);
173 f34ht
->htwb
[1] = fmtutil_huffman_create_decoder(c
, (i64
)num_black_codes
+10, 0);
176 create_fax34_huffman_tree2d(c
, f34ht
);
179 for(i
=0; i
<num_white_codes
; i
++) {
180 fmtutil_huffman_add_code(c
, f34ht
->htwb
[0]->bk
, (u64
)fax34whitecodes
[i
],
181 getfax34codelength(0, i
), getfax34val(i
));
183 for(i
=0; i
<num_black_codes
; i
++) {
184 fmtutil_huffman_add_code(c
, f34ht
->htwb
[1]->bk
, (u64
)fax34blackcodes
[i
],
185 getfax34codelength(1, i
), getfax34val(i
));
191 static void destroy_fax34_huffman_tree(deark
*c
, struct fax34_huffman_tree
*f34ht
)
194 fmtutil_huffman_destroy_decoder(c
, f34ht
->htwb
[0]);
195 fmtutil_huffman_destroy_decoder(c
, f34ht
->htwb
[1]);
196 if(f34ht
->_2d_codes
) fmtutil_huffman_destroy_decoder(c
, f34ht
->_2d_codes
);
200 static void fax34_on_eol(deark
*c
, struct fax_ctx
*fc
, int is_real
)
204 de_dbg3(c
, "%sEOL", is_real
?"":"implicit ");
206 if(fc
->ypos
>= fc
->image_height
) goto done
;
208 // [Pack curr_row into bits (tmp_row_packed)]
209 de_zeromem(fc
->tmp_row_packed
, (size_t)fc
->rowspan_final
);
210 for(i
=0; i
<fc
->image_width
; i
++) {
211 if(fc
->curr_row
[i
]) {
212 fc
->tmp_row_packed
[i
/8] |= 1U<<(7-i
%8);
216 // [Write tmp_row_packed to fc->dcmpro]
217 dbuf_write(fc
->dcmpro
->f
, fc
->tmp_row_packed
, fc
->rowspan_final
);
218 fc
->nbytes_written
+= fc
->rowspan_final
;
221 de_memcpy(fc
->prev_row
, fc
->curr_row
, (size_t)fc
->image_width
);
224 // initialize curr_row
225 de_zeromem(fc
->curr_row
, (size_t)fc
->image_width
);
231 fc
->pending_run_len
= 0;
232 fc
->f2d_h_codes_remaining
= 0;
233 fc
->have_read_tag_bit
= 0;
236 // Record run_len pixels as fc0->a0_color, updating fc->a0.
237 // respect_negative_a0==0: If fc->a0 == -1, sets it to 0 first.
238 // respect_negative_a0==1: If fc->a0 == -1, only sets rec_len-1 pixels.
239 static void fax34_record_run(deark
*c
, struct fax_ctx
*fc
, i64 run_len
,
240 int respect_negative_a0
)
243 u8 color
= fc
->a0_color
;
245 de_dbg3(c
, "run c=%u len=%d", (UI
)color
, (int)run_len
);
248 if(respect_negative_a0
) {
254 if(color
==0) { // Pixels are initialized to 0, don't need to set them.
258 for(i
=0; (i
<run_len
) && (fc
->a0
< fc
->image_width
); i
++) {
259 fc
->curr_row
[fc
->a0
++] = color
;
263 if(fc
->a0
> fc
->image_width
) {
264 fc
->a0
= fc
->image_width
;
268 static void init_fax34_bitreader(deark
*c
, struct fax_ctx
*fc
)
270 de_zeromem(&fc
->bitrd
, sizeof(struct de_bitreader
));
271 fc
->bitrd
.f
= fc
->dcmpri
->f
;
272 fc
->bitrd
.curpos
= fc
->dcmpri
->pos
;
273 fc
->bitrd
.endpos
= fc
->dcmpri
->pos
+ fc
->dcmpri
->len
;
274 fc
->bitrd
.bbll
.is_lsb
= fc
->fax34params
->is_lsb
;
277 // Read up to and including the next '1' bit
278 static int fax34_finish_sync(deark
*c
, struct fax_ctx
*fc
, i64 max_bits_to_search
,
281 i64 nbits_searched
= 0;
287 if(nbits_searched
>= max_bits_to_search
) {
291 n
= de_bitreader_getbits(&fc
->bitrd
, 1);
292 if(fc
->bitrd
.eof_flag
) goto done
;
300 if(pnbits_read
) *pnbits_read
= nbits_searched
;
304 // Read up to and including run of 8 or more '0' bits, followed by a '1' bit.
305 static int fax34_full_sync(deark
*c
, struct fax_ctx
*fc
, i64 max_bits_to_search
)
307 i64 nbits_searched
= 0;
314 if(nbits_searched
>= max_bits_to_search
) {
317 n
= de_bitreader_getbits(&fc
->bitrd
, 1);
318 if(fc
->bitrd
.eof_flag
) goto done
;
331 retval
= fax34_finish_sync(c
, fc
, max_bits_to_search
-nbits_searched
, NULL
);
337 // Sets fc->b1 appropriately, according to fc->a0 and fc->prev_row and
339 static void find_b1(struct fax_ctx
*fc
)
343 for(i
=fc
->a0
+1; i
<fc
->image_width
; i
++) {
346 // first prev_row pixel to the right of a0, of opposite color to a0_color
347 if(fc
->prev_row
[i
]==fc
->a0_color
) continue;
349 // Looking for a "changing element". I.e. if both it and the pixel to the left
350 // exist, they must have different colors.
352 // Leftmost pixel is "changing" if it is black
353 if(fc
->prev_row
[i
]==0) continue;
356 if(fc
->prev_row
[i
-1] == fc
->prev_row
[i
]) continue;
362 fc
->b1
= fc
->image_width
;
365 static void find_b1_and_b2(struct fax_ctx
*fc
)
371 for(i
=fc
->b1
+1; i
<fc
->image_width
; i
++) {
372 // Looking for a "changing element", i.e. one having a different color from
373 // the pixel to its left.
374 if(fc
->prev_row
[i
-1] == fc
->prev_row
[i
]) continue;
378 fc
->b2
= fc
->image_width
;
381 static void do_decompress_fax34(deark
*c
, struct fax_ctx
*fc
,
382 struct fax34_huffman_tree
*f34ht
)
385 static const char errmsg_UNEXPECTEDEOD
[] = "Unexpected end of compressed data";
386 static const char errmsg_HUFFDECODEERR
[] = "Huffman decode error";
387 static const char errmsg_NOEOL
[] = "Failed to find EOL mark";
388 static const char errmsg_UNSUPPEXT
[] = "Decoding error or unsupported Fax extension";
391 init_fax34_bitreader(c
, fc
);
393 if(fc
->has_eol_codes
) {
394 if(!fax34_full_sync(c
, fc
, 1024)) {
395 if(fc
->fax34params
->tiff_cmpr_meth
==3 && fc
->dcmpri
->len
>0) {
396 de_dbg(c
, "[no sync mark found, trying to compensate]");
397 fc
->has_eol_codes
= 0;
398 fc
->rows_padded_to_next_byte
= 0;
399 init_fax34_bitreader(c
, fc
);
402 de_strlcpy(errmsg
, "Failed to find sync mark", sizeof(errmsg
));
408 fc
->pending_run_len
= 0;
409 fc
->f2d_h_codes_remaining
= 0;
413 fc
->have_read_tag_bit
= 0;
418 fmtutil_huffman_valtype val
= 0;
420 if(fc
->ypos
>= fc
->image_height
||
421 ((fc
->ypos
== fc
->image_height
-1) && (fc
->a0
>= fc
->image_width
)))
423 goto done
; // Normal completion
425 if(fc
->dcmpro
->len_known
&& fc
->nbytes_written
>fc
->dcmpro
->expected_len
) {
426 goto done
; // Sufficient output
429 if(fc
->bitrd
.eof_flag
) {
430 de_strlcpy(errmsg
, errmsg_UNEXPECTEDEOD
, sizeof(errmsg
));
434 if(!fc
->has_eol_codes
&& (fc
->a0
>= fc
->image_width
) && fc
->f2d_h_codes_remaining
==0) {
435 if(fc
->rows_padded_to_next_byte
) {
436 de_bitbuf_lowlevel_empty(&fc
->bitrd
.bbll
);
438 fax34_on_eol(c
, fc
, 0);
441 if(fc
->is_2d
&& fc
->fax34params
->tiff_cmpr_meth
==3 && !fc
->have_read_tag_bit
) {
442 fc
->tag_bit
= (u8
)de_bitreader_getbits(&fc
->bitrd
, 1);
443 fc
->have_read_tag_bit
= 1;
448 if(fc
->f2d_h_codes_remaining
== 0) {
449 if(fc
->fax34params
->tiff_cmpr_meth
==3) {
461 ret
= fmtutil_huffman_read_next_value(f34ht
->_2d_codes
->bk
, &fc
->bitrd
, &val
, NULL
);
463 if(fc
->bitrd
.eof_flag
) {
464 de_strlcpy(errmsg
, errmsg_UNEXPECTEDEOD
, sizeof(errmsg
));
467 de_strlcpy(errmsg
, errmsg_HUFFDECODEERR
, sizeof(errmsg
));
471 de_dbg3(c
, "val: %d", (int)val
);
473 if(val
>=FAX2D_V_BIAS
-3 && val
<=FAX2D_V_BIAS
+3) { // VL(3), ..., V(0), ..., VR(3)
477 de_dbg3(c
, "at %d b1=%d", (int)fc
->a0
, (int)fc
->b1
);
478 run_len
= fc
->b1
- fc
->a0
+ ((i64
)val
-FAX2D_V_BIAS
);
480 fax34_record_run(c
, fc
, run_len
, 1);
481 fc
->a0_color
= fc
->a0_color
?0:1;
483 else if(val
==FAX2D_P
) {
485 fax34_record_run(c
, fc
, fc
->b2
- fc
->a0
, 1);
487 else if(val
==FAX2D_H
) {
488 fc
->f2d_h_codes_remaining
= 2;
490 else if(val
==FAX2D_7ZEROES
) {
491 if(fc
->has_eol_codes
) {
492 if(!fax34_finish_sync(c
, fc
, 64, NULL
)) {
493 de_strlcpy(errmsg
, errmsg_NOEOL
, sizeof(errmsg
));
496 fax34_on_eol(c
, fc
, 1);
499 // Full EOFB should be 000000000001000000000001
504 else if(val
==FAX2D_EXTENSION
) {
507 extnum
= (UI
)de_bitreader_getbits(&fc
->bitrd
, 3);
508 // TODO?: Support uncompressed mode
509 de_snprintf(errmsg
, sizeof(errmsg
), "%s (%u)", errmsg_UNSUPPEXT
, extnum
);
513 goto done
; // Should be impossible
517 ret
= fmtutil_huffman_read_next_value(f34ht
->htwb
[(UI
)fc
->a0_color
]->bk
, &fc
->bitrd
, &val
, NULL
);
519 if(fc
->bitrd
.eof_flag
) {
520 de_strlcpy(errmsg
, errmsg_UNEXPECTEDEOD
, sizeof(errmsg
));
523 de_strlcpy(errmsg
, errmsg_HUFFDECODEERR
, sizeof(errmsg
));
528 if(val
==FAX1D_8ZEROES
) {
529 if(!fc
->has_eol_codes
|| fc
->f2d_h_codes_remaining
!=0) {
530 de_strlcpy(errmsg
, errmsg_HUFFDECODEERR
, sizeof(errmsg
));
535 if(!fax34_finish_sync(c
, fc
, 64, &nbits_read
)) {
536 de_strlcpy(errmsg
, errmsg_NOEOL
, sizeof(errmsg
));
540 de_strlcpy(errmsg
, errmsg_UNSUPPEXT
, sizeof(errmsg
));
541 // TODO: Attempt error recovery?
544 // TODO: Check for premature EOL?
545 fax34_on_eol(c
, fc
, 1);
548 fc
->pending_run_len
+= (i64
)val
;
549 fax34_record_run(c
, fc
, fc
->pending_run_len
, 0);
550 fc
->pending_run_len
= 0;
551 fc
->a0_color
= fc
->a0_color
?0:1;
552 if(fc
->f2d_h_codes_remaining
>0) {
553 fc
->f2d_h_codes_remaining
--;
556 else { // make-up code
557 fc
->pending_run_len
+= (i64
)val
;
564 fax34_on_eol(c
, fc
, 0); // Make sure we emit the last row
569 de_warn(c
, "[%s] Failed to decode entire strip: %s", fc
->modname
, errmsg
);
572 de_dfilter_set_errorf(c
, fc
->dres
, fc
->modname
, "%s", errmsg
);
577 // Note - This always decodes white pixels to bit value 0, and black to 1.
578 void fmtutil_fax34_codectype1(deark
*c
, struct de_dfilter_in_params
*dcmpri
,
579 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
580 void *codec_private_params
)
582 struct fax_ctx
*fc
= NULL
;
583 struct fax34_huffman_tree
*f34ht
= NULL
;
585 fc
= de_malloc(c
, sizeof(struct fax_ctx
));
586 fc
->modname
= "fax_decode";
587 fc
->fax34params
= (struct de_fax34_params
*)codec_private_params
;
591 fc
->image_width
= fc
->fax34params
->image_width
;
592 fc
->image_height
= fc
->fax34params
->image_height
;
594 if((fc
->fax34params
->tiff_cmpr_meth
==3 && (fc
->fax34params
->t4options
& 0x1)) ||
595 (fc
->fax34params
->tiff_cmpr_meth
==4))
600 if(fc
->fax34params
->tiff_cmpr_meth
==2) {
601 fc
->has_eol_codes
= 0;
602 fc
->rows_padded_to_next_byte
= 1;
604 else if(fc
->fax34params
->tiff_cmpr_meth
==3) {
605 fc
->has_eol_codes
= 1;
608 if(fc
->image_width
< 1 ||
609 fc
->image_width
> c
->max_image_dimension
)
614 fc
->rowspan_final
= (fc
->image_width
+7)/8;
615 if(fc
->rowspan_final
< fc
->fax34params
->out_rowspan
) {
616 fc
->rowspan_final
= fc
->fax34params
->out_rowspan
;
619 fc
->curr_row
= de_malloc(c
, fc
->image_width
);
620 fc
->prev_row
= de_malloc(c
, fc
->image_width
);
621 fc
->tmp_row_packed
= de_malloc(c
, fc
->rowspan_final
);
623 f34ht
= create_fax34_huffman_tree(c
, (int)fc
->is_2d
);
624 do_decompress_fax34(c
, fc
, f34ht
);
627 destroy_fax34_huffman_tree(c
, f34ht
);
629 de_free(c
, fc
->curr_row
);
630 de_free(c
, fc
->prev_row
);
631 de_free(c
, fc
->tmp_row_packed
);