1 // This file is part of Deark.
2 // Copyright (C) 2019-2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // This file is in the "foreign" subdirectory because I want to make it usable
6 // independently of Deark. I haven't decided if it will be released by itself.
9 #define DELZW_UINT8 unsigned char
12 #define DELZW_UINT16 uint16_t
15 #define DELZW_UINT32 uint32_t
18 #define DELZW_OFF_T off_t
21 #define DELZW_MEMCPY memcpy
24 #define DELZW_STRLCPY strlcpy
26 #ifndef DELZW_VSNPRINTF
27 #define DELZW_VSNPRINTF vsnprintf
30 #define DELZW_CALLOC(u, nmemb, size, ty) (ty)calloc((nmemb), (size))
33 #define DELZW_FREE(u, ptr) free(ptr)
35 #ifndef DELZW_GNUC_ATTRIBUTE
36 #define DELZW_GNUC_ATTRIBUTE(x)
39 #define DELZW_CODE DELZW_UINT32 // int type used in most cases
40 #define DELZW_CODE_MINRANGE DELZW_UINT16 // int type used for parents in table entries
41 #define DELZW_MINMINCODESIZE 3
42 #define DELZW_MAXMAXCODESIZE 16
43 #define DELZW_NBITS_TO_MAXCODE(n) ((DELZW_CODE)((1<<(n))-1))
44 #define DELZW_NBITS_TO_NCODES(n) ((DELZW_CODE)(1<<(n)))
46 struct delzwctx_struct
;
47 typedef struct delzwctx_struct delzwctx
;
49 struct delzw_tableentry
{
50 DELZW_CODE_MINRANGE parent
;
52 #define DELZW_CODETYPE_INVALID 0x00
53 #define DELZW_CODETYPE_STATIC 0x01
54 #define DELZW_CODETYPE_DYN_UNUSED 0x02
55 #define DELZW_CODETYPE_DYN_USED 0x03
56 #define DELZW_CODETYPE_CLEAR 0x08
57 #define DELZW_CODETYPE_STOP 0x09
58 #define DELZW_CODETYPE_SPECIAL 0x0f
63 struct delzw_tableentry2
{
64 #define DELZW_NEXTPTR_NONE 0xffff // Note - This table is only used with 12-bit codes
65 DELZW_CODE_MINRANGE next
;
68 // Normally, the client must consume all the bytes in 'buf', and return 'size'.
69 // The other options are:
70 // - Set *outflags to 1, and return a number <='size'. This indicates that
71 // that decompression can stop; the client has all the data it needs.
72 // - Return a number !='size'. This is interpreted as a write error, and
73 // decompression will stop.
74 typedef size_t (*delzw_cb_write_type
)(delzwctx
*dc
, const DELZW_UINT8
*buf
, size_t size
,
75 unsigned int *outflags
);
77 typedef void (*delzw_cb_debugmsg_type
)(delzwctx
*dc
, int level
, const char *msg
);
78 typedef void (*delzw_cb_generic_type
)(delzwctx
*dc
);
80 struct delzwctx_struct
{
81 // Fields the user can or must set:
84 delzw_cb_write_type cb_write
;
85 delzw_cb_debugmsg_type cb_debugmsg
;
86 delzw_cb_generic_type cb_after_header_parsed
;
88 #define DELZW_BASEFMT_UNIXCOMPRESS 1
89 #define DELZW_BASEFMT_GIF 2
90 #define DELZW_BASEFMT_ZIPSHRINK 3
91 #define DELZW_BASEFMT_ZOOLZD 4
92 #define DELZW_BASEFMT_TIFFOLD 5
93 #define DELZW_BASEFMT_TIFF 6
94 #define DELZW_BASEFMT_ARC5 7
97 #define DELZW_HEADERTYPE_NONE 0
98 #define DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE 1
99 #define DELZW_HEADERTYPE_ARC1BYTE 2
102 unsigned int gif_root_codesize
;
104 int stop_on_invalid_code
;
106 int output_len_known
;
107 DELZW_OFF_T output_expected_len
;
109 // Fields that may be set by the user, or derived from other fields:
110 int auto_inc_codesize
;
111 int unixcompress_has_clear_code
;
112 int arc5_has_stop_code
;
113 unsigned int min_codesize
;
114 unsigned int max_codesize
;
119 int early_codesize_inc
;
120 int has_partial_clearing
;
124 DELZW_UINT8 header_unixcompress_mode
;
125 DELZW_UINT8 header_unixcompress_max_codesize
;
126 DELZW_UINT8 header_unixcompress_block_mode
; // = 1 or 0
129 #define DELZW_ERRCODE_OK 0
130 #define DELZW_ERRCODE_GENERIC_ERROR 1
131 #define DELZW_ERRCODE_BAD_CDATA 2
132 #define DELZW_ERRCODE_MALLOC_FAILED 3
133 #define DELZW_ERRCODE_WRITE_FAILED 7
134 #define DELZW_ERRCODE_INSUFFICIENT_CDATA 8
135 #define DELZW_ERRCODE_UNSUPPORTED_OPTION 9
136 #define DELZW_ERRCODE_INTERNAL_ERROR 10
138 int stop_writing_flag
;
140 #define DELZW_STATE_INIT 0
141 #define DELZW_STATE_READING_HEADER 1
142 #define DELZW_STATE_READING_CODES 2
143 #define DELZW_STATE_FINISHED 3
145 DELZW_OFF_T total_nbytes_processed
;
146 DELZW_OFF_T uncmpr_nbytes_written
; // (Not including those in outbuf)
147 DELZW_OFF_T uncmpr_nbytes_decoded
; // (Including those in outbuf)
149 DELZW_OFF_T ncodes_in_this_bitgroup
;
150 DELZW_OFF_T nbytes_left_to_skip
;
152 unsigned int curr_codesize
;
156 DELZW_CODE last_code_added
;
157 DELZW_UINT8 last_value
;
158 DELZW_CODE highest_code_ever_used
;
159 DELZW_CODE free_code_search_start
;
160 DELZW_CODE first_dynamic_code
;
161 int escaped_code_is_pending
;
163 unsigned int bitreader_buf
;
164 unsigned int bitreader_nbits_in_buf
;
166 size_t outbuf_nbytes_used
;
168 DELZW_CODE ct_capacity
;
169 DELZW_CODE ct_code_count
; // Note - Not always maintained if not needed
170 struct delzw_tableentry
*ct
;
171 struct delzw_tableentry2
*ct2
;
173 DELZW_UINT8 header_buf
[3];
175 size_t valbuf_capacity
;
180 #define DELZW_OUTBUF_SIZE 1024
181 DELZW_UINT8 outbuf
[DELZW_OUTBUF_SIZE
];
184 static void delzw_debugmsg(delzwctx
*dc
, int level
, const char *fmt
, ...)
185 DELZW_GNUC_ATTRIBUTE ((format (printf
, 3, 4)));
187 static void delzw_debugmsg(delzwctx
*dc
, int level
, const char *fmt
, ...)
192 if(!dc
->cb_debugmsg
) return;
193 if(level
>dc
->debug_level
) return;
196 DELZW_VSNPRINTF(msg
, sizeof(msg
), fmt
, ap
);
198 dc
->cb_debugmsg(dc
, level
, msg
);
201 static void delzw_dumptable(delzwctx
*dc
)
204 for(k
=0; k
<dc
->highest_code_ever_used
; k
++) {
205 delzw_debugmsg(dc
, 4, "[%d] ty=%d p=%d v=%d f=%d",
206 (int)k
, (int)dc
->ct
[k
].codetype
, (int)dc
->ct
[k
].parent
,
207 (int)dc
->ct
[k
].value
, (int)dc
->ct
[k
].flags
);
211 static void delzw_stop(delzwctx
*dc
, const char *reason
)
213 if(dc
->state
== DELZW_STATE_FINISHED
) return;
214 delzw_debugmsg(dc
, 2, "stopping due to %s", reason
);
215 dc
->state
= DELZW_STATE_FINISHED
;
218 static void delzw_set_errorf(delzwctx
*dc
, int errcode
, const char *fmt
, ...)
219 DELZW_GNUC_ATTRIBUTE ((format (printf
, 3, 4)));
221 static void delzw_set_errorf(delzwctx
*dc
, int errcode
, const char *fmt
, ...)
225 delzw_stop(dc
, "error");
226 if(dc
->errcode
) return;
227 dc
->errcode
= errcode
;
229 DELZW_VSNPRINTF(dc
->errmsg
, sizeof(dc
->errmsg
), fmt
, ap
);
233 static void delzw_set_error(delzwctx
*dc
, int errcode
, const char *msg
)
235 delzw_stop(dc
, "error");
236 if(dc
->errcode
) return;
237 dc
->errcode
= errcode
;
238 if(!msg
|| !msg
[0]) {
239 msg
= "LZW decompression error";
241 DELZW_STRLCPY(dc
->errmsg
, msg
, sizeof(dc
->errmsg
));
244 static delzwctx
*delzw_create(void *userdata
)
248 dc
= DELZW_CALLOC(userdata
, 1, sizeof(delzwctx
), delzwctx
*);
249 dc
->userdata
= userdata
;
253 static void delzw_destroy(delzwctx
*dc
)
256 DELZW_FREE(dc
->userdata
, dc
->ct
);
257 if(dc
->ct2
) DELZW_FREE(dc
->userdata
, dc
->ct2
);
258 DELZW_FREE(dc
->userdata
, dc
->valbuf
);
259 DELZW_FREE(dc
->userdata
, dc
);
262 static void delzw_write_unbuffered(delzwctx
*dc
, const DELZW_UINT8
*buf
, size_t n1
)
264 DELZW_OFF_T nbytes_written
;
265 unsigned int outflags
= 0;
266 DELZW_OFF_T n
= (DELZW_OFF_T
)n1
;
268 if(dc
->stop_writing_flag
) return;
269 if(dc
->output_len_known
) {
270 if(dc
->uncmpr_nbytes_written
+ n
> dc
->output_expected_len
) {
271 n
= dc
->output_expected_len
- dc
->uncmpr_nbytes_written
;
275 nbytes_written
= (DELZW_OFF_T
)dc
->cb_write(dc
, buf
, (size_t)n
, &outflags
);
276 if((outflags
& 0x1) && (nbytes_written
<=n
)) {
277 dc
->stop_writing_flag
= 1;
278 delzw_stop(dc
, "client request");
280 else if(nbytes_written
!= n
) {
281 delzw_set_error(dc
, DELZW_ERRCODE_WRITE_FAILED
, "Write failed");
284 dc
->uncmpr_nbytes_written
+= nbytes_written
;
287 static void delzw_flush(delzwctx
*dc
)
289 if(dc
->outbuf_nbytes_used
<1) return;
290 delzw_write_unbuffered(dc
, dc
->outbuf
, dc
->outbuf_nbytes_used
);
291 dc
->outbuf_nbytes_used
= 0;
294 static void delzw_write(delzwctx
*dc
, const DELZW_UINT8
*buf
, size_t n
)
296 if(dc
->errcode
) return;
298 // If there's enough room in outbuf, copy it there, and we're done.
299 if(dc
->outbuf_nbytes_used
+ n
<= DELZW_OUTBUF_SIZE
) {
300 DELZW_MEMCPY(&dc
->outbuf
[dc
->outbuf_nbytes_used
], buf
, n
);
301 dc
->outbuf_nbytes_used
+= n
;
305 // Flush anything currently in outbuf.
307 if(dc
->stop_writing_flag
) return;
309 // If too big for outbuf, write without buffering.
310 if(n
> DELZW_OUTBUF_SIZE
) {
311 delzw_write_unbuffered(dc
, buf
, n
);
315 // Otherwise copy to outbuf
316 DELZW_MEMCPY(dc
->outbuf
, buf
, n
);
317 dc
->outbuf_nbytes_used
+= n
;
320 static void delzw_process_unixcompress_3byteheader(delzwctx
*dc
)
322 if(dc
->header_buf
[0]!=0x1f || dc
->header_buf
[1]!=0x9d) {
323 delzw_set_error(dc
, DELZW_ERRCODE_BAD_CDATA
, "Not in compress format");
327 dc
->header_unixcompress_mode
= dc
->header_buf
[2];
328 dc
->header_unixcompress_max_codesize
= (dc
->header_unixcompress_mode
& 0x1f);
329 dc
->header_unixcompress_block_mode
= (dc
->header_unixcompress_mode
& 0x80) ? 1 : 0;
330 delzw_debugmsg(dc
, 2, "LZW mode=0x%02x, maxbits=%u, blockmode=%u",
331 (unsigned int)dc
->header_unixcompress_mode
,
332 (unsigned int)dc
->header_unixcompress_max_codesize
,
333 (unsigned int)dc
->header_unixcompress_block_mode
);
335 dc
->max_codesize
= (unsigned int)dc
->header_unixcompress_max_codesize
;
336 dc
->unixcompress_has_clear_code
= (int)dc
->header_unixcompress_block_mode
;
339 static void delzw_process_arc_1byteheader(delzwctx
*dc
)
341 dc
->header_unixcompress_max_codesize
= (dc
->header_buf
[0] & 0x1f);
342 dc
->max_codesize
= (unsigned int)dc
->header_unixcompress_max_codesize
;
343 delzw_debugmsg(dc
, 2, "max code size: %u", dc
->max_codesize
);
344 dc
->unixcompress_has_clear_code
= 1;
347 static void delzw_clear_bitbuf(delzwctx
*dc
)
349 dc
->bitreader_nbits_in_buf
= 0;
350 dc
->bitreader_buf
= 0;
353 static void delzw_add_byte_to_bitbuf(delzwctx
*dc
, DELZW_UINT8 b
)
355 // Add a byte's worth of bits to the pending code
357 dc
->bitreader_buf
|= ((unsigned int)b
)<<dc
->bitreader_nbits_in_buf
;
360 dc
->bitreader_buf
= (dc
->bitreader_buf
<<8) | b
;
362 dc
->bitreader_nbits_in_buf
+= 8;
365 static DELZW_CODE
delzw_get_code(delzwctx
*dc
, unsigned int nbits
)
370 n
= dc
->bitreader_buf
& ((1U<<nbits
)-1U);
371 dc
->bitreader_buf
>>= nbits
;
372 dc
->bitreader_nbits_in_buf
-= nbits
;
375 dc
->bitreader_nbits_in_buf
-= nbits
;
376 n
= (dc
->bitreader_buf
>> dc
->bitreader_nbits_in_buf
) & ((1U<<nbits
)-1U);
378 return (DELZW_CODE
)n
;
381 // Is this a valid code with a value (a static, or in-use dynamic code)?
382 static int delzw_code_is_in_table(delzwctx
*dc
, DELZW_CODE code
)
384 DELZW_UINT8 codetype
= dc
->ct
[code
].codetype
;
386 if(codetype
==DELZW_CODETYPE_STATIC
) return 1;
387 if(codetype
==DELZW_CODETYPE_DYN_USED
) return 1;
391 // Decode an LZW code to one or more values, and write the values.
392 // Updates ctx->last_value.
393 static void delzw_emit_code(delzwctx
*dc
, DELZW_CODE code1
)
395 DELZW_CODE code
= code1
;
396 size_t valbuf_pos
= dc
->valbuf_capacity
; // = First entry that's used
399 if(code
>= dc
->ct_capacity
) {
400 delzw_set_errorf(dc
, DELZW_ERRCODE_GENERIC_ERROR
, "Bad LZW code (%d)", (int)code
);
405 // We must be in an infinite loop (probably an internal error).
406 delzw_set_error(dc
, DELZW_ERRCODE_GENERIC_ERROR
, NULL
);
407 if(dc
->debug_level
>=4) {
413 // valbuf is a stack, essentially. We fill it in the reverse direction,
414 // to make it simpler to write the final byte sequence.
417 if(dc
->ct
[code
].codetype
==DELZW_CODETYPE_DYN_UNUSED
) {
418 dc
->valbuf
[valbuf_pos
] = dc
->last_value
;
423 dc
->valbuf
[valbuf_pos
] = dc
->ct
[code
].value
;
425 if(dc
->ct
[code
].codetype
==DELZW_CODETYPE_STATIC
) {
426 dc
->last_value
= dc
->ct
[code
].value
;
430 // Traverse the tree, back toward the root codes.
431 code
= dc
->ct
[code
].parent
;
434 // Write out the collected values.
435 delzw_write(dc
, &dc
->valbuf
[valbuf_pos
], dc
->valbuf_capacity
- valbuf_pos
);
436 dc
->uncmpr_nbytes_decoded
+= (DELZW_OFF_T
)(dc
->valbuf_capacity
- valbuf_pos
);
439 static void delzw_find_first_free_entry(delzwctx
*dc
, DELZW_CODE
*pentry
)
443 for(k
=dc
->free_code_search_start
; k
<dc
->ct_capacity
; k
++) {
444 if(dc
->ct
[k
].codetype
==DELZW_CODETYPE_DYN_UNUSED
) {
445 *pentry
= (DELZW_CODE
)k
;
450 *pentry
= (DELZW_CODE
)(dc
->ct_capacity
-1);
451 delzw_set_error(dc
, DELZW_ERRCODE_BAD_CDATA
, "LZW table unexpectedly full");
454 static void delzw_unixcompress_end_bitgroup(delzwctx
*dc
)
456 DELZW_OFF_T ncodes_alloc
;
457 DELZW_OFF_T nbits_left_to_skip
;
459 // The Unix 'compress' format has a quirk.
460 // The codes are written 8 at a time, with all 8 having the same codesize.
461 // The codesize cannot change in the middle of a block of 8. If it needs to,
462 // the remainder of the block is unused padding, which we must skip over.
463 // This is relevant when we encounter a clear code. It is also potentially
464 // relevant when the codesize is auto-incremented. But except possibly for
465 // the first group of codes (the 9-bit codes), the natural number of codes of
466 // a given size is always (?) a power of 2, and a multiple of 8. So, usually
467 // no padding is present at the auto-increment position.
468 // As it happens, when code 256 is used as the clear code, it reduces the
469 // natural number of 9-bit codes from 257 to 256, and since 256 is a multiple
470 // of 8, still no padding is present.
471 // But "v2" format does not use a clear code, and AFAICT it does have padding
472 // after the 9-bit codes.
474 ncodes_alloc
= ((dc
->ncodes_in_this_bitgroup
+ 7)/8)*8;
475 nbits_left_to_skip
= (ncodes_alloc
- dc
->ncodes_in_this_bitgroup
) * dc
->curr_codesize
;
478 // Each "bitgroup" has a whole number of bytes.
479 // When we get here, we've just read a code, so the bitreader's buffer can have no more than
481 // All of the bits in it will be part of the "bits to skip". After accounting for them, we'll
482 // be left with a whole number of *bytes* left to skip, which always start on a byte boundary
483 // in the input stream.
484 // So, whenever the main input loop needs to skip anything, it will be a whole byte, and the
485 // bitreader's buffer will be empty. That's good; it makes it easier to deal with this
488 if(nbits_left_to_skip
>0) {
489 delzw_debugmsg(dc
, 2, "padding bits: %d", (int)nbits_left_to_skip
);
492 dc
->ncodes_in_this_bitgroup
= 0;
493 if(dc
->bitreader_nbits_in_buf
>7 || dc
->bitreader_nbits_in_buf
>nbits_left_to_skip
) {
494 delzw_set_error(dc
, DELZW_ERRCODE_INTERNAL_ERROR
, NULL
);
498 nbits_left_to_skip
-= dc
->bitreader_nbits_in_buf
;
499 if(nbits_left_to_skip
%8 != 0) {
500 delzw_set_error(dc
, DELZW_ERRCODE_INTERNAL_ERROR
, NULL
);
504 delzw_clear_bitbuf(dc
);
505 dc
->nbytes_left_to_skip
= nbits_left_to_skip
/8;
508 static void delzw_increase_codesize(delzwctx
*dc
)
510 if(dc
->basefmt
==DELZW_BASEFMT_UNIXCOMPRESS
) {
511 delzw_unixcompress_end_bitgroup(dc
);
514 if(dc
->curr_codesize
<dc
->max_codesize
) {
516 delzw_debugmsg(dc
, 2, "increased code size to %u", dc
->curr_codesize
);
520 static DELZW_CODE
delzw_get_hashed_code(delzwctx
*dc
, DELZW_CODE code
,
527 h
= ((code
+(DELZW_CODE
)value
) | 0x0800) & 0xffff;
528 h
= ((h
*h
) >> 6) % dc
->ct_capacity
;
530 if(dc
->ct
[h
].codetype
==DELZW_CODETYPE_DYN_UNUSED
) {
534 // Collision - First, walk to the end of the duplicates list
536 while(dc
->ct2
[h
].next
!= DELZW_NEXTPTR_NONE
) {
540 if(count
> dc
->ct_capacity
) {
541 delzw_set_error(dc
, DELZW_ERRCODE_GENERIC_ERROR
, NULL
);
548 // Then search for an open slot
555 h
%= dc
->ct_capacity
;
557 if(dc
->ct
[h
].codetype
==DELZW_CODETYPE_DYN_UNUSED
)
561 if(count
> dc
->ct_capacity
) {
562 delzw_set_error(dc
, DELZW_ERRCODE_GENERIC_ERROR
, NULL
);
567 dc
->ct2
[saved_h
].next
= h
;
571 static void delzw_hashed_add_code_to_dict(delzwctx
*dc
, DELZW_CODE code
, DELZW_UINT8 value
)
575 if(dc
->ct_code_count
>= dc
->ct_capacity
) {
579 idx
= delzw_get_hashed_code(dc
, code
, value
);
580 if(dc
->errcode
) return;
582 dc
->ct
[idx
].parent
= (DELZW_CODE_MINRANGE
)dc
->oldcode
;
583 dc
->ct
[idx
].value
= value
;
584 dc
->ct
[idx
].codetype
= DELZW_CODETYPE_DYN_USED
;
586 dc
->last_code_added
= idx
;
589 static void delzw_hashed_add_root_code_to_dict(delzwctx
*dc
, DELZW_UINT8 value
)
593 idx
= delzw_get_hashed_code(dc
, 0xffff, value
);
594 if(dc
->errcode
) return;
596 dc
->ct
[idx
].value
= value
;
597 dc
->ct
[idx
].codetype
= DELZW_CODETYPE_STATIC
;
601 // Add a code to the dictionary.
602 // Sets delzw->last_code_added to the position where it was added.
603 static void delzw_add_to_dict(delzwctx
*dc
, DELZW_CODE parent
, DELZW_UINT8 value
)
608 delzw_hashed_add_code_to_dict(dc
, parent
, value
);
612 if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
) {
613 delzw_find_first_free_entry(dc
, &newpos
);
616 newpos
= dc
->free_code_search_start
;
618 if(dc
->errcode
) return;
619 if(newpos
>= dc
->ct_capacity
) {
623 if(newpos
< dc
->first_dynamic_code
) {
624 delzw_set_error(dc
, DELZW_ERRCODE_GENERIC_ERROR
, NULL
);
628 dc
->ct
[newpos
].parent
= (DELZW_CODE_MINRANGE
)parent
;
629 dc
->ct
[newpos
].value
= value
;
630 dc
->ct
[newpos
].codetype
= DELZW_CODETYPE_DYN_USED
;
632 dc
->last_code_added
= newpos
;
633 dc
->free_code_search_start
= newpos
+1;
634 if(newpos
> dc
->highest_code_ever_used
) {
635 dc
->highest_code_ever_used
= newpos
;
638 if(dc
->auto_inc_codesize
) {
639 if(dc
->early_codesize_inc
) {
640 if(dc
->free_code_search_start
>=DELZW_NBITS_TO_MAXCODE(dc
->curr_codesize
)) {
641 delzw_increase_codesize(dc
);
645 if(dc
->free_code_search_start
>DELZW_NBITS_TO_MAXCODE(dc
->curr_codesize
)) {
646 delzw_increase_codesize(dc
);
652 static void delzw_process_data_code(delzwctx
*dc
, DELZW_CODE code
)
654 if(code
>= dc
->ct_capacity
) {
658 if(!dc
->have_oldcode
) {
659 // Special case for the first code.
660 delzw_emit_code(dc
, code
);
662 dc
->have_oldcode
= 1;
663 dc
->last_value
= dc
->ct
[code
].value
;
667 if(delzw_code_is_in_table(dc
, code
)) {
668 delzw_emit_code(dc
, code
);
669 if(dc
->errcode
) return;
671 // Let k = the first character of the translation of the code.
672 // Add <oldcode>k to the dictionary.
673 delzw_add_to_dict(dc
, dc
->oldcode
, dc
->last_value
);
676 if(code
>dc
->free_code_search_start
&& !dc
->has_partial_clearing
&& !dc
->is_hashed
) {
677 if(dc
->stop_on_invalid_code
) {
678 delzw_debugmsg(dc
, 1, "bad code: %d when max=%d (assuming data stops here)",
679 (int)code
, (int)dc
->free_code_search_start
);
680 delzw_stop(dc
, "bad LZW code");
683 delzw_set_errorf(dc
, DELZW_ERRCODE_BAD_CDATA
, "Bad LZW code (%d when max=%d)",
684 (int)code
, (int)dc
->free_code_search_start
);
688 // Let k = the first char of the translation of oldcode.
689 // Add <oldcode>k to the dictionary.
690 delzw_add_to_dict(dc
, dc
->oldcode
, dc
->last_value
);
691 if(dc
->errcode
) return;
693 // Write <oldcode>k to the output stream.
694 delzw_emit_code(dc
, dc
->last_code_added
);
700 static void delzw_clear_one_dynamic_code(delzwctx
*dc
, DELZW_CODE code
)
702 if(code
<dc
->first_dynamic_code
|| code
>=dc
->ct_capacity
) return;
703 dc
->ct
[code
].codetype
= DELZW_CODETYPE_DYN_UNUSED
;
704 dc
->ct
[code
].parent
= 0;
705 dc
->ct
[code
].value
= 0;
708 static void delzw_clear(delzwctx
*dc
)
712 delzw_debugmsg(dc
, 2, "clear code");
714 if(dc
->basefmt
==DELZW_BASEFMT_UNIXCOMPRESS
) {
715 delzw_unixcompress_end_bitgroup(dc
);
718 for(i
=dc
->first_dynamic_code
; i
<=dc
->highest_code_ever_used
; i
++) {
719 delzw_clear_one_dynamic_code(dc
, i
);
722 dc
->curr_codesize
= dc
->min_codesize
;
723 dc
->free_code_search_start
= dc
->first_dynamic_code
;
724 dc
->have_oldcode
= 0;
726 dc
->last_code_added
= 0;
729 delzw_debugmsg(dc
, 2, "code size: %u", dc
->curr_codesize
);
732 static void delzw_partial_clear(delzwctx
*dc
)
736 delzw_debugmsg(dc
, 2, "partial clear code");
738 for(i
=dc
->first_dynamic_code
; i
<=dc
->highest_code_ever_used
; i
++) {
739 // If this code is in use
740 if(dc
->ct
[i
].codetype
==DELZW_CODETYPE_DYN_USED
) {
741 // and its parent is a dynamic code,
742 // mark its parent as having a child
743 if(dc
->ct
[i
].parent
>=257) {
744 dc
->ct
[dc
->ct
[i
].parent
].flags
= 1;
749 for(i
=dc
->first_dynamic_code
; i
<=dc
->highest_code_ever_used
; i
++) {
750 if(dc
->ct
[i
].flags
==0) {
751 // If this code has no children, clear it
752 delzw_clear_one_dynamic_code(dc
, i
);
755 // Leave all flags clear, for next time
760 dc
->free_code_search_start
= dc
->first_dynamic_code
;
763 static void delzw_process_code(delzwctx
*dc
, DELZW_CODE code
)
765 if(dc
->debug_level
>=3) {
766 delzw_debugmsg(dc
, 3, "code=%d oc=%d lca=%d lv=%d next=%d",
768 (int)dc
->oldcode
, (int)dc
->last_code_added
, (int)dc
->last_value
,
769 (int)dc
->free_code_search_start
);
772 if(dc
->escaped_code_is_pending
) {
773 dc
->escaped_code_is_pending
= 0;
774 if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
) {
775 if(code
==1 && (dc
->curr_codesize
<dc
->max_codesize
)) {
776 delzw_increase_codesize(dc
);
779 delzw_partial_clear(dc
);
782 delzw_set_error(dc
, DELZW_ERRCODE_BAD_CDATA
, NULL
);
788 if(code
>= dc
->ct_capacity
) return;
790 switch(dc
->ct
[code
].codetype
) {
791 case DELZW_CODETYPE_STATIC
:
792 case DELZW_CODETYPE_DYN_UNUSED
:
793 case DELZW_CODETYPE_DYN_USED
:
794 delzw_process_data_code(dc
, code
);
796 case DELZW_CODETYPE_CLEAR
:
799 case DELZW_CODETYPE_STOP
:
800 delzw_stop(dc
, "stop code");
802 case DELZW_CODETYPE_SPECIAL
:
803 if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
&& code
==256) {
804 dc
->escaped_code_is_pending
= 1;
810 static void delzw_on_decompression_start(delzwctx
*dc
)
812 if(dc
->basefmt
!=DELZW_BASEFMT_ZIPSHRINK
&&
813 dc
->basefmt
!=DELZW_BASEFMT_GIF
&&
814 dc
->basefmt
!=DELZW_BASEFMT_UNIXCOMPRESS
&&
815 dc
->basefmt
!=DELZW_BASEFMT_ZOOLZD
&&
816 dc
->basefmt
!=DELZW_BASEFMT_TIFF
&&
817 dc
->basefmt
!=DELZW_BASEFMT_TIFFOLD
&&
818 dc
->basefmt
!=DELZW_BASEFMT_ARC5
)
820 delzw_set_error(dc
, DELZW_ERRCODE_UNSUPPORTED_OPTION
, "Unsupported LZW format");
824 if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
) {
825 dc
->has_partial_clearing
= 1;
827 else if(dc
->basefmt
==DELZW_BASEFMT_ARC5
) {
831 if(dc
->header_type
==DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE
) {
834 else if(dc
->header_type
==DELZW_HEADERTYPE_ARC1BYTE
) {
842 // Process the header, if any.
843 // Set any remaining params needed, and validate params.
844 // This is called upon encountering the first byte after the header.
845 // (If zero bytes of data were compressed, it might never be called.)
846 static void delzw_on_codes_start(delzwctx
*dc
)
850 if(dc
->errcode
) goto done
;
852 if(dc
->header_size
> 0) {
853 delzw_debugmsg(dc
, 2, "processing header");
855 if(dc
->header_type
==DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE
) {
856 delzw_process_unixcompress_3byteheader(dc
);
858 else if(dc
->header_type
==DELZW_HEADERTYPE_ARC1BYTE
) {
859 delzw_process_arc_1byteheader(dc
);
862 if(dc
->cb_after_header_parsed
) {
863 dc
->cb_after_header_parsed(dc
);
867 delzw_debugmsg(dc
, 2, "start of codes");
869 if(dc
->basefmt
==DELZW_BASEFMT_UNIXCOMPRESS
) {
870 dc
->min_codesize
= 9;
872 else if(dc
->basefmt
==DELZW_BASEFMT_GIF
) {
873 dc
->auto_inc_codesize
= 1;
874 dc
->min_codesize
= dc
->gif_root_codesize
+ 1;
875 dc
->max_codesize
= 12;
877 else if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
) {
878 dc
->min_codesize
= 9;
879 dc
->max_codesize
= 13;
881 else if(dc
->basefmt
==DELZW_BASEFMT_ZOOLZD
) {
882 dc
->min_codesize
= 9;
883 if(dc
->max_codesize
==0) {
884 dc
->max_codesize
= 13;
887 else if(dc
->basefmt
==DELZW_BASEFMT_TIFF
) {
889 dc
->early_codesize_inc
= 1;
890 dc
->min_codesize
= 9;
891 if(dc
->max_codesize
==0) {
892 dc
->max_codesize
= 12;
895 else if(dc
->basefmt
==DELZW_BASEFMT_TIFFOLD
) {
896 dc
->min_codesize
= 9;
897 if(dc
->max_codesize
==0) {
898 dc
->max_codesize
= 12;
901 else if(dc
->basefmt
==DELZW_BASEFMT_ARC5
) {
903 dc
->auto_inc_codesize
= 0;
904 if(dc
->max_codesize
==0) {
905 dc
->max_codesize
= 12;
907 dc
->min_codesize
= dc
->max_codesize
;
910 if(dc
->min_codesize
<DELZW_MINMINCODESIZE
|| dc
->min_codesize
>DELZW_MAXMAXCODESIZE
||
911 dc
->max_codesize
<DELZW_MINMINCODESIZE
|| dc
->max_codesize
>DELZW_MAXMAXCODESIZE
||
912 dc
->min_codesize
>dc
->max_codesize
)
914 delzw_set_errorf(dc
, DELZW_ERRCODE_UNSUPPORTED_OPTION
, "Unsupported code size (%u,%u)",
915 dc
->min_codesize
, dc
->max_codesize
);
919 delzw_debugmsg(dc
, 2, "code size: %u, max=%u", dc
->min_codesize
, dc
->max_codesize
);
921 dc
->curr_codesize
= dc
->min_codesize
;
923 dc
->ct_capacity
= ((DELZW_CODE
)1)<<dc
->max_codesize
;
924 dc
->ct
= DELZW_CALLOC(dc
->userdata
, dc
->ct_capacity
, sizeof(struct delzw_tableentry
),
925 struct delzw_tableentry
*);
927 dc
->ct2
= DELZW_CALLOC(dc
->userdata
, dc
->ct_capacity
, sizeof(struct delzw_tableentry2
),
928 struct delzw_tableentry2
*);
930 dc
->valbuf_capacity
= dc
->ct_capacity
;
931 dc
->valbuf
= DELZW_CALLOC(dc
->userdata
, dc
->valbuf_capacity
, 1, DELZW_UINT8
*);
933 if(dc
->basefmt
==DELZW_BASEFMT_UNIXCOMPRESS
) {
934 for(i
=0; i
<256; i
++) {
935 dc
->ct
[i
].codetype
= DELZW_CODETYPE_STATIC
;
936 dc
->ct
[i
].value
= (DELZW_UINT8
)i
;
939 if(dc
->unixcompress_has_clear_code
) {
940 dc
->ct
[256].codetype
= DELZW_CODETYPE_CLEAR
;
941 dc
->first_dynamic_code
= 257;
944 dc
->first_dynamic_code
= 256;
947 else if(dc
->basefmt
==DELZW_BASEFMT_GIF
) {
948 DELZW_CODE n
= DELZW_NBITS_TO_NCODES(dc
->gif_root_codesize
);
951 dc
->ct
[i
].codetype
= DELZW_CODETYPE_STATIC
;
952 dc
->ct
[i
].value
= (i
<=255)?((DELZW_UINT8
)i
):0;
954 dc
->ct
[n
].codetype
= DELZW_CODETYPE_CLEAR
;
955 dc
->ct
[n
+1].codetype
= DELZW_CODETYPE_STOP
;
956 dc
->first_dynamic_code
= n
+2;
958 else if(dc
->basefmt
==DELZW_BASEFMT_ZIPSHRINK
) {
959 dc
->first_dynamic_code
= 257;
961 for(i
=0; i
<256; i
++) {
962 dc
->ct
[i
].codetype
= DELZW_CODETYPE_STATIC
;
963 dc
->ct
[i
].value
= (DELZW_UINT8
)i
;
965 dc
->ct
[256].codetype
= DELZW_CODETYPE_SPECIAL
;
967 else if(dc
->basefmt
==DELZW_BASEFMT_ZOOLZD
) {
968 for(i
=0; i
<256; i
++) {
969 dc
->ct
[i
].codetype
= DELZW_CODETYPE_STATIC
;
970 dc
->ct
[i
].value
= (DELZW_UINT8
)i
;
972 dc
->ct
[256].codetype
= DELZW_CODETYPE_CLEAR
;
973 dc
->ct
[257].codetype
= DELZW_CODETYPE_STOP
;
974 dc
->first_dynamic_code
= 258;
976 else if(dc
->basefmt
==DELZW_BASEFMT_TIFF
|| dc
->basefmt
==DELZW_BASEFMT_TIFFOLD
) {
977 for(i
=0; i
<256; i
++) {
978 dc
->ct
[i
].codetype
= DELZW_CODETYPE_STATIC
;
979 dc
->ct
[i
].value
= (DELZW_UINT8
)i
;
981 dc
->ct
[256].codetype
= DELZW_CODETYPE_CLEAR
;
982 dc
->ct
[257].codetype
= DELZW_CODETYPE_STOP
;
983 dc
->first_dynamic_code
= 258;
987 for(i
=0; i
<dc
->ct_capacity
; i
++) {
988 dc
->ct2
[i
].next
= DELZW_NEXTPTR_NONE
;
992 for(i
=dc
->first_dynamic_code
; i
<dc
->ct_capacity
; i
++) {
993 dc
->ct
[i
].codetype
= DELZW_CODETYPE_DYN_UNUSED
;
995 dc
->free_code_search_start
= dc
->first_dynamic_code
;
998 if(dc
->arc5_has_stop_code
) {
999 dc
->ct
[0].codetype
= DELZW_CODETYPE_STOP
;
1000 dc
->ct_code_count
++;
1003 for(i
=0; i
<256; i
++) {
1004 delzw_hashed_add_root_code_to_dict(dc
, (DELZW_UINT8
)i
);
1012 static int delzw_have_enough_output(delzwctx
*dc
)
1014 if(dc
->output_len_known
) {
1015 if(dc
->uncmpr_nbytes_written
+ (DELZW_OFF_T
)dc
->outbuf_nbytes_used
>=
1016 dc
->output_expected_len
)
1024 static void delzw_process_byte(delzwctx
*dc
, DELZW_UINT8 b
)
1026 if(dc
->state
==DELZW_STATE_INIT
) {
1027 delzw_on_decompression_start(dc
);
1028 dc
->state
= DELZW_STATE_READING_HEADER
;
1031 if(dc
->state
==DELZW_STATE_READING_HEADER
) {
1032 if(dc
->total_nbytes_processed
< (DELZW_OFF_T
)dc
->header_size
) {
1033 dc
->header_buf
[dc
->total_nbytes_processed
] = b
;
1037 // (This is the first byte after the header.)
1038 delzw_on_codes_start(dc
);
1039 dc
->state
= DELZW_STATE_READING_CODES
;
1042 if(dc
->state
==DELZW_STATE_READING_CODES
) {
1043 if(dc
->nbytes_left_to_skip
>0) {
1044 dc
->nbytes_left_to_skip
--;
1048 delzw_add_byte_to_bitbuf(dc
, b
);
1053 if(dc
->errcode
) break;
1054 if(dc
->bitreader_nbits_in_buf
< dc
->curr_codesize
) {
1058 code
= delzw_get_code(dc
, dc
->curr_codesize
);
1059 dc
->ncodes_in_this_bitgroup
++;
1060 delzw_process_code(dc
, code
);
1062 if(dc
->state
!= DELZW_STATE_READING_CODES
) {
1065 if(dc
->nbytes_left_to_skip
>0) {
1072 static void delzw_addbuf(delzwctx
*dc
, const DELZW_UINT8
*buf
, size_t buf_len
)
1076 if(dc
->debug_level
>=3) {
1077 delzw_debugmsg(dc
, 3, "received %d bytes of input", (int)buf_len
);
1080 for(i
=0; i
<buf_len
; i
++) {
1081 if(dc
->errcode
) break;
1082 if(dc
->state
== DELZW_STATE_FINISHED
) break;
1083 if(delzw_have_enough_output(dc
)) {
1084 delzw_stop(dc
, "sufficient output");
1087 delzw_process_byte(dc
, buf
[i
]);
1088 dc
->total_nbytes_processed
++;
1092 static void delzw_finish(delzwctx
*dc
)
1098 if(dc
->output_len_known
&& (dc
->uncmpr_nbytes_decoded
==dc
->output_expected_len
)) {
1099 reason
= "end of input and sufficient output";
1102 reason
= "end of input";
1105 delzw_stop(dc
, reason
);