Minor refactoring of the IFF and box-format parsers
[deark.git] / foreign / delzw.h
blob83526928d4703b7e3ec4636ff83c7972dd940a72
1 // This file is part of Deark.
2 // Copyright (C) 2019-2020 Jason Summers
3 // See the file COPYING for terms of use.
5 // This file is in the "foreign" subdirectory because I want to make it usable
6 // independently of Deark. I haven't decided if it will be released by itself.
8 #ifndef DELZW_UINT8
9 #define DELZW_UINT8 unsigned char
10 #endif
11 #ifndef DELZW_UINT16
12 #define DELZW_UINT16 uint16_t
13 #endif
14 #ifndef DELZW_UINT32
15 #define DELZW_UINT32 uint32_t
16 #endif
17 #ifndef DELZW_OFF_T
18 #define DELZW_OFF_T off_t
19 #endif
20 #ifndef DELZW_MEMCPY
21 #define DELZW_MEMCPY memcpy
22 #endif
23 #ifndef DELZW_STRLCPY
24 #define DELZW_STRLCPY strlcpy
25 #endif
26 #ifndef DELZW_VSNPRINTF
27 #define DELZW_VSNPRINTF vsnprintf
28 #endif
29 #ifndef DELZW_CALLOC
30 #define DELZW_CALLOC(u, nmemb, size, ty) (ty)calloc((nmemb), (size))
31 #endif
32 #ifndef DELZW_FREE
33 #define DELZW_FREE(u, ptr) free(ptr)
34 #endif
35 #ifndef DELZW_GNUC_ATTRIBUTE
36 #define DELZW_GNUC_ATTRIBUTE(x)
37 #endif
39 #define DELZW_CODE DELZW_UINT32 // int type used in most cases
40 #define DELZW_CODE_MINRANGE DELZW_UINT16 // int type used for parents in table entries
41 #define DELZW_MINMINCODESIZE 3
42 #define DELZW_MAXMAXCODESIZE 16
43 #define DELZW_NBITS_TO_MAXCODE(n) ((DELZW_CODE)((1<<(n))-1))
44 #define DELZW_NBITS_TO_NCODES(n) ((DELZW_CODE)(1<<(n)))
46 struct delzwctx_struct;
47 typedef struct delzwctx_struct delzwctx;
49 struct delzw_tableentry {
50 DELZW_CODE_MINRANGE parent;
51 DELZW_UINT8 value;
52 #define DELZW_CODETYPE_INVALID 0x00
53 #define DELZW_CODETYPE_STATIC 0x01
54 #define DELZW_CODETYPE_DYN_UNUSED 0x02
55 #define DELZW_CODETYPE_DYN_USED 0x03
56 #define DELZW_CODETYPE_CLEAR 0x08
57 #define DELZW_CODETYPE_STOP 0x09
58 #define DELZW_CODETYPE_SPECIAL 0x0f
59 DELZW_UINT8 codetype;
60 DELZW_UINT8 flags;
63 struct delzw_tableentry2 {
64 #define DELZW_NEXTPTR_NONE 0xffff // Note - This table is only used with 12-bit codes
65 DELZW_CODE_MINRANGE next;
68 // Normally, the client must consume all the bytes in 'buf', and return 'size'.
69 // The other options are:
70 // - Set *outflags to 1, and return a number <='size'. This indicates that
71 // that decompression can stop; the client has all the data it needs.
72 // - Return a number !='size'. This is interpreted as a write error, and
73 // decompression will stop.
74 typedef size_t (*delzw_cb_write_type)(delzwctx *dc, const DELZW_UINT8 *buf, size_t size,
75 unsigned int *outflags);
77 typedef void (*delzw_cb_debugmsg_type)(delzwctx *dc, int level, const char *msg);
78 typedef void (*delzw_cb_generic_type)(delzwctx *dc);
80 struct delzwctx_struct {
81 // Fields the user can or must set:
82 void *userdata;
83 int debug_level;
84 delzw_cb_write_type cb_write;
85 delzw_cb_debugmsg_type cb_debugmsg;
86 delzw_cb_generic_type cb_after_header_parsed;
88 #define DELZW_BASEFMT_UNIXCOMPRESS 1
89 #define DELZW_BASEFMT_GIF 2
90 #define DELZW_BASEFMT_ZIPSHRINK 3
91 #define DELZW_BASEFMT_ZOOLZD 4
92 #define DELZW_BASEFMT_TIFFOLD 5
93 #define DELZW_BASEFMT_TIFF 6
94 #define DELZW_BASEFMT_ARC5 7
95 int basefmt;
97 #define DELZW_HEADERTYPE_NONE 0
98 #define DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE 1
99 #define DELZW_HEADERTYPE_ARC1BYTE 2
100 int header_type;
102 unsigned int gif_root_codesize;
104 int stop_on_invalid_code;
106 int output_len_known;
107 DELZW_OFF_T output_expected_len;
109 // Fields that may be set by the user, or derived from other fields:
110 int auto_inc_codesize;
111 int unixcompress_has_clear_code;
112 int arc5_has_stop_code;
113 unsigned int min_codesize;
114 unsigned int max_codesize;
116 // Derived fields:
117 size_t header_size;
118 int is_msb;
119 int early_codesize_inc;
120 int has_partial_clearing;
121 int is_hashed;
123 // Informational:
124 DELZW_UINT8 header_unixcompress_mode;
125 DELZW_UINT8 header_unixcompress_max_codesize;
126 DELZW_UINT8 header_unixcompress_block_mode; // = 1 or 0
128 // Internal state:
129 #define DELZW_ERRCODE_OK 0
130 #define DELZW_ERRCODE_GENERIC_ERROR 1
131 #define DELZW_ERRCODE_BAD_CDATA 2
132 #define DELZW_ERRCODE_MALLOC_FAILED 3
133 #define DELZW_ERRCODE_WRITE_FAILED 7
134 #define DELZW_ERRCODE_INSUFFICIENT_CDATA 8
135 #define DELZW_ERRCODE_UNSUPPORTED_OPTION 9
136 #define DELZW_ERRCODE_INTERNAL_ERROR 10
137 int errcode;
138 int stop_writing_flag;
140 #define DELZW_STATE_INIT 0
141 #define DELZW_STATE_READING_HEADER 1
142 #define DELZW_STATE_READING_CODES 2
143 #define DELZW_STATE_FINISHED 3
144 int state;
145 DELZW_OFF_T total_nbytes_processed;
146 DELZW_OFF_T uncmpr_nbytes_written; // (Not including those in outbuf)
147 DELZW_OFF_T uncmpr_nbytes_decoded; // (Including those in outbuf)
149 DELZW_OFF_T ncodes_in_this_bitgroup;
150 DELZW_OFF_T nbytes_left_to_skip;
152 unsigned int curr_codesize;
154 int have_oldcode;
155 DELZW_CODE oldcode;
156 DELZW_CODE last_code_added;
157 DELZW_UINT8 last_value;
158 DELZW_CODE highest_code_ever_used;
159 DELZW_CODE free_code_search_start;
160 DELZW_CODE first_dynamic_code;
161 int escaped_code_is_pending;
163 unsigned int bitreader_buf;
164 unsigned int bitreader_nbits_in_buf;
166 size_t outbuf_nbytes_used;
168 DELZW_CODE ct_capacity;
169 DELZW_CODE ct_code_count; // Note - Not always maintained if not needed
170 struct delzw_tableentry *ct;
171 struct delzw_tableentry2 *ct2;
173 DELZW_UINT8 header_buf[3];
175 size_t valbuf_capacity;
176 DELZW_UINT8 *valbuf;
178 char errmsg[80];
180 #define DELZW_OUTBUF_SIZE 1024
181 DELZW_UINT8 outbuf[DELZW_OUTBUF_SIZE];
184 static void delzw_debugmsg(delzwctx *dc, int level, const char *fmt, ...)
185 DELZW_GNUC_ATTRIBUTE ((format (printf, 3, 4)));
187 static void delzw_debugmsg(delzwctx *dc, int level, const char *fmt, ...)
189 va_list ap;
190 char msg[200];
192 if(!dc->cb_debugmsg) return;
193 if(level>dc->debug_level) return;
195 va_start(ap, fmt);
196 DELZW_VSNPRINTF(msg, sizeof(msg), fmt, ap);
197 va_end(ap);
198 dc->cb_debugmsg(dc, level, msg);
201 static void delzw_dumptable(delzwctx *dc)
203 DELZW_CODE k;
204 for(k=0; k<dc->highest_code_ever_used; k++) {
205 delzw_debugmsg(dc, 4, "[%d] ty=%d p=%d v=%d f=%d",
206 (int)k, (int)dc->ct[k].codetype, (int)dc->ct[k].parent,
207 (int)dc->ct[k].value, (int)dc->ct[k].flags);
211 static void delzw_stop(delzwctx *dc, const char *reason)
213 if(dc->state == DELZW_STATE_FINISHED) return;
214 delzw_debugmsg(dc, 2, "stopping due to %s", reason);
215 dc->state = DELZW_STATE_FINISHED;
218 static void delzw_set_errorf(delzwctx *dc, int errcode, const char *fmt, ...)
219 DELZW_GNUC_ATTRIBUTE ((format (printf, 3, 4)));
221 static void delzw_set_errorf(delzwctx *dc, int errcode, const char *fmt, ...)
223 va_list ap;
225 delzw_stop(dc, "error");
226 if(dc->errcode) return;
227 dc->errcode = errcode;
228 va_start(ap, fmt);
229 DELZW_VSNPRINTF(dc->errmsg, sizeof(dc->errmsg), fmt, ap);
230 va_end(ap);
233 static void delzw_set_error(delzwctx *dc, int errcode, const char *msg)
235 delzw_stop(dc, "error");
236 if(dc->errcode) return;
237 dc->errcode = errcode;
238 if(!msg || !msg[0]) {
239 msg = "LZW decompression error";
241 DELZW_STRLCPY(dc->errmsg, msg, sizeof(dc->errmsg));
244 static delzwctx *delzw_create(void *userdata)
246 delzwctx *dc;
248 dc = DELZW_CALLOC(userdata, 1, sizeof(delzwctx), delzwctx *);
249 dc->userdata = userdata;
250 return dc;
253 static void delzw_destroy(delzwctx *dc)
255 if(!dc) return;
256 DELZW_FREE(dc->userdata, dc->ct);
257 if(dc->ct2) DELZW_FREE(dc->userdata, dc->ct2);
258 DELZW_FREE(dc->userdata, dc->valbuf);
259 DELZW_FREE(dc->userdata, dc);
262 static void delzw_write_unbuffered(delzwctx *dc, const DELZW_UINT8 *buf, size_t n1)
264 DELZW_OFF_T nbytes_written;
265 unsigned int outflags = 0;
266 DELZW_OFF_T n = (DELZW_OFF_T)n1;
268 if(dc->stop_writing_flag) return;
269 if(dc->output_len_known) {
270 if(dc->uncmpr_nbytes_written + n > dc->output_expected_len) {
271 n = dc->output_expected_len - dc->uncmpr_nbytes_written;
274 if(n<1) return;
275 nbytes_written = (DELZW_OFF_T)dc->cb_write(dc, buf, (size_t)n, &outflags);
276 if((outflags & 0x1) && (nbytes_written<=n)) {
277 dc->stop_writing_flag = 1;
278 delzw_stop(dc, "client request");
280 else if(nbytes_written != n) {
281 delzw_set_error(dc, DELZW_ERRCODE_WRITE_FAILED, "Write failed");
282 return;
284 dc->uncmpr_nbytes_written += nbytes_written;
287 static void delzw_flush(delzwctx *dc)
289 if(dc->outbuf_nbytes_used<1) return;
290 delzw_write_unbuffered(dc, dc->outbuf, dc->outbuf_nbytes_used);
291 dc->outbuf_nbytes_used = 0;
294 static void delzw_write(delzwctx *dc, const DELZW_UINT8 *buf, size_t n)
296 if(dc->errcode) return;
298 // If there's enough room in outbuf, copy it there, and we're done.
299 if(dc->outbuf_nbytes_used + n <= DELZW_OUTBUF_SIZE) {
300 DELZW_MEMCPY(&dc->outbuf[dc->outbuf_nbytes_used], buf, n);
301 dc->outbuf_nbytes_used += n;
302 return;
305 // Flush anything currently in outbuf.
306 delzw_flush(dc);
307 if(dc->stop_writing_flag) return;
309 // If too big for outbuf, write without buffering.
310 if(n > DELZW_OUTBUF_SIZE) {
311 delzw_write_unbuffered(dc, buf, n);
312 return;
315 // Otherwise copy to outbuf
316 DELZW_MEMCPY(dc->outbuf, buf, n);
317 dc->outbuf_nbytes_used += n;
320 static void delzw_process_unixcompress_3byteheader(delzwctx *dc)
322 if(dc->header_buf[0]!=0x1f || dc->header_buf[1]!=0x9d) {
323 delzw_set_error(dc, DELZW_ERRCODE_BAD_CDATA, "Not in compress format");
324 return;
327 dc->header_unixcompress_mode = dc->header_buf[2];
328 dc->header_unixcompress_max_codesize = (dc->header_unixcompress_mode & 0x1f);
329 dc->header_unixcompress_block_mode = (dc->header_unixcompress_mode & 0x80) ? 1 : 0;
330 delzw_debugmsg(dc, 2, "LZW mode=0x%02x, maxbits=%u, blockmode=%u",
331 (unsigned int)dc->header_unixcompress_mode,
332 (unsigned int)dc->header_unixcompress_max_codesize,
333 (unsigned int)dc->header_unixcompress_block_mode);
335 dc->max_codesize = (unsigned int)dc->header_unixcompress_max_codesize;
336 dc->unixcompress_has_clear_code = (int)dc->header_unixcompress_block_mode;
339 static void delzw_process_arc_1byteheader(delzwctx *dc)
341 dc->header_unixcompress_max_codesize = (dc->header_buf[0] & 0x1f);
342 dc->max_codesize = (unsigned int)dc->header_unixcompress_max_codesize;
343 delzw_debugmsg(dc, 2, "max code size: %u", dc->max_codesize);
344 dc->unixcompress_has_clear_code = 1;
347 static void delzw_clear_bitbuf(delzwctx *dc)
349 dc->bitreader_nbits_in_buf = 0;
350 dc->bitreader_buf = 0;
353 static void delzw_add_byte_to_bitbuf(delzwctx *dc, DELZW_UINT8 b)
355 // Add a byte's worth of bits to the pending code
356 if(dc->is_msb==0) {
357 dc->bitreader_buf |= ((unsigned int)b)<<dc->bitreader_nbits_in_buf;
359 else {
360 dc->bitreader_buf = (dc->bitreader_buf<<8) | b;
362 dc->bitreader_nbits_in_buf += 8;
365 static DELZW_CODE delzw_get_code(delzwctx *dc, unsigned int nbits)
367 unsigned int n;
369 if(dc->is_msb==0) {
370 n = dc->bitreader_buf & ((1U<<nbits)-1U);
371 dc->bitreader_buf >>= nbits;
372 dc->bitreader_nbits_in_buf -= nbits;
374 else {
375 dc->bitreader_nbits_in_buf -= nbits;
376 n = (dc->bitreader_buf >> dc->bitreader_nbits_in_buf) & ((1U<<nbits)-1U);
378 return (DELZW_CODE)n;
381 // Is this a valid code with a value (a static, or in-use dynamic code)?
382 static int delzw_code_is_in_table(delzwctx *dc, DELZW_CODE code)
384 DELZW_UINT8 codetype = dc->ct[code].codetype;
386 if(codetype==DELZW_CODETYPE_STATIC) return 1;
387 if(codetype==DELZW_CODETYPE_DYN_USED) return 1;
388 return 0;
391 // Decode an LZW code to one or more values, and write the values.
392 // Updates ctx->last_value.
393 static void delzw_emit_code(delzwctx *dc, DELZW_CODE code1)
395 DELZW_CODE code = code1;
396 size_t valbuf_pos = dc->valbuf_capacity; // = First entry that's used
398 while(1) {
399 if(code >= dc->ct_capacity) {
400 delzw_set_errorf(dc, DELZW_ERRCODE_GENERIC_ERROR, "Bad LZW code (%d)", (int)code);
401 return;
404 if(valbuf_pos==0) {
405 // We must be in an infinite loop (probably an internal error).
406 delzw_set_error(dc, DELZW_ERRCODE_GENERIC_ERROR, NULL);
407 if(dc->debug_level>=4) {
408 delzw_dumptable(dc);
410 return;
413 // valbuf is a stack, essentially. We fill it in the reverse direction,
414 // to make it simpler to write the final byte sequence.
415 valbuf_pos--;
417 if(dc->ct[code].codetype==DELZW_CODETYPE_DYN_UNUSED) {
418 dc->valbuf[valbuf_pos] = dc->last_value;
419 code = dc->oldcode;
420 continue;
423 dc->valbuf[valbuf_pos] = dc->ct[code].value;
425 if(dc->ct[code].codetype==DELZW_CODETYPE_STATIC) {
426 dc->last_value = dc->ct[code].value;
427 break;
430 // Traverse the tree, back toward the root codes.
431 code = dc->ct[code].parent;
434 // Write out the collected values.
435 delzw_write(dc, &dc->valbuf[valbuf_pos], dc->valbuf_capacity - valbuf_pos);
436 dc->uncmpr_nbytes_decoded += (DELZW_OFF_T)(dc->valbuf_capacity - valbuf_pos);
439 static void delzw_find_first_free_entry(delzwctx *dc, DELZW_CODE *pentry)
441 DELZW_CODE k;
443 for(k=dc->free_code_search_start; k<dc->ct_capacity; k++) {
444 if(dc->ct[k].codetype==DELZW_CODETYPE_DYN_UNUSED) {
445 *pentry = (DELZW_CODE)k;
446 return;
450 *pentry = (DELZW_CODE)(dc->ct_capacity-1);
451 delzw_set_error(dc, DELZW_ERRCODE_BAD_CDATA, "LZW table unexpectedly full");
454 static void delzw_unixcompress_end_bitgroup(delzwctx *dc)
456 DELZW_OFF_T ncodes_alloc;
457 DELZW_OFF_T nbits_left_to_skip;
459 // The Unix 'compress' format has a quirk.
460 // The codes are written 8 at a time, with all 8 having the same codesize.
461 // The codesize cannot change in the middle of a block of 8. If it needs to,
462 // the remainder of the block is unused padding, which we must skip over.
463 // This is relevant when we encounter a clear code. It is also potentially
464 // relevant when the codesize is auto-incremented. But except possibly for
465 // the first group of codes (the 9-bit codes), the natural number of codes of
466 // a given size is always (?) a power of 2, and a multiple of 8. So, usually
467 // no padding is present at the auto-increment position.
468 // As it happens, when code 256 is used as the clear code, it reduces the
469 // natural number of 9-bit codes from 257 to 256, and since 256 is a multiple
470 // of 8, still no padding is present.
471 // But "v2" format does not use a clear code, and AFAICT it does have padding
472 // after the 9-bit codes.
474 ncodes_alloc = ((dc->ncodes_in_this_bitgroup + 7)/8)*8;
475 nbits_left_to_skip = (ncodes_alloc - dc->ncodes_in_this_bitgroup) * dc->curr_codesize;
477 // My thinking:
478 // Each "bitgroup" has a whole number of bytes.
479 // When we get here, we've just read a code, so the bitreader's buffer can have no more than
480 // 7 bits in it.
481 // All of the bits in it will be part of the "bits to skip". After accounting for them, we'll
482 // be left with a whole number of *bytes* left to skip, which always start on a byte boundary
483 // in the input stream.
484 // So, whenever the main input loop needs to skip anything, it will be a whole byte, and the
485 // bitreader's buffer will be empty. That's good; it makes it easier to deal with this
486 // padding.
488 if(nbits_left_to_skip>0) {
489 delzw_debugmsg(dc, 2, "padding bits: %d", (int)nbits_left_to_skip);
492 dc->ncodes_in_this_bitgroup = 0;
493 if(dc->bitreader_nbits_in_buf>7 || dc->bitreader_nbits_in_buf>nbits_left_to_skip) {
494 delzw_set_error(dc, DELZW_ERRCODE_INTERNAL_ERROR, NULL);
495 return;
498 nbits_left_to_skip -= dc->bitreader_nbits_in_buf;
499 if(nbits_left_to_skip%8 != 0) {
500 delzw_set_error(dc, DELZW_ERRCODE_INTERNAL_ERROR, NULL);
501 return;
504 delzw_clear_bitbuf(dc);
505 dc->nbytes_left_to_skip = nbits_left_to_skip/8;
508 static void delzw_increase_codesize(delzwctx *dc)
510 if(dc->basefmt==DELZW_BASEFMT_UNIXCOMPRESS) {
511 delzw_unixcompress_end_bitgroup(dc);
514 if(dc->curr_codesize<dc->max_codesize) {
515 dc->curr_codesize++;
516 delzw_debugmsg(dc, 2, "increased code size to %u", dc->curr_codesize);
520 static DELZW_CODE delzw_get_hashed_code(delzwctx *dc, DELZW_CODE code,
521 DELZW_UINT8 value)
523 DELZW_CODE h;
524 DELZW_CODE saved_h;
525 DELZW_UINT32 count;
527 h = ((code+(DELZW_CODE)value) | 0x0800) & 0xffff;
528 h = ((h*h) >> 6) % dc->ct_capacity;
530 if(dc->ct[h].codetype==DELZW_CODETYPE_DYN_UNUSED) {
531 return h;
534 // Collision - First, walk to the end of the duplicates list
535 count = 0;
536 while(dc->ct2[h].next != DELZW_NEXTPTR_NONE) {
537 h = dc->ct2[h].next;
539 count++;
540 if(count > dc->ct_capacity) {
541 delzw_set_error(dc, DELZW_ERRCODE_GENERIC_ERROR, NULL);
542 return 0;
546 saved_h = h;
548 // Then search for an open slot
549 count = 0;
550 while(1) {
551 if(count==0)
552 h += 101;
553 else
554 h += 1;
555 h %= dc->ct_capacity;
557 if(dc->ct[h].codetype==DELZW_CODETYPE_DYN_UNUSED)
558 break;
560 count++;
561 if(count > dc->ct_capacity) {
562 delzw_set_error(dc, DELZW_ERRCODE_GENERIC_ERROR, NULL);
563 return 0;
567 dc->ct2[saved_h].next = h;
568 return h;
571 static void delzw_hashed_add_code_to_dict(delzwctx *dc, DELZW_CODE code, DELZW_UINT8 value)
573 DELZW_CODE idx;
575 if(dc->ct_code_count >= dc->ct_capacity) {
576 return;
579 idx = delzw_get_hashed_code(dc, code, value);
580 if(dc->errcode) return;
582 dc->ct[idx].parent = (DELZW_CODE_MINRANGE)dc->oldcode;
583 dc->ct[idx].value = value;
584 dc->ct[idx].codetype = DELZW_CODETYPE_DYN_USED;
585 dc->ct_code_count++;
586 dc->last_code_added = idx;
589 static void delzw_hashed_add_root_code_to_dict(delzwctx *dc, DELZW_UINT8 value)
591 int idx;
593 idx = delzw_get_hashed_code(dc, 0xffff, value);
594 if(dc->errcode) return;
596 dc->ct[idx].value = value;
597 dc->ct[idx].codetype = DELZW_CODETYPE_STATIC;
598 dc->ct_code_count++;
601 // Add a code to the dictionary.
602 // Sets delzw->last_code_added to the position where it was added.
603 static void delzw_add_to_dict(delzwctx *dc, DELZW_CODE parent, DELZW_UINT8 value)
605 DELZW_CODE newpos;
607 if(dc->is_hashed) {
608 delzw_hashed_add_code_to_dict(dc, parent, value);
609 return;
612 if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK) {
613 delzw_find_first_free_entry(dc, &newpos);
615 else {
616 newpos = dc->free_code_search_start;
618 if(dc->errcode) return;
619 if(newpos >= dc->ct_capacity) {
620 return;
623 if(newpos < dc->first_dynamic_code) {
624 delzw_set_error(dc, DELZW_ERRCODE_GENERIC_ERROR, NULL);
625 return;
628 dc->ct[newpos].parent = (DELZW_CODE_MINRANGE)parent;
629 dc->ct[newpos].value = value;
630 dc->ct[newpos].codetype = DELZW_CODETYPE_DYN_USED;
631 dc->ct_code_count++;
632 dc->last_code_added = newpos;
633 dc->free_code_search_start = newpos+1;
634 if(newpos > dc->highest_code_ever_used) {
635 dc->highest_code_ever_used = newpos;
638 if(dc->auto_inc_codesize) {
639 if(dc->early_codesize_inc) {
640 if(dc->free_code_search_start>=DELZW_NBITS_TO_MAXCODE(dc->curr_codesize)) {
641 delzw_increase_codesize(dc);
644 else {
645 if(dc->free_code_search_start>DELZW_NBITS_TO_MAXCODE(dc->curr_codesize)) {
646 delzw_increase_codesize(dc);
652 static void delzw_process_data_code(delzwctx *dc, DELZW_CODE code)
654 if(code >= dc->ct_capacity) {
655 return;
658 if(!dc->have_oldcode) {
659 // Special case for the first code.
660 delzw_emit_code(dc, code);
661 dc->oldcode = code;
662 dc->have_oldcode = 1;
663 dc->last_value = dc->ct[code].value;
664 return;
667 if(delzw_code_is_in_table(dc, code)) {
668 delzw_emit_code(dc, code);
669 if(dc->errcode) return;
671 // Let k = the first character of the translation of the code.
672 // Add <oldcode>k to the dictionary.
673 delzw_add_to_dict(dc, dc->oldcode, dc->last_value);
675 else {
676 if(code>dc->free_code_search_start && !dc->has_partial_clearing && !dc->is_hashed) {
677 if(dc->stop_on_invalid_code) {
678 delzw_debugmsg(dc, 1, "bad code: %d when max=%d (assuming data stops here)",
679 (int)code, (int)dc->free_code_search_start);
680 delzw_stop(dc, "bad LZW code");
681 return;
683 delzw_set_errorf(dc, DELZW_ERRCODE_BAD_CDATA, "Bad LZW code (%d when max=%d)",
684 (int)code, (int)dc->free_code_search_start);
685 return;
688 // Let k = the first char of the translation of oldcode.
689 // Add <oldcode>k to the dictionary.
690 delzw_add_to_dict(dc, dc->oldcode, dc->last_value);
691 if(dc->errcode) return;
693 // Write <oldcode>k to the output stream.
694 delzw_emit_code(dc, dc->last_code_added);
697 dc->oldcode = code;
700 static void delzw_clear_one_dynamic_code(delzwctx *dc, DELZW_CODE code)
702 if(code<dc->first_dynamic_code || code>=dc->ct_capacity) return;
703 dc->ct[code].codetype = DELZW_CODETYPE_DYN_UNUSED;
704 dc->ct[code].parent = 0;
705 dc->ct[code].value = 0;
708 static void delzw_clear(delzwctx *dc)
710 DELZW_CODE i;
712 delzw_debugmsg(dc, 2, "clear code");
714 if(dc->basefmt==DELZW_BASEFMT_UNIXCOMPRESS) {
715 delzw_unixcompress_end_bitgroup(dc);
718 for(i=dc->first_dynamic_code; i<=dc->highest_code_ever_used; i++) {
719 delzw_clear_one_dynamic_code(dc, i);
722 dc->curr_codesize = dc->min_codesize;
723 dc->free_code_search_start = dc->first_dynamic_code;
724 dc->have_oldcode = 0;
725 dc->oldcode = 0;
726 dc->last_code_added = 0;
727 dc->last_value = 0;
729 delzw_debugmsg(dc, 2, "code size: %u", dc->curr_codesize);
732 static void delzw_partial_clear(delzwctx *dc)
734 DELZW_CODE i;
736 delzw_debugmsg(dc, 2, "partial clear code");
738 for(i=dc->first_dynamic_code; i<=dc->highest_code_ever_used; i++) {
739 // If this code is in use
740 if(dc->ct[i].codetype==DELZW_CODETYPE_DYN_USED) {
741 // and its parent is a dynamic code,
742 // mark its parent as having a child
743 if(dc->ct[i].parent>=257) {
744 dc->ct[dc->ct[i].parent].flags = 1;
749 for(i=dc->first_dynamic_code; i<=dc->highest_code_ever_used; i++) {
750 if(dc->ct[i].flags==0) {
751 // If this code has no children, clear it
752 delzw_clear_one_dynamic_code(dc, i);
754 else {
755 // Leave all flags clear, for next time
756 dc->ct[i].flags = 0;
760 dc->free_code_search_start = dc->first_dynamic_code;
763 static void delzw_process_code(delzwctx *dc, DELZW_CODE code)
765 if(dc->debug_level>=3) {
766 delzw_debugmsg(dc, 3, "code=%d oc=%d lca=%d lv=%d next=%d",
767 (int)code,
768 (int)dc->oldcode, (int)dc->last_code_added, (int)dc->last_value,
769 (int)dc->free_code_search_start);
772 if(dc->escaped_code_is_pending) {
773 dc->escaped_code_is_pending = 0;
774 if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK) {
775 if(code==1 && (dc->curr_codesize<dc->max_codesize)) {
776 delzw_increase_codesize(dc);
778 else if(code==2) {
779 delzw_partial_clear(dc);
781 else {
782 delzw_set_error(dc, DELZW_ERRCODE_BAD_CDATA, NULL);
785 return;
788 if(code >= dc->ct_capacity) return;
790 switch(dc->ct[code].codetype) {
791 case DELZW_CODETYPE_STATIC:
792 case DELZW_CODETYPE_DYN_UNUSED:
793 case DELZW_CODETYPE_DYN_USED:
794 delzw_process_data_code(dc, code);
795 break;
796 case DELZW_CODETYPE_CLEAR:
797 delzw_clear(dc);
798 break;
799 case DELZW_CODETYPE_STOP:
800 delzw_stop(dc, "stop code");
801 break;
802 case DELZW_CODETYPE_SPECIAL:
803 if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK && code==256) {
804 dc->escaped_code_is_pending = 1;
806 break;
810 static void delzw_on_decompression_start(delzwctx *dc)
812 if(dc->basefmt!=DELZW_BASEFMT_ZIPSHRINK &&
813 dc->basefmt!=DELZW_BASEFMT_GIF &&
814 dc->basefmt!=DELZW_BASEFMT_UNIXCOMPRESS &&
815 dc->basefmt!=DELZW_BASEFMT_ZOOLZD &&
816 dc->basefmt!=DELZW_BASEFMT_TIFF &&
817 dc->basefmt!=DELZW_BASEFMT_TIFFOLD &&
818 dc->basefmt!=DELZW_BASEFMT_ARC5)
820 delzw_set_error(dc, DELZW_ERRCODE_UNSUPPORTED_OPTION, "Unsupported LZW format");
821 goto done;
824 if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK) {
825 dc->has_partial_clearing = 1;
827 else if(dc->basefmt==DELZW_BASEFMT_ARC5) {
828 dc->is_hashed = 1;
831 if(dc->header_type==DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE) {
832 dc->header_size = 3;
834 else if(dc->header_type==DELZW_HEADERTYPE_ARC1BYTE) {
835 dc->header_size = 1;
838 done:
842 // Process the header, if any.
843 // Set any remaining params needed, and validate params.
844 // This is called upon encountering the first byte after the header.
845 // (If zero bytes of data were compressed, it might never be called.)
846 static void delzw_on_codes_start(delzwctx *dc)
848 DELZW_CODE i;
850 if(dc->errcode) goto done;
852 if(dc->header_size > 0) {
853 delzw_debugmsg(dc, 2, "processing header");
855 if(dc->header_type==DELZW_HEADERTYPE_UNIXCOMPRESS3BYTE) {
856 delzw_process_unixcompress_3byteheader(dc);
858 else if(dc->header_type==DELZW_HEADERTYPE_ARC1BYTE) {
859 delzw_process_arc_1byteheader(dc);
862 if(dc->cb_after_header_parsed) {
863 dc->cb_after_header_parsed(dc);
867 delzw_debugmsg(dc, 2, "start of codes");
869 if(dc->basefmt==DELZW_BASEFMT_UNIXCOMPRESS) {
870 dc->min_codesize = 9;
872 else if(dc->basefmt==DELZW_BASEFMT_GIF) {
873 dc->auto_inc_codesize = 1;
874 dc->min_codesize = dc->gif_root_codesize + 1;
875 dc->max_codesize = 12;
877 else if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK) {
878 dc->min_codesize = 9;
879 dc->max_codesize = 13;
881 else if(dc->basefmt==DELZW_BASEFMT_ZOOLZD) {
882 dc->min_codesize = 9;
883 if(dc->max_codesize==0) {
884 dc->max_codesize = 13;
887 else if(dc->basefmt==DELZW_BASEFMT_TIFF) {
888 dc->is_msb = 1;
889 dc->early_codesize_inc = 1;
890 dc->min_codesize = 9;
891 if(dc->max_codesize==0) {
892 dc->max_codesize = 12;
895 else if(dc->basefmt==DELZW_BASEFMT_TIFFOLD) {
896 dc->min_codesize = 9;
897 if(dc->max_codesize==0) {
898 dc->max_codesize = 12;
901 else if(dc->basefmt==DELZW_BASEFMT_ARC5) {
902 dc->is_msb = 1;
903 dc->auto_inc_codesize = 0;
904 if(dc->max_codesize==0) {
905 dc->max_codesize = 12;
907 dc->min_codesize = dc->max_codesize;
910 if(dc->min_codesize<DELZW_MINMINCODESIZE || dc->min_codesize>DELZW_MAXMAXCODESIZE ||
911 dc->max_codesize<DELZW_MINMINCODESIZE || dc->max_codesize>DELZW_MAXMAXCODESIZE ||
912 dc->min_codesize>dc->max_codesize)
914 delzw_set_errorf(dc, DELZW_ERRCODE_UNSUPPORTED_OPTION, "Unsupported code size (%u,%u)",
915 dc->min_codesize, dc->max_codesize);
916 goto done;
919 delzw_debugmsg(dc, 2, "code size: %u, max=%u", dc->min_codesize, dc->max_codesize);
921 dc->curr_codesize = dc->min_codesize;
923 dc->ct_capacity = ((DELZW_CODE)1)<<dc->max_codesize;
924 dc->ct = DELZW_CALLOC(dc->userdata, dc->ct_capacity, sizeof(struct delzw_tableentry),
925 struct delzw_tableentry *);
926 if(dc->is_hashed) {
927 dc->ct2 = DELZW_CALLOC(dc->userdata, dc->ct_capacity, sizeof(struct delzw_tableentry2),
928 struct delzw_tableentry2 *);
930 dc->valbuf_capacity = dc->ct_capacity;
931 dc->valbuf = DELZW_CALLOC(dc->userdata, dc->valbuf_capacity, 1, DELZW_UINT8 *);
933 if(dc->basefmt==DELZW_BASEFMT_UNIXCOMPRESS) {
934 for(i=0; i<256; i++) {
935 dc->ct[i].codetype = DELZW_CODETYPE_STATIC;
936 dc->ct[i].value = (DELZW_UINT8)i;
939 if(dc->unixcompress_has_clear_code) {
940 dc->ct[256].codetype = DELZW_CODETYPE_CLEAR;
941 dc->first_dynamic_code = 257;
943 else {
944 dc->first_dynamic_code = 256;
947 else if(dc->basefmt==DELZW_BASEFMT_GIF) {
948 DELZW_CODE n = DELZW_NBITS_TO_NCODES(dc->gif_root_codesize);
950 for(i=0; i<n; i++) {
951 dc->ct[i].codetype = DELZW_CODETYPE_STATIC;
952 dc->ct[i].value = (i<=255)?((DELZW_UINT8)i):0;
954 dc->ct[n].codetype = DELZW_CODETYPE_CLEAR;
955 dc->ct[n+1].codetype = DELZW_CODETYPE_STOP;
956 dc->first_dynamic_code = n+2;
958 else if(dc->basefmt==DELZW_BASEFMT_ZIPSHRINK) {
959 dc->first_dynamic_code = 257;
961 for(i=0; i<256; i++) {
962 dc->ct[i].codetype = DELZW_CODETYPE_STATIC;
963 dc->ct[i].value = (DELZW_UINT8)i;
965 dc->ct[256].codetype = DELZW_CODETYPE_SPECIAL;
967 else if(dc->basefmt==DELZW_BASEFMT_ZOOLZD) {
968 for(i=0; i<256; i++) {
969 dc->ct[i].codetype = DELZW_CODETYPE_STATIC;
970 dc->ct[i].value = (DELZW_UINT8)i;
972 dc->ct[256].codetype = DELZW_CODETYPE_CLEAR;
973 dc->ct[257].codetype = DELZW_CODETYPE_STOP;
974 dc->first_dynamic_code = 258;
976 else if(dc->basefmt==DELZW_BASEFMT_TIFF || dc->basefmt==DELZW_BASEFMT_TIFFOLD) {
977 for(i=0; i<256; i++) {
978 dc->ct[i].codetype = DELZW_CODETYPE_STATIC;
979 dc->ct[i].value = (DELZW_UINT8)i;
981 dc->ct[256].codetype = DELZW_CODETYPE_CLEAR;
982 dc->ct[257].codetype = DELZW_CODETYPE_STOP;
983 dc->first_dynamic_code = 258;
986 if(dc->is_hashed) {
987 for(i=0; i<dc->ct_capacity; i++) {
988 dc->ct2[i].next = DELZW_NEXTPTR_NONE;
992 for(i=dc->first_dynamic_code; i<dc->ct_capacity; i++) {
993 dc->ct[i].codetype = DELZW_CODETYPE_DYN_UNUSED;
995 dc->free_code_search_start = dc->first_dynamic_code;
997 if(dc->is_hashed) {
998 if(dc->arc5_has_stop_code) {
999 dc->ct[0].codetype = DELZW_CODETYPE_STOP;
1000 dc->ct_code_count++;
1003 for(i=0; i<256; i++) {
1004 delzw_hashed_add_root_code_to_dict(dc, (DELZW_UINT8)i);
1008 done:
1012 static int delzw_have_enough_output(delzwctx *dc)
1014 if(dc->output_len_known) {
1015 if(dc->uncmpr_nbytes_written + (DELZW_OFF_T)dc->outbuf_nbytes_used >=
1016 dc->output_expected_len)
1018 return 1;
1021 return 0;
1024 static void delzw_process_byte(delzwctx *dc, DELZW_UINT8 b)
1026 if(dc->state==DELZW_STATE_INIT) {
1027 delzw_on_decompression_start(dc);
1028 dc->state = DELZW_STATE_READING_HEADER;
1031 if(dc->state==DELZW_STATE_READING_HEADER) {
1032 if(dc->total_nbytes_processed < (DELZW_OFF_T)dc->header_size) {
1033 dc->header_buf[dc->total_nbytes_processed] = b;
1034 return;
1037 // (This is the first byte after the header.)
1038 delzw_on_codes_start(dc);
1039 dc->state = DELZW_STATE_READING_CODES;
1042 if(dc->state==DELZW_STATE_READING_CODES) {
1043 if(dc->nbytes_left_to_skip>0) {
1044 dc->nbytes_left_to_skip--;
1045 return;
1048 delzw_add_byte_to_bitbuf(dc, b);
1050 while(1) {
1051 DELZW_CODE code;
1053 if(dc->errcode) break;
1054 if(dc->bitreader_nbits_in_buf < dc->curr_codesize) {
1055 break;
1058 code = delzw_get_code(dc, dc->curr_codesize);
1059 dc->ncodes_in_this_bitgroup++;
1060 delzw_process_code(dc, code);
1062 if(dc->state != DELZW_STATE_READING_CODES) {
1063 break;
1065 if(dc->nbytes_left_to_skip>0) {
1066 break;
1072 static void delzw_addbuf(delzwctx *dc, const DELZW_UINT8 *buf, size_t buf_len)
1074 size_t i;
1076 if(dc->debug_level>=3) {
1077 delzw_debugmsg(dc, 3, "received %d bytes of input", (int)buf_len);
1080 for(i=0; i<buf_len; i++) {
1081 if(dc->errcode) break;
1082 if(dc->state == DELZW_STATE_FINISHED) break;
1083 if(delzw_have_enough_output(dc)) {
1084 delzw_stop(dc, "sufficient output");
1085 break;
1087 delzw_process_byte(dc, buf[i]);
1088 dc->total_nbytes_processed++;
1092 static void delzw_finish(delzwctx *dc)
1094 const char *reason;
1096 delzw_flush(dc);
1098 if(dc->output_len_known && (dc->uncmpr_nbytes_decoded==dc->output_expected_len)) {
1099 reason = "end of input and sufficient output";
1101 else {
1102 reason = "end of input";
1105 delzw_stop(dc, reason);