build: update gnulib submodule to latest
[gzip.git] / unlzw.c
blob63f941c6213533bf3140ce642bc16d5bfe3e02a9
1 /* unlzw.c -- decompress files in LZW format.
2 * The code in this file is directly derived from the public domain 'compress'
3 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
4 * Ken Turkowski, Dave Mack and Peter Jannesen.
6 * This is a temporary version which will be rewritten in some future version
7 * to accommodate in-memory decompression.
8 */
10 #include <config.h>
11 #include "tailor.h"
13 #include <unistd.h>
14 #include <fcntl.h>
16 #include "gzip.h"
17 #include "lzw.h"
19 typedef unsigned char char_type;
20 typedef long code_int;
21 typedef unsigned long count_int;
22 typedef unsigned short count_short;
23 typedef unsigned long cmp_code_int;
25 #define MAXCODE(n) (1L << (n))
27 #ifndef REGISTERS
28 # define REGISTERS 2
29 #endif
30 #define REG1
31 #define REG2
32 #define REG3
33 #define REG4
34 #define REG5
35 #define REG6
36 #define REG7
37 #define REG8
38 #define REG9
39 #define REG10
40 #define REG11
41 #define REG12
42 #define REG13
43 #define REG14
44 #define REG15
45 #define REG16
46 #if REGISTERS >= 1
47 # undef REG1
48 # define REG1 register
49 #endif
50 #if REGISTERS >= 2
51 # undef REG2
52 # define REG2 register
53 #endif
54 #if REGISTERS >= 3
55 # undef REG3
56 # define REG3 register
57 #endif
58 #if REGISTERS >= 4
59 # undef REG4
60 # define REG4 register
61 #endif
62 #if REGISTERS >= 5
63 # undef REG5
64 # define REG5 register
65 #endif
66 #if REGISTERS >= 6
67 # undef REG6
68 # define REG6 register
69 #endif
70 #if REGISTERS >= 7
71 # undef REG7
72 # define REG7 register
73 #endif
74 #if REGISTERS >= 8
75 # undef REG8
76 # define REG8 register
77 #endif
78 #if REGISTERS >= 9
79 # undef REG9
80 # define REG9 register
81 #endif
82 #if REGISTERS >= 10
83 # undef REG10
84 # define REG10 register
85 #endif
86 #if REGISTERS >= 11
87 # undef REG11
88 # define REG11 register
89 #endif
90 #if REGISTERS >= 12
91 # undef REG12
92 # define REG12 register
93 #endif
94 #if REGISTERS >= 13
95 # undef REG13
96 # define REG13 register
97 #endif
98 #if REGISTERS >= 14
99 # undef REG14
100 # define REG14 register
101 #endif
102 #if REGISTERS >= 15
103 # undef REG15
104 # define REG15 register
105 #endif
106 #if REGISTERS >= 16
107 # undef REG16
108 # define REG16 register
109 #endif
111 #ifndef BYTEORDER
112 # define BYTEORDER 0000
113 #endif
115 #ifndef NOALLIGN
116 # define NOALLIGN 0
117 #endif
120 union bytes {
121 long word;
122 struct {
123 #if BYTEORDER == 4321
124 char_type b1;
125 char_type b2;
126 char_type b3;
127 char_type b4;
128 #else
129 #if BYTEORDER == 1234
130 char_type b4;
131 char_type b3;
132 char_type b2;
133 char_type b1;
134 #else
135 # undef BYTEORDER
136 int dummy;
137 #endif
138 #endif
139 } bytes;
142 #if BYTEORDER == 4321 && NOALLIGN == 1
143 # define input(b,o,c,n,m){ \
144 (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
145 (o) += (n); \
147 #else
148 # define input(b,o,c,n,m){ \
149 REG1 char_type *p = &(b)[(o)>>3]; \
150 (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \
151 ((long)(p[2])<<16))>>((o)&0x7))&(m); \
152 (o) += (n); \
154 #endif
156 #ifndef MAXSEG_64K
157 /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
158 # define tab_prefixof(i) tab_prefix[i]
159 # define clear_tab_prefixof() memzero(tab_prefix, 256);
160 #else
161 /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
162 /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */
163 ush *tab_prefix[2];
164 # define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
165 # define clear_tab_prefixof() \
166 memzero(tab_prefix0, 128), \
167 memzero(tab_prefix1, 128);
168 #endif
169 #define de_stack ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
170 #define tab_suffixof(i) tab_suffix[i]
172 int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
174 /* ============================================================================
175 * Decompress in to out. This routine adapts to the codes in the
176 * file building the "string" table on-the-fly; requiring no table to
177 * be stored in the compressed file.
178 * IN assertions: the buffer inbuf contains already the beginning of
179 * the compressed data, from offsets iptr to insize-1 included.
180 * The magic header has already been checked and skipped.
181 * bytes_in and bytes_out have been initialized.
183 int unlzw(in, out)
184 int in, out; /* input and output file descriptors */
186 REG2 char_type *stackp;
187 REG3 code_int code;
188 REG4 int finchar;
189 REG5 code_int oldcode;
190 REG6 code_int incode;
191 REG7 long inbits;
192 REG8 long posbits;
193 REG9 int outpos;
194 /* REG10 int insize; (global) */
195 REG11 unsigned bitmask;
196 REG12 code_int free_ent;
197 REG13 code_int maxcode;
198 REG14 code_int maxmaxcode;
199 REG15 int n_bits;
200 REG16 int rsize;
202 #ifdef MAXSEG_64K
203 tab_prefix[0] = tab_prefix0;
204 tab_prefix[1] = tab_prefix1;
205 #endif
206 maxbits = get_byte();
207 block_mode = maxbits & BLOCK_MODE;
208 if ((maxbits & LZW_RESERVED) != 0) {
209 WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n",
210 program_name, ifname, maxbits & LZW_RESERVED));
212 maxbits &= BIT_MASK;
213 maxmaxcode = MAXCODE(maxbits);
215 if (maxbits > BITS) {
216 fprintf(stderr,
217 "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
218 program_name, ifname, maxbits, BITS);
219 exit_code = ERROR;
220 return ERROR;
222 rsize = insize;
223 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
224 bitmask = (1<<n_bits)-1;
225 oldcode = -1;
226 finchar = 0;
227 outpos = 0;
228 posbits = inptr<<3;
230 free_ent = ((block_mode) ? FIRST : 256);
232 clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */
234 for (code = 255 ; code >= 0 ; --code) {
235 tab_suffixof(code) = (char_type)code;
237 do {
238 REG1 int i;
239 int e;
240 int o;
242 resetbuf:
243 o = posbits >> 3;
244 e = o <= insize ? insize - o : 0;
246 for (i = 0 ; i < e ; ++i) {
247 inbuf[i] = inbuf[i+o];
249 insize = e;
250 posbits = 0;
252 if (insize < INBUF_EXTRA) {
253 rsize = read_buffer (in, (char *) inbuf + insize, INBUFSIZ);
254 if (rsize == -1) {
255 read_error();
257 insize += rsize;
258 bytes_in += (off_t)rsize;
260 inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 :
261 ((long)insize<<3)-(n_bits-1));
263 while (inbits > posbits) {
264 if (free_ent > maxcode) {
265 posbits = ((posbits-1) +
266 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
267 ++n_bits;
268 if (n_bits == maxbits) {
269 maxcode = maxmaxcode;
270 } else {
271 maxcode = MAXCODE(n_bits)-1;
273 bitmask = (1<<n_bits)-1;
274 goto resetbuf;
276 input(inbuf,posbits,code,n_bits,bitmask);
277 Tracev((stderr, "%d ", code));
279 if (oldcode == -1) {
280 if (256 <= code)
281 gzip_error ("corrupt input.");
282 outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code));
283 continue;
285 if (code == CLEAR && block_mode) {
286 clear_tab_prefixof();
287 free_ent = FIRST - 1;
288 posbits = ((posbits-1) +
289 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
290 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
291 bitmask = (1<<n_bits)-1;
292 goto resetbuf;
294 incode = code;
295 stackp = de_stack;
297 if (code >= free_ent) { /* Special case for KwKwK string. */
298 if (code > free_ent) {
299 #ifdef DEBUG
300 char_type *p;
302 posbits -= n_bits;
303 p = &inbuf[posbits>>3];
304 fprintf(stderr,
305 "code:%ld free_ent:%ld n_bits:%d insize:%u\n",
306 code, free_ent, n_bits, insize);
307 fprintf(stderr,
308 "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
309 posbits, p[-1],p[0],p[1],p[2],p[3]);
310 #endif
311 if (!test && outpos > 0) {
312 write_buf(out, (char*)outbuf, outpos);
313 bytes_out += (off_t)outpos;
315 gzip_error (to_stdout
316 ? "corrupt input."
317 : "corrupt input. Use zcat to recover some data.");
319 *--stackp = (char_type)finchar;
320 code = oldcode;
323 while ((cmp_code_int)code >= (cmp_code_int)256) {
324 /* Generate output characters in reverse order */
325 *--stackp = tab_suffixof(code);
326 code = tab_prefixof(code);
328 *--stackp = (char_type)(finchar = tab_suffixof(code));
330 /* And put them out in forward order */
332 REG1 int i;
334 if (outpos+(i = (de_stack-stackp)) >= OUTBUFSIZ) {
335 do {
336 if (i > OUTBUFSIZ-outpos) i = OUTBUFSIZ-outpos;
338 if (i > 0) {
339 memcpy(outbuf+outpos, stackp, i);
340 outpos += i;
342 if (outpos >= OUTBUFSIZ) {
343 if (!test) {
344 write_buf(out, (char*)outbuf, outpos);
345 bytes_out += (off_t)outpos;
347 outpos = 0;
349 stackp+= i;
350 } while ((i = (de_stack-stackp)) > 0);
351 } else {
352 memcpy(outbuf+outpos, stackp, i);
353 outpos += i;
357 if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */
359 tab_prefixof(code) = (unsigned short)oldcode;
360 tab_suffixof(code) = (char_type)finchar;
361 free_ent = code+1;
363 oldcode = incode; /* Remember previous code. */
365 } while (rsize != 0);
367 if (!test && outpos > 0) {
368 write_buf(out, (char*)outbuf, outpos);
369 bytes_out += (off_t)outpos;
371 return OK;