gzip: drop mentions of Amiga, VMS
[gzip.git] / unlzw.c
blobd5b01f598b13915460762c47675acef4504f4bf1
1 /* unlzw.c -- decompress files in LZW format.
2 * The code in this file is directly derived from the public domain 'compress'
3 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
4 * Ken Turkowski, Dave Mack and Peter Jannesen.
6 * This is a temporary version which will be rewritten in some future version
7 * to accommodate in-memory decompression.
8 */
10 #include <config.h>
12 #include <unistd.h>
13 #include <fcntl.h>
15 #include "tailor.h"
16 #include "gzip.h"
17 #include "lzw.h"
/* Scalar types used by the LZW decoder. */
typedef unsigned char  char_type;    /* one raw byte of input/output      */
typedef long           code_int;     /* an LZW code (signed: -1 = "none") */
typedef unsigned long  count_int;
typedef unsigned short count_short;
typedef unsigned long  cmp_code_int; /* unsigned view for code comparisons */

/* 2**n as a long: the number of distinct codes representable in n bits. */
#define MAXCODE(n) (1L << (n))

/* Byte order of the host; 0 means "unknown", which selects the portable
 * byte-by-byte code paths below.
 */
#if !defined(BYTEORDER)
#  define BYTEORDER 0
#endif

/* Non-zero only when the build configuration says unaligned long loads
 * are acceptable.  (The spelling is historical and must be kept.)
 */
#if !defined(NOALLIGN)
#  define NOALLIGN 0
#endif
/* Overlay of a long with individually named bytes.  The member order
 * depends on BYTEORDER; when BYTEORDER is neither 4321 nor 1234 the
 * struct holds a placeholder and BYTEORDER is undefined, so later
 * "#if BYTEORDER == ..." tests evaluate it as 0.
 */
union bytes {
    long word;
    struct {
#if BYTEORDER == 4321
        char_type b1;
        char_type b2;
        char_type b3;
        char_type b4;
#elif BYTEORDER == 1234
        char_type b4;
        char_type b3;
        char_type b2;
        char_type b1;
#else
#  undef BYTEORDER
        int dummy;
#endif
    } bytes;
};
/* input(b, o, c, n, m): extract one code from a byte buffer.
 *   b  byte buffer to read from
 *   o  current bit offset into b (lvalue); advanced by n
 *   c  lvalue that receives the extracted code
 *   n  number of bits to advance the offset by
 *   m  mask applied to the extracted value
 * The value is taken starting at bit (o & 7) of byte b[o >> 3], least
 * significant bits first.  The portable variant always reads the three
 * bytes b[o>>3 .. (o>>3)+2], so the caller must keep them readable.
 *
 * The body is wrapped in do { } while (0) so that "input(...);" is a
 * single statement and can be used safely inside an unbraced if/else.
 */
#if BYTEORDER == 4321 && NOALLIGN == 1
   /* NOTE(review): this variant loads a long through a cast byte pointer,
    * which relies on the platform tolerating unaligned, type-punned loads.
    */
#  define input(b,o,c,n,m) do { \
     (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
     (o) += (n); \
   } while (0)
#else
#  define input(b,o,c,n,m) do { \
     char_type *input_src_ = &(b)[(o)>>3]; \
     (c) = ((((long)(input_src_[0]))|((long)(input_src_[1])<<8)| \
            ((long)(input_src_[2])<<16))>>((o)&0x7))&(m); \
     (o) += (n); \
   } while (0)
#endif
/* Accessors for the LZW string table.
 *   tab_prefixof(i)       lvalue: prefix code stored for code i
 *   tab_suffixof(i)       lvalue: suffix byte stored for code i
 *   clear_tab_prefixof()  zero the start of the prefix table; expands to a
 *                         single expression with no trailing semicolon, so
 *                         the caller writes "clear_tab_prefixof();"
 *   de_stack              address of the last element of d_buf viewed as
 *                         bytes; the decoder builds strings downwards from it
 * With MAXSEG_64K the prefix table is split in two halves selected by the
 * low bit of the code.
 */
#ifndef MAXSEG_64K
   /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
#  define tab_prefixof(i) tab_prefix[i]
#  define clear_tab_prefixof()  memzero(tab_prefix, 256)
#else
   /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
   /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd  codes */
   ush *tab_prefix[2];
#  define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
#  define clear_tab_prefixof()  \
      (memzero(tab_prefix0, 128), \
       memzero(tab_prefix1, 128))
#endif
#define de_stack        ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
#define tab_suffixof(i) tab_suffix[i]
88 int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
90 /* ============================================================================
91 * Decompress in to out. This routine adapts to the codes in the
92 * file building the "string" table on-the-fly; requiring no table to
93 * be stored in the compressed file.
94 * IN assertions: the buffer inbuf contains already the beginning of
95 * the compressed data, from offsets iptr to insize-1 included.
96 * The magic header has already been checked and skipped.
97 * bytes_in and bytes_out have been initialized.
99 int unlzw(in, out)
100 int in, out; /* input and output file descriptors */
102 char_type *stackp;
103 code_int code;
104 int finchar;
105 code_int oldcode;
106 code_int incode;
107 long inbits;
108 long posbits;
109 int outpos;
110 /* int insize; (global) */
111 unsigned bitmask;
112 code_int free_ent;
113 code_int maxcode;
114 code_int maxmaxcode;
115 int n_bits;
116 int rsize;
118 #ifdef MAXSEG_64K
119 tab_prefix[0] = tab_prefix0;
120 tab_prefix[1] = tab_prefix1;
121 #endif
122 maxbits = get_byte();
123 block_mode = maxbits & BLOCK_MODE;
124 if ((maxbits & LZW_RESERVED) != 0) {
125 WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n",
126 program_name, ifname, (unsigned int) maxbits & LZW_RESERVED));
128 maxbits &= BIT_MASK;
129 maxmaxcode = MAXCODE(maxbits);
131 if (maxbits > BITS) {
132 fprintf(stderr,
133 "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
134 program_name, ifname, maxbits, BITS);
135 exit_code = ERROR;
136 return ERROR;
138 rsize = insize;
139 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
140 bitmask = (1<<n_bits)-1;
141 oldcode = -1;
142 finchar = 0;
143 outpos = 0;
144 posbits = inptr<<3;
146 free_ent = ((block_mode) ? FIRST : 256);
148 clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */
150 for (code = 255 ; code >= 0 ; --code) {
151 tab_suffixof(code) = (char_type)code;
153 do {
154 int i;
155 int e;
156 int o;
158 resetbuf:
159 o = posbits >> 3;
160 e = o <= insize ? insize - o : 0;
162 for (i = 0 ; i < e ; ++i) {
163 inbuf[i] = inbuf[i+o];
165 insize = e;
166 posbits = 0;
168 if (insize < INBUF_EXTRA) {
169 rsize = read_buffer (in, (char *) inbuf + insize, INBUFSIZ);
170 if (rsize == -1) {
171 read_error();
173 insize += rsize;
174 bytes_in += (off_t)rsize;
176 inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 :
177 ((long)insize<<3)-(n_bits-1));
179 while (inbits > posbits) {
180 if (free_ent > maxcode) {
181 posbits = ((posbits-1) +
182 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
183 ++n_bits;
184 if (n_bits == maxbits) {
185 maxcode = maxmaxcode;
186 } else {
187 maxcode = MAXCODE(n_bits)-1;
189 bitmask = (1<<n_bits)-1;
190 goto resetbuf;
192 input(inbuf,posbits,code,n_bits,bitmask);
193 Tracev((stderr, "%ld ", code));
195 if (oldcode == -1) {
196 if (256 <= code)
197 gzip_error ("corrupt input.");
198 outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code));
199 continue;
201 if (code == CLEAR && block_mode) {
202 clear_tab_prefixof();
203 free_ent = FIRST - 1;
204 posbits = ((posbits-1) +
205 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
206 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
207 bitmask = (1<<n_bits)-1;
208 goto resetbuf;
210 incode = code;
211 stackp = de_stack;
213 if (code >= free_ent) { /* Special case for KwKwK string. */
214 if (code > free_ent) {
215 #ifdef DEBUG
216 char_type *p;
218 posbits -= n_bits;
219 p = &inbuf[posbits>>3];
220 fprintf(stderr,
221 "code:%ld free_ent:%ld n_bits:%d insize:%u\n",
222 code, free_ent, n_bits, insize);
223 fprintf(stderr,
224 "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
225 posbits, p[-1],p[0],p[1],p[2],p[3]);
226 #endif
227 if (!test && outpos > 0) {
228 write_buf(out, (char*)outbuf, outpos);
229 bytes_out += (off_t)outpos;
231 gzip_error (to_stdout
232 ? "corrupt input."
233 : "corrupt input. Use zcat to recover some data.");
235 *--stackp = (char_type)finchar;
236 code = oldcode;
239 while ((cmp_code_int)code >= (cmp_code_int)256) {
240 /* Generate output characters in reverse order */
241 *--stackp = tab_suffixof(code);
242 code = tab_prefixof(code);
244 *--stackp = (char_type)(finchar = tab_suffixof(code));
246 /* And put them out in forward order */
248 int i;
250 if (outpos+(i = (de_stack-stackp)) >= OUTBUFSIZ) {
251 do {
252 if (i > OUTBUFSIZ-outpos) i = OUTBUFSIZ-outpos;
254 if (i > 0) {
255 memcpy(outbuf+outpos, stackp, i);
256 outpos += i;
258 if (outpos >= OUTBUFSIZ) {
259 if (!test) {
260 write_buf(out, (char*)outbuf, outpos);
261 bytes_out += (off_t)outpos;
263 outpos = 0;
265 stackp+= i;
266 } while ((i = (de_stack-stackp)) > 0);
267 } else {
268 memcpy(outbuf+outpos, stackp, i);
269 outpos += i;
273 if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */
275 tab_prefixof(code) = (unsigned short)oldcode;
276 tab_suffixof(code) = (char_type)finchar;
277 free_ent = code+1;
279 oldcode = incode; /* Remember previous code. */
281 } while (rsize != 0);
283 if (!test && outpos > 0) {
284 write_buf(out, (char*)outbuf, outpos);
285 bytes_out += (off_t)outpos;
287 return OK;