1 /* decomp16: decompress 16bit compressed files on a 16bit Intel processor
3 * Version 1.3 of 25 Mar 92.
5 * This was written by John N. White on 6/30/91 and is Public Domain.
6 * Patched to run under news by Will Rose, Feb 92.
7 * J N White's (earlier) patches added by Will Rose, 20 Feb 92.
8 * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
9 * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
11 * decomp16 can use as as little as 512 bytes of stack; since it forks
12 * four additional copies, it's probably worth using minimum stack rather
13 * than the 8192 byte Minix default. To reduce memory still further,
14 * change BUFSZ below to 256; it is currently set to 1024 for speed. The
15 * minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
17 * This program acts as a filter:
18 * decomp16 < compressed_file > decompressed_file
19 * The arguments -0 to -4 run only the corresponding pass.
21 * decomp16 -4 < compressed_file > 3;
22 * decomp16 -3 < 3 > 2;
23 * decomp16 -2 < 2 > 1;
24 * decomp16 -1 < 1 > 0;
25 * decomp16 -0 < 0 > decompressed_file
26 * will also work, as will connecting the passes by explicit pipes if
27 * there is enough memory to do so. File name arguments can also be
28 * given directly on the command line.
30 * Compress uses a modified LZW compression algorithm. A compressed file
31 * is a set of indices into a dictionary of strings. The number of bits
32 * used to store each index depends on the number of entries currently
33 * in the dictionary. If there are between 257 and 512 entries, 9 bits
34 * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
35 * consists of 0-255 (which are the corresponding chars) and 256 (which
36 * is a special CLEAR code). As each index in the compressed file is read,
37 * a new entry is added to the dictionary consisting of the current string
38 * with the first char of the next string appended. When the dictionary
39 * is full, no further entries are added. If a CLEAR code is received,
40 * the dictionary will be completely reset. The first two bytes of the
41 * compressed file are a magic number, and the third byte indicates the
42 * maximum number of bits, and whether the CLEAR code is used (older versions
43 * of compress didn't have CLEAR).
45 * This program works by forking four more copies of itself. The five
46 * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
47 * to supply its input, which in turn forks and reads from copy 2, etc.
48 * This sequence is used so that when the program exits, all writes
49 * are completed and a program that has exec'd uncompress (such as news)
50 * can immediately use the uncompressed data when the wait() call returns.
52 * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
53 * program will run as that copy, reading from stdin and writing to stdout.
54 * This allows decompressing with very limited RAM because only one of the
55 * five passes is in memory at a time.
57 * The compressed data is a series of string indices (and a header at
58 * the beginning and an occasional CLEAR code). As these indices flow
59 * through the pipes, each program decodes the ones it can. The result
60 * of each decoding will be indices that the following programs can handle.
62 * Each of the 65536 strings in the dictionary is an earlier string with
63 * some character added to the end (except for the the 256 predefined
64 * single char strings). When new entries are made to the dictionary,
65 * the string index part will just be the last index to pass through.
66 * But the char part is the first char of the next string, which isn't
67 * known yet. So the string can be stored as a pair of indices. When
68 * this string is specified, it is converted to this pair of indices,
69 * which are flagged so that the first will be decoded in full while
70 * the second will be decoded to its first char. The dictionary takes
71 * 256k to store (64k strings of 2 indices of 2 bytes each). This is
72 * too big for a 64k data segment, so it is divided into 5 equal parts.
73 * Copy 4 of the program maintains the high part and copy 0 holds the
77 #include <sys/types.h>
82 #define BUFSZ 1024 /* size of i/o buffers */
83 #define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
84 #define DICTSZ (unsigned)13056 /* # of local dictionary entries */
85 #define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
89 int fdin
, fdout
, fderr
; /* input, output, and error file descriptors */
90 int ibufstart
, obufind
, ibufend
;/* i/o buffer indices */
91 int ipbufind
= BUFSZ_2
; /* pipe buffer indices */
93 int pnum
= -1; /* ID of this copy */
94 unsigned short ipbuf
[BUFSZ_2
]; /* for buffering input */
95 unsigned short opbuf
[BUFSZ_2
]; /* for buffering output */
96 unsigned char *ibuf
= (unsigned char *) ipbuf
;
97 unsigned char *obuf
= (unsigned char *) opbuf
;
99 unsigned short dindex
[DICTSZ
]; /* dictionary: index to substring */
100 unsigned short dchar
[DICTSZ
]; /* dictionary: last char of string */
101 unsigned iindex
, tindex
, tindex2
; /* holds index being processed */
102 unsigned base
; /* where in global dict local dict starts */
104 unsigned locend
; /* where in global dict local dict ends */
105 unsigned curend
= 256; /* current end of global dict */
106 unsigned maxend
; /* max end of global dict */
107 int dcharp
; /* ptr to dchar that needs next index entry */
108 int curbits
; /* number of bits for getbits() to read */
109 int maxbits
; /* limit on number of bits */
110 int clearflg
; /* if set, allow CLEAR */
111 int inmod
; /* mod 8 for getbits() */
113 _PROTOTYPE(int main
, (int argc
, char **argv
));
114 _PROTOTYPE(void ffork
, (void));
115 _PROTOTYPE(void die
, (char *s
));
116 _PROTOTYPE(void myputc
, (unsigned c
));
117 _PROTOTYPE(unsigned mygetc
, (void));
118 _PROTOTYPE(void getbits
, (void));
119 _PROTOTYPE(void getpipe
, (void));
120 _PROTOTYPE(void putpipe
, (unsigned u
, int flag
));
130 /* Find the program name */
132 while (argv
[0][j
] != '\0') j
++;
133 len
= (unsigned int) j
;
135 if (argv
[0][j
] == '/') break;
136 if (argv
[0][j
] == '/') j
++;
140 /* Sort out the flags */
141 for (k
= 1; k
< argc
; k
++) {
142 if (argv
[k
][0] == '-') {
145 case '0': /* pass numbers */
149 case '4': pnum
= c
- '0'; break;
150 case 'd': /* used by news */
153 (void) write(1, "Usage: ", 7);
154 (void) write(1, cp
, len
);
155 (void) write(1, " [-#] [in] [out]\n", 17);
160 /* Once it's checked, lose it anyway */
161 for (j
= k
; j
< argc
; j
++) argv
[j
] = argv
[j
+ 1];
167 /* Default i/o settings */
172 /* Try to open specific files and connect them to stdin/stdout */
174 if ((fdtmp
= open(argv
[1], 0)) == -1) die("input open failed");
176 if ((fdin
= dup(fdtmp
)) == -1) die("input dup failed\n");
180 (void) unlink(argv
[2]);
181 if ((fdtmp
= creat(argv
[2], 0666)) == -1) die("output creat failed");
183 if ((fdout
= dup(fdtmp
)) == -1) die("output dup failed\n");
187 /* Sort out type of compression */
188 if (pnum
== -1 || pnum
== 4) {/* if this is pass 4 */
189 /* Check header of compressed file */
190 if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */
191 die("not a compressed file\n");
192 iindex
= mygetc(); /* get compression style */
194 getpipe(); /* get compression style */
196 maxbits
= iindex
& 0x1F;
197 clearflg
= ((iindex
& 0x80) != 0) ? TRUE
: FALSE
;
198 if (maxbits
< 9 || maxbits
> 16) /* check for valid maxbits */
199 die("can't decompress\n");
200 if (pnum
!= -1 && pnum
!= 0)
201 putpipe(iindex
, 0); /* pass style to next copy */
203 /* Fork off an ancestor if necessary - ffork() increments pnum */
206 if (pnum
== 0) ffork();
207 if (pnum
== 1) ffork();
208 if (pnum
== 2) ffork();
209 if (pnum
== 3) ffork();
212 /* Preliminary inits. Note: end/maxend/curend are highest, not
214 base
= DICTSZ
* pnum
+ 256;
215 locend
= base
+ DICTSZ
- 1;
216 maxend
= (1 << maxbits
) - 1;
217 if (maxend
> locend
) maxend
= locend
;
220 curend
= 255 + (clearflg
? 1 : 0); /* init dictionary */
221 dcharp
= DICTSZ
; /* flag for none needed */
222 curbits
= 9; /* init curbits (for copy 0) */
223 while (TRUE
) { /* for each index in input */
224 if (pnum
== 4) {/* get index using getbits() */
225 if (curbits
< maxbits
&& (1 << curbits
) <= curend
) {
226 /* Curbits needs to be increased */
227 /* Due to uglyness in compress, these
228 * indices in the compressed file are
230 while (inmod
) getbits();
235 getpipe(); /* get next index */
237 if (iindex
== 256 && clearflg
) {
238 if (pnum
> 0) putpipe(iindex
, 0);
239 /* Due to uglyness in compress, these indices
240 * in the compressed file are wasted */
241 while (inmod
) getbits();
245 /* Convert the index part, ignoring spawned chars */
246 while (tindex
>= base
) tindex
= dindex
[tindex
- base
];
247 /* Pass on the index */
249 /* Save the char of the last added entry, if any */
250 if (dcharp
< DICTSZ
) dchar
[dcharp
++] = tindex
;
251 if (curend
< maxend
&& ++curend
> (base
- 1))
252 dindex
[dcharp
= (curend
- base
)] = iindex
;
254 /* Do spawned chars. They are naturally produced in
255 * the wrong order. To get them in the right order
256 * without using memory, a series of passes,
257 * progressively less deep, are used */
259 while ((tindex
= iindex
) >= tbase
) {/* for each char to spawn*/
260 while ((tindex2
= dindex
[tindex
- base
]) >= tbase
)
261 tindex
= tindex2
; /* scan to desired char */
262 putpipe(dchar
[tindex
-base
], 1); /* put it to the pipe*/
264 if (tbase
== 0) break; /* it's a wrap */
273 * Fork off the previous pass - the parent reads from the child.
279 if (pipe(pfd
) == -1) die("pipe() error\n");
280 if ((j
= fork()) == -1) die("fork() error\n");
281 if (j
== 0) { /* this is the child */
282 if (close(1) == -1) die("close(1) error\n");
283 if (dup(pfd
[1]) != 1) die("dup(1) error\n");
284 (void) close(pfd
[0]);
286 } else { /* this is the parent */
287 if (close(0) == -1) die("close(0) error\n");
288 if (dup(pfd
[0]) != 0) die("dup(0) error\n");
289 (void) close(pfd
[1]);
296 * If s is a message, write it to stderr. Flush buffers if needed. Then exit.
301 /* Flush stdout buffer if needed */
303 if (write(fdout
, (char *) obuf
, (unsigned) obufind
) != obufind
)
304 s
= "bad stdout write\n";
308 /* Flush pipe if needed */
310 putpipe(EOF_INDEX
, 0);
312 /* Write any error message */
313 if (s
!= (char *) NULL
) {
314 while (*s
) (void) write(fderr
, s
++, 1);
316 exit((s
== (char *) NULL
) ? 0 : 1);
322 * Put a char to stdout.
328 if (obufind
>= BUFSZ
) { /* if stdout buffer full */
329 if (write(fdout
, (char *) obuf
, BUFSZ
) != BUFSZ
) /* flush to stdout */
330 die("bad stdout write\n");
338 * Get a char from stdin. If EOF, then die() and exit.
342 if (ibufstart
>= ibufend
) { /* if stdin buffer empty */
343 if ((ibufend
= read(fdin
, (char *) ibuf
, BUFSZ
)) <= 0)
344 die((char *) NULL
); /* if EOF, do normal exit */
347 return(ibuf
[ibufstart
++] & 0xff);
353 * Put curbits bits into index from stdin. Note: only copy 4 uses this.
354 * The bits within a byte are in the correct order. But when the bits
355 * cross a byte boundry, the lowest bits will be in the higher part of
356 * the current byte, and the higher bits will be in the lower part of
362 static unsigned curbyte
; /* byte having bits extracted from it */
363 static int left
; /* how many bits are left in curbyte */
365 inmod
= (inmod
+ 1) & 7; /* count input mod 8 */
368 if (curbits
- have
> 8) {
369 iindex
|= mygetc() << have
;
372 iindex
|= ((curbyte
= mygetc()) << have
) & ~((unsigned) 0xFFFF << curbits
);
373 curbyte
>>= curbits
- have
;
374 left
= 8 - (curbits
- have
);
380 * Get an index from the pipeline. If flagged firstonly, handle it here.
385 static int n
= 0; /* number of flags in flags */
387 while (TRUE
) { /* while index with firstonly flag set */
389 if (ipbufind
>= BUFSZ_2
) { /* if pipe input buffer
391 if (read(fdin
, (char *) ipbuf
, BUFSZ
) != BUFSZ
)
392 die("bad pipe read\n");
395 flags
= ipbuf
[ipbufind
++];
398 iindex
= ipbuf
[ipbufind
++];
400 die((iindex
== EOF_INDEX
) ? (char *) NULL
: "invalid data\n");
403 /* Assume flags < 0 if highest remaining flag is set */
404 if (flags
< 0) { /* if firstonly flag for index is not set */
405 while (iindex
>= base
) iindex
= dindex
[iindex
- base
];
408 return; /* return with valid non-firstonly index */
415 * put an index into the pipeline.
417 void putpipe(u
, flag
)
421 static unsigned short flags
, *flagp
;
422 static int n
= 0; /* number of flags in flags */
424 if (pnum
== 0) { /* if we should write to stdout */
425 myputc(u
); /* index will be the char value */
428 if (n
== 0) { /* if we need to reserve a flag entry */
430 flagp
= opbuf
+ opbufind
;
433 opbuf
[opbufind
++] = u
; /* add index to buffer */
434 flags
= (flags
<< 1) | flag
; /* add firstonly flag */
435 if (++n
>= 15) { /* if block of 15 indices */
437 *flagp
= flags
; /* insert flags entry */
438 if (opbufind
>= BUFSZ_2
) { /* if pipe out buffer full */
440 if (write(fdout
, (char *) opbuf
, BUFSZ
) != BUFSZ
)
441 die("bad pipe write\n");