tools/llvm: Do not build with symbols
[minix3.git] / minix / commands / decomp16 / decomp16.c
blob1cb349dc4a89130630587bd39ee6dafe2113a05d
1 /* decomp16: decompress 16bit compressed files on a 16bit Intel processor
3 * Version 1.3 of 25 Mar 92.
5 * This was written by John N. White on 6/30/91 and is Public Domain.
6 * Patched to run under news by Will Rose, Feb 92.
7 * J N White's (earlier) patches added by Will Rose, 20 Feb 92.
8 * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
9 * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
11 * decomp16 can use as as little as 512 bytes of stack; since it forks
12 * four additional copies, it's probably worth using minimum stack rather
13 * than the 8192 byte Minix default. To reduce memory still further,
14 * change BUFSZ below to 256; it is currently set to 1024 for speed. The
15 * minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
17 * This program acts as a filter:
18 * decomp16 < compressed_file > decompressed_file
19 * The arguments -0 to -4 run only the corresponding pass.
20 * Thus:
21 * decomp16 -4 < compressed_file > 3;
22 * decomp16 -3 < 3 > 2;
23 * decomp16 -2 < 2 > 1;
24 * decomp16 -1 < 1 > 0;
25 * decomp16 -0 < 0 > decompressed_file
26 * will also work, as will connecting the passes by explicit pipes if
27 * there is enough memory to do so. File name arguments can also be
28 * given directly on the command line.
30 * Compress uses a modified LZW compression algorithm. A compressed file
31 * is a set of indices into a dictionary of strings. The number of bits
32 * used to store each index depends on the number of entries currently
33 * in the dictionary. If there are between 257 and 512 entries, 9 bits
34 * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
35 * consists of 0-255 (which are the corresponding chars) and 256 (which
36 * is a special CLEAR code). As each index in the compressed file is read,
37 * a new entry is added to the dictionary consisting of the current string
38 * with the first char of the next string appended. When the dictionary
39 * is full, no further entries are added. If a CLEAR code is received,
40 * the dictionary will be completely reset. The first two bytes of the
41 * compressed file are a magic number, and the third byte indicates the
42 * maximum number of bits, and whether the CLEAR code is used (older versions
43 * of compress didn't have CLEAR).
45 * This program works by forking four more copies of itself. The five
46 * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
47 * to supply its input, which in turn forks and reads from copy 2, etc.
48 * This sequence is used so that when the program exits, all writes
49 * are completed and a program that has exec'd uncompress (such as news)
50 * can immediately use the uncompressed data when the wait() call returns.
52 * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
53 * program will run as that copy, reading from stdin and writing to stdout.
54 * This allows decompressing with very limited RAM because only one of the
55 * five passes is in memory at a time.
57 * The compressed data is a series of string indices (and a header at
58 * the beginning and an occasional CLEAR code). As these indices flow
59 * through the pipes, each program decodes the ones it can. The result
60 * of each decoding will be indices that the following programs can handle.
62 * Each of the 65536 strings in the dictionary is an earlier string with
63 * some character added to the end (except for the the 256 predefined
64 * single char strings). When new entries are made to the dictionary,
65 * the string index part will just be the last index to pass through.
66 * But the char part is the first char of the next string, which isn't
67 * known yet. So the string can be stored as a pair of indices. When
68 * this string is specified, it is converted to this pair of indices,
69 * which are flagged so that the first will be decoded in full while
70 * the second will be decoded to its first char. The dictionary takes
71 * 256k to store (64k strings of 2 indices of 2 bytes each). This is
72 * too big for a 64k data segment, so it is divided into 5 equal parts.
73 * Copy 4 of the program maintains the high part and copy 0 holds the
74 * low part.
77 #include <sys/types.h>
78 #include <fcntl.h>
79 #include <stdlib.h>
80 #include <unistd.h>
82 #define BUFSZ 1024 /* size of i/o buffers */
83 #define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */
84 #define DICTSZ (unsigned)13056 /* # of local dictionary entries */
85 #define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */
86 #define FALSE 0
87 #define TRUE ~FALSE
89 int fdin, fdout, fderr; /* input, output, and error file descriptors */
90 int ibufstart, obufind, ibufend;/* i/o buffer indices */
91 int ipbufind = BUFSZ_2; /* pipe buffer indices */
92 int opbufind = 0;
93 int pnum = -1; /* ID of this copy */
94 unsigned short ipbuf[BUFSZ_2]; /* for buffering input */
95 unsigned short opbuf[BUFSZ_2]; /* for buffering output */
96 unsigned char *ibuf = (unsigned char *) ipbuf;
97 unsigned char *obuf = (unsigned char *) opbuf;
99 unsigned short dindex[DICTSZ]; /* dictionary: index to substring */
100 unsigned short dchar[DICTSZ]; /* dictionary: last char of string */
101 unsigned iindex, tindex, tindex2; /* holds index being processed */
102 unsigned base; /* where in global dict local dict starts */
103 unsigned tbase;
104 unsigned locend; /* where in global dict local dict ends */
105 unsigned curend = 256; /* current end of global dict */
106 unsigned maxend; /* max end of global dict */
107 int dcharp; /* ptr to dchar that needs next index entry */
108 int curbits; /* number of bits for getbits() to read */
109 int maxbits; /* limit on number of bits */
110 int clearflg; /* if set, allow CLEAR */
111 int inmod; /* mod 8 for getbits() */
113 int main(int argc, char **argv);
114 void ffork(void);
115 void die(char *s);
116 void myputc(unsigned c);
117 unsigned mygetc(void);
118 void getbits(void);
119 void getpipe(void);
120 void putpipe(unsigned u, int flag);
122 int main(int argc, char **argv)
124 char c, *cp;
125 int j, k, fdtmp;
126 unsigned int len;
128 /* Find the program name */
129 j = 0;
130 while (argv[0][j] != '\0') j++;
131 len = (unsigned int) j;
132 while (j--)
133 if (argv[0][j] == '/') break;
134 if (argv[0][j] == '/') j++;
135 cp = argv[0] + j;
136 len -= j;
138 /* Sort out the flags */
139 for (k = 1; k < argc; k++) {
140 if (argv[k][0] == '-') {
141 c = argv[k][1];
142 switch (c) {
143 case '0': /* pass numbers */
144 case '1':
145 case '2':
146 case '3':
147 case '4': pnum = c - '0'; break;
148 case 'd': /* used by news */
149 break;
150 default:
151 (void) write(1, "Usage: ", 7);
152 (void) write(1, cp, len);
153 (void) write(1, " [-#] [in] [out]\n", 17);
154 exit(0);
155 break;
158 /* Once it's checked, lose it anyway */
159 for (j = k; j < argc; j++) argv[j] = argv[j + 1];
160 argc--;
161 k--;
165 /* Default i/o settings */
166 fdin = 0;
167 fdout = 1;
168 fderr = 2;
170 /* Try to open specific files and connect them to stdin/stdout */
171 if (argc > 1) {
172 if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
173 (void) close(0);
174 if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
175 (void) close(fdtmp);
177 if (argc > 2) {
178 (void) unlink(argv[2]);
179 if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
180 (void) close(1);
181 if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
182 (void) close(fdtmp);
185 /* Sort out type of compression */
186 if (pnum == -1 || pnum == 4) {/* if this is pass 4 */
187 /* Check header of compressed file */
188 if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */
189 die("not a compressed file\n");
190 iindex = mygetc(); /* get compression style */
191 } else
192 getpipe(); /* get compression style */
194 maxbits = iindex & 0x1F;
195 clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
196 if (maxbits < 9 || maxbits > 16) /* check for valid maxbits */
197 die("can't decompress\n");
198 if (pnum != -1 && pnum != 0)
199 putpipe(iindex, 0); /* pass style to next copy */
201 /* Fork off an ancestor if necessary - ffork() increments pnum */
202 if (pnum == -1) {
203 pnum = 0;
204 if (pnum == 0) ffork();
205 if (pnum == 1) ffork();
206 if (pnum == 2) ffork();
207 if (pnum == 3) ffork();
210 /* Preliminary inits. Note: end/maxend/curend are highest, not
211 * highest + 1 */
212 base = DICTSZ * pnum + 256;
213 locend = base + DICTSZ - 1;
214 maxend = (1 << maxbits) - 1;
215 if (maxend > locend) maxend = locend;
217 while (TRUE) {
218 curend = 255 + (clearflg ? 1 : 0); /* init dictionary */
219 dcharp = DICTSZ; /* flag for none needed */
220 curbits = 9; /* init curbits (for copy 0) */
221 while (TRUE) { /* for each index in input */
222 if (pnum == 4) {/* get index using getbits() */
223 if (curbits < maxbits && (1 << curbits) <= curend) {
224 /* Curbits needs to be increased */
225 /* Due to uglyness in compress, these
226 * indices in the compressed file are
227 * wasted */
228 while (inmod) getbits();
229 curbits++;
231 getbits();
232 } else
233 getpipe(); /* get next index */
235 if (iindex == 256 && clearflg) {
236 if (pnum > 0) putpipe(iindex, 0);
237 /* Due to uglyness in compress, these indices
238 * in the compressed file are wasted */
239 while (inmod) getbits();
240 break;
242 tindex = iindex;
243 /* Convert the index part, ignoring spawned chars */
244 while (tindex >= base) tindex = dindex[tindex - base];
245 /* Pass on the index */
246 putpipe(tindex, 0);
247 /* Save the char of the last added entry, if any */
248 if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
249 if (curend < maxend && ++curend > (base - 1))
250 dindex[dcharp = (curend - base)] = iindex;
252 /* Do spawned chars. They are naturally produced in
253 * the wrong order. To get them in the right order
254 * without using memory, a series of passes,
255 * progressively less deep, are used */
256 tbase = base;
257 while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
258 while ((tindex2 = dindex[tindex - base]) >= tbase)
259 tindex = tindex2; /* scan to desired char */
260 putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
261 tbase = tindex + 1;
262 if (tbase == 0) break; /* it's a wrap */
269 /* F f o r k
271 * Fork off the previous pass - the parent reads from the child.
273 void ffork()
275 int j, pfd[2];
277 if (pipe(pfd) == -1) die("pipe() error\n");
278 if ((j = fork()) == -1) die("fork() error\n");
279 if (j == 0) { /* this is the child */
280 if (close(1) == -1) die("close(1) error\n");
281 if (dup(pfd[1]) != 1) die("dup(1) error\n");
282 (void) close(pfd[0]);
283 pnum++;
284 } else { /* this is the parent */
285 if (close(0) == -1) die("close(0) error\n");
286 if (dup(pfd[0]) != 0) die("dup(0) error\n");
287 (void) close(pfd[1]);
292 /* D i e
294 * If s is a message, write it to stderr. Flush buffers if needed. Then exit.
296 void die(char *s)
298 /* Flush stdout buffer if needed */
299 if (obufind != 0) {
300 if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
301 s = "bad stdout write\n";
302 obufind = 0;
305 /* Flush pipe if needed */
307 putpipe(EOF_INDEX, 0);
308 while (opbufind);
309 /* Write any error message */
310 if (s != (char *) NULL) {
311 while (*s) (void) write(fderr, s++, 1);
313 exit((s == (char *) NULL) ? 0 : 1);
317 /* M p u t c
319 * Put a char to stdout.
321 void myputc(unsigned c)
323 obuf[obufind++] = c;
324 if (obufind >= BUFSZ) { /* if stdout buffer full */
325 if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ) /* flush to stdout */
326 die("bad stdout write\n");
327 obufind = 0;
332 /* M y g e t c
334 * Get a char from stdin. If EOF, then die() and exit.
336 unsigned mygetc()
338 if (ibufstart >= ibufend) { /* if stdin buffer empty */
339 if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
340 die((char *) NULL); /* if EOF, do normal exit */
341 ibufstart = 0;
343 return(ibuf[ibufstart++] & 0xff);
347 /* G e t b i t s
349 * Put curbits bits into index from stdin. Note: only copy 4 uses this.
350 * The bits within a byte are in the correct order. But when the bits
351 * cross a byte boundry, the lowest bits will be in the higher part of
352 * the current byte, and the higher bits will be in the lower part of
353 * the next byte.
355 void getbits()
357 int have;
358 static unsigned curbyte; /* byte having bits extracted from it */
359 static int left; /* how many bits are left in curbyte */
361 inmod = (inmod + 1) & 7; /* count input mod 8 */
362 iindex = curbyte;
363 have = left;
364 if (curbits - have > 8) {
365 iindex |= mygetc() << have;
366 have += 8;
368 iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
369 curbyte >>= curbits - have;
370 left = 8 - (curbits - have);
374 /* G e t p i p e
376 * Get an index from the pipeline. If flagged firstonly, handle it here.
378 void getpipe()
380 static short flags;
381 static int n = 0; /* number of flags in flags */
383 while (TRUE) { /* while index with firstonly flag set */
384 if (n <= 0) {
385 if (ipbufind >= BUFSZ_2) { /* if pipe input buffer
386 * empty */
387 if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
388 die("bad pipe read\n");
389 ipbufind = 0;
391 flags = ipbuf[ipbufind++];
392 n = 15;
394 iindex = ipbuf[ipbufind++];
395 if (iindex > curend)
396 die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
397 flags <<= 1;
398 n--;
399 /* Assume flags < 0 if highest remaining flag is set */
400 if (flags < 0) { /* if firstonly flag for index is not set */
401 while (iindex >= base) iindex = dindex[iindex - base];
402 putpipe(iindex, 1);
403 } else
404 return; /* return with valid non-firstonly index */
409 /* P u t p i p e
411 * put an index into the pipeline.
413 void putpipe(unsigned u, int flag)
415 static unsigned short flags, *flagp;
416 static int n = 0; /* number of flags in flags */
418 if (pnum == 0) { /* if we should write to stdout */
419 myputc(u); /* index will be the char value */
420 return;
422 if (n == 0) { /* if we need to reserve a flag entry */
423 flags = 0;
424 flagp = opbuf + opbufind;
425 opbufind++;
427 opbuf[opbufind++] = u; /* add index to buffer */
428 flags = (flags << 1) | flag; /* add firstonly flag */
429 if (++n >= 15) { /* if block of 15 indices */
430 n = 0;
431 *flagp = flags; /* insert flags entry */
432 if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */
433 opbufind = 0;
434 if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
435 die("bad pipe write\n");