minix/commands/decomp16/decomp16.c

   1 /* decomp16: decompress 16bit compressed files on a 16bit Intel processor
   2  *
   3  * Version 1.3 of 25 Mar 92.
   4  *
   5  * This was written by John N. White on 6/30/91 and is Public Domain.
   6  * Patched to run under news by Will Rose, Feb 92.
   7  * J N White's (earlier) patches added by Will Rose, 20 Feb 92.
   8  * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
   9  * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
  10  *
  11  * decomp16 can use as as little as 512 bytes of stack; since it forks
  12  * four additional copies, it's probably worth using minimum stack rather
  13  * than the 8192 byte Minix default.  To reduce memory still further,
  14  * change BUFSZ below to 256; it is currently set to 1024 for speed.  The
  15  * minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
  16  *
  17  * This program acts as a filter:
  18  *    decomp16 < compressed_file > decompressed_file
  19  * The arguments -0 to -4 run only the corresponding pass.
  20  * Thus:
  21  *    decomp16 -4 < compressed_file > 3;
  22  *    decomp16 -3 < 3 > 2;
  23  *    decomp16 -2 < 2 > 1;
  24  *    decomp16 -1 < 1 > 0;
  25  *    decomp16 -0 < 0 > decompressed_file
  26  * will also work, as will connecting the passes by explicit pipes if
  27  * there is enough memory to do so.  File name arguments can also be
  28  * given directly on the command line.
  29  *
  30  * Compress uses a modified LZW compression algorithm. A compressed file
  31  * is a set of indices into a dictionary of strings. The number of bits
  32  * used to store each index depends on the number of entries currently
  33  * in the dictionary. If there are between 257 and 512 entries, 9 bits
  34  * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
  35  * consists of 0-255 (which are the corresponding chars) and 256 (which
  36  * is a special CLEAR code). As each index in the compressed file is read,
  37  * a new entry is added to the dictionary consisting of the current string
  38  * with the first char of the next string appended. When the dictionary
  39  * is full, no further entries are added. If a CLEAR code is received,
  40  * the dictionary will be completely reset. The first two bytes of the
  41  * compressed file are a magic number, and the third byte indicates the
  42  * maximum number of bits, and whether the CLEAR code is used (older versions
  43  * of compress didn't have CLEAR).
  44  *
  45  * This program works by forking four more copies of itself. The five
  46  * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
  47  * to supply its input, which in turn forks and reads from copy 2, etc.
  48  * This sequence is used so that when the program exits, all writes
  49  * are completed and a program that has exec'd uncompress (such as news)
  50  * can immediately use the uncompressed data when the wait() call returns.
  51  *
  52  * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
  53  * program will run as that copy, reading from stdin and writing to stdout.
  54  * This allows decompressing with very limited RAM because only one of the
  55  * five passes is in memory at a time.
  56  *
  57  * The compressed data is a series of string indices (and a header at
  58  * the beginning and an occasional CLEAR code). As these indices flow
  59  * through the pipes, each program decodes the ones it can. The result
  60  * of each decoding will be indices that the following programs can handle.
  61  *
  62  * Each of the 65536 strings in the dictionary is an earlier string with
  63  * some character added to the end (except for the the 256 predefined
  64  * single char strings). When new entries are made to the dictionary,
  65  * the string index part will just be the last index to pass through.
  66  * But the char part is the first char of the next string, which isn't
  67  * known yet. So the string can be stored as a pair of indices. When
  68  * this string is specified, it is converted to this pair of indices,
  69  * which are flagged so that the first will be decoded in full while
  70  * the second will be decoded to its first char. The dictionary takes
  71  * 256k to store (64k strings of 2 indices of 2 bytes each). This is
  72  * too big for a 64k data segment, so it is divided into 5 equal parts.
  73  * Copy 4 of the program maintains the high part and copy 0 holds the
  74  * low part.
  75  */
  76
  77 #include <sys/types.h>
  78 #include <fcntl.h>
  79 #include <stdlib.h>
  80 #include <unistd.h>
  81
  82 #define BUFSZ           1024    /* size of i/o buffers */
  83 #define BUFSZ_2         (BUFSZ/2)       /* # of unsigned shorts in i/o bufs */
  84 #define DICTSZ          (unsigned)13056 /* # of local dictionary entries */
  85 #define EOF_INDEX       (unsigned short)0xFFFF  /* EOF flag for pipeline */
  86 #define FALSE           0
  87 #define TRUE            ~FALSE
  88
  89 int fdin, fdout, fderr;         /* input, output, and error file descriptors */
  90 int ibufstart, obufind, ibufend;/* i/o buffer indices */
  91 int ipbufind = BUFSZ_2;         /* pipe buffer indices */
  92 int opbufind = 0;
  93 int pnum = -1;                  /* ID of this copy */
  94 unsigned short ipbuf[BUFSZ_2];  /* for buffering input */
  95 unsigned short opbuf[BUFSZ_2];  /* for buffering output */
  96 unsigned char *ibuf = (unsigned char *) ipbuf;
  97 unsigned char *obuf = (unsigned char *) opbuf;
  98
  99 unsigned short dindex[DICTSZ];  /* dictionary: index to substring */
 100 unsigned short dchar[DICTSZ];   /* dictionary: last char of string */
 101 unsigned iindex, tindex, tindex2;       /* holds index being processed */
 102 unsigned base;                  /* where in global dict local dict starts */
 103 unsigned tbase;
 104 unsigned locend;                /* where in global dict local dict ends */
 105 unsigned curend = 256;          /* current end of global dict */
 106 unsigned maxend;                /* max end of global dict */
 107 int dcharp;                     /* ptr to dchar that needs next index entry */
 108 int curbits;                    /* number of bits for getbits() to read */
 109 int maxbits;                    /* limit on number of bits */
 110 int clearflg;                   /* if set, allow CLEAR */
 111 int inmod;                      /* mod 8 for getbits() */
 112
 113 int main(int argc, char **argv);
 114 void ffork(void);
 115 void die(char *s);
 116 void myputc(unsigned c);
 117 unsigned mygetc(void);
 118 void getbits(void);
 119 void getpipe(void);
 120 void putpipe(unsigned u, int flag);
 121
 122 int main(int argc, char **argv)
 123 {
 124   char c, *cp;
 125   int j, k, fdtmp;
 126   unsigned int len;
 127
 128   /* Find the program name */
 129   j = 0;
 130   while (argv[0][j] != '\0') j++;
 131   len = (unsigned int) j;
 132   while (j--)
 133         if (argv[0][j] == '/') break;
 134   if (argv[0][j] == '/') j++;
 135   cp = argv[0] + j;
 136   len -= j;
 137
 138   /* Sort out the flags */
 139   for (k = 1; k < argc; k++) {
 140         if (argv[k][0] == '-') {
 141                 c = argv[k][1];
 142                 switch (c) {
 143                     case '0':   /* pass numbers */
 144                     case '1':
 145                     case '2':
 146                     case '3':
 147                     case '4':   pnum = c - '0'; break;
 148                     case 'd':   /* used by news */
 149                         break;
 150                     default:
 151                         (void) write(1, "Usage: ", 7);
 152                         (void) write(1, cp, len);
 153                         (void) write(1, " [-#] [in] [out]\n", 17);
 154                         exit(0);
 155                         break;
 156                 }
 157
 158                 /* Once it's checked, lose it anyway */
 159                 for (j = k; j < argc; j++) argv[j] = argv[j + 1];
 160                 argc--;
 161                 k--;
 162         }
 163   }
 164
 165   /* Default i/o settings */
 166   fdin = 0;
 167   fdout = 1;
 168   fderr = 2;
 169
 170   /* Try to open specific files and connect them to stdin/stdout */
 171   if (argc > 1) {
 172         if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
 173         (void) close(0);
 174         if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
 175         (void) close(fdtmp);
 176   }
 177   if (argc > 2) {
 178         (void) unlink(argv[2]);
 179         if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
 180         (void) close(1);
 181         if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
 182         (void) close(fdtmp);
 183   }
 184
 185   /* Sort out type of compression */
 186   if (pnum == -1 || pnum == 4) {/* if this is pass 4 */
 187         /* Check header of compressed file */
 188         if (mygetc() != 0x1F || mygetc() != 0x9D)      /* check magic number */
 189                 die("not a compressed file\n");
 190         iindex = mygetc();      /* get compression style */
 191   } else
 192         getpipe();              /* get compression style */
 193
 194   maxbits = iindex & 0x1F;
 195   clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
 196   if (maxbits < 9 || maxbits > 16)      /* check for valid maxbits */
 197         die("can't decompress\n");
 198   if (pnum != -1 && pnum != 0)
 199         putpipe(iindex, 0);     /* pass style to next copy */
 200
 201   /* Fork off an ancestor if necessary - ffork() increments pnum */
 202   if (pnum == -1) {
 203         pnum = 0;
 204         if (pnum == 0) ffork();
 205         if (pnum == 1) ffork();
 206         if (pnum == 2) ffork();
 207         if (pnum == 3) ffork();
 208   }
 209
 210   /* Preliminary inits. Note: end/maxend/curend are highest, not
 211    * highest + 1 */
 212   base = DICTSZ * pnum + 256;
 213   locend = base + DICTSZ - 1;
 214   maxend = (1 << maxbits) - 1;
 215   if (maxend > locend) maxend = locend;
 216
 217   while (TRUE) {
 218         curend = 255 + (clearflg ? 1 : 0);      /* init dictionary */
 219         dcharp = DICTSZ;        /* flag for none needed */
 220         curbits = 9;            /* init curbits (for copy 0) */
 221         while (TRUE) {          /* for each index in input */
 222                 if (pnum == 4) {/* get index using getbits() */
 223                         if (curbits < maxbits && (1 << curbits) <= curend) {
 224                                 /* Curbits needs to be increased */
 225                                 /* Due to uglyness in compress, these
 226                                  * indices in the compressed file are
 227                                  * wasted */
 228                                 while (inmod) getbits();
 229                                 curbits++;
 230                         }
 231                         getbits();
 232                 } else
 233                         getpipe();      /* get next index */
 234
 235                 if (iindex == 256 && clearflg) {
 236                         if (pnum > 0) putpipe(iindex, 0);
 237                         /* Due to uglyness in compress, these indices
 238                          * in the compressed file are wasted */
 239                         while (inmod) getbits();
 240                         break;
 241                 }
 242                 tindex = iindex;
 243                 /* Convert the index part, ignoring spawned chars */
 244                 while (tindex >= base) tindex = dindex[tindex - base];
 245                 /* Pass on the index */
 246                 putpipe(tindex, 0);
 247                 /* Save the char of the last added entry, if any */
 248                 if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
 249                 if (curend < maxend && ++curend > (base - 1))
 250                         dindex[dcharp = (curend - base)] = iindex;
 251
 252                 /* Do spawned chars. They are naturally produced in
 253                  * the wrong order. To get them in the right order
 254                  * without using memory, a series of passes,
 255                  * progressively less deep, are used */
 256                 tbase = base;
 257                 while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
 258                         while ((tindex2 = dindex[tindex - base]) >= tbase)
 259                                 tindex = tindex2;    /* scan to desired char */
 260                         putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
 261                         tbase = tindex + 1;
 262                         if (tbase == 0) break;  /* it's a wrap */
 263                 }
 264         }
 265   }
 266 }
 267
 268
 269 /* F f o r k
 270  *
 271  * Fork off the previous pass - the parent reads from the child.
 272  */
 273 void ffork()
 274 {
 275   int j, pfd[2];
 276
 277   if (pipe(pfd) == -1) die("pipe() error\n");
 278   if ((j = fork()) == -1) die("fork() error\n");
 279   if (j == 0) {                 /* this is the child */
 280         if (close(1) == -1) die("close(1) error\n");
 281         if (dup(pfd[1]) != 1) die("dup(1) error\n");
 282         (void) close(pfd[0]);
 283         pnum++;
 284   } else {                      /* this is the parent */
 285         if (close(0) == -1) die("close(0) error\n");
 286         if (dup(pfd[0]) != 0) die("dup(0) error\n");
 287         (void) close(pfd[1]);
 288   }
 289 }
 290
 291
 292 /* D i e
 293  *
 294  * If s is a message, write it to stderr. Flush buffers if needed. Then exit.
 295  */
 296 void die(char *s)
 297 {
 298   /* Flush stdout buffer if needed */
 299   if (obufind != 0) {
 300         if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
 301                 s = "bad stdout write\n";
 302         obufind = 0;
 303   }
 304
 305   /* Flush pipe if needed */
 306   do
 307         putpipe(EOF_INDEX, 0);
 308   while (opbufind);
 309   /* Write any error message */
 310   if (s != (char *) NULL) {
 311         while (*s) (void) write(fderr, s++, 1);
 312   }
 313   exit((s == (char *) NULL) ? 0 : 1);
 314 }
 315
 316
 317 /* M p u t c
 318  *
 319  * Put a char to stdout.
 320  */
 321 void myputc(unsigned c)
 322 {
 323   obuf[obufind++] = c;
 324   if (obufind >= BUFSZ) {       /* if stdout buffer full */
 325         if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ)        /* flush to stdout */
 326                 die("bad stdout write\n");
 327         obufind = 0;
 328   }
 329 }
 330
 331
 332 /* M y g e t c
 333  *
 334  * Get a char from stdin. If EOF, then die() and exit.
 335  */
 336 unsigned mygetc()
 337 {
 338   if (ibufstart >= ibufend) {   /* if stdin buffer empty */
 339         if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
 340                 die((char *) NULL);     /* if EOF, do normal exit */
 341         ibufstart = 0;
 342   }
 343   return(ibuf[ibufstart++] & 0xff);
 344 }
 345
 346
 347 /* G e t b i t s
 348  *
 349  * Put curbits bits into index from stdin. Note: only copy 4 uses this.
 350  * The bits within a byte are in the correct order. But when the bits
 351  * cross a byte boundry, the lowest bits will be in the higher part of
 352  * the current byte, and the higher bits will be in the lower part of
 353  * the next byte.
 354  */
 355 void getbits()
 356 {
 357   int have;
 358   static unsigned curbyte;      /* byte having bits extracted from it */
 359   static int left;              /* how many bits are left in curbyte */
 360
 361   inmod = (inmod + 1) & 7;      /* count input mod 8 */
 362   iindex = curbyte;
 363   have = left;
 364   if (curbits - have > 8) {
 365         iindex |= mygetc() << have;
 366         have += 8;
 367   }
 368   iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
 369   curbyte >>= curbits - have;
 370   left = 8 - (curbits - have);
 371 }
 372
 373
 374 /* G e t p i p e
 375  *
 376  * Get an index from the pipeline. If flagged firstonly, handle it here.
 377  */
 378 void getpipe()
 379 {
 380   static short flags;
 381   static int n = 0;             /* number of flags in flags */
 382
 383   while (TRUE) {                /* while index with firstonly flag set */
 384         if (n <= 0) {
 385                 if (ipbufind >= BUFSZ_2) {      /* if pipe input buffer
 386                                                  * empty */
 387                         if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
 388                                 die("bad pipe read\n");
 389                         ipbufind = 0;
 390                 }
 391                 flags = ipbuf[ipbufind++];
 392                 n = 15;
 393         }
 394         iindex = ipbuf[ipbufind++];
 395         if (iindex > curend)
 396                 die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
 397         flags <<= 1;
 398         n--;
 399         /* Assume flags < 0 if highest remaining flag is set */
 400         if (flags < 0) {        /* if firstonly flag for index is not set */
 401                 while (iindex >= base) iindex = dindex[iindex - base];
 402                 putpipe(iindex, 1);
 403         } else
 404                 return;         /* return with valid non-firstonly index */
 405   }
 406 }
 407
 408
 409 /* P u t p i p e
 410  *
 411  * put an index into the pipeline.
 412  */
 413 void putpipe(unsigned u, int flag)
 414 {
 415   static unsigned short flags, *flagp;
 416   static int n = 0;             /* number of flags in flags */
 417
 418   if (pnum == 0) {              /* if we should write to stdout */
 419         myputc(u);              /* index will be the char value */
 420         return;
 421   }
 422   if (n == 0) {                 /* if we need to reserve a flag entry */
 423         flags = 0;
 424         flagp = opbuf + opbufind;
 425         opbufind++;
 426   }
 427   opbuf[opbufind++] = u;        /* add index to buffer */
 428   flags = (flags << 1) | flag;  /* add firstonly flag */
 429   if (++n >= 15) {              /* if block of 15 indices */
 430         n = 0;
 431         *flagp = flags;         /* insert flags entry */
 432         if (opbufind >= BUFSZ_2) {      /* if pipe out buffer full */
 433                 opbufind = 0;
 434                 if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
 435                         die("bad pipe write\n");
 436         }
 437   }
 438 }