mkfs, mkproto: minor improvements
[minix.git] / commands / compress / compress.c
blob8f271be74dff5009b9139c1b7776c25de3e7d178
1 /* compress - Reduce file size using Modified Lempel-Ziv encoding */
3 /*
4 * compress.c - File compression ala IEEE Computer, June 1984.
6 * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
7 * Jim McKie (decvax!mcvax!jim)
8 * Steve Davies (decvax!vax135!petsd!peora!srd)
9 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
10 * James A. Woods (decvax!ihnp4!ames!jaw)
11 * Joe Orost (decvax!vax135!petsd!joe)
13 * Richard Todd Port to MINIX
14 * Andy Tanenbaum Cleanup
17 * Algorithm from "A Technique for High Performance Data Compression",
18 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
20 * Usage: compress [-dfvc] [-b bits] [file ...]
21 * Inputs:
22 * -d: If given, decompression is done instead.
24 * -c: Write output on stdout.
26 * -b: Parameter limits the max number of bits/code.
28 * -f: Forces output file to be generated, even if one already
29 * exists, and even if no space is saved by compressing.
30 * If -f is not used, the user will be prompted if stdin is
31 * a tty, otherwise, the output file will not be overwritten.
33 * -v: Write compression statistics
35 * file ...: Files to be compressed. If none specified, stdin
36 * is used.
37 * Outputs:
38 * file.Z: Compressed form of file with same mode, owner, and utimes
39 * or stdout (if stdin used as input)
41 * Assumptions:
42 * When filenames are given, replaces with the compressed version
43 * (.Z suffix) only if the file decreases in size.
44 * Algorithm:
45 * Modified Lempel-Ziv method (LZW). Basically finds common
46 * substrings and replaces them with a variable size code. This is
47 * deterministic, and can be done on the fly. Thus, the decompression
48 * procedure needs no input table, but tracks the way the table was built.
52 #define AZTEC86 1
54 #define min(a,b) ((a>b) ? b : a)
57 * Set USERMEM to the maximum amount of physical user memory available
58 * in bytes. USERMEM is used to determine the maximum BITS that can be used
59 * for compression.
61 * SACREDMEM is the amount of physical memory saved for others; compress
62 * will hog the rest.
64 #ifndef SACREDMEM
65 #define SACREDMEM 0
66 #endif
68 #ifndef USERMEM
69 # define USERMEM 450000 /* default user memory */
70 #endif
72 #define REGISTER register
73 #define DOTZ ".Z"
75 #include <limits.h>
76 #include <dirent.h>
78 /* The default for Minix is -b13, but we can do -b16 if the machine can. */
79 #define DEFAULTBITS 13
80 #if INT_MAX == 32767
81 # define BITS 13
82 #else
83 # define BITS 16
84 #endif
86 #ifdef USERMEM
87 # if USERMEM >= (433484+SACREDMEM)
88 # define PBITS 16
89 # else
90 # if USERMEM >= (229600+SACREDMEM)
91 # define PBITS 15
92 # else
93 # if USERMEM >= (127536+SACREDMEM)
94 # define PBITS 14
95 # else
96 # if USERMEM >= (73464+SACREDMEM)
97 # define PBITS 13
98 # else
99 # define PBITS 12
100 # endif
101 # endif
102 # endif
103 # endif
104 # undef USERMEM
105 #endif /* USERMEM */
107 #ifdef PBITS /* Preferred BITS for this memory size */
108 # ifndef BITS
109 # define BITS PBITS
110 # endif
111 #endif /* PBITS */
113 #if BITS == 16
114 # define HSIZE 69001 /* 95% occupancy */
115 #endif
116 #if BITS == 15
117 # define HSIZE 35023 /* 94% occupancy */
118 #endif
119 #if BITS == 14
120 # define HSIZE 18013 /* 91% occupancy */
121 #endif
122 #if BITS == 13
123 # define HSIZE 9001 /* 91% occupancy */
124 #endif
125 #if BITS <= 12
126 # define HSIZE 5003 /* 80% occupancy */
127 #endif
131 * a code_int must be able to hold 2**BITS values of type int, and also -1
133 #if BITS > 15
134 typedef long int code_int;
135 #else
136 typedef int code_int;
137 #endif
139 #ifdef SIGNED_COMPARE_SLOW
140 typedef unsigned long int count_int;
141 typedef unsigned short int count_short;
142 #else
143 typedef long int count_int;
144 #endif
146 #ifdef NO_UCHAR
147 typedef char char_type;
148 #else
149 typedef unsigned char char_type;
150 #endif /* UCHAR */
151 char_type magic_header[] = "\037\235"; /* 1F 9D */
153 /* Defines for third byte of header */
154 #define BIT_MASK 0x1f
155 #define BLOCK_MASK 0x80
156 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
157 a fourth header byte (for expansion).
159 #define INIT_BITS 9 /* initial number of bits/code */
161 #include <sys/types.h>
162 #include <sys/stat.h>
163 #include <fcntl.h>
164 #include <ctype.h>
165 #include <signal.h>
166 #include <stdlib.h>
167 #include <string.h>
168 #include <unistd.h>
169 #include <utime.h>
170 #include <stdio.h>
172 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
174 int n_bits; /* number of bits/code */
175 int maxbits = DEFAULTBITS; /* user settable max # bits/code */
176 code_int maxcode; /* maximum code, given n_bits */
177 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */
178 #ifdef COMPATIBLE /* But wrong! */
179 # define MAXCODE(n_bits) (1 << (n_bits) - 1)
180 #else
181 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
182 #endif /* COMPATIBLE */
184 #ifndef AZTEC86
185 count_int htab [HSIZE];
186 unsigned short codetab [HSIZE];
187 #else
188 count_int *htab;
189 unsigned short *codetab;
190 # define HTABSIZE ((size_t)(HSIZE*sizeof(count_int)))
191 # define CODETABSIZE ((size_t)(HSIZE*sizeof(unsigned short)))
194 #define htabof(i) htab[i]
195 #define codetabof(i) codetab[i]
196 #endif /* XENIX_16 */
197 code_int hsize = HSIZE; /* for dynamic table sizing */
198 count_int fsize;
201 * To save much memory, we overlay the table used by compress() with those
202 * used by decompress(). The tab_prefix table is the same size and type
203 * as the codetab. The tab_suffix table needs 2**BITS characters. We
204 * get this from the beginning of htab. The output stack uses the rest
205 * of htab, and contains characters. There is plenty of room for any
206 * possible stack (stack used to be 8000 characters).
209 #define tab_prefixof(i) codetabof(i)
210 #ifdef XENIX_16
211 # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
212 # define de_stack ((char_type *)(htab2))
213 #else /* Normal machine */
214 # define tab_suffixof(i) ((char_type *)(htab))[i]
215 # define de_stack ((char_type *)&tab_suffixof(1<<BITS))
216 #endif /* XENIX_16 */
218 code_int free_ent = 0; /* first unused entry */
219 int exit_stat = 0;
221 int main(int argc, char **argv);
222 void Usage(void);
223 void compress(void);
224 void onintr(int dummy);
225 void oops(int dummy);
226 void output(code_int code);
227 int foreground(void);
228 void decompress(void);
229 code_int getcode(void);
230 void writeerr(void);
231 void copystat(char *ifname, char *ofname);
232 int foreground(void);
233 void cl_block(void);
234 void cl_hash(count_int hsize);
235 void prratio(FILE *stream, long int num, long int den);
236 void version(void);
238 void Usage() {
239 #ifdef DEBUG
240 fprintf(stderr,"Usage: compress [-dDVfc] [-b maxbits] [file ...]\n");
242 int debug = 0;
243 #else
244 fprintf(stderr,"Usage: compress [-dfvcV] [-b maxbits] [file ...]\n");
246 #endif /* DEBUG */
247 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */
248 int zcat_flg = 0; /* Write output on stdout, suppress messages */
249 int quiet = 0; /* don't tell me about compression */
252 * block compression parameters -- after all codes are used up,
253 * and compression rate changes, start over.
255 int block_compress = BLOCK_MASK;
256 int clear_flg = 0;
257 long int ratio = 0;
258 #define CHECK_GAP 10000 /* ratio check interval */
259 count_int checkpoint = CHECK_GAP;
261 * the next two codes should not be changed lightly, as they must not
262 * lie within the contiguous general code space.
264 #define FIRST 257 /* first free entry */
265 #define CLEAR 256 /* table clear output code */
267 int force = 0;
268 char ofname [100];
269 #ifdef DEBUG
270 int verbose = 0;
271 #endif /* DEBUG */
273 #ifndef METAWARE
274 #ifdef AZTEC86
275 void
276 #else
278 #endif
279 #ifndef __STDC__
280 (*bgnd_flag)();
281 #else
282 (*bgnd_flag)(int);
283 #endif
284 #endif
286 int do_decomp = 0;
289 int main(argc, argv)
290 int argc;
291 char **argv;
293 int overwrite = 0; /* Do not overwrite unless given -f flag */
294 char tempname[100];
295 char **filelist, **fileptr;
296 char *cp;
297 struct stat statbuf;
298 #ifndef METAWARE
299 if ( (bgnd_flag = signal ( SIGINT, SIG_IGN )) != SIG_IGN ) {
300 signal ( SIGINT, onintr );
301 signal ( SIGSEGV, oops );
303 #endif
304 #ifdef AZTEC86
305 #ifdef METAWARE
306 _setmode(NULL,_ALL_FILES_BINARY);
307 _setmode(stdin,_BINARY);
308 _setmode(stdout,_BINARY);
309 _setmode(stderr,_TEXT);
310 #endif
311 if (NULL == (htab = (count_int *)malloc(HTABSIZE)))
313 fprintf(stderr,"Can't allocate htab\n");
314 exit(1);
316 if (NULL == (codetab = (unsigned short *)malloc(CODETABSIZE)))
318 fprintf(stderr,"Can't allocate codetab\n");
319 exit(1);
321 #endif
322 #ifdef COMPATIBLE
323 nomagic = 1; /* Original didn't have a magic number */
324 #endif /* COMPATIBLE */
326 filelist = fileptr = (char **)(malloc((size_t)(argc * sizeof(*argv))));
327 *filelist = NULL;
329 if((cp = strrchr(argv[0], '/')) != 0) {
330 cp++;
331 } else {
332 cp = argv[0];
334 if(strcmp(cp, "uncompress") == 0) {
335 do_decomp = 1;
336 } else if(strcmp(cp, "zcat") == 0) {
337 do_decomp = 1;
338 zcat_flg = 1;
341 #ifdef BSD4_2
342 /* 4.2BSD dependent - take it out if not */
343 setlinebuf( stderr );
344 #endif /* BSD4_2 */
346 /* Argument Processing
347 * All flags are optional.
348 * -D => debug
349 * -V => print Version; debug verbose
350 * -d => do_decomp
351 * -v => unquiet
352 * -f => force overwrite of output file
353 * -n => no header: useful to uncompress old files
354 * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
355 * given also.
356 * -c => cat all output to stdout
357 * -C => generate output compatible with compress 2.0.
358 * if a string is left, must be an input filename.
360 for (argc--, argv++; argc > 0; argc--, argv++)
362 if (**argv == '-')
363 { /* A flag argument */
364 while (*++(*argv))
365 { /* Process all flags in this arg */
366 switch (**argv)
368 #ifdef DEBUG
369 case 'D':
370 debug = 1;
371 break;
372 case 'V':
373 verbose = 1;
374 version();
375 break;
376 #else
377 case 'V':
378 version();
379 break;
380 #endif /* DEBUG */
381 case 'v':
382 quiet = 0;
383 break;
384 case 'd':
385 do_decomp = 1;
386 break;
387 case 'f':
388 case 'F':
389 overwrite = 1;
390 force = 1;
391 break;
392 case 'n':
393 nomagic = 1;
394 break;
395 case 'C':
396 block_compress = 0;
397 break;
398 case 'b':
399 if (!ARGVAL())
401 fprintf(stderr, "Missing maxbits\n");
402 Usage();
403 exit(1);
405 maxbits = atoi(*argv);
406 goto nextarg;
407 case 'c':
408 zcat_flg = 1;
409 break;
410 case 'q':
411 quiet = 1;
412 break;
413 default:
414 fprintf(stderr, "Unknown flag: '%c'; ", **argv);
415 Usage();
416 exit(1);
420 else
421 { /* Input file name */
422 *fileptr++ = *argv; /* Build input file list */
423 *fileptr = NULL;
424 /* process nextarg; */
426 nextarg: continue;
429 if(maxbits < INIT_BITS) maxbits = INIT_BITS;
430 if (maxbits > BITS) maxbits = BITS;
431 maxmaxcode = 1 << maxbits;
433 if (*filelist != NULL)
435 for (fileptr = filelist; *fileptr; fileptr++)
437 exit_stat = 0;
438 if (do_decomp != 0)
439 { /* DECOMPRESSION */
440 /* Check for .Z suffix */
441 #ifndef PCDOS
442 if (strcmp(*fileptr + strlen(*fileptr) - 2, DOTZ) != 0)
443 #else
444 if (strcmp(*fileptr + strlen(*fileptr) - 1, DOTZ) != 0)
445 #endif
447 /* No .Z: tack one on */
448 strcpy(tempname, *fileptr);
449 #ifndef PCDOS
450 strcat(tempname, DOTZ);
451 #else
452 /* either tack one on or replace last character */
454 char *dot;
455 if (NULL == (dot = strchr(tempname,'.')))
457 strcat(tempname,".Z");
459 else
460 /* if there is a dot then either tack a z on
461 or replace last character */
463 if (strlen(dot) < 4)
464 strcat(tempname,DOTZ);
465 else
466 dot[3] = 'Z';
469 #endif
470 *fileptr = tempname;
472 /* Open input file */
473 if ((freopen(*fileptr, "r", stdin)) == NULL)
475 perror(*fileptr); continue;
477 /* Check the magic number */
478 if (nomagic == 0)
480 unsigned magic1, magic2;
481 if (((magic1 = getc(stdin)) != (magic_header[0] & 0xFF))
482 || ((magic2 = getc(stdin)) != (magic_header[1] & 0xFF)))
484 fprintf(stderr,
485 "%s: not in compressed format %x %x\n",
486 *fileptr,magic1,magic2);
487 continue;
489 maxbits = getc(stdin); /* set -b from file */
490 block_compress = maxbits & BLOCK_MASK;
491 maxbits &= BIT_MASK;
492 maxmaxcode = 1 << maxbits;
493 if(maxbits > BITS)
495 fprintf(stderr,
496 "%s: compressed with %d bits, can only handle %d bits\n",
497 *fileptr, maxbits, BITS);
498 continue;
501 /* Generate output filename */
502 strcpy(ofname, *fileptr);
503 #ifndef PCDOS
504 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */
505 #else
506 /* kludge to handle various common three character extension */
508 char *dot;
509 char fixup = '\0';
510 /* first off, map name to upper case */
511 for (dot = ofname; *dot; dot++)
512 *dot = toupper(*dot);
513 if (NULL == (dot = strchr(ofname,'.')))
515 fprintf(stderr,"Bad filename %s\n",ofname);
516 exit(1);
518 if (strlen(dot) == 4)
519 /* we got three letter extensions */
521 if (strcmp(dot,".EXZ") == 0)
522 fixup = 'E';
523 else if (strcmp(dot,".COZ") == 0)
524 fixup = 'M';
525 else if (strcmp(dot,".BAZ") == 0)
526 fixup = 'S';
527 else if (strcmp(dot,".OBZ") == 0)
528 fixup = 'J';
529 else if (strcmp(dot,".SYZ") == 0)
530 fixup = 'S';
531 else if (strcmp(dot,".DOZ") == 0)
532 fixup = 'C';
535 /* replace the Z */
536 ofname[strlen(*fileptr) - 1] = fixup;
538 #endif
539 } else
540 { /* COMPRESSION */
541 if (strcmp(*fileptr + strlen(*fileptr) - 2, DOTZ) == 0)
543 fprintf(stderr, "%s: already has .Z suffix -- no change\n",
544 *fileptr);
545 continue;
547 /* Open input file */
548 if ((freopen(*fileptr, "r", stdin)) == NULL)
550 perror(*fileptr); continue;
552 (void)stat( *fileptr, &statbuf );
553 fsize = (long) statbuf.st_size;
555 * tune hash table size for small files -- ad hoc,
556 * but the sizes match earlier #defines, which
557 * serve as upper bounds on the number of output codes.
559 hsize = HSIZE; /*lint -e506 -e712 */
560 if ( fsize < (1 << 12) )
561 hsize = min ( 5003, HSIZE );
562 else if ( fsize < (1 << 13) )
563 hsize = min ( 9001, HSIZE );
564 else if ( fsize < (1 << 14) )
565 hsize = min ( 18013, HSIZE );
566 else if ( fsize < (1 << 15) )
567 hsize = min ( 35023, HSIZE );
568 else if ( fsize < 47000 )
569 hsize = min ( 50021, HSIZE ); /*lint +e506 +e712 */
571 /* Generate output filename */
572 strcpy(ofname, *fileptr);
573 #ifndef BSD4_2 /* Short filenames */
574 if ((cp=strrchr(ofname,'/')) != NULL)
575 cp++;
576 else
577 cp = ofname;
578 if (strlen(cp) >= NAME_MAX-3)
580 fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
581 continue;
583 #ifdef PCDOS
584 else
586 /* either tack one on or replace last character */
587 char *dot;
588 if (NULL == (dot = strchr(cp,'.')))
590 strcat(cp,".Z");
592 else
593 /* if there is a dot then either tack a z on
594 or replace last character */
596 if (strlen(dot) < 4)
597 strcat(cp,DOTZ);
598 else
599 dot[3] = 'Z';
602 #endif
603 #endif /* BSD4_2 Long filenames allowed */
604 #ifndef PCDOS
605 /* PCDOS takes care of this above */
606 strcat(ofname, DOTZ);
607 #endif
609 /* Check for overwrite of existing file */
610 if (overwrite == 0 && zcat_flg == 0)
612 if (stat(ofname, &statbuf) == 0)
614 char response[2]; int fd;
615 response[0] = 'n';
616 fprintf(stderr, "%s already exists;", ofname);
617 if (foreground())
619 fd = open("/dev/tty", O_RDONLY);
620 fprintf(stderr,
621 " do you wish to overwrite %s (y or n)? ", ofname);
622 fflush(stderr);
623 (void)read(fd, response, 2);
624 while (response[1] != '\n')
626 if (read(fd, response+1, 1) < 0)
627 { /* Ack! */
628 perror("stderr");
629 break;
632 close(fd);
634 if (response[0] != 'y')
636 fprintf(stderr, "\tnot overwritten\n");
637 continue;
641 if(zcat_flg == 0)
642 { /* Open output file */
643 if (freopen(ofname, "w", stdout) == NULL)
645 perror(ofname);
646 continue;
648 if(!quiet)
649 fprintf(stderr, "%s: ", *fileptr);
652 /* Actually do the compression/decompression */
653 if (do_decomp == 0)
654 compress();
655 #ifndef DEBUG
656 else
657 decompress();
658 #else
659 else if (debug == 0)
660 decompress();
661 else
662 printcodes();
663 if (verbose)
664 dump_tab();
665 #endif /* DEBUG */
666 if(zcat_flg == 0)
668 copystat(*fileptr, ofname); /* Copy stats */
669 if((exit_stat == 1) || (!quiet))
670 putc('\n', stderr);
673 } else
674 { /* Standard input */
675 if (do_decomp == 0)
677 compress();
678 #ifdef DEBUG
679 if(verbose) dump_tab();
680 #endif /* DEBUG */
681 if(!quiet)
682 putc('\n', stderr);
683 } else
685 /* Check the magic number */
686 if (nomagic == 0)
688 if ((getc(stdin)!=(magic_header[0] & 0xFF))
689 || (getc(stdin)!=(magic_header[1] & 0xFF)))
691 fprintf(stderr, "stdin: not in compressed format\n");
692 exit(1);
694 maxbits = getc(stdin); /* set -b from file */
695 block_compress = maxbits & BLOCK_MASK;
696 maxbits &= BIT_MASK;
697 maxmaxcode = 1 << maxbits;
698 fsize = 100000; /* assume stdin large for USERMEM */
699 if(maxbits > BITS)
701 fprintf(stderr,
702 "stdin: compressed with %d bits, can only handle %d bits\n",
703 maxbits, BITS);
704 exit(1);
707 #ifndef DEBUG
708 decompress();
709 #else
710 if (debug == 0) decompress();
711 else printcodes();
712 if (verbose) dump_tab();
713 #endif /* DEBUG */
716 return(exit_stat);
719 static int offset;
720 long int in_count = 1; /* length of input */
721 long int bytes_out; /* length of compressed output */
722 long int out_count = 0; /* # of codes output (for debugging) */
725 * compress stdin to stdout
727 * Algorithm: use open addressing double hashing (no chaining) on the
728 * prefix code / next character combination. We do a variant of Knuth's
729 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
730 * secondary probe. Here, the modular division first probe is gives way
731 * to a faster exclusive-or manipulation. Also do block compression with
732 * an adaptive reset, whereby the code table is cleared when the compression
733 * ratio decreases, but after the table fills. The variable-length output
734 * codes are re-sized at this point, and a special CLEAR code is generated
735 * for the decompressor. Late addition: construct the table according to
736 * file size for noticeable speed improvement on small files. Please direct
737 * questions about this implementation to ames!jaw.
740 void compress()
742 REGISTER long fcode;
743 REGISTER code_int i = 0;
744 REGISTER int c;
745 REGISTER code_int ent;
746 #ifdef XENIX_16
747 REGISTER code_int disp;
748 #else /* Normal machine */
749 REGISTER int disp;
750 #endif
751 REGISTER code_int hsize_reg;
752 REGISTER int hshift;
754 #ifndef COMPATIBLE
755 if (nomagic == 0)
757 putc(magic_header[0],stdout);
758 putc(magic_header[1],stdout);
759 putc((char)(maxbits | block_compress),stdout);
760 if(ferror(stdout))
761 writeerr();
763 #endif /* COMPATIBLE */
765 offset = 0;
766 bytes_out = 3; /* includes 3-byte header mojo */
767 out_count = 0;
768 clear_flg = 0;
769 ratio = 0;
770 in_count = 1;
771 checkpoint = CHECK_GAP;
772 maxcode = MAXCODE(n_bits = INIT_BITS);
773 free_ent = ((block_compress) ? FIRST : 256 );
775 ent = getc(stdin);
777 hshift = 0;
778 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L )
779 hshift++;
780 hshift = 8 - hshift; /* set hash code range bound */
782 hsize_reg = hsize;
783 cl_hash( (count_int) hsize_reg); /* clear hash table */
785 #ifdef SIGNED_COMPARE_SLOW
786 while ( (c = getc(stdin)) != (unsigned) EOF )
787 #else
788 while ( (c = getc(stdin)) != EOF )
789 #endif
791 in_count++;
792 fcode = (long) (((long) c << maxbits) + ent);
793 i = ((c << hshift) ^ ent); /* xor hashing */
795 if ( htabof (i) == fcode )
797 ent = codetabof (i);
798 continue;
799 } else if ( (long)htabof (i) < 0 ) /* empty slot */
800 goto nomatch;
801 disp = hsize_reg - i; /* secondary hash (after G. Knott) */
802 if ( i == 0 )
803 disp = 1;
804 probe:
805 if ( (i -= disp) < 0 )
806 i += hsize_reg;
808 if ( htabof (i) == fcode )
810 ent = codetabof (i);
811 continue;
813 if ( (long)htabof (i) > 0 )
814 goto probe;
815 nomatch:
816 output ( (code_int) ent );
817 out_count++;
818 ent = c;
819 #ifdef SIGNED_COMPARE_SLOW
820 if ( (unsigned) free_ent < (unsigned) maxmaxcode)
821 #else
822 if ( free_ent < maxmaxcode )
823 #endif
825 codetabof (i) = free_ent++; /* code -> hashtable */
826 htabof (i) = fcode;
828 else if ( (count_int)in_count >= checkpoint && block_compress )
829 cl_block ();
832 * Put out the final code.
834 output( (code_int)ent );
835 out_count++;
836 output( (code_int)-1 );
839 * Print out stats on stderr
841 if(zcat_flg == 0 && !quiet)
843 #ifdef DEBUG
844 fprintf( stderr,
845 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
846 in_count, out_count, bytes_out );
847 prratio( stderr, in_count, bytes_out );
848 fprintf( stderr, "\n");
849 fprintf( stderr, "\tCompression as in compact: " );
850 prratio( stderr, in_count-bytes_out, in_count );
851 fprintf( stderr, "\n");
852 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
853 free_ent - 1, n_bits );
854 #else /* !DEBUG */
855 fprintf( stderr, "Compression: " );
856 prratio( stderr, in_count-bytes_out, in_count );
857 #endif /* DEBUG */
859 if(bytes_out > in_count) /* exit(2) if no savings */
860 exit_stat = 2;
861 return;
864 /*****************************************************************
865 * TAG( output )
867 * Output the given code.
868 * Inputs:
869 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
870 * that n_bits =< (long)wordsize - 1.
871 * Outputs:
872 * Outputs code to the file.
873 * Assumptions:
874 * Chars are 8 bits long.
875 * Algorithm:
876 * Maintain a BITS character long buffer (so that 8 codes will
877 * fit in it exactly). Use the VAX insv instruction to insert each
878 * code in turn. When the buffer fills up empty it and start over.
881 static char buf[BITS];
883 #ifndef vax
884 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
885 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
886 #endif /* vax */
887 void output( code )
888 code_int code;
890 #ifdef DEBUG
891 static int col = 0;
892 #endif /* DEBUG */
895 * On the VAX, it is important to have the REGISTER declarations
896 * in exactly the order given, or the asm will break.
898 REGISTER int r_off = offset, bits= n_bits;
899 REGISTER char * bp = buf;
900 #ifndef BREAKHIGHC
901 #ifdef METAWARE
902 int temp;
903 #endif
904 #endif
905 #ifdef DEBUG
906 if ( verbose )
907 fprintf( stderr, "%5d%c", code,
908 (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
909 #endif /* DEBUG */
910 if ( code >= 0 )
912 #ifdef vax
913 /* VAX DEPENDENT!! Implementation on other machines is below.
915 * Translation: Insert BITS bits from the argument starting at
916 * offset bits from the beginning of buf.
918 0; /* Work around for pcc -O bug with asm and if stmt */
919 asm( "insv 4(ap),r11,r10,(r9)" );
920 #else /* not a vax */
922 * byte/bit numbering on the VAX is simulated by the following code
925 * Get to the first byte.
927 bp += (r_off >> 3);
928 r_off &= 7;
930 * Since code is always >= 8 bits, only need to mask the first
931 * hunk on the left.
933 #ifndef BREAKHIGHC
934 #ifdef METAWARE
935 *bp &= rmask[r_off];
936 temp = (code << r_off) & lmask[r_off];
937 *bp |= temp;
938 #else
939 *bp = (*bp & rmask[r_off]) | ((code << r_off) & lmask[r_off]);
940 #endif
941 #else
942 *bp = (*bp & rmask[r_off]) | ((code << r_off) & lmask[r_off]);
943 #endif
944 bp++;
945 bits -= (8 - r_off);
946 code >>= (8 - r_off);
947 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
948 if ( bits >= 8 )
950 *bp++ = code;
951 code >>= 8;
952 bits -= 8;
954 /* Last bits. */
955 if(bits)
956 *bp = code;
957 #endif /* vax */
958 offset += n_bits;
959 if ( offset == (n_bits << 3) )
961 bp = buf;
962 bits = n_bits;
963 bytes_out += bits;
965 putc(*bp++,stdout);
966 while(--bits);
967 offset = 0;
971 * If the next entry is going to be too big for the code size,
972 * then increase it, if possible.
974 if ( free_ent > maxcode || (clear_flg > 0))
977 * Write the whole buffer, because the input side won't
978 * discover the size increase until after it has read it.
980 if ( offset > 0 )
982 if( fwrite( buf, (size_t)1, (size_t)n_bits, stdout ) != n_bits)
983 writeerr();
984 bytes_out += n_bits;
986 offset = 0;
988 if ( clear_flg )
990 maxcode = MAXCODE (n_bits = INIT_BITS);
991 clear_flg = 0;
993 else
995 n_bits++;
996 if ( n_bits == maxbits )
997 maxcode = maxmaxcode;
998 else
999 maxcode = MAXCODE(n_bits);
1001 #ifdef DEBUG
1002 if ( debug )
1004 fprintf( stderr, "\nChange to %d bits\n", n_bits );
1005 col = 0;
1007 #endif /* DEBUG */
1009 } else
1012 * At EOF, write the rest of the buffer.
1014 if ( offset > 0 )
1015 fwrite( buf, (size_t)1, (size_t)(offset + 7) / 8, stdout );
1016 bytes_out += (offset + 7) / 8;
1017 offset = 0;
1018 fflush( stdout );
1019 #ifdef DEBUG
1020 if ( verbose )
1021 fprintf( stderr, "\n" );
1022 #endif /* DEBUG */
1023 if( ferror( stdout ) )
1024 writeerr();
1028 * Decompress stdin to stdout. This routine adapts to the codes in the
1029 * file building the "string" table on-the-fly; requiring no table to
1030 * be stored in the compressed file. The tables used herein are shared
1031 * with those of the compress() routine. See the definitions above.
1034 void decompress() {
1035 REGISTER char_type *stackp;
1036 REGISTER int finchar;
1037 REGISTER code_int code, oldcode, incode;
1040 * As above, initialize the first 256 entries in the table.
1042 maxcode = MAXCODE(n_bits = INIT_BITS);
1043 for ( code = 255; code >= 0; code-- ) {
1044 tab_prefixof(code) = 0;
1045 tab_suffixof(code) = (char_type)code;
1047 free_ent = ((block_compress) ? FIRST : 256 );
1049 finchar = oldcode = getcode();
1050 if(oldcode == -1) /* EOF already? */
1051 return; /* Get out of here */
1052 putc( (char)finchar,stdout ); /* first code must be 8 bits = char */
1053 if(ferror(stdout)) /* Crash if can't write */
1054 writeerr();
1055 stackp = de_stack;
1057 while ( (code = getcode()) > -1 ) {
1059 if ( (code == CLEAR) && block_compress ) {
1060 for ( code = 255; code >= 0; code-- )
1061 tab_prefixof(code) = 0;
1062 clear_flg = 1;
1063 free_ent = FIRST - 1;
1064 if ( (code = getcode ()) == -1 ) /* O, untimely death! */
1065 break;
1067 incode = code;
1069 * Special case for KwKwK string.
1071 if ( code >= free_ent ) {
1072 *stackp++ = finchar;
1073 code = oldcode;
1077 * Generate output characters in reverse order
1079 #ifdef SIGNED_COMPARE_SLOW
1080 while ( ((unsigned long)code) >= ((unsigned long)256) ) {
1081 #else
1082 while ( code >= 256 ) {
1083 #endif
1084 *stackp++ = tab_suffixof(code);
1085 code = tab_prefixof(code);
1087 *stackp++ = finchar = tab_suffixof(code);
1090 * And put them out in forward order
1093 putc ( *--stackp ,stdout);
1094 while ( stackp > de_stack );
1097 * Generate the new entry.
1099 if ( (code=free_ent) < maxmaxcode )
1101 tab_prefixof(code) = (unsigned short)oldcode;
1102 tab_suffixof(code) = finchar;
1103 free_ent = code+1;
1106 * Remember previous code.
1108 oldcode = incode;
1110 fflush( stdout );
1111 if(ferror(stdout))
1112 writeerr();
1115 /*****************************************************************
1116 * TAG( getcode )
1118 * Read one code from the standard input. If EOF, return -1.
1119 * Inputs:
1120 * stdin
1121 * Outputs:
1122 * code or -1 is returned.
1125 code_int
1126 getcode()
1129 * On the VAX, it is important to have the REGISTER declarations
1130 * in exactly the order given, or the asm will break.
1132 REGISTER code_int code;
1133 static int offset = 0, size = 0;
1134 static char_type buf[BITS];
1135 REGISTER int r_off, bits;
1136 REGISTER char_type *bp = buf;
1138 if ( clear_flg > 0 || offset >= size || free_ent > maxcode )
1141 * If the next entry will be too big for the current code
1142 * size, then we must increase the size. This implies reading
1143 * a new buffer full, too.
1145 if ( free_ent > maxcode )
1147 n_bits++;
1148 if ( n_bits == maxbits )
1149 maxcode = maxmaxcode; /* won't get any bigger now */
1150 else
1151 maxcode = MAXCODE(n_bits);
1153 if ( clear_flg > 0)
1155 maxcode = MAXCODE (n_bits = INIT_BITS);
1156 clear_flg = 0;
1158 size = fread( buf, (size_t)1, (size_t)n_bits, stdin );
1159 if ( size <= 0 )
1160 return -1; /* end of file */
1161 offset = 0;
1162 /* Round size down to integral number of codes */
1163 size = (size << 3) - (n_bits - 1);
1165 r_off = offset;
1166 bits = n_bits;
1167 #ifdef vax
1168 asm( "extzv r10,r9,(r8),r11" );
1169 #else /* not a vax */
1171 * Get to the first byte.
1173 bp += (r_off >> 3);
1174 r_off &= 7;
1175 /* Get first part (low order bits) */
1176 #ifdef NO_UCHAR
1177 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
1178 #else
1179 code = (*bp++ >> r_off);
1180 #endif /* NO_UCHAR */
1181 bits -= (8 - r_off);
1182 r_off = 8 - r_off; /* now, offset into code word */
1183 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1184 if ( bits >= 8 )
1186 #ifdef NO_UCHAR
1187 code |= (*bp++ & 0xff) << r_off;
1188 #else
1189 code |= *bp++ << r_off;
1190 #endif /* NO_UCHAR */
1191 r_off += 8;
1192 bits -= 8;
1194 /* high order bits. */
1195 code |= (*bp & rmask[bits]) << r_off;
1196 #endif /* vax */
1197 offset += n_bits;
1199 return code;
1202 #ifndef AZTEC86
1203 char *
1204 strrchr(s, c) /* For those who don't have it in libc.a */
1205 REGISTER char *s, c;
1207 char *p;
1208 for (p = NULL; *s; s++)
1209 if (*s == c)
1210 p = s;
1211 return(p);
1213 #endif
1216 #ifndef METAWARE
1217 #ifdef DEBUG
1218 printcodes()
1221 * Just print out codes from input file. For debugging.
1223 code_int code;
1224 int col = 0, bits;
1226 bits = n_bits = INIT_BITS;
1227 maxcode = MAXCODE(n_bits);
1228 free_ent = ((block_compress) ? FIRST : 256 );
1229 while ( ( code = getcode() ) >= 0 ) {
1230 if ( (code == CLEAR) && block_compress ) {
1231 free_ent = FIRST - 1;
1232 clear_flg = 1;
1234 else if ( free_ent < maxmaxcode )
1235 free_ent++;
1236 if ( bits != n_bits ) {
1237 fprintf(stderr, "\nChange to %d bits\n", n_bits );
1238 bits = n_bits;
1239 col = 0;
1241 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
1243 putc( '\n', stderr );
1244 exit( 0 );
1246 #ifdef DEBUG2
1247 code_int sorttab[1<<BITS]; /* sorted pointers into htab */
1248 #define STACK_SIZE 500
1249 static char stack[STACK_SIZE];
1250 /* dumptab doesn't use main stack now -prevents distressing crashes */
1251 dump_tab() /* dump string table */
1253 REGISTER int i, first;
1254 REGISTER ent;
1255 int stack_top = STACK_SIZE;
1256 REGISTER c;
1258 if(do_decomp == 0) { /* compressing */
1259 REGISTER int flag = 1;
1261 for(i=0; i<hsize; i++) { /* build sort pointers */
1262 if((long)htabof(i) >= 0) {
1263 sorttab[codetabof(i)] = i;
1266 first = block_compress ? FIRST : 256;
1267 for(i = first; i < free_ent; i++) {
1268 fprintf(stderr, "%5d: \"", i);
1269 stack[--stack_top] = '\n';
1270 stack[--stack_top] = '"'; /* " */
1271 stack_top = in_stack((int)(htabof(sorttab[i])>>maxbits)&0xff,
1272 stack_top);
1273 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
1274 ent > 256;
1275 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
1276 stack_top = in_stack((int)(htabof(sorttab[ent]) >> maxbits),
1277 stack_top);
1279 stack_top = in_stack(ent, stack_top);
1280 fwrite( &stack[stack_top], (size_t)1, (size_t)(STACK_SIZE-stack_top), stderr);
1281 stack_top = STACK_SIZE;
1283 } else if(!debug) { /* decompressing */
1285 for ( i = 0; i < free_ent; i++ ) {
1286 ent = i;
1287 c = tab_suffixof(ent);
1288 if ( isascii(c) && isprint(c) )
1289 fprintf( stderr, "%5d: %5d/'%c' \"",
1290 ent, tab_prefixof(ent), c );
1291 else
1292 fprintf( stderr, "%5d: %5d/\\%03o \"",
1293 ent, tab_prefixof(ent), c );
1294 stack[--stack_top] = '\n';
1295 stack[--stack_top] = '"'; /* " */
1296 for ( ; ent != NULL;
1297 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
1298 stack_top = in_stack(tab_suffixof(ent), stack_top);
1300 fwrite( &stack[stack_top], (size_t)1, (size_t)(STACK_SIZE - stack_top), stderr );
1301 stack_top = STACK_SIZE;
1307 in_stack(c, stack_top)
1308 REGISTER int c, stack_top;
1310 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
1311 stack[--stack_top] = c;
1312 } else {
1313 switch( c ) {
1314 case '\n': stack[--stack_top] = 'n'; break;
1315 case '\t': stack[--stack_top] = 't'; break;
1316 case '\b': stack[--stack_top] = 'b'; break;
1317 case '\f': stack[--stack_top] = 'f'; break;
1318 case '\r': stack[--stack_top] = 'r'; break;
1319 case '\\': stack[--stack_top] = '\\'; break;
1320 default:
1321 stack[--stack_top] = '0' + c % 8;
1322 stack[--stack_top] = '0' + (c / 8) % 8;
1323 stack[--stack_top] = '0' + c / 64;
1324 break;
1326 stack[--stack_top] = '\\';
1328 if (stack_top<0) {
1329 fprintf(stderr,"dump_tab stack overflow!!!\n");
1330 exit(1);
1332 return stack_top;
1334 #else
1335 dump_tab() {}
1336 #endif /* DEBUG2 */
1337 #endif /* DEBUG */
1338 #endif /* METAWARE */
1340 void writeerr()
1342 perror ( ofname );
1343 unlink ( ofname );
1344 exit ( 1 );
1347 void copystat(ifname, ofname)
1348 char *ifname, *ofname;
1350 struct stat statbuf;
1351 int mode;
1352 #ifndef AZTEC86
1353 time_t timep[2];
1354 #else
1355 unsigned long timep[2];
1356 #endif
1357 fflush(stdout);
1358 close(fileno(stdout));
1359 if (stat(ifname, &statbuf))
1360 { /* Get stat on input file */
1361 perror(ifname);
1362 return;
1364 #ifndef PCDOS
1365 /* meddling with UNIX-style file modes */
1366 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/)
1368 if(quiet)
1369 fprintf(stderr, "%s: ", ifname);
1370 fprintf(stderr, " -- not a regular file: unchanged");
1371 exit_stat = 1;
1372 } else if (statbuf.st_nlink > 1)
1374 if(quiet)
1375 fprintf(stderr, "%s: ", ifname);
1376 fprintf(stderr, " -- has %d other links: unchanged",
1377 statbuf.st_nlink - 1);
1378 exit_stat = 1;
1379 } else
1380 #endif
1381 if (exit_stat == 2 && (!force))
1382 { /* No compression: remove file.Z */
1383 if(!quiet)
1384 fprintf(stderr, " -- file unchanged");
1385 } else
1386 { /* ***** Successful Compression ***** */
1387 exit_stat = 0;
1388 #ifndef PCDOS
1389 mode = statbuf.st_mode & 07777;
1390 #else
1391 mode = statbuf.st_attr & 07777;
1392 #endif
1393 if (chmod(ofname, mode)) /* Copy modes */
1394 perror(ofname);
1395 #ifndef PCDOS
1396 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
1397 timep[0] = statbuf.st_atime;
1398 timep[1] = statbuf.st_mtime;
1399 #else
1400 timep[0] = statbuf.st_mtime;
1401 timep[1] = statbuf.st_mtime;
1402 #endif
1403 utime(ofname, (struct utimbuf *)timep); /* Update last accessed and modified times */
1405 if (unlink(ifname))
1406 perror(ifname);
1408 if(!quiet)
1409 if(do_decomp == 0)
1410 fprintf(stderr, " -- compressed to %s", ofname);
1411 else
1412 fprintf(stderr, " -- decompressed to %s", ofname);
1413 return; /* Successful return */
1416 /* Unsuccessful return -- one of the tests failed */
1417 if (unlink(ofname))
1418 perror(ofname);
1421 * This routine returns 1 if we are running in the foreground and stderr
1422 * is a tty.
1424 int foreground()
1426 #ifndef METAWARE
1427 if(bgnd_flag) { /* background? */
1428 return(0);
1429 } else { /* foreground */
1430 #endif
1431 if(isatty(2)) { /* and stderr is a tty */
1432 return(1);
1433 } else {
1434 return(0);
1436 #ifndef METAWARE
1438 #endif
1440 #ifndef METAWARE
1441 void onintr (dummy)
1442 int dummy; /* to keep the compiler happy */
1444 (void)signal(SIGINT,SIG_IGN);
1445 unlink ( ofname );
1446 exit ( 1 );
1449 void oops (dummy) /* wild pointer -- assume bad input */
1450 int dummy; /* to keep the compiler happy */
1452 (void)signal(SIGSEGV,SIG_IGN);
1453 if ( do_decomp == 1 )
1454 fprintf ( stderr, "uncompress: corrupt input\n" );
1455 unlink ( ofname );
1456 exit ( 1 );
1458 #endif
1459 void cl_block () /* table clear for block compress */
1461 REGISTER long int rat;
1463 checkpoint = in_count + CHECK_GAP;
1464 #ifdef DEBUG
1465 if ( debug ) {
1466 fprintf ( stderr, "count: %ld, ratio: ", in_count );
1467 prratio ( stderr, in_count, bytes_out );
1468 fprintf ( stderr, "\n");
1470 #endif /* DEBUG */
1472 if(in_count > 0x007fffff) { /* shift will overflow */
1473 rat = bytes_out >> 8;
1474 if(rat == 0) { /* Don't divide by zero */
1475 rat = 0x7fffffff;
1476 } else {
1477 rat = in_count / rat;
1479 } else {
1480 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
1482 if ( rat > ratio ) {
1483 ratio = rat;
1484 } else {
1485 ratio = 0;
1486 #ifdef DEBUG
1487 if(verbose)
1488 dump_tab(); /* dump string table */
1489 #endif
1490 cl_hash ( (count_int) hsize );
1491 free_ent = FIRST;
1492 clear_flg = 1;
1493 output ( (code_int) CLEAR );
1494 #ifdef DEBUG
1495 if(debug)
1496 fprintf ( stderr, "clear\n" );
1497 #endif /* DEBUG */
1501 void cl_hash(hsize) /* reset code table */
1502 REGISTER count_int hsize;
1504 #ifdef AZTEC86
1505 #ifdef PCDOS
1506 /* This function only in PC-DOS lib, not in MINIX lib */
1507 memset(htab,-1, hsize * sizeof(count_int));
1508 #else
1509 /* MINIX and all non-PC machines do it this way */
1510 #ifndef XENIX_16 /* Normal machine */
1511 REGISTER count_int *htab_p = htab+hsize;
1512 #else
1513 REGISTER j;
1514 REGISTER long k = hsize;
1515 REGISTER count_int *htab_p;
1516 #endif
1517 REGISTER long i;
1518 REGISTER long m1 = -1;
1520 #ifdef XENIX_16
1521 for(j=0; j<=8 && k>=0; j++,k-=8192)
1523 i = 8192;
1524 if(k < 8192)
1526 i = k;
1528 htab_p = &(htab[j][i]);
1529 i -= 16;
1530 if(i > 0)
1532 #else
1533 i = hsize - 16;
1534 #endif
1536 { /* might use Sys V memset(3) here */
1537 *(htab_p-16) = m1;
1538 *(htab_p-15) = m1;
1539 *(htab_p-14) = m1;
1540 *(htab_p-13) = m1;
1541 *(htab_p-12) = m1;
1542 *(htab_p-11) = m1;
1543 *(htab_p-10) = m1;
1544 *(htab_p-9) = m1;
1545 *(htab_p-8) = m1;
1546 *(htab_p-7) = m1;
1547 *(htab_p-6) = m1;
1548 *(htab_p-5) = m1;
1549 *(htab_p-4) = m1;
1550 *(htab_p-3) = m1;
1551 *(htab_p-2) = m1;
1552 *(htab_p-1) = m1;
1553 htab_p -= 16;
1554 } while ((i -= 16) >= 0);
1555 #ifdef XENIX_16
1558 #endif
1559 for ( i += 16; i > 0; i-- )
1560 *--htab_p = m1;
1561 #endif
1562 #endif
1565 void prratio(stream, num, den)
1566 FILE *stream;
1567 long int num;
1568 long int den;
1570 REGISTER int q; /* Doesn't need to be long */
1571 if(num > 214748L)
1572 { /* 2147483647/10000 */
1573 q = (int)(num / (den / 10000L));
1574 } else
1576 q = (int)(10000L * num / den); /* Long calculations, though */
1578 if (q < 0)
1580 putc('-', stream);
1581 q = -q;
1583 fprintf(stream, "%d.%02d%c", q / 100, q % 100, '%');
1586 void version()
1588 fprintf(stderr, "compress 4.1\n");
1589 fprintf(stderr, "Options: ");
1590 #ifdef vax
1591 fprintf(stderr, "vax, ");
1592 #endif
1593 #ifdef _MINIX
1594 fprintf(stderr, "MINIX, ");
1595 #endif
1596 #ifdef NO_UCHAR
1597 fprintf(stderr, "NO_UCHAR, ");
1598 #endif
1599 #ifdef SIGNED_COMPARE_SLOW
1600 fprintf(stderr, "SIGNED_COMPARE_SLOW, ");
1601 #endif
1602 #ifdef XENIX_16
1603 fprintf(stderr, "XENIX_16, ");
1604 #endif
1605 #ifdef COMPATIBLE
1606 fprintf(stderr, "COMPATIBLE, ");
1607 #endif
1608 #ifdef DEBUG
1609 fprintf(stderr, "DEBUG, ");
1610 #endif
1611 #ifdef BSD4_2
1612 fprintf(stderr, "BSD4_2, ");
1613 #endif
1614 fprintf(stderr, "BITS = %d\n", BITS);
1616 /* End of text from uok.UUCP:net.sources */