2 * Compress - data compression program
6 * machine variants which require cc -Dmachine: pdp11, z8000, pcxt
10 * Set USERMEM to the maximum amount of physical user memory available
11 * in bytes. USERMEM is used to determine the maximum BITS that can be used
14 * SACREDMEM is the amount of physical memory saved for others; compress
22 # define USERMEM 450000 /* default user memory */
25 #ifdef interdata /* (Perkin-Elmer) */
26 #define SIGNED_COMPARE_SLOW /* signed compare is slower than unsigned */
30 # define BITS 12 /* max bits/code for 16-bit machine */
31 # define NO_UCHAR /* also if "unsigned char" functions as signed char */
33 #endif /* pdp11 */ /* don't forget to compile with -i */
37 # undef vax /* weird preprocessor */
41 #ifdef MSDOS /* Microsoft C 3.0 for MS-DOS */
43 # ifdef BIG /* then this is a large data compilation */
44 # undef DEBUG /* DEBUG makes the executible too big */
47 # else /* this is a small model compilation */
60 # if USERMEM >= (433484+SACREDMEM)
63 # if USERMEM >= (229600+SACREDMEM)
66 # if USERMEM >= (127536+SACREDMEM)
69 # if USERMEM >= (73464+SACREDMEM)
80 #ifdef PBITS /* Preferred BITS for this memory size */
87 # define HSIZE 69001 /* 95% occupancy */
90 # define HSIZE 35023 /* 94% occupancy */
93 # define HSIZE 18013 /* 91% occupancy */
96 # define HSIZE 9001 /* 91% occupancy */
99 # define HSIZE 5003 /* 80% occupancy */
102 #ifdef M_XENIX /* Stupid compiler can't handle arrays with */
103 # if BITS == 16 /* more than 65535 bytes - so we fake it */
106 # if BITS > 13 /* Code only handles BITS = 12, 13, or 16 */
113 * a code_int must be able to hold 2**BITS values of type int, and also -1
116 typedef long int code_int
;
118 typedef int code_int
;
121 #ifdef SIGNED_COMPARE_SLOW
122 typedef unsigned long int count_int
;
123 typedef unsigned short int count_short
;
125 typedef long int count_int
;
129 typedef char char_type
;
131 typedef unsigned char char_type
;
133 char_type magic_header
[] = { "\037\235" }; /* 1F 9D */
135 /* Defines for third byte of header */
136 #define BIT_MASK 0x1f
137 #define BLOCK_MASK 0x80
138 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
139 a fourth header byte (for expansion).
141 #define INIT_BITS 9 /* initial number of bits/code */
143 #define min(a, b) ((a) > (b) ? (b) : (a))
146 * compress.c - File compression ala IEEE Computer, June 1984.
148 * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
149 * Jim McKie (decvax!mcvax!jim)
150 * Steve Davies (decvax!vax135!petsd!peora!srd)
151 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
152 * James A. Woods (decvax!ihnp4!ames!jaw)
153 * Joe Orost (decvax!vax135!petsd!joe)
155 * $Header: /tmp/bonefish/open-beos/current/src/apps/bin/compress/compress.c,v 1.1 2004/06/07 21:23:09 korli Exp $
156 * $Log: compress.c,v $
157 * Revision 1.1 2004/06/07 21:23:09 korli
160 * Revision 1.12 1996/02/24 02:45:03 cyril
163 * Revision 1.11 1996/02/18 23:50:39 cyril
164 * final clean up for new resource policy.
166 * Revision 1.10 1996/01/25 09:53:14 dbg
167 * deal with the now correct posix headers
169 * Revision 1.9 1996/01/23 19:17:39 btaylor
172 * Revision 1.8 1996/01/11 18:28:19 dbg
173 * deal with the new posix environment (i.e. we can now
174 * include <sys/stat.h> and <sys/types.h> because they
175 * exist and we don't need as many workarounds for missing
176 * things in the posix environment).
178 * Revision 1.7 1995/12/21 18:58:15 erich
179 * Add macro for isascii, which doesn't really exist.
181 * Revision 1.1 1995/12/13 22:51:52 ming
182 * DR6 FROZEN ON 12/13/95 14:00:00
184 * Revision 1.6 1995/11/08 22:49:32 robert
185 * FILE_NAME_LENGTH -> B_FILE_NAME_LENGTH
187 * Revision 1.5 1995/08/30 02:42:55 herold
188 * ### API ### remove all traces of int type from OS.h. Change
189 * FILENAME_LENGTH to FILE_NAME_LENGTH. Lotsa buck for not much
190 * bang, but there it is...
192 * Revision 1.4 1995/06/05 22:34:07 erich
193 * Attempt to make stuff in ./gnu compile...sort of.
195 * Revision 1.3 1995/02/22 19:38:43 peter
196 * Update 'tar' and 'compress' tools to work with new resource/data files.
197 * 'tar' as some glitches (when using the 'u' flag), but we'll worry about
200 * Revision 1.2 1994/10/04 17:38:00 herold
201 * don't depend on /usr/include being around - use Unix.h instead
203 * Revision 1.1 1993/10/14 22:46:10 moofie
206 * Revision 4.0.1 87/11/27 21:45:00 rms (Richard Stallman)
207 * Once copystat has run, don't delete output file on interrupt.
208 * Mention more flags in the Usage string.
210 * Revision 4.0 85/07/30 12:50:00 joe
211 * Removed ferror() calls in output routine on every output except first.
212 * Prepared for release to the world.
214 * Revision 3.6 85/07/04 01:22:21 joe
215 * Remove much wasted storage by overlaying hash table with the tables
216 * used by decompress: tab_suffix[1<<BITS], stack[8000]. Updated USERMEM
217 * computations. Fixed dump_tab() DEBUG routine.
219 * Revision 3.5 85/06/30 20:47:21 jaw
220 * Change hash function to use exclusive-or. Rip out hash cache. These
221 * speedups render the megamemory version defunct, for now. Make decoder
222 * stack global. Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
224 * Revision 3.4 85/06/27 12:00:00 ken
225 * Get rid of all floating-point calculations by doing all compression ratio
226 * calculations in fixed point.
228 * Revision 3.3 85/06/24 21:53:24 joe
229 * Incorporate portability suggestion for M_XENIX. Got rid of text on #else
230 * and #endif lines. Cleaned up #ifdefs for vax and interdata.
232 * Revision 3.2 85/06/06 21:53:24 jaw
233 * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
234 * Default to "quiet" output (no compression statistics).
236 * Revision 3.1 85/05/12 18:56:13 jaw
237 * Integrate decompress() stack speedups (from early pointer mods by McKie).
238 * Repair multi-file USERMEM gaffe. Unify 'force' flags to mimic semantics
239 * of SVR2 'pack'. Streamline block-compress table clear logic. Increase
240 * output byte count by magic number size.
242 * Revision 3.0 84/11/27 11:50:00 petsd!joe
243 * Set HSIZE depending on BITS. Set BITS depending on USERMEM. Unrolled
244 * loops in clear routines. Added "-C" flag for 2.0 compatibility. Used
245 * unsigned compares on Perkin-Elmer. Fixed foreground check.
247 * Revision 2.7 84/11/16 19:35:39 ames!jaw
248 * Cache common hash codes based on input statistics; this improves
249 * performance for low-density raster images. Pass on #ifdef bundle
252 * Revision 2.6 84/11/05 19:18:21 ames!jaw
253 * Vary size of hash tables to reduce time for small files.
254 * Tune PDP-11 hash function.
256 * Revision 2.5 84/10/30 20:15:14 ames!jaw
257 * Junk chaining; replace with the simpler (and, on the VAX, faster)
258 * double hashing, discussed within. Make block compression standard.
260 * Revision 2.4 84/10/16 11:11:11 ames!jaw
261 * Introduce adaptive reset for block compression, to boost the rate
262 * another several percent. (See mailing list notes.)
264 * Revision 2.3 84/09/22 22:00:00 petsd!joe
265 * Implemented "-B" block compress. Implemented REVERSE sorting of tab_next.
266 * Bug fix for last bits. Changed fwrite to putchar loop everywhere.
268 * Revision 2.2 84/09/18 14:12:21 ames!jaw
269 * Fold in news changes, small machine typedef from thomas,
270 * #ifdef interdata from joe.
272 * Revision 2.1 84/09/10 12:34:56 ames!jaw
273 * Configured fast table lookup for 32-bit machines.
274 * This cuts user time in half for b <= FBITS, and is useful for news batching
275 * from VAX to PDP sites. Also sped up decompress() [fwrite->putc] and
276 * added signal catcher [plus beef in writeerr()] to delete effluvia.
278 * Revision 2.0 84/08/28 22:00:00 petsd!joe
279 * Add check for foreground before prompting user. Insert maxbits into
280 * compressed file. Force file being uncompressed to end with ".Z".
281 * Added "-c" flag and "zcat". Prepared for release.
283 * Revision 1.10 84/08/24 18:28:00 turtlevax!ken
284 * Will only compress regular files (no directories), added a magic number
285 * header (plus an undocumented -n flag to handle old files without headers),
286 * added -f flag to force overwriting of possibly existing destination file,
287 * otherwise the user is prompted for a response. Will tack on a .Z to a
288 * filename if it doesn't have one when decompressing. Will only replace
289 * file if it was compressed.
291 * Revision 1.9 84/08/16 17:28:00 turtlevax!ken
292 * Removed scanargs(), getopt(), added .Z extension and unlimited number of
293 * filenames to compress. Flags may be clustered (-Ddvb12) or separated
294 * (-D -d -v -b 12), or combination thereof. Modes and other status is
295 * copied with copystat(). -O bug for 4.2 seems to have disappeared with
298 * Revision 1.8 84/08/09 23:15:00 joe
299 * Made it compatible with vax version, installed jim's fixes/enhancements
301 * Revision 1.6 84/08/01 22:08:00 joe
302 * Sped up algorithm significantly by sorting the compress chain.
304 * Revision 1.5 84/07/13 13:11:00 srd
305 * Added C version of vax asm routines. Changed structure to arrays to
306 * save much memory. Do unsigned compares where possible (faster on
309 * Revision 1.4 84/07/05 03:11:11 thomas
310 * Clean up the code a little and lint it. (Lint complains about all
311 * the regs used in the asm, but I'm not going to "fix" this.)
313 * Revision 1.3 84/07/05 02:06:54 thomas
316 * Revision 1.2 84/07/05 00:27:27 thomas
317 * Add variable bit length output.
320 static char rcs_ident
[] = "$Header: /tmp/bonefish/open-beos/current/src/apps/bin/compress/compress.c,v 1.1 2004/06/07 21:23:09 korli Exp $";
325 #include <sys/types.h>
326 #include <sys/stat.h>
337 #define ARGVAL() (*++(*argv) || (--argc && *++argv))
339 int n_bits
; /* number of bits/code */
340 int maxbits
= BITS
; /* user settable max # bits/code */
341 code_int maxcode
; /* maximum code, given n_bits */
342 code_int maxmaxcode
= (code_int
)1 << BITS
; /* should NEVER generate this code */
343 #ifdef COMPATIBLE /* But wrong! */
344 # define MAXCODE(n_bits) ((code_int) 1 << (n_bits) - 1)
346 # define MAXCODE(n_bits) (((code_int) 1 << (n_bits)) - 1)
347 #endif /* COMPATIBLE */
352 count_int far htab0
[8192];
353 count_int far htab1
[8192];
354 count_int far htab2
[8192];
355 count_int far htab3
[8192];
356 count_int far htab4
[8192];
357 count_int far htab5
[8192];
358 count_int far htab6
[8192];
359 count_int far htab7
[8192];
360 count_int far htab8
[HSIZE
-65536];
361 count_int far
* htab
[9] = {
362 htab0
, htab1
, htab2
, htab3
, htab4
, htab5
, htab6
, htab7
, htab8
};
364 unsigned short far code0tab
[16384];
365 unsigned short far code1tab
[16384];
366 unsigned short far code2tab
[16384];
367 unsigned short far code3tab
[16384];
368 unsigned short far code4tab
[16384];
369 unsigned short far
* codetab
[5] = {
370 code0tab
, code1tab
, code2tab
, code3tab
, code4tab
};
374 count_int htab0
[8192];
375 count_int htab1
[8192];
376 count_int htab2
[8192];
377 count_int htab3
[8192];
378 count_int htab4
[8192];
379 count_int htab5
[8192];
380 count_int htab6
[8192];
381 count_int htab7
[8192];
382 count_int htab8
[HSIZE
-65536];
383 count_int
* htab
[9] = {
384 htab0
, htab1
, htab2
, htab3
, htab4
, htab5
, htab6
, htab7
, htab8
};
386 unsigned short code0tab
[16384];
387 unsigned short code1tab
[16384];
388 unsigned short code2tab
[16384];
389 unsigned short code3tab
[16384];
390 unsigned short code4tab
[16384];
391 unsigned short * codetab
[5] = {
392 code0tab
, code1tab
, code2tab
, code3tab
, code4tab
};
396 #define htabof(i) (htab[(i) >> 13][(i) & 0x1fff])
397 #define codetabof(i) (codetab[(i) >> 14][(i) & 0x3fff])
399 #else /* Normal machine */
400 count_int htab
[HSIZE
];
401 unsigned short codetab
[HSIZE
];
402 #define htabof(i) htab[i]
403 #define codetabof(i) codetab[i]
405 #endif /* XENIX_16 */
406 code_int hsize
= HSIZE
; /* for dynamic table sizing */
410 * To save much memory, we overlay the table used by compress() with those
411 * used by decompress(). The tab_prefix table is the same size and type
412 * as the codetab. The tab_suffix table needs 2**BITS characters. We
413 * get this from the beginning of htab. The output stack uses the rest
414 * of htab, and contains characters. There is plenty of room for any
415 * possible stack (stack used to be 8000 characters).
418 #define tab_prefixof(i) codetabof(i)
422 # define tab_suffixof(i) ((char_type far *)htab[(i)>>15])[(i) & 0x7fff]
423 # define de_stack ((char_type far *)(htab2))
425 # define tab_suffixof(i) ((char_type *)htab[(i)>>15])[(i) & 0x7fff]
426 # define de_stack ((char_type *)(htab2))
428 #else /* Normal machine */
429 # define tab_suffixof(i) ((char_type *)(htab))[i]
430 # define de_stack ((char_type *)&tab_suffixof((code_int)1<<BITS))
431 #endif /* XENIX_16 */
433 code_int free_ent
= 0; /* first unused entry */
442 fprintf(stderr
,"Usage: compress [-cdDfivV] [-b maxbits] [file ...]\n");
444 fprintf(stderr
,"Usage: compress [-cdDfvV] [-b maxbits] [file ...]\n");
452 fprintf(stderr
,"Usage: compress [-cdfivV] [-b maxbits] [file ...]\n");
454 fprintf(stderr
,"Usage: compress [-cdfvV] [-b maxbits] [file ...]\n");
459 int nomagic
= 0; /* Use a 3-byte magic number header, unless old file */
460 int zcat_flg
= 0; /* Write output on stdout, suppress messages */
461 int precious
= 1; /* Don't unlink output file on interrupt */
462 int quiet
= 1; /* don't tell me about compression */
465 * block compression parameters -- after all codes are used up,
466 * and compression rate changes, start over.
468 int block_compress
= BLOCK_MASK
;
471 #define CHECK_GAP 10000 /* ratio check interval */
472 count_int checkpoint
= CHECK_GAP
;
474 * the next two codes should not be changed lightly, as they must not
475 * lie within the contiguous general code space.
477 #define FIRST 257 /* first free entry */
478 #define CLEAR 256 /* table clear output code */
484 # define PATH_SEP '\\'
485 int image
= 2; /* 1 <=> image (binary) mode; 2 <=> text mode */
487 # define PATH_SEP '/'
498 void decompress(void);
499 void copystat(char *ifname
, char *ofname
);
501 /*****************************************************************
504 * Algorithm from "A Technique for High Performance Data Compression",
505 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
507 * Usage: compress [-cdfivV] [-b bits] [file ...]
510 * -c: Write output on stdout, don't remove original.
512 * -d: If given, decompression is done instead.
514 * -f: Forces output file to be generated, even if one already
515 * exists, and even if no space is saved by compressing.
516 * If -f is not used, the user will be prompted if stdin is
517 * a tty, otherwise, the output file will not be overwritten.
519 * -i: Image mode (defined only under MS-DOS). Prevents
520 * conversion between UNIX text representation (LF line
521 * termination) in compressed form and MS-DOS text
522 * representation (CR-LF line termination) in uncompressed
523 * form. Useful with non-text files.
525 * -v: Write compression statistics
527 * -V: Write version and compilation options.
529 * -b: Parameter limits the max number of bits/code.
531 * file ...: Files to be compressed. If none specified, stdin
534 * file.Z: Compressed form of file with same mode, owner, and utimes
535 * or stdout (if stdin used as input)
538 * When filenames are given, replaces with the compressed version
539 * (.Z suffix) only if the file decreases in size.
541 * Modified Lempel-Ziv method (LZW). Basically finds common
542 * substrings and replaces them with a variable size code. This is
543 * deterministic, and can be done on the fly. Thus, the decompression
544 * procedure needs no input table, but tracks the way the table was built.
548 register int argc
; char **argv
;
550 int overwrite
= 0; /* Do not overwrite unless given -f flag */
552 char **filelist
, **fileptr
;
553 char *cp
, *rindex(), *malloc();
555 extern void onintr();
564 if ( (bgnd_flag
= signal ( SIGINT
, SIG_IGN
)) != SIG_IGN
) {
567 signal ( SIGINT
, onintr
);
570 signal ( SIGSEGV
, oops
);
575 nomagic
= 1; /* Original didn't have a magic number */
576 #endif /* COMPATIBLE */
578 filelist
= fileptr
= (char **)(malloc(argc
* sizeof(*argv
)));
581 if((cp
= rindex(argv
[0], PATH_SEP
)) != 0) {
588 if(strcmp(cp
, "UNCOMPRE.EXE") == 0) {
590 if(strcmp(cp
, "uncompress") == 0) {
596 } else if(strcmp(cp
, "ZCAT.EXE") == 0) {
598 } else if(strcmp(cp
, "zcat") == 0) {
606 /* 4.2BSD dependent - take it out if not */
607 setlinebuf( stderr
);
610 /* Argument Processing
611 * All flags are optional.
613 * -V => print Version; debug verbose
616 * -f => force overwrite of output file
617 * -n => no header: useful to uncompress old files
618 * -b maxbits => maxbits. If -b is specified, then maxbits MUST be
620 * -c => cat all output to stdout
621 * -C => generate output compatible with compress 2.0.
622 * if a string is left, must be an input filename.
624 for (argc
--, argv
++; argc
> 0; argc
--, argv
++) {
625 if (**argv
== '-') { /* A flag argument */
626 while (*++(*argv
)) { /* Process all flags in this arg */
667 fprintf(stderr
, "Missing maxbits\n");
671 maxbits
= atoi(*argv
);
680 fprintf(stderr
, "Unknown flag: '%c'; ", **argv
);
686 else { /* Input file name */
687 *fileptr
++ = *argv
; /* Build input file list */
689 /* process nextarg; */
694 if(maxbits
< INIT_BITS
) maxbits
= INIT_BITS
;
695 if (maxbits
> BITS
) maxbits
= BITS
;
696 maxmaxcode
= (code_int
) 1 << maxbits
;
698 if (*filelist
!= NULL
) {
699 for (fileptr
= filelist
; *fileptr
; fileptr
++) {
701 if (do_decomp
!= 0) { /* DECOMPRESSION */
704 /* Check for .Z or XZ suffix; add one if necessary */
705 cp
= *fileptr
+ strlen(*fileptr
) - 2;
706 if ((*cp
!= '.' && *cp
!= 'X' && *cp
!= 'x') ||
707 (*(++cp
) != 'Z' && *cp
!= 'z')) {
708 strcpy(tempname
, *fileptr
);
709 if ((cp
=rindex(tempname
,'.')) == NULL
)
710 strcat(tempname
, ".Z");
711 else if(*(++cp
) == '\0') strcat(tempname
, "Z");
714 strcat(tempname
, "XZ");
719 /* Check for .Z suffix */
720 if (strcmp(*fileptr
+ strlen(*fileptr
) - 2, ".Z") != 0) {
721 /* No .Z: tack one on */
722 strcpy(tempname
, *fileptr
);
723 strcat(tempname
, ".Z");
728 /* Open input file for decompression */
731 if ((freopen(*fileptr
, "rb", stdin
)) == NULL
) {
733 if ((freopen(*fileptr
, "r", stdin
)) == NULL
) {
736 perror(*fileptr
); continue;
738 /* Check the magic number */
740 if ((getchar() != (magic_header
[0] & 0xFF))
741 || (getchar() != (magic_header
[1] & 0xFF))) {
742 fprintf(stderr
, "%s: not in compressed format\n",
746 maxbits
= getchar(); /* set -b from file */
747 block_compress
= maxbits
& BLOCK_MASK
;
749 maxmaxcode
= (code_int
) 1 << maxbits
;
752 "%s: compressed with %d bits, can only handle %d bits\n",
753 *fileptr
, maxbits
, BITS
);
757 /* Generate output filename */
758 strcpy(ofname
, *fileptr
);
759 ofname
[strlen(*fileptr
) - 2] = '\0'; /* Strip off .Z */
760 } else { /* COMPRESSION */
763 cp
= *fileptr
+ strlen(*fileptr
) - 2;
764 if ((*cp
== '.' || *cp
== 'X' || *cp
== 'x') &&
765 (*(++cp
) == 'Z' || *cp
== 'z')) {
766 fprintf(stderr
,"%s: already has %s suffix -- no change\n",
769 if (strcmp(*fileptr
+ strlen(*fileptr
) - 2, ".Z") == 0) {
770 fprintf(stderr
, "%s: already has .Z suffix -- no change\n",
776 /* Open input file for compression */
779 if ((freopen(*fileptr
, image
== 2 ? "rt" : "rb", stdin
))
782 if ((freopen(*fileptr
, "r", stdin
)) == NULL
) {
785 perror(*fileptr
); continue;
787 stat ( *fileptr
, &statbuf
);
788 fsize
= (long) statbuf
.st_size
;
790 * tune hash table size for small files -- ad hoc,
791 * but the sizes match earlier #defines, which
792 * serve as upper bounds on the number of output codes.
795 if ( fsize
< (1 << 12) )
796 hsize
= min ( 5003, HSIZE
);
797 else if ( fsize
< (1 << 13) )
798 hsize
= min ( 9001, HSIZE
);
799 else if ( fsize
< (1 << 14) )
800 hsize
= min ( 18013, HSIZE
);
801 else if ( fsize
< (1 << 15) )
802 hsize
= min ( 35023, HSIZE
);
803 else if ( fsize
< 47000 )
804 hsize
= min ( 50021, HSIZE
);
806 /* Generate output filename */
807 strcpy(ofname
, *fileptr
);
808 #if !defined BSD4_2 && !defined BEOS /* Short filenames */
809 if ((cp
= rindex(ofname
, PATH_SEP
)) != NULL
) cp
++;
812 if (zcat_flg
== 0 && (sufp
= rindex(cp
, '.')) != NULL
&&
813 strlen(sufp
) > 2) fprintf(stderr
,
814 "%s: part of filename extension will be replaced by XZ\n",
817 if (strlen(cp
) > 12) {
818 fprintf(stderr
,"%s: filename too long to tack on .Z\n",cp
);
822 #endif /* BSD4_2 Long filenames allowed */
825 if ((cp
= rindex(ofname
, '.')) == NULL
) strcat(ofname
, ".Z");
827 if(*(++cp
) != '\0') *(++cp
) = '\0';
828 strcat(ofname
, "XZ");
831 strcat(ofname
, ".Z");
836 /* Check for overwrite of existing file */
837 if (overwrite
== 0 && zcat_flg
== 0) {
838 if (stat(ofname
, &statbuf
) == 0) {
841 fprintf(stderr
, "%s already exists;", ofname
);
846 " do you wish to overwrite %s (y or n)? ", ofname
);
848 read(2, response
, 2);
849 while (response
[1] != '\n') {
850 if (read(2, response
+1, 1) < 0) { /* Ack! */
851 perror("stderr"); break;
857 if (response
[0] != 'y') {
858 fprintf(stderr
, "\tnot overwritten\n");
863 if(zcat_flg
== 0) { /* Open output file */
866 if (freopen(ofname
, do_decomp
&& image
== 2 ? "wt" : "wb",
869 if (freopen(ofname
, "w", stdout
) == NULL
) {
872 perror(ofname
); continue;
875 fprintf(stderr
, "%s: ", *fileptr
);
878 /* Actually do the compression/decompression */
879 if (do_decomp
== 0) compress();
883 else if (debug
== 0) decompress();
885 if (verbose
) dump_tab();
888 copystat(*fileptr
, ofname
); /* Copy stats */
889 if((exit_stat
== 1) || (!quiet
))
893 } else { /* Standard input */
894 if (do_decomp
== 0) {
897 if(verbose
) dump_tab();
902 /* Check the magic number */
904 if ((getchar()!=(magic_header
[0] & 0xFF))
905 || (getchar()!=(magic_header
[1] & 0xFF))) {
906 fprintf(stderr
, "stdin: not in compressed format\n");
909 maxbits
= getchar(); /* set -b from file */
910 block_compress
= maxbits
& BLOCK_MASK
;
912 maxmaxcode
= (code_int
) 1 << maxbits
;
913 fsize
= 100000; /* assume stdin large for USERMEM */
916 "stdin: compressed with %d bits, can only handle %d bits\n",
924 if (debug
== 0) decompress();
926 if (verbose
) dump_tab();
934 long int in_count
= 1; /* length of input */
935 long int bytes_out
; /* length of compressed output */
936 long int out_count
= 0; /* # of codes output (for debugging) */
939 * compress stdin to stdout
941 * Algorithm: use open addressing double hashing (no chaining) on the
942 * prefix code / next character combination. We do a variant of Knuth's
943 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
944 * secondary probe. Here, the modular division first probe is gives way
945 * to a faster exclusive-or manipulation. Also do block compression with
946 * an adaptive reset, whereby the code table is cleared when the compression
947 * ratio decreases, but after the table fills. The variable-length output
948 * codes are re-sized at this point, and a special CLEAR code is generated
949 * for the decompressor. Late addition: construct the table according to
950 * file size for noticeable speed improvement on small files. Please direct
951 * questions about this implementation to ames!jaw.
957 register code_int i
= 0;
959 register code_int ent
;
960 register code_int disp
;
961 register code_int hsize_reg
;
966 putchar(magic_header
[0]); putchar(magic_header
[1]);
967 putchar((char)(maxbits
| block_compress
));
971 #endif /* COMPATIBLE */
974 bytes_out
= 3; /* includes 3-byte header mojo */
979 checkpoint
= CHECK_GAP
;
980 maxcode
= MAXCODE(n_bits
= INIT_BITS
);
981 free_ent
= ((block_compress
) ? FIRST
: 256 );
986 for ( fcode
= (long) hsize
; fcode
< 65536L; fcode
*= 2L )
988 hshift
= 8 - hshift
; /* set hash code range bound */
991 cl_hash( (count_int
) hsize_reg
); /* clear hash table */
993 #ifdef SIGNED_COMPARE_SLOW
994 while ( (c
= getchar()) != (unsigned) EOF
) {
996 while ( (c
= getchar()) != EOF
) {
1000 if (c
== '\n') in_count
+= image
; else /* include CR if text mode */
1005 fcode
= (long) (((long) c
<< maxbits
) + ent
);
1006 i
= (((code_int
)c
<< hshift
) ^ ent
); /* xor hashing */
1008 if ( htabof (i
) == fcode
) {
1009 ent
= codetabof (i
);
1011 } else if ( (long)htabof (i
) < 0 ) /* empty slot */
1013 disp
= hsize_reg
- i
; /* secondary hash (after G. Knott) */
1017 if ( (i
-= disp
) < 0 )
1020 if ( htabof (i
) == fcode
) {
1021 ent
= codetabof (i
);
1024 if ( (long)htabof (i
) > 0 )
1027 output ( (code_int
) ent
);
1030 #ifdef SIGNED_COMPARE_SLOW
1031 if ( (unsigned) free_ent
< (unsigned) maxmaxcode
) {
1033 if ( free_ent
< maxmaxcode
) {
1035 codetabof (i
) = free_ent
++; /* code -> hashtable */
1038 else if ( (count_int
)in_count
>= checkpoint
&& block_compress
)
1042 * Put out the final code.
1044 output( (code_int
)ent
);
1046 output( (code_int
)-1 );
1049 * Print out stats on stderr
1051 if(zcat_flg
== 0 && !quiet
) {
1054 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
1055 in_count
, out_count
, bytes_out
);
1056 prratio( stderr
, in_count
, bytes_out
);
1057 fprintf( stderr
, "\n");
1058 fprintf( stderr
, "\tCompression as in compact: " );
1059 prratio( stderr
, in_count
-bytes_out
, in_count
);
1060 fprintf( stderr
, "\n");
1061 fprintf( stderr
, "\tLargest code (of last block) was %d (%d bits)\n",
1062 free_ent
- 1, n_bits
);
1064 fprintf( stderr
, "Compression: " );
1065 prratio( stderr
, in_count
-bytes_out
, in_count
);
1068 if(bytes_out
> in_count
) /* exit(2) if no savings */
1073 /*****************************************************************
1076 * Output the given code.
1078 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
1079 * that n_bits =< (long)wordsize - 1.
1081 * Outputs code to the file.
1083 * Chars are 8 bits long.
1085 * Maintain a BITS character long buffer (so that 8 codes will
1086 * fit in it exactly). Use the VAX insv instruction to insert each
1087 * code in turn. When the buffer fills up empty it and start over.
1090 static char buf
[BITS
];
1093 char_type lmask
[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
1094 char_type rmask
[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
1105 * On the VAX, it is important to have the register declarations
1106 * in exactly the order given, or the asm will break.
1108 register int r_off
= offset
, bits
= n_bits
;
1109 register char * bp
= buf
;
1113 fprintf( stderr
, "%5d%c", code
,
1114 (col
+=6) >= 74 ? (col
= 0, '\n') : ' ' );
1118 /* VAX DEPENDENT!! Implementation on other machines is below.
1120 * Translation: Insert BITS bits from the argument starting at
1121 * offset bits from the beginning of buf.
1123 0; /* Work around for pcc -O bug with asm and if stmt */
1124 asm( "insv 4(ap),r11,r10,(r9)" );
1125 #else /* not a vax */
1127 * byte/bit numbering on the VAX is simulated by the following code
1130 * Get to the first byte.
1135 * Since code is always >= 8 bits, only need to mask the first
1138 *bp
= (*bp
& rmask
[r_off
]) | (code
<< r_off
) & lmask
[r_off
];
1140 bits
-= (8 - r_off
);
1142 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1153 if ( offset
== (n_bits
<< 3) ) {
1164 * If the next entry is going to be too big for the code size,
1165 * then increase it, if possible.
1167 if ( free_ent
> maxcode
|| (clear_flg
> 0))
1170 * Write the whole buffer, because the input side won't
1171 * discover the size increase until after it has read it.
1174 if( fwrite( buf
, 1, n_bits
, stdout
) != n_bits
)
1176 bytes_out
+= n_bits
;
1181 maxcode
= MAXCODE (n_bits
= INIT_BITS
);
1186 if ( n_bits
== maxbits
)
1187 maxcode
= maxmaxcode
;
1189 maxcode
= MAXCODE(n_bits
);
1193 fprintf( stderr
, "\nChange to %d bits\n", n_bits
);
1200 * At EOF, write the rest of the buffer.
1203 fwrite( buf
, 1, (offset
+ 7) / 8, stdout
);
1204 bytes_out
+= (offset
+ 7) / 8;
1209 fprintf( stderr
, "\n" );
1211 if( ferror( stdout
) )
1217 * Decompress stdin to stdout. This routine adapts to the codes in the
1218 * file building the "string" table on-the-fly; requiring no table to
1219 * be stored in the compressed file. The tables used herein are shared
1220 * with those of the compress() routine. See the definitions above.
1227 register char_type far
*stackp
;
1229 register char_type
*stackp
;
1232 register int finchar
;
1233 register code_int code
, oldcode
, incode
;
1236 * As above, initialize the first 256 entries in the table.
1238 maxcode
= MAXCODE(n_bits
= INIT_BITS
);
1239 for ( code
= 255; code
>= 0; code
-- ) {
1240 tab_prefixof(code
) = 0;
1241 tab_suffixof(code
) = (char_type
)code
;
1243 free_ent
= ((block_compress
) ? FIRST
: 256 );
1245 finchar
= oldcode
= getcode();
1246 if(oldcode
== -1) /* EOF already? */
1247 return; /* Get out of here */
1248 putchar( (char)finchar
); /* first code must be 8 bits = char */
1249 if(ferror(stdout
)) /* Crash if can't write */
1253 while ( (code
= getcode()) > -1 ) {
1255 if ( (code
== CLEAR
) && block_compress
) {
1256 for ( code
= 255; code
>= 0; code
-- )
1257 tab_prefixof(code
) = 0;
1259 free_ent
= FIRST
- 1;
1260 if ( (code
= getcode ()) == -1 ) /* O, untimely death! */
1265 * Special case for KwKwK string.
1267 if ( code
>= free_ent
) {
1268 *stackp
++ = finchar
;
1273 * Generate output characters in reverse order
1275 #ifdef SIGNED_COMPARE_SLOW
1276 while ( ((unsigned long)code
) >= ((unsigned long)256) ) {
1278 while ( code
>= 256 ) {
1280 *stackp
++ = tab_suffixof(code
);
1281 code
= tab_prefixof(code
);
1283 *stackp
++ = finchar
= tab_suffixof(code
);
1286 * And put them out in forward order
1289 putchar ( *--stackp
);
1290 while ( stackp
> de_stack
);
1293 * Generate the new entry.
1295 if ( (code
=free_ent
) < maxmaxcode
) {
1296 tab_prefixof(code
) = (unsigned short)oldcode
;
1297 tab_suffixof(code
) = finchar
;
1301 * Remember previous code.
1310 /*****************************************************************
1313 * Read one code from the standard input. If EOF, return -1.
1317 * code or -1 is returned.
1323 * On the VAX, it is important to have the register declarations
1324 * in exactly the order given, or the asm will break.
1326 register code_int code
;
1327 static int offset
= 0, size
= 0;
1328 static char_type buf
[BITS
];
1329 register int r_off
, bits
;
1330 register char_type
*bp
= buf
;
1332 if ( clear_flg
> 0 || offset
>= size
|| free_ent
> maxcode
) {
1334 * If the next entry will be too big for the current code
1335 * size, then we must increase the size. This implies reading
1336 * a new buffer full, too.
1338 if ( free_ent
> maxcode
) {
1340 if ( n_bits
== maxbits
)
1341 maxcode
= maxmaxcode
; /* won't get any bigger now */
1343 maxcode
= MAXCODE(n_bits
);
1345 if ( clear_flg
> 0) {
1346 maxcode
= MAXCODE (n_bits
= INIT_BITS
);
1349 size
= fread( buf
, 1, n_bits
, stdin
);
1351 return -1; /* end of file */
1353 /* Round size down to integral number of codes */
1354 size
= (size
<< 3) - (n_bits
- 1);
1359 asm( "extzv r10,r9,(r8),r11" );
1360 #else /* not a vax */
1362 * Get to the first byte.
1366 /* Get first part (low order bits) */
1368 code
= ((*bp
++ >> r_off
) & rmask
[8 - r_off
]) & 0xff;
1370 code
= (*bp
++ >> r_off
);
1371 #endif /* NO_UCHAR */
1372 bits
-= (8 - r_off
);
1373 r_off
= 8 - r_off
; /* now, offset into code word */
1374 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
1377 code
|= (*bp
++ & 0xff) << r_off
;
1379 code
|= *bp
++ << r_off
;
1380 #endif /* NO_UCHAR */
1384 /* high order bits. */
1385 code
|= (*bp
& rmask
[bits
]) << r_off
;
1393 rindex(s
, c
) /* For those who don't have it in libc.a */
1394 register char *s
, c
;
1397 for (p
= NULL
; *s
; s
++)
1407 * Just print out codes from input file. For debugging.
1412 bits
= n_bits
= INIT_BITS
;
1413 maxcode
= MAXCODE(n_bits
);
1414 free_ent
= ((block_compress
) ? FIRST
: 256 );
1415 while ( ( code
= getcode() ) >= 0 ) {
1416 if ( (code
== CLEAR
) && block_compress
) {
1417 free_ent
= FIRST
- 1;
1420 else if ( free_ent
< maxmaxcode
)
1422 if ( bits
!= n_bits
) {
1423 fprintf(stderr
, "\nChange to %d bits\n", n_bits
);
1427 fprintf(stderr
, "%5d%c", code
, (col
+=6) >= 74 ? (col
= 0, '\n') : ' ' );
1429 putc( '\n', stderr
);
1433 code_int sorttab
[1<<BITS
]; /* sorted pointers into htab */
1435 dump_tab() /* dump string table */
1437 register int i
, first
;
1439 #define STACK_SIZE 15000
1440 int stack_top
= STACK_SIZE
;
1443 if(do_decomp
== 0) { /* compressing */
1444 register int flag
= 1;
1446 for(i
=0; i
<hsize
; i
++) { /* build sort pointers */
1447 if((long)htabof(i
) >= 0) {
1448 sorttab
[codetabof(i
)] = i
;
1451 first
= block_compress
? FIRST
: 256;
1452 for(i
= first
; i
< free_ent
; i
++) {
1453 fprintf(stderr
, "%5d: \"", i
);
1454 de_stack
[--stack_top
] = '\n';
1455 de_stack
[--stack_top
] = '"';
1456 stack_top
= in_stack((htabof(sorttab
[i
])>>maxbits
)&0xff,
1458 for(ent
=htabof(sorttab
[i
]) & ((1<<maxbits
)-1);
1460 ent
=htabof(sorttab
[ent
]) & ((1<<maxbits
)-1)) {
1461 stack_top
= in_stack(htabof(sorttab
[ent
]) >> maxbits
,
1464 stack_top
= in_stack(ent
, stack_top
);
1465 fwrite( &de_stack
[stack_top
], 1, STACK_SIZE
-stack_top
, stderr
);
1466 stack_top
= STACK_SIZE
;
1468 } else if(!debug
) { /* decompressing */
1470 for ( i
= 0; i
< free_ent
; i
++ ) {
1472 c
= tab_suffixof(ent
);
1473 if ( isascii(c
) && isprint(c
) )
1474 fprintf( stderr
, "%5d: %5d/'%c' \"",
1475 ent
, tab_prefixof(ent
), c
);
1477 fprintf( stderr
, "%5d: %5d/\\%03o \"",
1478 ent
, tab_prefixof(ent
), c
);
1479 de_stack
[--stack_top
] = '\n';
1480 de_stack
[--stack_top
] = '"';
1481 for ( ; ent
!= NULL
;
1482 ent
= (ent
>= FIRST
? tab_prefixof(ent
) : NULL
) ) {
1483 stack_top
= in_stack(tab_suffixof(ent
), stack_top
);
1485 fwrite( &de_stack
[stack_top
], 1, STACK_SIZE
- stack_top
, stderr
);
1486 stack_top
= STACK_SIZE
;
1492 in_stack(c
, stack_top
)
1493 register c
, stack_top
;
1495 if ( (isascii(c
) && isprint(c
) && c
!= '\\') || c
== ' ' ) {
1496 de_stack
[--stack_top
] = c
;
1499 case '\n': de_stack
[--stack_top
] = 'n'; break;
1500 case '\t': de_stack
[--stack_top
] = 't'; break;
1501 case '\b': de_stack
[--stack_top
] = 'b'; break;
1502 case '\f': de_stack
[--stack_top
] = 'f'; break;
1503 case '\r': de_stack
[--stack_top
] = 'r'; break;
1504 case '\\': de_stack
[--stack_top
] = '\\'; break;
1506 de_stack
[--stack_top
] = '0' + c
% 8;
1507 de_stack
[--stack_top
] = '0' + (c
/ 8) % 8;
1508 de_stack
[--stack_top
] = '0' + c
/ 64;
1511 de_stack
[--stack_top
] = '\\';
1525 copystat(char *ifname
, char *ofname
)
1527 struct stat statbuf
;
1529 struct utimbuf timep
;
1532 if (_osmajor
< 3) freopen("CON","at",stdout
); else /* MS-DOS 2.xx bug */
1536 if (stat(ifname
, &statbuf
)) { /* Get stat on input file */
1542 if ((statbuf
.st_mode
& S_IFMT
/*0170000*/) != S_IFREG
/*0100000*/) {
1544 fprintf(stderr
, "%s: ", ifname
);
1545 fprintf(stderr
, " -- not a regular file: unchanged");
1547 } else if (statbuf
.st_nlink
> 1) {
1549 fprintf(stderr
, "%s: ", ifname
);
1550 fprintf(stderr
, " -- has %d other links: unchanged",
1551 statbuf
.st_nlink
- 1);
1553 } else if (exit_stat
== 2 && (!force
)) { /* No compression: remove file.Z */
1555 if (exit_stat
== 2 && (!force
)) { /* No compression: remove file.Z */
1559 fprintf(stderr
, " -- file unchanged");
1560 } else { /* ***** Successful Compression ***** */
1562 mode
= statbuf
.st_mode
& 07777;
1563 if (chmod(ofname
, mode
)) /* Copy modes */
1567 chown(ofname
, statbuf
.st_uid
, statbuf
.st_gid
); /* Copy ownership */
1570 timep
.actime
= statbuf
.st_atime
;
1571 timep
.modtime
= statbuf
.st_mtime
;
1572 utime(ofname
, &timep
); /* Update last accessed and modified times */
1575 if (unlink(ifname
)) { /* Remove input file */
1579 fprintf(stderr
, " -- replaced with %s", ofname
);
1580 return; /* Successful return */
1583 /* Unsuccessful return -- one of the tests failed */
1584 if (unlink(ofname
)) {
1591 * This routine returns 1 if we are running in the foreground and stderr
1596 if(bgnd_flag
!= SIG_DFL
) { /* background? */
1598 } else { /* foreground */
1599 if(isatty(2)) { /* and stderr is a tty */
1618 oops ( ) /* wild pointer -- assume bad input */
1620 if ( do_decomp
== 1 )
1621 fprintf ( stderr
, "uncompress: corrupt input\n" );
1627 cl_block () /* table clear for block compress */
1629 register long int rat
;
1631 checkpoint
= in_count
+ CHECK_GAP
;
1634 fprintf ( stderr
, "count: %ld, ratio: ", in_count
);
1635 prratio ( stderr
, in_count
, bytes_out
);
1636 fprintf ( stderr
, "\n");
1640 if(in_count
> 0x007fffff) { /* shift will overflow */
1641 rat
= bytes_out
>> 8;
1642 if(rat
== 0) { /* Don't divide by zero */
1645 rat
= in_count
/ rat
;
1648 rat
= (in_count
<< 8) / bytes_out
; /* 8 fractional bits */
1650 if ( rat
> ratio
) {
1656 dump_tab(); /* dump string table */
1658 cl_hash ( (count_int
) hsize
);
1661 output ( (code_int
) CLEAR
);
1664 fprintf ( stderr
, "clear\n" );
1669 cl_hash(hsize
) /* reset code table */
1670 register count_int hsize
;
1675 register long k
= hsize
;
1678 register count_int far
*htab_p
;
1680 register count_int
*htab_p
;
1683 #else /* Normal machine */
1684 register count_int
*htab_p
= htab
+hsize
;
1685 #endif /* XENIX_16 */
1688 register long m1
= -1;
1691 for(j
=0; j
<=8 && k
>=0; j
++,k
-=8192) {
1696 htab_p
= &(htab
[j
][i
]);
1702 do { /* might use Sys V memset(3) here */
1720 } while ((i
-= 16) >= 0);
1725 for ( i
+= 16; i
> 0; i
-- )
1729 prratio(stream
, num
, den
)
1735 register long q
; /* permits |result| > 655.36% */
1737 register int q
; /* Doesn't need to be long */
1740 if(num
> 214748L) { /* 2147483647/10000 */
1741 q
= num
/ (den
/ 10000L);
1743 q
= 10000L * num
/ den
; /* Long calculations, though */
1749 fprintf(stream
, "%d.%02d%%", (int)(q
/ 100), (int)(q
% 100));
1754 fprintf(stderr
, "%s\n", rcs_ident
);
1755 fprintf(stderr
, "Options: ");
1757 fprintf(stderr
, "vax, ");
1760 fprintf(stderr
, "NO_UCHAR, ");
1762 #ifdef SIGNED_COMPARE_SLOW
1763 fprintf(stderr
, "SIGNED_COMPARE_SLOW, ");
1766 fprintf(stderr
, "MSDOS, ");
1769 fprintf(stderr
, "XENIX_16, ");
1772 fprintf(stderr
, "COMPATIBLE, ");
1775 fprintf(stderr
, "DEBUG, ");
1778 fprintf(stderr
, "BSD4_2, ");
1780 fprintf(stderr
, "BITS = %d\n", BITS
);