2 ----------------------------------------------------------------------------
6 This is the LZHuf compression algorithm as used in DPBOX and F6FBB.
8 ----------------------------------------------------------------------------
10 /**************************************************************
12 written by Haruyasu Yoshizaki 11/20/1988
13 some minor changes 4/6/1989
14 comments translated by Haruhiko Okumura 4/7/1989
16 minor beautifications and adjustments for compiling under Linux
17 by Markus Gutschke <gutschk@math.uni-muenster.de>
20 Modifications to allow use as a filter by Ken Yap <ken_yap@users.sourceforge.net>.
23 Small mod to cope with running on big-endian machines
24 by Jim Hague <jim.hague@acm.org)
27 Make compression statistics report shorter
28 by Ken Yap <ken_yap@users.sourceforge.net>.
30 **************************************************************/
41 #define Fprintf(x) fprintf x
42 #if defined(ENCODE) || defined(DECODE)
43 static char wterr
[] = "Can't write.";
45 static unsigned long int codesize
= 0;
47 static unsigned long int printcount
= 0;
54 FILE *infile
, *outfile
;
56 #if defined(ENCODE) || defined(DECODE)
57 static unsigned long int textsize
= 0;
59 static __inline__
void Error(char *message
)
61 Fprintf((stderr
, "\n%s\n", message
));
65 /* These will be a complete waste of time on a lo-endian */
66 /* system, but it only gets done once so WTF. */
67 static unsigned long i86ul_to_host(unsigned long ul
)
69 unsigned long res
= 0;
78 for (i
= 3; i
>= 0; i
--)
79 res
= (res
<< 8) + u
.c
[i
];
83 static unsigned long host_to_i86ul(unsigned long ul
)
92 for (i
= 0; i
< 4; i
++)
101 /********** LZSS compression **********/
103 #define N 4096 /* buffer size */
104 /* Attention: When using this file for f6fbb-type compressed data exchange,
105 set N to 2048 ! (DL8HBS) */
106 #define F 60 /* lookahead buffer size */
108 #define NIL N /* leaf of tree */
110 #if defined(ENCODE) || defined(DECODE)
116 static int match_position
, match_length
,
117 lson
[N
+ 1], rson
[N
+ 257], dad
[N
+ 1];
119 static void InitTree(void) /* initialize trees */
123 for (i
= N
+ 1; i
<= N
+ 256; i
++)
124 rson
[i
] = NIL
; /* root */
125 for (i
= 0; i
< N
; i
++)
126 dad
[i
] = NIL
; /* node */
129 static void InsertNode(int r
) /* insert to tree */
138 rson
[r
] = lson
[r
] = NIL
;
158 for (i
= 1; i
< F
; i
++)
159 if ((cmp
= key
[i
] - text_buf
[p
+ i
]) != 0)
162 if (i
> match_length
) {
163 match_position
= ((r
- p
) & (N
- 1)) - 1;
164 if ((match_length
= i
) >= F
)
167 if (i
== match_length
) {
168 if ((c
= ((r
- p
) & (N
- 1)) - 1) < match_position
) {
179 if (rson
[dad
[p
]] == p
)
183 dad
[p
] = NIL
; /* remove p */
186 static void DeleteNode(int p
) /* remove from tree */
191 return; /* not registered */
199 if (rson
[q
] != NIL
) {
202 } while (rson
[q
] != NIL
);
203 rson
[dad
[q
]] = lson
[q
];
204 dad
[lson
[q
]] = dad
[q
];
212 if (rson
[dad
[p
]] == p
)
222 #define N_CHAR (256 - THRESHOLD + F)
223 /* kinds of characters (character code = 0..N_CHAR-1) */
224 #define T (N_CHAR * 2 - 1) /* size of table */
225 #define R (T - 1) /* position of root */
226 #define MAX_FREQ 0x8000 /* updates tree when the */
227 /* root frequency comes to this value. */
228 typedef unsigned char uchar
;
230 /* table for encoding and decoding the upper 6 bits of position */
235 static uchar p_len
[64] = {
236 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
237 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
238 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
239 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
240 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
241 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
242 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
243 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
246 static uchar p_code
[64] = {
247 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
248 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
249 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
250 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
251 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
252 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
253 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
254 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
260 static uchar d_code
[256] = {
261 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
262 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
263 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
264 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
265 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
266 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
267 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
268 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
269 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
270 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
271 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
272 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
273 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
274 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
275 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
276 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
277 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
278 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
279 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
280 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
281 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
282 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
283 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
284 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
285 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
286 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
287 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
288 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
289 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
290 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
291 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
292 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
295 static uchar d_len
[256] = {
296 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
297 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
298 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
299 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
300 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
301 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
302 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
303 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
304 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
305 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
306 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
307 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
308 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
309 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
310 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
311 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
312 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
313 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
314 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
315 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
316 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
317 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
318 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
319 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
320 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
321 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
322 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
323 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
324 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
325 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
326 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
327 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
331 #if defined(ENCODE) || defined(DECODE)
332 static unsigned freq
[T
+ 1]; /* frequency table */
334 static int prnt
[T
+ N_CHAR
]; /* pointers to parent nodes, except for the */
335 /* elements [T..T + N_CHAR - 1] which are used to get */
336 /* the positions of leaves corresponding to the codes. */
338 static int son
[T
]; /* pointers to child nodes (son[], son[] + 1) */
342 static unsigned getbuf
= 0;
343 static uchar getlen
= 0;
345 static int GetBit(void) /* get one bit */
349 while (getlen
<= 8) {
350 if ((i
= getc(infile
)) < 0) i
= 0;
351 getbuf
|= i
<< (8 - getlen
);
357 return ((signed short)i
< 0);
360 static int GetByte(void) /* get one byte */
364 while (getlen
<= 8) {
365 if ((signed short)(i
= getc(infile
)) < 0) i
= 0;
366 getbuf
|= i
<< (8 - getlen
);
377 static unsigned putbuf
= 0;
378 static uchar putlen
= 0;
380 static void Putcode(int l
, unsigned c
) /* output c bits of code */
382 putbuf
|= c
>> putlen
;
383 if ((putlen
+= l
) >= 8) {
384 if (putc(putbuf
>> 8, outfile
) == EOF
) {
387 if ((putlen
-= 8) >= 8) {
388 if (putc(putbuf
, outfile
) == EOF
) {
395 putbuf
= c
<< (l
- putlen
);
406 /* initialization of tree */
408 #if defined(ENCODE) || defined(DECODE)
409 static void StartHuff(void)
413 for (i
= 0; i
< N_CHAR
; i
++) {
420 freq
[j
] = freq
[i
] + freq
[i
+ 1];
422 prnt
[i
] = prnt
[i
+ 1] = j
;
429 /* reconstruction of tree */
431 static void reconst(void)
436 /* collect leaf nodes in the first half of the table */
437 /* and replace the freq by (freq + 1) / 2. */
439 for (i
= 0; i
< T
; i
++) {
441 freq
[j
] = (freq
[i
] + 1) / 2;
446 /* begin constructing tree by connecting sons */
447 for (i
= 0, j
= N_CHAR
; j
< T
; i
+= 2, j
++) {
449 f
= freq
[j
] = freq
[i
] + freq
[k
];
450 for (k
= j
- 1; f
< freq
[k
]; k
--);
453 memmove(&freq
[k
+ 1], &freq
[k
], l
);
455 memmove(&son
[k
+ 1], &son
[k
], l
);
459 for (i
= 0; i
< T
; i
++) {
460 if ((k
= son
[i
]) >= T
) {
463 prnt
[k
] = prnt
[k
+ 1] = i
;
468 /* increment frequency of given code by one, and update tree */
470 static void update(int c
)
474 if (freq
[R
] == MAX_FREQ
) {
481 /* if the order is disturbed, exchange nodes */
482 if (k
> freq
[l
= c
+ 1]) {
483 while (k
> freq
[++l
]);
490 if (i
< T
) prnt
[i
+ 1] = l
;
496 if (j
< T
) prnt
[j
+ 1] = c
;
501 } while ((c
= prnt
[c
]) != 0); /* repeat up to root */
507 static unsigned code
, len
;
510 static void EncodeChar(unsigned c
)
519 /* travel from leaf to root */
523 /* if node's address is odd-numbered, choose bigger brother node */
524 if (k
& 1) i
+= 0x8000;
527 } while ((k
= prnt
[k
]) != R
);
536 static void EncodePosition(unsigned c
)
540 /* output upper 6 bits by table lookup */
542 Putcode(p_len
[i
], (unsigned)p_code
[i
] << 8);
544 /* output lower 6 bits verbatim */
545 Putcode(6, (c
& 0x3f) << 10);
548 static void EncodeEnd(void)
551 if (putc(putbuf
>> 8, outfile
) == EOF
) {
562 static int DecodeChar(void)
568 /* travel from root to leaf, */
569 /* choosing the smaller child node (son[]) if the read bit is 0, */
570 /* the bigger (son[]+1} if 1 */
580 static int DecodePosition(void)
584 /* recover upper 6 bits from table */
586 c
= (unsigned)d_code
[i
] << 6;
589 /* read lower 6 bits verbatim */
592 i
= (i
<< 1) + GetBit();
594 return c
| (i
& 0x3f);
601 void Encode(void) /* compression */
603 int i
, c
, len
, r
, s
, last_match_length
;
606 fseek(infile
, 0L, 2);
607 textsize
= ftell(infile
);
609 if ((signed long)textsize
< 0)
610 Fprintf((stderr
, "Errno: %d", errno
));
612 tw
= host_to_i86ul(textsize
);
613 if (fwrite(&tw
, sizeof tw
, 1, outfile
) < 1)
614 Error(wterr
); /* output size of text */
618 textsize
= 0; /* rewind and re-read */
623 for (i
= s
; i
< r
; i
++)
625 for (len
= 0; len
< F
&& (c
= getc(infile
)) != EOF
; len
++)
626 text_buf
[r
+ len
] = c
;
628 for (i
= 1; i
<= F
; i
++)
632 if (match_length
> len
)
634 if (match_length
<= THRESHOLD
) {
636 EncodeChar(text_buf
[r
]);
638 EncodeChar(255 - THRESHOLD
+ match_length
);
639 EncodePosition(match_position
);
641 last_match_length
= match_length
;
642 for (i
= 0; i
< last_match_length
&&
643 (c
= getc(infile
)) != EOF
; i
++) {
648 s
= (s
+ 1) & (N
- 1);
649 r
= (r
+ 1) & (N
- 1);
652 if ((textsize
+= i
) > printcount
) {
653 #if defined(VERBOSE) && defined(EXTRAVERBOSE)
654 Fprintf((stderr
, "%12ld\r", textsize
));
658 while (i
++ < last_match_length
) {
660 s
= (s
+ 1) & (N
- 1);
661 r
= (r
+ 1) & (N
- 1);
662 if (--len
) InsertNode(r
);
667 Fprintf((stderr
, "input size %ld bytes\n", codesize
));
668 Fprintf((stderr
, "output size %ld bytes\n", textsize
));
669 Fprintf((stderr
, "input/output %.3f\n", (double)codesize
/ textsize
));
671 Fprintf((stderr
, "input/output = %ld/%ld = %.3f\n", codesize
, textsize
,
672 (double)codesize
/ textsize
));
678 void Decode(void) /* recover */
681 unsigned long int count
;
684 if (fread(&tw
, sizeof tw
, 1, infile
) < 1)
685 Error("Can't read"); /* read size of text */
686 textsize
= i86ul_to_host(tw
);
690 for (i
= 0; i
< N
- F
; i
++)
693 for (count
= 0; count
< textsize
; ) {
696 if (putc(c
, outfile
) == EOF
) {
703 i
= (r
- DecodePosition() - 1) & (N
- 1);
704 j
= c
- 255 + THRESHOLD
;
705 for (k
= 0; k
< j
; k
++) {
706 c
= text_buf
[(i
+ k
) & (N
- 1)];
707 if (putc(c
, outfile
) == EOF
) {
715 if (count
> printcount
) {
716 #if defined(VERBOSE) && defined(EXTRAVERBOSE)
717 Fprintf((stderr
, "%12ld\r", count
));
722 Fprintf((stderr
, "%12ld\n", count
));
727 int main(int argc
, char *argv
[])
735 if ((f
= tmpfile()) == NULL
) {
739 while ((c
= getchar()) != EOF
)
743 else if (argc
!= 4) {
744 Fprintf((stderr
, "'lzhuf e file1 file2' encodes file1 into file2.\n"
745 "'lzhuf d file2 file1' decodes file2 into file1.\n"));
749 if ((s
= argv
[1], s
[1] || strpbrk(s
, "DEde") == NULL
)
750 || (s
= argv
[2], (infile
= fopen(s
, "rb")) == NULL
)
751 || (s
= argv
[3], (outfile
= fopen(s
, "wb")) == NULL
)) {
752 Fprintf((stderr
, "??? %s\n", s
));
756 if (toupper(*argv
[1]) == 'E')