Working on lzhuf
[deark.git] / foreign / lzhuf.h
blobb876f23974aa03e3b4bd4374df5447b67212fd16
1 // See the file readme-lzhuf.txt for more information about this file.
2 // Modifications for Deark are Copyright (C) 2021 Jason Summers, and have the
3 // same terms of use as the main part of Deark.
4 // Alternatively, at your option, the modifications for Deark may be treated
5 // as public domain.
7 // Intro from the original software:
9 /**************************************************************
10 lzhuf.c
11 written by Haruyasu Yoshizaki 11/20/1988
12 some minor changes 4/6/1989
13 comments translated by Haruhiko Okumura 4/7/1989
14 **************************************************************/
17 #define LZHUF_N 4096 /* buffer size */
18 #define LZHUF_F 60 /* lookahead buffer size */
19 #define LZHUF_THRESHOLD 2
20 #define LZHUF_N_CHAR (256 - LZHUF_THRESHOLD + LZHUF_F)
21 /* kinds of characters (character code = 0..N_CHAR-1) */
22 #define LZHUF_T (LZHUF_N_CHAR * 2 - 1) /* size of table */
23 #define LZHUF_R (LZHUF_T - 1) /* position of root */
24 #define LZHUF_MAX_FREQ 0x8000 /* updates tree when the */
25 /* root frequency comes to this value. */
27 struct lzahuf_ctx {
28 deark *c;
29 const char *modname;
30 struct de_dfilter_in_params *dcmpri;
31 struct de_dfilter_out_params *dcmpro;
32 struct de_dfilter_results *dres;
33 int errflag;
34 i64 nbytes_written;
35 struct de_lz77buffer *ringbuf;
36 struct de_bitreader bitrd;
38 u16 freq[LZHUF_T + 1]; /* frequency table */
40 i16 prnt[LZHUF_T + LZHUF_N_CHAR]; /* pointers to parent nodes, except for the */
41 /* elements [T..T + N_CHAR - 1] which are used to get */
42 /* the positions of leaves corresponding to the codes. */
44 i16 son[LZHUF_T]; /* pointers to child nodes (son[], son[] + 1) */
48 /* initialization of tree */
50 static void lzhuf_StartHuff(struct lzahuf_ctx *cctx)
52 int i, j;
54 for (i = 0; i < LZHUF_N_CHAR; i++) {
55 cctx->freq[i] = 1;
56 cctx->son[i] = i + LZHUF_T;
57 cctx->prnt[i + LZHUF_T] = i;
59 i = 0; j = LZHUF_N_CHAR;
60 while (j <= LZHUF_R) {
61 cctx->freq[j] = cctx->freq[i] + cctx->freq[i + 1];
62 cctx->son[j] = i;
63 cctx->prnt[i] = cctx->prnt[i + 1] = j;
64 i += 2; j++;
66 cctx->freq[LZHUF_T] = 0xffff;
67 cctx->prnt[LZHUF_R] = 0;
71 /* reconstruction of tree */
73 static void lzhuf_reconst(struct lzahuf_ctx *cctx)
75 int i, j, k;
76 UI f, l;
78 /* collect leaf nodes in the first half of the table */
79 /* and replace the freq by (freq + 1) / 2. */
80 j = 0;
81 for (i = 0; i < LZHUF_T; i++) {
82 if (cctx->son[i] >= LZHUF_T) {
83 cctx->freq[j] = (cctx->freq[i] + 1) / 2;
84 cctx->son[j] = cctx->son[i];
85 j++;
88 /* begin constructing tree by connecting sons */
89 for (i = 0, j = LZHUF_N_CHAR; j < LZHUF_T; i += 2, j++) {
90 k = i + 1;
91 f = cctx->freq[j] = cctx->freq[i] + cctx->freq[k];
92 for (k = j - 1; f < cctx->freq[k]; k--);
93 k++;
94 l = (j - k);
95 de_memmove(&cctx->freq[k + 1], &cctx->freq[k], l*sizeof(cctx->freq[0]));
96 cctx->freq[k] = f;
97 de_memmove(&cctx->son[k + 1], &cctx->son[k], l*sizeof(cctx->son[0]));
98 cctx->son[k] = i;
100 /* connect prnt */
101 for (i = 0; i < LZHUF_T; i++) {
102 if ((k = cctx->son[i]) >= LZHUF_T) {
103 cctx->prnt[k] = i;
104 } else {
105 cctx->prnt[k] = cctx->prnt[k + 1] = i;
111 /* increment frequency of given code by one, and update tree */
113 static void lzhuf_update(struct lzahuf_ctx *cctx, int c)
115 int i, j, l;
116 UI k;
117 int counter = 0;
119 if (cctx->freq[LZHUF_R] == LZHUF_MAX_FREQ) {
120 lzhuf_reconst(cctx);
122 c = cctx->prnt[c + LZHUF_T];
123 do {
124 counter++;
125 if(counter > LZHUF_T) { // infinite loop?
126 cctx->errflag = 1;
127 return;
130 k = ++cctx->freq[c];
132 /* if the order is disturbed, exchange nodes */
133 if (k > cctx->freq[l = c + 1]) {
134 while (k > cctx->freq[++l]);
135 l--;
136 cctx->freq[c] = cctx->freq[l];
137 cctx->freq[l] = k;
139 i = cctx->son[c];
140 cctx->prnt[i] = l;
141 if (i < LZHUF_T) cctx->prnt[i + 1] = l;
143 j = cctx->son[l];
144 cctx->son[l] = i;
146 cctx->prnt[j] = c;
147 if (j < LZHUF_T) cctx->prnt[j + 1] = c;
148 cctx->son[c] = j;
150 c = l;
152 } while ((c = cctx->prnt[c]) != 0); /* repeat up to root */
155 static int lzhuf_DecodeChar(struct lzahuf_ctx *cctx)
157 UI c;
158 int counter = 0;
160 c = cctx->son[LZHUF_R];
162 /* travel from root to leaf, */
163 /* choosing the smaller child node (son[]) if the read bit is 0, */
164 /* the bigger (son[]+1} if 1 */
165 while (c < LZHUF_T) {
166 counter++;
167 if(counter > LZHUF_T) { // infinite loop?
168 cctx->errflag = 1;
169 return 0;
172 c += (UI)de_bitreader_getbits(&cctx->bitrd, 1);
173 c = cctx->son[c];
175 c -= LZHUF_T;
176 lzhuf_update(cctx, c);
177 return c;
180 static int lzhuf_DecodePosition(struct lzahuf_ctx *cctx)
182 UI i, j, c;
184 /* recover upper 6 bits from table */
185 i = (UI)de_bitreader_getbits(&cctx->bitrd, 8);
186 c = (UI)fmtutil_get_lzhuf_d_code(i) << 6;
187 j = fmtutil_get_lzhuf_d_len(i);
189 /* read lower 6 bits verbatim */
190 j -= 2;
191 i = (i<<j) | (UI)de_bitreader_getbits(&cctx->bitrd, j);
192 return c | (i & 0x3f);
195 static int lzah_have_enough_output(struct lzahuf_ctx *cctx)
197 if(cctx->dcmpro->len_known) {
198 if(cctx->nbytes_written >= cctx->dcmpro->expected_len) {
199 return 1;
202 return 0;
205 static void lzah_lz77buf_writebytecb(struct de_lz77buffer *rb, u8 n)
207 struct lzahuf_ctx *cctx = (struct lzahuf_ctx*)rb->userdata;
209 if(lzah_have_enough_output(cctx)) {
210 return;
212 dbuf_writebyte(cctx->dcmpro->f, n);
213 cctx->nbytes_written++;
216 static void lzhuf_Decode(struct lzahuf_ctx *cctx) /* recover */
218 int i, j, c;
220 lzhuf_StartHuff(cctx);
222 cctx->ringbuf = de_lz77buffer_create(cctx->c, LZHUF_N);
223 cctx->ringbuf->userdata = (void*)cctx;
224 cctx->ringbuf->writebyte_cb = lzah_lz77buf_writebytecb;
225 de_lz77buffer_clear(cctx->ringbuf, 0x20);
226 de_lz77buffer_set_curpos(cctx->ringbuf, LZHUF_N - LZHUF_F);
228 while(1) {
229 if(cctx->errflag) goto done;
230 if(lzah_have_enough_output(cctx)) {
231 goto done;
233 if(cctx->bitrd.eof_flag) {
234 goto done;
237 c = lzhuf_DecodeChar(cctx);
238 if(cctx->errflag) goto done;
239 if (c < 256) {
240 de_lz77buffer_add_literal_byte(cctx->ringbuf, (u8)c);
242 else {
243 // i is the distance back
244 i = lzhuf_DecodePosition(cctx);
245 if(cctx->errflag) goto done;
247 // j is the match length
248 j = c - 255 + LZHUF_THRESHOLD;
250 de_lz77buffer_copy_from_hist(cctx->ringbuf,
251 (UI)(cctx->ringbuf->curpos - (UI)i - 1), j);
255 done:
256 if(cctx->errflag) {
257 de_dfilter_set_generic_error(cctx->c, cctx->dres, cctx->modname);
259 de_lz77buffer_destroy(cctx->c, cctx->ringbuf);
260 cctx->ringbuf = NULL;