8 * Copyright (C) 1991-1998, Thomas G. Lane.
9 * This file is part of the Independent JPEG Group's software.
10 * For conditions of distribution and use, see the accompanying README file.
12 * This file contains Huffman entropy encoding routines which are shared
13 * by the sequential, progressive and lossless encoders.
16 #define JPEG_INTERNALS
19 #include "jchuff.h" /* Declarations shared with jc*huff.c */
23 * Compute the derived values for a Huffman table.
24 * This routine also performs some validation checks on the table.
28 jpeg_make_c_derived_tbl (j_compress_ptr cinfo
, boolean isDC
, int tblno
,
29 c_derived_tbl
** pdtbl
)
33 int p
, i
, l
, lastp
, si
, maxsymbol
;
35 unsigned int huffcode
[257];
38 /* Note that huffsize[] and huffcode[] are filled in code-length order,
39 * paralleling the order of the symbols themselves in htbl->huffval[].
42 /* Find the input Huffman table */
43 if (tblno
< 0 || tblno
>= NUM_HUFF_TBLS
)
44 ERREXIT1(cinfo
, JERR_NO_HUFF_TABLE
, tblno
);
46 isDC
? cinfo
->dc_huff_tbl_ptrs
[tblno
] : cinfo
->ac_huff_tbl_ptrs
[tblno
];
48 ERREXIT1(cinfo
, JERR_NO_HUFF_TABLE
, tblno
);
50 /* Allocate a workspace if we haven't already done so. */
52 *pdtbl
= (c_derived_tbl
*)
53 (*cinfo
->mem
->alloc_small
) ((j_common_ptr
) cinfo
, JPOOL_IMAGE
,
54 SIZEOF(c_derived_tbl
));
57 /* Figure C.1: make table of Huffman code length for each symbol */
60 for (l
= 1; l
<= 16; l
++) {
61 i
= (int) htbl
->bits
[l
];
62 if (i
< 0 || p
+ i
> 256) /* protect against table overrun */
63 ERREXIT(cinfo
, JERR_BAD_HUFF_TABLE
);
65 huffsize
[p
++] = (char) l
;
70 /* Figure C.2: generate the codes themselves */
71 /* We also validate that the counts represent a legal Huffman code tree. */
77 while (((int) huffsize
[p
]) == si
) {
81 /* code is now 1 more than the last code used for codelength si; but
82 * it must still fit in si bits, since no code is allowed to be all ones.
84 if (((INT32
) code
) >= (((INT32
) 1) << si
))
85 ERREXIT(cinfo
, JERR_BAD_HUFF_TABLE
);
90 /* Figure C.3: generate encoding tables */
91 /* These are code and size indexed by symbol value */
93 /* Set all codeless symbols to have code length 0;
94 * this lets us detect duplicate VAL entries here, and later
95 * allows emit_bits to detect any attempt to emit such symbols.
97 MEMZERO(dtbl
->ehufsi
, SIZEOF(dtbl
->ehufsi
));
99 /* This is also a convenient place to check for out-of-range
100 * and duplicated VAL entries. We allow 0..255 for AC symbols
101 * but only 0..16 for DC. (We could constrain them further
102 * based on data depth and mode, but this seems enough.)
104 maxsymbol
= isDC
? 16 : 255;
106 for (p
= 0; p
< lastp
; p
++) {
107 i
= htbl
->huffval
[p
];
108 if (i
< 0 || i
> maxsymbol
|| dtbl
->ehufsi
[i
])
109 ERREXIT(cinfo
, JERR_BAD_HUFF_TABLE
);
110 dtbl
->ehufco
[i
] = huffcode
[p
];
111 dtbl
->ehufsi
[i
] = huffsize
[p
];
117 * Generate the best Huffman code table for the given counts, fill htbl.
119 * The JPEG standard requires that no symbol be assigned a codeword of all
120 * one bits (so that padding bits added at the end of a compressed segment
121 * can't look like a valid code). Because of the canonical ordering of
122 * codewords, this just means that there must be an unused slot in the
123 * longest codeword length category. Section K.2 of the JPEG spec suggests
124 * reserving such a slot by pretending that symbol 256 is a valid symbol
125 * with count 1. In theory that's not optimal; giving it count zero but
126 * including it in the symbol set anyway should give a better Huffman code.
127 * But the theoretically better code actually seems to come out worse in
128 * practice, because it produces more all-ones bytes (which incur stuffed
129 * zero bytes in the final file). In any case the difference is tiny.
131 * The JPEG standard requires Huffman codes to be no more than 16 bits long.
132 * If some symbols have a very small but nonzero probability, the Huffman tree
133 * must be adjusted to meet the code length restriction. We currently use
134 * the adjustment method suggested in JPEG section K.2. This method is *not*
135 * optimal; it may not choose the best possible limited-length code. But
136 * typically only very-low-frequency symbols will be given less-than-optimal
137 * lengths, so the code is almost optimal. Experimental comparisons against
138 * an optimal limited-length-code algorithm indicate that the difference is
139 * microscopic --- usually less than a hundredth of a percent of total size.
140 * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
144 jpeg_gen_optimal_table (j_compress_ptr cinfo
, JHUFF_TBL
* htbl
, long freq
[])
146 #define MAX_CLEN 32 /* assumed maximum initial code length */
147 UINT8 bits
[MAX_CLEN
+1]; /* bits[k] = # of symbols with code length k */
148 int codesize
[257]; /* codesize[k] = code length of symbol k */
149 int others
[257]; /* next symbol in current branch of tree */
154 /* This algorithm is explained in section K.2 of the JPEG standard */
156 MEMZERO(bits
, SIZEOF(bits
));
157 MEMZERO(codesize
, SIZEOF(codesize
));
158 for (i
= 0; i
< 257; i
++)
159 others
[i
] = -1; /* init links to empty */
161 freq
[256] = 1; /* make sure 256 has a nonzero count */
162 /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
163 * that no real symbol is given code-value of all ones, because 256
164 * will be placed last in the largest codeword category.
167 /* Huffman's basic algorithm to assign optimal code lengths to symbols */
170 /* Find the smallest nonzero frequency, set c1 = its symbol */
171 /* In case of ties, take the larger symbol number */
174 for (i
= 0; i
<= 256; i
++) {
175 if (freq
[i
] && freq
[i
] <= v
) {
181 /* Find the next smallest nonzero frequency, set c2 = its symbol */
182 /* In case of ties, take the larger symbol number */
185 for (i
= 0; i
<= 256; i
++) {
186 if (freq
[i
] && freq
[i
] <= v
&& i
!= c1
) {
192 /* Done if we've merged everything into one frequency */
196 /* Else merge the two counts/trees */
197 freq
[c1
] += freq
[c2
];
200 /* Increment the codesize of everything in c1's tree branch */
202 while (others
[c1
] >= 0) {
207 others
[c1
] = c2
; /* chain c2 onto c1's tree branch */
209 /* Increment the codesize of everything in c2's tree branch */
211 while (others
[c2
] >= 0) {
217 /* Now count the number of symbols of each code length */
218 for (i
= 0; i
<= 256; i
++) {
220 /* The JPEG standard seems to think that this can't happen, */
221 /* but I'm paranoid... */
222 if (codesize
[i
] > MAX_CLEN
)
223 ERREXIT(cinfo
, JERR_HUFF_CLEN_OVERFLOW
);
229 /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
230 * Huffman procedure assigned any such lengths, we must adjust the coding.
231 * Here is what the JPEG spec says about how this next bit works:
232 * Since symbols are paired for the longest Huffman code, the symbols are
233 * removed from this length category two at a time. The prefix for the pair
234 * (which is one bit shorter) is allocated to one of the pair; then,
235 * skipping the BITS entry for that prefix length, a code word from the next
236 * shortest nonzero BITS entry is converted into a prefix for two code words one bit longer.
240 for (i
= MAX_CLEN
; i
> 16; i
--) {
241 while (bits
[i
] > 0) {
242 j
= i
- 2; /* find length of new prefix to be used */
246 bits
[i
] -= 2; /* remove two symbols */
247 bits
[i
-1]++; /* one goes in this length */
248 bits
[j
+1] += 2; /* two new symbols in this length */
249 bits
[j
]--; /* symbol of this length is now a prefix */
253 /* Remove the count for the pseudo-symbol 256 from the largest codelength */
254 while (bits
[i
] == 0) /* find largest codelength still in use */
258 /* Return final symbol counts (only for lengths 0..16) */
259 MEMCOPY(htbl
->bits
, bits
, SIZEOF(htbl
->bits
));
261 /* Return a list of the symbols sorted by code length */
262 /* It's not really clear to me why we don't need to consider the codelength
263 * changes made above, but the JPEG spec seems to think this works.
266 for (i
= 1; i
<= MAX_CLEN
; i
++) {
267 for (j
= 0; j
<= 255; j
++) {
268 if (codesize
[j
] == i
) {
269 htbl
->huffval
[p
] = (UINT8
) j
;
275 /* Set sent_table FALSE so updated table will be written to JPEG file. */
276 htbl
->sent_table
= FALSE
;