/*
 * Copyright (C) Matthieu Suiche 2008
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "../lib/util/byteorder.h"
#define __CHECK_BYTES(__size, __index, __needed) do { \
		if (unlikely(__index >= __size)) { \
			return LZXPRESS_ERROR; \
		} else { \
			uint32_t __avail = __size - __index; \
			if (unlikely(__needed > __avail)) { \
				return LZXPRESS_ERROR; \
			} \
		} \
	} while(0)
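/*
 * Editorial sketch (not in the original file): via the
 * CHECK_INPUT_BYTES/CHECK_OUTPUT_BYTES wrappers defined further down, a
 * guard like CHECK_INPUT_BYTES(sizeof(uint16_t)) behaves as if the caller
 * had written:
 *
 *	if (input_index >= input_size ||
 *	    sizeof(uint16_t) > input_size - input_index) {
 *		return LZXPRESS_ERROR;
 *	}
 *
 * i.e. every read and write below is bounds-checked before it happens, so
 * a would-be overrun turns into an error return instead of a bad access.
 */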
/*
 * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
 * matches will be.
 *
 * The window in which we look for matches is 8192 bytes. That means with
 * random data a value of 13 is getting close to no collisions, while a 12
 * will miss about half the possible matches. With compressible data there
 * will generally be fewer and less diverse entries, so collisions are rarer.
 *
 * In the testsuite, both 12 and 13 give better compression than Windows, but
 * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
 */
#define LZX_PLAIN_COMP_HASH_BITS 12
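/*
 * Editorial note (not in the original file): 12 bits gives
 * (1 << 12) == 4096 uint32_t slots, i.e. a 16 KiB hash table, which
 * lzxpress_compress() below keeps on the stack.
 */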
/*
 * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
 * circular hash table for a match, before we give up. A bigger number will
 * generally lead to better but slower compression, but a stupidly big number
 * will be worse.
 */
#define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
#define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)
static inline uint16_t three_byte_hash(const uint8_t *bytes)
{
	uint16_t a = bytes[0];
	uint16_t b = bytes[1] ^ 0x2e;
	uint16_t c = bytes[2] ^ 0x55;
	uint16_t ca = c ^ a;	/* restored: the 'ca' term used by 'd' below */
	uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
	return d & HASH_MASK;
}
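/*
 * Editorial worked example (not in the original file): hashing the bytes
 * "abc" (0x61, 0x62, 0x63):
 *
 *	a  = 0x61
 *	b  = 0x62 ^ 0x2e = 0x4c
 *	c  = 0x63 ^ 0x55 = 0x36
 *	ca = 0x36 ^ 0x61 = 0x57
 *	d  = 0xad00 ^ 0x0ae0 ^ 0x0082 ^ 0x0d0c = 0xaa6e
 *
 * so the hash is 0xaa6e & HASH_MASK == 0xa6e.
 */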
static inline void store_match(uint32_t *hash_table,
			       uint16_t h,
			       uint32_t offset)
{
	int i;
	uint32_t o = hash_table[h];
	uint16_t h2;
	uint16_t worst_h;
	uint32_t worst_score;

	if (o >= offset) {
		/* there is nothing there yet */
		hash_table[h] = offset;
		return;
	}
	for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		h2 = (h + i) & HASH_MASK;
		if (hash_table[h2] >= offset) {
			hash_table[h2] = offset;
			return;
		}
	}
	/*
	 * There are no slots, but we really want to store this, so we'll kick
	 * out the one with the longest distance.
	 */
	worst_h = h;
	worst_score = offset - o;
	for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		uint32_t score;
		h2 = (h + i) & HASH_MASK;
		o = hash_table[h2];
		score = offset - o;
		if (score > worst_score) {
			worst_score = score;
			worst_h = h2;
		}
	}
	hash_table[worst_h] = offset;
}
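/*
 * Editorial sketch (not in the original file): with 5 search attempts, a
 * value hashing to h competes for the slots h, h+1, ..., h+4. For example,
 * an offset hashing to 0xffe probes
 *
 *	0xffe, 0xfff, 0x000, 0x001, 0x002
 *
 * wrapping around via HASH_MASK. If none of the five slots is free, the
 * entry furthest behind the current offset is overwritten.
 */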
struct match {
	const uint8_t *there;
	uint32_t length;
};
static inline struct match lookup_match(uint32_t *hash_table,
					uint16_t h,
					const uint8_t *data,
					uint32_t offset,
					size_t max_len)
{
	int i;
	uint32_t o;
	uint16_t h2;
	size_t len;
	const uint8_t *there = NULL;
	const uint8_t *here = data + offset;
	struct match best = {0};

	for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		h2 = (h + i) & HASH_MASK;
		o = hash_table[h2];
		if (o >= offset) {
			/*
			 * Either this is 0xffffffff, or something is really
			 * wrong.
			 *
			 * In setting this, we would never have stepped over
			 * an 0xffffffff, so we won't now.
			 */
			break;
		}
		if (offset - o > 8192) {
			/* Too far away to use */
			continue;
		}
		there = data + o;
		/*
		 * When we already have a long match, we can try to avoid
		 * measuring out another long, but shorter match.
		 */
		if (best.length > 1000 &&
		    there[best.length - 1] != best.there[best.length - 1]) {
			continue;
		}

		for (len = 0;
		     len < max_len && here[len] == there[len];
		     len++) {
			/* counting */
		}

		if (len > best.length) {
			best.length = len;
			best.there = there;
		}
	}
	return best;
}
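/*
 * Editorial sketch (not in the original file): for the input
 * "abcdefabcdefabc", when offset reaches the second "abcdef" the three
 * bytes "abc" hash to the same slot as the first occurrence, so
 * lookup_match() finds there == data, measures len == 9 ("abcdefabc"),
 * and returns {.there = data, .length = 9}.
 */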
struct write_context {
	uint8_t *compressed;
	uint32_t compressed_pos;
	uint32_t max_compressed_size;
	/* restored fields, used by the indicator-bit code below */
	uint32_t indic;
	uint32_t indic_bit;
	uint32_t indic_pos;
	uint32_t nibble_index;
};
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
static inline ssize_t push_indicator_bit(struct write_context *wc,
					 uint32_t bit)
{
	wc->indic = (wc->indic << 1) | bit;
	wc->indic_bit += 1;

	if (wc->indic_bit == 32) {
		PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
		wc->indic_bit = 0;
		CHECK_OUTPUT_BYTES(sizeof(uint32_t));
		wc->indic_pos = wc->compressed_pos;
		wc->compressed_pos += sizeof(uint32_t);
	}
	return wc->indic_pos;
}
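/*
 * Editorial sketch (not in the original file): the indicator word is a
 * 32-entry bitmap, first-pushed bit most significant. After pushing the
 * bits 0, 0, 1 (literal, literal, match) the low bits of wc->indic hold
 * 0b001; once 32 bits have accumulated, the word is written back at
 * wc->indic_pos, and a fresh uint32_t slot is reserved at the current
 * output position for the next 32 flags.
 */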
static ssize_t encode_match(struct write_context *wc,
			    struct match match,
			    const uint8_t *here)
{
	uint32_t match_len = match.length - 3;
	uint32_t best_offset = here - match.there - 1;
	uint16_t metadata;

	if (best_offset > 8191) {
		return LZXPRESS_ERROR;
	}

	CHECK_OUTPUT_BYTES(sizeof(uint16_t));
	metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
	PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
	wc->compressed_pos += sizeof(uint16_t);

	if (match_len >= 7) {
		match_len -= 7;

		if (wc->nibble_index == 0) {
			wc->nibble_index = wc->compressed_pos;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->nibble_index] = MIN(match_len, 15);
			wc->compressed_pos += sizeof(uint8_t);
		} else {
			wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
			wc->nibble_index = 0;
		}

		if (match_len >= 15) {
			match_len -= 15;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
			wc->compressed_pos += sizeof(uint8_t);

			if (match_len >= 255) {
				/* Additional match_len */
				match_len += 7 + 15;

				if (match_len < (1 << 16)) {
					CHECK_OUTPUT_BYTES(sizeof(uint16_t));
					PUSH_LE_U16(wc->compressed,
						    wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint16_t);
				} else {
					CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
							   sizeof(uint32_t));
					PUSH_LE_U16(wc->compressed,
						    wc->compressed_pos, 0);
					wc->compressed_pos += sizeof(uint16_t);

					PUSH_LE_U32(wc->compressed,
						    wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint32_t);
				}
			}
		}
	}
	return push_indicator_bit(wc, 1);
}
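/*
 * Editorial worked example (not in the original file): a 10-byte match at
 * distance 5 has match_len = 10 - 3 = 7 and best_offset = 5 - 1 = 4, so
 * the 16-bit metadata is (4 << 3) | 7 == 0x27. Because match_len >= 7, the
 * remainder 7 - 7 == 0 goes into the next free half-byte slot, and the
 * match is flagged with an indicator bit of 1.
 */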
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
	 *
	 * It avoids Huffman encoding by including literal bytes inline when a
	 * match is not found. Every so often it includes a uint32 bit map
	 * flagging which positions contain matches and which contain
	 * literals. The encoding of matches is of variable size, depending on
	 * the match length; they are always at least 16 bits long, and can
	 * implicitly use unused half-bytes from earlier in the stream.
	 */
	ssize_t ret;
	uint32_t uncompressed_pos;
	struct write_context wc = {
		.indic = 0,
		.indic_bit = 0,
		.indic_pos = 0,
		.nibble_index = 0,
		.compressed = compressed,
		.compressed_pos = 0,
		.max_compressed_size = max_compressed_size
	};
	uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
	memset(hash_table, 0xff, sizeof(hash_table));

	if (!uncompressed_size) {
		return 0;
	}

	uncompressed_pos = 0;
	CHECK_OUTPUT_BYTES(sizeof(uint32_t));
	PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
	wc.compressed_pos += sizeof(uint32_t);

	while ((uncompressed_pos < uncompressed_size) &&
	       (wc.compressed_pos < wc.max_compressed_size)) {

		/* maximum len we can encode into metadata */
		const uint32_t max_len = MIN(0xFFFF + 3,
					     uncompressed_size - uncompressed_pos);
		const uint8_t *here = uncompressed + uncompressed_pos;
		uint16_t h;
		struct match match = {0};

		if (max_len >= 3) {
			h = three_byte_hash(here);
			match = lookup_match(hash_table,
					     h,
					     uncompressed,
					     uncompressed_pos,
					     max_len);

			store_match(hash_table, h, uncompressed_pos);
		}

		if (match.there == NULL) {
			/*
			 * This is going to be a literal byte, which we flag
			 * by setting a bit in an indicator field somewhere
			 * earlier in the stream.
			 */
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc.compressed[wc.compressed_pos++] = *here;
			uncompressed_pos++;

			ret = push_indicator_bit(&wc, 0);
			if (ret < 0) {
				return ret;
			}
		} else {
			ret = encode_match(&wc, match, here);
			if (ret < 0) {
				return ret;
			}
			uncompressed_pos += match.length;
		}
	}

	if (wc.indic_bit != 0) {
		wc.indic <<= 32 - wc.indic_bit;
	}
	wc.indic |= UINT32_MAX >> wc.indic_bit;
	PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);

	return wc.compressed_pos;
}
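/*
 * Editorial usage sketch (not in the original file; OUT_MAX is a
 * hypothetical bound, pick a size appropriate for your data):
 *
 *	uint8_t out[OUT_MAX];
 *	ssize_t clen = lzxpress_compress(buf, buf_len, out, sizeof(out));
 *	if (clen < 0) {
 *		... the output buffer was too small ...
 *	}
 *
 * Plain LZ77 output can be slightly larger than the input for
 * incompressible data, so the output buffer needs headroom.
 */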
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
	 * Algorithm Details".
	 */
	uint32_t output_index, input_index;
	uint32_t indicator, indicator_bit;
	uint32_t nibble_index;

	if (input_size == 0) {
		return 0;
	}

	output_index = 0;
	input_index = 0;
	indicator = 0;
	indicator_bit = 0;
	nibble_index = 0;
#undef CHECK_INPUT_BYTES
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(input_size, input_index, __needed)
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(max_output_size, output_index, __needed)
	do {
		if (indicator_bit == 0) {
			CHECK_INPUT_BYTES(sizeof(uint32_t));
			indicator = PULL_LE_U32(input, input_index);
			input_index += sizeof(uint32_t);
			if (input_index == input_size) {
				/*
				 * The compressor left room for indicator
				 * flags for data that doesn't exist.
				 */
				break;
			}
			indicator_bit = 32;
		}
		indicator_bit--;

		/*
		 * check whether the bit specified by indicator_bit is set or not
		 * set in indicator. For example, if indicator_bit has value 4
		 * check whether the 4th bit of the value in indicator is set
		 */
		if (((indicator >> indicator_bit) & 1) == 0) {
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			output[output_index] = input[input_index];
			input_index += sizeof(uint8_t);
			output_index += sizeof(uint8_t);
		} else {
			uint32_t length;
			uint32_t offset;

			CHECK_INPUT_BYTES(sizeof(uint16_t));
			length = PULL_LE_U16(input, input_index);
			input_index += sizeof(uint16_t);
			offset = (length >> 3) + 1;
			length &= 7;

			if (length == 7) {
				if (nibble_index == 0) {
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					nibble_index = input_index;
					length = input[input_index] & 0xf;
					input_index += sizeof(uint8_t);
				} else {
					length = input[nibble_index] >> 4;
					nibble_index = 0;
				}

				if (length == 15) {
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					length = input[input_index];
					input_index += sizeof(uint8_t);
					if (length == 255) {
						CHECK_INPUT_BYTES(sizeof(uint16_t));
						length = PULL_LE_U16(input, input_index);
						input_index += sizeof(uint16_t);
						if (length == 0) {
							CHECK_INPUT_BYTES(sizeof(uint32_t));
							length = PULL_LE_U32(input,
									     input_index);
							input_index += sizeof(uint32_t);
						}

						if (length < (15 + 7)) {
							return LZXPRESS_ERROR;
						}
						length -= (15 + 7);
					}
					length += 15;
				}
				length += 7;
			}
			length += 3;

			for (; length > 0; --length) {
				if (offset > output_index) {
					return LZXPRESS_ERROR;
				}
				CHECK_OUTPUT_BYTES(sizeof(uint8_t));
				output[output_index] = output[output_index - offset];
				output_index += sizeof(uint8_t);
			}
		}
	} while ((output_index < max_output_size) &&
		 (input_index < input_size));

	return output_index;
}
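/*
 * Editorial usage sketch (not in the original file; buffer bounds are
 * hypothetical): a round trip through both functions.
 *
 *	uint8_t comp[COMP_MAX], plain[PLAIN_MAX];
 *	ssize_t clen = lzxpress_compress(src, src_len, comp, sizeof(comp));
 *	if (clen >= 0) {
 *		ssize_t plen = lzxpress_decompress(comp, clen,
 *						   plain, sizeof(plain));
 *		... on success, plen == src_len and plain matches src ...
 *	}
 */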