/*
 * Copyright (C) Matthieu Suiche 2008
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the author nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "../lib/util/byteorder.h"
#define __CHECK_BYTES(__size, __index, __needed) do { \
		if (unlikely(__index >= __size)) { \
			return LZXPRESS_ERROR; \
		} else { \
			uint32_t __avail = __size - __index; \
			if (unlikely(__needed > __avail)) { \
				return LZXPRESS_ERROR; \
			} \
		} \
	} while(0)
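/*
 * Editorial sketch (not in the original file): via the
 * CHECK_INPUT_BYTES/CHECK_OUTPUT_BYTES wrappers defined further down, a
 * guard like CHECK_INPUT_BYTES(sizeof(uint16_t)) behaves as if the caller
 * had written:
 *
 *	if (input_index >= input_size ||
 *	    sizeof(uint16_t) > input_size - input_index) {
 *		return LZXPRESS_ERROR;
 *	}
 *
 * i.e. every read and write below is bounds-checked before it happens, so
 * a would-be overrun turns into an error return instead of a bad access.
 */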
/*
 * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
 * matches will be.
 *
 * The window in which we look for matches is 8192 bytes. That means with
 * random data a value of 13 is getting close to no collisions, while a 12
 * will miss about half the possible matches. With compressible data there
 * will generally be fewer and less diverse entries, so collisions are rarer.
 *
 * In the testsuite, both 12 and 13 give better compression than Windows, but
 * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
 */
#define LZX_PLAIN_COMP_HASH_BITS 12
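/*
 * Editorial note (not in the original file): 12 bits gives
 * (1 << 12) == 4096 uint32_t slots, i.e. a 16 KiB hash table, which
 * lzxpress_compress() below keeps on the stack.
 */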
/*
 * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
 * circular hash table for a match, before we give up. A bigger number will
 * generally lead to better but slower compression, but a stupidly big number
 * will be worse.
 */
#define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
#define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)
static inline uint16_t three_byte_hash(const uint8_t *bytes)
{
	uint16_t a = bytes[0];
	uint16_t b = bytes[1] ^ 0x2e;
	uint16_t c = bytes[2] ^ 0x55;
	uint16_t ca = c ^ a;	/* restored: the 'ca' term used by 'd' below */
	uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
	return d & HASH_MASK;
}
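/*
 * Editorial worked example (not in the original file): hashing the bytes
 * "abc" (0x61, 0x62, 0x63):
 *
 *	a  = 0x61
 *	b  = 0x62 ^ 0x2e = 0x4c
 *	c  = 0x63 ^ 0x55 = 0x36
 *	ca = 0x36 ^ 0x61 = 0x57
 *	d  = 0xad00 ^ 0x0ae0 ^ 0x0082 ^ 0x0d0c = 0xaa6e
 *
 * so the hash is 0xaa6e & HASH_MASK == 0xa6e.
 */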
static inline void store_match(uint32_t *hash_table,
			       uint16_t h,
			       uint32_t offset)
{
	int i;
	uint32_t o = hash_table[h];
	uint16_t h2;
	uint16_t worst_h;
	uint32_t worst_score;

	if (o >= offset) {
		/* there is nothing there yet */
		hash_table[h] = offset;
		return;
	}
	for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		h2 = (h + i) & HASH_MASK;
		if (hash_table[h2] >= offset) {
			hash_table[h2] = offset;
			return;
		}
	}
	/*
	 * There are no slots, but we really want to store this, so we'll kick
	 * out the one with the longest distance.
	 */
	worst_h = h;
	worst_score = offset - o;
	for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		uint32_t score;
		h2 = (h + i) & HASH_MASK;
		o = hash_table[h2];
		score = offset - o;
		if (score > worst_score) {
			worst_score = score;
			worst_h = h2;
		}
	}
	hash_table[worst_h] = offset;
}
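/*
 * Editorial sketch (not in the original file): with 5 search attempts, a
 * value hashing to h competes for the slots h, h+1, ..., h+4. For example,
 * an offset hashing to 0xffe probes
 *
 *	0xffe, 0xfff, 0x000, 0x001, 0x002
 *
 * wrapping around via HASH_MASK. If none of the five slots is free, the
 * entry furthest behind the current offset is overwritten.
 */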
struct match {
	const uint8_t *there;
	uint32_t length;
};
static inline struct match lookup_match(uint32_t *hash_table,
					uint16_t h,
					const uint8_t *data,
					uint32_t offset,
					size_t max_len)
{
	int i;
	uint32_t o;
	uint16_t h2;
	size_t len;
	const uint8_t *there = NULL;
	const uint8_t *here = data + offset;
	struct match best = {0};

	for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		h2 = (h + i) & HASH_MASK;
		o = hash_table[h2];
		if (o >= offset) {
			/*
			 * Either this is 0xffffffff, or something is really
			 * wrong.
			 *
			 * In setting this, we would never have stepped over
			 * an 0xffffffff, so we won't now.
			 */
			break;
		}
		if (offset - o > 8192) {
			/* Too far away to use */
			continue;
		}
		there = data + o;
		/*
		 * When we already have a long match, we can try to avoid
		 * measuring out another long, but shorter match.
		 */
		if (best.length > 1000 &&
		    there[best.length - 1] != best.there[best.length - 1]) {
			continue;
		}

		for (len = 0;
		     len < max_len && here[len] == there[len];
		     len++) {
			/* counting */
		}

		if (len > best.length) {
			best.length = len;
			best.there = there;
		}
	}
	return best;
}
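/*
 * Editorial sketch (not in the original file): for the input
 * "abcdefabcdefabc", when offset reaches the second "abcdef" the three
 * bytes "abc" hash to the same slot as the first occurrence, so
 * lookup_match() finds there == data, measures len == 9 ("abcdefabc"),
 * and returns {.there = data, .length = 9}.
 */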
struct write_context {
	uint8_t *compressed;
	uint32_t compressed_pos;
	uint32_t max_compressed_size;
	/* restored fields, used by the indicator-bit code below */
	uint32_t indic;
	uint32_t indic_bit;
	uint32_t indic_pos;
	uint32_t nibble_index;
};
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
static inline ssize_t push_indicator_bit(struct write_context *wc,
					 uint32_t bit)
{
	wc->indic = (wc->indic << 1) | bit;
	wc->indic_bit += 1;

	if (wc->indic_bit == 32) {
		PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
		wc->indic_bit = 0;
		CHECK_OUTPUT_BYTES(sizeof(uint32_t));
		wc->indic_pos = wc->compressed_pos;
		wc->compressed_pos += sizeof(uint32_t);
	}
	return wc->indic_pos;
}
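/*
 * Editorial sketch (not in the original file): the indicator word is a
 * 32-entry bitmap, first-pushed bit most significant. After pushing the
 * bits 0, 0, 1 (literal, literal, match) the low bits of wc->indic hold
 * 0b001; once 32 bits have accumulated, the word is written back at
 * wc->indic_pos, and a fresh uint32_t slot is reserved at the current
 * output position for the next 32 flags.
 */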
static ssize_t encode_match(struct write_context *wc,
			    struct match match,
			    const uint8_t *here)
{
	uint32_t match_len = match.length - 3;
	uint32_t best_offset = here - match.there - 1;
	uint16_t metadata;

	if (best_offset > 8191) {
		return LZXPRESS_ERROR;
	}

	CHECK_OUTPUT_BYTES(sizeof(uint16_t));
	metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
	PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
	wc->compressed_pos += sizeof(uint16_t);

	if (match_len >= 7) {
		match_len -= 7;

		if (wc->nibble_index == 0) {
			wc->nibble_index = wc->compressed_pos;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->nibble_index] = MIN(match_len, 15);
			wc->compressed_pos += sizeof(uint8_t);
		} else {
			wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
			wc->nibble_index = 0;
		}

		if (match_len >= 15) {
			match_len -= 15;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
			wc->compressed_pos += sizeof(uint8_t);

			if (match_len >= 255) {
				/* Additional match_len */
				match_len += 7 + 15;

				if (match_len < (1 << 16)) {
					CHECK_OUTPUT_BYTES(sizeof(uint16_t));
					PUSH_LE_U16(wc->compressed,
						    wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint16_t);
				} else {
					CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
							   sizeof(uint32_t));
					PUSH_LE_U16(wc->compressed,
						    wc->compressed_pos, 0);
					wc->compressed_pos += sizeof(uint16_t);

					PUSH_LE_U32(wc->compressed,
						    wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint32_t);
				}
			}
		}
	}
	return push_indicator_bit(wc, 1);
}
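/*
 * Editorial worked example (not in the original file): a 10-byte match at
 * distance 5 has match_len = 10 - 3 = 7 and best_offset = 5 - 1 = 4, so
 * the 16-bit metadata is (4 << 3) | 7 == 0x27. Because match_len >= 7, the
 * remainder 7 - 7 == 0 goes into the next free half-byte slot, and the
 * match is flagged with an indicator bit of 1.
 */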
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
	 *
	 * It avoids Huffman encoding by including literal bytes inline when a
	 * match is not found. Every so often it includes a uint32 bit map
	 * flagging which positions contain matches and which contain
	 * literals. The encoding of matches is of variable size, depending on
	 * the match length; they are always at least 16 bits long, and can
	 * implicitly use unused half-bytes from earlier in the stream.
	 */
	ssize_t ret;
	uint32_t uncompressed_pos;
	struct write_context wc = {
		.indic = 0,
		.indic_bit = 0,
		.indic_pos = 0,
		.nibble_index = 0,
		.compressed = compressed,
		.compressed_pos = 0,
		.max_compressed_size = max_compressed_size
	};
	uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
	memset(hash_table, 0xff, sizeof(hash_table));

	if (!uncompressed_size) {
		return 0;
	}

	uncompressed_pos = 0;
	CHECK_OUTPUT_BYTES(sizeof(uint32_t));
	PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
	wc.compressed_pos += sizeof(uint32_t);

	while ((uncompressed_pos < uncompressed_size) &&
	       (wc.compressed_pos < wc.max_compressed_size)) {

		/* maximum len we can encode into metadata */
		const uint32_t max_len = MIN(0xFFFF + 3,
					     uncompressed_size - uncompressed_pos);
		const uint8_t *here = uncompressed + uncompressed_pos;
		uint16_t h;
		struct match match = {0};

		if (max_len >= 3) {
			h = three_byte_hash(here);
			match = lookup_match(hash_table,
					     h,
					     uncompressed,
					     uncompressed_pos,
					     max_len);

			store_match(hash_table, h, uncompressed_pos);
		}

		if (match.there == NULL) {
			/*
			 * This is going to be a literal byte, which we flag
			 * by setting a bit in an indicator field somewhere
			 * earlier in the stream.
			 */
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc.compressed[wc.compressed_pos++] = *here;
			uncompressed_pos++;

			ret = push_indicator_bit(&wc, 0);
			if (ret < 0) {
				return ret;
			}
		} else {
			ret = encode_match(&wc, match, here);
			if (ret < 0) {
				return ret;
			}
			uncompressed_pos += match.length;
		}
	}

	if (wc.indic_bit != 0) {
		wc.indic <<= 32 - wc.indic_bit;
	}
	wc.indic |= UINT32_MAX >> wc.indic_bit;
	PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);

	return wc.compressed_pos;
}
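/*
 * Editorial usage sketch (not in the original file; OUT_MAX is a
 * hypothetical bound, pick a size appropriate for your data):
 *
 *	uint8_t out[OUT_MAX];
 *	ssize_t clen = lzxpress_compress(buf, buf_len, out, sizeof(out));
 *	if (clen < 0) {
 *		... the output buffer was too small ...
 *	}
 *
 * Plain LZ77 output can be slightly larger than the input for
 * incompressible data, so the output buffer needs headroom.
 */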
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
	 * Algorithm Details".
	 */
	uint32_t output_index, input_index;
	uint32_t indicator, indicator_bit;
	uint32_t nibble_index;

	if (input_size == 0) {
		return 0;
	}

	output_index = 0;
	input_index = 0;
	indicator = 0;
	indicator_bit = 0;
	nibble_index = 0;
#undef CHECK_INPUT_BYTES
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(input_size, input_index, __needed)
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(max_output_size, output_index, __needed)
	do {
		if (indicator_bit == 0) {
			CHECK_INPUT_BYTES(sizeof(uint32_t));
			indicator = PULL_LE_U32(input, input_index);
			input_index += sizeof(uint32_t);
			if (input_index == input_size) {
				/*
				 * The compressor left room for indicator
				 * flags for data that doesn't exist.
				 */
				break;
			}
			indicator_bit = 32;
		}
		indicator_bit--;

		/*
		 * check whether the bit specified by indicator_bit is set or not
		 * set in indicator. For example, if indicator_bit has value 4
		 * check whether the 4th bit of the value in indicator is set
		 */
		if (((indicator >> indicator_bit) & 1) == 0) {
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			output[output_index] = input[input_index];
			input_index += sizeof(uint8_t);
			output_index += sizeof(uint8_t);
		} else {
			uint32_t length;
			uint32_t offset;

			CHECK_INPUT_BYTES(sizeof(uint16_t));
			length = PULL_LE_U16(input, input_index);
			input_index += sizeof(uint16_t);
			offset = (length >> 3) + 1;
			length &= 7;

			if (length == 7) {
				if (nibble_index == 0) {
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					nibble_index = input_index;
					length = input[input_index] & 0xf;
					input_index += sizeof(uint8_t);
				} else {
					length = input[nibble_index] >> 4;
					nibble_index = 0;
				}

				if (length == 15) {
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					length = input[input_index];
					input_index += sizeof(uint8_t);
					if (length == 255) {
						CHECK_INPUT_BYTES(sizeof(uint16_t));
						length = PULL_LE_U16(input, input_index);
						input_index += sizeof(uint16_t);
						if (length == 0) {
							CHECK_INPUT_BYTES(sizeof(uint32_t));
							length = PULL_LE_U32(input,
									     input_index);
							input_index += sizeof(uint32_t);
						}

						if (length < (15 + 7)) {
							return LZXPRESS_ERROR;
						}
						length -= (15 + 7);
					}
					length += 15;
				}
				length += 7;
			}
			length += 3;

			for (; length > 0; --length) {
				if (offset > output_index) {
					return LZXPRESS_ERROR;
				}
				CHECK_OUTPUT_BYTES(sizeof(uint8_t));
				output[output_index] = output[output_index - offset];
				output_index += sizeof(uint8_t);
			}
		}
	} while ((output_index < max_output_size) &&
		 (input_index < input_size));

	return output_index;
}
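/*
 * Editorial usage sketch (not in the original file; buffer bounds are
 * hypothetical): a round trip through both functions.
 *
 *	uint8_t comp[COMP_MAX], plain[PLAIN_MAX];
 *	ssize_t clen = lzxpress_compress(src, src_len, comp, sizeof(comp));
 *	if (clen >= 0) {
 *		ssize_t plen = lzxpress_decompress(comp, clen,
 *						   plain, sizeof(plain));
 *		... on success, plen == src_len and plain matches src ...
 *	}
 */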