lib/compression/tests/test_lzx_huffman.c

   1 /*
   2  * Samba compression library - LGPLv3
   3  *
   4  * Copyright © Catalyst IT 2022
   5  *
   6  * Written by Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
   7  *
   8  *  ** NOTE! The following LGPL license applies to this file.
   9  *  ** It does NOT imply that all of Samba is released under the LGPL
  10  *
  11  *  This library is free software; you can redistribute it and/or
  12  *  modify it under the terms of the GNU Lesser General Public
  13  *  License as published by the Free Software Foundation; either
  14  *  version 3 of the License, or (at your option) any later version.
  15  *
  16  *  This library is distributed in the hope that it will be useful,
  17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  *  Lesser General Public License for more details.
  20  *
  21  *  You should have received a copy of the GNU Lesser General Public
  22  *  License along with this library; if not, see <http://www.gnu.org/licenses/>.
  23  */
  24
  25 #include <stdarg.h>
  26 #include <stddef.h>
  27 #include <setjmp.h>
  28 #include <cmocka.h>
  29 #include <stdbool.h>
  30 #include <sys/stat.h>
  31 #include "replace.h"
  32 #include <talloc.h>
  33 #include "lzxpress_huffman.h"
  34 #include "lib/util/stable_sort.h"
  35 #include "lib/util/data_blob.h"
  36
  37 /* set LZXHUFF_DEBUG_FILES to true to save round-trip files in /tmp. */
  38 #define LZXHUFF_DEBUG_FILES false
  39
  40 /* set LZXHUFF_DEBUG_VERBOSE to true to print more. */
  41 #define LZXHUFF_DEBUG_VERBOSE false
  42
  43
  44 #if LZXHUFF_DEBUG_VERBOSE
  45 #define debug_message(...) print_message(__VA_ARGS__)
  46
  47 #include <time.h>
  48
  49 struct timespec start = {0};
  50 struct timespec end = {0};
  51 static void debug_start_timer(void)
  52 {
  53         clock_gettime(CLOCK_MONOTONIC, &start);
  54 }
  55
  56 static void debug_end_timer(const char *name, size_t len)
  57 {
  58         uint64_t ns;
  59         double secs;
  60         double rate;
  61         clock_gettime(CLOCK_MONOTONIC, &end);
  62         ns = end.tv_nsec;
  63         ns += end.tv_sec * 1000 * 1000 * 1000;
  64         ns -= start.tv_nsec;
  65         ns -= start.tv_sec * 1000 * 1000 * 1000;
  66         secs = ns / 1e9;
  67         rate = len / (secs * 1024 * 1024);
  68         debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n",
  69                       name, len, secs, rate);
  70 }
  71
  72 #else
  73 #define debug_message(...) /* debug_message */
  74 #define debug_start_timer(...) /* debug_start_timer */
  75 #define debug_end_timer(...) /* debug_end_timer */
  76 #endif
  77
  78
  79 struct lzx_pair {
  80         const char *name;
  81         DATA_BLOB compressed;
  82         DATA_BLOB decompressed;
  83 };
  84
  85 struct lzx_file_pair {
  86         const char *name;
  87         const char *compressed_file;
  88         const char *decompressed_file;
  89 };
  90
  91
  92 #define DECOMP_DIR "testdata/compression/decompressed"
  93 #define COMP_DIR "testdata/compression/compressed-huffman"
  94 #define MORE_COMP_DIR "testdata/compression/compressed-more-huffman"
  95
  96
  97 #define VARRGH(...) __VA_ARGS__
  98
  99 #define BLOB_FROM_ARRAY(...)                             \
 100         {                                                \
 101                 .data = (uint8_t[]){__VA_ARGS__},          \
 102                 .length = sizeof((uint8_t[]){__VA_ARGS__}) \
 103         }
 104
 105 #define BLOB_FROM_STRING(s)                                      \
 106         {                                                            \
 107                 .data = discard_const_p(uint8_t, s),                 \
 108                 .length = (sizeof(s) - 1)                    \
 109         }
 110
 111
 112 const char * file_names[] = {
 113         "27826-8.txt",
 114         "5d049b4cb1bd933f5e8ex19",
 115         "638e61e96d54279981c3x5",
 116         "64k-minus-one-zeros",
 117         "64k-plus-one-zeros",
 118         "64k-zeros",
 119         "96f696a4e5ce56c61a3dx10",
 120         "9e0b6a12febf38e98f13",
 121         "abc-times-101",
 122         "abc-times-105",
 123         "abc-times-200",
 124         "and_rand",
 125         "and_rand-128k+",
 126         "b63289ccc7f218c0d56b",
 127         "beta-variate1-128k+",
 128         "beta-variate2-128k+",
 129         "beta-variate3-128k+",
 130         "decayed_alphabet_128k+",
 131         "decayed_alphabet_64k",
 132         "exp_shuffle",
 133         "exp_shuffle-128k+",
 134         "f00842317dc6d5695b02",
 135         "fib_shuffle",
 136         "fib_shuffle-128k+",
 137         "fuzzing-0fc2d461b56cd8103c91",
 138         "fuzzing-17c961778538cc10ab7c",
 139         "fuzzing-3591f9dc02bb00a54b60",
 140         "fuzzing-3ec3bca27bb9eb40c128",
 141         "fuzzing-80b4fa18ff5f8dd04862",
 142         "fuzzing-a3115a81d1ac500318f9",
 143         "generate-windows-test-vectors.c",
 144         "midsummer-nights-dream.txt",
 145         "notes-on-the-underground.txt",
 146         "pg22009.txt",
 147         "repeating",
 148         "repeating-exactly-64k",
 149         "setup.log",
 150         "skewed_choices",
 151         "skewed_choices-128k+",
 152         /* These ones were deathly slow in fuzzing at one point */
 153         "slow-015ddc36a71412ccc50d",
 154         "slow-100e9f966a7feb9ca40a",
 155         "slow-2a671c3cff4f1574cbab",
 156         "slow-33d90a24e70515b14cd0",
 157         "slow-49d8c05261e3f412fc72",
 158         "slow-50a249d2fe56873e56a0",
 159         "slow-63e9f0b52235fb0129fa",
 160         "slow-73b7f971d65908ac0095",
 161         "slow-8b61e3dd267908544531",
 162         "slow-9d1c5a079b0462986f1f",
 163         "slow-aa7262a821dabdcf04a6",
 164         "slow-b8a91d142b0d2af7f5ca",
 165         "slow-c79142457734bbc8d575",
 166         "slow-d736544545b90d83fe75",
 167         "slow-e3b9bdfaed7d1a606fdb",
 168         "slow-f3f1c02a9d006e5e1703",
 169         "square_series",
 170         "square_series-128k+",
 171         "trigram_128k+",
 172         "trigram_64k",
 173         "trigram_sum_128k+",
 174         "trigram_sum_64k",
 175         NULL
 176 };
 177
 178 struct lzx_pair bidirectional_pairs[] = {
 179
 180         {.name = "abc__100_repeats", /* [MS-XCA] 3.2 example 2. */
 181          .decompressed = BLOB_FROM_STRING(
 182                  "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
 183                  "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
 184                  "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
 185                  "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
 186                  "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
 187                  ),
 188          .compressed = BLOB_FROM_ARRAY(
 189                  /*
 190                   * The 'a', 'b', and 'c' bytes are 0x61, 0x62, 0x63. No other
 191                   * symbols occur. That means we need 48 0x00 bytes for the
 192                   * first 96 symbol-nybbles, then some short codes, then zeros
 193                   * again for the rest of the literals.
 194                   */
 195                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 196                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 197                  0,0,0,0,0, 0,0,0,
 198                  0x30, 0x23, /* a_ cb */
 199                  0,0,0,0,0, 0,0,0,0,0,
 200                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 201                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 100 bytes */
 202                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 203                  0,0,0,0,0, 0,0,0, /* here end the 0-255 literals (128 bytes) */
 204                  0x02, /* 'EOF' symbol 256 (byte 128 low) */
 205                  0,0,0,0,0, 0,0,0,0,0, 0,                    /* 140 bytes */
 206                  0,0,0,
 207                  0x20, /* codepoint 287 (byte 143 high) */
 208                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,         /* 160 bytes */
 209                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 210                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 211                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 212                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 bytes */
 213                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,
 214                  /*
 215                   * So that's the tree.
 216                   *
 217                   * length 2 codes for 'c', EOF, 287
 218                   * length 3 for 'a', 'b'.
 219                   *
 220                   * c:   00
 221                   * EOF: 01
 222                   * 287: 10
 223                   * a:   110
 224                   * b:   111
 225                   *
 226                   * thus the literal string "abc" is 110-111-00.
 227                   *
 228                   * Now for the lz77 match definitions for EOF and 287.
 229                   *
 230                   * Why 287? It encodes the match distance and offset.
 231                   *
 232                   * 287 - 256 = 31
 233                   *
 234                   * _length = 31 % 16 = 15
 235                   * _distance = 31 / 16 = 1
 236                   *
 237                   * (it's easier to look at the hex, 0x11f:
 238                   * 1xx means a match; x1x is _distance; xxf is _length)
 239                   *
 240                   * _distance 1 means a two bit distance (10 or 11; 2 or 3).
 241                   * That means the next bit will be the least significant bit
 242                   * of distance (1 in this case, meaning distance 3).
 243                   *
 244                   * if _length is 15, real length is included inline.
 245                   *
 246                   * 'EOF' == 256 means _length = 0, _distance = 0.
 247                   *
 248                   * _distance 0 means 1, so no further bits needed.
 249                   * _length 0 means length 3.
 250                   *
 251                   * but when used as EOF, this doesn't matter.
 252                   */
 253                  0xa8, 0xdc, 0x00, 0x00, 0xff, 0x26, 0x01
 254                   /* These remaining bytes are:
 255                   *
 256                   * 10101000 11011100 00000000 00000000 11111111
 257                   * 00100110 00000001
 258                   *
 259                   * and we read them as 16 chunks (i.e. flipping odd/even order)
 260                   *
 261                   * 110-111-00  10-1-01-000
 262                   *   a  b   c 287 | EOF
 263                   *                |
 264                   *                this is the 287 distance low bit.
 265                   *
 266                   * The last 3 bits are not used. The 287 length is sort of
 267                   * out of band, coming up soon (because 287 encodes length
 268                   * 15; most codes do not do this).
 269                   *
 270                   * 00000000 00000000
 271                   *
 272                   * This padding is there because the first 32 bits are read
 273                   * at the beginning of decoding. If there were more things to
 274                   * be encoded, they would be in here.
 275                   *
 276                   * 11111111
 277                   *
 278                   * This byte is pulled as the length for the 287 match.
 279                   * Because it is 0xff, we pull a further 2 bytes for the
 280                   * actual length, i.e. a 16 bit number greater than 270.
 281                   *
 282                   * 00000001 00100110
 283                   *
 284                   * that is 0x126 = 294 = the match length - 3 (because we're
 285                   * encoding ["abc", <copy from 3 back, 297 chars>, EOF]).
 286                   *
 287                   */
 288                  )
 289         },
 290         {.name = "abcdefghijklmnopqrstuvwxyz", /* [MS-XCA] 3.2 example 1. */
 291          .decompressed = BLOB_FROM_STRING("abcdefghijklmnopqrstuvwxyz"),
 292          .compressed = BLOB_FROM_ARRAY(
 293                  /*
 294                   * In this case there are no matches encoded as there are no
 295                   * repeated symbols. Including the EOF, there are 27 symbols
 296                   * all occurring exactly as frequently as each other (once).
 297                   * From that we would expect the codes to be mostly 5 bits
 298                   * long, because 27 < 2^5 (32), but greater than 2^4. And
 299                   * that's what we see.
 300                   */
 301                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 302                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 303                  0,0,0,0,0, 0,0,0,
 304                  /* 14 non-zero bytes for 26 letters/nibbles */
 305                  0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
 306                  0x55, 0x55, 0x55, 0x45, 0x44, 0x04,
 307                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,   /* 80 */
 308                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 309                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 310                  0,0,0,0,0, 0,0,0,
 311                  0x04,                                 /* 0x100 EOF */
 312                  /* no matches */
 313                  0,0,0,0,0, 0,0,0,0,0, 0,                    /* 140 */
 314                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 315                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 316                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 317                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0,
 318                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 */
 319                  0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,
 320
 321                  0xd8, 0x52, 0x3e, 0xd7, 0x94, 0x11, 0x5b, 0xe9,
 322                  0x19, 0x5f, 0xf9, 0xd6, 0x7c, 0xdf, 0x8d, 0x04,
 323                  0x00, 0x00, 0x00, 0x00)
 324         },
 325         {0}
 326 };
 327
 328
 329 static void test_lzxpress_huffman_decompress(void **state)
 330 {
 331         size_t i;
 332         ssize_t written;
 333         uint8_t *dest = NULL;
 334         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 335         for (i = 0; bidirectional_pairs[i].name != NULL; i++) {
 336                 struct lzx_pair p = bidirectional_pairs[i];
 337                 dest = talloc_array(mem_ctx, uint8_t, p.decompressed.length);
 338
 339                 debug_message("%s compressed %zu decomp %zu\n", p.name,
 340                               p.compressed.length,
 341                               p.decompressed.length);
 342
 343                 written = lzxpress_huffman_decompress(p.compressed.data,
 344                                                       p.compressed.length,
 345                                                       dest,
 346                                                       p.decompressed.length);
 347                 assert_int_not_equal(written, -1);
 348                 assert_int_equal(written, p.decompressed.length);
 349
 350                 assert_memory_equal(dest, p.decompressed.data, p.decompressed.length);
 351                 talloc_free(dest);
 352         }
 353 }
 354
 355 static void test_lzxpress_huffman_compress(void **state)
 356 {
 357         size_t i;
 358         ssize_t written;
 359         uint8_t *dest = NULL;
 360         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 361         for (i = 0; bidirectional_pairs[i].name != NULL; i++) {
 362                 struct lzx_pair p = bidirectional_pairs[i];
 363                 debug_message("%s compressed %zu decomp %zu\n", p.name,
 364                               p.compressed.length,
 365                               p.decompressed.length);
 366
 367                 written = lzxpress_huffman_compress_talloc(mem_ctx,
 368                                                            p.decompressed.data,
 369                                                            p.decompressed.length,
 370                                                            &dest);
 371
 372                 assert_int_not_equal(written, -1);
 373                 assert_int_equal(written, p.compressed.length);
 374                 assert_memory_equal(dest, p.compressed.data, p.compressed.length);
 375                 talloc_free(dest);
 376         }
 377 }
 378
 379
 380 static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx,
 381                                     const char *filename)
 382 {
 383         DATA_BLOB b = {0};
 384         FILE *fh = fopen(filename, "rb");
 385         int ret;
 386         struct stat s;
 387         size_t len;
 388         if (fh == NULL) {
 389                 debug_message("could not open '%s'\n", filename);
 390                 return b;
 391         }
 392         ret = fstat(fileno(fh), &s);
 393         if (ret != 0) {
 394                 fclose(fh);
 395                 return b;
 396         }
 397         b.data = talloc_array(mem_ctx, uint8_t, s.st_size);
 398         if (b.data == NULL) {
 399                 fclose(fh);
 400                 return b;
 401         }
 402         len = fread(b.data, 1, s.st_size, fh);
 403         if (ferror(fh) || len != s.st_size) {
 404                 TALLOC_FREE(b.data);
 405         } else {
 406                 b.length = len;
 407         }
 408         fclose(fh);
 409         return b;
 410 }
 411
 412
 413
 414 static void test_lzxpress_huffman_decompress_files(void **state)
 415 {
 416         size_t i;
 417         int score = 0;
 418         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 419         for (i = 0; file_names[i] != NULL; i++) {
 420                 char filename[200];
 421                 uint8_t *dest = NULL;
 422                 ssize_t written;
 423                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
 424                 struct lzx_pair p = {
 425                         .name = file_names[i]
 426                 };
 427
 428                 debug_message("%s\n", p.name);
 429
 430                 snprintf(filename, sizeof(filename),
 431                          "%s/%s.decomp", DECOMP_DIR, p.name);
 432
 433                 p.decompressed = datablob_from_file(tmp_ctx, filename);
 434                 assert_non_null(p.decompressed.data);
 435
 436                 snprintf(filename, sizeof(filename),
 437                          "%s/%s.lzhuff", COMP_DIR, p.name);
 438
 439                 p.compressed = datablob_from_file(tmp_ctx, filename);
 440                 assert_non_null(p.compressed.data);
 441
 442                 dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
 443                 debug_start_timer();
 444                 written = lzxpress_huffman_decompress(p.compressed.data,
 445                                                       p.compressed.length,
 446                                                       dest,
 447                                                       p.decompressed.length);
 448                 debug_end_timer("decompress", p.decompressed.length);
 449                 if (written != -1 &&
 450                     written == p.decompressed.length &&
 451                     memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
 452                         debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
 453                         score++;
 454                 } else {
 455                         debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
 456                                       p.name);
 457                         debug_message("size %zd vs reference %zu\n",
 458                                       written, p.decompressed.length);
 459                 }
 460                 talloc_free(tmp_ctx);
 461         }
 462         debug_message("%d/%zu correct\n", score, i);
 463         assert_int_equal(score, i);
 464 }
 465
 466
 467 static void test_lzxpress_huffman_decompress_more_compressed_files(void **state)
 468 {
 469         /*
 470          * This tests the decompression of files that have been compressed on
 471          * Windows with the level turned up (to 1, default for MS-XCA is 0).
 472          *
 473          * The format is identical, but it will have tried harder to find
 474          * matches.
 475          */
 476         size_t i;
 477         int score = 0;
 478         int found = 0;
 479         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 480         for (i = 0; file_names[i] != NULL; i++) {
 481                 char filename[200];
 482                 uint8_t *dest = NULL;
 483                 ssize_t written;
 484                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
 485                 struct lzx_pair p = {
 486                         .name = file_names[i]
 487                 };
 488
 489                 debug_message("%s\n", p.name);
 490
 491                 snprintf(filename, sizeof(filename),
 492                          "%s/%s.decomp", DECOMP_DIR, p.name);
 493
 494                 p.decompressed = datablob_from_file(tmp_ctx, filename);
 495                 assert_non_null(p.decompressed.data);
 496
 497                 snprintf(filename, sizeof(filename),
 498                          "%s/%s.lzhuff", MORE_COMP_DIR, p.name);
 499
 500                 p.compressed = datablob_from_file(tmp_ctx, filename);
 501                 if (p.compressed.data == NULL) {
 502                         /*
 503                          * We don't have all the vectors in the
 504                          * more-compressed directory, which is OK, we skip
 505                          * them.
 506                          */
 507                         continue;
 508                 }
 509                 found++;
 510                 dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
 511                 debug_start_timer();
 512                 written = lzxpress_huffman_decompress(p.compressed.data,
 513                                                       p.compressed.length,
 514                                                       dest,
 515                                                       p.decompressed.length);
 516                 debug_end_timer("decompress", p.decompressed.length);
 517                 if (written == p.decompressed.length &&
 518                     memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
 519                         debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
 520                         score++;
 521                 } else {
 522                         debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
 523                                       p.name);
 524                         debug_message("size %zd vs reference %zu\n",
 525                                       written, p.decompressed.length);
 526                 }
 527                 talloc_free(tmp_ctx);
 528         }
 529         debug_message("%d/%d correct\n", score, found);
 530         assert_int_equal(score, found);
 531 }
 532
 533
 534 /*
 535  * attempt_round_trip() tests whether a data blob can survive a compression
 536  * and decompression cycle. If save_name is not NULL and LZXHUFF_DEBUG_FILES
 537  * evals to true, the various stages are saved in files with that name and the
 538  * '-original', '-compressed', and '-decompressed' suffixes. If ref_compressed
 539  * has data, it'll print a message saying whether the compressed data matches
 540  * that.
 541  */
 542
 543 static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx,
 544                                   DATA_BLOB original,
 545                                   const char *save_name,
 546                                   DATA_BLOB ref_compressed)
 547 {
 548         TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
 549         DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL,
 550                                                 original.length * 4 / 3 + 260);
 551         DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL,
 552                                                 original.length);
 553         ssize_t comp_written, decomp_written;
 554         debug_start_timer();
 555         comp_written = lzxpress_huffman_compress_talloc(tmp_ctx,
 556                                                         original.data,
 557                                                         original.length,
 558                                                         &compressed.data);
 559         debug_end_timer("compress", original.length);
 560         if (comp_written <= 0) {
 561                 talloc_free(tmp_ctx);
 562                 return -1;
 563         }
 564
 565         if (ref_compressed.data != NULL) {
 566                 /*
 567                  * This is informational, not an assertion; there are
 568                  * ~infinite legitimate ways to compress the data, many as
 569                  * good as each other (think of compression as a language, not
 570                  * a format).
 571                  */
 572                 debug_message("compressed size %zd vs reference %zu\n",
 573                               comp_written, ref_compressed.length);
 574
 575                 if (comp_written == compressed.length &&
 576                     memcmp(compressed.data, ref_compressed.data, comp_written) == 0) {
 577                         debug_message("\033[1;32mbyte identical!\033[0m\n");
 578                 }
 579         }
 580         debug_start_timer();
 581         decomp_written = lzxpress_huffman_decompress(compressed.data,
 582                                                      comp_written,
 583                                                      decompressed.data,
 584                                                      original.length);
 585         debug_end_timer("decompress", original.length);
 586         if (save_name != NULL && LZXHUFF_DEBUG_FILES) {
 587                 char s[300];
 588                 FILE *fh = NULL;
 589
 590                 snprintf(s, sizeof(s), "%s-original", save_name);
 591                 fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
 592                 fh = fopen(s, "w");
 593                 fwrite(original.data, 1, original.length, fh);
 594                 fclose(fh);
 595
 596                 snprintf(s, sizeof(s), "%s-compressed", save_name);
 597                 fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s);
 598                 fh = fopen(s, "w");
 599                 fwrite(compressed.data, 1, comp_written, fh);
 600                 fclose(fh);
 601                 /*
 602                  * We save the decompressed file using original.length, not
 603                  * the returned size. If these differ, the returned size will
 604                  * be -1. By saving the whole buffer we can see at what point
 605                  * it went haywire.
 606                  */
 607                 snprintf(s, sizeof(s), "%s-decompressed", save_name);
 608                 fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
 609                 fh = fopen(s, "w");
 610                 fwrite(decompressed.data, 1, original.length, fh);
 611                 fclose(fh);
 612         }
 613
 614         if (original.length != decomp_written ||
 615             memcmp(decompressed.data,
 616                    original.data,
 617                    original.length) != 0) {
 618                 debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n",
 619                               decomp_written,
 620                               original.length);
 621                 talloc_free(tmp_ctx);
 622                 return -1;
 623         }
 624         talloc_free(tmp_ctx);
 625         return comp_written;
 626 }
 627
 628
 629 static void test_lzxpress_huffman_round_trip(void **state)
 630 {
 631         size_t i;
 632         int score = 0;
 633         ssize_t compressed_total = 0;
 634         ssize_t reference_total = 0;
 635         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 636         for (i = 0; file_names[i] != NULL; i++) {
 637                 char filename[200];
 638                 char *debug_files = NULL;
 639                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
 640                 ssize_t comp_size;
 641                 struct lzx_pair p = {
 642                         .name = file_names[i]
 643                 };
 644                 debug_message("-------------------\n");
 645                 debug_message("%s\n", p.name);
 646
 647                 snprintf(filename, sizeof(filename),
 648                          "%s/%s.decomp", DECOMP_DIR, p.name);
 649
 650                 p.decompressed = datablob_from_file(tmp_ctx, filename);
 651                 assert_non_null(p.decompressed.data);
 652
 653                 snprintf(filename, sizeof(filename),
 654                          "%s/%s.lzhuff", COMP_DIR, p.name);
 655
 656                 p.compressed = datablob_from_file(tmp_ctx, filename);
 657                 if (p.compressed.data == NULL) {
 658                         debug_message(
 659                                 "Could not load %s reference file %s\n",
 660                                 p.name, filename);
 661                         debug_message("%s decompressed %zu\n", p.name,
 662                                       p.decompressed.length);
 663                 } else {
 664                         debug_message("%s: reference compressed %zu decomp %zu\n",
 665                                       p.name,
 666                                       p.compressed.length,
 667                                       p.decompressed.length);
 668                 }
 669                 if (1) {
 670                         /*
 671                          * We're going to save copies in /tmp.
 672                          */
 673                         snprintf(filename, sizeof(filename),
 674                                  "/tmp/lzxhuffman-%s", p.name);
 675                         debug_files = filename;
 676                 }
 677
 678                 comp_size = attempt_round_trip(mem_ctx, p.decompressed,
 679                                                debug_files,
 680                                                p.compressed);
 681                 if (comp_size > 0) {
 682                         debug_message("\033[1;32mround trip!\033[0m\n");
 683                         score++;
 684                         if (p.compressed.length) {
 685                                 compressed_total += comp_size;
 686                                 reference_total += p.compressed.length;
 687                         }
 688                 }
 689                 talloc_free(tmp_ctx);
 690         }
 691         debug_message("%d/%zu correct\n", score, i);
 692         print_message("\033[1;34mtotal compressed size: %zu\033[0m\n",
 693                       compressed_total);
 694         print_message("total reference size:  %zd \n", reference_total);
 695         print_message("diff:                  %7zd \n",
 696                       reference_total - compressed_total);
 697         assert_true(reference_total != 0);
 698         print_message("ratio: \033[1;3%dm%.2f\033[0m \n",
 699                       2 + (compressed_total >= reference_total),
 700                       ((double)compressed_total) / reference_total);
 701         /*
 702          * Assert that the compression is *about* as good as Windows. Of course
 703          * it doesn't matter if we do better, but mysteriously getting better
 704          * is usually a sign that something is wrong.
 705          *
 706          * At the time of writing, compressed_total is 2674004, or 10686 more
 707          * than the Windows reference total. That's < 0.5% difference, we're
 708          * asserting at 2%.
 709          */
 710         assert_true(labs(compressed_total - reference_total) <
 711                     compressed_total / 50);
 712
 713         assert_int_equal(score, i);
 714         talloc_free(mem_ctx);
 715 }
 716
 717 /*
 718  * Bob Jenkins' Small Fast RNG.
 719  *
 720  * We don't need it to be this good, but we do need it to be reproduceable
 721  * across platforms, which rand() etc aren't.
 722  *
 723  * http://burtleburtle.net/bob/rand/smallprng.html
 724  */
 725
 726 struct jsf_rng {
 727         uint32_t a;
 728         uint32_t b;
 729         uint32_t c;
 730         uint32_t d;
 731 };
 732
 733 #define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
 734
 735 static uint32_t jsf32(struct jsf_rng *x) {
 736         uint32_t e = x->a - ROTATE32(x->b, 27);
 737         x->a = x->b ^ ROTATE32(x->c, 17);
 738         x->b = x->c + x->d;
 739         x->c = x->d + e;
 740         x->d = e + x->a;
 741         return x->d;
 742 }
 743
 744 static void jsf32_init(struct jsf_rng *x, uint32_t seed) {
 745         size_t i;
 746         x->a = 0xf1ea5eed;
 747         x->b = x->c = x->d = seed;
 748         for (i = 0; i < 20; ++i) {
 749                 jsf32(x);
 750         }
 751 }
 752
 753
 754 static void test_lzxpress_huffman_long_gpl_round_trip(void **state)
 755 {
 756         /*
 757          * We use a kind of model-free Markov model to generate a massively
 758          * extended pastiche of the GPLv3 (chosen because it is right there in
 759          * "COPYING" and won't change often).
 760          *
 761          * The point is to check a round trip of a very long message with
 762          * multiple repetitions on many scales, without having to add a very
 763          * large file.
 764          */
 765         size_t i, j, k;
 766         uint8_t c;
 767         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 768         DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING");
 769         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
 770         DATA_BLOB ref = {0};
 771         ssize_t comp_size;
 772         struct jsf_rng rng;
 773
 774         if (gpl.data == NULL) {
 775                 print_message("could not read COPYING\n");
 776                 fail();
 777         }
 778
 779         jsf32_init(&rng, 1);
 780
 781         j = 1;
 782         original.data[0] = gpl.data[0];
 783         for (i = 1; i < original.length; i++) {
 784                 size_t m;
 785                 char p = original.data[i - 1];
 786                 c = gpl.data[j];
 787                 original.data[i] = c;
 788                 j++;
 789                 m = (j + jsf32(&rng)) % (gpl.length - 50);
 790                 for (k = m; k < m + 30; k++) {
 791                         if (p == gpl.data[k] &&
 792                             c == gpl.data[k + 1]) {
 793                                 j = k + 2;
 794                                 break;
 795                         }
 796                 }
 797                 if (j == gpl.length) {
 798                         j = 1;
 799                 }
 800         }
 801
 802         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref);
 803         assert_true(comp_size > 0);
 804         assert_true(comp_size < original.length);
 805
 806         talloc_free(mem_ctx);
 807 }
 808
 809
 810 static void test_lzxpress_huffman_long_random_graph_round_trip(void **state)
 811 {
 812         size_t i;
 813         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 814         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
 815         DATA_BLOB ref = {0};
 816         /*
 817          * There's a random trigram graph, with each pair of sequential bytes
 818          * pointing to a successor. This would probably fall into a fairly
 819          * simple loop, but we introduce damage into the system, randomly
 820          * flipping about 1 bit in 64.
 821          *
 822          * The result is semi-structured and compressible.
 823          */
 824         uint8_t *d = original.data;
 825         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
 826         uint32_t *table32 = (void*)table;
 827         ssize_t comp_size;
 828         struct jsf_rng rng;
 829
 830         jsf32_init(&rng, 1);
 831         for (i = 0; i < (65536 / 4); i++) {
 832                 table32[i] = jsf32(&rng);
 833         }
 834
 835         d[0] = 'a';
 836         d[1] = 'b';
 837
 838         for (i = 2; i < original.length; i++) {
 839                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
 840                 uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng);
 841                 damage &= (damage >> 16);
 842                 k ^= damage & 0xffff;
 843                 d[i] = table[k];
 844         }
 845
 846         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref);
 847         assert_true(comp_size > 0);
 848         assert_true(comp_size < original.length);
 849
 850         talloc_free(mem_ctx);
 851 }
 852
 853
 854 static void test_lzxpress_huffman_chaos_graph_round_trip(void **state)
 855 {
 856         size_t i;
 857         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 858         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
 859         DATA_BLOB ref = {0};
 860         /*
 861          * There's a random trigram graph, with each pair of sequential bytes
 862          * pointing to a successor. This would probably fall into a fairly
 863          * simple loop, but we keep changing the graph. The result is long
 864          * periods of stability separatd by bursts of noise.
 865          */
 866         uint8_t *d = original.data;
 867         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
 868         uint32_t *table32 = (void*)table;
 869         ssize_t comp_size;
 870         struct jsf_rng rng;
 871
 872         jsf32_init(&rng, 1);
 873         for (i = 0; i < (65536 / 4); i++) {
 874                 table32[i] = jsf32(&rng);
 875         }
 876
 877         d[0] = 'a';
 878         d[1] = 'b';
 879
 880         for (i = 2; i < original.length; i++) {
 881                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
 882                 uint32_t damage = jsf32(&rng);
 883                 d[i] = table[k];
 884                 if ((damage >> 29) == 0) {
 885                         uint16_t index = damage & 0xffff;
 886                         uint8_t value = (damage >> 16) & 0xff;
 887                         table[index] = value;
 888                 }
 889         }
 890
 891         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref);
 892         assert_true(comp_size > 0);
 893         assert_true(comp_size < original.length);
 894
 895         talloc_free(mem_ctx);
 896 }
 897
 898
 899 static void test_lzxpress_huffman_sparse_random_graph_round_trip(void **state)
 900 {
 901         size_t i;
 902         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 903         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
 904         DATA_BLOB ref = {0};
 905         /*
 906          * There's a random trigram graph, with each pair of sequential bytes
 907          * pointing to a successor. This will fall into a fairly simple loops,
 908          * but we introduce damage into the system, randomly mangling about 1
 909          * byte in 65536.
 910          *
 911          * The result has very long repetitive runs, which should lead to
 912          * oversized blocks.
 913          */
 914         uint8_t *d = original.data;
 915         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
 916         uint32_t *table32 = (void*)table;
 917         ssize_t comp_size;
 918         struct jsf_rng rng;
 919
 920         jsf32_init(&rng, 3);
 921         for (i = 0; i < (65536 / 4); i++) {
 922                 table32[i] = jsf32(&rng);
 923         }
 924
 925         d[0] = 'a';
 926         d[1] = 'b';
 927
 928         for (i = 2; i < original.length; i++) {
 929                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
 930                 uint32_t damage = jsf32(&rng);
 931                 if ((damage & 0xffff0000) == 0) {
 932                         k ^= damage & 0xffff;
 933                 }
 934                 d[i] = table[k];
 935         }
 936
 937         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref);
 938         assert_true(comp_size > 0);
 939         assert_true(comp_size < original.length);
 940
 941         talloc_free(mem_ctx);
 942 }
 943
 944
 945 static void test_lzxpress_huffman_random_noise_round_trip(void **state)
 946 {
 947         size_t i;
 948         size_t len = 1024 * 1024;
 949         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 950         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
 951         DATA_BLOB ref = {0};
 952         ssize_t comp_size;
 953         /*
 954          * We are filling this up with incompressible noise, but we can assert
 955          * quite tight bounds on how badly it will fail to compress.
 956          *
 957          * Specifically, with randomly distributed codes, the Huffman table
 958          * should come out as roughly even, averaging 8 bit codes. Then there
 959          * will be a 256 byte table every 64k, which is a 1/256 overhead (i.e.
 960          * the compressed length will be 257/256 the original *on average*).
 961          * We assert it is less than 1 in 200 but more than 1 in 300.
 962          */
 963         uint32_t *d32 = (uint32_t*)((void*)original.data);
 964         struct jsf_rng rng;
 965         jsf32_init(&rng, 2);
 966
 967         for (i = 0; i < (len / 4); i++) {
 968                 d32[i] = jsf32(&rng);
 969         }
 970
 971         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref);
 972         assert_true(comp_size > 0);
 973         assert_true(comp_size > original.length + original.length / 300);
 974         assert_true(comp_size < original.length + original.length / 200);
 975         debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
 976                       len, comp_size, ((double)comp_size) / len);
 977
 978         talloc_free(mem_ctx);
 979 }
 980
 981
 982 static void test_lzxpress_huffman_overlong_matches(void **state)
 983 {
 984         size_t i, j = 0;
 985         TALLOC_CTX *mem_ctx = talloc_new(NULL);
 986         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024);
 987         DATA_BLOB ref = {0};
 988         uint8_t *d = original.data;
 989         char filename[300];
 990         /*
 991          * We are testing with something like "aaaaaaaaaaaaaaaaaaaaaaabbbbb"
 992          * where typically the number of "a"s is > 65536, and the number of
 993          * "b"s is < 42.
 994          */
 995         ssize_t na[] = {65535, 65536, 65537, 65559, 65575, 200000, -1};
 996         ssize_t nb[] = {1, 2, 20, 39, 40, 41, 42, -1};
 997         int score = 0;
 998         ssize_t comp_size;
 999
1000         for (i = 0; na[i] >= 0; i++) {
1001                 ssize_t a = na[i];
1002                 memset(d, 'a', a);
1003                 for (j = 0; nb[j] >= 0; j++) {
1004                         ssize_t b = nb[j];
1005                         memset(d + a, 'b', b);
1006                         original.length = a + b;
1007                         snprintf(filename, sizeof(filename),
1008                                  "/tmp/overlong-%zd-%zd", a, b);
1009                         comp_size = attempt_round_trip(mem_ctx,
1010                                                        original,
1011                                                        filename, ref);
1012                         if (comp_size > 0) {
1013                                 score++;
1014                         }
1015                 }
1016         }
1017         debug_message("%d/%zu correct\n", score, i * j);
1018         assert_int_equal(score, i * j);
1019         talloc_free(mem_ctx);
1020 }
1021
1022
1023 static void test_lzxpress_huffman_overlong_matches_abc(void **state)
1024 {
1025         size_t i, j = 0, k = 0;
1026         TALLOC_CTX *mem_ctx = talloc_new(NULL);
1027         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024);
1028         DATA_BLOB ref = {0};
1029         uint8_t *d = original.data;
1030         char filename[300];
1031         /*
1032          * We are testing with something like "aaaabbbbcc" where typically
1033          * the number of "a"s + "b"s is around 65536, and the number of "c"s
1034          * is < 43.
1035          */
1036         ssize_t nab[] = {1, 21, 32767, 32768, 32769, -1};
1037         ssize_t nc[] = {1, 2, 20, 39, 40, 41, 42, -1};
1038         int score = 0;
1039         ssize_t comp_size;
1040
1041         for (i = 0; nab[i] >= 0; i++) {
1042                 ssize_t a = nab[i];
1043                 memset(d, 'a', a);
1044                 for (j = 0; nab[j] >= 0; j++) {
1045                         ssize_t b = nab[j];
1046                         memset(d + a, 'b', b);
1047                         for (k = 0; nc[k] >= 0; k++) {
1048                                 ssize_t c = nc[k];
1049                                 memset(d + a + b, 'c', c);
1050                                 original.length = a + b + c;
1051                                 snprintf(filename, sizeof(filename),
1052                                          "/tmp/overlong-abc-%zd-%zd-%zd",
1053                                          a, b, c);
1054                                 comp_size = attempt_round_trip(mem_ctx,
1055                                                                original,
1056                                                                filename, ref);
1057                                 if (comp_size > 0) {
1058                                         score++;
1059                                 }
1060                         }
1061                 }
1062         }
1063         debug_message("%d/%zu correct\n", score, i * j * k);
1064         assert_int_equal(score, i * j * k);
1065         talloc_free(mem_ctx);
1066 }
1067
1068
1069 static void test_lzxpress_huffman_extremely_compressible_middle(void **state)
1070 {
1071         size_t len = 192 * 1024;
1072         TALLOC_CTX *mem_ctx = talloc_new(NULL);
1073         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
1074         DATA_BLOB ref = {0};
1075         ssize_t comp_size;
1076         /*
1077          * When a middle block (i.e. not the first and not the last of >= 3),
1078          * can be entirely expressed as a match starting in the previous
1079          * block, the Huffman tree would end up with 1 element, which does not
1080          * work for the code construction. It really wants to use both bits.
1081          * So we need to ensure we have some way of dealing with this.
1082          */
1083         memset(original.data, 'a', 0x10000 - 1);
1084         memset(original.data + 0x10000 - 1, 'b', 0x10000 + 1);
1085         memset(original.data + 0x20000, 'a', 0x10000);
1086         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/compressible-middle", ref);
1087         assert_true(comp_size > 0);
1088         assert_true(comp_size < 1024);
1089         debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
1090                       len, comp_size, ((double)comp_size) / len);
1091
1092         talloc_free(mem_ctx);
1093 }
1094
1095
1096 static void test_lzxpress_huffman_max_length_limit(void **state)
1097 {
1098         size_t len = 65 * 1024 * 1024;
1099         TALLOC_CTX *mem_ctx = talloc_new(NULL);
1100         DATA_BLOB original = data_blob_talloc_zero(mem_ctx, len);
1101         DATA_BLOB ref = {0};
1102         ssize_t comp_size;
1103         /*
1104          * Reputedly Windows has a 64MB limit in the maximum match length it
1105          * will encode. We follow this, and test that here with nearly 65 MB
1106          * of zeros between two letters; this should be encoded in three
1107          * blocks:
1108          *
1109          * 1. 'a', 64M × '\0'
1110          * 2. (1M - 2) × '\0' -- finishing off what would have been the same match
1111          * 3. 'b' EOF
1112          *
1113          * Which we can assert by saying the length is > 768, < 1024.
1114          */
1115         original.data[0] = 'a';
1116         original.data[len - 1] = 'b';
1117         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/max-length-limit", ref);
1118         assert_true(comp_size > 0x300);
1119         assert_true(comp_size < 0x400);
1120         debug_message("original size %zu; compressed size %zd; ratio %.3f\n",
1121                       len, comp_size, ((double)comp_size) / len);
1122
1123         talloc_free(mem_ctx);
1124 }
1125
1126
1127 static void test_lzxpress_huffman_short_boring_strings(void **state)
1128 {
1129         size_t len = 64 * 1024;
1130         TALLOC_CTX *mem_ctx = talloc_new(NULL);
1131         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
1132         DATA_BLOB ref = {0};
1133         ssize_t comp_size;
1134         ssize_t lengths[] = {
1135                 1, 2, 20, 39, 40, 41, 42, 256, 270, 273, 274, 1000, 64000, -1};
1136         char filename[300];
1137         size_t i;
1138         /*
1139          * How do short repetitive strings work? We're poking at the limit
1140          * around which LZ77 comprssion is turned on.
1141          *
1142          * For this test we don't change the blob memory between runs, just
1143          * the declared length.
1144          */
1145         memset(original.data, 'a', len);
1146         for (i = 0; lengths[i] >= 0; i++) {
1147                 original.length = lengths[i];
1148                 snprintf(filename, sizeof(filename),
1149                          "/tmp/short-boring-%zu",
1150                          original.length);
1151                 comp_size = attempt_round_trip(mem_ctx, original, filename, ref);
1152                 if (original.length < 41) {
1153                         assert_true(comp_size > 256 + original.length / 8);
1154                 } else if (original.length < 274) {
1155                         assert_true(comp_size == 261);
1156                 } else {
1157                         assert_true(comp_size == 263);
1158                 }
1159                 assert_true(comp_size < 261 + original.length / 8);
1160         }
1161         /* let's just show we didn't change the original */
1162         for (i = 0; i < len; i++) {
1163                 if (original.data[i] != 'a') {
1164                         fail_msg("input data[%zu] was changed! (%2x, expected %2x)\n",
1165                                  i, original.data[i], 'a');
1166                 }
1167         }
1168
1169         talloc_free(mem_ctx);
1170 }
1171
1172
1173 static void test_lzxpress_huffman_compress_empty_or_null(void **state)
1174 {
1175         /*
1176          * We expect these to fail with a -1, except the last one, which does
1177          * the real thing.
1178          */
1179         ssize_t ret;
1180         const uint8_t *input = bidirectional_pairs[0].decompressed.data;
1181         size_t ilen = bidirectional_pairs[0].decompressed.length;
1182         size_t olen = bidirectional_pairs[0].compressed.length;
1183         uint8_t output[olen];
1184         struct lzxhuff_compressor_mem cmp_mem;
1185
1186         ret = lzxpress_huffman_compress(&cmp_mem, input, 0, output, olen);
1187         assert_int_equal(ret, -1LL);
1188         ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, 0);
1189         assert_int_equal(ret, -1LL);
1190
1191         ret = lzxpress_huffman_compress(&cmp_mem, NULL, ilen, output, olen);
1192         assert_int_equal(ret, -1LL);
1193         ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, NULL, olen);
1194         assert_int_equal(ret, -1LL);
1195         ret = lzxpress_huffman_compress(NULL, input, ilen, output, olen);
1196         assert_int_equal(ret, -1LL);
1197
1198         ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, olen);
1199         assert_int_equal(ret, olen);
1200 }
1201
1202
1203 static void test_lzxpress_huffman_decompress_empty_or_null(void **state)
1204 {
1205         /*
1206          * We expect these to fail with a -1, except the last one.
1207          */
1208         ssize_t ret;
1209         const uint8_t *input = bidirectional_pairs[0].compressed.data;
1210         size_t ilen = bidirectional_pairs[0].compressed.length;
1211         size_t olen = bidirectional_pairs[0].decompressed.length;
1212         uint8_t output[olen];
1213
1214         ret = lzxpress_huffman_decompress(input, 0, output, olen);
1215         assert_int_equal(ret, -1LL);
1216         ret = lzxpress_huffman_decompress(input, ilen, output, 0);
1217         assert_int_equal(ret, -1LL);
1218
1219         ret = lzxpress_huffman_decompress(NULL, ilen, output, olen);
1220         assert_int_equal(ret, -1LL);
1221         ret = lzxpress_huffman_decompress(input, ilen, NULL, olen);
1222         assert_int_equal(ret, -1LL);
1223
1224         ret = lzxpress_huffman_decompress(input, ilen, output, olen);
1225         assert_int_equal(ret, olen);
1226 }
1227
1228
1229 int main(void) {
1230         const struct CMUnitTest tests[] = {
1231                 cmocka_unit_test(test_lzxpress_huffman_short_boring_strings),
1232                 cmocka_unit_test(test_lzxpress_huffman_max_length_limit),
1233                 cmocka_unit_test(test_lzxpress_huffman_extremely_compressible_middle),
1234                 cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip),
1235                 cmocka_unit_test(test_lzxpress_huffman_chaos_graph_round_trip),
1236                 cmocka_unit_test(test_lzxpress_huffman_sparse_random_graph_round_trip),
1237                 cmocka_unit_test(test_lzxpress_huffman_round_trip),
1238                 cmocka_unit_test(test_lzxpress_huffman_decompress_files),
1239                 cmocka_unit_test(test_lzxpress_huffman_decompress_more_compressed_files),
1240                 cmocka_unit_test(test_lzxpress_huffman_compress),
1241                 cmocka_unit_test(test_lzxpress_huffman_decompress),
1242                 cmocka_unit_test(test_lzxpress_huffman_long_gpl_round_trip),
1243                 cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip),
1244                 cmocka_unit_test(test_lzxpress_huffman_random_noise_round_trip),
1245                 cmocka_unit_test(test_lzxpress_huffman_overlong_matches_abc),
1246                 cmocka_unit_test(test_lzxpress_huffman_overlong_matches),
1247                 cmocka_unit_test(test_lzxpress_huffman_decompress_empty_or_null),
1248                 cmocka_unit_test(test_lzxpress_huffman_compress_empty_or_null),
1249         };
1250         if (!isatty(1)) {
1251                 cmocka_set_message_output(CM_OUTPUT_SUBUNIT);
1252         }
1253
1254         return cmocka_run_group_tests(tests, NULL, NULL);
1255 }