src/liblzma/lzma/lzma_decoder.c

   1 // SPDX-License-Identifier: 0BSD
   2
   3 ///////////////////////////////////////////////////////////////////////////////
   4 //
   5 /// \file       lzma_decoder.c
   6 /// \brief      LZMA decoder
   7 ///
   8 //  Authors:    Igor Pavlov
   9 //              Lasse Collin
  10 //              Jia Tan
  11 //
  12 ///////////////////////////////////////////////////////////////////////////////
  13
  14 #include "lz_decoder.h"
  15 #include "lzma_common.h"
  16 #include "lzma_decoder.h"
  17 #include "range_decoder.h"
  18
  19 // The macros unroll loops with switch statements.
  20 // Silence warnings about missing fall-through comments.
  21 #if TUKLIB_GNUC_REQ(7, 0)
  22 #       pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
  23 #endif
  24
  25 // Minimum number of input bytes to safely decode one LZMA symbol.
  26 // The worst case is that we decode 22 bits using probabilities and 26
  27 // direct bits. This may decode at maximum 20 bytes of input.
  28 #define LZMA_IN_REQUIRED 20
  29
  30
  31 // Macros for (somewhat) size-optimized code.
  32 // This is used to decode the match length (how many bytes must be repeated
  33 // from the dictionary). This version is used in the Resumable mode and
  34 // does not unroll any loops.
  35 #define len_decode(target, ld, pos_state, seq) \
  36 do { \
  37 case seq ## _CHOICE: \
  38         rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
  39                 rc_update_0(ld.choice); \
  40                 probs = ld.low[pos_state];\
  41                 limit = LEN_LOW_SYMBOLS; \
  42                 target = MATCH_LEN_MIN; \
  43         } else { \
  44                 rc_update_1(ld.choice); \
  45 case seq ## _CHOICE2: \
  46                 rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
  47                         rc_update_0(ld.choice2); \
  48                         probs = ld.mid[pos_state]; \
  49                         limit = LEN_MID_SYMBOLS; \
  50                         target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
  51                 } else { \
  52                         rc_update_1(ld.choice2); \
  53                         probs = ld.high; \
  54                         limit = LEN_HIGH_SYMBOLS; \
  55                         target = MATCH_LEN_MIN + LEN_LOW_SYMBOLS \
  56                                         + LEN_MID_SYMBOLS; \
  57                 } \
  58         } \
  59         symbol = 1; \
  60 case seq ## _BITTREE: \
  61         do { \
  62                 rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
  63         } while (symbol < limit); \
  64         target += symbol - limit; \
  65 } while (0)
  66
  67
  68 // This is the faster version of the match length decoder that does not
  69 // worry about being resumable. It unrolls the bittree decoding loop.
  70 #define len_decode_fast(target, ld, pos_state) \
  71 do { \
  72         symbol = 1; \
  73         rc_if_0(ld.choice) { \
  74                 rc_update_0(ld.choice); \
  75                 rc_bittree3(ld.low[pos_state], \
  76                                 -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \
  77                 target = symbol; \
  78         } else { \
  79                 rc_update_1(ld.choice); \
  80                 rc_if_0(ld.choice2) { \
  81                         rc_update_0(ld.choice2); \
  82                         rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \
  83                                         + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \
  84                         target = symbol; \
  85                 } else { \
  86                         rc_update_1(ld.choice2); \
  87                         rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \
  88                                         + MATCH_LEN_MIN \
  89                                         + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \
  90                         target = symbol; \
  91                 } \
  92         } \
  93 } while (0)
  94
  95
  96 /// Length decoder probabilities; see comments in lzma_common.h.
  97 typedef struct {
  98         probability choice;
  99         probability choice2;
 100         probability low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
 101         probability mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
 102         probability high[LEN_HIGH_SYMBOLS];
 103 } lzma_length_decoder;
 104
 105
 106 typedef struct {
 107         ///////////////////
 108         // Probabilities //
 109         ///////////////////
 110
 111         /// Literals; see comments in lzma_common.h.
 112         probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE];
 113
 114         /// If 1, it's a match. Otherwise it's a single 8-bit literal.
 115         probability is_match[STATES][POS_STATES_MAX];
 116
 117         /// If 1, it's a repeated match. The distance is one of rep0 .. rep3.
 118         probability is_rep[STATES];
 119
 120         /// If 0, distance of a repeated match is rep0.
 121         /// Otherwise check is_rep1.
 122         probability is_rep0[STATES];
 123
 124         /// If 0, distance of a repeated match is rep1.
 125         /// Otherwise check is_rep2.
 126         probability is_rep1[STATES];
 127
 128         /// If 0, distance of a repeated match is rep2. Otherwise it is rep3.
 129         probability is_rep2[STATES];
 130
 131         /// If 1, the repeated match has length of one byte. Otherwise
 132         /// the length is decoded from rep_len_decoder.
 133         probability is_rep0_long[STATES][POS_STATES_MAX];
 134
 135         /// Probability tree for the highest two bits of the match distance.
 136         /// There is a separate probability tree for match lengths of
 137         /// 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
 138         probability dist_slot[DIST_STATES][DIST_SLOTS];
 139
 140         /// Probability trees for additional bits for match distance when the
 141         /// distance is in the range [4, 127].
 142         probability pos_special[FULL_DISTANCES - DIST_MODEL_END];
 143
 144         /// Probability tree for the lowest four bits of a match distance
 145         /// that is equal to or greater than 128.
 146         probability pos_align[ALIGN_SIZE];
 147
 148         /// Length of a normal match
 149         lzma_length_decoder match_len_decoder;
 150
 151         /// Length of a repeated match
 152         lzma_length_decoder rep_len_decoder;
 153
 154         ///////////////////
 155         // Decoder state //
 156         ///////////////////
 157
 158         // Range coder
 159         lzma_range_decoder rc;
 160
 161         // Types of the most recently seen LZMA symbols
 162         lzma_lzma_state state;
 163
 164         uint32_t rep0;      ///< Distance of the latest match
 165         uint32_t rep1;      ///< Distance of second latest match
 166         uint32_t rep2;      ///< Distance of third latest match
 167         uint32_t rep3;      ///< Distance of fourth latest match
 168
 169         uint32_t pos_mask; // (1U << pb) - 1
 170         uint32_t literal_context_bits;
 171         uint32_t literal_mask;
 172
 173         /// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
 174         /// payload marker is expected.
 175         lzma_vli uncompressed_size;
 176
 177         /// True if end of payload marker (EOPM) is allowed even when
 178         /// uncompressed_size is known; false if EOPM must not be present.
 179         /// This is ignored if uncompressed_size == LZMA_VLI_UNKNOWN.
 180         bool allow_eopm;
 181
 182         ////////////////////////////////
 183         // State of incomplete symbol //
 184         ////////////////////////////////
 185
 186         /// Position where to continue the decoder loop
 187         enum {
 188                 SEQ_NORMALIZE,
 189                 SEQ_IS_MATCH,
 190                 SEQ_LITERAL,
 191                 SEQ_LITERAL_MATCHED,
 192                 SEQ_LITERAL_WRITE,
 193                 SEQ_IS_REP,
 194                 SEQ_MATCH_LEN_CHOICE,
 195                 SEQ_MATCH_LEN_CHOICE2,
 196                 SEQ_MATCH_LEN_BITTREE,
 197                 SEQ_DIST_SLOT,
 198                 SEQ_DIST_MODEL,
 199                 SEQ_DIRECT,
 200                 SEQ_ALIGN,
 201                 SEQ_EOPM,
 202                 SEQ_IS_REP0,
 203                 SEQ_SHORTREP,
 204                 SEQ_IS_REP0_LONG,
 205                 SEQ_IS_REP1,
 206                 SEQ_IS_REP2,
 207                 SEQ_REP_LEN_CHOICE,
 208                 SEQ_REP_LEN_CHOICE2,
 209                 SEQ_REP_LEN_BITTREE,
 210                 SEQ_COPY,
 211         } sequence;
 212
 213         /// Base of the current probability tree
 214         probability *probs;
 215
 216         /// Symbol being decoded. This is also used as an index variable in
 217         /// bittree decoders: probs[symbol]
 218         uint32_t symbol;
 219
 220         /// Used as a loop termination condition on bittree decoders and
 221         /// direct bits decoder.
 222         uint32_t limit;
 223
 224         /// Matched literal decoder: 0x100 or 0 to help avoiding branches.
 225         /// Bittree reverse decoders: Offset of the next bit: 1 << offset
 226         uint32_t offset;
 227
 228         /// If decoding a literal: match byte.
 229         /// If decoding a match: length of the match.
 230         uint32_t len;
 231 } lzma_lzma1_decoder;
 232
 233
 234 static lzma_ret
 235 lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
 236                 const uint8_t *restrict in,
 237                 size_t *restrict in_pos, size_t in_size)
 238 {
 239         lzma_lzma1_decoder *restrict coder = coder_ptr;
 240
 241         ////////////////////
 242         // Initialization //
 243         ////////////////////
 244
 245         {
 246                 const lzma_ret ret = rc_read_init(
 247                                 &coder->rc, in, in_pos, in_size);
 248                 if (ret != LZMA_STREAM_END)
 249                         return ret;
 250         }
 251
 252         ///////////////
 253         // Variables //
 254         ///////////////
 255
 256         // Making local copies of often-used variables improves both
 257         // speed and readability.
 258
 259         lzma_dict dict = *dictptr;
 260
 261         const size_t dict_start = dict.pos;
 262
 263         // Range decoder
 264         rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED);
 265
 266         // State
 267         uint32_t state = coder->state;
 268         uint32_t rep0 = coder->rep0;
 269         uint32_t rep1 = coder->rep1;
 270         uint32_t rep2 = coder->rep2;
 271         uint32_t rep3 = coder->rep3;
 272
 273         const uint32_t pos_mask = coder->pos_mask;
 274
 275         // These variables are actually needed only if we last time ran
 276         // out of input in the middle of the decoder loop.
 277         probability *probs = coder->probs;
 278         uint32_t symbol = coder->symbol;
 279         uint32_t limit = coder->limit;
 280         uint32_t offset = coder->offset;
 281         uint32_t len = coder->len;
 282
 283         const uint32_t literal_mask = coder->literal_mask;
 284         const uint32_t literal_context_bits = coder->literal_context_bits;
 285
 286         // Temporary variables
 287         uint32_t pos_state = dict.pos & pos_mask;
 288
 289         lzma_ret ret = LZMA_OK;
 290
 291         // This is true when the next LZMA symbol is allowed to be EOPM.
 292         // That is, if this is false, then EOPM is considered
 293         // an invalid symbol and we will return LZMA_DATA_ERROR.
 294         //
 295         // EOPM is always required (not just allowed) when
 296         // the uncompressed size isn't known. When uncompressed size
 297         // is known, eopm_is_valid may be set to true later.
 298         bool eopm_is_valid = coder->uncompressed_size == LZMA_VLI_UNKNOWN;
 299
 300         // If uncompressed size is known and there is enough output space
 301         // to decode all the data, limit the available buffer space so that
 302         // the main loop won't try to decode past the end of the stream.
 303         bool might_finish_without_eopm = false;
 304         if (coder->uncompressed_size != LZMA_VLI_UNKNOWN
 305                         && coder->uncompressed_size <= dict.limit - dict.pos) {
 306                 dict.limit = dict.pos + (size_t)(coder->uncompressed_size);
 307                 might_finish_without_eopm = true;
 308         }
 309
 310         // The main decoder loop. The "switch" is used to resume the decoder at
 311         // correct location. Once resumed, the "switch" is no longer used.
 312         // The decoder loops is split into two modes:
 313         //
 314         // 1 - Non-resumable mode (fast). This is used when it is guaranteed
 315         //     there is enough input to decode the next symbol. If the output
 316         //     limit is reached, then the decoder loop will save the place
 317         //     for the resumable mode to continue. This mode is not used if
 318         //     HAVE_SMALL is defined. This is faster than Resumable mode
 319         //     because it reduces the number of branches needed and allows
 320         //     for more compiler optimizations.
 321         //
 322         // 2 - Resumable mode (slow). This is used when a previous decoder
 323         //     loop did not have enough space in the input or output buffers
 324         //     to complete. It uses sequence enum values to set remind
 325         //     coder->sequence where to resume in the decoder loop. This
 326         //     is the only mode used when HAVE_SMALL is defined.
 327
 328         switch (coder->sequence)
 329         while (true) {
 330                 // Calculate new pos_state. This is skipped on the first loop
 331                 // since we already calculated it when setting up the local
 332                 // variables.
 333                 pos_state = dict.pos & pos_mask;
 334
 335 #ifndef HAVE_SMALL
 336
 337                 ///////////////////////////////
 338                 // Non-resumable Mode (fast) //
 339                 ///////////////////////////////
 340
 341                 // Go to Resumable mode (1) if there is not enough input to
 342                 // safely decode any possible LZMA symbol or (2) if the
 343                 // dictionary is full, which may need special checks that
 344                 // are only done in the Resumable mode.
 345                 if (unlikely(!rc_is_fast_allowed()
 346                                 || dict.pos == dict.limit))
 347                         goto slow;
 348
 349                 // Decode the first bit from the next LZMA symbol.
 350                 // If the bit is a 0, then we handle it as a literal.
 351                 // If the bit is a 1, then it is a match of previously
 352                 // decoded data.
 353                 rc_if_0(coder->is_match[state][pos_state]) {
 354                         /////////////////////
 355                         // Decode literal. //
 356                         /////////////////////
 357
 358                         // Update the RC that we have decoded a 0.
 359                         rc_update_0(coder->is_match[state][pos_state]);
 360
 361                         // Get the correct probability array from lp and
 362                         // lc params.
 363                         probs = literal_subcoder(coder->literal,
 364                                         literal_context_bits, literal_mask,
 365                                         dict.pos, dict_get0(&dict));
 366
 367                         if (is_literal_state(state)) {
 368                                 update_literal_normal(state);
 369
 370                                 // Decode literal without match byte.
 371                                 rc_bittree8(probs, 0);
 372                         } else {
 373                                 update_literal_matched(state);
 374
 375                                 // Decode literal with match byte.
 376                                 rc_matched_literal(probs,
 377                                                 dict_get(&dict, rep0));
 378                         }
 379
 380                         // Write decoded literal to dictionary
 381                         dict_put(&dict, symbol);
 382                         continue;
 383                 }
 384
 385                 ///////////////////
 386                 // Decode match. //
 387                 ///////////////////
 388
 389                 // Instead of a new byte we are going to decode a
 390                 // distance-length pair. The distance represents how far
 391                 // back in the dictionary to begin copying. The length
 392                 // represents how many bytes to copy.
 393
 394                 rc_update_1(coder->is_match[state][pos_state]);
 395
 396                 rc_if_0(coder->is_rep[state]) {
 397                         ///////////////////
 398                         // Simple match. //
 399                         ///////////////////
 400
 401                         // Not a repeated match. In this case,
 402                         // the length (how many bytes to copy) must be
 403                         // decoded first. Then, the distance (where to
 404                         // start copying) is decoded.
 405                         //
 406                         // This is also how we know when we are done
 407                         // decoding. If the distance decodes to UINT32_MAX,
 408                         // then we know to stop decoding (end of payload
 409                         // marker).
 410
 411                         rc_update_0(coder->is_rep[state]);
 412                         update_match(state);
 413
 414                         // The latest three match distances are kept in
 415                         // memory in case there are repeated matches.
 416                         rep3 = rep2;
 417                         rep2 = rep1;
 418                         rep1 = rep0;
 419
 420                         // Decode the length of the match.
 421                         len_decode_fast(len, coder->match_len_decoder,
 422                                         pos_state);
 423
 424                         // Next, decode the distance into rep0.
 425
 426                         // The next 6 bits determine how to decode the
 427                         // rest of the distance.
 428                         probs = coder->dist_slot[get_dist_state(len)];
 429
 430                         rc_bittree6(probs, -DIST_SLOTS);
 431                         assert(symbol <= 63);
 432
 433                         if (symbol < DIST_MODEL_START) {
 434                                 // If the decoded symbol is < DIST_MODEL_START
 435                                 // then we use its value directly as the
 436                                 // match distance. No other bits are needed.
 437                                 // The only possible distance values
 438                                 // are [0, 3].
 439                                 rep0 = symbol;
 440                         } else {
 441                                 // Use the first two bits of symbol as the
 442                                 // highest bits of the match distance.
 443
 444                                 // "limit" represents the number of low bits
 445                                 // to decode.
 446                                 limit = (symbol >> 1) - 1;
 447                                 assert(limit >= 1 && limit <= 30);
 448                                 rep0 = 2 + (symbol & 1);
 449
 450                                 if (symbol < DIST_MODEL_END) {
 451                                         // When symbol is > DIST_MODEL_START,
 452                                         // but symbol < DIST_MODEL_END, then
 453                                         // it can decode distances between
 454                                         // [4, 127].
 455                                         assert(limit <= 5);
 456                                         rep0 <<= limit;
 457                                         assert(rep0 <= 96);
 458
 459                                         // -1 is fine, because we start
 460                                         // decoding at probs[1], not probs[0].
 461                                         // NOTE: This violates the C standard,
 462                                         // since we are doing pointer
 463                                         // arithmetic past the beginning of
 464                                         // the array.
 465                                         assert((int32_t)(rep0 - symbol - 1)
 466                                                         >= -1);
 467                                         assert((int32_t)(rep0 - symbol - 1)
 468                                                         <= 82);
 469                                         probs = coder->pos_special + rep0
 470                                                         - symbol - 1;
 471                                         symbol = 1;
 472                                         offset = 1;
 473
 474                                         // Variable number (1-5) of bits
 475                                         // from a reverse bittree. This
 476                                         // isn't worth manual unrolling.
 477                                         //
 478                                         // NOTE: Making one or many of the
 479                                         // variables (probs, symbol, offset,
 480                                         // or limit) local here (instead of
 481                                         // using those declared outside the
 482                                         // main loop) can affect code size
 483                                         // and performance which isn't a
 484                                         // surprise but it's not so clear
 485                                         // what is the best.
 486                                         do {
 487                                                 rc_bit_add_if_1(probs,
 488                                                                 rep0, offset);
 489                                                 offset <<= 1;
 490                                         } while (--limit > 0);
 491                                 } else {
 492                                         // The distance is >= 128. Decode the
 493                                         // lower bits without probabilities
 494                                         // except the lowest four bits.
 495                                         assert(symbol >= 14);
 496                                         assert(limit >= 6);
 497
 498                                         limit -= ALIGN_BITS;
 499                                         assert(limit >= 2);
 500
 501                                         rc_direct(rep0, limit);
 502
 503                                         // Decode the lowest four bits using
 504                                         // probabilities.
 505                                         rep0 <<= ALIGN_BITS;
 506                                         rc_bittree_rev4(coder->pos_align);
 507                                         rep0 += symbol;
 508
 509                                         // If the end of payload marker (EOPM)
 510                                         // is detected, jump to the safe code.
 511                                         // The EOPM handling isn't speed
 512                                         // critical at all.
 513                                         //
 514                                         // A final normalization is needed
 515                                         // after the EOPM (there can be a
 516                                         // dummy byte to read in some cases).
 517                                         // If the normalization was done here
 518                                         // in the fast code, it would need to
 519                                         // be taken into account in the value
 520                                         // of LZMA_IN_REQUIRED. Using the
 521                                         // safe code allows keeping
 522                                         // LZMA_IN_REQUIRED as 20 instead of
 523                                         // 21.
 524                                         if (rep0 == UINT32_MAX)
 525                                                 goto eopm;
 526                                 }
 527                         }
 528
 529                         // Validate the distance we just decoded.
 530                         if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
 531                                 ret = LZMA_DATA_ERROR;
 532                                 goto out;
 533                         }
 534
 535                 } else {
 536                         rc_update_1(coder->is_rep[state]);
 537
 538                         /////////////////////
 539                         // Repeated match. //
 540                         /////////////////////
 541
 542                         // The match distance is a value that we have decoded
 543                         // recently. The latest four match distances are
 544                         // available as rep0, rep1, rep2 and rep3. We will
 545                         // now decode which of them is the new distance.
 546                         //
 547                         // There cannot be a match if we haven't produced
 548                         // any output, so check that first.
 549                         if (unlikely(!dict_is_distance_valid(&dict, 0))) {
 550                                 ret = LZMA_DATA_ERROR;
 551                                 goto out;
 552                         }
 553
 554                         rc_if_0(coder->is_rep0[state]) {
 555                                 rc_update_0(coder->is_rep0[state]);
 556                                 // The distance is rep0.
 557
 558                                 // Decode the next bit to determine if 1 byte
 559                                 // should be copied from rep0 distance or
 560                                 // if the number of bytes needs to be decoded.
 561
 562                                 // If the next bit is 0, then it is a
 563                                 // "Short Rep Match" and only 1 bit is copied.
 564                                 // Otherwise, the length of the match is
 565                                 // decoded after the "else" statement.
 566                                 rc_if_0(coder->is_rep0_long[state][pos_state]) {
 567                                         rc_update_0(coder->is_rep0_long[
 568                                                         state][pos_state]);
 569
 570                                         update_short_rep(state);
 571                                         dict_put(&dict, dict_get(&dict, rep0));
 572                                         continue;
 573                                 }
 574
 575                                 // Repeating more than one byte at
 576                                 // distance of rep0.
 577                                 rc_update_1(coder->is_rep0_long[
 578                                                 state][pos_state]);
 579
 580                         } else {
 581                                 rc_update_1(coder->is_rep0[state]);
 582
 583                                 // The distance is rep1, rep2 or rep3. Once
 584                                 // we find out which one of these three, it
 585                                 // is stored to rep0 and rep1, rep2 and rep3
 586                                 // are updated accordingly. There is no
 587                                 // "Short Rep Match" option, so the length
 588                                 // of the match must always be decoded next.
 589                                 rc_if_0(coder->is_rep1[state]) {
 590                                         // The distance is rep1.
 591                                         rc_update_0(coder->is_rep1[state]);
 592
 593                                         const uint32_t distance = rep1;
 594                                         rep1 = rep0;
 595                                         rep0 = distance;
 596
 597                                 } else {
 598                                         rc_update_1(coder->is_rep1[state]);
 599
 600                                         rc_if_0(coder->is_rep2[state]) {
 601                                                 // The distance is rep2.
 602                                                 rc_update_0(coder->is_rep2[
 603                                                                 state]);
 604
 605                                                 const uint32_t distance = rep2;
 606                                                 rep2 = rep1;
 607                                                 rep1 = rep0;
 608                                                 rep0 = distance;
 609
 610                                         } else {
 611                                                 // The distance is rep3.
 612                                                 rc_update_1(coder->is_rep2[
 613                                                                 state]);
 614
 615                                                 const uint32_t distance = rep3;
 616                                                 rep3 = rep2;
 617                                                 rep2 = rep1;
 618                                                 rep1 = rep0;
 619                                                 rep0 = distance;
 620                                         }
 621                                 }
 622                         }
 623
 624                         update_long_rep(state);
 625
 626                         // Decode the length of the repeated match.
 627                         len_decode_fast(len, coder->rep_len_decoder,
 628                                         pos_state);
 629                 }
 630
 631                 /////////////////////////////////
 632                 // Repeat from history buffer. //
 633                 /////////////////////////////////
 634
 635                 // The length is always between these limits. There is no way
 636                 // to trigger the algorithm to set len outside this range.
 637                 assert(len >= MATCH_LEN_MIN);
 638                 assert(len <= MATCH_LEN_MAX);
 639
 640                 // Repeat len bytes from distance of rep0.
 641                 if (unlikely(dict_repeat(&dict, rep0, &len))) {
 642                         coder->sequence = SEQ_COPY;
 643                         goto out;
 644                 }
 645
 646                 continue;
 647
 648 slow:
 649 #endif
 650         ///////////////////////////
 651         // Resumable Mode (slow) //
 652         ///////////////////////////
 653
 654         // This is very similar to Non-resumable Mode, so most of the
 655         // comments are not repeated. The main differences are:
 656         // - case labels are used to resume at the correct location.
 657         // - Loops are not unrolled.
 658         // - Range coder macros take an extra sequence argument
 659         //   so they can save to coder->sequence the location to
 660         //   resume in case there is not enough input.
 661         case SEQ_NORMALIZE:
 662         case SEQ_IS_MATCH:
 663                 if (unlikely(might_finish_without_eopm
 664                                 && dict.pos == dict.limit)) {
 665                         // In rare cases there is a useless byte that needs
 666                         // to be read anyway.
 667                         rc_normalize_safe(SEQ_NORMALIZE);
 668
 669                         // If the range decoder state is such that we can
 670                         // be at the end of the LZMA stream, then the
 671                         // decoding is finished.
 672                         if (rc_is_finished(rc)) {
 673                                 ret = LZMA_STREAM_END;
 674                                 goto out;
 675                         }
 676
 677                         // If the caller hasn't allowed EOPM to be present
 678                         // together with known uncompressed size, then the
 679                         // LZMA stream is corrupt.
 680                         if (!coder->allow_eopm) {
 681                                 ret = LZMA_DATA_ERROR;
 682                                 goto out;
 683                         }
 684
 685                         // Otherwise continue decoding with the expectation
 686                         // that the next LZMA symbol is EOPM.
 687                         eopm_is_valid = true;
 688                 }
 689
 690                 rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
 691                         /////////////////////
 692                         // Decode literal. //
 693                         /////////////////////
 694
 695                         rc_update_0(coder->is_match[state][pos_state]);
 696
 697                         probs = literal_subcoder(coder->literal,
 698                                         literal_context_bits, literal_mask,
 699                                         dict.pos, dict_get0(&dict));
 700                         symbol = 1;
 701
 702                         if (is_literal_state(state)) {
 703                                 update_literal_normal(state);
 704
 705                                 // Decode literal without match byte.
 706                                 // The "slow" version does not unroll
 707                                 // the loop.
 708         case SEQ_LITERAL:
 709                                 do {
 710                                         rc_bit_safe(probs[symbol], , ,
 711                                                         SEQ_LITERAL);
 712                                 } while (symbol < (1 << 8));
 713                         } else {
 714                                 update_literal_matched(state);
 715
 716                                 // Decode literal with match byte.
 717                                 len = (uint32_t)(dict_get(&dict, rep0)) << 1;
 718
 719                                 offset = 0x100;
 720
 721         case SEQ_LITERAL_MATCHED:
 722                                 do {
 723                                         const uint32_t match_bit
 724                                                         = len & offset;
 725                                         const uint32_t subcoder_index
 726                                                         = offset + match_bit
 727                                                         + symbol;
 728
 729                                         rc_bit_safe(probs[subcoder_index],
 730                                                         offset &= ~match_bit,
 731                                                         offset &= match_bit,
 732                                                         SEQ_LITERAL_MATCHED);
 733
 734                                         // It seems to be faster to do this
 735                                         // here instead of putting it to the
 736                                         // beginning of the loop and then
 737                                         // putting the "case" in the middle
 738                                         // of the loop.
 739                                         len <<= 1;
 740
 741                                 } while (symbol < (1 << 8));
 742                         }
 743
 744         case SEQ_LITERAL_WRITE:
 745                         if (dict_put_safe(&dict, symbol)) {
 746                                 coder->sequence = SEQ_LITERAL_WRITE;
 747                                 goto out;
 748                         }
 749
 750                         continue;
 751                 }
 752
 753                 ///////////////////
 754                 // Decode match. //
 755                 ///////////////////
 756
 757                 rc_update_1(coder->is_match[state][pos_state]);
 758
 759         case SEQ_IS_REP:
 760                 rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) {
 761                         ///////////////////
 762                         // Simple match. //
 763                         ///////////////////
 764
 765                         rc_update_0(coder->is_rep[state]);
 766                         update_match(state);
 767
 768                         rep3 = rep2;
 769                         rep2 = rep1;
 770                         rep1 = rep0;
 771
 772                         len_decode(len, coder->match_len_decoder,
 773                                         pos_state, SEQ_MATCH_LEN);
 774
 775                         probs = coder->dist_slot[get_dist_state(len)];
 776                         symbol = 1;
 777
 778         case SEQ_DIST_SLOT:
 779                         do {
 780                                 rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT);
 781                         } while (symbol < DIST_SLOTS);
 782
 783                         symbol -= DIST_SLOTS;
 784                         assert(symbol <= 63);
 785
 786                         if (symbol < DIST_MODEL_START) {
 787                                 rep0 = symbol;
 788                         } else {
 789                                 limit = (symbol >> 1) - 1;
 790                                 assert(limit >= 1 && limit <= 30);
 791                                 rep0 = 2 + (symbol & 1);
 792
 793                                 if (symbol < DIST_MODEL_END) {
 794                                         assert(limit <= 5);
 795                                         rep0 <<= limit;
 796                                         assert(rep0 <= 96);
 797                                         // -1 is fine, because we start
 798                                         // decoding at probs[1], not probs[0].
 799                                         // NOTE: This violates the C standard,
 800                                         // since we are doing pointer
 801                                         // arithmetic past the beginning of
 802                                         // the array.
 803                                         assert((int32_t)(rep0 - symbol - 1)
 804                                                         >= -1);
 805                                         assert((int32_t)(rep0 - symbol - 1)
 806                                                         <= 82);
 807                                         probs = coder->pos_special + rep0
 808                                                         - symbol - 1;
 809                                         symbol = 1;
 810                                         offset = 0;
 811         case SEQ_DIST_MODEL:
 812                                         do {
 813                                                 rc_bit_safe(probs[symbol], ,
 814                                                         rep0 += 1U << offset,
 815                                                         SEQ_DIST_MODEL);
 816                                         } while (++offset < limit);
 817                                 } else {
 818                                         assert(symbol >= 14);
 819                                         assert(limit >= 6);
 820                                         limit -= ALIGN_BITS;
 821                                         assert(limit >= 2);
 822         case SEQ_DIRECT:
 823                                         rc_direct_safe(rep0, limit,
 824                                                         SEQ_DIRECT);
 825
 826                                         rep0 <<= ALIGN_BITS;
 827                                         symbol = 0;
 828                                         offset = 1;
 829         case SEQ_ALIGN:
 830                                         do {
 831                                                 rc_bit_last_safe(
 832                                                         coder->pos_align[
 833                                                                 offset
 834                                                                 + symbol],
 835                                                         ,
 836                                                         symbol += offset,
 837                                                         SEQ_ALIGN);
 838                                                 offset <<= 1;
 839                                         } while (offset < ALIGN_SIZE);
 840
 841                                         rep0 += symbol;
 842
 843                                         if (rep0 == UINT32_MAX) {
 844                                                 // End of payload marker was
 845                                                 // found. It may only be
 846                                                 // present if
 847                                                 //   - uncompressed size is
 848                                                 //     unknown or
 849                                                 //   - after known uncompressed
 850                                                 //     size amount of bytes has
 851                                                 //     been decompressed and
 852                                                 //     caller has indicated
 853                                                 //     that EOPM might be used
 854                                                 //     (it's not allowed in
 855                                                 //     LZMA2).
 856 #ifndef HAVE_SMALL
 857 eopm:
 858 #endif
 859                                                 if (!eopm_is_valid) {
 860                                                         ret = LZMA_DATA_ERROR;
 861                                                         goto out;
 862                                                 }
 863
 864         case SEQ_EOPM:
 865                                                 // LZMA1 stream with
 866                                                 // end-of-payload marker.
 867                                                 rc_normalize_safe(SEQ_EOPM);
 868                                                 ret = rc_is_finished(rc)
 869                                                         ? LZMA_STREAM_END
 870                                                         : LZMA_DATA_ERROR;
 871                                                 goto out;
 872                                         }
 873                                 }
 874                         }
 875
 876                         if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
 877                                 ret = LZMA_DATA_ERROR;
 878                                 goto out;
 879                         }
 880
 881                 } else {
 882                         /////////////////////
 883                         // Repeated match. //
 884                         /////////////////////
 885
 886                         rc_update_1(coder->is_rep[state]);
 887
 888                         if (unlikely(!dict_is_distance_valid(&dict, 0))) {
 889                                 ret = LZMA_DATA_ERROR;
 890                                 goto out;
 891                         }
 892
 893         case SEQ_IS_REP0:
 894                         rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) {
 895                                 rc_update_0(coder->is_rep0[state]);
 896
 897         case SEQ_IS_REP0_LONG:
 898                                 rc_if_0_safe(coder->is_rep0_long
 899                                                 [state][pos_state],
 900                                                 SEQ_IS_REP0_LONG) {
 901                                         rc_update_0(coder->is_rep0_long[
 902                                                         state][pos_state]);
 903
 904                                         update_short_rep(state);
 905
 906         case SEQ_SHORTREP:
 907                                         if (dict_put_safe(&dict,
 908                                                         dict_get(&dict,
 909                                                         rep0))) {
 910                                                 coder->sequence = SEQ_SHORTREP;
 911                                                 goto out;
 912                                         }
 913
 914                                         continue;
 915                                 }
 916
 917                                 rc_update_1(coder->is_rep0_long[
 918                                                 state][pos_state]);
 919
 920                         } else {
 921                                 rc_update_1(coder->is_rep0[state]);
 922
 923         case SEQ_IS_REP1:
 924                                 rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) {
 925                                         rc_update_0(coder->is_rep1[state]);
 926
 927                                         const uint32_t distance = rep1;
 928                                         rep1 = rep0;
 929                                         rep0 = distance;
 930
 931                                 } else {
 932                                         rc_update_1(coder->is_rep1[state]);
 933         case SEQ_IS_REP2:
 934                                         rc_if_0_safe(coder->is_rep2[state],
 935                                                         SEQ_IS_REP2) {
 936                                                 rc_update_0(coder->is_rep2[
 937                                                                 state]);
 938
 939                                                 const uint32_t distance = rep2;
 940                                                 rep2 = rep1;
 941                                                 rep1 = rep0;
 942                                                 rep0 = distance;
 943
 944                                         } else {
 945                                                 rc_update_1(coder->is_rep2[
 946                                                                 state]);
 947
 948                                                 const uint32_t distance = rep3;
 949                                                 rep3 = rep2;
 950                                                 rep2 = rep1;
 951                                                 rep1 = rep0;
 952                                                 rep0 = distance;
 953                                         }
 954                                 }
 955                         }
 956
 957                         update_long_rep(state);
 958
 959                         len_decode(len, coder->rep_len_decoder,
 960                                         pos_state, SEQ_REP_LEN);
 961                 }
 962
 963                 /////////////////////////////////
 964                 // Repeat from history buffer. //
 965                 /////////////////////////////////
 966
 967                 assert(len >= MATCH_LEN_MIN);
 968                 assert(len <= MATCH_LEN_MAX);
 969
 970         case SEQ_COPY:
 971                 if (unlikely(dict_repeat(&dict, rep0, &len))) {
 972                         coder->sequence = SEQ_COPY;
 973                         goto out;
 974                 }
 975         }
 976
 977 out:
 978         // Save state
 979
 980         // NOTE: Must not copy dict.limit.
 981         dictptr->pos = dict.pos;
 982         dictptr->full = dict.full;
 983
 984         rc_from_local(coder->rc, *in_pos);
 985
 986         coder->state = state;
 987         coder->rep0 = rep0;
 988         coder->rep1 = rep1;
 989         coder->rep2 = rep2;
 990         coder->rep3 = rep3;
 991
 992         coder->probs = probs;
 993         coder->symbol = symbol;
 994         coder->limit = limit;
 995         coder->offset = offset;
 996         coder->len = len;
 997
 998         // Update the remaining amount of uncompressed data if uncompressed
 999         // size was known.
1000         if (coder->uncompressed_size != LZMA_VLI_UNKNOWN) {
1001                 coder->uncompressed_size -= dict.pos - dict_start;
1002
1003                 // If we have gotten all the output but the decoder wants
1004                 // to write more output, the file is corrupt. There are
1005                 // three SEQ values where output is produced.
1006                 if (coder->uncompressed_size == 0 && ret == LZMA_OK
1007                                 && (coder->sequence == SEQ_LITERAL_WRITE
1008                                         || coder->sequence == SEQ_SHORTREP
1009                                         || coder->sequence == SEQ_COPY))
1010                         ret = LZMA_DATA_ERROR;
1011         }
1012
1013         if (ret == LZMA_STREAM_END) {
1014                 // Reset the range decoder so that it is ready to reinitialize
1015                 // for a new LZMA2 chunk.
1016                 rc_reset(coder->rc);
1017                 coder->sequence = SEQ_IS_MATCH;
1018         }
1019
1020         return ret;
1021 }
1022
1023
1024 static void
1025 lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
1026                 bool allow_eopm)
1027 {
1028         lzma_lzma1_decoder *coder = coder_ptr;
1029         coder->uncompressed_size = uncompressed_size;
1030         coder->allow_eopm = allow_eopm;
1031 }
1032
1033
1034 static void
1035 lzma_decoder_reset(void *coder_ptr, const void *opt)
1036 {
1037         lzma_lzma1_decoder *coder = coder_ptr;
1038         const lzma_options_lzma *options = opt;
1039
1040         // NOTE: We assume that lc/lp/pb are valid since they were
1041         // successfully decoded with lzma_lzma_decode_properties().
1042
1043         // Calculate pos_mask. We don't need pos_bits as is for anything.
1044         coder->pos_mask = (1U << options->pb) - 1;
1045
1046         // Initialize the literal decoder.
1047         literal_init(coder->literal, options->lc, options->lp);
1048
1049         coder->literal_context_bits = options->lc;
1050         coder->literal_mask = literal_mask_calc(options->lc, options->lp);
1051
1052         // State
1053         coder->state = STATE_LIT_LIT;
1054         coder->rep0 = 0;
1055         coder->rep1 = 0;
1056         coder->rep2 = 0;
1057         coder->rep3 = 0;
1058         coder->pos_mask = (1U << options->pb) - 1;
1059
1060         // Range decoder
1061         rc_reset(coder->rc);
1062
1063         // Bit and bittree decoders
1064         for (uint32_t i = 0; i < STATES; ++i) {
1065                 for (uint32_t j = 0; j <= coder->pos_mask; ++j) {
1066                         bit_reset(coder->is_match[i][j]);
1067                         bit_reset(coder->is_rep0_long[i][j]);
1068                 }
1069
1070                 bit_reset(coder->is_rep[i]);
1071                 bit_reset(coder->is_rep0[i]);
1072                 bit_reset(coder->is_rep1[i]);
1073                 bit_reset(coder->is_rep2[i]);
1074         }
1075
1076         for (uint32_t i = 0; i < DIST_STATES; ++i)
1077                 bittree_reset(coder->dist_slot[i], DIST_SLOT_BITS);
1078
1079         for (uint32_t i = 0; i < FULL_DISTANCES - DIST_MODEL_END; ++i)
1080                 bit_reset(coder->pos_special[i]);
1081
1082         bittree_reset(coder->pos_align, ALIGN_BITS);
1083
1084         // Len decoders (also bit/bittree)
1085         const uint32_t num_pos_states = 1U << options->pb;
1086         bit_reset(coder->match_len_decoder.choice);
1087         bit_reset(coder->match_len_decoder.choice2);
1088         bit_reset(coder->rep_len_decoder.choice);
1089         bit_reset(coder->rep_len_decoder.choice2);
1090
1091         for (uint32_t pos_state = 0; pos_state < num_pos_states; ++pos_state) {
1092                 bittree_reset(coder->match_len_decoder.low[pos_state],
1093                                 LEN_LOW_BITS);
1094                 bittree_reset(coder->match_len_decoder.mid[pos_state],
1095                                 LEN_MID_BITS);
1096
1097                 bittree_reset(coder->rep_len_decoder.low[pos_state],
1098                                 LEN_LOW_BITS);
1099                 bittree_reset(coder->rep_len_decoder.mid[pos_state],
1100                                 LEN_MID_BITS);
1101         }
1102
1103         bittree_reset(coder->match_len_decoder.high, LEN_HIGH_BITS);
1104         bittree_reset(coder->rep_len_decoder.high, LEN_HIGH_BITS);
1105
1106         coder->sequence = SEQ_IS_MATCH;
1107         coder->probs = NULL;
1108         coder->symbol = 0;
1109         coder->limit = 0;
1110         coder->offset = 0;
1111         coder->len = 0;
1112
1113         return;
1114 }
1115
1116
1117 extern lzma_ret
1118 lzma_lzma_decoder_create(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1119                 const lzma_options_lzma *options, lzma_lz_options *lz_options)
1120 {
1121         if (lz->coder == NULL) {
1122                 lz->coder = lzma_alloc(sizeof(lzma_lzma1_decoder), allocator);
1123                 if (lz->coder == NULL)
1124                         return LZMA_MEM_ERROR;
1125
1126                 lz->code = &lzma_decode;
1127                 lz->reset = &lzma_decoder_reset;
1128                 lz->set_uncompressed = &lzma_decoder_uncompressed;
1129         }
1130
1131         // All dictionary sizes are OK here. LZ decoder will take care of
1132         // the special cases.
1133         lz_options->dict_size = options->dict_size;
1134         lz_options->preset_dict = options->preset_dict;
1135         lz_options->preset_dict_size = options->preset_dict_size;
1136
1137         return LZMA_OK;
1138 }
1139
1140
1141 /// Allocate and initialize LZMA decoder. This is used only via LZ
1142 /// initialization (lzma_lzma_decoder_init() passes function pointer to
1143 /// the LZ initialization).
1144 static lzma_ret
1145 lzma_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
1146                 lzma_vli id, const void *options, lzma_lz_options *lz_options)
1147 {
1148         if (!is_lclppb_valid(options))
1149                 return LZMA_PROG_ERROR;
1150
1151         lzma_vli uncomp_size = LZMA_VLI_UNKNOWN;
1152         bool allow_eopm = true;
1153
1154         if (id == LZMA_FILTER_LZMA1EXT) {
1155                 const lzma_options_lzma *opt = options;
1156
1157                 // Only one flag is supported.
1158                 if (opt->ext_flags & ~LZMA_LZMA1EXT_ALLOW_EOPM)
1159                         return LZMA_OPTIONS_ERROR;
1160
1161                 // FIXME? Using lzma_vli instead of uint64_t is weird because
1162                 // this has nothing to do with .xz headers and variable-length
1163                 // integer encoding. On the other hand, using LZMA_VLI_UNKNOWN
1164                 // instead of UINT64_MAX is clearer when unknown size is
1165                 // meant. A problem with using lzma_vli is that now we
1166                 // allow > LZMA_VLI_MAX which is fine in this file but
1167                 // it's still confusing. Note that alone_decoder.c also
1168                 // allows > LZMA_VLI_MAX when setting uncompressed size.
1169                 uncomp_size = opt->ext_size_low
1170                                 + ((uint64_t)(opt->ext_size_high) << 32);
1171                 allow_eopm = (opt->ext_flags & LZMA_LZMA1EXT_ALLOW_EOPM) != 0
1172                                 || uncomp_size == LZMA_VLI_UNKNOWN;
1173         }
1174
1175         return_if_error(lzma_lzma_decoder_create(
1176                         lz, allocator, options, lz_options));
1177
1178         lzma_decoder_reset(lz->coder, options);
1179         lzma_decoder_uncompressed(lz->coder, uncomp_size, allow_eopm);
1180
1181         return LZMA_OK;
1182 }
1183
1184
1185 extern lzma_ret
1186 lzma_lzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
1187                 const lzma_filter_info *filters)
1188 {
1189         // LZMA can only be the last filter in the chain. This is enforced
1190         // by the raw_decoder initialization.
1191         assert(filters[1].init == NULL);
1192
1193         return lzma_lz_decoder_init(next, allocator, filters,
1194                         &lzma_decoder_init);
1195 }
1196
1197
1198 extern bool
1199 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
1200 {
1201         if (byte > (4 * 5 + 4) * 9 + 8)
1202                 return true;
1203
1204         // See the file format specification to understand this.
1205         options->pb = byte / (9 * 5);
1206         byte -= options->pb * 9 * 5;
1207         options->lp = byte / 9;
1208         options->lc = byte - options->lp * 9;
1209
1210         return options->lc + options->lp > LZMA_LCLP_MAX;
1211 }
1212
1213
1214 extern uint64_t
1215 lzma_lzma_decoder_memusage_nocheck(const void *options)
1216 {
1217         const lzma_options_lzma *const opt = options;
1218         return sizeof(lzma_lzma1_decoder)
1219                         + lzma_lz_decoder_memusage(opt->dict_size);
1220 }
1221
1222
1223 extern uint64_t
1224 lzma_lzma_decoder_memusage(const void *options)
1225 {
1226         if (!is_lclppb_valid(options))
1227                 return UINT64_MAX;
1228
1229         return lzma_lzma_decoder_memusage_nocheck(options);
1230 }
1231
1232
1233 extern lzma_ret
1234 lzma_lzma_props_decode(void **options, const lzma_allocator *allocator,
1235                 const uint8_t *props, size_t props_size)
1236 {
1237         if (props_size != 5)
1238                 return LZMA_OPTIONS_ERROR;
1239
1240         lzma_options_lzma *opt
1241                         = lzma_alloc(sizeof(lzma_options_lzma), allocator);
1242         if (opt == NULL)
1243                 return LZMA_MEM_ERROR;
1244
1245         if (lzma_lzma_lclppb_decode(opt, props[0]))
1246                 goto error;
1247
1248         // All dictionary sizes are accepted, including zero. LZ decoder
1249         // will automatically use a dictionary at least a few KiB even if
1250         // a smaller dictionary is requested.
1251         opt->dict_size = read32le(props + 1);
1252
1253         opt->preset_dict = NULL;
1254         opt->preset_dict_size = 0;
1255
1256         *options = opt;
1257
1258         return LZMA_OK;
1259
1260 error:
1261         lzma_free(opt, allocator);
1262         return LZMA_OPTIONS_ERROR;
1263 }