lib/zstd/compress/zstd_opt.c
1 /*
2 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE (1<<30)
19 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
22 /*-*************************************
23 * Price functions for optimal parser
24 ***************************************/
26 #if 0 /* approximation at bit level (for tests) */
27 # define BITCOST_ACCURACY 0
28 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0 /* fractional bit accuracy (for tests) */
31 # define BITCOST_ACCURACY 8
32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else /* opt==approx, ultra==accurate */
35 # define BITCOST_ACCURACY 8
36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
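/* Note on the price representation used throughout this file: prices are
 * fixed-point bit counts, scaled by BITCOST_MULTIPLIER (1<<8 in the accurate
 * mode). ZSTD_bitWeight() below returns floor(log2(stat+1)) in that scale,
 * while ZSTD_fracWeight() adds a linear fractional correction: writing
 * stat+1 == 2^hb * (1+f), it returns roughly (hb + 1 + f) * BITCOST_MULTIPLIER,
 * i.e. approximately (log2(stat+1) + 1) in the scaled representation.
 * Illustrative example: ZSTD_fracWeight(10) gives hb=3, BWeight=768,
 * FWeight=352, total 1120, i.e. ~4.37 "bits" vs the exact log2(11)+1 ~= 4.46.
 * Symbol prices are then formed as differences WEIGHT(sum) - WEIGHT(freq),
 * which approximates log2(sum/freq), the Shannon cost of the symbol. */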
40 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
42 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
45 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
47 U32 const stat = rawStat + 1;
48 U32 const hb = ZSTD_highbit32(stat);
49 U32 const BWeight = hb * BITCOST_MULTIPLIER;
50 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51 U32 const weight = BWeight + FWeight;
52 assert(hb + BITCOST_ACCURACY < 31);
53 return weight;
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58 * @return price in bytes as fractional value
59 * for debug messages only */
60 MEM_STATIC double ZSTD_fCost(U32 price)
62 return (double)price / (BITCOST_MULTIPLIER*8);
64 #endif
66 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68 return optPtr->literalCompressionMode != ZSTD_ps_disable;
71 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
73 if (ZSTD_compressedLiterals(optPtr))
74 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
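/* The *BasePrice fields cache WEIGHT(sum) for each statistic, so that the cost
 * of an individual symbol can later be computed as basePrice - WEIGHT(freq),
 * i.e. roughly log2(sum/freq) bits (see ZSTD_rawLiteralsCost() and
 * ZSTD_getMatchPrice() below). */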
81 static U32 sum_u32(const unsigned table[], size_t nbElts)
83 size_t n;
84 U32 total = 0;
85 for (n=0; n<nbElts; n++) {
86 total += table[n];
88 return total;
91 static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
93 U32 s, sum=0;
94 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95 assert(shift < 30);
96 for (s=0; s<lastEltIndex+1; s++) {
97 table[s] = 1 + (table[s] >> shift);
98 sum += table[s];
100 return sum;
103 /* ZSTD_scaleStats() :
104 * reduce all elements in table if sum is too large
105 * return the resulting sum of elements */
106 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
108 U32 const prevsum = sum_u32(table, lastEltIndex+1);
109 U32 const factor = prevsum >> logTarget;
110 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111 assert(logTarget < 30);
112 if (factor <= 1) return prevsum;
113 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
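/* Illustrative scaling behavior: with logTarget == 11, a table whose elements
 * sum to ~16K gives factor == 8, so each element is divided by 8 (rounded down)
 * and bumped by 1, bringing the sum back near the 2K target while keeping every
 * symbol representable. */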
116 /* ZSTD_rescaleFreqs() :
117 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118 *     take hints from dictionary if there is one
119 *     and init from zero if there is none,
120 *     using src for literals stats, and baseline stats for sequence symbols
121 * otherwise downscale existing stats, to be used as seed for next block.
123 static void
124 ZSTD_rescaleFreqs(optState_t* const optPtr,
125 const BYTE* const src, size_t const srcSize,
126 int const optLevel)
128 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130 optPtr->priceType = zop_dynamic;
132 if (optPtr->litLengthSum == 0) { /* first block : init */
133 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135 optPtr->priceType = zop_predef;
138 assert(optPtr->symbolCosts != NULL);
139 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140 /* huffman table presumed generated by dictionary */
141 optPtr->priceType = zop_dynamic;
143 if (compressedLiterals) {
144 unsigned lit;
145 assert(optPtr->litFreq != NULL);
146 optPtr->litSum = 0;
147 for (lit=0; lit<=MaxLit; lit++) {
148 U32 const scaleLog = 11; /* scale to 2K */
149 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150 assert(bitCost <= scaleLog);
151 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152 optPtr->litSum += optPtr->litFreq[lit];
155 { unsigned ll;
156 FSE_CState_t llstate;
157 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158 optPtr->litLengthSum = 0;
159 for (ll=0; ll<=MaxLL; ll++) {
160 U32 const scaleLog = 10; /* scale to 1K */
161 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162 assert(bitCost < scaleLog);
163 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
167 { unsigned ml;
168 FSE_CState_t mlstate;
169 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170 optPtr->matchLengthSum = 0;
171 for (ml=0; ml<=MaxML; ml++) {
172 U32 const scaleLog = 10;
173 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174 assert(bitCost < scaleLog);
175 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
179 { unsigned of;
180 FSE_CState_t ofstate;
181 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182 optPtr->offCodeSum = 0;
183 for (of=0; of<=MaxOff; of++) {
184 U32 const scaleLog = 10;
185 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186 assert(bitCost < scaleLog);
187 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188 optPtr->offCodeSum += optPtr->offCodeFreq[of];
191 } else { /* not a dictionary */
193 assert(optPtr->litFreq != NULL);
194 if (compressedLiterals) {
195 unsigned lit = MaxLit;
196 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
200 { unsigned const baseLLfreqs[MaxLL+1] = {
201 4, 2, 1, 1, 1, 1, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1,
203 1, 1, 1, 1, 1, 1, 1, 1,
204 1, 1, 1, 1, 1, 1, 1, 1,
205 1, 1, 1, 1
207 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
208 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
211 { unsigned ml;
212 for (ml=0; ml<=MaxML; ml++)
213 optPtr->matchLengthFreq[ml] = 1;
215 optPtr->matchLengthSum = MaxML+1;
217 { unsigned const baseOFCfreqs[MaxOff+1] = {
218 6, 2, 1, 1, 2, 3, 4, 4,
219 4, 3, 2, 1, 1, 1, 1, 1,
220 1, 1, 1, 1, 1, 1, 1, 1,
221 1, 1, 1, 1, 1, 1, 1, 1
223 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
224 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
230 } else { /* new block : re-use previous statistics, scaled down */
232 if (compressedLiterals)
233 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
234 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
235 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
236 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
239 ZSTD_setBasePrices(optPtr, optLevel);
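/* Note on the dictionary-seeded path above: HUF/FSE tables store a number of
 * bits per symbol, not a frequency, so the code converts each bit cost back
 * into a pseudo-frequency with freq = 1 << (scaleLog - bitCost) (or 1 when the
 * symbol is absent from the table); a symbol costing b bits thus behaves like
 * one with probability 2^-b, which is what the WEIGHT()-based price functions
 * expect. */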
242 /* ZSTD_rawLiteralsCost() :
243 * price of literals (only) in specified segment (whose length can be 0).
244 * does not include price of literalLength symbol */
245 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246 const optState_t* const optPtr,
247 int optLevel)
249 if (litLength == 0) return 0;
251 if (!ZSTD_compressedLiterals(optPtr))
252 return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
254 if (optPtr->priceType == zop_predef)
255 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistic used */
257 /* dynamic statistics */
258 { U32 price = litLength * optPtr->litSumBasePrice;
259 U32 u;
260 for (u=0; u < litLength; u++) {
261 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
262 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
264 return price;
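/* Illustrative cost (made-up numbers): with litSum ~= 2048 and
 * litFreq['e'] == 256, each 'e' literal is priced at roughly
 * WEIGHT(2048) - WEIGHT(256) ~= log2(2048/256) = 3 bits (times
 * BITCOST_MULTIPLIER); rarer literals cost proportionally more. */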
268 /* ZSTD_litLengthPrice() :
269 * cost of literalLength symbol */
270 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
272 assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273 if (optPtr->priceType == zop_predef)
274 return WEIGHT(litLength, optLevel);
275 /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276 * because it isn't representable in the zstd format. So instead just
277 * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278 * would be all literals.
280 if (litLength == ZSTD_BLOCKSIZE_MAX)
281 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
283 /* dynamic statistics */
284 { U32 const llCode = ZSTD_LLcode(litLength);
285 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
286 + optPtr->litLengthSumBasePrice
287 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
291 /* ZSTD_getMatchPrice() :
292 * Provides the cost of the match part (offset + matchLength) of a sequence
293 * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
294 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
295 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
297 FORCE_INLINE_TEMPLATE U32
298 ZSTD_getMatchPrice(U32 const offcode,
299 U32 const matchLength,
300 const optState_t* const optPtr,
301 int const optLevel)
303 U32 price;
304 U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
305 U32 const mlBase = matchLength - MINMATCH;
306 assert(matchLength >= MINMATCH);
308 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
309 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
311 /* dynamic statistics */
312 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
313 if ((optLevel<2) /*static*/ && offCode >= 20)
314 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
316 /* match Length */
317 { U32 const mlCode = ZSTD_MLcode(mlBase);
318 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
321 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
323 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
324 return price;
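/* Offsets are priced by bucket: offCode = floor(log2(offbase)) selects one of
 * up to 32 offset codes, and code N implies N extra raw bits in the bitstream,
 * hence the (offCode * BITCOST_MULTIPLIER) term on top of the statistical cost
 * of the code itself. The optLevel<2 handicap above makes very distant matches
 * (offCode >= 20, i.e. offsets on the order of a megabyte or more)
 * progressively less attractive, trading a little ratio for decompression
 * speed. */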
327 /* ZSTD_updateStats() :
328 * assumption : literals + litLength <= iend */
329 static void ZSTD_updateStats(optState_t* const optPtr,
330 U32 litLength, const BYTE* literals,
331 U32 offsetCode, U32 matchLength)
333 /* literals */
334 if (ZSTD_compressedLiterals(optPtr)) {
335 U32 u;
336 for (u=0; u < litLength; u++)
337 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
338 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
341 /* literal Length */
342 { U32 const llCode = ZSTD_LLcode(litLength);
343 optPtr->litLengthFreq[llCode]++;
344 optPtr->litLengthSum++;
347 /* offset code : expected to follow storeSeq() numeric representation */
348 { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
349 assert(offCode <= MaxOff);
350 optPtr->offCodeFreq[offCode]++;
351 optPtr->offCodeSum++;
354 /* match Length */
355 { U32 const mlBase = matchLength - MINMATCH;
356 U32 const mlCode = ZSTD_MLcode(mlBase);
357 optPtr->matchLengthFreq[mlCode]++;
358 optPtr->matchLengthSum++;
363 /* ZSTD_readMINMATCH() :
364 * function safe only for comparisons
365 * assumption : memPtr must be at least 4 bytes before end of buffer */
366 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
368 switch (length)
370 default :
371 case 4 : return MEM_read32(memPtr);
372 case 3 : if (MEM_isLittleEndian())
373 return MEM_read32(memPtr)<<8;
374 else
375 return MEM_read32(memPtr)>>8;
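/* For length==3 the helper still reads 4 bytes but shifts so that only the 3
 * useful bytes take part in the comparison; the returned value is therefore
 * only meaningful for equality tests, never as an actual literal or offset
 * value (hence "safe only for comparisons" above). */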
380 /* Update hashTable3 up to ip (excluded)
381 Assumption : always within prefix (i.e. not within extDict) */
382 static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383 U32* nextToUpdate3,
384 const BYTE* const ip)
386 U32* const hashTable3 = ms->hashTable3;
387 U32 const hashLog3 = ms->hashLog3;
388 const BYTE* const base = ms->window.base;
389 U32 idx = *nextToUpdate3;
390 U32 const target = (U32)(ip - base);
391 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
392 assert(hashLog3 > 0);
394 while(idx < target) {
395 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
396 idx++;
399 *nextToUpdate3 = target;
400 return hashTable3[hash3];
404 /*-*************************************
405 * Binary Tree search
406 ***************************************/
407 /* ZSTD_insertBt1() : add one or multiple positions to tree.
408 * @param ip assumed <= iend-8 .
409 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
410 * @return : nb of positions added */
411 static U32 ZSTD_insertBt1(
412 const ZSTD_matchState_t* ms,
413 const BYTE* const ip, const BYTE* const iend,
414 U32 const target,
415 U32 const mls, const int extDict)
417 const ZSTD_compressionParameters* const cParams = &ms->cParams;
418 U32* const hashTable = ms->hashTable;
419 U32 const hashLog = cParams->hashLog;
420 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
421 U32* const bt = ms->chainTable;
422 U32 const btLog = cParams->chainLog - 1;
423 U32 const btMask = (1 << btLog) - 1;
424 U32 matchIndex = hashTable[h];
425 size_t commonLengthSmaller=0, commonLengthLarger=0;
426 const BYTE* const base = ms->window.base;
427 const BYTE* const dictBase = ms->window.dictBase;
428 const U32 dictLimit = ms->window.dictLimit;
429 const BYTE* const dictEnd = dictBase + dictLimit;
430 const BYTE* const prefixStart = base + dictLimit;
431 const BYTE* match;
432 const U32 curr = (U32)(ip-base);
433 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
434 U32* smallerPtr = bt + 2*(curr&btMask);
435 U32* largerPtr = smallerPtr + 1;
436 U32 dummy32; /* to be nullified at the end */
437 /* windowLow is based on target because
438 * we only need positions that will be in the window at the end of the tree update.
440 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
441 U32 matchEndIdx = curr+8+1;
442 size_t bestLength = 8;
443 U32 nbCompares = 1U << cParams->searchLog;
444 #ifdef ZSTD_C_PREDICT
445 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
446 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
447 predictedSmall += (predictedSmall>0);
448 predictedLarge += (predictedLarge>0);
449 #endif /* ZSTD_C_PREDICT */
451 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
453 assert(curr <= target);
454 assert(ip <= iend-8); /* required for h calculation */
455 hashTable[h] = curr; /* Update Hash Table */
457 assert(windowLow > 0);
458 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
459 U32* const nextPtr = bt + 2*(matchIndex & btMask);
460 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
461 assert(matchIndex < curr);
463 #ifdef ZSTD_C_PREDICT /* note : can create issues when hashLog is small (<= 11) */
464 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
465 if (matchIndex == predictedSmall) {
466 /* no need to check length, result known */
467 *smallerPtr = matchIndex;
468 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
469 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
470 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
471 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
472 continue;
474 if (matchIndex == predictedLarge) {
475 *largerPtr = matchIndex;
476 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
477 largerPtr = nextPtr;
478 matchIndex = nextPtr[0];
479 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
480 continue;
482 #endif
484 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
485 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
486 match = base + matchIndex;
487 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
488 } else {
489 match = dictBase + matchIndex;
490 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
491 if (matchIndex+matchLength >= dictLimit)
492 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
495 if (matchLength > bestLength) {
496 bestLength = matchLength;
497 if (matchLength > matchEndIdx - matchIndex)
498 matchEndIdx = matchIndex + (U32)matchLength;
501 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
502 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
505 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
506 /* match is smaller than current */
507 *smallerPtr = matchIndex; /* update smaller idx */
508 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
509 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
510 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
511 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
512 } else {
513 /* match is larger than current */
514 *largerPtr = matchIndex;
515 commonLengthLarger = matchLength;
516 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
517 largerPtr = nextPtr;
518 matchIndex = nextPtr[0];
521 *smallerPtr = *largerPtr = 0;
522 { U32 positions = 0;
523 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
524 assert(matchEndIdx > curr + 8);
525 return MAX(positions, matchEndIdx - (curr + 8));
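/* The chainTable is used here as a binary tree laid out in a rolling buffer:
 * position p owns the pair bt[2*(p & btMask)] (link towards "smaller" suffixes)
 * and bt[2*(p & btMask) + 1] (link towards "larger" suffixes). Each insertion
 * walks down the tree comparing suffixes, tracks the common-prefix lengths on
 * both sides to skip redundant byte comparisons, and re-links nodes so the
 * tree stays ordered by suffix. */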
529 FORCE_INLINE_TEMPLATE
530 void ZSTD_updateTree_internal(
531 ZSTD_matchState_t* ms,
532 const BYTE* const ip, const BYTE* const iend,
533 const U32 mls, const ZSTD_dictMode_e dictMode)
535 const BYTE* const base = ms->window.base;
536 U32 const target = (U32)(ip - base);
537 U32 idx = ms->nextToUpdate;
538 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
539 idx, target, dictMode);
541 while(idx < target) {
542 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
543 assert(idx < (U32)(idx + forward));
544 idx += forward;
546 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
547 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
548 ms->nextToUpdate = target;
551 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
552 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
555 FORCE_INLINE_TEMPLATE
556 U32 ZSTD_insertBtAndGetAllMatches (
557 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
558 ZSTD_matchState_t* ms,
559 U32* nextToUpdate3,
560 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561 const U32 rep[ZSTD_REP_NUM],
562 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563 const U32 lengthToBeat,
564 U32 const mls /* template */)
566 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
568 const BYTE* const base = ms->window.base;
569 U32 const curr = (U32)(ip-base);
570 U32 const hashLog = cParams->hashLog;
571 U32 const minMatch = (mls==3) ? 3 : 4;
572 U32* const hashTable = ms->hashTable;
573 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
574 U32 matchIndex = hashTable[h];
575 U32* const bt = ms->chainTable;
576 U32 const btLog = cParams->chainLog - 1;
577 U32 const btMask= (1U << btLog) - 1;
578 size_t commonLengthSmaller=0, commonLengthLarger=0;
579 const BYTE* const dictBase = ms->window.dictBase;
580 U32 const dictLimit = ms->window.dictLimit;
581 const BYTE* const dictEnd = dictBase + dictLimit;
582 const BYTE* const prefixStart = base + dictLimit;
583 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
584 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
585 U32 const matchLow = windowLow ? windowLow : 1;
586 U32* smallerPtr = bt + 2*(curr&btMask);
587 U32* largerPtr = bt + 2*(curr&btMask) + 1;
588 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
589 U32 dummy32; /* to be nullified at the end */
590 U32 mnum = 0;
591 U32 nbCompares = 1U << cParams->searchLog;
593 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594 const ZSTD_compressionParameters* const dmsCParams =
595 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
597 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
598 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
599 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
600 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
601 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
602 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
603 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
604 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
606 size_t bestLength = lengthToBeat-1;
607 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
609 /* check repCode */
610 assert(ll0 <= 1); /* necessarily 1 or 0 */
611 { U32 const lastR = ZSTD_REP_NUM + ll0;
612 U32 repCode;
613 for (repCode = ll0; repCode < lastR; repCode++) {
614 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
615 U32 const repIndex = curr - repOffset;
616 U32 repLen = 0;
617 assert(curr >= dictLimit);
618 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
619 /* We must validate the repcode offset because when we're using a dictionary the
620 * valid offset range shrinks when the dictionary goes out of bounds.
622 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
623 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
625 } else { /* repIndex < dictLimit || repIndex >= curr */
626 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
627 dmsBase + repIndex - dmsIndexDelta :
628 dictBase + repIndex;
629 assert(curr >= windowLow);
630 if ( dictMode == ZSTD_extDict
631 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
632 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
633 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
636 if (dictMode == ZSTD_dictMatchState
637 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
638 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
639 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
642 /* save longer solution */
643 if (repLen > bestLength) {
644 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645 repCode, ll0, repOffset, repLen);
646 bestLength = repLen;
647 matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
648 matches[mnum].len = (U32)repLen;
649 mnum++;
650 if ( (repLen > sufficient_len)
651 | (ip+repLen == iLimit) ) { /* best possible */
652 return mnum;
653 } } } }
655 /* HC3 match finder */
656 if ((mls == 3) /*static*/ && (bestLength < mls)) {
657 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
658 if ((matchIndex3 >= matchLow)
659 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
660 size_t mlen;
661 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
662 const BYTE* const match = base + matchIndex3;
663 mlen = ZSTD_count(ip, match, iLimit);
664 } else {
665 const BYTE* const match = dictBase + matchIndex3;
666 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
669 /* save best solution */
670 if (mlen >= mls /* == 3 > bestLength */) {
671 DEBUGLOG(8, "found small match with hlog3, of length %u",
672 (U32)mlen);
673 bestLength = mlen;
674 assert(curr > matchIndex3);
675 assert(mnum==0); /* no prior solution */
676 matches[0].off = STORE_OFFSET(curr - matchIndex3);
677 matches[0].len = (U32)mlen;
678 mnum = 1;
679 if ( (mlen > sufficient_len) |
680 (ip+mlen == iLimit) ) { /* best possible length */
681 ms->nextToUpdate = curr+1; /* skip insertion */
682 return 1;
683 } } }
684 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
685 } /* if (mls == 3) */
687 hashTable[h] = curr; /* Update Hash Table */
689 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
690 U32* const nextPtr = bt + 2*(matchIndex & btMask);
691 const BYTE* match;
692 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
693 assert(curr > matchIndex);
695 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
696 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
697 match = base + matchIndex;
698 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
699 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
700 } else {
701 match = dictBase + matchIndex;
702 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
703 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
704 if (matchIndex+matchLength >= dictLimit)
705 match = base + matchIndex; /* prepare for match[matchLength] read */
708 if (matchLength > bestLength) {
709 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
711 assert(matchEndIdx > matchIndex);
712 if (matchLength > matchEndIdx - matchIndex)
713 matchEndIdx = matchIndex + (U32)matchLength;
714 bestLength = matchLength;
715 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
716 matches[mnum].len = (U32)matchLength;
717 mnum++;
718 if ( (matchLength > ZSTD_OPT_NUM)
719 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
721 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
724 if (match[matchLength] < ip[matchLength]) {
725 /* match smaller than current */
726 *smallerPtr = matchIndex; /* update smaller idx */
727 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
728 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
729 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
730 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
731 } else {
732 *largerPtr = matchIndex;
733 commonLengthLarger = matchLength;
734 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
735 largerPtr = nextPtr;
736 matchIndex = nextPtr[0];
739 *smallerPtr = *largerPtr = 0;
741 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
742 if (dictMode == ZSTD_dictMatchState && nbCompares) {
743 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
744 U32 dictMatchIndex = dms->hashTable[dmsH];
745 const U32* const dmsBt = dms->chainTable;
746 commonLengthSmaller = commonLengthLarger = 0;
747 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
748 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
749 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
750 const BYTE* match = dmsBase + dictMatchIndex;
751 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
752 if (dictMatchIndex+matchLength >= dmsHighLimit)
753 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
755 if (matchLength > bestLength) {
756 matchIndex = dictMatchIndex + dmsIndexDelta;
757 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
759 if (matchLength > matchEndIdx - matchIndex)
760 matchEndIdx = matchIndex + (U32)matchLength;
761 bestLength = matchLength;
762 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
763 matches[mnum].len = (U32)matchLength;
764 mnum++;
765 if ( (matchLength > ZSTD_OPT_NUM)
766 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
767 break; /* drop, to guarantee consistency (miss a little bit of compression) */
770 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
771 if (match[matchLength] < ip[matchLength]) {
772 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
773 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
774 } else {
775 /* match is larger than current */
776 commonLengthLarger = matchLength;
777 dictMatchIndex = nextPtr[0];
778 } } } /* if (dictMode == ZSTD_dictMatchState) */
780 assert(matchEndIdx > curr+8);
781 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
782 return mnum;
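/* Matches are appended in strictly increasing length order: repcodes are tried
 * first (they are the cheapest to encode), then the 3-byte finder, then the
 * binary tree, and a candidate is only recorded when it beats the best length
 * found so far. The optimal parser below relies on this ordering
 * (matches[nbMatches-1] is always the longest match found). */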
785 typedef U32 (*ZSTD_getAllMatchesFn)(
786 ZSTD_match_t*,
787 ZSTD_matchState_t*,
788 U32*,
789 const BYTE*,
790 const BYTE*,
791 const U32 rep[ZSTD_REP_NUM],
792 U32 const ll0,
793 U32 const lengthToBeat);
795 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
796 ZSTD_match_t* matches,
797 ZSTD_matchState_t* ms,
798 U32* nextToUpdate3,
799 const BYTE* ip,
800 const BYTE* const iHighLimit,
801 const U32 rep[ZSTD_REP_NUM],
802 U32 const ll0,
803 U32 const lengthToBeat,
804 const ZSTD_dictMode_e dictMode,
805 const U32 mls)
807 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
808 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
809 if (ip < ms->window.base + ms->nextToUpdate)
810 return 0; /* skipped area */
811 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
812 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
815 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
818 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
819 ZSTD_match_t* matches, \
820 ZSTD_matchState_t* ms, \
821 U32* nextToUpdate3, \
822 const BYTE* ip, \
823 const BYTE* const iHighLimit, \
824 const U32 rep[ZSTD_REP_NUM], \
825 U32 const ll0, \
826 U32 const lengthToBeat) \
828 return ZSTD_btGetAllMatches_internal( \
829 matches, ms, nextToUpdate3, ip, iHighLimit, \
830 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
833 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
834 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
835 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
836 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
837 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
839 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
840 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
841 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
843 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
845 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
846 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
847 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
848 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
851 static ZSTD_getAllMatchesFn
852 ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
854 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
856 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
857 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
859 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
860 assert((U32)dictMode < 3);
861 assert(mls - 3 < 4);
862 return getAllMatchesFns[(int)dictMode][mls - 3];
865 /* ***********************
866 * LDM helper functions *
867 *************************/
869 /* Struct containing info needed to make decision about ldm inclusion */
870 typedef struct {
871 rawSeqStore_t seqStore; /* External match candidates store for this block */
872 U32 startPosInBlock; /* Start position of the current match candidate */
873 U32 endPosInBlock; /* End position of the current match candidate */
874 U32 offset; /* Offset of the match candidate */
875 } ZSTD_optLdm_t;
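/* These helpers let the optimal parser consider long-distance-matcher (LDM)
 * candidates alongside the matches found by the binary tree: the rawSeqStore
 * produced by the LDM pass is walked in lockstep with the current block, and
 * any candidate overlapping the current position may be appended to the
 * regular match list (see ZSTD_optLdm_processMatchCandidate() below). */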
877 /* ZSTD_optLdm_skipRawSeqStoreBytes():
878 * Moves forward in @rawSeqStore by @nbBytes,
879 * which will update the fields 'pos' and 'posInSequence'.
881 static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
883 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
885 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
886 if (currPos >= currSeq.litLength + currSeq.matchLength) {
887 currPos -= currSeq.litLength + currSeq.matchLength;
888 rawSeqStore->pos++;
889 } else {
890 rawSeqStore->posInSequence = currPos;
891 break;
894 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
895 rawSeqStore->posInSequence = 0;
899 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
900 * Calculates the beginning and end of the next match in the current block.
901 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
903 static void
904 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
905 U32 blockBytesRemaining)
907 rawSeq currSeq;
908 U32 currBlockEndPos;
909 U32 literalsBytesRemaining;
910 U32 matchBytesRemaining;
912 /* Setting match end position to MAX to ensure we never use an LDM during this block */
913 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
914 optLdm->startPosInBlock = UINT_MAX;
915 optLdm->endPosInBlock = UINT_MAX;
916 return;
918 /* Calculate appropriate bytes left in matchLength and litLength
919 * after adjusting based on ldmSeqStore->posInSequence */
920 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
921 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
922 currBlockEndPos = currPosInBlock + blockBytesRemaining;
923 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
924 currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
926 matchBytesRemaining = (literalsBytesRemaining == 0) ?
927 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
928 currSeq.matchLength;
930 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
931 if (literalsBytesRemaining >= blockBytesRemaining) {
932 optLdm->startPosInBlock = UINT_MAX;
933 optLdm->endPosInBlock = UINT_MAX;
934 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
935 return;
938 /* Matches may end up shorter than MINMATCH after this adjustment. In that case, we reject them
939 later, when deciding whether or not to add the ldm */
940 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
942 optLdm->offset = currSeq.offset;
944 if (optLdm->endPosInBlock > currBlockEndPos) {
945 /* Match ends after the block ends, we can't use the whole match */
946 optLdm->endPosInBlock = currBlockEndPos;
947 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
948 } else {
949 /* Consume nb of bytes equal to size of sequence left */
950 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
954 /* ZSTD_optLdm_maybeAddMatch():
955 * Adds a match if it's long enough,
956 * based on its 'startPosInBlock' and 'endPosInBlock',
957 * into 'matches'. Maintains the correct ordering of 'matches'.
959 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960 const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
962 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963 /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
964 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
966 /* Ensure that current block position is not outside of the match */
967 if (currPosInBlock < optLdm->startPosInBlock
968 || currPosInBlock >= optLdm->endPosInBlock
969 || candidateMatchLength < MINMATCH) {
970 return;
973 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974 U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976 candidateOffCode, candidateMatchLength, currPosInBlock);
977 matches[*nbMatches].len = candidateMatchLength;
978 matches[*nbMatches].off = candidateOffCode;
979 (*nbMatches)++;
983 /* ZSTD_optLdm_processMatchCandidate():
984 * Wrapper function to update ldm seq store and call ldm functions as necessary.
986 static void
987 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988 ZSTD_match_t* matches, U32* nbMatches,
989 U32 currPosInBlock, U32 remainingBytes)
991 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992 return;
995 if (currPosInBlock >= optLdm->endPosInBlock) {
996 if (currPosInBlock > optLdm->endPosInBlock) {
997 /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
998 * at the end of a match from the ldm seq store, and will often be some bytes
999 * past 'endPosInBlock'. As such, we need to correct for these "overshoots"
1001 U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1002 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1004 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1006 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1010 /*-*******************************
1011 * Optimal parser
1012 *********************************/
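/* High-level shape of ZSTD_compressBlock_opt_generic() below: this is a
 * shortest-path style dynamic program. opt[pos] holds the cheapest known way
 * (price, match length, offset, literal run, repcodes) to reach position
 * ip+pos from the current anchor. A forward pass extends each reachable
 * position either with one more literal or with every match found there; once
 * the series ends (sufficient_len hit, ZSTD_OPT_NUM reached, or last_pos
 * processed), a backward traversal from the last position reconstructs the
 * chosen chain of sequences, which is then emitted through ZSTD_storeSeq(). */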
1014 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1016 return sol.litlen + sol.mlen;
1019 #if 0 /* debug */
1021 static void
1022 listStats(const U32* table, int lastEltID)
1024 int const nbElts = lastEltID + 1;
1025 int enb;
1026 for (enb=0; enb < nbElts; enb++) {
1027 (void)table;
1028 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1029 RAWLOG(2, "%4i,", table[enb]);
1031 RAWLOG(2, " \n");
1034 #endif
1036 FORCE_INLINE_TEMPLATE size_t
1037 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038 seqStore_t* seqStore,
1039 U32 rep[ZSTD_REP_NUM],
1040 const void* src, size_t srcSize,
1041 const int optLevel,
1042 const ZSTD_dictMode_e dictMode)
1044 optState_t* const optStatePtr = &ms->opt;
1045 const BYTE* const istart = (const BYTE*)src;
1046 const BYTE* ip = istart;
1047 const BYTE* anchor = istart;
1048 const BYTE* const iend = istart + srcSize;
1049 const BYTE* const ilimit = iend - 8;
1050 const BYTE* const base = ms->window.base;
1051 const BYTE* const prefixStart = base + ms->window.dictLimit;
1052 const ZSTD_compressionParameters* const cParams = &ms->cParams;
1054 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1056 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1057 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1058 U32 nextToUpdate3 = ms->nextToUpdate;
1060 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061 ZSTD_match_t* const matches = optStatePtr->matchTable;
1062 ZSTD_optimal_t lastSequence;
1063 ZSTD_optLdm_t optLdm;
1065 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1069 /* init */
1070 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1071 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1072 assert(optLevel <= 2);
1073 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1074 ip += (ip==prefixStart);
1076 /* Match Loop */
1077 while (ip < ilimit) {
1078 U32 cur, last_pos = 0;
1080 /* find first match */
1081 { U32 const litlen = (U32)(ip - anchor);
1082 U32 const ll0 = !litlen;
1083 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085 (U32)(ip-istart), (U32)(iend - ip));
1086 if (!nbMatches) { ip++; continue; }
1088 /* initialize opt[0] */
1089 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090 opt[0].mlen = 0; /* means is_a_literal */
1091 opt[0].litlen = litlen;
1092 /* We don't need to include the actual price of the literals because
1093 * it is static for the duration of the forward pass, and is included
1094 * in every price. We include the literal length to avoid negative
1095 * prices when we subtract the previous literal length.
1097 opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1099 /* large match -> immediate encoding */
1100 { U32 const maxML = matches[nbMatches-1].len;
1101 U32 const maxOffcode = matches[nbMatches-1].off;
1102 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103 nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1105 if (maxML > sufficient_len) {
1106 lastSequence.litlen = litlen;
1107 lastSequence.mlen = maxML;
1108 lastSequence.off = maxOffcode;
1109 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110 maxML, sufficient_len);
1111 cur = 0;
1112 last_pos = ZSTD_totalLen(lastSequence);
1113 goto _shortestPath;
1116 /* set prices for first matches starting position == 0 */
1117 assert(opt[0].price >= 0);
1118 { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119 U32 pos;
1120 U32 matchNb;
1121 for (pos = 1; pos < minMatch; pos++) {
1122 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1124 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125 U32 const offcode = matches[matchNb].off;
1126 U32 const end = matches[matchNb].len;
1127 for ( ; pos <= end ; pos++ ) {
1128 U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129 U32 const sequencePrice = literalsPrice + matchPrice;
1130 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131 pos, ZSTD_fCost(sequencePrice));
1132 opt[pos].mlen = pos;
1133 opt[pos].off = offcode;
1134 opt[pos].litlen = litlen;
1135 opt[pos].price = (int)sequencePrice;
1137 last_pos = pos-1;
1141 /* check further positions */
1142 for (cur = 1; cur <= last_pos; cur++) {
1143 const BYTE* const inr = ip + cur;
1144 assert(cur < ZSTD_OPT_NUM);
1145 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1147 /* Fix current position with one literal if cheaper */
1148 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149 int const price = opt[cur-1].price
1150 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153 assert(price < 1000000000); /* overflow check */
1154 if (price <= opt[cur].price) {
1155 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158 opt[cur].mlen = 0;
1159 opt[cur].off = 0;
1160 opt[cur].litlen = litlen;
1161 opt[cur].price = price;
1162 } else {
1163 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1169 /* Set the repcodes of the current position. We must do it here
1170 * because we rely on the repcodes of the 2nd to last sequence being
1171 * correct to set the next chunks repcodes during the backward
1172 * traversal.
1174 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1175 assert(cur >= opt[cur].mlen);
1176 if (opt[cur].mlen != 0) {
1177 U32 const prev = cur - opt[cur].mlen;
1178 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180 } else {
1181 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1184 /* last match must start at a minimum distance of 8 from iend */
1185 if (inr > ilimit) continue;
1187 if (cur == last_pos) break;
1189 if ( (optLevel==0) /*static_test*/
1190 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1192 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1195 assert(opt[cur].price >= 0);
1196 { U32 const ll0 = (opt[cur].mlen != 0);
1197 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198 U32 const previousPrice = (U32)opt[cur].price;
1199 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1200 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201 U32 matchNb;
1203 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204 (U32)(inr-istart), (U32)(iend-inr));
1206 if (!nbMatches) {
1207 DEBUGLOG(7, "rPos:%u : no match found", cur);
1208 continue;
1211 { U32 const maxML = matches[nbMatches-1].len;
1212 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213 inr-istart, cur, nbMatches, maxML);
1215 if ( (maxML > sufficient_len)
1216 || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217 lastSequence.mlen = maxML;
1218 lastSequence.off = matches[nbMatches-1].off;
1219 lastSequence.litlen = litlen;
1220 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221 last_pos = cur + ZSTD_totalLen(lastSequence);
1222 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1223 goto _shortestPath;
1226 /* set prices using matches found at position == cur */
1227 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1228 U32 const offset = matches[matchNb].off;
1229 U32 const lastML = matches[matchNb].len;
1230 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231 U32 mlen;
1233 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234 matchNb, matches[matchNb].off, lastML, litlen);
1236 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1237 U32 const pos = cur + mlen;
1238 int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1240 if ((pos > last_pos) || (price < opt[pos].price)) {
1241 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1244 opt[pos].mlen = mlen;
1245 opt[pos].off = offset;
1246 opt[pos].litlen = litlen;
1247 opt[pos].price = price;
1248 } else {
1249 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1250 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1251 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1253 } } }
1254 } /* for (cur = 1; cur <= last_pos; cur++) */
1256 lastSequence = opt[last_pos];
1257 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1258 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1260 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1261 assert(opt[0].mlen == 0);
1263 /* Set the next chunk's repcodes based on the repcodes of the beginning
1264 * of the last match, and the last sequence. This avoids us having to
1265 * update them while traversing the sequences.
1267 if (lastSequence.mlen != 0) {
1268 repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269 ZSTD_memcpy(rep, &reps, sizeof(reps));
1270 } else {
1271 ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1274 { U32 const storeEnd = cur + 1;
1275 U32 storeStart = storeEnd;
1276 U32 seqPos = cur;
1278 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279 last_pos, cur); (void)last_pos;
1280 assert(storeEnd < ZSTD_OPT_NUM);
1281 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283 opt[storeEnd] = lastSequence;
1284 while (seqPos > 0) {
1285 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1286 storeStart--;
1287 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289 opt[storeStart] = opt[seqPos];
1290 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1293 /* save sequences */
1294 DEBUGLOG(6, "sending selected sequences into seqStore")
1295 { U32 storePos;
1296 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297 U32 const llen = opt[storePos].litlen;
1298 U32 const mlen = opt[storePos].mlen;
1299 U32 const offCode = opt[storePos].off;
1300 U32 const advance = llen + mlen;
1301 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302 anchor - istart, (unsigned)llen, (unsigned)mlen);
1304 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305 assert(storePos == storeEnd); /* must be last sequence */
1306 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1307 continue; /* will finish */
1310 assert(anchor + llen <= iend);
1311 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1313 anchor += advance;
1314 ip = anchor;
1316 ZSTD_setBasePrices(optStatePtr, optLevel);
1318 } /* while (ip < ilimit) */
1320 /* Return the last literals size */
1321 return (size_t)(iend - anchor);
1324 static size_t ZSTD_compressBlock_opt0(
1325 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1328 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1331 static size_t ZSTD_compressBlock_opt2(
1332 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1335 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1338 size_t ZSTD_compressBlock_btopt(
1339 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340 const void* src, size_t srcSize)
1342 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1349 /* ZSTD_initStats_ultra():
1350 * make a first compression pass, just to seed stats with more accurate starting values.
1351 * only works on first block, with no dictionary and no ldm.
1352 * this function cannot error, hence its contract must be respected.
1354 static void
1355 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356 seqStore_t* seqStore,
1357 U32 rep[ZSTD_REP_NUM],
1358 const void* src, size_t srcSize)
1360 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1361 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1363 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1364 assert(ms->opt.litLengthSum == 0); /* first block */
1365 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1366 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1367 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as two's complement) */
1369 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1371 /* invalidate first scan from history */
1372 ZSTD_resetSeqStore(seqStore);
1373 ms->window.base -= srcSize;
1374 ms->window.dictLimit += (U32)srcSize;
1375 ms->window.lowLimit = ms->window.dictLimit;
1376 ms->nextToUpdate = ms->window.dictLimit;
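/* The pointer arithmetic above "rewinds" the window so that the bytes consumed
 * by the throw-away first pass appear to belong to an unreachable dictionary
 * area: base is moved back by srcSize while dictLimit/lowLimit/nextToUpdate are
 * moved up by the same amount, so the real compression pass re-parses the same
 * data from a clean match state while keeping the statistics seeded in
 * ms->opt. */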
1380 size_t ZSTD_compressBlock_btultra(
1381 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382 const void* src, size_t srcSize)
1384 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1385 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1388 size_t ZSTD_compressBlock_btultra2(
1389 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390 const void* src, size_t srcSize)
1392 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1395 /* 2-pass strategy:
1396 * this strategy makes a first pass over first block to collect statistics
1397 * and seed next round's statistics with it.
1398 * After 1st pass, function forgets everything, and starts a new block.
1399 * Consequently, this can only work if no data has been previously loaded in tables,
1400 * aka, no dictionary, no prefix, no ldm preprocessing.
1401 * The compression ratio gain is generally small (~0.5% on first block),
1402 * the cost is 2x cpu time on first block. */
1403 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404 if ( (ms->opt.litLengthSum==0) /* first block */
1405 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1406 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1407 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1408 && (srcSize > ZSTD_PREDEF_THRESHOLD)
1410 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1413 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1416 size_t ZSTD_compressBlock_btopt_dictMatchState(
1417 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418 const void* src, size_t srcSize)
1420 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1423 size_t ZSTD_compressBlock_btultra_dictMatchState(
1424 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425 const void* src, size_t srcSize)
1427 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1430 size_t ZSTD_compressBlock_btopt_extDict(
1431 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432 const void* src, size_t srcSize)
1434 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1437 size_t ZSTD_compressBlock_btultra_extDict(
1438 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439 const void* src, size_t srcSize)
1441 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1444 /* note : no btultra2 variant for extDict nor dictMatchState,
1445 * because btultra2 is not meant to work with dictionaries
1446 * and is only specific for the first block (no prefix) */