lib/zstd/compress/zstd_opt.c
1 /*
2 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE (1<<30)
19 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
22 /*-*************************************
23 * Price functions for optimal parser
24 ***************************************/
26 #if 0 /* approximation at bit level (for tests) */
27 # define BITCOST_ACCURACY 0
28 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0 /* fractional bit accuracy (for tests) */
31 # define BITCOST_ACCURACY 8
32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else /* opt==approx, ultra==accurate */
35 # define BITCOST_ACCURACY 8
36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
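/* Note on the price representation used throughout this file: prices are
 * fixed-point bit counts, scaled by BITCOST_MULTIPLIER (1<<8 in the accurate
 * mode). ZSTD_bitWeight() below returns floor(log2(stat+1)) in that scale,
 * while ZSTD_fracWeight() adds a linear fractional correction: writing
 * stat+1 == 2^hb * (1+f), it returns roughly (hb + 1 + f) * BITCOST_MULTIPLIER,
 * i.e. approximately (log2(stat+1) + 1) in the scaled representation.
 * Illustrative example: ZSTD_fracWeight(10) gives hb=3, BWeight=768,
 * FWeight=352, total 1120, i.e. ~4.37 "bits" vs the exact log2(11)+1 ~= 4.46.
 * Symbol prices are then formed as differences WEIGHT(sum) - WEIGHT(freq),
 * which approximates log2(sum/freq), the Shannon cost of the symbol. */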
40 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
42 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
45 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
47 U32 const stat = rawStat + 1;
48 U32 const hb = ZSTD_highbit32(stat);
49 U32 const BWeight = hb * BITCOST_MULTIPLIER;
50 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51 U32 const weight = BWeight + FWeight;
52 assert(hb + BITCOST_ACCURACY < 31);
53 return weight;
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58 * @return price in bytes as fractional value
59 * for debug messages only */
60 MEM_STATIC double ZSTD_fCost(U32 price)
62 return (double)price / (BITCOST_MULTIPLIER*8);
64 #endif
66 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
68 return optPtr->literalCompressionMode != ZSTD_ps_disable;
71 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
73 if (ZSTD_compressedLiterals(optPtr))
74 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
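/* The *BasePrice fields cache WEIGHT(sum) for each statistic, so that the cost
 * of an individual symbol can later be computed as basePrice - WEIGHT(freq),
 * i.e. roughly log2(sum/freq) bits (see ZSTD_rawLiteralsCost() and
 * ZSTD_getMatchPrice() below). */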
81 static U32 sum_u32(const unsigned table[], size_t nbElts)
83 size_t n;
84 U32 total = 0;
85 for (n=0; n<nbElts; n++) {
86 total += table[n];
88 return total;
91 static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
93 U32 s, sum=0;
94 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95 assert(shift < 30);
96 for (s=0; s<lastEltIndex+1; s++) {
97 table[s] = 1 + (table[s] >> shift);
98 sum += table[s];
100 return sum;
103 /* ZSTD_scaleStats() :
104 * reduce all elements in table if sum is too large
105 * return the resulting sum of elements */
106 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
108 U32 const prevsum = sum_u32(table, lastEltIndex+1);
109 U32 const factor = prevsum >> logTarget;
110 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111 assert(logTarget < 30);
112 if (factor <= 1) return prevsum;
113 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
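/* Illustrative scaling behavior: with logTarget == 11, a table whose elements
 * sum to ~16K gives factor == 8, so each element is divided by 8 (rounded down)
 * and bumped by 1, bringing the sum back near the 2K target while keeping every
 * symbol representable. */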
116 /* ZSTD_rescaleFreqs() :
117 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118 *     take hints from dictionary if there is one
119 *     and init from zero if there is none,
120 *     using src for literals stats, and baseline stats for sequence symbols
121 * otherwise downscale existing stats, to be used as seed for next block.
123 static void
124 ZSTD_rescaleFreqs(optState_t* const optPtr,
125 const BYTE* const src, size_t const srcSize,
126 int const optLevel)
128 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130 optPtr->priceType = zop_dynamic;
132 if (optPtr->litLengthSum == 0) { /* first block : init */
133 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135 optPtr->priceType = zop_predef;
138 assert(optPtr->symbolCosts != NULL);
139 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140 /* huffman table presumed generated by dictionary */
141 optPtr->priceType = zop_dynamic;
143 if (compressedLiterals) {
144 unsigned lit;
145 assert(optPtr->litFreq != NULL);
146 optPtr->litSum = 0;
147 for (lit=0; lit<=MaxLit; lit++) {
148 U32 const scaleLog = 11; /* scale to 2K */
149 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150 assert(bitCost <= scaleLog);
151 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152 optPtr->litSum += optPtr->litFreq[lit];
155 { unsigned ll;
156 FSE_CState_t llstate;
157 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158 optPtr->litLengthSum = 0;
159 for (ll=0; ll<=MaxLL; ll++) {
160 U32 const scaleLog = 10; /* scale to 1K */
161 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162 assert(bitCost < scaleLog);
163 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
167 { unsigned ml;
168 FSE_CState_t mlstate;
169 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170 optPtr->matchLengthSum = 0;
171 for (ml=0; ml<=MaxML; ml++) {
172 U32 const scaleLog = 10;
173 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174 assert(bitCost < scaleLog);
175 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
179 { unsigned of;
180 FSE_CState_t ofstate;
181 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182 optPtr->offCodeSum = 0;
183 for (of=0; of<=MaxOff; of++) {
184 U32 const scaleLog = 10;
185 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186 assert(bitCost < scaleLog);
187 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188 optPtr->offCodeSum += optPtr->offCodeFreq[of];
191 } else { /* not a dictionary */
193 assert(optPtr->litFreq != NULL);
194 if (compressedLiterals) {
195 unsigned lit = MaxLit;
196 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
200 { unsigned const baseLLfreqs[MaxLL+1] = {
201 4, 2, 1, 1, 1, 1, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1,
203 1, 1, 1, 1, 1, 1, 1, 1,
204 1, 1, 1, 1, 1, 1, 1, 1,
205 1, 1, 1, 1
207 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
208 optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
211 { unsigned ml;
212 for (ml=0; ml<=MaxML; ml++)
213 optPtr->matchLengthFreq[ml] = 1;
215 optPtr->matchLengthSum = MaxML+1;
217 { unsigned const baseOFCfreqs[MaxOff+1] = {
218 6, 2, 1, 1, 2, 3, 4, 4,
219 4, 3, 2, 1, 1, 1, 1, 1,
220 1, 1, 1, 1, 1, 1, 1, 1,
221 1, 1, 1, 1, 1, 1, 1, 1
223 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
224 optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
230 } else { /* new block : re-use previous statistics, scaled down */
232 if (compressedLiterals)
233 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
234 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
235 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
236 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
239 ZSTD_setBasePrices(optPtr, optLevel);
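/* Note on the dictionary-seeded path above: HUF/FSE tables store a number of
 * bits per symbol, not a frequency, so the code converts each bit cost back
 * into a pseudo-frequency with freq = 1 << (scaleLog - bitCost) (or 1 when the
 * symbol is absent from the table); a symbol costing b bits thus behaves like
 * one with probability 2^-b, which is what the WEIGHT()-based price functions
 * expect. */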
242 /* ZSTD_rawLiteralsCost() :
243 * price of literals (only) in specified segment (whose length can be 0).
244 * does not include price of literalLength symbol */
245 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246 const optState_t* const optPtr,
247 int optLevel)
249 if (litLength == 0) return 0;
251 if (!ZSTD_compressedLiterals(optPtr))
252 return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
254 if (optPtr->priceType == zop_predef)
255 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistic used */
257 /* dynamic statistics */
258 { U32 price = litLength * optPtr->litSumBasePrice;
259 U32 u;
260 for (u=0; u < litLength; u++) {
261 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
262 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
264 return price;
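/* Illustrative cost (made-up numbers): with litSum ~= 2048 and
 * litFreq['e'] == 256, each 'e' literal is priced at roughly
 * WEIGHT(2048) - WEIGHT(256) ~= log2(2048/256) = 3 bits (times
 * BITCOST_MULTIPLIER); rarer literals cost proportionally more. */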
268 /* ZSTD_litLengthPrice() :
269 * cost of literalLength symbol */
270 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
272 assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273 if (optPtr->priceType == zop_predef)
274 return WEIGHT(litLength, optLevel);
275 /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276 * because it isn't representable in the zstd format. So instead just
277 * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278 * would be all literals.
280 if (litLength == ZSTD_BLOCKSIZE_MAX)
281 return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
283 /* dynamic statistics */
284 { U32 const llCode = ZSTD_LLcode(litLength);
285 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
286 + optPtr->litLengthSumBasePrice
287 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
291 /* ZSTD_getMatchPrice() :
292 * Provides the cost of the match part (offset + matchLength) of a sequence
293 * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
294 * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
295 * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
297 FORCE_INLINE_TEMPLATE U32
298 ZSTD_getMatchPrice(U32 const offcode,
299 U32 const matchLength,
300 const optState_t* const optPtr,
301 int const optLevel)
303 U32 price;
304 U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
305 U32 const mlBase = matchLength - MINMATCH;
306 assert(matchLength >= MINMATCH);
308 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
309 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
311 /* dynamic statistics */
312 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
313 if ((optLevel<2) /*static*/ && offCode >= 20)
314 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
316 /* match Length */
317 { U32 const mlCode = ZSTD_MLcode(mlBase);
318 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
321 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
323 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
324 return price;
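/* Offsets are priced by bucket: offCode = floor(log2(offbase)) selects one of
 * up to 32 offset codes, and code N implies N extra raw bits in the bitstream,
 * hence the (offCode * BITCOST_MULTIPLIER) term on top of the statistical cost
 * of the code itself. The optLevel<2 handicap above makes very distant matches
 * (offCode >= 20, i.e. offsets on the order of a megabyte or more)
 * progressively less attractive, trading a little ratio for decompression
 * speed. */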
327 /* ZSTD_updateStats() :
328 * assumption : literals + litLength <= iend */
329 static void ZSTD_updateStats(optState_t* const optPtr,
330 U32 litLength, const BYTE* literals,
331 U32 offsetCode, U32 matchLength)
333 /* literals */
334 if (ZSTD_compressedLiterals(optPtr)) {
335 U32 u;
336 for (u=0; u < litLength; u++)
337 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
338 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
341 /* literal Length */
342 { U32 const llCode = ZSTD_LLcode(litLength);
343 optPtr->litLengthFreq[llCode]++;
344 optPtr->litLengthSum++;
347 /* offset code : expected to follow storeSeq() numeric representation */
348 { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
349 assert(offCode <= MaxOff);
350 optPtr->offCodeFreq[offCode]++;
351 optPtr->offCodeSum++;
354 /* match Length */
355 { U32 const mlBase = matchLength - MINMATCH;
356 U32 const mlCode = ZSTD_MLcode(mlBase);
357 optPtr->matchLengthFreq[mlCode]++;
358 optPtr->matchLengthSum++;
363 /* ZSTD_readMINMATCH() :
364 * function safe only for comparisons
365 * assumption : memPtr must be at least 4 bytes before end of buffer */
366 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
368 switch (length)
370 default :
371 case 4 : return MEM_read32(memPtr);
372 case 3 : if (MEM_isLittleEndian())
373 return MEM_read32(memPtr)<<8;
374 else
375 return MEM_read32(memPtr)>>8;
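/* For length==3 the helper still reads 4 bytes but shifts so that only the 3
 * useful bytes take part in the comparison; the returned value is therefore
 * only meaningful for equality tests, never as an actual literal or offset
 * value (hence "safe only for comparisons" above). */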
380 /* Update hashTable3 up to ip (excluded)
381 Assumption : always within prefix (i.e. not within extDict) */
382 static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383 U32* nextToUpdate3,
384 const BYTE* const ip)
386 U32* const hashTable3 = ms->hashTable3;
387 U32 const hashLog3 = ms->hashLog3;
388 const BYTE* const base = ms->window.base;
389 U32 idx = *nextToUpdate3;
390 U32 const target = (U32)(ip - base);
391 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
392 assert(hashLog3 > 0);
394 while(idx < target) {
395 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
396 idx++;
399 *nextToUpdate3 = target;
400 return hashTable3[hash3];
404 /*-*************************************
405 * Binary Tree search
406 ***************************************/
407 /* ZSTD_insertBt1() : add one or multiple positions to tree.
408 * @param ip assumed <= iend-8 .
409 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
410 * @return : nb of positions added */
411 static U32 ZSTD_insertBt1(
412 const ZSTD_matchState_t* ms,
413 const BYTE* const ip, const BYTE* const iend,
414 U32 const target,
415 U32 const mls, const int extDict)
417 const ZSTD_compressionParameters* const cParams = &ms->cParams;
418 U32* const hashTable = ms->hashTable;
419 U32 const hashLog = cParams->hashLog;
420 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
421 U32* const bt = ms->chainTable;
422 U32 const btLog = cParams->chainLog - 1;
423 U32 const btMask = (1 << btLog) - 1;
424 U32 matchIndex = hashTable[h];
425 size_t commonLengthSmaller=0, commonLengthLarger=0;
426 const BYTE* const base = ms->window.base;
427 const BYTE* const dictBase = ms->window.dictBase;
428 const U32 dictLimit = ms->window.dictLimit;
429 const BYTE* const dictEnd = dictBase + dictLimit;
430 const BYTE* const prefixStart = base + dictLimit;
431 const BYTE* match;
432 const U32 curr = (U32)(ip-base);
433 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
434 U32* smallerPtr = bt + 2*(curr&btMask);
435 U32* largerPtr = smallerPtr + 1;
436 U32 dummy32; /* to be nullified at the end */
437 /* windowLow is based on target because
438 * we only need positions that will be in the window at the end of the tree update.
440 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
441 U32 matchEndIdx = curr+8+1;
442 size_t bestLength = 8;
443 U32 nbCompares = 1U << cParams->searchLog;
444 #ifdef ZSTD_C_PREDICT
445 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
446 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
447 predictedSmall += (predictedSmall>0);
448 predictedLarge += (predictedLarge>0);
449 #endif /* ZSTD_C_PREDICT */
451 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
453 assert(curr <= target);
454 assert(ip <= iend-8); /* required for h calculation */
455 hashTable[h] = curr; /* Update Hash Table */
457 assert(windowLow > 0);
458 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
459 U32* const nextPtr = bt + 2*(matchIndex & btMask);
460 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
461 assert(matchIndex < curr);
463 #ifdef ZSTD_C_PREDICT /* note : can create issues when hashLog is small (<= 11) */
464 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
465 if (matchIndex == predictedSmall) {
466 /* no need to check length, result known */
467 *smallerPtr = matchIndex;
468 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
469 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
470 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
471 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
472 continue;
474 if (matchIndex == predictedLarge) {
475 *largerPtr = matchIndex;
476 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
477 largerPtr = nextPtr;
478 matchIndex = nextPtr[0];
479 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
480 continue;
482 #endif
484 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
485 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
486 match = base + matchIndex;
487 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
488 } else {
489 match = dictBase + matchIndex;
490 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
491 if (matchIndex+matchLength >= dictLimit)
492 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
495 if (matchLength > bestLength) {
496 bestLength = matchLength;
497 if (matchLength > matchEndIdx - matchIndex)
498 matchEndIdx = matchIndex + (U32)matchLength;
501 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
502 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
505 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
506 /* match is smaller than current */
507 *smallerPtr = matchIndex; /* update smaller idx */
508 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
509 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
510 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
511 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
512 } else {
513 /* match is larger than current */
514 *largerPtr = matchIndex;
515 commonLengthLarger = matchLength;
516 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
517 largerPtr = nextPtr;
518 matchIndex = nextPtr[0];
521 *smallerPtr = *largerPtr = 0;
522 { U32 positions = 0;
523 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
524 assert(matchEndIdx > curr + 8);
525 return MAX(positions, matchEndIdx - (curr + 8));
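/* The chainTable is used here as a binary tree laid out in a rolling buffer:
 * position p owns the pair bt[2*(p & btMask)] (link towards "smaller" suffixes)
 * and bt[2*(p & btMask) + 1] (link towards "larger" suffixes). Each insertion
 * walks down the tree comparing suffixes, tracks the common-prefix lengths on
 * both sides to skip redundant byte comparisons, and re-links nodes so the
 * tree stays ordered by suffix. */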
529 FORCE_INLINE_TEMPLATE
530 void ZSTD_updateTree_internal(
531 ZSTD_matchState_t* ms,
532 const BYTE* const ip, const BYTE* const iend,
533 const U32 mls, const ZSTD_dictMode_e dictMode)
535 const BYTE* const base = ms->window.base;
536 U32 const target = (U32)(ip - base);
537 U32 idx = ms->nextToUpdate;
538 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
539 idx, target, dictMode);
541 while(idx < target) {
542 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
543 assert(idx < (U32)(idx + forward));
544 idx += forward;
546 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
547 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
548 ms->nextToUpdate = target;
551 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
552 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
555 FORCE_INLINE_TEMPLATE
556 U32 ZSTD_insertBtAndGetAllMatches (
557 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
558 ZSTD_matchState_t* ms,
559 U32* nextToUpdate3,
560 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561 const U32 rep[ZSTD_REP_NUM],
562 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563 const U32 lengthToBeat,
564 U32 const mls /* template */)
566 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
568 const BYTE* const base = ms->window.base;
569 U32 const curr = (U32)(ip-base);
570 U32 const hashLog = cParams->hashLog;
571 U32 const minMatch = (mls==3) ? 3 : 4;
572 U32* const hashTable = ms->hashTable;
573 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
574 U32 matchIndex = hashTable[h];
575 U32* const bt = ms->chainTable;
576 U32 const btLog = cParams->chainLog - 1;
577 U32 const btMask= (1U << btLog) - 1;
578 size_t commonLengthSmaller=0, commonLengthLarger=0;
579 const BYTE* const dictBase = ms->window.dictBase;
580 U32 const dictLimit = ms->window.dictLimit;
581 const BYTE* const dictEnd = dictBase + dictLimit;
582 const BYTE* const prefixStart = base + dictLimit;
583 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
584 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
585 U32 const matchLow = windowLow ? windowLow : 1;
586 U32* smallerPtr = bt + 2*(curr&btMask);
587 U32* largerPtr = bt + 2*(curr&btMask) + 1;
588 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
589 U32 dummy32; /* to be nullified at the end */
590 U32 mnum = 0;
591 U32 nbCompares = 1U << cParams->searchLog;
593 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594 const ZSTD_compressionParameters* const dmsCParams =
595 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
597 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
598 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
599 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
600 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
601 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
602 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
603 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
604 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
606 size_t bestLength = lengthToBeat-1;
607 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
609 /* check repCode */
610 assert(ll0 <= 1); /* necessarily 1 or 0 */
611 { U32 const lastR = ZSTD_REP_NUM + ll0;
612 U32 repCode;
613 for (repCode = ll0; repCode < lastR; repCode++) {
614 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
615 U32 const repIndex = curr - repOffset;
616 U32 repLen = 0;
617 assert(curr >= dictLimit);
618 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
619 /* We must validate the repcode offset because when we're using a dictionary the
620 * valid offset range shrinks when the dictionary goes out of bounds.
622 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
623 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
625 } else { /* repIndex < dictLimit || repIndex >= curr */
626 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
627 dmsBase + repIndex - dmsIndexDelta :
628 dictBase + repIndex;
629 assert(curr >= windowLow);
630 if ( dictMode == ZSTD_extDict
631 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
632 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
633 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
636 if (dictMode == ZSTD_dictMatchState
637 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
638 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
639 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
642 /* save longer solution */
643 if (repLen > bestLength) {
644 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645 repCode, ll0, repOffset, repLen);
646 bestLength = repLen;
647 matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
648 matches[mnum].len = (U32)repLen;
649 mnum++;
650 if ( (repLen > sufficient_len)
651 | (ip+repLen == iLimit) ) { /* best possible */
652 return mnum;
653 } } } }
655 /* HC3 match finder */
656 if ((mls == 3) /*static*/ && (bestLength < mls)) {
657 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
658 if ((matchIndex3 >= matchLow)
659 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
660 size_t mlen;
661 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
662 const BYTE* const match = base + matchIndex3;
663 mlen = ZSTD_count(ip, match, iLimit);
664 } else {
665 const BYTE* const match = dictBase + matchIndex3;
666 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
669 /* save best solution */
670 if (mlen >= mls /* == 3 > bestLength */) {
671 DEBUGLOG(8, "found small match with hlog3, of length %u",
672 (U32)mlen);
673 bestLength = mlen;
674 assert(curr > matchIndex3);
675 assert(mnum==0); /* no prior solution */
676 matches[0].off = STORE_OFFSET(curr - matchIndex3);
677 matches[0].len = (U32)mlen;
678 mnum = 1;
679 if ( (mlen > sufficient_len) |
680 (ip+mlen == iLimit) ) { /* best possible length */
681 ms->nextToUpdate = curr+1; /* skip insertion */
682 return 1;
683 } } }
684 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
685 } /* if (mls == 3) */
687 hashTable[h] = curr; /* Update Hash Table */
689 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
690 U32* const nextPtr = bt + 2*(matchIndex & btMask);
691 const BYTE* match;
692 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
693 assert(curr > matchIndex);
695 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
696 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
697 match = base + matchIndex;
698 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
699 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
700 } else {
701 match = dictBase + matchIndex;
702 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
703 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
704 if (matchIndex+matchLength >= dictLimit)
705 match = base + matchIndex; /* prepare for match[matchLength] read */
708 if (matchLength > bestLength) {
709 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
711 assert(matchEndIdx > matchIndex);
712 if (matchLength > matchEndIdx - matchIndex)
713 matchEndIdx = matchIndex + (U32)matchLength;
714 bestLength = matchLength;
715 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
716 matches[mnum].len = (U32)matchLength;
717 mnum++;
718 if ( (matchLength > ZSTD_OPT_NUM)
719 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
720 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
721 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
724 if (match[matchLength] < ip[matchLength]) {
725 /* match smaller than current */
726 *smallerPtr = matchIndex; /* update smaller idx */
727 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
728 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
729 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
730 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
731 } else {
732 *largerPtr = matchIndex;
733 commonLengthLarger = matchLength;
734 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
735 largerPtr = nextPtr;
736 matchIndex = nextPtr[0];
739 *smallerPtr = *largerPtr = 0;
741 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
742 if (dictMode == ZSTD_dictMatchState && nbCompares) {
743 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
744 U32 dictMatchIndex = dms->hashTable[dmsH];
745 const U32* const dmsBt = dms->chainTable;
746 commonLengthSmaller = commonLengthLarger = 0;
747 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
748 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
749 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
750 const BYTE* match = dmsBase + dictMatchIndex;
751 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
752 if (dictMatchIndex+matchLength >= dmsHighLimit)
753 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
755 if (matchLength > bestLength) {
756 matchIndex = dictMatchIndex + dmsIndexDelta;
757 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758 (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
759 if (matchLength > matchEndIdx - matchIndex)
760 matchEndIdx = matchIndex + (U32)matchLength;
761 bestLength = matchLength;
762 matches[mnum].off = STORE_OFFSET(curr - matchIndex);
763 matches[mnum].len = (U32)matchLength;
764 mnum++;
765 if ( (matchLength > ZSTD_OPT_NUM)
766 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
767 break; /* drop, to guarantee consistency (miss a little bit of compression) */
770 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
771 if (match[matchLength] < ip[matchLength]) {
772 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
773 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
774 } else {
775 /* match is larger than current */
776 commonLengthLarger = matchLength;
777 dictMatchIndex = nextPtr[0];
778 } } } /* if (dictMode == ZSTD_dictMatchState) */
780 assert(matchEndIdx > curr+8);
781 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
782 return mnum;
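/* Matches are appended in strictly increasing length order: repcodes are tried
 * first (they are the cheapest to encode), then the 3-byte finder, then the
 * binary tree, and a candidate is only recorded when it beats the best length
 * found so far. The optimal parser below relies on this ordering
 * (matches[nbMatches-1] is always the longest match found). */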
785 typedef U32 (*ZSTD_getAllMatchesFn)(
786 ZSTD_match_t*,
787 ZSTD_matchState_t*,
788 U32*,
789 const BYTE*,
790 const BYTE*,
791 const U32 rep[ZSTD_REP_NUM],
792 U32 const ll0,
793 U32 const lengthToBeat);
795 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
796 ZSTD_match_t* matches,
797 ZSTD_matchState_t* ms,
798 U32* nextToUpdate3,
799 const BYTE* ip,
800 const BYTE* const iHighLimit,
801 const U32 rep[ZSTD_REP_NUM],
802 U32 const ll0,
803 U32 const lengthToBeat,
804 const ZSTD_dictMode_e dictMode,
805 const U32 mls)
807 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
808 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
809 if (ip < ms->window.base + ms->nextToUpdate)
810 return 0; /* skipped area */
811 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
812 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
815 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
817 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
818 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
819 ZSTD_match_t* matches, \
820 ZSTD_matchState_t* ms, \
821 U32* nextToUpdate3, \
822 const BYTE* ip, \
823 const BYTE* const iHighLimit, \
824 const U32 rep[ZSTD_REP_NUM], \
825 U32 const ll0, \
826 U32 const lengthToBeat) \
828 return ZSTD_btGetAllMatches_internal( \
829 matches, ms, nextToUpdate3, ip, iHighLimit, \
830 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
833 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
834 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
835 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
836 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
837 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
839 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
840 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
841 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
843 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
845 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
846 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
847 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
848 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
851 static ZSTD_getAllMatchesFn
852 ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
854 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
856 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
857 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
859 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
860 assert((U32)dictMode < 3);
861 assert(mls - 3 < 4);
862 return getAllMatchesFns[(int)dictMode][mls - 3];
865 /* ***********************
866 * LDM helper functions *
867 *************************/
869 /* Struct containing info needed to make decision about ldm inclusion */
870 typedef struct {
871 rawSeqStore_t seqStore; /* External match candidates store for this block */
872 U32 startPosInBlock; /* Start position of the current match candidate */
873 U32 endPosInBlock; /* End position of the current match candidate */
874 U32 offset; /* Offset of the match candidate */
875 } ZSTD_optLdm_t;
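/* These helpers let the optimal parser consider long-distance-matcher (LDM)
 * candidates alongside the matches found by the binary tree: the rawSeqStore
 * produced by the LDM pass is walked in lockstep with the current block, and
 * any candidate overlapping the current position may be appended to the
 * regular match list (see ZSTD_optLdm_processMatchCandidate() below). */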
877 /* ZSTD_optLdm_skipRawSeqStoreBytes():
878 * Moves forward in @rawSeqStore by @nbBytes,
879 * which will update the fields 'pos' and 'posInSequence'.
881 static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
883 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
885 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
886 if (currPos >= currSeq.litLength + currSeq.matchLength) {
887 currPos -= currSeq.litLength + currSeq.matchLength;
888 rawSeqStore->pos++;
889 } else {
890 rawSeqStore->posInSequence = currPos;
891 break;
894 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
895 rawSeqStore->posInSequence = 0;
899 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
900 * Calculates the beginning and end of the next match in the current block.
901 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
903 static void
904 ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
905 U32 blockBytesRemaining)
907 rawSeq currSeq;
908 U32 currBlockEndPos;
909 U32 literalsBytesRemaining;
910 U32 matchBytesRemaining;
912 /* Setting match end position to MAX to ensure we never use an LDM during this block */
913 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
914 optLdm->startPosInBlock = UINT_MAX;
915 optLdm->endPosInBlock = UINT_MAX;
916 return;
918 /* Calculate appropriate bytes left in matchLength and litLength
919 * after adjusting based on ldmSeqStore->posInSequence */
920 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
921 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
922 currBlockEndPos = currPosInBlock + blockBytesRemaining;
923 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
924 currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
926 matchBytesRemaining = (literalsBytesRemaining == 0) ?
927 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
928 currSeq.matchLength;
930 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
931 if (literalsBytesRemaining >= blockBytesRemaining) {
932 optLdm->startPosInBlock = UINT_MAX;
933 optLdm->endPosInBlock = UINT_MAX;
934 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
935 return;
938 /* Matches may end up shorter than MINMATCH after this adjustment. In that case, we reject them
939 later, when deciding whether or not to add the ldm */
940 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
942 optLdm->offset = currSeq.offset;
944 if (optLdm->endPosInBlock > currBlockEndPos) {
945 /* Match ends after the block ends, we can't use the whole match */
946 optLdm->endPosInBlock = currBlockEndPos;
947 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
948 } else {
949 /* Consume nb of bytes equal to size of sequence left */
950 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
954 /* ZSTD_optLdm_maybeAddMatch():
955 * Adds a match if it's long enough,
956 * based on its 'startPosInBlock' and 'endPosInBlock',
957 * into 'matches'. Maintains the correct ordering of 'matches'.
959 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960 const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
962 U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963 /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
964 U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
966 /* Ensure that current block position is not outside of the match */
967 if (currPosInBlock < optLdm->startPosInBlock
968 || currPosInBlock >= optLdm->endPosInBlock
969 || candidateMatchLength < MINMATCH) {
970 return;
973 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974 U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976 candidateOffCode, candidateMatchLength, currPosInBlock);
977 matches[*nbMatches].len = candidateMatchLength;
978 matches[*nbMatches].off = candidateOffCode;
979 (*nbMatches)++;
983 /* ZSTD_optLdm_processMatchCandidate():
984 * Wrapper function to update ldm seq store and call ldm functions as necessary.
986 static void
987 ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988 ZSTD_match_t* matches, U32* nbMatches,
989 U32 currPosInBlock, U32 remainingBytes)
991 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992 return;
995 if (currPosInBlock >= optLdm->endPosInBlock) {
996 if (currPosInBlock > optLdm->endPosInBlock) {
997 /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
998 * at the end of a match from the ldm seq store, and will often be some bytes
999 * past 'endPosInBlock'. As such, we need to correct for these "overshoots"
1001 U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
1002 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
1004 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1006 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1010 /*-*******************************
1011 * Optimal parser
1012 *********************************/
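/* High-level shape of ZSTD_compressBlock_opt_generic() below: this is a
 * shortest-path style dynamic program. opt[pos] holds the cheapest known way
 * (price, match length, offset, literal run, repcodes) to reach position
 * ip+pos from the current anchor. A forward pass extends each reachable
 * position either with one more literal or with every match found there; once
 * the series ends (sufficient_len hit, ZSTD_OPT_NUM reached, or last_pos
 * processed), a backward traversal from the last position reconstructs the
 * chosen chain of sequences, which is then emitted through ZSTD_storeSeq(). */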
1014 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1016 return sol.litlen + sol.mlen;
1019 #if 0 /* debug */
1021 static void
1022 listStats(const U32* table, int lastEltID)
1024 int const nbElts = lastEltID + 1;
1025 int enb;
1026 for (enb=0; enb < nbElts; enb++) {
1027 (void)table;
1028 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1029 RAWLOG(2, "%4i,", table[enb]);
1031 RAWLOG(2, " \n");
1034 #endif
1036 FORCE_INLINE_TEMPLATE size_t
1037 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038 seqStore_t* seqStore,
1039 U32 rep[ZSTD_REP_NUM],
1040 const void* src, size_t srcSize,
1041 const int optLevel,
1042 const ZSTD_dictMode_e dictMode)
1044 optState_t* const optStatePtr = &ms->opt;
1045 const BYTE* const istart = (const BYTE*)src;
1046 const BYTE* ip = istart;
1047 const BYTE* anchor = istart;
1048 const BYTE* const iend = istart + srcSize;
1049 const BYTE* const ilimit = iend - 8;
1050 const BYTE* const base = ms->window.base;
1051 const BYTE* const prefixStart = base + ms->window.dictLimit;
1052 const ZSTD_compressionParameters* const cParams = &ms->cParams;
1054 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1056 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1057 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1058 U32 nextToUpdate3 = ms->nextToUpdate;
1060 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061 ZSTD_match_t* const matches = optStatePtr->matchTable;
1062 ZSTD_optimal_t lastSequence;
1063 ZSTD_optLdm_t optLdm;
1065 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1069 /* init */
1070 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1071 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1072 assert(optLevel <= 2);
1073 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1074 ip += (ip==prefixStart);
1076 /* Match Loop */
1077 while (ip < ilimit) {
1078 U32 cur, last_pos = 0;
1080 /* find first match */
1081 { U32 const litlen = (U32)(ip - anchor);
1082 U32 const ll0 = !litlen;
1083 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085 (U32)(ip-istart), (U32)(iend - ip));
1086 if (!nbMatches) { ip++; continue; }
1088 /* initialize opt[0] */
1089 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090 opt[0].mlen = 0; /* means is_a_literal */
1091 opt[0].litlen = litlen;
1092 /* We don't need to include the actual price of the literals because
1093 * it is static for the duration of the forward pass, and is included
1094 * in every price. We include the literal length to avoid negative
1095 * prices when we subtract the previous literal length.
1097 opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1099 /* large match -> immediate encoding */
1100 { U32 const maxML = matches[nbMatches-1].len;
1101 U32 const maxOffcode = matches[nbMatches-1].off;
1102 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103 nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1105 if (maxML > sufficient_len) {
1106 lastSequence.litlen = litlen;
1107 lastSequence.mlen = maxML;
1108 lastSequence.off = maxOffcode;
1109 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1110 maxML, sufficient_len);
1111 cur = 0;
1112 last_pos = ZSTD_totalLen(lastSequence);
1113 goto _shortestPath;
1116 /* set prices for first matches starting position == 0 */
1117 assert(opt[0].price >= 0);
1118 { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119 U32 pos;
1120 U32 matchNb;
1121 for (pos = 1; pos < minMatch; pos++) {
1122 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1124 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125 U32 const offcode = matches[matchNb].off;
1126 U32 const end = matches[matchNb].len;
1127 for ( ; pos <= end ; pos++ ) {
1128 U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129 U32 const sequencePrice = literalsPrice + matchPrice;
1130 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131 pos, ZSTD_fCost(sequencePrice));
1132 opt[pos].mlen = pos;
1133 opt[pos].off = offcode;
1134 opt[pos].litlen = litlen;
1135 opt[pos].price = (int)sequencePrice;
1137 last_pos = pos-1;
1141 /* check further positions */
1142 for (cur = 1; cur <= last_pos; cur++) {
1143 const BYTE* const inr = ip + cur;
1144 assert(cur < ZSTD_OPT_NUM);
1145 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1147 /* Fix current position with one literal if cheaper */
1148 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1149 int const price = opt[cur-1].price
1150 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1153 assert(price < 1000000000); /* overflow check */
1154 if (price <= opt[cur].price) {
1155 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158 opt[cur].mlen = 0;
1159 opt[cur].off = 0;
1160 opt[cur].litlen = litlen;
1161 opt[cur].price = price;
1162 } else {
1163 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1169 /* Set the repcodes of the current position. We must do it here
1170 * because we rely on the repcodes of the 2nd to last sequence being
1171 * correct to set the next chunks repcodes during the backward
1172 * traversal.
1174 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1175 assert(cur >= opt[cur].mlen);
1176 if (opt[cur].mlen != 0) {
1177 U32 const prev = cur - opt[cur].mlen;
1178 repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180 } else {
1181 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1184 /* last match must start at a minimum distance of 8 from iend */
1185 if (inr > ilimit) continue;
1187 if (cur == last_pos) break;
1189 if ( (optLevel==0) /*static_test*/
1190 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1192 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1195 assert(opt[cur].price >= 0);
1196 { U32 const ll0 = (opt[cur].mlen != 0);
1197 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198 U32 const previousPrice = (U32)opt[cur].price;
1199 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1200 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201 U32 matchNb;
1203 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204 (U32)(inr-istart), (U32)(iend-inr));
1206 if (!nbMatches) {
1207 DEBUGLOG(7, "rPos:%u : no match found", cur);
1208 continue;
1211 { U32 const maxML = matches[nbMatches-1].len;
1212 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213 inr-istart, cur, nbMatches, maxML);
1215 if ( (maxML > sufficient_len)
1216 || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217 lastSequence.mlen = maxML;
1218 lastSequence.off = matches[nbMatches-1].off;
1219 lastSequence.litlen = litlen;
1220 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221 last_pos = cur + ZSTD_totalLen(lastSequence);
1222 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1223 goto _shortestPath;
1226 /* set prices using matches found at position == cur */
1227 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1228 U32 const offset = matches[matchNb].off;
1229 U32 const lastML = matches[matchNb].len;
1230 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231 U32 mlen;
1233 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234 matchNb, matches[matchNb].off, lastML, litlen);
1236 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1237 U32 const pos = cur + mlen;
1238 int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1240 if ((pos > last_pos) || (price < opt[pos].price)) {
1241 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1244 opt[pos].mlen = mlen;
1245 opt[pos].off = offset;
1246 opt[pos].litlen = litlen;
1247 opt[pos].price = price;
1248 } else {
1249 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1250 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1251 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1253 } } }
1254 } /* for (cur = 1; cur <= last_pos; cur++) */
1256 lastSequence = opt[last_pos];
1257 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1258 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1260 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1261 assert(opt[0].mlen == 0);
1263 /* Set the next chunk's repcodes based on the repcodes of the beginning
1264 * of the last match, and the last sequence. This avoids us having to
1265 * update them while traversing the sequences.
1267 if (lastSequence.mlen != 0) {
1268 repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269 ZSTD_memcpy(rep, &reps, sizeof(reps));
1270 } else {
1271 ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1274 { U32 const storeEnd = cur + 1;
1275 U32 storeStart = storeEnd;
1276 U32 seqPos = cur;
1278 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279 last_pos, cur); (void)last_pos;
1280 assert(storeEnd < ZSTD_OPT_NUM);
1281 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283 opt[storeEnd] = lastSequence;
1284 while (seqPos > 0) {
1285 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1286 storeStart--;
1287 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289 opt[storeStart] = opt[seqPos];
1290 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1293 /* save sequences */
1294 DEBUGLOG(6, "sending selected sequences into seqStore")
1295 { U32 storePos;
1296 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297 U32 const llen = opt[storePos].litlen;
1298 U32 const mlen = opt[storePos].mlen;
1299 U32 const offCode = opt[storePos].off;
1300 U32 const advance = llen + mlen;
1301 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302 anchor - istart, (unsigned)llen, (unsigned)mlen);
1304 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305 assert(storePos == storeEnd); /* must be last sequence */
1306 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1307 continue; /* will finish */
1310 assert(anchor + llen <= iend);
1311 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1313 anchor += advance;
1314 ip = anchor;
1316 ZSTD_setBasePrices(optStatePtr, optLevel);
1318 } /* while (ip < ilimit) */
1320 /* Return the last literals size */
1321 return (size_t)(iend - anchor);
1324 static size_t ZSTD_compressBlock_opt0(
1325 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1328 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1331 static size_t ZSTD_compressBlock_opt2(
1332 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1335 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1338 size_t ZSTD_compressBlock_btopt(
1339 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340 const void* src, size_t srcSize)
1342 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1349 /* ZSTD_initStats_ultra():
1350 * make a first compression pass, just to seed stats with more accurate starting values.
1351 * only works on first block, with no dictionary and no ldm.
1352 * this function cannot error, hence its contract must be respected.
1354 static void
1355 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356 seqStore_t* seqStore,
1357 U32 rep[ZSTD_REP_NUM],
1358 const void* src, size_t srcSize)
1360 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1361 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1363 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1364 assert(ms->opt.litLengthSum == 0); /* first block */
1365 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1366 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1367 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as two's complement) */
1369 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1371 /* invalidate first scan from history */
1372 ZSTD_resetSeqStore(seqStore);
1373 ms->window.base -= srcSize;
1374 ms->window.dictLimit += (U32)srcSize;
1375 ms->window.lowLimit = ms->window.dictLimit;
1376 ms->nextToUpdate = ms->window.dictLimit;
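/* The pointer arithmetic above "rewinds" the window so that the bytes consumed
 * by the throw-away first pass appear to belong to an unreachable dictionary
 * area: base is moved back by srcSize while dictLimit/lowLimit/nextToUpdate are
 * moved up by the same amount, so the real compression pass re-parses the same
 * data from a clean match state while keeping the statistics seeded in
 * ms->opt. */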
1380 size_t ZSTD_compressBlock_btultra(
1381 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382 const void* src, size_t srcSize)
1384 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1385 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1388 size_t ZSTD_compressBlock_btultra2(
1389 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390 const void* src, size_t srcSize)
1392 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1395 /* 2-pass strategy:
1396 * this strategy makes a first pass over first block to collect statistics
1397 * and seed next round's statistics with it.
1398 * After 1st pass, function forgets everything, and starts a new block.
1399 * Consequently, this can only work if no data has been previously loaded in tables,
1400 * aka, no dictionary, no prefix, no ldm preprocessing.
1401 * The compression ratio gain is generally small (~0.5% on first block),
1402 * the cost is 2x cpu time on first block. */
1403 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1404 if ( (ms->opt.litLengthSum==0) /* first block */
1405 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1406 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1407 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1408 && (srcSize > ZSTD_PREDEF_THRESHOLD)
1410 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1413 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1416 size_t ZSTD_compressBlock_btopt_dictMatchState(
1417 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418 const void* src, size_t srcSize)
1420 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1423 size_t ZSTD_compressBlock_btultra_dictMatchState(
1424 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425 const void* src, size_t srcSize)
1427 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1430 size_t ZSTD_compressBlock_btopt_extDict(
1431 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432 const void* src, size_t srcSize)
1434 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1437 size_t ZSTD_compressBlock_btultra_extDict(
1438 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439 const void* src, size_t srcSize)
1441 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1444 /* note : no btultra2 variant for extDict nor dictMatchState,
1445 * because btultra2 is not meant to work with dictionaries
1446 * and is only specific for the first block (no prefix) */