/*
 * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"


#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
#define ZSTD_MAX_PRICE     (1<<30)

#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-*************************************
*  Price functions for optimal parser
***************************************/

#if 0    /* approximation at bit level */
#  define BITCOST_ACCURACY 0
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
#elif 0  /* fractional bit accuracy */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
#else    /* opt==approx, ultra==accurate */
#  define BITCOST_ACCURACY 8
#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const stat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(stat);
    U32 const BWeight = hb * BITCOST_MULTIPLIER;
    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + BITCOST_ACCURACY < 31);
    return weight;
}
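/* Illustrative example (added for clarity, derived from the code above) :
 * rawStat=5 => stat=6, hb=ZSTD_highbit32(6)=2, BWeight=2*256=512,
 * FWeight=(6<<8)>>2=384, weight=896 = 3.5*BITCOST_MULTIPLIER.
 * At exact powers of 2 the result is (log2(stat)+1)*BITCOST_MULTIPLIER,
 * with linear interpolation in between.
 * Prices only ever use differences of weights (e.g. litSumBasePrice - WEIGHT(litFreq[lit])),
 * so the constant "+1 bit" offset cancels out. */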
/* debugging function,
 * @return price in bytes as fractional value
 * for debug messages only */
MEM_STATIC double ZSTD_fCost(U32 price)
{
    return (double)price / (BITCOST_MULTIPLIER*8);
}
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
}
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
}
/* ZSTD_downscaleStat() :
 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
 * return the resulting sum of elements */
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
{
    U32 s, sum=0;
    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
    for (s=0; s<lastEltIndex+1; s++) {
        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
        sum += table[s];
    }
    return sum;
}
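/* Illustrative example (added for clarity) :
 * with ZSTD_FREQ_DIV=4 and malus=0, a previous count of 100 becomes 1 + (100 >> 4) = 7,
 * and a count of 0 becomes 1. Every symbol keeps a non-zero frequency, so its price
 * stays finite, while the overall shape of the distribution is preserved as a seed
 * for the next block's statistics. */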
/* ZSTD_rescaleFreqs() :
 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
 *    take hints from dictionary if there is one
 *    or init from zero, using src for literals stats, or flat 1 for match symbols
 * otherwise downscale existing stats, to be used as seed for next block.
 */
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
{
    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

    if (optPtr->litLengthSum == 0) {  /* first block : init */
        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
            optPtr->priceType = zop_predef;
        } else {

            assert(optPtr->symbolCosts != NULL);
            if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
                /* huffman table presumed generated by dictionary */
                optPtr->priceType = zop_dynamic;

                if (compressedLiterals) {
                    unsigned lit;
                    assert(optPtr->litFreq != NULL);
                    optPtr->litSum = 0;
                    for (lit=0; lit<=MaxLit; lit++) {
                        U32 const scaleLog = 11;   /* scale to 2K */
                        U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
                        assert(bitCost <= scaleLog);
                        optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                        optPtr->litSum += optPtr->litFreq[lit];
                }   }

                {   unsigned ll;
                    FSE_CState_t llstate;
                    FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
                    optPtr->litLengthSum = 0;
                    for (ll=0; ll<=MaxLL; ll++) {
                        U32 const scaleLog = 10;   /* scale to 1K */
                        U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
                        assert(bitCost < scaleLog);
                        optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                        optPtr->litLengthSum += optPtr->litLengthFreq[ll];
                }   }

                {   unsigned ml;
                    FSE_CState_t mlstate;
                    FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
                    optPtr->matchLengthSum = 0;
                    for (ml=0; ml<=MaxML; ml++) {
                        U32 const scaleLog = 10;
                        U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
                        assert(bitCost < scaleLog);
                        optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                        optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
                }   }

                {   unsigned of;
                    FSE_CState_t ofstate;
                    FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
                    optPtr->offCodeSum = 0;
                    for (of=0; of<=MaxOff; of++) {
                        U32 const scaleLog = 10;
                        U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
                        assert(bitCost < scaleLog);
                        optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                        optPtr->offCodeSum += optPtr->offCodeFreq[of];
                }   }

            } else {  /* not a dictionary */

                assert(optPtr->litFreq != NULL);
                if (compressedLiterals) {
                    unsigned lit = MaxLit;
                    HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
                    optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
                }

                {   unsigned ll;
                    for (ll=0; ll<=MaxLL; ll++)
                        optPtr->litLengthFreq[ll] = 1;
                }
                optPtr->litLengthSum = MaxLL+1;

                {   unsigned ml;
                    for (ml=0; ml<=MaxML; ml++)
                        optPtr->matchLengthFreq[ml] = 1;
                }
                optPtr->matchLengthSum = MaxML+1;

                {   unsigned of;
                    for (of=0; of<=MaxOff; of++)
                        optPtr->offCodeFreq[of] = 1;
                }
                optPtr->offCodeSum = MaxOff+1;
            }
        }

    } else {   /* new block : re-use previous statistics, scaled down */

        if (compressedLiterals)
            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
    }

    ZSTD_setBasePrices(optPtr, optLevel);
}
/* ZSTD_rawLiteralsCost() :
 * price of literals (only) in specified segment (which length can be 0).
 * does not include price of literalLength symbol */
static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                const optState_t* const optPtr,
                                int optLevel)
{
    if (litLength == 0) return 0;

    if (!ZSTD_compressedLiterals(optPtr))
        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */

    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */

    /* dynamic statistics */
    {   U32 price = litLength * optPtr->litSumBasePrice;
        U32 u;
        for (u=0; u < litLength; u++) {
            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
        }
        return price;
    }
}
/* ZSTD_litLengthPrice() :
 * cost of literalLength symbol */
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
             + optPtr->litLengthSumBasePrice
             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
    }
}
/* ZSTD_getMatchPrice() :
 * Provides the cost of the match part (offset + matchLength) of a sequence
 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
FORCE_INLINE_TEMPLATE U32
ZSTD_getMatchPrice(U32 const offset,
                   U32 const matchLength,
                   const optState_t* const optPtr,
                   int const optLevel)
{
    U32 price;
    U32 const offCode = ZSTD_highbit32(offset+1);
    U32 const mlBase = matchLength - MINMATCH;
    assert(matchLength >= MINMATCH);

    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);

    /* dynamic statistics */
    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
    if ((optLevel<2) /*static*/ && offCode >= 20)
        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */

    /* match length */
    {   U32 const mlCode = ZSTD_MLcode(mlBase);
        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
    }

    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */

    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
    return price;
}
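/* Illustrative example (added for clarity) :
 * `offset` here is the value stored in ZSTD_match_t.off :
 * either a repcode (0-2) or (distance + ZSTD_REP_MOVE) for a regular match
 * (see the "match offset code (0-2=>repCode; 3+=>offset+2)" note in ZSTD_updateStats()).
 * A repcode match (offset <= 2) gives offCode = ZSTD_highbit32(offset+1) <= 1,
 * hence a very low offset cost; a regular match at distance 1000 gives
 * offset = 1002, offCode = ZSTD_highbit32(1003) = 9, i.e. roughly 9 bits
 * (plus the statistical term) for the offset part of the price. */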
/* ZSTD_updateStats() :
 * assumption : literals + litLength <= iend */
static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 litLength, const BYTE* literals,
                             U32 offsetCode, U32 matchLength)
{
    /* literals */
    if (ZSTD_compressedLiterals(optPtr)) {
        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
    }

    /* literal length */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        optPtr->litLengthFreq[llCode]++;
        optPtr->litLengthSum++;
    }

    /* match offset code (0-2=>repCode; 3+=>offset+2) */
    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
        assert(offCode <= MaxOff);
        optPtr->offCodeFreq[offCode]++;
        optPtr->offCodeSum++;
    }

    /* match length */
    {   U32 const mlBase = matchLength - MINMATCH;
        U32 const mlCode = ZSTD_MLcode(mlBase);
        optPtr->matchLengthFreq[mlCode]++;
        optPtr->matchLengthSum++;
    }
}
/* ZSTD_readMINMATCH() :
 * function safe only for comparisons
 * assumption : memPtr must be at least 4 bytes before end of buffer */
MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
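/* Illustrative note (added for clarity) :
 * for length==3, four bytes are read but only the first three must take part in
 * the comparison. On little-endian targets the 4th input byte lands in the top
 * 8 bits of the U32, so `<<8` discards it; on big-endian it lands in the low
 * 8 bits, so `>>8` discards it. Comparing two such values therefore compares
 * exactly the first 3 bytes of each location. */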
/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

    while(idx < target) {
        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}
/*-*************************************
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
 *  ip : assumed <= iend-8 .
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32*   const hashTable = ms->hashTable;
    U32    const hashLog = cParams->hashLog;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
    U32    const btMask = (1 << btLog) - 1;
    U32 matchIndex = hashTable[h];
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const base = ms->window.base;
    const BYTE* const dictBase = ms->window.dictBase;
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* match;
    const U32 current = (U32)(ip-base);
    const U32 btLow = btMask >= current ? 0 : current - btMask;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = ms->window.lowLimit;
    U32 matchEndIdx = current+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
#ifdef ZSTD_C_PREDICT
    U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
    U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
    predictedSmall += (predictedSmall>0);
    predictedLarge += (predictedLarge>0);
#endif /* ZSTD_C_PREDICT */

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);

    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = current;   /* Update Hash Table */

    assert(windowLow > 0);
    while (nbCompares-- && (matchIndex >= windowLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(matchIndex < current);

#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
        if (matchIndex == predictedSmall) {
            /* no need to check length, result known */
            *smallerPtr = matchIndex;
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
            continue;
        }
        if (matchIndex == predictedLarge) {
            *largerPtr = matchIndex;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
            continue;
        }
#endif

        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
            match = base + matchIndex;
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
        } else {
            match = dictBase + matchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
        }

        if (matchLength > bestLength) {
            bestLength = matchLength;
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
        }

        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
        }

        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
            /* match is smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
        } else {
            /* match is larger than current */
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > current + 8);
        return MAX(positions, matchEndIdx - (current + 8));
    }
}
FORCE_INLINE_TEMPLATE
void ZSTD_updateTree_internal(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
                const U32 mls, const ZSTD_dictMode_e dictMode)
{
    const BYTE* const base = ms->window.base;
    U32 const target = (U32)(ip - base);
    U32 idx = ms->nextToUpdate;
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
}
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
                    ZSTD_matchState_t* ms,
                    U32* nextToUpdate3,
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                    const U32 rep[ZSTD_REP_NUM],
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                    const U32 lengthToBeat,
                    U32 const mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    const BYTE* const base = ms->window.base;
    U32 const current = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
    U32 const minMatch = (mls==3) ? 3 : 4;
    U32* const hashTable = ms->hashTable;
    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
    U32 matchIndex  = hashTable[h];
    U32* const bt   = ms->chainTable;
    U32 const btLog = cParams->chainLog - 1;
    U32 const btMask= (1U << btLog) - 1;
    size_t commonLengthSmaller=0, commonLengthLarger=0;
    const BYTE* const dictBase = ms->window.dictBase;
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
    U32 matchEndIdx = current+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
    U32 dummy32;   /* to be nullified at the end */
    U32 mnum = 0;
    U32 nbCompares = 1U << cParams->searchLog;

    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
    const ZSTD_compressionParameters* const dmsCParams =
                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;

    size_t bestLength = lengthToBeat-1;
    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);

    /* check repCode */
    assert(ll0 <= 1);   /* necessarily 1 or 0 */
    {   U32 const lastR = ZSTD_REP_NUM + ll0;
        U32 repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            U32 const repIndex = current - repOffset;
            U32 repLen = 0;
            assert(current >= dictLimit);
            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) {  /* equivalent to `current > repIndex >= dictLimit` */
                /* We must validate the repcode offset because when we're using a dictionary the
                 * valid offset range shrinks when the dictionary goes out of bounds.
                 */
                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
                }
            } else {  /* repIndex < dictLimit || repIndex >= current */
                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
                                             dmsBase + repIndex - dmsIndexDelta :
                                             dictBase + repIndex;
                assert(current >= windowLow);
                if ( dictMode == ZSTD_extDict
                  && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow)  /* equivalent to `current > repIndex >= windowLow` */
                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
                }
                if (dictMode == ZSTD_dictMatchState
                  && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `current > repIndex >= dmsLowLimit` */
                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
            }   }
            /* save longer solution */
            if (repLen > bestLength) {
                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                            repCode, ll0, repOffset, repLen);
                bestLength = repLen;
                matches[mnum].off = repCode - ll0;
                matches[mnum].len = (U32)repLen;
                mnum++;
                if ( (repLen > sufficient_len)
                   | (ip+repLen == iLimit) ) {  /* best possible */
                    return mnum;
    }   }   }   }

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
                const BYTE* const match = base + matchIndex3;
                mlen = ZSTD_count(ip, match, iLimit);
            } else {
                const BYTE* const match = dictBase + matchIndex3;
                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
            }

            /* save best solution */
            if (mlen >= mls /* == 3 > bestLength */) {
                DEBUGLOG(8, "found small match with hlog3, of length %u",
                            (U32)mlen);
                bestLength = mlen;
                assert(current > matchIndex3);
                assert(mnum==0);  /* no prior solution */
                matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE;
                matches[0].len = (U32)mlen;
                mnum = 1;
                if ( (mlen > sufficient_len) |
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = current+1;  /* skip insertion */
                    return 1;
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }

    hashTable[h] = current;   /* Update Hash Table */

    while (nbCompares-- && (matchIndex >= matchLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
        const BYTE* match;
        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(current > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
                    (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
            assert(matchEndIdx > matchIndex);
            if (matchLength > matchEndIdx - matchIndex)
                matchEndIdx = matchIndex + (U32)matchLength;
            bestLength = matchLength;
            matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
            matches[mnum].len = (U32)matchLength;
            mnum++;
            if ( (matchLength > ZSTD_OPT_NUM)
               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
            }
        }

        if (match[matchLength] < ip[matchLength]) {
            /* match smaller than current */
            *smallerPtr = matchIndex;             /* update smaller idx */
            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
        } else {
            *largerPtr = matchIndex;
            commonLengthLarger = matchLength;
            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
            largerPtr = nextPtr;
            matchIndex = nextPtr[0];
    }   }

    *smallerPtr = *largerPtr = 0;

    if (dictMode == ZSTD_dictMatchState && nbCompares) {
        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
        U32 dictMatchIndex = dms->hashTable[dmsH];
        const U32* const dmsBt = dms->chainTable;
        commonLengthSmaller = commonLengthLarger = 0;
        while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
            const BYTE* match = dmsBase + dictMatchIndex;
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
            if (dictMatchIndex+matchLength >= dmsHighLimit)
                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */

            if (matchLength > bestLength) {
                matchIndex = dictMatchIndex + dmsIndexDelta;
                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
                        (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
                if (matchLength > matchEndIdx - matchIndex)
                    matchEndIdx = matchIndex + (U32)matchLength;
                bestLength = matchLength;
                matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
                matches[mnum].len = (U32)matchLength;
                mnum++;
                if ( (matchLength > ZSTD_OPT_NUM)
                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
                }
            }

            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
            if (match[matchLength] < ip[matchLength]) {
                commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
            } else {
                /* match is larger than current */
                commonLengthLarger = matchLength;
                dictMatchIndex = nextPtr[0];
    }   }   }

    assert(matchEndIdx > current+8);
    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
    return mnum;
}
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                        ZSTD_matchState_t* ms,
                        U32* nextToUpdate3,
                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
                        const U32 rep[ZSTD_REP_NUM],
                        U32 const ll0,
                        U32 const lengthToBeat)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const matchLengthSearch = cParams->minMatch;
    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    default :
    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    case 7 :
    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    }
}
/*-*******************************
*  Optimal parser
*********************************/


static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
{
    return sol.litlen + sol.mlen;
}

#if 0 /* debug */

static void
listStats(const U32* table, int lastEltID)
{
    int const nbElts = lastEltID + 1;
    int enb;
    for (enb=0; enb < nbElts; enb++) {
        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
        RAWLOG(2, "%4i,", table[enb]);
    }
    RAWLOG(2, " \n");
}

#endif
FORCE_INLINE_TEMPLATE size_t
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                               seqStore_t* seqStore,
                               U32 rep[ZSTD_REP_NUM],
                               const void* src, size_t srcSize,
                               const int optLevel,
                               const ZSTD_dictMode_e dictMode)
{
    optState_t* const optStatePtr = &ms->opt;
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const BYTE* const base = ms->window.base;
    const BYTE* const prefixStart = base + ms->window.dictLimit;
    const ZSTD_compressionParameters* const cParams = &ms->cParams;

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
    ZSTD_optimal_t lastSequence;

    /* init */
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

    /* Match Loop */
    while (ip < ilimit) {
        U32 cur, last_pos = 0;

        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
            opt[0].mlen = 0;  /* means is_a_literal */
            opt[0].litlen = litlen;
            /* We don't need to include the actual price of the literals because
             * it is static for the duration of the forward pass, and is included
             * in every price. We include the literal length to avoid negative
             * prices when we subtract the previous literal length.
             */
            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);

            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffset = matches[nbMatches-1].off;
                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffset;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
                        opt[pos].price = sequencePrice;
                }   }
                last_pos = pos-1;
            }
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                int const price = opt[cur-1].price
                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
                }
            }

            /* Set the repcodes of the current position. We must do it here
             * because we rely on the repcodes of the 2nd to last sequence being
             * correct to set the next chunks repcodes during the backward
             * traversal.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].mlen != 0) {
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            } else {
                memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
            }

            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                U32 matchNb;
                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                                inr-istart, cur, nbMatches, maxML);

                    if ( (maxML > sufficient_len)
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches-1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, litlen);

                    for (mlen = lastML ; mlen >= startML ; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);

        /* Set the next chunk's repcodes based on the repcodes of the beginning
         * of the last match, and the last sequence. This avoids us having to
         * update them while traversing the sequences.
         */
        if (lastSequence.mlen != 0) {
            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
            memcpy(rep, &reps, sizeof(reps));
        } else {
            memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
        }

        {   U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
            opt[storeEnd] = lastSequence;
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore");
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offCode = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                    anchor += advance;
                    ip = anchor;
            }   }
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
}
/* used in 2-pass strategy */
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
{
    U32 s, sum=0;
    assert(ZSTD_FREQ_DIV+bonus >= 0);
    for (s=0; s<lastEltIndex+1; s++) {
        table[s] <<= ZSTD_FREQ_DIV+bonus;
        table[s]--;
        sum += table[s];
    }
    return sum;
}
/* used in 2-pass strategy */
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
{
    if (ZSTD_compressedLiterals(optPtr))
        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
}
/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error, hence its contract must be respected.
 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                     seqStore_t* seqStore,
                     U32 rep[ZSTD_REP_NUM],
                     const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */

    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/

    /* invalidate first scan from history */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;

    /* re-inforce weight of collected statistics */
    ZSTD_upscaleStats(&ms->opt);
}
size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const current = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-pass strategy:
     * this strategy makes a first pass over first block to collect statistics
     * and seed next round's statistics with it.
     * After 1st pass, function forgets everything, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded in tables,
     * aka, no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on first block),
     * the cost is 2x cpu time on first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)   /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
      && (current == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
}
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
}
/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is only specific for the first block (no prefix) */