2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of https://github.com/facebook/zstd.
7 * An additional grant of patent rights can be found in the PATENTS file in the
10 * This program is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License version 2 as published by the
12 * Free Software Foundation. This program is dual-licensed; you may select
13 * either version 2 of the GNU General Public License ("GPL") or BSD license
17 /* Note : this file is intended to be included within zstd_compress.c */
19 #ifndef ZSTD_OPT_H_91842398743
20 #define ZSTD_OPT_H_91842398743
22 #define ZSTD_LITFREQ_ADD 2
23 #define ZSTD_FREQ_DIV 4
24 #define ZSTD_MAX_PRICE (1 << 30)
26 /*-*************************************
27 * Price functions for optimal parser
28 ***************************************/
29 FORCE_INLINE
void ZSTD_setLog2Prices(seqStore_t
*ssPtr
)
31 ssPtr
->log2matchLengthSum
= ZSTD_highbit32(ssPtr
->matchLengthSum
+ 1);
32 ssPtr
->log2litLengthSum
= ZSTD_highbit32(ssPtr
->litLengthSum
+ 1);
33 ssPtr
->log2litSum
= ZSTD_highbit32(ssPtr
->litSum
+ 1);
34 ssPtr
->log2offCodeSum
= ZSTD_highbit32(ssPtr
->offCodeSum
+ 1);
35 ssPtr
->factor
= 1 + ((ssPtr
->litSum
>> 5) / ssPtr
->litLengthSum
) + ((ssPtr
->litSum
<< 1) / (ssPtr
->litSum
+ ssPtr
->matchSum
));
38 ZSTD_STATIC
void ZSTD_rescaleFreqs(seqStore_t
*ssPtr
, const BYTE
*src
, size_t srcSize
)
42 ssPtr
->cachedLiterals
= NULL
;
43 ssPtr
->cachedPrice
= ssPtr
->cachedLitLength
= 0;
44 ssPtr
->staticPrices
= 0;
46 if (ssPtr
->litLengthSum
== 0) {
48 ssPtr
->staticPrices
= 1;
50 for (u
= 0; u
<= MaxLit
; u
++)
51 ssPtr
->litFreq
[u
] = 0;
52 for (u
= 0; u
< srcSize
; u
++)
53 ssPtr
->litFreq
[src
[u
]]++;
56 ssPtr
->litLengthSum
= MaxLL
+ 1;
57 ssPtr
->matchLengthSum
= MaxML
+ 1;
58 ssPtr
->offCodeSum
= (MaxOff
+ 1);
59 ssPtr
->matchSum
= (ZSTD_LITFREQ_ADD
<< Litbits
);
61 for (u
= 0; u
<= MaxLit
; u
++) {
62 ssPtr
->litFreq
[u
] = 1 + (ssPtr
->litFreq
[u
] >> ZSTD_FREQ_DIV
);
63 ssPtr
->litSum
+= ssPtr
->litFreq
[u
];
65 for (u
= 0; u
<= MaxLL
; u
++)
66 ssPtr
->litLengthFreq
[u
] = 1;
67 for (u
= 0; u
<= MaxML
; u
++)
68 ssPtr
->matchLengthFreq
[u
] = 1;
69 for (u
= 0; u
<= MaxOff
; u
++)
70 ssPtr
->offCodeFreq
[u
] = 1;
72 ssPtr
->matchLengthSum
= 0;
73 ssPtr
->litLengthSum
= 0;
74 ssPtr
->offCodeSum
= 0;
78 for (u
= 0; u
<= MaxLit
; u
++) {
79 ssPtr
->litFreq
[u
] = 1 + (ssPtr
->litFreq
[u
] >> (ZSTD_FREQ_DIV
+ 1));
80 ssPtr
->litSum
+= ssPtr
->litFreq
[u
];
82 for (u
= 0; u
<= MaxLL
; u
++) {
83 ssPtr
->litLengthFreq
[u
] = 1 + (ssPtr
->litLengthFreq
[u
] >> (ZSTD_FREQ_DIV
+ 1));
84 ssPtr
->litLengthSum
+= ssPtr
->litLengthFreq
[u
];
86 for (u
= 0; u
<= MaxML
; u
++) {
87 ssPtr
->matchLengthFreq
[u
] = 1 + (ssPtr
->matchLengthFreq
[u
] >> ZSTD_FREQ_DIV
);
88 ssPtr
->matchLengthSum
+= ssPtr
->matchLengthFreq
[u
];
89 ssPtr
->matchSum
+= ssPtr
->matchLengthFreq
[u
] * (u
+ 3);
91 ssPtr
->matchSum
*= ZSTD_LITFREQ_ADD
;
92 for (u
= 0; u
<= MaxOff
; u
++) {
93 ssPtr
->offCodeFreq
[u
] = 1 + (ssPtr
->offCodeFreq
[u
] >> ZSTD_FREQ_DIV
);
94 ssPtr
->offCodeSum
+= ssPtr
->offCodeFreq
[u
];
98 ZSTD_setLog2Prices(ssPtr
);
101 FORCE_INLINE U32
ZSTD_getLiteralPrice(seqStore_t
*ssPtr
, U32 litLength
, const BYTE
*literals
)
105 if (ssPtr
->staticPrices
)
106 return ZSTD_highbit32((U32
)litLength
+ 1) + (litLength
* 6);
109 return ssPtr
->log2litLengthSum
- ZSTD_highbit32(ssPtr
->litLengthFreq
[0] + 1);
112 if (ssPtr
->cachedLiterals
== literals
) {
113 U32
const additional
= litLength
- ssPtr
->cachedLitLength
;
114 const BYTE
*literals2
= ssPtr
->cachedLiterals
+ ssPtr
->cachedLitLength
;
115 price
= ssPtr
->cachedPrice
+ additional
* ssPtr
->log2litSum
;
116 for (u
= 0; u
< additional
; u
++)
117 price
-= ZSTD_highbit32(ssPtr
->litFreq
[literals2
[u
]] + 1);
118 ssPtr
->cachedPrice
= price
;
119 ssPtr
->cachedLitLength
= litLength
;
121 price
= litLength
* ssPtr
->log2litSum
;
122 for (u
= 0; u
< litLength
; u
++)
123 price
-= ZSTD_highbit32(ssPtr
->litFreq
[literals
[u
]] + 1);
125 if (litLength
>= 12) {
126 ssPtr
->cachedLiterals
= literals
;
127 ssPtr
->cachedPrice
= price
;
128 ssPtr
->cachedLitLength
= litLength
;
134 const BYTE LL_deltaCode
= 19;
135 const BYTE llCode
= (litLength
> 63) ? (BYTE
)ZSTD_highbit32(litLength
) + LL_deltaCode
: LL_Code
[litLength
];
136 price
+= LL_bits
[llCode
] + ssPtr
->log2litLengthSum
- ZSTD_highbit32(ssPtr
->litLengthFreq
[llCode
] + 1);
142 FORCE_INLINE U32
ZSTD_getPrice(seqStore_t
*seqStorePtr
, U32 litLength
, const BYTE
*literals
, U32 offset
, U32 matchLength
, const int ultra
)
146 BYTE
const offCode
= (BYTE
)ZSTD_highbit32(offset
+ 1);
148 if (seqStorePtr
->staticPrices
)
149 return ZSTD_getLiteralPrice(seqStorePtr
, litLength
, literals
) + ZSTD_highbit32((U32
)matchLength
+ 1) + 16 + offCode
;
151 price
= offCode
+ seqStorePtr
->log2offCodeSum
- ZSTD_highbit32(seqStorePtr
->offCodeFreq
[offCode
] + 1);
152 if (!ultra
&& offCode
>= 20)
153 price
+= (offCode
- 19) * 2;
157 const BYTE ML_deltaCode
= 36;
158 const BYTE mlCode
= (matchLength
> 127) ? (BYTE
)ZSTD_highbit32(matchLength
) + ML_deltaCode
: ML_Code
[matchLength
];
159 price
+= ML_bits
[mlCode
] + seqStorePtr
->log2matchLengthSum
- ZSTD_highbit32(seqStorePtr
->matchLengthFreq
[mlCode
] + 1);
162 return price
+ ZSTD_getLiteralPrice(seqStorePtr
, litLength
, literals
) + seqStorePtr
->factor
;
165 ZSTD_STATIC
void ZSTD_updatePrice(seqStore_t
*seqStorePtr
, U32 litLength
, const BYTE
*literals
, U32 offset
, U32 matchLength
)
170 seqStorePtr
->litSum
+= litLength
* ZSTD_LITFREQ_ADD
;
171 for (u
= 0; u
< litLength
; u
++)
172 seqStorePtr
->litFreq
[literals
[u
]] += ZSTD_LITFREQ_ADD
;
176 const BYTE LL_deltaCode
= 19;
177 const BYTE llCode
= (litLength
> 63) ? (BYTE
)ZSTD_highbit32(litLength
) + LL_deltaCode
: LL_Code
[litLength
];
178 seqStorePtr
->litLengthFreq
[llCode
]++;
179 seqStorePtr
->litLengthSum
++;
184 BYTE
const offCode
= (BYTE
)ZSTD_highbit32(offset
+ 1);
185 seqStorePtr
->offCodeSum
++;
186 seqStorePtr
->offCodeFreq
[offCode
]++;
191 const BYTE ML_deltaCode
= 36;
192 const BYTE mlCode
= (matchLength
> 127) ? (BYTE
)ZSTD_highbit32(matchLength
) + ML_deltaCode
: ML_Code
[matchLength
];
193 seqStorePtr
->matchLengthFreq
[mlCode
]++;
194 seqStorePtr
->matchLengthSum
++;
197 ZSTD_setLog2Prices(seqStorePtr
);
200 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \
202 while (last_pos < pos) { \
203 opt[last_pos + 1].price = ZSTD_MAX_PRICE; \
206 opt[pos].mlen = mlen_; \
207 opt[pos].off = offset_; \
208 opt[pos].litlen = litlen_; \
209 opt[pos].price = price_; \
212 /* Update hashTable3 up to ip (excluded)
213 Assumption : always within prefix (i.e. not within extDict) */
215 U32
ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx
*zc
, const BYTE
*ip
)
217 U32
*const hashTable3
= zc
->hashTable3
;
218 U32
const hashLog3
= zc
->hashLog3
;
219 const BYTE
*const base
= zc
->base
;
220 U32 idx
= zc
->nextToUpdate3
;
221 const U32 target
= zc
->nextToUpdate3
= (U32
)(ip
- base
);
222 const size_t hash3
= ZSTD_hash3Ptr(ip
, hashLog3
);
224 while (idx
< target
) {
225 hashTable3
[ZSTD_hash3Ptr(base
+ idx
, hashLog3
)] = idx
;
229 return hashTable3
[hash3
];
232 /*-*************************************
234 ***************************************/
235 static U32
ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx
*zc
, const BYTE
*const ip
, const BYTE
*const iLimit
, U32 nbCompares
, const U32 mls
, U32 extDict
,
236 ZSTD_match_t
*matches
, const U32 minMatchLen
)
238 const BYTE
*const base
= zc
->base
;
239 const U32 curr
= (U32
)(ip
- base
);
240 const U32 hashLog
= zc
->params
.cParams
.hashLog
;
241 const size_t h
= ZSTD_hashPtr(ip
, hashLog
, mls
);
242 U32
*const hashTable
= zc
->hashTable
;
243 U32 matchIndex
= hashTable
[h
];
244 U32
*const bt
= zc
->chainTable
;
245 const U32 btLog
= zc
->params
.cParams
.chainLog
- 1;
246 const U32 btMask
= (1U << btLog
) - 1;
247 size_t commonLengthSmaller
= 0, commonLengthLarger
= 0;
248 const BYTE
*const dictBase
= zc
->dictBase
;
249 const U32 dictLimit
= zc
->dictLimit
;
250 const BYTE
*const dictEnd
= dictBase
+ dictLimit
;
251 const BYTE
*const prefixStart
= base
+ dictLimit
;
252 const U32 btLow
= btMask
>= curr
? 0 : curr
- btMask
;
253 const U32 windowLow
= zc
->lowLimit
;
254 U32
*smallerPtr
= bt
+ 2 * (curr
& btMask
);
255 U32
*largerPtr
= bt
+ 2 * (curr
& btMask
) + 1;
256 U32 matchEndIdx
= curr
+ 8;
257 U32 dummy32
; /* to be nullified at the end */
260 const U32 minMatch
= (mls
== 3) ? 3 : 4;
261 size_t bestLength
= minMatchLen
- 1;
263 if (minMatch
== 3) { /* HC3 match finder */
264 U32
const matchIndex3
= ZSTD_insertAndFindFirstIndexHash3(zc
, ip
);
265 if (matchIndex3
> windowLow
&& (curr
- matchIndex3
< (1 << 18))) {
268 if ((!extDict
) || matchIndex3
>= dictLimit
) {
269 match
= base
+ matchIndex3
;
270 if (match
[bestLength
] == ip
[bestLength
])
271 currMl
= ZSTD_count(ip
, match
, iLimit
);
273 match
= dictBase
+ matchIndex3
;
274 if (ZSTD_readMINMATCH(match
, MINMATCH
) ==
275 ZSTD_readMINMATCH(ip
, MINMATCH
)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
276 currMl
= ZSTD_count_2segments(ip
+ MINMATCH
, match
+ MINMATCH
, iLimit
, dictEnd
, prefixStart
) + MINMATCH
;
279 /* save best solution */
280 if (currMl
> bestLength
) {
282 matches
[mnum
].off
= ZSTD_REP_MOVE_OPT
+ curr
- matchIndex3
;
283 matches
[mnum
].len
= (U32
)currMl
;
285 if (currMl
> ZSTD_OPT_NUM
)
287 if (ip
+ currMl
== iLimit
)
288 goto update
; /* best possible, and avoid read overflow*/
293 hashTable
[h
] = curr
; /* Update Hash Table */
295 while (nbCompares
-- && (matchIndex
> windowLow
)) {
296 U32
*nextPtr
= bt
+ 2 * (matchIndex
& btMask
);
297 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
300 if ((!extDict
) || (matchIndex
+ matchLength
>= dictLimit
)) {
301 match
= base
+ matchIndex
;
302 if (match
[matchLength
] == ip
[matchLength
]) {
303 matchLength
+= ZSTD_count(ip
+ matchLength
+ 1, match
+ matchLength
+ 1, iLimit
) + 1;
306 match
= dictBase
+ matchIndex
;
307 matchLength
+= ZSTD_count_2segments(ip
+ matchLength
, match
+ matchLength
, iLimit
, dictEnd
, prefixStart
);
308 if (matchIndex
+ matchLength
>= dictLimit
)
309 match
= base
+ matchIndex
; /* to prepare for next usage of match[matchLength] */
312 if (matchLength
> bestLength
) {
313 if (matchLength
> matchEndIdx
- matchIndex
)
314 matchEndIdx
= matchIndex
+ (U32
)matchLength
;
315 bestLength
= matchLength
;
316 matches
[mnum
].off
= ZSTD_REP_MOVE_OPT
+ curr
- matchIndex
;
317 matches
[mnum
].len
= (U32
)matchLength
;
319 if (matchLength
> ZSTD_OPT_NUM
)
321 if (ip
+ matchLength
== iLimit
) /* equal : no way to know if inf or sup */
322 break; /* drop, to guarantee consistency (miss a little bit of compression) */
325 if (match
[matchLength
] < ip
[matchLength
]) {
326 /* match is smaller than curr */
327 *smallerPtr
= matchIndex
; /* update smaller idx */
328 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
329 if (matchIndex
<= btLow
) {
330 smallerPtr
= &dummy32
;
332 } /* beyond tree size, stop the search */
333 smallerPtr
= nextPtr
+ 1; /* new "smaller" => larger of match */
334 matchIndex
= nextPtr
[1]; /* new matchIndex larger than previous (closer to curr) */
336 /* match is larger than curr */
337 *largerPtr
= matchIndex
;
338 commonLengthLarger
= matchLength
;
339 if (matchIndex
<= btLow
) {
340 largerPtr
= &dummy32
;
342 } /* beyond tree size, stop the search */
344 matchIndex
= nextPtr
[0];
348 *smallerPtr
= *largerPtr
= 0;
351 zc
->nextToUpdate
= (matchEndIdx
> curr
+ 8) ? matchEndIdx
- 8 : curr
+ 1;
355 /** Tree updater, providing best match */
356 static U32
ZSTD_BtGetAllMatches(ZSTD_CCtx
*zc
, const BYTE
*const ip
, const BYTE
*const iLimit
, const U32 maxNbAttempts
, const U32 mls
, ZSTD_match_t
*matches
,
357 const U32 minMatchLen
)
359 if (ip
< zc
->base
+ zc
->nextToUpdate
)
360 return 0; /* skipped area */
361 ZSTD_updateTree(zc
, ip
, iLimit
, maxNbAttempts
, mls
);
362 return ZSTD_insertBtAndGetAllMatches(zc
, ip
, iLimit
, maxNbAttempts
, mls
, 0, matches
, minMatchLen
);
365 static U32
ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx
*zc
, /* Index table will be updated */
366 const BYTE
*ip
, const BYTE
*const iHighLimit
, const U32 maxNbAttempts
, const U32 matchLengthSearch
,
367 ZSTD_match_t
*matches
, const U32 minMatchLen
)
369 switch (matchLengthSearch
) {
370 case 3: return ZSTD_BtGetAllMatches(zc
, ip
, iHighLimit
, maxNbAttempts
, 3, matches
, minMatchLen
);
372 case 4: return ZSTD_BtGetAllMatches(zc
, ip
, iHighLimit
, maxNbAttempts
, 4, matches
, minMatchLen
);
373 case 5: return ZSTD_BtGetAllMatches(zc
, ip
, iHighLimit
, maxNbAttempts
, 5, matches
, minMatchLen
);
375 case 6: return ZSTD_BtGetAllMatches(zc
, ip
, iHighLimit
, maxNbAttempts
, 6, matches
, minMatchLen
);
379 /** Tree updater, providing best match */
380 static U32
ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx
*zc
, const BYTE
*const ip
, const BYTE
*const iLimit
, const U32 maxNbAttempts
, const U32 mls
,
381 ZSTD_match_t
*matches
, const U32 minMatchLen
)
383 if (ip
< zc
->base
+ zc
->nextToUpdate
)
384 return 0; /* skipped area */
385 ZSTD_updateTree_extDict(zc
, ip
, iLimit
, maxNbAttempts
, mls
);
386 return ZSTD_insertBtAndGetAllMatches(zc
, ip
, iLimit
, maxNbAttempts
, mls
, 1, matches
, minMatchLen
);
389 static U32
ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx
*zc
, /* Index table will be updated */
390 const BYTE
*ip
, const BYTE
*const iHighLimit
, const U32 maxNbAttempts
, const U32 matchLengthSearch
,
391 ZSTD_match_t
*matches
, const U32 minMatchLen
)
393 switch (matchLengthSearch
) {
394 case 3: return ZSTD_BtGetAllMatches_extDict(zc
, ip
, iHighLimit
, maxNbAttempts
, 3, matches
, minMatchLen
);
396 case 4: return ZSTD_BtGetAllMatches_extDict(zc
, ip
, iHighLimit
, maxNbAttempts
, 4, matches
, minMatchLen
);
397 case 5: return ZSTD_BtGetAllMatches_extDict(zc
, ip
, iHighLimit
, maxNbAttempts
, 5, matches
, minMatchLen
);
399 case 6: return ZSTD_BtGetAllMatches_extDict(zc
, ip
, iHighLimit
, maxNbAttempts
, 6, matches
, minMatchLen
);
403 /*-*******************************
405 *********************************/
407 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx
*ctx
, const void *src
, size_t srcSize
, const int ultra
)
409 seqStore_t
*seqStorePtr
= &(ctx
->seqStore
);
410 const BYTE
*const istart
= (const BYTE
*)src
;
411 const BYTE
*ip
= istart
;
412 const BYTE
*anchor
= istart
;
413 const BYTE
*const iend
= istart
+ srcSize
;
414 const BYTE
*const ilimit
= iend
- 8;
415 const BYTE
*const base
= ctx
->base
;
416 const BYTE
*const prefixStart
= base
+ ctx
->dictLimit
;
418 const U32 maxSearches
= 1U << ctx
->params
.cParams
.searchLog
;
419 const U32 sufficient_len
= ctx
->params
.cParams
.targetLength
;
420 const U32 mls
= ctx
->params
.cParams
.searchLength
;
421 const U32 minMatch
= (ctx
->params
.cParams
.searchLength
== 3) ? 3 : 4;
423 ZSTD_optimal_t
*opt
= seqStorePtr
->priceTable
;
424 ZSTD_match_t
*matches
= seqStorePtr
->matchTable
;
426 U32 offset
, rep
[ZSTD_REP_NUM
];
429 ctx
->nextToUpdate3
= ctx
->nextToUpdate
;
430 ZSTD_rescaleFreqs(seqStorePtr
, (const BYTE
*)src
, srcSize
);
431 ip
+= (ip
== prefixStart
);
434 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
435 rep
[i
] = ctx
->rep
[i
];
439 while (ip
< ilimit
) {
440 U32 cur
, match_num
, last_pos
, litlen
, price
;
441 U32 u
, mlen
, best_mlen
, best_off
, litLength
;
442 memset(opt
, 0, sizeof(ZSTD_optimal_t
));
444 litlen
= (U32
)(ip
- anchor
);
448 U32 i
, last_i
= ZSTD_REP_CHECK
+ (ip
== anchor
);
449 for (i
= (ip
== anchor
); i
< last_i
; i
++) {
450 const S32 repCur
= (i
== ZSTD_REP_MOVE_OPT
) ? (rep
[0] - 1) : rep
[i
];
451 if ((repCur
> 0) && (repCur
< (S32
)(ip
- prefixStart
)) &&
452 (ZSTD_readMINMATCH(ip
, minMatch
) == ZSTD_readMINMATCH(ip
- repCur
, minMatch
))) {
453 mlen
= (U32
)ZSTD_count(ip
+ minMatch
, ip
+ minMatch
- repCur
, iend
) + minMatch
;
454 if (mlen
> sufficient_len
|| mlen
>= ZSTD_OPT_NUM
) {
461 best_off
= i
- (ip
== anchor
);
463 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, best_off
, mlen
- MINMATCH
, ultra
);
464 if (mlen
> last_pos
|| price
< opt
[mlen
].price
)
465 SET_PRICE(mlen
, mlen
, i
, litlen
, price
); /* note : macro modifies last_pos */
467 } while (mlen
>= minMatch
);
472 match_num
= ZSTD_BtGetAllMatches_selectMLS(ctx
, ip
, iend
, maxSearches
, mls
, matches
, minMatch
);
474 if (!last_pos
&& !match_num
) {
479 if (match_num
&& (matches
[match_num
- 1].len
> sufficient_len
|| matches
[match_num
- 1].len
>= ZSTD_OPT_NUM
)) {
480 best_mlen
= matches
[match_num
- 1].len
;
481 best_off
= matches
[match_num
- 1].off
;
487 /* set prices using matches at position = 0 */
488 best_mlen
= (last_pos
) ? last_pos
: minMatch
;
489 for (u
= 0; u
< match_num
; u
++) {
490 mlen
= (u
> 0) ? matches
[u
- 1].len
+ 1 : best_mlen
;
491 best_mlen
= matches
[u
].len
;
492 while (mlen
<= best_mlen
) {
493 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
494 if (mlen
> last_pos
|| price
< opt
[mlen
].price
)
495 SET_PRICE(mlen
, mlen
, matches
[u
].off
, litlen
, price
); /* note : macro modifies last_pos */
500 if (last_pos
< minMatch
) {
505 /* initialize opt[0] */
508 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
509 opt
[0].rep
[i
] = rep
[i
];
512 opt
[0].litlen
= litlen
;
514 /* check further positions */
515 for (cur
= 1; cur
<= last_pos
; cur
++) {
518 if (opt
[cur
- 1].mlen
== 1) {
519 litlen
= opt
[cur
- 1].litlen
+ 1;
521 price
= opt
[cur
- litlen
].price
+ ZSTD_getLiteralPrice(seqStorePtr
, litlen
, inr
- litlen
);
523 price
= ZSTD_getLiteralPrice(seqStorePtr
, litlen
, anchor
);
526 price
= opt
[cur
- 1].price
+ ZSTD_getLiteralPrice(seqStorePtr
, litlen
, inr
- 1);
529 if (cur
> last_pos
|| price
<= opt
[cur
].price
)
530 SET_PRICE(cur
, 1, 0, litlen
, price
);
535 if (inr
> ilimit
) /* last match must start at a minimum distance of 8 from oend */
538 mlen
= opt
[cur
].mlen
;
539 if (opt
[cur
].off
> ZSTD_REP_MOVE_OPT
) {
540 opt
[cur
].rep
[2] = opt
[cur
- mlen
].rep
[1];
541 opt
[cur
].rep
[1] = opt
[cur
- mlen
].rep
[0];
542 opt
[cur
].rep
[0] = opt
[cur
].off
- ZSTD_REP_MOVE_OPT
;
544 opt
[cur
].rep
[2] = (opt
[cur
].off
> 1) ? opt
[cur
- mlen
].rep
[1] : opt
[cur
- mlen
].rep
[2];
545 opt
[cur
].rep
[1] = (opt
[cur
].off
> 0) ? opt
[cur
- mlen
].rep
[0] : opt
[cur
- mlen
].rep
[1];
547 ((opt
[cur
].off
== ZSTD_REP_MOVE_OPT
) && (mlen
!= 1)) ? (opt
[cur
- mlen
].rep
[0] - 1) : (opt
[cur
- mlen
].rep
[opt
[cur
].off
]);
550 best_mlen
= minMatch
;
552 U32 i
, last_i
= ZSTD_REP_CHECK
+ (mlen
!= 1);
553 for (i
= (opt
[cur
].mlen
!= 1); i
< last_i
; i
++) { /* check rep */
554 const S32 repCur
= (i
== ZSTD_REP_MOVE_OPT
) ? (opt
[cur
].rep
[0] - 1) : opt
[cur
].rep
[i
];
555 if ((repCur
> 0) && (repCur
< (S32
)(inr
- prefixStart
)) &&
556 (ZSTD_readMINMATCH(inr
, minMatch
) == ZSTD_readMINMATCH(inr
- repCur
, minMatch
))) {
557 mlen
= (U32
)ZSTD_count(inr
+ minMatch
, inr
+ minMatch
- repCur
, iend
) + minMatch
;
559 if (mlen
> sufficient_len
|| cur
+ mlen
>= ZSTD_OPT_NUM
) {
566 best_off
= i
- (opt
[cur
].mlen
!= 1);
567 if (mlen
> best_mlen
)
571 if (opt
[cur
].mlen
== 1) {
572 litlen
= opt
[cur
].litlen
;
574 price
= opt
[cur
- litlen
].price
+ ZSTD_getPrice(seqStorePtr
, litlen
, inr
- litlen
,
575 best_off
, mlen
- MINMATCH
, ultra
);
577 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, best_off
, mlen
- MINMATCH
, ultra
);
580 price
= opt
[cur
].price
+ ZSTD_getPrice(seqStorePtr
, 0, NULL
, best_off
, mlen
- MINMATCH
, ultra
);
583 if (cur
+ mlen
> last_pos
|| price
<= opt
[cur
+ mlen
].price
)
584 SET_PRICE(cur
+ mlen
, mlen
, i
, litlen
, price
);
586 } while (mlen
>= minMatch
);
591 match_num
= ZSTD_BtGetAllMatches_selectMLS(ctx
, inr
, iend
, maxSearches
, mls
, matches
, best_mlen
);
593 if (match_num
> 0 && (matches
[match_num
- 1].len
> sufficient_len
|| cur
+ matches
[match_num
- 1].len
>= ZSTD_OPT_NUM
)) {
594 best_mlen
= matches
[match_num
- 1].len
;
595 best_off
= matches
[match_num
- 1].off
;
600 /* set prices using matches at position = cur */
601 for (u
= 0; u
< match_num
; u
++) {
602 mlen
= (u
> 0) ? matches
[u
- 1].len
+ 1 : best_mlen
;
603 best_mlen
= matches
[u
].len
;
605 while (mlen
<= best_mlen
) {
606 if (opt
[cur
].mlen
== 1) {
607 litlen
= opt
[cur
].litlen
;
609 price
= opt
[cur
- litlen
].price
+ ZSTD_getPrice(seqStorePtr
, litlen
, ip
+ cur
- litlen
,
610 matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
612 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
615 price
= opt
[cur
].price
+ ZSTD_getPrice(seqStorePtr
, 0, NULL
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
618 if (cur
+ mlen
> last_pos
|| (price
< opt
[cur
+ mlen
].price
))
619 SET_PRICE(cur
+ mlen
, mlen
, matches
[u
].off
, litlen
, price
);
626 best_mlen
= opt
[last_pos
].mlen
;
627 best_off
= opt
[last_pos
].off
;
628 cur
= last_pos
- best_mlen
;
631 _storeSequence
: /* cur, last_pos, best_mlen, best_off have to be set */
635 mlen
= opt
[cur
].mlen
;
636 offset
= opt
[cur
].off
;
637 opt
[cur
].mlen
= best_mlen
;
638 opt
[cur
].off
= best_off
;
646 for (u
= 0; u
<= last_pos
;) {
650 for (cur
= 0; cur
< last_pos
;) {
651 mlen
= opt
[cur
].mlen
;
657 offset
= opt
[cur
].off
;
659 litLength
= (U32
)(ip
- anchor
);
661 if (offset
> ZSTD_REP_MOVE_OPT
) {
664 rep
[0] = offset
- ZSTD_REP_MOVE_OPT
;
668 best_off
= (offset
== ZSTD_REP_MOVE_OPT
) ? (rep
[0] - 1) : (rep
[offset
]);
678 ZSTD_updatePrice(seqStorePtr
, litLength
, anchor
, offset
, mlen
- MINMATCH
);
679 ZSTD_storeSeq(seqStorePtr
, litLength
, anchor
, offset
, mlen
- MINMATCH
);
680 anchor
= ip
= ip
+ mlen
;
682 } /* for (cur=0; cur < last_pos; ) */
684 /* Save reps for next block */
687 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
688 ctx
->repToConfirm
[i
] = rep
[i
];
693 size_t const lastLLSize
= iend
- anchor
;
694 memcpy(seqStorePtr
->lit
, anchor
, lastLLSize
);
695 seqStorePtr
->lit
+= lastLLSize
;
700 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx
*ctx
, const void *src
, size_t srcSize
, const int ultra
)
702 seqStore_t
*seqStorePtr
= &(ctx
->seqStore
);
703 const BYTE
*const istart
= (const BYTE
*)src
;
704 const BYTE
*ip
= istart
;
705 const BYTE
*anchor
= istart
;
706 const BYTE
*const iend
= istart
+ srcSize
;
707 const BYTE
*const ilimit
= iend
- 8;
708 const BYTE
*const base
= ctx
->base
;
709 const U32 lowestIndex
= ctx
->lowLimit
;
710 const U32 dictLimit
= ctx
->dictLimit
;
711 const BYTE
*const prefixStart
= base
+ dictLimit
;
712 const BYTE
*const dictBase
= ctx
->dictBase
;
713 const BYTE
*const dictEnd
= dictBase
+ dictLimit
;
715 const U32 maxSearches
= 1U << ctx
->params
.cParams
.searchLog
;
716 const U32 sufficient_len
= ctx
->params
.cParams
.targetLength
;
717 const U32 mls
= ctx
->params
.cParams
.searchLength
;
718 const U32 minMatch
= (ctx
->params
.cParams
.searchLength
== 3) ? 3 : 4;
720 ZSTD_optimal_t
*opt
= seqStorePtr
->priceTable
;
721 ZSTD_match_t
*matches
= seqStorePtr
->matchTable
;
725 U32 offset
, rep
[ZSTD_REP_NUM
];
728 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
729 rep
[i
] = ctx
->rep
[i
];
732 ctx
->nextToUpdate3
= ctx
->nextToUpdate
;
733 ZSTD_rescaleFreqs(seqStorePtr
, (const BYTE
*)src
, srcSize
);
734 ip
+= (ip
== prefixStart
);
737 while (ip
< ilimit
) {
738 U32 cur
, match_num
, last_pos
, litlen
, price
;
739 U32 u
, mlen
, best_mlen
, best_off
, litLength
;
740 U32 curr
= (U32
)(ip
- base
);
741 memset(opt
, 0, sizeof(ZSTD_optimal_t
));
743 opt
[0].litlen
= (U32
)(ip
- anchor
);
747 U32 i
, last_i
= ZSTD_REP_CHECK
+ (ip
== anchor
);
748 for (i
= (ip
== anchor
); i
< last_i
; i
++) {
749 const S32 repCur
= (i
== ZSTD_REP_MOVE_OPT
) ? (rep
[0] - 1) : rep
[i
];
750 const U32 repIndex
= (U32
)(curr
- repCur
);
751 const BYTE
*const repBase
= repIndex
< dictLimit
? dictBase
: base
;
752 const BYTE
*const repMatch
= repBase
+ repIndex
;
753 if ((repCur
> 0 && repCur
<= (S32
)curr
) &&
754 (((U32
)((dictLimit
- 1) - repIndex
) >= 3) & (repIndex
> lowestIndex
)) /* intentional overflow */
755 && (ZSTD_readMINMATCH(ip
, minMatch
) == ZSTD_readMINMATCH(repMatch
, minMatch
))) {
756 /* repcode detected we should take it */
757 const BYTE
*const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
758 mlen
= (U32
)ZSTD_count_2segments(ip
+ minMatch
, repMatch
+ minMatch
, iend
, repEnd
, prefixStart
) + minMatch
;
760 if (mlen
> sufficient_len
|| mlen
>= ZSTD_OPT_NUM
) {
768 best_off
= i
- (ip
== anchor
);
769 litlen
= opt
[0].litlen
;
771 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, best_off
, mlen
- MINMATCH
, ultra
);
772 if (mlen
> last_pos
|| price
< opt
[mlen
].price
)
773 SET_PRICE(mlen
, mlen
, i
, litlen
, price
); /* note : macro modifies last_pos */
775 } while (mlen
>= minMatch
);
780 match_num
= ZSTD_BtGetAllMatches_selectMLS_extDict(ctx
, ip
, iend
, maxSearches
, mls
, matches
, minMatch
); /* first search (depth 0) */
782 if (!last_pos
&& !match_num
) {
789 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
790 opt
[0].rep
[i
] = rep
[i
];
794 if (match_num
&& (matches
[match_num
- 1].len
> sufficient_len
|| matches
[match_num
- 1].len
>= ZSTD_OPT_NUM
)) {
795 best_mlen
= matches
[match_num
- 1].len
;
796 best_off
= matches
[match_num
- 1].off
;
802 best_mlen
= (last_pos
) ? last_pos
: minMatch
;
804 /* set prices using matches at position = 0 */
805 for (u
= 0; u
< match_num
; u
++) {
806 mlen
= (u
> 0) ? matches
[u
- 1].len
+ 1 : best_mlen
;
807 best_mlen
= matches
[u
].len
;
808 litlen
= opt
[0].litlen
;
809 while (mlen
<= best_mlen
) {
810 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
811 if (mlen
> last_pos
|| price
< opt
[mlen
].price
)
812 SET_PRICE(mlen
, mlen
, matches
[u
].off
, litlen
, price
);
817 if (last_pos
< minMatch
) {
822 /* check further positions */
823 for (cur
= 1; cur
<= last_pos
; cur
++) {
826 if (opt
[cur
- 1].mlen
== 1) {
827 litlen
= opt
[cur
- 1].litlen
+ 1;
829 price
= opt
[cur
- litlen
].price
+ ZSTD_getLiteralPrice(seqStorePtr
, litlen
, inr
- litlen
);
831 price
= ZSTD_getLiteralPrice(seqStorePtr
, litlen
, anchor
);
834 price
= opt
[cur
- 1].price
+ ZSTD_getLiteralPrice(seqStorePtr
, litlen
, inr
- 1);
837 if (cur
> last_pos
|| price
<= opt
[cur
].price
)
838 SET_PRICE(cur
, 1, 0, litlen
, price
);
843 if (inr
> ilimit
) /* last match must start at a minimum distance of 8 from oend */
846 mlen
= opt
[cur
].mlen
;
847 if (opt
[cur
].off
> ZSTD_REP_MOVE_OPT
) {
848 opt
[cur
].rep
[2] = opt
[cur
- mlen
].rep
[1];
849 opt
[cur
].rep
[1] = opt
[cur
- mlen
].rep
[0];
850 opt
[cur
].rep
[0] = opt
[cur
].off
- ZSTD_REP_MOVE_OPT
;
852 opt
[cur
].rep
[2] = (opt
[cur
].off
> 1) ? opt
[cur
- mlen
].rep
[1] : opt
[cur
- mlen
].rep
[2];
853 opt
[cur
].rep
[1] = (opt
[cur
].off
> 0) ? opt
[cur
- mlen
].rep
[0] : opt
[cur
- mlen
].rep
[1];
855 ((opt
[cur
].off
== ZSTD_REP_MOVE_OPT
) && (mlen
!= 1)) ? (opt
[cur
- mlen
].rep
[0] - 1) : (opt
[cur
- mlen
].rep
[opt
[cur
].off
]);
858 best_mlen
= minMatch
;
860 U32 i
, last_i
= ZSTD_REP_CHECK
+ (mlen
!= 1);
861 for (i
= (mlen
!= 1); i
< last_i
; i
++) {
862 const S32 repCur
= (i
== ZSTD_REP_MOVE_OPT
) ? (opt
[cur
].rep
[0] - 1) : opt
[cur
].rep
[i
];
863 const U32 repIndex
= (U32
)(curr
+ cur
- repCur
);
864 const BYTE
*const repBase
= repIndex
< dictLimit
? dictBase
: base
;
865 const BYTE
*const repMatch
= repBase
+ repIndex
;
866 if ((repCur
> 0 && repCur
<= (S32
)(curr
+ cur
)) &&
867 (((U32
)((dictLimit
- 1) - repIndex
) >= 3) & (repIndex
> lowestIndex
)) /* intentional overflow */
868 && (ZSTD_readMINMATCH(inr
, minMatch
) == ZSTD_readMINMATCH(repMatch
, minMatch
))) {
869 /* repcode detected */
870 const BYTE
*const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
871 mlen
= (U32
)ZSTD_count_2segments(inr
+ minMatch
, repMatch
+ minMatch
, iend
, repEnd
, prefixStart
) + minMatch
;
873 if (mlen
> sufficient_len
|| cur
+ mlen
>= ZSTD_OPT_NUM
) {
880 best_off
= i
- (opt
[cur
].mlen
!= 1);
881 if (mlen
> best_mlen
)
885 if (opt
[cur
].mlen
== 1) {
886 litlen
= opt
[cur
].litlen
;
888 price
= opt
[cur
- litlen
].price
+ ZSTD_getPrice(seqStorePtr
, litlen
, inr
- litlen
,
889 best_off
, mlen
- MINMATCH
, ultra
);
891 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, best_off
, mlen
- MINMATCH
, ultra
);
894 price
= opt
[cur
].price
+ ZSTD_getPrice(seqStorePtr
, 0, NULL
, best_off
, mlen
- MINMATCH
, ultra
);
897 if (cur
+ mlen
> last_pos
|| price
<= opt
[cur
+ mlen
].price
)
898 SET_PRICE(cur
+ mlen
, mlen
, i
, litlen
, price
);
900 } while (mlen
>= minMatch
);
905 match_num
= ZSTD_BtGetAllMatches_selectMLS_extDict(ctx
, inr
, iend
, maxSearches
, mls
, matches
, minMatch
);
907 if (match_num
> 0 && (matches
[match_num
- 1].len
> sufficient_len
|| cur
+ matches
[match_num
- 1].len
>= ZSTD_OPT_NUM
)) {
908 best_mlen
= matches
[match_num
- 1].len
;
909 best_off
= matches
[match_num
- 1].off
;
914 /* set prices using matches at position = cur */
915 for (u
= 0; u
< match_num
; u
++) {
916 mlen
= (u
> 0) ? matches
[u
- 1].len
+ 1 : best_mlen
;
917 best_mlen
= matches
[u
].len
;
919 while (mlen
<= best_mlen
) {
920 if (opt
[cur
].mlen
== 1) {
921 litlen
= opt
[cur
].litlen
;
923 price
= opt
[cur
- litlen
].price
+ ZSTD_getPrice(seqStorePtr
, litlen
, ip
+ cur
- litlen
,
924 matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
926 price
= ZSTD_getPrice(seqStorePtr
, litlen
, anchor
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
929 price
= opt
[cur
].price
+ ZSTD_getPrice(seqStorePtr
, 0, NULL
, matches
[u
].off
- 1, mlen
- MINMATCH
, ultra
);
932 if (cur
+ mlen
> last_pos
|| (price
< opt
[cur
+ mlen
].price
))
933 SET_PRICE(cur
+ mlen
, mlen
, matches
[u
].off
, litlen
, price
);
938 } /* for (cur = 1; cur <= last_pos; cur++) */
940 best_mlen
= opt
[last_pos
].mlen
;
941 best_off
= opt
[last_pos
].off
;
942 cur
= last_pos
- best_mlen
;
945 _storeSequence
: /* cur, last_pos, best_mlen, best_off have to be set */
949 mlen
= opt
[cur
].mlen
;
950 offset
= opt
[cur
].off
;
951 opt
[cur
].mlen
= best_mlen
;
952 opt
[cur
].off
= best_off
;
960 for (u
= 0; u
<= last_pos
;) {
964 for (cur
= 0; cur
< last_pos
;) {
965 mlen
= opt
[cur
].mlen
;
971 offset
= opt
[cur
].off
;
973 litLength
= (U32
)(ip
- anchor
);
975 if (offset
> ZSTD_REP_MOVE_OPT
) {
978 rep
[0] = offset
- ZSTD_REP_MOVE_OPT
;
982 best_off
= (offset
== ZSTD_REP_MOVE_OPT
) ? (rep
[0] - 1) : (rep
[offset
]);
993 ZSTD_updatePrice(seqStorePtr
, litLength
, anchor
, offset
, mlen
- MINMATCH
);
994 ZSTD_storeSeq(seqStorePtr
, litLength
, anchor
, offset
, mlen
- MINMATCH
);
995 anchor
= ip
= ip
+ mlen
;
997 } /* for (cur=0; cur < last_pos; ) */
999 /* Save reps for next block */
1002 for (i
= 0; i
< ZSTD_REP_NUM
; i
++)
1003 ctx
->repToConfirm
[i
] = rep
[i
];
1008 size_t lastLLSize
= iend
- anchor
;
1009 memcpy(seqStorePtr
->lit
, anchor
, lastLLSize
);
1010 seqStorePtr
->lit
+= lastLLSize
;
1014 #endif /* ZSTD_OPT_H_91842398743 */