2 * Copyright (c) Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
11 /*-*************************************
13 ***************************************/
14 #include "zstd_compress_sequences.h"
17 * -log2(x / 256) lookup table for x in [0, 256).
19 * Else: Return floor(-log2(x / 256) * 256)
21 static unsigned const kInverseProbabilityLog256
[256] = {
22 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
23 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
24 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
25 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
26 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
27 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
28 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
29 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
30 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
31 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
32 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
33 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
34 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
35 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
36 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
37 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
38 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
39 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
40 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
41 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
42 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
46 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable
const* ctable
) {
47 void const* ptr
= ctable
;
48 U16
const* u16ptr
= (U16
const*)ptr
;
49 U32
const maxSymbolValue
= MEM_read16(u16ptr
+ 1);
50 return maxSymbolValue
;
54 * Returns true if we should use ncount=-1 else we should
55 * use ncount=1 for low probability symbols instead.
57 static unsigned ZSTD_useLowProbCount(size_t const nbSeq
)
59 /* Heuristic: This should cover most blocks <= 16K and
60 * start to fade out after 16K to about 32K depending on
67 * Returns the cost in bytes of encoding the normalized count header.
68 * Returns an error if any of the helper functions return an error.
70 static size_t ZSTD_NCountCost(unsigned const* count
, unsigned const max
,
71 size_t const nbSeq
, unsigned const FSELog
)
73 BYTE wksp
[FSE_NCOUNTBOUND
];
75 const U32 tableLog
= FSE_optimalTableLog(FSELog
, nbSeq
, max
);
76 FORWARD_IF_ERROR(FSE_normalizeCount(norm
, tableLog
, count
, nbSeq
, max
, ZSTD_useLowProbCount(nbSeq
)), "");
77 return FSE_writeNCount(wksp
, sizeof(wksp
), norm
, max
, tableLog
);
81 * Returns the cost in bits of encoding the distribution described by count
82 * using the entropy bound.
84 static size_t ZSTD_entropyCost(unsigned const* count
, unsigned const max
, size_t const total
)
90 for (s
= 0; s
<= max
; ++s
) {
91 unsigned norm
= (unsigned)((256 * count
[s
]) / total
);
92 if (count
[s
] != 0 && norm
== 0)
94 assert(count
[s
] < total
);
95 cost
+= count
[s
] * kInverseProbabilityLog256
[norm
];
101 * Returns the cost in bits of encoding the distribution in count using ctable.
102 * Returns an error if ctable cannot represent all the symbols in count.
104 size_t ZSTD_fseBitCost(
105 FSE_CTable
const* ctable
,
106 unsigned const* count
,
109 unsigned const kAccuracyLog
= 8;
113 FSE_initCState(&cstate
, ctable
);
114 if (ZSTD_getFSEMaxSymbolValue(ctable
) < max
) {
115 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
116 ZSTD_getFSEMaxSymbolValue(ctable
), max
);
117 return ERROR(GENERIC
);
119 for (s
= 0; s
<= max
; ++s
) {
120 unsigned const tableLog
= cstate
.stateLog
;
121 unsigned const badCost
= (tableLog
+ 1) << kAccuracyLog
;
122 unsigned const bitCost
= FSE_bitCost(cstate
.symbolTT
, tableLog
, s
, kAccuracyLog
);
125 if (bitCost
>= badCost
) {
126 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s
);
127 return ERROR(GENERIC
);
129 cost
+= (size_t)count
[s
] * bitCost
;
131 return cost
>> kAccuracyLog
;
135 * Returns the cost in bits of encoding the distribution in count using the
136 * table described by norm. The max symbol support by norm is assumed >= max.
137 * norm must be valid for every symbol with non-zero probability in count.
139 size_t ZSTD_crossEntropyCost(short const* norm
, unsigned accuracyLog
,
140 unsigned const* count
, unsigned const max
)
142 unsigned const shift
= 8 - accuracyLog
;
145 assert(accuracyLog
<= 8);
146 for (s
= 0; s
<= max
; ++s
) {
147 unsigned const normAcc
= (norm
[s
] != -1) ? (unsigned)norm
[s
] : 1;
148 unsigned const norm256
= normAcc
<< shift
;
150 assert(norm256
< 256);
151 cost
+= count
[s
] * kInverseProbabilityLog256
[norm256
];
157 ZSTD_selectEncodingType(
158 FSE_repeat
* repeatMode
, unsigned const* count
, unsigned const max
,
159 size_t const mostFrequent
, size_t nbSeq
, unsigned const FSELog
,
160 FSE_CTable
const* prevCTable
,
161 short const* defaultNorm
, U32 defaultNormLog
,
162 ZSTD_defaultPolicy_e
const isDefaultAllowed
,
163 ZSTD_strategy
const strategy
)
165 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed
== 0 && ZSTD_defaultAllowed
!= 0);
166 if (mostFrequent
== nbSeq
) {
167 *repeatMode
= FSE_repeat_none
;
168 if (isDefaultAllowed
&& nbSeq
<= 2) {
169 /* Prefer set_basic over set_rle when there are 2 or less symbols,
170 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
171 * If basic encoding isn't possible, always choose RLE.
173 DEBUGLOG(5, "Selected set_basic");
176 DEBUGLOG(5, "Selected set_rle");
179 if (strategy
< ZSTD_lazy
) {
180 if (isDefaultAllowed
) {
181 size_t const staticFse_nbSeq_max
= 1000;
182 size_t const mult
= 10 - strategy
;
183 size_t const baseLog
= 3;
184 size_t const dynamicFse_nbSeq_min
= (((size_t)1 << defaultNormLog
) * mult
) >> baseLog
; /* 28-36 for offset, 56-72 for lengths */
185 assert(defaultNormLog
>= 5 && defaultNormLog
<= 6); /* xx_DEFAULTNORMLOG */
186 assert(mult
<= 9 && mult
>= 7);
187 if ( (*repeatMode
== FSE_repeat_valid
)
188 && (nbSeq
< staticFse_nbSeq_max
) ) {
189 DEBUGLOG(5, "Selected set_repeat");
192 if ( (nbSeq
< dynamicFse_nbSeq_min
)
193 || (mostFrequent
< (nbSeq
>> (defaultNormLog
-1))) ) {
194 DEBUGLOG(5, "Selected set_basic");
195 /* The format allows default tables to be repeated, but it isn't useful.
196 * When using simple heuristics to select encoding type, we don't want
197 * to confuse these tables with dictionaries. When running more careful
198 * analysis, we don't need to waste time checking both repeating tables
199 * and default tables.
201 *repeatMode
= FSE_repeat_none
;
206 size_t const basicCost
= isDefaultAllowed
? ZSTD_crossEntropyCost(defaultNorm
, defaultNormLog
, count
, max
) : ERROR(GENERIC
);
207 size_t const repeatCost
= *repeatMode
!= FSE_repeat_none
? ZSTD_fseBitCost(prevCTable
, count
, max
) : ERROR(GENERIC
);
208 size_t const NCountCost
= ZSTD_NCountCost(count
, max
, nbSeq
, FSELog
);
209 size_t const compressedCost
= (NCountCost
<< 3) + ZSTD_entropyCost(count
, max
, nbSeq
);
211 if (isDefaultAllowed
) {
212 assert(!ZSTD_isError(basicCost
));
213 assert(!(*repeatMode
== FSE_repeat_valid
&& ZSTD_isError(repeatCost
)));
215 assert(!ZSTD_isError(NCountCost
));
216 assert(compressedCost
< ERROR(maxCode
));
217 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
218 (unsigned)basicCost
, (unsigned)repeatCost
, (unsigned)compressedCost
);
219 if (basicCost
<= repeatCost
&& basicCost
<= compressedCost
) {
220 DEBUGLOG(5, "Selected set_basic");
221 assert(isDefaultAllowed
);
222 *repeatMode
= FSE_repeat_none
;
225 if (repeatCost
<= compressedCost
) {
226 DEBUGLOG(5, "Selected set_repeat");
227 assert(!ZSTD_isError(repeatCost
));
230 assert(compressedCost
< basicCost
&& compressedCost
< repeatCost
);
232 DEBUGLOG(5, "Selected set_compressed");
233 *repeatMode
= FSE_repeat_check
;
234 return set_compressed
;
238 S16 norm
[MaxSeq
+ 1];
239 U32 wksp
[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq
, MaxFSELog
)];
240 } ZSTD_BuildCTableWksp
;
243 ZSTD_buildCTable(void* dst
, size_t dstCapacity
,
244 FSE_CTable
* nextCTable
, U32 FSELog
, symbolEncodingType_e type
,
245 unsigned* count
, U32 max
,
246 const BYTE
* codeTable
, size_t nbSeq
,
247 const S16
* defaultNorm
, U32 defaultNormLog
, U32 defaultMax
,
248 const FSE_CTable
* prevCTable
, size_t prevCTableSize
,
249 void* entropyWorkspace
, size_t entropyWorkspaceSize
)
251 BYTE
* op
= (BYTE
*)dst
;
252 const BYTE
* const oend
= op
+ dstCapacity
;
253 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity
);
257 FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable
, (BYTE
)max
), "");
258 RETURN_ERROR_IF(dstCapacity
==0, dstSize_tooSmall
, "not enough space");
262 ZSTD_memcpy(nextCTable
, prevCTable
, prevCTableSize
);
265 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable
, defaultNorm
, defaultMax
, defaultNormLog
, entropyWorkspace
, entropyWorkspaceSize
), ""); /* note : could be pre-calculated */
267 case set_compressed
: {
268 ZSTD_BuildCTableWksp
* wksp
= (ZSTD_BuildCTableWksp
*)entropyWorkspace
;
269 size_t nbSeq_1
= nbSeq
;
270 const U32 tableLog
= FSE_optimalTableLog(FSELog
, nbSeq
, max
);
271 if (count
[codeTable
[nbSeq
-1]] > 1) {
272 count
[codeTable
[nbSeq
-1]]--;
276 assert(entropyWorkspaceSize
>= sizeof(ZSTD_BuildCTableWksp
));
277 (void)entropyWorkspaceSize
;
278 FORWARD_IF_ERROR(FSE_normalizeCount(wksp
->norm
, tableLog
, count
, nbSeq_1
, max
, ZSTD_useLowProbCount(nbSeq_1
)), "FSE_normalizeCount failed");
280 { size_t const NCountSize
= FSE_writeNCount(op
, (size_t)(oend
- op
), wksp
->norm
, max
, tableLog
); /* overflow protected */
281 FORWARD_IF_ERROR(NCountSize
, "FSE_writeNCount failed");
282 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable
, wksp
->norm
, max
, tableLog
, wksp
->wksp
, sizeof(wksp
->wksp
)), "FSE_buildCTable_wksp failed");
286 default: assert(0); RETURN_ERROR(GENERIC
, "impossible to reach");
290 FORCE_INLINE_TEMPLATE
size_t
291 ZSTD_encodeSequences_body(
292 void* dst
, size_t dstCapacity
,
293 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
294 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
295 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
296 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
298 BIT_CStream_t blockStream
;
299 FSE_CState_t stateMatchLength
;
300 FSE_CState_t stateOffsetBits
;
301 FSE_CState_t stateLitLength
;
304 ERR_isError(BIT_initCStream(&blockStream
, dst
, dstCapacity
)),
305 dstSize_tooSmall
, "not enough space remaining");
306 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
307 (int)(blockStream
.endPtr
- blockStream
.startPtr
),
308 (unsigned)dstCapacity
);
311 FSE_initCState2(&stateMatchLength
, CTable_MatchLength
, mlCodeTable
[nbSeq
-1]);
312 FSE_initCState2(&stateOffsetBits
, CTable_OffsetBits
, ofCodeTable
[nbSeq
-1]);
313 FSE_initCState2(&stateLitLength
, CTable_LitLength
, llCodeTable
[nbSeq
-1]);
314 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].litLength
, LL_bits
[llCodeTable
[nbSeq
-1]]);
315 if (MEM_32bits()) BIT_flushBits(&blockStream
);
316 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].mlBase
, ML_bits
[mlCodeTable
[nbSeq
-1]]);
317 if (MEM_32bits()) BIT_flushBits(&blockStream
);
319 U32
const ofBits
= ofCodeTable
[nbSeq
-1];
320 unsigned const extraBits
= ofBits
- MIN(ofBits
, STREAM_ACCUMULATOR_MIN
-1);
322 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offBase
, extraBits
);
323 BIT_flushBits(&blockStream
);
325 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offBase
>> extraBits
,
328 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offBase
, ofCodeTable
[nbSeq
-1]);
330 BIT_flushBits(&blockStream
);
333 for (n
=nbSeq
-2 ; n
<nbSeq
; n
--) { /* intentional underflow */
334 BYTE
const llCode
= llCodeTable
[n
];
335 BYTE
const ofCode
= ofCodeTable
[n
];
336 BYTE
const mlCode
= mlCodeTable
[n
];
337 U32
const llBits
= LL_bits
[llCode
];
338 U32
const ofBits
= ofCode
;
339 U32
const mlBits
= ML_bits
[mlCode
];
340 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
341 (unsigned)sequences
[n
].litLength
,
342 (unsigned)sequences
[n
].mlBase
+ MINMATCH
,
343 (unsigned)sequences
[n
].offBase
);
346 FSE_encodeSymbol(&blockStream
, &stateOffsetBits
, ofCode
); /* 15 */ /* 15 */
347 FSE_encodeSymbol(&blockStream
, &stateMatchLength
, mlCode
); /* 24 */ /* 24 */
348 if (MEM_32bits()) BIT_flushBits(&blockStream
); /* (7)*/
349 FSE_encodeSymbol(&blockStream
, &stateLitLength
, llCode
); /* 16 */ /* 33 */
350 if (MEM_32bits() || (ofBits
+mlBits
+llBits
>= 64-7-(LLFSELog
+MLFSELog
+OffFSELog
)))
351 BIT_flushBits(&blockStream
); /* (7)*/
352 BIT_addBits(&blockStream
, sequences
[n
].litLength
, llBits
);
353 if (MEM_32bits() && ((llBits
+mlBits
)>24)) BIT_flushBits(&blockStream
);
354 BIT_addBits(&blockStream
, sequences
[n
].mlBase
, mlBits
);
355 if (MEM_32bits() || (ofBits
+mlBits
+llBits
> 56)) BIT_flushBits(&blockStream
);
357 unsigned const extraBits
= ofBits
- MIN(ofBits
, STREAM_ACCUMULATOR_MIN
-1);
359 BIT_addBits(&blockStream
, sequences
[n
].offBase
, extraBits
);
360 BIT_flushBits(&blockStream
); /* (7)*/
362 BIT_addBits(&blockStream
, sequences
[n
].offBase
>> extraBits
,
363 ofBits
- extraBits
); /* 31 */
365 BIT_addBits(&blockStream
, sequences
[n
].offBase
, ofBits
); /* 31 */
367 BIT_flushBits(&blockStream
); /* (7)*/
368 DEBUGLOG(7, "remaining space : %i", (int)(blockStream
.endPtr
- blockStream
.ptr
));
371 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength
.stateLog
);
372 FSE_flushCState(&blockStream
, &stateMatchLength
);
373 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits
.stateLog
);
374 FSE_flushCState(&blockStream
, &stateOffsetBits
);
375 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength
.stateLog
);
376 FSE_flushCState(&blockStream
, &stateLitLength
);
378 { size_t const streamSize
= BIT_closeCStream(&blockStream
);
379 RETURN_ERROR_IF(streamSize
==0, dstSize_tooSmall
, "not enough space");
385 ZSTD_encodeSequences_default(
386 void* dst
, size_t dstCapacity
,
387 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
388 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
389 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
390 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
392 return ZSTD_encodeSequences_body(dst
, dstCapacity
,
393 CTable_MatchLength
, mlCodeTable
,
394 CTable_OffsetBits
, ofCodeTable
,
395 CTable_LitLength
, llCodeTable
,
396 sequences
, nbSeq
, longOffsets
);
402 static BMI2_TARGET_ATTRIBUTE
size_t
403 ZSTD_encodeSequences_bmi2(
404 void* dst
, size_t dstCapacity
,
405 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
406 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
407 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
408 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
410 return ZSTD_encodeSequences_body(dst
, dstCapacity
,
411 CTable_MatchLength
, mlCodeTable
,
412 CTable_OffsetBits
, ofCodeTable
,
413 CTable_LitLength
, llCodeTable
,
414 sequences
, nbSeq
, longOffsets
);
419 size_t ZSTD_encodeSequences(
420 void* dst
, size_t dstCapacity
,
421 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
422 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
423 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
424 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
, int bmi2
)
426 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity
);
429 return ZSTD_encodeSequences_bmi2(dst
, dstCapacity
,
430 CTable_MatchLength
, mlCodeTable
,
431 CTable_OffsetBits
, ofCodeTable
,
432 CTable_LitLength
, llCodeTable
,
433 sequences
, nbSeq
, longOffsets
);
437 return ZSTD_encodeSequences_default(dst
, dstCapacity
,
438 CTable_MatchLength
, mlCodeTable
,
439 CTable_OffsetBits
, ofCodeTable
,
440 CTable_LitLength
, llCodeTable
,
441 sequences
, nbSeq
, longOffsets
);