2 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
11 /*-*************************************
13 ***************************************/
14 #include "zstd_compress_sequences.h"
17 * -log2(x / 256) lookup table for x in [0, 256).
19 * Else: Return floor(-log2(x / 256) * 256)
21 static unsigned const kInverseProbabilityLog256
[256] = {
22 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
23 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
24 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
25 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
26 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
27 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
28 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
29 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
30 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
31 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
32 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
33 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
34 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
35 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
36 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
37 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
38 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
39 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
40 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
41 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
42 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
46 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable
const* ctable
) {
47 void const* ptr
= ctable
;
48 U16
const* u16ptr
= (U16
const*)ptr
;
49 U32
const maxSymbolValue
= MEM_read16(u16ptr
+ 1);
50 return maxSymbolValue
;
54 * Returns the cost in bytes of encoding the normalized count header.
55 * Returns an error if any of the helper functions return an error.
57 static size_t ZSTD_NCountCost(unsigned const* count
, unsigned const max
,
58 size_t const nbSeq
, unsigned const FSELog
)
60 BYTE wksp
[FSE_NCOUNTBOUND
];
62 const U32 tableLog
= FSE_optimalTableLog(FSELog
, nbSeq
, max
);
63 FORWARD_IF_ERROR(FSE_normalizeCount(norm
, tableLog
, count
, nbSeq
, max
), "");
64 return FSE_writeNCount(wksp
, sizeof(wksp
), norm
, max
, tableLog
);
68 * Returns the cost in bits of encoding the distribution described by count
69 * using the entropy bound.
71 static size_t ZSTD_entropyCost(unsigned const* count
, unsigned const max
, size_t const total
)
75 for (s
= 0; s
<= max
; ++s
) {
76 unsigned norm
= (unsigned)((256 * count
[s
]) / total
);
77 if (count
[s
] != 0 && norm
== 0)
79 assert(count
[s
] < total
);
80 cost
+= count
[s
] * kInverseProbabilityLog256
[norm
];
86 * Returns the cost in bits of encoding the distribution in count using ctable.
87 * Returns an error if ctable cannot represent all the symbols in count.
89 size_t ZSTD_fseBitCost(
90 FSE_CTable
const* ctable
,
91 unsigned const* count
,
94 unsigned const kAccuracyLog
= 8;
98 FSE_initCState(&cstate
, ctable
);
99 if (ZSTD_getFSEMaxSymbolValue(ctable
) < max
) {
100 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
101 ZSTD_getFSEMaxSymbolValue(ctable
), max
);
102 return ERROR(GENERIC
);
104 for (s
= 0; s
<= max
; ++s
) {
105 unsigned const tableLog
= cstate
.stateLog
;
106 unsigned const badCost
= (tableLog
+ 1) << kAccuracyLog
;
107 unsigned const bitCost
= FSE_bitCost(cstate
.symbolTT
, tableLog
, s
, kAccuracyLog
);
110 if (bitCost
>= badCost
) {
111 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s
);
112 return ERROR(GENERIC
);
114 cost
+= (size_t)count
[s
] * bitCost
;
116 return cost
>> kAccuracyLog
;
120 * Returns the cost in bits of encoding the distribution in count using the
121 * table described by norm. The max symbol support by norm is assumed >= max.
122 * norm must be valid for every symbol with non-zero probability in count.
124 size_t ZSTD_crossEntropyCost(short const* norm
, unsigned accuracyLog
,
125 unsigned const* count
, unsigned const max
)
127 unsigned const shift
= 8 - accuracyLog
;
130 assert(accuracyLog
<= 8);
131 for (s
= 0; s
<= max
; ++s
) {
132 unsigned const normAcc
= (norm
[s
] != -1) ? (unsigned)norm
[s
] : 1;
133 unsigned const norm256
= normAcc
<< shift
;
135 assert(norm256
< 256);
136 cost
+= count
[s
] * kInverseProbabilityLog256
[norm256
];
142 ZSTD_selectEncodingType(
143 FSE_repeat
* repeatMode
, unsigned const* count
, unsigned const max
,
144 size_t const mostFrequent
, size_t nbSeq
, unsigned const FSELog
,
145 FSE_CTable
const* prevCTable
,
146 short const* defaultNorm
, U32 defaultNormLog
,
147 ZSTD_defaultPolicy_e
const isDefaultAllowed
,
148 ZSTD_strategy
const strategy
)
150 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed
== 0 && ZSTD_defaultAllowed
!= 0);
151 if (mostFrequent
== nbSeq
) {
152 *repeatMode
= FSE_repeat_none
;
153 if (isDefaultAllowed
&& nbSeq
<= 2) {
154 /* Prefer set_basic over set_rle when there are 2 or less symbols,
155 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
156 * If basic encoding isn't possible, always choose RLE.
158 DEBUGLOG(5, "Selected set_basic");
161 DEBUGLOG(5, "Selected set_rle");
164 if (strategy
< ZSTD_lazy
) {
165 if (isDefaultAllowed
) {
166 size_t const staticFse_nbSeq_max
= 1000;
167 size_t const mult
= 10 - strategy
;
168 size_t const baseLog
= 3;
169 size_t const dynamicFse_nbSeq_min
= (((size_t)1 << defaultNormLog
) * mult
) >> baseLog
; /* 28-36 for offset, 56-72 for lengths */
170 assert(defaultNormLog
>= 5 && defaultNormLog
<= 6); /* xx_DEFAULTNORMLOG */
171 assert(mult
<= 9 && mult
>= 7);
172 if ( (*repeatMode
== FSE_repeat_valid
)
173 && (nbSeq
< staticFse_nbSeq_max
) ) {
174 DEBUGLOG(5, "Selected set_repeat");
177 if ( (nbSeq
< dynamicFse_nbSeq_min
)
178 || (mostFrequent
< (nbSeq
>> (defaultNormLog
-1))) ) {
179 DEBUGLOG(5, "Selected set_basic");
180 /* The format allows default tables to be repeated, but it isn't useful.
181 * When using simple heuristics to select encoding type, we don't want
182 * to confuse these tables with dictionaries. When running more careful
183 * analysis, we don't need to waste time checking both repeating tables
184 * and default tables.
186 *repeatMode
= FSE_repeat_none
;
191 size_t const basicCost
= isDefaultAllowed
? ZSTD_crossEntropyCost(defaultNorm
, defaultNormLog
, count
, max
) : ERROR(GENERIC
);
192 size_t const repeatCost
= *repeatMode
!= FSE_repeat_none
? ZSTD_fseBitCost(prevCTable
, count
, max
) : ERROR(GENERIC
);
193 size_t const NCountCost
= ZSTD_NCountCost(count
, max
, nbSeq
, FSELog
);
194 size_t const compressedCost
= (NCountCost
<< 3) + ZSTD_entropyCost(count
, max
, nbSeq
);
196 if (isDefaultAllowed
) {
197 assert(!ZSTD_isError(basicCost
));
198 assert(!(*repeatMode
== FSE_repeat_valid
&& ZSTD_isError(repeatCost
)));
200 assert(!ZSTD_isError(NCountCost
));
201 assert(compressedCost
< ERROR(maxCode
));
202 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
203 (unsigned)basicCost
, (unsigned)repeatCost
, (unsigned)compressedCost
);
204 if (basicCost
<= repeatCost
&& basicCost
<= compressedCost
) {
205 DEBUGLOG(5, "Selected set_basic");
206 assert(isDefaultAllowed
);
207 *repeatMode
= FSE_repeat_none
;
210 if (repeatCost
<= compressedCost
) {
211 DEBUGLOG(5, "Selected set_repeat");
212 assert(!ZSTD_isError(repeatCost
));
215 assert(compressedCost
< basicCost
&& compressedCost
< repeatCost
);
217 DEBUGLOG(5, "Selected set_compressed");
218 *repeatMode
= FSE_repeat_check
;
219 return set_compressed
;
223 ZSTD_buildCTable(void* dst
, size_t dstCapacity
,
224 FSE_CTable
* nextCTable
, U32 FSELog
, symbolEncodingType_e type
,
225 unsigned* count
, U32 max
,
226 const BYTE
* codeTable
, size_t nbSeq
,
227 const S16
* defaultNorm
, U32 defaultNormLog
, U32 defaultMax
,
228 const FSE_CTable
* prevCTable
, size_t prevCTableSize
,
229 void* entropyWorkspace
, size_t entropyWorkspaceSize
)
231 BYTE
* op
= (BYTE
*)dst
;
232 const BYTE
* const oend
= op
+ dstCapacity
;
233 DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity
);
237 FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable
, (BYTE
)max
), "");
238 RETURN_ERROR_IF(dstCapacity
==0, dstSize_tooSmall
, "not enough space");
242 memcpy(nextCTable
, prevCTable
, prevCTableSize
);
245 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable
, defaultNorm
, defaultMax
, defaultNormLog
, entropyWorkspace
, entropyWorkspaceSize
), ""); /* note : could be pre-calculated */
247 case set_compressed
: {
248 S16 norm
[MaxSeq
+ 1];
249 size_t nbSeq_1
= nbSeq
;
250 const U32 tableLog
= FSE_optimalTableLog(FSELog
, nbSeq
, max
);
251 if (count
[codeTable
[nbSeq
-1]] > 1) {
252 count
[codeTable
[nbSeq
-1]]--;
256 FORWARD_IF_ERROR(FSE_normalizeCount(norm
, tableLog
, count
, nbSeq_1
, max
), "");
257 { size_t const NCountSize
= FSE_writeNCount(op
, oend
- op
, norm
, max
, tableLog
); /* overflow protected */
258 FORWARD_IF_ERROR(NCountSize
, "FSE_writeNCount failed");
259 FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable
, norm
, max
, tableLog
, entropyWorkspace
, entropyWorkspaceSize
), "");
263 default: assert(0); RETURN_ERROR(GENERIC
, "impossible to reach");
267 FORCE_INLINE_TEMPLATE
size_t
268 ZSTD_encodeSequences_body(
269 void* dst
, size_t dstCapacity
,
270 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
271 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
272 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
273 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
275 BIT_CStream_t blockStream
;
276 FSE_CState_t stateMatchLength
;
277 FSE_CState_t stateOffsetBits
;
278 FSE_CState_t stateLitLength
;
281 ERR_isError(BIT_initCStream(&blockStream
, dst
, dstCapacity
)),
282 dstSize_tooSmall
, "not enough space remaining");
283 DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
284 (int)(blockStream
.endPtr
- blockStream
.startPtr
),
285 (unsigned)dstCapacity
);
288 FSE_initCState2(&stateMatchLength
, CTable_MatchLength
, mlCodeTable
[nbSeq
-1]);
289 FSE_initCState2(&stateOffsetBits
, CTable_OffsetBits
, ofCodeTable
[nbSeq
-1]);
290 FSE_initCState2(&stateLitLength
, CTable_LitLength
, llCodeTable
[nbSeq
-1]);
291 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].litLength
, LL_bits
[llCodeTable
[nbSeq
-1]]);
292 if (MEM_32bits()) BIT_flushBits(&blockStream
);
293 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].matchLength
, ML_bits
[mlCodeTable
[nbSeq
-1]]);
294 if (MEM_32bits()) BIT_flushBits(&blockStream
);
296 U32
const ofBits
= ofCodeTable
[nbSeq
-1];
297 unsigned const extraBits
= ofBits
- MIN(ofBits
, STREAM_ACCUMULATOR_MIN
-1);
299 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offset
, extraBits
);
300 BIT_flushBits(&blockStream
);
302 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offset
>> extraBits
,
305 BIT_addBits(&blockStream
, sequences
[nbSeq
-1].offset
, ofCodeTable
[nbSeq
-1]);
307 BIT_flushBits(&blockStream
);
310 for (n
=nbSeq
-2 ; n
<nbSeq
; n
--) { /* intentional underflow */
311 BYTE
const llCode
= llCodeTable
[n
];
312 BYTE
const ofCode
= ofCodeTable
[n
];
313 BYTE
const mlCode
= mlCodeTable
[n
];
314 U32
const llBits
= LL_bits
[llCode
];
315 U32
const ofBits
= ofCode
;
316 U32
const mlBits
= ML_bits
[mlCode
];
317 DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
318 (unsigned)sequences
[n
].litLength
,
319 (unsigned)sequences
[n
].matchLength
+ MINMATCH
,
320 (unsigned)sequences
[n
].offset
);
323 FSE_encodeSymbol(&blockStream
, &stateOffsetBits
, ofCode
); /* 15 */ /* 15 */
324 FSE_encodeSymbol(&blockStream
, &stateMatchLength
, mlCode
); /* 24 */ /* 24 */
325 if (MEM_32bits()) BIT_flushBits(&blockStream
); /* (7)*/
326 FSE_encodeSymbol(&blockStream
, &stateLitLength
, llCode
); /* 16 */ /* 33 */
327 if (MEM_32bits() || (ofBits
+mlBits
+llBits
>= 64-7-(LLFSELog
+MLFSELog
+OffFSELog
)))
328 BIT_flushBits(&blockStream
); /* (7)*/
329 BIT_addBits(&blockStream
, sequences
[n
].litLength
, llBits
);
330 if (MEM_32bits() && ((llBits
+mlBits
)>24)) BIT_flushBits(&blockStream
);
331 BIT_addBits(&blockStream
, sequences
[n
].matchLength
, mlBits
);
332 if (MEM_32bits() || (ofBits
+mlBits
+llBits
> 56)) BIT_flushBits(&blockStream
);
334 unsigned const extraBits
= ofBits
- MIN(ofBits
, STREAM_ACCUMULATOR_MIN
-1);
336 BIT_addBits(&blockStream
, sequences
[n
].offset
, extraBits
);
337 BIT_flushBits(&blockStream
); /* (7)*/
339 BIT_addBits(&blockStream
, sequences
[n
].offset
>> extraBits
,
340 ofBits
- extraBits
); /* 31 */
342 BIT_addBits(&blockStream
, sequences
[n
].offset
, ofBits
); /* 31 */
344 BIT_flushBits(&blockStream
); /* (7)*/
345 DEBUGLOG(7, "remaining space : %i", (int)(blockStream
.endPtr
- blockStream
.ptr
));
348 DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength
.stateLog
);
349 FSE_flushCState(&blockStream
, &stateMatchLength
);
350 DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits
.stateLog
);
351 FSE_flushCState(&blockStream
, &stateOffsetBits
);
352 DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength
.stateLog
);
353 FSE_flushCState(&blockStream
, &stateLitLength
);
355 { size_t const streamSize
= BIT_closeCStream(&blockStream
);
356 RETURN_ERROR_IF(streamSize
==0, dstSize_tooSmall
, "not enough space");
362 ZSTD_encodeSequences_default(
363 void* dst
, size_t dstCapacity
,
364 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
365 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
366 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
367 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
369 return ZSTD_encodeSequences_body(dst
, dstCapacity
,
370 CTable_MatchLength
, mlCodeTable
,
371 CTable_OffsetBits
, ofCodeTable
,
372 CTable_LitLength
, llCodeTable
,
373 sequences
, nbSeq
, longOffsets
);
379 static TARGET_ATTRIBUTE("bmi2") size_t
380 ZSTD_encodeSequences_bmi2(
381 void* dst
, size_t dstCapacity
,
382 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
383 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
384 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
385 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
)
387 return ZSTD_encodeSequences_body(dst
, dstCapacity
,
388 CTable_MatchLength
, mlCodeTable
,
389 CTable_OffsetBits
, ofCodeTable
,
390 CTable_LitLength
, llCodeTable
,
391 sequences
, nbSeq
, longOffsets
);
396 size_t ZSTD_encodeSequences(
397 void* dst
, size_t dstCapacity
,
398 FSE_CTable
const* CTable_MatchLength
, BYTE
const* mlCodeTable
,
399 FSE_CTable
const* CTable_OffsetBits
, BYTE
const* ofCodeTable
,
400 FSE_CTable
const* CTable_LitLength
, BYTE
const* llCodeTable
,
401 seqDef
const* sequences
, size_t nbSeq
, int longOffsets
, int bmi2
)
403 DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity
);
406 return ZSTD_encodeSequences_bmi2(dst
, dstCapacity
,
407 CTable_MatchLength
, mlCodeTable
,
408 CTable_OffsetBits
, ofCodeTable
,
409 CTable_LitLength
, llCodeTable
,
410 sequences
, nbSeq
, longOffsets
);
414 return ZSTD_encodeSequences_default(dst
, dstCapacity
,
415 CTable_MatchLength
, mlCodeTable
,
416 CTable_OffsetBits
, ofCodeTable
,
417 CTable_LitLength
, llCodeTable
,
418 sequences
, nbSeq
, longOffsets
);