/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* zstd_decompress_block :
 * this module takes care of decompressing _compressed_ block */
/*-*******************************************************
*  Dependencies
*********************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/compiler.h"    /* prefetch */
#include "../common/cpu.h"         /* bmi2 */
#include "../common/mem.h"         /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/zstd_internal.h"
#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
#include "zstd_decompress_block.h"
/*_*******************************************************
*  Macros
**********************************************************/

/* These two optional macros force the use one way or another of the two
 * ZSTD_decompressSequences implementations. You can't force in both directions
 * at the same time.
 */
#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
#endif
/*_*******************************************************
*  Memory operations
**********************************************************/
static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
/*-*************************************************************
 *   Block decoding
 ***************************************************************/
/*! ZSTD_getcBlockSize() :
 *  Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr)
{
    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");

    {   U32 const cBlockHeader = MEM_readLE24(src);
        U32 const cSize = cBlockHeader >> 3;
        bpPtr->lastBlock = cBlockHeader & 1;
        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
        bpPtr->origSize = cSize;   /* only useful for RLE */
        if (bpPtr->blockType == bt_rle) return 1;
        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
        return cSize;
    }
}
/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
{
    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
    {
        /* room for litbuffer to fit without read faulting */
        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_in_dst;
    }
    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
    {
        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
        if (splitImmediately) {
            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
        }
        else {
            /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
        }
        dctx->litBufferLocation = ZSTD_split;
    }
    else
    {
        /* fits entirely within litExtraBuffer, so no split is necessary */
        dctx->litBuffer = dctx->litExtraBuffer;
        dctx->litBufferEnd = dctx->litBuffer + litSize;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    }
}
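/* Editor's note (illustrative summary, not part of the upstream sources):
 * the three placements chosen above are
 *   - ZSTD_in_dst     : literals live in the unused space of dst past the
 *                       current block (non-streaming, large dst only);
 *   - ZSTD_split      : literals start at the end of the current block's dst
 *                       space and spill into dctx->litExtraBuffer;
 *   - ZSTD_not_in_dst : small literal payloads fit entirely inside
 *                       dctx->litExtraBuffer.
 */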
/* Hidden declaration for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,
                          void* dst, size_t dstCapacity, const streaming_operation streaming);
/*! ZSTD_decodeLiteralsBlock() :
 * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
 * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
 * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
 * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
 *
 * @return : nb of bytes read from src (< srcSize )
 *  note : symbol not declared but exposed for fullbench */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
                          void* dst, size_t dstCapacity, const streaming_operation streaming)
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");

    {   const BYTE* const istart = (const BYTE*) src;
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);

        switch(litEncType)
        {
        case set_repeat:
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
            ZSTD_FALLTHROUGH;

        case set_compressed:
            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;
                U32 const lhc = MEM_readLE32(istart);
                size_t hufSuccess;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    /* 2 - 2 - 10 - 10 */
                    singleStream = !lhlCode;
                    lhSize = 3;
                    litSize  = (lhc >> 4) & 0x3FF;
                    litCSize = (lhc >> 14) & 0x3FF;
                    break;
                case 2:
                    /* 2 - 2 - 14 - 14 */
                    lhSize = 4;
                    litSize  = (lhc >> 4) & 0x3FFF;
                    litCSize = lhc >> 18;
                    break;
                case 3:
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
                }

                if (litEncType==set_repeat) {
                    if (singleStream) {
                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    } else {
                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                    }
                } else {
                    if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                        hufSuccess = HUF_decompress1X_DCtx_wksp(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace));
#else
                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
#endif
                    } else {
                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
                            istart+lhSize, litCSize, dctx->workspace,
                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
                    }
                }
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
                }

                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                dctx->litEntropy = 1;
                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
                return litCSize + lhSize;
            }

        case set_basic:
            {   size_t litSize, lhSize;
                U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    break;
                }

                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                    if (dctx->litBufferLocation == ZSTD_split)
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                    }
                    else
                    {
                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
                    }
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
                    return lhSize+litSize;
                }
                /* direct reference into compressed stream */
                dctx->litPtr = istart+lhSize;
                dctx->litSize = litSize;
                dctx->litBufferEnd = dctx->litPtr + litSize;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                return lhSize+litSize;
            }

        case set_rle:
            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                size_t litSize, lhSize;
                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                switch(lhlCode)
                {
                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                    lhSize = 1;
                    litSize = istart[0] >> 3;
                    break;
                case 1:
                    lhSize = 2;
                    litSize = MEM_readLE16(istart) >> 4;
                    break;
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                    break;
                }
                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                if (dctx->litBufferLocation == ZSTD_split)
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
                }
                else
                {
                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
                }
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+litSize;
            }
        default:
            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
}
/* Default FSE distribution tables.
 * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
 * They were generated programmatically with following method :
 * - start from default distributions, present in /lib/common/zstd_internal.h
 * - generate tables normally, using ZSTD_buildFSETable()
 * - printout the content of tables
 * - prettify output, report below, test with fuzzer to ensure it's correct */
/* Default FSE distribution table for Literal Lengths */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     { 1, 1, 1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
     /* nextState, nbAddBits, nbBits, baseVal */
     { 0, 0, 4, 0}, { 16, 0, 4, 0},
     { 32, 0, 5, 1}, { 0, 0, 5, 3},
     { 0, 0, 5, 4}, { 0, 0, 5, 6},
     { 0, 0, 5, 7}, { 0, 0, 5, 9},
     { 0, 0, 5, 10}, { 0, 0, 5, 12},
     { 0, 0, 6, 14}, { 0, 1, 5, 16},
     { 0, 1, 5, 20}, { 0, 1, 5, 22},
     { 0, 2, 5, 28}, { 0, 3, 5, 32},
     { 0, 4, 5, 48}, { 32, 6, 5, 64},
     { 0, 7, 5, 128}, { 0, 8, 6, 256},
     { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
     { 32, 0, 4, 0}, { 0, 0, 4, 1},
     { 0, 0, 5, 2}, { 32, 0, 5, 4},
     { 0, 0, 5, 5}, { 32, 0, 5, 7},
     { 0, 0, 5, 8}, { 32, 0, 5, 10},
     { 0, 0, 5, 11}, { 0, 0, 6, 13},
     { 32, 1, 5, 16}, { 0, 1, 5, 18},
     { 32, 1, 5, 22}, { 0, 2, 5, 24},
     { 32, 3, 5, 32}, { 0, 3, 5, 40},
     { 0, 6, 4, 64}, { 16, 6, 4, 64},
     { 32, 7, 5, 128}, { 0, 9, 6, 512},
     { 0, 11, 6, 2048}, { 48, 0, 4, 0},
     { 16, 0, 4, 1}, { 32, 0, 5, 2},
     { 32, 0, 5, 3}, { 32, 0, 5, 5},
     { 32, 0, 5, 6}, { 32, 0, 5, 8},
     { 32, 0, 5, 9}, { 32, 0, 5, 11},
     { 32, 0, 5, 12}, { 0, 0, 6, 15},
     { 32, 1, 5, 18}, { 32, 1, 5, 20},
     { 32, 2, 5, 24}, { 32, 2, 5, 28},
     { 32, 3, 5, 40}, { 32, 4, 5, 48},
     { 0, 16, 6,65536}, { 0, 15, 6,32768},
     { 0, 14, 6,16384}, { 0, 13, 6, 8192},
};   /* LL_defaultDTable */
/* Default FSE distribution table for Offset Codes */
static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
    { 1, 1, 1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    { 0, 0, 5, 0}, { 0, 6, 4, 61},
    { 0, 9, 5, 509}, { 0, 15, 5,32765},
    { 0, 21, 5,2097149}, { 0, 3, 5, 5},
    { 0, 7, 4, 125}, { 0, 12, 5, 4093},
    { 0, 18, 5,262141}, { 0, 23, 5,8388605},
    { 0, 5, 5, 29}, { 0, 8, 4, 253},
    { 0, 14, 5,16381}, { 0, 20, 5,1048573},
    { 0, 2, 5, 1}, { 16, 7, 4, 125},
    { 0, 11, 5, 2045}, { 0, 17, 5,131069},
    { 0, 22, 5,4194301}, { 0, 4, 5, 13},
    { 16, 8, 4, 253}, { 0, 13, 5, 8189},
    { 0, 19, 5,524285}, { 0, 1, 5, 1},
    { 16, 6, 4, 61}, { 0, 10, 5, 1021},
    { 0, 16, 5,65533}, { 0, 28, 5,268435453},
    { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
    { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
};   /* OF_defaultDTable */
/* Default FSE distribution table for Match Lengths */
static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
    { 1, 1, 1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    { 0, 0, 6, 3}, { 0, 0, 4, 4},
    { 32, 0, 5, 5}, { 0, 0, 5, 6},
    { 0, 0, 5, 8}, { 0, 0, 5, 9},
    { 0, 0, 5, 11}, { 0, 0, 6, 13},
    { 0, 0, 6, 16}, { 0, 0, 6, 19},
    { 0, 0, 6, 22}, { 0, 0, 6, 25},
    { 0, 0, 6, 28}, { 0, 0, 6, 31},
    { 0, 0, 6, 34}, { 0, 1, 6, 37},
    { 0, 1, 6, 41}, { 0, 2, 6, 47},
    { 0, 3, 6, 59}, { 0, 4, 6, 83},
    { 0, 7, 6, 131}, { 0, 9, 6, 515},
    { 16, 0, 4, 4}, { 0, 0, 4, 5},
    { 32, 0, 5, 6}, { 0, 0, 5, 7},
    { 32, 0, 5, 9}, { 0, 0, 5, 10},
    { 0, 0, 6, 12}, { 0, 0, 6, 15},
    { 0, 0, 6, 18}, { 0, 0, 6, 21},
    { 0, 0, 6, 24}, { 0, 0, 6, 27},
    { 0, 0, 6, 30}, { 0, 0, 6, 33},
    { 0, 1, 6, 35}, { 0, 1, 6, 39},
    { 0, 2, 6, 43}, { 0, 3, 6, 51},
    { 0, 4, 6, 67}, { 0, 5, 6, 99},
    { 0, 8, 6, 259}, { 32, 0, 4, 4},
    { 48, 0, 4, 4}, { 16, 0, 4, 5},
    { 32, 0, 5, 7}, { 32, 0, 5, 8},
    { 32, 0, 5, 10}, { 32, 0, 5, 11},
    { 0, 0, 6, 14}, { 0, 0, 6, 17},
    { 0, 0, 6, 20}, { 0, 0, 6, 23},
    { 0, 0, 6, 26}, { 0, 0, 6, 29},
    { 0, 0, 6, 32}, { 0, 16, 6,65539},
    { 0, 15, 6,32771}, { 0, 14, 6,16387},
    { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
    { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
};   /* ML_defaultDTable */
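/* Editor's illustrative note (not part of the upstream sources): each
 * ZSTD_seqSymbol row above is read as { nextState, nbAddBits, nbBits, baseVal }.
 * Taking the LL_defaultDTable entry { 0, 7, 5, 128 } as an example, a sequence
 * that lands on that cell decodes
 *     litLength = 128 + BIT_readBits(&DStream, 7)        (i.e. 128..255)
 * and the next literal-length FSE state is 0 + BIT_readBits(&DStream, 5).
 */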
static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
{
    void* ptr = dt;
    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
    ZSTD_seqSymbol* const cell = dt + 1;

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->nbBits = 0;
    cell->nextState = 0;
    assert(nbAddBits < 255);
    cell->nbAdditionalBits = nbAddBits;
    cell->baseValue = baseValue;
}
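/* Editor's note (illustrative, not part of the upstream sources): for an RLE
 * sequence section the whole "table" is the single cell at dt+1 with tableLog 0,
 * so state updates read 0 bits, decoding always lands on that cell, and every
 * sequence reuses the same baseValue / nbAdditionalBits pair.
 */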
/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)
 * cannot fail if input is valid =>
 * all inputs are presumed validated at this stage */
FORCE_INLINE_TEMPLATE
void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_seqSymbol* const tableDecode = dt+1;
    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;

    U16* symbolNext = (U16*)wksp;
    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
    U32 highThreshold = tableSize - 1;

    /* Sanity Checks */
    assert(maxSymbolValue <= MaxSeq);
    assert(tableLog <= MaxFSELog);
    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
    (void)wkspSize;
    /* Init, lay down lowprob symbols */
    {   ZSTD_seqSymbol_header DTableH;
        DTableH.tableLog = tableLog;
        DTableH.fastMode = 1;
        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
            U32 s;
            for (s=0; s<maxSV1; s++) {
                if (normalizedCounter[s]==-1) {
                    tableDecode[highThreshold--].baseValue = s;
                    symbolNext[s] = 1;
                } else {
                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
                    assert(normalizedCounter[s]>=0);
                    symbolNext[s] = (U16)normalizedCounter[s];
        }   }   }
        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
    }
    /* Spread symbols */
    assert(tableSize <= 512);
    /* Specialized symbol spreading for the case when there are
     * no low probability (-1 count) symbols. When compressing
     * small blocks we avoid low probability symbols to hit this
     * case, since header decoding speed matters more.
     */
    if (highThreshold == tableSize - 1) {
        size_t const tableMask = tableSize-1;
        size_t const step = FSE_TABLESTEP(tableSize);
        /* First lay down the symbols in order.
         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
         * misses since small blocks generally have small table logs, so nearly
         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
         * our buffer to handle the over-write.
         */
        {
            U64 const add = 0x0101010101010101ull;
            size_t pos = 0;
            U64 sv = 0;
            U32 s;
            for (s=0; s<maxSV1; ++s, sv += add) {
                int i;
                int const n = normalizedCounter[s];
                MEM_write64(spread + pos, sv);
                for (i = 8; i < n; i += 8) {
                    MEM_write64(spread + pos + i, sv);
                }
                pos += n;
            }
        }
        /* Now we spread those positions across the table.
         * The benefit of doing it in two stages is that we avoid the
         * variable size inner loop, which caused lots of branch misses.
         * Now we can run through all the positions without any branch misses.
         * We unroll the loop twice, since that is what empirically worked best.
         */
        {
            size_t position = 0;
            size_t s;
            size_t const unroll = 2;
            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
            for (s = 0; s < (size_t)tableSize; s += unroll) {
                size_t u;
                for (u = 0; u < unroll; ++u) {
                    size_t const uPosition = (position + (u * step)) & tableMask;
                    tableDecode[uPosition].baseValue = spread[s + u];
                }
                position = (position + (unroll * step)) & tableMask;
            }
            assert(position == 0);
        }
    } else {
        U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
            int i;
            int const n = normalizedCounter[s];
            for (i=0; i<n; i++) {
                tableDecode[position].baseValue = s;
                position = (position + step) & tableMask;
                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
        }   }
        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
    }
    /* Build Decoding table */
    {
        U32 u;
        for (u=0; u<tableSize; u++) {
            U32 const symbol = tableDecode[u].baseValue;
            U32 const nextState = symbolNext[symbol]++;
            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
            assert(nbAdditionalBits[symbol] < 255);
            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
            tableDecode[u].baseValue = baseValue[symbol];
        }
    }
}
/* Avoids the FORCE_INLINE of the _body() function. */
static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}

#if DYNAMIC_BMI2
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize)
{
    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
#endif
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U8* nbAdditionalBits,
            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
{
#if DYNAMIC_BMI2
    if (bmi2) {
        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
        return;
    }
#endif
    (void)bmi2;
    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
}
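/* Editor's note (illustrative sketch, not part of the upstream sources): a
 * typical caller builds, for instance, a literal-length table from a freshly
 * read normalized count; the scratch space needs ZSTD_BUILD_FSE_TABLE_WKSP_SIZE
 * bytes:
 *
 *     U32 wksp[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
 *     ZSTD_buildFSETable(entropy->LLTable, norm, maxSymbolValue,
 *                        LL_base, LL_bits, tableLog,
 *                        wksp, sizeof(wksp), 0);    (final argument = bmi2 flag)
 */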
/*! ZSTD_buildSeqTable() :
 * @return : nb bytes read from src,
 *           or an error code if it fails */
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
                                 const void* src, size_t srcSize,
                                 const U32* baseValue, const U8* nbAdditionalBits,
                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
                                 int bmi2)
{
    switch(type)
    {
    case set_rle :
        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
        {   U32 const symbol = *(const BYTE*)src;
            U32 const baseline = baseValue[symbol];
            U8 const nbBits = nbAdditionalBits[symbol];
            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
        }
        *DTablePtr = DTableSpace;
        return 1;
    case set_basic :
        *DTablePtr = defaultTable;
        return 0;
    case set_repeat:
        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
        /* prefetch FSE table if used */
        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
            const void* const pStart = *DTablePtr;
            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
            PREFETCH_AREA(pStart, pSize);
        }
        return 0;
    case set_compressed :
        {   unsigned tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
            *DTablePtr = DTableSpace;
            return headerSize;
        }
    default :
        assert(0);
        RETURN_ERROR(GENERIC, "impossible");
    }
}
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
            ip+=2;
        } else {
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;
    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;

        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }

        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }

        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq,
                                                      dctx->workspace, sizeof(dctx->workspace),
                                                      ZSTD_DCtx_get_bmi2(dctx));
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}
typedef struct {
    size_t   state;
    const ZSTD_seqSymbol* table;
} ZSTD_fseState;

typedef struct {
    BIT_DStream_t DStream;
    ZSTD_fseState stateLL;
    ZSTD_fseState stateOffb;
    ZSTD_fseState stateML;
    size_t prevOffset[ZSTD_REP_NUM];
} seqState_t;
/*! ZSTD_overlapCopy8() :
 *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
 *  If the offset is < 8 then the offset is spread to at least 8 bytes.
 *
 *  Precondition: *ip <= *op
 *  Postcondition: *op - *ip >= 8
 */
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        (*op)[0] = (*ip)[0];
        (*op)[1] = (*ip)[1];
        (*op)[2] = (*ip)[2];
        (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;
    } else {
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
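/* Editor's note (illustrative, not part of the upstream sources): with
 * offset == 1 the path above writes 4 bytes one at a time, then 4 more via
 * ZSTD_copy4 from an adjusted source, and finally advances *op by 8 while *ip
 * advances by only dec32table[1] - dec64table[1] + 8 == 1; after one call the
 * effective distance *op - *ip has grown from 1 to 8, so later copies can
 * safely proceed 8 bytes at a time.
 */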
/*! ZSTD_safecopy() :
 *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
 *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
 *  This function is only called in the uncommon case where the sequence is near the end of the block. It
 *  should be fast for a single long sequence, but can be slow for several short sequences.
 *
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
 *           The src buffer must be before the dst buffer.
 */
static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        length -= 8;
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        ip += oend_w - op;
        op += oend_w - op;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_safecopyDstBeforeSrc():
 * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
 * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    if (length < 8 || diff > -8) {
        /* Handle short lengths, close overlaps, and dst not before src. */
        while (op < oend) *op++ = *ip++;
        return;
    }

    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
        ip += oend - WILDCOPY_OVERLENGTH - op;
        op += oend - WILDCOPY_OVERLENGTH - op;
    }

    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEnd(BYTE* op,
    BYTE* const oend, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
/* ZSTD_execSequenceEndSplitLitBuffer():
 * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart - match);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
    BYTE* const oend, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLIMIT of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
    const BYTE** litPtr, const BYTE* const litLimit,
    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLIMIT of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            ZSTD_memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
    }   }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
static void
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
    const void* ptr = dt;
    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
                (U32)DStatePtr->state, DTableH->tableLog);
    BIT_reloadDStream(bitD);
    DStatePtr->table = dt + 1;
}
FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
{
    size_t const lowBits = BIT_readBits(bitD, nbBits);
    DStatePtr->state = nextState + lowBits;
}
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bytes we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)

typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
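/* Editor's illustrative note (not part of the upstream sources): on 32-bit
 * builds the bit accumulator only guarantees STREAM_ACCUMULATOR_MIN_32 (25)
 * valid bits between reloads, while an offset code may need up to
 * ZSTD_WINDOWLOG_MAX_32 - 1 (29) extra bits. The macro above therefore
 * evaluates to 30 - 25 = 5: the long-offset path below reads
 * (ofBits - extraBits) bits, reloads, then reads at most these 5 leftover bits.
 */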
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
{
    seq_t seq;
    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
    seq.matchLength = mlDInfo->baseValue;
    seq.litLength = llDInfo->baseValue;
const ofBase
= ofDInfo
->baseValue
;
1178 BYTE
const llBits
= llDInfo
->nbAdditionalBits
;
1179 BYTE
const mlBits
= mlDInfo
->nbAdditionalBits
;
1180 BYTE
const ofBits
= ofDInfo
->nbAdditionalBits
;
1181 BYTE
const totalBits
= llBits
+mlBits
+ofBits
;
1183 U16
const llNext
= llDInfo
->nextState
;
1184 U16
const mlNext
= mlDInfo
->nextState
;
1185 U16
const ofNext
= ofDInfo
->nextState
;
1186 U32
const llnbBits
= llDInfo
->nbBits
;
1187 U32
const mlnbBits
= mlDInfo
->nbBits
;
1188 U32
const ofnbBits
= ofDInfo
->nbBits
;
        /*
         * As gcc has better branch and block analyzers, sometimes it is only
         * valuable to mark likelyness for clang, it gives around 3-4% of
         * performance.
         */

        /* sequence */
        {   size_t offset;
#if defined(__clang__)
            if (LIKELY(ofBits > 1)) {
#else
            if (ofBits > 1) {
#endif
                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
                assert(ofBits <= MaxOff);
                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                    BIT_reloadDStream(&seqState->DStream);
                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
                } else {
                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                }
                seqState->prevOffset[2] = seqState->prevOffset[1];
                seqState->prevOffset[1] = seqState->prevOffset[0];
                seqState->prevOffset[0] = offset;
            } else {
                U32 const ll0 = (llDInfo->baseValue == 0);
                if (LIKELY((ofBits == 0))) {
                    offset = seqState->prevOffset[ll0];
                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
                    seqState->prevOffset[0] = offset;
                } else {
                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                        seqState->prevOffset[1] = seqState->prevOffset[0];
                        seqState->prevOffset[0] = offset = temp;
            }   }   }
            seq.offset = offset;
        }
#if defined(__clang__)
        if (UNLIKELY(mlBits > 0))
#else
        if (mlBits > 0)
#endif
            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);

        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
            BIT_reloadDStream(&seqState->DStream);
        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
            BIT_reloadDStream(&seqState->DStream);
        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

#if defined(__clang__)
        if (UNLIKELY(llBits > 0))
#else
        if (llBits > 0)
#endif
            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);

        if (MEM_32bits())
            BIT_reloadDStream(&seqState->DStream);

        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
    }

    return seq;
}
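/* Editor's illustrative note (not part of the upstream sources): the repeat
 * offset handling above (the ofBits <= 1 branch) rotates a 3-entry history.
 * With seqState->prevOffset = {8, 16, 24} for example:
 *   - ofBits == 0 and litLength > 0 (ll0 == 0): offset = prevOffset[0] = 8,
 *     history unchanged;
 *   - ofBits == 0 and litLength == 0 (ll0 == 1): offset = prevOffset[1] = 16,
 *     history becomes {16, 8, 24};
 *   - ofBits == 1: the extra bit plus ll0 selects prevOffset[1], prevOffset[2]
 *     or prevOffset[0] - 1, and the chosen value is rotated to the front.
 */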
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
{
    size_t const windowSize = dctx->fParams.windowSize;
    /* No dictionary used. */
    if (dctx->dictContentEndForFuzzing == NULL) return 0;
    /* Dictionary is our prefix. */
    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
    /* Dictionary is not our ext-dict. */
    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
    /* Dictionary is not within our window size. */
    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
    /* Dictionary is active. */
    return 1;
}
MEM_STATIC void ZSTD_assertValidSequence(
        ZSTD_DCtx const* dctx,
        BYTE const* op, BYTE const* oend,
        seq_t const seq,
        BYTE const* prefixStart, BYTE const* virtualStart)
{
#if DEBUGLEVEL >= 1
    size_t const windowSize = dctx->fParams.windowSize;
    size_t const sequenceSize = seq.litLength + seq.matchLength;
    BYTE const* const oLitEnd = op + seq.litLength;
    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
    assert(op <= oend);
    assert((size_t)(oend - op) >= sequenceSize);
    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
        /* Offset must be within the dictionary. */
        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
        assert(seq.offset <= windowSize + dictSize);
    } else {
        /* Offset must be within our window. */
        assert(seq.offset <= windowSize);
    }
#else
    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
#endif
}
#endif /* FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG

FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* litBufferEnd = dctx->litBufferEnd;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
    (void)frame;
    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);
        /* decompress without overrunning litPtr begins */
        {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            /* Align the decompression loop to 32 + 16 bytes.
             *
             * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
             * speed swings based on the alignment of the decompression loop. This
             * performance swing is caused by parts of the decompression loop falling
             * out of the DSB. The entire decompression loop should fit in the DSB,
             * when it can't we get much worse performance. You can measure if you've
             * hit the good case or the bad case with this perf command for some
             * compressed file test.zst:
             *
             *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
             *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
             *
             * If you see most cycles served out of the MITE you've hit the bad case.
             * If you see most cycles served out of the DSB you've hit the good case.
             * If it is pretty even then you may be in an okay case.
             *
             * This issue has been reproduced on the following CPUs:
             * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
             *             Use Instruments->Counters to get DSB/MITE cycles.
             *             I never got performance swings, but I was able to
             *             go from the good case of mostly DSB to half of the
             *             cycles served from MITE.
             * - Coffeelake: Intel i9-9900k
             * - Coffeelake: Intel i7-9700k
             *
             * I haven't been able to reproduce the instability or DSB misses on any
             * of the following CPUS:
             * - Haswell
             * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
             * - Skylake
             *
             * Alignment is done for each of the three major decompression loops:
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
             *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
             *   - ZSTD_decompressSequences_body
             * Alignment choices are made to minimize large swings on bad cases and influence on performance
             * from changes external to this code, rather than to overoptimize on the current commit.
             *
             * If you are seeing performance stability this script can help test.
             * It tests on 4 commits in zstd where I saw performance change.
             *
             * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
             */
#if defined(__x86_64__)
            __asm__(".p2align 6");
#  if __GNUC__ >= 7
            /* good for gcc-7, gcc-9, and gcc-11 */
            __asm__("nop");
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 4");
#    if __GNUC__ == 8 || __GNUC__ == 10
            /* good for gcc-8 and gcc-10 */
            __asm__("nop");
            __asm__(".p2align 3");
#    endif
#  endif
#endif
1423 for (; litPtr
+ sequence
.litLength
<= dctx
->litBufferEnd
; ) {
1424 size_t const oneSeqSize
= ZSTD_execSequenceSplitLitBuffer(op
, oend
, litPtr
+ sequence
.litLength
- WILDCOPY_OVERLENGTH
, sequence
, &litPtr
, litBufferEnd
, prefixStart
, vBase
, dictEnd
);
1425 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1426 assert(!ZSTD_isError(oneSeqSize
));
1427 if (frame
) ZSTD_assertValidSequence(dctx
, op
, oend
, sequence
, prefixStart
, vBase
);
1429 if (UNLIKELY(ZSTD_isError(oneSeqSize
)))
1431 DEBUGLOG(6, "regenerated sequence size : %u", (U32
)oneSeqSize
);
1433 if (UNLIKELY(!--nbSeq
))
1435 BIT_reloadDStream(&(seqState
.DStream
));
1436 sequence
= ZSTD_decodeSequence(&seqState
, isLongOffset
);
            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
            if (nbSeq > 0) {
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequence.litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                    assert(!ZSTD_isError(oneSeqSize));
                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                        return oneSeqSize;
                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                    op += oneSeqSize;
                    if (--nbSeq)
                        BIT_reloadDStream(&(seqState.DStream));
                }
            }
        }
        if (nbSeq > 0) /* there is remaining lit from extra buffer */
        {
#if defined(__x86_64__)
            __asm__(".p2align 6");
            __asm__("nop");
#  if __GNUC__ != 7
            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
            __asm__(".p2align 4");
            __asm__("nop");
            __asm__(".p2align 3");
#  elif __GNUC__ >= 11
            __asm__(".p2align 3");
#  else
            __asm__(".p2align 5");
            __asm__("nop");
            __asm__(".p2align 3");
#  endif
#endif
            for (; ; ) {
                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                    return oneSeqSize;
                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                op += oneSeqSize;
                if (UNLIKELY(!--nbSeq))
                    break;
                BIT_reloadDStream(&(seqState.DStream));
            }
        }
1506 DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq
);
1507 RETURN_ERROR_IF(nbSeq
, corruption_detected
, "");
1508 RETURN_ERROR_IF(BIT_reloadDStream(&seqState
.DStream
) < BIT_DStream_completed
, corruption_detected
, "");
1509 /* save reps for next block */
1510 { U32 i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) dctx
->entropy
.rep
[i
] = (U32
)(seqState
.prevOffset
[i
]); }
    /* last literal segment */
    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
    {
        size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
        litPtr = dctx->litExtraBuffer;
        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
        dctx->litBufferLocation = ZSTD_not_in_dst;
    }
    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }

    return op-ostart;
}
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
    void* dst, size_t maxDstSize,
    const void* seqStart, size_t seqSize, int nbSeq,
    const ZSTD_longOffset_e isLongOffset,
    const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
    DEBUGLOG(5, "ZSTD_decompressSequences_body");
    (void)frame;
    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        dctx->fseEntropy = 1;
        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);
#if defined(__x86_64__)
        __asm__(".p2align 6");
        __asm__("nop");
#  if __GNUC__ >= 7
        __asm__(".p2align 5");
        __asm__("nop");
        __asm__(".p2align 3");
#  else
        __asm__(".p2align 4");
        __asm__("nop");
        __asm__(".p2align 3");
#  endif
#endif
        for ( ; ; ) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
            assert(!ZSTD_isError(oneSeqSize));
            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                return oneSeqSize;

            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
            op += oneSeqSize;
            if (UNLIKELY(!--nbSeq))
                break;
            BIT_reloadDStream(&(seqState.DStream));
        }
        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }

    return op-ostart;
}
static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}

static size_t
ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
                                               void* dst, size_t maxDstSize,
                                         const void* seqStart, size_t seqSize, int nbSeq,
                                         const ZSTD_longOffset_e isLongOffset,
                                         const int frame)
{
    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT

FORCE_INLINE_TEMPLATE size_t
ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
                   const BYTE* const prefixStart, const BYTE* const dictEnd)
{
    prefetchPos += sequence.litLength;
    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
        const BYTE* const match = matchBase + prefetchPos - sequence.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
                                                                                * No consequence though : memory address is only used for prefetching, not for dereferencing */
        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
    }
    return prefetchPos + sequence.matchLength;
}
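
/* Editor's note (worked example with hypothetical values, not part of the upstream source) :
 * prefetchPos tracks the position op will reach, expressed relative to prefixStart.
 * For a sequence with litLength=5, offset=100, matchLength=20 and prefetchPos=1000 on entry :
 *    prefetchPos += 5                          ->  1005  (where the match will be copied to)
 *    matchBase    = prefixStart                (offset <= prefetchPos, so the match is in the prefix)
 *    match        = prefixStart + 1005 - 100   =   prefixStart + 905
 *    PREFETCH_L1(match); PREFETCH_L1(match + CACHELINE_SIZE);
 *    return 1005 + 20                          ->  1025  (position after this sequence)
 * Had offset been larger than prefetchPos, the match would be addressed from dictEnd instead. */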
/* This decoding function employs prefetching
 * to reduce latency impact of cache misses.
 * It's generally employed when block contains a significant portion of long-distance matches
 * or when coupled with a "cold" dictionary */
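/* Editor's note (sketch of the control flow below, not part of the upstream source) :
 * decoding and execution are pipelined through a small ring buffer of STORED_SEQS entries :
 *    1) decode the first ADVANCED_SEQS sequences, prefetching each match;
 *    2) for each following sequence : decode it, prefetch its match, and execute the
 *       sequence decoded ADVANCED_SEQS iterations earlier
 *       (sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK]);
 *    3) drain the ring buffer ("finish queue").
 * The intent is that a match has (hopefully) reached L1 by the time it is copied. */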
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
                               ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* litBufferEnd = dctx->litBufferEnd;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    (void)frame;
    /* Regen sequences */
    if (nbSeq) {
#define STORED_SEQS 8
#define STORED_SEQS_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS STORED_SEQS
        seq_t sequences[STORED_SEQS];
        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
        seqState_t seqState;
        int seqNb;
        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */

        dctx->fseEntropy = 1;
        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        assert(dst != NULL);
        assert(iend >= ip);
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        /* prepare in advance */
        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
            sequences[seqNb] = sequence;
        }
        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
        /* decompress without stomping litBuffer */
        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
            size_t oneSeqSize;

            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
            {
                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;

                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
                sequences[seqNb & STORED_SEQS_MASK] = sequence;
                op += oneSeqSize;
            }
            else
            {
                /* lit buffer is either wholly contained in first or second split, or not split at all*/
                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;

                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
                sequences[seqNb & STORED_SEQS_MASK] = sequence;
                op += oneSeqSize;
            }
        }
        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
        /* finish queue */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
            {
                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                if (leftoverLit)
                {
                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                    sequence->litLength -= leftoverLit;
                    op += leftoverLit;
                }
                litPtr = dctx->litExtraBuffer;
                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                dctx->litBufferLocation = ZSTD_not_in_dst;
                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                    assert(!ZSTD_isError(oneSeqSize));
                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                    op += oneSeqSize;
                }
            }
            else
            {
                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                assert(!ZSTD_isError(oneSeqSize));
                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                op += oneSeqSize;
            }
        }
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }
    /* last literal segment */
    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
    {
        size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
        litPtr = dctx->litExtraBuffer;
        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
    }
    {   size_t const lastLLSize = litBufferEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
        if (op != NULL) {
            ZSTD_memmove(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }

    return op-ostart;
}
static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                     void* dst, size_t maxDstSize,
                               const void* seqStart, size_t seqSize, int nbSeq,
                               const ZSTD_longOffset_e isLongOffset,
                               const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
#if DYNAMIC_BMI2

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset,
                           const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

#endif /* DYNAMIC_BMI2 */
typedef size_t (*ZSTD_decompressSequences_t)(
                            ZSTD_DCtx* dctx,
                            void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
                            const ZSTD_longOffset_e isLongOffset,
                            const int frame);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                   const void* seqStart, size_t seqSize, int nbSeq,
                   const ZSTD_longOffset_e isLongOffset,
                   const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequences");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static size_t
ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                                 const void* seqStart, size_t seqSize, int nbSeq,
                                 const ZSTD_longOffset_e isLongOffset,
                                 const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decompressSequencesLong() :
 * decompression function triggered when a minimum share of offsets is considered "long",
 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
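/* Editor's note (clarification, not part of the upstream source) : the two senses of "long"
 * use different thresholds. The bitstream-register sense is driven by STREAM_ACCUMULATOR_MIN
 * (an issue essentially for 32-bit builds, see isLongOffset in ZSTD_decompressBlock_internal),
 * while the cache-distance sense counted by ZSTD_getLongOffsetsShare() below flags offsets
 * needing more than 22 extra bits, i.e. offsets of roughly 1<<23 (8 MiB) and beyond. */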
static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                       const void* seqStart, size_t seqSize, int nbSeq,
                       const ZSTD_longOffset_e isLongOffset,
                       const int frame)
{
    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
#if DYNAMIC_BMI2
    if (ZSTD_DCtx_get_bmi2(dctx)) {
        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
/* ZSTD_getLongOffsetsShare() :
 * condition : offTable must be valid
 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
 *           compared to maximum possible of (1<<OffFSELog) */
static unsigned
ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
{
    const void* ptr = offTable;
    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
    const ZSTD_seqSymbol* table = offTable + 1;
    U32 const max = 1 << tableLog;
    U32 u, total = 0;
    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);

    assert(max <= (1 << OffFSELog));  /* max not too large */
    for (u=0; u<max; u++) {
        if (table[u].nbAdditionalBits > 22) total += 1;
    }
    assert(tableLog <= OffFSELog);
    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */

    return total;
}
#endif
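
/* Editor's note (worked example with hypothetical values, not part of the upstream source) :
 * with tableLog = 5 there are 32 table entries; if 2 of them carry nbAdditionalBits > 22,
 * then total = 2 before scaling, and total << (OffFSELog - 5) = 2 << 3 = 16 when OffFSELog
 * is 8, i.e. a share of 16 out of a maximum of (1<<OffFSELog) = 256, about 6.25%. */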
size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                      void* dst, size_t dstCapacity,
                const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
{   /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
    /* isLongOffset must be true if there are long offsets.
     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
     * We don't expect that to be the case in 64-bit mode.
     * In block mode, window size is not known, so we have to be conservative.
     * (note: but it could be evaluated from current-lowLimit)
     */
    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
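    /* Editor's note (illustration, not part of the upstream source) : on 64-bit builds
     * MEM_32bits() is 0, so isLongOffset is always ZSTD_lo_isRegularOffset. On 32-bit builds,
     * a window larger than 1<<STREAM_ACCUMULATOR_MIN (e.g. a 64 MB window) selects
     * ZSTD_lo_isLongOffset, which changes how ZSTD_decodeSequence() reads the extra offset
     * bits so they never exceed what the bit accumulator can deliver in one read. */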
    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

    /* Decode literals section */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
        if (ZSTD_isError(litCSize)) return litCSize;
        ip += litCSize;
        srcSize -= litCSize;
    }
    /* Build Decoding Tables */
    {
        /* These macros control at build-time which decompressor implementation
         * we use. If neither is defined, we do some inspection and dispatch at
         * runtime.
         */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        int usePrefetchDecoder = dctx->ddictIsCold;
#endif
        int nbSeq;
        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
        if (ZSTD_isError(seqHSize)) return seqHSize;
        ip += seqHSize;
        srcSize -= seqHSize;

        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if ( !usePrefetchDecoder
          && (!frame || (dctx->fParams.windowSize > (1<<24)))
          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
            usePrefetchDecoder = (shareLongOffsets >= minShare);
        }
#endif
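
        /* Editor's note (arithmetic check, not part of the upstream source) : the share returned
         * by ZSTD_getLongOffsetsShare() is scaled to (1<<OffFSELog) = 256, so minShare = 7
         * corresponds to 7/256 ~= 2.73% and minShare = 20 to 20/256 ~= 7.81%, matching the
         * percentages quoted above. */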
        dctx->ddictIsCold = 0;

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if (usePrefetchDecoder)
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
        /* else */
        if (dctx->litBufferLocation == ZSTD_split)
            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
        else
            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif
    }
}
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
{
    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
        dctx->dictEnd = dctx->previousDstEnd;
        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
        dctx->prefixStart = dst;
        dctx->previousDstEnd = dst;
    }
}
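
/* Editor's note (hypothetical scenario, not part of the upstream source) : if the previous
 * block ended at address A (dctx->previousDstEnd == A) and the next block is written to a
 * different buffer starting at B, the code above re-labels the already-decoded region as an
 * external dictionary segment :
 *    dictEnd      = A
 *    virtualStart = B - (A - old prefixStart)   (keeps offsets into the old segment consistent)
 *    prefixStart  = previousDstEnd = B
 * Matches whose offset reaches back past B are then served from the old segment. */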
size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    size_t dSize;
    ZSTD_checkContinuity(dctx, dst, dstCapacity);
    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
    dctx->previousDstEnd = (char*)dst + dSize;
;