/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/mem.h"
#include "hist.h"           /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
#include "../common/fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "zstd_compress_internal.h"
#include "zstd_compress_sequences.h"
#include "zstd_compress_literals.h"
#include "zstd_fast.h"
#include "zstd_double_fast.h"
#include "zstd_lazy.h"
#include "zstd_opt.h"
#include "zstd_ldm.h"
#include "zstd_compress_superblock.h"
/* ***************************************************************
*  Tuning parameters
*****************************************************************/
/*!
 * COMPRESS_HEAPMODE :
 * Select how the default compression function ZSTD_compress() allocates its context,
 * on stack (0, default), or into heap (1).
 * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
 */
#ifndef ZSTD_COMPRESS_HEAPMODE
#  define ZSTD_COMPRESS_HEAPMODE 0
#endif
/*!
 * ZSTD_HASHLOG3_MAX :
 * Maximum size of the hash table dedicated to finding 3-byte matches,
 * in log format, aka 17 => 1 << 17 == 128Ki positions.
 * This structure is only used in zstd_opt.
 * Since allocation is centralized for all strategies, it has to be known here.
 * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
 * so that zstd_opt.c doesn't need to know about this constant.
 */
#ifndef ZSTD_HASHLOG3_MAX
#  define ZSTD_HASHLOG3_MAX 17
#endif
/*-*************************************
*  Helper functions
***************************************/
/* ZSTD_compressBound()
 * Note that the result from this function is only compatible with the "normal"
 * full-block strategy.
 * When there are a lot of small blocks due to frequent flushes in streaming mode,
 * the overhead of headers can make the compressed data larger than the
 * return value of ZSTD_compressBound().
 */
size_t ZSTD_compressBound(size_t srcSize) {
    return ZSTD_COMPRESSBOUND(srcSize);
}
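
/* A minimal usage sketch (illustrative, not part of the library): callers
 * typically size their destination buffer with ZSTD_compressBound() before a
 * one-shot compression. `src`/`srcSize` below are hypothetical caller data.
 *
 *     size_t const dstCapacity = ZSTD_compressBound(srcSize);
 *     void* const dst = malloc(dstCapacity);
 *     size_t const cSize = ZSTD_compress(dst, dstCapacity, src, srcSize, 3);
 *     if (ZSTD_isError(cSize)) { ... handle error ... }
 */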
/*-*************************************
* Context memory management
***************************************/
struct ZSTD_CDict_s {
    const void* dictContent;
    size_t dictContentSize;
    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
    ZSTD_cwksp workspace;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;
    U32 dictID;
    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
    ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
                                           * row-based matchfinder. Unless the cdict is reloaded, we will use
                                           * the same greedy/lazy matchfinder at compression time.
                                           */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
{
    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
}

static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
{
    assert(cctx != NULL);
    ZSTD_memset(cctx, 0, sizeof(*cctx));
    cctx->customMem = memManager;
    cctx->bmi2 = ZSTD_cpuSupportsBmi2();
    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
        assert(!ZSTD_isError(err));
        (void)err;
    }
}
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
    ZSTD_STATIC_ASSERT(zcss_init==0);
    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
        if (!cctx) return NULL;
        ZSTD_initCCtx(cctx, customMem);
        return cctx;
    }
}
ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
{
    ZSTD_cwksp ws;
    ZSTD_CCtx* cctx;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);

    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
    if (cctx == NULL) return NULL;

    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
    ZSTD_cwksp_move(&cctx->workspace, &ws);
    cctx->staticSize = workspaceSize;

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
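
/* Illustrative sketch of the static-allocation path (assumes the
 * ZSTD_STATIC_LINKING_ONLY API; `wksp` is a hypothetical caller buffer):
 *
 *     static size_t wksp[(1 << 20) / sizeof(size_t)];  // size_t[] keeps it 8-byte aligned
 *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, sizeof(wksp));
 *     if (cctx == NULL) { ... workspace too small or misaligned ... }
 *     // cctx lives inside wksp: ZSTD_freeCCtx(cctx) would fail, since staticSize != 0.
 */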
/**
 * Clears and frees all of the dictionaries in the CCtx.
 */
static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
{
    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
    ZSTD_freeCDict(cctx->localDict.cdict);
    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
    cctx->cdict = NULL;
}
static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
{
    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
    return bufferSize + cdictSize;
}
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);
    ZSTD_clearAllDicts(cctx);
    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
}
size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support free on NULL */
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "not compatible with static CCtx");
    {   int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
        ZSTD_freeCCtxContent(cctx);
        if (!cctxInWorkspace) {
            ZSTD_customFree(cctx, cctx->customMem);
        }
    }
    return 0;
}
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
    /* single-threaded build: there is no mtctx to account for */
    (void)cctx;
    return 0;
}
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
    if (cctx==NULL) return 0;   /* support sizeof on NULL */
    /* cctx may be in the workspace */
    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
           + ZSTD_cwksp_sizeof(&cctx->workspace)
           + ZSTD_sizeof_localDict(cctx->localDict)
           + ZSTD_sizeof_mtctx(cctx);
}
size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
{
    return ZSTD_sizeof_CCtx(zcs);  /* same object */
}
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
/* Returns true if the strategy supports using a row based matchfinder */
static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
}
/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
 * for this compression.
 */
static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
    assert(mode != ZSTD_ps_auto);
    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
}
/* Returns row matchfinder usage given an initial mode and cParams */
static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
                                                         const ZSTD_compressionParameters* const cParams) {
#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
    int const kHasSIMD128 = 1;
#else
    int const kHasSIMD128 = 0;
#endif
    if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
    mode = ZSTD_ps_disable;
    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
    if (kHasSIMD128) {
        if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
    } else {
        if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
    }
    return mode;
}
/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
                                                        const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
}
/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
                                   const ZSTD_paramSwitch_e useRowMatchFinder,
                                   const U32 forDDSDict) {
    assert(useRowMatchFinder != ZSTD_ps_auto);
    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
     */
    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
}
/* Returns ZSTD_ps_enable if compression parameters are such that we should
 * enable long distance matching (wlog >= 27, strategy >= btopt).
 * Returns ZSTD_ps_disable otherwise.
 */
static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
                                                const ZSTD_compressionParameters* const cParams) {
    if (mode != ZSTD_ps_auto) return mode;
    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
        ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params cctxParams;
    /* should not matter, as all cParams are presumed properly defined */
    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
    cctxParams.cParams = cParams;

    /* Adjust advanced params according to cParams */
    cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
    if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
        assert(cctxParams.ldmParams.hashRateLog < 32);
    }
    cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
    assert(!ZSTD_checkCParams(cParams));
    return cctxParams;
}
static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params* params;
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
            sizeof(ZSTD_CCtx_params), customMem);
    if (!params) { return NULL; }
    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
    params->customMem = customMem;
    return params;
}

ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
{
    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
}
size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
{
    if (params == NULL) { return 0; }
    ZSTD_customFree(params, params->customMem);
    return 0;
}
size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}
size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->fParams.contentSizeFlag = 1;
    return 0;
}
#define ZSTD_NO_CLEVEL 0

/**
 * Initializes the cctxParams from params and compressionLevel.
 * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
 */
static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
{
    assert(!ZSTD_checkCParams(params->cParams));
    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
    cctxParams->cParams = params->cParams;
    cctxParams->fParams = params->fParams;
    /* Should not matter, as all cParams are presumed properly defined.
     * But, set it for tracing anyway.
     */
    cctxParams->compressionLevel = compressionLevel;
    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
             cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}
size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
    return 0;
}
/**
 * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
 * @param params Validated zstd parameters.
 */
static void ZSTD_CCtxParams_setZstdParams(
        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
{
    assert(!ZSTD_checkCParams(params->cParams));
    cctxParams->cParams = params->cParams;
    cctxParams->fParams = params->fParams;
    /* Should not matter, as all cParams are presumed properly defined.
     * But, set it for tracing anyway.
     */
    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
}
ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;

    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
        bounds.upperBound = 0;
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
        bounds.upperBound = 0;
        return bounds;

    case ZSTD_c_overlapLog:
        bounds.lowerBound = 0;
        bounds.upperBound = 0;
        return bounds;

    case ZSTD_c_enableDedicatedDictSearch:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    case ZSTD_c_srcSizeHint:
        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
        return bounds;

    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
        bounds.lowerBound = (int)ZSTD_bm_buffered;
        bounds.upperBound = (int)ZSTD_bm_stable;
        return bounds;

    case ZSTD_c_blockDelimiters:
        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
        return bounds;

    case ZSTD_c_validateSequences:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_useBlockSplitter:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_useRowMatchFinder:
        bounds.lowerBound = (int)ZSTD_ps_auto;
        bounds.upperBound = (int)ZSTD_ps_disable;
        return bounds;

    case ZSTD_c_deterministicRefPrefix:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    default:
        bounds.error = ERROR(parameter_unsupported);
        return bounds;
    }
}
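
/* Illustrative sketch: callers can query bounds before setting a parameter,
 * mirroring what ZSTD_cParam_clampBounds() below does internally. `cctx` and
 * `level` are hypothetical caller variables.
 *
 *     ZSTD_bounds const b = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
 *     if (!ZSTD_isError(b.error) && level >= b.lowerBound && level <= b.upperBound) {
 *         ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
 *     }
 */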
/* ZSTD_cParam_clampBounds:
 * Clamps the value into the bounded range.
 */
static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return bounds.error;
    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
    if (*value > bounds.upperBound) *value = bounds.upperBound;
    return 0;
}
#define BOUNDCHECK(cParam, val) {                          \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
                    parameter_outOfBound, "Param out of bounds"); \
}
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
    default:
        return 0;
    }
}
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;
        } else {
            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableDedicatedDictSearch:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
    case ZSTD_c_srcSizeHint:
    case ZSTD_c_stableInBuffer:
    case ZSTD_c_stableOutBuffer:
    case ZSTD_c_blockDelimiters:
    case ZSTD_c_validateSequences:
    case ZSTD_c_useBlockSplitter:
    case ZSTD_c_useRowMatchFinder:
    case ZSTD_c_deterministicRefPrefix:
        break;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
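
/* Illustrative end-to-end sketch of the advanced parameter API (error checks
 * elided; `dst`/`src` and their sizes are hypothetical caller buffers):
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *     { size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize); }
 *     ZSTD_freeCCtx(cctx);
 */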
size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
                                    ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
    switch(param)
    {
    case ZSTD_c_format:
        BOUNDCHECK(ZSTD_c_format, value);
        CCtxParams->format = (ZSTD_format_e)value;
        return (size_t)CCtxParams->format;

    case ZSTD_c_compressionLevel: {
        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
        if (value == 0)
            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
        else
            CCtxParams->compressionLevel = value;
        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
        return 0;  /* return type (size_t) cannot represent negative values */
    }

    case ZSTD_c_windowLog:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_windowLog, value);
        CCtxParams->cParams.windowLog = (U32)value;
        return CCtxParams->cParams.windowLog;

    case ZSTD_c_hashLog:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_hashLog, value);
        CCtxParams->cParams.hashLog = (U32)value;
        return CCtxParams->cParams.hashLog;

    case ZSTD_c_chainLog:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_chainLog, value);
        CCtxParams->cParams.chainLog = (U32)value;
        return CCtxParams->cParams.chainLog;

    case ZSTD_c_searchLog:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_searchLog, value);
        CCtxParams->cParams.searchLog = (U32)value;
        return (size_t)value;

    case ZSTD_c_minMatch:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_minMatch, value);
        CCtxParams->cParams.minMatch = value;
        return CCtxParams->cParams.minMatch;

    case ZSTD_c_targetLength:
        BOUNDCHECK(ZSTD_c_targetLength, value);
        CCtxParams->cParams.targetLength = value;
        return CCtxParams->cParams.targetLength;

    case ZSTD_c_strategy:
        if (value!=0)   /* 0 => use default */
            BOUNDCHECK(ZSTD_c_strategy, value);
        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
        return (size_t)CCtxParams->cParams.strategy;

    case ZSTD_c_contentSizeFlag:
        /* Content size written in frame header _when known_ (default:1) */
        DEBUGLOG(4, "set content size flag = %u", (value!=0));
        CCtxParams->fParams.contentSizeFlag = value != 0;
        return CCtxParams->fParams.contentSizeFlag;

    case ZSTD_c_checksumFlag:
        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
        CCtxParams->fParams.checksumFlag = value != 0;
        return CCtxParams->fParams.checksumFlag;

    case ZSTD_c_dictIDFlag: /* When applicable, dictionary's dictID is provided in frame header (default:1) */
        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
        CCtxParams->fParams.noDictIDFlag = !value;
        return !CCtxParams->fParams.noDictIDFlag;

    case ZSTD_c_forceMaxWindow:
        CCtxParams->forceWindow = (value != 0);
        return CCtxParams->forceWindow;

    case ZSTD_c_forceAttachDict: {
        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
        CCtxParams->attachDictPref = pref;
        return CCtxParams->attachDictPref;
    }

    case ZSTD_c_literalCompressionMode: {
        const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
        CCtxParams->literalCompressionMode = lcm;
        return CCtxParams->literalCompressionMode;
    }

    case ZSTD_c_nbWorkers:
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_jobSize:
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_overlapLog:
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_rsyncable:
        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
        return 0;

    case ZSTD_c_enableDedicatedDictSearch:
        CCtxParams->enableDedicatedDictSearch = (value!=0);
        return CCtxParams->enableDedicatedDictSearch;

    case ZSTD_c_enableLongDistanceMatching:
        CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
        return CCtxParams->ldmParams.enableLdm;

    case ZSTD_c_ldmHashLog:
        if (value!=0)   /* 0 ==> auto */
            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
        CCtxParams->ldmParams.hashLog = value;
        return CCtxParams->ldmParams.hashLog;

    case ZSTD_c_ldmMinMatch:
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
        CCtxParams->ldmParams.minMatchLength = value;
        return CCtxParams->ldmParams.minMatchLength;

    case ZSTD_c_ldmBucketSizeLog:
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
        CCtxParams->ldmParams.bucketSizeLog = value;
        return CCtxParams->ldmParams.bucketSizeLog;

    case ZSTD_c_ldmHashRateLog:
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
        CCtxParams->ldmParams.hashRateLog = value;
        return CCtxParams->ldmParams.hashRateLog;

    case ZSTD_c_targetCBlockSize:
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
        CCtxParams->targetCBlockSize = value;
        return CCtxParams->targetCBlockSize;

    case ZSTD_c_srcSizeHint:
        if (value!=0)   /* 0 ==> default */
            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
        CCtxParams->srcSizeHint = value;
        return CCtxParams->srcSizeHint;

    case ZSTD_c_stableInBuffer:
        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->inBufferMode;

    case ZSTD_c_stableOutBuffer:
        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
        return CCtxParams->outBufferMode;

    case ZSTD_c_blockDelimiters:
        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
        return CCtxParams->blockDelimiters;

    case ZSTD_c_validateSequences:
        BOUNDCHECK(ZSTD_c_validateSequences, value);
        CCtxParams->validateSequences = value;
        return CCtxParams->validateSequences;

    case ZSTD_c_useBlockSplitter:
        BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
        CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useBlockSplitter;

    case ZSTD_c_useRowMatchFinder:
        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
        CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
        return CCtxParams->useRowMatchFinder;

    case ZSTD_c_deterministicRefPrefix:
        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
        CCtxParams->deterministicRefPrefix = !!value;
        return CCtxParams->deterministicRefPrefix;

    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
}
size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
{
    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
}
size_t ZSTD_CCtxParams_getParameter(
        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
{
    switch(param)
    {
    case ZSTD_c_format:
        *value = CCtxParams->format;
        break;
    case ZSTD_c_compressionLevel:
        *value = CCtxParams->compressionLevel;
        break;
    case ZSTD_c_windowLog:
        *value = (int)CCtxParams->cParams.windowLog;
        break;
    case ZSTD_c_hashLog:
        *value = (int)CCtxParams->cParams.hashLog;
        break;
    case ZSTD_c_chainLog:
        *value = (int)CCtxParams->cParams.chainLog;
        break;
    case ZSTD_c_searchLog:
        *value = CCtxParams->cParams.searchLog;
        break;
    case ZSTD_c_minMatch:
        *value = CCtxParams->cParams.minMatch;
        break;
    case ZSTD_c_targetLength:
        *value = CCtxParams->cParams.targetLength;
        break;
    case ZSTD_c_strategy:
        *value = (unsigned)CCtxParams->cParams.strategy;
        break;
    case ZSTD_c_contentSizeFlag:
        *value = CCtxParams->fParams.contentSizeFlag;
        break;
    case ZSTD_c_checksumFlag:
        *value = CCtxParams->fParams.checksumFlag;
        break;
    case ZSTD_c_dictIDFlag:
        *value = !CCtxParams->fParams.noDictIDFlag;
        break;
    case ZSTD_c_forceMaxWindow:
        *value = CCtxParams->forceWindow;
        break;
    case ZSTD_c_forceAttachDict:
        *value = CCtxParams->attachDictPref;
        break;
    case ZSTD_c_literalCompressionMode:
        *value = CCtxParams->literalCompressionMode;
        break;
    case ZSTD_c_nbWorkers:
        assert(CCtxParams->nbWorkers == 0);
        *value = CCtxParams->nbWorkers;
        break;
    case ZSTD_c_jobSize:
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_overlapLog:
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_rsyncable:
        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
    case ZSTD_c_enableDedicatedDictSearch:
        *value = CCtxParams->enableDedicatedDictSearch;
        break;
    case ZSTD_c_enableLongDistanceMatching:
        *value = CCtxParams->ldmParams.enableLdm;
        break;
    case ZSTD_c_ldmHashLog:
        *value = CCtxParams->ldmParams.hashLog;
        break;
    case ZSTD_c_ldmMinMatch:
        *value = CCtxParams->ldmParams.minMatchLength;
        break;
    case ZSTD_c_ldmBucketSizeLog:
        *value = CCtxParams->ldmParams.bucketSizeLog;
        break;
    case ZSTD_c_ldmHashRateLog:
        *value = CCtxParams->ldmParams.hashRateLog;
        break;
    case ZSTD_c_targetCBlockSize:
        *value = (int)CCtxParams->targetCBlockSize;
        break;
    case ZSTD_c_srcSizeHint:
        *value = (int)CCtxParams->srcSizeHint;
        break;
    case ZSTD_c_stableInBuffer:
        *value = (int)CCtxParams->inBufferMode;
        break;
    case ZSTD_c_stableOutBuffer:
        *value = (int)CCtxParams->outBufferMode;
        break;
    case ZSTD_c_blockDelimiters:
        *value = (int)CCtxParams->blockDelimiters;
        break;
    case ZSTD_c_validateSequences:
        *value = (int)CCtxParams->validateSequences;
        break;
    case ZSTD_c_useBlockSplitter:
        *value = (int)CCtxParams->useBlockSplitter;
        break;
    case ZSTD_c_useRowMatchFinder:
        *value = (int)CCtxParams->useRowMatchFinder;
        break;
    case ZSTD_c_deterministicRefPrefix:
        *value = (int)CCtxParams->deterministicRefPrefix;
        break;
    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
    }
    return 0;
}
/* ZSTD_CCtx_setParametersUsingCCtxParams() :
 *  just applies `params` into `cctx`
 *  no action is performed, parameters are merely stored.
 *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
 *    This is possible even if a compression is ongoing.
 *    In which case, new parameters will be applied on the fly, starting with next compression job.
 */
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "The context is in the wrong stage!");
    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
                    "Can't override parameters with cdict attached (some must "
                    "be inherited from the cdict).");

    cctx->requestedParams = *params;
    return 0;
}
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't set pledgedSrcSize when not in init stage.");
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    return 0;
}
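
/* Illustrative sketch: announcing the total input size before streaming lets
 * the frame header record it (`totalSrcSize` is a hypothetical caller value;
 * exactly that many bytes must then be fed through ZSTD_compressStream2()):
 *
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setPledgedSrcSize(cctx, totalSrcSize);
 *     ... stream exactly totalSrcSize bytes ...
 */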
static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
        int const compressionLevel,
        size_t const dictSize);
static int ZSTD_dedicatedDictSearch_isSupported(
        const ZSTD_compressionParameters* cParams);
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams);
/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    dl->cdict = ZSTD_createCDict_advanced2(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            &cctx->requestedParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
    cctx->cdict = dl->cdict;
    return 0;
}
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't load a dictionary when ctx is not in init stage.");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;
    } else {
        void* dictBuffer;
        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                        "no malloc for static CCtx");
        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
        ZSTD_memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}
size_t ZSTD_CCtx_loadDictionary_byReference(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}

size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
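
/* Illustrative sketch: a dictionary loaded once is reused across compressions
 * until reset or replaced (error checks elided; `dictBuf`, `src`, `dst` and
 * their sizes are hypothetical caller data):
 *
 *     ZSTD_CCtx_loadDictionary(cctx, dictBuf, dictLen);   // byCopy: cctx keeps its own copy
 *     for (i = 0; i < nbInputs; i++)
 *         ZSTD_compress2(cctx, dst[i], dstCap[i], src[i], srcLen[i]);
 */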
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a dict when ctx not in init stage.");
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}
size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a pool when ctx not in init stage.");
    cctx->pool = pool;
    return 0;
}
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}

size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                    "Can't ref a prefix when ctx not in init stage.");
    ZSTD_clearAllDicts(cctx);
    if (prefix != NULL && prefixSize > 0) {
        cctx->prefixDict.dict = prefix;
        cctx->prefixDict.dictSize = prefixSize;
        cctx->prefixDict.dictContentType = dictContentType;
    }
    return 0;
}
/*! ZSTD_CCtx_reset() :
 *  Also dumps dictionary */
size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
{
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        cctx->streamStage = zcss_init;
        cctx->pledgedSrcSizePlusOne = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                        "Can't reset parameters only when not in init stage.");
        ZSTD_clearAllDicts(cctx);
        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
    }
    return 0;
}
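
/* Illustrative sketch of the two reset scopes:
 *
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);           // abandon current frame; keep params + dictionary
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); // also return params to default and drop dictionary
 */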
/* ZSTD_checkCParams() :
    checks that CParam values remain within the authorized range.
   @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}
/* ZSTD_clampCParams() :
 *  make CParam values within valid range.
 *  @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}
/* ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
{
    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
    return hashLog - btScale;
}
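
/* For example : binary-tree strategies (>= ZSTD_btlazy2) store two entries per
 * position in the chain table, so the index cycle covers only half the table :
 * ZSTD_cycleLog(17, ZSTD_btlazy2) == 16, while ZSTD_cycleLog(17, ZSTD_lazy2) == 17. */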
/* ZSTD_dictAndWindowLog() :
 * Returns an adjusted window log that is large enough to fit the source and the dictionary.
 * The zstd format says that the entire dictionary is valid if one byte of the dictionary
 * is within the window. So the hashLog and chainLog should be large enough to reference both
 * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
 * the hashLog and windowLog.
 * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
 */
static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
{
    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
    /* No dictionary ==> No change */
    if (dictSize == 0) {
        return windowLog;
    }
    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
    {   U64 const windowSize = 1ULL << windowLog;
        U64 const dictAndWindowSize = dictSize + windowSize;
        /* If the window size is already large enough to fit both the source and the dictionary
         * then just use the window size. Otherwise adjust so that it fits the dictionary and
         * the window.
         */
        if (windowSize >= dictSize + srcSize) {
            return windowLog; /* Window size large enough already */
        } else if (dictAndWindowSize >= maxWindowSize) {
            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
        } else {
            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
        }
    }
}
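
/* Worked example : windowLog=20 (1 MiB window), dictSize=512 KiB, srcSize=2 MiB.
 * windowSize (1 MiB) < dictSize + srcSize, and dictAndWindowSize = 1.5 MiB is
 * below the max window, so the result is ZSTD_highbit32(1572863) + 1 = 21. */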
/* ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
 *  note : `srcSize==0` means 0!
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize,
                            ZSTD_cParamMode_e mode)
{
    const U64 minSrcSize = 513; /* (1<<9) + 1 */
    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
        /* If we don't know the source size, don't make any
         * assumptions about it. We will already have selected
         * smaller parameters if a dictionary is in use.
         */
        break;
    case ZSTD_cpm_createCDict:
        /* Assume a small source size when creating a dictionary
         * with an unknown source size.
         */
        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            srcSize = minSrcSize;
        break;
    case ZSTD_cpm_attachDict:
        /* Dictionary has its own dedicated parameters which have
         * already been selected. We are selecting parameters
         * for only the source.
         */
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) ) {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
        if (cycleLog > dictAndWindowLog)
            cPar.chainLog -= (cycleLog - dictAndWindowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */

    return cPar;
}
ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
}
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
static void ZSTD_overrideCParams(
              ZSTD_compressionParameters* cParams,
        const ZSTD_compressionParameters* overrides)
{
    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
}
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    ZSTD_compressionParameters cParams;
    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
        srcSizeHint = CCtxParams->srcSizeHint;
    }
    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
    assert(!ZSTD_checkCParams(cParams));
    /* srcSizeHint == 0 means 0 */
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
}
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const ZSTD_paramSwitch_e useRowMatchFinder,
                       const U32 enableDedicatedDictSearch,
                       const U32 forCCtx)
{
    /* chain table size should be 0 for fast or row-hash strategies */
    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
                                ? ((size_t)1 << cParams->chainLog)
                                : 0;
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
     * surrounded by redzones in ASAN. */
    size_t const tableSpace = chainSize * sizeof(U32)
                            + hSize * sizeof(U32)
                            + h3Size * sizeof(U32);
    size_t const optPotentialSpace =
        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
                                            : 0;
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    size_t const slackSpace = ZSTD_cwksp_slack_space_required();

    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
    assert(useRowMatchFinder != ZSTD_ps_auto);

    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        const ZSTD_compressionParameters* cParams,
        const ldmParams_t* ldmParams,
        const int isStatic,
        const ZSTD_paramSwitch_e useRowMatchFinder,
        const size_t buffInSize,
        const size_t buffOutSize,
        const U64 pledgedSrcSize)
{
    size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
    size_t const maxNbSeq = blockSize / divider;
    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);

    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
    size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;

    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
                             + ZSTD_cwksp_alloc_size(buffOutSize);

    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;

    size_t const neededSpace =
        cctxSpace +
        entropySpace +
        blockStateSpace +
        ldmSpace +
        ldmSeqSpace +
        matchStateSize +
        tokenSpace +
        bufferSpace;

    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
    return neededSpace;
}
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
                                                                                &cParams);

    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    /* estimateCCtxSize is for one-shot compression. So no buffers should
     * be needed. However, we still allocate two 0-sized buffers, which can
     * take space under ASAN. */
    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
        size_t noRowCCtxSize;
        size_t rowCCtxSize;
        initialParams.useRowMatchFinder = ZSTD_ps_disable;
        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
        initialParams.useRowMatchFinder = ZSTD_ps_enable;
        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
        return MAX(noRowCCtxSize, rowCCtxSize);
    } else {
        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
    }
}
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
    int tier = 0;
    size_t largestSize = 0;
    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
    for (; tier < 4; ++tier) {
        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
    }
    return largestSize;
}
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        /* Ensure monotonically increasing memory usage as compression level increases */
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
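
/* Illustrative sketch: pairing the estimate with a static context (assumes
 * the static API; error checks elided, `level` is a hypothetical variable):
 *
 *     size_t const wkspSize = ZSTD_estimateCCtxSize(level);
 *     void* const wksp = malloc(wkspSize);   // malloc results are suitably aligned
 *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
 */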
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
                ? ((size_t)1 << cParams.windowLog) + blockSize
                : 0;
        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);

        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
            ZSTD_CONTENTSIZE_UNKNOWN);
    }
}
size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
        size_t noRowCCtxSize;
        size_t rowCCtxSize;
        initialParams.useRowMatchFinder = ZSTD_ps_disable;
        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
        initialParams.useRowMatchFinder = ZSTD_ps_enable;
        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
        return MAX(noRowCCtxSize, rowCCtxSize);
    } else {
        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
    }
}
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
    return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
    {   ZSTD_frameProgression fp;
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        fp.currentJobID = 0;
        fp.nbActiveWorkers = 0;
        return fp;
    }
}
/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
    (void)cctx;
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog    == cParams2.windowLog);
    assert(cParams1.chainLog     == cParams2.chainLog);
    assert(cParams1.hashLog      == cParams2.hashLog);
    assert(cParams1.searchLog    == cParams2.searchLog);
    assert(cParams1.minMatch     == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy     == cParams2.strategy);
}
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i)
        bs->rep[i] = repStartValue[i];
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}
/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL;
}
/**
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/**
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
static size_t
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      ZSTD_cwksp* ws,
                const ZSTD_compressionParameters* cParams,
                const ZSTD_paramSwitch_e useRowMatchFinder,
                const ZSTD_compResetPolicy_e crp,
                const ZSTD_indexResetPolicy_e forceResetIndex,
                const ZSTD_resetTarget_e forWho)
{
    /* disable chain table allocation for fast or row-based strategies */
    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
                                ? ((size_t)1 << cParams->chainLog)
                                : 0;
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;

    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
    assert(useRowMatchFinder != ZSTD_ps_auto);
    if (forceResetIndex == ZSTDirp_reset) {
        ZSTD_window_init(&ms->window);
        ZSTD_cwksp_mark_tables_dirty(ws);
    }

    ms->hashLog3 = hashLog3;

    ZSTD_invalidateMatchState(ms);

    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */

    ZSTD_cwksp_clear_tables(ws);

    DEBUGLOG(5, "reserving table space");
    /* table Space */
    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");

    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
    if (crp!=ZSTDcrp_leaveDirty) {
        /* reset tables only */
        ZSTD_cwksp_clean_tables(ws);
    }

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
    }

    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
        {   /* Row match finder needs an additional table of hashes ("tags") */
            size_t const tagTableSize = hSize*sizeof(U16);
            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
        }
        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
            U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
            assert(cParams->hashLog >= rowLog);
            ms->rowHashLog = cParams->hashLog - rowLog;
        }
    }

    ms->cParams = *cParams;

    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                    "failed a workspace allocation in ZSTD_reset_matchState");
    return 0;
}
/* ZSTD_indexTooCloseToMax() :
 * minor optimization : prefer memset() rather than reduceIndex()
 * which is measurably slow in some circumstances (reported for Visual Studio).
 * Works when re-using a context for a lot of smallish inputs :
 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
 * memset() will be triggered before reduceIndex().
 */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}

/* ZSTD_dictTooBig():
 * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
 * one go generically. So we ensure that in that case we reset the tables to zero,
 * so that we can load as much of the dictionary as possible.
 */
static int ZSTD_dictTooBig(size_t const loadedDictSize)
{
    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
}
/*! ZSTD_resetCCtx_internal() :
 * @param loadedDictSize The size of the dictionary to be loaded
 * into the context, if any. If no dictionary is used, or the
 * dictionary is being attached / copied, then pass 0.
 * note : `params` are assumed fully validated at this stage.
 */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                      ZSTD_CCtx_params const* params,
                                      U64 const pledgedSrcSize,
                                      size_t const loadedDictSize,
                                      ZSTD_compResetPolicy_e const crp,
                                      ZSTD_buffered_policy_e const zbuff)
{
    ZSTD_cwksp* const ws = &zc->workspace;
    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));

    zc->isFirstBlock = 1;

    /* Set applied params early so we can modify them for LDM,
     * and point params at the applied params.
     */
    zc->appliedParams = *params;
    params = &zc->appliedParams;

    assert(params->useRowMatchFinder != ZSTD_ps_auto);
    assert(params->useBlockSplitter != ZSTD_ps_auto);
    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
        /* Adjust long distance matching parameters */
        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
        assert(params->ldmParams.hashRateLog < 32);
    }

    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                ? ZSTD_compressBound(blockSize) + 1
                : 0;
        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                ? windowSize + blockSize
                : 0;
        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);

        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
        ZSTD_indexResetPolicy_e needsIndexReset =
                (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;

        size_t const neededSpace =
            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                buffInSize, buffOutSize, pledgedSrcSize);
        int resizeWorkspace;

        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");

        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);

        {   /* Check if workspace is large enough, alloc a new one if needed */
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

            if (resizeWorkspace) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);

                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");

                needsIndexReset = ZSTDirp_reset;

                ZSTD_cwksp_free(ws, zc->customMem);
                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");

                DEBUGLOG(5, "reserving object space");
                /* Statically sized space.
                 * entropyWorkspace never moves,
                 * though prev/next block swap places */
                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
        }   }

        ZSTD_cwksp_clear(ws);

        /* init params */
        zc->blockState.matchState.cParams = params->cParams;
        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
        zc->consumedSrcSize = 0;
        zc->producedCSize = 0;
        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
            zc->appliedParams.fParams.contentSizeFlag = 0;
        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
        zc->blockSize = blockSize;

        xxh64_reset(&zc->xxhState, 0);
        zc->stage = ZSTDcs_init;
        zc->dictID = 0;
        zc->dictContentSize = 0;

        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);

        /* ZSTD_wildcopy() is used to copy into the literals buffer,
         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
         */
        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
        zc->seqStore.maxNbLit = blockSize;

        /* buffers */
        zc->bufferedPolicy = zbuff;
        zc->inBuffSize = buffInSize;
        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
        zc->outBuffSize = buffOutSize;
        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

        /* ldm bucketOffsets table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const numBuckets =
                  ((size_t)1) << (params->ldmParams.hashLog -
                                  params->ldmParams.bucketSizeLog);
            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
        }

        /* sequences storage */
        ZSTD_referenceExternalSequences(zc, NULL, 0);
        zc->seqStore.maxNbSeq = maxNbSeq;
        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));

        FORWARD_IF_ERROR(ZSTD_reset_matchState(
            &zc->blockState.matchState,
            ws,
            &params->cParams,
            params->useRowMatchFinder,
            crp,
            needsIndexReset,
            ZSTD_resetTarget_CCtx), "");

        /* ldm hash table */
        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
            /* TODO: avoid memset? */
            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
            zc->maxNbLdmSequences = maxNbLdmSeq;

            ZSTD_window_init(&zc->ldmState.window);
            zc->ldmState.loadedDictEnd = 0;
        }

        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));

        zc->initialized = 1;

        return 0;
    }
}
/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
    int i;
    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};

static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
                                 const ZSTD_CCtx_params* params,
                                 U64 pledgedSrcSize)
{
    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
    return dedicatedDictSearch
        || ( ( pledgedSrcSize <= cutoff
            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
            || params->attachDictPref == ZSTD_dictForceAttach )
          && params->attachDictPref != ZSTD_dictForceCopy
          && !params->forceWindow ); /* dictMatchState isn't correctly
                                      * handled in _enforceMaxDist */
}
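/* Worked example (illustrative): a CDict built with ZSTD_lazy has a 32 KB
 * cutoff above. Compressing a 10 KB input with the default attachDictPref
 * therefore attaches the dictionary in-place, while pledging e.g. 1 MB makes
 * copying its tables the faster choice; a dedicated-dict-search CDict always
 * attaches, regardless of size.
 */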
static size_t
ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                        const ZSTD_CDict* cdict,
                        ZSTD_CCtx_params params,
                        U64 pledgedSrcSize,
                        ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);
    {
        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
        unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Resize working context table params for input only, since the dict
         * has its own tables. */
        /* pledgedSrcSize == 0 means 0! */

        if (cdict->matchState.dedicatedDictSearch) {
            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
        }

        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_makeClean, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
    }

    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                  - cdict->matchState.window.base);
        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
        if (cdictLen == 0) {
            /* don't even attach dictionaries with no contents */
            DEBUGLOG(4, "skipping attaching empty dictionary");
        } else {
            DEBUGLOG(4, "attaching dictionary into context");
            cctx->blockState.matchState.dictMatchState = &cdict->matchState;

            /* prep working match state so dict matches never have negative indices
             * when they are translated to the working context's index space. */
            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
                cctx->blockState.matchState.window.nextSrc =
                    cctx->blockState.matchState.window.base + cdictEnd;
                ZSTD_window_clear(&cctx->blockState.matchState.window);
            }
            /* loadedDictEnd is expressed within the referential of the active context */
            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
    }   }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            ZSTD_CCtx_params params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

    assert(!cdict->matchState.dedicatedDictSearch);
    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
                (unsigned long long)pledgedSrcSize);

    {   unsigned const windowLog = params.cParams.windowLog;
        assert(windowLog != 0);
        /* Copy only compression parameters related to tables. */
        params.cParams = *cdict_cParams;
        params.cParams.windowLog = windowLog;
        params.useRowMatchFinder = cdict->useRowMatchFinder;
        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
                                                 /* loadedDictSize */ 0,
                                                 ZSTDcrp_leaveDirty, zbuff), "");
        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
    }

    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
    assert(params.useRowMatchFinder != ZSTD_ps_auto);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
                                                            ? ((size_t)1 << cdict_cParams->chainLog)
                                                            : 0;
        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;

        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
               cdict->matchState.hashTable,
               hSize * sizeof(U32));
        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
               cdict->matchState.chainTable,
               chainSize * sizeof(U32));
        }
        /* copy tag table */
        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
            size_t const tagTableSize = hSize*sizeof(U16);
            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
                cdict->matchState.tagTable,
                tagTableSize);
        }
    }

    /* Zero the hashTable3, since the cdict never fills it */
    {   int const h3log = cctx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
        assert(cdict->matchState.hashLog3 == 0);
        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);

    /* copy dictionary offsets */
    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
        dstMatchState->window       = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }

    cctx->dictID = cdict->dictID;
    cctx->dictContentSize = cdict->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));

    return 0;
}
/* We have a choice between copying the dictionary context into the working
 * context, or referencing the dictionary context from the working context
 * in-place. We decide here which strategy to use. */
static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
                            const ZSTD_CDict* cdict,
                            const ZSTD_CCtx_params* params,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
                (unsigned)pledgedSrcSize);

    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
        return ZSTD_resetCCtx_byAttachingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    } else {
        return ZSTD_resetCCtx_byCopyingCDict(
            cctx, cdict, *params, pledgedSrcSize, zbuff);
    }
}
/*! ZSTD_copyCCtx_internal() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  The "context", in this case, refers to the hash and chain tables,
 *  entropy tables, and dictionary references.
 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
 * @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                            const ZSTD_CCtx* srcCCtx,
                            ZSTD_frameParameters fParams,
                            U64 pledgedSrcSize,
                            ZSTD_buffered_policy_e zbuff)
{
    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                    "Can't copy a ctx that's not in init stage.");
    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
        /* Copy only compression parameters related to tables. */
        params.cParams = srcCCtx->appliedParams.cParams;
        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
        assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
        params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
        params.ldmParams = srcCCtx->appliedParams.ldmParams;
        params.fParams = fParams;
        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
                                /* loadedDictSize */ 0,
                                ZSTDcrp_leaveDirty, zbuff);
        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
    }

    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

    /* copy tables */
    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
                                                         srcCCtx->appliedParams.useRowMatchFinder,
                                                         0 /* forDDSDict */)
                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
                                    : 0;
        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
        int const h3log = srcCCtx->blockState.matchState.hashLog3;
        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;

        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
               srcCCtx->blockState.matchState.hashTable,
               hSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
               srcCCtx->blockState.matchState.chainTable,
               chainSize * sizeof(U32));
        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
               srcCCtx->blockState.matchState.hashTable3,
               h3Size * sizeof(U32));
    }

    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);

    /* copy dictionary offsets */
    {
        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
        dstMatchState->window       = srcMatchState->window;
        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
    }
    dstCCtx->dictID = srcCCtx->dictID;
    dstCCtx->dictContentSize = srcCCtx->dictContentSize;

    /* copy block state */
    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));

    return 0;
}

/*! ZSTD_copyCCtx() :
 *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
 *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
 *  pledgedSrcSize==0 means "unknown".
 *  @return : 0, or an error code */
size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
{
    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);

    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
                                  fParams, pledgedSrcSize,
                                  zbuff);
}
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));  /* can be casted to int */

    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            U32 newVal;
            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
                /* This write is pointless, but is required(?) for the compiler
                 * to auto-vectorize the loop. */
                newVal = ZSTD_DUBT_UNSORTED_MARK;
            } else if (table[cellNb] < reducerThreshold) {
                newVal = 0;
            } else {
                newVal = table[cellNb] - reducerValue;
            }
            table[cellNb] = newVal;
            cellNb++;
    }   }
}

static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}

static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}

/*! ZSTD_reduceIndex() :
 *  rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
{
    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
    }

    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
        U32 const chainSize = (U32)1 << params->cParams.chainLog;
        if (params->cParams.strategy == ZSTD_btlazy2)
            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
        else
            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
    }

    if (ms->hashLog3) {
        U32 const h3Size = (U32)1 << ms->hashLog3;
        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
    }
}
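/* Worked example (illustrative): with reducerValue == 1000, a table cell
 * holding index 5000 is rescaled to 4000, while a cell holding 500 (below
 * reducerThreshold) is squashed to 0. The btlazy2 variant additionally keeps
 * ZSTD_DUBT_UNSORTED_MARK cells intact so the tree's sort state survives the
 * rescale.
 */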
/*-*******************************************************
*  Block entropic compression
*********************************************************/

/* See doc/zstd_compression_format.md for detailed format description */

void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    assert(nbSeq <= seqStorePtr->maxNbSeq);
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].mlBase;
        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
    }
    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
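/* Worked example (illustrative): a sequence with offBase == 1024 gets
 * ofCodeTable[u] == ZSTD_highbit32(1024) == 10, i.e. the offset code is the
 * position of the highest set bit; literal and match lengths go through the
 * banded ZSTD_LLcode() / ZSTD_MLcode() mappings instead of a pure log2.
 */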
/* ZSTD_useTargetCBlockSize():
 * Returns if target compressed block size param is being used.
 * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
    return (cctxParams->targetCBlockSize != 0);
}

/* ZSTD_blockSplitterEnabled():
 * Returns if block splitting param is being used.
 * If used, compression will do best effort to split a block in order to improve compression ratio.
 * At the time this function is called, the parameter must be finalized.
 * Returns 1 if true, 0 otherwise. */
static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
{
    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
    assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
    return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
}
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
 * and size of the sequences statistics
 */
typedef struct {
    U32 LLtype;
    U32 Offtype;
    U32 MLtype;
    size_t size;
    size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_symbolEncodingTypeStats_t;
/* ZSTD_buildSequencesStatistics():
 * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
 * Modifies `nextEntropy` to have the appropriate values as a side effect.
 * nbSeq must be greater than 0.
 *
 * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
                              BYTE* dst, const BYTE* const dstEnd,
                              ZSTD_strategy strategy, unsigned* countWorkspace,
                              void* entropyWorkspace, size_t entropyWkspSize) {
    BYTE* const ostart = dst;
    const BYTE* const oend = dstEnd;
    BYTE* op = ostart;
    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    ZSTD_symbolEncodingTypeStats_t stats;

    stats.lastCountSize = 0;
    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    assert(op <= oend);
    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
                countWorkspace, max, llCodeTable, nbSeq,
                LL_defaultNorm, LL_defaultNormLog, MaxLL,
                prevEntropy->litlengthCTable,
                sizeof(prevEntropy->litlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.LLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
                countWorkspace, max, ofCodeTable, nbSeq,
                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                prevEntropy->offcodeCTable,
                sizeof(prevEntropy->offcodeCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.Offtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(
            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
                                        countWorkspace, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(
                op, (size_t)(oend - op),
                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
                countWorkspace, max, mlCodeTable, nbSeq,
                ML_defaultNorm, ML_defaultNormLog, MaxML,
                prevEntropy->matchlengthCTable,
                sizeof(prevEntropy->matchlengthCTable),
                entropyWorkspace, entropyWkspSize);
            if (ZSTD_isError(countSize)) {
                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
                stats.size = countSize;
                return stats;
            }
            if (stats.MLtype == set_compressed)
                stats.lastCountSize = countSize;
            op += countSize;
            assert(op <= oend);
    }   }
    stats.size = (size_t)(op-ostart);
    return stats;
}
/* ZSTD_entropyCompressSeqStore_internal():
 * compresses both literals and sequences
 * Returns compressed size of block, or a zstd error.
 */
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
MEM_STATIC size_t
ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* entropyWorkspace, size_t entropyWkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned* count = (unsigned*)entropyWorkspace;
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t lastCountSize;

    entropyWorkspace = count + (MaxSeq + 1);
    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
        /* Base suspicion of uncompressibility on ratio of literals to sequences */
        unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    entropyWorkspace, entropyWkspSize,
                                    bmi2, suspectUncompressible);
        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
    if (nbSeq < 128) {
        *op++ = (BYTE)nbSeq;
    } else if (nbSeq < LONGNBSEQ) {
        op[0] = (BYTE)((nbSeq>>8) + 0x80);
        op[1] = (BYTE)nbSeq;
        op += 2;
    } else {
        op[0] = 0xFF;
        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
        op += 3;
    }
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return (size_t)(op - ostart);
    }
    {
        ZSTD_symbolEncodingTypeStats_t stats;
        BYTE* seqHead = op++;
        /* build stats for sequences */
        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                             &prevEntropy->fse, &nextEntropy->fse,
                                              op, oend,
                                              strategy, count,
                                              entropyWorkspace, entropyWkspSize);
        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
        lastCountSize = stats.lastCountSize;
        op += stats.size;
    }

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, (size_t)(oend - op),
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(lastCountSize + bitstreamSize == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return (size_t)(op - ostart);
}

MEM_STATIC size_t
ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                       const ZSTD_entropyCTables_t* prevEntropy,
                             ZSTD_entropyCTables_t* nextEntropy,
                       const ZSTD_CCtx_params* cctxParams,
                             void* dst, size_t dstCapacity,
                             size_t srcSize,
                             void* entropyWorkspace, size_t entropyWkspSize,
                             int bmi2)
{
    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                            dst, dstCapacity,
                            entropyWorkspace, entropyWkspSize, bmi2);
    if (cSize == 0) return 0;
    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
     */
    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
        return 0;  /* block not compressed */
    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");

    /* Check compressibility */
    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
        if (cSize >= maxCSize) return 0;  /* block not compressed */
    }
    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
    return cSize;
}
/* ZSTD_selectBlockCompressor() :
 * Not static, but internal use only (used by long distance matcher)
 * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
{
    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
        { ZSTD_compressBlock_fast  /* default for 0 */,
          ZSTD_compressBlock_fast,
          ZSTD_compressBlock_doubleFast,
          ZSTD_compressBlock_greedy,
          ZSTD_compressBlock_lazy,
          ZSTD_compressBlock_lazy2,
          ZSTD_compressBlock_btlazy2,
          ZSTD_compressBlock_btopt,
          ZSTD_compressBlock_btultra,
          ZSTD_compressBlock_btultra2 },
        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
          ZSTD_compressBlock_fast_extDict,
          ZSTD_compressBlock_doubleFast_extDict,
          ZSTD_compressBlock_greedy_extDict,
          ZSTD_compressBlock_lazy_extDict,
          ZSTD_compressBlock_lazy2_extDict,
          ZSTD_compressBlock_btlazy2_extDict,
          ZSTD_compressBlock_btopt_extDict,
          ZSTD_compressBlock_btultra_extDict,
          ZSTD_compressBlock_btultra_extDict },
        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
          ZSTD_compressBlock_fast_dictMatchState,
          ZSTD_compressBlock_doubleFast_dictMatchState,
          ZSTD_compressBlock_greedy_dictMatchState,
          ZSTD_compressBlock_lazy_dictMatchState,
          ZSTD_compressBlock_lazy2_dictMatchState,
          ZSTD_compressBlock_btlazy2_dictMatchState,
          ZSTD_compressBlock_btopt_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState },
        { NULL  /* default for 0 */,
          NULL,
          NULL,
          ZSTD_compressBlock_greedy_dedicatedDictSearch,
          ZSTD_compressBlock_lazy_dedicatedDictSearch,
          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
          NULL,
          NULL,
          NULL,
          NULL }
    };
    ZSTD_blockCompressor selectedCompressor;
    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
            { ZSTD_compressBlock_greedy_row,
              ZSTD_compressBlock_lazy_row,
              ZSTD_compressBlock_lazy2_row },
            { ZSTD_compressBlock_greedy_extDict_row,
              ZSTD_compressBlock_lazy_extDict_row,
              ZSTD_compressBlock_lazy2_extDict_row },
            { ZSTD_compressBlock_greedy_dictMatchState_row,
              ZSTD_compressBlock_lazy_dictMatchState_row,
              ZSTD_compressBlock_lazy2_dictMatchState_row },
            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
              ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
              ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
        };
        DEBUGLOG(4, "Selecting a row-based matchfinder");
        assert(useRowMatchFinder != ZSTD_ps_auto);
        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
    } else {
        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
    }
    assert(selectedCompressor != NULL);
    return selectedCompressor;
}
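/* Illustrative resolution (not part of the original file): a context running
 * ZSTD_greedy with the row matchfinder enabled and no dictionary resolves to
 * rowBasedBlockCompressors[(int)ZSTD_noDict][0], i.e.
 * ZSTD_compressBlock_greedy_row; with the row matchfinder disabled, it falls
 * back to blockCompressor[(int)ZSTD_noDict][(int)ZSTD_greedy].
 */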
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
    seqStorePtr->lit += lastLLSize;
}

void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->longLengthType = ZSTD_llt_none;
}
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;

static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
        } else {
            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        }
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 curr = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (curr > ms->nextToUpdate + 384)
            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        {   int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                               &zc->appliedParams.ldmParams,
                                               src, srcSize), "");
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       zc->appliedParams.useRowMatchFinder,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
                                                                                    zc->appliedParams.useRowMatchFinder,
                                                                                    dictMode);
            ms->ldmSeqStore = NULL;
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}
static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
{
    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
    size_t literalsRead = 0;
    size_t lastLLSize;

    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
    size_t i;
    repcodes_t updatedRepcodes;

    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
    /* Ensure we have enough space for last literals "sequence" */
    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (i = 0; i < seqStoreSeqSize; ++i) {
        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
        outSeqs[i].rep = 0;

        if (i == seqStore->longLengthPos) {
            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                outSeqs[i].litLength += 0x10000;
            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                outSeqs[i].matchLength += 0x10000;
            }
        }

        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
            /* Derive the correct offset corresponding to a repcode */
            outSeqs[i].rep = seqStoreSeqs[i].offBase;
            if (outSeqs[i].litLength != 0) {
                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
            } else {
                if (outSeqs[i].rep == 3) {
                    rawOffset = updatedRepcodes.rep[0] - 1;
                } else {
                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
                }
            }
        }
        outSeqs[i].offset = rawOffset;
        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
           so we provide seqStoreSeqs[i].offset - 1 */
        ZSTD_updateRep(updatedRepcodes.rep,
                       seqStoreSeqs[i].offBase - 1,
                       seqStoreSeqs[i].litLength == 0);
        literalsRead += outSeqs[i].litLength;
    }
    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
     * for the block boundary, according to the API.
     */
    assert(seqStoreLiteralsSize >= literalsRead);
    lastLLSize = seqStoreLiteralsSize - literalsRead;
    outSeqs[i].litLength = (U32)lastLLSize;
    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
    seqStoreSeqSize++;
    zc->seqCollector.seqIndex += seqStoreSeqSize;
}
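/* Worked example (illustrative) of the repcode resolution above: with
 * updatedRepcodes.rep == {8, 4, 2} and a sequence whose offBase == 1,
 * a non-zero litLength selects rep[0], so rawOffset == 8. When litLength == 0
 * the repcode indices shift: offBase 1 selects rep[1] (rawOffset == 4), and
 * the special offBase 3 selects rep[0] - 1 (rawOffset == 7).
 */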
size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                              size_t outSeqsSize, const void* src, size_t srcSize)
{
    const size_t dstCapacity = ZSTD_compressBound(srcSize);
    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
    SeqCollector seqCollector;

    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");

    seqCollector.collectSequences = 1;
    seqCollector.seqStart = outSeqs;
    seqCollector.seqIndex = 0;
    seqCollector.maxSequences = outSeqsSize;
    zc->seqCollector = seqCollector;

    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
    ZSTD_customFree(dst, ZSTD_defaultCMem);
    return zc->seqCollector.seqIndex;
}
size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
    size_t in = 0;
    size_t out = 0;
    for (; in < seqsSize; ++in) {
        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
            if (in != seqsSize - 1) {
                sequences[in+1].litLength += sequences[in].litLength;
            }
        } else {
            sequences[out] = sequences[in];
            out++;
        }
    }
    return out;
}
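/* Illustrative usage sketch (not part of the original file): collecting
 * sequences for a buffer, then stripping the block-delimiter markers. Both
 * functions are public API; sizing and error handling are elided.
 *
 *     ZSTD_Sequence seqs[1024];
 *     size_t nb = ZSTD_generateSequences(zc, seqs, 1024, src, srcSize);
 *     nb = ZSTD_mergeBlockDelimiters(seqs, nb);
 *
 * After merging, entries with offset == 0 && matchLength == 0 are gone, and
 * each removed delimiter's litLength has been folded into the following
 * sequence.
 */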
/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
static int ZSTD_isRLE(const BYTE* src, size_t length) {
    const BYTE* ip = src;
    const BYTE value = ip[0];
    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
    const size_t unrollSize = sizeof(size_t) * 4;
    const size_t unrollMask = unrollSize - 1;
    const size_t prefixLength = length & unrollMask;
    size_t i;
    size_t u;
    if (length == 1) return 1;
    /* Check if prefix is RLE first before using unrolled loop */
    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
        return 0;
    }
    for (i = prefixLength; i != length; i += unrollSize) {
        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
            if (MEM_readST(ip + i + u) != valueST) {
                return 0;
    }   }   }
    return 1;
}
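/* Worked example (illustrative): for value == 0xAB on a 64-bit target,
 * valueST == 0xABABABABABABABAB, so each MEM_readST() compares 8 repeated
 * bytes at once. The prefix check covers the leading (length % 32) bytes the
 * unrolled loop would otherwise skip, since unrollSize == 4 * sizeof(size_t).
 */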
/* Returns true if the given block may be RLE.
 * This is just a heuristic based on the compressibility.
 * It may return both false positives and false negatives.
 */
static int ZSTD_maybeRLE(seqStore_t const* seqStore)
{
    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);

    return nbSeqs < 4 && nbLits < 10;
}

static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
    bs->prevCBlock = bs->nextCBlock;
    bs->nextCBlock = tmp;
}
/* Writes the block header */
static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
    U32 const cBlockHeader = cSize == 1 ?
                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
    MEM_writeLE24(op, cBlockHeader);
    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
}
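/* Worked example (illustrative): a compressed block of cSize == 100 that is
 * also the last block encodes cBlockHeader == 1 + (bt_compressed << 1) +
 * (100 << 3) == 1 + 4 + 800 == 805, stored little-endian in 3 bytes:
 * 1 bit lastBlock, 2 bits block type, 21 bits size.
 */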
/* ZSTD_buildBlockEntropyStats_literals() :
 *  Builds entropy for the literals.
 *  Stores literals block type (raw, rle, compressed, repeat) and
 *  huffman description table to hufMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE workspace
 *  @return : size of huffman description table or error code */
static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
                                            const ZSTD_hufCTables_t* prevHuf,
                                                  ZSTD_hufCTables_t* nextHuf,
                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
                                                  const int literalsCompressionIsDisabled,
                                                  void* workspace, size_t wkspSize)
{
    BYTE* const wkspStart = (BYTE*)workspace;
    BYTE* const wkspEnd = wkspStart + wkspSize;
    BYTE* const countWkspStart = wkspStart;
    unsigned* const countWksp = (unsigned*)workspace;
    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
    BYTE* const nodeWksp = countWkspStart + countWkspSize;
    const size_t nodeWkspSize = wkspEnd - nodeWksp;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    unsigned huffLog = HUF_TABLELOG_DEFAULT;
    HUF_repeat repeat = prevHuf->repeatMode;
    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);

    /* Prepare nextEntropy assuming reusing the existing table */
    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (literalsCompressionIsDisabled) {
        DEBUGLOG(5, "set_basic - disabled");
        hufMetadata->hType = set_basic;
        return 0;
    }

    /* small ? don't even attempt compression (speed opt) */
#ifndef COMPRESS_LITERALS_SIZE_MIN
#define COMPRESS_LITERALS_SIZE_MIN 63
#endif
    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) {
            DEBUGLOG(5, "set_basic - too small");
            hufMetadata->hType = set_basic;
            return 0;
    }   }

    /* Scan input and build symbol stats */
    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
        if (largest == srcSize) {
            DEBUGLOG(5, "set_rle");
            hufMetadata->hType = set_rle;
            return 0;
        }
        if (largest <= (srcSize >> 7)+4) {
            DEBUGLOG(5, "set_basic - no gain");
            hufMetadata->hType = set_basic;
            return 0;
    }   }

    /* Validate the previous Huffman table */
    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
        repeat = HUF_repeat_none;
    }

    /* Build Huffman Tree */
    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
                                                    maxSymbolValue, huffLog,
                                                    nodeWksp, nodeWkspSize);
        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
        huffLog = (U32)maxBits;
        {   /* Build and write the CTable */
            size_t const newCSize = HUF_estimateCompressedSize(
                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
            size_t const hSize = HUF_writeCTable_wksp(
                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
                    nodeWksp, nodeWkspSize);
            /* Check against repeating the previous CTable */
            if (repeat != HUF_repeat_none) {
                size_t const oldCSize = HUF_estimateCompressedSize(
                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
                    DEBUGLOG(5, "set_repeat - smaller");
                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                    hufMetadata->hType = set_repeat;
                    return 0;
            }   }
            if (newCSize + hSize >= srcSize) {
                DEBUGLOG(5, "set_basic - no gains");
                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
                hufMetadata->hType = set_basic;
                return 0;
            }
            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
            hufMetadata->hType = set_compressed;
            nextHuf->repeatMode = HUF_repeat_check;
            return hSize;
    }   }
}
/* ZSTD_buildDummySequencesStatistics():
 * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
 * and updates nextEntropy to the appropriate repeatMode.
 */
static ZSTD_symbolEncodingTypeStats_t
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
    nextEntropy->litlength_repeatMode = FSE_repeat_none;
    nextEntropy->offcode_repeatMode = FSE_repeat_none;
    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
    return stats;
}
/* ZSTD_buildBlockEntropyStats_sequences() :
 *  Builds entropy for the sequences.
 *  Stores symbol compression modes and fse table to fseMetadata.
 *  Requires ENTROPY_WORKSPACE_SIZE wksp.
 *  @return : size of fse tables or error code */
static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
                                              const ZSTD_fseCTables_t* prevEntropy,
                                                    ZSTD_fseCTables_t* nextEntropy,
                                              const ZSTD_CCtx_params* cctxParams,
                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
                                                    void* workspace, size_t wkspSize)
{
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* const ostart = fseMetadata->fseTablesBuffer;
    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
    BYTE* op = ostart;
    unsigned* countWorkspace = (unsigned*)workspace;
    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
    ZSTD_symbolEncodingTypeStats_t stats;

    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                          prevEntropy, nextEntropy, op, oend,
                                          strategy, countWorkspace,
                                          entropyWorkspace, entropyWorkspaceSize)
                       : ZSTD_buildDummySequencesStatistics(nextEntropy);
    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
    fseMetadata->lastCountSize = stats.lastCountSize;
    return stats.size;
}
/* ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  Requires workspace size ENTROPY_WORKSPACE_SIZE
 *
 *  @return : 0 on success or error code
 */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize)
{
    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
    entropyMetadata->hufMetadata.hufDesSize =
        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
                                            &prevEntropy->huf, &nextEntropy->huf,
                                            &entropyMetadata->hufMetadata,
                                            ZSTD_literalsCompressionIsDisabled(cctxParams),
                                            workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
    entropyMetadata->fseMetadata.fseTablesSize =
        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
                                              &prevEntropy->fse, &nextEntropy->fse,
                                              cctxParams,
                                              &entropyMetadata->fseMetadata,
                                              workspace, wkspSize);
    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
    return 0;
}
/* Returns the size estimate for the literals section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
                                             const ZSTD_hufCTables_t* huf,
                                             const ZSTD_hufCTablesMetadata_t* hufMetadata,
                                             void* workspace, size_t wkspSize,
                                             int writeEntropy)
{
    unsigned* const countWksp = (unsigned*)workspace;
    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
    U32 singleStream = litSize < 256;

    if (hufMetadata->hType == set_basic) return litSize;
    else if (hufMetadata->hType == set_rle) return 1;
    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
        if (ZSTD_isError(largest)) return litSize;
        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
            return cLitSizeEstimate + literalSectionHeaderSize;
    }   }
    assert(0); /* impossible */
    return 0;
}
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
                        const FSE_CTable* fseCTable,
                        const U8* additionalBits,
                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                        void* workspace, size_t wkspSize)
{
    unsigned* const countWksp = (unsigned*)workspace;
    const BYTE* ctp = codeTable;
    const BYTE* const ctStart = ctp;
    const BYTE* const ctEnd = ctStart + nbSeq;
    size_t cSymbolTypeSizeEstimateInBits = 0;
    unsigned max = maxCode;

    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
    if (type == set_basic) {
        /* We selected this encoding type, so it must be valid. */
        assert(max <= defaultMax);
        (void)defaultMax;
        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
    } else if (type == set_rle) {
        cSymbolTypeSizeEstimateInBits = 0;
    } else if (type == set_compressed || type == set_repeat) {
        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
    }
    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
        return nbSeq * 10;
    }
    while (ctp < ctEnd) {
        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
        ctp++;
    }
    return cSymbolTypeSizeEstimateInBits >> 3;
}
/* Returns the size estimate for the sequences section (header + content) of a block */
static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
                                               const BYTE* llCodeTable,
                                               const BYTE* mlCodeTable,
                                               size_t nbSeq,
                                               const ZSTD_fseCTables_t* fseTables,
                                               const ZSTD_fseCTablesMetadata_t* fseMetadata,
                                               void* workspace, size_t wkspSize,
                                               int writeEntropy)
{
    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
    size_t cSeqSizeEstimate = 0;
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
                                         fseTables->offcodeCTable, NULL,
                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                         workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
                                         fseTables->litlengthCTable, LL_bits,
                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                         workspace, wkspSize);
    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
                                         fseTables->matchlengthCTable, ML_bits,
                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
                                         workspace, wkspSize);
    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
    return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
/* Returns the size estimate for a given stream of literals, of, ll, ml */
static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
                                     const BYTE* ofCodeTable,
                                     const BYTE* llCodeTable,
                                     const BYTE* mlCodeTable,
                                     size_t nbSeq,
                                     const ZSTD_entropyCTables_t* entropy,
                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                     void* workspace, size_t wkspSize,
                                     int writeLitEntropy, int writeSeqEntropy) {
    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
                                                         &entropy->huf, &entropyMetadata->hufMetadata,
                                                         workspace, wkspSize, writeLitEntropy);
    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                         workspace, wkspSize, writeSeqEntropy);
    return seqSize + literalsSize + ZSTD_blockHeaderSize;
}
/* Builds entropy statistics and uses them for blocksize estimation.
 *
 * Returns the estimated compressed size of the seqStore, or a zstd error.
 */
static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
    DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
                    &zc->blockState.prevCBlock->entropy,
                    &zc->blockState.nextCBlock->entropy,
                    &zc->appliedParams,
                    entropyMetadata,
                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}
/* Returns literals bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
    size_t literalsBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        literalsBytes += seq.litLength;
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
            literalsBytes += 0x10000;
    }   }
    return literalsBytes;
}
/* Returns match bytes represented in a seqStore */
static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
    size_t matchBytes = 0;
    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
    size_t i;
    for (i = 0; i < nbSeqs; ++i) {
        seqDef seq = seqStore->sequencesStart[i];
        matchBytes += seq.mlBase + MINMATCH;
        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
            matchBytes += 0x10000;
    }   }
    return matchBytes;
}
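
/* Note : seqDef stores litLength and mlBase in 16-bit fields, so at most one
 * sequence per block may exceed that range. Its index is recorded in
 * longLengthPos, and the 0x10000 corrections in the two counters above re-add
 * the truncated high bit for that single sequence. */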
/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
 * Stores the result in resultSeqStore.
 */
static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
                               const seqStore_t* originalSeqStore,
                                     size_t startIdx, size_t endIdx) {
    BYTE* const litEnd = originalSeqStore->lit;
    size_t literalsBytes;
    size_t literalsBytesPreceding = 0;

    *resultSeqStore = *originalSeqStore;
    if (startIdx > 0) {
        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    }

    /* Move longLengthPos into the correct position if necessary */
    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
            resultSeqStore->longLengthType = ZSTD_llt_none;
        } else {
            resultSeqStore->longLengthPos -= (U32)startIdx;
    }   }
    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
    resultSeqStore->litStart += literalsBytesPreceding;
    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
        resultSeqStore->lit = litEnd;
    } else {
        resultSeqStore->lit = resultSeqStore->litStart + literalsBytes;
    }
    resultSeqStore->llCode += startIdx;
    resultSeqStore->mlCode += startIdx;
    resultSeqStore->ofCode += startIdx;
}
/**
 * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
 * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
 */
static U32
ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
{
    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
    assert(STORED_IS_REPCODE(offCode));
    if (adjustedOffCode == ZSTD_REP_NUM) {
        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
        assert(rep[0] > 0);
        return rep[0] - 1;
    }
    return rep[adjustedOffCode];
}
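
/* Worked example : with rep == {8, 4, 12}, an offCode meaning repcode 1 and
 * ll0 == 0 resolves to rep[0] == 8; the same offCode with ll0 == 1 shifts to
 * repcode 2 and resolves to rep[1] == 4. The special case above covers
 * repcode 3 combined with ll0 == 1, which resolves to rep[0] - 1 == 7. */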
/**
 * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
 * due to emission of RLE/raw blocks that disturb the offset history,
 * and replaces any repcodes within the seqStore that may be invalid.
 *
 * dRepcodes are updated as would be on the decompression side.
 * cRepcodes are updated exactly in accordance with the seqStore.
 *
 * Note : this function assumes seq->offBase respects the following numbering scheme :
 *        0 : invalid
 *        1-3 : repcode 1-3
 *        4+ : real_offset+3
 */
static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
                                          seqStore_t* const seqStore, U32 const nbSeq) {
    U32 idx = 0;
    for (; idx < nbSeq; ++idx) {
        seqDef* const seq = seqStore->sequencesStart + idx;
        U32 const ll0 = (seq->litLength == 0);
        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
        assert(seq->offBase > 0);
        if (STORED_IS_REPCODE(offCode)) {
            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
             * the repcode with the offset it actually references, determined by the compression
             * repcode history.
             */
            if (dRawOffset != cRawOffset) {
                seq->offBase = cRawOffset + ZSTD_REP_NUM;
        }   }
        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
         */
        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
    }
}
/* ZSTD_compressSeqStore_singleBlock():
 * Compresses a seqStore into a block with a block header, into the buffer dst.
 *
 * Returns the total size of that block (including header) or a ZSTD error code.
 */
static size_t
ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
                                  repcodes_t* const dRep, repcodes_t* const cRep,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  U32 lastBlock, U32 isPartition)
{
    const U32 rleMaxLength = 25;
    BYTE* op = (BYTE*)dst;
    const BYTE* ip = (const BYTE*)src;
    size_t cSize;
    size_t cSeqsSize;

    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
    repcodes_t const dRepOriginal = *dRep;
    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
    if (isPartition)
        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));

    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
                &zc->appliedParams,
                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
                srcSize,
                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                zc->bmi2);
    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");

    if (!zc->isFirstBlock &&
        cSeqsSize < rleMaxLength &&
        ZSTD_isRLE((BYTE const*)src, srcSize)) {
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        cSeqsSize = 1;
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    if (cSeqsSize == 0) {
        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else if (cSeqsSize == 1) {
        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
    } else {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
        cSize = ZSTD_blockHeaderSize + cSeqsSize;
        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
    }

    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
/* Struct to keep track of where we are in our recursive calls. */
typedef struct {
    U32* splitLocations;    /* Array of split indices */
    size_t idx;             /* The current index within splitLocations being worked on */
} seqStoreSplits;
#define MIN_SEQUENCES_BLOCK_SPLITTING 300

/* Helper function to perform the recursive search for block splits.
 * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
 * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
 * we do not recurse.
 *
 * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
 * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
 * In practice, recursion depth usually doesn't go beyond 4.
 *
 * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
 * maximum of 128 KB, this value is actually impossible to reach.
 */
static void
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
{
    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
    size_t estimatedOriginalSize;
    size_t estimatedFirstHalfSize;
    size_t estimatedSecondHalfSize;
    size_t midIdx = (startIdx + endIdx)/2;

    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
        return;
    }
    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
        return;
    }
    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
        splits->splitLocations[splits->idx] = (U32)midIdx;
        splits->idx++;
        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
    }
}
/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
 *
 * Returns the number of splits made (which equals the size of the partition table - 1).
 */
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
    seqStoreSplits splits = {partitions, 0};
    if (nbSeq <= 4) {
        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
        /* Refuse to try and split anything with less than 4 sequences */
        return 0;
    }
    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
    splits.splitLocations[splits.idx] = nbSeq;
    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
    return splits.idx;
}
/* ZSTD_compressBlock_splitBlock():
 * Attempts to split a given block into multiple blocks to improve compression ratio.
 *
 * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
 */
static size_t
ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
{
    size_t cSize = 0;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    size_t i = 0;
    size_t srcBytesTotal = 0;
    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);

    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
     * separate repcode histories that simulate repcode history on compression and decompression side,
     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
     *
     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
     *    a nocompress/RLE block.
     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
     *    the replacement offset value rather than the original repcode to update the repcode history.
     *    dRep also will be the final repcode history sent to the next block.
     *
     * See ZSTD_seqStore_resolveOffCodes() for more details.
     */
    repcodes_t dRep;
    repcodes_t cRep;
    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));

    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    if (numSplits == 0) {
        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
                                                                   &dRep, &cRep,
                                                                    op, dstCapacity,
                                                                    ip, blockSize,
                                                                    lastBlock, 0 /* isPartition */);
        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
        return cSizeSingleBlock;
    }

    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
    for (i = 0; i <= numSplits; ++i) {
        size_t srcBytes;
        size_t cSizeChunk;
        U32 const lastPartition = (i == numSplits);
        U32 lastBlockEntireSrc = 0;

        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
        srcBytesTotal += srcBytes;
        if (lastPartition) {
            /* This is the final partition, need to account for possible last literals */
            srcBytes += blockSize - srcBytesTotal;
            lastBlockEntireSrc = lastBlock;
        } else {
            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
        }

        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
                                                      &dRep, &cRep,
                                                       op, dstCapacity,
                                                       ip, srcBytes,
                                                       lastBlockEntireSrc, 1 /* isPartition */);
        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");

        ip += srcBytes;
        op += cSizeChunk;
        dstCapacity -= cSizeChunk;
        cSize += cSizeChunk;
        *currSeqStore = *nextSeqStore;
        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
    }
    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
     * for the next block.
     */
    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
    return cSize;
}
static size_t
ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize, U32 lastBlock)
{
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    U32 nbSeq;
    size_t cSize;
    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
    assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) {
            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
            return cSize;
        }
        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
    }

    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
    return cSize;
}
static size_t
ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                            void* dst, size_t dstCapacity,
                            const void* src, size_t srcSize, U32 frame)
{
    /* This is the upper bound for the length of an rle block.
     * This isn't the actual upper bound. Finding the real threshold
     * needs further investigation.
     */
    const U32 rleMaxLength = 25;
    size_t cSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    if (zc->seqCollector.collectSequences) {
        ZSTD_copyBlockSequences(zc);
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
        return 0;
    }

    /* encode sequences and literals */
    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

    if (frame &&
        /* We don't want to emit our first block as a RLE even if it qualifies because
         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
         * This is only an issue for zstd <= v1.4.3
         */
        !zc->isFirstBlock &&
        cSize < rleMaxLength &&
        ZSTD_isRLE(ip, srcSize))
    {
        cSize = 1;
        op[0] = ip[0];
    }

out:
    if (!ZSTD_isError(cSize) && cSize > 1) {
        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               const size_t bss, U32 lastBlock)
{
    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
    if (bss == ZSTDbss_compress) {
        if (/* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
             * This is only an issue for zstd <= v1.4.3
             */
            !zc->isFirstBlock &&
            ZSTD_maybeRLE(&zc->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize))
        {
            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
        }
        /* Attempt superblock compression.
         *
         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
         * standard ZSTD_compressBound(). This is a problem, because even if we have
         * space now, taking an extra byte now could cause us to run out of space later
         * and violate ZSTD_compressBound().
         *
         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
         *
         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
         * uncompressed block in these cases:
         *   * cSize == 0: Return code for an uncompressed block.
         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
         *     output space, so we don't need to check it.
         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
         *     emit an uncompressed block.
         */
        {   size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
            if (cSize != ERROR(dstSize_tooSmall)) {
                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                    return cSize;
    }   }   }   }

    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
    /* Superblock compression failed, attempt to emit a single no compress block.
     * The decoder will be able to stream this block since it is uncompressed.
     */
    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
}
static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               U32 lastBlock)
{
    size_t cSize = 0;
    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");

    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");

    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         void const* ip,
                                         void const* iend)
{
    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
    U32 const maxDist = (U32)1 << params->cParams.windowLog;
    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        ZSTD_cwksp_mark_tables_dirty(ws);
        ZSTD_reduceIndex(ms, params, correction);
        ZSTD_cwksp_mark_tables_clean(ws);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
/*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
 *   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *   Frame is supposed already started (header already produced)
 *  @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                       const void* src, size_t srcSize,
                                       U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;

    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        xxh64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
        ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize;
            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                assert(cSize > 0);
                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
            } else {
                cSize = ZSTD_compressBlock_internal(cctx,
                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                        ip, blockSize, 1 /* frame */);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");

                if (cSize == 0) {  /* block is not compressible */
                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
                } else {
                    U32 const cBlockHeader = cSize == 1 ?
                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                    MEM_writeLE24(op, cBlockHeader);
                    cSize += ZSTD_blockHeaderSize;
                }
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            cctx->isFirstBlock = 0;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32   const checksumFlag = params->fParams.checksumFlag>0;
    U32   const windowSize = (U32)1 << params->cParams.windowLog;
    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32   const fcsCode = params->fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
                    "dst buf is too small to fit worst-case frame header size.");
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
    if (params->format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default:
            assert(0); /* impossible */
            ZSTD_FALLTHROUGH;
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
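
/* Worked example : a single-segment frame with checksum, no dictID, and
 * pledgedSrcSize == 1000 gets fcsCode == 1, so the descriptor byte is
 * 0 + (1<<2) + (1<<5) + (1<<6) == 0x64, followed by a 2-byte little-endian
 * field storing 1000 - 256 == 744. */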
/* ZSTD_writeSkippableFrame() :
 * Writes out a skippable frame with the specified magic number variant (16 are supported),
 * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
 *
 * Returns the total number of bytes written, or a ZSTD error code.
 */
size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize, unsigned magicVariant) {
    BYTE* op = (BYTE*)dst;
    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
                    dstSize_tooSmall, "Not enough room for skippable frame");
    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");

    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
    MEM_writeLE32(op+4, (U32)srcSize);
    ZSTD_memcpy(op+8, src, srcSize);
    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
}
/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
{
    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
                    "dst buf is too small to write frame trailer empty block.");
    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
        MEM_writeLE24(dst, cBlockHeader24);
        return ZSTD_blockHeaderSize;
    }
}
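
/* Worked example : bt_raw == 0, so the trailer block header is the 24-bit
 * value 1 (lastBlock bit set, raw type, size 0), i.e. bytes 01 00 00. */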
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                    "wrong cctx stage");
    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
                    parameter_unsupported,
                    "incompatible with ldm");
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.size = nbSeq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.posInSequence = 0;
    return 0;
}
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
        ms->forceNonContiguous = 0;
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(
            ms, &cctx->workspace, &cctx->appliedParams,
            src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}
size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
    assert(!ZSTD_checkCParams(cParams));
    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}
size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }

    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
}
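
/* Illustrative usage sketch (not compiled) : compressing one raw block with
 * the block-level API. `someInput`/`someInputSize` are hypothetical; real
 * callers must also handle the cSize == 0 "not compressible" case and manage
 * block history themselves. */
#if 0
{   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    char out[ZSTD_BLOCKSIZE_MAX];
    size_t cSize;
    ZSTD_compressBegin(cctx, 3 /* compression level */);
    cSize = ZSTD_compressBlock(cctx, out, sizeof(out), someInput, someInputSize);
    /* cSize == 0 means the block was not compressible : the caller must then
     * store it raw and signal that to the decoding side. */
    ZSTD_freeCCtx(cctx);
}
#endif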
/*! ZSTD_loadDictionaryContent() :
 *  @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                         ldmState_t* ls,
                                         ZSTD_cwksp* ws,
                                         ZSTD_CCtx_params const* params,
                                         const void* src, size_t srcSize,
                                         ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
         * Dictionaries right at the edge will immediately trigger overflow
         * correction, but I don't want to insert extra constraints here.
         */
        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
        /* We must have cleared our windows when our source is this large. */
        assert(ZSTD_window_isEmpty(ms->window));
        if (loadLdmDict)
            assert(ZSTD_window_isEmpty(ls->window));
        /* If the dictionary is too large, only load the suffix of the dictionary. */
        if (srcSize > maxDictSize) {
            ip = iend - maxDictSize;
            src = ip;
            srcSize = maxDictSize;
    }   }

    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
    ms->forceNonContiguous = params->deterministicRefPrefix;

    if (loadLdmDict) {
        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
    }

    if (srcSize <= HASH_READ_SIZE) return 0;

    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);

    if (loadLdmDict)
        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);

    switch(params->cParams.strategy)
    {
    case ZSTD_fast:
        ZSTD_fillHashTable(ms, iend, dtlm);
        break;
    case ZSTD_dfast:
        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
        break;

    case ZSTD_greedy:
    case ZSTD_lazy:
    case ZSTD_lazy2:
        assert(srcSize >= HASH_READ_SIZE);
        if (ms->dedicatedDictSearch) {
            assert(ms->chainTable != NULL);
            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
        } else {
            assert(params->useRowMatchFinder != ZSTD_ps_auto);
            if (params->useRowMatchFinder == ZSTD_ps_enable) {
                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
                ZSTD_memset(ms->tagTable, 0, tagTableSize);
                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using row-based hash table for lazy dict");
            } else {
                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
            }
        }
        break;

    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
    case ZSTD_btopt:
    case ZSTD_btultra:
    case ZSTD_btultra2:
        assert(srcSize >= HASH_READ_SIZE);
        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
        break;

    default:
        assert(0);  /* not possible : not a valid strategy id */
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}
/* Dictionaries that assign zero probability to symbols that show up cause problems
 * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
 * and only dictionaries with 100% valid symbols can be assumed valid.
 */
static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
{
    U32 s;
    if (dictMaxSymbolValue < maxSymbolValue) {
        return FSE_repeat_check;
    }
    for (s = 0; s <= maxSymbolValue; ++s) {
        if (normalizedCounter[s] == 0) {
            return FSE_repeat_check;
    }   }
    return FSE_repeat_valid;
}
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize)
{
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
    const BYTE* const dictEnd = dictPtr + dictSize;
    dictPtr += 8;
    bs->entropy.huf.repeatMode = HUF_repeat_check;

    {   unsigned maxSymbolValue = 255;
        unsigned hasZeroWeights = 1;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
            dictEnd-dictPtr, &hasZeroWeights);

        /* We only set the loaded table as valid if it contains all non-zero
         * weights. Otherwise, we set it to check */
        if (!hasZeroWeights)
            bs->entropy.huf.repeatMode = HUF_repeat_valid;

        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
        dictPtr += hufHeaderSize;
    }

    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        dictPtr += offcodeHeaderSize;
    }

    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
        dictPtr += matchlengthHeaderSize;
    }

    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted, "");
        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
        dictPtr += litlengthHeaderSize;
    }

    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));

        /* All repCodes must be <= dictContentSize and != 0 */
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
    }   }   }

    return dictPtr - (const BYTE*)dict;
}
/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 * @return : dictID, or an error code
 *  assumptions : magic number supposed already checked
 *                dictSize supposed >= 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_cwksp* ws,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    size_t dictID;
    size_t eSize;
    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize >= 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
    dictPtr += eSize;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
    }
    return dictID;
}
/* ZSTD_compress_insertDictionary() :
*   @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                               ldmState_t* ls,
                               ZSTD_cwksp* ws,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    if ((dict==NULL) || (dictSize<8)) {
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        return 0;
    }

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(
                ms, ls, ws, params, dict, dictSize, dtlm);
        }
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(
        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
}
#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
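
/* Worked example (informational only): with a 64 KB dictionary, cdict
 * parameters are reused for any pledgedSrcSize below 128 KB (the cutoff),
 * for any pledgedSrcSize below 64 KB * 6 = 384 KB (the multiplier), and
 * whenever the source size is unknown; only a known source larger than
 * 384 KB causes parameters to be re-derived from the cdict's compression
 * level, since the dictionary's benefit fades as the input outgrows it.
 */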
/*! ZSTD_compressBegin_internal() :
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if ( (cdict)
      && (cdict->dictContentSize > 0)
      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
        || cdict->compressionLevel == 0)
      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                     dictContentSize,
                                     ZSTDcrp_makeClean, zbuff) , "");
    {   size_t const dictID = cdict ?
                ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
                        cdict->dictContentSize, cdict->dictContentType, dtlm,
                        cctx->entropyWorkspace)
              : ZSTD_compress_insertDictionary(
                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
                        dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
        cctx->dictContentSize = dictContentSize;
    }
    return 0;
}
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
/*! ZSTD_compressBegin_advanced() :
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
                             const void* dict, size_t dictSize,
                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compressBegin_advanced_internal(cctx,
                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                            NULL /*cdict*/,
                                            &cctxParams, pledgedSrcSize);
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params cctxParams;
    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
{
    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
}
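
/* Usage sketch for the buffer-less API above (illustrative only, error
 * handling elided; assumes a caller-managed cctx and contiguous chunks):
 *
 *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *   (void)ZSTD_compressBegin(cctx, 3);                // level 3, no dict
 *   // for each chunk but the last :
 *   //   pos += ZSTD_compressContinue(cctx, dst+pos, dstCap-pos, chunk, chunkSize);
 *   // last chunk : ZSTD_compressEnd() also writes the epilogue below
 *   //   pos += ZSTD_compressEnd(cctx, dst+pos, dstCap-pos, last, lastSize);
 *   ZSTD_freeCCtx(cctx);
 */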
/*! ZSTD_writeEpilogue() :
*   Ends a frame.
*   @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}
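
/* Epilogue layout produced above (informational only): at most one frame
 * header (empty-frame case), then a 3-byte raw block header with the
 * last-block bit set and a zero content size, then an optional 4-byte
 * little-endian checksum (low 32 bits of the running xxhash64) when
 * fParams.checksumFlag is set.
 */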
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
{
    (void)cctx;
    (void)extraCSize;
}
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize)
{
    size_t endResult;
    size_t const cSize = ZSTD_compressContinue_internal(cctx,
                                dst, dstCapacity, src, srcSize,
                                1 /* frame mode */, 1 /* last chunk */);
    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
        DEBUGLOG(4, "end of frame : controlling src size");
        RETURN_ERROR_IF(
            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
            srcSize_wrong,
            "error : pledgedSrcSize = %u, while realSrcSize = %u",
            (unsigned)cctx->pledgedSrcSizePlusOne-1,
            (unsigned)cctx->consumedSrcSize);
    }
    ZSTD_CCtx_trace(cctx, endResult);
    return cSize + endResult;
}
size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               ZSTD_parameters params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced");
    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
    return ZSTD_compress_advanced_internal(cctx,
                                           dst, dstCapacity,
                                           src, srcSize,
                                           dict, dictSize,
                                           &cctx->simpleApiParams);
}
size_t ZSTD_compress_advanced_internal(
        ZSTD_CCtx* cctx,
        void* dst, size_t dstCapacity,
        const void* src, size_t srcSize,
        const void* dict, size_t dictSize,
        const ZSTD_CCtx_params* params)
{
    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
                         params, srcSize, ZSTDb_not_buffered) , "");
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                         const void* dict, size_t dictSize,
                               int compressionLevel)
{
    {   ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
        assert(params.fParams.contentSizeFlag == 1);
        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
    }
    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
}
size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
                         void* dst, size_t dstCapacity,
                   const void* src, size_t srcSize,
                         int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
    assert(cctx != NULL);
    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}
size_t ZSTD_compress(void* dst, size_t dstCapacity,
               const void* src, size_t srcSize,
                     int compressionLevel)
{
    size_t result;
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
    ZSTD_freeCCtx(cctx);
    return result;
}
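
/* Usage sketch (illustrative only): single-shot compression with a
 * worst-case sized destination buffer, caller-managed allocation assumed:
 *
 *   size_t const dstCap = ZSTD_compressBound(srcSize);
 *   // void* dst = <allocation of dstCap bytes>;
 *   size_t const cSize = ZSTD_compress(dst, dstCap, src, srcSize, 3);
 *   if (ZSTD_isError(cSize)) { ... }   // handle error
 */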
/* =====   Dictionary API   ===== */
/*! ZSTD_estimateCDictSize_advanced() :
 *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
size_t ZSTD_estimateCDictSize_advanced(
        size_t dictSize, ZSTD_compressionParameters cParams,
        ZSTD_dictLoadMethod_e dictLoadMethod)
{
    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
          * in case we are using DDS with row-hash. */
         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
}
size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
}
size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support sizeof on NULL */
    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
    /* cdict may be in the workspace */
    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
        + ZSTD_cwksp_sizeof(&cdict->workspace);
}
static size_t ZSTD_initCDict_internal(
                    ZSTD_CDict* cdict,
              const void* dictBuffer, size_t dictSize,
                    ZSTD_dictLoadMethod_e dictLoadMethod,
                    ZSTD_dictContentType_e dictContentType,
                    ZSTD_CCtx_params params)
{
    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
    assert(!ZSTD_checkCParams(params.cParams));
    cdict->matchState.cParams = params.cParams;
    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
        cdict->dictContent = dictBuffer;
    } else {
        void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
        cdict->dictContent = internalBuffer;
        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
    }
    cdict->dictContentSize = dictSize;
    cdict->dictContentType = dictContentType;

    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

    /* Reset the state to no dictionary */
    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
    FORWARD_IF_ERROR(ZSTD_reset_matchState(
        &cdict->matchState,
        &cdict->workspace,
        &params.cParams,
        params.useRowMatchFinder,
        ZSTDcrp_makeClean,
        ZSTDirp_reset,
        ZSTD_resetTarget_CDict), "");
    /* (Maybe) load the dictionary
     * Skips loading the dictionary if it is < 8 bytes.
     */
    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
        params.fParams.contentSizeFlag = 1;
        {   size_t const dictID = ZSTD_compress_insertDictionary(
                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
                    &params, cdict->dictContent, cdict->dictContentSize,
                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
            assert(dictID <= (size_t)(U32)-1);
            cdict->dictID = (U32)dictID;
        }
    }

    return 0;
}
static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_paramSwitch_e useRowMatchFinder,
                                      U32 enableDedicatedDictSearch,
                                      ZSTD_customMem customMem)
{
    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;

    {   size_t const workspaceSize =
            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
            (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
        ZSTD_cwksp ws;
        ZSTD_CDict* cdict;

        if (!workspace) {
            ZSTD_customFree(workspace, customMem);
            return NULL;
        }

        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);

        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        assert(cdict != NULL);
        ZSTD_cwksp_move(&cdict->workspace, &ws);
        cdict->customMem = customMem;
        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
        cdict->useRowMatchFinder = useRowMatchFinder;
        return cdict;
    }
}
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType,
                                      ZSTD_compressionParameters cParams,
                                      ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams;
    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
    ZSTD_CCtxParams_init(&cctxParams, 0);
    cctxParams.cParams = cParams;
    cctxParams.customMem = customMem;
    return ZSTD_createCDict_advanced2(
        dictBuffer, dictSize,
        dictLoadMethod, dictContentType,
        &cctxParams, customMem);
}
ZSTD_CDict* ZSTD_createCDict_advanced2(
        const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod,
        ZSTD_dictContentType_e dictContentType,
        const ZSTD_CCtx_params* originalCctxParams,
        ZSTD_customMem customMem)
{
    ZSTD_CCtx_params cctxParams = *originalCctxParams;
    ZSTD_compressionParameters cParams;
    ZSTD_CDict* cdict;

    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;

    if (cctxParams.enableDedicatedDictSearch) {
        cParams = ZSTD_dedicatedDictSearch_getCParams(
            cctxParams.compressionLevel, dictSize);
        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
    } else {
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
        /* Fall back to non-DDSS params */
        cctxParams.enableDedicatedDictSearch = 0;
        cParams = ZSTD_getCParamsFromCCtxParams(
            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    }

    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
    cctxParams.cParams = cParams;
    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);

    cdict = ZSTD_createCDict_advanced_internal(dictSize,
                        dictLoadMethod, cctxParams.cParams,
                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                        customMem);

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                    dict, dictSize,
                                    dictLoadMethod, dictContentType,
                                    cctxParams) )) {
        ZSTD_freeCDict(cdict);
        return NULL;
    }

    return cdict;
}
ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                        ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                        cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}
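
/* Usage sketch (illustrative only): digest a dictionary once, then reuse
 * it across many frames to amortize table-building cost:
 *
 *   ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
 *   // for each frame :
 *   //   ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
 *   ZSTD_freeCDict(cdict);
 */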
ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
                                     cParams, ZSTD_defaultCMem);
    if (cdict)
        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
    return cdict;
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;   /* support free on NULL */
    {   ZSTD_customMem const cMem = cdict->customMem;
        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
        ZSTD_cwksp_free(&cdict->workspace, cMem);
        if (!cdictInWorkspace) {
            ZSTD_customFree(cdict, cMem);
        }
        return 0;
    }
}
/*! ZSTD_initStaticCDict_advanced() :
 *  Generate a digested dictionary in provided memory area.
 *  workspace: The memory area to emplace the dictionary into.
 *             Provided pointer must be 8-bytes aligned.
 *             It must outlive dictionary usage.
 *  workspaceSize: Use ZSTD_estimateCDictSize()
 *                 to determine how large workspace must be.
 *  cParams : use ZSTD_getCParams() to transform a compression level
 *            into its relevant cParams.
 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
 *  Note : there is no corresponding "free" function.
 *         Since workspace was allocated externally, it must be freed externally.
 */
const ZSTD_CDict* ZSTD_initStaticCDict(
                                 void* workspace, size_t workspaceSize,
                           const void* dict, size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
                                 ZSTD_dictContentType_e dictContentType,
                                 ZSTD_compressionParameters cParams)
{
    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
                            + matchStateSize;
    ZSTD_CDict* cdict;
    ZSTD_CCtx_params params;

    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */

    {
        ZSTD_cwksp ws;
        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
        if (cdict == NULL) return NULL;
        ZSTD_cwksp_move(&cdict->workspace, &ws);
    }

    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
    if (workspaceSize < neededSize) return NULL;

    ZSTD_CCtxParams_init(&params, 0);
    params.cParams = cParams;
    params.useRowMatchFinder = useRowMatchFinder;
    cdict->useRowMatchFinder = useRowMatchFinder;

    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                              dict, dictSize,
                                              dictLoadMethod, dictContentType,
                                              params) ))
        return NULL;

    return cdict;
}
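
/* Usage sketch (illustrative only): building a CDict inside
 * caller-provided storage, sized with the estimator defined earlier:
 *
 *   size_t const wkspSize = ZSTD_estimateCDictSize(dictSize, 3);
 *   // void* wksp = <8-byte-aligned allocation of wkspSize bytes>;
 *   const ZSTD_CDict* const cdict = ZSTD_initStaticCDict(wksp, wkspSize,
 *                   dictBuf, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto,
 *                   ZSTD_getCParams(3, ZSTD_CONTENTSIZE_UNKNOWN, dictSize));
 *   // no free function : release wksp itself when done
 */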
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
{
    assert(cdict != NULL);
    return cdict->matchState.cParams;
}
/*! ZSTD_getDictID_fromCDict() :
 *  Provides the dictID of the dictionary loaded into `cdict`.
 *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
 *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
{
    if (cdict==NULL) return 0;
    return cdict->dictID;
}
/* ZSTD_compressBegin_usingCDict_internal() :
 * Implementation of various ZSTD_compressBegin_usingCDict* functions.
 */
static size_t ZSTD_compressBegin_usingCDict_internal(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    ZSTD_CCtx_params cctxParams;
    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
    /* Initialize the cctxParams from the cdict */
    {
        ZSTD_parameters params;
        params.fParams = fParams;
        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
                        || cdict->compressionLevel == 0 ) ?
                ZSTD_getCParamsFromCDict(cdict)
              : ZSTD_getCParams(cdict->compressionLevel,
                                pledgedSrcSize,
                                cdict->dictContentSize);
        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
    }
    /* Increase window log to fit the entire dictionary and source if the
     * source size is known. Limit the increase to 19, which is the
     * window log for compression level 1 with the largest source size.
     */
    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
    }
    return ZSTD_compressBegin_internal(cctx,
                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
                                        cdict,
                                        &cctxParams, pledgedSrcSize,
                                        ZSTDb_not_buffered);
}
/* ZSTD_compressBegin_usingCDict_advanced() :
 * This function is DEPRECATED.
 * cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict_advanced(
    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
{
    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
}

/* ZSTD_compressBegin_usingCDict() :
 * cdict must be != NULL */
size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
}
/*! ZSTD_compress_usingCDict_internal():
 * Implementation of various ZSTD_compress_usingCDict* functions.
 */
static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
/*! ZSTD_compress_usingCDict_advanced():
 * This function is DEPRECATED.
 */
size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
{
    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
/*! ZSTD_compress_usingCDict() :
 *  Compression using a digested Dictionary.
 *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
 *  Note that compression parameters are decided at CDict creation time
 *  while frame parameters are hardcoded */
size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                const ZSTD_CDict* cdict)
{
    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
}
/* ******************************************************************
*  Streaming
********************************************************************/

ZSTD_CStream* ZSTD_createCStream(void)
{
    DEBUGLOG(3, "ZSTD_createCStream");
    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
}

ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
{
    return ZSTD_initStaticCCtx(workspace, workspaceSize);
}

ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
{   /* CStream and CCtx are now same object */
    return ZSTD_createCCtx_advanced(customMem);
}

size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
{
    return ZSTD_freeCCtx(zcs);   /* same object */
}
/*======   Initialization   ======*/

size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }

size_t ZSTD_CStreamOutSize(void)
{
    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
}
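
/* Sizing note (informational): ZSTD_CStreamOutSize() guarantees room to
 * flush at least one complete compressed block per call, e.g. :
 *
 *   size_t const inCap  = ZSTD_CStreamInSize();   // one block of input
 *   size_t const outCap = ZSTD_CStreamOutSize();  // worst-case one block
 */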
static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
{
    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
        return ZSTD_cpm_attachDict;
    else
        return ZSTD_cpm_noAttachDict;
}
/* ZSTD_resetCStream():
 * pledgedSrcSize == 0 means "unknown" */
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}
/*! ZSTD_initCStream_internal() :
 *  Note : for lib/compress only. Used by zstdmt_compress.c.
 *  Assumption 1 : params are valid
 *  Assumption 2 : either dict, or cdict, is defined, not both */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
                    const ZSTD_CCtx_params* params,
                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_internal");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
    zcs->requestedParams = *params;
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
    if (dict) {
        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    } else {
        /* Dictionary is cleared if !cdict */
        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    }
    return 0;
}
/* ZSTD_initCStream_usingCDict_advanced() :
 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                            const ZSTD_CDict* cdict,
                                            ZSTD_frameParameters fParams,
                                            unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    zcs->requestedParams.fParams = fParams;
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    return 0;
}

/* note : cdict must outlive compression session */
size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
    return 0;
}
/* ZSTD_initCStream_advanced() :
 * pledgedSrcSize must be exact.
 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
 * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize,
                                 ZSTD_parameters params, unsigned long long pss)
{
    /* for compatibility with older programs relying on this behavior.
     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
     * This line will be removed in the future.
     */
    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_advanced");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
    return 0;
}
size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
{
    /* temporary : 0 interpreted as "unknown" during transition period.
     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
     * 0 will be interpreted as "empty" in the future.
     */
    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
    return 0;
}
size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
{
    DEBUGLOG(4, "ZSTD_initCStream");
    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
    return 0;
}
/*======   Compression   ======*/

static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
{
    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
    if (hintInSize==0) hintInSize = cctx->blockSize;
    return hintInSize;
}
/* ZSTD_compressStream_generic():
 *  internal function for all *compressStream*() variants
 * @return : hint size for next input */
static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
                                          ZSTD_outBuffer* output,
                                          ZSTD_inBuffer* input,
                                          ZSTD_EndDirective const flushMode)
{
    const char* const istart = (const char*)input->src;
    const char* const iend = input->size != 0 ? istart + input->size : istart;
    const char* ip = input->pos != 0 ? istart + input->pos : istart;
    char* const ostart = (char*)output->dst;
    char* const oend = output->size != 0 ? ostart + output->size : ostart;
    char* op = output->pos != 0 ? ostart + output->pos : ostart;
    U32 someMoreWork = 1;

    /* check expectations */
    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
        assert(zcs->inBuff != NULL);
        assert(zcs->inBuffSize > 0);
    }
    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
        assert(zcs->outBuff != NULL);
        assert(zcs->outBuffSize > 0);
    }
    assert(output->pos <= output->size);
    assert(input->pos <= input->size);
    assert((U32)flushMode <= (U32)ZSTD_e_end);

    while (someMoreWork) {
        switch(zcs->streamStage)
        {
        case zcss_init:
            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");

        case zcss_load:
            if ( (flushMode == ZSTD_e_end)
              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
              && (zcs->inBuffPos == 0) ) {
                /* shortcut to compression pass directly into output buffer */
                size_t const cSize = ZSTD_compressEnd(zcs,
                                                op, oend-op, ip, iend-ip);
                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
                ip = iend;
                op += cSize;
                zcs->frameEnded = 1;
                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                someMoreWork = 0; break;
            }
            /* complete loading into inBuffer in buffered mode */
            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
                size_t const loaded = ZSTD_limitCopy(
                                        zcs->inBuff + zcs->inBuffPos, toLoad,
                                        ip, iend-ip);
                zcs->inBuffPos += loaded;
                if (loaded != 0)
                    ip += loaded;
                if ( (flushMode == ZSTD_e_continue)
                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
                    /* not enough input to fill full block : stop here */
                    someMoreWork = 0; break;
                }
                if ( (flushMode == ZSTD_e_flush)
                  && (zcs->inBuffPos == zcs->inToCompress) ) {
                    /* empty */
                    someMoreWork = 0; break;
                }
            }
            /* compress current block (note : this stage cannot be stopped in the middle) */
            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
                void* cDst;
                size_t cSize;
                size_t oSize = oend-op;
                size_t const iSize = inputBuffered
                    ? zcs->inBuffPos - zcs->inToCompress
                    : MIN((size_t)(iend - ip), zcs->blockSize);
                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
                    cDst = op;   /* compress into output buffer, to skip flush stage */
                else
                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
                if (inputBuffered) {
                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
                    cSize = lastBlock ?
                            ZSTD_compressEnd(zcs, cDst, oSize,
                                        zcs->inBuff + zcs->inToCompress, iSize) :
                            ZSTD_compressContinue(zcs, cDst, oSize,
                                        zcs->inBuff + zcs->inToCompress, iSize);
                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
                    zcs->frameEnded = lastBlock;
                    /* prepare next block */
                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
                    if (zcs->inBuffTarget > zcs->inBuffSize)
                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
                             (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
                    if (!lastBlock)
                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
                    zcs->inToCompress = zcs->inBuffPos;
                } else {
                    unsigned const lastBlock = (ip + iSize == iend);
                    assert(flushMode == ZSTD_e_end /* Already validated */);
                    cSize = lastBlock ?
                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
                    /* Consume the input prior to error checking to mirror buffered mode. */
                    if (iSize > 0)
                        ip += iSize;
                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
                    zcs->frameEnded = lastBlock;
                }
                if (cDst == op) {  /* no need to flush */
                    op += cSize;
                    if (zcs->frameEnded) {
                        DEBUGLOG(5, "Frame completed directly in outBuffer");
                        someMoreWork = 0;
                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    }
                    break;
                }
                zcs->outBuffContentSize = cSize;
                zcs->outBuffFlushedSize = 0;
                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
            }
            ZSTD_FALLTHROUGH;
        case zcss_flush:
            DEBUGLOG(5, "flush stage");
            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
                if (flushed)
                    op += flushed;
                zcs->outBuffFlushedSize += flushed;
                if (toFlush!=flushed) {
                    /* flush not fully completed, presumably because dst is too small */
                    assert(op==oend);
                    someMoreWork = 0;
                    break;
                }
                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
                if (zcs->frameEnded) {
                    DEBUGLOG(5, "Frame completed on flush");
                    someMoreWork = 0;
                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
                    break;
                }
                zcs->streamStage = zcss_load;
                break;
            }

        default: /* impossible */
            assert(0);
        }
    }

    input->pos = ip - istart;
    output->pos = op - ostart;
    if (zcs->frameEnded) return 0;
    return ZSTD_nextInputSizeHint(zcs);
}
static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
{
    return ZSTD_nextInputSizeHint(cctx);
}

size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
    return ZSTD_nextInputSizeHint_MTorST(zcs);
}
/* After a compression call set the expected input/output buffer.
 * This is validated at the start of the next compression call.
 */
static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
{
    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
        cctx->expectedInBuffer = *input;
    }
    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
        cctx->expectedOutBufferSize = output->size - output->pos;
    }
}
/* Validate that the input/output buffers match the expectations set by
 * ZSTD_setBufferExpectations.
 */
static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
                                        ZSTD_outBuffer const* output,
                                        ZSTD_inBuffer const* input,
                                        ZSTD_EndDirective endOp)
{
    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
        if (endOp != ZSTD_e_end)
            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
    }
    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
        size_t const outBufferSize = output->size - output->pos;
        if (cctx->expectedOutBufferSize != outBufferSize)
            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
    }
    return 0;
}
static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
                                             ZSTD_EndDirective endOp,
                                             size_t inSize)
{
    ZSTD_CCtx_params params = cctx->requestedParams;
    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
    if (cctx->cdict && !cctx->localDict.cdict) {
        /* Let the cdict's compression level take priority over the requested params.
         * But do not take the cdict's compression level if the "cdict" is actually a localDict
         * generated from ZSTD_initLocalDict().
         */
        params.compressionLevel = cctx->cdict->compressionLevel;
    }
    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
    {   size_t const dictSize = prefixDict.dict
                ? prefixDict.dictSize
                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &params, cctx->pledgedSrcSizePlusOne-1,
                dictSize, mode);
    }

    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);

    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
                cctx->cdict,
                &params, pledgedSrcSize,
                ZSTDb_buffered) , "");
        assert(cctx->appliedParams.nbWorkers == 0);
        cctx->inToCompress = 0;
        cctx->inBuffPos = 0;
        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
            /* for small input: avoid automatic flush on reaching end of block, since
             * it would require to add a 3-bytes null block to end frame
             */
            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
        } else {
            cctx->inBuffTarget = 0;
        }
        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
        cctx->streamStage = zcss_load;
        cctx->frameEnded = 0;
    }
    return 0;
}
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions */
    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
    assert(cctx != NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
    }
    /* end of transparent initialization stage */

    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
    /* compression stage */
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    ZSTD_setBufferExpectations(cctx, output, input);
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}
size_t ZSTD_compressStream2_simpleArgs (
                ZSTD_CCtx* cctx,
                void* dst, size_t dstCapacity, size_t* dstPos,
          const void* src, size_t srcSize, size_t* srcPos,
                ZSTD_EndDirective endOp)
{
    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
    *dstPos = output.pos;
    *srcPos = input.pos;
    return cErr;
}
size_t ZSTD_compress2(ZSTD_CCtx* cctx,
                      void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
    /* Enable stable input/output buffers. */
    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
    {   size_t oPos = 0;
        size_t iPos = 0;
        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
                                        dst, dstCapacity, &oPos,
                                        src, srcSize, &iPos,
                                        ZSTD_e_end);
        /* Reset to the original values. */
        cctx->requestedParams.inBufferMode = originalInBufferMode;
        cctx->requestedParams.outBufferMode = originalOutBufferMode;
        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
        if (result != 0) {  /* compression not completed, due to lack of output space */
            assert(oPos == dstCapacity);
            RETURN_ERROR(dstSize_tooSmall, "");
        }
        assert(iPos == srcSize);   /* all input is expected consumed */
        return oPos;
    }
}
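
/* Design note (informational): ZSTD_compress2() reuses the streaming path
 * with both buffers forced to ZSTD_bm_stable, so the single-shot call
 * compresses directly between caller buffers without the staging copies
 * of buffered mode. A minimal call looks like :
 *
 *   size_t const cSize = ZSTD_compress2(cctx,
 *                            dst, ZSTD_compressBound(srcSize),
 *                            src, srcSize);
 */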
typedef struct {
    U32 idx;             /* Index in array of ZSTD_Sequence */
    U32 posInSequence;   /* Position within sequence at idx */
    size_t posInSrc;     /* Number of bytes given by sequences provided so far */
} ZSTD_sequencePosition;
/* ZSTD_validateSequence() :
 * @offCode : is presumed to follow format required by ZSTD_storeSeq()
 * @returns a ZSTD error code if sequence is not valid
 */
static size_t
ZSTD_validateSequence(U32 offCode, U32 matchLength,
                      size_t posInSrc, U32 windowLog, size_t dictSize)
{
    U32 const windowSize = 1 << windowLog;
    /* posInSrc represents the amount of data the decoder would decode up to this point.
     * As long as the amount of data decoded is less than or equal to window size, offsets may be
     * larger than the total length of output decoded in order to reference the dict, even larger than
     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
     */
    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
    return 0;
}
/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
{
    U32 offCode = STORE_OFFSET(rawOffset);

    if (!ll0 && rawOffset == rep[0]) {
        offCode = STORE_REPCODE_1;
    } else if (rawOffset == rep[1]) {
        offCode = STORE_REPCODE(2 - ll0);
    } else if (rawOffset == rep[2]) {
        offCode = STORE_REPCODE(3 - ll0);
    } else if (ll0 && rawOffset == rep[0] - 1) {
        offCode = STORE_REPCODE_3;
    }
    return offCode;
}
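
/* Worked example (informational, values hypothetical): with repcode
 * history rep = {8, 16, 32} :
 *   rawOffset 8,  litLength > 0  => STORE_REPCODE_1
 *   rawOffset 16, litLength == 0 => STORE_REPCODE(2 - 1) == STORE_REPCODE_1
 *   rawOffset 7,  litLength == 0 => rep[0] - 1 => STORE_REPCODE_3
 *   rawOffset 20 (no rep match)  => STORE_OFFSET(20)
 */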
/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
 * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
 */
static size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
                                               ZSTD_sequencePosition* seqPos,
                                         const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                         const void* src, size_t blockSize)
{
    U32 idx = seqPos->idx;
    BYTE const* ip = (BYTE const*)(src);
    const BYTE* const iend = ip + blockSize;
    repcodes_t updatedRepcodes;
    U32 dictSize;

    if (cctx->cdict) {
        dictSize = (U32)cctx->cdict->dictContentSize;
    } else if (cctx->prefixDict.dict) {
        dictSize = (U32)cctx->prefixDict.dictSize;
    } else {
        dictSize = 0;
    }
    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
        U32 const litLength = inSeqs[idx].litLength;
        U32 const ll0 = (litLength == 0);
        U32 const matchLength = inSeqs[idx].matchLength;
        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);

        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
        if (cctx->appliedParams.validateSequences) {
            seqPos->posInSrc += litLength + matchLength;
            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
                                                cctx->appliedParams.cParams.windowLog, dictSize),
                                                "Sequence validation failed");
        }
        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
        ip += matchLength + litLength;
    }
    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

    if (inSeqs[idx].litLength) {
        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
        ip += inSeqs[idx].litLength;
        seqPos->posInSrc += inSeqs[idx].litLength;
    }
    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
    seqPos->idx = idx+1;
    return 0;
}
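
/* Illustrative input (informational): with explicit block delimiters, a
 * block covering "abcdabcd" could be described as :
 *
 *   ZSTD_Sequence seqs[] = {
 *       { 4, 4, 4, 0 },   // offset 4, 4 literals "abcd", match of 4
 *       { 0, 0, 0, 0 },   // block delimiter : offset==0 && matchLength==0
 *   };
 *
 * The loop above stops at the delimiter and requires the bytes consumed
 * to equal blockSize exactly.
 */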
/* Returns the number of bytes to move the current read position back by. Only non-zero
 * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
 * went wrong.
 *
 * This function will attempt to scan through blockSize bytes represented by the sequences
 * in inSeqs, storing any (partial) sequences.
 *
 * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
 * avoid splitting a match, or to avoid splitting a match such that it would produce a match
 * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
 */
static size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                   const void* src, size_t blockSize)
{
    U32 idx = seqPos->idx;
    U32 startPosInSequence = seqPos->posInSequence;
    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
    size_t dictSize;
    BYTE const* ip = (BYTE const*)(src);
    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
    repcodes_t updatedRepcodes;
    U32 bytesAdjustment = 0;
    U32 finalMatchSplit = 0;

    if (cctx->cdict) {
        dictSize = cctx->cdict->dictContentSize;
    } else if (cctx->prefixDict.dict) {
        dictSize = cctx->prefixDict.dictSize;
    } else {
        dictSize = 0;
    }
    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
        const ZSTD_Sequence currSeq = inSeqs[idx];
        U32 litLength = currSeq.litLength;
        U32 matchLength = currSeq.matchLength;
        U32 const rawOffset = currSeq.offset;
        U32 offCode;

        /* Modify the sequence depending on where endPosInSequence lies */
        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
            if (startPosInSequence >= litLength) {
                startPosInSequence -= litLength;
                litLength = 0;
                matchLength -= startPosInSequence;
            } else {
                litLength -= startPosInSequence;
            }
            /* Move to the next sequence */
            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
            startPosInSequence = 0;
            idx++;
        } else {
            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
               does not reach the end of the match. So, we have to split the sequence */
            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
            if (endPosInSequence > litLength) {
                U32 firstHalfMatchLength;
                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
                    /* Only ever split the match if it is larger than the block size */
                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
                        firstHalfMatchLength -= bytesAdjustment;
                    }
                    matchLength = firstHalfMatchLength;
                    /* Flag that we split the last match - after storing the sequence, exit the loop,
                       but keep the value of endPosInSequence */
                    finalMatchSplit = 1;
                } else {
                    /* Move the position in sequence backwards so that we don't split match, and break to store
                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
                     * would cause the first half of the match to be too small
                     */
                    bytesAdjustment = endPosInSequence - currSeq.litLength;
                    endPosInSequence = currSeq.litLength;
                    break;
                }
            } else {
                /* This sequence ends inside the literals, break to store the last literals */
                break;
            }
        }
        /* Check if this offset can be represented with a repcode */
        {   U32 const ll0 = (litLength == 0);
            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
        }

        if (cctx->appliedParams.validateSequences) {
            seqPos->posInSrc += litLength + matchLength;
            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                                                   "Sequence validation failed");
        }
        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
        ip += matchLength + litLength;
    }
    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
    seqPos->idx = idx;
    seqPos->posInSequence = endPosInSequence;
    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));

    iend -= bytesAdjustment;
    if (ip != iend) {
        /* Store any last literals */
        U32 lastLLSize = (U32)(iend - ip);
        assert(ip <= iend);
        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
        seqPos->posInSrc += lastLLSize;
    }

    return bytesAdjustment;
}

typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                       const void* src, size_t blockSize);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
{
    ZSTD_sequenceCopier sequenceCopier = NULL;
    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
    if (mode == ZSTD_sf_explicitBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
    } else if (mode == ZSTD_sf_noBlockDelimiters) {
        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
    }
    assert(sequenceCopier != NULL);
    return sequenceCopier;
}
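
/* Usage sketch (illustrative only): callers never pick a copier directly;
 * it is selected by the ZSTD_c_blockDelimiters parameter, e.g.:
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
 *                            ZSTD_sf_explicitBlockDelimiters);
 *
 * With explicit delimiters, each block of sequences ends in an entry with
 * matchLength == 0; with ZSTD_sf_noBlockDelimiters, the library partitions
 * the input into blocks itself, splitting sequences as needed (see above). */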

/* Compress, block-by-block, all of the sequences given.
 *
 * Returns the cumulative size of all compressed blocks (including their headers),
 * otherwise a ZSTD error.
 */
static size_t
ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
                                void* dst, size_t dstCapacity,
                                const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                const void* src, size_t srcSize)
{
    size_t cSize = 0;
    U32 lastBlock;
    size_t blockSize;
    size_t compressedSeqsSize;
    size_t remaining = srcSize;
    ZSTD_sequencePosition seqPos = {0, 0, 0};

    BYTE const* ip = (BYTE const*)src;
    BYTE* op = (BYTE*)dst;
    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);

    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
    /* Special case: empty frame */
    if (remaining == 0) {
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
        cSize += ZSTD_blockHeaderSize;
    }

    while (remaining) {
        size_t cBlockSize;
        size_t additionalByteAdjustment;
        lastBlock = remaining <= cctx->blockSize;
        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
        ZSTD_resetSeqStore(&cctx->seqStore);
        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);

        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
        blockSize -= additionalByteAdjustment;

        /* If blocks are too small, emit as a nocompress block */
        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
            cSize += cBlockSize;
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            cctx->isFirstBlock = 0;
            continue;
        }
        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                &cctx->appliedParams,
                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
                                blockSize,
                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
                                cctx->bmi2);
        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);

        if (!cctx->isFirstBlock &&
            ZSTD_maybeRLE(&cctx->seqStore) &&
            ZSTD_isRLE((BYTE const*)src, srcSize)) {
            /* We don't want to emit our first block as a RLE even if it qualifies because
             * doing so will cause the decoder (cli only) to throw a "should consume all input error."
             * This is only an issue for zstd <= v1.4.3
             */
            compressedSeqsSize = 1;
        }

        if (compressedSeqsSize == 0) {
            /* ZSTD_noCompressBlock writes the block header as well */
            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
        } else if (compressedSeqsSize == 1) {
            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
        } else {
            U32 cBlockHeader;
            /* Error checking and repcodes update */
            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

            /* Write block header into beginning of block */
            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
            MEM_writeLE24(op, cBlockHeader);
            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
        }

        cSize += cBlockSize;
        DEBUGLOG(4, "cSize running total: %zu", cSize);

        if (lastBlock) {
            break;
        } else {
            ip += blockSize;
            op += cBlockSize;
            remaining -= blockSize;
            dstCapacity -= cBlockSize;
            cctx->isFirstBlock = 0;
        }
    }

    return cSize;
}
size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                              const void* src, size_t srcSize)
{
    BYTE* op = (BYTE*)dst;
    size_t cSize = 0;
    size_t compressedBlocksSize = 0;
    size_t frameHeaderSize = 0;

    /* Transparent initialization stage, same as compressStream2() */
    DEBUGLOG(3, "ZSTD_compressSequences()");
    assert(cctx != NULL);
    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
    /* Begin writing output, starting with frame header */
    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
    op += frameHeaderSize;
    dstCapacity -= frameHeaderSize;
    cSize += frameHeaderSize;
    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
        xxh64_update(&cctx->xxhState, src, srcSize);
    }
    /* cSize includes block header size and compressed sequences size */
    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
                                                           op, dstCapacity,
                                                           inSeqs, inSeqsSize,
                                                           src, srcSize);
    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
    cSize += compressedBlocksSize;
    dstCapacity -= compressedBlocksSize;

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32((char*)dst + cSize, checksum);
        cSize += 4;
    }

    DEBUGLOG(3, "Final compressed size: %zu", cSize);
    return cSize;
}
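
/* Usage sketch (illustrative; assumes ZSTD_STATIC_LINKING_ONLY so the
 * experimental sequence API is visible, and that `seqs`/`nbSeqs` are
 * caller-provided, e.g. produced by ZSTD_generateSequences()):
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t const dstCapacity = ZSTD_compressBound(srcSize);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters,
 *                            ZSTD_sf_noBlockDelimiters);
 *     {   size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
 *                                                     seqs, nbSeqs,
 *                                                     src, srcSize);
 *         if (ZSTD_isError(cSize)) { (handle error) }
 *     }
 */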

/*======   Finalize   ======*/

/*! ZSTD_flushStream() :
 * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
}

size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
    ZSTD_inBuffer input = { NULL, 0, 0 };
    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
    FORWARD_IF_ERROR(remainingToFlush, "ZSTD_compressStream2 failed");
    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
    /* single thread mode : attempt to calculate remaining to flush more precisely */
    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
        return toFlush;
    }
}
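
/* Typical caller pattern (illustrative): drive ZSTD_endStream() until it
 * reports 0 bytes left, flushing the output buffer between calls:
 *
 *     ZSTD_outBuffer out = { dstBuf, dstBufCapacity, 0 };
 *     size_t remaining;
 *     do {
 *         remaining = ZSTD_endStream(zcs, &out);
 *         if (ZSTD_isError(remaining)) { (handle error) }
 *         (write out.dst[0..out.pos) downstream, then reset out.pos to 0)
 *     } while (remaining != 0);
 */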

/*-=====  Pre-defined compression levels  =====-*/
#include "clevels.h"

int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
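
/* Example (illustrative): a caller can clamp a user-supplied level into the
 * advertised range before use; out-of-range levels are also clamped
 * internally by ZSTD_getCParams_internal() below:
 *
 *     int level = userLevel;
 *     if (level == 0) level = ZSTD_defaultCLevel();
 *     if (level > ZSTD_maxCLevel()) level = ZSTD_maxCLevel();
 *     if (level < ZSTD_minCLevel()) level = ZSTD_minCLevel();
 */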

static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
    switch (cParams.strategy) {
        case ZSTD_fast:
        case ZSTD_dfast:
            break;
        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
            break;
        case ZSTD_btlazy2:
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            break;
    }
    return cParams;
}

static int ZSTD_dedicatedDictSearch_isSupported(
        ZSTD_compressionParameters const* cParams)
{
    return (cParams->strategy >= ZSTD_greedy)
        && (cParams->strategy <= ZSTD_lazy2)
        && (cParams->hashLog > cParams->chainLog)
        && (cParams->chainLog <= 24);
}

/**
 * Reverses the adjustment applied to cparams when enabling dedicated dict
 * search. This is used to recover the params set to be used in the working
 * context. (Otherwise, those tables would also grow.)
 */
static void ZSTD_dedicatedDictSearch_revertCParams(
        ZSTD_compressionParameters* cParams) {
    switch (cParams->strategy) {
        case ZSTD_fast:
        case ZSTD_dfast:
            break;
        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
                cParams->hashLog = ZSTD_HASHLOG_MIN;
            }
            break;
        case ZSTD_btlazy2:
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            break;
    }
}

static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    switch (mode) {
    case ZSTD_cpm_unknown:
    case ZSTD_cpm_noAttachDict:
    case ZSTD_cpm_createCDict:
        break;
    case ZSTD_cpm_attachDict:
        dictSize = 0;
        break;
    default:
        assert(0);
        break;
    }
    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
    }
}
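
/* Worked example: ZSTD_CONTENTSIZE_UNKNOWN is defined as (0ULL - 1), so with
 * an unknown srcSizeHint and a 10 KB dictionary, `unknown` is 1 and
 * `addedSize` is 500; the unsigned sum wraps to 10240 + 500 - 1 = 10739,
 * small enough to select the "<= 16 KB" parameter table in
 * ZSTD_getCParams_internal() below. With no dictionary and unknown source
 * size, ZSTD_CONTENTSIZE_UNKNOWN is returned instead, which selects the
 * largest-input table. */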

/*! ZSTD_getCParams_internal() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
 *        Use dictSize == 0 for unknown or unused.
 *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
{
    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
    int row;
    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);

    /* row */
    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
    else row = compressionLevel;

    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
        /* acceleration factor */
        if (compressionLevel < 0) {
            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
            cp.targetLength = (unsigned)(-clampedCompressionLevel);
        }
        /* refine parameters based on srcSize & dictSize */
        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
    }
}

/*! ZSTD_getCParams() :
 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
 *  Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
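
/* Usage sketch (illustrative): query the parameters a given level would use
 * for a ~1 MB input, then inspect or adapt individual fields:
 *
 *     ZSTD_compressionParameters cp = ZSTD_getCParams(3, 1 << 20, 0);
 *     printf("windowLog=%u strategy=%u\n", cp.windowLog, (unsigned)cp.strategy);
 */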

/*! ZSTD_getParams() :
 *  same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
    ZSTD_parameters params;
    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
    ZSTD_memset(&params, 0, sizeof(params));
    params.cParams = cParams;
    params.fParams.contentSizeFlag = 1;
    return params;
}

/*! ZSTD_getParams() :
 *  same idea as ZSTD_getCParams()
 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
 *  Fields of `ZSTD_frameParameters` are set to default values */
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
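
/* Usage sketch (illustrative): ZSTD_getParams() additionally returns frame
 * parameters, zero-initialized except for contentSizeFlag:
 *
 *     ZSTD_parameters const p = ZSTD_getParams(5, ZSTD_CONTENTSIZE_UNKNOWN, 0);
 *     assert(p.fParams.contentSizeFlag == 1);
 *     assert(p.fParams.checksumFlag == 0);
 */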