/* ******************************************************************
 * Huffman encoder, part of New Generation Entropy library
 * Copyright (c) Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */
/* **************************************************************
*  Compiler specifics
****************************************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  pragma warning(disable : 4127)  /* disable: C4127: conditional expression is constant */
#endif

/* **************************************************************
*  Includes
****************************************************************/
#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset */
#include "../common/compiler.h"
#include "../common/bitstream.h"
#include "hist.h"                 /* HIST_count_simple, HIST_count_wksp */
#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
#include "../common/fse.h"        /* header compression */
#define HUF_STATIC_LINKING_ONLY
#include "../common/huf.h"
#include "../common/error_private.h"

/* **************************************************************
*  Error Management
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */

/* **************************************************************
*  Utils
****************************************************************/
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}

/* *******************************************************
*  HUF : Huffman block compression
*********************************************************/
#define HUF_WORKSPACE_MAX_ALIGNMENT 8

static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
{
    size_t const mask = align - 1;
    size_t const rem = (size_t)workspace & mask;
    size_t const add = (align - rem) & mask;
    BYTE* const aligned = (BYTE*)workspace + add;
    assert((align & (align - 1)) == 0); /* pow 2 */
    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
    if (*workspaceSizePtr >= add) {
        assert(add < align);
        assert(((size_t)aligned & mask) == 0);
        *workspaceSizePtr -= add;
        return aligned;
    } else {
        *workspaceSizePtr = 0;
        return NULL;
    }
}
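
/* Illustration (not from upstream) : with align == 8, a workspace starting at
 * address 0x1005 gives mask == 7, rem == 5, add == 3, so the usable region
 * begins at 0x1008 and *workspaceSizePtr shrinks by 3 bytes.
 * When fewer than `add` bytes are available, the workspace is rejected
 * (*workspaceSizePtr set to 0, NULL returned), which the callers below
 * surface as GENERIC or workSpace_tooSmall errors. */
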
/* HUF_compressWeights() :
 * Same as FSE_compress(), but dedicated to huff0's weights compression.
 * The use case needs much less stack memory.
 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
 */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6

typedef struct {
    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];
} HUF_CompressWeightsWksp;

static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return (size_t)(op-ostart);
}

static size_t HUF_getNbBits(HUF_CElt elt)
{
    return elt & 0xFF;
}

static size_t HUF_getNbBitsFast(HUF_CElt elt)
{
    return elt;
}

static size_t HUF_getValue(HUF_CElt elt)
{
    return elt & ~(size_t)0xFF;
}

static size_t HUF_getValueFast(HUF_CElt elt)
{
    return elt;
}

static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
{
    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
    *elt = nbBits;
}

static void HUF_setValue(HUF_CElt* elt, size_t value)
{
    size_t const nbBits = HUF_getNbBits(*elt);
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);
        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
    }
}
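
/* Worked example of the HUF_CElt packing (illustration only, assuming 64-bit size_t) :
 * HUF_setNbBits(&elt, 3) stores nbBits == 3 in the low bits : elt == 0x0000000000000003.
 * HUF_setValue(&elt, 5) then places the 3-bit code 0b101 into the top bits :
 * elt == 0xA000000000000003. The value sits pre-shifted at the top of the word,
 * so HUF_addBits() can OR it straight into the bit container without re-shifting. */
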
typedef struct {
    HUF_CompressWeightsWksp wksp;
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
} HUF_WriteCTableWksp;

size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                            void* workspace, size_t workspaceSize)
{
    HUF_CElt const* const ct = CTable + 1;
    BYTE* op = (BYTE*)dst;
    U32 n;
    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

    /* check conditions */
    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight */
    wksp->bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

    /* attempt weights compression by FSE */
    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}
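
/* Example of the raw (non-FSE) header layout (illustration only) :
 * with maxSymbolValue == 5, op[0] == 128 + 4 == 132 announces the weights of
 * symbols 0..4, packed two per byte : (w0<<4)+w1, (w2<<4)+w3, (w4<<4)+0,
 * for a total of ((5+1)/2) + 1 == 4 bytes. The last present symbol's weight
 * is never stored : the decoder side (HUF_readStats) rebuilds it from the
 * Kraft sum. */
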
/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    HUF_WriteCTableWksp wksp;
    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
}

size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];    /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;
    HUF_CElt* const ct = CTable + 1;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
    *hasZeroWeights = (rankVal[0] > 0);

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    CTable[0] = tableLog;

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 curr = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = curr;
    }   }

    /* fill nbBits */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}

U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
{
    const HUF_CElt* ct = CTable + 1;
    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
    return (U32)HUF_getNbBits(ct[symbolValue]);
}

typedef struct nodeElt_s {
    U32 count;
    U16 parent;
    BYTE byte;
    BYTE nbBits;
} nodeElt;

/**
 * HUF_setMaxHeight():
 * Enforces maxNbBits on the Huffman tree described in huffNode.
 *
 * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
 * the tree so that it is a valid canonical Huffman tree.
 *
 * @pre               The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits == huffNode[lastNonNull].nbBits.
 * @post              The sum of the ranks of each symbol == 2^largestBits,
 *                    where largestBits is the return value, <= maxNbBits.
 *
 * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
 * @param lastNonNull The symbol with the lowest count in the Huffman tree.
 * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
 *                    may not respect. After this function the Huffman tree will
 *                    respect maxNbBits.
 * @return            The maximum number of bits of the Huffman tree after adjustment,
 *                    necessarily no more than maxNbBits.
 */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    /* early exit : no elt > maxNbBits, so the tree is already valid. */
    if (largestBits <= maxNbBits) return largestBits;

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        int n = (int)lastNonNull;

        /* Adjust any ranks > maxNbBits to maxNbBits.
         * Compute totalCost, which is how far the sum of the ranks is
         * over 2^largestBits after adjusting the offending ranks.
         */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n--;
        }
        /* n stops at huffNode[n].nbBits <= maxNbBits */
        assert(huffNode[n].nbBits <= maxNbBits);
        /* n ends at the index of the smallest symbol using < maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) --n;

        /* renorm totalCost from 2^largestBits to 2^maxNbBits
         * note : totalCost is necessarily a multiple of baseCost */
        assert((totalCost & (baseCost - 1)) == 0);
        totalCost >>= (largestBits - maxNbBits);
        assert(totalCost > 0);

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;
            U32 rankLast[HUF_TABLELOG_MAX+2];

            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                int pos;
                for (pos=n; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
            }   }

            while (totalCost > 0) {
                /* Try to reduce the next power of 2 above totalCost because we
                 * gain back half the rank.
                 */
                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 const highPos = rankLast[nBitsToDecrease];
                    U32 const lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    /* Decrease highPos if there are no lowPos symbols, or if it
                     * is not cheaper to remove 2 lowPos than 1 highPos.
                     */
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease++;
                assert(rankLast[nBitsToDecrease] != noSymbol);
                /* Increase the number of bits to gain back half the rank cost. */
                totalCost -= 1 << (nBitsToDecrease-1);
                huffNode[rankLast[nBitsToDecrease]].nbBits++;

                /* Fix up the new rank.
                 * If the new rank was empty, this symbol is now its smallest.
                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
                 */
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
                /* Fix up the old rank.
                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
                 * it must be the only symbol in its rank, so the old rank now has no symbols.
                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
                 * the smallest node in the rank. If the previous position belongs to a different rank,
                 * then the rank is now empty.
                 */
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
                }
            }   /* while (totalCost > 0) */

            /* If we've removed too much weight, then we have to add it back.
             * To avoid overshooting again, we only adjust the smallest rank.
             * We take the largest nodes from rank 0 and move them to rank 1.
             */
            while (totalCost < 0) {  /* Sometimes, cost correction overshoots */
                /* special case : no rank 1 symbol (using maxNbBits-1);
                 * let's create one from largest rank 0 (using maxNbBits).
                 */
                if (rankLast[1] == noSymbol) {
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    assert(n >= 0);
                    rankLast[1] = (U32)(n+1);
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost++;
            }
        }   /* repay normalized cost */
    }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
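
/* Numeric illustration of the cost accounting above (not from upstream) :
 * say largestBits == 13 and maxNbBits == 11, so baseCost == 1<<2 == 4.
 * Demoting a 13-bit leaf to 11 bits adds 4 - (1<<0) == 3 to totalCost,
 * demoting a 12-bit leaf adds 4 - (1<<1) == 2. After totalCost >>= 2, one
 * unit of debt equals the weight of one maxNbBits-long code, and each
 * promotion of a (maxNbBits - nBitsToDecrease)-bit symbol by one bit
 * repays 1 << (nBitsToDecrease-1) units. */
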
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

typedef struct {
    U16 base;
    U16 curr;
} rankPos;

/* Number of buckets available for HUF_sort() */
#define RANK_POSITION_TABLE_SIZE 192

typedef struct {
  huffNodeTable huffNodeTbl;
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
} HUF_buildCTable_wksp_tables;

/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
 * Strategy is to use as many buckets as possible for representing distinct
 * counts while using the remainder to represent all "large" counts.
 *
 * To satisfy this requirement for 192 buckets, we can do the following:
 * Let buckets 0-166 represent distinct counts of [0, 166]
 * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
 */
#define RANK_POSITION_MAX_COUNT_LOG 32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)

/* Return the appropriate bucket index for a given count. See definition of
 * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
 */
static U32 HUF_getIndex(U32 const count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
                ? count
                : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}
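
/* Bucketing example (illustration only) : count == 100 is below the cutoff and
 * maps to distinct bucket 100, while count == 1000 maps to log2 bucket
 * BIT_highbit32(1000) + RANK_POSITION_LOG_BUCKETS_BEGIN == 9 + 158 == 167,
 * which it shares with every count in [512, 1023]. */
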
/* Helper swap function for HUF_quickSortPartition() */
static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
    nodeElt tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Returns 0 if the huffNode array is not sorted by descending count */
MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
    U32 i;
    for (i = 1; i < maxSymbolValue1; ++i) {
        if (huffNode[i].count > huffNode[i-1].count) {
            return 0;
        }
    }
    return 1;
}

/* Insertion sort by descending order */
HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
    int i;
    int const size = high-low+1;
    huffNode += low;
    for (i = 1; i < size; ++i) {
        nodeElt const key = huffNode[i];
        int j = i - 1;
        while (j >= 0 && huffNode[j].count < key.count) {
            huffNode[j + 1] = huffNode[j];
            j--;
        }
        huffNode[j + 1] = key;
    }
}

/* Pivot helper function for quicksort. */
static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
    /* Simply select rightmost element as pivot. "Better" selectors like
     * median-of-three don't experimentally appear to have any benefit.
     */
    U32 const pivot = arr[high].count;
    int i = low - 1;
    int j = low;
    for ( ; j < high; j++) {
        if (arr[j].count > pivot) {
            i++;
            HUF_swapNodes(&arr[i], &arr[j]);
        }
    }
    HUF_swapNodes(&arr[i + 1], &arr[high]);
    return i + 1;
}

/* Classic quicksort by descending with partially iterative calls
 * to reduce worst case callstack size.
 */
static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
    int const kInsertionSortThreshold = 8;
    if (high - low < kInsertionSortThreshold) {
        HUF_insertionSort(arr, low, high);
        return;
    }
    while (low < high) {
        int const idx = HUF_quickSortPartition(arr, low, high);
        if (idx - low < high - idx) {
            HUF_simpleQuickSort(arr, low, idx - 1);
            low = idx + 1;
        } else {
            HUF_simpleQuickSort(arr, idx + 1, high);
            high = idx - 1;
        }
    }
}

/**
 * HUF_sort():
 * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
 * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
 *
 * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
 *                            Must have (maxSymbolValue + 1) entries.
 * @param[in]  count          Histogram of the symbols.
 * @param[in]  maxSymbolValue Maximum symbol value.
 * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
 */
static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
    U32 n;
    U32 const maxSymbolValue1 = maxSymbolValue+1;

    /* Compute base and set curr to base.
     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 lowerRank = HUF_getIndex(count[n]);
        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
        rankPosition[lowerRank].base++;
    }

    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
    /* Set up the rankPosition table */
    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
        rankPosition[n-1].base += rankPosition[n].base;
        rankPosition[n-1].curr = rankPosition[n-1].base;
    }

    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
    for (n = 0; n < maxSymbolValue1; ++n) {
        U32 const c = count[n];
        U32 const r = HUF_getIndex(c) + 1;
        U32 const pos = rankPosition[r].curr++;
        assert(pos < maxSymbolValue1);
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }

    /* Sort each bucket. */
    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
        U32 const bucketSize = rankPosition[n].curr - rankPosition[n].base;
        U32 const bucketStartIdx = rankPosition[n].base;
        if (bucketSize > 1) {
            assert(bucketStartIdx < maxSymbolValue1);
            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
        }
    }

    assert(HUF_isSorted(huffNode, maxSymbolValue1));
}
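
/* Small worked example (illustration only) : for counts {0, 3, 3, 7} over
 * symbols 0..3, the suffix-summed bases are base[8..] == 0, base[4..7] == 1,
 * base[1..3] == 3, base[0] == 4. Inserting each symbol at rank lowerRank+1
 * yields huffNode counts [7, 3, 3, 0] : already descending ; only log2
 * buckets (shared by several distinct counts) still need HUF_simpleQuickSort(). */
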
/* HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)

/* HUF_buildTree():
 * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
 *
 * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
 * @param maxSymbolValue  The maximum symbol value.
 * @return                The smallest node in the Huffman tree (by count).
 */
static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
{
    nodeElt* const huffNode0 = huffNode - 1;
    int nonNullRank;
    int lowS, lowN;
    int nodeNb = STARTNODE;
    int n, nodeRoot;
    /* init for parents */
    nonNullRank = (int)maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents */
    while (nodeNb <= nodeRoot) {
        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    return nonNullRank;
}

/**
 * HUF_buildCTableFromTree():
 * Build the CTable given the Huffman tree in huffNode.
 *
 * @param[out] CTable         The output Huffman CTable.
 * @param      huffNode       The Huffman tree.
 * @param      nonNullRank    The last and smallest node in the Huffman tree.
 * @param      maxSymbolValue The maximum symbol value.
 * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
 */
static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
{
    HUF_CElt* const ct = CTable + 1;
    /* fill result into ctable (val, nbBits) */
    int n;
    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
    int const alphabetSize = (int)(maxSymbolValue + 1);
    for (n=0; n<=nonNullRank; n++)
        nbPerRank[huffNode[n].nbBits]++;
    /* determine starting value per rank */
    {   U16 min = 0;
        for (n=(int)maxNbBits; n>0; n--) {
            valPerRank[n] = min;      /* get starting value within each rank */
            min += nbPerRank[n];
            min >>= 1;
    }   }
    for (n=0; n<alphabetSize; n++)
        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
    for (n=0; n<alphabetSize; n++)
        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
    CTable[0] = maxNbBits;
}

size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
    nodeElt* const huffNode = huffNode0+1;
    int nonNullRank;

    /* safety checks */
    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
        return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
        return ERROR(maxSymbolValue_tooLarge);
    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);

    /* build tree */
    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */

    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

    return maxNbBits;
}

size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
{
    HUF_CElt const* ct = CTable + 1;
    size_t nbBits = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        nbBits += HUF_getNbBits(ct[s]) * count[s];
    }
    return nbBits >> 3;
}

int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
    HUF_CElt const* ct = CTable + 1;
    int bad = 0;
    int s;
    for (s = 0; s <= (int)maxSymbolValue; ++s) {
        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
    }
    return !bad;
}

size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

/** HUF_CStream_t:
 * Huffman uses its own BIT_CStream_t implementation.
 * There are three major differences from BIT_CStream_t:
 *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
 *      the pair (nbBits, value) in the format:
 *      - Bits [0, 4)            = nbBits
 *      - Bits [4, 64 - nbBits)  = 0
 *      - Bits [64 - nbBits, 64) = value
 *   2. The bitContainer is built from the upper bits and
 *      right shifted. E.g. to add a new value of N bits
 *      you right shift the bitContainer by N, then or in
 *      the new value into the N upper bits.
 *   3. The bitstream has two bit containers. You can add
 *      bits to the second container and merge them into
 *      the first container.
 */

#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)

typedef struct {
    size_t bitContainer[2];
    size_t bitPos[2];

    BYTE* startPtr;
    BYTE* ptr;
    BYTE* endPtr;
} HUF_CStream_t;
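
/* Container walk-through (illustration only, assuming 64-bit size_t) :
 * starting from an empty container, adding (nbBits=3, value=0b101) shifts the
 * container right by 3 and ORs 0b101 into bits [61,64) ; then adding
 * (nbBits=2, value=0b01) moves those bits down to [59,62) and places 0b01 in
 * bits [62,64). The most recent symbol always occupies the top bits, and
 * HUF_flushBits() extracts exactly the top bitPos bits when writing out. */
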
/*! HUF_initCStream():
 * Initializes the bitstream.
 * @returns 0 or an error code.
 */
static size_t HUF_initCStream(HUF_CStream_t* bitC,
                              void* startPtr, size_t dstCapacity)
{
    ZSTD_memset(bitC, 0, sizeof(*bitC));
    bitC->startPtr = (BYTE*)startPtr;
    bitC->ptr = bitC->startPtr;
    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
    return 0;
}

/*! HUF_addBits():
 * Adds the symbol stored in HUF_CElt elt to the bitstream.
 *
 * @param elt   The element we're adding. This is a (nbBits, value) pair.
 *              See the HUF_CStream_t docs for the format.
 * @param idx   Insert into the bitstream at this idx.
 * @param kFast This is a template parameter. If the bitstream is guaranteed
 *              to have at least 4 unused bits after this call it may be 1,
 *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
 */
FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
{
    assert(idx <= 1);
    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
    /* This is efficient on x86-64 with BMI2 because shrx
     * only reads the low 6 bits of the register. The compiler
     * knows this and elides the mask. When fast is set,
     * every operation can use the same value loaded from elt.
     */
    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
    /* We only read the low 8 bits of bitC->bitPos[idx] so it
     * doesn't matter that the high bits have noise from the value.
     */
    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    /* The last 4-bits of elt are dirty if fast is set,
     * so we must not be overwriting bits that have already been
     * inserted into the bit container.
     */
#if DEBUGLEVEL >= 1
    {
        size_t const nbBits = HUF_getNbBits(elt);
        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
        (void)dirtyBits;
        /* Middle bits are 0. */
        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
        /* We didn't overwrite any bits in the bit container. */
        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
    }
#endif
}

FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
{
    bitC->bitContainer[1] = 0;
    bitC->bitPos[1] = 0;
}

/*! HUF_mergeIndex1() :
 * Merges the bit container @ index 1 into the bit container @ index 0
 * and zeros the bit container @ index 1.
 */
FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
{
    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
    bitC->bitContainer[0] |= bitC->bitContainer[1];
    bitC->bitPos[0] += bitC->bitPos[1];
    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
}
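
/* E.g. if container 0 holds p bits and container 1 holds q bits (both
 * top-aligned), the shift makes room for exactly q bits at the top of
 * container 0, the OR drops container 1's bits into that room, and bitPos[0]
 * grows by q. This is what lets the encode loop below fill index 1 with no
 * data dependency on index 0. */
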
/*! HUF_flushBits() :
 * Flushes the bits in the bit container @ index 0.
 *
 * @post bitPos will be < 8.
 * @param kFast If kFast is set then we must know a-priori that
 *              the bit container will not overflow.
 */
FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
{
    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
    size_t const nbBits = bitC->bitPos[0] & 0xFF;
    size_t const nbBytes = nbBits >> 3;
    /* The top nbBits bits of bitContainer are the ones we need. */
    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
    /* Mask bitPos to account for the bytes we consumed. */
    bitC->bitPos[0] &= 7;
    assert(nbBits > 0);
    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitContainer);
    bitC->ptr += nbBytes;
    assert(!kFast || bitC->ptr <= bitC->endPtr);
    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
    /* bitContainer doesn't need to be modified because the leftover
     * bits are already the top bitPos bits. And we don't care about
     * noise in the lower values.
     */
}

/*! HUF_endMark()
 * @returns The Huffman stream end mark: A 1-bit value = 1.
 */
static HUF_CElt HUF_endMark(void)
{
    HUF_CElt endMark;
    HUF_setNbBits(&endMark, 1);
    HUF_setValue(&endMark, 1);
    return endMark;
}

/*! HUF_closeCStream() :
 *  @return Size of CStream, in bytes,
 *          or 0 if it could not fit into dstBuffer */
static size_t HUF_closeCStream(HUF_CStream_t* bitC)
{
    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
    HUF_flushBits(bitC, /* kFast */ 0);
    {   size_t const nbBits = bitC->bitPos[0] & 0xFF;
        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
    }
}

FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
{
    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
}

FORCE_INLINE_TEMPLATE void
HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
                                   const BYTE* ip, size_t srcSize,
                                   const HUF_CElt* ct,
                                   int kUnroll, int kFastFlush, int kLastFast)
{
    /* Join to kUnroll */
    int n = (int)srcSize;
    int rem = n % kUnroll;
    if (rem > 0) {
        for (; rem > 0; --rem) {
            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
        }
        HUF_flushBits(bitC, kFastFlush);
    }
    assert(n % kUnroll == 0);

    /* Join to 2 * kUnroll */
    if (n % (2 * kUnroll)) {
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        n -= kUnroll;
    }
    assert(n % (2 * kUnroll) == 0);

    for (; n>0; n -= 2 * kUnroll) {
        /* Encode kUnroll symbols into the bitstream @ index 0. */
        int u;
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
        HUF_flushBits(bitC, kFastFlush);
        /* Encode kUnroll symbols into the bitstream @ index 1.
         * This allows us to start filling the bit container
         * without any data dependencies.
         */
        HUF_zeroIndex1(bitC);
        for (u = 1; u < kUnroll; ++u) {
            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
        }
        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
        HUF_mergeIndex1(bitC);
        HUF_flushBits(bitC, kFastFlush);
    }
}

/**
 * Returns a tight upper bound on the output space needed by Huffman
 * with 8 bytes buffer to handle over-writes. If the output is at least
 * this large we don't need to do bounds checks during Huffman encoding.
 */
static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
{
    return ((srcSize * tableLog) >> 3) + 8;
}
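
/* E.g. srcSize == 1000 with tableLog == 11 gives ((1000 * 11) >> 3) + 8 == 1383
 * bytes : at most 11 bits per symbol, rounded down to bytes, plus an 8-byte
 * pad for the full-width over-write done by MEM_writeLEST() in HUF_flushBits(). */
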
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    U32 const tableLog = (U32)CTable[0];
    HUF_CElt const* ct = CTable + 1;
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    HUF_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    {   size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
        if (HUF_isError(initErr)) return 0; }

    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
    else {
        if (MEM_32bits()) {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10: ZSTD_FALLTHROUGH;
            case 9: ZSTD_FALLTHROUGH;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 7: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        } else {
            switch (tableLog) {
            case 11:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 10:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            case 9:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 8:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 7:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
                break;
            case 6: ZSTD_FALLTHROUGH;
            default:
                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
                break;
            }
        }
    }
    assert(bitC.ptr <= bitC.endPtr);

    return HUF_closeCStream(&bitC);
}

#if DYNAMIC_BMI2

static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif

size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

static size_t
HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, int bmi2)
{
    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE*) dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
    if (srcSize < 12) return 0;   /* no saving possible : too small input */
    op += 6;   /* jumpTable */

    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+2, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        MEM_writeLE16(ostart+4, (U16)cSize);
        op += cSize;
    }

    ip += segmentSize;
    assert(op <= oend);
    assert(ip <= iend);
    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
        if (cSize == 0 || cSize > 65535) return 0;
        op += cSize;
    }

    return (size_t)(op-ostart);
}
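
/* Layout note (illustration only) : the 6-byte jumpTable stores the sizes of
 * the first three streams as little-endian U16 values at offsets 0, 2 and 4 ;
 * the fourth stream's size is implicit, running to the end of the block.
 * Each of the first three streams compresses (srcSize+3)/4 input bytes, the
 * last one takes the remainder. */
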
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}

size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
{
    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
}

typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;

static size_t HUF_compressCTable_internal(
                BYTE* const ostart, BYTE* op, BYTE* const oend,
                const void* src, size_t srcSize,
                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
{
    size_t const cSize = (nbStreams==HUF_singleStream) ?
                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
    if (HUF_isError(cSize)) { return cSize; }
    if (cSize==0) { return 0; }   /* uncompressible */
    op += cSize;
    /* check compressibility */
    assert(op >= ostart);
    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
    return (size_t)(op-ostart);
}

typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    union {
        HUF_buildCTable_wksp_tables buildCTable_wksp;
        HUF_WriteCTableWksp writeCTable_wksp;
        U32 hist_wksp[HIST_WKSP_SIZE_U32];
    } wksps;
} HUF_compress_tables_t;

#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */

/* HUF_compress_internal() :
 * `workSpace_align4` must be aligned on 4-bytes boundaries,
 * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2, unsigned suspectUncompressible)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

    /* checks & inits */
    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* If uncompressible data is suspected, do a smaller sampling first */
    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
        size_t largestTotal = 0;
        {   unsigned maxSymbolValueBegin = maxSymbolValue;
            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestBegin;
        }
        {   unsigned maxSymbolValueEnd = maxSymbolValue;
            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
            largestTotal += largestEnd;
        }
        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
    }
    /* Zero unused symbols in CTable, so we can check it for validity */
    {   size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}

size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
                      int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2, suspectUncompressible);
}

/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
}

/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * consider skipping quickly if input looks incompressible (suspectUncompressible).
 * reuse an existing Huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
}