2 * Huffman decoder, part of New Generation Entropy library
3 * Copyright (C) 2013-2016, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * This program is free software; you can redistribute it and/or modify it under
31 * the terms of the GNU General Public License version 2 as published by the
32 * Free Software Foundation. This program is dual-licensed; you may select
33 * either version 2 of the GNU General Public License ("GPL") or BSD license
36 * You can contact the author at :
37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
40 /* **************************************************************
42 ****************************************************************/
43 #define FORCE_INLINE static __always_inline
45 /* **************************************************************
47 ****************************************************************/
48 #include "bitstream.h" /* BIT_* */
49 #include "fse.h" /* header compression */
51 #include <linux/compiler.h>
52 #include <linux/kernel.h>
53 #include <linux/string.h> /* memcpy, memset */
55 /* **************************************************************
57 ****************************************************************/
58 #define HUF_STATIC_ASSERT(c) \
60 enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
61 } /* use only *after* variable declarations */
63 /*-***************************/
64 /* generic DTableDesc */
65 /*-***************************/
74 static DTableDesc
HUF_getDTableDesc(const HUF_DTable
*table
)
77 memcpy(&dtd
, table
, sizeof(dtd
));
81 /*-***************************/
82 /* single-symbol decoding */
83 /*-***************************/
88 } HUF_DEltX2
; /* single-symbol decoding */
90 size_t HUF_readDTableX2_wksp(HUF_DTable
*DTable
, const void *src
, size_t srcSize
, void *workspace
, size_t workspaceSize
)
95 void *const dtPtr
= DTable
+ 1;
96 HUF_DEltX2
*const dt
= (HUF_DEltX2
*)dtPtr
;
100 size_t spaceUsed32
= 0;
102 rankVal
= (U32
*)workspace
+ spaceUsed32
;
103 spaceUsed32
+= HUF_TABLELOG_ABSOLUTEMAX
+ 1;
104 huffWeight
= (BYTE
*)((U32
*)workspace
+ spaceUsed32
);
105 spaceUsed32
+= ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
107 if ((spaceUsed32
<< 2) > workspaceSize
)
108 return ERROR(tableLog_tooLarge
);
109 workspace
= (U32
*)workspace
+ spaceUsed32
;
110 workspaceSize
-= (spaceUsed32
<< 2);
112 HUF_STATIC_ASSERT(sizeof(DTableDesc
) == sizeof(HUF_DTable
));
113 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
115 iSize
= HUF_readStats_wksp(huffWeight
, HUF_SYMBOLVALUE_MAX
+ 1, rankVal
, &nbSymbols
, &tableLog
, src
, srcSize
, workspace
, workspaceSize
);
116 if (HUF_isError(iSize
))
121 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
122 if (tableLog
> (U32
)(dtd
.maxTableLog
+ 1))
123 return ERROR(tableLog_tooLarge
); /* DTable too small, Huffman tree cannot fit in */
125 dtd
.tableLog
= (BYTE
)tableLog
;
126 memcpy(DTable
, &dtd
, sizeof(dtd
));
129 /* Calculate starting value for each rank */
131 U32 n
, nextRankStart
= 0;
132 for (n
= 1; n
< tableLog
+ 1; n
++) {
133 U32
const curr
= nextRankStart
;
134 nextRankStart
+= (rankVal
[n
] << (n
- 1));
142 for (n
= 0; n
< nbSymbols
; n
++) {
143 U32
const w
= huffWeight
[n
];
144 U32
const length
= (1 << w
) >> 1;
148 D
.nbBits
= (BYTE
)(tableLog
+ 1 - w
);
149 for (u
= rankVal
[w
]; u
< rankVal
[w
] + length
; u
++)
151 rankVal
[w
] += length
;
158 static BYTE
HUF_decodeSymbolX2(BIT_DStream_t
*Dstream
, const HUF_DEltX2
*dt
, const U32 dtLog
)
160 size_t const val
= BIT_lookBitsFast(Dstream
, dtLog
); /* note : dtLog >= 1 */
161 BYTE
const c
= dt
[val
].byte
;
162 BIT_skipBits(Dstream
, dt
[val
].nbBits
);
166 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
168 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
169 if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
170 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
172 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
174 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
176 FORCE_INLINE
size_t HUF_decodeStreamX2(BYTE
*p
, BIT_DStream_t
*const bitDPtr
, BYTE
*const pEnd
, const HUF_DEltX2
*const dt
, const U32 dtLog
)
178 BYTE
*const pStart
= p
;
180 /* up to 4 symbols at a time */
181 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) && (p
<= pEnd
- 4)) {
182 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
183 HUF_DECODE_SYMBOLX2_1(p
, bitDPtr
);
184 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
185 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
188 /* closer to the end */
189 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) && (p
< pEnd
))
190 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
192 /* no more data to retrieve from bitstream, hence no need to reload */
194 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
196 return pEnd
- pStart
;
199 static size_t HUF_decompress1X2_usingDTable_internal(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
201 BYTE
*op
= (BYTE
*)dst
;
202 BYTE
*const oend
= op
+ dstSize
;
203 const void *dtPtr
= DTable
+ 1;
204 const HUF_DEltX2
*const dt
= (const HUF_DEltX2
*)dtPtr
;
206 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
207 U32
const dtLog
= dtd
.tableLog
;
210 size_t const errorCode
= BIT_initDStream(&bitD
, cSrc
, cSrcSize
);
211 if (HUF_isError(errorCode
))
215 HUF_decodeStreamX2(op
, &bitD
, oend
, dt
, dtLog
);
218 if (!BIT_endOfDStream(&bitD
))
219 return ERROR(corruption_detected
);
224 size_t HUF_decompress1X2_usingDTable(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
226 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
227 if (dtd
.tableType
!= 0)
228 return ERROR(GENERIC
);
229 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
);
232 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable
*DCtx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
234 const BYTE
*ip
= (const BYTE
*)cSrc
;
236 size_t const hSize
= HUF_readDTableX2_wksp(DCtx
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
237 if (HUF_isError(hSize
))
239 if (hSize
>= cSrcSize
)
240 return ERROR(srcSize_wrong
);
244 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
);
247 static size_t HUF_decompress4X2_usingDTable_internal(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
251 return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
254 const BYTE
*const istart
= (const BYTE
*)cSrc
;
255 BYTE
*const ostart
= (BYTE
*)dst
;
256 BYTE
*const oend
= ostart
+ dstSize
;
257 const void *const dtPtr
= DTable
+ 1;
258 const HUF_DEltX2
*const dt
= (const HUF_DEltX2
*)dtPtr
;
265 size_t const length1
= ZSTD_readLE16(istart
);
266 size_t const length2
= ZSTD_readLE16(istart
+ 2);
267 size_t const length3
= ZSTD_readLE16(istart
+ 4);
268 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
269 const BYTE
*const istart1
= istart
+ 6; /* jumpTable */
270 const BYTE
*const istart2
= istart1
+ length1
;
271 const BYTE
*const istart3
= istart2
+ length2
;
272 const BYTE
*const istart4
= istart3
+ length3
;
273 const size_t segmentSize
= (dstSize
+ 3) / 4;
274 BYTE
*const opStart2
= ostart
+ segmentSize
;
275 BYTE
*const opStart3
= opStart2
+ segmentSize
;
276 BYTE
*const opStart4
= opStart3
+ segmentSize
;
278 BYTE
*op2
= opStart2
;
279 BYTE
*op3
= opStart3
;
280 BYTE
*op4
= opStart4
;
282 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
283 U32
const dtLog
= dtd
.tableLog
;
285 if (length4
> cSrcSize
)
286 return ERROR(corruption_detected
); /* overflow */
288 size_t const errorCode
= BIT_initDStream(&bitD1
, istart1
, length1
);
289 if (HUF_isError(errorCode
))
293 size_t const errorCode
= BIT_initDStream(&bitD2
, istart2
, length2
);
294 if (HUF_isError(errorCode
))
298 size_t const errorCode
= BIT_initDStream(&bitD3
, istart3
, length3
);
299 if (HUF_isError(errorCode
))
303 size_t const errorCode
= BIT_initDStream(&bitD4
, istart4
, length4
);
304 if (HUF_isError(errorCode
))
308 /* 16-32 symbols per loop (4-8 symbols per stream) */
309 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
310 for (; (endSignal
== BIT_DStream_unfinished
) && (op4
< (oend
- 7));) {
311 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
312 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
313 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
314 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
315 HUF_DECODE_SYMBOLX2_1(op1
, &bitD1
);
316 HUF_DECODE_SYMBOLX2_1(op2
, &bitD2
);
317 HUF_DECODE_SYMBOLX2_1(op3
, &bitD3
);
318 HUF_DECODE_SYMBOLX2_1(op4
, &bitD4
);
319 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
320 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
321 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
322 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
323 HUF_DECODE_SYMBOLX2_0(op1
, &bitD1
);
324 HUF_DECODE_SYMBOLX2_0(op2
, &bitD2
);
325 HUF_DECODE_SYMBOLX2_0(op3
, &bitD3
);
326 HUF_DECODE_SYMBOLX2_0(op4
, &bitD4
);
327 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
330 /* check corruption */
332 return ERROR(corruption_detected
);
334 return ERROR(corruption_detected
);
336 return ERROR(corruption_detected
);
337 /* note : op4 supposed already verified within main loop */
339 /* finish bitStreams one by one */
340 HUF_decodeStreamX2(op1
, &bitD1
, opStart2
, dt
, dtLog
);
341 HUF_decodeStreamX2(op2
, &bitD2
, opStart3
, dt
, dtLog
);
342 HUF_decodeStreamX2(op3
, &bitD3
, opStart4
, dt
, dtLog
);
343 HUF_decodeStreamX2(op4
, &bitD4
, oend
, dt
, dtLog
);
346 endSignal
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
348 return ERROR(corruption_detected
);
355 size_t HUF_decompress4X2_usingDTable(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
357 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
358 if (dtd
.tableType
!= 0)
359 return ERROR(GENERIC
);
360 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
);
363 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable
*dctx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
365 const BYTE
*ip
= (const BYTE
*)cSrc
;
367 size_t const hSize
= HUF_readDTableX2_wksp(dctx
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
368 if (HUF_isError(hSize
))
370 if (hSize
>= cSrcSize
)
371 return ERROR(srcSize_wrong
);
375 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
);
378 /* *************************/
379 /* double-symbols decoding */
380 /* *************************/
385 } HUF_DEltX4
; /* double-symbols decoding */
392 /* HUF_fillDTableX4Level2() :
393 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
394 static void HUF_fillDTableX4Level2(HUF_DEltX4
*DTable
, U32 sizeLog
, const U32 consumed
, const U32
*rankValOrigin
, const int minWeight
,
395 const sortedSymbol_t
*sortedSymbols
, const U32 sortedListSize
, U32 nbBitsBaseline
, U16 baseSeq
)
398 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
400 /* get pre-calculated rankVal */
401 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
403 /* fill skipped values */
405 U32 i
, skipSize
= rankVal
[minWeight
];
406 ZSTD_writeLE16(&(DElt
.sequence
), baseSeq
);
407 DElt
.nbBits
= (BYTE
)(consumed
);
409 for (i
= 0; i
< skipSize
; i
++)
416 for (s
= 0; s
< sortedListSize
; s
++) { /* note : sortedSymbols already skipped */
417 const U32 symbol
= sortedSymbols
[s
].symbol
;
418 const U32 weight
= sortedSymbols
[s
].weight
;
419 const U32 nbBits
= nbBitsBaseline
- weight
;
420 const U32 length
= 1 << (sizeLog
- nbBits
);
421 const U32 start
= rankVal
[weight
];
423 const U32 end
= start
+ length
;
425 ZSTD_writeLE16(&(DElt
.sequence
), (U16
)(baseSeq
+ (symbol
<< 8)));
426 DElt
.nbBits
= (BYTE
)(nbBits
+ consumed
);
430 } while (i
< end
); /* since length >= 1 */
432 rankVal
[weight
] += length
;
437 typedef U32 rankVal_t
[HUF_TABLELOG_MAX
][HUF_TABLELOG_MAX
+ 1];
438 typedef U32 rankValCol_t
[HUF_TABLELOG_MAX
+ 1];
440 static void HUF_fillDTableX4(HUF_DEltX4
*DTable
, const U32 targetLog
, const sortedSymbol_t
*sortedList
, const U32 sortedListSize
, const U32
*rankStart
,
441 rankVal_t rankValOrigin
, const U32 maxWeight
, const U32 nbBitsBaseline
)
443 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
444 const int scaleLog
= nbBitsBaseline
- targetLog
; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
445 const U32 minBits
= nbBitsBaseline
- maxWeight
;
448 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
451 for (s
= 0; s
< sortedListSize
; s
++) {
452 const U16 symbol
= sortedList
[s
].symbol
;
453 const U32 weight
= sortedList
[s
].weight
;
454 const U32 nbBits
= nbBitsBaseline
- weight
;
455 const U32 start
= rankVal
[weight
];
456 const U32 length
= 1 << (targetLog
- nbBits
);
458 if (targetLog
- nbBits
>= minBits
) { /* enough room for a second symbol */
460 int minWeight
= nbBits
+ scaleLog
;
463 sortedRank
= rankStart
[minWeight
];
464 HUF_fillDTableX4Level2(DTable
+ start
, targetLog
- nbBits
, nbBits
, rankValOrigin
[nbBits
], minWeight
, sortedList
+ sortedRank
,
465 sortedListSize
- sortedRank
, nbBitsBaseline
, symbol
);
468 ZSTD_writeLE16(&(DElt
.sequence
), symbol
);
469 DElt
.nbBits
= (BYTE
)(nbBits
);
472 U32
const end
= start
+ length
;
474 for (u
= start
; u
< end
; u
++)
478 rankVal
[weight
] += length
;
482 size_t HUF_readDTableX4_wksp(HUF_DTable
*DTable
, const void *src
, size_t srcSize
, void *workspace
, size_t workspaceSize
)
484 U32 tableLog
, maxW
, sizeOfSort
, nbSymbols
;
485 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
486 U32
const maxTableLog
= dtd
.maxTableLog
;
488 void *dtPtr
= DTable
+ 1; /* force compiler to avoid strict-aliasing */
489 HUF_DEltX4
*const dt
= (HUF_DEltX4
*)dtPtr
;
492 rankValCol_t
*rankVal
;
495 sortedSymbol_t
*sortedSymbol
;
497 size_t spaceUsed32
= 0;
499 HUF_STATIC_ASSERT((sizeof(rankValCol_t
) & 3) == 0);
501 rankVal
= (rankValCol_t
*)((U32
*)workspace
+ spaceUsed32
);
502 spaceUsed32
+= (sizeof(rankValCol_t
) * HUF_TABLELOG_MAX
) >> 2;
503 rankStats
= (U32
*)workspace
+ spaceUsed32
;
504 spaceUsed32
+= HUF_TABLELOG_MAX
+ 1;
505 rankStart0
= (U32
*)workspace
+ spaceUsed32
;
506 spaceUsed32
+= HUF_TABLELOG_MAX
+ 2;
507 sortedSymbol
= (sortedSymbol_t
*)((U32
*)workspace
+ spaceUsed32
);
508 spaceUsed32
+= ALIGN(sizeof(sortedSymbol_t
) * (HUF_SYMBOLVALUE_MAX
+ 1), sizeof(U32
)) >> 2;
509 weightList
= (BYTE
*)((U32
*)workspace
+ spaceUsed32
);
510 spaceUsed32
+= ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
512 if ((spaceUsed32
<< 2) > workspaceSize
)
513 return ERROR(tableLog_tooLarge
);
514 workspace
= (U32
*)workspace
+ spaceUsed32
;
515 workspaceSize
-= (spaceUsed32
<< 2);
517 rankStart
= rankStart0
+ 1;
518 memset(rankStats
, 0, sizeof(U32
) * (2 * HUF_TABLELOG_MAX
+ 2 + 1));
520 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4
) == sizeof(HUF_DTable
)); /* if compiler fails here, assertion is wrong */
521 if (maxTableLog
> HUF_TABLELOG_MAX
)
522 return ERROR(tableLog_tooLarge
);
523 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
525 iSize
= HUF_readStats_wksp(weightList
, HUF_SYMBOLVALUE_MAX
+ 1, rankStats
, &nbSymbols
, &tableLog
, src
, srcSize
, workspace
, workspaceSize
);
526 if (HUF_isError(iSize
))
530 if (tableLog
> maxTableLog
)
531 return ERROR(tableLog_tooLarge
); /* DTable can't fit code depth */
534 for (maxW
= tableLog
; rankStats
[maxW
] == 0; maxW
--) {
535 } /* necessarily finds a solution before 0 */
537 /* Get start index of each weight */
539 U32 w
, nextRankStart
= 0;
540 for (w
= 1; w
< maxW
+ 1; w
++) {
541 U32 curr
= nextRankStart
;
542 nextRankStart
+= rankStats
[w
];
545 rankStart
[0] = nextRankStart
; /* put all 0w symbols at the end of sorted list*/
546 sizeOfSort
= nextRankStart
;
549 /* sort symbols by weight */
552 for (s
= 0; s
< nbSymbols
; s
++) {
553 U32
const w
= weightList
[s
];
554 U32
const r
= rankStart
[w
]++;
555 sortedSymbol
[r
].symbol
= (BYTE
)s
;
556 sortedSymbol
[r
].weight
= (BYTE
)w
;
558 rankStart
[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
563 U32
*const rankVal0
= rankVal
[0];
565 int const rescale
= (maxTableLog
- tableLog
) - 1; /* tableLog <= maxTableLog */
568 for (w
= 1; w
< maxW
+ 1; w
++) {
569 U32 curr
= nextRankVal
;
570 nextRankVal
+= rankStats
[w
] << (w
+ rescale
);
575 U32
const minBits
= tableLog
+ 1 - maxW
;
577 for (consumed
= minBits
; consumed
< maxTableLog
- minBits
+ 1; consumed
++) {
578 U32
*const rankValPtr
= rankVal
[consumed
];
580 for (w
= 1; w
< maxW
+ 1; w
++) {
581 rankValPtr
[w
] = rankVal0
[w
] >> consumed
;
587 HUF_fillDTableX4(dt
, maxTableLog
, sortedSymbol
, sizeOfSort
, rankStart0
, rankVal
, maxW
, tableLog
+ 1);
589 dtd
.tableLog
= (BYTE
)maxTableLog
;
591 memcpy(DTable
, &dtd
, sizeof(dtd
));
595 static U32
HUF_decodeSymbolX4(void *op
, BIT_DStream_t
*DStream
, const HUF_DEltX4
*dt
, const U32 dtLog
)
597 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
598 memcpy(op
, dt
+ val
, 2);
599 BIT_skipBits(DStream
, dt
[val
].nbBits
);
600 return dt
[val
].length
;
603 static U32
HUF_decodeLastSymbolX4(void *op
, BIT_DStream_t
*DStream
, const HUF_DEltX4
*dt
, const U32 dtLog
)
605 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
606 memcpy(op
, dt
+ val
, 1);
607 if (dt
[val
].length
== 1)
608 BIT_skipBits(DStream
, dt
[val
].nbBits
);
610 if (DStream
->bitsConsumed
< (sizeof(DStream
->bitContainer
) * 8)) {
611 BIT_skipBits(DStream
, dt
[val
].nbBits
);
612 if (DStream
->bitsConsumed
> (sizeof(DStream
->bitContainer
) * 8))
613 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
614 DStream
->bitsConsumed
= (sizeof(DStream
->bitContainer
) * 8);
620 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
622 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
623 if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
624 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
626 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
628 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
630 FORCE_INLINE
size_t HUF_decodeStreamX4(BYTE
*p
, BIT_DStream_t
*bitDPtr
, BYTE
*const pEnd
, const HUF_DEltX4
*const dt
, const U32 dtLog
)
632 BYTE
*const pStart
= p
;
634 /* up to 8 symbols at a time */
635 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
- (sizeof(bitDPtr
->bitContainer
) - 1))) {
636 HUF_DECODE_SYMBOLX4_2(p
, bitDPtr
);
637 HUF_DECODE_SYMBOLX4_1(p
, bitDPtr
);
638 HUF_DECODE_SYMBOLX4_2(p
, bitDPtr
);
639 HUF_DECODE_SYMBOLX4_0(p
, bitDPtr
);
642 /* closer to end : up to 2 symbols at a time */
643 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
<= pEnd
- 2))
644 HUF_DECODE_SYMBOLX4_0(p
, bitDPtr
);
646 while (p
<= pEnd
- 2)
647 HUF_DECODE_SYMBOLX4_0(p
, bitDPtr
); /* no need to reload : reached the end of DStream */
650 p
+= HUF_decodeLastSymbolX4(p
, bitDPtr
, dt
, dtLog
);
655 static size_t HUF_decompress1X4_usingDTable_internal(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
661 size_t const errorCode
= BIT_initDStream(&bitD
, cSrc
, cSrcSize
);
662 if (HUF_isError(errorCode
))
668 BYTE
*const ostart
= (BYTE
*)dst
;
669 BYTE
*const oend
= ostart
+ dstSize
;
670 const void *const dtPtr
= DTable
+ 1; /* force compiler to not use strict-aliasing */
671 const HUF_DEltX4
*const dt
= (const HUF_DEltX4
*)dtPtr
;
672 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
673 HUF_decodeStreamX4(ostart
, &bitD
, oend
, dt
, dtd
.tableLog
);
677 if (!BIT_endOfDStream(&bitD
))
678 return ERROR(corruption_detected
);
684 size_t HUF_decompress1X4_usingDTable(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
686 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
687 if (dtd
.tableType
!= 1)
688 return ERROR(GENERIC
);
689 return HUF_decompress1X4_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
);
692 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable
*DCtx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
694 const BYTE
*ip
= (const BYTE
*)cSrc
;
696 size_t const hSize
= HUF_readDTableX4_wksp(DCtx
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
697 if (HUF_isError(hSize
))
699 if (hSize
>= cSrcSize
)
700 return ERROR(srcSize_wrong
);
704 return HUF_decompress1X4_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
);
707 static size_t HUF_decompress4X4_usingDTable_internal(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
710 return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
713 const BYTE
*const istart
= (const BYTE
*)cSrc
;
714 BYTE
*const ostart
= (BYTE
*)dst
;
715 BYTE
*const oend
= ostart
+ dstSize
;
716 const void *const dtPtr
= DTable
+ 1;
717 const HUF_DEltX4
*const dt
= (const HUF_DEltX4
*)dtPtr
;
724 size_t const length1
= ZSTD_readLE16(istart
);
725 size_t const length2
= ZSTD_readLE16(istart
+ 2);
726 size_t const length3
= ZSTD_readLE16(istart
+ 4);
727 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
728 const BYTE
*const istart1
= istart
+ 6; /* jumpTable */
729 const BYTE
*const istart2
= istart1
+ length1
;
730 const BYTE
*const istart3
= istart2
+ length2
;
731 const BYTE
*const istart4
= istart3
+ length3
;
732 size_t const segmentSize
= (dstSize
+ 3) / 4;
733 BYTE
*const opStart2
= ostart
+ segmentSize
;
734 BYTE
*const opStart3
= opStart2
+ segmentSize
;
735 BYTE
*const opStart4
= opStart3
+ segmentSize
;
737 BYTE
*op2
= opStart2
;
738 BYTE
*op3
= opStart3
;
739 BYTE
*op4
= opStart4
;
741 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
742 U32
const dtLog
= dtd
.tableLog
;
744 if (length4
> cSrcSize
)
745 return ERROR(corruption_detected
); /* overflow */
747 size_t const errorCode
= BIT_initDStream(&bitD1
, istart1
, length1
);
748 if (HUF_isError(errorCode
))
752 size_t const errorCode
= BIT_initDStream(&bitD2
, istart2
, length2
);
753 if (HUF_isError(errorCode
))
757 size_t const errorCode
= BIT_initDStream(&bitD3
, istart3
, length3
);
758 if (HUF_isError(errorCode
))
762 size_t const errorCode
= BIT_initDStream(&bitD4
, istart4
, length4
);
763 if (HUF_isError(errorCode
))
767 /* 16-32 symbols per loop (4-8 symbols per stream) */
768 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
769 for (; (endSignal
== BIT_DStream_unfinished
) & (op4
< (oend
- (sizeof(bitD4
.bitContainer
) - 1)));) {
770 HUF_DECODE_SYMBOLX4_2(op1
, &bitD1
);
771 HUF_DECODE_SYMBOLX4_2(op2
, &bitD2
);
772 HUF_DECODE_SYMBOLX4_2(op3
, &bitD3
);
773 HUF_DECODE_SYMBOLX4_2(op4
, &bitD4
);
774 HUF_DECODE_SYMBOLX4_1(op1
, &bitD1
);
775 HUF_DECODE_SYMBOLX4_1(op2
, &bitD2
);
776 HUF_DECODE_SYMBOLX4_1(op3
, &bitD3
);
777 HUF_DECODE_SYMBOLX4_1(op4
, &bitD4
);
778 HUF_DECODE_SYMBOLX4_2(op1
, &bitD1
);
779 HUF_DECODE_SYMBOLX4_2(op2
, &bitD2
);
780 HUF_DECODE_SYMBOLX4_2(op3
, &bitD3
);
781 HUF_DECODE_SYMBOLX4_2(op4
, &bitD4
);
782 HUF_DECODE_SYMBOLX4_0(op1
, &bitD1
);
783 HUF_DECODE_SYMBOLX4_0(op2
, &bitD2
);
784 HUF_DECODE_SYMBOLX4_0(op3
, &bitD3
);
785 HUF_DECODE_SYMBOLX4_0(op4
, &bitD4
);
787 endSignal
= BIT_reloadDStream(&bitD1
) | BIT_reloadDStream(&bitD2
) | BIT_reloadDStream(&bitD3
) | BIT_reloadDStream(&bitD4
);
790 /* check corruption */
792 return ERROR(corruption_detected
);
794 return ERROR(corruption_detected
);
796 return ERROR(corruption_detected
);
797 /* note : op4 already verified within main loop */
799 /* finish bitStreams one by one */
800 HUF_decodeStreamX4(op1
, &bitD1
, opStart2
, dt
, dtLog
);
801 HUF_decodeStreamX4(op2
, &bitD2
, opStart3
, dt
, dtLog
);
802 HUF_decodeStreamX4(op3
, &bitD3
, opStart4
, dt
, dtLog
);
803 HUF_decodeStreamX4(op4
, &bitD4
, oend
, dt
, dtLog
);
807 U32
const endCheck
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
809 return ERROR(corruption_detected
);
817 size_t HUF_decompress4X4_usingDTable(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
819 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
820 if (dtd
.tableType
!= 1)
821 return ERROR(GENERIC
);
822 return HUF_decompress4X4_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
);
825 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable
*dctx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
827 const BYTE
*ip
= (const BYTE
*)cSrc
;
829 size_t hSize
= HUF_readDTableX4_wksp(dctx
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
830 if (HUF_isError(hSize
))
832 if (hSize
>= cSrcSize
)
833 return ERROR(srcSize_wrong
);
837 return HUF_decompress4X4_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
);
840 /* ********************************/
841 /* Generic decompression selector */
842 /* ********************************/
844 size_t HUF_decompress1X_usingDTable(void *dst
, size_t maxDstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
846 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
847 return dtd
.tableType
? HUF_decompress1X4_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
)
848 : HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
);
851 size_t HUF_decompress4X_usingDTable(void *dst
, size_t maxDstSize
, const void *cSrc
, size_t cSrcSize
, const HUF_DTable
*DTable
)
853 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
854 return dtd
.tableType
? HUF_decompress4X4_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
)
855 : HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
);
862 static const algo_time_t algoTime
[16 /* Quantization */][3 /* single, double, quad */] = {
863 /* single, double, quad */
864 {{0, 0}, {1, 1}, {2, 2}}, /* Q==0 : impossible */
865 {{0, 0}, {1, 1}, {2, 2}}, /* Q==1 : impossible */
866 {{38, 130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
867 {{448, 128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
868 {{556, 128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
869 {{714, 128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
870 {{883, 128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
871 {{897, 128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
872 {{926, 128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
873 {{947, 128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
874 {{1107, 128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
875 {{1177, 128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
876 {{1242, 128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
877 {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
878 {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
879 {{722, 128}, {1891, 145}, {1936, 146}}, /* Q ==15 : 93-99% */
882 /** HUF_selectDecoder() :
883 * Tells which decoder is likely to decode faster,
884 * based on a set of pre-determined metrics.
885 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
886 * Assumption : 0 < cSrcSize < dstSize <= 128 KB */
887 U32
HUF_selectDecoder(size_t dstSize
, size_t cSrcSize
)
889 /* decoder timing evaluation */
890 U32
const Q
= (U32
)(cSrcSize
* 16 / dstSize
); /* Q < 16 since dstSize > cSrcSize */
891 U32
const D256
= (U32
)(dstSize
>> 8);
892 U32
const DTime0
= algoTime
[Q
][0].tableTime
+ (algoTime
[Q
][0].decode256Time
* D256
);
893 U32 DTime1
= algoTime
[Q
][1].tableTime
+ (algoTime
[Q
][1].decode256Time
* D256
);
894 DTime1
+= DTime1
>> 3; /* advantage to algorithm using less memory, for cache eviction */
896 return DTime1
< DTime0
;
899 typedef size_t (*decompressionAlgo
)(void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
);
901 size_t HUF_decompress4X_DCtx_wksp(HUF_DTable
*dctx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
903 /* validation checks */
905 return ERROR(dstSize_tooSmall
);
906 if (cSrcSize
> dstSize
)
907 return ERROR(corruption_detected
); /* invalid */
908 if (cSrcSize
== dstSize
) {
909 memcpy(dst
, cSrc
, dstSize
);
911 } /* not compressed */
913 memset(dst
, *(const BYTE
*)cSrc
, dstSize
);
918 U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
919 return algoNb
? HUF_decompress4X4_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
)
920 : HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
924 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable
*dctx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
926 /* validation checks */
928 return ERROR(dstSize_tooSmall
);
929 if ((cSrcSize
>= dstSize
) || (cSrcSize
<= 1))
930 return ERROR(corruption_detected
); /* invalid */
933 U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
934 return algoNb
? HUF_decompress4X4_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
)
935 : HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
);
939 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable
*dctx
, void *dst
, size_t dstSize
, const void *cSrc
, size_t cSrcSize
, void *workspace
, size_t workspaceSize
)
941 /* validation checks */
943 return ERROR(dstSize_tooSmall
);
944 if (cSrcSize
> dstSize
)
945 return ERROR(corruption_detected
); /* invalid */
946 if (cSrcSize
== dstSize
) {
947 memcpy(dst
, cSrc
, dstSize
);
949 } /* not compressed */
951 memset(dst
, *(const BYTE
*)cSrc
, dstSize
);
956 U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
957 return algoNb
? HUF_decompress1X4_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
)
958 : HUF_decompress1X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workspace
, workspaceSize
);