1 /* ******************************************************************
2 * huff0 huffman decoder,
3 * part of Finite State Entropy library
4 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
6 * You can contact the author at :
7 * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
9 * This source code is licensed under both the BSD-style license (found in the
10 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 * in the COPYING file in the root directory of this source tree).
12 * You may select, at your option, one of the above-listed licenses.
13 ****************************************************************** */
15 /* **************************************************************
17 ****************************************************************/
18 #include <string.h> /* memcpy, memset */
19 #include "../common/compiler.h"
20 #include "../common/bitstream.h" /* BIT_* */
21 #include "../common/fse.h" /* to compress headers */
22 #define HUF_STATIC_LINKING_ONLY
23 #include "../common/huf.h"
24 #include "../common/error_private.h"
26 /* **************************************************************
28 ****************************************************************/
30 /* These two optional macros force the use one way or another of the two
31 * Huffman decompression implementations. You can't force in both directions
34 #if defined(HUF_FORCE_DECOMPRESS_X1) && \
35 defined(HUF_FORCE_DECOMPRESS_X2)
36 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
40 /* **************************************************************
42 ****************************************************************/
43 #define HUF_isError ERR_isError
46 /* **************************************************************
47 * Byte alignment for workSpace management
48 ****************************************************************/
49 #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
50 #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
53 /* **************************************************************
54 * BMI2 Variant Wrappers
55 ****************************************************************/
58 #define HUF_DGEN(fn) \
60 static size_t fn##_default( \
61 void* dst, size_t dstSize, \
62 const void* cSrc, size_t cSrcSize, \
63 const HUF_DTable* DTable) \
65 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
68 static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
69 void* dst, size_t dstSize, \
70 const void* cSrc, size_t cSrcSize, \
71 const HUF_DTable* DTable) \
73 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
76 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
77 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
80 return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
82 return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
87 #define HUF_DGEN(fn) \
88 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
89 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
92 return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
98 /*-***************************/
99 /* generic DTableDesc */
100 /*-***************************/
101 typedef struct { BYTE maxTableLog
; BYTE tableType
; BYTE tableLog
; BYTE reserved
; } DTableDesc
;
103 static DTableDesc
HUF_getDTableDesc(const HUF_DTable
* table
)
106 memcpy(&dtd
, table
, sizeof(dtd
));
111 #ifndef HUF_FORCE_DECOMPRESS_X2
113 /*-***************************/
114 /* single-symbol decoding */
115 /*-***************************/
116 typedef struct { BYTE byte
; BYTE nbBits
; } HUF_DEltX1
; /* single-symbol decoding */
118 size_t HUF_readDTableX1_wksp(HUF_DTable
* DTable
, const void* src
, size_t srcSize
, void* workSpace
, size_t wkspSize
)
123 void* const dtPtr
= DTable
+ 1;
124 HUF_DEltX1
* const dt
= (HUF_DEltX1
*)dtPtr
;
128 size_t spaceUsed32
= 0;
130 rankVal
= (U32
*)workSpace
+ spaceUsed32
;
131 spaceUsed32
+= HUF_TABLELOG_ABSOLUTEMAX
+ 1;
132 huffWeight
= (BYTE
*)((U32
*)workSpace
+ spaceUsed32
);
133 spaceUsed32
+= HUF_ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
135 if ((spaceUsed32
<< 2) > wkspSize
) return ERROR(tableLog_tooLarge
);
137 DEBUG_STATIC_ASSERT(sizeof(DTableDesc
) == sizeof(HUF_DTable
));
138 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
140 iSize
= HUF_readStats(huffWeight
, HUF_SYMBOLVALUE_MAX
+ 1, rankVal
, &nbSymbols
, &tableLog
, src
, srcSize
);
141 if (HUF_isError(iSize
)) return iSize
;
144 { DTableDesc dtd
= HUF_getDTableDesc(DTable
);
145 if (tableLog
> (U32
)(dtd
.maxTableLog
+1)) return ERROR(tableLog_tooLarge
); /* DTable too small, Huffman tree cannot fit in */
147 dtd
.tableLog
= (BYTE
)tableLog
;
148 memcpy(DTable
, &dtd
, sizeof(dtd
));
151 /* Calculate starting value for each rank */
152 { U32 n
, nextRankStart
= 0;
153 for (n
=1; n
<tableLog
+1; n
++) {
154 U32
const current
= nextRankStart
;
155 nextRankStart
+= (rankVal
[n
] << (n
-1));
156 rankVal
[n
] = current
;
161 size_t const nEnd
= nbSymbols
;
162 for (n
=0; n
<nEnd
; n
++) {
163 size_t const w
= huffWeight
[n
];
164 size_t const length
= (1 << w
) >> 1;
165 size_t const uStart
= rankVal
[w
];
166 size_t const uEnd
= uStart
+ length
;
170 D
.nbBits
= (BYTE
)(tableLog
+ 1 - w
);
171 rankVal
[w
] = (U32
)uEnd
;
173 /* Use length in the loop bound so the compiler knows it is short. */
174 for (u
= 0; u
< length
; ++u
)
177 /* Unroll the loop 4 times, we know it is a power of 2. */
178 for (u
= uStart
; u
< uEnd
; u
+= 4) {
187 size_t HUF_readDTableX1(HUF_DTable
* DTable
, const void* src
, size_t srcSize
)
189 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
190 return HUF_readDTableX1_wksp(DTable
, src
, srcSize
,
191 workSpace
, sizeof(workSpace
));
194 FORCE_INLINE_TEMPLATE BYTE
195 HUF_decodeSymbolX1(BIT_DStream_t
* Dstream
, const HUF_DEltX1
* dt
, const U32 dtLog
)
197 size_t const val
= BIT_lookBitsFast(Dstream
, dtLog
); /* note : dtLog >= 1 */
198 BYTE
const c
= dt
[val
].byte
;
199 BIT_skipBits(Dstream
, dt
[val
].nbBits
);
203 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
204 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
206 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
207 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
208 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
210 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
212 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
215 HUF_decodeStreamX1(BYTE
* p
, BIT_DStream_t
* const bitDPtr
, BYTE
* const pEnd
, const HUF_DEltX1
* const dt
, const U32 dtLog
)
217 BYTE
* const pStart
= p
;
219 /* up to 4 symbols at a time */
220 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
-3)) {
221 HUF_DECODE_SYMBOLX1_2(p
, bitDPtr
);
222 HUF_DECODE_SYMBOLX1_1(p
, bitDPtr
);
223 HUF_DECODE_SYMBOLX1_2(p
, bitDPtr
);
224 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
227 /* [0-3] symbols remaining */
229 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
))
230 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
232 /* no more data to retrieve from bitstream, no need to reload */
234 HUF_DECODE_SYMBOLX1_0(p
, bitDPtr
);
239 FORCE_INLINE_TEMPLATE
size_t
240 HUF_decompress1X1_usingDTable_internal_body(
241 void* dst
, size_t dstSize
,
242 const void* cSrc
, size_t cSrcSize
,
243 const HUF_DTable
* DTable
)
245 BYTE
* op
= (BYTE
*)dst
;
246 BYTE
* const oend
= op
+ dstSize
;
247 const void* dtPtr
= DTable
+ 1;
248 const HUF_DEltX1
* const dt
= (const HUF_DEltX1
*)dtPtr
;
250 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
251 U32
const dtLog
= dtd
.tableLog
;
253 CHECK_F( BIT_initDStream(&bitD
, cSrc
, cSrcSize
) );
255 HUF_decodeStreamX1(op
, &bitD
, oend
, dt
, dtLog
);
257 if (!BIT_endOfDStream(&bitD
)) return ERROR(corruption_detected
);
262 FORCE_INLINE_TEMPLATE
size_t
263 HUF_decompress4X1_usingDTable_internal_body(
264 void* dst
, size_t dstSize
,
265 const void* cSrc
, size_t cSrcSize
,
266 const HUF_DTable
* DTable
)
269 if (cSrcSize
< 10) return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
271 { const BYTE
* const istart
= (const BYTE
*) cSrc
;
272 BYTE
* const ostart
= (BYTE
*) dst
;
273 BYTE
* const oend
= ostart
+ dstSize
;
274 BYTE
* const olimit
= oend
- 3;
275 const void* const dtPtr
= DTable
+ 1;
276 const HUF_DEltX1
* const dt
= (const HUF_DEltX1
*)dtPtr
;
283 size_t const length1
= MEM_readLE16(istart
);
284 size_t const length2
= MEM_readLE16(istart
+2);
285 size_t const length3
= MEM_readLE16(istart
+4);
286 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
287 const BYTE
* const istart1
= istart
+ 6; /* jumpTable */
288 const BYTE
* const istart2
= istart1
+ length1
;
289 const BYTE
* const istart3
= istart2
+ length2
;
290 const BYTE
* const istart4
= istart3
+ length3
;
291 const size_t segmentSize
= (dstSize
+3) / 4;
292 BYTE
* const opStart2
= ostart
+ segmentSize
;
293 BYTE
* const opStart3
= opStart2
+ segmentSize
;
294 BYTE
* const opStart4
= opStart3
+ segmentSize
;
296 BYTE
* op2
= opStart2
;
297 BYTE
* op3
= opStart3
;
298 BYTE
* op4
= opStart4
;
299 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
300 U32
const dtLog
= dtd
.tableLog
;
303 if (length4
> cSrcSize
) return ERROR(corruption_detected
); /* overflow */
304 CHECK_F( BIT_initDStream(&bitD1
, istart1
, length1
) );
305 CHECK_F( BIT_initDStream(&bitD2
, istart2
, length2
) );
306 CHECK_F( BIT_initDStream(&bitD3
, istart3
, length3
) );
307 CHECK_F( BIT_initDStream(&bitD4
, istart4
, length4
) );
309 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
310 for ( ; (endSignal
) & (op4
< olimit
) ; ) {
311 HUF_DECODE_SYMBOLX1_2(op1
, &bitD1
);
312 HUF_DECODE_SYMBOLX1_2(op2
, &bitD2
);
313 HUF_DECODE_SYMBOLX1_2(op3
, &bitD3
);
314 HUF_DECODE_SYMBOLX1_2(op4
, &bitD4
);
315 HUF_DECODE_SYMBOLX1_1(op1
, &bitD1
);
316 HUF_DECODE_SYMBOLX1_1(op2
, &bitD2
);
317 HUF_DECODE_SYMBOLX1_1(op3
, &bitD3
);
318 HUF_DECODE_SYMBOLX1_1(op4
, &bitD4
);
319 HUF_DECODE_SYMBOLX1_2(op1
, &bitD1
);
320 HUF_DECODE_SYMBOLX1_2(op2
, &bitD2
);
321 HUF_DECODE_SYMBOLX1_2(op3
, &bitD3
);
322 HUF_DECODE_SYMBOLX1_2(op4
, &bitD4
);
323 HUF_DECODE_SYMBOLX1_0(op1
, &bitD1
);
324 HUF_DECODE_SYMBOLX1_0(op2
, &bitD2
);
325 HUF_DECODE_SYMBOLX1_0(op3
, &bitD3
);
326 HUF_DECODE_SYMBOLX1_0(op4
, &bitD4
);
327 endSignal
&= BIT_reloadDStreamFast(&bitD1
) == BIT_DStream_unfinished
;
328 endSignal
&= BIT_reloadDStreamFast(&bitD2
) == BIT_DStream_unfinished
;
329 endSignal
&= BIT_reloadDStreamFast(&bitD3
) == BIT_DStream_unfinished
;
330 endSignal
&= BIT_reloadDStreamFast(&bitD4
) == BIT_DStream_unfinished
;
333 /* check corruption */
334 /* note : should not be necessary : op# advance in lock step, and we control op4.
335 * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
336 if (op1
> opStart2
) return ERROR(corruption_detected
);
337 if (op2
> opStart3
) return ERROR(corruption_detected
);
338 if (op3
> opStart4
) return ERROR(corruption_detected
);
339 /* note : op4 supposed already verified within main loop */
341 /* finish bitStreams one by one */
342 HUF_decodeStreamX1(op1
, &bitD1
, opStart2
, dt
, dtLog
);
343 HUF_decodeStreamX1(op2
, &bitD2
, opStart3
, dt
, dtLog
);
344 HUF_decodeStreamX1(op3
, &bitD3
, opStart4
, dt
, dtLog
);
345 HUF_decodeStreamX1(op4
, &bitD4
, oend
, dt
, dtLog
);
348 { U32
const endCheck
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
349 if (!endCheck
) return ERROR(corruption_detected
); }
357 typedef size_t (*HUF_decompress_usingDTable_t
)(void *dst
, size_t dstSize
,
360 const HUF_DTable
*DTable
);
362 HUF_DGEN(HUF_decompress1X1_usingDTable_internal
)
363 HUF_DGEN(HUF_decompress4X1_usingDTable_internal
)
367 size_t HUF_decompress1X1_usingDTable(
368 void* dst
, size_t dstSize
,
369 const void* cSrc
, size_t cSrcSize
,
370 const HUF_DTable
* DTable
)
372 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
373 if (dtd
.tableType
!= 0) return ERROR(GENERIC
);
374 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
377 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
378 const void* cSrc
, size_t cSrcSize
,
379 void* workSpace
, size_t wkspSize
)
381 const BYTE
* ip
= (const BYTE
*) cSrc
;
383 size_t const hSize
= HUF_readDTableX1_wksp(DCtx
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
384 if (HUF_isError(hSize
)) return hSize
;
385 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
386 ip
+= hSize
; cSrcSize
-= hSize
;
388 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
, /* bmi2 */ 0);
392 size_t HUF_decompress1X1_DCtx(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
393 const void* cSrc
, size_t cSrcSize
)
395 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
396 return HUF_decompress1X1_DCtx_wksp(DCtx
, dst
, dstSize
, cSrc
, cSrcSize
,
397 workSpace
, sizeof(workSpace
));
400 size_t HUF_decompress1X1 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
402 HUF_CREATE_STATIC_DTABLEX1(DTable
, HUF_TABLELOG_MAX
);
403 return HUF_decompress1X1_DCtx (DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
406 size_t HUF_decompress4X1_usingDTable(
407 void* dst
, size_t dstSize
,
408 const void* cSrc
, size_t cSrcSize
,
409 const HUF_DTable
* DTable
)
411 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
412 if (dtd
.tableType
!= 0) return ERROR(GENERIC
);
413 return HUF_decompress4X1_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
416 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
417 const void* cSrc
, size_t cSrcSize
,
418 void* workSpace
, size_t wkspSize
, int bmi2
)
420 const BYTE
* ip
= (const BYTE
*) cSrc
;
422 size_t const hSize
= HUF_readDTableX1_wksp (dctx
, cSrc
, cSrcSize
,
423 workSpace
, wkspSize
);
424 if (HUF_isError(hSize
)) return hSize
;
425 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
426 ip
+= hSize
; cSrcSize
-= hSize
;
428 return HUF_decompress4X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
431 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
432 const void* cSrc
, size_t cSrcSize
,
433 void* workSpace
, size_t wkspSize
)
435 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, 0);
439 size_t HUF_decompress4X1_DCtx (HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
441 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
442 return HUF_decompress4X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
443 workSpace
, sizeof(workSpace
));
445 size_t HUF_decompress4X1 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
447 HUF_CREATE_STATIC_DTABLEX1(DTable
, HUF_TABLELOG_MAX
);
448 return HUF_decompress4X1_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
451 #endif /* HUF_FORCE_DECOMPRESS_X2 */
454 #ifndef HUF_FORCE_DECOMPRESS_X1
456 /* *************************/
457 /* double-symbols decoding */
458 /* *************************/
460 typedef struct { U16 sequence
; BYTE nbBits
; BYTE length
; } HUF_DEltX2
; /* double-symbols decoding */
461 typedef struct { BYTE symbol
; BYTE weight
; } sortedSymbol_t
;
462 typedef U32 rankValCol_t
[HUF_TABLELOG_MAX
+ 1];
463 typedef rankValCol_t rankVal_t
[HUF_TABLELOG_MAX
];
466 /* HUF_fillDTableX2Level2() :
467 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
468 static void HUF_fillDTableX2Level2(HUF_DEltX2
* DTable
, U32 sizeLog
, const U32 consumed
,
469 const U32
* rankValOrigin
, const int minWeight
,
470 const sortedSymbol_t
* sortedSymbols
, const U32 sortedListSize
,
471 U32 nbBitsBaseline
, U16 baseSeq
)
474 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
476 /* get pre-calculated rankVal */
477 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
479 /* fill skipped values */
481 U32 i
, skipSize
= rankVal
[minWeight
];
482 MEM_writeLE16(&(DElt
.sequence
), baseSeq
);
483 DElt
.nbBits
= (BYTE
)(consumed
);
485 for (i
= 0; i
< skipSize
; i
++)
490 { U32 s
; for (s
=0; s
<sortedListSize
; s
++) { /* note : sortedSymbols already skipped */
491 const U32 symbol
= sortedSymbols
[s
].symbol
;
492 const U32 weight
= sortedSymbols
[s
].weight
;
493 const U32 nbBits
= nbBitsBaseline
- weight
;
494 const U32 length
= 1 << (sizeLog
-nbBits
);
495 const U32 start
= rankVal
[weight
];
497 const U32 end
= start
+ length
;
499 MEM_writeLE16(&(DElt
.sequence
), (U16
)(baseSeq
+ (symbol
<< 8)));
500 DElt
.nbBits
= (BYTE
)(nbBits
+ consumed
);
502 do { DTable
[i
++] = DElt
; } while (i
<end
); /* since length >= 1 */
504 rankVal
[weight
] += length
;
509 static void HUF_fillDTableX2(HUF_DEltX2
* DTable
, const U32 targetLog
,
510 const sortedSymbol_t
* sortedList
, const U32 sortedListSize
,
511 const U32
* rankStart
, rankVal_t rankValOrigin
, const U32 maxWeight
,
512 const U32 nbBitsBaseline
)
514 U32 rankVal
[HUF_TABLELOG_MAX
+ 1];
515 const int scaleLog
= nbBitsBaseline
- targetLog
; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
516 const U32 minBits
= nbBitsBaseline
- maxWeight
;
519 memcpy(rankVal
, rankValOrigin
, sizeof(rankVal
));
522 for (s
=0; s
<sortedListSize
; s
++) {
523 const U16 symbol
= sortedList
[s
].symbol
;
524 const U32 weight
= sortedList
[s
].weight
;
525 const U32 nbBits
= nbBitsBaseline
- weight
;
526 const U32 start
= rankVal
[weight
];
527 const U32 length
= 1 << (targetLog
-nbBits
);
529 if (targetLog
-nbBits
>= minBits
) { /* enough room for a second symbol */
531 int minWeight
= nbBits
+ scaleLog
;
532 if (minWeight
< 1) minWeight
= 1;
533 sortedRank
= rankStart
[minWeight
];
534 HUF_fillDTableX2Level2(DTable
+start
, targetLog
-nbBits
, nbBits
,
535 rankValOrigin
[nbBits
], minWeight
,
536 sortedList
+sortedRank
, sortedListSize
-sortedRank
,
537 nbBitsBaseline
, symbol
);
540 MEM_writeLE16(&(DElt
.sequence
), symbol
);
541 DElt
.nbBits
= (BYTE
)(nbBits
);
543 { U32
const end
= start
+ length
;
545 for (u
= start
; u
< end
; u
++) DTable
[u
] = DElt
;
547 rankVal
[weight
] += length
;
551 size_t HUF_readDTableX2_wksp(HUF_DTable
* DTable
,
552 const void* src
, size_t srcSize
,
553 void* workSpace
, size_t wkspSize
)
555 U32 tableLog
, maxW
, sizeOfSort
, nbSymbols
;
556 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
557 U32
const maxTableLog
= dtd
.maxTableLog
;
559 void* dtPtr
= DTable
+1; /* force compiler to avoid strict-aliasing */
560 HUF_DEltX2
* const dt
= (HUF_DEltX2
*)dtPtr
;
563 rankValCol_t
* rankVal
;
566 sortedSymbol_t
* sortedSymbol
;
568 size_t spaceUsed32
= 0;
570 rankVal
= (rankValCol_t
*)((U32
*)workSpace
+ spaceUsed32
);
571 spaceUsed32
+= (sizeof(rankValCol_t
) * HUF_TABLELOG_MAX
) >> 2;
572 rankStats
= (U32
*)workSpace
+ spaceUsed32
;
573 spaceUsed32
+= HUF_TABLELOG_MAX
+ 1;
574 rankStart0
= (U32
*)workSpace
+ spaceUsed32
;
575 spaceUsed32
+= HUF_TABLELOG_MAX
+ 2;
576 sortedSymbol
= (sortedSymbol_t
*)workSpace
+ (spaceUsed32
* sizeof(U32
)) / sizeof(sortedSymbol_t
);
577 spaceUsed32
+= HUF_ALIGN(sizeof(sortedSymbol_t
) * (HUF_SYMBOLVALUE_MAX
+ 1), sizeof(U32
)) >> 2;
578 weightList
= (BYTE
*)((U32
*)workSpace
+ spaceUsed32
);
579 spaceUsed32
+= HUF_ALIGN(HUF_SYMBOLVALUE_MAX
+ 1, sizeof(U32
)) >> 2;
581 if ((spaceUsed32
<< 2) > wkspSize
) return ERROR(tableLog_tooLarge
);
583 rankStart
= rankStart0
+ 1;
584 memset(rankStats
, 0, sizeof(U32
) * (2 * HUF_TABLELOG_MAX
+ 2 + 1));
586 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2
) == sizeof(HUF_DTable
)); /* if compiler fails here, assertion is wrong */
587 if (maxTableLog
> HUF_TABLELOG_MAX
) return ERROR(tableLog_tooLarge
);
588 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
590 iSize
= HUF_readStats(weightList
, HUF_SYMBOLVALUE_MAX
+ 1, rankStats
, &nbSymbols
, &tableLog
, src
, srcSize
);
591 if (HUF_isError(iSize
)) return iSize
;
594 if (tableLog
> maxTableLog
) return ERROR(tableLog_tooLarge
); /* DTable can't fit code depth */
597 for (maxW
= tableLog
; rankStats
[maxW
]==0; maxW
--) {} /* necessarily finds a solution before 0 */
599 /* Get start index of each weight */
600 { U32 w
, nextRankStart
= 0;
601 for (w
=1; w
<maxW
+1; w
++) {
602 U32 current
= nextRankStart
;
603 nextRankStart
+= rankStats
[w
];
604 rankStart
[w
] = current
;
606 rankStart
[0] = nextRankStart
; /* put all 0w symbols at the end of sorted list*/
607 sizeOfSort
= nextRankStart
;
610 /* sort symbols by weight */
612 for (s
=0; s
<nbSymbols
; s
++) {
613 U32
const w
= weightList
[s
];
614 U32
const r
= rankStart
[w
]++;
615 sortedSymbol
[r
].symbol
= (BYTE
)s
;
616 sortedSymbol
[r
].weight
= (BYTE
)w
;
618 rankStart
[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
622 { U32
* const rankVal0
= rankVal
[0];
623 { int const rescale
= (maxTableLog
-tableLog
) - 1; /* tableLog <= maxTableLog */
626 for (w
=1; w
<maxW
+1; w
++) {
627 U32 current
= nextRankVal
;
628 nextRankVal
+= rankStats
[w
] << (w
+rescale
);
629 rankVal0
[w
] = current
;
631 { U32
const minBits
= tableLog
+1 - maxW
;
633 for (consumed
= minBits
; consumed
< maxTableLog
- minBits
+ 1; consumed
++) {
634 U32
* const rankValPtr
= rankVal
[consumed
];
636 for (w
= 1; w
< maxW
+1; w
++) {
637 rankValPtr
[w
] = rankVal0
[w
] >> consumed
;
640 HUF_fillDTableX2(dt
, maxTableLog
,
641 sortedSymbol
, sizeOfSort
,
642 rankStart0
, rankVal
, maxW
,
645 dtd
.tableLog
= (BYTE
)maxTableLog
;
647 memcpy(DTable
, &dtd
, sizeof(dtd
));
651 size_t HUF_readDTableX2(HUF_DTable
* DTable
, const void* src
, size_t srcSize
)
653 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
654 return HUF_readDTableX2_wksp(DTable
, src
, srcSize
,
655 workSpace
, sizeof(workSpace
));
659 FORCE_INLINE_TEMPLATE U32
660 HUF_decodeSymbolX2(void* op
, BIT_DStream_t
* DStream
, const HUF_DEltX2
* dt
, const U32 dtLog
)
662 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
663 memcpy(op
, dt
+val
, 2);
664 BIT_skipBits(DStream
, dt
[val
].nbBits
);
665 return dt
[val
].length
;
668 FORCE_INLINE_TEMPLATE U32
669 HUF_decodeLastSymbolX2(void* op
, BIT_DStream_t
* DStream
, const HUF_DEltX2
* dt
, const U32 dtLog
)
671 size_t const val
= BIT_lookBitsFast(DStream
, dtLog
); /* note : dtLog >= 1 */
672 memcpy(op
, dt
+val
, 1);
673 if (dt
[val
].length
==1) BIT_skipBits(DStream
, dt
[val
].nbBits
);
675 if (DStream
->bitsConsumed
< (sizeof(DStream
->bitContainer
)*8)) {
676 BIT_skipBits(DStream
, dt
[val
].nbBits
);
677 if (DStream
->bitsConsumed
> (sizeof(DStream
->bitContainer
)*8))
678 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
679 DStream
->bitsConsumed
= (sizeof(DStream
->bitContainer
)*8);
684 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
685 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
687 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
688 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
689 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
691 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
693 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
696 HUF_decodeStreamX2(BYTE
* p
, BIT_DStream_t
* bitDPtr
, BYTE
* const pEnd
,
697 const HUF_DEltX2
* const dt
, const U32 dtLog
)
699 BYTE
* const pStart
= p
;
701 /* up to 8 symbols at a time */
702 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
< pEnd
-(sizeof(bitDPtr
->bitContainer
)-1))) {
703 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
704 HUF_DECODE_SYMBOLX2_1(p
, bitDPtr
);
705 HUF_DECODE_SYMBOLX2_2(p
, bitDPtr
);
706 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
709 /* closer to end : up to 2 symbols at a time */
710 while ((BIT_reloadDStream(bitDPtr
) == BIT_DStream_unfinished
) & (p
<= pEnd
-2))
711 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
);
714 HUF_DECODE_SYMBOLX2_0(p
, bitDPtr
); /* no need to reload : reached the end of DStream */
717 p
+= HUF_decodeLastSymbolX2(p
, bitDPtr
, dt
, dtLog
);
722 FORCE_INLINE_TEMPLATE
size_t
723 HUF_decompress1X2_usingDTable_internal_body(
724 void* dst
, size_t dstSize
,
725 const void* cSrc
, size_t cSrcSize
,
726 const HUF_DTable
* DTable
)
731 CHECK_F( BIT_initDStream(&bitD
, cSrc
, cSrcSize
) );
734 { BYTE
* const ostart
= (BYTE
*) dst
;
735 BYTE
* const oend
= ostart
+ dstSize
;
736 const void* const dtPtr
= DTable
+1; /* force compiler to not use strict-aliasing */
737 const HUF_DEltX2
* const dt
= (const HUF_DEltX2
*)dtPtr
;
738 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
739 HUF_decodeStreamX2(ostart
, &bitD
, oend
, dt
, dtd
.tableLog
);
743 if (!BIT_endOfDStream(&bitD
)) return ERROR(corruption_detected
);
749 FORCE_INLINE_TEMPLATE
size_t
750 HUF_decompress4X2_usingDTable_internal_body(
751 void* dst
, size_t dstSize
,
752 const void* cSrc
, size_t cSrcSize
,
753 const HUF_DTable
* DTable
)
755 if (cSrcSize
< 10) return ERROR(corruption_detected
); /* strict minimum : jump table + 1 byte per stream */
757 { const BYTE
* const istart
= (const BYTE
*) cSrc
;
758 BYTE
* const ostart
= (BYTE
*) dst
;
759 BYTE
* const oend
= ostart
+ dstSize
;
760 BYTE
* const olimit
= oend
- (sizeof(size_t)-1);
761 const void* const dtPtr
= DTable
+1;
762 const HUF_DEltX2
* const dt
= (const HUF_DEltX2
*)dtPtr
;
769 size_t const length1
= MEM_readLE16(istart
);
770 size_t const length2
= MEM_readLE16(istart
+2);
771 size_t const length3
= MEM_readLE16(istart
+4);
772 size_t const length4
= cSrcSize
- (length1
+ length2
+ length3
+ 6);
773 const BYTE
* const istart1
= istart
+ 6; /* jumpTable */
774 const BYTE
* const istart2
= istart1
+ length1
;
775 const BYTE
* const istart3
= istart2
+ length2
;
776 const BYTE
* const istart4
= istart3
+ length3
;
777 size_t const segmentSize
= (dstSize
+3) / 4;
778 BYTE
* const opStart2
= ostart
+ segmentSize
;
779 BYTE
* const opStart3
= opStart2
+ segmentSize
;
780 BYTE
* const opStart4
= opStart3
+ segmentSize
;
782 BYTE
* op2
= opStart2
;
783 BYTE
* op3
= opStart3
;
784 BYTE
* op4
= opStart4
;
786 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
787 U32
const dtLog
= dtd
.tableLog
;
789 if (length4
> cSrcSize
) return ERROR(corruption_detected
); /* overflow */
790 CHECK_F( BIT_initDStream(&bitD1
, istart1
, length1
) );
791 CHECK_F( BIT_initDStream(&bitD2
, istart2
, length2
) );
792 CHECK_F( BIT_initDStream(&bitD3
, istart3
, length3
) );
793 CHECK_F( BIT_initDStream(&bitD4
, istart4
, length4
) );
795 /* 16-32 symbols per loop (4-8 symbols per stream) */
796 for ( ; (endSignal
) & (op4
< olimit
); ) {
797 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
798 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
799 HUF_DECODE_SYMBOLX2_1(op1
, &bitD1
);
800 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
801 HUF_DECODE_SYMBOLX2_0(op1
, &bitD1
);
802 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
803 HUF_DECODE_SYMBOLX2_1(op2
, &bitD2
);
804 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
805 HUF_DECODE_SYMBOLX2_0(op2
, &bitD2
);
806 endSignal
&= BIT_reloadDStreamFast(&bitD1
) == BIT_DStream_unfinished
;
807 endSignal
&= BIT_reloadDStreamFast(&bitD2
) == BIT_DStream_unfinished
;
808 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
809 HUF_DECODE_SYMBOLX2_1(op3
, &bitD3
);
810 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
811 HUF_DECODE_SYMBOLX2_0(op3
, &bitD3
);
812 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
813 HUF_DECODE_SYMBOLX2_1(op4
, &bitD4
);
814 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
815 HUF_DECODE_SYMBOLX2_0(op4
, &bitD4
);
816 endSignal
&= BIT_reloadDStreamFast(&bitD3
) == BIT_DStream_unfinished
;
817 endSignal
&= BIT_reloadDStreamFast(&bitD4
) == BIT_DStream_unfinished
;
819 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
820 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
821 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
822 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
823 HUF_DECODE_SYMBOLX2_1(op1
, &bitD1
);
824 HUF_DECODE_SYMBOLX2_1(op2
, &bitD2
);
825 HUF_DECODE_SYMBOLX2_1(op3
, &bitD3
);
826 HUF_DECODE_SYMBOLX2_1(op4
, &bitD4
);
827 HUF_DECODE_SYMBOLX2_2(op1
, &bitD1
);
828 HUF_DECODE_SYMBOLX2_2(op2
, &bitD2
);
829 HUF_DECODE_SYMBOLX2_2(op3
, &bitD3
);
830 HUF_DECODE_SYMBOLX2_2(op4
, &bitD4
);
831 HUF_DECODE_SYMBOLX2_0(op1
, &bitD1
);
832 HUF_DECODE_SYMBOLX2_0(op2
, &bitD2
);
833 HUF_DECODE_SYMBOLX2_0(op3
, &bitD3
);
834 HUF_DECODE_SYMBOLX2_0(op4
, &bitD4
);
835 endSignal
= (U32
)LIKELY(
836 (BIT_reloadDStreamFast(&bitD1
) == BIT_DStream_unfinished
)
837 & (BIT_reloadDStreamFast(&bitD2
) == BIT_DStream_unfinished
)
838 & (BIT_reloadDStreamFast(&bitD3
) == BIT_DStream_unfinished
)
839 & (BIT_reloadDStreamFast(&bitD4
) == BIT_DStream_unfinished
));
843 /* check corruption */
844 if (op1
> opStart2
) return ERROR(corruption_detected
);
845 if (op2
> opStart3
) return ERROR(corruption_detected
);
846 if (op3
> opStart4
) return ERROR(corruption_detected
);
847 /* note : op4 already verified within main loop */
849 /* finish bitStreams one by one */
850 HUF_decodeStreamX2(op1
, &bitD1
, opStart2
, dt
, dtLog
);
851 HUF_decodeStreamX2(op2
, &bitD2
, opStart3
, dt
, dtLog
);
852 HUF_decodeStreamX2(op3
, &bitD3
, opStart4
, dt
, dtLog
);
853 HUF_decodeStreamX2(op4
, &bitD4
, oend
, dt
, dtLog
);
856 { U32
const endCheck
= BIT_endOfDStream(&bitD1
) & BIT_endOfDStream(&bitD2
) & BIT_endOfDStream(&bitD3
) & BIT_endOfDStream(&bitD4
);
857 if (!endCheck
) return ERROR(corruption_detected
); }
864 HUF_DGEN(HUF_decompress1X2_usingDTable_internal
)
865 HUF_DGEN(HUF_decompress4X2_usingDTable_internal
)
867 size_t HUF_decompress1X2_usingDTable(
868 void* dst
, size_t dstSize
,
869 const void* cSrc
, size_t cSrcSize
,
870 const HUF_DTable
* DTable
)
872 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
873 if (dtd
.tableType
!= 1) return ERROR(GENERIC
);
874 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
877 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
878 const void* cSrc
, size_t cSrcSize
,
879 void* workSpace
, size_t wkspSize
)
881 const BYTE
* ip
= (const BYTE
*) cSrc
;
883 size_t const hSize
= HUF_readDTableX2_wksp(DCtx
, cSrc
, cSrcSize
,
884 workSpace
, wkspSize
);
885 if (HUF_isError(hSize
)) return hSize
;
886 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
887 ip
+= hSize
; cSrcSize
-= hSize
;
889 return HUF_decompress1X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, DCtx
, /* bmi2 */ 0);
893 size_t HUF_decompress1X2_DCtx(HUF_DTable
* DCtx
, void* dst
, size_t dstSize
,
894 const void* cSrc
, size_t cSrcSize
)
896 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
897 return HUF_decompress1X2_DCtx_wksp(DCtx
, dst
, dstSize
, cSrc
, cSrcSize
,
898 workSpace
, sizeof(workSpace
));
901 size_t HUF_decompress1X2 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
903 HUF_CREATE_STATIC_DTABLEX2(DTable
, HUF_TABLELOG_MAX
);
904 return HUF_decompress1X2_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
907 size_t HUF_decompress4X2_usingDTable(
908 void* dst
, size_t dstSize
,
909 const void* cSrc
, size_t cSrcSize
,
910 const HUF_DTable
* DTable
)
912 DTableDesc dtd
= HUF_getDTableDesc(DTable
);
913 if (dtd
.tableType
!= 1) return ERROR(GENERIC
);
914 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
917 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
918 const void* cSrc
, size_t cSrcSize
,
919 void* workSpace
, size_t wkspSize
, int bmi2
)
921 const BYTE
* ip
= (const BYTE
*) cSrc
;
923 size_t hSize
= HUF_readDTableX2_wksp(dctx
, cSrc
, cSrcSize
,
924 workSpace
, wkspSize
);
925 if (HUF_isError(hSize
)) return hSize
;
926 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
927 ip
+= hSize
; cSrcSize
-= hSize
;
929 return HUF_decompress4X2_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
932 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
933 const void* cSrc
, size_t cSrcSize
,
934 void* workSpace
, size_t wkspSize
)
936 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, /* bmi2 */ 0);
940 size_t HUF_decompress4X2_DCtx(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
941 const void* cSrc
, size_t cSrcSize
)
943 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
944 return HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
945 workSpace
, sizeof(workSpace
));
948 size_t HUF_decompress4X2 (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
950 HUF_CREATE_STATIC_DTABLEX2(DTable
, HUF_TABLELOG_MAX
);
951 return HUF_decompress4X2_DCtx(DTable
, dst
, dstSize
, cSrc
, cSrcSize
);
954 #endif /* HUF_FORCE_DECOMPRESS_X1 */
957 /* ***********************************/
958 /* Universal decompression selectors */
959 /* ***********************************/
961 size_t HUF_decompress1X_usingDTable(void* dst
, size_t maxDstSize
,
962 const void* cSrc
, size_t cSrcSize
,
963 const HUF_DTable
* DTable
)
965 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
966 #if defined(HUF_FORCE_DECOMPRESS_X1)
968 assert(dtd
.tableType
== 0);
969 return HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
970 #elif defined(HUF_FORCE_DECOMPRESS_X2)
972 assert(dtd
.tableType
== 1);
973 return HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
975 return dtd
.tableType
? HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0) :
976 HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
980 size_t HUF_decompress4X_usingDTable(void* dst
, size_t maxDstSize
,
981 const void* cSrc
, size_t cSrcSize
,
982 const HUF_DTable
* DTable
)
984 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
985 #if defined(HUF_FORCE_DECOMPRESS_X1)
987 assert(dtd
.tableType
== 0);
988 return HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
989 #elif defined(HUF_FORCE_DECOMPRESS_X2)
991 assert(dtd
.tableType
== 1);
992 return HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
994 return dtd
.tableType
? HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0) :
995 HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, /* bmi2 */ 0);
1000 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1001 typedef struct { U32 tableTime
; U32 decode256Time
; } algo_time_t
;
1002 static const algo_time_t algoTime
[16 /* Quantization */][3 /* single, double, quad */] =
1004 /* single, double, quad */
1005 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1006 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1007 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1008 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1009 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1010 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1011 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1012 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1013 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1014 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1015 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1016 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1017 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1018 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1019 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1020 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1024 /** HUF_selectDecoder() :
1025 * Tells which decoder is likely to decode faster,
1026 * based on a set of pre-computed metrics.
1027 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1028 * Assumption : 0 < dstSize <= 128 KB */
1029 U32
HUF_selectDecoder (size_t dstSize
, size_t cSrcSize
)
1031 assert(dstSize
> 0);
1032 assert(dstSize
<= 128*1024);
1033 #if defined(HUF_FORCE_DECOMPRESS_X1)
1037 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1042 /* decoder timing evaluation */
1043 { U32
const Q
= (cSrcSize
>= dstSize
) ? 15 : (U32
)(cSrcSize
* 16 / dstSize
); /* Q < 16 */
1044 U32
const D256
= (U32
)(dstSize
>> 8);
1045 U32
const DTime0
= algoTime
[Q
][0].tableTime
+ (algoTime
[Q
][0].decode256Time
* D256
);
1046 U32 DTime1
= algoTime
[Q
][1].tableTime
+ (algoTime
[Q
][1].decode256Time
* D256
);
1047 DTime1
+= DTime1
>> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1048 return DTime1
< DTime0
;
1054 typedef size_t (*decompressionAlgo
)(void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
);
1056 size_t HUF_decompress (void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1058 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1059 static const decompressionAlgo decompress
[2] = { HUF_decompress4X1
, HUF_decompress4X2
};
1062 /* validation checks */
1063 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1064 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1065 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1066 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1068 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1069 #if defined(HUF_FORCE_DECOMPRESS_X1)
1071 assert(algoNb
== 0);
1072 return HUF_decompress4X1(dst
, dstSize
, cSrc
, cSrcSize
);
1073 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1075 assert(algoNb
== 1);
1076 return HUF_decompress4X2(dst
, dstSize
, cSrc
, cSrcSize
);
1078 return decompress
[algoNb
](dst
, dstSize
, cSrc
, cSrcSize
);
1083 size_t HUF_decompress4X_DCtx (HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1085 /* validation checks */
1086 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1087 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1088 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1089 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1091 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1092 #if defined(HUF_FORCE_DECOMPRESS_X1)
1094 assert(algoNb
== 0);
1095 return HUF_decompress4X1_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
);
1096 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1098 assert(algoNb
== 1);
1099 return HUF_decompress4X2_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
);
1101 return algoNb
? HUF_decompress4X2_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
) :
1102 HUF_decompress4X1_DCtx(dctx
, dst
, dstSize
, cSrc
, cSrcSize
) ;
1107 size_t HUF_decompress4X_hufOnly(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
)
1109 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
1110 return HUF_decompress4X_hufOnly_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
1111 workSpace
, sizeof(workSpace
));
1115 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable
* dctx
, void* dst
,
1116 size_t dstSize
, const void* cSrc
,
1117 size_t cSrcSize
, void* workSpace
,
1120 /* validation checks */
1121 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1122 if (cSrcSize
== 0) return ERROR(corruption_detected
);
1124 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1125 #if defined(HUF_FORCE_DECOMPRESS_X1)
1127 assert(algoNb
== 0);
1128 return HUF_decompress4X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1129 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1131 assert(algoNb
== 1);
1132 return HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1134 return algoNb
? HUF_decompress4X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1135 cSrcSize
, workSpace
, wkspSize
):
1136 HUF_decompress4X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1141 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
1142 const void* cSrc
, size_t cSrcSize
,
1143 void* workSpace
, size_t wkspSize
)
1145 /* validation checks */
1146 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1147 if (cSrcSize
> dstSize
) return ERROR(corruption_detected
); /* invalid */
1148 if (cSrcSize
== dstSize
) { memcpy(dst
, cSrc
, dstSize
); return dstSize
; } /* not compressed */
1149 if (cSrcSize
== 1) { memset(dst
, *(const BYTE
*)cSrc
, dstSize
); return dstSize
; } /* RLE */
1151 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1152 #if defined(HUF_FORCE_DECOMPRESS_X1)
1154 assert(algoNb
== 0);
1155 return HUF_decompress1X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1156 cSrcSize
, workSpace
, wkspSize
);
1157 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1159 assert(algoNb
== 1);
1160 return HUF_decompress1X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1161 cSrcSize
, workSpace
, wkspSize
);
1163 return algoNb
? HUF_decompress1X2_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1164 cSrcSize
, workSpace
, wkspSize
):
1165 HUF_decompress1X1_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
,
1166 cSrcSize
, workSpace
, wkspSize
);
1171 size_t HUF_decompress1X_DCtx(HUF_DTable
* dctx
, void* dst
, size_t dstSize
,
1172 const void* cSrc
, size_t cSrcSize
)
1174 U32 workSpace
[HUF_DECOMPRESS_WORKSPACE_SIZE_U32
];
1175 return HUF_decompress1X_DCtx_wksp(dctx
, dst
, dstSize
, cSrc
, cSrcSize
,
1176 workSpace
, sizeof(workSpace
));
1180 size_t HUF_decompress1X_usingDTable_bmi2(void* dst
, size_t maxDstSize
, const void* cSrc
, size_t cSrcSize
, const HUF_DTable
* DTable
, int bmi2
)
1182 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
1183 #if defined(HUF_FORCE_DECOMPRESS_X1)
1185 assert(dtd
.tableType
== 0);
1186 return HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1187 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1189 assert(dtd
.tableType
== 1);
1190 return HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1192 return dtd
.tableType
? HUF_decompress1X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
) :
1193 HUF_decompress1X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1197 #ifndef HUF_FORCE_DECOMPRESS_X2
1198 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
, void* workSpace
, size_t wkspSize
, int bmi2
)
1200 const BYTE
* ip
= (const BYTE
*) cSrc
;
1202 size_t const hSize
= HUF_readDTableX1_wksp(dctx
, cSrc
, cSrcSize
, workSpace
, wkspSize
);
1203 if (HUF_isError(hSize
)) return hSize
;
1204 if (hSize
>= cSrcSize
) return ERROR(srcSize_wrong
);
1205 ip
+= hSize
; cSrcSize
-= hSize
;
1207 return HUF_decompress1X1_usingDTable_internal(dst
, dstSize
, ip
, cSrcSize
, dctx
, bmi2
);
1211 size_t HUF_decompress4X_usingDTable_bmi2(void* dst
, size_t maxDstSize
, const void* cSrc
, size_t cSrcSize
, const HUF_DTable
* DTable
, int bmi2
)
1213 DTableDesc
const dtd
= HUF_getDTableDesc(DTable
);
1214 #if defined(HUF_FORCE_DECOMPRESS_X1)
1216 assert(dtd
.tableType
== 0);
1217 return HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1218 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1220 assert(dtd
.tableType
== 1);
1221 return HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1223 return dtd
.tableType
? HUF_decompress4X2_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
) :
1224 HUF_decompress4X1_usingDTable_internal(dst
, maxDstSize
, cSrc
, cSrcSize
, DTable
, bmi2
);
1228 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable
* dctx
, void* dst
, size_t dstSize
, const void* cSrc
, size_t cSrcSize
, void* workSpace
, size_t wkspSize
, int bmi2
)
1230 /* validation checks */
1231 if (dstSize
== 0) return ERROR(dstSize_tooSmall
);
1232 if (cSrcSize
== 0) return ERROR(corruption_detected
);
1234 { U32
const algoNb
= HUF_selectDecoder(dstSize
, cSrcSize
);
1235 #if defined(HUF_FORCE_DECOMPRESS_X1)
1237 assert(algoNb
== 0);
1238 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
);
1239 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1241 assert(algoNb
== 1);
1242 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
);
1244 return algoNb
? HUF_decompress4X2_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
) :
1245 HUF_decompress4X1_DCtx_wksp_bmi2(dctx
, dst
, dstSize
, cSrc
, cSrcSize
, workSpace
, wkspSize
, bmi2
);