usr/src/common/ficl/softcore/lz4.c (unleashed/tickless.git)
1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
35 #include <sys/types.h>
36 #include <sys/byteorder.h>
37 #include <assert.h>
38 #include <string.h>
39 #include <umem.h>
41 size_t lz4_compress(void *, void *, size_t, size_t, int);
42 int lz4_decompress(void *, void *, size_t, size_t, int);
43 static int real_LZ4_compress(const char *source, char *dest, int isize,
44 int osize);
45 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
46 int isize, int maxOutputSize);
47 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
48 int isize, int osize);
49 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
50 int isize, int osize);
52 /*ARGSUSED*/
53 size_t
54 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
56 uint32_t bufsiz;
57 char *dest = d_start;
59 assert(d_len >= sizeof (bufsiz));
61 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
62 d_len - sizeof (bufsiz));
64 /* Signal an error if the compression routine returned zero. */
65 if (bufsiz == 0)
66 return (s_len);
69 * Encode the compressed buffer size at the start. We'll need this in
70 * decompression to counter the effects of padding which might be
71 * added to the compressed buffer and which, if unhandled, would
72 * confuse the hell out of our decompression function.
74 *(uint32_t *)dest = BE_32(bufsiz);
76 return (bufsiz + sizeof (bufsiz));
79 /*ARGSUSED*/
80 int
81 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
83 const char *src = s_start;
84 uint32_t bufsiz = BE_IN32(src);
86 /* invalid compressed buffer size encoded at start */
87 if (bufsiz + sizeof (bufsiz) > s_len)
88 return (1);
91 * Returns 0 on success (decompression function returned non-negative)
92 * and non-zero on failure (decompression function returned negative).
94 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 d_start, bufsiz, d_len) < 0);
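/*
 * Illustrative sketch only (kept out of the build with #if 0): a minimal
 * round trip through the two wrappers above.  The buffer sizes and the
 * example_roundtrip() name are hypothetical and not part of this file's API.
 */
#if 0
static void
example_roundtrip(void)
{
	char src[4096];			/* data to compress */
	char packed[4096 + 4];		/* payload plus 4-byte size header */
	char out[4096];
	size_t clen;

	(void) memset(src, 'A', sizeof (src));

	/*
	 * lz4_compress() prepends the big-endian compressed size and
	 * returns the total number of bytes written; it returns s_len
	 * when the data could not be compressed into d_len bytes.
	 */
	clen = lz4_compress(src, packed, sizeof (src), sizeof (packed), 0);
	if (clen == sizeof (src))
		return;			/* store the data uncompressed */

	/* lz4_decompress() returns 0 on success, non-zero on failure. */
	if (lz4_decompress(packed, out, clen, sizeof (out), 0) != 0)
		return;			/* corrupt or truncated stream */
}
#endif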
99 * LZ4 API Description:
101 * Simple Functions:
102 * real_LZ4_compress() :
103 * isize : is the input size. Max supported value is ~1.9GB
104 * return : the number of bytes written in buffer dest
105 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106 * note : destination buffer must be already allocated.
107 * destination buffer must be sized to handle worst case
108 * situations (input data not compressible)
110 * Advanced Functions
112 * LZ4_uncompress_unknownOutputSize() :
113 * isize : is the input size, therefore the compressed size
114 * maxOutputSize : is the size of the destination buffer (which must be
115 * already allocated)
116 * return : the number of bytes decoded in the destination buffer
117 * (necessarily <= maxOutputSize). If the source stream is
118 * malformed, the function will stop decoding and return a
119 * negative result, indicating the byte position of the faulty
120 * instruction. This function never writes beyond dest +
121 * maxOutputSize, and is therefore protected against malicious
122 * data packets.
123 * note : Destination buffer must be already allocated.
125 * LZ4_compressCtx() :
126 * This function explicitly handles the CTX memory structure.
128 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
129 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
130 * isn't valid.
132 * LZ4_compress64kCtx() :
133 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
134 * isize *Must* be <64KB, otherwise the output will be corrupted.
136 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
137 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
138 * isn't valid.
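/*
 * Illustrative sketch only (kept out of the build with #if 0) of the caller
 * contract described above: the ctx structure is allocated by the caller
 * and handed to LZ4_compressCtx()/LZ4_compress64kCtx() explicitly; passing
 * NULL is not valid.  It mirrors what real_LZ4_compress() does further
 * down; the example_compress_with_ctx() name is hypothetical.
 */
#if 0
static int
example_compress_with_ctx(const char *src, char *dst, int isize, int osize)
{
	struct refTables *ctx;
	int clen;

	ctx = umem_zalloc(sizeof (struct refTables), UMEM_DEFAULT);
	if (ctx == NULL)
		return (0);	/* 0 means "compression failed" to callers */

	if (isize < (int)LZ4_64KLIMIT)
		clen = LZ4_compress64kCtx(ctx, src, dst, isize, osize);
	else
		clen = LZ4_compressCtx(ctx, src, dst, isize, osize);

	umem_free(ctx, sizeof (struct refTables));
	return (clen);
}
#endif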
142 * Tuning parameters
146 * COMPRESSIONLEVEL: Increasing this value improves the compression ratio.
147 * Lowering this value reduces memory usage. Reduced memory usage
148 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
149 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
150 * (examples : 12 -> 16KB ; 17 -> 512KB)
152 #define COMPRESSIONLEVEL 12
155 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value makes the
156 * algorithm skip over data segments it considers "incompressible"
157 * more quickly. This may decrease the compression ratio dramatically,
158 * but will be faster on incompressible data. Increasing this value makes
159 * the algorithm search longer before declaring a segment "incompressible".
160 * This could improve compression a bit, but will be slower on
161 * incompressible data. The default value (6) is recommended.
163 #define NOTCOMPRESSIBLE_CONFIRMATION 6
166 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This provides a performance boost
167 * on big-endian CPUs, but the resulting compressed stream will be
168 * incompatible with little-endian CPUs. You can set this option to 1
169 * in situations where the data will stay within a closed environment.
170 * This option is useless on little-endian CPUs (such as x86).
172 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
175 * CPU Feature Detection
178 /* 32 or 64 bits ? */
179 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
180 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
181 defined(__LP64__) || defined(_LP64))
182 #define LZ4_ARCH64 1
183 #else
184 #define LZ4_ARCH64 0
185 #endif
188 * Limits the amount of stack space that the algorithm may consume to hold
189 * the compression lookup table. The value `9' here means we'll never use
190 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
191 * If more memory is needed, it is allocated from the heap.
193 #define STACKLIMIT 9
196 * Little Endian or Big Endian?
197 * Note: override the #define below if you know your architecture's endianness.
199 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
200 defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
201 defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
202 defined(__powerpc) || defined(powerpc) || \
203 ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
204 #define LZ4_BIG_ENDIAN 1
205 #else
207 * Little Endian assumed. PDP Endian and other very rare endian formats
208 * are unsupported.
210 #endif
213 * Unaligned memory access is automatically enabled for "common" CPUs,
214 * such as x86. For other CPUs, the compiler will be more cautious and
215 * insert extra code to ensure aligned access is respected. If you know
216 * your target CPU supports unaligned memory access, you may want to
217 * force this option manually to improve performance.
219 #if defined(__ARM_FEATURE_UNALIGNED)
220 #define LZ4_FORCE_UNALIGNED_ACCESS 1
221 #endif
225 * Compiler Options
227 #if __STDC_VERSION__ >= 199901L /* C99 */
228 /* "restrict" is a known keyword */
229 #else
230 /* Disable restrict */
231 #define restrict
232 #endif
234 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
236 #ifdef _MSC_VER
237 /* Visual Studio */
238 /* Visual is not C99, but supports some kind of inline */
239 #define inline __forceinline
240 #if LZ4_ARCH64
241 /* For Visual 2005 */
242 #pragma intrinsic(_BitScanForward64)
243 #pragma intrinsic(_BitScanReverse64)
244 #else /* !LZ4_ARCH64 */
245 /* For Visual 2005 */
246 #pragma intrinsic(_BitScanForward)
247 #pragma intrinsic(_BitScanReverse)
248 #endif /* !LZ4_ARCH64 */
249 #endif /* _MSC_VER */
251 #ifdef _MSC_VER
252 #define lz4_bswap16(x) _byteswap_ushort(x)
253 #else /* !_MSC_VER */
254 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
255 (((x) & 0xffu) << 8)))
256 #endif /* !_MSC_VER */
258 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
259 #define expect(expr, value) (__builtin_expect((expr), (value)))
260 #else
261 #define expect(expr, value) (expr)
262 #endif
264 #define likely(expr) expect((expr) != 0, 1)
265 #define unlikely(expr) expect((expr) != 0, 0)
267 /* Basic types */
268 #if defined(_MSC_VER)
269 /* Visual Studio does not support 'stdint' natively */
270 #define BYTE unsigned __int8
271 #define U16 unsigned __int16
272 #define U32 unsigned __int32
273 #define S32 __int32
274 #define U64 unsigned __int64
275 #else /* !defined(_MSC_VER) */
276 #define BYTE uint8_t
277 #define U16 uint16_t
278 #define U32 uint32_t
279 #define S32 int32_t
280 #define U64 uint64_t
281 #endif /* !defined(_MSC_VER) */
283 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
284 #pragma pack(1)
285 #endif
287 typedef struct _U16_S {
288 U16 v;
289 } U16_S;
290 typedef struct _U32_S {
291 U32 v;
292 } U32_S;
293 typedef struct _U64_S {
294 U64 v;
295 } U64_S;
297 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
298 #pragma pack()
299 #endif
301 #define A64(x) (((U64_S *)(x))->v)
302 #define A32(x) (((U32_S *)(x))->v)
303 #define A16(x) (((U16_S *)(x))->v)
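/*
 * Note (added for clarity): the A64()/A32()/A16() macros above load and
 * store through single-member structs so that, when
 * LZ4_FORCE_UNALIGNED_ACCESS is not set, the surrounding #pragma pack(1)
 * lowers their alignment to 1 and the compiler emits code that is safe
 * for unaligned addresses.
 */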
306 * Constants
308 #define MINMATCH 4
310 #define HASH_LOG COMPRESSIONLEVEL
311 #define HASHTABLESIZE (1 << HASH_LOG)
312 #define HASH_MASK (HASHTABLESIZE - 1)
314 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
315 NOTCOMPRESSIBLE_CONFIRMATION : 2)
318 * Defines if memory is allocated into the stack (local variable),
319 * or into the heap (kmem_alloc()).
321 #define HEAPMODE (HASH_LOG > STACKLIMIT)
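/*
 * Note (added for clarity): with the defaults above (HASH_LOG =
 * COMPRESSIONLEVEL = 12, STACKLIMIT = 9), HEAPMODE evaluates to 1, so the
 * 16KB hash table is allocated with umem_zalloc() in real_LZ4_compress()
 * rather than placed on the stack.
 */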
322 #define COPYLENGTH 8
323 #define LASTLITERALS 5
324 #define MFLIMIT (COPYLENGTH + MINMATCH)
325 #define MINLENGTH (MFLIMIT + 1)
327 #define MAXD_LOG 16
328 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
330 #define ML_BITS 4
331 #define ML_MASK ((1U<<ML_BITS)-1)
332 #define RUN_BITS (8-ML_BITS)
333 #define RUN_MASK ((1U<<RUN_BITS)-1)
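/*
 * Note (added for clarity): each encoded sequence starts with a one-byte
 * token. The upper RUN_BITS hold the literal-run length and the lower
 * ML_BITS hold the match length; a field equal to its mask (RUN_MASK or
 * ML_MASK) means the length continues in additional 255-valued bytes, as
 * the loops in the encoder and decoder below show.
 */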
337 * Architecture-specific macros
339 #if LZ4_ARCH64
340 #define STEPSIZE 8
341 #define UARCH U64
342 #define AARCH A64
343 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
344 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
345 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
346 #define HTYPE U32
347 #define INITBASE(base) const BYTE* const base = ip
348 #else /* !LZ4_ARCH64 */
349 #define STEPSIZE 4
350 #define UARCH U32
351 #define AARCH A32
352 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
353 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
354 #define LZ4_SECURECOPY LZ4_WILDCOPY
355 #define HTYPE const BYTE *
356 #define INITBASE(base) const int base = 0
357 #endif /* !LZ4_ARCH64 */
359 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
360 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
361 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
362 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
363 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
364 #else
365 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
366 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
367 #endif
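/*
 * Note (added for clarity): match offsets are stored little-endian in the
 * compressed stream, so on big-endian CPUs the 16-bit value is byte-swapped
 * on the way in and out, unless BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE is set,
 * in which case the swap (and cross-endian compatibility) is dropped.
 */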
370 /* Local structures */
371 struct refTables {
372 HTYPE hashTable[HASHTABLESIZE];
376 /* Macros */
377 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
378 HASH_LOG))
379 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
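/*
 * Note (added for clarity): the hash multiplies the four bytes at the
 * input position by 2654435761 (a Knuth-style multiplicative-hash
 * constant) and keeps the top HASH_LOG bits of the 32-bit product,
 * yielding an index into hashTable[].
 */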
380 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
381 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
382 d = e; }
385 /* Private functions */
386 #if LZ4_ARCH64
388 static int
389 LZ4_NbCommonBytes(register U64 val)
391 #if defined(LZ4_BIG_ENDIAN)
392 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
393 unsigned long r = 0;
394 _BitScanReverse64(&r, val);
395 return (int)(r >> 3);
396 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
397 !defined(LZ4_FORCE_SW_BITCOUNT)
398 return (__builtin_clzll(val) >> 3);
399 #else
400 int r;
401 if (!(val >> 32)) {
402 r = 4;
403 } else {
404 r = 0;
405 val >>= 32;
407 if (!(val >> 16)) {
408 r += 2;
409 val >>= 8;
410 } else {
411 val >>= 24;
413 r += (!val);
414 return (r);
415 #endif
416 #else
417 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
418 unsigned long r = 0;
419 _BitScanForward64(&r, val);
420 return (int)(r >> 3);
421 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
422 !defined(LZ4_FORCE_SW_BITCOUNT)
423 return (__builtin_ctzll(val) >> 3);
424 #else
425 static const int DeBruijnBytePos[64] =
426 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
427 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
428 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
429 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
431 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
432 58];
433 #endif
434 #endif
437 #else
439 static int
440 LZ4_NbCommonBytes(register U32 val)
442 #if defined(LZ4_BIG_ENDIAN)
443 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
444 unsigned long r = 0;
445 _BitScanReverse(&r, val);
446 return (int)(r >> 3);
447 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
448 !defined(LZ4_FORCE_SW_BITCOUNT)
449 return (__builtin_clz(val) >> 3);
450 #else
451 int r;
452 if (!(val >> 16)) {
453 r = 2;
454 val >>= 8;
455 } else {
456 r = 0;
457 val >>= 24;
459 r += (!val);
460 return (r);
461 #endif
462 #else
463 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
464 unsigned long r = 0;
465 _BitScanForward(&r, val);
466 return (int)(r >> 3);
467 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
468 !defined(LZ4_FORCE_SW_BITCOUNT)
469 return (__builtin_ctz(val) >> 3);
470 #else
471 static const int DeBruijnBytePos[32] = {
472 0, 0, 3, 0, 3, 1, 3, 0,
473 3, 2, 2, 1, 3, 2, 0, 1,
474 3, 3, 1, 2, 2, 2, 2, 0,
475 3, 1, 2, 0, 1, 0, 1, 1
477 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
478 27];
479 #endif
480 #endif
483 #endif
485 /* Public functions */
487 /* Compression functions */
489 /*ARGSUSED*/
490 static int
491 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
492 int osize)
494 #if HEAPMODE
495 struct refTables *srt = (struct refTables *)ctx;
496 HTYPE *HashTable = (HTYPE *) (srt->hashTable);
497 #else
498 HTYPE HashTable[HASHTABLESIZE] = { 0 };
499 #endif
501 const BYTE *ip = (BYTE *) source;
502 INITBASE(base);
503 const BYTE *anchor = ip;
504 const BYTE *const iend = ip + isize;
505 const BYTE *const oend = (BYTE *) dest + osize;
506 const BYTE *const mflimit = iend - MFLIMIT;
507 #define matchlimit (iend - LASTLITERALS)
509 BYTE *op = (BYTE *) dest;
511 int len, length;
512 const int skipStrength = SKIPSTRENGTH;
513 U32 forwardH;
516 /* Init */
517 if (isize < MINLENGTH)
518 goto _last_literals;
520 /* First Byte */
521 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
522 ip++;
523 forwardH = LZ4_HASH_VALUE(ip);
525 /* Main Loop */
526 for (;;) {
527 int findMatchAttempts = (1U << skipStrength) + 3;
528 const BYTE *forwardIp = ip;
529 const BYTE *ref;
530 BYTE *token;
532 /* Find a match */
533 do {
534 U32 h = forwardH;
535 int step = findMatchAttempts++ >> skipStrength;
536 ip = forwardIp;
537 forwardIp = ip + step;
539 if unlikely(forwardIp > mflimit) {
540 goto _last_literals;
543 forwardH = LZ4_HASH_VALUE(forwardIp);
544 ref = base + HashTable[h];
545 HashTable[h] = ip - base;
547 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
549 /* Catch up */
550 while ((ip > anchor) && (ref > (BYTE *) source) &&
551 unlikely(ip[-1] == ref[-1])) {
552 ip--;
553 ref--;
556 /* Encode Literal length */
557 length = ip - anchor;
558 token = op++;
560 /* Check output limit */
561 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
562 (length >> 8) > oend)
563 return (0);
565 if (length >= (int)RUN_MASK) {
566 *token = (RUN_MASK << ML_BITS);
567 len = length - RUN_MASK;
568 for (; len > 254; len -= 255)
569 *op++ = 255;
570 *op++ = (BYTE)len;
571 } else
572 *token = (length << ML_BITS);
574 /* Copy Literals */
575 LZ4_BLINDCOPY(anchor, op, length);
577 _next_match:
578 /* Encode Offset */
579 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
581 /* Start Counting */
582 ip += MINMATCH;
583 ref += MINMATCH; /* MinMatch verified */
584 anchor = ip;
585 while likely(ip < matchlimit - (STEPSIZE - 1)) {
586 UARCH diff = AARCH(ref) ^ AARCH(ip);
587 if (!diff) {
588 ip += STEPSIZE;
589 ref += STEPSIZE;
590 continue;
592 ip += LZ4_NbCommonBytes(diff);
593 goto _endCount;
595 #if LZ4_ARCH64
596 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
597 ip += 4;
598 ref += 4;
600 #endif
601 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
602 ip += 2;
603 ref += 2;
605 if ((ip < matchlimit) && (*ref == *ip))
606 ip++;
607 _endCount:
609 /* Encode MatchLength */
610 len = (ip - anchor);
611 /* Check output limit */
612 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
613 return (0);
614 if (len >= (int)ML_MASK) {
615 *token += ML_MASK;
616 len -= ML_MASK;
617 for (; len > 509; len -= 510) {
618 *op++ = 255;
619 *op++ = 255;
621 if (len > 254) {
622 len -= 255;
623 *op++ = 255;
625 *op++ = (BYTE)len;
626 } else
627 *token += len;
629 /* Test end of chunk */
630 if (ip > mflimit) {
631 anchor = ip;
632 break;
634 /* Fill table */
635 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
637 /* Test next position */
638 ref = base + HashTable[LZ4_HASH_VALUE(ip)];
639 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
640 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
641 token = op++;
642 *token = 0;
643 goto _next_match;
645 /* Prepare next loop */
646 anchor = ip++;
647 forwardH = LZ4_HASH_VALUE(ip);
650 _last_literals:
651 /* Encode Last Literals */
653 int lastRun = iend - anchor;
654 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
655 oend)
656 return (0);
657 if (lastRun >= (int)RUN_MASK) {
658 *op++ = (RUN_MASK << ML_BITS);
659 lastRun -= RUN_MASK;
660 for (; lastRun > 254; lastRun -= 255) {
661 *op++ = 255;
663 *op++ = (BYTE)lastRun;
664 } else
665 *op++ = (lastRun << ML_BITS);
666 (void) memcpy(op, anchor, iend - anchor);
667 op += iend - anchor;
670 /* End */
671 return (int)(((char *)op) - dest);
676 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
677 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
678 #define HASHLOG64K (HASH_LOG + 1)
679 #define HASH64KTABLESIZE (1U << HASHLOG64K)
680 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
681 HASHLOG64K))
682 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
684 /*ARGSUSED*/
685 static int
686 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
687 int osize)
689 #if HEAPMODE
690 struct refTables *srt = (struct refTables *)ctx;
691 U16 *HashTable = (U16 *) (srt->hashTable);
692 #else
693 U16 HashTable[HASH64KTABLESIZE] = { 0 };
694 #endif
696 const BYTE *ip = (BYTE *) source;
697 const BYTE *anchor = ip;
698 const BYTE *const base = ip;
699 const BYTE *const iend = ip + isize;
700 const BYTE *const oend = (BYTE *) dest + osize;
701 const BYTE *const mflimit = iend - MFLIMIT;
702 #define matchlimit (iend - LASTLITERALS)
704 BYTE *op = (BYTE *) dest;
706 int len, length;
707 const int skipStrength = SKIPSTRENGTH;
708 U32 forwardH;
710 /* Init */
711 if (isize < MINLENGTH)
712 goto _last_literals;
714 /* First Byte */
715 ip++;
716 forwardH = LZ4_HASH64K_VALUE(ip);
718 /* Main Loop */
719 for (;;) {
720 int findMatchAttempts = (1U << skipStrength) + 3;
721 const BYTE *forwardIp = ip;
722 const BYTE *ref;
723 BYTE *token;
725 /* Find a match */
726 do {
727 U32 h = forwardH;
728 int step = findMatchAttempts++ >> skipStrength;
729 ip = forwardIp;
730 forwardIp = ip + step;
732 if (forwardIp > mflimit) {
733 goto _last_literals;
736 forwardH = LZ4_HASH64K_VALUE(forwardIp);
737 ref = base + HashTable[h];
738 HashTable[h] = ip - base;
740 } while (A32(ref) != A32(ip));
742 /* Catch up */
743 while ((ip > anchor) && (ref > (BYTE *) source) &&
744 (ip[-1] == ref[-1])) {
745 ip--;
746 ref--;
749 /* Encode Literal length */
750 length = ip - anchor;
751 token = op++;
753 /* Check output limit */
754 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
755 (length >> 8) > oend)
756 return (0);
758 if (length >= (int)RUN_MASK) {
759 *token = (RUN_MASK << ML_BITS);
760 len = length - RUN_MASK;
761 for (; len > 254; len -= 255)
762 *op++ = 255;
763 *op++ = (BYTE)len;
764 } else
765 *token = (length << ML_BITS);
767 /* Copy Literals */
768 LZ4_BLINDCOPY(anchor, op, length);
770 _next_match:
771 /* Encode Offset */
772 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
774 /* Start Counting */
775 ip += MINMATCH;
776 ref += MINMATCH; /* MinMatch verified */
777 anchor = ip;
778 while (ip < matchlimit - (STEPSIZE - 1)) {
779 UARCH diff = AARCH(ref) ^ AARCH(ip);
780 if (!diff) {
781 ip += STEPSIZE;
782 ref += STEPSIZE;
783 continue;
785 ip += LZ4_NbCommonBytes(diff);
786 goto _endCount;
788 #if LZ4_ARCH64
789 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
790 ip += 4;
791 ref += 4;
793 #endif
794 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
795 ip += 2;
796 ref += 2;
798 if ((ip < matchlimit) && (*ref == *ip))
799 ip++;
800 _endCount:
802 /* Encode MatchLength */
803 len = (ip - anchor);
804 /* Check output limit */
805 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
806 return (0);
807 if (len >= (int)ML_MASK) {
808 *token += ML_MASK;
809 len -= ML_MASK;
810 for (; len > 509; len -= 510) {
811 *op++ = 255;
812 *op++ = 255;
814 if (len > 254) {
815 len -= 255;
816 *op++ = 255;
818 *op++ = (BYTE)len;
819 } else
820 *token += len;
822 /* Test end of chunk */
823 if (ip > mflimit) {
824 anchor = ip;
825 break;
827 /* Fill table */
828 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
830 /* Test next position */
831 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
832 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
833 if (A32(ref) == A32(ip)) {
834 token = op++;
835 *token = 0;
836 goto _next_match;
838 /* Prepare next loop */
839 anchor = ip++;
840 forwardH = LZ4_HASH64K_VALUE(ip);
843 _last_literals:
844 /* Encode Last Literals */
846 int lastRun = iend - anchor;
847 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
848 oend)
849 return (0);
850 if (lastRun >= (int)RUN_MASK) {
851 *op++ = (RUN_MASK << ML_BITS);
852 lastRun -= RUN_MASK;
853 for (; lastRun > 254; lastRun -= 255)
854 *op++ = 255;
855 *op++ = (BYTE)lastRun;
856 } else
857 *op++ = (lastRun << ML_BITS);
858 (void) memcpy(op, anchor, iend - anchor);
859 op += iend - anchor;
862 /* End */
863 return (int)(((char *)op) - dest);
866 static int
867 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
869 #if HEAPMODE
870 void *ctx = umem_zalloc(sizeof (struct refTables), UMEM_DEFAULT);
871 int result;
874 * out of kernel memory, gently fall through - this will disable
875 * compression in zio_compress_data
877 if (ctx == NULL)
878 return (0);
880 if (isize < LZ4_64KLIMIT)
881 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
882 else
883 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
885 umem_free(ctx, sizeof (struct refTables));
886 return (result);
887 #else
888 if (isize < (int)LZ4_64KLIMIT)
889 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
890 return (LZ4_compressCtx(NULL, source, dest, isize, osize));
891 #endif
894 /* Decompression functions */
897 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
898 * against "buffer overflow" attacks.
899 * LZ4_uncompress_unknownOutputSize() ensures that it will never read
900 * outside of the input buffer. A corrupted input will produce an error
901 * result, a negative int, indicating the position of the error within
902 * the input stream.
905 static int
906 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
907 int maxOutputSize)
909 /* Local Variables */
910 const BYTE *restrict ip = (const BYTE *) source;
911 const BYTE *const iend = ip + isize;
912 const BYTE *ref;
914 BYTE *op = (BYTE *) dest;
915 BYTE *const oend = op + maxOutputSize;
916 BYTE *cpy;
918 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
919 #if LZ4_ARCH64
920 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
921 #endif
923 /* Main Loop */
924 while (ip < iend) {
925 unsigned token;
926 size_t length;
928 /* get runlength */
929 token = *ip++;
930 if ((length = (token >> ML_BITS)) == RUN_MASK) {
931 int s = 255;
932 while ((ip < iend) && (s == 255)) {
933 s = *ip++;
934 length += s;
937 /* copy literals */
938 cpy = op + length;
939 /* CORNER-CASE: cpy might overflow. */
940 if (cpy < op)
941 goto _output_error; /* cpy was overflowed, bail! */
942 if ((cpy > oend - COPYLENGTH) ||
943 (ip + length > iend - COPYLENGTH)) {
944 if (cpy > oend)
945 /* Error: writes beyond output buffer */
946 goto _output_error;
947 if (ip + length != iend)
949 * Error: LZ4 format requires to consume all
950 * input at this stage
952 goto _output_error;
953 (void) memcpy(op, ip, length);
954 op += length;
955 /* Necessarily EOF, due to parsing restrictions */
956 break;
958 LZ4_WILDCOPY(ip, op, cpy);
959 ip -= (op - cpy);
960 op = cpy;
962 /* get offset */
963 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
964 ip += 2;
965 if (ref < (BYTE * const) dest)
967 * Error: offset creates reference outside of
968 * destination buffer
970 goto _output_error;
972 /* get matchlength */
973 if ((length = (token & ML_MASK)) == ML_MASK) {
974 while (ip < iend) {
975 int s = *ip++;
976 length += s;
977 if (s == 255)
978 continue;
979 break;
982 /* copy repeated sequence */
983 if unlikely(op - ref < STEPSIZE) {
984 #if LZ4_ARCH64
985 size_t dec64 = dec64table[op-ref];
986 #else
987 const int dec64 = 0;
988 #endif
989 op[0] = ref[0];
990 op[1] = ref[1];
991 op[2] = ref[2];
992 op[3] = ref[3];
993 op += 4;
994 ref += 4;
995 ref -= dec32table[op-ref];
996 A32(op) = A32(ref);
997 op += STEPSIZE - 4;
998 ref -= dec64;
999 } else {
1000 LZ4_COPYSTEP(ref, op);
1002 cpy = op + length - (STEPSIZE - 4);
1003 if (cpy > oend - COPYLENGTH) {
1004 if (cpy > oend)
1006 * Error: request to write outside of
1007 * destination buffer
1009 goto _output_error;
1010 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1011 while (op < cpy)
1012 *op++ = *ref++;
1013 op = cpy;
1014 if (op == oend)
1016 * Check EOF (should never happen, since
1017 * last 5 bytes are supposed to be literals)
1019 goto _output_error;
1020 continue;
1022 LZ4_SECURECOPY(ref, op, cpy);
1023 op = cpy; /* correction */
1026 /* end of decoding */
1027 return (int)(((char *)op) - dest);
1029 /* write overflow error detected */
1030 _output_error:
1031 return (int)(-(((char *)ip) - source));