/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/types.h>
#include <sys/byteorder.h>
#include <assert.h>
#include <string.h>
#include <umem.h>

size_t lz4_compress(void *, void *, size_t, size_t, int);
int lz4_decompress(void *, void *, size_t, size_t, int);
static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	assert(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
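
#ifdef LZ4_EXAMPLE_USAGE
/*
 * Illustrative sketch only: a hypothetical helper (not part of this file,
 * compiled only if LZ4_EXAMPLE_USAGE is defined) showing a round trip
 * through the wrappers above. The destination buffer handed to
 * lz4_compress() must leave room for the 4-byte big-endian size header in
 * addition to the compressed payload, and a return value >= s_len means
 * "did not compress usefully" rather than an error in the call itself.
 */
static int
lz4_example_roundtrip(void *buf, size_t len)
{
	size_t d_len = len + sizeof (uint32_t);
	char *packed = umem_alloc(d_len, UMEM_DEFAULT);
	char *unpacked = umem_alloc(len, UMEM_DEFAULT);
	size_t c_len;
	int ret = -1;

	if (packed == NULL || unpacked == NULL)
		goto out;

	c_len = lz4_compress(buf, packed, len, d_len, 0);
	if (c_len >= len)
		goto out;	/* incompressible; a caller would store raw */

	if (lz4_decompress(packed, unpacked, c_len, len, 0) == 0 &&
	    memcmp(buf, unpacked, len) == 0)
		ret = 0;	/* round trip reproduced the original data */
out:
	if (packed != NULL)
		umem_free(packed, d_len);
	if (unpacked != NULL)
		umem_free(unpacked, len);
	return (ret);
}
#endif /* LZ4_EXAMPLE_USAGE */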

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *    isize : is the input size. Max supported value is ~1.9GB
 *    return : the number of bytes written in buffer dest
 *        or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *    note : destination buffer must be already allocated.
 *        destination buffer must be sized to handle worst-case
 *        situations (input data not compressible).
 *
 * Advanced Functions
 *
 * LZ4_uncompress_unknownOutputSize() :
 *    isize : is the input size, therefore the compressed size
 *    maxOutputSize : is the size of the destination buffer (which must be
 *        already allocated)
 *    return : the number of bytes decoded in the destination buffer
 *        (necessarily <= maxOutputSize). If the source stream is
 *        malformed, the function will stop decoding and return a
 *        negative result, indicating the byte position of the faulty
 *        instruction. This function never writes beyond dest +
 *        maxOutputSize, and is therefore protected against malicious
 *        data packets.
 *    note : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 *    This function explicitly handles the CTX memory structure.
 *
 *    ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *    by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *    isn't valid.
 *
 * LZ4_compress64kCtx() :
 *    Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *    isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *    ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *    by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *    isn't valid.
 */
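
/*
 * Sizing note (not defined in this file): upstream LZ4 bounds the worst-case
 * compressed size as isize + isize / 255 + 16 bytes. A caller of
 * real_LZ4_compress() that wants the call to succeed even on incompressible
 * input would size dest at least that large; with a smaller dest the
 * function simply returns 0, as described above.
 */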

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *    Lowering this value reduces memory usage. Reduced memory usage
 *    typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *    L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *    (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *    algorithm skip faster over data segments considered "incompressible".
 *    This may decrease compression ratio dramatically, but will be
 *    faster on incompressible data. Increasing this value will make
 *    the algorithm search more before declaring a segment "incompressible".
 *    This could improve compression a bit, but will be slower on
 *    incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define STACKLIMIT 9

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
    defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
    defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
    defined(__powerpc) || defined(powerpc) || \
    ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

#ifdef __sparc
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value) (__builtin_expect((expr), (value)))
#else
#define expect(expr, value) (expr)
#endif

#define likely(expr) expect((expr) != 0, 1)
#define unlikely(expr) expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define BYTE unsigned __int8
#define U16 unsigned __int16
#define U32 unsigned __int32
#define S32 __int32
#define U64 unsigned __int64
#else /* !defined(_MSC_VER) */
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define U64 uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define HEAPMODE (HASH_LOG > STACKLIMIT)
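
/*
 * With the defaults above (HASH_LOG == COMPRESSIONLEVEL == 12 and
 * STACKLIMIT == 9), HEAPMODE evaluates to 1: the 4096-entry hash table
 * (about 16KB) exceeds the 2KB stack budget, so real_LZ4_compress()
 * obtains it from the heap with umem_zalloc() instead of declaring it as
 * a local array.
 */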

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
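
/*
 * Each encoded sequence starts with a one-byte token: the upper RUN_BITS (4)
 * bits carry the literal run length and the lower ML_BITS (4) bits carry the
 * match length. A field equal to its mask value (RUN_MASK or ML_MASK, i.e.
 * 15) means the length continues in extra bytes, each adding up to 255,
 * until a byte smaller than 255 ends the length.
 */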

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base) const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base) const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif

/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};

/* Macros */
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
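
/*
 * 2654435761 is a prime close to 2^32 divided by the golden ratio (Knuth's
 * multiplicative hashing constant); multiplying the four input bytes by it
 * and keeping the top HASH_LOG bits gives a cheap, well-spread hash table
 * index.
 */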

#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }

/* Private functions */
#if LZ4_ARCH64

static int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = umem_zalloc(sizeof (struct refTables), UMEM_DEFAULT);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	umem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *    against "buffer overflow" attacks.
 *    LZ4_uncompress_unknownOutputSize() ensures that it will never read
 *    outside of the input buffer. A corrupted input will produce an error
 *    result, a negative int, indicating the position of the error within
 *    the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif
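
	/*
	 * dec32table/dec64table deal with matches whose offset (op - ref) is
	 * smaller than STEPSIZE: after the first four bytes are copied one at
	 * a time, the table entries nudge ref backwards so that the following
	 * word-sized copies re-read bytes that were just written, replicating
	 * the short repeating pattern without stepping past it.
	 */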

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}