/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static kmem_cache_t *lz4_cache;
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
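
/*
 * Buffer layout shared by the two functions above (illustrative sketch):
 *
 *	+-------------------+--------------------------------+
 *	| bufsiz (4B, BE32) | bufsiz bytes of LZ4 stream     |
 *	+-------------------+--------------------------------+
 *
 * lz4_compress_zfs() stores the exact compressed length up front so that
 * lz4_decompress_zfs() can bound its read; any padding out to the
 * 1<<ashift block boundary is never examined by the decompressor.
 */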
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst cases
 *	    situations (input data not compressible) worst case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
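
/*
 * Round-trip sketch for the ZFS-facing wrappers above (illustrative only;
 * the buffers and sizes are hypothetical, and in-tree callers go through
 * zio_compress_data() instead of calling these directly):
 *
 *	char src[4096], packed[4096], out[4096];
 *	size_t c_len;
 *
 *	c_len = lz4_compress_zfs(src, packed, sizeof (src),
 *	    sizeof (packed), 0);
 *	if (c_len == sizeof (src)) {
 *		... compression failed or did not fit: a return equal to
 *		    s_len signals the block must be stored uncompressed ...
 *	} else if (lz4_decompress_zfs(packed, out, c_len,
 *	    sizeof (out), 0) == 0) {
 *		... out now holds the original 4096 bytes ...
 *	}
 */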
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 *	performance for big endian cpu, but the resulting compressed stream
 *	will be incompatible with little-endian CPU. You can set this option
 *	to 1 in situations where data will stay within closed environment.
 *	This option is useless on Little_Endian CPU (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif
/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif
/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)
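
/*
 * The A64/A32/A16 accessors above implement (possibly unaligned) word
 * access by reading and writing through the one-member structs. When
 * LZ4_FORCE_UNALIGNED_ACCESS is not set, #pragma pack(1) marks those
 * structs unaligned, so the compiler emits access sequences that are
 * safe on strict-alignment CPUs.
 */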
/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)
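
/*
 * SKIPSTRENGTH controls the match-search acceleration in the encoders
 * below: "step = findMatchAttempts++ >> skipStrength" grows the scan
 * stride by one byte for every 2^SKIPSTRENGTH (64, by default) failed
 * match attempts, so the encoder moves through incompressible segments
 * faster the longer they fail to match.
 */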
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif
/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};

/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
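
/*
 * LZ4_HASH_FUNCTION is a multiplicative (Fibonacci-style) hash: the
 * constant 2654435761 is close to 2^32 divided by the golden ratio, and
 * keeping the top HASH_LOG bits of the product spreads similar 4-byte
 * prefixes across the table. Note that LZ4_WILDCOPY copies in word-sized
 * packets and may write up to one packet past "e"; the encoders' slack
 * margins (COPYLENGTH, LASTLITERALS) account for this overrun.
 */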
/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
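	/*
	 * Branch-free software fallback: (val & -val) isolates the lowest
	 * set bit, and multiplying by the De Bruijn constant makes the top
	 * six bits of the product a unique index into a table of
	 * precomputed trailing-zero-byte counts.
	 */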
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
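
/*
 * The <64KB variant reinterprets the same refTables allocation as a table
 * of twice as many 2-byte entries (HASHLOG64K = HASH_LOG + 1): every
 * offset "ip - base" fits in a U16 when isize < LZ4_64KLIMIT. This also
 * lets the match loop below omit the MAX_DISTANCE window check, since no
 * reachable offset can exceed 64K.
 */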
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}
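
/*
 * The kmem cache below exists so each compression call can borrow a
 * refTables scratch area (16KB of hash table at the default
 * COMPRESSIONLEVEL of 12, zeroed by real_LZ4_compress() before use)
 * rather than allocating and freeing one per call.
 */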
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}