module/zfs/lz4_zfs.c
/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following disclaimer
 *     in the documentation and/or other materials provided with the
 *     distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */
#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static kmem_cache_t *lz4_cache;
static size_t
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * The exact compressed size is needed by the decompression routine,
	 * so it is stored at the start of the buffer. Note that this may be
	 * less than the compressed block size, which is rounded up to a
	 * multiple of 1<<ashift.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
static int
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	(void) n;
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
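
/*
 * Illustrative sketch (not compiled into the module): how the two static
 * helpers above pair up.  zfs_lz4_compress_buf() prepends a 4-byte
 * big-endian length header to the LZ4 stream and returns s_len on failure;
 * zfs_lz4_decompress_buf() reads that header back.  The buffer sizes and
 * the failure check are simplifying assumptions; real callers go through
 * the ZFS_COMPRESS_WRAP_DECL()/ZFS_DECOMPRESS_WRAP_DECL() wrappers above.
 */
#if 0
static void
lz4_buf_roundtrip_example(void)
{
	char src[4096] = { 0 };		/* logical (uncompressed) data */
	char packed[4096];		/* 4-byte size header + LZ4 stream */
	char unpacked[4096];
	size_t psize;

	psize = zfs_lz4_compress_buf(src, packed, sizeof (src),
	    sizeof (packed), 0);
	if (psize >= sizeof (src))
		return;			/* did not compress; store raw */

	/* packed[0..3] holds BE_32(compressed length) */
	VERIFY0(zfs_lz4_decompress_buf(packed, unpacked, psize,
	    sizeof (unpacked), 0));
}
#endif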
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		destination buffer must be sized to handle worst cases
 *		situations (input data not compressible) worst case size
 *		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *		If the source stream is malformed, the function will stop
 *		decoding and return a negative result, indicating the byte
 *		position of the faulty instruction. This function never
 *		writes beyond dest + osize, and is therefore protected
 *		against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	       is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note   : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
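
/*
 * Illustrative sketch (not compiled into the module) of the contract
 * described above, using only the entry points declared in this file.
 * The fixed 2048-byte buffers are an arbitrary assumption standing in
 * for a worst-case (LZ4_compressBound()-sized) destination.
 */
#if 0
static void
lz4_api_contract_example(const char *src, int src_len)
{
	char compressed[2048];
	char decompressed[2048];
	int csize, dsize;

	/* real_LZ4_compress() returns 0 when the output doesn't fit */
	csize = real_LZ4_compress(src, compressed, src_len,
	    sizeof (compressed));
	if (csize == 0)
		return;

	/* a negative return means the compressed stream is malformed */
	dsize = LZ4_uncompress_unknownOutputSize(compressed, decompressed,
	    csize, sizeof (decompressed));
	ASSERT3S(dsize, ==, src_len);
}
#endif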
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
 *	 Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12
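
/*
 * Worked example (informational): with COMPRESSIONLEVEL = 12 the hash
 * table declared below holds 2^12 entries of 4 bytes each, i.e.
 * 2^(12+2) = 16KB, which is what struct refTables (and therefore each
 * lz4_cache object) is sized from.
 */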
/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6
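
/*
 * Worked example (informational): with the default of 6, SKIPSTRENGTH
 * below is 6, so the match search in LZ4_compressCtx() advances its probe
 * by one byte for roughly the first 64 failed attempts
 * (findMatchAttempts >> 6 == 1), then by 2, 3, ... as the attempt counter
 * grows, which is what makes incompressible runs cheap to skip over.
 */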
/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big endian cpu, but the resulting compressed stream
 * will be incompatible with little-endian CPU. You can set this option
 * to 1 in situations where data will stay within closed environment.
 * This option is useless on Little_Endian CPU (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian format
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif
/*
 * Unaligned memory access is automatically enabled for "common" CPU,
 * such as x86. For others CPU, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif
/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x)	(((U64_S *)(x))->v)
#define	A32(x)	(((U32_S *)(x))->v)
#define	A16(x)	(((U16_S *)(x))->v)
/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif
/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};

/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif
/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL,
	    KMC_RECLAIMABLE);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}