// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
#include "compress.h"
#include "extents.h"
#include "super-io.h"

#include <linux/lz4.h>
#include <linux/zlib.h>
#include <linux/zstd.h>
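/*
 * Bounce buffer descriptor used by the map-or-bounce helpers below; the
 * fields and BB_* tags follow how they are used throughout this file.
 */
struct bbuf {
	void		*b;
	enum {
		BB_NONE,
		BB_VMAP,
		BB_KMALLOC,
		BB_MEMPOOL,
	}		type;
	int		rw;
};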
static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
{
	void *b;

	BUG_ON(size > c->opts.encoded_extent_max);

	b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };

	b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
	if (b)
		return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };

	BUG();
}
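/*
 * Returns true if the data covered by @start is physically contiguous in
 * memory, i.e. each bvec begins exactly where the previous one ended.
 */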
static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	void *expected_start = NULL;

	__bio_for_each_bvec(bv, bio, iter, start) {
		if (expected_start &&
		    expected_start != page_address(bv.bv_page) + bv.bv_offset)
			return false;

		expected_start = page_address(bv.bv_page) +
			bv.bv_offset + bv.bv_len;
	}

	return true;
}
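/*
 * Get a linear mapping of the data covered by @start: use the pages directly
 * if they are physically contiguous lowmem, try vmap() if they can be mapped
 * contiguously, and otherwise fall back to a bounce buffer (copying the data
 * in for reads).
 */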
static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
				       struct bvec_iter start, int rw)
{
	struct bbuf ret;
	struct bio_vec bv;
	struct bvec_iter iter;
	unsigned nr_pages = 0;
	struct page *stack_pages[16];
	struct page **pages = NULL;
	void *data;

	BUG_ON(start.bi_size > c->opts.encoded_extent_max);

	if (!PageHighMem(bio_iter_page(bio, start)) &&
	    bio_phys_contig(bio, start))
		return (struct bbuf) {
			.b = page_address(bio_iter_page(bio, start)) +
				bio_iter_offset(bio, start),
			.type = BB_NONE, .rw = rw
		};

	/* check if we can map the pages contiguously: */
	__bio_for_each_segment(bv, bio, iter, start) {
		if (iter.bi_size != start.bi_size &&
		    bv.bv_offset)
			goto bounce;

		if (bv.bv_len < iter.bi_size &&
		    bv.bv_offset + bv.bv_len < PAGE_SIZE)
			goto bounce;

		nr_pages++;
	}

	BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);

	pages = nr_pages > ARRAY_SIZE(stack_pages)
		? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
		: stack_pages;
	if (!pages)
		goto bounce;

	nr_pages = 0;
	__bio_for_each_segment(bv, bio, iter, start)
		pages[nr_pages++] = bv.bv_page;

	data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (pages != stack_pages)
		kfree(pages);

	if (!data)
		goto bounce;

	return (struct bbuf) {
		.b = data + bio_iter_offset(bio, start),
		.type = BB_VMAP, .rw = rw
	};
bounce:
	ret = __bounce_alloc(c, start.bi_size, rw);

	if (rw == READ)
		memcpy_from_bio(ret.b, bio, start);

	return ret;
}
static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
{
	return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
}
static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
{
	switch (buf.type) {
	case BB_NONE:
		break;
	case BB_VMAP:
		vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
		break;
	case BB_KMALLOC:
		kfree(buf.b);
		break;
	case BB_MEMPOOL:
		mempool_free(buf.b, &c->compression_bounce[buf.rw]);
		break;
	}
}
static inline void zlib_set_workspace(z_stream *strm, void *workspace)
{
	strm->workspace = workspace;
}
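/*
 * Decompress @src into the (already correctly sized) buffer @dst_data,
 * according to the compression type recorded in @crc. Returns 0 on success,
 * -EIO on any decompression error.
 */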
static int __bio_uncompress(struct bch_fs *c, struct bio *src,
			    void *dst_data, struct bch_extent_crc_unpacked crc)
{
	struct bbuf src_data = { NULL };
	size_t src_len = src->bi_iter.bi_size;
	size_t dst_len = crc.uncompressed_size << 9;
	void *workspace;
	int ret;

	src_data = bio_map_or_bounce(c, src, READ);

	switch (crc.compression_type) {
	case BCH_COMPRESSION_TYPE_lz4_old:
	case BCH_COMPRESSION_TYPE_lz4:
		ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
						  src_len, dst_len, dst_len);
		if (ret != dst_len)
			goto err;
		break;
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src_data.b,
			.avail_in	= src_len,
			.next_out	= dst_data,
			.avail_out	= dst_len,
		};

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);

		zlib_set_workspace(&strm, workspace);
		zlib_inflateInit2(&strm, -MAX_WBITS);
		ret = zlib_inflate(&strm, Z_FINISH);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != Z_STREAM_END)
			goto err;
		break;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		ZSTD_DCtx *ctx;
		size_t real_src_len = le32_to_cpup(src_data.b);

		if (real_src_len > src_len - 4)
			goto err;

		workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
		ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());

		ret = zstd_decompress_dctx(ctx,
				dst_data,	dst_len,
				src_data.b + 4, real_src_len);

		mempool_free(workspace, &c->decompress_workspace);

		if (ret != dst_len)
			goto err;
		break;
	}
	default:
		BUG();
	}
	ret = 0;
out:
	bio_unmap_or_unbounce(c, src_data);
	return ret;
err:
	ret = -EIO;
	goto out;
}
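/*
 * Decompress an extent in place: the compressed data in @bio is replaced by
 * the decompressed data for the live range, and @crc is updated to describe
 * the now-uncompressed extent.
 */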
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
				struct bch_extent_crc_unpacked *crc)
{
	struct bbuf data = { NULL };
	size_t dst_len = crc->uncompressed_size << 9;

	/* bio must own its pages: */
	BUG_ON(!bio->bi_vcnt);
	BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);

	if (crc->uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc->compressed_size << 9	> c->opts.encoded_extent_max) {
		bch_err(c, "error rewriting existing data: extent too big");
		return -EIO;
	}

	data = __bounce_alloc(c, dst_len, WRITE);

	if (__bio_uncompress(c, bio, data.b, *crc)) {
		if (!c->opts.no_data_io)
			bch_err(c, "error rewriting existing data: decompression error");
		bio_unmap_or_unbounce(c, data);
		return -EIO;
	}

	/*
	 * XXX: don't have a good way to assert that the bio was allocated with
	 * enough space, we depend on bch2_move_extent doing the right thing
	 */
	bio->bi_iter.bi_size = crc->live_size << 9;

	memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));

	crc->csum_type		= 0;
	crc->compression_type	= 0;
	crc->compressed_size	= crc->live_size;
	crc->uncompressed_size	= crc->live_size;
	crc->offset		= 0;
	crc->csum		= (struct bch_csum) { 0, 0 };

	bio_unmap_or_unbounce(c, data);
	return 0;
}
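/*
 * Decompress @src into @dst at @dst_iter, bouncing through a temporary buffer
 * when the destination isn't exactly the uncompressed size.
 */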
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
			struct bio *dst, struct bvec_iter dst_iter,
			struct bch_extent_crc_unpacked crc)
{
	struct bbuf dst_data = { NULL };
	size_t dst_len = crc.uncompressed_size << 9;
	int ret;

	if (crc.uncompressed_size << 9	> c->opts.encoded_extent_max ||
	    crc.compressed_size << 9	> c->opts.encoded_extent_max)
		return -EIO;

	dst_data = dst_len == dst_iter.bi_size
		? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
		: __bounce_alloc(c, dst_len, WRITE);

	ret = __bio_uncompress(c, src, dst_data.b, crc);
	if (ret)
		goto err;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
err:
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
}
static int attempt_compress(struct bch_fs *c,
			    void *workspace,
			    void *dst, size_t dst_len,
			    void *src, size_t src_len,
			    struct bch_compression_opt compression)
{
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];

	switch (compression_type) {
	case BCH_COMPRESSION_TYPE_lz4:
		if (compression.level < LZ4HC_MIN_CLEVEL) {
			int len = src_len;
			int ret = LZ4_compress_destSize(
					src,		dst,
					&len,		dst_len,
					workspace);
			if (len < src_len)
				return -len;

			return ret;
		} else {
			int ret = LZ4_compress_HC(
					src,		dst,
					src_len,	dst_len,
					compression.level,
					workspace);

			return ret ?: -1;
		}
	case BCH_COMPRESSION_TYPE_gzip: {
		z_stream strm = {
			.next_in	= src,
			.avail_in	= src_len,
			.next_out	= dst,
			.avail_out	= dst_len,
		};

		zlib_set_workspace(&strm, workspace);
		zlib_deflateInit2(&strm,
				  compression.level
				  ? clamp_t(unsigned, compression.level,
					    Z_BEST_SPEED, Z_BEST_COMPRESSION)
				  : Z_DEFAULT_COMPRESSION,
				  Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
				  Z_DEFAULT_STRATEGY);

		if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
			return 0;

		if (zlib_deflateEnd(&strm) != Z_OK)
			return 0;

		return strm.total_out;
	}
	case BCH_COMPRESSION_TYPE_zstd: {
		/*
		 * rescale: zstd's max compression level is 22, our max level
		 * is 15
		 */
		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
		ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);

		/*
		 * ZSTD requires that when we decompress we pass in the exact
		 * compressed size - rounding it up to the nearest sector
		 * doesn't work, so we use the first 4 bytes of the buffer for
		 * that.
		 *
		 * Additionally, the ZSTD code seems to have a bug where it will
		 * write just past the end of the buffer - so subtract a fudge
		 * factor (7 bytes) from the dst buffer size to account for
		 * that.
		 */
		size_t len = zstd_compress_cctx(ctx,
				dst + 4,	dst_len - 4 - 7,
				src,		src_len,
				&params);
		if (zstd_is_error(len))
			return 0;

		*((__le32 *) dst) = cpu_to_le32(len);
		return len + 4;
	}
	default:
		BUG();
	}
}
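/*
 * Compress as much of @src as will fit in @dst: on failure to fit, retry with
 * progressively smaller amounts of input (using a negative return from
 * attempt_compress() as a size hint where available), and pad the result out
 * to a whole number of blocks. Returns the compression type actually used, or
 * BCH_COMPRESSION_TYPE_incompressible.
 */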
static unsigned __bio_compress(struct bch_fs *c,
			       struct bio *dst, size_t *dst_len,
			       struct bio *src, size_t *src_len,
			       struct bch_compression_opt compression)
{
	struct bbuf src_data = { NULL }, dst_data = { NULL };
	void *workspace;
	enum bch_compression_type compression_type =
		__bch2_compression_opt_to_type[compression.type];
	unsigned pad;
	int ret = 0;

	BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
	BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));

	/* If it's only one block, don't bother trying to compress: */
	if (src->bi_iter.bi_size <= c->opts.block_size)
		return BCH_COMPRESSION_TYPE_incompressible;

	dst_data = bio_map_or_bounce(c, dst, WRITE);
	src_data = bio_map_or_bounce(c, src, READ);

	workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);

	*src_len = src->bi_iter.bi_size;
	*dst_len = dst->bi_iter.bi_size;

	/*
	 * XXX: this algorithm sucks when the compression code doesn't tell us
	 * how much would fit, like LZ4 does:
	 */
	while (1) {
		if (*src_len <= block_bytes(c)) {
			ret = -1;
			break;
		}

		ret = attempt_compress(c, workspace,
				       dst_data.b,	*dst_len,
				       src_data.b,	*src_len,
				       compression);
		if (ret > 0) {
			*dst_len = ret;
			ret = 0;
			break;
		}

		/* Didn't fit: should we retry with a smaller amount? */
		if (*src_len <= *dst_len) {
			ret = -1;
			break;
		}

		/*
		 * If ret is negative, it's a hint as to how much data would fit
		 */
		BUG_ON(-ret >= *src_len);

		if (ret < 0)
			*src_len = -ret;
		else
			*src_len -= (*src_len - *dst_len) / 2;
		*src_len = round_down(*src_len, block_bytes(c));
	}

	mempool_free(workspace, &c->compress_workspace[compression_type]);

	if (ret)
		goto err;

	/* Didn't get smaller: */
	if (round_up(*dst_len, block_bytes(c)) >= *src_len)
		goto err;

	pad = round_up(*dst_len, block_bytes(c)) - *dst_len;

	memset(dst_data.b + *dst_len, 0, pad);
	*dst_len += pad;

	if (dst_data.type != BB_NONE &&
	    dst_data.type != BB_VMAP)
		memcpy_to_bio(dst, dst->bi_iter, dst_data.b);

	BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
	BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
	BUG_ON(*dst_len & (block_bytes(c) - 1));
	BUG_ON(*src_len & (block_bytes(c) - 1));
	ret = compression_type;
out:
	bio_unmap_or_unbounce(c, src_data);
	bio_unmap_or_unbounce(c, dst_data);
	return ret;
err:
	ret = BCH_COMPRESSION_TYPE_incompressible;
	goto out;
}
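/*
 * Wrapper around __bio_compress() that bounds how much input is consumed and
 * how much output may be produced, restoring the original bio sizes on return.
 */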
unsigned bch2_bio_compress(struct bch_fs *c,
			   struct bio *dst, size_t *dst_len,
			   struct bio *src, size_t *src_len,
			   unsigned compression_opt)
{
	unsigned orig_dst = dst->bi_iter.bi_size;
	unsigned orig_src = src->bi_iter.bi_size;
	unsigned compression_type;

	/* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
	src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
				     c->opts.encoded_extent_max);
	/* Don't generate a bigger output than input: */
	dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);

	compression_type =
		__bio_compress(c, dst, dst_len, src, src_len,
			       bch2_compression_decode(compression_opt));

	dst->bi_iter.bi_size = orig_dst;
	src->bi_iter.bi_size = orig_src;
	return compression_type;
}
static int __bch2_fs_compress_init(struct bch_fs *, u64);

#define BCH_FEATURE_none	0

static const unsigned bch2_compression_opt_to_feature[] = {
#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
	BCH_COMPRESSION_OPTS()
#undef x
};

#undef BCH_FEATURE_none
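/*
 * Ensure the superblock feature bits in @f are set, allocating the
 * corresponding compression workspaces first if they aren't already.
 */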
static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
{
	int ret = 0;

	if ((c->sb.features & f) == f)
		return 0;

	mutex_lock(&c->sb_lock);

	if ((c->sb.features & f) == f) {
		mutex_unlock(&c->sb_lock);
		return 0;
	}

	ret = __bch2_fs_compress_init(c, c->sb.features|f);
	if (ret) {
		mutex_unlock(&c->sb_lock);
		return ret;
	}

	c->disk_sb.sb->features[0] |= cpu_to_le64(f);
	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return 0;
}
int bch2_check_set_has_compressed_data(struct bch_fs *c,
				       unsigned compression_opt)
{
	unsigned compression_type = bch2_compression_decode(compression_opt).type;

	BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));

	return compression_type
		? __bch2_check_set_has_compressed_data(c,
				1ULL << bch2_compression_opt_to_feature[compression_type])
		: 0;
}
void bch2_fs_compress_exit(struct bch_fs *c)
{
	unsigned i;

	mempool_exit(&c->decompress_workspace);
	for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
		mempool_exit(&c->compress_workspace[i]);
	mempool_exit(&c->compression_bounce[WRITE]);
	mempool_exit(&c->compression_bounce[READ]);
}
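/*
 * Allocate bounce buffers and per-algorithm compression/decompression
 * workspaces for every compression type enabled in @features.
 */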
static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
{
	size_t decompress_workspace_size = 0;
	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
						 c->opts.encoded_extent_max);

	c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams);

	struct {
		unsigned			feature;
		enum bch_compression_type	type;
		size_t				compress_workspace;
		size_t				decompress_workspace;
	} compression_types[] = {
		{ BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
			max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
			0 },
		{ BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
			zlib_inflate_workspacesize(), },
		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
			c->zstd_workspace_size,
			zstd_dctx_workspace_bound() },
	}, *i;
	bool have_compressed = false;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++)
		have_compressed |= (features & (1 << i->feature)) != 0;

	if (!have_compressed)
		return 0;

	if (!mempool_initialized(&c->compression_bounce[READ]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_read_init;

	if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
	    mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
				       1, c->opts.encoded_extent_max))
		return -BCH_ERR_ENOMEM_compression_bounce_write_init;

	for (i = compression_types;
	     i < compression_types + ARRAY_SIZE(compression_types);
	     i++) {
		decompress_workspace_size =
			max(decompress_workspace_size, i->decompress_workspace);

		if (!(features & (1 << i->feature)))
			continue;

		if (mempool_initialized(&c->compress_workspace[i->type]))
			continue;

		if (mempool_init_kvmalloc_pool(
				&c->compress_workspace[i->type],
				1, i->compress_workspace))
			return -BCH_ERR_ENOMEM_compression_workspace_init;
	}

	if (!mempool_initialized(&c->decompress_workspace) &&
	    mempool_init_kvmalloc_pool(&c->decompress_workspace,
				       1, decompress_workspace_size))
		return -BCH_ERR_ENOMEM_decompression_workspace_init;

	return 0;
}
static u64 compression_opt_to_feature(unsigned v)
{
	unsigned type = bch2_compression_decode(v).type;

	return BIT_ULL(bch2_compression_opt_to_feature[type]);
}
int bch2_fs_compress_init(struct bch_fs *c)
{
	u64 f = c->sb.features;

	f |= compression_opt_to_feature(c->opts.compression);
	f |= compression_opt_to_feature(c->opts.background_compression);

	return __bch2_fs_compress_init(c, f);
}
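/*
 * Parse a compression option of the form "type" or "type:level", where level
 * is 1-15 and only valid for an actual compression type (not "none").
 */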
int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
			       struct printbuf *err)
{
	char *val = kstrdup(_val, GFP_KERNEL);
	char *p = val, *type_str, *level_str;
	struct bch_compression_opt opt = { 0 };
	int ret;

	if (!val)
		return -ENOMEM;

	type_str = strsep(&p, ":");
	level_str = p;

	ret = match_string(bch2_compression_opts, -1, type_str);
	if (ret < 0 && err)
		prt_str(err, "invalid compression type");
	if (ret < 0)
		goto err;

	opt.type = ret;

	if (level_str) {
		unsigned level;

		ret = kstrtouint(level_str, 10, &level);
		if (!ret && !opt.type && level)
			ret = -EINVAL;
		if (!ret && level > 15)
			ret = -EINVAL;
		if (ret < 0 && err)
			prt_str(err, "invalid compression level");
		if (ret < 0)
			goto err;

		opt.level = level;
	}

	*res = bch2_compression_encode(opt);
err:
	kfree(val);
	return ret;
}
void bch2_compression_opt_to_text(struct printbuf *out, u64 v)
{
	struct bch_compression_opt opt = bch2_compression_decode(v);

	if (opt.type < BCH_COMPRESSION_OPT_NR)
		prt_str(out, bch2_compression_opts[opt.type]);
	else
		prt_printf(out, "(unknown compression opt %u)", opt.type);
	if (opt.level)
		prt_printf(out, ":%u", opt.level);
}
void bch2_opt_compression_to_text(struct printbuf *out,
				  struct bch_fs *c,
				  struct bch_sb *sb,
				  u64 v)
{
	return bch2_compression_opt_to_text(out, v);
}
int bch2_opt_compression_validate(u64 v, struct printbuf *err)
{
	if (!bch2_compression_opt_valid(v)) {
		prt_printf(err, "invalid compression opt %llu", v);
		return -BCH_ERR_invalid_sb_opt_compression;
	}

	return 0;
}