/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo-rle";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;
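
/*
 * num_devices can be overridden at module load time and further devices can
 * be created at runtime through the zram-control class, e.g. (illustrative
 * values only):
 *
 *	modprobe zram num_devices=4
 *	cat /sys/class/zram-control/hot_add
 */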
static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio);
static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}
static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}
/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}
static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}
static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
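
/*
 * Example (assuming 4K pages): with *offset == 2048 and a 4096-byte bvec,
 * the position advances by (2048 + 4096) / 4096 = 1 page and the new offset
 * is (2048 + 4096) % 4096 = 2048 bytes into that next page.
 */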
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
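
/*
 * Example: a page of all zeroes, or a page whose every unsigned long is the
 * same pattern, is "same filled"; only that single value is remembered (see
 * the ZRAM_SAME handling in the read/write paths below) instead of storing
 * a compressed copy of the page.
 */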
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
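
/*
 * Example usage (illustrative values): cap zram0's compressed memory at
 * 256M, or drop the limit again by writing 0:
 *
 *	echo 256M > /sys/block/zram0/mem_limit
 *	echo 0 > /sys/block/zram0/mem_limit
 */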
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}
static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	if (!sysfs_streq(buf, "all"))
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
				!zram_test_flag(zram, index, ZRAM_UNDER_WB))
			zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}

	up_read(&zram->init_lock);

	return len;
}
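
/*
 * Example usage (illustrative): mark all allocated slots idle, then later
 * write back the ones that are still idle (see writeback_store below):
 *
 *	echo all > /sys/block/zram0/idle
 *	echo idle > /sys/block/zram0/writeback
 */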
#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}
static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}
static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}
static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
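
/*
 * writeback_limit is accounted in 4K-page units regardless of PAGE_SIZE
 * (note the "1UL << (PAGE_SHIFT - 12)" adjustment in writeback_store).
 * For example (illustrative), to allow roughly 4M of writeback once the
 * limit is enabled:
 *
 *	echo 1 > /sys/block/zram0/writeback_limit_enable
 *	echo 1024 > /sys/block/zram0/writeback_limit
 */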
static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	if (zram->old_block_size)
		set_blocksize(bdev, zram->old_block_size);
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->old_block_size = 0;
	zram->bdev = NULL;
	zram->disk->queue->backing_dev_info->capabilities |=
				BDI_CAP_SYNCHRONOUS_IO;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}
static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}
static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz, old_block_size = 0;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = bdgrab(I_BDEV(inode));
	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (err < 0) {
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	old_block_size = block_size(bdev);
	err = set_blocksize(bdev, PAGE_SIZE);
	if (err)
		goto out;

	reset_bdev(zram);

	zram->old_block_size = old_block_size;
	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	/*
	 * With the writeback feature, zram does asynchronous IO, so it is no
	 * longer a synchronous device; remove the synchronous io flag.
	 * Otherwise, the upper layer (e.g., swap) could wait for IO
	 * completion rather than submit-and-return, which would make the
	 * system sluggish.
	 * Furthermore, when the IO function returns (e.g., swap_readpage),
	 * the upper layer expects the IO to be done and may free the page,
	 * while the IO is in fact still in flight, which could finally cause
	 * a use-after-free when the IO really completes.
	 */
	zram->disk->queue->backing_dev_info->capabilities &=
			~BDI_CAP_SYNCHRONOUS_IO;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	if (bitmap)
		kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 to avoid confusion with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}
static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}
static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}
/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}
#define HUGE_WRITEBACK 1
#define IDLE_WRITEBACK 2

static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	for (index = 0; index < nr_pages; index++) {
		struct bio_vec bvec;

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;

		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
				zram_test_flag(zram, index, ZRAM_SAME) ||
				zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode == IDLE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode == HUGE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		/*
		 * Clearing ZRAM_UNDER_WB is the duty of the caller.
		 * IOW, zram_free_page never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Needed for hugepage writeback racing */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		bio_init(&bio, &bio_vec, 1);
		bio_set_dev(&bio, zram->bdev);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;

		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset);
		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it is not bad as a starter.
		 */
		ret = submit_bio_wait(&bio);
		if (ret) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We released zram_slot_lock so we need to check if the slot
		 * was changed. If the slot was freed, we can catch that
		 * easily with zram_allocated.
		 * A subtle case is the slot being freed/reallocated/marked as
		 * ZRAM_IDLE again. To close the race, idle_store doesn't
		 * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
		 * Thus, we can close the race by checking the ZRAM_IDLE bit.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	if (!ret)
		ret = len;
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}
struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec bvec;
};
#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &zw->bvec, entry, bio);
}

/*
 * The block layer wants only one ->make_request_fn to be active at a time,
 * so if we used chained IO with the parent IO in the same context it would
 * deadlock. To avoid that, the read is done from a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.bvec = *bvec;
	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');

		if (count < copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
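
/*
 * Sample block_state output (illustrative values only):
 *
 *	  300    75.033841 .wh.
 *	  301    63.806904 s...
 *
 * i.e. <index> <last access time> followed by the same/written-back/huge/
 * idle markers as printed by read_block_state() above.
 */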
/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}
static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted,
			(u64)atomic64_read(&zram->stats.huge_pages));
	up_read(&zram->init_lock);

	return ret;
}
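
/*
 * mm_stat columns, in the order printed above: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max, same_pages,
 * pages_compacted, huge_pages (the first five are in bytes, the rest are
 * page counts).
 */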
#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}
/*
 * To protect concurrent access to the same index entry,
 * the caller should hold this table index entry's bit_spinlock to
 * indicate this index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	zram_slot_lock(zram, index);
	if (zram_test_flag(zram, index, ZRAM_WB)) {
		struct bio_vec bvec;

		zram_slot_unlock(zram, index);

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;
		return read_from_bdev(zram, &bvec,
				zram_get_element(zram, index),
				bio, partial_io);
	}

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = 0;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (comp_len >= huge_class_size)
		comp_len = PAGE_SIZE;
	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block. Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory. So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}
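
/*
 * Example (assuming 4K pages): a 12K discard starting 1K into a page skips
 * the 3K partial head, frees the next two full pages, and ignores the 1K
 * partial tail, since only whole PAGE_SIZE blocks can be dropped from the
 * compressed store.
 */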
/*
 * Returns errno if it has some problem. Otherwise return 0 or 1.
 * Returns 0 if IO request was done synchronously
 * Returns 1 if IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, unsigned int op, struct bio *bio)
{
	unsigned long start_time = jiffies;
	struct request_queue *q = zram->disk->queue;
	int ret;

	generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!op_is_write(op)) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	generic_end_io_acct(q, op, &zram->disk->part0, start_time);

	zram_slot_lock(zram, index);
	zram_accessed(zram, index);
	zram_slot_unlock(zram, index);

	if (unlikely(ret < 0)) {
		if (!op_is_write(op))
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					 bio_op(bio), bio) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}
/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
out:
	/*
	 * If I/O fails, just return the error (ie, non-zero) without
	 * calling page_endio.
	 * This lets the upper callers of rw_page (e.g., swap_readpage,
	 * __swap_writepage) resubmit the I/O as a bio request, so that
	 * bio->bi_end_io handles the error (e.g., SetPageError,
	 * set_page_dirty and extra work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}

	return ret;
}
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);

	revalidate_disk(zram->disk);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	revalidate_disk(zram->disk);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}
static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};
/*
 * Allocate and initialize a new zram device. The function returns
 * '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif
	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZED sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size(PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size.  So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	zram->disk->queue->backing_dev_info->capabilities |=
			(BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);

	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);

	/* Make sure all the pending I/O are finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);
	blk_cleanup_queue(zram->disk->queue);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}
/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns back
 * this device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);
static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};
static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");