/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>

#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;
static const struct block_device_operations zram_devops;
static const struct block_device_operations zram_wb_devops;

static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio);
static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}
/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}
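
/*
 * Note on the layout used by the two helpers above: each slot's 'flags'
 * word is split in two. The low ZRAM_FLAG_SHIFT bits hold the compressed
 * object size, while the remaining high bits hold the zram_pageflags bits
 * (ZRAM_LOCK, ZRAM_SAME, ZRAM_WB, ...), which is why both accessors mask
 * and shift by ZRAM_FLAG_SHIFT.
 */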
static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}
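
/*
 * The loop above is a conventional lock-free maximum update: re-read the
 * current maximum and retry the cmpxchg until either the stored value did
 * not change underneath us or it is already >= pages.
 */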
static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
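
/*
 * Pages for which page_same_filled() returns true (every word equal, most
 * commonly zero-filled pages) are not handed to zsmalloc at all: the write
 * path stores only the fill value in table[index].element and sets
 * ZRAM_SAME, and the read path regenerates the page with zram_fill_page().
 */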
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}
static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	if (!sysfs_streq(buf, "all"))
		return -EINVAL;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
				!zram_test_flag(zram, index, ZRAM_UNDER_WB))
			zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}

	up_read(&zram->init_lock);

	return len;
}
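
/*
 * Typical use of the attribute above (see
 * Documentation/admin-guide/blockdev/zram.rst):
 *	echo all > /sys/block/zramX/idle
 * marks every allocated slot idle; slots that are accessed afterwards drop
 * the flag again, so "still idle" slots can later be selected for writeback.
 */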
#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}
static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}
static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}
static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
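
/*
 * The two knobs above are typically used together, e.g.
 *	echo 1 > /sys/block/zramX/writeback_limit_enable
 *	echo <budget in 4K pages> > /sys/block/zramX/writeback_limit
 * writeback_store() below decrements bd_wb_limit in 4K units and refuses
 * further writeback once the budget is exhausted while the limit is enabled.
 */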
static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}
static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}
static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;
	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = blkdev_get_by_dev(inode->i_rdev,
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (IS_ERR(bdev)) {
		err = PTR_ERR(bdev);
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	/*
	 * With the writeback feature, zram does asynchronous IO, so it is no
	 * longer a synchronous device and the synchronous io flag must be
	 * dropped. Otherwise, an upper layer (e.g. swap) could wait for IO
	 * completion instead of (submit and return), which would make the
	 * system sluggish.
	 * Furthermore, when the IO function returns (e.g. swap_readpage),
	 * the upper layer expects the IO to be done and may free the page,
	 * while the IO is in fact still in flight, which could end up as a
	 * use-after-free once the IO really completes.
	 */
	zram->disk->fops = &zram_wb_devops;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}
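
/*
 * Example: attach a backing device before the zram device is initialized
 * (i.e. before disksize is set), as documented in
 * Documentation/admin-guide/blockdev/zram.rst:
 *	echo /dev/sdX > /sys/block/zramX/backing_dev
 */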
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0, so a block index is never confused with handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}
static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}
static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}
/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}
#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK 0
#define HUGE_WRITEBACK 1
#define IDLE_WRITEBACK 2
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
		if (ret || index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}
	for (; nr_pages != 0; index++, nr_pages--) {
		struct bio_vec bvec;

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;

		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
				zram_test_flag(zram, index, ZRAM_SAME) ||
				zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode == IDLE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode == HUGE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		/*
		 * Clearing ZRAM_UNDER_WB is the caller's duty.
		 * IOW, zram_free_page() never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Needed for hugepage writeback racing */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}
		bio_init(&bio, &bio_vec, 1);
		bio_set_dev(&bio, zram->bdev);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;

		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset);
		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it would not be bad as a starter.
		 */
		ret = submit_bio_wait(&bio);
		if (ret) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}
		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We released zram_slot_lock, so we need to check whether the
		 * slot was changed. If the slot was freed, we can catch that
		 * easily via zram_allocated.
		 * A subtle case is the slot being freed/reallocated/marked as
		 * ZRAM_IDLE again. To close the race, idle_store doesn't
		 * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
		 * Thus, we can close the race by checking the ZRAM_IDLE bit.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}
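
/*
 * Writeback is triggered through the sysfs attribute backed by the function
 * above, e.g. (see Documentation/admin-guide/blockdev/zram.rst):
 *	echo idle > /sys/block/zramX/writeback
 *	echo huge > /sys/block/zramX/writeback
 *	echo "page_index=1251" > /sys/block/zramX/writeback
 */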
struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec bvec;
};
#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &zw->bvec, entry, bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it deadlocks. To
 * avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.bvec = *bvec;
	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif
#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');

		if (count < copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}
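
/*
 * Each line of <debugfs>/zram/zramX/block_state produced above looks like
 *	  300    75.033841 .wh.
 * i.e. slot index, last access time, and the same/written-back/huge/idle
 * flags, with '.' for a flag that is not set.
 */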
static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};
static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif
/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}
static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}
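
/*
 * The compression algorithm must be selected before disksize is set, e.g.
 *	echo zstd > /sys/block/zramX/comp_algorithm
 * Reading the attribute lists the available algorithms, with the currently
 * selected one shown in brackets.
 */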
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}
static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted,
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}
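
/*
 * The mm_stat columns printed above are, in order: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max, same_pages,
 * pages_compacted, huge_pages and huge_pages_since (see
 * Documentation/admin-guide/blockdev/zram.rst).
 */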
#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif
static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}
/*
 * To protect concurrent access to the same index entry, the caller should
 * hold this table index entry's bit_spinlock to indicate that the entry is
 * being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret;

	zram_slot_lock(zram, index);
	if (zram_test_flag(zram, index, ZRAM_WB)) {
		struct bio_vec bvec;

		zram_slot_unlock(zram, index);

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;
		return read_from_bdev(zram, &bvec,
				zram_get_element(zram, index),
				bio, partial_io);
	}

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE)
		zstrm = zcomp_stream_get(zram->comp);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
*zram
, struct bio_vec
*bvec
,
1320 u32 index
, struct bio
*bio
)
1323 unsigned long alloced_pages
;
1324 unsigned long handle
= 0;
1325 unsigned int comp_len
= 0;
1326 void *src
, *dst
, *mem
;
1327 struct zcomp_strm
*zstrm
;
1328 struct page
*page
= bvec
->bv_page
;
1329 unsigned long element
= 0;
1330 enum zram_pageflags flags
= 0;
1332 mem
= kmap_atomic(page
);
1333 if (page_same_filled(mem
, &element
)) {
1335 /* Free memory associated with this sector now. */
1337 atomic64_inc(&zram
->stats
.same_pages
);
1343 zstrm
= zcomp_stream_get(zram
->comp
);
1344 src
= kmap_atomic(page
);
1345 ret
= zcomp_compress(zstrm
, src
, &comp_len
);
1348 if (unlikely(ret
)) {
1349 zcomp_stream_put(zram
->comp
);
1350 pr_err("Compression failed! err=%d\n", ret
);
1351 zs_free(zram
->mem_pool
, handle
);
1355 if (comp_len
>= huge_class_size
)
1356 comp_len
= PAGE_SIZE
;
1358 * handle allocation has 2 paths:
1359 * a) fast path is executed with preemption disabled (for
1360 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1361 * since we can't sleep;
1362 * b) slow path enables preemption and attempts to allocate
1363 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1364 * put per-cpu compression stream and, thus, to re-do
1365 * the compression once handle is allocated.
1367 * if we have a 'non-null' handle here then we are coming
1368 * from the slow path and handle has already been allocated.
1371 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1372 __GFP_KSWAPD_RECLAIM
|
1377 zcomp_stream_put(zram
->comp
);
1378 atomic64_inc(&zram
->stats
.writestall
);
1379 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1380 GFP_NOIO
| __GFP_HIGHMEM
|
1383 goto compress_again
;
	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
		atomic64_inc(&zram->stats.huge_pages_since);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}
/*
 * zram_bio_discard - handle a discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block. Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory. So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}
/*
 * Returns a negative errno if it hits some problem. Otherwise returns 0 or 1.
 * Returns 0 if the IO request was done synchronously.
 * Returns 1 if the IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, unsigned int op, struct bio *bio)
{
	int ret;

	if (!op_is_write(op)) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	zram_slot_lock(zram, index);
	zram_accessed(zram, index);
	zram_slot_unlock(zram, index);

	if (unlikely(ret < 0)) {
		if (!op_is_write(op))
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;
	unsigned long start_time;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	start_time = bio_start_io_acct(bio);
	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					bio_op(bio), bio) < 0) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}
	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}
/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_disk->private_data;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;
	unsigned long start_time;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op);
	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
	disk_end_io_acct(bdev->bd_disk, op, start_time);
out:
	/*
	 * If the I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio. That makes the callers of rw_page (e.g.
	 * swap_readpage, __swap_writepage) resubmit the I/O as a bio request,
	 * and bio->bi_end_io then handles the error (e.g. SetPageError,
	 * set_page_dirty and further cleanup).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}
static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}
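
/*
 * A device becomes usable only after its size is set here, e.g.
 *	echo 1G > /sys/block/zram0/disksize
 * followed by mkfs or mkswap/swapon on /dev/zram0. The size can only be
 * changed again after a reset.
 */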
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = zram->disk->part0;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}
static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static const struct block_device_operations zram_wb_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static const struct attribute_group *zram_disk_attr_groups[] = {
	&zram_disk_attr_group,
	NULL,
};
/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif
	queue = blk_alloc_queue(NUMA_NO_NODE);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZED sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size(PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size. So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
	device_add_disk(NULL, zram->disk, zram_disk_attr_groups);

	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev = zram->disk->part0;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	zram_debugfs_unregister(zram);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);
	blk_cleanup_queue(zram->disk->queue);
	put_disk(zram->disk);

	kfree(zram);
	return 0;
}
/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
 * sense that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns back this device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);
static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);
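
/*
 * Hot add/remove of devices from user space, e.g.:
 *	cat /sys/class/zram-control/hot_add	(prints the new device id)
 *	echo 4 > /sys/class/zram-control/hot_remove
 */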
static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};
static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}
static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}
static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");