/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>
#include <linux/kernel_read_file.h>

#include "zram_drv.h"
static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;
static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *parent);
static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return spin_trylock(&zram->table[index].lock);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	spin_lock(&zram->table[index].lock);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	spin_unlock(&zram->table[index].lock);
}
static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}
static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
			u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}
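
/*
 * Note on the flags layout used by the helpers above (a descriptive note
 * inferred from the masks and shifts in this file, not a behavior change):
 * the low ZRAM_FLAG_SHIFT bits of table[index].flags hold the compressed
 * object size, while bits at ZRAM_FLAG_SHIFT and above hold zram_pageflags
 * such as ZRAM_SAME, ZRAM_WB and ZRAM_HUGE.
 */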
static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}
#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO		1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif
static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear the previous priority value first, in case we recompress
	 * an already recompressed page.
	 */
	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
				      ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}
static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram_clear_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
	zram->table[index].ac_time = ktime_get_boottime();
#endif
}
#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
struct zram_pp_slot {
	unsigned long index;
	struct list_head entry;
};

/*
 * A post-processing bucket is, essentially, a size class: it defines the
 * range (in bytes) of pp-slot sizes that fall into a particular bucket.
 */
#define PP_BUCKET_SIZE_RANGE	64
#define NUM_PP_BUCKETS		((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct zram_pp_ctl {
	struct list_head pp_buckets[NUM_PP_BUCKETS];
};
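
/*
 * Worked example (assuming PAGE_SIZE == 4096): a PP_BUCKET_SIZE_RANGE of 64
 * bytes gives 4096 / 64 + 1 = 65 buckets, and an object whose compressed
 * size is 100 bytes lands in bucket 100 / 64 = 1. The "+ 1" bucket catches
 * objects whose size equals PAGE_SIZE (huge/incompressible slots).
 */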
static struct zram_pp_ctl *init_pp_ctl(void)
{
	struct zram_pp_ctl *ctl;
	u32 idx;

	ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
	if (!ctl)
		return NULL;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
		INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
	return ctl;
}
static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
{
	list_del_init(&pps->entry);

	zram_slot_lock(zram, pps->index);
	zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
	zram_slot_unlock(zram, pps->index);

	kfree(pps);
}
static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
{
	u32 idx;

	if (!ctl)
		return;

	for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
		while (!list_empty(&ctl->pp_buckets[idx])) {
			struct zram_pp_slot *pps;

			pps = list_first_entry(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
			release_pp_slot(zram, pps);
		}
	}

	kfree(ctl);
}
static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
			  struct zram_pp_slot *pps)
{
	u32 idx;

	idx = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
	list_add(&pps->entry, &ctl->pp_buckets[idx]);

	zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
}
static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
{
	struct zram_pp_slot *pps = NULL;
	s32 idx = NUM_PP_BUCKETS - 1;

	/* The higher the bucket id the more optimal slot post-processing is */
	while (idx >= 0) {
		pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
					       struct zram_pp_slot,
					       entry);
		if (pps)
			break;

		idx--;
	}
	return pps;
}
static inline void update_used_max(struct zram *zram,
				   const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}
static inline void zram_fill_page(void *ptr, unsigned long len,
				  unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}
static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}
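
/*
 * Descriptive note: a page is "same filled" when every machine word in it
 * equals the first word. Such pages are not stored in zsmalloc at all; the
 * repeated value is kept in the slot (ZRAM_SAME) and re-expanded on read by
 * zram_fill_page().
 */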
static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}
/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
		 * post-processing (recompress, writeback) happens to the
		 * ZRAM_SAME slot.
		 *
		 * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
		    zram_test_flag(zram, index, ZRAM_WB) ||
		    zram_test_flag(zram, index, ZRAM_SAME)) {
			zram_slot_unlock(zram, index);
			continue;
		}

#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
		is_idle = !cutoff ||
			ktime_after(cutoff, zram->table[index].ac_time);
#endif
		if (is_idle)
			zram_set_flag(zram, index, ZRAM_IDLE);
		else
			zram_clear_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
	}
}
static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_TRACK_ENTRY_ACTIME) && !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}
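
/*
 * Usage note for the idle attribute (inferred from idle_store() above):
 * writing "all" marks every allocated, non-ZRAM_WB, non-ZRAM_SAME slot as
 * idle, while writing a number N (only meaningful with
 * CONFIG_ZRAM_TRACK_ENTRY_ACTIME) marks slots that have not been accessed
 * for at least N seconds.
 */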
#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}
static void reset_bdev(struct zram *zram)
{
	if (!zram->backing_dev)
		return;

	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}
542 static ssize_t
backing_dev_show(struct device
*dev
,
543 struct device_attribute
*attr
, char *buf
)
546 struct zram
*zram
= dev_to_zram(dev
);
550 down_read(&zram
->init_lock
);
551 file
= zram
->backing_dev
;
553 memcpy(buf
, "none\n", 5);
554 up_read(&zram
->init_lock
);
558 p
= file_path(file
, buf
, PAGE_SIZE
- 1);
565 memmove(buf
, p
, ret
);
568 up_read(&zram
->init_lock
);
572 static ssize_t
backing_dev_store(struct device
*dev
,
573 struct device_attribute
*attr
, const char *buf
, size_t len
)
577 struct file
*backing_dev
= NULL
;
579 unsigned int bitmap_sz
;
580 unsigned long nr_pages
, *bitmap
= NULL
;
582 struct zram
*zram
= dev_to_zram(dev
);
584 file_name
= kmalloc(PATH_MAX
, GFP_KERNEL
);
588 down_write(&zram
->init_lock
);
589 if (init_done(zram
)) {
590 pr_info("Can't setup backing device for initialized device\n");
595 strscpy(file_name
, buf
, PATH_MAX
);
596 /* ignore trailing newline */
597 sz
= strlen(file_name
);
598 if (sz
> 0 && file_name
[sz
- 1] == '\n')
599 file_name
[sz
- 1] = 0x00;
601 backing_dev
= filp_open(file_name
, O_RDWR
| O_LARGEFILE
| O_EXCL
, 0);
602 if (IS_ERR(backing_dev
)) {
603 err
= PTR_ERR(backing_dev
);
608 inode
= backing_dev
->f_mapping
->host
;
610 /* Support only block device in this moment */
611 if (!S_ISBLK(inode
->i_mode
)) {
616 nr_pages
= i_size_read(inode
) >> PAGE_SHIFT
;
617 bitmap_sz
= BITS_TO_LONGS(nr_pages
) * sizeof(long);
618 bitmap
= kvzalloc(bitmap_sz
, GFP_KERNEL
);
626 zram
->bdev
= I_BDEV(inode
);
627 zram
->backing_dev
= backing_dev
;
628 zram
->bitmap
= bitmap
;
629 zram
->nr_pages
= nr_pages
;
630 up_write(&zram
->init_lock
);
632 pr_info("setup backing device %s\n", file_name
);
640 filp_close(backing_dev
, NULL
);
642 up_write(&zram
->init_lock
);
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0, so a block index is never confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}
static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}
static void read_from_bdev_async(struct zram *zram, struct page *page,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	__bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_chain(bio, parent);
	submit_bio(bio);
}
#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1 << 0)
#define IDLE_WRITEBACK			(1 << 1)
#define INCOMPRESSIBLE_WRITEBACK	(1 << 2)
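
/*
 * Descriptive note: the writeback modes above form a bitmask, so e.g. the
 * "huge_idle" sysfs keyword selects IDLE_WRITEBACK | HUGE_WRITEBACK, while
 * PAGE_WRITEBACK (0) targets a single page given as "page_index=<n>".
 */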
693 static int scan_slots_for_writeback(struct zram
*zram
, u32 mode
,
694 unsigned long nr_pages
,
696 struct zram_pp_ctl
*ctl
)
698 struct zram_pp_slot
*pps
= NULL
;
700 for (; nr_pages
!= 0; index
++, nr_pages
--) {
702 pps
= kmalloc(sizeof(*pps
), GFP_KERNEL
);
706 INIT_LIST_HEAD(&pps
->entry
);
708 zram_slot_lock(zram
, index
);
709 if (!zram_allocated(zram
, index
))
712 if (zram_test_flag(zram
, index
, ZRAM_WB
) ||
713 zram_test_flag(zram
, index
, ZRAM_SAME
))
716 if (mode
& IDLE_WRITEBACK
&&
717 !zram_test_flag(zram
, index
, ZRAM_IDLE
))
719 if (mode
& HUGE_WRITEBACK
&&
720 !zram_test_flag(zram
, index
, ZRAM_HUGE
))
722 if (mode
& INCOMPRESSIBLE_WRITEBACK
&&
723 !zram_test_flag(zram
, index
, ZRAM_INCOMPRESSIBLE
))
727 place_pp_slot(zram
, ctl
, pps
);
730 zram_slot_unlock(zram
, index
);
737 static ssize_t
writeback_store(struct device
*dev
,
738 struct device_attribute
*attr
, const char *buf
, size_t len
)
740 struct zram
*zram
= dev_to_zram(dev
);
741 unsigned long nr_pages
= zram
->disksize
>> PAGE_SHIFT
;
742 struct zram_pp_ctl
*ctl
= NULL
;
743 struct zram_pp_slot
*pps
;
744 unsigned long index
= 0;
746 struct bio_vec bio_vec
;
750 unsigned long blk_idx
= 0;
752 if (sysfs_streq(buf
, "idle"))
753 mode
= IDLE_WRITEBACK
;
754 else if (sysfs_streq(buf
, "huge"))
755 mode
= HUGE_WRITEBACK
;
756 else if (sysfs_streq(buf
, "huge_idle"))
757 mode
= IDLE_WRITEBACK
| HUGE_WRITEBACK
;
758 else if (sysfs_streq(buf
, "incompressible"))
759 mode
= INCOMPRESSIBLE_WRITEBACK
;
761 if (strncmp(buf
, PAGE_WB_SIG
, sizeof(PAGE_WB_SIG
) - 1))
764 if (kstrtol(buf
+ sizeof(PAGE_WB_SIG
) - 1, 10, &index
) ||
769 mode
= PAGE_WRITEBACK
;
772 down_read(&zram
->init_lock
);
773 if (!init_done(zram
)) {
775 goto release_init_lock
;
778 /* Do not permit concurrent post-processing actions. */
779 if (atomic_xchg(&zram
->pp_in_progress
, 1)) {
780 up_read(&zram
->init_lock
);
784 if (!zram
->backing_dev
) {
786 goto release_init_lock
;
789 page
= alloc_page(GFP_KERNEL
);
792 goto release_init_lock
;
798 goto release_init_lock
;
801 scan_slots_for_writeback(zram
, mode
, nr_pages
, index
, ctl
);
803 while ((pps
= select_pp_slot(ctl
))) {
804 spin_lock(&zram
->wb_limit_lock
);
805 if (zram
->wb_limit_enable
&& !zram
->bd_wb_limit
) {
806 spin_unlock(&zram
->wb_limit_lock
);
810 spin_unlock(&zram
->wb_limit_lock
);
813 blk_idx
= alloc_block_bdev(zram
);
821 zram_slot_lock(zram
, index
);
		/*
		 * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock,
		 * so slots can change in the meantime. If slots are accessed
		 * or freed they lose the ZRAM_PP_SLOT flag and hence we don't
		 * post-process them.
		 */
828 if (!zram_test_flag(zram
, index
, ZRAM_PP_SLOT
))
830 zram_slot_unlock(zram
, index
);
832 if (zram_read_page(zram
, page
, index
, NULL
)) {
833 release_pp_slot(zram
, pps
);
837 bio_init(&bio
, zram
->bdev
, &bio_vec
, 1,
838 REQ_OP_WRITE
| REQ_SYNC
);
839 bio
.bi_iter
.bi_sector
= blk_idx
* (PAGE_SIZE
>> 9);
840 __bio_add_page(&bio
, page
, PAGE_SIZE
, 0);
		/*
		 * XXX: A single-page IO would be inefficient for writeback,
		 * but it is not a bad starting point.
		 */
846 err
= submit_bio_wait(&bio
);
848 release_pp_slot(zram
, pps
);
850 * BIO errors are not fatal, we continue and simply
851 * attempt to writeback the remaining objects (pages).
852 * At the same time we need to signal user-space that
853 * some writes (at least one, but also could be all of
854 * them) were not successful and we do so by returning
855 * the most recent BIO error.
861 atomic64_inc(&zram
->stats
.bd_writes
);
862 zram_slot_lock(zram
, index
);
864 * Same as above, we release slot lock during writeback so
865 * slot can change under us: slot_free() or slot_free() and
866 * reallocation (zram_write_page()). In both cases slot loses
867 * ZRAM_PP_SLOT flag. No concurrent post-processing can set
868 * ZRAM_PP_SLOT on such slots until current post-processing
871 if (!zram_test_flag(zram
, index
, ZRAM_PP_SLOT
))
874 zram_free_page(zram
, index
);
875 zram_set_flag(zram
, index
, ZRAM_WB
);
876 zram_set_element(zram
, index
, blk_idx
);
878 atomic64_inc(&zram
->stats
.pages_stored
);
879 spin_lock(&zram
->wb_limit_lock
);
880 if (zram
->wb_limit_enable
&& zram
->bd_wb_limit
> 0)
881 zram
->bd_wb_limit
-= 1UL << (PAGE_SHIFT
- 12);
882 spin_unlock(&zram
->wb_limit_lock
);
884 zram_slot_unlock(zram
, index
);
885 release_pp_slot(zram
, pps
);
889 free_block_bdev(zram
, blk_idx
);
892 release_pp_ctl(zram
, ctl
);
893 atomic_set(&zram
->pp_in_progress
, 0);
894 up_read(&zram
->init_lock
);
900 struct work_struct work
;
907 static void zram_sync_read(struct work_struct
*work
)
909 struct zram_work
*zw
= container_of(work
, struct zram_work
, work
);
913 bio_init(&bio
, zw
->zram
->bdev
, &bv
, 1, REQ_OP_READ
);
914 bio
.bi_iter
.bi_sector
= zw
->entry
* (PAGE_SIZE
>> 9);
915 __bio_add_page(&bio
, zw
->page
, PAGE_SIZE
, 0);
916 zw
->error
= submit_bio_wait(&bio
);
/*
 * The block layer wants one ->submit_bio to be active at a time, so if we use
 * chained IO with the parent IO in the same context, it's a deadlock. To
 * avoid that, use a worker thread context.
 */
924 static int read_from_bdev_sync(struct zram
*zram
, struct page
*page
,
927 struct zram_work work
;
933 INIT_WORK_ONSTACK(&work
.work
, zram_sync_read
);
934 queue_work(system_unbound_wq
, &work
.work
);
935 flush_work(&work
.work
);
936 destroy_work_on_stack(&work
.work
);
941 static int read_from_bdev(struct zram
*zram
, struct page
*page
,
942 unsigned long entry
, struct bio
*parent
)
944 atomic64_inc(&zram
->stats
.bd_reads
);
946 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO
)))
948 return read_from_bdev_sync(zram
, page
, entry
);
950 read_from_bdev_async(zram
, page
, entry
, parent
);
954 static inline void reset_bdev(struct zram
*zram
) {};
955 static int read_from_bdev(struct zram
*zram
, struct page
*page
,
956 unsigned long entry
, struct bio
*parent
)
961 static void free_block_bdev(struct zram
*zram
, unsigned long blk_idx
) {};
964 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
966 static struct dentry
*zram_debugfs_root
;
968 static void zram_debugfs_create(void)
970 zram_debugfs_root
= debugfs_create_dir("zram", NULL
);
973 static void zram_debugfs_destroy(void)
975 debugfs_remove_recursive(zram_debugfs_root
);
978 static ssize_t
read_block_state(struct file
*file
, char __user
*buf
,
979 size_t count
, loff_t
*ppos
)
982 ssize_t index
, written
= 0;
983 struct zram
*zram
= file
->private_data
;
984 unsigned long nr_pages
= zram
->disksize
>> PAGE_SHIFT
;
985 struct timespec64 ts
;
987 kbuf
= kvmalloc(count
, GFP_KERNEL
);
991 down_read(&zram
->init_lock
);
992 if (!init_done(zram
)) {
993 up_read(&zram
->init_lock
);
998 for (index
= *ppos
; index
< nr_pages
; index
++) {
1001 zram_slot_lock(zram
, index
);
1002 if (!zram_allocated(zram
, index
))
1005 ts
= ktime_to_timespec64(zram
->table
[index
].ac_time
);
1006 copied
= snprintf(kbuf
+ written
, count
,
1007 "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
1008 index
, (s64
)ts
.tv_sec
,
1009 ts
.tv_nsec
/ NSEC_PER_USEC
,
1010 zram_test_flag(zram
, index
, ZRAM_SAME
) ? 's' : '.',
1011 zram_test_flag(zram
, index
, ZRAM_WB
) ? 'w' : '.',
1012 zram_test_flag(zram
, index
, ZRAM_HUGE
) ? 'h' : '.',
1013 zram_test_flag(zram
, index
, ZRAM_IDLE
) ? 'i' : '.',
1014 zram_get_priority(zram
, index
) ? 'r' : '.',
1015 zram_test_flag(zram
, index
,
1016 ZRAM_INCOMPRESSIBLE
) ? 'n' : '.');
1018 if (count
<= copied
) {
1019 zram_slot_unlock(zram
, index
);
1025 zram_slot_unlock(zram
, index
);
1029 up_read(&zram
->init_lock
);
1030 if (copy_to_user(buf
, kbuf
, written
))
1037 static const struct file_operations proc_zram_block_state_op
= {
1038 .open
= simple_open
,
1039 .read
= read_block_state
,
1040 .llseek
= default_llseek
,
1043 static void zram_debugfs_register(struct zram
*zram
)
1045 if (!zram_debugfs_root
)
1048 zram
->debugfs_dir
= debugfs_create_dir(zram
->disk
->disk_name
,
1050 debugfs_create_file("block_state", 0400, zram
->debugfs_dir
,
1051 zram
, &proc_zram_block_state_op
);
1054 static void zram_debugfs_unregister(struct zram
*zram
)
1056 debugfs_remove_recursive(zram
->debugfs_dir
);
1059 static void zram_debugfs_create(void) {};
1060 static void zram_debugfs_destroy(void) {};
1061 static void zram_debugfs_register(struct zram
*zram
) {};
1062 static void zram_debugfs_unregister(struct zram
*zram
) {};
1066 * We switched to per-cpu streams and this attr is not needed anymore.
1067 * However, we will keep it around for some time, because:
1068 * a) we may revert per-cpu streams in the future
1069 * b) it's visible to user space and we need to follow our 2 years
1070 * retirement rule; but we already have a number of 'soon to be
1071 * altered' attrs, so max_comp_streams need to wait for the next
1074 static ssize_t
max_comp_streams_show(struct device
*dev
,
1075 struct device_attribute
*attr
, char *buf
)
1077 return scnprintf(buf
, PAGE_SIZE
, "%d\n", num_online_cpus());
1080 static ssize_t
max_comp_streams_store(struct device
*dev
,
1081 struct device_attribute
*attr
, const char *buf
, size_t len
)
1086 static void comp_algorithm_set(struct zram
*zram
, u32 prio
, const char *alg
)
1088 /* Do not free statically defined compression algorithms */
1089 if (zram
->comp_algs
[prio
] != default_compressor
)
1090 kfree(zram
->comp_algs
[prio
]);
1092 zram
->comp_algs
[prio
] = alg
;
1095 static ssize_t
__comp_algorithm_show(struct zram
*zram
, u32 prio
, char *buf
)
1099 down_read(&zram
->init_lock
);
1100 sz
= zcomp_available_show(zram
->comp_algs
[prio
], buf
);
1101 up_read(&zram
->init_lock
);
1106 static int __comp_algorithm_store(struct zram
*zram
, u32 prio
, const char *buf
)
1112 if (sz
>= CRYPTO_MAX_ALG_NAME
)
1115 compressor
= kstrdup(buf
, GFP_KERNEL
);
1119 /* ignore trailing newline */
1120 if (sz
> 0 && compressor
[sz
- 1] == '\n')
1121 compressor
[sz
- 1] = 0x00;
1123 if (!zcomp_available_algorithm(compressor
)) {
1128 down_write(&zram
->init_lock
);
1129 if (init_done(zram
)) {
1130 up_write(&zram
->init_lock
);
1132 pr_info("Can't change algorithm for initialized device\n");
1136 comp_algorithm_set(zram
, prio
, compressor
);
1137 up_write(&zram
->init_lock
);
1141 static void comp_params_reset(struct zram
*zram
, u32 prio
)
1143 struct zcomp_params
*params
= &zram
->params
[prio
];
1145 vfree(params
->dict
);
1146 params
->level
= ZCOMP_PARAM_NO_LEVEL
;
1147 params
->dict_sz
= 0;
1148 params
->dict
= NULL
;
1151 static int comp_params_store(struct zram
*zram
, u32 prio
, s32 level
,
1152 const char *dict_path
)
1156 comp_params_reset(zram
, prio
);
1159 sz
= kernel_read_file_from_path(dict_path
, 0,
1160 &zram
->params
[prio
].dict
,
1168 zram
->params
[prio
].dict_sz
= sz
;
1169 zram
->params
[prio
].level
= level
;
1173 static ssize_t
algorithm_params_store(struct device
*dev
,
1174 struct device_attribute
*attr
,
1178 s32 prio
= ZRAM_PRIMARY_COMP
, level
= ZCOMP_PARAM_NO_LEVEL
;
1179 char *args
, *param
, *val
, *algo
= NULL
, *dict_path
= NULL
;
1180 struct zram
*zram
= dev_to_zram(dev
);
1183 args
= skip_spaces(buf
);
1185 args
= next_arg(args
, ¶m
, &val
);
1190 if (!strcmp(param
, "priority")) {
1191 ret
= kstrtoint(val
, 10, &prio
);
1197 if (!strcmp(param
, "level")) {
1198 ret
= kstrtoint(val
, 10, &level
);
1204 if (!strcmp(param
, "algo")) {
1209 if (!strcmp(param
, "dict")) {
1215 /* Lookup priority by algorithm name */
1220 for (p
= ZRAM_PRIMARY_COMP
; p
< ZRAM_MAX_COMPS
; p
++) {
1221 if (!zram
->comp_algs
[p
])
1224 if (!strcmp(zram
->comp_algs
[p
], algo
)) {
1231 if (prio
< ZRAM_PRIMARY_COMP
|| prio
>= ZRAM_MAX_COMPS
)
1234 ret
= comp_params_store(zram
, prio
, level
, dict_path
);
1235 return ret
? ret
: len
;
1238 static ssize_t
comp_algorithm_show(struct device
*dev
,
1239 struct device_attribute
*attr
,
1242 struct zram
*zram
= dev_to_zram(dev
);
1244 return __comp_algorithm_show(zram
, ZRAM_PRIMARY_COMP
, buf
);
1247 static ssize_t
comp_algorithm_store(struct device
*dev
,
1248 struct device_attribute
*attr
,
1252 struct zram
*zram
= dev_to_zram(dev
);
1255 ret
= __comp_algorithm_store(zram
, ZRAM_PRIMARY_COMP
, buf
);
1256 return ret
? ret
: len
;
1259 #ifdef CONFIG_ZRAM_MULTI_COMP
1260 static ssize_t
recomp_algorithm_show(struct device
*dev
,
1261 struct device_attribute
*attr
,
1264 struct zram
*zram
= dev_to_zram(dev
);
1268 for (prio
= ZRAM_SECONDARY_COMP
; prio
< ZRAM_MAX_COMPS
; prio
++) {
1269 if (!zram
->comp_algs
[prio
])
1272 sz
+= scnprintf(buf
+ sz
, PAGE_SIZE
- sz
- 2, "#%d: ", prio
);
1273 sz
+= __comp_algorithm_show(zram
, prio
, buf
+ sz
);
1279 static ssize_t
recomp_algorithm_store(struct device
*dev
,
1280 struct device_attribute
*attr
,
1284 struct zram
*zram
= dev_to_zram(dev
);
1285 int prio
= ZRAM_SECONDARY_COMP
;
1286 char *args
, *param
, *val
;
1290 args
= skip_spaces(buf
);
1292 args
= next_arg(args
, ¶m
, &val
);
1297 if (!strcmp(param
, "algo")) {
1302 if (!strcmp(param
, "priority")) {
1303 ret
= kstrtoint(val
, 10, &prio
);
1313 if (prio
< ZRAM_SECONDARY_COMP
|| prio
>= ZRAM_MAX_COMPS
)
1316 ret
= __comp_algorithm_store(zram
, prio
, alg
);
1317 return ret
? ret
: len
;
1321 static ssize_t
compact_store(struct device
*dev
,
1322 struct device_attribute
*attr
, const char *buf
, size_t len
)
1324 struct zram
*zram
= dev_to_zram(dev
);
1326 down_read(&zram
->init_lock
);
1327 if (!init_done(zram
)) {
1328 up_read(&zram
->init_lock
);
1332 zs_compact(zram
->mem_pool
);
1333 up_read(&zram
->init_lock
);
1338 static ssize_t
io_stat_show(struct device
*dev
,
1339 struct device_attribute
*attr
, char *buf
)
1341 struct zram
*zram
= dev_to_zram(dev
);
1344 down_read(&zram
->init_lock
);
1345 ret
= scnprintf(buf
, PAGE_SIZE
,
1346 "%8llu %8llu 0 %8llu\n",
1347 (u64
)atomic64_read(&zram
->stats
.failed_reads
),
1348 (u64
)atomic64_read(&zram
->stats
.failed_writes
),
1349 (u64
)atomic64_read(&zram
->stats
.notify_free
));
1350 up_read(&zram
->init_lock
);
1355 static ssize_t
mm_stat_show(struct device
*dev
,
1356 struct device_attribute
*attr
, char *buf
)
1358 struct zram
*zram
= dev_to_zram(dev
);
1359 struct zs_pool_stats pool_stats
;
1360 u64 orig_size
, mem_used
= 0;
1364 memset(&pool_stats
, 0x00, sizeof(struct zs_pool_stats
));
1366 down_read(&zram
->init_lock
);
1367 if (init_done(zram
)) {
1368 mem_used
= zs_get_total_pages(zram
->mem_pool
);
1369 zs_pool_stats(zram
->mem_pool
, &pool_stats
);
1372 orig_size
= atomic64_read(&zram
->stats
.pages_stored
);
1373 max_used
= atomic_long_read(&zram
->stats
.max_used_pages
);
1375 ret
= scnprintf(buf
, PAGE_SIZE
,
1376 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
1377 orig_size
<< PAGE_SHIFT
,
1378 (u64
)atomic64_read(&zram
->stats
.compr_data_size
),
1379 mem_used
<< PAGE_SHIFT
,
1380 zram
->limit_pages
<< PAGE_SHIFT
,
1381 max_used
<< PAGE_SHIFT
,
1382 (u64
)atomic64_read(&zram
->stats
.same_pages
),
1383 atomic_long_read(&pool_stats
.pages_compacted
),
1384 (u64
)atomic64_read(&zram
->stats
.huge_pages
),
1385 (u64
)atomic64_read(&zram
->stats
.huge_pages_since
));
1386 up_read(&zram
->init_lock
);
#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
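
/*
 * Descriptive note: FOUR_K() converts a page count into 4K units, so the
 * bd_stat counters below are reported in 4K blocks regardless of the
 * architecture's PAGE_SIZE.
 */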
1393 static ssize_t
bd_stat_show(struct device
*dev
,
1394 struct device_attribute
*attr
, char *buf
)
1396 struct zram
*zram
= dev_to_zram(dev
);
1399 down_read(&zram
->init_lock
);
1400 ret
= scnprintf(buf
, PAGE_SIZE
,
1401 "%8llu %8llu %8llu\n",
1402 FOUR_K((u64
)atomic64_read(&zram
->stats
.bd_count
)),
1403 FOUR_K((u64
)atomic64_read(&zram
->stats
.bd_reads
)),
1404 FOUR_K((u64
)atomic64_read(&zram
->stats
.bd_writes
)));
1405 up_read(&zram
->init_lock
);
1411 static ssize_t
debug_stat_show(struct device
*dev
,
1412 struct device_attribute
*attr
, char *buf
)
1415 struct zram
*zram
= dev_to_zram(dev
);
1418 down_read(&zram
->init_lock
);
1419 ret
= scnprintf(buf
, PAGE_SIZE
,
1420 "version: %d\n%8llu %8llu\n",
1422 (u64
)atomic64_read(&zram
->stats
.writestall
),
1423 (u64
)atomic64_read(&zram
->stats
.miss_free
));
1424 up_read(&zram
->init_lock
);
1429 static DEVICE_ATTR_RO(io_stat
);
1430 static DEVICE_ATTR_RO(mm_stat
);
1431 #ifdef CONFIG_ZRAM_WRITEBACK
1432 static DEVICE_ATTR_RO(bd_stat
);
1434 static DEVICE_ATTR_RO(debug_stat
);
1436 static void zram_meta_free(struct zram
*zram
, u64 disksize
)
1438 size_t num_pages
= disksize
>> PAGE_SHIFT
;
1441 /* Free all pages that are still in this zram device */
1442 for (index
= 0; index
< num_pages
; index
++)
1443 zram_free_page(zram
, index
);
1445 zs_destroy_pool(zram
->mem_pool
);
1449 static bool zram_meta_alloc(struct zram
*zram
, u64 disksize
)
1451 size_t num_pages
, index
;
1453 num_pages
= disksize
>> PAGE_SHIFT
;
1454 zram
->table
= vzalloc(array_size(num_pages
, sizeof(*zram
->table
)));
1458 zram
->mem_pool
= zs_create_pool(zram
->disk
->disk_name
);
1459 if (!zram
->mem_pool
) {
1464 if (!huge_class_size
)
1465 huge_class_size
= zs_huge_class_size(zram
->mem_pool
);
1467 for (index
= 0; index
< num_pages
; index
++)
1468 spin_lock_init(&zram
->table
[index
].lock
);
1473 * To protect concurrent access to the same index entry,
1474 * caller should hold this table index entry's bit_spinlock to
1475 * indicate this index entry is accessing.
1477 static void zram_free_page(struct zram
*zram
, size_t index
)
1479 unsigned long handle
;
1481 #ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
1482 zram
->table
[index
].ac_time
= 0;
1485 zram_clear_flag(zram
, index
, ZRAM_IDLE
);
1486 zram_clear_flag(zram
, index
, ZRAM_INCOMPRESSIBLE
);
1487 zram_clear_flag(zram
, index
, ZRAM_PP_SLOT
);
1488 zram_set_priority(zram
, index
, 0);
1490 if (zram_test_flag(zram
, index
, ZRAM_HUGE
)) {
1491 zram_clear_flag(zram
, index
, ZRAM_HUGE
);
1492 atomic64_dec(&zram
->stats
.huge_pages
);
1495 if (zram_test_flag(zram
, index
, ZRAM_WB
)) {
1496 zram_clear_flag(zram
, index
, ZRAM_WB
);
1497 free_block_bdev(zram
, zram_get_element(zram
, index
));
1502 * No memory is allocated for same element filled pages.
1503 * Simply clear same page flag.
1505 if (zram_test_flag(zram
, index
, ZRAM_SAME
)) {
1506 zram_clear_flag(zram
, index
, ZRAM_SAME
);
1507 atomic64_dec(&zram
->stats
.same_pages
);
1511 handle
= zram_get_handle(zram
, index
);
1515 zs_free(zram
->mem_pool
, handle
);
1517 atomic64_sub(zram_get_obj_size(zram
, index
),
1518 &zram
->stats
.compr_data_size
);
1520 atomic64_dec(&zram
->stats
.pages_stored
);
1521 zram_set_handle(zram
, index
, 0);
1522 zram_set_obj_size(zram
, index
, 0);
1526 * Reads (decompresses if needed) a page from zspool (zsmalloc).
1527 * Corresponding ZRAM slot should be locked.
1529 static int zram_read_from_zspool(struct zram
*zram
, struct page
*page
,
1532 struct zcomp_strm
*zstrm
;
1533 unsigned long handle
;
1539 handle
= zram_get_handle(zram
, index
);
1540 if (!handle
|| zram_test_flag(zram
, index
, ZRAM_SAME
)) {
1541 unsigned long value
;
1544 value
= handle
? zram_get_element(zram
, index
) : 0;
1545 mem
= kmap_local_page(page
);
1546 zram_fill_page(mem
, PAGE_SIZE
, value
);
1551 size
= zram_get_obj_size(zram
, index
);
1553 if (size
!= PAGE_SIZE
) {
1554 prio
= zram_get_priority(zram
, index
);
1555 zstrm
= zcomp_stream_get(zram
->comps
[prio
]);
1558 src
= zs_map_object(zram
->mem_pool
, handle
, ZS_MM_RO
);
1559 if (size
== PAGE_SIZE
) {
1560 dst
= kmap_local_page(page
);
1561 copy_page(dst
, src
);
1565 dst
= kmap_local_page(page
);
1566 ret
= zcomp_decompress(zram
->comps
[prio
], zstrm
,
1569 zcomp_stream_put(zram
->comps
[prio
]);
1571 zs_unmap_object(zram
->mem_pool
, handle
);
1575 static int zram_read_page(struct zram
*zram
, struct page
*page
, u32 index
,
1580 zram_slot_lock(zram
, index
);
1581 if (!zram_test_flag(zram
, index
, ZRAM_WB
)) {
1582 /* Slot should be locked through out the function call */
1583 ret
= zram_read_from_zspool(zram
, page
, index
);
1584 zram_slot_unlock(zram
, index
);
1587 * The slot should be unlocked before reading from the backing
1590 zram_slot_unlock(zram
, index
);
1592 ret
= read_from_bdev(zram
, page
, zram_get_element(zram
, index
),
1596 /* Should NEVER happen. Return bio error if it does. */
1597 if (WARN_ON(ret
< 0))
1598 pr_err("Decompression failed! err=%d, page=%u\n", ret
, index
);
1604 * Use a temporary buffer to decompress the page, as the decompressor
1605 * always expects a full page for the output.
1607 static int zram_bvec_read_partial(struct zram
*zram
, struct bio_vec
*bvec
,
1608 u32 index
, int offset
)
1610 struct page
*page
= alloc_page(GFP_NOIO
);
1615 ret
= zram_read_page(zram
, page
, index
, NULL
);
1617 memcpy_to_bvec(bvec
, page_address(page
) + offset
);
1622 static int zram_bvec_read(struct zram
*zram
, struct bio_vec
*bvec
,
1623 u32 index
, int offset
, struct bio
*bio
)
1625 if (is_partial_io(bvec
))
1626 return zram_bvec_read_partial(zram
, bvec
, index
, offset
);
1627 return zram_read_page(zram
, bvec
->bv_page
, index
, bio
);
1630 static int zram_write_page(struct zram
*zram
, struct page
*page
, u32 index
)
1633 unsigned long alloced_pages
;
1634 unsigned long handle
= -ENOMEM
;
1635 unsigned int comp_len
= 0;
1636 void *src
, *dst
, *mem
;
1637 struct zcomp_strm
*zstrm
;
1638 unsigned long element
= 0;
1639 enum zram_pageflags flags
= 0;
1641 mem
= kmap_local_page(page
);
1642 if (page_same_filled(mem
, &element
)) {
1644 /* Free memory associated with this sector now. */
1646 atomic64_inc(&zram
->stats
.same_pages
);
1652 zstrm
= zcomp_stream_get(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1653 src
= kmap_local_page(page
);
1654 ret
= zcomp_compress(zram
->comps
[ZRAM_PRIMARY_COMP
], zstrm
,
1658 if (unlikely(ret
)) {
1659 zcomp_stream_put(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1660 pr_err("Compression failed! err=%d\n", ret
);
1661 zs_free(zram
->mem_pool
, handle
);
1665 if (comp_len
>= huge_class_size
)
1666 comp_len
= PAGE_SIZE
;
1668 * handle allocation has 2 paths:
1669 * a) fast path is executed with preemption disabled (for
1670 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1671 * since we can't sleep;
1672 * b) slow path enables preemption and attempts to allocate
1673 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1674 * put per-cpu compression stream and, thus, to re-do
1675 * the compression once handle is allocated.
1677 * if we have a 'non-null' handle here then we are coming
1678 * from the slow path and handle has already been allocated.
1680 if (IS_ERR_VALUE(handle
))
1681 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1682 __GFP_KSWAPD_RECLAIM
|
1686 if (IS_ERR_VALUE(handle
)) {
1687 zcomp_stream_put(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1688 atomic64_inc(&zram
->stats
.writestall
);
1689 handle
= zs_malloc(zram
->mem_pool
, comp_len
,
1690 GFP_NOIO
| __GFP_HIGHMEM
|
1692 if (IS_ERR_VALUE(handle
))
1693 return PTR_ERR((void *)handle
);
1695 if (comp_len
!= PAGE_SIZE
)
1696 goto compress_again
;
1698 * If the page is not compressible, you need to acquire the
1699 * lock and execute the code below. The zcomp_stream_get()
1700 * call is needed to disable the cpu hotplug and grab the
1701 * zstrm buffer back. It is necessary that the dereferencing
1702 * of the zstrm variable below occurs correctly.
1704 zstrm
= zcomp_stream_get(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1707 alloced_pages
= zs_get_total_pages(zram
->mem_pool
);
1708 update_used_max(zram
, alloced_pages
);
1710 if (zram
->limit_pages
&& alloced_pages
> zram
->limit_pages
) {
1711 zcomp_stream_put(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1712 zs_free(zram
->mem_pool
, handle
);
1716 dst
= zs_map_object(zram
->mem_pool
, handle
, ZS_MM_WO
);
1718 src
= zstrm
->buffer
;
1719 if (comp_len
== PAGE_SIZE
)
1720 src
= kmap_local_page(page
);
1721 memcpy(dst
, src
, comp_len
);
1722 if (comp_len
== PAGE_SIZE
)
1725 zcomp_stream_put(zram
->comps
[ZRAM_PRIMARY_COMP
]);
1726 zs_unmap_object(zram
->mem_pool
, handle
);
1727 atomic64_add(comp_len
, &zram
->stats
.compr_data_size
);
1730 * Free memory associated with this sector
1731 * before overwriting unused sectors.
1733 zram_slot_lock(zram
, index
);
1734 zram_free_page(zram
, index
);
1736 if (comp_len
== PAGE_SIZE
) {
1737 zram_set_flag(zram
, index
, ZRAM_HUGE
);
1738 atomic64_inc(&zram
->stats
.huge_pages
);
1739 atomic64_inc(&zram
->stats
.huge_pages_since
);
1743 zram_set_flag(zram
, index
, flags
);
1744 zram_set_element(zram
, index
, element
);
1746 zram_set_handle(zram
, index
, handle
);
1747 zram_set_obj_size(zram
, index
, comp_len
);
1749 zram_slot_unlock(zram
, index
);
1752 atomic64_inc(&zram
->stats
.pages_stored
);
1757 * This is a partial IO. Read the full page before writing the changes.
1759 static int zram_bvec_write_partial(struct zram
*zram
, struct bio_vec
*bvec
,
1760 u32 index
, int offset
, struct bio
*bio
)
1762 struct page
*page
= alloc_page(GFP_NOIO
);
1768 ret
= zram_read_page(zram
, page
, index
, bio
);
1770 memcpy_from_bvec(page_address(page
) + offset
, bvec
);
1771 ret
= zram_write_page(zram
, page
, index
);
1777 static int zram_bvec_write(struct zram
*zram
, struct bio_vec
*bvec
,
1778 u32 index
, int offset
, struct bio
*bio
)
1780 if (is_partial_io(bvec
))
1781 return zram_bvec_write_partial(zram
, bvec
, index
, offset
, bio
);
1782 return zram_write_page(zram
, bvec
->bv_page
, index
);
1785 #ifdef CONFIG_ZRAM_MULTI_COMP
1786 #define RECOMPRESS_IDLE (1 << 0)
1787 #define RECOMPRESS_HUGE (1 << 1)
1789 static int scan_slots_for_recompress(struct zram
*zram
, u32 mode
,
1790 struct zram_pp_ctl
*ctl
)
1792 unsigned long nr_pages
= zram
->disksize
>> PAGE_SHIFT
;
1793 struct zram_pp_slot
*pps
= NULL
;
1794 unsigned long index
;
1796 for (index
= 0; index
< nr_pages
; index
++) {
1798 pps
= kmalloc(sizeof(*pps
), GFP_KERNEL
);
1802 INIT_LIST_HEAD(&pps
->entry
);
1804 zram_slot_lock(zram
, index
);
1805 if (!zram_allocated(zram
, index
))
1808 if (mode
& RECOMPRESS_IDLE
&&
1809 !zram_test_flag(zram
, index
, ZRAM_IDLE
))
1812 if (mode
& RECOMPRESS_HUGE
&&
1813 !zram_test_flag(zram
, index
, ZRAM_HUGE
))
1816 if (zram_test_flag(zram
, index
, ZRAM_WB
) ||
1817 zram_test_flag(zram
, index
, ZRAM_SAME
) ||
1818 zram_test_flag(zram
, index
, ZRAM_INCOMPRESSIBLE
))
1822 place_pp_slot(zram
, ctl
, pps
);
1825 zram_slot_unlock(zram
, index
);
1833 * This function will decompress (unless it's ZRAM_HUGE) the page and then
1834 * attempt to compress it using provided compression algorithm priority
1835 * (which is potentially more effective).
1837 * Corresponding ZRAM slot should be locked.
1839 static int recompress_slot(struct zram
*zram
, u32 index
, struct page
*page
,
1840 u64
*num_recomp_pages
, u32 threshold
, u32 prio
,
1843 struct zcomp_strm
*zstrm
= NULL
;
1844 unsigned long handle_old
;
1845 unsigned long handle_new
;
1846 unsigned int comp_len_old
;
1847 unsigned int comp_len_new
;
1848 unsigned int class_index_old
;
1849 unsigned int class_index_new
;
1850 u32 num_recomps
= 0;
1854 handle_old
= zram_get_handle(zram
, index
);
1858 comp_len_old
= zram_get_obj_size(zram
, index
);
1860 * Do not recompress objects that are already "small enough".
1862 if (comp_len_old
< threshold
)
1865 ret
= zram_read_from_zspool(zram
, page
, index
);
1870 * We touched this entry so mark it as non-IDLE. This makes sure that
1871 * we don't preserve IDLE flag and don't incorrectly pick this entry
1872 * for different post-processing type (e.g. writeback).
1874 zram_clear_flag(zram
, index
, ZRAM_IDLE
);
1876 class_index_old
= zs_lookup_class_index(zram
->mem_pool
, comp_len_old
);
1878 * Iterate the secondary comp algorithms list (in order of priority)
1879 * and try to recompress the page.
1881 for (; prio
< prio_max
; prio
++) {
1882 if (!zram
->comps
[prio
])
1886 * Skip if the object is already re-compressed with a higher
1887 * priority algorithm (or same algorithm).
1889 if (prio
<= zram_get_priority(zram
, index
))
1893 zstrm
= zcomp_stream_get(zram
->comps
[prio
]);
1894 src
= kmap_local_page(page
);
1895 ret
= zcomp_compress(zram
->comps
[prio
], zstrm
,
1896 src
, &comp_len_new
);
1900 zcomp_stream_put(zram
->comps
[prio
]);
1904 class_index_new
= zs_lookup_class_index(zram
->mem_pool
,
1907 /* Continue until we make progress */
1908 if (class_index_new
>= class_index_old
||
1909 (threshold
&& comp_len_new
>= threshold
)) {
1910 zcomp_stream_put(zram
->comps
[prio
]);
1914 /* Recompression was successful so break out */
1919 * We did not try to recompress, e.g. when we have only one
1920 * secondary algorithm and the page is already recompressed
1921 * using that algorithm
1927 * Decrement the limit (if set) on pages we can recompress, even
1928 * when current recompression was unsuccessful or did not compress
1929 * the page below the threshold, because we still spent resources
1932 if (*num_recomp_pages
)
1933 *num_recomp_pages
-= 1;
1935 if (class_index_new
>= class_index_old
) {
1937 * Secondary algorithms failed to re-compress the page
1938 * in a way that would save memory, mark the object as
1939 * incompressible so that we will not try to compress
1942 * We need to make sure that all secondary algorithms have
1943 * failed, so we test if the number of recompressions matches
1944 * the number of active secondary algorithms.
1946 if (num_recomps
== zram
->num_active_comps
- 1)
1947 zram_set_flag(zram
, index
, ZRAM_INCOMPRESSIBLE
);
1951 /* Successful recompression but above threshold */
1952 if (threshold
&& comp_len_new
>= threshold
)
1956 * No direct reclaim (slow path) for handle allocation and no
1957 * re-compression attempt (unlike in zram_write_bvec()) since
1958 * we already have stored that object in zsmalloc. If we cannot
1959 * alloc memory for recompressed object then we bail out and
1960 * simply keep the old (existing) object in zsmalloc.
1962 handle_new
= zs_malloc(zram
->mem_pool
, comp_len_new
,
1963 __GFP_KSWAPD_RECLAIM
|
1967 if (IS_ERR_VALUE(handle_new
)) {
1968 zcomp_stream_put(zram
->comps
[prio
]);
1969 return PTR_ERR((void *)handle_new
);
1972 dst
= zs_map_object(zram
->mem_pool
, handle_new
, ZS_MM_WO
);
1973 memcpy(dst
, zstrm
->buffer
, comp_len_new
);
1974 zcomp_stream_put(zram
->comps
[prio
]);
1976 zs_unmap_object(zram
->mem_pool
, handle_new
);
1978 zram_free_page(zram
, index
);
1979 zram_set_handle(zram
, index
, handle_new
);
1980 zram_set_obj_size(zram
, index
, comp_len_new
);
1981 zram_set_priority(zram
, index
, prio
);
1983 atomic64_add(comp_len_new
, &zram
->stats
.compr_data_size
);
1984 atomic64_inc(&zram
->stats
.pages_stored
);
1989 static ssize_t
recompress_store(struct device
*dev
,
1990 struct device_attribute
*attr
,
1991 const char *buf
, size_t len
)
1993 u32 prio
= ZRAM_SECONDARY_COMP
, prio_max
= ZRAM_MAX_COMPS
;
1994 struct zram
*zram
= dev_to_zram(dev
);
1995 char *args
, *param
, *val
, *algo
= NULL
;
1996 u64 num_recomp_pages
= ULLONG_MAX
;
1997 struct zram_pp_ctl
*ctl
= NULL
;
1998 struct zram_pp_slot
*pps
;
1999 u32 mode
= 0, threshold
= 0;
2003 args
= skip_spaces(buf
);
2005 args
= next_arg(args
, ¶m
, &val
);
2010 if (!strcmp(param
, "type")) {
2011 if (!strcmp(val
, "idle"))
2012 mode
= RECOMPRESS_IDLE
;
2013 if (!strcmp(val
, "huge"))
2014 mode
= RECOMPRESS_HUGE
;
2015 if (!strcmp(val
, "huge_idle"))
2016 mode
= RECOMPRESS_IDLE
| RECOMPRESS_HUGE
;
2020 if (!strcmp(param
, "max_pages")) {
2022 * Limit the number of entries (pages) we attempt to
2025 ret
= kstrtoull(val
, 10, &num_recomp_pages
);
2031 if (!strcmp(param
, "threshold")) {
2033 * We will re-compress only idle objects equal or
2034 * greater in size than watermark.
2036 ret
= kstrtouint(val
, 10, &threshold
);
2042 if (!strcmp(param
, "algo")) {
2047 if (!strcmp(param
, "priority")) {
2048 ret
= kstrtouint(val
, 10, &prio
);
2052 if (prio
== ZRAM_PRIMARY_COMP
)
2053 prio
= ZRAM_SECONDARY_COMP
;
2055 prio_max
= min(prio
+ 1, ZRAM_MAX_COMPS
);
2060 if (threshold
>= huge_class_size
)
2063 down_read(&zram
->init_lock
);
2064 if (!init_done(zram
)) {
2066 goto release_init_lock
;
2069 /* Do not permit concurrent post-processing actions. */
2070 if (atomic_xchg(&zram
->pp_in_progress
, 1)) {
2071 up_read(&zram
->init_lock
);
2078 for (; prio
< ZRAM_MAX_COMPS
; prio
++) {
2079 if (!zram
->comp_algs
[prio
])
2082 if (!strcmp(zram
->comp_algs
[prio
], algo
)) {
2083 prio_max
= min(prio
+ 1, ZRAM_MAX_COMPS
);
2091 goto release_init_lock
;
2095 page
= alloc_page(GFP_KERNEL
);
2098 goto release_init_lock
;
2101 ctl
= init_pp_ctl();
2104 goto release_init_lock
;
2107 scan_slots_for_recompress(zram
, mode
, ctl
);
2110 while ((pps
= select_pp_slot(ctl
))) {
2113 if (!num_recomp_pages
)
2116 zram_slot_lock(zram
, pps
->index
);
2117 if (!zram_test_flag(zram
, pps
->index
, ZRAM_PP_SLOT
))
2120 err
= recompress_slot(zram
, pps
->index
, page
,
2121 &num_recomp_pages
, threshold
,
2124 zram_slot_unlock(zram
, pps
->index
);
2125 release_pp_slot(zram
, pps
);
2138 release_pp_ctl(zram
, ctl
);
2139 atomic_set(&zram
->pp_in_progress
, 0);
2140 up_read(&zram
->init_lock
);
2145 static void zram_bio_discard(struct zram
*zram
, struct bio
*bio
)
2147 size_t n
= bio
->bi_iter
.bi_size
;
2148 u32 index
= bio
->bi_iter
.bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
2149 u32 offset
= (bio
->bi_iter
.bi_sector
& (SECTORS_PER_PAGE
- 1)) <<
	/*
	 * zram manages data in physical block size units. Because logical
	 * block size isn't identical with physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block and
	 * decompressing and partially zeroing and re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory. So skipping this logical block
	 * is appropriate here.
	 */
2163 if (n
<= (PAGE_SIZE
- offset
))
2166 n
-= (PAGE_SIZE
- offset
);
2170 while (n
>= PAGE_SIZE
) {
2171 zram_slot_lock(zram
, index
);
2172 zram_free_page(zram
, index
);
2173 zram_slot_unlock(zram
, index
);
2174 atomic64_inc(&zram
->stats
.notify_free
);
2182 static void zram_bio_read(struct zram
*zram
, struct bio
*bio
)
2184 unsigned long start_time
= bio_start_io_acct(bio
);
2185 struct bvec_iter iter
= bio
->bi_iter
;
2188 u32 index
= iter
.bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
2189 u32 offset
= (iter
.bi_sector
& (SECTORS_PER_PAGE
- 1)) <<
2191 struct bio_vec bv
= bio_iter_iovec(bio
, iter
);
2193 bv
.bv_len
= min_t(u32
, bv
.bv_len
, PAGE_SIZE
- offset
);
2195 if (zram_bvec_read(zram
, &bv
, index
, offset
, bio
) < 0) {
2196 atomic64_inc(&zram
->stats
.failed_reads
);
2197 bio
->bi_status
= BLK_STS_IOERR
;
2200 flush_dcache_page(bv
.bv_page
);
2202 zram_slot_lock(zram
, index
);
2203 zram_accessed(zram
, index
);
2204 zram_slot_unlock(zram
, index
);
2206 bio_advance_iter_single(bio
, &iter
, bv
.bv_len
);
2207 } while (iter
.bi_size
);
2209 bio_end_io_acct(bio
, start_time
);
2213 static void zram_bio_write(struct zram
*zram
, struct bio
*bio
)
2215 unsigned long start_time
= bio_start_io_acct(bio
);
2216 struct bvec_iter iter
= bio
->bi_iter
;
2219 u32 index
= iter
.bi_sector
>> SECTORS_PER_PAGE_SHIFT
;
2220 u32 offset
= (iter
.bi_sector
& (SECTORS_PER_PAGE
- 1)) <<
2222 struct bio_vec bv
= bio_iter_iovec(bio
, iter
);
2224 bv
.bv_len
= min_t(u32
, bv
.bv_len
, PAGE_SIZE
- offset
);
2226 if (zram_bvec_write(zram
, &bv
, index
, offset
, bio
) < 0) {
2227 atomic64_inc(&zram
->stats
.failed_writes
);
2228 bio
->bi_status
= BLK_STS_IOERR
;
2232 zram_slot_lock(zram
, index
);
2233 zram_accessed(zram
, index
);
2234 zram_slot_unlock(zram
, index
);
2236 bio_advance_iter_single(bio
, &iter
, bv
.bv_len
);
2237 } while (iter
.bi_size
);
2239 bio_end_io_acct(bio
, start_time
);
2244 * Handler function for all zram I/O requests.
2246 static void zram_submit_bio(struct bio
*bio
)
2248 struct zram
*zram
= bio
->bi_bdev
->bd_disk
->private_data
;
2250 switch (bio_op(bio
)) {
2252 zram_bio_read(zram
, bio
);
2255 zram_bio_write(zram
, bio
);
2257 case REQ_OP_DISCARD
:
2258 case REQ_OP_WRITE_ZEROES
:
2259 zram_bio_discard(zram
, bio
);
2267 static void zram_slot_free_notify(struct block_device
*bdev
,
2268 unsigned long index
)
2272 zram
= bdev
->bd_disk
->private_data
;
2274 atomic64_inc(&zram
->stats
.notify_free
);
2275 if (!zram_slot_trylock(zram
, index
)) {
2276 atomic64_inc(&zram
->stats
.miss_free
);
2280 zram_free_page(zram
, index
);
2281 zram_slot_unlock(zram
, index
);
2284 static void zram_comp_params_reset(struct zram
*zram
)
2288 for (prio
= ZRAM_PRIMARY_COMP
; prio
< ZRAM_MAX_COMPS
; prio
++) {
2289 comp_params_reset(zram
, prio
);
2293 static void zram_destroy_comps(struct zram
*zram
)
2297 for (prio
= ZRAM_PRIMARY_COMP
; prio
< ZRAM_MAX_COMPS
; prio
++) {
2298 struct zcomp
*comp
= zram
->comps
[prio
];
2300 zram
->comps
[prio
] = NULL
;
2303 zcomp_destroy(comp
);
2304 zram
->num_active_comps
--;
2307 for (prio
= ZRAM_PRIMARY_COMP
; prio
< ZRAM_MAX_COMPS
; prio
++) {
2308 /* Do not free statically defined compression algorithms */
2309 if (zram
->comp_algs
[prio
] != default_compressor
)
2310 kfree(zram
->comp_algs
[prio
]);
2311 zram
->comp_algs
[prio
] = NULL
;
2314 zram_comp_params_reset(zram
);
2317 static void zram_reset_device(struct zram
*zram
)
2319 down_write(&zram
->init_lock
);
2321 zram
->limit_pages
= 0;
2323 if (!init_done(zram
)) {
2324 up_write(&zram
->init_lock
);
2328 set_capacity_and_notify(zram
->disk
, 0);
2329 part_stat_set_all(zram
->disk
->part0
, 0);
2331 /* I/O operation under all of CPU are done so let's free */
2332 zram_meta_free(zram
, zram
->disksize
);
2334 zram_destroy_comps(zram
);
2335 memset(&zram
->stats
, 0, sizeof(zram
->stats
));
2336 atomic_set(&zram
->pp_in_progress
, 0);
2339 comp_algorithm_set(zram
, ZRAM_PRIMARY_COMP
, default_compressor
);
2340 up_write(&zram
->init_lock
);
2343 static ssize_t
disksize_store(struct device
*dev
,
2344 struct device_attribute
*attr
, const char *buf
, size_t len
)
2348 struct zram
*zram
= dev_to_zram(dev
);
2352 disksize
= memparse(buf
, NULL
);
2356 down_write(&zram
->init_lock
);
2357 if (init_done(zram
)) {
2358 pr_info("Cannot change disksize for initialized device\n");
2363 disksize
= PAGE_ALIGN(disksize
);
2364 if (!zram_meta_alloc(zram
, disksize
)) {
2369 for (prio
= ZRAM_PRIMARY_COMP
; prio
< ZRAM_MAX_COMPS
; prio
++) {
2370 if (!zram
->comp_algs
[prio
])
2373 comp
= zcomp_create(zram
->comp_algs
[prio
],
2374 &zram
->params
[prio
]);
2376 pr_err("Cannot initialise %s compressing backend\n",
2377 zram
->comp_algs
[prio
]);
2378 err
= PTR_ERR(comp
);
2379 goto out_free_comps
;
2382 zram
->comps
[prio
] = comp
;
2383 zram
->num_active_comps
++;
2385 zram
->disksize
= disksize
;
2386 set_capacity_and_notify(zram
->disk
, zram
->disksize
>> SECTOR_SHIFT
);
2387 up_write(&zram
->init_lock
);
2392 zram_destroy_comps(zram
);
2393 zram_meta_free(zram
, disksize
);
2395 up_write(&zram
->init_lock
);
2399 static ssize_t
reset_store(struct device
*dev
,
2400 struct device_attribute
*attr
, const char *buf
, size_t len
)
2403 unsigned short do_reset
;
2405 struct gendisk
*disk
;
2407 ret
= kstrtou16(buf
, 10, &do_reset
);
2414 zram
= dev_to_zram(dev
);
2417 mutex_lock(&disk
->open_mutex
);
2418 /* Do not reset an active device or claimed device */
2419 if (disk_openers(disk
) || zram
->claim
) {
2420 mutex_unlock(&disk
->open_mutex
);
2424 /* From now on, anyone can't open /dev/zram[0-9] */
2426 mutex_unlock(&disk
->open_mutex
);
2428 /* Make sure all the pending I/O are finished */
2429 sync_blockdev(disk
->part0
);
2430 zram_reset_device(zram
);
2432 mutex_lock(&disk
->open_mutex
);
2433 zram
->claim
= false;
2434 mutex_unlock(&disk
->open_mutex
);
2439 static int zram_open(struct gendisk
*disk
, blk_mode_t mode
)
2441 struct zram
*zram
= disk
->private_data
;
2443 WARN_ON(!mutex_is_locked(&disk
->open_mutex
));
2445 /* zram was claimed to reset so open request fails */
2451 static const struct block_device_operations zram_devops
= {
2453 .submit_bio
= zram_submit_bio
,
2454 .swap_slot_free_notify
= zram_slot_free_notify
,
2455 .owner
= THIS_MODULE
2458 static DEVICE_ATTR_WO(compact
);
2459 static DEVICE_ATTR_RW(disksize
);
2460 static DEVICE_ATTR_RO(initstate
);
2461 static DEVICE_ATTR_WO(reset
);
2462 static DEVICE_ATTR_WO(mem_limit
);
2463 static DEVICE_ATTR_WO(mem_used_max
);
2464 static DEVICE_ATTR_WO(idle
);
2465 static DEVICE_ATTR_RW(max_comp_streams
);
2466 static DEVICE_ATTR_RW(comp_algorithm
);
2467 #ifdef CONFIG_ZRAM_WRITEBACK
2468 static DEVICE_ATTR_RW(backing_dev
);
2469 static DEVICE_ATTR_WO(writeback
);
2470 static DEVICE_ATTR_RW(writeback_limit
);
2471 static DEVICE_ATTR_RW(writeback_limit_enable
);
2473 #ifdef CONFIG_ZRAM_MULTI_COMP
2474 static DEVICE_ATTR_RW(recomp_algorithm
);
2475 static DEVICE_ATTR_WO(recompress
);
2477 static DEVICE_ATTR_WO(algorithm_params
);
2479 static struct attribute
*zram_disk_attrs
[] = {
2480 &dev_attr_disksize
.attr
,
2481 &dev_attr_initstate
.attr
,
2482 &dev_attr_reset
.attr
,
2483 &dev_attr_compact
.attr
,
2484 &dev_attr_mem_limit
.attr
,
2485 &dev_attr_mem_used_max
.attr
,
2486 &dev_attr_idle
.attr
,
2487 &dev_attr_max_comp_streams
.attr
,
2488 &dev_attr_comp_algorithm
.attr
,
2489 #ifdef CONFIG_ZRAM_WRITEBACK
2490 &dev_attr_backing_dev
.attr
,
2491 &dev_attr_writeback
.attr
,
2492 &dev_attr_writeback_limit
.attr
,
2493 &dev_attr_writeback_limit_enable
.attr
,
2495 &dev_attr_io_stat
.attr
,
2496 &dev_attr_mm_stat
.attr
,
2497 #ifdef CONFIG_ZRAM_WRITEBACK
2498 &dev_attr_bd_stat
.attr
,
2500 &dev_attr_debug_stat
.attr
,
2501 #ifdef CONFIG_ZRAM_MULTI_COMP
2502 &dev_attr_recomp_algorithm
.attr
,
2503 &dev_attr_recompress
.attr
,
2505 &dev_attr_algorithm_params
.attr
,
2509 ATTRIBUTE_GROUPS(zram_disk
);
/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
2515 static int zram_add(void)
2517 struct queue_limits lim
= {
2518 .logical_block_size
= ZRAM_LOGICAL_BLOCK_SIZE
,
2520 * To ensure that we always get PAGE_SIZE aligned and
2521 * n*PAGE_SIZED sized I/O requests.
2523 .physical_block_size
= PAGE_SIZE
,
2524 .io_min
= PAGE_SIZE
,
2525 .io_opt
= PAGE_SIZE
,
2526 .max_hw_discard_sectors
= UINT_MAX
,
2528 * zram_bio_discard() will clear all logical blocks if logical
2529 * block size is identical with physical block size(PAGE_SIZE).
2530 * But if it is different, we will skip discarding some parts of
2531 * logical blocks in the part of the request range which isn't
2532 * aligned to physical block size. So we can't ensure that all
2533 * discarded logical blocks are zeroed.
2535 #if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
2536 .max_write_zeroes_sectors
= UINT_MAX
,
2538 .features
= BLK_FEAT_STABLE_WRITES
|
2539 BLK_FEAT_SYNCHRONOUS
,
2544 zram
= kzalloc(sizeof(struct zram
), GFP_KERNEL
);
2548 ret
= idr_alloc(&zram_index_idr
, zram
, 0, 0, GFP_KERNEL
);
2553 init_rwsem(&zram
->init_lock
);
2554 #ifdef CONFIG_ZRAM_WRITEBACK
2555 spin_lock_init(&zram
->wb_limit_lock
);
2558 /* gendisk structure */
2559 zram
->disk
= blk_alloc_disk(&lim
, NUMA_NO_NODE
);
2560 if (IS_ERR(zram
->disk
)) {
2561 pr_err("Error allocating disk structure for device %d\n",
2563 ret
= PTR_ERR(zram
->disk
);
2567 zram
->disk
->major
= zram_major
;
2568 zram
->disk
->first_minor
= device_id
;
2569 zram
->disk
->minors
= 1;
2570 zram
->disk
->flags
|= GENHD_FL_NO_PART
;
2571 zram
->disk
->fops
= &zram_devops
;
2572 zram
->disk
->private_data
= zram
;
2573 snprintf(zram
->disk
->disk_name
, 16, "zram%d", device_id
);
2574 atomic_set(&zram
->pp_in_progress
, 0);
2575 zram_comp_params_reset(zram
);
2576 comp_algorithm_set(zram
, ZRAM_PRIMARY_COMP
, default_compressor
);
	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
2579 set_capacity(zram
->disk
, 0);
2580 ret
= device_add_disk(NULL
, zram
->disk
, zram_disk_groups
);
2582 goto out_cleanup_disk
;
2584 zram_debugfs_register(zram
);
2585 pr_info("Added device: %s\n", zram
->disk
->disk_name
);
2589 put_disk(zram
->disk
);
2591 idr_remove(&zram_index_idr
, device_id
);
2597 static int zram_remove(struct zram
*zram
)
2601 mutex_lock(&zram
->disk
->open_mutex
);
2602 if (disk_openers(zram
->disk
)) {
2603 mutex_unlock(&zram
->disk
->open_mutex
);
2607 claimed
= zram
->claim
;
2610 mutex_unlock(&zram
->disk
->open_mutex
);
2612 zram_debugfs_unregister(zram
);
	/*
	 * If we were claimed by reset_store(), del_gendisk() will
	 * wait until reset_store() is done, so nothing needs to be done.
	 */
2621 /* Make sure all the pending I/O are finished */
2622 sync_blockdev(zram
->disk
->part0
);
2623 zram_reset_device(zram
);
2626 pr_info("Removed device: %s\n", zram
->disk
->disk_name
);
2628 del_gendisk(zram
->disk
);
2630 /* del_gendisk drains pending reset_store */
2631 WARN_ON_ONCE(claimed
&& zram
->claim
);
2634 * disksize_store() may be called in between zram_reset_device()
2635 * and del_gendisk(), so run the last reset to avoid leaking
2636 * anything allocated with disksize_store()
2638 zram_reset_device(zram
);
2640 put_disk(zram
->disk
);
2645 /* zram-control sysfs attributes */
/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
 * sense that reading from this file does alter the state of your system: it
 * creates a new un-initialized zram device and returns back this device's
 * device_id (or an error code if it fails to create a new device).
 */
2653 static ssize_t
hot_add_show(const struct class *class,
2654 const struct class_attribute
*attr
,
2659 mutex_lock(&zram_index_mutex
);
2661 mutex_unlock(&zram_index_mutex
);
2665 return scnprintf(buf
, PAGE_SIZE
, "%d\n", ret
);
2667 /* This attribute must be set to 0400, so CLASS_ATTR_RO() can not be used */
2668 static struct class_attribute class_attr_hot_add
=
2669 __ATTR(hot_add
, 0400, hot_add_show
, NULL
);
2671 static ssize_t
hot_remove_store(const struct class *class,
2672 const struct class_attribute
*attr
,
2679 /* dev_id is gendisk->first_minor, which is `int' */
2680 ret
= kstrtoint(buf
, 10, &dev_id
);
2686 mutex_lock(&zram_index_mutex
);
2688 zram
= idr_find(&zram_index_idr
, dev_id
);
2690 ret
= zram_remove(zram
);
2692 idr_remove(&zram_index_idr
, dev_id
);
2697 mutex_unlock(&zram_index_mutex
);
2698 return ret
? ret
: count
;
2700 static CLASS_ATTR_WO(hot_remove
);
2702 static struct attribute
*zram_control_class_attrs
[] = {
2703 &class_attr_hot_add
.attr
,
2704 &class_attr_hot_remove
.attr
,
2707 ATTRIBUTE_GROUPS(zram_control_class
);
2709 static struct class zram_control_class
= {
2710 .name
= "zram-control",
2711 .class_groups
= zram_control_class_groups
,
2714 static int zram_remove_cb(int id
, void *ptr
, void *data
)
2716 WARN_ON_ONCE(zram_remove(ptr
));
2720 static void destroy_devices(void)
2722 class_unregister(&zram_control_class
);
2723 idr_for_each(&zram_index_idr
, &zram_remove_cb
, NULL
);
2724 zram_debugfs_destroy();
2725 idr_destroy(&zram_index_idr
);
2726 unregister_blkdev(zram_major
, "zram");
2727 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE
);
2730 static int __init
zram_init(void)
2732 struct zram_table_entry zram_te
;
2735 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS
> sizeof(zram_te
.flags
) * 8);
2737 ret
= cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE
, "block/zram:prepare",
2738 zcomp_cpu_up_prepare
, zcomp_cpu_dead
);
2742 ret
= class_register(&zram_control_class
);
2744 pr_err("Unable to register zram-control class\n");
2745 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE
);
2749 zram_debugfs_create();
2750 zram_major
= register_blkdev(0, "zram");
2751 if (zram_major
<= 0) {
2752 pr_err("Unable to get major number\n");
2753 class_unregister(&zram_control_class
);
2754 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE
);
2758 while (num_devices
!= 0) {
2759 mutex_lock(&zram_index_mutex
);
2761 mutex_unlock(&zram_index_mutex
);
static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");