drivers/block/zram/zram_drv.c

   1 /*
   2  * Compressed RAM block device
   3  *
   4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
   5  *               2012, 2013 Minchan Kim
   6  *
   7  * This code is released using a dual license strategy: BSD/GPL
   8  * You can choose the licence that better fits your requirements.
   9  *
  10  * Released under the terms of 3-clause BSD License
  11  * Released under the terms of GNU General Public License Version 2.0
  12  *
  13  */
  14
  15 #define KMSG_COMPONENT "zram"
  16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  17
  18 #include <linux/module.h>
  19 #include <linux/kernel.h>
  20 #include <linux/bio.h>
  21 #include <linux/bitops.h>
  22 #include <linux/blkdev.h>
  23 #include <linux/buffer_head.h>
  24 #include <linux/device.h>
  25 #include <linux/genhd.h>
  26 #include <linux/highmem.h>
  27 #include <linux/slab.h>
  28 #include <linux/backing-dev.h>
  29 #include <linux/string.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/err.h>
  32 #include <linux/idr.h>
  33 #include <linux/sysfs.h>
  34 #include <linux/cpuhotplug.h>
  35
  36 #include "zram_drv.h"
  37
  38 static DEFINE_IDR(zram_index_idr);
  39 /* idr index must be protected */
  40 static DEFINE_MUTEX(zram_index_mutex);
  41
  42 static int zram_major;
  43 static const char *default_compressor = "lzo";
  44
  45 /* Module params (documentation at end) */
  46 static unsigned int num_devices = 1;
  47
  48 static void zram_free_page(struct zram *zram, size_t index);
  49
  50 static inline bool init_done(struct zram *zram)
  51 {
  52         return zram->disksize;
  53 }
  54
  55 static inline struct zram *dev_to_zram(struct device *dev)
  56 {
  57         return (struct zram *)dev_to_disk(dev)->private_data;
  58 }
  59
  60 static unsigned long zram_get_handle(struct zram *zram, u32 index)
  61 {
  62         return zram->table[index].handle;
  63 }
  64
  65 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
  66 {
  67         zram->table[index].handle = handle;
  68 }
  69
  70 /* flag operations require table entry bit_spin_lock() being held */
  71 static int zram_test_flag(struct zram *zram, u32 index,
  72                         enum zram_pageflags flag)
  73 {
  74         return zram->table[index].value & BIT(flag);
  75 }
  76
  77 static void zram_set_flag(struct zram *zram, u32 index,
  78                         enum zram_pageflags flag)
  79 {
  80         zram->table[index].value |= BIT(flag);
  81 }
  82
  83 static void zram_clear_flag(struct zram *zram, u32 index,
  84                         enum zram_pageflags flag)
  85 {
  86         zram->table[index].value &= ~BIT(flag);
  87 }
  88
  89 static inline void zram_set_element(struct zram *zram, u32 index,
  90                         unsigned long element)
  91 {
  92         zram->table[index].element = element;
  93 }
  94
  95 static unsigned long zram_get_element(struct zram *zram, u32 index)
  96 {
  97         return zram->table[index].element;
  98 }
  99
 100 static size_t zram_get_obj_size(struct zram *zram, u32 index)
 101 {
 102         return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
 103 }
 104
 105 static void zram_set_obj_size(struct zram *zram,
 106                                         u32 index, size_t size)
 107 {
 108         unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
 109
 110         zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
 111 }
 112
 113 #if PAGE_SIZE != 4096
 114 static inline bool is_partial_io(struct bio_vec *bvec)
 115 {
 116         return bvec->bv_len != PAGE_SIZE;
 117 }
 118 #else
 119 static inline bool is_partial_io(struct bio_vec *bvec)
 120 {
 121         return false;
 122 }
 123 #endif
 124
 125 /*
 126  * Check if request is within bounds and aligned on zram logical blocks.
 127  */
 128 static inline bool valid_io_request(struct zram *zram,
 129                 sector_t start, unsigned int size)
 130 {
 131         u64 end, bound;
 132
 133         /* unaligned request */
 134         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 135                 return false;
 136         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 137                 return false;
 138
 139         end = start + (size >> SECTOR_SHIFT);
 140         bound = zram->disksize >> SECTOR_SHIFT;
 141         /* out of range range */
 142         if (unlikely(start >= bound || end > bound || start > end))
 143                 return false;
 144
 145         /* I/O request is valid */
 146         return true;
 147 }
 148
 149 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 150 {
 151         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
 152         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
 153 }
 154
 155 static inline void update_used_max(struct zram *zram,
 156                                         const unsigned long pages)
 157 {
 158         unsigned long old_max, cur_max;
 159
 160         old_max = atomic_long_read(&zram->stats.max_used_pages);
 161
 162         do {
 163                 cur_max = old_max;
 164                 if (pages > cur_max)
 165                         old_max = atomic_long_cmpxchg(
 166                                 &zram->stats.max_used_pages, cur_max, pages);
 167         } while (old_max != cur_max);
 168 }
 169
 170 static inline void zram_fill_page(void *ptr, unsigned long len,
 171                                         unsigned long value)
 172 {
 173         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
 174         memset_l(ptr, value, len / sizeof(unsigned long));
 175 }
 176
 177 static bool page_same_filled(void *ptr, unsigned long *element)
 178 {
 179         unsigned int pos;
 180         unsigned long *page;
 181         unsigned long val;
 182
 183         page = (unsigned long *)ptr;
 184         val = page[0];
 185
 186         for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
 187                 if (val != page[pos])
 188                         return false;
 189         }
 190
 191         *element = val;
 192
 193         return true;
 194 }
 195
 196 static ssize_t initstate_show(struct device *dev,
 197                 struct device_attribute *attr, char *buf)
 198 {
 199         u32 val;
 200         struct zram *zram = dev_to_zram(dev);
 201
 202         down_read(&zram->init_lock);
 203         val = init_done(zram);
 204         up_read(&zram->init_lock);
 205
 206         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
 207 }
 208
 209 static ssize_t disksize_show(struct device *dev,
 210                 struct device_attribute *attr, char *buf)
 211 {
 212         struct zram *zram = dev_to_zram(dev);
 213
 214         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
 215 }
 216
 217 static ssize_t mem_limit_store(struct device *dev,
 218                 struct device_attribute *attr, const char *buf, size_t len)
 219 {
 220         u64 limit;
 221         char *tmp;
 222         struct zram *zram = dev_to_zram(dev);
 223
 224         limit = memparse(buf, &tmp);
 225         if (buf == tmp) /* no chars parsed, invalid input */
 226                 return -EINVAL;
 227
 228         down_write(&zram->init_lock);
 229         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
 230         up_write(&zram->init_lock);
 231
 232         return len;
 233 }
 234
 235 static ssize_t mem_used_max_store(struct device *dev,
 236                 struct device_attribute *attr, const char *buf, size_t len)
 237 {
 238         int err;
 239         unsigned long val;
 240         struct zram *zram = dev_to_zram(dev);
 241
 242         err = kstrtoul(buf, 10, &val);
 243         if (err || val != 0)
 244                 return -EINVAL;
 245
 246         down_read(&zram->init_lock);
 247         if (init_done(zram)) {
 248                 atomic_long_set(&zram->stats.max_used_pages,
 249                                 zs_get_total_pages(zram->mem_pool));
 250         }
 251         up_read(&zram->init_lock);
 252
 253         return len;
 254 }
 255
 256 #ifdef CONFIG_ZRAM_WRITEBACK
 257 static bool zram_wb_enabled(struct zram *zram)
 258 {
 259         return zram->backing_dev;
 260 }
 261
 262 static void reset_bdev(struct zram *zram)
 263 {
 264         struct block_device *bdev;
 265
 266         if (!zram_wb_enabled(zram))
 267                 return;
 268
 269         bdev = zram->bdev;
 270         if (zram->old_block_size)
 271                 set_blocksize(bdev, zram->old_block_size);
 272         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 273         /* hope filp_close flush all of IO */
 274         filp_close(zram->backing_dev, NULL);
 275         zram->backing_dev = NULL;
 276         zram->old_block_size = 0;
 277         zram->bdev = NULL;
 278
 279         kvfree(zram->bitmap);
 280         zram->bitmap = NULL;
 281 }
 282
 283 static ssize_t backing_dev_show(struct device *dev,
 284                 struct device_attribute *attr, char *buf)
 285 {
 286         struct zram *zram = dev_to_zram(dev);
 287         struct file *file = zram->backing_dev;
 288         char *p;
 289         ssize_t ret;
 290
 291         down_read(&zram->init_lock);
 292         if (!zram_wb_enabled(zram)) {
 293                 memcpy(buf, "none\n", 5);
 294                 up_read(&zram->init_lock);
 295                 return 5;
 296         }
 297
 298         p = file_path(file, buf, PAGE_SIZE - 1);
 299         if (IS_ERR(p)) {
 300                 ret = PTR_ERR(p);
 301                 goto out;
 302         }
 303
 304         ret = strlen(p);
 305         memmove(buf, p, ret);
 306         buf[ret++] = '\n';
 307 out:
 308         up_read(&zram->init_lock);
 309         return ret;
 310 }
 311
 312 static ssize_t backing_dev_store(struct device *dev,
 313                 struct device_attribute *attr, const char *buf, size_t len)
 314 {
 315         char *file_name;
 316         struct file *backing_dev = NULL;
 317         struct inode *inode;
 318         struct address_space *mapping;
 319         unsigned int bitmap_sz, old_block_size = 0;
 320         unsigned long nr_pages, *bitmap = NULL;
 321         struct block_device *bdev = NULL;
 322         int err;
 323         struct zram *zram = dev_to_zram(dev);
 324
 325         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
 326         if (!file_name)
 327                 return -ENOMEM;
 328
 329         down_write(&zram->init_lock);
 330         if (init_done(zram)) {
 331                 pr_info("Can't setup backing device for initialized device\n");
 332                 err = -EBUSY;
 333                 goto out;
 334         }
 335
 336         strlcpy(file_name, buf, len);
 337
 338         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
 339         if (IS_ERR(backing_dev)) {
 340                 err = PTR_ERR(backing_dev);
 341                 backing_dev = NULL;
 342                 goto out;
 343         }
 344
 345         mapping = backing_dev->f_mapping;
 346         inode = mapping->host;
 347
 348         /* Support only block device in this moment */
 349         if (!S_ISBLK(inode->i_mode)) {
 350                 err = -ENOTBLK;
 351                 goto out;
 352         }
 353
 354         bdev = bdgrab(I_BDEV(inode));
 355         err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
 356         if (err < 0)
 357                 goto out;
 358
 359         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
 360         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
 361         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
 362         if (!bitmap) {
 363                 err = -ENOMEM;
 364                 goto out;
 365         }
 366
 367         old_block_size = block_size(bdev);
 368         err = set_blocksize(bdev, PAGE_SIZE);
 369         if (err)
 370                 goto out;
 371
 372         reset_bdev(zram);
 373         spin_lock_init(&zram->bitmap_lock);
 374
 375         zram->old_block_size = old_block_size;
 376         zram->bdev = bdev;
 377         zram->backing_dev = backing_dev;
 378         zram->bitmap = bitmap;
 379         zram->nr_pages = nr_pages;
 380         up_write(&zram->init_lock);
 381
 382         pr_info("setup backing device %s\n", file_name);
 383         kfree(file_name);
 384
 385         return len;
 386 out:
 387         if (bitmap)
 388                 kvfree(bitmap);
 389
 390         if (bdev)
 391                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 392
 393         if (backing_dev)
 394                 filp_close(backing_dev, NULL);
 395
 396         up_write(&zram->init_lock);
 397
 398         kfree(file_name);
 399
 400         return err;
 401 }
 402
 403 static unsigned long get_entry_bdev(struct zram *zram)
 404 {
 405         unsigned long entry;
 406
 407         spin_lock(&zram->bitmap_lock);
 408         /* skip 0 bit to confuse zram.handle = 0 */
 409         entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
 410         if (entry == zram->nr_pages) {
 411                 spin_unlock(&zram->bitmap_lock);
 412                 return 0;
 413         }
 414
 415         set_bit(entry, zram->bitmap);
 416         spin_unlock(&zram->bitmap_lock);
 417
 418         return entry;
 419 }
 420
 421 static void put_entry_bdev(struct zram *zram, unsigned long entry)
 422 {
 423         int was_set;
 424
 425         spin_lock(&zram->bitmap_lock);
 426         was_set = test_and_clear_bit(entry, zram->bitmap);
 427         spin_unlock(&zram->bitmap_lock);
 428         WARN_ON_ONCE(!was_set);
 429 }
 430
 431 static void zram_page_end_io(struct bio *bio)
 432 {
 433         struct page *page = bio_first_page_all(bio);
 434
 435         page_endio(page, op_is_write(bio_op(bio)),
 436                         blk_status_to_errno(bio->bi_status));
 437         bio_put(bio);
 438 }
 439
 440 /*
 441  * Returns 1 if the submission is successful.
 442  */
 443 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 444                         unsigned long entry, struct bio *parent)
 445 {
 446         struct bio *bio;
 447
 448         bio = bio_alloc(GFP_ATOMIC, 1);
 449         if (!bio)
 450                 return -ENOMEM;
 451
 452         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 453         bio_set_dev(bio, zram->bdev);
 454         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
 455                 bio_put(bio);
 456                 return -EIO;
 457         }
 458
 459         if (!parent) {
 460                 bio->bi_opf = REQ_OP_READ;
 461                 bio->bi_end_io = zram_page_end_io;
 462         } else {
 463                 bio->bi_opf = parent->bi_opf;
 464                 bio_chain(bio, parent);
 465         }
 466
 467         submit_bio(bio);
 468         return 1;
 469 }
 470
 471 struct zram_work {
 472         struct work_struct work;
 473         struct zram *zram;
 474         unsigned long entry;
 475         struct bio *bio;
 476 };
 477
 478 #if PAGE_SIZE != 4096
 479 static void zram_sync_read(struct work_struct *work)
 480 {
 481         struct bio_vec bvec;
 482         struct zram_work *zw = container_of(work, struct zram_work, work);
 483         struct zram *zram = zw->zram;
 484         unsigned long entry = zw->entry;
 485         struct bio *bio = zw->bio;
 486
 487         read_from_bdev_async(zram, &bvec, entry, bio);
 488 }
 489
 490 /*
 491  * Block layer want one ->make_request_fn to be active at a time
 492  * so if we use chained IO with parent IO in same context,
 493  * it's a deadlock. To avoid, it, it uses worker thread context.
 494  */
 495 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 496                                 unsigned long entry, struct bio *bio)
 497 {
 498         struct zram_work work;
 499
 500         work.zram = zram;
 501         work.entry = entry;
 502         work.bio = bio;
 503
 504         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
 505         queue_work(system_unbound_wq, &work.work);
 506         flush_work(&work.work);
 507         destroy_work_on_stack(&work.work);
 508
 509         return 1;
 510 }
 511 #else
 512 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
 513                                 unsigned long entry, struct bio *bio)
 514 {
 515         WARN_ON(1);
 516         return -EIO;
 517 }
 518 #endif
 519
 520 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 521                         unsigned long entry, struct bio *parent, bool sync)
 522 {
 523         if (sync)
 524                 return read_from_bdev_sync(zram, bvec, entry, parent);
 525         else
 526                 return read_from_bdev_async(zram, bvec, entry, parent);
 527 }
 528
 529 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
 530                                         u32 index, struct bio *parent,
 531                                         unsigned long *pentry)
 532 {
 533         struct bio *bio;
 534         unsigned long entry;
 535
 536         bio = bio_alloc(GFP_ATOMIC, 1);
 537         if (!bio)
 538                 return -ENOMEM;
 539
 540         entry = get_entry_bdev(zram);
 541         if (!entry) {
 542                 bio_put(bio);
 543                 return -ENOSPC;
 544         }
 545
 546         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
 547         bio_set_dev(bio, zram->bdev);
 548         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
 549                                         bvec->bv_offset)) {
 550                 bio_put(bio);
 551                 put_entry_bdev(zram, entry);
 552                 return -EIO;
 553         }
 554
 555         if (!parent) {
 556                 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
 557                 bio->bi_end_io = zram_page_end_io;
 558         } else {
 559                 bio->bi_opf = parent->bi_opf;
 560                 bio_chain(bio, parent);
 561         }
 562
 563         submit_bio(bio);
 564         *pentry = entry;
 565
 566         return 0;
 567 }
 568
 569 static void zram_wb_clear(struct zram *zram, u32 index)
 570 {
 571         unsigned long entry;
 572
 573         zram_clear_flag(zram, index, ZRAM_WB);
 574         entry = zram_get_element(zram, index);
 575         zram_set_element(zram, index, 0);
 576         put_entry_bdev(zram, entry);
 577 }
 578
 579 #else
 580 static bool zram_wb_enabled(struct zram *zram) { return false; }
 581 static inline void reset_bdev(struct zram *zram) {};
 582 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
 583                                         u32 index, struct bio *parent,
 584                                         unsigned long *pentry)
 585
 586 {
 587         return -EIO;
 588 }
 589
 590 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
 591                         unsigned long entry, struct bio *parent, bool sync)
 592 {
 593         return -EIO;
 594 }
 595 static void zram_wb_clear(struct zram *zram, u32 index) {}
 596 #endif
 597
 598
 599 /*
 600  * We switched to per-cpu streams and this attr is not needed anymore.
 601  * However, we will keep it around for some time, because:
 602  * a) we may revert per-cpu streams in the future
 603  * b) it's visible to user space and we need to follow our 2 years
 604  *    retirement rule; but we already have a number of 'soon to be
 605  *    altered' attrs, so max_comp_streams need to wait for the next
 606  *    layoff cycle.
 607  */
 608 static ssize_t max_comp_streams_show(struct device *dev,
 609                 struct device_attribute *attr, char *buf)
 610 {
 611         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
 612 }
 613
 614 static ssize_t max_comp_streams_store(struct device *dev,
 615                 struct device_attribute *attr, const char *buf, size_t len)
 616 {
 617         return len;
 618 }
 619
 620 static ssize_t comp_algorithm_show(struct device *dev,
 621                 struct device_attribute *attr, char *buf)
 622 {
 623         size_t sz;
 624         struct zram *zram = dev_to_zram(dev);
 625
 626         down_read(&zram->init_lock);
 627         sz = zcomp_available_show(zram->compressor, buf);
 628         up_read(&zram->init_lock);
 629
 630         return sz;
 631 }
 632
 633 static ssize_t comp_algorithm_store(struct device *dev,
 634                 struct device_attribute *attr, const char *buf, size_t len)
 635 {
 636         struct zram *zram = dev_to_zram(dev);
 637         char compressor[ARRAY_SIZE(zram->compressor)];
 638         size_t sz;
 639
 640         strlcpy(compressor, buf, sizeof(compressor));
 641         /* ignore trailing newline */
 642         sz = strlen(compressor);
 643         if (sz > 0 && compressor[sz - 1] == '\n')
 644                 compressor[sz - 1] = 0x00;
 645
 646         if (!zcomp_available_algorithm(compressor))
 647                 return -EINVAL;
 648
 649         down_write(&zram->init_lock);
 650         if (init_done(zram)) {
 651                 up_write(&zram->init_lock);
 652                 pr_info("Can't change algorithm for initialized device\n");
 653                 return -EBUSY;
 654         }
 655
 656         strcpy(zram->compressor, compressor);
 657         up_write(&zram->init_lock);
 658         return len;
 659 }
 660
 661 static ssize_t compact_store(struct device *dev,
 662                 struct device_attribute *attr, const char *buf, size_t len)
 663 {
 664         struct zram *zram = dev_to_zram(dev);
 665
 666         down_read(&zram->init_lock);
 667         if (!init_done(zram)) {
 668                 up_read(&zram->init_lock);
 669                 return -EINVAL;
 670         }
 671
 672         zs_compact(zram->mem_pool);
 673         up_read(&zram->init_lock);
 674
 675         return len;
 676 }
 677
 678 static ssize_t io_stat_show(struct device *dev,
 679                 struct device_attribute *attr, char *buf)
 680 {
 681         struct zram *zram = dev_to_zram(dev);
 682         ssize_t ret;
 683
 684         down_read(&zram->init_lock);
 685         ret = scnprintf(buf, PAGE_SIZE,
 686                         "%8llu %8llu %8llu %8llu\n",
 687                         (u64)atomic64_read(&zram->stats.failed_reads),
 688                         (u64)atomic64_read(&zram->stats.failed_writes),
 689                         (u64)atomic64_read(&zram->stats.invalid_io),
 690                         (u64)atomic64_read(&zram->stats.notify_free));
 691         up_read(&zram->init_lock);
 692
 693         return ret;
 694 }
 695
 696 static ssize_t mm_stat_show(struct device *dev,
 697                 struct device_attribute *attr, char *buf)
 698 {
 699         struct zram *zram = dev_to_zram(dev);
 700         struct zs_pool_stats pool_stats;
 701         u64 orig_size, mem_used = 0;
 702         long max_used;
 703         ssize_t ret;
 704
 705         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
 706
 707         down_read(&zram->init_lock);
 708         if (init_done(zram)) {
 709                 mem_used = zs_get_total_pages(zram->mem_pool);
 710                 zs_pool_stats(zram->mem_pool, &pool_stats);
 711         }
 712
 713         orig_size = atomic64_read(&zram->stats.pages_stored);
 714         max_used = atomic_long_read(&zram->stats.max_used_pages);
 715
 716         ret = scnprintf(buf, PAGE_SIZE,
 717                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
 718                         orig_size << PAGE_SHIFT,
 719                         (u64)atomic64_read(&zram->stats.compr_data_size),
 720                         mem_used << PAGE_SHIFT,
 721                         zram->limit_pages << PAGE_SHIFT,
 722                         max_used << PAGE_SHIFT,
 723                         (u64)atomic64_read(&zram->stats.same_pages),
 724                         pool_stats.pages_compacted);
 725         up_read(&zram->init_lock);
 726
 727         return ret;
 728 }
 729
 730 static ssize_t debug_stat_show(struct device *dev,
 731                 struct device_attribute *attr, char *buf)
 732 {
 733         int version = 1;
 734         struct zram *zram = dev_to_zram(dev);
 735         ssize_t ret;
 736
 737         down_read(&zram->init_lock);
 738         ret = scnprintf(buf, PAGE_SIZE,
 739                         "version: %d\n%8llu\n",
 740                         version,
 741                         (u64)atomic64_read(&zram->stats.writestall));
 742         up_read(&zram->init_lock);
 743
 744         return ret;
 745 }
 746
 747 static DEVICE_ATTR_RO(io_stat);
 748 static DEVICE_ATTR_RO(mm_stat);
 749 static DEVICE_ATTR_RO(debug_stat);
 750
 751 static void zram_slot_lock(struct zram *zram, u32 index)
 752 {
 753         bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
 754 }
 755
 756 static void zram_slot_unlock(struct zram *zram, u32 index)
 757 {
 758         bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
 759 }
 760
 761 static void zram_meta_free(struct zram *zram, u64 disksize)
 762 {
 763         size_t num_pages = disksize >> PAGE_SHIFT;
 764         size_t index;
 765
 766         /* Free all pages that are still in this zram device */
 767         for (index = 0; index < num_pages; index++)
 768                 zram_free_page(zram, index);
 769
 770         zs_destroy_pool(zram->mem_pool);
 771         vfree(zram->table);
 772 }
 773
 774 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 775 {
 776         size_t num_pages;
 777
 778         num_pages = disksize >> PAGE_SHIFT;
 779         zram->table = vzalloc(num_pages * sizeof(*zram->table));
 780         if (!zram->table)
 781                 return false;
 782
 783         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
 784         if (!zram->mem_pool) {
 785                 vfree(zram->table);
 786                 return false;
 787         }
 788
 789         return true;
 790 }
 791
 792 /*
 793  * To protect concurrent access to the same index entry,
 794  * caller should hold this table index entry's bit_spinlock to
 795  * indicate this index entry is accessing.
 796  */
 797 static void zram_free_page(struct zram *zram, size_t index)
 798 {
 799         unsigned long handle;
 800
 801         if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
 802                 zram_wb_clear(zram, index);
 803                 atomic64_dec(&zram->stats.pages_stored);
 804                 return;
 805         }
 806
 807         /*
 808          * No memory is allocated for same element filled pages.
 809          * Simply clear same page flag.
 810          */
 811         if (zram_test_flag(zram, index, ZRAM_SAME)) {
 812                 zram_clear_flag(zram, index, ZRAM_SAME);
 813                 zram_set_element(zram, index, 0);
 814                 atomic64_dec(&zram->stats.same_pages);
 815                 atomic64_dec(&zram->stats.pages_stored);
 816                 return;
 817         }
 818
 819         handle = zram_get_handle(zram, index);
 820         if (!handle)
 821                 return;
 822
 823         zs_free(zram->mem_pool, handle);
 824
 825         atomic64_sub(zram_get_obj_size(zram, index),
 826                         &zram->stats.compr_data_size);
 827         atomic64_dec(&zram->stats.pages_stored);
 828
 829         zram_set_handle(zram, index, 0);
 830         zram_set_obj_size(zram, index, 0);
 831 }
 832
 833 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 834                                 struct bio *bio, bool partial_io)
 835 {
 836         int ret;
 837         unsigned long handle;
 838         unsigned int size;
 839         void *src, *dst;
 840
 841         if (zram_wb_enabled(zram)) {
 842                 zram_slot_lock(zram, index);
 843                 if (zram_test_flag(zram, index, ZRAM_WB)) {
 844                         struct bio_vec bvec;
 845
 846                         zram_slot_unlock(zram, index);
 847
 848                         bvec.bv_page = page;
 849                         bvec.bv_len = PAGE_SIZE;
 850                         bvec.bv_offset = 0;
 851                         return read_from_bdev(zram, &bvec,
 852                                         zram_get_element(zram, index),
 853                                         bio, partial_io);
 854                 }
 855                 zram_slot_unlock(zram, index);
 856         }
 857
 858         zram_slot_lock(zram, index);
 859         handle = zram_get_handle(zram, index);
 860         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
 861                 unsigned long value;
 862                 void *mem;
 863
 864                 value = handle ? zram_get_element(zram, index) : 0;
 865                 mem = kmap_atomic(page);
 866                 zram_fill_page(mem, PAGE_SIZE, value);
 867                 kunmap_atomic(mem);
 868                 zram_slot_unlock(zram, index);
 869                 return 0;
 870         }
 871
 872         size = zram_get_obj_size(zram, index);
 873
 874         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 875         if (size == PAGE_SIZE) {
 876                 dst = kmap_atomic(page);
 877                 memcpy(dst, src, PAGE_SIZE);
 878                 kunmap_atomic(dst);
 879                 ret = 0;
 880         } else {
 881                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
 882
 883                 dst = kmap_atomic(page);
 884                 ret = zcomp_decompress(zstrm, src, size, dst);
 885                 kunmap_atomic(dst);
 886                 zcomp_stream_put(zram->comp);
 887         }
 888         zs_unmap_object(zram->mem_pool, handle);
 889         zram_slot_unlock(zram, index);
 890
 891         /* Should NEVER happen. Return bio error if it does. */
 892         if (unlikely(ret))
 893                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
 894
 895         return ret;
 896 }
 897
 898 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
 899                                 u32 index, int offset, struct bio *bio)
 900 {
 901         int ret;
 902         struct page *page;
 903
 904         page = bvec->bv_page;
 905         if (is_partial_io(bvec)) {
 906                 /* Use a temporary buffer to decompress the page */
 907                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
 908                 if (!page)
 909                         return -ENOMEM;
 910         }
 911
 912         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
 913         if (unlikely(ret))
 914                 goto out;
 915
 916         if (is_partial_io(bvec)) {
 917                 void *dst = kmap_atomic(bvec->bv_page);
 918                 void *src = kmap_atomic(page);
 919
 920                 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
 921                 kunmap_atomic(src);
 922                 kunmap_atomic(dst);
 923         }
 924 out:
 925         if (is_partial_io(bvec))
 926                 __free_page(page);
 927
 928         return ret;
 929 }
 930
 931 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 932                                 u32 index, struct bio *bio)
 933 {
 934         int ret = 0;
 935         unsigned long alloced_pages;
 936         unsigned long handle = 0;
 937         unsigned int comp_len = 0;
 938         void *src, *dst, *mem;
 939         struct zcomp_strm *zstrm;
 940         struct page *page = bvec->bv_page;
 941         unsigned long element = 0;
 942         enum zram_pageflags flags = 0;
 943         bool allow_wb = true;
 944
 945         mem = kmap_atomic(page);
 946         if (page_same_filled(mem, &element)) {
 947                 kunmap_atomic(mem);
 948                 /* Free memory associated with this sector now. */
 949                 flags = ZRAM_SAME;
 950                 atomic64_inc(&zram->stats.same_pages);
 951                 goto out;
 952         }
 953         kunmap_atomic(mem);
 954
 955 compress_again:
 956         zstrm = zcomp_stream_get(zram->comp);
 957         src = kmap_atomic(page);
 958         ret = zcomp_compress(zstrm, src, &comp_len);
 959         kunmap_atomic(src);
 960
 961         if (unlikely(ret)) {
 962                 zcomp_stream_put(zram->comp);
 963                 pr_err("Compression failed! err=%d\n", ret);
 964                 zs_free(zram->mem_pool, handle);
 965                 return ret;
 966         }
 967
 968         if (unlikely(comp_len > max_zpage_size)) {
 969                 if (zram_wb_enabled(zram) && allow_wb) {
 970                         zcomp_stream_put(zram->comp);
 971                         ret = write_to_bdev(zram, bvec, index, bio, &element);
 972                         if (!ret) {
 973                                 flags = ZRAM_WB;
 974                                 ret = 1;
 975                                 goto out;
 976                         }
 977                         allow_wb = false;
 978                         goto compress_again;
 979                 }
 980                 comp_len = PAGE_SIZE;
 981         }
 982
 983         /*
 984          * handle allocation has 2 paths:
 985          * a) fast path is executed with preemption disabled (for
 986          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
 987          *  since we can't sleep;
 988          * b) slow path enables preemption and attempts to allocate
 989          *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
 990          *  put per-cpu compression stream and, thus, to re-do
 991          *  the compression once handle is allocated.
 992          *
 993          * if we have a 'non-null' handle here then we are coming
 994          * from the slow path and handle has already been allocated.
 995          */
 996         if (!handle)
 997                 handle = zs_malloc(zram->mem_pool, comp_len,
 998                                 __GFP_KSWAPD_RECLAIM |
 999                                 __GFP_NOWARN |
1000                                 __GFP_HIGHMEM |
1001                                 __GFP_MOVABLE);
1002         if (!handle) {
1003                 zcomp_stream_put(zram->comp);
1004                 atomic64_inc(&zram->stats.writestall);
1005                 handle = zs_malloc(zram->mem_pool, comp_len,
1006                                 GFP_NOIO | __GFP_HIGHMEM |
1007                                 __GFP_MOVABLE);
1008                 if (handle)
1009                         goto compress_again;
1010                 return -ENOMEM;
1011         }
1012
1013         alloced_pages = zs_get_total_pages(zram->mem_pool);
1014         update_used_max(zram, alloced_pages);
1015
1016         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1017                 zcomp_stream_put(zram->comp);
1018                 zs_free(zram->mem_pool, handle);
1019                 return -ENOMEM;
1020         }
1021
1022         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1023
1024         src = zstrm->buffer;
1025         if (comp_len == PAGE_SIZE)
1026                 src = kmap_atomic(page);
1027         memcpy(dst, src, comp_len);
1028         if (comp_len == PAGE_SIZE)
1029                 kunmap_atomic(src);
1030
1031         zcomp_stream_put(zram->comp);
1032         zs_unmap_object(zram->mem_pool, handle);
1033         atomic64_add(comp_len, &zram->stats.compr_data_size);
1034 out:
1035         /*
1036          * Free memory associated with this sector
1037          * before overwriting unused sectors.
1038          */
1039         zram_slot_lock(zram, index);
1040         zram_free_page(zram, index);
1041
1042         if (flags) {
1043                 zram_set_flag(zram, index, flags);
1044                 zram_set_element(zram, index, element);
1045         }  else {
1046                 zram_set_handle(zram, index, handle);
1047                 zram_set_obj_size(zram, index, comp_len);
1048         }
1049         zram_slot_unlock(zram, index);
1050
1051         /* Update stats */
1052         atomic64_inc(&zram->stats.pages_stored);
1053         return ret;
1054 }
1055
1056 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1057                                 u32 index, int offset, struct bio *bio)
1058 {
1059         int ret;
1060         struct page *page = NULL;
1061         void *src;
1062         struct bio_vec vec;
1063
1064         vec = *bvec;
1065         if (is_partial_io(bvec)) {
1066                 void *dst;
1067                 /*
1068                  * This is a partial IO. We need to read the full page
1069                  * before to write the changes.
1070                  */
1071                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1072                 if (!page)
1073                         return -ENOMEM;
1074
1075                 ret = __zram_bvec_read(zram, page, index, bio, true);
1076                 if (ret)
1077                         goto out;
1078
1079                 src = kmap_atomic(bvec->bv_page);
1080                 dst = kmap_atomic(page);
1081                 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1082                 kunmap_atomic(dst);
1083                 kunmap_atomic(src);
1084
1085                 vec.bv_page = page;
1086                 vec.bv_len = PAGE_SIZE;
1087                 vec.bv_offset = 0;
1088         }
1089
1090         ret = __zram_bvec_write(zram, &vec, index, bio);
1091 out:
1092         if (is_partial_io(bvec))
1093                 __free_page(page);
1094         return ret;
1095 }
1096
1097 /*
1098  * zram_bio_discard - handler on discard request
1099  * @index: physical block index in PAGE_SIZE units
1100  * @offset: byte offset within physical block
1101  */
1102 static void zram_bio_discard(struct zram *zram, u32 index,
1103                              int offset, struct bio *bio)
1104 {
1105         size_t n = bio->bi_iter.bi_size;
1106
1107         /*
1108          * zram manages data in physical block size units. Because logical block
1109          * size isn't identical with physical block size on some arch, we
1110          * could get a discard request pointing to a specific offset within a
1111          * certain physical block.  Although we can handle this request by
1112          * reading that physiclal block and decompressing and partially zeroing
1113          * and re-compressing and then re-storing it, this isn't reasonable
1114          * because our intent with a discard request is to save memory.  So
1115          * skipping this logical block is appropriate here.
1116          */
1117         if (offset) {
1118                 if (n <= (PAGE_SIZE - offset))
1119                         return;
1120
1121                 n -= (PAGE_SIZE - offset);
1122                 index++;
1123         }
1124
1125         while (n >= PAGE_SIZE) {
1126                 zram_slot_lock(zram, index);
1127                 zram_free_page(zram, index);
1128                 zram_slot_unlock(zram, index);
1129                 atomic64_inc(&zram->stats.notify_free);
1130                 index++;
1131                 n -= PAGE_SIZE;
1132         }
1133 }
1134
1135 /*
1136  * Returns errno if it has some problem. Otherwise return 0 or 1.
1137  * Returns 0 if IO request was done synchronously
1138  * Returns 1 if IO request was successfully submitted.
1139  */
1140 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1141                         int offset, bool is_write, struct bio *bio)
1142 {
1143         unsigned long start_time = jiffies;
1144         int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
1145         struct request_queue *q = zram->disk->queue;
1146         int ret;
1147
1148         generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
1149                         &zram->disk->part0);
1150
1151         if (!is_write) {
1152                 atomic64_inc(&zram->stats.num_reads);
1153                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1154                 flush_dcache_page(bvec->bv_page);
1155         } else {
1156                 atomic64_inc(&zram->stats.num_writes);
1157                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1158         }
1159
1160         generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
1161
1162         if (unlikely(ret < 0)) {
1163                 if (!is_write)
1164                         atomic64_inc(&zram->stats.failed_reads);
1165                 else
1166                         atomic64_inc(&zram->stats.failed_writes);
1167         }
1168
1169         return ret;
1170 }
1171
1172 static void __zram_make_request(struct zram *zram, struct bio *bio)
1173 {
1174         int offset;
1175         u32 index;
1176         struct bio_vec bvec;
1177         struct bvec_iter iter;
1178
1179         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1180         offset = (bio->bi_iter.bi_sector &
1181                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1182
1183         switch (bio_op(bio)) {
1184         case REQ_OP_DISCARD:
1185         case REQ_OP_WRITE_ZEROES:
1186                 zram_bio_discard(zram, index, offset, bio);
1187                 bio_endio(bio);
1188                 return;
1189         default:
1190                 break;
1191         }
1192
1193         bio_for_each_segment(bvec, bio, iter) {
1194                 struct bio_vec bv = bvec;
1195                 unsigned int unwritten = bvec.bv_len;
1196
1197                 do {
1198                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1199                                                         unwritten);
1200                         if (zram_bvec_rw(zram, &bv, index, offset,
1201                                         op_is_write(bio_op(bio)), bio) < 0)
1202                                 goto out;
1203
1204                         bv.bv_offset += bv.bv_len;
1205                         unwritten -= bv.bv_len;
1206
1207                         update_position(&index, &offset, &bv);
1208                 } while (unwritten);
1209         }
1210
1211         bio_endio(bio);
1212         return;
1213
1214 out:
1215         bio_io_error(bio);
1216 }
1217
1218 /*
1219  * Handler function for all zram I/O requests.
1220  */
1221 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
1222 {
1223         struct zram *zram = queue->queuedata;
1224
1225         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1226                                         bio->bi_iter.bi_size)) {
1227                 atomic64_inc(&zram->stats.invalid_io);
1228                 goto error;
1229         }
1230
1231         __zram_make_request(zram, bio);
1232         return BLK_QC_T_NONE;
1233
1234 error:
1235         bio_io_error(bio);
1236         return BLK_QC_T_NONE;
1237 }
1238
1239 static void zram_slot_free_notify(struct block_device *bdev,
1240                                 unsigned long index)
1241 {
1242         struct zram *zram;
1243
1244         zram = bdev->bd_disk->private_data;
1245
1246         zram_slot_lock(zram, index);
1247         zram_free_page(zram, index);
1248         zram_slot_unlock(zram, index);
1249         atomic64_inc(&zram->stats.notify_free);
1250 }
1251
1252 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1253                        struct page *page, bool is_write)
1254 {
1255         int offset, ret;
1256         u32 index;
1257         struct zram *zram;
1258         struct bio_vec bv;
1259
1260         if (PageTransHuge(page))
1261                 return -ENOTSUPP;
1262         zram = bdev->bd_disk->private_data;
1263
1264         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1265                 atomic64_inc(&zram->stats.invalid_io);
1266                 ret = -EINVAL;
1267                 goto out;
1268         }
1269
1270         index = sector >> SECTORS_PER_PAGE_SHIFT;
1271         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1272
1273         bv.bv_page = page;
1274         bv.bv_len = PAGE_SIZE;
1275         bv.bv_offset = 0;
1276
1277         ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
1278 out:
1279         /*
1280          * If I/O fails, just return error(ie, non-zero) without
1281          * calling page_endio.
1282          * It causes resubmit the I/O with bio request by upper functions
1283          * of rw_page(e.g., swap_readpage, __swap_writepage) and
1284          * bio->bi_end_io does things to handle the error
1285          * (e.g., SetPageError, set_page_dirty and extra works).
1286          */
1287         if (unlikely(ret < 0))
1288                 return ret;
1289
1290         switch (ret) {
1291         case 0:
1292                 page_endio(page, is_write, 0);
1293                 break;
1294         case 1:
1295                 ret = 0;
1296                 break;
1297         default:
1298                 WARN_ON(1);
1299         }
1300         return ret;
1301 }
1302
1303 static void zram_reset_device(struct zram *zram)
1304 {
1305         struct zcomp *comp;
1306         u64 disksize;
1307
1308         down_write(&zram->init_lock);
1309
1310         zram->limit_pages = 0;
1311
1312         if (!init_done(zram)) {
1313                 up_write(&zram->init_lock);
1314                 return;
1315         }
1316
1317         comp = zram->comp;
1318         disksize = zram->disksize;
1319         zram->disksize = 0;
1320
1321         set_capacity(zram->disk, 0);
1322         part_stat_set_all(&zram->disk->part0, 0);
1323
1324         up_write(&zram->init_lock);
1325         /* I/O operation under all of CPU are done so let's free */
1326         zram_meta_free(zram, disksize);
1327         memset(&zram->stats, 0, sizeof(zram->stats));
1328         zcomp_destroy(comp);
1329         reset_bdev(zram);
1330 }
1331
1332 static ssize_t disksize_store(struct device *dev,
1333                 struct device_attribute *attr, const char *buf, size_t len)
1334 {
1335         u64 disksize;
1336         struct zcomp *comp;
1337         struct zram *zram = dev_to_zram(dev);
1338         int err;
1339
1340         disksize = memparse(buf, NULL);
1341         if (!disksize)
1342                 return -EINVAL;
1343
1344         down_write(&zram->init_lock);
1345         if (init_done(zram)) {
1346                 pr_info("Cannot change disksize for initialized device\n");
1347                 err = -EBUSY;
1348                 goto out_unlock;
1349         }
1350
1351         disksize = PAGE_ALIGN(disksize);
1352         if (!zram_meta_alloc(zram, disksize)) {
1353                 err = -ENOMEM;
1354                 goto out_unlock;
1355         }
1356
1357         comp = zcomp_create(zram->compressor);
1358         if (IS_ERR(comp)) {
1359                 pr_err("Cannot initialise %s compressing backend\n",
1360                                 zram->compressor);
1361                 err = PTR_ERR(comp);
1362                 goto out_free_meta;
1363         }
1364
1365         zram->comp = comp;
1366         zram->disksize = disksize;
1367         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1368
1369         revalidate_disk(zram->disk);
1370         up_write(&zram->init_lock);
1371
1372         return len;
1373
1374 out_free_meta:
1375         zram_meta_free(zram, disksize);
1376 out_unlock:
1377         up_write(&zram->init_lock);
1378         return err;
1379 }
1380
1381 static ssize_t reset_store(struct device *dev,
1382                 struct device_attribute *attr, const char *buf, size_t len)
1383 {
1384         int ret;
1385         unsigned short do_reset;
1386         struct zram *zram;
1387         struct block_device *bdev;
1388
1389         ret = kstrtou16(buf, 10, &do_reset);
1390         if (ret)
1391                 return ret;
1392
1393         if (!do_reset)
1394                 return -EINVAL;
1395
1396         zram = dev_to_zram(dev);
1397         bdev = bdget_disk(zram->disk, 0);
1398         if (!bdev)
1399                 return -ENOMEM;
1400
1401         mutex_lock(&bdev->bd_mutex);
1402         /* Do not reset an active device or claimed device */
1403         if (bdev->bd_openers || zram->claim) {
1404                 mutex_unlock(&bdev->bd_mutex);
1405                 bdput(bdev);
1406                 return -EBUSY;
1407         }
1408
1409         /* From now on, anyone can't open /dev/zram[0-9] */
1410         zram->claim = true;
1411         mutex_unlock(&bdev->bd_mutex);
1412
1413         /* Make sure all the pending I/O are finished */
1414         fsync_bdev(bdev);
1415         zram_reset_device(zram);
1416         revalidate_disk(zram->disk);
1417         bdput(bdev);
1418
1419         mutex_lock(&bdev->bd_mutex);
1420         zram->claim = false;
1421         mutex_unlock(&bdev->bd_mutex);
1422
1423         return len;
1424 }
1425
1426 static int zram_open(struct block_device *bdev, fmode_t mode)
1427 {
1428         int ret = 0;
1429         struct zram *zram;
1430
1431         WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1432
1433         zram = bdev->bd_disk->private_data;
1434         /* zram was claimed to reset so open request fails */
1435         if (zram->claim)
1436                 ret = -EBUSY;
1437
1438         return ret;
1439 }
1440
1441 static const struct block_device_operations zram_devops = {
1442         .open = zram_open,
1443         .swap_slot_free_notify = zram_slot_free_notify,
1444         .rw_page = zram_rw_page,
1445         .owner = THIS_MODULE
1446 };
1447
1448 static DEVICE_ATTR_WO(compact);
1449 static DEVICE_ATTR_RW(disksize);
1450 static DEVICE_ATTR_RO(initstate);
1451 static DEVICE_ATTR_WO(reset);
1452 static DEVICE_ATTR_WO(mem_limit);
1453 static DEVICE_ATTR_WO(mem_used_max);
1454 static DEVICE_ATTR_RW(max_comp_streams);
1455 static DEVICE_ATTR_RW(comp_algorithm);
1456 #ifdef CONFIG_ZRAM_WRITEBACK
1457 static DEVICE_ATTR_RW(backing_dev);
1458 #endif
1459
1460 static struct attribute *zram_disk_attrs[] = {
1461         &dev_attr_disksize.attr,
1462         &dev_attr_initstate.attr,
1463         &dev_attr_reset.attr,
1464         &dev_attr_compact.attr,
1465         &dev_attr_mem_limit.attr,
1466         &dev_attr_mem_used_max.attr,
1467         &dev_attr_max_comp_streams.attr,
1468         &dev_attr_comp_algorithm.attr,
1469 #ifdef CONFIG_ZRAM_WRITEBACK
1470         &dev_attr_backing_dev.attr,
1471 #endif
1472         &dev_attr_io_stat.attr,
1473         &dev_attr_mm_stat.attr,
1474         &dev_attr_debug_stat.attr,
1475         NULL,
1476 };
1477
1478 static const struct attribute_group zram_disk_attr_group = {
1479         .attrs = zram_disk_attrs,
1480 };
1481
1482 /*
1483  * Allocate and initialize new zram device. the function returns
1484  * '>= 0' device_id upon success, and negative value otherwise.
1485  */
1486 static int zram_add(void)
1487 {
1488         struct zram *zram;
1489         struct request_queue *queue;
1490         int ret, device_id;
1491
1492         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1493         if (!zram)
1494                 return -ENOMEM;
1495
1496         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1497         if (ret < 0)
1498                 goto out_free_dev;
1499         device_id = ret;
1500
1501         init_rwsem(&zram->init_lock);
1502
1503         queue = blk_alloc_queue(GFP_KERNEL);
1504         if (!queue) {
1505                 pr_err("Error allocating disk queue for device %d\n",
1506                         device_id);
1507                 ret = -ENOMEM;
1508                 goto out_free_idr;
1509         }
1510
1511         blk_queue_make_request(queue, zram_make_request);
1512
1513         /* gendisk structure */
1514         zram->disk = alloc_disk(1);
1515         if (!zram->disk) {
1516                 pr_err("Error allocating disk structure for device %d\n",
1517                         device_id);
1518                 ret = -ENOMEM;
1519                 goto out_free_queue;
1520         }
1521
1522         zram->disk->major = zram_major;
1523         zram->disk->first_minor = device_id;
1524         zram->disk->fops = &zram_devops;
1525         zram->disk->queue = queue;
1526         zram->disk->queue->queuedata = zram;
1527         zram->disk->private_data = zram;
1528         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1529
1530         /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
1531         set_capacity(zram->disk, 0);
1532         /* zram devices sort of resembles non-rotational disks */
1533         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1534         queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1535
1536         /*
1537          * To ensure that we always get PAGE_SIZE aligned
1538          * and n*PAGE_SIZED sized I/O requests.
1539          */
1540         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1541         blk_queue_logical_block_size(zram->disk->queue,
1542                                         ZRAM_LOGICAL_BLOCK_SIZE);
1543         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1544         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1545         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1546         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1547         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1548
1549         /*
1550          * zram_bio_discard() will clear all logical blocks if logical block
1551          * size is identical with physical block size(PAGE_SIZE). But if it is
1552          * different, we will skip discarding some parts of logical blocks in
1553          * the part of the request range which isn't aligned to physical block
1554          * size.  So we can't ensure that all discarded logical blocks are
1555          * zeroed.
1556          */
1557         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1558                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1559
1560         zram->disk->queue->backing_dev_info->capabilities |=
1561                         (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1562         add_disk(zram->disk);
1563
1564         ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1565                                 &zram_disk_attr_group);
1566         if (ret < 0) {
1567                 pr_err("Error creating sysfs group for device %d\n",
1568                                 device_id);
1569                 goto out_free_disk;
1570         }
1571         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1572
1573         pr_info("Added device: %s\n", zram->disk->disk_name);
1574         return device_id;
1575
1576 out_free_disk:
1577         del_gendisk(zram->disk);
1578         put_disk(zram->disk);
1579 out_free_queue:
1580         blk_cleanup_queue(queue);
1581 out_free_idr:
1582         idr_remove(&zram_index_idr, device_id);
1583 out_free_dev:
1584         kfree(zram);
1585         return ret;
1586 }
1587
1588 static int zram_remove(struct zram *zram)
1589 {
1590         struct block_device *bdev;
1591
1592         bdev = bdget_disk(zram->disk, 0);
1593         if (!bdev)
1594                 return -ENOMEM;
1595
1596         mutex_lock(&bdev->bd_mutex);
1597         if (bdev->bd_openers || zram->claim) {
1598                 mutex_unlock(&bdev->bd_mutex);
1599                 bdput(bdev);
1600                 return -EBUSY;
1601         }
1602
1603         zram->claim = true;
1604         mutex_unlock(&bdev->bd_mutex);
1605
1606         /*
1607          * Remove sysfs first, so no one will perform a disksize
1608          * store while we destroy the devices. This also helps during
1609          * hot_remove -- zram_reset_device() is the last holder of
1610          * ->init_lock, no later/concurrent disksize_store() or any
1611          * other sysfs handlers are possible.
1612          */
1613         sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1614                         &zram_disk_attr_group);
1615
1616         /* Make sure all the pending I/O are finished */
1617         fsync_bdev(bdev);
1618         zram_reset_device(zram);
1619         bdput(bdev);
1620
1621         pr_info("Removed device: %s\n", zram->disk->disk_name);
1622
1623         blk_cleanup_queue(zram->disk->queue);
1624         del_gendisk(zram->disk);
1625         put_disk(zram->disk);
1626         kfree(zram);
1627         return 0;
1628 }
1629
1630 /* zram-control sysfs attributes */
1631
1632 /*
1633  * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
1634  * sense that reading from this file does alter the state of your system -- it
1635  * creates a new un-initialized zram device and returns back this device's
1636  * device_id (or an error code if it fails to create a new device).
1637  */
1638 static ssize_t hot_add_show(struct class *class,
1639                         struct class_attribute *attr,
1640                         char *buf)
1641 {
1642         int ret;
1643
1644         mutex_lock(&zram_index_mutex);
1645         ret = zram_add();
1646         mutex_unlock(&zram_index_mutex);
1647
1648         if (ret < 0)
1649                 return ret;
1650         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1651 }
1652 static CLASS_ATTR_RO(hot_add);
1653
1654 static ssize_t hot_remove_store(struct class *class,
1655                         struct class_attribute *attr,
1656                         const char *buf,
1657                         size_t count)
1658 {
1659         struct zram *zram;
1660         int ret, dev_id;
1661
1662         /* dev_id is gendisk->first_minor, which is `int' */
1663         ret = kstrtoint(buf, 10, &dev_id);
1664         if (ret)
1665                 return ret;
1666         if (dev_id < 0)
1667                 return -EINVAL;
1668
1669         mutex_lock(&zram_index_mutex);
1670
1671         zram = idr_find(&zram_index_idr, dev_id);
1672         if (zram) {
1673                 ret = zram_remove(zram);
1674                 if (!ret)
1675                         idr_remove(&zram_index_idr, dev_id);
1676         } else {
1677                 ret = -ENODEV;
1678         }
1679
1680         mutex_unlock(&zram_index_mutex);
1681         return ret ? ret : count;
1682 }
1683 static CLASS_ATTR_WO(hot_remove);
1684
1685 static struct attribute *zram_control_class_attrs[] = {
1686         &class_attr_hot_add.attr,
1687         &class_attr_hot_remove.attr,
1688         NULL,
1689 };
1690 ATTRIBUTE_GROUPS(zram_control_class);
1691
1692 static struct class zram_control_class = {
1693         .name           = "zram-control",
1694         .owner          = THIS_MODULE,
1695         .class_groups   = zram_control_class_groups,
1696 };
1697
1698 static int zram_remove_cb(int id, void *ptr, void *data)
1699 {
1700         zram_remove(ptr);
1701         return 0;
1702 }
1703
1704 static void destroy_devices(void)
1705 {
1706         class_unregister(&zram_control_class);
1707         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1708         idr_destroy(&zram_index_idr);
1709         unregister_blkdev(zram_major, "zram");
1710         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1711 }
1712
1713 static int __init zram_init(void)
1714 {
1715         int ret;
1716
1717         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
1718                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
1719         if (ret < 0)
1720                 return ret;
1721
1722         ret = class_register(&zram_control_class);
1723         if (ret) {
1724                 pr_err("Unable to register zram-control class\n");
1725                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1726                 return ret;
1727         }
1728
1729         zram_major = register_blkdev(0, "zram");
1730         if (zram_major <= 0) {
1731                 pr_err("Unable to get major number\n");
1732                 class_unregister(&zram_control_class);
1733                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1734                 return -EBUSY;
1735         }
1736
1737         while (num_devices != 0) {
1738                 mutex_lock(&zram_index_mutex);
1739                 ret = zram_add();
1740                 mutex_unlock(&zram_index_mutex);
1741                 if (ret < 0)
1742                         goto out_error;
1743                 num_devices--;
1744         }
1745
1746         return 0;
1747
1748 out_error:
1749         destroy_devices();
1750         return ret;
1751 }
1752
1753 static void __exit zram_exit(void)
1754 {
1755         destroy_devices();
1756 }
1757
1758 module_init(zram_init);
1759 module_exit(zram_exit);
1760
1761 module_param(num_devices, uint, 0);
1762 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
1763
1764 MODULE_LICENSE("Dual BSD/GPL");
1765 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1766 MODULE_DESCRIPTION("Compressed RAM Block Device");