fs/btrfs/scrub.c

   1 /*
   2  * Copyright (C) 2011 STRATO.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 021110-1307, USA.
  17  */
  18
  19 #include <linux/sched.h>
  20 #include <linux/pagemap.h>
  21 #include <linux/writeback.h>
  22 #include <linux/blkdev.h>
  23 #include <linux/rbtree.h>
  24 #include <linux/slab.h>
  25 #include <linux/workqueue.h>
  26 #include "ctree.h"
  27 #include "volumes.h"
  28 #include "disk-io.h"
  29 #include "ordered-data.h"
  30
  31 /*
  32  * This is only the first step towards a full-features scrub. It reads all
  33  * extent and super block and verifies the checksums. In case a bad checksum
  34  * is found or the extent cannot be read, good data will be written back if
  35  * any can be found.
  36  *
  37  * Future enhancements:
  38  *  - To enhance the performance, better read-ahead strategies for the
  39  *    extent-tree can be employed.
  40  *  - In case an unrepairable extent is encountered, track which files are
  41  *    affected and report them
  42  *  - In case of a read error on files with nodatasum, map the file and read
  43  *    the extent to trigger a writeback of the good copy
  44  *  - track and record media errors, throw out bad devices
  45  *  - add a mode to also read unallocated space
  46  *  - make the prefetch cancellable
  47  */
  48
  49 struct scrub_bio;
  50 struct scrub_page;
  51 struct scrub_dev;
  52 static void scrub_bio_end_io(struct bio *bio, int err);
  53 static void scrub_checksum(struct btrfs_work *work);
  54 static int scrub_checksum_data(struct scrub_dev *sdev,
  55                                struct scrub_page *spag, void *buffer);
  56 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
  57                                      struct scrub_page *spag, u64 logical,
  58                                      void *buffer);
  59 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
  60 static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
  61 static void scrub_fixup_end_io(struct bio *bio, int err);
  62 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
  63                           struct page *page);
  64 static void scrub_fixup(struct scrub_bio *sbio, int ix);
  65
  66 #define SCRUB_PAGES_PER_BIO     16      /* 64k per bio */
  67 #define SCRUB_BIOS_PER_DEV      16      /* 1 MB per device in flight */
  68
  69 struct scrub_page {
  70         u64                     flags;  /* extent flags */
  71         u64                     generation;
  72         u64                     mirror_num;
  73         int                     have_csum;
  74         u8                      csum[BTRFS_CSUM_SIZE];
  75 };
  76
  77 struct scrub_bio {
  78         int                     index;
  79         struct scrub_dev        *sdev;
  80         struct bio              *bio;
  81         int                     err;
  82         u64                     logical;
  83         u64                     physical;
  84         struct scrub_page       spag[SCRUB_PAGES_PER_BIO];
  85         u64                     count;
  86         int                     next_free;
  87         struct btrfs_work       work;
  88 };
  89
  90 struct scrub_dev {
  91         struct scrub_bio        *bios[SCRUB_BIOS_PER_DEV];
  92         struct btrfs_device     *dev;
  93         int                     first_free;
  94         int                     curr;
  95         atomic_t                in_flight;
  96         spinlock_t              list_lock;
  97         wait_queue_head_t       list_wait;
  98         u16                     csum_size;
  99         struct list_head        csum_list;
 100         atomic_t                cancel_req;
 101         int                     readonly;
 102         /*
 103          * statistics
 104          */
 105         struct btrfs_scrub_progress stat;
 106         spinlock_t              stat_lock;
 107 };
 108
 109 static void scrub_free_csums(struct scrub_dev *sdev)
 110 {
 111         while (!list_empty(&sdev->csum_list)) {
 112                 struct btrfs_ordered_sum *sum;
 113                 sum = list_first_entry(&sdev->csum_list,
 114                                        struct btrfs_ordered_sum, list);
 115                 list_del(&sum->list);
 116                 kfree(sum);
 117         }
 118 }
 119
 120 static void scrub_free_bio(struct bio *bio)
 121 {
 122         int i;
 123         struct page *last_page = NULL;
 124
 125         if (!bio)
 126                 return;
 127
 128         for (i = 0; i < bio->bi_vcnt; ++i) {
 129                 if (bio->bi_io_vec[i].bv_page == last_page)
 130                         continue;
 131                 last_page = bio->bi_io_vec[i].bv_page;
 132                 __free_page(last_page);
 133         }
 134         bio_put(bio);
 135 }
 136
 137 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 138 {
 139         int i;
 140
 141         if (!sdev)
 142                 return;
 143
 144         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 145                 struct scrub_bio *sbio = sdev->bios[i];
 146
 147                 if (!sbio)
 148                         break;
 149
 150                 scrub_free_bio(sbio->bio);
 151                 kfree(sbio);
 152         }
 153
 154         scrub_free_csums(sdev);
 155         kfree(sdev);
 156 }
 157
 158 static noinline_for_stack
 159 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 160 {
 161         struct scrub_dev *sdev;
 162         int             i;
 163         struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
 164
 165         sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
 166         if (!sdev)
 167                 goto nomem;
 168         sdev->dev = dev;
 169         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 170                 struct scrub_bio *sbio;
 171
 172                 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
 173                 if (!sbio)
 174                         goto nomem;
 175                 sdev->bios[i] = sbio;
 176
 177                 sbio->index = i;
 178                 sbio->sdev = sdev;
 179                 sbio->count = 0;
 180                 sbio->work.func = scrub_checksum;
 181
 182                 if (i != SCRUB_BIOS_PER_DEV-1)
 183                         sdev->bios[i]->next_free = i + 1;
 184                  else
 185                         sdev->bios[i]->next_free = -1;
 186         }
 187         sdev->first_free = 0;
 188         sdev->curr = -1;
 189         atomic_set(&sdev->in_flight, 0);
 190         atomic_set(&sdev->cancel_req, 0);
 191         sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
 192         INIT_LIST_HEAD(&sdev->csum_list);
 193
 194         spin_lock_init(&sdev->list_lock);
 195         spin_lock_init(&sdev->stat_lock);
 196         init_waitqueue_head(&sdev->list_wait);
 197         return sdev;
 198
 199 nomem:
 200         scrub_free_dev(sdev);
 201         return ERR_PTR(-ENOMEM);
 202 }
 203
 204 /*
 205  * scrub_recheck_error gets called when either verification of the page
 206  * failed or the bio failed to read, e.g. with EIO. In the latter case,
 207  * recheck_error gets called for every page in the bio, even though only
 208  * one may be bad
 209  */
 210 static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
 211 {
 212         if (sbio->err) {
 213                 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
 214                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 215                                    sbio->bio->bi_io_vec[ix].bv_page) == 0) {
 216                         if (scrub_fixup_check(sbio, ix) == 0)
 217                                 return;
 218                 }
 219         }
 220
 221         scrub_fixup(sbio, ix);
 222 }
 223
 224 static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
 225 {
 226         int ret = 1;
 227         struct page *page;
 228         void *buffer;
 229         u64 flags = sbio->spag[ix].flags;
 230
 231         page = sbio->bio->bi_io_vec[ix].bv_page;
 232         buffer = kmap_atomic(page, KM_USER0);
 233         if (flags & BTRFS_EXTENT_FLAG_DATA) {
 234                 ret = scrub_checksum_data(sbio->sdev,
 235                                           sbio->spag + ix, buffer);
 236         } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 237                 ret = scrub_checksum_tree_block(sbio->sdev,
 238                                                 sbio->spag + ix,
 239                                                 sbio->logical + ix * PAGE_SIZE,
 240                                                 buffer);
 241         } else {
 242                 WARN_ON(1);
 243         }
 244         kunmap_atomic(buffer, KM_USER0);
 245
 246         return ret;
 247 }
 248
 249 static void scrub_fixup_end_io(struct bio *bio, int err)
 250 {
 251         complete((struct completion *)bio->bi_private);
 252 }
 253
 254 static void scrub_fixup(struct scrub_bio *sbio, int ix)
 255 {
 256         struct scrub_dev *sdev = sbio->sdev;
 257         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 258         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 259         struct btrfs_multi_bio *multi = NULL;
 260         u64 logical = sbio->logical + ix * PAGE_SIZE;
 261         u64 length;
 262         int i;
 263         int ret;
 264         DECLARE_COMPLETION_ONSTACK(complete);
 265
 266         if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
 267             (sbio->spag[ix].have_csum == 0)) {
 268                 /*
 269                  * nodatasum, don't try to fix anything
 270                  * FIXME: we can do better, open the inode and trigger a
 271                  * writeback
 272                  */
 273                 goto uncorrectable;
 274         }
 275
 276         length = PAGE_SIZE;
 277         ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
 278                               &multi, 0);
 279         if (ret || !multi || length < PAGE_SIZE) {
 280                 printk(KERN_ERR
 281                        "scrub_fixup: btrfs_map_block failed us for %llu\n",
 282                        (unsigned long long)logical);
 283                 WARN_ON(1);
 284                 return;
 285         }
 286
 287         if (multi->num_stripes == 1)
 288                 /* there aren't any replicas */
 289                 goto uncorrectable;
 290
 291         /*
 292          * first find a good copy
 293          */
 294         for (i = 0; i < multi->num_stripes; ++i) {
 295                 if (i == sbio->spag[ix].mirror_num)
 296                         continue;
 297
 298                 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
 299                                    multi->stripes[i].physical >> 9,
 300                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 301                         /* I/O-error, this is not a good copy */
 302                         continue;
 303                 }
 304
 305                 if (scrub_fixup_check(sbio, ix) == 0)
 306                         break;
 307         }
 308         if (i == multi->num_stripes)
 309                 goto uncorrectable;
 310
 311         if (!sdev->readonly) {
 312                 /*
 313                  * bi_io_vec[ix].bv_page now contains good data, write it back
 314                  */
 315                 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
 316                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 317                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 318                         /* I/O-error, writeback failed, give up */
 319                         goto uncorrectable;
 320                 }
 321         }
 322
 323         kfree(multi);
 324         spin_lock(&sdev->stat_lock);
 325         ++sdev->stat.corrected_errors;
 326         spin_unlock(&sdev->stat_lock);
 327
 328         if (printk_ratelimit())
 329                 printk(KERN_ERR "btrfs: fixed up at %llu\n",
 330                        (unsigned long long)logical);
 331         return;
 332
 333 uncorrectable:
 334         kfree(multi);
 335         spin_lock(&sdev->stat_lock);
 336         ++sdev->stat.uncorrectable_errors;
 337         spin_unlock(&sdev->stat_lock);
 338
 339         if (printk_ratelimit())
 340                 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
 341                          (unsigned long long)logical);
 342 }
 343
 344 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
 345                          struct page *page)
 346 {
 347         struct bio *bio = NULL;
 348         int ret;
 349         DECLARE_COMPLETION_ONSTACK(complete);
 350
 351         bio = bio_alloc(GFP_NOFS, 1);
 352         bio->bi_bdev = bdev;
 353         bio->bi_sector = sector;
 354         bio_add_page(bio, page, PAGE_SIZE, 0);
 355         bio->bi_end_io = scrub_fixup_end_io;
 356         bio->bi_private = &complete;
 357         submit_bio(rw, bio);
 358
 359         /* this will also unplug the queue */
 360         wait_for_completion(&complete);
 361
 362         ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
 363         bio_put(bio);
 364         return ret;
 365 }
 366
 367 static void scrub_bio_end_io(struct bio *bio, int err)
 368 {
 369         struct scrub_bio *sbio = bio->bi_private;
 370         struct scrub_dev *sdev = sbio->sdev;
 371         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 372
 373         sbio->err = err;
 374         sbio->bio = bio;
 375
 376         btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
 377 }
 378
 379 static void scrub_checksum(struct btrfs_work *work)
 380 {
 381         struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
 382         struct scrub_dev *sdev = sbio->sdev;
 383         struct page *page;
 384         void *buffer;
 385         int i;
 386         u64 flags;
 387         u64 logical;
 388         int ret;
 389
 390         if (sbio->err) {
 391                 for (i = 0; i < sbio->count; ++i)
 392                         scrub_recheck_error(sbio, i);
 393
 394                 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 395                 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
 396                 sbio->bio->bi_phys_segments = 0;
 397                 sbio->bio->bi_idx = 0;
 398
 399                 for (i = 0; i < sbio->count; i++) {
 400                         struct bio_vec *bi;
 401                         bi = &sbio->bio->bi_io_vec[i];
 402                         bi->bv_offset = 0;
 403                         bi->bv_len = PAGE_SIZE;
 404                 }
 405
 406                 spin_lock(&sdev->stat_lock);
 407                 ++sdev->stat.read_errors;
 408                 spin_unlock(&sdev->stat_lock);
 409                 goto out;
 410         }
 411         for (i = 0; i < sbio->count; ++i) {
 412                 page = sbio->bio->bi_io_vec[i].bv_page;
 413                 buffer = kmap_atomic(page, KM_USER0);
 414                 flags = sbio->spag[i].flags;
 415                 logical = sbio->logical + i * PAGE_SIZE;
 416                 ret = 0;
 417                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 418                         ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
 419                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 420                         ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
 421                                                         logical, buffer);
 422                 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
 423                         BUG_ON(i);
 424                         (void)scrub_checksum_super(sbio, buffer);
 425                 } else {
 426                         WARN_ON(1);
 427                 }
 428                 kunmap_atomic(buffer, KM_USER0);
 429                 if (ret)
 430                         scrub_recheck_error(sbio, i);
 431         }
 432
 433 out:
 434         scrub_free_bio(sbio->bio);
 435         sbio->bio = NULL;
 436         spin_lock(&sdev->list_lock);
 437         sbio->next_free = sdev->first_free;
 438         sdev->first_free = sbio->index;
 439         spin_unlock(&sdev->list_lock);
 440         atomic_dec(&sdev->in_flight);
 441         wake_up(&sdev->list_wait);
 442 }
 443
 444 static int scrub_checksum_data(struct scrub_dev *sdev,
 445                                struct scrub_page *spag, void *buffer)
 446 {
 447         u8 csum[BTRFS_CSUM_SIZE];
 448         u32 crc = ~(u32)0;
 449         int fail = 0;
 450         struct btrfs_root *root = sdev->dev->dev_root;
 451
 452         if (!spag->have_csum)
 453                 return 0;
 454
 455         crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
 456         btrfs_csum_final(crc, csum);
 457         if (memcmp(csum, spag->csum, sdev->csum_size))
 458                 fail = 1;
 459
 460         spin_lock(&sdev->stat_lock);
 461         ++sdev->stat.data_extents_scrubbed;
 462         sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
 463         if (fail)
 464                 ++sdev->stat.csum_errors;
 465         spin_unlock(&sdev->stat_lock);
 466
 467         return fail;
 468 }
 469
 470 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
 471                                      struct scrub_page *spag, u64 logical,
 472                                      void *buffer)
 473 {
 474         struct btrfs_header *h;
 475         struct btrfs_root *root = sdev->dev->dev_root;
 476         struct btrfs_fs_info *fs_info = root->fs_info;
 477         u8 csum[BTRFS_CSUM_SIZE];
 478         u32 crc = ~(u32)0;
 479         int fail = 0;
 480         int crc_fail = 0;
 481
 482         /*
 483          * we don't use the getter functions here, as we
 484          * a) don't have an extent buffer and
 485          * b) the page is already kmapped
 486          */
 487         h = (struct btrfs_header *)buffer;
 488
 489         if (logical != le64_to_cpu(h->bytenr))
 490                 ++fail;
 491
 492         if (spag->generation != le64_to_cpu(h->generation))
 493                 ++fail;
 494
 495         if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 496                 ++fail;
 497
 498         if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
 499                    BTRFS_UUID_SIZE))
 500                 ++fail;
 501
 502         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 503                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 504         btrfs_csum_final(crc, csum);
 505         if (memcmp(csum, h->csum, sdev->csum_size))
 506                 ++crc_fail;
 507
 508         spin_lock(&sdev->stat_lock);
 509         ++sdev->stat.tree_extents_scrubbed;
 510         sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
 511         if (crc_fail)
 512                 ++sdev->stat.csum_errors;
 513         if (fail)
 514                 ++sdev->stat.verify_errors;
 515         spin_unlock(&sdev->stat_lock);
 516
 517         return fail || crc_fail;
 518 }
 519
 520 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
 521 {
 522         struct btrfs_super_block *s;
 523         u64 logical;
 524         struct scrub_dev *sdev = sbio->sdev;
 525         struct btrfs_root *root = sdev->dev->dev_root;
 526         struct btrfs_fs_info *fs_info = root->fs_info;
 527         u8 csum[BTRFS_CSUM_SIZE];
 528         u32 crc = ~(u32)0;
 529         int fail = 0;
 530
 531         s = (struct btrfs_super_block *)buffer;
 532         logical = sbio->logical;
 533
 534         if (logical != le64_to_cpu(s->bytenr))
 535                 ++fail;
 536
 537         if (sbio->spag[0].generation != le64_to_cpu(s->generation))
 538                 ++fail;
 539
 540         if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 541                 ++fail;
 542
 543         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 544                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 545         btrfs_csum_final(crc, csum);
 546         if (memcmp(csum, s->csum, sbio->sdev->csum_size))
 547                 ++fail;
 548
 549         if (fail) {
 550                 /*
 551                  * if we find an error in a super block, we just report it.
 552                  * They will get written with the next transaction commit
 553                  * anyway
 554                  */
 555                 spin_lock(&sdev->stat_lock);
 556                 ++sdev->stat.super_errors;
 557                 spin_unlock(&sdev->stat_lock);
 558         }
 559
 560         return fail;
 561 }
 562
 563 static int scrub_submit(struct scrub_dev *sdev)
 564 {
 565         struct scrub_bio *sbio;
 566         struct bio *bio;
 567         int i;
 568
 569         if (sdev->curr == -1)
 570                 return 0;
 571
 572         sbio = sdev->bios[sdev->curr];
 573
 574         bio = bio_alloc(GFP_NOFS, sbio->count);
 575         if (!bio)
 576                 goto nomem;
 577
 578         bio->bi_private = sbio;
 579         bio->bi_end_io = scrub_bio_end_io;
 580         bio->bi_bdev = sdev->dev->bdev;
 581         bio->bi_sector = sbio->physical >> 9;
 582
 583         for (i = 0; i < sbio->count; ++i) {
 584                 struct page *page;
 585                 int ret;
 586
 587                 page = alloc_page(GFP_NOFS);
 588                 if (!page)
 589                         goto nomem;
 590
 591                 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
 592                 if (!ret) {
 593                         __free_page(page);
 594                         goto nomem;
 595                 }
 596         }
 597
 598         sbio->err = 0;
 599         sdev->curr = -1;
 600         atomic_inc(&sdev->in_flight);
 601
 602         submit_bio(READ, bio);
 603
 604         return 0;
 605
 606 nomem:
 607         scrub_free_bio(bio);
 608
 609         return -ENOMEM;
 610 }
 611
 612 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
 613                       u64 physical, u64 flags, u64 gen, u64 mirror_num,
 614                       u8 *csum, int force)
 615 {
 616         struct scrub_bio *sbio;
 617
 618 again:
 619         /*
 620          * grab a fresh bio or wait for one to become available
 621          */
 622         while (sdev->curr == -1) {
 623                 spin_lock(&sdev->list_lock);
 624                 sdev->curr = sdev->first_free;
 625                 if (sdev->curr != -1) {
 626                         sdev->first_free = sdev->bios[sdev->curr]->next_free;
 627                         sdev->bios[sdev->curr]->next_free = -1;
 628                         sdev->bios[sdev->curr]->count = 0;
 629                         spin_unlock(&sdev->list_lock);
 630                 } else {
 631                         spin_unlock(&sdev->list_lock);
 632                         wait_event(sdev->list_wait, sdev->first_free != -1);
 633                 }
 634         }
 635         sbio = sdev->bios[sdev->curr];
 636         if (sbio->count == 0) {
 637                 sbio->physical = physical;
 638                 sbio->logical = logical;
 639         } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
 640                    sbio->logical + sbio->count * PAGE_SIZE != logical) {
 641                 int ret;
 642
 643                 ret = scrub_submit(sdev);
 644                 if (ret)
 645                         return ret;
 646                 goto again;
 647         }
 648         sbio->spag[sbio->count].flags = flags;
 649         sbio->spag[sbio->count].generation = gen;
 650         sbio->spag[sbio->count].have_csum = 0;
 651         sbio->spag[sbio->count].mirror_num = mirror_num;
 652         if (csum) {
 653                 sbio->spag[sbio->count].have_csum = 1;
 654                 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
 655         }
 656         ++sbio->count;
 657         if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
 658                 int ret;
 659
 660                 ret = scrub_submit(sdev);
 661                 if (ret)
 662                         return ret;
 663         }
 664
 665         return 0;
 666 }
 667
 668 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 669                            u8 *csum)
 670 {
 671         struct btrfs_ordered_sum *sum = NULL;
 672         int ret = 0;
 673         unsigned long i;
 674         unsigned long num_sectors;
 675         u32 sectorsize = sdev->dev->dev_root->sectorsize;
 676
 677         while (!list_empty(&sdev->csum_list)) {
 678                 sum = list_first_entry(&sdev->csum_list,
 679                                        struct btrfs_ordered_sum, list);
 680                 if (sum->bytenr > logical)
 681                         return 0;
 682                 if (sum->bytenr + sum->len > logical)
 683                         break;
 684
 685                 ++sdev->stat.csum_discards;
 686                 list_del(&sum->list);
 687                 kfree(sum);
 688                 sum = NULL;
 689         }
 690         if (!sum)
 691                 return 0;
 692
 693         num_sectors = sum->len / sectorsize;
 694         for (i = 0; i < num_sectors; ++i) {
 695                 if (sum->sums[i].bytenr == logical) {
 696                         memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
 697                         ret = 1;
 698                         break;
 699                 }
 700         }
 701         if (ret && i == num_sectors - 1) {
 702                 list_del(&sum->list);
 703                 kfree(sum);
 704         }
 705         return ret;
 706 }
 707
 708 /* scrub extent tries to collect up to 64 kB for each bio */
 709 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
 710                         u64 physical, u64 flags, u64 gen, u64 mirror_num)
 711 {
 712         int ret;
 713         u8 csum[BTRFS_CSUM_SIZE];
 714
 715         while (len) {
 716                 u64 l = min_t(u64, len, PAGE_SIZE);
 717                 int have_csum = 0;
 718
 719                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 720                         /* push csums to sbio */
 721                         have_csum = scrub_find_csum(sdev, logical, l, csum);
 722                         if (have_csum == 0)
 723                                 ++sdev->stat.no_csum;
 724                 }
 725                 ret = scrub_page(sdev, logical, l, physical, flags, gen,
 726                                  mirror_num, have_csum ? csum : NULL, 0);
 727                 if (ret)
 728                         return ret;
 729                 len -= l;
 730                 logical += l;
 731                 physical += l;
 732         }
 733         return 0;
 734 }
 735
 736 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 737         struct map_lookup *map, int num, u64 base, u64 length)
 738 {
 739         struct btrfs_path *path;
 740         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 741         struct btrfs_root *root = fs_info->extent_root;
 742         struct btrfs_root *csum_root = fs_info->csum_root;
 743         struct btrfs_extent_item *extent;
 744         struct blk_plug plug;
 745         u64 flags;
 746         int ret;
 747         int slot;
 748         int i;
 749         u64 nstripes;
 750         int start_stripe;
 751         struct extent_buffer *l;
 752         struct btrfs_key key;
 753         u64 physical;
 754         u64 logical;
 755         u64 generation;
 756         u64 mirror_num;
 757
 758         u64 increment = map->stripe_len;
 759         u64 offset;
 760
 761         nstripes = length;
 762         offset = 0;
 763         do_div(nstripes, map->stripe_len);
 764         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 765                 offset = map->stripe_len * num;
 766                 increment = map->stripe_len * map->num_stripes;
 767                 mirror_num = 0;
 768         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 769                 int factor = map->num_stripes / map->sub_stripes;
 770                 offset = map->stripe_len * (num / map->sub_stripes);
 771                 increment = map->stripe_len * factor;
 772                 mirror_num = num % map->sub_stripes;
 773         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
 774                 increment = map->stripe_len;
 775                 mirror_num = num % map->num_stripes;
 776         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 777                 increment = map->stripe_len;
 778                 mirror_num = num % map->num_stripes;
 779         } else {
 780                 increment = map->stripe_len;
 781                 mirror_num = 0;
 782         }
 783
 784         path = btrfs_alloc_path();
 785         if (!path)
 786                 return -ENOMEM;
 787
 788         path->reada = 2;
 789         path->search_commit_root = 1;
 790         path->skip_locking = 1;
 791
 792         /*
 793          * find all extents for each stripe and just read them to get
 794          * them into the page cache
 795          * FIXME: we can do better. build a more intelligent prefetching
 796          */
 797         logical = base + offset;
 798         physical = map->stripes[num].physical;
 799         ret = 0;
 800         for (i = 0; i < nstripes; ++i) {
 801                 key.objectid = logical;
 802                 key.type = BTRFS_EXTENT_ITEM_KEY;
 803                 key.offset = (u64)0;
 804
 805                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 806                 if (ret < 0)
 807                         goto out;
 808
 809                 l = path->nodes[0];
 810                 slot = path->slots[0];
 811                 btrfs_item_key_to_cpu(l, &key, slot);
 812                 if (key.objectid != logical) {
 813                         ret = btrfs_previous_item(root, path, 0,
 814                                                   BTRFS_EXTENT_ITEM_KEY);
 815                         if (ret < 0)
 816                                 goto out;
 817                 }
 818
 819                 while (1) {
 820                         l = path->nodes[0];
 821                         slot = path->slots[0];
 822                         if (slot >= btrfs_header_nritems(l)) {
 823                                 ret = btrfs_next_leaf(root, path);
 824                                 if (ret == 0)
 825                                         continue;
 826                                 if (ret < 0)
 827                                         goto out;
 828
 829                                 break;
 830                         }
 831                         btrfs_item_key_to_cpu(l, &key, slot);
 832
 833                         if (key.objectid >= logical + map->stripe_len)
 834                                 break;
 835
 836                         path->slots[0]++;
 837                 }
 838                 btrfs_release_path(path);
 839                 logical += increment;
 840                 physical += map->stripe_len;
 841                 cond_resched();
 842         }
 843
 844         /*
 845          * collect all data csums for the stripe to avoid seeking during
 846          * the scrub. This might currently (crc32) end up to be about 1MB
 847          */
 848         start_stripe = 0;
 849         blk_start_plug(&plug);
 850 again:
 851         logical = base + offset + start_stripe * increment;
 852         for (i = start_stripe; i < nstripes; ++i) {
 853                 ret = btrfs_lookup_csums_range(csum_root, logical,
 854                                                logical + map->stripe_len - 1,
 855                                                &sdev->csum_list, 1);
 856                 if (ret)
 857                         goto out;
 858
 859                 logical += increment;
 860                 cond_resched();
 861         }
 862         /*
 863          * now find all extents for each stripe and scrub them
 864          */
 865         logical = base + offset + start_stripe * increment;
 866         physical = map->stripes[num].physical + start_stripe * map->stripe_len;
 867         ret = 0;
 868         for (i = start_stripe; i < nstripes; ++i) {
 869                 /*
 870                  * canceled?
 871                  */
 872                 if (atomic_read(&fs_info->scrub_cancel_req) ||
 873                     atomic_read(&sdev->cancel_req)) {
 874                         ret = -ECANCELED;
 875                         goto out;
 876                 }
 877                 /*
 878                  * check to see if we have to pause
 879                  */
 880                 if (atomic_read(&fs_info->scrub_pause_req)) {
 881                         /* push queued extents */
 882                         scrub_submit(sdev);
 883                         wait_event(sdev->list_wait,
 884                                    atomic_read(&sdev->in_flight) == 0);
 885                         atomic_inc(&fs_info->scrubs_paused);
 886                         wake_up(&fs_info->scrub_pause_wait);
 887                         mutex_lock(&fs_info->scrub_lock);
 888                         while (atomic_read(&fs_info->scrub_pause_req)) {
 889                                 mutex_unlock(&fs_info->scrub_lock);
 890                                 wait_event(fs_info->scrub_pause_wait,
 891                                    atomic_read(&fs_info->scrub_pause_req) == 0);
 892                                 mutex_lock(&fs_info->scrub_lock);
 893                         }
 894                         atomic_dec(&fs_info->scrubs_paused);
 895                         mutex_unlock(&fs_info->scrub_lock);
 896                         wake_up(&fs_info->scrub_pause_wait);
 897                         scrub_free_csums(sdev);
 898                         start_stripe = i;
 899                         goto again;
 900                 }
 901
 902                 key.objectid = logical;
 903                 key.type = BTRFS_EXTENT_ITEM_KEY;
 904                 key.offset = (u64)0;
 905
 906                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 907                 if (ret < 0)
 908                         goto out;
 909
 910                 l = path->nodes[0];
 911                 slot = path->slots[0];
 912                 btrfs_item_key_to_cpu(l, &key, slot);
 913                 if (key.objectid != logical) {
 914                         ret = btrfs_previous_item(root, path, 0,
 915                                                   BTRFS_EXTENT_ITEM_KEY);
 916                         if (ret < 0)
 917                                 goto out;
 918                 }
 919
 920                 while (1) {
 921                         l = path->nodes[0];
 922                         slot = path->slots[0];
 923                         if (slot >= btrfs_header_nritems(l)) {
 924                                 ret = btrfs_next_leaf(root, path);
 925                                 if (ret == 0)
 926                                         continue;
 927                                 if (ret < 0)
 928                                         goto out;
 929
 930                                 break;
 931                         }
 932                         btrfs_item_key_to_cpu(l, &key, slot);
 933
 934                         if (key.objectid + key.offset <= logical)
 935                                 goto next;
 936
 937                         if (key.objectid >= logical + map->stripe_len)
 938                                 break;
 939
 940                         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
 941                                 goto next;
 942
 943                         extent = btrfs_item_ptr(l, slot,
 944                                                 struct btrfs_extent_item);
 945                         flags = btrfs_extent_flags(l, extent);
 946                         generation = btrfs_extent_generation(l, extent);
 947
 948                         if (key.objectid < logical &&
 949                             (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
 950                                 printk(KERN_ERR
 951                                        "btrfs scrub: tree block %llu spanning "
 952                                        "stripes, ignored. logical=%llu\n",
 953                                        (unsigned long long)key.objectid,
 954                                        (unsigned long long)logical);
 955                                 goto next;
 956                         }
 957
 958                         /*
 959                          * trim extent to this stripe
 960                          */
 961                         if (key.objectid < logical) {
 962                                 key.offset -= logical - key.objectid;
 963                                 key.objectid = logical;
 964                         }
 965                         if (key.objectid + key.offset >
 966                             logical + map->stripe_len) {
 967                                 key.offset = logical + map->stripe_len -
 968                                              key.objectid;
 969                         }
 970
 971                         ret = scrub_extent(sdev, key.objectid, key.offset,
 972                                            key.objectid - logical + physical,
 973                                            flags, generation, mirror_num);
 974                         if (ret)
 975                                 goto out;
 976
 977 next:
 978                         path->slots[0]++;
 979                 }
 980                 btrfs_release_path(path);
 981                 logical += increment;
 982                 physical += map->stripe_len;
 983                 spin_lock(&sdev->stat_lock);
 984                 sdev->stat.last_physical = physical;
 985                 spin_unlock(&sdev->stat_lock);
 986         }
 987         /* push queued extents */
 988         scrub_submit(sdev);
 989
 990 out:
 991         blk_finish_plug(&plug);
 992         btrfs_free_path(path);
 993         return ret < 0 ? ret : 0;
 994 }
 995
 996 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
 997         u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
 998 {
 999         struct btrfs_mapping_tree *map_tree =
1000                 &sdev->dev->dev_root->fs_info->mapping_tree;
1001         struct map_lookup *map;
1002         struct extent_map *em;
1003         int i;
1004         int ret = -EINVAL;
1005
1006         read_lock(&map_tree->map_tree.lock);
1007         em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
1008         read_unlock(&map_tree->map_tree.lock);
1009
1010         if (!em)
1011                 return -EINVAL;
1012
1013         map = (struct map_lookup *)em->bdev;
1014         if (em->start != chunk_offset)
1015                 goto out;
1016
1017         if (em->len < length)
1018                 goto out;
1019
1020         for (i = 0; i < map->num_stripes; ++i) {
1021                 if (map->stripes[i].dev == sdev->dev) {
1022                         ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1023                         if (ret)
1024                                 goto out;
1025                 }
1026         }
1027 out:
1028         free_extent_map(em);
1029
1030         return ret;
1031 }
1032
1033 static noinline_for_stack
1034 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1035 {
1036         struct btrfs_dev_extent *dev_extent = NULL;
1037         struct btrfs_path *path;
1038         struct btrfs_root *root = sdev->dev->dev_root;
1039         struct btrfs_fs_info *fs_info = root->fs_info;
1040         u64 length;
1041         u64 chunk_tree;
1042         u64 chunk_objectid;
1043         u64 chunk_offset;
1044         int ret;
1045         int slot;
1046         struct extent_buffer *l;
1047         struct btrfs_key key;
1048         struct btrfs_key found_key;
1049         struct btrfs_block_group_cache *cache;
1050
1051         path = btrfs_alloc_path();
1052         if (!path)
1053                 return -ENOMEM;
1054
1055         path->reada = 2;
1056         path->search_commit_root = 1;
1057         path->skip_locking = 1;
1058
1059         key.objectid = sdev->dev->devid;
1060         key.offset = 0ull;
1061         key.type = BTRFS_DEV_EXTENT_KEY;
1062
1063
1064         while (1) {
1065                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1066                 if (ret < 0)
1067                         goto out;
1068                 ret = 0;
1069
1070                 l = path->nodes[0];
1071                 slot = path->slots[0];
1072
1073                 btrfs_item_key_to_cpu(l, &found_key, slot);
1074
1075                 if (found_key.objectid != sdev->dev->devid)
1076                         break;
1077
1078                 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
1079                         break;
1080
1081                 if (found_key.offset >= end)
1082                         break;
1083
1084                 if (found_key.offset < key.offset)
1085                         break;
1086
1087                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1088                 length = btrfs_dev_extent_length(l, dev_extent);
1089
1090                 if (found_key.offset + length <= start) {
1091                         key.offset = found_key.offset + length;
1092                         btrfs_release_path(path);
1093                         continue;
1094                 }
1095
1096                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1097                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1098                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1099
1100                 /*
1101                  * get a reference on the corresponding block group to prevent
1102                  * the chunk from going away while we scrub it
1103                  */
1104                 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1105                 if (!cache) {
1106                         ret = -ENOENT;
1107                         goto out;
1108                 }
1109                 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1110                                   chunk_offset, length);
1111                 btrfs_put_block_group(cache);
1112                 if (ret)
1113                         break;
1114
1115                 key.offset = found_key.offset + length;
1116                 btrfs_release_path(path);
1117         }
1118
1119 out:
1120         btrfs_free_path(path);
1121         return ret;
1122 }
1123
1124 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1125 {
1126         int     i;
1127         u64     bytenr;
1128         u64     gen;
1129         int     ret;
1130         struct btrfs_device *device = sdev->dev;
1131         struct btrfs_root *root = device->dev_root;
1132
1133         gen = root->fs_info->last_trans_committed;
1134
1135         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1136                 bytenr = btrfs_sb_offset(i);
1137                 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1138                         break;
1139
1140                 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1141                                  BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1142                 if (ret)
1143                         return ret;
1144         }
1145         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1146
1147         return 0;
1148 }
1149
1150 /*
1151  * get a reference count on fs_info->scrub_workers. start worker if necessary
1152  */
1153 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1154 {
1155         struct btrfs_fs_info *fs_info = root->fs_info;
1156
1157         mutex_lock(&fs_info->scrub_lock);
1158         if (fs_info->scrub_workers_refcnt == 0)
1159                 btrfs_start_workers(&fs_info->scrub_workers, 1);
1160         ++fs_info->scrub_workers_refcnt;
1161         mutex_unlock(&fs_info->scrub_lock);
1162
1163         return 0;
1164 }
1165
1166 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1167 {
1168         struct btrfs_fs_info *fs_info = root->fs_info;
1169
1170         mutex_lock(&fs_info->scrub_lock);
1171         if (--fs_info->scrub_workers_refcnt == 0)
1172                 btrfs_stop_workers(&fs_info->scrub_workers);
1173         WARN_ON(fs_info->scrub_workers_refcnt < 0);
1174         mutex_unlock(&fs_info->scrub_lock);
1175 }
1176
1177
1178 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1179                     struct btrfs_scrub_progress *progress, int readonly)
1180 {
1181         struct scrub_dev *sdev;
1182         struct btrfs_fs_info *fs_info = root->fs_info;
1183         int ret;
1184         struct btrfs_device *dev;
1185
1186         if (btrfs_fs_closing(root->fs_info))
1187                 return -EINVAL;
1188
1189         /*
1190          * check some assumptions
1191          */
1192         if (root->sectorsize != PAGE_SIZE ||
1193             root->sectorsize != root->leafsize ||
1194             root->sectorsize != root->nodesize) {
1195                 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1196                 return -EINVAL;
1197         }
1198
1199         ret = scrub_workers_get(root);
1200         if (ret)
1201                 return ret;
1202
1203         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1204         dev = btrfs_find_device(root, devid, NULL, NULL);
1205         if (!dev || dev->missing) {
1206                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1207                 scrub_workers_put(root);
1208                 return -ENODEV;
1209         }
1210         mutex_lock(&fs_info->scrub_lock);
1211
1212         if (!dev->in_fs_metadata) {
1213                 mutex_unlock(&fs_info->scrub_lock);
1214                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1215                 scrub_workers_put(root);
1216                 return -ENODEV;
1217         }
1218
1219         if (dev->scrub_device) {
1220                 mutex_unlock(&fs_info->scrub_lock);
1221                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1222                 scrub_workers_put(root);
1223                 return -EINPROGRESS;
1224         }
1225         sdev = scrub_setup_dev(dev);
1226         if (IS_ERR(sdev)) {
1227                 mutex_unlock(&fs_info->scrub_lock);
1228                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1229                 scrub_workers_put(root);
1230                 return PTR_ERR(sdev);
1231         }
1232         sdev->readonly = readonly;
1233         dev->scrub_device = sdev;
1234
1235         atomic_inc(&fs_info->scrubs_running);
1236         mutex_unlock(&fs_info->scrub_lock);
1237         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1238
1239         down_read(&fs_info->scrub_super_lock);
1240         ret = scrub_supers(sdev);
1241         up_read(&fs_info->scrub_super_lock);
1242
1243         if (!ret)
1244                 ret = scrub_enumerate_chunks(sdev, start, end);
1245
1246         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1247
1248         atomic_dec(&fs_info->scrubs_running);
1249         wake_up(&fs_info->scrub_pause_wait);
1250
1251         if (progress)
1252                 memcpy(progress, &sdev->stat, sizeof(*progress));
1253
1254         mutex_lock(&fs_info->scrub_lock);
1255         dev->scrub_device = NULL;
1256         mutex_unlock(&fs_info->scrub_lock);
1257
1258         scrub_free_dev(sdev);
1259         scrub_workers_put(root);
1260
1261         return ret;
1262 }
1263
1264 int btrfs_scrub_pause(struct btrfs_root *root)
1265 {
1266         struct btrfs_fs_info *fs_info = root->fs_info;
1267
1268         mutex_lock(&fs_info->scrub_lock);
1269         atomic_inc(&fs_info->scrub_pause_req);
1270         while (atomic_read(&fs_info->scrubs_paused) !=
1271                atomic_read(&fs_info->scrubs_running)) {
1272                 mutex_unlock(&fs_info->scrub_lock);
1273                 wait_event(fs_info->scrub_pause_wait,
1274                            atomic_read(&fs_info->scrubs_paused) ==
1275                            atomic_read(&fs_info->scrubs_running));
1276                 mutex_lock(&fs_info->scrub_lock);
1277         }
1278         mutex_unlock(&fs_info->scrub_lock);
1279
1280         return 0;
1281 }
1282
1283 int btrfs_scrub_continue(struct btrfs_root *root)
1284 {
1285         struct btrfs_fs_info *fs_info = root->fs_info;
1286
1287         atomic_dec(&fs_info->scrub_pause_req);
1288         wake_up(&fs_info->scrub_pause_wait);
1289         return 0;
1290 }
1291
1292 int btrfs_scrub_pause_super(struct btrfs_root *root)
1293 {
1294         down_write(&root->fs_info->scrub_super_lock);
1295         return 0;
1296 }
1297
1298 int btrfs_scrub_continue_super(struct btrfs_root *root)
1299 {
1300         up_write(&root->fs_info->scrub_super_lock);
1301         return 0;
1302 }
1303
1304 int btrfs_scrub_cancel(struct btrfs_root *root)
1305 {
1306         struct btrfs_fs_info *fs_info = root->fs_info;
1307
1308         mutex_lock(&fs_info->scrub_lock);
1309         if (!atomic_read(&fs_info->scrubs_running)) {
1310                 mutex_unlock(&fs_info->scrub_lock);
1311                 return -ENOTCONN;
1312         }
1313
1314         atomic_inc(&fs_info->scrub_cancel_req);
1315         while (atomic_read(&fs_info->scrubs_running)) {
1316                 mutex_unlock(&fs_info->scrub_lock);
1317                 wait_event(fs_info->scrub_pause_wait,
1318                            atomic_read(&fs_info->scrubs_running) == 0);
1319                 mutex_lock(&fs_info->scrub_lock);
1320         }
1321         atomic_dec(&fs_info->scrub_cancel_req);
1322         mutex_unlock(&fs_info->scrub_lock);
1323
1324         return 0;
1325 }
1326
1327 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1328 {
1329         struct btrfs_fs_info *fs_info = root->fs_info;
1330         struct scrub_dev *sdev;
1331
1332         mutex_lock(&fs_info->scrub_lock);
1333         sdev = dev->scrub_device;
1334         if (!sdev) {
1335                 mutex_unlock(&fs_info->scrub_lock);
1336                 return -ENOTCONN;
1337         }
1338         atomic_inc(&sdev->cancel_req);
1339         while (dev->scrub_device) {
1340                 mutex_unlock(&fs_info->scrub_lock);
1341                 wait_event(fs_info->scrub_pause_wait,
1342                            dev->scrub_device == NULL);
1343                 mutex_lock(&fs_info->scrub_lock);
1344         }
1345         mutex_unlock(&fs_info->scrub_lock);
1346
1347         return 0;
1348 }
1349 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1350 {
1351         struct btrfs_fs_info *fs_info = root->fs_info;
1352         struct btrfs_device *dev;
1353         int ret;
1354
1355         /*
1356          * we have to hold the device_list_mutex here so the device
1357          * does not go away in cancel_dev. FIXME: find a better solution
1358          */
1359         mutex_lock(&fs_info->fs_devices->device_list_mutex);
1360         dev = btrfs_find_device(root, devid, NULL, NULL);
1361         if (!dev) {
1362                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1363                 return -ENODEV;
1364         }
1365         ret = btrfs_scrub_cancel_dev(root, dev);
1366         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1367
1368         return ret;
1369 }
1370
1371 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1372                          struct btrfs_scrub_progress *progress)
1373 {
1374         struct btrfs_device *dev;
1375         struct scrub_dev *sdev = NULL;
1376
1377         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1378         dev = btrfs_find_device(root, devid, NULL, NULL);
1379         if (dev)
1380                 sdev = dev->scrub_device;
1381         if (sdev)
1382                 memcpy(progress, &sdev->stat, sizeof(*progress));
1383         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1384
1385         return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1386 }