fs/btrfs/scrub.c

   1 /*
   2  * Copyright (C) 2011 STRATO.  All rights reserved.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License v2 as published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  11  * General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public
  14  * License along with this program; if not, write to the
  15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  16  * Boston, MA 02111-1307, USA.
  17  */
  18
  19 #include <linux/blkdev.h>
  20 #include "ctree.h"
  21 #include "volumes.h"
  22 #include "disk-io.h"
  23 #include "ordered-data.h"
  24
  25 /*
  26  * This is only the first step towards a full-featured scrub. It reads all
  27  * extents and super blocks and verifies the checksums. In case a bad checksum
  28  * is found or the extent cannot be read, good data will be written back if
  29  * any can be found.
  30  *
  31  * Future enhancements:
  32  *  - To enhance the performance, better read-ahead strategies for the
  33  *    extent-tree can be employed.
  34  *  - In case an unrepairable extent is encountered, track which files are
  35  *    affected and report them
  36  *  - In case of a read error on files with nodatasum, map the file and read
  37  *    the extent to trigger a writeback of the good copy
  38  *  - track and record media errors, throw out bad devices
  39  *  - add a mode to also read unallocated space
  40  *  - make the prefetch cancellable
  41  */
  42
  43 struct scrub_bio;
  44 struct scrub_page;
  45 struct scrub_dev;
  46 static void scrub_bio_end_io(struct bio *bio, int err);
  47 static void scrub_checksum(struct btrfs_work *work);
  48 static int scrub_checksum_data(struct scrub_dev *sdev,
  49                                struct scrub_page *spag, void *buffer);
  50 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
  51                                      struct scrub_page *spag, u64 logical,
  52                                      void *buffer);
  53 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
  54 static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
  55 static void scrub_fixup_end_io(struct bio *bio, int err);
  56 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
  57                           struct page *page);
  58 static void scrub_fixup(struct scrub_bio *sbio, int ix);
  59
  60 #define SCRUB_PAGES_PER_BIO     16      /* 64k per bio */
  61 #define SCRUB_BIOS_PER_DEV      16      /* 1 MB per device in flight */
  62
  63 struct scrub_page {
  64         u64                     flags;  /* extent flags */
  65         u64                     generation;
  66         u64                     mirror_num;
  67         int                     have_csum;
  68         u8                      csum[BTRFS_CSUM_SIZE];
  69 };
  70
  71 struct scrub_bio {
  72         int                     index;
  73         struct scrub_dev        *sdev;
  74         struct bio              *bio;
  75         int                     err;
  76         u64                     logical;
  77         u64                     physical;
  78         struct scrub_page       spag[SCRUB_PAGES_PER_BIO];
  79         u64                     count;
  80         int                     next_free;
  81         struct btrfs_work       work;
  82 };
  83
  84 struct scrub_dev {
  85         struct scrub_bio        *bios[SCRUB_BIOS_PER_DEV];
  86         struct btrfs_device     *dev;
  87         int                     first_free;
  88         int                     curr;
  89         atomic_t                in_flight;
  90         spinlock_t              list_lock;
  91         wait_queue_head_t       list_wait;
  92         u16                     csum_size;
  93         struct list_head        csum_list;
  94         atomic_t                cancel_req;
  95         int                     readonly;
  96         /*
  97          * statistics
  98          */
  99         struct btrfs_scrub_progress stat;
 100         spinlock_t              stat_lock;
 101 };
 102
 103 static void scrub_free_csums(struct scrub_dev *sdev)
 104 {
 105         while (!list_empty(&sdev->csum_list)) {
 106                 struct btrfs_ordered_sum *sum;
 107                 sum = list_first_entry(&sdev->csum_list,
 108                                        struct btrfs_ordered_sum, list);
 109                 list_del(&sum->list);
 110                 kfree(sum);
 111         }
 112 }
 113
 114 static void scrub_free_bio(struct bio *bio)
 115 {
 116         int i;
 117         struct page *last_page = NULL;
 118
 119         if (!bio)
 120                 return;
 121
 122         for (i = 0; i < bio->bi_vcnt; ++i) {
 123                 if (bio->bi_io_vec[i].bv_page == last_page)
 124                         continue;
 125                 last_page = bio->bi_io_vec[i].bv_page;
 126                 __free_page(last_page);
 127         }
 128         bio_put(bio);
 129 }
 130
 131 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 132 {
 133         int i;
 134
 135         if (!sdev)
 136                 return;
 137
 138         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 139                 struct scrub_bio *sbio = sdev->bios[i];
 140
 141                 if (!sbio)
 142                         break;
 143
 144                 scrub_free_bio(sbio->bio);
 145                 kfree(sbio);
 146         }
 147
 148         scrub_free_csums(sdev);
 149         kfree(sdev);
 150 }
 151
 152 static noinline_for_stack
 153 struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 154 {
 155         struct scrub_dev *sdev;
 156         int             i;
 157         struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
 158
 159         sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
 160         if (!sdev)
 161                 goto nomem;
 162         sdev->dev = dev;
 163         for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
 164                 struct scrub_bio *sbio;
 165
 166                 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
 167                 if (!sbio)
 168                         goto nomem;
 169                 sdev->bios[i] = sbio;
 170
 171                 sbio->index = i;
 172                 sbio->sdev = sdev;
 173                 sbio->count = 0;
 174                 sbio->work.func = scrub_checksum;
 175
 176                 if (i != SCRUB_BIOS_PER_DEV-1)
 177                         sdev->bios[i]->next_free = i + 1;
 178                  else
 179                         sdev->bios[i]->next_free = -1;
 180         }
 181         sdev->first_free = 0;
 182         sdev->curr = -1;
 183         atomic_set(&sdev->in_flight, 0);
 184         atomic_set(&sdev->cancel_req, 0);
 185         sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
 186         INIT_LIST_HEAD(&sdev->csum_list);
 187
 188         spin_lock_init(&sdev->list_lock);
 189         spin_lock_init(&sdev->stat_lock);
 190         init_waitqueue_head(&sdev->list_wait);
 191         return sdev;
 192
 193 nomem:
 194         scrub_free_dev(sdev);
 195         return ERR_PTR(-ENOMEM);
 196 }
 197
 198 /*
 199  * scrub_recheck_error gets called when either verification of the page
 200  * failed or the bio failed to read, e.g. with EIO. In the latter case,
 201  * recheck_error gets called for every page in the bio, even though only
 202  * one may be bad
 203  */
 204 static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
 205 {
 206         if (sbio->err) {
 207                 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
 208                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 209                                    sbio->bio->bi_io_vec[ix].bv_page) == 0) {
 210                         if (scrub_fixup_check(sbio, ix) == 0)
 211                                 return;
 212                 }
 213         }
 214
 215         scrub_fixup(sbio, ix);
 216 }
 217
 218 static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
 219 {
 220         int ret = 1;
 221         struct page *page;
 222         void *buffer;
 223         u64 flags = sbio->spag[ix].flags;
 224
 225         page = sbio->bio->bi_io_vec[ix].bv_page;
 226         buffer = kmap_atomic(page, KM_USER0);
 227         if (flags & BTRFS_EXTENT_FLAG_DATA) {
 228                 ret = scrub_checksum_data(sbio->sdev,
 229                                           sbio->spag + ix, buffer);
 230         } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 231                 ret = scrub_checksum_tree_block(sbio->sdev,
 232                                                 sbio->spag + ix,
 233                                                 sbio->logical + ix * PAGE_SIZE,
 234                                                 buffer);
 235         } else {
 236                 WARN_ON(1);
 237         }
 238         kunmap_atomic(buffer, KM_USER0);
 239
 240         return ret;
 241 }
 242
 243 static void scrub_fixup_end_io(struct bio *bio, int err)
 244 {
 245         complete((struct completion *)bio->bi_private);
 246 }
 247
 248 static void scrub_fixup(struct scrub_bio *sbio, int ix)
 249 {
 250         struct scrub_dev *sdev = sbio->sdev;
 251         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 252         struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 253         struct btrfs_multi_bio *multi = NULL;
 254         u64 logical = sbio->logical + ix * PAGE_SIZE;
 255         u64 length;
 256         int i;
 257         int ret;
 258         DECLARE_COMPLETION_ONSTACK(complete);
 259
 260         if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
 261             (sbio->spag[ix].have_csum == 0)) {
 262                 /*
 263                  * nodatasum, don't try to fix anything
 264                  * FIXME: we can do better, open the inode and trigger a
 265                  * writeback
 266                  */
 267                 goto uncorrectable;
 268         }
 269
 270         length = PAGE_SIZE;
 271         ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
 272                               &multi, 0);
 273         if (ret || !multi || length < PAGE_SIZE) {
 274                 printk(KERN_ERR
 275                        "scrub_fixup: btrfs_map_block failed us for %llu\n",
 276                        (unsigned long long)logical);
 277                 WARN_ON(1);
 278                 return;
 279         }
 280
 281         if (multi->num_stripes == 1)
 282                 /* there aren't any replicas */
 283                 goto uncorrectable;
 284
 285         /*
 286          * first find a good copy
 287          */
 288         for (i = 0; i < multi->num_stripes; ++i) {
 289                 if (i == sbio->spag[ix].mirror_num)
 290                         continue;
 291
 292                 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
 293                                    multi->stripes[i].physical >> 9,
 294                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 295                         /* I/O-error, this is not a good copy */
 296                         continue;
 297                 }
 298
 299                 if (scrub_fixup_check(sbio, ix) == 0)
 300                         break;
 301         }
 302         if (i == multi->num_stripes)
 303                 goto uncorrectable;
 304
 305         if (!sdev->readonly) {
 306                 /*
 307                  * bi_io_vec[ix].bv_page now contains good data, write it back
 308                  */
 309                 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
 310                                    (sbio->physical + ix * PAGE_SIZE) >> 9,
 311                                    sbio->bio->bi_io_vec[ix].bv_page)) {
 312                         /* I/O-error, writeback failed, give up */
 313                         goto uncorrectable;
 314                 }
 315         }
 316
 317         kfree(multi);
 318         spin_lock(&sdev->stat_lock);
 319         ++sdev->stat.corrected_errors;
 320         spin_unlock(&sdev->stat_lock);
 321
 322         if (printk_ratelimit())
 323                 printk(KERN_ERR "btrfs: fixed up at %llu\n",
 324                        (unsigned long long)logical);
 325         return;
 326
 327 uncorrectable:
 328         kfree(multi);
 329         spin_lock(&sdev->stat_lock);
 330         ++sdev->stat.uncorrectable_errors;
 331         spin_unlock(&sdev->stat_lock);
 332
 333         if (printk_ratelimit())
 334                 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
 335                          (unsigned long long)logical);
 336 }
 337
 338 static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
 339                          struct page *page)
 340 {
 341         struct bio *bio = NULL;
 342         int ret;
 343         DECLARE_COMPLETION_ONSTACK(complete);
 344
 345         bio = bio_alloc(GFP_NOFS, 1);
 346         bio->bi_bdev = bdev;
 347         bio->bi_sector = sector;
 348         bio_add_page(bio, page, PAGE_SIZE, 0);
 349         bio->bi_end_io = scrub_fixup_end_io;
 350         bio->bi_private = &complete;
 351         submit_bio(rw, bio);
 352
 353         /* this will also unplug the queue */
 354         wait_for_completion(&complete);
 355
 356         ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
 357         bio_put(bio);
 358         return ret;
 359 }
 360
 361 static void scrub_bio_end_io(struct bio *bio, int err)
 362 {
 363         struct scrub_bio *sbio = bio->bi_private;
 364         struct scrub_dev *sdev = sbio->sdev;
 365         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 366
 367         sbio->err = err;
 368         sbio->bio = bio;
 369
 370         btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
 371 }
 372
 373 static void scrub_checksum(struct btrfs_work *work)
 374 {
 375         struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
 376         struct scrub_dev *sdev = sbio->sdev;
 377         struct page *page;
 378         void *buffer;
 379         int i;
 380         u64 flags;
 381         u64 logical;
 382         int ret;
 383
 384         if (sbio->err) {
 385                 for (i = 0; i < sbio->count; ++i)
 386                         scrub_recheck_error(sbio, i);
 387
 388                 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 389                 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
 390                 sbio->bio->bi_phys_segments = 0;
 391                 sbio->bio->bi_idx = 0;
 392
 393                 for (i = 0; i < sbio->count; i++) {
 394                         struct bio_vec *bi;
 395                         bi = &sbio->bio->bi_io_vec[i];
 396                         bi->bv_offset = 0;
 397                         bi->bv_len = PAGE_SIZE;
 398                 }
 399
 400                 spin_lock(&sdev->stat_lock);
 401                 ++sdev->stat.read_errors;
 402                 spin_unlock(&sdev->stat_lock);
 403                 goto out;
 404         }
 405         for (i = 0; i < sbio->count; ++i) {
 406                 page = sbio->bio->bi_io_vec[i].bv_page;
 407                 buffer = kmap_atomic(page, KM_USER0);
 408                 flags = sbio->spag[i].flags;
 409                 logical = sbio->logical + i * PAGE_SIZE;
 410                 ret = 0;
 411                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 412                         ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
 413                 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
 414                         ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
 415                                                         logical, buffer);
 416                 } else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
 417                         BUG_ON(i);
 418                         (void)scrub_checksum_super(sbio, buffer);
 419                 } else {
 420                         WARN_ON(1);
 421                 }
 422                 kunmap_atomic(buffer, KM_USER0);
 423                 if (ret)
 424                         scrub_recheck_error(sbio, i);
 425         }
 426
 427 out:
 428         scrub_free_bio(sbio->bio);
 429         sbio->bio = NULL;
 430         spin_lock(&sdev->list_lock);
 431         sbio->next_free = sdev->first_free;
 432         sdev->first_free = sbio->index;
 433         spin_unlock(&sdev->list_lock);
 434         atomic_dec(&sdev->in_flight);
 435         wake_up(&sdev->list_wait);
 436 }
 437
 438 static int scrub_checksum_data(struct scrub_dev *sdev,
 439                                struct scrub_page *spag, void *buffer)
 440 {
 441         u8 csum[BTRFS_CSUM_SIZE];
 442         u32 crc = ~(u32)0;
 443         int fail = 0;
 444         struct btrfs_root *root = sdev->dev->dev_root;
 445
 446         if (!spag->have_csum)
 447                 return 0;
 448
 449         crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
 450         btrfs_csum_final(crc, csum);
 451         if (memcmp(csum, spag->csum, sdev->csum_size))
 452                 fail = 1;
 453
 454         spin_lock(&sdev->stat_lock);
 455         ++sdev->stat.data_extents_scrubbed;
 456         sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
 457         if (fail)
 458                 ++sdev->stat.csum_errors;
 459         spin_unlock(&sdev->stat_lock);
 460
 461         return fail;
 462 }
 463
 464 static int scrub_checksum_tree_block(struct scrub_dev *sdev,
 465                                      struct scrub_page *spag, u64 logical,
 466                                      void *buffer)
 467 {
 468         struct btrfs_header *h;
 469         struct btrfs_root *root = sdev->dev->dev_root;
 470         struct btrfs_fs_info *fs_info = root->fs_info;
 471         u8 csum[BTRFS_CSUM_SIZE];
 472         u32 crc = ~(u32)0;
 473         int fail = 0;
 474         int crc_fail = 0;
 475
 476         /*
 477          * we don't use the getter functions here, as we
 478          * a) don't have an extent buffer and
 479          * b) the page is already kmapped
 480          */
 481         h = (struct btrfs_header *)buffer;
 482
 483         if (logical != le64_to_cpu(h->bytenr))
 484                 ++fail;
 485
 486         if (spag->generation != le64_to_cpu(h->generation))
 487                 ++fail;
 488
 489         if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 490                 ++fail;
 491
 492         if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
 493                    BTRFS_UUID_SIZE))
 494                 ++fail;
 495
 496         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 497                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 498         btrfs_csum_final(crc, csum);
 499         if (memcmp(csum, h->csum, sdev->csum_size))
 500                 ++crc_fail;
 501
 502         spin_lock(&sdev->stat_lock);
 503         ++sdev->stat.tree_extents_scrubbed;
 504         sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
 505         if (crc_fail)
 506                 ++sdev->stat.csum_errors;
 507         if (fail)
 508                 ++sdev->stat.verify_errors;
 509         spin_unlock(&sdev->stat_lock);
 510
 511         return fail || crc_fail;
 512 }
 513
 514 static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
 515 {
 516         struct btrfs_super_block *s;
 517         u64 logical;
 518         struct scrub_dev *sdev = sbio->sdev;
 519         struct btrfs_root *root = sdev->dev->dev_root;
 520         struct btrfs_fs_info *fs_info = root->fs_info;
 521         u8 csum[BTRFS_CSUM_SIZE];
 522         u32 crc = ~(u32)0;
 523         int fail = 0;
 524
 525         s = (struct btrfs_super_block *)buffer;
 526         logical = sbio->logical;
 527
 528         if (logical != le64_to_cpu(s->bytenr))
 529                 ++fail;
 530
 531         if (sbio->spag[0].generation != le64_to_cpu(s->generation))
 532                 ++fail;
 533
 534         if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
 535                 ++fail;
 536
 537         crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
 538                               PAGE_SIZE - BTRFS_CSUM_SIZE);
 539         btrfs_csum_final(crc, csum);
 540         if (memcmp(csum, s->csum, sbio->sdev->csum_size))
 541                 ++fail;
 542
 543         if (fail) {
 544                 /*
 545                  * if we find an error in a super block, we just report it.
 546                  * They will get written with the next transaction commit
 547                  * anyway
 548                  */
 549                 spin_lock(&sdev->stat_lock);
 550                 ++sdev->stat.super_errors;
 551                 spin_unlock(&sdev->stat_lock);
 552         }
 553
 554         return fail;
 555 }
 556
 557 static int scrub_submit(struct scrub_dev *sdev)
 558 {
 559         struct scrub_bio *sbio;
 560         struct bio *bio;
 561         int i;
 562
 563         if (sdev->curr == -1)
 564                 return 0;
 565
 566         sbio = sdev->bios[sdev->curr];
 567
 568         bio = bio_alloc(GFP_NOFS, sbio->count);
 569         if (!bio)
 570                 goto nomem;
 571
 572         bio->bi_private = sbio;
 573         bio->bi_end_io = scrub_bio_end_io;
 574         bio->bi_bdev = sdev->dev->bdev;
 575         bio->bi_sector = sbio->physical >> 9;
 576
 577         for (i = 0; i < sbio->count; ++i) {
 578                 struct page *page;
 579                 int ret;
 580
 581                 page = alloc_page(GFP_NOFS);
 582                 if (!page)
 583                         goto nomem;
 584
 585                 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
 586                 if (!ret) {
 587                         __free_page(page);
 588                         goto nomem;
 589                 }
 590         }
 591
 592         sbio->err = 0;
 593         sdev->curr = -1;
 594         atomic_inc(&sdev->in_flight);
 595
 596         submit_bio(READ, bio);
 597
 598         return 0;
 599
 600 nomem:
 601         scrub_free_bio(bio);
 602
 603         return -ENOMEM;
 604 }
 605
 606 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
 607                       u64 physical, u64 flags, u64 gen, u64 mirror_num,
 608                       u8 *csum, int force)
 609 {
 610         struct scrub_bio *sbio;
 611
 612 again:
 613         /*
 614          * grab a fresh bio or wait for one to become available
 615          */
 616         while (sdev->curr == -1) {
 617                 spin_lock(&sdev->list_lock);
 618                 sdev->curr = sdev->first_free;
 619                 if (sdev->curr != -1) {
 620                         sdev->first_free = sdev->bios[sdev->curr]->next_free;
 621                         sdev->bios[sdev->curr]->next_free = -1;
 622                         sdev->bios[sdev->curr]->count = 0;
 623                         spin_unlock(&sdev->list_lock);
 624                 } else {
 625                         spin_unlock(&sdev->list_lock);
 626                         wait_event(sdev->list_wait, sdev->first_free != -1);
 627                 }
 628         }
 629         sbio = sdev->bios[sdev->curr];
 630         if (sbio->count == 0) {
 631                 sbio->physical = physical;
 632                 sbio->logical = logical;
 633         } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
 634                    sbio->logical + sbio->count * PAGE_SIZE != logical) {
 635                 int ret;
 636
 637                 ret = scrub_submit(sdev);
 638                 if (ret)
 639                         return ret;
 640                 goto again;
 641         }
 642         sbio->spag[sbio->count].flags = flags;
 643         sbio->spag[sbio->count].generation = gen;
 644         sbio->spag[sbio->count].have_csum = 0;
 645         sbio->spag[sbio->count].mirror_num = mirror_num;
 646         if (csum) {
 647                 sbio->spag[sbio->count].have_csum = 1;
 648                 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
 649         }
 650         ++sbio->count;
 651         if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
 652                 int ret;
 653
 654                 ret = scrub_submit(sdev);
 655                 if (ret)
 656                         return ret;
 657         }
 658
 659         return 0;
 660 }
 661
 662 static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
 663                            u8 *csum)
 664 {
 665         struct btrfs_ordered_sum *sum = NULL;
 666         int ret = 0;
 667         unsigned long i;
 668         unsigned long num_sectors;
 669         u32 sectorsize = sdev->dev->dev_root->sectorsize;
 670
 671         while (!list_empty(&sdev->csum_list)) {
 672                 sum = list_first_entry(&sdev->csum_list,
 673                                        struct btrfs_ordered_sum, list);
 674                 if (sum->bytenr > logical)
 675                         return 0;
 676                 if (sum->bytenr + sum->len > logical)
 677                         break;
 678
 679                 ++sdev->stat.csum_discards;
 680                 list_del(&sum->list);
 681                 kfree(sum);
 682                 sum = NULL;
 683         }
 684         if (!sum)
 685                 return 0;
 686
 687         num_sectors = sum->len / sectorsize;
 688         for (i = 0; i < num_sectors; ++i) {
 689                 if (sum->sums[i].bytenr == logical) {
 690                         memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
 691                         ret = 1;
 692                         break;
 693                 }
 694         }
 695         if (ret && i == num_sectors - 1) {
 696                 list_del(&sum->list);
 697                 kfree(sum);
 698         }
 699         return ret;
 700 }
 701
 702 /* scrub extent tries to collect up to 64 kB for each bio */
 703 static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
 704                         u64 physical, u64 flags, u64 gen, u64 mirror_num)
 705 {
 706         int ret;
 707         u8 csum[BTRFS_CSUM_SIZE];
 708
 709         while (len) {
 710                 u64 l = min_t(u64, len, PAGE_SIZE);
 711                 int have_csum = 0;
 712
 713                 if (flags & BTRFS_EXTENT_FLAG_DATA) {
 714                         /* push csums to sbio */
 715                         have_csum = scrub_find_csum(sdev, logical, l, csum);
 716                         if (have_csum == 0)
 717                                 ++sdev->stat.no_csum;
 718                 }
 719                 ret = scrub_page(sdev, logical, l, physical, flags, gen,
 720                                  mirror_num, have_csum ? csum : NULL, 0);
 721                 if (ret)
 722                         return ret;
 723                 len -= l;
 724                 logical += l;
 725                 physical += l;
 726         }
 727         return 0;
 728 }
 729
 730 static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 731         struct map_lookup *map, int num, u64 base, u64 length)
 732 {
 733         struct btrfs_path *path;
 734         struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 735         struct btrfs_root *root = fs_info->extent_root;
 736         struct btrfs_root *csum_root = fs_info->csum_root;
 737         struct btrfs_extent_item *extent;
 738         struct blk_plug plug;
 739         u64 flags;
 740         int ret;
 741         int slot;
 742         int i;
 743         u64 nstripes;
 744         int start_stripe;
 745         struct extent_buffer *l;
 746         struct btrfs_key key;
 747         u64 physical;
 748         u64 logical;
 749         u64 generation;
 750         u64 mirror_num;
 751
 752         u64 increment = map->stripe_len;
 753         u64 offset;
 754
 755         nstripes = length;
 756         offset = 0;
 757         do_div(nstripes, map->stripe_len);
 758         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 759                 offset = map->stripe_len * num;
 760                 increment = map->stripe_len * map->num_stripes;
 761                 mirror_num = 0;
 762         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 763                 int factor = map->num_stripes / map->sub_stripes;
 764                 offset = map->stripe_len * (num / map->sub_stripes);
 765                 increment = map->stripe_len * factor;
 766                 mirror_num = num % map->sub_stripes;
 767         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
 768                 increment = map->stripe_len;
 769                 mirror_num = num % map->num_stripes;
 770         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
 771                 increment = map->stripe_len;
 772                 mirror_num = num % map->num_stripes;
 773         } else {
 774                 increment = map->stripe_len;
 775                 mirror_num = 0;
 776         }
 777
 778         path = btrfs_alloc_path();
 779         if (!path)
 780                 return -ENOMEM;
 781
 782         path->reada = 2;
 783         path->search_commit_root = 1;
 784         path->skip_locking = 1;
 785
 786         /*
 787          * find all extents for each stripe and just read them to get
 788          * them into the page cache
 789          * FIXME: we can do better. build a more intelligent prefetching
 790          */
 791         logical = base + offset;
 792         physical = map->stripes[num].physical;
 793         ret = 0;
 794         for (i = 0; i < nstripes; ++i) {
 795                 key.objectid = logical;
 796                 key.type = BTRFS_EXTENT_ITEM_KEY;
 797                 key.offset = (u64)0;
 798
 799                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 800                 if (ret < 0)
 801                         goto out_noplug;
 802
 803                 /*
 804                  * we might miss half an extent here, but that doesn't matter,
 805                  * as it's only the prefetch
 806                  */
 807                 while (1) {
 808                         l = path->nodes[0];
 809                         slot = path->slots[0];
 810                         if (slot >= btrfs_header_nritems(l)) {
 811                                 ret = btrfs_next_leaf(root, path);
 812                                 if (ret == 0)
 813                                         continue;
 814                                 if (ret < 0)
 815                                         goto out_noplug;
 816
 817                                 break;
 818                         }
 819                         btrfs_item_key_to_cpu(l, &key, slot);
 820
 821                         if (key.objectid >= logical + map->stripe_len)
 822                                 break;
 823
 824                         path->slots[0]++;
 825                 }
 826                 btrfs_release_path(path);
 827                 logical += increment;
 828                 physical += map->stripe_len;
 829                 cond_resched();
 830         }
 831
 832         /*
 833          * collect all data csums for the stripe to avoid seeking during
 834          * the scrub. This might currently (crc32) end up to be about 1MB
 835          */
 836         start_stripe = 0;
 837         blk_start_plug(&plug);
 838 again:
 839         logical = base + offset + start_stripe * increment;
 840         for (i = start_stripe; i < nstripes; ++i) {
 841                 ret = btrfs_lookup_csums_range(csum_root, logical,
 842                                                logical + map->stripe_len - 1,
 843                                                &sdev->csum_list, 1);
 844                 if (ret)
 845                         goto out;
 846
 847                 logical += increment;
 848                 cond_resched();
 849         }
 850         /*
 851          * now find all extents for each stripe and scrub them
 852          */
 853         logical = base + offset + start_stripe * increment;
 854         physical = map->stripes[num].physical + start_stripe * map->stripe_len;
 855         ret = 0;
 856         for (i = start_stripe; i < nstripes; ++i) {
 857                 /*
 858                  * canceled?
 859                  */
 860                 if (atomic_read(&fs_info->scrub_cancel_req) ||
 861                     atomic_read(&sdev->cancel_req)) {
 862                         ret = -ECANCELED;
 863                         goto out;
 864                 }
 865                 /*
 866                  * check to see if we have to pause
 867                  */
 868                 if (atomic_read(&fs_info->scrub_pause_req)) {
 869                         /* push queued extents */
 870                         scrub_submit(sdev);
 871                         wait_event(sdev->list_wait,
 872                                    atomic_read(&sdev->in_flight) == 0);
 873                         atomic_inc(&fs_info->scrubs_paused);
 874                         wake_up(&fs_info->scrub_pause_wait);
 875                         mutex_lock(&fs_info->scrub_lock);
 876                         while (atomic_read(&fs_info->scrub_pause_req)) {
 877                                 mutex_unlock(&fs_info->scrub_lock);
 878                                 wait_event(fs_info->scrub_pause_wait,
 879                                    atomic_read(&fs_info->scrub_pause_req) == 0);
 880                                 mutex_lock(&fs_info->scrub_lock);
 881                         }
 882                         atomic_dec(&fs_info->scrubs_paused);
 883                         mutex_unlock(&fs_info->scrub_lock);
 884                         wake_up(&fs_info->scrub_pause_wait);
 885                         scrub_free_csums(sdev);
 886                         start_stripe = i;
 887                         goto again;
 888                 }
 889
 890                 key.objectid = logical;
 891                 key.type = BTRFS_EXTENT_ITEM_KEY;
 892                 key.offset = (u64)0;
 893
 894                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 895                 if (ret < 0)
 896                         goto out;
 897                 if (ret > 0) {
 898                         ret = btrfs_previous_item(root, path, 0,
 899                                                   BTRFS_EXTENT_ITEM_KEY);
 900                         if (ret < 0)
 901                                 goto out;
 902                         if (ret > 0) {
 903                                 /* there's no smaller item, so stick with the
 904                                  * larger one */
 905                                 btrfs_release_path(path);
 906                                 ret = btrfs_search_slot(NULL, root, &key,
 907                                                         path, 0, 0);
 908                                 if (ret < 0)
 909                                         goto out;
 910                         }
 911                 }
 912
 913                 while (1) {
 914                         l = path->nodes[0];
 915                         slot = path->slots[0];
 916                         if (slot >= btrfs_header_nritems(l)) {
 917                                 ret = btrfs_next_leaf(root, path);
 918                                 if (ret == 0)
 919                                         continue;
 920                                 if (ret < 0)
 921                                         goto out;
 922
 923                                 break;
 924                         }
 925                         btrfs_item_key_to_cpu(l, &key, slot);
 926
 927                         if (key.objectid + key.offset <= logical)
 928                                 goto next;
 929
 930                         if (key.objectid >= logical + map->stripe_len)
 931                                 break;
 932
 933                         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
 934                                 goto next;
 935
 936                         extent = btrfs_item_ptr(l, slot,
 937                                                 struct btrfs_extent_item);
 938                         flags = btrfs_extent_flags(l, extent);
 939                         generation = btrfs_extent_generation(l, extent);
 940
 941                         if (key.objectid < logical &&
 942                             (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
 943                                 printk(KERN_ERR
 944                                        "btrfs scrub: tree block %llu spanning "
 945                                        "stripes, ignored. logical=%llu\n",
 946                                        (unsigned long long)key.objectid,
 947                                        (unsigned long long)logical);
 948                                 goto next;
 949                         }
 950
 951                         /*
 952                          * trim extent to this stripe
 953                          */
 954                         if (key.objectid < logical) {
 955                                 key.offset -= logical - key.objectid;
 956                                 key.objectid = logical;
 957                         }
 958                         if (key.objectid + key.offset >
 959                             logical + map->stripe_len) {
 960                                 key.offset = logical + map->stripe_len -
 961                                              key.objectid;
 962                         }
 963
 964                         ret = scrub_extent(sdev, key.objectid, key.offset,
 965                                            key.objectid - logical + physical,
 966                                            flags, generation, mirror_num);
 967                         if (ret)
 968                                 goto out;
 969
 970 next:
 971                         path->slots[0]++;
 972                 }
 973                 btrfs_release_path(path);
 974                 logical += increment;
 975                 physical += map->stripe_len;
 976                 spin_lock(&sdev->stat_lock);
 977                 sdev->stat.last_physical = physical;
 978                 spin_unlock(&sdev->stat_lock);
 979         }
 980         /* push queued extents */
 981         scrub_submit(sdev);
 982
 983 out:
 984         blk_finish_plug(&plug);
 985 out_noplug:
 986         btrfs_free_path(path);
 987         return ret < 0 ? ret : 0;
 988 }
 989
 990 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
 991         u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
 992 {
 993         struct btrfs_mapping_tree *map_tree =
 994                 &sdev->dev->dev_root->fs_info->mapping_tree;
 995         struct map_lookup *map;
 996         struct extent_map *em;
 997         int i;
 998         int ret = -EINVAL;
 999
1000         read_lock(&map_tree->map_tree.lock);
1001         em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
1002         read_unlock(&map_tree->map_tree.lock);
1003
1004         if (!em)
1005                 return -EINVAL;
1006
1007         map = (struct map_lookup *)em->bdev;
1008         if (em->start != chunk_offset)
1009                 goto out;
1010
1011         if (em->len < length)
1012                 goto out;
1013
1014         for (i = 0; i < map->num_stripes; ++i) {
1015                 if (map->stripes[i].dev == sdev->dev) {
1016                         ret = scrub_stripe(sdev, map, i, chunk_offset, length);
1017                         if (ret)
1018                                 goto out;
1019                 }
1020         }
1021 out:
1022         free_extent_map(em);
1023
1024         return ret;
1025 }
1026
1027 static noinline_for_stack
1028 int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1029 {
1030         struct btrfs_dev_extent *dev_extent = NULL;
1031         struct btrfs_path *path;
1032         struct btrfs_root *root = sdev->dev->dev_root;
1033         struct btrfs_fs_info *fs_info = root->fs_info;
1034         u64 length;
1035         u64 chunk_tree;
1036         u64 chunk_objectid;
1037         u64 chunk_offset;
1038         int ret;
1039         int slot;
1040         struct extent_buffer *l;
1041         struct btrfs_key key;
1042         struct btrfs_key found_key;
1043         struct btrfs_block_group_cache *cache;
1044
1045         path = btrfs_alloc_path();
1046         if (!path)
1047                 return -ENOMEM;
1048
1049         path->reada = 2;
1050         path->search_commit_root = 1;
1051         path->skip_locking = 1;
1052
1053         key.objectid = sdev->dev->devid;
1054         key.offset = 0ull;
1055         key.type = BTRFS_DEV_EXTENT_KEY;
1056
1057
1058         while (1) {
1059                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1060                 if (ret < 0)
1061                         break;
1062                 if (ret > 0) {
1063                         if (path->slots[0] >=
1064                             btrfs_header_nritems(path->nodes[0])) {
1065                                 ret = btrfs_next_leaf(root, path);
1066                                 if (ret)
1067                                         break;
1068                         }
1069                 }
1070
1071                 l = path->nodes[0];
1072                 slot = path->slots[0];
1073
1074                 btrfs_item_key_to_cpu(l, &found_key, slot);
1075
1076                 if (found_key.objectid != sdev->dev->devid)
1077                         break;
1078
1079                 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1080                         break;
1081
1082                 if (found_key.offset >= end)
1083                         break;
1084
1085                 if (found_key.offset < key.offset)
1086                         break;
1087
1088                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1089                 length = btrfs_dev_extent_length(l, dev_extent);
1090
1091                 if (found_key.offset + length <= start) {
1092                         key.offset = found_key.offset + length;
1093                         btrfs_release_path(path);
1094                         continue;
1095                 }
1096
1097                 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1098                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1099                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1100
1101                 /*
1102                  * get a reference on the corresponding block group to prevent
1103                  * the chunk from going away while we scrub it
1104                  */
1105                 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1106                 if (!cache) {
1107                         ret = -ENOENT;
1108                         break;
1109                 }
1110                 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1111                                   chunk_offset, length);
1112                 btrfs_put_block_group(cache);
1113                 if (ret)
1114                         break;
1115
1116                 key.offset = found_key.offset + length;
1117                 btrfs_release_path(path);
1118         }
1119
1120         btrfs_free_path(path);
1121
1122         /*
1123          * ret can still be 1 from search_slot or next_leaf,
1124          * that's not an error
1125          */
1126         return ret < 0 ? ret : 0;
1127 }
1128
1129 static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1130 {
1131         int     i;
1132         u64     bytenr;
1133         u64     gen;
1134         int     ret;
1135         struct btrfs_device *device = sdev->dev;
1136         struct btrfs_root *root = device->dev_root;
1137
1138         gen = root->fs_info->last_trans_committed;
1139
1140         for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
1141                 bytenr = btrfs_sb_offset(i);
1142                 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
1143                         break;
1144
1145                 ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
1146                                  BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
1147                 if (ret)
1148                         return ret;
1149         }
1150         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1151
1152         return 0;
1153 }
1154
1155 /*
1156  * get a reference count on fs_info->scrub_workers. start worker if necessary
1157  */
1158 static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1159 {
1160         struct btrfs_fs_info *fs_info = root->fs_info;
1161
1162         mutex_lock(&fs_info->scrub_lock);
1163         if (fs_info->scrub_workers_refcnt == 0) {
1164                 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165                            fs_info->thread_pool_size, &fs_info->generic_worker);
1166                 fs_info->scrub_workers.idle_thresh = 4;
1167                 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168         }
1169         ++fs_info->scrub_workers_refcnt;
1170         mutex_unlock(&fs_info->scrub_lock);
1171
1172         return 0;
1173 }
1174
1175 static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
1176 {
1177         struct btrfs_fs_info *fs_info = root->fs_info;
1178
1179         mutex_lock(&fs_info->scrub_lock);
1180         if (--fs_info->scrub_workers_refcnt == 0)
1181                 btrfs_stop_workers(&fs_info->scrub_workers);
1182         WARN_ON(fs_info->scrub_workers_refcnt < 0);
1183         mutex_unlock(&fs_info->scrub_lock);
1184 }
1185
1186
1187 int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1188                     struct btrfs_scrub_progress *progress, int readonly)
1189 {
1190         struct scrub_dev *sdev;
1191         struct btrfs_fs_info *fs_info = root->fs_info;
1192         int ret;
1193         struct btrfs_device *dev;
1194
1195         if (btrfs_fs_closing(root->fs_info))
1196                 return -EINVAL;
1197
1198         /*
1199          * check some assumptions
1200          */
1201         if (root->sectorsize != PAGE_SIZE ||
1202             root->sectorsize != root->leafsize ||
1203             root->sectorsize != root->nodesize) {
1204                 printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
1205                 return -EINVAL;
1206         }
1207
1208         ret = scrub_workers_get(root);
1209         if (ret)
1210                 return ret;
1211
1212         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1213         dev = btrfs_find_device(root, devid, NULL, NULL);
1214         if (!dev || dev->missing) {
1215                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1216                 scrub_workers_put(root);
1217                 return -ENODEV;
1218         }
1219         mutex_lock(&fs_info->scrub_lock);
1220
1221         if (!dev->in_fs_metadata) {
1222                 mutex_unlock(&fs_info->scrub_lock);
1223                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1224                 scrub_workers_put(root);
1225                 return -ENODEV;
1226         }
1227
1228         if (dev->scrub_device) {
1229                 mutex_unlock(&fs_info->scrub_lock);
1230                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1231                 scrub_workers_put(root);
1232                 return -EINPROGRESS;
1233         }
1234         sdev = scrub_setup_dev(dev);
1235         if (IS_ERR(sdev)) {
1236                 mutex_unlock(&fs_info->scrub_lock);
1237                 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1238                 scrub_workers_put(root);
1239                 return PTR_ERR(sdev);
1240         }
1241         sdev->readonly = readonly;
1242         dev->scrub_device = sdev;
1243
1244         atomic_inc(&fs_info->scrubs_running);
1245         mutex_unlock(&fs_info->scrub_lock);
1246         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1247
1248         down_read(&fs_info->scrub_super_lock);
1249         ret = scrub_supers(sdev);
1250         up_read(&fs_info->scrub_super_lock);
1251
1252         if (!ret)
1253                 ret = scrub_enumerate_chunks(sdev, start, end);
1254
1255         wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1256
1257         atomic_dec(&fs_info->scrubs_running);
1258         wake_up(&fs_info->scrub_pause_wait);
1259
1260         if (progress)
1261                 memcpy(progress, &sdev->stat, sizeof(*progress));
1262
1263         mutex_lock(&fs_info->scrub_lock);
1264         dev->scrub_device = NULL;
1265         mutex_unlock(&fs_info->scrub_lock);
1266
1267         scrub_free_dev(sdev);
1268         scrub_workers_put(root);
1269
1270         return ret;
1271 }
1272
1273 int btrfs_scrub_pause(struct btrfs_root *root)
1274 {
1275         struct btrfs_fs_info *fs_info = root->fs_info;
1276
1277         mutex_lock(&fs_info->scrub_lock);
1278         atomic_inc(&fs_info->scrub_pause_req);
1279         while (atomic_read(&fs_info->scrubs_paused) !=
1280                atomic_read(&fs_info->scrubs_running)) {
1281                 mutex_unlock(&fs_info->scrub_lock);
1282                 wait_event(fs_info->scrub_pause_wait,
1283                            atomic_read(&fs_info->scrubs_paused) ==
1284                            atomic_read(&fs_info->scrubs_running));
1285                 mutex_lock(&fs_info->scrub_lock);
1286         }
1287         mutex_unlock(&fs_info->scrub_lock);
1288
1289         return 0;
1290 }
1291
1292 int btrfs_scrub_continue(struct btrfs_root *root)
1293 {
1294         struct btrfs_fs_info *fs_info = root->fs_info;
1295
1296         atomic_dec(&fs_info->scrub_pause_req);
1297         wake_up(&fs_info->scrub_pause_wait);
1298         return 0;
1299 }
1300
1301 int btrfs_scrub_pause_super(struct btrfs_root *root)
1302 {
1303         down_write(&root->fs_info->scrub_super_lock);
1304         return 0;
1305 }
1306
1307 int btrfs_scrub_continue_super(struct btrfs_root *root)
1308 {
1309         up_write(&root->fs_info->scrub_super_lock);
1310         return 0;
1311 }
1312
1313 int btrfs_scrub_cancel(struct btrfs_root *root)
1314 {
1315         struct btrfs_fs_info *fs_info = root->fs_info;
1316
1317         mutex_lock(&fs_info->scrub_lock);
1318         if (!atomic_read(&fs_info->scrubs_running)) {
1319                 mutex_unlock(&fs_info->scrub_lock);
1320                 return -ENOTCONN;
1321         }
1322
1323         atomic_inc(&fs_info->scrub_cancel_req);
1324         while (atomic_read(&fs_info->scrubs_running)) {
1325                 mutex_unlock(&fs_info->scrub_lock);
1326                 wait_event(fs_info->scrub_pause_wait,
1327                            atomic_read(&fs_info->scrubs_running) == 0);
1328                 mutex_lock(&fs_info->scrub_lock);
1329         }
1330         atomic_dec(&fs_info->scrub_cancel_req);
1331         mutex_unlock(&fs_info->scrub_lock);
1332
1333         return 0;
1334 }
1335
1336 int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
1337 {
1338         struct btrfs_fs_info *fs_info = root->fs_info;
1339         struct scrub_dev *sdev;
1340
1341         mutex_lock(&fs_info->scrub_lock);
1342         sdev = dev->scrub_device;
1343         if (!sdev) {
1344                 mutex_unlock(&fs_info->scrub_lock);
1345                 return -ENOTCONN;
1346         }
1347         atomic_inc(&sdev->cancel_req);
1348         while (dev->scrub_device) {
1349                 mutex_unlock(&fs_info->scrub_lock);
1350                 wait_event(fs_info->scrub_pause_wait,
1351                            dev->scrub_device == NULL);
1352                 mutex_lock(&fs_info->scrub_lock);
1353         }
1354         mutex_unlock(&fs_info->scrub_lock);
1355
1356         return 0;
1357 }
1358 int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
1359 {
1360         struct btrfs_fs_info *fs_info = root->fs_info;
1361         struct btrfs_device *dev;
1362         int ret;
1363
1364         /*
1365          * we have to hold the device_list_mutex here so the device
1366          * does not go away in cancel_dev. FIXME: find a better solution
1367          */
1368         mutex_lock(&fs_info->fs_devices->device_list_mutex);
1369         dev = btrfs_find_device(root, devid, NULL, NULL);
1370         if (!dev) {
1371                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1372                 return -ENODEV;
1373         }
1374         ret = btrfs_scrub_cancel_dev(root, dev);
1375         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1376
1377         return ret;
1378 }
1379
1380 int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
1381                          struct btrfs_scrub_progress *progress)
1382 {
1383         struct btrfs_device *dev;
1384         struct scrub_dev *sdev = NULL;
1385
1386         mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1387         dev = btrfs_find_device(root, devid, NULL, NULL);
1388         if (dev)
1389                 sdev = dev->scrub_device;
1390         if (sdev)
1391                 memcpy(progress, &sdev->stat, sizeof(*progress));
1392         mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1393
1394         return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
1395 }