// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/blkdev.h>

#include "bmap.h"
#include "dir.h"
#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh_wd(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr)
		list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == NULL)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
	rgd->rd_extfail_pt = rgd->rd_free;
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @tr: The transaction to which the buffer now belongs
 */
static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_tr) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_tr = tr;
	list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}

static void gfs2_log_incr_head(struct gfs2_sbd *sdp)
{
	BUG_ON((sdp->sd_log_flush_head == sdp->sd_log_tail) &&
	       (sdp->sd_log_flush_head != sdp->sd_log_head));

	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks)
		sdp->sd_log_flush_head = 0;
}

u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
{
	unsigned int lbn = sdp->sd_log_flush_head;
	struct gfs2_journal_extent *je;
	u64 block;

	list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) {
		if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) {
			block = je->dblock + lbn - je->lblock;
			gfs2_log_incr_head(sdp);
			return block;
		}
	}

	return -1;
}

/**
 * gfs2_end_log_write_bh - end log write of pagecache data with buffers
 * @sdp: The superblock
 * @bvec: The bio_vec
 * @error: The i/o status
 *
 * This finds the relevant buffers and unlocks them and sets the
 * error flag according to the status of the i/o request. This is
 * used when the log is writing data which has an in-place version
 * that is pinned in the pagecache.
 */
static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
				  struct bio_vec *bvec,
				  blk_status_t error)
{
	struct buffer_head *bh, *next;
	struct page *page = bvec->bv_page;
	unsigned size;

	bh = page_buffers(page);
	size = bvec->bv_len;
	while (bh_offset(bh) < bvec->bv_offset)
		bh = bh->b_this_page;
	do {
		if (error)
			mark_buffer_write_io_error(bh);
		unlock_buffer(bh);
		next = bh->b_this_page;
		size -= bh->b_size;
		brelse(bh);
		bh = next;
	} while(bh && size);
}

/**
 * gfs2_end_log_write - end of i/o to the log
 * @bio: The bio
 *
 * Each bio_vec contains either data from the pagecache or data
 * relating to the log itself. Here we iterate over the bio_vec
 * array, processing both kinds of data.
 */

static void gfs2_end_log_write(struct bio *bio)
{
	struct gfs2_sbd *sdp = bio->bi_private;
	struct bio_vec *bvec;
	struct page *page;
	struct bvec_iter_all iter_all;

	if (bio->bi_status) {
		fs_err(sdp, "Error %d writing to journal, jid=%u\n",
		       bio->bi_status, sdp->sd_jdesc->jd_jid);
		wake_up(&sdp->sd_logd_waitq);
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (page_has_buffers(page))
			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
		else
			mempool_free(page, gfs2_page_pool);
	}

	bio_put(bio);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_submit_bio - Submit any pending log bio
 * @biop: Address of the bio pointer
 * @opf: REQ_OP | op_flags
 *
 * Submit any pending part-built or full bio to the block device. If
 * there is no pending bio, then this is a no-op.
 */

void gfs2_log_submit_bio(struct bio **biop, int opf)
{
	struct bio *bio = *biop;
	if (bio) {
		struct gfs2_sbd *sdp = bio->bi_private;
		atomic_inc(&sdp->sd_log_in_flight);
		bio->bi_opf = opf;
		submit_bio(bio);
		*biop = NULL;
	}
}

/**
 * gfs2_log_alloc_bio - Allocate a bio
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @end_io: The bi_end_io callback
 *
 * Allocate a new bio, initialize it with the given parameters and return it.
 *
 * Returns: The newly allocated bio
 */

static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
				      bio_end_io_t *end_io)
{
	struct super_block *sb = sdp->sd_vfs;
	struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);

	bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
	bio_set_dev(bio, sb->s_bdev);
	bio->bi_end_io = end_io;
	bio->bi_private = sdp;

	return bio;
}

/**
 * gfs2_log_get_bio - Get cached log bio, or allocate a new one
 * @sdp: The super block
 * @blkno: The device block number we want to write to
 * @biop: The bio to get or allocate
 * @op: REQ_OP
 * @end_io: The bi_end_io callback
 * @flush: Always flush the current bio and allocate a new one?
 *
 * If there is a cached bio, then if the next block number is sequential
 * with the previous one, return it, otherwise flush the bio to the
 * device. If there is no cached bio, or we just flushed it, then
 * allocate a new one.
 *
 * Returns: The bio to use for log writes
 */

static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
				    struct bio **biop, int op,
				    bio_end_io_t *end_io, bool flush)
{
	struct bio *bio = *biop;

	if (bio) {
		u64 nblk;

		nblk = bio_end_sector(bio);
		nblk >>= sdp->sd_fsb2bb_shift;
		if (blkno == nblk && !flush)
			return bio;
		gfs2_log_submit_bio(biop, op);
	}

	*biop = gfs2_log_alloc_bio(sdp, blkno, end_io);
	return *biop;
}

/**
 * gfs2_log_write - write to log
 * @sdp: the filesystem
 * @page: the page to write
 * @size: the size of the data to write
 * @offset: the offset within the page
 * @blkno: block number of the log entry
 *
 * Try and add the page segment to the current bio. If that fails,
 * submit the current bio to the device and create a new one, and
 * then add the page segment to that.
 */

void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
		    unsigned size, unsigned offset, u64 blkno)
{
	struct bio *bio;
	int ret;

	bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
			       gfs2_end_log_write, false);
	ret = bio_add_page(bio, page, size, offset);
	if (ret == 0) {
		bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
				       REQ_OP_WRITE, gfs2_end_log_write, true);
		ret = bio_add_page(bio, page, size, offset);
		WARN_ON(ret == 0);
	}
}
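
/* Note on the retry above: the bio obtained with flush == true has just been
 * submitted-and-reallocated, so it is empty, and a single log block never
 * exceeds a page; bio_add_page() is therefore expected to succeed the second
 * time, which is why a failure there only warrants a WARN_ON().
 */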

/**
 * gfs2_log_write_bh - write a buffer's content to the log
 * @sdp: The super block
 * @bh: The buffer pointing to the in-place location
 *
 * This writes the content of the buffer to the next available location
 * in the log. The buffer will be unlocked once the i/o to the log has
 * completed.
 */

static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh),
		       gfs2_log_bmap(sdp));
}

/**
 * gfs2_log_write_page - write one block stored in a page, into the log
 * @sdp: The superblock
 * @page: The struct page
 *
 * This writes the first block-sized part of the page into the log. Note
 * that the page must have been allocated from the gfs2_page_pool mempool
 * and that after this has been called, ownership has been transferred and
 * the page may be freed at any time.
 */

void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
{
	struct super_block *sb = sdp->sd_vfs;
	gfs2_log_write(sdp, page, sb->s_blocksize, 0,
		       gfs2_log_bmap(sdp));
}

/**
 * gfs2_end_log_read - end I/O callback for reads from the log
 * @bio: The bio
 *
 * Simply unlock the pages in the bio. The main thread will wait on them and
 * process them in order as necessary.
 */

static void gfs2_end_log_read(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;
		if (bio->bi_status) {
			int err = blk_status_to_errno(bio->bi_status);

			SetPageError(page);
			mapping_set_error(page->mapping, err);
		}
		unlock_page(page);
	}

	bio_put(bio);
}

/**
 * gfs2_jhead_pg_srch - Look for the journal head in a given page.
 * @jd: The journal descriptor
 * @head: The journal head to fill in
 * @page: The page to look in
 *
 * Returns: true if found, false otherwise.
 */

static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head,
			       struct page *page)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_log_header_host uninitialized_var(lh);
	void *kaddr = kmap_atomic(page);
	unsigned int offset;
	bool ret = false;

	for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
		if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
			if (lh.lh_sequence > head->lh_sequence)
				*head = lh;
			else {
				ret = true;
				break;
			}
		}
	}
	kunmap_atomic(kaddr);
	return ret;
}

/**
 * gfs2_jhead_process_page - Search/cleanup a page
 * @jd: The journal descriptor
 * @index: Index of the page to look into
 * @head: The journal head to fill in
 * @done: If set, perform only cleanup, else search and set if found.
 *
 * Find the page with 'index' in the journal's mapping. Search the page for
 * the journal head if requested (*done == false). Release refs on the
 * page so the page cache can reclaim it (put_page() twice). We grabbed a
 * reference on this page two times, first when we did a find_or_create_page()
 * to obtain the page to add it to the bio and second when we do a
 * find_get_page() here to get the page to wait on while I/O on it is being
 * completed.
 * This function is also used to free up a page we might've grabbed but not
 * used. Maybe we added it to a bio, but not submitted it for I/O. Or we
 * submitted the I/O, but we already found the jhead so we only need to drop
 * our references to the page.
 */

static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
				    struct gfs2_log_header_host *head,
				    bool *done)
{
	struct page *page;

	page = find_get_page(jd->jd_inode->i_mapping, index);
	wait_on_page_locked(page);

	if (PageError(page))
		*done = true;

	if (!*done)
		*done = gfs2_jhead_pg_srch(jd, head, page);

	put_page(page); /* Once for find_get_page */
	put_page(page); /* Once more for find_or_create_page */
}

/**
 * gfs2_find_jhead - find the head of a log
 * @jd: The journal descriptor
 * @head: The log descriptor for the head of the log is returned here
 * @keep_cache: If set, don't truncate the journal's page cache on exit
 *
 * Do a search of a journal by reading it in large chunks using bios and find
 * the valid log entry with the highest sequence number. (i.e. the log head)
 *
 * Returns: 0 on success, errno otherwise
 */
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
		    bool keep_cache)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct address_space *mapping = jd->jd_inode->i_mapping;
	unsigned int block = 0, blocks_submitted = 0, blocks_read = 0;
	unsigned int bsize = sdp->sd_sb.sb_bsize;
	unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
	unsigned int shift = PAGE_SHIFT - bsize_shift;
	unsigned int readhead_blocks = BIO_MAX_PAGES << shift;
	struct gfs2_journal_extent *je;
	int sz, ret = 0;
	struct bio *bio = NULL;
	struct page *page = NULL;
	bool done = false;
	errseq_t since;

	memset(head, 0, sizeof(*head));
	if (list_empty(&jd->extent_list))
		gfs2_map_journal_extents(sdp, jd);

	since = filemap_sample_wb_err(mapping);
	list_for_each_entry(je, &jd->extent_list, list) {
		for (; block < je->lblock + je->blocks; block++) {
			u64 dblock;

			if (!page) {
				page = find_or_create_page(mapping,
						block >> shift, GFP_NOFS);
				if (!page) {
					ret = -ENOMEM;
					done = true;
					goto out;
				}
			}

			if (bio) {
				unsigned int off;

				off = (block << bsize_shift) & ~PAGE_MASK;
				sz = bio_add_page(bio, page, bsize, off);
				if (sz == bsize) { /* block added */
					if (off + bsize == PAGE_SIZE) {
						page = NULL;
						goto page_added;
					}
					continue;
				}
				blocks_submitted = block + 1;
				submit_bio(bio);
				bio = NULL;
			}

			dblock = je->dblock + (block - je->lblock);
			bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read);
			bio->bi_opf = REQ_OP_READ;
			sz = bio_add_page(bio, page, bsize, 0);
			gfs2_assert_warn(sdp, sz == bsize);
			if (bsize == PAGE_SIZE)
				page = NULL;

page_added:
			if (blocks_submitted < blocks_read + readhead_blocks) {
				/* Keep at least one bio in flight */
				continue;
			}

			gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
			blocks_read += PAGE_SIZE >> bsize_shift;
			if (done)
				goto out; /* found */
		}
	}

out:
	if (bio)
		submit_bio(bio);
	while (blocks_read < block) {
		gfs2_jhead_process_page(jd, blocks_read >> shift, head, &done);
		blocks_read += PAGE_SIZE >> bsize_shift;
	}

	if (!ret)
		ret = filemap_check_wb_err(mapping, since);

	if (!keep_cache)
		truncate_inode_pages(mapping, 0);

	return ret;
}
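
/* Illustrative sketch (not part of this file): journal recovery is the
 * typical caller of gfs2_find_jhead(). Assuming the usual recovery flow,
 * a caller might use the result roughly like this:
 *
 *	struct gfs2_log_header_host head;
 *	int error = gfs2_find_jhead(jd, &head, false);
 *
 *	if (!error && !(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
 *		// journal was dirty: replay from head.lh_tail up to head.lh_blkno
 *
 * The field and flag names come from gfs2_ondisk.h/incore.h; the surrounding
 * logic is only a sketch of how the returned head is consumed.
 */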

static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type,
				      u32 ld_length, u32 ld_data1)
{
	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	struct gfs2_log_descriptor *ld = page_address(page);
	clear_page(ld);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = cpu_to_be32(ld_length);
	ld->ld_data1 = cpu_to_be32(ld_data1);
	ld->ld_data2 = 0;
	return page;
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr);
}
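
/* Why "escaping" exists: a journaled data block whose first 32 bits happen
 * to equal GFS2_MAGIC could be mistaken for metadata during log replay.
 * gfs2_check_magic() flags such blocks; gfs2_before_commit() then writes a
 * copy with the first word zeroed and records a 1 in the block's escape slot
 * of the log descriptor, and databuf_lo_scan_elements() restores the magic
 * number when the block is replayed.
 */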

static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct gfs2_bufdata *bda, *bdb;

	bda = list_entry(a, struct gfs2_bufdata, bd_list);
	bdb = list_entry(b, struct gfs2_bufdata, bd_list);

	if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
		return -1;
	if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr)
		return 1;
	return 0;
}

static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
			       unsigned int total, struct list_head *blist,
			       bool is_databuf)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	struct page *page;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	gfs2_log_lock(sdp);
	list_sort(NULL, blist, blocknr_cmp);
	bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		page = gfs2_get_log_desc(sdp,
					 is_databuf ? GFS2_LOG_DESC_JDATA :
					 GFS2_LOG_DESC_METADATA, num + 1, num);
		ld = page_address(page);
		gfs2_log_lock(sdp);
		ptr = (__be64 *)(ld + 1);

		n = 0;
		list_for_each_entry_continue(bd1, blist, bd_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (is_databuf) {
				gfs2_check_magic(bd1->bd_bh);
				*ptr++ = cpu_to_be64(buffer_escaped(bd1->bd_bh) ? 1 : 0);
			}
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		gfs2_log_write_page(sdp, page);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, blist, bd_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);

			if (buffer_escaped(bd2->bd_bh)) {
				void *kaddr;
				page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
				ptr = page_address(page);
				kaddr = kmap_atomic(bd2->bd_bh->b_page);
				memcpy(ptr, kaddr + bh_offset(bd2->bd_bh),
				       bd2->bd_bh->b_size);
				kunmap_atomic(kaddr);
				*(__be32 *)ptr = 0;
				clear_buffer_escaped(bd2->bd_bh);
				unlock_buffer(bd2->bd_bh);
				brelse(bd2->bd_bh);
				gfs2_log_write_page(sdp, page);
			} else {
				gfs2_log_write_bh(sdp, bd2->bd_bh);
			}
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}

static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0);
}
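
/* Where the 503 above comes from: a metadata log descriptor block holds the
 * descriptor header followed by one __be64 block pointer per buffer, so the
 * limit is (block size - sizeof(struct gfs2_log_descriptor)) / sizeof(__be64).
 * Assuming the usual 72-byte descriptor, a 4096-byte block gives
 * (4096 - 72) / 8 = 503 buffers per descriptor.
 */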

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_buf;
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_blocks = 0;
	jd->jd_replayed_blocks = 0;
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		jd->jd_replayed_blocks++;
	}

	return error;
}

/**
 * gfs2_meta_sync - Sync all buffers associated with a glock
 * @gl: The glock
 */

static void gfs2_meta_sync(struct gfs2_glock *gl)
{
	struct address_space *mapping = gfs2_glock2aspace(gl);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	int error;

	if (mapping == NULL)
		mapping = &sdp->sd_aspace;

	filemap_fdatawrite(mapping);
	error = filemap_fdatawait(mapping);

	if (error)
		gfs2_io_error(gl->gl_name.ln_sbd);
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct gfs2_meta_header *mh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct page *page;
	unsigned int length;

	gfs2_write_revokes(sdp);
	if (!sdp->sd_log_num_revoke)
		return;

	length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64));
	page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE, length, sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			gfs2_log_write_page(sdp, page);
			page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
			mh = page_address(page);
			clear_page(mh);
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(page_address(page) + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	gfs2_log_write_page(sdp, page);
}
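
/* Capacity math for the revoke list above: the first log block carries a
 * struct gfs2_log_descriptor, each continuation block only a struct
 * gfs2_meta_header, and every revoke takes sizeof(u64) = 8 bytes. Assuming
 * 4096-byte blocks and the usual header sizes (72 and 24 bytes), that is
 * (4096 - 72) / 8 = 503 revokes in the first block and (4096 - 24) / 8 = 509
 * in each continuation block, which is what gfs2_struct2blk() sizes the
 * write for.
 */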

static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &sdp->sd_log_revokes;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gl = bd->bd_gl;
		if (atomic_dec_return(&gl->gl_revokes) == 0) {
			clear_bit(GLF_LFLUSH, &gl->gl_flags);
			gfs2_glock_queue_put(gl);
		}
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	if (pass != 0)
		return;

	jd->jd_found_revokes = 0;
	jd->jd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(jd, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			}
			else if (error)
				jd->jd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(jd);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, jd->jd_found_revokes);

	gfs2_revoke_clean(jd);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: The filesystem
 * @tr: The transaction being flushed
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int limit = databuf_limit(sdp);
	unsigned int nbuf;
	if (tr == NULL)
		return;
	nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
	gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(jd, &start);
	for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		jd->jd_found_blocks++;

		if (gfs2_revoke_check(jd, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		jd->jd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head;
	struct gfs2_bufdata *bd;

	if (tr == NULL)
		return;

	head = &tr->tr_databuf;
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		gfs2_unpin(sdp, bd->bd_bh, tr);
	}
}

static const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

static const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

static const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	NULL,
};
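
/* How this table is consumed (sketch of the inline helpers in lops.h; the
 * exact bodies live there, this only shows the shape of the dispatch):
 *
 *	static inline void lops_before_commit(struct gfs2_sbd *sdp,
 *					      struct gfs2_trans *tr)
 *	{
 *		int x;
 *		for (x = 0; gfs2_log_ops[x]; x++)
 *			if (gfs2_log_ops[x]->lo_before_commit)
 *				gfs2_log_ops[x]->lo_before_commit(sdp, tr);
 *	}
 *
 * Matching helpers for the after_commit, before_scan, scan_elements and
 * after_scan hooks walk the same NULL-terminated array, which is why a
 * missing hook (e.g. gfs2_databuf_lops has no lo_before_scan) is simply
 * skipped.
 */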