fs/gfs2/bmap.c

   1 /*
   2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   3  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
   4  *
   5  * This copyrighted material is made available to anyone wishing to use,
   6  * modify, copy, or redistribute it subject to the terms and conditions
   7  * of the GNU General Public License version 2.
   8  */
   9
  10 #include <linux/spinlock.h>
  11 #include <linux/completion.h>
  12 #include <linux/buffer_head.h>
  13 #include <linux/blkdev.h>
  14 #include <linux/gfs2_ondisk.h>
  15 #include <linux/crc32.h>
  16 #include <linux/iomap.h>
  17
  18 #include "gfs2.h"
  19 #include "incore.h"
  20 #include "bmap.h"
  21 #include "glock.h"
  22 #include "inode.h"
  23 #include "meta_io.h"
  24 #include "quota.h"
  25 #include "rgrp.h"
  26 #include "log.h"
  27 #include "super.h"
  28 #include "trans.h"
  29 #include "dir.h"
  30 #include "util.h"
  31 #include "trace_gfs2.h"
  32
  33 /* Path through an inode's metadata tree, one entry per height.  mp_list
  34  * doesn't need to be that large as max 64 bit pointers in a 4k block is
  35  * 512, so __u16 is fine for that. It saves stack space to keep it small.
  36  */
  37 struct metapath {
  38         struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; /* buffer per height; [0] is the dinode */
  39         __u16 mp_list[GFS2_MAX_META_HEIGHT]; /* index of the pointer followed at each height */
  40         int mp_fheight; /* find_metapath height */
  41         int mp_aheight; /* actual height (lookup height) */
  42 };
  43
  44 /**
  45  * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
  46  * @ip: the inode
  47  * @dibh: the dinode buffer
  48  * @block: the block number that was allocated
  49  * @page: The (optional) page. This is looked up if @page is NULL
  50  *
  51  * Returns: errno
  52  */
  53
  54 static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
  55                                u64 block, struct page *page)
  56 {
  57         struct inode *inode = &ip->i_inode;
  58         struct buffer_head *bh;
  59         int release = 0;
  60
  61         if (!page || page->index) {
  62                 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
  63                 if (!page)
  64                         return -ENOMEM;
  65                 release = 1;
  66         }
  67
  68         if (!PageUptodate(page)) {
  69                 void *kaddr = kmap(page);
  70                 u64 dsize = i_size_read(inode);
  71
  72                 if (dsize > gfs2_max_stuffed_size(ip))
  73                         dsize = gfs2_max_stuffed_size(ip);
  74
  75                 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
  76                 memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
  77                 kunmap(page);
  78
  79                 SetPageUptodate(page);
  80         }
  81
  82         if (!page_has_buffers(page))
  83                 create_empty_buffers(page, BIT(inode->i_blkbits),
  84                                      BIT(BH_Uptodate));
  85
  86         bh = page_buffers(page);
  87
  88         if (!buffer_mapped(bh))
  89                 map_bh(bh, inode->i_sb, block);
  90
  91         set_buffer_uptodate(bh);
  92         if (!gfs2_is_jdata(ip))
  93                 mark_buffer_dirty(bh);
  94         if (!gfs2_is_writeback(ip))
  95                 gfs2_trans_add_data(ip->i_gl, bh);
  96
  97         if (release) {
  98                 unlock_page(page);
  99                 put_page(page);
 100         }
 101
 102         return 0;
 103 }
 104
 105 /**
 106  * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 107  * @ip: The GFS2 inode to unstuff
 108  * @page: The (optional) page. This is looked up if the @page is NULL
 109  *
 110  * This routine unstuffs a dinode and returns it to a "normal" state such
 111  * that the height can be grown in the traditional way.
 112  *
 113  * Returns: errno
 114  */
 115
 116 int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
 117 {
 118         struct buffer_head *bh, *dibh;
 119         struct gfs2_dinode *di;
 120         u64 block = 0;
 121         int isdir = gfs2_is_dir(ip);
 122         int error;
 123
 124         down_write(&ip->i_rw_mutex);
 125
 126         error = gfs2_meta_inode_buffer(ip, &dibh);
 127         if (error)
 128                 goto out;
 129
 130         if (i_size_read(&ip->i_inode)) {
 131                 /* Get a free block, fill it with the stuffed data,
 132                    and write it out to disk */
 133
 134                 unsigned int n = 1;
 135                 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
 136                 if (error)
 137                         goto out_brelse;
 138                 if (isdir) {
 139                         gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1);
 140                         error = gfs2_dir_get_new_buffer(ip, block, &bh);
 141                         if (error)
 142                                 goto out_brelse;
 143                         gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
 144                                               dibh, sizeof(struct gfs2_dinode));
 145                         brelse(bh);
 146                 } else {
 147                         error = gfs2_unstuffer_page(ip, dibh, block, page);
 148                         if (error)
 149                                 goto out_brelse;
 150                 }
 151         }
 152
 153         /*  Set up the pointer to the new block  */
 154
 155         gfs2_trans_add_meta(ip->i_gl, dibh);
 156         di = (struct gfs2_dinode *)dibh->b_data;
 157         gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 158
 159         if (i_size_read(&ip->i_inode)) {
 160                 *(__be64 *)(di + 1) = cpu_to_be64(block);
 161                 gfs2_add_inode_blocks(&ip->i_inode, 1);
 162                 di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 163         }
 164
 165         ip->i_height = 1;
 166         di->di_height = cpu_to_be16(1);
 167
 168 out_brelse:
 169         brelse(dibh);
 170 out:
 171         up_write(&ip->i_rw_mutex);
 172         return error;
 173 }
 174
 175
 176 /**
 177  * find_metapath - Find path through the metadata tree
 178  * @sdp: The superblock
 179  * @mp: The metapath to return the result in
 180  * @block: The disk block to look up
 181  * @height: The pre-calculated height of the metadata tree
 182  *
 183  *   This routine returns a struct metapath structure that defines a path
 184  *   through the metadata of inode "ip" to get to block "block".
 185  *
 186  *   Example:
 187  *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 188  *   filesystem with a blocksize of 4096.
 189  *
 190  *   find_metapath() would return a struct metapath structure set to:
 191  *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
 192  *   and mp_list[2] = 165.
 193  *
 194  *   That means that in order to get to the block containing the byte at
 195  *   offset 101342453, we would load the indirect block pointed to by pointer
 196  *   0 in the dinode.  We would then load the indirect block pointed to by
 197  *   pointer 48 in that indirect block.  We would then load the data block
 198  *   pointed to by pointer 165 in that indirect block.
 199  *
 200  *             ----------------------------------------
 201  *             | Dinode |                             |
 202  *             |        |                            4|
 203  *             |        |0 1 2 3 4 5                 9|
 204  *             |        |                            6|
 205  *             ----------------------------------------
 206  *                       |
 207  *                       |
 208  *                       V
 209  *             ----------------------------------------
 210  *             | Indirect Block                       |
 211  *             |                                     5|
 212  *             |            4 4 4 4 4 5 5            1|
 213  *             |0           5 6 7 8 9 0 1            2|
 214  *             ----------------------------------------
 215  *                                |
 216  *                                |
 217  *                                V
 218  *             ----------------------------------------
 219  *             | Indirect Block                       |
 220  *             |                         1 1 1 1 1   5|
 221  *             |                         6 6 6 6 6   1|
 222  *             |0                        3 4 5 6 7   2|
 223  *             ----------------------------------------
 224  *                                           |
 225  *                                           |
 226  *                                           V
 227  *             ----------------------------------------
 228  *             | Data block containing offset         |
 229  *             |            101342453                 |
 230  *             |                                      |
 231  *             |                                      |
 232  *             ----------------------------------------
 233  *
 234  */
 235
 236 static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 237                           struct metapath *mp, unsigned int height)
 238 {
 239         unsigned int i;
 240
 241         mp->mp_fheight = height;
 242         for (i = height; i--;)
 243                 mp->mp_list[i] = do_div(block, sdp->sd_inptrs);
 244 }
 245
 246 static inline unsigned int metapath_branch_start(const struct metapath *mp)
 247 {
 248         if (mp->mp_list[0] == 0)
 249                 return 2;
 250         return 1;
 251 }
 252
 253 /**
 254  * metaptr1 - Return the first possible metadata pointer in a metapath buffer
 255  * @height: The metadata height (0 = dinode)
 256  * @mp: The metapath
 257  */
 258 static inline __be64 *metaptr1(unsigned int height, const struct metapath *mp)
 259 {
 260         struct buffer_head *bh = mp->mp_bh[height];
 261         if (height == 0)
 262                 return ((__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)));
 263         return ((__be64 *)(bh->b_data + sizeof(struct gfs2_meta_header)));
 264 }
 265
 266 /**
 267  * metapointer - Return pointer to start of metadata in a buffer
 268  * @height: The metadata height (0 = dinode)
 269  * @mp: The metapath
 270  *
 271  * Return a pointer to the block number of the next height of the metadata
 272  * tree given a buffer containing the pointer to the current height of the
 273  * metadata tree.
 274  */
 275
 276 static inline __be64 *metapointer(unsigned int height, const struct metapath *mp)
 277 {
 278         __be64 *p = metaptr1(height, mp);
 279         return p + mp->mp_list[height];
 280 }
 281
 282 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 283 {
 284         const __be64 *t;
 285
 286         for (t = start; t < end; t++) {
 287                 struct buffer_head *rabh;
 288
 289                 if (!*t)
 290                         continue;
 291
 292                 rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
 293                 if (trylock_buffer(rabh)) {
 294                         if (!buffer_uptodate(rabh)) {
 295                                 rabh->b_end_io = end_buffer_read_sync;
 296                                 submit_bh(REQ_OP_READ,
 297                                           REQ_RAHEAD | REQ_META | REQ_PRIO,
 298                                           rabh);
 299                                 continue;
 300                         }
 301                         unlock_buffer(rabh);
 302                 }
 303                 brelse(rabh);
 304         }
 305 }
 306
 307 static int __fillup_metapath(struct gfs2_inode *ip, struct metapath *mp,
 308                              unsigned int x, unsigned int h)
 309 {
 310         for (; x < h; x++) {
 311                 __be64 *ptr = metapointer(x, mp);
 312                 u64 dblock = be64_to_cpu(*ptr);
 313                 int ret;
 314
 315                 if (!dblock)
 316                         break;
 317                 ret = gfs2_meta_indirect_buffer(ip, x + 1, dblock, &mp->mp_bh[x + 1]);
 318                 if (ret)
 319                         return ret;
 320         }
 321         mp->mp_aheight = x + 1;
 322         return 0;
 323 }
 324
 325 /**
 326  * lookup_metapath - Walk the metadata tree to a specific point
 327  * @ip: The inode
 328  * @mp: The metapath
 329  *
 330  * Assumes that the inode's buffer has already been looked up and
 331  * hooked onto mp->mp_bh[0] and that the metapath has been initialised
 332  * by find_metapath().
 333  *
 334  * If this function encounters part of the tree which has not been
 335  * allocated, it returns the current height of the tree at the point
 336  * at which it found the unallocated block. Blocks which are found are
 337  * added to the mp->mp_bh[] list.
 338  *
 339  * Returns: error
 340  */
 341
 342 static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp)
 343 {
 344         return __fillup_metapath(ip, mp, 0, ip->i_height - 1);
 345 }
 346
 347 /**
 348  * fillup_metapath - fill up buffers for the metadata path to a specific height
 349  * @ip: The inode
 350  * @mp: The metapath
 351  * @h: The height to which it should be mapped
 352  *
 353  * Similar to lookup_metapath, but does lookups for a range of heights
 354  *
 355  * Returns: error or the number of buffers filled
 356  */
 357
 358 static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
 359 {
 360         unsigned int x = 0;
 361         int ret;
 362
 363         if (h) {
 364                 /* find the first buffer we need to look up. */
 365                 for (x = h - 1; x > 0; x--) {
 366                         if (mp->mp_bh[x])
 367                                 break;
 368                 }
 369         }
 370         ret = __fillup_metapath(ip, mp, x, h);
 371         if (ret)
 372                 return ret;
 373         return mp->mp_aheight - x - 1;
 374 }
 375
 376 static inline void release_metapath(struct metapath *mp)
 377 {
 378         int i;
 379
 380         for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) {
 381                 if (mp->mp_bh[i] == NULL)
 382                         break;
 383                 brelse(mp->mp_bh[i]);
 384         }
 385 }
 386
 387 /**
 388  * gfs2_extent_length - Returns length of an extent of blocks
 389  * @start: Start of the buffer
 390  * @len: Length of the buffer in bytes
 391  * @ptr: Current position in the buffer
 392  * @limit: Max extent length to return (0 = unlimited)
 393  * @eob: Set to 1 if we hit "end of block"
 394  *
 395  * If the first block is zero (unallocated) it will return the number of
 396  * unallocated blocks in the extent, otherwise it will return the number
 397  * of contiguous blocks in the extent.
 398  *
 399  * Returns: The length of the extent (minimum of one block)
 400  */
 401
 402 static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob)
 403 {
 404         const __be64 *end = (start + len);
 405         const __be64 *first = ptr;
 406         u64 d = be64_to_cpu(*ptr);
 407
 408         *eob = 0;
 409         do {
 410                 ptr++;
 411                 if (ptr >= end)
 412                         break;
 413                 if (limit && --limit == 0)
 414                         break;
 415                 if (d)
 416                         d++;
 417         } while(be64_to_cpu(*ptr) == d);
 418         if (ptr >= end)
 419                 *eob = 1;
 420         return (ptr - first);
 421 }
 422
 423 static inline void bmap_lock(struct gfs2_inode *ip, int create)
 424 {
 425         if (create)
 426                 down_write(&ip->i_rw_mutex);
 427         else
 428                 down_read(&ip->i_rw_mutex);
 429 }
 430
 431 static inline void bmap_unlock(struct gfs2_inode *ip, int create)
 432 {
 433         if (create)
 434                 up_write(&ip->i_rw_mutex);
 435         else
 436                 up_read(&ip->i_rw_mutex);
 437 }
 438
 439 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
 440                                          struct gfs2_glock *gl, unsigned int i,
 441                                          unsigned offset, u64 bn)
 442 {
 443         __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data +
 444                        ((i > 1) ? sizeof(struct gfs2_meta_header) :
 445                                  sizeof(struct gfs2_dinode)));
 446         BUG_ON(i < 1);
 447         BUG_ON(mp->mp_bh[i] != NULL);
 448         mp->mp_bh[i] = gfs2_meta_new(gl, bn);
 449         gfs2_trans_add_meta(gl, mp->mp_bh[i]);
 450         gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
 451         gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
 452         ptr += offset;
 453         *ptr = cpu_to_be64(bn);
 454         return ptr;
 455 }
 456
 457 enum alloc_state {      /* states of the gfs2_iomap_alloc() state machine */
 458         ALLOC_DATA = 0,         /* tree complete, adding data blocks */
 459         ALLOC_GROW_DEPTH = 1,   /* branching from existing tree */
 460         ALLOC_GROW_HEIGHT = 2,  /* growing height of tree */
 461         /* ALLOC_UNSTUFF = 3,   TBD and rather complicated */
 462 };
 463
 464 /**
 465  * gfs2_iomap_alloc - Build a metadata tree of the requested height
 466  * @inode: The GFS2 inode
 467  * @iomap: The iomap; on entry iomap->length bounds the number of data
 468  *         blocks to allocate, on return addr, length and IOMAP_F_NEW
 469  *         describe the newly allocated data extent
 470  * @flags: iomap flags; IOMAP_ZERO asks for the new data blocks to be zeroed
 471  * @mp: The metapath, with proper height information calculated
 472  *
 473  * The dinode buffer must already be hooked onto mp->mp_bh[0].
 474  *
 475  * In this routine we may have to alloc:
 476  *   i) Indirect blocks to grow the metadata tree height
 477  *  ii) Indirect blocks to fill in lower part of the metadata tree
 478  * iii) Data blocks
 479  *
 480  * The function is in two parts. The first part works out the total
 481  * number of blocks which we need. The second part does the actual
 482  * allocation asking for an extent at a time (if enough contiguous free
 483  * blocks are available, there will only be one request per bmap call)
 484  * and uses the state machine to initialise the blocks in order.
 485  *
 486  * Returns: 0 on success, or the error returned by gfs2_alloc_blocks()
 487  */
 488
 489 static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap,
 490                             unsigned flags, struct metapath *mp)
 491 {
 492         struct gfs2_inode *ip = GFS2_I(inode);
 493         struct gfs2_sbd *sdp = GFS2_SB(inode);
 494         struct super_block *sb = sdp->sd_vfs;
 495         struct buffer_head *dibh = mp->mp_bh[0];
 496         u64 bn;
 497         unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 498         unsigned dblks = 0;
 499         unsigned ptrs_per_blk;
 500         const unsigned end_of_metadata = mp->mp_fheight - 1; /* height of the bottom pointer block */
 501         int ret;
 502         enum alloc_state state;
 503         __be64 *ptr;
 504         __be64 zero_bn = 0;
 505         size_t maxlen = iomap->length >> inode->i_blkbits; /* requested length in blocks */
 506
 507         BUG_ON(mp->mp_aheight < 1);
 508         BUG_ON(dibh == NULL);
 509
 510         gfs2_trans_add_meta(ip->i_gl, dibh);
 511
 512         if (mp->mp_fheight == mp->mp_aheight) {
 513                 struct buffer_head *bh;
 514                 int eob;
 515
 516                 /* Bottom indirect block exists, find unalloced extent size */
 517                 ptr = metapointer(end_of_metadata, mp);
 518                 bh = mp->mp_bh[end_of_metadata];
 519                 dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr,
 520                                            maxlen, &eob);
 521                 BUG_ON(dblks < 1);
 522                 state = ALLOC_DATA;
 523         } else {
 524                 /* Need to allocate indirect blocks */
 525                 ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs :
 526                         sdp->sd_diptrs;
 527                 dblks = min(maxlen, (size_t)(ptrs_per_blk -
 528                                              mp->mp_list[end_of_metadata])); /* slots left in the bottom block */
 529                 if (mp->mp_fheight == ip->i_height) {
 530                         /* Writing into existing tree, extend tree down */
 531                         iblks = mp->mp_fheight - mp->mp_aheight;
 532                         state = ALLOC_GROW_DEPTH;
 533                 } else {
 534                         /* Building up tree height */
 535                         state = ALLOC_GROW_HEIGHT;
 536                         iblks = mp->mp_fheight - ip->i_height;
 537                         branch_start = metapath_branch_start(mp);
 538                         iblks += (mp->mp_fheight - branch_start);
 539                 }
 540         }
 541
 542         /* start of the second part of the function (state machine) */
 543
 544         blks = dblks + iblks;
 545         i = mp->mp_aheight;
 546         do {
 547                 int error;
 548                 n = blks - alloced; /* ask for everything still missing; n returns the extent granted */
 549                 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
 550                 if (error)
 551                         return error;
 552                 alloced += n;
 553                 if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 554                         gfs2_trans_add_unrevoke(sdp, bn, n);
 555                 switch (state) {
 556                 /* Growing height of tree */
 557                 case ALLOC_GROW_HEIGHT:
 558                         if (i == 1) {
 559                                 ptr = (__be64 *)(dibh->b_data +
 560                                                  sizeof(struct gfs2_dinode));
 561                                 zero_bn = *ptr; /* save the dinode's first pointer; restored below */
 562                         }
 563                         for (; i - 1 < mp->mp_fheight - ip->i_height && n > 0;
 564                              i++, n--)
 565                                 gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++);
 566                         if (i - 1 == mp->mp_fheight - ip->i_height) {
 567                                 i--; /* all new top levels exist: move the dinode's pointers down */
 568                                 gfs2_buffer_copy_tail(mp->mp_bh[i],
 569                                                 sizeof(struct gfs2_meta_header),
 570                                                 dibh, sizeof(struct gfs2_dinode));
 571                                 gfs2_buffer_clear_tail(dibh,
 572                                                 sizeof(struct gfs2_dinode) +
 573                                                 sizeof(__be64));
 574                                 ptr = (__be64 *)(mp->mp_bh[i]->b_data +
 575                                         sizeof(struct gfs2_meta_header));
 576                                 *ptr = zero_bn;
 577                                 state = ALLOC_GROW_DEPTH;
 578                                 for(i = branch_start; i < mp->mp_fheight; i++) {
 579                                         if (mp->mp_bh[i] == NULL)
 580                                                 break;
 581                                         brelse(mp->mp_bh[i]);
 582                                         mp->mp_bh[i] = NULL;
 583                                 }
 584                                 i = branch_start;
 585                         }
 586                         if (n == 0)
 587                                 break;
 588                 /* Fall through - branching from existing tree */
 589                 case ALLOC_GROW_DEPTH:
 590                         if (i > 1 && i < mp->mp_fheight)
 591                                 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
 592                         for (; i < mp->mp_fheight && n > 0; i++, n--)
 593                                 gfs2_indirect_init(mp, ip->i_gl, i,
 594                                                    mp->mp_list[i-1], bn++);
 595                         if (i == mp->mp_fheight)
 596                                 state = ALLOC_DATA;
 597                         if (n == 0)
 598                                 break;
 599                 /* Fall through - tree complete, adding data blocks */
 600                 case ALLOC_DATA:
 601                         BUG_ON(n > dblks);
 602                         BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
 603                         gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]);
 604                         dblks = n; /* data blocks actually obtained */
 605                         ptr = metapointer(end_of_metadata, mp);
 606                         iomap->addr = bn << inode->i_blkbits;
 607                         iomap->flags |= IOMAP_F_NEW;
 608                         while (n-- > 0)
 609                                 *ptr++ = cpu_to_be64(bn++);
 610                         if (flags & IOMAP_ZERO) {
 611                                 ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits,
 612                                                        dblks, GFP_NOFS);
 613                                 if (ret) {
 614                                         fs_err(sdp,
 615                                                "Failed to zero data buffers\n");
 616                                         flags &= ~IOMAP_ZERO; /* NOTE(review): by-value parameter, not read again below */
 617                                 }
 618                         }
 619                         break;
 620                 }
 621         } while (iomap->addr == IOMAP_NULL_ADDR); /* only the ALLOC_DATA state sets iomap->addr */
 622
 623         iomap->length = (u64)dblks << inode->i_blkbits;
 624         ip->i_height = mp->mp_fheight;
 625         gfs2_add_inode_blocks(&ip->i_inode, alloced);
 626         gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
 627         return 0;
 628 }
 629
 630 /**
 631  * hole_size - figure out the size of a hole
 632  * @inode: The inode
 633  * @lblock: The logical starting block number
 634  * @mp: The metapath; its mp_list[] entries may be advanced by this function
 635  *
 636  * Returns: The hole size in bytes
 637  *
 638  */
 639 static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
 640 {
 641         struct gfs2_inode *ip = GFS2_I(inode);
 642         struct gfs2_sbd *sdp = GFS2_SB(inode);
 643         struct metapath mp_eof;
 644         u64 factor = 1; /* data blocks covered by one pointer at the current height */
 645         int hgt;
 646         u64 holesz = 0; /* accumulated in blocks, converted to bytes on return */
 647         const __be64 *first, *end, *ptr;
 648         const struct buffer_head *bh;
 649         u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits; /* block holding the last byte; NOTE(review): assumes i_size > 0 */
 650         int zeroptrs;
 651         bool done = false;
 652
 653         /* Get another metapath, to the very last byte */
 654         find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
 655         for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) { /* bottom of the tree up to the dinode */
 656                 bh = mp->mp_bh[hgt];
 657                 if (bh) {
 658                         zeroptrs = 0;
 659                         first = metapointer(hgt, mp);
 660                         end = (const __be64 *)(bh->b_data + bh->b_size);
 661
 662                         for (ptr = first; ptr < end; ptr++) { /* count zero pointers up to the first allocated one */
 663                                 if (*ptr) {
 664                                         done = true;
 665                                         break;
 666                                 } else {
 667                                         zeroptrs++;
 668                                 }
 669                         }
 670                 } else {
 671                         zeroptrs = sdp->sd_inptrs; /* no buffer at this height: count a full block of pointers */
 672                 }
 673                 if (factor * zeroptrs >= lblock_stop - lblock + 1) { /* hole reaches the last block of the file */
 674                         holesz = lblock_stop - lblock + 1;
 675                         break;
 676                 }
 677                 holesz += factor * zeroptrs;
 678
 679                 factor *= sdp->sd_inptrs; /* one height up, each pointer spans sd_inptrs times as much */
 680                 if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
 681                         (mp->mp_list[hgt - 1])++; /* continue from the next pointer in the parent */
 682         }
 683         return holesz << inode->i_blkbits;
 684 }
 685
 686 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
 687 {
 688         struct gfs2_inode *ip = GFS2_I(inode);
 689
 690         iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
 691                       sizeof(struct gfs2_dinode);
 692         iomap->offset = 0;
 693         iomap->length = i_size_read(inode);
 694         iomap->type = IOMAP_MAPPED;
 695         iomap->flags = IOMAP_F_DATA_INLINE;
 696 }
 697
 698 /**
 699  * gfs2_iomap_begin - Map blocks from an inode to disk blocks
 700  * @inode: The inode
 701  * @pos: Starting position in bytes
 702  * @length: Length to map, in bytes
 703  * @flags: iomap flags (IOMAP_WRITE allocates, IOMAP_REPORT only reports)
 704  * @iomap: The iomap structure
 705  *
 706  * Returns: 0, -EINVAL (zero @length), -ENOENT (IOMAP_REPORT at or past i_size) or errno
 707  */
 708 int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 709                      unsigned flags, struct iomap *iomap)
 710 {
 711         struct gfs2_inode *ip = GFS2_I(inode);
 712         struct gfs2_sbd *sdp = GFS2_SB(inode);
 713         struct metapath mp = { .mp_aheight = 1, }; /* remaining fields zeroed, so mp_bh[] starts all NULL */
 714         unsigned int factor = sdp->sd_sb.sb_bsize;
 715         const u64 *arr = sdp->sd_heightsize;
 716         __be64 *ptr;
 717         sector_t lblock;
 718         sector_t lend;
 719         int ret = 0;
 720         int eob;
 721         unsigned int len;
 722         struct buffer_head *bh;
 723         u8 height;
 724
 725         trace_gfs2_iomap_start(ip, pos, length, flags);
 726         if (!length) {
 727                 ret = -EINVAL;
 728                 goto out;
 729         }
 730
 731         if (gfs2_is_stuffed(ip)) {
 732                 if (flags & IOMAP_REPORT) {
 733                         gfs2_stuffed_iomap(inode, iomap);
 734                         if (pos >= iomap->length)
 735                                 ret = -ENOENT;
 736                         goto out;
 737                 }
 738                 BUG_ON(!(flags & IOMAP_WRITE)); /* past this point a stuffed inode is only valid for writes */
 739         }
 740
 741         lblock = pos >> inode->i_blkbits;
 742         lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits; /* end of the range, rounded up by sb_bsize */
 743
 744         iomap->offset = lblock << inode->i_blkbits; /* start out as a hole covering the whole requested range */
 745         iomap->addr = IOMAP_NULL_ADDR;
 746         iomap->type = IOMAP_HOLE;
 747         iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
 748         iomap->flags = IOMAP_F_MERGED;
 749         bmap_lock(ip, flags & IOMAP_WRITE); /* write lock for IOMAP_WRITE, read lock otherwise */
 750
 751         /*
 752          * Directory data blocks have a struct gfs2_meta_header header, so the
 753          * remaining size is smaller than the filesystem block size.  Logical
 754          * block numbers for directories are in units of this remaining size!
 755          */
 756         if (gfs2_is_dir(ip)) {
 757                 factor = sdp->sd_jbsize;
 758                 arr = sdp->sd_jheightsize;
 759         }
 760
 761         ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
 762         if (ret)
 763                 goto out_release;
 764
 765         height = ip->i_height;
 766         while ((lblock + 1) * factor > arr[height]) /* presumably arr[h] is the size addressable at height h - confirm */
 767                 height++;
 768         find_metapath(sdp, lblock, &mp, height);
 769         if (height > ip->i_height || gfs2_is_stuffed(ip)) /* block lies outside the current tree */
 770                 goto do_alloc;
 771
 772         ret = lookup_metapath(ip, &mp);
 773         if (ret)
 774                 goto out_release;
 775
 776         if (mp.mp_aheight != ip->i_height) /* lookup stopped at an unallocated indirect pointer */
 777                 goto do_alloc;
 778
 779         ptr = metapointer(ip->i_height - 1, &mp);
 780         if (*ptr == 0) /* bottom pointer is unallocated */
 781                 goto do_alloc;
 782
 783         iomap->type = IOMAP_MAPPED;
 784         iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
 785
 786         bh = mp.mp_bh[ip->i_height - 1];
 787         len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
 788         if (eob)
 789                 iomap->flags |= IOMAP_F_BOUNDARY; /* extent ran up to the end of the pointer block */
 790         iomap->length = (u64)len << inode->i_blkbits;
 791
 792 out_release:
 793         release_metapath(&mp);
 794         bmap_unlock(ip, flags & IOMAP_WRITE);
 795 out:
 796         trace_gfs2_iomap_end(ip, iomap, ret);
 797         return ret;
 798
 799 do_alloc: /* nothing is mapped at lblock: allocate, or describe the hole */
 800         if (flags & IOMAP_WRITE) {
 801                 ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
 802         } else if (flags & IOMAP_REPORT) {
 803                 loff_t size = i_size_read(inode);
 804                 if (pos >= size)
 805                         ret = -ENOENT;
 806                 else if (height <= ip->i_height)
 807                         iomap->length = hole_size(inode, lblock, &mp);
 808                 else
 809                         iomap->length = size - pos; /* beyond the tree: hole runs to i_size */
 810         } else {
 811                 if (height <= ip->i_height) /* otherwise the length set above is kept */
 812                         iomap->length = hole_size(inode, lblock, &mp);
 813         }
 814         goto out_release;
 815 }
 816
 817 /**
 818  * gfs2_block_map - Map a block from an inode to a disk block
 819  * @inode: The inode
 820  * @lblock: The logical block number
 821  * @bh_map: The bh to be mapped
 822  * @create: True if its ok to alloc blocks to satify the request
 823  *
 824  * Sets buffer_mapped() if successful, sets buffer_boundary() if a
 825  * read of metadata will be required before the next block can be
 826  * mapped. Sets buffer_new() if new blocks were allocated.
 827  *
 828  * Returns: errno
 829  */
 830
 831 int gfs2_block_map(struct inode *inode, sector_t lblock,
 832                    struct buffer_head *bh_map, int create)
 833 {
 834         struct gfs2_inode *ip = GFS2_I(inode);
 835         struct iomap iomap;
 836         int ret, flags = 0;
 837
 838         clear_buffer_mapped(bh_map);
 839         clear_buffer_new(bh_map);
 840         clear_buffer_boundary(bh_map);
 841         trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 842
 843         if (create)
 844                 flags |= IOMAP_WRITE;
 845         if (buffer_zeronew(bh_map))
 846                 flags |= IOMAP_ZERO;
 847         ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
 848                                bh_map->b_size, flags, &iomap);
 849         if (ret) {
 850                 if (!create && ret == -ENOENT) {
 851                         /* Return unmapped buffer beyond the end of file.  */
 852                         ret = 0;
 853                 }
 854                 goto out;
 855         }
 856
 857         if (iomap.length > bh_map->b_size) {
 858                 iomap.length = bh_map->b_size;
 859                 iomap.flags &= ~IOMAP_F_BOUNDARY;
 860         }
 861         if (iomap.addr != IOMAP_NULL_ADDR)
 862                 map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 863         bh_map->b_size = iomap.length;
 864         if (iomap.flags & IOMAP_F_BOUNDARY)
 865                 set_buffer_boundary(bh_map);
 866         if (iomap.flags & IOMAP_F_NEW)
 867                 set_buffer_new(bh_map);
 868
 869 out:
 870         trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
 871         return ret;
 872 }
 873
 874 /*
 875  * Deprecated: do not use in new code
 876  */
 877 int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
 878 {
 879         struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
 880         int ret;
 881         int create = *new;
 882
 883         BUG_ON(!extlen);
 884         BUG_ON(!dblock);
 885         BUG_ON(!new);
 886
 887         bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 888         ret = gfs2_block_map(inode, lblock, &bh, create);
 889         *extlen = bh.b_size >> inode->i_blkbits;
 890         *dblock = bh.b_blocknr;
 891         if (buffer_new(&bh))
 892                 *new = 1;
 893         else
 894                 *new = 0;
 895         return ret;
 896 }
 897
 898 /**
 899  * gfs2_block_zero_range - Deal with zeroing out data
 900  *
 901  * This is partly borrowed from ext3.
 902  */
 903 static int gfs2_block_zero_range(struct inode *inode, loff_t from,
 904                                  unsigned int length)
 905 {
 906         struct address_space *mapping = inode->i_mapping;
 907         struct gfs2_inode *ip = GFS2_I(inode);
 908         unsigned long index = from >> PAGE_SHIFT;
 909         unsigned offset = from & (PAGE_SIZE-1);
 910         unsigned blocksize, iblock, pos;
 911         struct buffer_head *bh;
 912         struct page *page;
 913         int err;
 914
 915         page = find_or_create_page(mapping, index, GFP_NOFS);
 916         if (!page)
 917                 return 0;
 918
 919         blocksize = inode->i_sb->s_blocksize;
 920         iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
 921
 922         if (!page_has_buffers(page))
 923                 create_empty_buffers(page, blocksize, 0);
 924
 925         /* Find the buffer that contains "offset" */
 926         bh = page_buffers(page);
 927         pos = blocksize;
 928         while (offset >= pos) {
 929                 bh = bh->b_this_page;
 930                 iblock++;
 931                 pos += blocksize;
 932         }
 933
 934         err = 0;
 935
 936         if (!buffer_mapped(bh)) {
 937                 gfs2_block_map(inode, iblock, bh, 0);
 938                 /* unmapped? It's a hole - nothing to do */
 939                 if (!buffer_mapped(bh))
 940                         goto unlock;
 941         }
 942
 943         /* Ok, it's mapped. Make sure it's up-to-date */
 944         if (PageUptodate(page))
 945                 set_buffer_uptodate(bh);
 946
 947         if (!buffer_uptodate(bh)) {
 948                 err = -EIO;
 949                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
 950                 wait_on_buffer(bh);
 951                 /* Uhhuh. Read error. Complain and punt. */
 952                 if (!buffer_uptodate(bh))
 953                         goto unlock;
 954                 err = 0;
 955         }
 956
 957         if (!gfs2_is_writeback(ip))
 958                 gfs2_trans_add_data(ip->i_gl, bh);
 959
 960         zero_user(page, offset, length);
 961         mark_buffer_dirty(bh);
 962 unlock:
 963         unlock_page(page);
 964         put_page(page);
 965         return err;
 966 }
 967
 968 #define GFS2_JTRUNC_REVOKES 8192
 969
 970 /**
 971  * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
 972  * @inode: The inode being truncated
 973  * @oldsize: The original (larger) size
 974  * @newsize: The new smaller size
 975  *
 976  * With jdata files, we have to journal a revoke for each block which is
 977  * truncated. As a result, we need to split this into separate transactions
 978  * if the number of pages being truncated gets too large.
 979  */
 980
 981 static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize)
 982 {
 983         struct gfs2_sbd *sdp = GFS2_SB(inode);
 984         u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
 985         u64 chunk;
 986         int error;
 987
 988         while (oldsize != newsize) {
 989                 struct gfs2_trans *tr;
 990                 unsigned int offs;
 991
 992                 chunk = oldsize - newsize;
 993                 if (chunk > max_chunk)
 994                         chunk = max_chunk;
 995
 996                 offs = oldsize & ~PAGE_MASK;
 997                 if (offs && chunk > PAGE_SIZE)
 998                         chunk = offs + ((chunk - offs) & PAGE_MASK);
 999
1000                 truncate_pagecache(inode, oldsize - chunk);
1001                 oldsize -= chunk;
1002
1003                 tr = current->journal_info;
1004                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
1005                         continue;
1006
1007                 gfs2_trans_end(sdp);
1008                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
1009                 if (error)
1010                         return error;
1011         }
1012
1013         return 0;
1014 }
1015
1016 static int trunc_start(struct inode *inode, u64 newsize)
1017 {
1018         struct gfs2_inode *ip = GFS2_I(inode);
1019         struct gfs2_sbd *sdp = GFS2_SB(inode);
1020         struct buffer_head *dibh = NULL;
1021         int journaled = gfs2_is_jdata(ip);
1022         u64 oldsize = inode->i_size;
1023         int error;
1024
1025         if (journaled)
1026                 error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, GFS2_JTRUNC_REVOKES);
1027         else
1028                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1029         if (error)
1030                 return error;
1031
1032         error = gfs2_meta_inode_buffer(ip, &dibh);
1033         if (error)
1034                 goto out;
1035
1036         gfs2_trans_add_meta(ip->i_gl, dibh);
1037
1038         if (gfs2_is_stuffed(ip)) {
1039                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize);
1040         } else {
1041                 unsigned int blocksize = i_blocksize(inode);
1042                 unsigned int offs = newsize & (blocksize - 1);
1043                 if (offs) {
1044                         error = gfs2_block_zero_range(inode, newsize,
1045                                                       blocksize - offs);
1046                         if (error)
1047                                 goto out;
1048                 }
1049                 ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
1050         }
1051
1052         i_size_write(inode, newsize);
1053         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1054         gfs2_dinode_out(ip, dibh->b_data);
1055
1056         if (journaled)
1057                 error = gfs2_journaled_truncate(inode, oldsize, newsize);
1058         else
1059                 truncate_pagecache(inode, newsize);
1060
1061 out:
1062         brelse(dibh);
1063         if (current->journal_info)
1064                 gfs2_trans_end(sdp);
1065         return error;
1066 }
1067
1068 /**
1069  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
1070  * @ip: inode
1071  * @rg_gh: holder of resource group glock
1072  * @bh: buffer head to sweep
1073  * @start: starting point in bh
1074  * @end: end point in bh
1075  * @meta: true if bh points to metadata (rather than data)
1076  * @btotal: place to keep count of total blocks freed
1077  *
1078  * We sweep a metadata buffer (provided by the metapath) for blocks we need to
1079  * free, and free them all. However, we do it one rgrp at a time. If this
1080  * block has references to multiple rgrps, we break it into individual
1081  * transactions. This allows other processes to use the rgrps while we're
1082  * focused on a single one, for better concurrency / performance.
1083  * At every transaction boundary, we rewrite the inode into the journal.
1084  * That way the bitmaps are kept consistent with the inode and we can recover
1085  * if we're interrupted by power-outages.
1086  *
1087  * Returns: 0, or return code if an error occurred.
1088  *          *btotal has the total number of blocks freed
1089  */
1090 static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
1091                               struct buffer_head *bh, __be64 *start, __be64 *end,
1092                               bool meta, u32 *btotal)
1093 {
1094         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1095         struct gfs2_rgrpd *rgd;
1096         struct gfs2_trans *tr;
1097         __be64 *p;
1098         int blks_outside_rgrp;
1099         u64 bn, bstart, isize_blks;
1100         s64 blen; /* needs to be s64 or gfs2_add_inode_blocks breaks */
1101         int ret = 0;
1102         bool buf_in_tr = false; /* buffer was added to transaction */
1103
1104 more_rgrps:
1105         rgd = NULL;
1106         if (gfs2_holder_initialized(rd_gh)) {
1107                 rgd = gfs2_glock2rgrp(rd_gh->gh_gl);
1108                 gfs2_assert_withdraw(sdp,
1109                              gfs2_glock_is_locked_by_me(rd_gh->gh_gl));
1110         }
1111         blks_outside_rgrp = 0;
1112         bstart = 0;
1113         blen = 0;
1114
1115         for (p = start; p < end; p++) {
1116                 if (!*p)
1117                         continue;
1118                 bn = be64_to_cpu(*p);
1119
1120                 if (rgd) {
1121                         if (!rgrp_contains_block(rgd, bn)) {
1122                                 blks_outside_rgrp++;
1123                                 continue;
1124                         }
1125                 } else {
1126                         rgd = gfs2_blk2rgrpd(sdp, bn, true);
1127                         if (unlikely(!rgd)) {
1128                                 ret = -EIO;
1129                                 goto out;
1130                         }
1131                         ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1132                                                  0, rd_gh);
1133                         if (ret)
1134                                 goto out;
1135
1136                         /* Must be done with the rgrp glock held: */
1137                         if (gfs2_rs_active(&ip->i_res) &&
1138                             rgd == ip->i_res.rs_rbm.rgd)
1139                                 gfs2_rs_deltree(&ip->i_res);
1140                 }
1141
1142                 /* The size of our transactions will be unknown until we
1143                    actually process all the metadata blocks that relate to
1144                    the rgrp. So we estimate. We know it can't be more than
1145                    the dinode's i_blocks and we don't want to exceed the
1146                    journal flush threshold, sd_log_thresh2. */
1147                 if (current->journal_info == NULL) {
1148                         unsigned int jblocks_rqsted, revokes;
1149
1150                         jblocks_rqsted = rgd->rd_length + RES_DINODE +
1151                                 RES_INDIRECT;
1152                         isize_blks = gfs2_get_inode_blocks(&ip->i_inode);
1153                         if (isize_blks > atomic_read(&sdp->sd_log_thresh2))
1154                                 jblocks_rqsted +=
1155                                         atomic_read(&sdp->sd_log_thresh2);
1156                         else
1157                                 jblocks_rqsted += isize_blks;
1158                         revokes = jblocks_rqsted;
1159                         if (meta)
1160                                 revokes += end - start;
1161                         else if (ip->i_depth)
1162                                 revokes += sdp->sd_inptrs;
1163                         ret = gfs2_trans_begin(sdp, jblocks_rqsted, revokes);
1164                         if (ret)
1165                                 goto out_unlock;
1166                         down_write(&ip->i_rw_mutex);
1167                 }
1168                 /* check if we will exceed the transaction blocks requested */
1169                 tr = current->journal_info;
1170                 if (tr->tr_num_buf_new + RES_STATFS +
1171                     RES_QUOTA >= atomic_read(&sdp->sd_log_thresh2)) {
1172                         /* We set blks_outside_rgrp to ensure the loop will
1173                            be repeated for the same rgrp, but with a new
1174                            transaction. */
1175                         blks_outside_rgrp++;
1176                         /* This next part is tricky. If the buffer was added
1177                            to the transaction, we've already set some block
1178                            pointers to 0, so we better follow through and free
1179                            them, or we will introduce corruption (so break).
1180                            This may be impossible, or at least rare, but I
1181                            decided to cover the case regardless.
1182
1183                            If the buffer was not added to the transaction
1184                            (this call), doing so would exceed our transaction
1185                            size, so we need to end the transaction and start a
1186                            new one (so goto). */
1187
1188                         if (buf_in_tr)
1189                                 break;
1190                         goto out_unlock;
1191                 }
1192
1193                 gfs2_trans_add_meta(ip->i_gl, bh);
1194                 buf_in_tr = true;
1195                 *p = 0;
1196                 if (bstart + blen == bn) {
1197                         blen++;
1198                         continue;
1199                 }
1200                 if (bstart) {
1201                         __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1202                         (*btotal) += blen;
1203                         gfs2_add_inode_blocks(&ip->i_inode, -blen);
1204                 }
1205                 bstart = bn;
1206                 blen = 1;
1207         }
1208         if (bstart) {
1209                 __gfs2_free_blocks(ip, bstart, (u32)blen, meta);
1210                 (*btotal) += blen;
1211                 gfs2_add_inode_blocks(&ip->i_inode, -blen);
1212         }
1213 out_unlock:
1214         if (!ret && blks_outside_rgrp) { /* If buffer still has non-zero blocks
1215                                             outside the rgrp we just processed,
1216                                             do it all over again. */
1217                 if (current->journal_info) {
1218                         struct buffer_head *dibh;
1219
1220                         ret = gfs2_meta_inode_buffer(ip, &dibh);
1221                         if (ret)
1222                                 goto out;
1223
1224                         /* Every transaction boundary, we rewrite the dinode
1225                            to keep its di_blocks current in case of failure. */
1226                         ip->i_inode.i_mtime = ip->i_inode.i_ctime =
1227                                 current_time(&ip->i_inode);
1228                         gfs2_trans_add_meta(ip->i_gl, dibh);
1229                         gfs2_dinode_out(ip, dibh->b_data);
1230                         brelse(dibh);
1231                         up_write(&ip->i_rw_mutex);
1232                         gfs2_trans_end(sdp);
1233                 }
1234                 gfs2_glock_dq_uninit(rd_gh);
1235                 cond_resched();
1236                 goto more_rgrps;
1237         }
1238 out:
1239         return ret;
1240 }
1241
1242 static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
1243 {
1244         if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
1245                 return false;
1246         return true;
1247 }
1248
1249 /**
1250  * find_nonnull_ptr - find a non-null pointer given a metapath and height
1251  * @mp: starting metapath
1252  * @h: desired height to search
1253  *
1254  * Assumes the metapath is valid (with buffers) out to height h.
1255  * Returns: true if a non-null pointer was found in the metapath buffer
1256  *          false if all remaining pointers are NULL in the buffer
1257  */
1258 static bool find_nonnull_ptr(struct gfs2_sbd *sdp, struct metapath *mp,
1259                              unsigned int h,
1260                              __u16 *end_list, unsigned int end_aligned)
1261 {
1262         struct buffer_head *bh = mp->mp_bh[h];
1263         __be64 *first, *ptr, *end;
1264
1265         first = metaptr1(h, mp);
1266         ptr = first + mp->mp_list[h];
1267         end = (__be64 *)(bh->b_data + bh->b_size);
1268         if (end_list && mp_eq_to_hgt(mp, end_list, h)) {
1269                 bool keep_end = h < end_aligned;
1270                 end = first + end_list[h] + keep_end;
1271         }
1272
1273         while (ptr < end) {
1274                 if (*ptr) { /* if we have a non-null pointer */
1275                         mp->mp_list[h] = ptr - first;
1276                         h++;
1277                         if (h < GFS2_MAX_META_HEIGHT)
1278                                 mp->mp_list[h] = 0;
1279                         return true;
1280                 }
1281                 ptr++;
1282         }
1283         return false;
1284 }
1285
/* States of the deallocation state machine. */
enum dealloc_states {
        /* Strip a metapath with all buffers read in */
        DEALLOC_MP_FULL = 0,
        /* lower the metapath strip height */
        DEALLOC_MP_LOWER,
        /* Fill in the metapath to the given height. */
        DEALLOC_FILL_MP,
        /* process complete */
        DEALLOC_DONE,
};
1292
1293 static inline void
1294 metapointer_range(struct metapath *mp, int height,
1295                   __u16 *start_list, unsigned int start_aligned,
1296                   __u16 *end_list, unsigned int end_aligned,
1297                   __be64 **start, __be64 **end)
1298 {
1299         struct buffer_head *bh = mp->mp_bh[height];
1300         __be64 *first;
1301
1302         first = metaptr1(height, mp);
1303         *start = first;
1304         if (mp_eq_to_hgt(mp, start_list, height)) {
1305                 bool keep_start = height < start_aligned;
1306                 *start = first + start_list[height] + keep_start;
1307         }
1308         *end = (__be64 *)(bh->b_data + bh->b_size);
1309         if (end_list && mp_eq_to_hgt(mp, end_list, height)) {
1310                 bool keep_end = height < end_aligned;
1311                 *end = first + end_list[height] + keep_end;
1312         }
1313 }
1314
1315 static inline bool walk_done(struct gfs2_sbd *sdp,
1316                              struct metapath *mp, int height,
1317                              __u16 *end_list, unsigned int end_aligned)
1318 {
1319         __u16 end;
1320
1321         if (end_list) {
1322                 bool keep_end = height < end_aligned;
1323                 if (!mp_eq_to_hgt(mp, end_list, height))
1324                         return false;
1325                 end = end_list[height] + keep_end;
1326         } else
1327                 end = (height > 0) ? sdp->sd_inptrs : sdp->sd_diptrs;
1328         return mp->mp_list[height] >= end;
1329 }
1330
1331 /**
1332  * punch_hole - deallocate blocks in a file
1333  * @ip: inode to truncate
1334  * @offset: the start of the hole
1335  * @length: the size of the hole (or 0 for truncate)
1336  *
1337  * Punch a hole into a file or truncate a file at a given position.  This
1338  * function operates in whole blocks (@offset and @length are rounded
1339  * accordingly); partially filled blocks must be cleared otherwise.
1340  *
1341  * This function works from the bottom up, and from the right to the left. In
1342  * other words, it strips off the highest layer (data) before stripping any of
1343  * the metadata. Doing it this way is best in case the operation is interrupted
1344  * by power failure, etc.  The dinode is rewritten in every transaction to
1345  * guarantee integrity.
1346  */
1347 static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
1348 {
1349         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1350         struct metapath mp = {};
1351         struct buffer_head *dibh, *bh;
1352         struct gfs2_holder rd_gh;
1353         unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
1354         u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
1355         __u16 start_list[GFS2_MAX_META_HEIGHT];
1356         __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
1357         unsigned int start_aligned, uninitialized_var(end_aligned);
1358         unsigned int strip_h = ip->i_height - 1;
1359         u32 btotal = 0;
1360         int ret, state;
1361         int mp_h; /* metapath buffers are read in to this height */
1362         u64 prev_bnr = 0;
1363         __be64 *start, *end;
1364
1365         /*
1366          * The start position of the hole is defined by lblock, start_list, and
1367          * start_aligned.  The end position of the hole is defined by lend,
1368          * end_list, and end_aligned.
1369          *
1370          * start_aligned and end_aligned define down to which height the start
1371          * and end positions are aligned to the metadata tree (i.e., the
1372          * position is a multiple of the metadata granularity at the height
1373          * above).  This determines at which heights additional meta pointers
1374          * needs to be preserved for the remaining data.
1375          */
1376
1377         if (length) {
1378                 u64 maxsize = sdp->sd_heightsize[ip->i_height];
1379                 u64 end_offset = offset + length;
1380                 u64 lend;
1381
1382                 /*
1383                  * Clip the end at the maximum file size for the given height:
1384                  * that's how far the metadata goes; files bigger than that
1385                  * will have additional layers of indirection.
1386                  */
1387                 if (end_offset > maxsize)
1388                         end_offset = maxsize;
1389                 lend = end_offset >> bsize_shift;
1390
1391                 if (lblock >= lend)
1392                         return 0;
1393
1394                 find_metapath(sdp, lend, &mp, ip->i_height);
1395                 end_list = __end_list;
1396                 memcpy(end_list, mp.mp_list, sizeof(mp.mp_list));
1397
1398                 for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1399                         if (end_list[mp_h])
1400                                 break;
1401                 }
1402                 end_aligned = mp_h;
1403         }
1404
1405         find_metapath(sdp, lblock, &mp, ip->i_height);
1406         memcpy(start_list, mp.mp_list, sizeof(start_list));
1407
1408         for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
1409                 if (start_list[mp_h])
1410                         break;
1411         }
1412         start_aligned = mp_h;
1413
1414         ret = gfs2_meta_inode_buffer(ip, &dibh);
1415         if (ret)
1416                 return ret;
1417
1418         mp.mp_bh[0] = dibh;
1419         ret = lookup_metapath(ip, &mp);
1420         if (ret)
1421                 goto out_metapath;
1422
1423         /* issue read-ahead on metadata */
1424         for (mp_h = 0; mp_h < mp.mp_aheight - 1; mp_h++) {
1425                 metapointer_range(&mp, mp_h, start_list, start_aligned,
1426                                   end_list, end_aligned, &start, &end);
1427                 gfs2_metapath_ra(ip->i_gl, start, end);
1428         }
1429
1430         if (mp.mp_aheight == ip->i_height)
1431                 state = DEALLOC_MP_FULL; /* We have a complete metapath */
1432         else
1433                 state = DEALLOC_FILL_MP; /* deal with partial metapath */
1434
1435         ret = gfs2_rindex_update(sdp);
1436         if (ret)
1437                 goto out_metapath;
1438
1439         ret = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1440         if (ret)
1441                 goto out_metapath;
1442         gfs2_holder_mark_uninitialized(&rd_gh);
1443
1444         mp_h = strip_h;
1445
1446         while (state != DEALLOC_DONE) {
1447                 switch (state) {
1448                 /* Truncate a full metapath at the given strip height.
1449                  * Note that strip_h == mp_h in order to be in this state. */
1450                 case DEALLOC_MP_FULL:
1451                         bh = mp.mp_bh[mp_h];
1452                         gfs2_assert_withdraw(sdp, bh);
1453                         if (gfs2_assert_withdraw(sdp,
1454                                                  prev_bnr != bh->b_blocknr)) {
1455                                 printk(KERN_EMERG "GFS2: fsid=%s:inode %llu, "
1456                                        "block:%llu, i_h:%u, s_h:%u, mp_h:%u\n",
1457                                        sdp->sd_fsname,
1458                                        (unsigned long long)ip->i_no_addr,
1459                                        prev_bnr, ip->i_height, strip_h, mp_h);
1460                         }
1461                         prev_bnr = bh->b_blocknr;
1462
1463                         if (gfs2_metatype_check(sdp, bh,
1464                                                 (mp_h ? GFS2_METATYPE_IN :
1465                                                         GFS2_METATYPE_DI))) {
1466                                 ret = -EIO;
1467                                 goto out;
1468                         }
1469
1470                         /*
1471                          * Below, passing end_aligned as 0 gives us the
1472                          * metapointer range excluding the end point: the end
1473                          * point is the first metapath we must not deallocate!
1474                          */
1475
1476                         metapointer_range(&mp, mp_h, start_list, start_aligned,
1477                                           end_list, 0 /* end_aligned */,
1478                                           &start, &end);
1479                         ret = sweep_bh_for_rgrps(ip, &rd_gh, mp.mp_bh[mp_h],
1480                                                  start, end,
1481                                                  mp_h != ip->i_height - 1,
1482                                                  &btotal);
1483
1484                         /* If we hit an error or just swept dinode buffer,
1485                            just exit. */
1486                         if (ret || !mp_h) {
1487                                 state = DEALLOC_DONE;
1488                                 break;
1489                         }
1490                         state = DEALLOC_MP_LOWER;
1491                         break;
1492
1493                 /* lower the metapath strip height */
1494                 case DEALLOC_MP_LOWER:
1495                         /* We're done with the current buffer, so release it,
1496                            unless it's the dinode buffer. Then back up to the
1497                            previous pointer. */
1498                         if (mp_h) {
1499                                 brelse(mp.mp_bh[mp_h]);
1500                                 mp.mp_bh[mp_h] = NULL;
1501                         }
1502                         /* If we can't get any lower in height, we've stripped
1503                            off all we can. Next step is to back up and start
1504                            stripping the previous level of metadata. */
1505                         if (mp_h == 0) {
1506                                 strip_h--;
1507                                 memcpy(mp.mp_list, start_list, sizeof(start_list));
1508                                 mp_h = strip_h;
1509                                 state = DEALLOC_FILL_MP;
1510                                 break;
1511                         }
1512                         mp.mp_list[mp_h] = 0;
1513                         mp_h--; /* search one metadata height down */
1514                         mp.mp_list[mp_h]++;
1515                         if (walk_done(sdp, &mp, mp_h, end_list, end_aligned))
1516                                 break;
1517                         /* Here we've found a part of the metapath that is not
1518                          * allocated. We need to search at that height for the
1519                          * next non-null pointer. */
1520                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned)) {
1521                                 state = DEALLOC_FILL_MP;
1522                                 mp_h++;
1523                         }
1524                         /* No more non-null pointers at this height. Back up
1525                            to the previous height and try again. */
1526                         break; /* loop around in the same state */
1527
1528                 /* Fill the metapath with buffers to the given height. */
1529                 case DEALLOC_FILL_MP:
1530                         /* Fill the buffers out to the current height. */
1531                         ret = fillup_metapath(ip, &mp, mp_h);
1532                         if (ret < 0)
1533                                 goto out;
1534
1535                         /* issue read-ahead on metadata */
1536                         if (mp.mp_aheight > 1) {
1537                                 for (; ret > 1; ret--) {
1538                                         metapointer_range(&mp, mp.mp_aheight - ret,
1539                                                           start_list, start_aligned,
1540                                                           end_list, end_aligned,
1541                                                           &start, &end);
1542                                         gfs2_metapath_ra(ip->i_gl, start, end);
1543                                 }
1544                         }
1545
1546                         /* If buffers found for the entire strip height */
1547                         if (mp.mp_aheight - 1 == strip_h) {
1548                                 state = DEALLOC_MP_FULL;
1549                                 break;
1550                         }
1551                         if (mp.mp_aheight < ip->i_height) /* We have a partial height */
1552                                 mp_h = mp.mp_aheight - 1;
1553
1554                         /* If we find a non-null block pointer, crawl a bit
1555                            higher up in the metapath and try again, otherwise
1556                            we need to look lower for a new starting point. */
1557                         if (find_nonnull_ptr(sdp, &mp, mp_h, end_list, end_aligned))
1558                                 mp_h++;
1559                         else
1560                                 state = DEALLOC_MP_LOWER;
1561                         break;
1562                 }
1563         }
1564
1565         if (btotal) {
1566                 if (current->journal_info == NULL) {
1567                         ret = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS +
1568                                                RES_QUOTA, 0);
1569                         if (ret)
1570                                 goto out;
1571                         down_write(&ip->i_rw_mutex);
1572                 }
1573                 gfs2_statfs_change(sdp, 0, +btotal, 0);
1574                 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
1575                                   ip->i_inode.i_gid);
1576                 ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1577                 gfs2_trans_add_meta(ip->i_gl, dibh);
1578                 gfs2_dinode_out(ip, dibh->b_data);
1579                 up_write(&ip->i_rw_mutex);
1580                 gfs2_trans_end(sdp);
1581         }
1582
1583 out:
1584         if (gfs2_holder_initialized(&rd_gh))
1585                 gfs2_glock_dq_uninit(&rd_gh);
1586         if (current->journal_info) {
1587                 up_write(&ip->i_rw_mutex);
1588                 gfs2_trans_end(sdp);
1589                 cond_resched();
1590         }
1591         gfs2_quota_unhold(ip);
1592 out_metapath:
1593         release_metapath(&mp);
1594         return ret;
1595 }
1596
1597 static int trunc_end(struct gfs2_inode *ip)
1598 {
1599         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1600         struct buffer_head *dibh;
1601         int error;
1602
1603         error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1604         if (error)
1605                 return error;
1606
1607         down_write(&ip->i_rw_mutex);
1608
1609         error = gfs2_meta_inode_buffer(ip, &dibh);
1610         if (error)
1611                 goto out;
1612
1613         if (!i_size_read(&ip->i_inode)) {
1614                 ip->i_height = 0;
1615                 ip->i_goal = ip->i_no_addr;
1616                 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1617                 gfs2_ordered_del_inode(ip);
1618         }
1619         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1620         ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
1621
1622         gfs2_trans_add_meta(ip->i_gl, dibh);
1623         gfs2_dinode_out(ip, dibh->b_data);
1624         brelse(dibh);
1625
1626 out:
1627         up_write(&ip->i_rw_mutex);
1628         gfs2_trans_end(sdp);
1629         return error;
1630 }
1631
1632 /**
1633  * do_shrink - make a file smaller
1634  * @inode: the inode
1635  * @newsize: the size to make the file
1636  *
1637  * Called with an exclusive lock on @inode. The @size must
1638  * be equal to or smaller than the current inode size.
1639  *
1640  * Returns: errno
1641  */
1642
1643 static int do_shrink(struct inode *inode, u64 newsize)
1644 {
1645         struct gfs2_inode *ip = GFS2_I(inode);
1646         int error;
1647
1648         error = trunc_start(inode, newsize);
1649         if (error < 0)
1650                 return error;
1651         if (gfs2_is_stuffed(ip))
1652                 return 0;
1653
1654         error = punch_hole(ip, newsize, 0);
1655         if (error == 0)
1656                 error = trunc_end(ip);
1657
1658         return error;
1659 }
1660
1661 void gfs2_trim_blocks(struct inode *inode)
1662 {
1663         int ret;
1664
1665         ret = do_shrink(inode, inode->i_size);
1666         WARN_ON(ret != 0);
1667 }
1668
1669 /**
1670  * do_grow - Touch and update inode size
1671  * @inode: The inode
1672  * @size: The new size
1673  *
1674  * This function updates the timestamps on the inode and
1675  * may also increase the size of the inode. This function
1676  * must not be called with @size any smaller than the current
1677  * inode size.
1678  *
1679  * Although it is not strictly required to unstuff files here,
1680  * earlier versions of GFS2 have a bug in the stuffed file reading
1681  * code which will result in a buffer overrun if the size is larger
1682  * than the max stuffed file size. In order to prevent this from
1683  * occurring, such files are unstuffed, but in other cases we can
1684  * just update the inode size directly.
1685  *
1686  * Returns: 0 on success, or -ve on error
1687  */
1688
1689 static int do_grow(struct inode *inode, u64 size)
1690 {
1691         struct gfs2_inode *ip = GFS2_I(inode);
1692         struct gfs2_sbd *sdp = GFS2_SB(inode);
1693         struct gfs2_alloc_parms ap = { .target = 1, };
1694         struct buffer_head *dibh;
1695         int error;
1696         int unstuff = 0;
1697
1698         if (gfs2_is_stuffed(ip) && size > gfs2_max_stuffed_size(ip)) {
1699                 error = gfs2_quota_lock_check(ip, &ap);
1700                 if (error)
1701                         return error;
1702
1703                 error = gfs2_inplace_reserve(ip, &ap);
1704                 if (error)
1705                         goto do_grow_qunlock;
1706                 unstuff = 1;
1707         }
1708
1709         error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT +
1710                                  (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ?
1711                                   0 : RES_QUOTA), 0);
1712         if (error)
1713                 goto do_grow_release;
1714
1715         if (unstuff) {
1716                 error = gfs2_unstuff_dinode(ip, NULL);
1717                 if (error)
1718                         goto do_end_trans;
1719         }
1720
1721         error = gfs2_meta_inode_buffer(ip, &dibh);
1722         if (error)
1723                 goto do_end_trans;
1724
1725         i_size_write(inode, size);
1726         ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
1727         gfs2_trans_add_meta(ip->i_gl, dibh);
1728         gfs2_dinode_out(ip, dibh->b_data);
1729         brelse(dibh);
1730
1731 do_end_trans:
1732         gfs2_trans_end(sdp);
1733 do_grow_release:
1734         if (unstuff) {
1735                 gfs2_inplace_release(ip);
1736 do_grow_qunlock:
1737                 gfs2_quota_unlock(ip);
1738         }
1739         return error;
1740 }
1741
1742 /**
1743  * gfs2_setattr_size - make a file a given size
1744  * @inode: the inode
1745  * @newsize: the size to make the file
1746  *
1747  * The file size can grow, shrink, or stay the same size. This
1748  * is called holding i_mutex and an exclusive glock on the inode
1749  * in question.
1750  *
1751  * Returns: errno
1752  */
1753
1754 int gfs2_setattr_size(struct inode *inode, u64 newsize)
1755 {
1756         struct gfs2_inode *ip = GFS2_I(inode);
1757         int ret;
1758
1759         BUG_ON(!S_ISREG(inode->i_mode));
1760
1761         ret = inode_newsize_ok(inode, newsize);
1762         if (ret)
1763                 return ret;
1764
1765         inode_dio_wait(inode);
1766
1767         ret = gfs2_rsqa_alloc(ip);
1768         if (ret)
1769                 goto out;
1770
1771         if (newsize >= inode->i_size) {
1772                 ret = do_grow(inode, newsize);
1773                 goto out;
1774         }
1775
1776         ret = do_shrink(inode, newsize);
1777 out:
1778         gfs2_rsqa_delete(ip, NULL);
1779         return ret;
1780 }
1781
1782 int gfs2_truncatei_resume(struct gfs2_inode *ip)
1783 {
1784         int error;
1785         error = punch_hole(ip, i_size_read(&ip->i_inode), 0);
1786         if (!error)
1787                 error = trunc_end(ip);
1788         return error;
1789 }
1790
1791 int gfs2_file_dealloc(struct gfs2_inode *ip)
1792 {
1793         return punch_hole(ip, 0, 0);
1794 }
1795
1796 /**
1797  * gfs2_free_journal_extents - Free cached journal bmap info
1798  * @jd: The journal
1799  *
1800  */
1801
1802 void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
1803 {
1804         struct gfs2_journal_extent *jext;
1805
1806         while(!list_empty(&jd->extent_list)) {
1807                 jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
1808                 list_del(&jext->list);
1809                 kfree(jext);
1810         }
1811 }
1812
1813 /**
1814  * gfs2_add_jextent - Add or merge a new extent to extent cache
1815  * @jd: The journal descriptor
1816  * @lblock: The logical block at start of new extent
1817  * @dblock: The physical block at start of new extent
1818  * @blocks: Size of extent in fs blocks
1819  *
1820  * Returns: 0 on success or -ENOMEM
1821  */
1822
1823 static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
1824 {
1825         struct gfs2_journal_extent *jext;
1826
1827         if (!list_empty(&jd->extent_list)) {
1828                 jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
1829                 if ((jext->dblock + jext->blocks) == dblock) {
1830                         jext->blocks += blocks;
1831                         return 0;
1832                 }
1833         }
1834
1835         jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS);
1836         if (jext == NULL)
1837                 return -ENOMEM;
1838         jext->dblock = dblock;
1839         jext->lblock = lblock;
1840         jext->blocks = blocks;
1841         list_add_tail(&jext->list, &jd->extent_list);
1842         jd->nr_extents++;
1843         return 0;
1844 }
1845
1846 /**
1847  * gfs2_map_journal_extents - Cache journal bmap info
1848  * @sdp: The super block
1849  * @jd: The journal to map
1850  *
1851  * Create a reusable "extent" mapping from all logical
1852  * blocks to all physical blocks for the given journal.  This will save
1853  * us time when writing journal blocks.  Most journals will have only one
1854  * extent that maps all their logical blocks.  That's because gfs2.mkfs
1855  * arranges the journal blocks sequentially to maximize performance.
1856  * So the extent would map the first block for the entire file length.
1857  * However, gfs2_jadd can happen while file activity is happening, so
1858  * those journals may not be sequential.  Less likely is the case where
1859  * the users created their own journals by mounting the metafs and
1860  * laying it out.  But it's still possible.  These journals might have
1861  * several extents.
1862  *
1863  * Returns: 0 on success, or error on failure
1864  */
1865
1866 int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
1867 {
1868         u64 lblock = 0;
1869         u64 lblock_stop;
1870         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
1871         struct buffer_head bh;
1872         unsigned int shift = sdp->sd_sb.sb_bsize_shift;
1873         u64 size;
1874         int rc;
1875
1876         lblock_stop = i_size_read(jd->jd_inode) >> shift;
1877         size = (lblock_stop - lblock) << shift;
1878         jd->nr_extents = 0;
1879         WARN_ON(!list_empty(&jd->extent_list));
1880
1881         do {
1882                 bh.b_state = 0;
1883                 bh.b_blocknr = 0;
1884                 bh.b_size = size;
1885                 rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
1886                 if (rc || !buffer_mapped(&bh))
1887                         goto fail;
1888                 rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
1889                 if (rc)
1890                         goto fail;
1891                 size -= bh.b_size;
1892                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1893         } while(size > 0);
1894
1895         fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
1896                 jd->nr_extents);
1897         return 0;
1898
1899 fail:
1900         fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
1901                 rc, jd->jd_jid,
1902                 (unsigned long long)(i_size_read(jd->jd_inode) - size),
1903                 jd->nr_extents);
1904         fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
1905                 rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
1906                 bh.b_state, (unsigned long long)bh.b_size);
1907         gfs2_free_journal_extents(jd);
1908         return rc;
1909 }
1910
1911 /**
1912  * gfs2_write_alloc_required - figure out if a write will require an allocation
1913  * @ip: the file being written to
1914  * @offset: the offset to write to
1915  * @len: the number of bytes being written
1916  *
1917  * Returns: 1 if an alloc is required, 0 otherwise
1918  */
1919
1920 int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
1921                               unsigned int len)
1922 {
1923         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1924         struct buffer_head bh;
1925         unsigned int shift;
1926         u64 lblock, lblock_stop, size;
1927         u64 end_of_file;
1928
1929         if (!len)
1930                 return 0;
1931
1932         if (gfs2_is_stuffed(ip)) {
1933                 if (offset + len > gfs2_max_stuffed_size(ip))
1934                         return 1;
1935                 return 0;
1936         }
1937
1938         shift = sdp->sd_sb.sb_bsize_shift;
1939         BUG_ON(gfs2_is_dir(ip));
1940         end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift;
1941         lblock = offset >> shift;
1942         lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1943         if (lblock_stop > end_of_file)
1944                 return 1;
1945
1946         size = (lblock_stop - lblock) << shift;
1947         do {
1948                 bh.b_state = 0;
1949                 bh.b_size = size;
1950                 gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
1951                 if (!buffer_mapped(&bh))
1952                         return 1;
1953                 size -= bh.b_size;
1954                 lblock += (bh.b_size >> ip->i_inode.i_blkbits);
1955         } while(size > 0);
1956
1957         return 0;
1958 }
1959
1960 static int stuffed_zero_range(struct inode *inode, loff_t offset, loff_t length)
1961 {
1962         struct gfs2_inode *ip = GFS2_I(inode);
1963         struct buffer_head *dibh;
1964         int error;
1965
1966         if (offset >= inode->i_size)
1967                 return 0;
1968         if (offset + length > inode->i_size)
1969                 length = inode->i_size - offset;
1970
1971         error = gfs2_meta_inode_buffer(ip, &dibh);
1972         if (error)
1973                 return error;
1974         gfs2_trans_add_meta(ip->i_gl, dibh);
1975         memset(dibh->b_data + sizeof(struct gfs2_dinode) + offset, 0,
1976                length);
1977         brelse(dibh);
1978         return 0;
1979 }
1980
1981 static int gfs2_journaled_truncate_range(struct inode *inode, loff_t offset,
1982                                          loff_t length)
1983 {
1984         struct gfs2_sbd *sdp = GFS2_SB(inode);
1985         loff_t max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
1986         int error;
1987
1988         while (length) {
1989                 struct gfs2_trans *tr;
1990                 loff_t chunk;
1991                 unsigned int offs;
1992
1993                 chunk = length;
1994                 if (chunk > max_chunk)
1995                         chunk = max_chunk;
1996
1997                 offs = offset & ~PAGE_MASK;
1998                 if (offs && chunk > PAGE_SIZE)
1999                         chunk = offs + ((chunk - offs) & PAGE_MASK);
2000
2001                 truncate_pagecache_range(inode, offset, chunk);
2002                 offset += chunk;
2003                 length -= chunk;
2004
2005                 tr = current->journal_info;
2006                 if (!test_bit(TR_TOUCHED, &tr->tr_flags))
2007                         continue;
2008
2009                 gfs2_trans_end(sdp);
2010                 error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
2011                 if (error)
2012                         return error;
2013         }
2014         return 0;
2015 }
2016
2017 int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
2018 {
2019         struct inode *inode = file_inode(file);
2020         struct gfs2_inode *ip = GFS2_I(inode);
2021         struct gfs2_sbd *sdp = GFS2_SB(inode);
2022         int error;
2023
2024         if (gfs2_is_jdata(ip))
2025                 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_JDATA,
2026                                          GFS2_JTRUNC_REVOKES);
2027         else
2028                 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2029         if (error)
2030                 return error;
2031
2032         if (gfs2_is_stuffed(ip)) {
2033                 error = stuffed_zero_range(inode, offset, length);
2034                 if (error)
2035                         goto out;
2036         } else {
2037                 unsigned int start_off, end_off, blocksize;
2038
2039                 blocksize = i_blocksize(inode);
2040                 start_off = offset & (blocksize - 1);
2041                 end_off = (offset + length) & (blocksize - 1);
2042                 if (start_off) {
2043                         unsigned int len = length;
2044                         if (length > blocksize - start_off)
2045                                 len = blocksize - start_off;
2046                         error = gfs2_block_zero_range(inode, offset, len);
2047                         if (error)
2048                                 goto out;
2049                         if (start_off + length < blocksize)
2050                                 end_off = 0;
2051                 }
2052                 if (end_off) {
2053                         error = gfs2_block_zero_range(inode,
2054                                 offset + length - end_off, end_off);
2055                         if (error)
2056                                 goto out;
2057                 }
2058         }
2059
2060         if (gfs2_is_jdata(ip)) {
2061                 BUG_ON(!current->journal_info);
2062                 gfs2_journaled_truncate_range(inode, offset, length);
2063         } else
2064                 truncate_pagecache_range(inode, offset, offset + length - 1);
2065
2066         file_update_time(file);
2067         mark_inode_dirty(inode);
2068
2069         if (current->journal_info)
2070                 gfs2_trans_end(sdp);
2071
2072         if (!gfs2_is_stuffed(ip))
2073                 error = punch_hole(ip, offset, length);
2074
2075 out:
2076         if (current->journal_info)
2077                 gfs2_trans_end(sdp);
2078         return error;
2079 }