fs/gfs2/super.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
   4  * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
   5  */
   6
   7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
   8
   9 #include <linux/bio.h>
  10 #include <linux/sched/signal.h>
  11 #include <linux/slab.h>
  12 #include <linux/spinlock.h>
  13 #include <linux/completion.h>
  14 #include <linux/buffer_head.h>
  15 #include <linux/statfs.h>
  16 #include <linux/seq_file.h>
  17 #include <linux/mount.h>
  18 #include <linux/kthread.h>
  19 #include <linux/delay.h>
  20 #include <linux/gfs2_ondisk.h>
  21 #include <linux/crc32.h>
  22 #include <linux/time.h>
  23 #include <linux/wait.h>
  24 #include <linux/writeback.h>
  25 #include <linux/backing-dev.h>
  26 #include <linux/kernel.h>
  27
  28 #include "gfs2.h"
  29 #include "incore.h"
  30 #include "bmap.h"
  31 #include "dir.h"
  32 #include "glock.h"
  33 #include "glops.h"
  34 #include "inode.h"
  35 #include "log.h"
  36 #include "meta_io.h"
  37 #include "quota.h"
  38 #include "recovery.h"
  39 #include "rgrp.h"
  40 #include "super.h"
  41 #include "trans.h"
  42 #include "util.h"
  43 #include "sys.h"
  44 #include "xattr.h"
  45 #include "lops.h"
  46
/*
 * Possible decisions about deleting an inode when it is evicted.
 * NOTE(review): the code that produces and consumes these values is not in
 * this part of the file; the meaning of each value is taken from its name
 * only and should be confirmed against the eviction path.
 */
enum evict_behavior {
        EVICT_SHOULD_DELETE,
        EVICT_SHOULD_SKIP_DELETE,
        EVICT_SHOULD_DEFER_DELETE,
};
  52
  53 /**
  54  * gfs2_jindex_free - Clear all the journal index information
  55  * @sdp: The GFS2 superblock
  56  *
  57  */
  58
  59 void gfs2_jindex_free(struct gfs2_sbd *sdp)
  60 {
  61         struct list_head list;
  62         struct gfs2_jdesc *jd;
  63
  64         spin_lock(&sdp->sd_jindex_spin);
  65         list_add(&list, &sdp->sd_jindex_list);
  66         list_del_init(&sdp->sd_jindex_list);
  67         sdp->sd_journals = 0;
  68         spin_unlock(&sdp->sd_jindex_spin);
  69
  70         down_write(&sdp->sd_log_flush_lock);
  71         sdp->sd_jdesc = NULL;
  72         up_write(&sdp->sd_log_flush_lock);
  73
  74         while (!list_empty(&list)) {
  75                 jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
  76                 BUG_ON(jd->jd_log_bio);
  77                 gfs2_free_journal_extents(jd);
  78                 list_del(&jd->jd_list);
  79                 iput(jd->jd_inode);
  80                 jd->jd_inode = NULL;
  81                 kfree(jd);
  82         }
  83 }
  84
  85 static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
  86 {
  87         struct gfs2_jdesc *jd;
  88
  89         list_for_each_entry(jd, head, jd_list) {
  90                 if (jd->jd_jid == jid)
  91                         return jd;
  92         }
  93         return NULL;
  94 }
  95
  96 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
  97 {
  98         struct gfs2_jdesc *jd;
  99
 100         spin_lock(&sdp->sd_jindex_spin);
 101         jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
 102         spin_unlock(&sdp->sd_jindex_spin);
 103
 104         return jd;
 105 }
 106
 107 int gfs2_jdesc_check(struct gfs2_jdesc *jd)
 108 {
 109         struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
 110         struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 111         u64 size = i_size_read(jd->jd_inode);
 112
 113         if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
 114                 return -EIO;
 115
 116         jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
 117
 118         if (gfs2_write_alloc_required(ip, 0, size)) {
 119                 gfs2_consist_inode(ip);
 120                 return -EIO;
 121         }
 122
 123         return 0;
 124 }
 125
/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Calls the go_inval operation of this node's journal glock with
 * DIO_METADATA, locates the journal head and requires it to carry
 * GFS2_LOG_HEAD_UNMOUNT, then initializes the log sequence number, the log
 * pointers and the quota subsystem.  SDF_JOURNAL_LIVE is set only if all of
 * that succeeded.
 *
 * Returns: 0 on success; -EIO if the filesystem is withdrawing or withdrawn
 * or if the journal head lacks the unmount flag; otherwise the error from
 * gfs2_find_jhead() or gfs2_quota_init()
 */

int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
        struct gfs2_glock *j_gl = ip->i_gl;
        struct gfs2_log_header_host head;
        int error;

        j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
        if (gfs2_withdrawing_or_withdrawn(sdp))
                return -EIO;

        error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
        if (error) {
                gfs2_consist(sdp);
                return error;
        }

        /* A head without the unmount flag is treated as an inconsistency. */
        if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                gfs2_consist(sdp);
                return -EIO;
        }

        /*  Initialize some head of the log stuff  */
        sdp->sd_log_sequence = head.lh_sequence + 1;
        gfs2_log_pointers_init(sdp, head.lh_blkno);

        error = gfs2_quota_init(sdp);
        /* A withdraw during quota init turns success into -EIO. */
        if (!error && gfs2_withdrawing_or_withdrawn(sdp))
                error = -EIO;
        if (!error)
                set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
        return error;
}
 166
 167 void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
 168 {
 169         const struct gfs2_statfs_change *str = buf;
 170
 171         sc->sc_total = be64_to_cpu(str->sc_total);
 172         sc->sc_free = be64_to_cpu(str->sc_free);
 173         sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
 174 }
 175
 176 void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
 177 {
 178         struct gfs2_statfs_change *str = buf;
 179
 180         str->sc_total = cpu_to_be64(sc->sc_total);
 181         str->sc_free = cpu_to_be64(sc->sc_free);
 182         str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
 183 }
 184
 185 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 186 {
 187         struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 188         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 189         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 190         struct buffer_head *m_bh;
 191         struct gfs2_holder gh;
 192         int error;
 193
 194         error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
 195                                    &gh);
 196         if (error)
 197                 return error;
 198
 199         error = gfs2_meta_inode_buffer(m_ip, &m_bh);
 200         if (error)
 201                 goto out;
 202
 203         if (sdp->sd_args.ar_spectator) {
 204                 spin_lock(&sdp->sd_statfs_spin);
 205                 gfs2_statfs_change_in(m_sc, m_bh->b_data +
 206                                       sizeof(struct gfs2_dinode));
 207                 spin_unlock(&sdp->sd_statfs_spin);
 208         } else {
 209                 spin_lock(&sdp->sd_statfs_spin);
 210                 gfs2_statfs_change_in(m_sc, m_bh->b_data +
 211                                       sizeof(struct gfs2_dinode));
 212                 gfs2_statfs_change_in(l_sc, sdp->sd_sc_bh->b_data +
 213                                       sizeof(struct gfs2_dinode));
 214                 spin_unlock(&sdp->sd_statfs_spin);
 215
 216         }
 217
 218         brelse(m_bh);
 219 out:
 220         gfs2_glock_dq_uninit(&gh);
 221         return 0;
 222 }
 223
/*
 * gfs2_statfs_change - record a change to the local statfs counters
 * @sdp: the filesystem
 * @total: delta added to the local sc_total
 * @free: delta added to the local sc_free
 * @dinodes: delta added to the local sc_dinodes
 *
 * Adds the local statfs buffer to the current transaction, updates
 * @sdp->sd_statfs_local and writes it back into that buffer.  When the
 * statfs_percent mount argument is non-zero and the accumulated local change
 * in free blocks, as a percentage of the master free count, reaches that
 * value in either direction, gfs2_wake_up_statfs() is called.
 */
void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
                        s64 dinodes)
{
        struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        s64 x, y;
        int need_sync = 0;

        gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);

        spin_lock(&sdp->sd_statfs_spin);
        l_sc->sc_total += total;
        l_sc->sc_free += free;
        l_sc->sc_dinodes += dinodes;
        gfs2_statfs_change_out(l_sc, sdp->sd_sc_bh->b_data +
                               sizeof(struct gfs2_dinode));
        if (sdp->sd_args.ar_statfs_percent) {
                /* Compare 100 * local against master * percent: no division. */
                x = 100 * l_sc->sc_free;
                y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent;
                if (x >= y || x <= -y)
                        need_sync = 1;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        /* The wake-up is issued after the spinlock has been dropped. */
        if (need_sync)
                gfs2_wake_up_statfs(sdp);
}
 252
 253 void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh)
 254 {
 255         struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 256         struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
 257         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 258         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 259
 260         gfs2_trans_add_meta(l_ip->i_gl, sdp->sd_sc_bh);
 261         gfs2_trans_add_meta(m_ip->i_gl, m_bh);
 262
 263         spin_lock(&sdp->sd_statfs_spin);
 264         m_sc->sc_total += l_sc->sc_total;
 265         m_sc->sc_free += l_sc->sc_free;
 266         m_sc->sc_dinodes += l_sc->sc_dinodes;
 267         memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
 268         memset(sdp->sd_sc_bh->b_data + sizeof(struct gfs2_dinode),
 269                0, sizeof(struct gfs2_statfs_change));
 270         gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
 271         spin_unlock(&sdp->sd_statfs_spin);
 272 }
 273
/*
 * gfs2_statfs_sync - merge the local statfs changes into the master file
 * @sb: the VFS superblock
 * @type: not used by this function
 *
 * Takes the master statfs inode's glock exclusively and re-reads the master
 * counters.  If there are local changes, they are folded into the master
 * counters inside a transaction and sd_statfs_force_sync is cleared.
 *
 * Returns: 0 on success (including when there was nothing to sync), or the
 * error from taking the glock, reading the buffer or starting the transaction
 */
int gfs2_statfs_sync(struct super_block *sb, int type)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
        struct gfs2_holder gh;
        struct buffer_head *m_bh;
        int error;

        error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
                                   &gh);
        if (error)
                goto out;

        error = gfs2_meta_inode_buffer(m_ip, &m_bh);
        if (error)
                goto out_unlock;

        spin_lock(&sdp->sd_statfs_spin);
        gfs2_statfs_change_in(m_sc, m_bh->b_data +
                              sizeof(struct gfs2_dinode));
        /* Nothing accumulated locally: leave with error still 0. */
        if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
                spin_unlock(&sdp->sd_statfs_spin);
                goto out_bh;
        }
        spin_unlock(&sdp->sd_statfs_spin);

        /* Two dinode blocks are reserved: the master and the local one. */
        error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
        if (error)
                goto out_bh;

        update_statfs(sdp, m_bh);
        sdp->sd_statfs_force_sync = 0;

        gfs2_trans_end(sdp);

out_bh:
        brelse(m_bh);
out_unlock:
        gfs2_glock_dq_uninit(&gh);
out:
        return error;
}
 318
/*
 * List node carrying one glock holder.  gfs2_lock_fs_check_clean() allocates
 * one of these per journal to keep track of the journal glocks it holds.
 */
struct lfcc {
        struct list_head list;  /* linkage on the caller's local list */
        struct gfs2_holder gh;  /* holder for one journal inode's glock */
};
 323
/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 *
 * Takes every journal glock in shared mode, trades the freeze glock held in
 * @sdp->sd_freeze_gh for an exclusive hold, and checks that every journal
 * head carries GFS2_LOG_HEAD_UNMOUNT.  On success the freeze glock stays held
 * exclusively.  If the exclusive hold cannot be obtained or a journal check
 * fails, the freeze glock is re-acquired via gfs2_freeze_lock_shared().  The
 * journal glocks are always released before returning.
 *
 * Returns: 0 on success, -ENOMEM on allocation failure, -EBUSY if a journal
 * head lacks the unmount flag, or another error from the locking and journal
 * helpers
 */

static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
{
        struct gfs2_inode *ip;
        struct gfs2_jdesc *jd;
        struct lfcc *lfcc;
        LIST_HEAD(list);
        struct gfs2_log_header_host lh;
        int error, error2;

        /*
         * Grab all the journal glocks in SH mode.  We are *probably* doing
         * that to prevent recovery.
         */

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
                if (!lfcc) {
                        error = -ENOMEM;
                        goto out;
                }
                ip = GFS2_I(jd->jd_inode);
                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
                if (error) {
                        /* Holder was not acquired, so only the memory is freed. */
                        kfree(lfcc);
                        goto out;
                }
                list_add(&lfcc->list, &list);
        }

        /* Drop the current freeze glock hold before asking for EX. */
        gfs2_freeze_unlock(sdp);

        error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE,
                                   LM_FLAG_NOEXP | GL_NOPID,
                                   &sdp->sd_freeze_gh);
        if (error)
                goto relock_shared;

        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                error = gfs2_jdesc_check(jd);
                if (error)
                        break;
                error = gfs2_find_jhead(jd, &lh, false);
                if (error)
                        break;
                if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
                        error = -EBUSY;
                        break;
                }
        }

        if (!error)
                goto out;  /* success */

        /* A journal check failed: give up the exclusive hold again. */
        gfs2_freeze_unlock(sdp);

relock_shared:
        error2 = gfs2_freeze_lock_shared(sdp);
        gfs2_assert_withdraw(sdp, !error2);

out:
        /* Release and free every journal glock holder taken above. */
        while (!list_empty(&list)) {
                lfcc = list_first_entry(&list, struct lfcc, list);
                list_del(&lfcc->list);
                gfs2_glock_dq_uninit(&lfcc->gh);
                kfree(lfcc);
        }
        return error;
}
 400
 401 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 402 {
 403         const struct inode *inode = &ip->i_inode;
 404         struct gfs2_dinode *str = buf;
 405
 406         str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 407         str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
 408         str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
 409         str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 410         str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
 411         str->di_mode = cpu_to_be32(inode->i_mode);
 412         str->di_uid = cpu_to_be32(i_uid_read(inode));
 413         str->di_gid = cpu_to_be32(i_gid_read(inode));
 414         str->di_nlink = cpu_to_be32(inode->i_nlink);
 415         str->di_size = cpu_to_be64(i_size_read(inode));
 416         str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
 417         str->di_atime = cpu_to_be64(inode_get_atime_sec(inode));
 418         str->di_mtime = cpu_to_be64(inode_get_mtime_sec(inode));
 419         str->di_ctime = cpu_to_be64(inode_get_ctime_sec(inode));
 420
 421         str->di_goal_meta = cpu_to_be64(ip->i_goal);
 422         str->di_goal_data = cpu_to_be64(ip->i_goal);
 423         str->di_generation = cpu_to_be64(ip->i_generation);
 424
 425         str->di_flags = cpu_to_be32(ip->i_diskflags);
 426         str->di_height = cpu_to_be16(ip->i_height);
 427         str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
 428                                              !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
 429                                              GFS2_FORMAT_DE : 0);
 430         str->di_depth = cpu_to_be16(ip->i_depth);
 431         str->di_entries = cpu_to_be32(ip->i_entries);
 432
 433         str->di_eattr = cpu_to_be64(ip->i_eattr);
 434         str->di_atime_nsec = cpu_to_be32(inode_get_atime_nsec(inode));
 435         str->di_mtime_nsec = cpu_to_be32(inode_get_mtime_nsec(inode));
 436         str->di_ctime_nsec = cpu_to_be32(inode_get_ctime_nsec(inode));
 437 }
 438
 439 /**
 440  * gfs2_write_inode - Make sure the inode is stable on the disk
 441  * @inode: The inode
 442  * @wbc: The writeback control structure
 443  *
 444  * Returns: errno
 445  */
 446
 447 static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
 448 {
 449         struct gfs2_inode *ip = GFS2_I(inode);
 450         struct gfs2_sbd *sdp = GFS2_SB(inode);
 451         struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
 452         struct backing_dev_info *bdi = inode_to_bdi(metamapping->host);
 453         int ret = 0;
 454         bool flush_all = (wbc->sync_mode == WB_SYNC_ALL || gfs2_is_jdata(ip));
 455
 456         if (flush_all)
 457                 gfs2_log_flush(GFS2_SB(inode), ip->i_gl,
 458                                GFS2_LOG_HEAD_FLUSH_NORMAL |
 459                                GFS2_LFC_WRITE_INODE);
 460         if (bdi->wb.dirty_exceeded)
 461                 gfs2_ail1_flush(sdp, wbc);
 462         else
 463                 filemap_fdatawrite(metamapping);
 464         if (flush_all)
 465                 ret = filemap_fdatawait(metamapping);
 466         if (ret)
 467                 mark_inode_dirty_sync(inode);
 468         else {
 469                 spin_lock(&inode->i_lock);
 470                 if (!(inode->i_flags & I_DIRTY))
 471                         gfs2_ordered_del_inode(ip);
 472                 spin_unlock(&inode->i_lock);
 473         }
 474         return ret;
 475 }
 476
/**
 * gfs2_dirty_inode - check for atime updates
 * @inode: The inode in question
 * @flags: The type of dirty
 *
 * Unfortunately it can be called under any combination of inode
 * glock and freeze glock, so we have to check carefully.
 *
 * At the moment this deals only with atime - it should be possible
 * to expand that role in future, once a review of the locking has
 * been carried out.
 *
 * The function writes the in-core inode into its dinode buffer inside a
 * transaction.  It takes the inode glock and starts a transaction only if
 * the caller does not already have them, and releases only what it acquired
 * itself.  Failures are logged; nothing is returned to the caller.
 */

static void gfs2_dirty_inode(struct inode *inode, int flags)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct buffer_head *bh;
        struct gfs2_holder gh;
        int need_unlock = 0;
        int need_endtrans = 0;
        int ret;

        if (unlikely(!ip->i_gl)) {
                /* This can only happen during incomplete inode creation. */
                BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
                return;
        }

        if (gfs2_withdrawing_or_withdrawn(sdp))
                return;
        if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
                if (ret) {
                        fs_err(sdp, "dirty_inode: glock %d\n", ret);
                        gfs2_dump_glock(NULL, ip->i_gl, true);
                        return;
                }
                need_unlock = 1;
        } else if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
                /* Already held by us, but not exclusively: warn and bail. */
                return;

        /* Start a transaction only if the task is not already in one. */
        if (current->journal_info == NULL) {
                ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
                if (ret) {
                        fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
                        goto out;
                }
                need_endtrans = 1;
        }

        /* A failed buffer read is skipped without logging anything. */
        ret = gfs2_meta_inode_buffer(ip, &bh);
        if (ret == 0) {
                gfs2_trans_add_meta(ip->i_gl, bh);
                gfs2_dinode_out(ip, bh->b_data);
                brelse(bh);
        }

        if (need_endtrans)
                gfs2_trans_end(sdp);
out:
        if (need_unlock)
                gfs2_glock_dq_uninit(&gh);
}
 541
/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Flushes pending delete work (unless SDF_KILL is set), stops the filesystem
 * threads and, if the journal was live on entry, syncs quota and statfs data
 * and flushes the log twice before waiting for it to become empty.  Quota
 * state is cleaned up in every case.
 *
 * This function returns nothing; problems are reported through
 * gfs2_assert_warn().
 */

void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
        /* Sampled up front, before the threads are stopped and the log shut down. */
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

        if (!test_bit(SDF_KILL, &sdp->sd_flags))
                gfs2_flush_delete_work(sdp);

        gfs2_destroy_threads(sdp);

        if (log_write_allowed) {
                gfs2_quota_sync(sdp->sd_vfs, 0);
                gfs2_statfs_sync(sdp->sd_vfs, 0);

                /* We do two log flushes here. The first one commits dirty inodes
                 * and rgrps to the journal, but queues up revokes to the ail list.
                 * The second flush writes out and removes the revokes.
                 *
                 * The first must be done before the FLUSH_SHUTDOWN code
                 * clears the LIVE flag, otherwise it will not be able to start
                 * a transaction to write its revokes, and the error will cause
                 * a withdraw of the file system. */
                gfs2_log_flush(sdp, NULL, GFS2_LFC_MAKE_FS_RO);
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                               GFS2_LFC_MAKE_FS_RO);
                /* Wait up to five seconds for the log to drain, then only warn. */
                wait_event_timeout(sdp->sd_log_waitq,
                                   gfs2_log_is_empty(sdp),
                                   HZ * 5);
                gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
        }
        gfs2_quota_cleanup(sdp);
}
 580
/**
 * gfs2_put_super - Unmount the filesystem
 * @sb: The VFS superblock
 *
 * Blocks new journal recovery requests and waits for running ones, makes the
 * filesystem read-only if it is not already, and then releases the system
 * inodes, glocks and holders, resource groups, journal index and glock hash
 * table before freeing the superblock data.
 */

static void gfs2_put_super(struct super_block *sb)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_jdesc *jd;

        /* No more recovery requests */
        set_bit(SDF_NORECOVERY, &sdp->sd_flags);
        smp_mb();

        /* Wait on outstanding recovery */
restart:
        spin_lock(&sdp->sd_jindex_spin);
        list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
                if (!test_bit(JDF_RECOVERY, &jd->jd_flags))
                        continue;
                /*
                 * The wait is done with the spinlock dropped, so the list
                 * walk starts over from the beginning afterwards.
                 */
                spin_unlock(&sdp->sd_jindex_spin);
                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
                            TASK_UNINTERRUPTIBLE);
                goto restart;
        }
        spin_unlock(&sdp->sd_jindex_spin);

        if (!sb_rdonly(sb))
                gfs2_make_fs_ro(sdp);
        else {
                /* Read-only mount: stop the threads only after a withdraw. */
                if (gfs2_withdrawing_or_withdrawn(sdp))
                        gfs2_destroy_threads(sdp);

                gfs2_quota_cleanup(sdp);
        }

        WARN_ON(gfs2_withdrawing(sdp));

        /*  At this point, we're through modifying the disk  */

        /*  Release stuff  */

        gfs2_freeze_unlock(sdp);

        iput(sdp->sd_jindex);
        iput(sdp->sd_statfs_inode);
        iput(sdp->sd_rindex);
        iput(sdp->sd_quota_inode);

        gfs2_glock_put(sdp->sd_rename_gl);
        gfs2_glock_put(sdp->sd_freeze_gl);

        /* These per-node resources are released only for non-spectator mounts. */
        if (!sdp->sd_args.ar_spectator) {
                if (gfs2_holder_initialized(&sdp->sd_journal_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
                if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
                        gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
                brelse(sdp->sd_sc_bh);
                gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
                gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
                free_local_statfs_inodes(sdp);
                iput(sdp->sd_qc_inode);
        }

        gfs2_glock_dq_uninit(&sdp->sd_live_gh);
        gfs2_clear_rgrpd(sdp);
        gfs2_jindex_free(sdp);
        /*  Take apart glock structures and buffer lists  */
        gfs2_gl_hash_clear(sdp);
        truncate_inode_pages_final(&sdp->sd_aspace);
        gfs2_delete_debugfs_file(sdp);

        gfs2_sys_fs_del(sdp);
        free_sbd(sdp);
}
 657
/**
 * gfs2_sync_fs - sync the filesystem
 * @sb: the superblock
 * @wait: true to wait for completion
 *
 * Syncs the quota data and, if @wait is set, flushes the log to disk.
 *
 * Returns: the current value of the superblock's sd_log_error field
 */

static int gfs2_sync_fs(struct super_block *sb, int wait)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;

        /* The return value of the quota sync is not checked. */
        gfs2_quota_sync(sb, -1);
        if (wait)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_SYNC_FS);
        return sdp->sd_log_error;
}
 676
/*
 * gfs2_do_thaw - re-take the freeze glock in shared mode and thaw the VFS
 * @sdp: the filesystem
 *
 * If either step fails, the failure is logged and gfs2_assert_withdraw() is
 * invoked with a false condition.
 *
 * Returns: 0 on success, or the error from gfs2_freeze_lock_shared() or
 * thaw_super()
 */
static int gfs2_do_thaw(struct gfs2_sbd *sdp)
{
        struct super_block *sb = sdp->sd_vfs;
        int error;

        error = gfs2_freeze_lock_shared(sdp);
        if (error)
                goto fail;
        error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
        if (!error)
                return 0;

fail:
        fs_info(sdp, "GFS2: couldn't thaw filesystem: %d\n", error);
        gfs2_assert_withdraw(sdp, 0);
        return error;
}
 694
/*
 * gfs2_freeze_func - work function that freezes and then re-thaws the fs
 * @work: the sd_freeze_work member embedded in the struct gfs2_sbd
 *
 * Under sd_freeze_mutex: freezes the VFS superblock, drops the freeze glock
 * and sets SDF_FROZEN, then immediately calls gfs2_do_thaw(), which re-takes
 * the freeze glock in shared mode before thawing.  SDF_FROZEN is cleared
 * only if the thaw succeeded.
 *
 * deactivate_super() is called on every path.
 * NOTE(review): presumably this pairs with a superblock reference taken by
 * whoever queued the work - confirm against the queueing site.
 */
void gfs2_freeze_func(struct work_struct *work)
{
        struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
        struct super_block *sb = sdp->sd_vfs;
        int error;

        mutex_lock(&sdp->sd_freeze_mutex);
        error = -EBUSY;
        if (test_bit(SDF_FROZEN, &sdp->sd_flags))
                goto freeze_failed;

        error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
        if (error)
                goto freeze_failed;

        gfs2_freeze_unlock(sdp);
        set_bit(SDF_FROZEN, &sdp->sd_flags);

        /* On thaw failure SDF_FROZEN stays set; gfs2_do_thaw() has logged it. */
        error = gfs2_do_thaw(sdp);
        if (error)
                goto out;

        clear_bit(SDF_FROZEN, &sdp->sd_flags);
        goto out;

freeze_failed:
        fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n", error);

out:
        mutex_unlock(&sdp->sd_freeze_mutex);
        deactivate_super(sb);
}
 727
 728 /**
 729  * gfs2_freeze_super - prevent further writes to the filesystem
 730  * @sb: the VFS structure for the filesystem
 731  *
 732  */
 733
 734 static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
 735 {
 736         struct gfs2_sbd *sdp = sb->s_fs_info;
 737         int error;
 738
 739         if (!mutex_trylock(&sdp->sd_freeze_mutex))
 740                 return -EBUSY;
 741         if (test_bit(SDF_FROZEN, &sdp->sd_flags)) {
 742                 mutex_unlock(&sdp->sd_freeze_mutex);
 743                 return -EBUSY;
 744         }
 745
 746         for (;;) {
 747                 error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 748                 if (error) {
 749                         fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
 750                                 error);
 751                         goto out;
 752                 }
 753
 754                 error = gfs2_lock_fs_check_clean(sdp);
 755                 if (!error) {
 756                         set_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
 757                         set_bit(SDF_FROZEN, &sdp->sd_flags);
 758                         break;
 759                 }
 760
 761                 error = gfs2_do_thaw(sdp);
 762                 if (error)
 763                         goto out;
 764
 765                 if (error == -EBUSY)
 766                         fs_err(sdp, "waiting for recovery before freeze\n");
 767                 else if (error == -EIO) {
 768                         fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due "
 769                                "to recovery error.\n");
 770                         goto out;
 771                 } else {
 772                         fs_err(sdp, "error freezing FS: %d\n", error);
 773                 }
 774                 fs_err(sdp, "retrying...\n");
 775                 msleep(1000);
 776         }
 777
 778 out:
 779         mutex_unlock(&sdp->sd_freeze_mutex);
 780         return error;
 781 }
 782
 783 static int gfs2_freeze_fs(struct super_block *sb)
 784 {
 785         struct gfs2_sbd *sdp = sb->s_fs_info;
 786
 787         if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
 788                 gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
 789                                GFS2_LFC_FREEZE_GO_SYNC);
 790                 if (gfs2_withdrawing_or_withdrawn(sdp))
 791                         return -EIO;
 792         }
 793         return 0;
 794 }
 795
/**
 * gfs2_thaw_super - reallow writes to the filesystem
 * @sb: the VFS structure for the filesystem
 * @who: freeze holder type (not used by this function)
 *
 * Only proceeds if SDF_FREEZE_INITIATOR is set.  Drops the freeze glock and
 * calls gfs2_do_thaw(); on success both SDF_FREEZE_INITIATOR and SDF_FROZEN
 * are cleared.
 *
 * Returns: 0 on success, -EBUSY if sd_freeze_mutex is contended, -EINVAL if
 * SDF_FREEZE_INITIATOR is not set, or the error from gfs2_do_thaw()
 */

static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
{
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;

        if (!mutex_trylock(&sdp->sd_freeze_mutex))
                return -EBUSY;
        if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) {
                mutex_unlock(&sdp->sd_freeze_mutex);
                return -EINVAL;
        }

        /* Extra active reference, dropped by deactivate_super() below. */
        atomic_inc(&sb->s_active);
        gfs2_freeze_unlock(sdp);

        error = gfs2_do_thaw(sdp);

        if (!error) {
                clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
                clear_bit(SDF_FROZEN, &sdp->sd_flags);
        }
        mutex_unlock(&sdp->sd_freeze_mutex);
        deactivate_super(sb);
        return error;
}
 827
 828 void gfs2_thaw_freeze_initiator(struct super_block *sb)
 829 {
 830         struct gfs2_sbd *sdp = sb->s_fs_info;
 831
 832         mutex_lock(&sdp->sd_freeze_mutex);
 833         if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags))
 834                 goto out;
 835
 836         gfs2_freeze_unlock(sdp);
 837
 838 out:
 839         mutex_unlock(&sdp->sd_freeze_mutex);
 840 }
 841
/**
 * statfs_slow_fill - fill in the sg for a given RG
 * @rgd: the RG
 * @sc: the sc structure
 *
 * Runs gfs2_rgrp_verify() on @rgd and adds its data, free and dinode counts
 * to the running totals in @sc.
 *
 * Returns: always 0
 */

static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
{
        gfs2_rgrp_verify(rgd);
        sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_free;
        sc->sc_dinodes += rgd->rd_dinodes;
        return 0;
}
 859
/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 * NOTE(review): no fallback is visible in this function; if one exists it
 * must be in the caller - confirm.
 *
 * Keeps up to 64 asynchronous shared glock requests on resource groups in
 * flight at a time.  Each pass over the slots harvests completed requests
 * into @sc and reuses free slots for the next resource groups, until no slot
 * is busy and no new request was issued.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: 0 on success, -ENOMEM if the holder array cannot be allocated,
 * -ERESTARTSYS if a signal is pending, or an error from the glock calls
 */

static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
        struct gfs2_rgrpd *rgd_next;
        struct gfs2_holder *gha, *gh;
        unsigned int slots = 64;
        unsigned int x;
        int done;
        int error = 0, err;

        memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
        gha = kmalloc_array(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
        if (!gha)
                return -ENOMEM;
        for (x = 0; x < slots; x++)
                gfs2_holder_mark_uninitialized(gha + x);

        rgd_next = gfs2_rgrpd_get_first(sdp);

        for (;;) {
                done = 1;

                for (x = 0; x < slots; x++) {
                        gh = gha + x;

                        /* Harvest this slot if its request has completed. */
                        if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
                                err = gfs2_glock_wait(gh);
                                if (err) {
                                        gfs2_holder_uninit(gh);
                                        error = err;
                                } else {
                                        /* After an error, still release but stop counting. */
                                        if (!error) {
                                                struct gfs2_rgrpd *rgd =
                                                        gfs2_glock2rgrp(gh->gh_gl);

                                                error = statfs_slow_fill(rgd, sc);
                                        }
                                        gfs2_glock_dq_uninit(gh);
                                }
                        }

                        if (gfs2_holder_initialized(gh))
                                done = 0;
                        else if (rgd_next && !error) {
                                /* Free slot: queue the next resource group. */
                                error = gfs2_glock_nq_init(rgd_next->rd_gl,
                                                           LM_ST_SHARED,
                                                           GL_ASYNC,
                                                           gh);
                                rgd_next = gfs2_rgrpd_get_next(rgd_next);
                                done = 0;
                        }

                        /* Once error is set, no new requests are queued. */
                        if (signal_pending(current))
                                error = -ERESTARTSYS;
                }

                if (done)
                        break;

                yield();
        }

        kfree(gha);
        return error;
}
 937
 938 /**
 939  * gfs2_statfs_i - Do a statfs
 940  * @sdp: the filesystem
 941  * @sc: the sc structure
 942  *
 943  * Returns: errno
 944  */
 945
 946 static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
 947 {
 948         struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
 949         struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
 950
 951         spin_lock(&sdp->sd_statfs_spin);
 952
 953         *sc = *m_sc;
 954         sc->sc_total += l_sc->sc_total;
 955         sc->sc_free += l_sc->sc_free;
 956         sc->sc_dinodes += l_sc->sc_dinodes;
 957
 958         spin_unlock(&sdp->sd_statfs_spin);
 959
 960         if (sc->sc_free < 0)
 961                 sc->sc_free = 0;
 962         if (sc->sc_free > sc->sc_total)
 963                 sc->sc_free = sc->sc_total;
 964         if (sc->sc_dinodes < 0)
 965                 sc->sc_dinodes = 0;
 966
 967         return 0;
 968 }
 969
 970 /**
 971  * gfs2_statfs - Gather and return stats about the filesystem
 972  * @dentry: The name of the link
 973  * @buf: The buffer
 974  *
 975  * Returns: 0 on success or error code
 976  */
 977
 978 static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 979 {
 980         struct super_block *sb = dentry->d_sb;
 981         struct gfs2_sbd *sdp = sb->s_fs_info;
 982         struct gfs2_statfs_change_host sc;
 983         int error;
 984
 985         error = gfs2_rindex_update(sdp);
 986         if (error)
 987                 return error;
 988
 989         if (gfs2_tune_get(sdp, gt_statfs_slow))
 990                 error = gfs2_statfs_slow(sdp, &sc);
 991         else
 992                 error = gfs2_statfs_i(sdp, &sc);
 993
 994         if (error)
 995                 return error;
 996
 997         buf->f_type = GFS2_MAGIC;
 998         buf->f_bsize = sdp->sd_sb.sb_bsize;
 999         buf->f_blocks = sc.sc_total;
1000         buf->f_bfree = sc.sc_free;
1001         buf->f_bavail = sc.sc_free;
1002         buf->f_files = sc.sc_dinodes + sc.sc_free;
1003         buf->f_ffree = sc.sc_free;
1004         buf->f_namelen = GFS2_FNAMESIZE;
1005         buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
1006
1007         return 0;
1008 }
1009
1010 /**
1011  * gfs2_drop_inode - Drop an inode (test for remote unlink)
1012  * @inode: The inode to drop
1013  *
1014  * If we've received a callback on an iopen lock then it's because a
1015  * remote node tried to deallocate the inode but failed due to this node
1016  * still having the inode open. Here we mark the link count zero
1017  * since we know that it must have reached zero if the GLF_DEMOTE flag
1018  * is set on the iopen glock. If we didn't do a disk read since the
1019  * remote node removed the final link then we might otherwise miss
1020  * this event. This check ensures that this node will deallocate the
1021  * inode's blocks, or alternatively pass the baton on to another
1022  * node for later deallocation.
1023  */
1024
1025 static int gfs2_drop_inode(struct inode *inode)
1026 {
1027         struct gfs2_inode *ip = GFS2_I(inode);
1028         struct gfs2_sbd *sdp = GFS2_SB(inode);
1029
1030         if (inode->i_nlink &&
1031             gfs2_holder_initialized(&ip->i_iopen_gh)) {
1032                 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1033                 if (glock_needs_demote(gl))
1034                         clear_nlink(inode);
1035         }
1036
1037         /*
1038          * When under memory pressure when an inode's link count has dropped to
1039          * zero, defer deleting the inode to the delete workqueue.  This avoids
1040          * calling into DLM under memory pressure, which can deadlock.
1041          */
1042         if (!inode->i_nlink &&
1043             unlikely(current->flags & PF_MEMALLOC) &&
1044             gfs2_holder_initialized(&ip->i_iopen_gh)) {
1045                 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1046
1047                 gfs2_glock_hold(gl);
1048                 if (!gfs2_queue_verify_delete(gl, true))
1049                         gfs2_glock_put_async(gl);
1050                 return 0;
1051         }
1052
1053         /*
1054          * No longer cache inodes when trying to evict them all.
1055          */
1056         if (test_bit(SDF_EVICTING, &sdp->sd_flags))
1057                 return 1;
1058
1059         return generic_drop_inode(inode);
1060 }
1061
1062 /**
1063  * gfs2_show_options - Show mount options for /proc/mounts
1064  * @s: seq_file structure
1065  * @root: root of this (sub)tree
1066  *
1067  * Returns: 0 on success or error code
1068  */
1069
1070 static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1071 {
1072         struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
1073         struct gfs2_args *args = &sdp->sd_args;
1074         unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;
1075
1076         spin_lock(&sdp->sd_tune.gt_spin);
1077         logd_secs = sdp->sd_tune.gt_logd_secs;
1078         quota_quantum = sdp->sd_tune.gt_quota_quantum;
1079         statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
1080         statfs_slow = sdp->sd_tune.gt_statfs_slow;
1081         spin_unlock(&sdp->sd_tune.gt_spin);
1082
1083         if (is_subdir(root, sdp->sd_master_dir))
1084                 seq_puts(s, ",meta");
1085         if (args->ar_lockproto[0])
1086                 seq_show_option(s, "lockproto", args->ar_lockproto);
1087         if (args->ar_locktable[0])
1088                 seq_show_option(s, "locktable", args->ar_locktable);
1089         if (args->ar_hostdata[0])
1090                 seq_show_option(s, "hostdata", args->ar_hostdata);
1091         if (args->ar_spectator)
1092                 seq_puts(s, ",spectator");
1093         if (args->ar_localflocks)
1094                 seq_puts(s, ",localflocks");
1095         if (args->ar_debug)
1096                 seq_puts(s, ",debug");
1097         if (args->ar_posix_acl)
1098                 seq_puts(s, ",acl");
1099         if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
1100                 char *state;
1101                 switch (args->ar_quota) {
1102                 case GFS2_QUOTA_OFF:
1103                         state = "off";
1104                         break;
1105                 case GFS2_QUOTA_ACCOUNT:
1106                         state = "account";
1107                         break;
1108                 case GFS2_QUOTA_ON:
1109                         state = "on";
1110                         break;
1111                 case GFS2_QUOTA_QUIET:
1112                         state = "quiet";
1113                         break;
1114                 default:
1115                         state = "unknown";
1116                         break;
1117                 }
1118                 seq_printf(s, ",quota=%s", state);
1119         }
1120         if (args->ar_suiddir)
1121                 seq_puts(s, ",suiddir");
1122         if (args->ar_data != GFS2_DATA_DEFAULT) {
1123                 char *state;
1124                 switch (args->ar_data) {
1125                 case GFS2_DATA_WRITEBACK:
1126                         state = "writeback";
1127                         break;
1128                 case GFS2_DATA_ORDERED:
1129                         state = "ordered";
1130                         break;
1131                 default:
1132                         state = "unknown";
1133                         break;
1134                 }
1135                 seq_printf(s, ",data=%s", state);
1136         }
1137         if (args->ar_discard)
1138                 seq_puts(s, ",discard");
1139         if (logd_secs != 30)
1140                 seq_printf(s, ",commit=%d", logd_secs);
1141         if (statfs_quantum != 30)
1142                 seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
1143         else if (statfs_slow)
1144                 seq_puts(s, ",statfs_quantum=0");
1145         if (quota_quantum != 60)
1146                 seq_printf(s, ",quota_quantum=%d", quota_quantum);
1147         if (args->ar_statfs_percent)
1148                 seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
1149         if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1150                 const char *state;
1151
1152                 switch (args->ar_errors) {
1153                 case GFS2_ERRORS_WITHDRAW:
1154                         state = "withdraw";
1155                         break;
1156                 case GFS2_ERRORS_PANIC:
1157                         state = "panic";
1158                         break;
1159                 default:
1160                         state = "unknown";
1161                         break;
1162                 }
1163                 seq_printf(s, ",errors=%s", state);
1164         }
1165         if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1166                 seq_puts(s, ",nobarrier");
1167         if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1168                 seq_puts(s, ",demote_interface_used");
1169         if (args->ar_rgrplvb)
1170                 seq_puts(s, ",rgrplvb");
1171         if (args->ar_loccookie)
1172                 seq_puts(s, ",loccookie");
1173         return 0;
1174 }
1175
1176 static void gfs2_final_release_pages(struct gfs2_inode *ip)
1177 {
1178         struct inode *inode = &ip->i_inode;
1179         struct gfs2_glock *gl = ip->i_gl;
1180
1181         if (unlikely(!gl)) {
1182                 /* This can only happen during incomplete inode creation. */
1183                 BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
1184                 return;
1185         }
1186
1187         truncate_inode_pages(gfs2_glock2aspace(gl), 0);
1188         truncate_inode_pages(&inode->i_data, 0);
1189
1190         if (atomic_read(&gl->gl_revokes) == 0) {
1191                 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1192                 clear_bit(GLF_DIRTY, &gl->gl_flags);
1193         }
1194 }
1195
1196 static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1197 {
1198         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1199         struct gfs2_rgrpd *rgd;
1200         struct gfs2_holder gh;
1201         int error;
1202
1203         if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1204                 gfs2_consist_inode(ip);
1205                 return -EIO;
1206         }
1207
1208         gfs2_rindex_update(sdp);
1209
1210         error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
1211         if (error)
1212                 return error;
1213
1214         rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1215         if (!rgd) {
1216                 gfs2_consist_inode(ip);
1217                 error = -EIO;
1218                 goto out_qs;
1219         }
1220
1221         error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1222                                    LM_FLAG_NODE_SCOPE, &gh);
1223         if (error)
1224                 goto out_qs;
1225
1226         error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1227                                  sdp->sd_jdesc->jd_blocks);
1228         if (error)
1229                 goto out_rg_gunlock;
1230
1231         gfs2_free_di(rgd, ip);
1232
1233         gfs2_final_release_pages(ip);
1234
1235         gfs2_trans_end(sdp);
1236
1237 out_rg_gunlock:
1238         gfs2_glock_dq_uninit(&gh);
1239 out_qs:
1240         gfs2_quota_unhold(ip);
1241         return error;
1242 }
1243
1244 /**
1245  * gfs2_glock_put_eventually
1246  * @gl: The glock to put
1247  *
1248  * When under memory pressure, trigger a deferred glock put to make sure we
1249  * won't call into DLM and deadlock.  Otherwise, put the glock directly.
1250  */
1251
1252 static void gfs2_glock_put_eventually(struct gfs2_glock *gl)
1253 {
1254         if (current->flags & PF_MEMALLOC)
1255                 gfs2_glock_put_async(gl);
1256         else
1257                 gfs2_glock_put(gl);
1258 }
1259
1260 static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode)
1261 {
1262         struct gfs2_inode *ip = GFS2_I(inode);
1263         struct gfs2_sbd *sdp = GFS2_SB(inode);
1264         struct gfs2_holder *gh = &ip->i_iopen_gh;
1265         int error;
1266
1267         gh->gh_flags |= GL_NOCACHE;
1268         gfs2_glock_dq_wait(gh);
1269
1270         /*
1271          * If there are no other lock holders, we will immediately get
1272          * exclusive access to the iopen glock here.
1273          *
1274          * Otherwise, the other nodes holding the lock will be notified about
1275          * our locking request (see iopen_go_callback()).  If they do not have
1276          * the inode open, they are expected to evict the cached inode and
1277          * release the lock, allowing us to proceed.
1278          *
1279          * Otherwise, if they cannot evict the inode, they are expected to poke
1280          * the inode glock (note: not the iopen glock).  We will notice that
1281          * and stop waiting for the iopen glock immediately.  The other node(s)
1282          * are then expected to take care of deleting the inode when they no
1283          * longer use it.
1284          *
1285          * As a last resort, if another node keeps holding the iopen glock
1286          * without showing any activity on the inode glock, we will eventually
1287          * time out and fail the iopen glock upgrade.
1288          */
1289
1290         gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh);
1291         error = gfs2_glock_nq(gh);
1292         if (error)
1293                 return EVICT_SHOULD_SKIP_DELETE;
1294
1295         wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
1296                 !test_bit(HIF_WAIT, &gh->gh_iflags) ||
1297                 glock_needs_demote(ip->i_gl),
1298                 5 * HZ);
1299         if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) {
1300                 gfs2_glock_dq(gh);
1301                 if (glock_needs_demote(ip->i_gl))
1302                         return EVICT_SHOULD_SKIP_DELETE;
1303                 return EVICT_SHOULD_DEFER_DELETE;
1304         }
1305         error = gfs2_glock_holder_ready(gh);
1306         if (error)
1307                 return EVICT_SHOULD_SKIP_DELETE;
1308         return EVICT_SHOULD_DELETE;
1309 }
1310
1311 /**
1312  * evict_should_delete - determine whether the inode is eligible for deletion
1313  * @inode: The inode to evict
1314  * @gh: The glock holder structure
1315  *
1316  * This function determines whether the evicted inode is eligible to be deleted
1317  * and locks the inode glock.
1318  *
1319  * Returns: the fate of the dinode
1320  */
1321 static enum evict_behavior evict_should_delete(struct inode *inode,
1322                                                struct gfs2_holder *gh)
1323 {
1324         struct gfs2_inode *ip = GFS2_I(inode);
1325         struct super_block *sb = inode->i_sb;
1326         struct gfs2_sbd *sdp = sb->s_fs_info;
1327         int ret;
1328
1329         if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
1330                 goto should_delete;
1331
1332         if (test_bit(GIF_DEFER_DELETE, &ip->i_flags))
1333                 return EVICT_SHOULD_DEFER_DELETE;
1334
1335         /* Deletes should never happen under memory pressure anymore.  */
1336         if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
1337                 return EVICT_SHOULD_DEFER_DELETE;
1338
1339         /* Must not read inode block until block type has been verified */
1340         ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh);
1341         if (unlikely(ret)) {
1342                 glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
1343                 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1344                 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1345                 return EVICT_SHOULD_DEFER_DELETE;
1346         }
1347
1348         if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino))
1349                 return EVICT_SHOULD_SKIP_DELETE;
1350         ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1351         if (ret)
1352                 return EVICT_SHOULD_SKIP_DELETE;
1353
1354         ret = gfs2_instantiate(gh);
1355         if (ret)
1356                 return EVICT_SHOULD_SKIP_DELETE;
1357
1358         /*
1359          * The inode may have been recreated in the meantime.
1360          */
1361         if (inode->i_nlink)
1362                 return EVICT_SHOULD_SKIP_DELETE;
1363
1364 should_delete:
1365         if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
1366             test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
1367                 enum evict_behavior behavior =
1368                         gfs2_upgrade_iopen_glock(inode);
1369
1370                 if (behavior != EVICT_SHOULD_DELETE) {
1371                         gfs2_holder_uninit(&ip->i_iopen_gh);
1372                         return behavior;
1373                 }
1374         }
1375         return EVICT_SHOULD_DELETE;
1376 }
1377
1378 /**
1379  * evict_unlinked_inode - delete the pieces of an unlinked evicted inode
1380  * @inode: The inode to evict
1381  */
1382 static int evict_unlinked_inode(struct inode *inode)
1383 {
1384         struct gfs2_inode *ip = GFS2_I(inode);
1385         int ret;
1386
1387         if (S_ISDIR(inode->i_mode) &&
1388             (ip->i_diskflags & GFS2_DIF_EXHASH)) {
1389                 ret = gfs2_dir_exhash_dealloc(ip);
1390                 if (ret)
1391                         goto out;
1392         }
1393
1394         if (ip->i_eattr) {
1395                 ret = gfs2_ea_dealloc(ip);
1396                 if (ret)
1397                         goto out;
1398         }
1399
1400         if (!gfs2_is_stuffed(ip)) {
1401                 ret = gfs2_file_dealloc(ip);
1402                 if (ret)
1403                         goto out;
1404         }
1405
1406         /*
1407          * As soon as we clear the bitmap for the dinode, gfs2_create_inode()
1408          * can get called to recreate it, or even gfs2_inode_lookup() if the
1409          * inode was recreated on another node in the meantime.
1410          *
1411          * However, inserting the new inode into the inode hash table will not
1412          * succeed until the old inode is removed, and that only happens after
1413          * ->evict_inode() returns.  The new inode is attached to its inode and
1414          *  iopen glocks after inserting it into the inode hash table, so at
1415          *  that point we can be sure that both glocks are unused.
1416          */
1417
1418         ret = gfs2_dinode_dealloc(ip);
1419         if (!ret && ip->i_gl)
1420                 gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
1421
1422 out:
1423         return ret;
1424 }
1425
1426 /*
1427  * evict_linked_inode - evict an inode whose dinode has not been unlinked
1428  * @inode: The inode to evict
1429  */
1430 static int evict_linked_inode(struct inode *inode)
1431 {
1432         struct super_block *sb = inode->i_sb;
1433         struct gfs2_sbd *sdp = sb->s_fs_info;
1434         struct gfs2_inode *ip = GFS2_I(inode);
1435         struct address_space *metamapping;
1436         int ret;
1437
1438         gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
1439                        GFS2_LFC_EVICT_INODE);
1440         metamapping = gfs2_glock2aspace(ip->i_gl);
1441         if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
1442                 filemap_fdatawrite(metamapping);
1443                 filemap_fdatawait(metamapping);
1444         }
1445         write_inode_now(inode, 1);
1446         gfs2_ail_flush(ip->i_gl, 0);
1447
1448         ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1449         if (ret)
1450                 return ret;
1451
1452         /* Needs to be done before glock release & also in a transaction */
1453         truncate_inode_pages(&inode->i_data, 0);
1454         truncate_inode_pages(metamapping, 0);
1455         gfs2_trans_end(sdp);
1456         return 0;
1457 }
1458
1459 /**
1460  * gfs2_evict_inode - Remove an inode from cache
1461  * @inode: The inode to evict
1462  *
1463  * There are three cases to consider:
1464  * 1. i_nlink == 0, we are final opener (and must deallocate)
1465  * 2. i_nlink == 0, we are not the final opener (and cannot deallocate)
1466  * 3. i_nlink > 0
1467  *
1468  * If the fs is read only, then we have to treat all cases as per #3
1469  * since we are unable to do any deallocation. The inode will be
1470  * deallocated by the next read/write node to attempt an allocation
1471  * in the same resource group
1472  *
1473  * We have to (at the moment) hold the inodes main lock to cover
1474  * the gap between unlocking the shared lock on the iopen lock and
1475  * taking the exclusive lock. I'd rather do a shared -> exclusive
1476  * conversion on the iopen lock, but we can change that later. This
1477  * is safe, just less efficient.
1478  */
1479
1480 static void gfs2_evict_inode(struct inode *inode)
1481 {
1482         struct super_block *sb = inode->i_sb;
1483         struct gfs2_sbd *sdp = sb->s_fs_info;
1484         struct gfs2_inode *ip = GFS2_I(inode);
1485         struct gfs2_holder gh;
1486         enum evict_behavior behavior;
1487         int ret;
1488
1489         gfs2_holder_mark_uninitialized(&gh);
1490         if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
1491                 goto out;
1492
1493         /*
1494          * In case of an incomplete mount, gfs2_evict_inode() may be called for
1495          * system files without having an active journal to write to.  In that
1496          * case, skip the filesystem evict.
1497          */
1498         if (!sdp->sd_jdesc)
1499                 goto out;
1500
1501         behavior = evict_should_delete(inode, &gh);
1502         if (behavior == EVICT_SHOULD_DEFER_DELETE &&
1503             !test_bit(SDF_KILL, &sdp->sd_flags)) {
1504                 struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
1505
1506                 if (io_gl) {
1507                         gfs2_glock_hold(io_gl);
1508                         if (!gfs2_queue_verify_delete(io_gl, true))
1509                                 gfs2_glock_put(io_gl);
1510                         goto out;
1511                 }
1512                 behavior = EVICT_SHOULD_DELETE;
1513         }
1514         if (behavior == EVICT_SHOULD_DELETE)
1515                 ret = evict_unlinked_inode(inode);
1516         else
1517                 ret = evict_linked_inode(inode);
1518
1519         if (gfs2_rs_active(&ip->i_res))
1520                 gfs2_rs_deltree(&ip->i_res);
1521
1522         if (ret && ret != GLR_TRYFAILED && ret != -EROFS)
1523                 fs_warn(sdp, "gfs2_evict_inode: %d\n", ret);
1524 out:
1525         if (gfs2_holder_initialized(&gh))
1526                 gfs2_glock_dq_uninit(&gh);
1527         truncate_inode_pages_final(&inode->i_data);
1528         if (ip->i_qadata)
1529                 gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
1530         gfs2_rs_deltree(&ip->i_res);
1531         gfs2_ordered_del_inode(ip);
1532         clear_inode(inode);
1533         gfs2_dir_hash_inval(ip);
1534         if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
1535                 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
1536
1537                 glock_clear_object(gl, ip);
1538                 gfs2_glock_hold(gl);
1539                 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1540                 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1541                 gfs2_glock_put_eventually(gl);
1542         }
1543         if (ip->i_gl) {
1544                 glock_clear_object(ip->i_gl, ip);
1545                 wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
1546                 gfs2_glock_put_eventually(ip->i_gl);
1547                 rcu_assign_pointer(ip->i_gl, NULL);
1548         }
1549 }
1550
1551 static struct inode *gfs2_alloc_inode(struct super_block *sb)
1552 {
1553         struct gfs2_inode *ip;
1554
1555         ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
1556         if (!ip)
1557                 return NULL;
1558         ip->i_no_addr = 0;
1559         ip->i_no_formal_ino = 0;
1560         ip->i_flags = 0;
1561         ip->i_gl = NULL;
1562         gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
1563         memset(&ip->i_res, 0, sizeof(ip->i_res));
1564         RB_CLEAR_NODE(&ip->i_res.rs_node);
1565         ip->i_diskflags = 0;
1566         ip->i_rahead = 0;
1567         return &ip->i_inode;
1568 }
1569
1570 static void gfs2_free_inode(struct inode *inode)
1571 {
1572         kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
1573 }
1574
1575 void free_local_statfs_inodes(struct gfs2_sbd *sdp)
1576 {
1577         struct local_statfs_inode *lsi, *safe;
1578
1579         /* Run through the statfs inodes list to iput and free memory */
1580         list_for_each_entry_safe(lsi, safe, &sdp->sd_sc_inodes_list, si_list) {
1581                 if (lsi->si_jid == sdp->sd_jdesc->jd_jid)
1582                         sdp->sd_sc_inode = NULL; /* belongs to this node */
1583                 if (lsi->si_sc_inode)
1584                         iput(lsi->si_sc_inode);
1585                 list_del(&lsi->si_list);
1586                 kfree(lsi);
1587         }
1588 }
1589
1590 struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp,
1591                                       unsigned int index)
1592 {
1593         struct local_statfs_inode *lsi;
1594
1595         /* Return the local (per node) statfs inode in the
1596          * sdp->sd_sc_inodes_list corresponding to the 'index'. */
1597         list_for_each_entry(lsi, &sdp->sd_sc_inodes_list, si_list) {
1598                 if (lsi->si_jid == index)
1599                         return lsi->si_sc_inode;
1600         }
1601         return NULL;
1602 }
1603
1604 const struct super_operations gfs2_super_ops = {
1605         .alloc_inode            = gfs2_alloc_inode,
1606         .free_inode             = gfs2_free_inode,
1607         .write_inode            = gfs2_write_inode,
1608         .dirty_inode            = gfs2_dirty_inode,
1609         .evict_inode            = gfs2_evict_inode,
1610         .put_super              = gfs2_put_super,
1611         .sync_fs                = gfs2_sync_fs,
1612         .freeze_super           = gfs2_freeze_super,
1613         .freeze_fs              = gfs2_freeze_fs,
1614         .thaw_super             = gfs2_thaw_super,
1615         .statfs                 = gfs2_statfs,
1616         .drop_inode             = gfs2_drop_inode,
1617         .show_options           = gfs2_show_options,
1618 };
1619