fs/xfs/xfs_iops.c

   1 /*
   2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
   3  * All Rights Reserved.
   4  *
   5  * This program is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU General Public License as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it would be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write the Free Software Foundation,
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include "xfs.h"
  19 #include "xfs_fs.h"
  20 #include "xfs_acl.h"
  21 #include "xfs_bit.h"
  22 #include "xfs_log.h"
  23 #include "xfs_inum.h"
  24 #include "xfs_trans.h"
  25 #include "xfs_sb.h"
  26 #include "xfs_ag.h"
  27 #include "xfs_alloc.h"
  28 #include "xfs_quota.h"
  29 #include "xfs_mount.h"
  30 #include "xfs_bmap_btree.h"
  31 #include "xfs_dinode.h"
  32 #include "xfs_inode.h"
  33 #include "xfs_bmap.h"
  34 #include "xfs_rtalloc.h"
  35 #include "xfs_error.h"
  36 #include "xfs_itable.h"
  37 #include "xfs_rw.h"
  38 #include "xfs_attr.h"
  39 #include "xfs_buf_item.h"
  40 #include "xfs_utils.h"
  41 #include "xfs_vnodeops.h"
  42 #include "xfs_inode_item.h"
  43 #include "xfs_trace.h"
  44
  45 #include <linux/capability.h>
  46 #include <linux/xattr.h>
  47 #include <linux/namei.h>
  48 #include <linux/posix_acl.h>
  49 #include <linux/security.h>
  50 #include <linux/fiemap.h>
  51 #include <linux/slab.h>
  52
  53 /*
  54  * Bring the timestamps in the XFS inode uptodate.
  55  *
  56  * Used before writing the inode to disk.
  57  */
  58 void
  59 xfs_synchronize_times(
  60         xfs_inode_t     *ip)
  61 {
  62         struct inode    *inode = VFS_I(ip);
  63
  64         ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
  65         ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
  66         ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
  67         ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
  68         ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
  69         ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
  70 }
  71
  72 /*
  73  * If the linux inode is valid, mark it dirty.
  74  * Used when committing a dirty inode into a transaction so that
  75  * the inode will get written back by the linux code
  76  */
  77 void
  78 xfs_mark_inode_dirty_sync(
  79         xfs_inode_t     *ip)
  80 {
  81         struct inode    *inode = VFS_I(ip);
  82
  83         if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
  84                 mark_inode_dirty_sync(inode);
  85 }
  86
  87 void
  88 xfs_mark_inode_dirty(
  89         xfs_inode_t     *ip)
  90 {
  91         struct inode    *inode = VFS_I(ip);
  92
  93         if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
  94                 mark_inode_dirty(inode);
  95 }
  96
  97
  98 int xfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
  99                    void *fs_info)
 100 {
 101         const struct xattr *xattr;
 102         struct xfs_inode *ip = XFS_I(inode);
 103         int error = 0;
 104
 105         for (xattr = xattr_array; xattr->name != NULL; xattr++) {
 106                 error = xfs_attr_set(ip, xattr->name, xattr->value,
 107                                      xattr->value_len, ATTR_SECURE);
 108                 if (error < 0)
 109                         break;
 110         }
 111         return error;
 112 }
 113
 114 /*
 115  * Hook in SELinux.  This is not quite correct yet, what we really need
 116  * here (as we do for default ACLs) is a mechanism by which creation of
 117  * these attrs can be journalled at inode creation time (along with the
 118  * inode, of course, such that log replay can't cause these to be lost).
 119  */
 120
 121 STATIC int
 122 xfs_init_security(
 123         struct inode    *inode,
 124         struct inode    *dir,
 125         const struct qstr *qstr)
 126 {
 127         return security_inode_init_security(inode, dir, qstr,
 128                                             &xfs_initxattrs, NULL);
 129 }
 130
 131 static void
 132 xfs_dentry_to_name(
 133         struct xfs_name *namep,
 134         struct dentry   *dentry)
 135 {
 136         namep->name = dentry->d_name.name;
 137         namep->len = dentry->d_name.len;
 138 }
 139
 140 STATIC void
 141 xfs_cleanup_inode(
 142         struct inode    *dir,
 143         struct inode    *inode,
 144         struct dentry   *dentry)
 145 {
 146         struct xfs_name teardown;
 147
 148         /* Oh, the horror.
 149          * If we can't add the ACL or we fail in
 150          * xfs_init_security we must back out.
 151          * ENOSPC can hit here, among other things.
 152          */
 153         xfs_dentry_to_name(&teardown, dentry);
 154
 155         xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
 156         iput(inode);
 157 }
 158
 159 STATIC int
 160 xfs_vn_mknod(
 161         struct inode    *dir,
 162         struct dentry   *dentry,
 163         int             mode,
 164         dev_t           rdev)
 165 {
 166         struct inode    *inode;
 167         struct xfs_inode *ip = NULL;
 168         struct posix_acl *default_acl = NULL;
 169         struct xfs_name name;
 170         int             error;
 171
 172         /*
 173          * Irix uses Missed'em'V split, but doesn't want to see
 174          * the upper 5 bits of (14bit) major.
 175          */
 176         if (S_ISCHR(mode) || S_ISBLK(mode)) {
 177                 if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 178                         return -EINVAL;
 179                 rdev = sysv_encode_dev(rdev);
 180         } else {
 181                 rdev = 0;
 182         }
 183
 184         if (IS_POSIXACL(dir)) {
 185                 default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
 186                 if (IS_ERR(default_acl))
 187                         return PTR_ERR(default_acl);
 188
 189                 if (!default_acl)
 190                         mode &= ~current_umask();
 191         }
 192
 193         xfs_dentry_to_name(&name, dentry);
 194         error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
 195         if (unlikely(error))
 196                 goto out_free_acl;
 197
 198         inode = VFS_I(ip);
 199
 200         error = xfs_init_security(inode, dir, &dentry->d_name);
 201         if (unlikely(error))
 202                 goto out_cleanup_inode;
 203
 204         if (default_acl) {
 205                 error = -xfs_inherit_acl(inode, default_acl);
 206                 default_acl = NULL;
 207                 if (unlikely(error))
 208                         goto out_cleanup_inode;
 209         }
 210
 211
 212         d_instantiate(dentry, inode);
 213         return -error;
 214
 215  out_cleanup_inode:
 216         xfs_cleanup_inode(dir, inode, dentry);
 217  out_free_acl:
 218         posix_acl_release(default_acl);
 219         return -error;
 220 }
 221
 222 STATIC int
 223 xfs_vn_create(
 224         struct inode    *dir,
 225         struct dentry   *dentry,
 226         int             mode,
 227         struct nameidata *nd)
 228 {
 229         return xfs_vn_mknod(dir, dentry, mode, 0);
 230 }
 231
 232 STATIC int
 233 xfs_vn_mkdir(
 234         struct inode    *dir,
 235         struct dentry   *dentry,
 236         int             mode)
 237 {
 238         return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
 239 }
 240
 241 STATIC struct dentry *
 242 xfs_vn_lookup(
 243         struct inode    *dir,
 244         struct dentry   *dentry,
 245         struct nameidata *nd)
 246 {
 247         struct xfs_inode *cip;
 248         struct xfs_name name;
 249         int             error;
 250
 251         if (dentry->d_name.len >= MAXNAMELEN)
 252                 return ERR_PTR(-ENAMETOOLONG);
 253
 254         xfs_dentry_to_name(&name, dentry);
 255         error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
 256         if (unlikely(error)) {
 257                 if (unlikely(error != ENOENT))
 258                         return ERR_PTR(-error);
 259                 d_add(dentry, NULL);
 260                 return NULL;
 261         }
 262
 263         return d_splice_alias(VFS_I(cip), dentry);
 264 }
 265
 266 STATIC struct dentry *
 267 xfs_vn_ci_lookup(
 268         struct inode    *dir,
 269         struct dentry   *dentry,
 270         struct nameidata *nd)
 271 {
 272         struct xfs_inode *ip;
 273         struct xfs_name xname;
 274         struct xfs_name ci_name;
 275         struct qstr     dname;
 276         int             error;
 277
 278         if (dentry->d_name.len >= MAXNAMELEN)
 279                 return ERR_PTR(-ENAMETOOLONG);
 280
 281         xfs_dentry_to_name(&xname, dentry);
 282         error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
 283         if (unlikely(error)) {
 284                 if (unlikely(error != ENOENT))
 285                         return ERR_PTR(-error);
 286                 /*
 287                  * call d_add(dentry, NULL) here when d_drop_negative_children
 288                  * is called in xfs_vn_mknod (ie. allow negative dentries
 289                  * with CI filesystems).
 290                  */
 291                 return NULL;
 292         }
 293
 294         /* if exact match, just splice and exit */
 295         if (!ci_name.name)
 296                 return d_splice_alias(VFS_I(ip), dentry);
 297
 298         /* else case-insensitive match... */
 299         dname.name = ci_name.name;
 300         dname.len = ci_name.len;
 301         dentry = d_add_ci(dentry, VFS_I(ip), &dname);
 302         kmem_free(ci_name.name);
 303         return dentry;
 304 }
 305
 306 STATIC int
 307 xfs_vn_link(
 308         struct dentry   *old_dentry,
 309         struct inode    *dir,
 310         struct dentry   *dentry)
 311 {
 312         struct inode    *inode = old_dentry->d_inode;
 313         struct xfs_name name;
 314         int             error;
 315
 316         xfs_dentry_to_name(&name, dentry);
 317
 318         error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
 319         if (unlikely(error))
 320                 return -error;
 321
 322         ihold(inode);
 323         d_instantiate(dentry, inode);
 324         return 0;
 325 }
 326
 327 STATIC int
 328 xfs_vn_unlink(
 329         struct inode    *dir,
 330         struct dentry   *dentry)
 331 {
 332         struct xfs_name name;
 333         int             error;
 334
 335         xfs_dentry_to_name(&name, dentry);
 336
 337         error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
 338         if (error)
 339                 return error;
 340
 341         /*
 342          * With unlink, the VFS makes the dentry "negative": no inode,
 343          * but still hashed. This is incompatible with case-insensitive
 344          * mode, so invalidate (unhash) the dentry in CI-mode.
 345          */
 346         if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
 347                 d_invalidate(dentry);
 348         return 0;
 349 }
 350
 351 STATIC int
 352 xfs_vn_symlink(
 353         struct inode    *dir,
 354         struct dentry   *dentry,
 355         const char      *symname)
 356 {
 357         struct inode    *inode;
 358         struct xfs_inode *cip = NULL;
 359         struct xfs_name name;
 360         int             error;
 361         mode_t          mode;
 362
 363         mode = S_IFLNK |
 364                 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
 365         xfs_dentry_to_name(&name, dentry);
 366
 367         error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
 368         if (unlikely(error))
 369                 goto out;
 370
 371         inode = VFS_I(cip);
 372
 373         error = xfs_init_security(inode, dir, &dentry->d_name);
 374         if (unlikely(error))
 375                 goto out_cleanup_inode;
 376
 377         d_instantiate(dentry, inode);
 378         return 0;
 379
 380  out_cleanup_inode:
 381         xfs_cleanup_inode(dir, inode, dentry);
 382  out:
 383         return -error;
 384 }
 385
 386 STATIC int
 387 xfs_vn_rename(
 388         struct inode    *odir,
 389         struct dentry   *odentry,
 390         struct inode    *ndir,
 391         struct dentry   *ndentry)
 392 {
 393         struct inode    *new_inode = ndentry->d_inode;
 394         struct xfs_name oname;
 395         struct xfs_name nname;
 396
 397         xfs_dentry_to_name(&oname, odentry);
 398         xfs_dentry_to_name(&nname, ndentry);
 399
 400         return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
 401                            XFS_I(ndir), &nname, new_inode ?
 402                                                 XFS_I(new_inode) : NULL);
 403 }
 404
 405 /*
 406  * careful here - this function can get called recursively, so
 407  * we need to be very careful about how much stack we use.
 408  * uio is kmalloced for this reason...
 409  */
 410 STATIC void *
 411 xfs_vn_follow_link(
 412         struct dentry           *dentry,
 413         struct nameidata        *nd)
 414 {
 415         char                    *link;
 416         int                     error = -ENOMEM;
 417
 418         link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
 419         if (!link)
 420                 goto out_err;
 421
 422         error = -xfs_readlink(XFS_I(dentry->d_inode), link);
 423         if (unlikely(error))
 424                 goto out_kfree;
 425
 426         nd_set_link(nd, link);
 427         return NULL;
 428
 429  out_kfree:
 430         kfree(link);
 431  out_err:
 432         nd_set_link(nd, ERR_PTR(error));
 433         return NULL;
 434 }
 435
 436 STATIC void
 437 xfs_vn_put_link(
 438         struct dentry   *dentry,
 439         struct nameidata *nd,
 440         void            *p)
 441 {
 442         char            *s = nd_get_link(nd);
 443
 444         if (!IS_ERR(s))
 445                 kfree(s);
 446 }
 447
 448 STATIC int
 449 xfs_vn_getattr(
 450         struct vfsmount         *mnt,
 451         struct dentry           *dentry,
 452         struct kstat            *stat)
 453 {
 454         struct inode            *inode = dentry->d_inode;
 455         struct xfs_inode        *ip = XFS_I(inode);
 456         struct xfs_mount        *mp = ip->i_mount;
 457
 458         trace_xfs_getattr(ip);
 459
 460         if (XFS_FORCED_SHUTDOWN(mp))
 461                 return XFS_ERROR(EIO);
 462
 463         stat->size = XFS_ISIZE(ip);
 464         stat->dev = inode->i_sb->s_dev;
 465         stat->mode = ip->i_d.di_mode;
 466         stat->nlink = ip->i_d.di_nlink;
 467         stat->uid = ip->i_d.di_uid;
 468         stat->gid = ip->i_d.di_gid;
 469         stat->ino = ip->i_ino;
 470         stat->atime = inode->i_atime;
 471         stat->mtime = inode->i_mtime;
 472         stat->ctime = inode->i_ctime;
 473         stat->blocks =
 474                 XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
 475
 476
 477         switch (inode->i_mode & S_IFMT) {
 478         case S_IFBLK:
 479         case S_IFCHR:
 480                 stat->blksize = BLKDEV_IOSIZE;
 481                 stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
 482                                    sysv_minor(ip->i_df.if_u2.if_rdev));
 483                 break;
 484         default:
 485                 if (XFS_IS_REALTIME_INODE(ip)) {
 486                         /*
 487                          * If the file blocks are being allocated from a
 488                          * realtime volume, then return the inode's realtime
 489                          * extent size or the realtime volume's extent size.
 490                          */
 491                         stat->blksize =
 492                                 xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
 493                 } else
 494                         stat->blksize = xfs_preferred_iosize(mp);
 495                 stat->rdev = 0;
 496                 break;
 497         }
 498
 499         return 0;
 500 }
 501
 502 int
 503 xfs_setattr_nonsize(
 504         struct xfs_inode        *ip,
 505         struct iattr            *iattr,
 506         int                     flags)
 507 {
 508         xfs_mount_t             *mp = ip->i_mount;
 509         struct inode            *inode = VFS_I(ip);
 510         int                     mask = iattr->ia_valid;
 511         xfs_trans_t             *tp;
 512         int                     error;
 513         uid_t                   uid = 0, iuid = 0;
 514         gid_t                   gid = 0, igid = 0;
 515         struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
 516         struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
 517
 518         trace_xfs_setattr(ip);
 519
 520         if (mp->m_flags & XFS_MOUNT_RDONLY)
 521                 return XFS_ERROR(EROFS);
 522
 523         if (XFS_FORCED_SHUTDOWN(mp))
 524                 return XFS_ERROR(EIO);
 525
 526         error = -inode_change_ok(inode, iattr);
 527         if (error)
 528                 return XFS_ERROR(error);
 529
 530         ASSERT((mask & ATTR_SIZE) == 0);
 531
 532         /*
 533          * If disk quotas is on, we make sure that the dquots do exist on disk,
 534          * before we start any other transactions. Trying to do this later
 535          * is messy. We don't care to take a readlock to look at the ids
 536          * in inode here, because we can't hold it across the trans_reserve.
 537          * If the IDs do change before we take the ilock, we're covered
 538          * because the i_*dquot fields will get updated anyway.
 539          */
 540         if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
 541                 uint    qflags = 0;
 542
 543                 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
 544                         uid = iattr->ia_uid;
 545                         qflags |= XFS_QMOPT_UQUOTA;
 546                 } else {
 547                         uid = ip->i_d.di_uid;
 548                 }
 549                 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
 550                         gid = iattr->ia_gid;
 551                         qflags |= XFS_QMOPT_GQUOTA;
 552                 }  else {
 553                         gid = ip->i_d.di_gid;
 554                 }
 555
 556                 /*
 557                  * We take a reference when we initialize udqp and gdqp,
 558                  * so it is important that we never blindly double trip on
 559                  * the same variable. See xfs_create() for an example.
 560                  */
 561                 ASSERT(udqp == NULL);
 562                 ASSERT(gdqp == NULL);
 563                 error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
 564                                          qflags, &udqp, &gdqp);
 565                 if (error)
 566                         return error;
 567         }
 568
 569         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
 570         error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
 571         if (error)
 572                 goto out_dqrele;
 573
 574         xfs_ilock(ip, XFS_ILOCK_EXCL);
 575
 576         /*
 577          * Change file ownership.  Must be the owner or privileged.
 578          */
 579         if (mask & (ATTR_UID|ATTR_GID)) {
 580                 /*
 581                  * These IDs could have changed since we last looked at them.
 582                  * But, we're assured that if the ownership did change
 583                  * while we didn't have the inode locked, inode's dquot(s)
 584                  * would have changed also.
 585                  */
 586                 iuid = ip->i_d.di_uid;
 587                 igid = ip->i_d.di_gid;
 588                 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
 589                 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
 590
 591                 /*
 592                  * Do a quota reservation only if uid/gid is actually
 593                  * going to change.
 594                  */
 595                 if (XFS_IS_QUOTA_RUNNING(mp) &&
 596                     ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
 597                      (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
 598                         ASSERT(tp);
 599                         error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 600                                                 capable(CAP_FOWNER) ?
 601                                                 XFS_QMOPT_FORCE_RES : 0);
 602                         if (error)      /* out of quota */
 603                                 goto out_trans_cancel;
 604                 }
 605         }
 606
 607         xfs_trans_ijoin(tp, ip);
 608
 609         /*
 610          * Change file ownership.  Must be the owner or privileged.
 611          */
 612         if (mask & (ATTR_UID|ATTR_GID)) {
 613                 /*
 614                  * CAP_FSETID overrides the following restrictions:
 615                  *
 616                  * The set-user-ID and set-group-ID bits of a file will be
 617                  * cleared upon successful return from chown()
 618                  */
 619                 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
 620                     !capable(CAP_FSETID))
 621                         ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
 622
 623                 /*
 624                  * Change the ownerships and register quota modifications
 625                  * in the transaction.
 626                  */
 627                 if (iuid != uid) {
 628                         if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
 629                                 ASSERT(mask & ATTR_UID);
 630                                 ASSERT(udqp);
 631                                 olddquot1 = xfs_qm_vop_chown(tp, ip,
 632                                                         &ip->i_udquot, udqp);
 633                         }
 634                         ip->i_d.di_uid = uid;
 635                         inode->i_uid = uid;
 636                 }
 637                 if (igid != gid) {
 638                         if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
 639                                 ASSERT(!XFS_IS_PQUOTA_ON(mp));
 640                                 ASSERT(mask & ATTR_GID);
 641                                 ASSERT(gdqp);
 642                                 olddquot2 = xfs_qm_vop_chown(tp, ip,
 643                                                         &ip->i_gdquot, gdqp);
 644                         }
 645                         ip->i_d.di_gid = gid;
 646                         inode->i_gid = gid;
 647                 }
 648         }
 649
 650         /*
 651          * Change file access modes.
 652          */
 653         if (mask & ATTR_MODE) {
 654                 umode_t mode = iattr->ia_mode;
 655
 656                 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
 657                         mode &= ~S_ISGID;
 658
 659                 ip->i_d.di_mode &= S_IFMT;
 660                 ip->i_d.di_mode |= mode & ~S_IFMT;
 661
 662                 inode->i_mode &= S_IFMT;
 663                 inode->i_mode |= mode & ~S_IFMT;
 664         }
 665
 666         /*
 667          * Change file access or modified times.
 668          */
 669         if (mask & ATTR_ATIME) {
 670                 inode->i_atime = iattr->ia_atime;
 671                 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
 672                 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
 673                 ip->i_update_core = 1;
 674         }
 675         if (mask & ATTR_CTIME) {
 676                 inode->i_ctime = iattr->ia_ctime;
 677                 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
 678                 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
 679                 ip->i_update_core = 1;
 680         }
 681         if (mask & ATTR_MTIME) {
 682                 inode->i_mtime = iattr->ia_mtime;
 683                 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
 684                 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
 685                 ip->i_update_core = 1;
 686         }
 687
 688         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 689
 690         XFS_STATS_INC(xs_ig_attrchg);
 691
 692         if (mp->m_flags & XFS_MOUNT_WSYNC)
 693                 xfs_trans_set_sync(tp);
 694         error = xfs_trans_commit(tp, 0);
 695
 696         xfs_iunlock(ip, XFS_ILOCK_EXCL);
 697
 698         /*
 699          * Release any dquot(s) the inode had kept before chown.
 700          */
 701         xfs_qm_dqrele(olddquot1);
 702         xfs_qm_dqrele(olddquot2);
 703         xfs_qm_dqrele(udqp);
 704         xfs_qm_dqrele(gdqp);
 705
 706         if (error)
 707                 return XFS_ERROR(error);
 708
 709         /*
 710          * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
 711          *           update.  We could avoid this with linked transactions
 712          *           and passing down the transaction pointer all the way
 713          *           to attr_set.  No previous user of the generic
 714          *           Posix ACL code seems to care about this issue either.
 715          */
 716         if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
 717                 error = -xfs_acl_chmod(inode);
 718                 if (error)
 719                         return XFS_ERROR(error);
 720         }
 721
 722         return 0;
 723
 724 out_trans_cancel:
 725         xfs_trans_cancel(tp, 0);
 726         xfs_iunlock(ip, XFS_ILOCK_EXCL);
 727 out_dqrele:
 728         xfs_qm_dqrele(udqp);
 729         xfs_qm_dqrele(gdqp);
 730         return error;
 731 }
 732
 733 /*
 734  * Truncate file.  Must have write permission and not be a directory.
 735  */
 736 int
 737 xfs_setattr_size(
 738         struct xfs_inode        *ip,
 739         struct iattr            *iattr,
 740         int                     flags)
 741 {
 742         struct xfs_mount        *mp = ip->i_mount;
 743         struct inode            *inode = VFS_I(ip);
 744         int                     mask = iattr->ia_valid;
 745         struct xfs_trans        *tp;
 746         int                     error;
 747         uint                    lock_flags;
 748         uint                    commit_flags = 0;
 749
 750         trace_xfs_setattr(ip);
 751
 752         if (mp->m_flags & XFS_MOUNT_RDONLY)
 753                 return XFS_ERROR(EROFS);
 754
 755         if (XFS_FORCED_SHUTDOWN(mp))
 756                 return XFS_ERROR(EIO);
 757
 758         error = -inode_change_ok(inode, iattr);
 759         if (error)
 760                 return XFS_ERROR(error);
 761
 762         ASSERT(S_ISREG(ip->i_d.di_mode));
 763         ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
 764                         ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
 765                         ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
 766
 767         lock_flags = XFS_ILOCK_EXCL;
 768         if (!(flags & XFS_ATTR_NOLOCK))
 769                 lock_flags |= XFS_IOLOCK_EXCL;
 770         xfs_ilock(ip, lock_flags);
 771
 772         /*
 773          * Short circuit the truncate case for zero length files.
 774          */
 775         if (iattr->ia_size == 0 &&
 776             ip->i_size == 0 && ip->i_d.di_nextents == 0) {
 777                 if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
 778                         goto out_unlock;
 779
 780                 /*
 781                  * Use the regular setattr path to update the timestamps.
 782                  */
 783                 xfs_iunlock(ip, lock_flags);
 784                 iattr->ia_valid &= ~ATTR_SIZE;
 785                 return xfs_setattr_nonsize(ip, iattr, 0);
 786         }
 787
 788         /*
 789          * Make sure that the dquots are attached to the inode.
 790          */
 791         error = xfs_qm_dqattach_locked(ip, 0);
 792         if (error)
 793                 goto out_unlock;
 794
 795         /*
 796          * Now we can make the changes.  Before we join the inode to the
 797          * transaction, take care of the part of the truncation that must be
 798          * done without the inode lock.  This needs to be done before joining
 799          * the inode to the transaction, because the inode cannot be unlocked
 800          * once it is a part of the transaction.
 801          */
 802         if (iattr->ia_size > ip->i_size) {
 803                 /*
 804                  * Do the first part of growing a file: zero any data in the
 805                  * last block that is beyond the old EOF.  We need to do this
 806                  * before the inode is joined to the transaction to modify
 807                  * i_size.
 808                  */
 809                 error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
 810                 if (error)
 811                         goto out_unlock;
 812         }
 813         xfs_iunlock(ip, XFS_ILOCK_EXCL);
 814         lock_flags &= ~XFS_ILOCK_EXCL;
 815
 816         /*
 817          * We are going to log the inode size change in this transaction so
 818          * any previous writes that are beyond the on disk EOF and the new
 819          * EOF that have not been written out need to be written here.  If we
 820          * do not write the data out, we expose ourselves to the null files
 821          * problem.
 822          *
 823          * Only flush from the on disk size to the smaller of the in memory
 824          * file size or the new size as that's the range we really care about
 825          * here and prevents waiting for other data not within the range we
 826          * care about here.
 827          */
 828         if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
 829                 error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
 830                                         XBF_ASYNC, FI_NONE);
 831                 if (error)
 832                         goto out_unlock;
 833         }
 834
 835         /*
 836          * Wait for all I/O to complete.
 837          */
 838         xfs_ioend_wait(ip);
 839
 840         error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
 841                                      xfs_get_blocks);
 842         if (error)
 843                 goto out_unlock;
 844
 845         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
 846         error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
 847                                  XFS_TRANS_PERM_LOG_RES,
 848                                  XFS_ITRUNCATE_LOG_COUNT);
 849         if (error)
 850                 goto out_trans_cancel;
 851
 852         truncate_setsize(inode, iattr->ia_size);
 853
 854         commit_flags = XFS_TRANS_RELEASE_LOG_RES;
 855         lock_flags |= XFS_ILOCK_EXCL;
 856
 857         xfs_ilock(ip, XFS_ILOCK_EXCL);
 858
 859         xfs_trans_ijoin(tp, ip);
 860
 861         /*
 862          * Only change the c/mtime if we are changing the size or we are
 863          * explicitly asked to change it.  This handles the semantic difference
 864          * between truncate() and ftruncate() as implemented in the VFS.
 865          *
 866          * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
 867          * special case where we need to update the times despite not having
 868          * these flags set.  For all other operations the VFS set these flags
 869          * explicitly if it wants a timestamp update.
 870          */
 871         if (iattr->ia_size != ip->i_size &&
 872             (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
 873                 iattr->ia_ctime = iattr->ia_mtime =
 874                         current_fs_time(inode->i_sb);
 875                 mask |= ATTR_CTIME | ATTR_MTIME;
 876         }
 877
 878         if (iattr->ia_size > ip->i_size) {
 879                 ip->i_d.di_size = iattr->ia_size;
 880                 ip->i_size = iattr->ia_size;
 881         } else if (iattr->ia_size <= ip->i_size ||
 882                    (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
 883                 error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
 884                 if (error)
 885                         goto out_trans_abort;
 886
 887                 /*
 888                  * Truncated "down", so we're removing references to old data
 889                  * here - if we delay flushing for a long time, we expose
 890                  * ourselves unduly to the notorious NULL files problem.  So,
 891                  * we mark this inode and flush it when the file is closed,
 892                  * and do not wait the usual (long) time for writeout.
 893                  */
 894                 xfs_iflags_set(ip, XFS_ITRUNCATED);
 895         }
 896
 897         if (mask & ATTR_CTIME) {
 898                 inode->i_ctime = iattr->ia_ctime;
 899                 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
 900                 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
 901                 ip->i_update_core = 1;
 902         }
 903         if (mask & ATTR_MTIME) {
 904                 inode->i_mtime = iattr->ia_mtime;
 905                 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
 906                 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
 907                 ip->i_update_core = 1;
 908         }
 909
 910         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 911
 912         XFS_STATS_INC(xs_ig_attrchg);
 913
 914         if (mp->m_flags & XFS_MOUNT_WSYNC)
 915                 xfs_trans_set_sync(tp);
 916
 917         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 918 out_unlock:
 919         if (lock_flags)
 920                 xfs_iunlock(ip, lock_flags);
 921         return error;
 922
 923 out_trans_abort:
 924         commit_flags |= XFS_TRANS_ABORT;
 925 out_trans_cancel:
 926         xfs_trans_cancel(tp, commit_flags);
 927         goto out_unlock;
 928 }
 929
 930 STATIC int
 931 xfs_vn_setattr(
 932         struct dentry   *dentry,
 933         struct iattr    *iattr)
 934 {
 935         if (iattr->ia_valid & ATTR_SIZE)
 936                 return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
 937         return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
 938 }
 939
 940 #define XFS_FIEMAP_FLAGS        (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 941
 942 /*
 943  * Call fiemap helper to fill in user data.
 944  * Returns positive errors to xfs_getbmap.
 945  */
 946 STATIC int
 947 xfs_fiemap_format(
 948         void                    **arg,
 949         struct getbmapx         *bmv,
 950         int                     *full)
 951 {
 952         int                     error;
 953         struct fiemap_extent_info *fieinfo = *arg;
 954         u32                     fiemap_flags = 0;
 955         u64                     logical, physical, length;
 956
 957         /* Do nothing for a hole */
 958         if (bmv->bmv_block == -1LL)
 959                 return 0;
 960
 961         logical = BBTOB(bmv->bmv_offset);
 962         physical = BBTOB(bmv->bmv_block);
 963         length = BBTOB(bmv->bmv_length);
 964
 965         if (bmv->bmv_oflags & BMV_OF_PREALLOC)
 966                 fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
 967         else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
 968                 fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
 969                 physical = 0;   /* no block yet */
 970         }
 971         if (bmv->bmv_oflags & BMV_OF_LAST)
 972                 fiemap_flags |= FIEMAP_EXTENT_LAST;
 973
 974         error = fiemap_fill_next_extent(fieinfo, logical, physical,
 975                                         length, fiemap_flags);
 976         if (error > 0) {
 977                 error = 0;
 978                 *full = 1;      /* user array now full */
 979         }
 980
 981         return -error;
 982 }
 983
 984 STATIC int
 985 xfs_vn_fiemap(
 986         struct inode            *inode,
 987         struct fiemap_extent_info *fieinfo,
 988         u64                     start,
 989         u64                     length)
 990 {
 991         xfs_inode_t             *ip = XFS_I(inode);
 992         struct getbmapx         bm;
 993         int                     error;
 994
 995         error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
 996         if (error)
 997                 return error;
 998
 999         /* Set up bmap header for xfs internal routine */
1000         bm.bmv_offset = BTOBB(start);
1001         /* Special case for whole file */
1002         if (length == FIEMAP_MAX_OFFSET)
1003                 bm.bmv_length = -1LL;
1004         else
1005                 bm.bmv_length = BTOBB(length);
1006
1007         /* We add one because in getbmap world count includes the header */
1008         bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
1009                                         fieinfo->fi_extents_max + 1;
1010         bm.bmv_count = min_t(__s32, bm.bmv_count,
1011                              (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
1012         bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
1013         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
1014                 bm.bmv_iflags |= BMV_IF_ATTRFORK;
1015         if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
1016                 bm.bmv_iflags |= BMV_IF_DELALLOC;
1017
1018         error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
1019         if (error)
1020                 return -error;
1021
1022         return 0;
1023 }
1024
1025 static const struct inode_operations xfs_inode_operations = {
1026         .get_acl                = xfs_get_acl,
1027         .getattr                = xfs_vn_getattr,
1028         .setattr                = xfs_vn_setattr,
1029         .setxattr               = generic_setxattr,
1030         .getxattr               = generic_getxattr,
1031         .removexattr            = generic_removexattr,
1032         .listxattr              = xfs_vn_listxattr,
1033         .fiemap                 = xfs_vn_fiemap,
1034 };
1035
1036 static const struct inode_operations xfs_dir_inode_operations = {
1037         .create                 = xfs_vn_create,
1038         .lookup                 = xfs_vn_lookup,
1039         .link                   = xfs_vn_link,
1040         .unlink                 = xfs_vn_unlink,
1041         .symlink                = xfs_vn_symlink,
1042         .mkdir                  = xfs_vn_mkdir,
1043         /*
1044          * Yes, XFS uses the same method for rmdir and unlink.
1045          *
1046          * There are some subtile differences deeper in the code,
1047          * but we use S_ISDIR to check for those.
1048          */
1049         .rmdir                  = xfs_vn_unlink,
1050         .mknod                  = xfs_vn_mknod,
1051         .rename                 = xfs_vn_rename,
1052         .get_acl                = xfs_get_acl,
1053         .getattr                = xfs_vn_getattr,
1054         .setattr                = xfs_vn_setattr,
1055         .setxattr               = generic_setxattr,
1056         .getxattr               = generic_getxattr,
1057         .removexattr            = generic_removexattr,
1058         .listxattr              = xfs_vn_listxattr,
1059 };
1060
1061 static const struct inode_operations xfs_dir_ci_inode_operations = {
1062         .create                 = xfs_vn_create,
1063         .lookup                 = xfs_vn_ci_lookup,
1064         .link                   = xfs_vn_link,
1065         .unlink                 = xfs_vn_unlink,
1066         .symlink                = xfs_vn_symlink,
1067         .mkdir                  = xfs_vn_mkdir,
1068         /*
1069          * Yes, XFS uses the same method for rmdir and unlink.
1070          *
1071          * There are some subtile differences deeper in the code,
1072          * but we use S_ISDIR to check for those.
1073          */
1074         .rmdir                  = xfs_vn_unlink,
1075         .mknod                  = xfs_vn_mknod,
1076         .rename                 = xfs_vn_rename,
1077         .get_acl                = xfs_get_acl,
1078         .getattr                = xfs_vn_getattr,
1079         .setattr                = xfs_vn_setattr,
1080         .setxattr               = generic_setxattr,
1081         .getxattr               = generic_getxattr,
1082         .removexattr            = generic_removexattr,
1083         .listxattr              = xfs_vn_listxattr,
1084 };
1085
1086 static const struct inode_operations xfs_symlink_inode_operations = {
1087         .readlink               = generic_readlink,
1088         .follow_link            = xfs_vn_follow_link,
1089         .put_link               = xfs_vn_put_link,
1090         .get_acl                = xfs_get_acl,
1091         .getattr                = xfs_vn_getattr,
1092         .setattr                = xfs_vn_setattr,
1093         .setxattr               = generic_setxattr,
1094         .getxattr               = generic_getxattr,
1095         .removexattr            = generic_removexattr,
1096         .listxattr              = xfs_vn_listxattr,
1097 };
1098
1099 STATIC void
1100 xfs_diflags_to_iflags(
1101         struct inode            *inode,
1102         struct xfs_inode        *ip)
1103 {
1104         if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
1105                 inode->i_flags |= S_IMMUTABLE;
1106         else
1107                 inode->i_flags &= ~S_IMMUTABLE;
1108         if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
1109                 inode->i_flags |= S_APPEND;
1110         else
1111                 inode->i_flags &= ~S_APPEND;
1112         if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
1113                 inode->i_flags |= S_SYNC;
1114         else
1115                 inode->i_flags &= ~S_SYNC;
1116         if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
1117                 inode->i_flags |= S_NOATIME;
1118         else
1119                 inode->i_flags &= ~S_NOATIME;
1120 }
1121
1122 /*
1123  * Initialize the Linux inode, set up the operation vectors and
1124  * unlock the inode.
1125  *
1126  * When reading existing inodes from disk this is called directly
1127  * from xfs_iget, when creating a new inode it is called from
1128  * xfs_ialloc after setting up the inode.
1129  *
1130  * We are always called with an uninitialised linux inode here.
1131  * We need to initialise the necessary fields and take a reference
1132  * on it.
1133  */
1134 void
1135 xfs_setup_inode(
1136         struct xfs_inode        *ip)
1137 {
1138         struct inode            *inode = &ip->i_vnode;
1139
1140         inode->i_ino = ip->i_ino;
1141         inode->i_state = I_NEW;
1142
1143         inode_sb_list_add(inode);
1144         /* make the inode look hashed for the writeback code */
1145         hlist_add_fake(&inode->i_hash);
1146
1147         inode->i_mode   = ip->i_d.di_mode;
1148         inode->i_nlink  = ip->i_d.di_nlink;
1149         inode->i_uid    = ip->i_d.di_uid;
1150         inode->i_gid    = ip->i_d.di_gid;
1151
1152         switch (inode->i_mode & S_IFMT) {
1153         case S_IFBLK:
1154         case S_IFCHR:
1155                 inode->i_rdev =
1156                         MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
1157                               sysv_minor(ip->i_df.if_u2.if_rdev));
1158                 break;
1159         default:
1160                 inode->i_rdev = 0;
1161                 break;
1162         }
1163
1164         inode->i_generation = ip->i_d.di_gen;
1165         i_size_write(inode, ip->i_d.di_size);
1166         inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
1167         inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
1168         inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
1169         inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
1170         inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
1171         inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
1172         xfs_diflags_to_iflags(inode, ip);
1173
1174         switch (inode->i_mode & S_IFMT) {
1175         case S_IFREG:
1176                 inode->i_op = &xfs_inode_operations;
1177                 inode->i_fop = &xfs_file_operations;
1178                 inode->i_mapping->a_ops = &xfs_address_space_operations;
1179                 break;
1180         case S_IFDIR:
1181                 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1182                         inode->i_op = &xfs_dir_ci_inode_operations;
1183                 else
1184                         inode->i_op = &xfs_dir_inode_operations;
1185                 inode->i_fop = &xfs_dir_file_operations;
1186                 break;
1187         case S_IFLNK:
1188                 inode->i_op = &xfs_symlink_inode_operations;
1189                 if (!(ip->i_df.if_flags & XFS_IFINLINE))
1190                         inode->i_mapping->a_ops = &xfs_address_space_operations;
1191                 break;
1192         default:
1193                 inode->i_op = &xfs_inode_operations;
1194                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
1195                 break;
1196         }
1197
1198         /*
1199          * If there is no attribute fork no ACL can exist on this inode,
1200          * and it can't have any file capabilities attached to it either.
1201          */
1202         if (!XFS_IFORK_Q(ip)) {
1203                 inode_has_no_xattr(inode);
1204                 cache_no_acl(inode);
1205         }
1206
1207         xfs_iflags_clear(ip, XFS_INEW);
1208         barrier();
1209
1210         unlock_new_inode(inode);
1211 }