fs/xfs/scrub/inode_repair.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <djwong@kernel.org>
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_shared.h"
   9 #include "xfs_format.h"
  10 #include "xfs_trans_resv.h"
  11 #include "xfs_mount.h"
  12 #include "xfs_defer.h"
  13 #include "xfs_btree.h"
  14 #include "xfs_bit.h"
  15 #include "xfs_log_format.h"
  16 #include "xfs_trans.h"
  17 #include "xfs_sb.h"
  18 #include "xfs_inode.h"
  19 #include "xfs_icache.h"
  20 #include "xfs_inode_buf.h"
  21 #include "xfs_inode_fork.h"
  22 #include "xfs_ialloc.h"
  23 #include "xfs_da_format.h"
  24 #include "xfs_reflink.h"
  25 #include "xfs_alloc.h"
  26 #include "xfs_rmap.h"
  27 #include "xfs_rmap_btree.h"
  28 #include "xfs_bmap.h"
  29 #include "xfs_bmap_btree.h"
  30 #include "xfs_bmap_util.h"
  31 #include "xfs_dir2.h"
  32 #include "xfs_dir2_priv.h"
  33 #include "xfs_quota_defs.h"
  34 #include "xfs_quota.h"
  35 #include "xfs_ag.h"
  36 #include "xfs_rtbitmap.h"
  37 #include "xfs_attr_leaf.h"
  38 #include "xfs_log_priv.h"
  39 #include "xfs_health.h"
  40 #include "xfs_symlink_remote.h"
  41 #include "scrub/xfs_scrub.h"
  42 #include "scrub/scrub.h"
  43 #include "scrub/common.h"
  44 #include "scrub/btree.h"
  45 #include "scrub/trace.h"
  46 #include "scrub/repair.h"
  47 #include "scrub/iscan.h"
  48 #include "scrub/readdir.h"
  49 #include "scrub/tempfile.h"
  50
  51 /*
  52  * Inode Record Repair
  53  * ===================
  54  *
  55  * Roughly speaking, inode problems can be classified based on whether or not
  56  * they trip the dinode verifiers.  If those trip, then we won't be able to
  57  * xfs_iget ourselves the inode.
  58  *
  59  * Therefore, the xrep_dinode_* functions fix anything that will trip the
  60  * inode buffer verifier or the dinode verifier.  The xrep_inode_* functions
  61  * fix things on live incore inodes.  The inode repair functions make decisions
  62  * with security and usability implications when reviving a file:
  63  *
  64  * - Files with zero di_mode or a garbage di_mode are converted to a regular
  65  *   file that only root can read.  This file may not actually contain user
  66  *   data, if the file was not previously a regular file.  Setuid and setgid
  67  *   bits are cleared.
  68  *
  69  * - Zero-size directories can be truncated to look empty.  It is necessary to
  70  *   run the bmapbtd and directory repair functions to fully rebuild the
  71  *   directory.
  72  *
  73  * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
  74  *   to run the bmapbtd and symlink repair functions to salvage the symlink.
  75  *
  76  * - Invalid extent size hints will be removed.
  77  *
  78  * - Quotacheck will be scheduled if we repaired an inode that was so badly
  79  *   damaged that the ondisk inode had to be rebuilt.
  80  *
  81  * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
  82  *   Setuid and setgid bits are cleared.
  83  *
  84  * - Data and attr forks are reset to extents format with zero extents if the
  85  *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
  86  *   repair functions to recover the space mapping.
  87  *
  88  * - ACLs will not be recovered if the attr fork is zapped or the extended
  89  *   attribute structure itself requires salvaging.
  90  *
  91  * - If the attr fork is zapped, the user and group ids are reset to root and
  92  *   the setuid and setgid bits are removed.
  93  */
  94
  95 /*
  96  * All the information we need to repair the ondisk inode if we can't iget the
  97  * incore inode.  We don't allocate this buffer unless we're going to perform
  98  * a repair to the ondisk inode cluster buffer.
  99  */
 100 struct xrep_inode {
 101         /* Inode mapping that we saved from the initial lookup attempt. */
 102         struct xfs_imap         imap;
 103
 104         struct xfs_scrub        *sc;
 105
 106         /* Blocks in use on the data device by data extents or bmbt blocks. */
 107         xfs_rfsblock_t          data_blocks;
 108
 109         /* Blocks in use on the rt device. */
 110         xfs_rfsblock_t          rt_blocks;
 111
 112         /* Blocks in use by the attr fork. */
 113         xfs_rfsblock_t          attr_blocks;
 114
 115         /* Number of data device extents for the data fork. */
 116         xfs_extnum_t            data_extents;
 117
 118         /*
 119          * Number of realtime device extents for the data fork.  If
 120          * data_extents and rt_extents indicate that the data fork has extents
 121          * on both devices, we'll just back away slowly.
 122          */
 123         xfs_extnum_t            rt_extents;
 124
 125         /* Number of (data device) extents for the attr fork. */
 126         xfs_aextnum_t           attr_extents;
 127
 128         /* Sick state to set after zapping parts of the inode. */
 129         unsigned int            ino_sick_mask;
 130
 131         /* Must we remove all access from this file? */
 132         bool                    zap_acls;
 133
 134         /* Inode scanner to see if we can find the ftype from dirents */
 135         struct xchk_iscan       ftype_iscan;
 136         uint8_t                 alleged_ftype;
 137 };
 138
 139 /*
 140  * Setup function for inode repair.  @imap contains the ondisk inode mapping
 141  * information so that we can correct the ondisk inode cluster buffer if
 142  * necessary to make iget work.
 143  */
 144 int
 145 xrep_setup_inode(
 146         struct xfs_scrub        *sc,
 147         const struct xfs_imap   *imap)
 148 {
 149         struct xrep_inode       *ri;
 150
 151         sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
 152         if (!sc->buf)
 153                 return -ENOMEM;
 154
 155         ri = sc->buf;
 156         memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
 157         ri->sc = sc;
 158         return 0;
 159 }
 160
 161 /*
 162  * Make sure this ondisk inode can pass the inode buffer verifier.  This is
 163  * not the same as the dinode verifier.
 164  */
 165 STATIC void
 166 xrep_dinode_buf_core(
 167         struct xfs_scrub        *sc,
 168         struct xfs_buf          *bp,
 169         unsigned int            ioffset)
 170 {
 171         struct xfs_dinode       *dip = xfs_buf_offset(bp, ioffset);
 172         struct xfs_trans        *tp = sc->tp;
 173         struct xfs_mount        *mp = sc->mp;
 174         xfs_agino_t             agino;
 175         bool                    crc_ok = false;
 176         bool                    magic_ok = false;
 177         bool                    unlinked_ok = false;
 178
 179         agino = be32_to_cpu(dip->di_next_unlinked);
 180
 181         if (xfs_verify_agino_or_null(bp->b_pag, agino))
 182                 unlinked_ok = true;
 183
 184         if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
 185             xfs_dinode_good_version(mp, dip->di_version))
 186                 magic_ok = true;
 187
 188         if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
 189                         XFS_DINODE_CRC_OFF))
 190                 crc_ok = true;
 191
 192         if (magic_ok && unlinked_ok && crc_ok)
 193                 return;
 194
 195         if (!magic_ok) {
 196                 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 197                 dip->di_version = 3;
 198         }
 199         if (!unlinked_ok)
 200                 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
 201         xfs_dinode_calc_crc(mp, dip);
 202         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 203         xfs_trans_log_buf(tp, bp, ioffset,
 204                                   ioffset + sizeof(struct xfs_dinode) - 1);
 205 }
 206
 207 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
 208 STATIC void
 209 xrep_dinode_buf(
 210         struct xfs_scrub        *sc,
 211         struct xfs_buf          *bp)
 212 {
 213         struct xfs_mount        *mp = sc->mp;
 214         int                     i;
 215         int                     ni;
 216
 217         ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
 218         for (i = 0; i < ni; i++)
 219                 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
 220 }
 221
 222 /* Reinitialize things that never change in an inode. */
 223 STATIC void
 224 xrep_dinode_header(
 225         struct xfs_scrub        *sc,
 226         struct xfs_dinode       *dip)
 227 {
 228         trace_xrep_dinode_header(sc, dip);
 229
 230         dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 231         if (!xfs_dinode_good_version(sc->mp, dip->di_version))
 232                 dip->di_version = 3;
 233         dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
 234         uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
 235         dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
 236 }
 237
 238 /*
 239  * If this directory entry points to the scrub target inode, then the directory
 240  * we're scanning is the parent of the scrub target inode.
 241  */
 242 STATIC int
 243 xrep_dinode_findmode_dirent(
 244         struct xfs_scrub                *sc,
 245         struct xfs_inode                *dp,
 246         xfs_dir2_dataptr_t              dapos,
 247         const struct xfs_name           *name,
 248         xfs_ino_t                       ino,
 249         void                            *priv)
 250 {
 251         struct xrep_inode               *ri = priv;
 252         int                             error = 0;
 253
 254         if (xchk_should_terminate(ri->sc, &error))
 255                 return error;
 256
 257         if (ino != sc->sm->sm_ino)
 258                 return 0;
 259
 260         /* Ignore garbage directory entry names. */
 261         if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
 262                 return -EFSCORRUPTED;
 263
 264         /* Don't pick up dot or dotdot entries; we only want child dirents. */
 265         if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
 266             xfs_dir2_samename(name, &xfs_name_dot))
 267                 return 0;
 268
 269         /*
 270          * Uhoh, more than one parent for this inode and they don't agree on
 271          * the file type?
 272          */
 273         if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
 274             ri->alleged_ftype != name->type) {
 275                 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
 276                                 ri->alleged_ftype);
 277                 return -EFSCORRUPTED;
 278         }
 279
 280         /* We found a potential parent; remember the ftype. */
 281         trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
 282         ri->alleged_ftype = name->type;
 283         return 0;
 284 }
 285
 286 /* Try to lock a directory, or wait a jiffy. */
 287 static inline int
 288 xrep_dinode_ilock_nowait(
 289         struct xfs_inode        *dp,
 290         unsigned int            lock_mode)
 291 {
 292         if (xfs_ilock_nowait(dp, lock_mode))
 293                 return true;
 294
 295         schedule_timeout_killable(1);
 296         return false;
 297 }
 298
 299 /*
 300  * Try to lock a directory to look for ftype hints.  Since we already hold the
 301  * AGI buffer, we cannot block waiting for the ILOCK because rename can take
 302  * the ILOCK and then try to lock AGIs.
 303  */
 304 STATIC int
 305 xrep_dinode_trylock_directory(
 306         struct xrep_inode       *ri,
 307         struct xfs_inode        *dp,
 308         unsigned int            *lock_modep)
 309 {
 310         unsigned long           deadline = jiffies + msecs_to_jiffies(30000);
 311         unsigned int            lock_mode;
 312         int                     error = 0;
 313
 314         do {
 315                 if (xchk_should_terminate(ri->sc, &error))
 316                         return error;
 317
 318                 if (xfs_need_iread_extents(&dp->i_df))
 319                         lock_mode = XFS_ILOCK_EXCL;
 320                 else
 321                         lock_mode = XFS_ILOCK_SHARED;
 322
 323                 if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
 324                         *lock_modep = lock_mode;
 325                         return 0;
 326                 }
 327         } while (!time_is_before_jiffies(deadline));
 328         return -EBUSY;
 329 }
 330
 331 /*
 332  * If this is a directory, walk the dirents looking for any that point to the
 333  * scrub target inode.
 334  */
 335 STATIC int
 336 xrep_dinode_findmode_walk_directory(
 337         struct xrep_inode       *ri,
 338         struct xfs_inode        *dp)
 339 {
 340         struct xfs_scrub        *sc = ri->sc;
 341         unsigned int            lock_mode;
 342         int                     error = 0;
 343
 344         /* Ignore temporary repair directories. */
 345         if (xrep_is_tempfile(dp))
 346                 return 0;
 347
 348         /*
 349          * Scan the directory to see if there it contains an entry pointing to
 350          * the directory that we are repairing.
 351          */
 352         error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
 353         if (error)
 354                 return error;
 355
 356         /*
 357          * If this directory is known to be sick, we cannot scan it reliably
 358          * and must abort.
 359          */
 360         if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
 361                                        XFS_SICK_INO_BMBTD |
 362                                        XFS_SICK_INO_DIR)) {
 363                 error = -EFSCORRUPTED;
 364                 goto out_unlock;
 365         }
 366
 367         /*
 368          * We cannot complete our parent pointer scan if a directory looks as
 369          * though it has been zapped by the inode record repair code.
 370          */
 371         if (xchk_dir_looks_zapped(dp)) {
 372                 error = -EBUSY;
 373                 goto out_unlock;
 374         }
 375
 376         error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
 377         if (error)
 378                 goto out_unlock;
 379
 380 out_unlock:
 381         xfs_iunlock(dp, lock_mode);
 382         return error;
 383 }
 384
 385 /*
 386  * Try to find the mode of the inode being repaired by looking for directories
 387  * that point down to this file.
 388  */
 389 STATIC int
 390 xrep_dinode_find_mode(
 391         struct xrep_inode       *ri,
 392         uint16_t                *mode)
 393 {
 394         struct xfs_scrub        *sc = ri->sc;
 395         struct xfs_inode        *dp;
 396         int                     error;
 397
 398         /* No ftype means we have no other metadata to consult. */
 399         if (!xfs_has_ftype(sc->mp)) {
 400                 *mode = S_IFREG;
 401                 return 0;
 402         }
 403
 404         /*
 405          * Scan all directories for parents that might point down to this
 406          * inode.  Skip the inode being repaired during the scan since it
 407          * cannot be its own parent.  Note that we still hold the AGI locked
 408          * so there's a real possibility that _iscan_iter can return EBUSY.
 409          */
 410         xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
 411         xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
 412         ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
 413         ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
 414         while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
 415                 if (S_ISDIR(VFS_I(dp)->i_mode))
 416                         error = xrep_dinode_findmode_walk_directory(ri, dp);
 417                 xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
 418                 xchk_irele(sc, dp);
 419                 if (error < 0)
 420                         break;
 421                 if (xchk_should_terminate(sc, &error))
 422                         break;
 423         }
 424         xchk_iscan_iter_finish(&ri->ftype_iscan);
 425         xchk_iscan_teardown(&ri->ftype_iscan);
 426
 427         if (error == -EBUSY) {
 428                 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
 429                         /*
 430                          * If we got an EBUSY after finding at least one
 431                          * dirent, that means the scan found an inode on the
 432                          * inactivation list and could not open it.  Accept the
 433                          * alleged ftype and install a new mode below.
 434                          */
 435                         error = 0;
 436                 } else if (!(sc->flags & XCHK_TRY_HARDER)) {
 437                         /*
 438                          * Otherwise, retry the operation one time to see if
 439                          * the reason for the delay is an inode from the same
 440                          * cluster buffer waiting on the inactivation list.
 441                          */
 442                         error = -EDEADLOCK;
 443                 }
 444         }
 445         if (error)
 446                 return error;
 447
 448         /*
 449          * Convert the discovered ftype into the file mode.  If all else fails,
 450          * return S_IFREG.
 451          */
 452         switch (ri->alleged_ftype) {
 453         case XFS_DIR3_FT_DIR:
 454                 *mode = S_IFDIR;
 455                 break;
 456         case XFS_DIR3_FT_WHT:
 457         case XFS_DIR3_FT_CHRDEV:
 458                 *mode = S_IFCHR;
 459                 break;
 460         case XFS_DIR3_FT_BLKDEV:
 461                 *mode = S_IFBLK;
 462                 break;
 463         case XFS_DIR3_FT_FIFO:
 464                 *mode = S_IFIFO;
 465                 break;
 466         case XFS_DIR3_FT_SOCK:
 467                 *mode = S_IFSOCK;
 468                 break;
 469         case XFS_DIR3_FT_SYMLINK:
 470                 *mode = S_IFLNK;
 471                 break;
 472         default:
 473                 *mode = S_IFREG;
 474                 break;
 475         }
 476         return 0;
 477 }
 478
 479 /* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
 480 STATIC int
 481 xrep_dinode_mode(
 482         struct xrep_inode       *ri,
 483         struct xfs_dinode       *dip)
 484 {
 485         struct xfs_scrub        *sc = ri->sc;
 486         uint16_t                mode = be16_to_cpu(dip->di_mode);
 487         int                     error;
 488
 489         trace_xrep_dinode_mode(sc, dip);
 490
 491         if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
 492                 return 0;
 493
 494         /* Try to fix the mode.  If we cannot, then leave everything alone. */
 495         error = xrep_dinode_find_mode(ri, &mode);
 496         switch (error) {
 497         case -EINTR:
 498         case -EBUSY:
 499         case -EDEADLOCK:
 500                 /* temporary failure or fatal signal */
 501                 return error;
 502         case 0:
 503                 /* found mode */
 504                 break;
 505         default:
 506                 /* some other error, assume S_IFREG */
 507                 mode = S_IFREG;
 508                 break;
 509         }
 510
 511         /* bad mode, so we set it to a file that only root can read */
 512         dip->di_mode = cpu_to_be16(mode);
 513         dip->di_uid = 0;
 514         dip->di_gid = 0;
 515         ri->zap_acls = true;
 516         return 0;
 517 }
 518
 519 /* Fix unused link count fields having nonzero values. */
 520 STATIC void
 521 xrep_dinode_nlinks(
 522         struct xfs_dinode       *dip)
 523 {
 524         if (dip->di_version < 2) {
 525                 dip->di_nlink = 0;
 526                 return;
 527         }
 528
 529         if (xfs_dinode_is_metadir(dip)) {
 530                 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
 531                         dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
 532         } else {
 533                 dip->di_metatype = 0;
 534         }
 535 }
 536
 537 /* Fix any conflicting flags that the verifiers complain about. */
 538 STATIC void
 539 xrep_dinode_flags(
 540         struct xfs_scrub        *sc,
 541         struct xfs_dinode       *dip,
 542         bool                    isrt)
 543 {
 544         struct xfs_mount        *mp = sc->mp;
 545         uint64_t                flags2 = be64_to_cpu(dip->di_flags2);
 546         uint16_t                flags = be16_to_cpu(dip->di_flags);
 547         uint16_t                mode = be16_to_cpu(dip->di_mode);
 548
 549         trace_xrep_dinode_flags(sc, dip);
 550
 551         if (isrt)
 552                 flags |= XFS_DIFLAG_REALTIME;
 553         else
 554                 flags &= ~XFS_DIFLAG_REALTIME;
 555
 556         /*
 557          * For regular files on a reflink filesystem, set the REFLINK flag to
 558          * protect shared extents.  A later stage will actually check those
 559          * extents and clear the flag if possible.
 560          */
 561         if (xfs_has_reflink(mp) && S_ISREG(mode))
 562                 flags2 |= XFS_DIFLAG2_REFLINK;
 563         else
 564                 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
 565         if (flags & XFS_DIFLAG_REALTIME)
 566                 flags2 &= ~XFS_DIFLAG2_REFLINK;
 567         if (!xfs_has_bigtime(mp))
 568                 flags2 &= ~XFS_DIFLAG2_BIGTIME;
 569         if (!xfs_has_large_extent_counts(mp))
 570                 flags2 &= ~XFS_DIFLAG2_NREXT64;
 571         if (flags2 & XFS_DIFLAG2_NREXT64)
 572                 dip->di_nrext64_pad = 0;
 573         else if (dip->di_version >= 3)
 574                 dip->di_v3_pad = 0;
 575
 576         if (flags2 & XFS_DIFLAG2_METADATA) {
 577                 xfs_failaddr_t  fa;
 578
 579                 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
 580                                 flags2);
 581                 if (fa)
 582                         flags2 &= ~XFS_DIFLAG2_METADATA;
 583         }
 584
 585         dip->di_flags = cpu_to_be16(flags);
 586         dip->di_flags2 = cpu_to_be64(flags2);
 587 }
 588
 589 /*
 590  * Blow out symlink; now it points nowhere.  We don't have to worry about
 591  * incore state because this inode is failing the verifiers.
 592  */
 593 STATIC void
 594 xrep_dinode_zap_symlink(
 595         struct xrep_inode       *ri,
 596         struct xfs_dinode       *dip)
 597 {
 598         struct xfs_scrub        *sc = ri->sc;
 599         char                    *p;
 600
 601         trace_xrep_dinode_zap_symlink(sc, dip);
 602
 603         dip->di_format = XFS_DINODE_FMT_LOCAL;
 604         dip->di_size = cpu_to_be64(1);
 605         p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 606         *p = '?';
 607         ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
 608 }
 609
 610 /*
 611  * Blow out dir, make the parent point to the root.  In the future repair will
 612  * reconstruct this directory for us.  Note that there's no in-core directory
 613  * inode because the sf verifier tripped, so we don't have to worry about the
 614  * dentry cache.
 615  */
 616 STATIC void
 617 xrep_dinode_zap_dir(
 618         struct xrep_inode       *ri,
 619         struct xfs_dinode       *dip)
 620 {
 621         struct xfs_scrub        *sc = ri->sc;
 622         struct xfs_mount        *mp = sc->mp;
 623         struct xfs_dir2_sf_hdr  *sfp;
 624         int                     i8count;
 625
 626         trace_xrep_dinode_zap_dir(sc, dip);
 627
 628         dip->di_format = XFS_DINODE_FMT_LOCAL;
 629         i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
 630         sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 631         sfp->count = 0;
 632         sfp->i8count = i8count;
 633         xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
 634         dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
 635         ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
 636 }
 637
 638 /* Make sure we don't have a garbage file size. */
 639 STATIC void
 640 xrep_dinode_size(
 641         struct xrep_inode       *ri,
 642         struct xfs_dinode       *dip)
 643 {
 644         struct xfs_scrub        *sc = ri->sc;
 645         uint64_t                size = be64_to_cpu(dip->di_size);
 646         uint16_t                mode = be16_to_cpu(dip->di_mode);
 647
 648         trace_xrep_dinode_size(sc, dip);
 649
 650         switch (mode & S_IFMT) {
 651         case S_IFIFO:
 652         case S_IFCHR:
 653         case S_IFBLK:
 654         case S_IFSOCK:
 655                 /* di_size can't be nonzero for special files */
 656                 dip->di_size = 0;
 657                 break;
 658         case S_IFREG:
 659                 /* Regular files can't be larger than 2^63-1 bytes. */
 660                 dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
 661                 break;
 662         case S_IFLNK:
 663                 /*
 664                  * Truncate ridiculously oversized symlinks.  If the size is
 665                  * zero, reset it to point to the current directory.  Both of
 666                  * these conditions trigger dinode verifier errors, so there
 667                  * is no in-core state to reset.
 668                  */
 669                 if (size > XFS_SYMLINK_MAXLEN)
 670                         dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
 671                 else if (size == 0)
 672                         xrep_dinode_zap_symlink(ri, dip);
 673                 break;
 674         case S_IFDIR:
 675                 /*
 676                  * Directories can't have a size larger than 32G.  If the size
 677                  * is zero, reset it to an empty directory.  Both of these
 678                  * conditions trigger dinode verifier errors, so there is no
 679                  * in-core state to reset.
 680                  */
 681                 if (size > XFS_DIR2_SPACE_SIZE)
 682                         dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
 683                 else if (size == 0)
 684                         xrep_dinode_zap_dir(ri, dip);
 685                 break;
 686         }
 687 }
 688
 689 /* Fix extent size hints. */
 690 STATIC void
 691 xrep_dinode_extsize_hints(
 692         struct xfs_scrub        *sc,
 693         struct xfs_dinode       *dip)
 694 {
 695         struct xfs_mount        *mp = sc->mp;
 696         uint64_t                flags2 = be64_to_cpu(dip->di_flags2);
 697         uint16_t                flags = be16_to_cpu(dip->di_flags);
 698         uint16_t                mode = be16_to_cpu(dip->di_mode);
 699
 700         xfs_failaddr_t          fa;
 701
 702         trace_xrep_dinode_extsize_hints(sc, dip);
 703
 704         fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
 705                         mode, flags);
 706         if (fa) {
 707                 dip->di_extsize = 0;
 708                 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
 709                                               XFS_DIFLAG_EXTSZINHERIT);
 710         }
 711
 712         if (dip->di_version < 3)
 713                 return;
 714
 715         fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 716                         mode, flags, flags2);
 717         if (fa) {
 718                 dip->di_cowextsize = 0;
 719                 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
 720         }
 721 }
 722
 723 /* Count extents and blocks for an inode given an rmap. */
 724 STATIC int
 725 xrep_dinode_walk_rmap(
 726         struct xfs_btree_cur            *cur,
 727         const struct xfs_rmap_irec      *rec,
 728         void                            *priv)
 729 {
 730         struct xrep_inode               *ri = priv;
 731         int                             error = 0;
 732
 733         if (xchk_should_terminate(ri->sc, &error))
 734                 return error;
 735
 736         /* We only care about this inode. */
 737         if (rec->rm_owner != ri->sc->sm->sm_ino)
 738                 return 0;
 739
 740         if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
 741                 ri->attr_blocks += rec->rm_blockcount;
 742                 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 743                         ri->attr_extents++;
 744
 745                 return 0;
 746         }
 747
 748         ri->data_blocks += rec->rm_blockcount;
 749         if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
 750                 ri->data_extents++;
 751
 752         return 0;
 753 }
 754
 755 /* Count extents and blocks for an inode from all AG rmap data. */
 756 STATIC int
 757 xrep_dinode_count_ag_rmaps(
 758         struct xrep_inode       *ri,
 759         struct xfs_perag        *pag)
 760 {
 761         struct xfs_btree_cur    *cur;
 762         struct xfs_buf          *agf;
 763         int                     error;
 764
 765         error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
 766         if (error)
 767                 return error;
 768
 769         cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
 770         error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
 771         xfs_btree_del_cursor(cur, error);
 772         xfs_trans_brelse(ri->sc->tp, agf);
 773         return error;
 774 }
 775
 776 /* Count extents and blocks for a given inode from all rmap data. */
 777 STATIC int
 778 xrep_dinode_count_rmaps(
 779         struct xrep_inode       *ri)
 780 {
 781         struct xfs_perag        *pag = NULL;
 782         int                     error;
 783
 784         if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
 785                 return -EOPNOTSUPP;
 786
 787         while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
 788                 error = xrep_dinode_count_ag_rmaps(ri, pag);
 789                 if (error) {
 790                         xfs_perag_rele(pag);
 791                         return error;
 792                 }
 793         }
 794
 795         /* Can't have extents on both the rt and the data device. */
 796         if (ri->data_extents && ri->rt_extents)
 797                 return -EFSCORRUPTED;
 798
 799         trace_xrep_dinode_count_rmaps(ri->sc,
 800                         ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
 801                         ri->data_extents, ri->rt_extents, ri->attr_extents);
 802         return 0;
 803 }
 804
 805 /* Return true if this extents-format ifork looks like garbage. */
 806 STATIC bool
 807 xrep_dinode_bad_extents_fork(
 808         struct xfs_scrub        *sc,
 809         struct xfs_dinode       *dip,
 810         unsigned int            dfork_size,
 811         int                     whichfork)
 812 {
 813         struct xfs_bmbt_irec    new;
 814         struct xfs_bmbt_rec     *dp;
 815         xfs_extnum_t            nex;
 816         bool                    isrt;
 817         unsigned int            i;
 818
 819         nex = xfs_dfork_nextents(dip, whichfork);
 820         if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
 821                 return true;
 822
 823         dp = XFS_DFORK_PTR(dip, whichfork);
 824
 825         isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
 826         for (i = 0; i < nex; i++, dp++) {
 827                 xfs_failaddr_t  fa;
 828
 829                 xfs_bmbt_disk_get_all(dp, &new);
 830                 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
 831                                 &new);
 832                 if (fa)
 833                         return true;
 834         }
 835
 836         return false;
 837 }
 838
 839 /* Return true if this btree-format ifork looks like garbage. */
 840 STATIC bool
 841 xrep_dinode_bad_bmbt_fork(
 842         struct xfs_scrub        *sc,
 843         struct xfs_dinode       *dip,
 844         unsigned int            dfork_size,
 845         int                     whichfork)
 846 {
 847         struct xfs_bmdr_block   *dfp;
 848         xfs_extnum_t            nex;
 849         unsigned int            i;
 850         unsigned int            dmxr;
 851         unsigned int            nrecs;
 852         unsigned int            level;
 853
 854         nex = xfs_dfork_nextents(dip, whichfork);
 855         if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
 856                 return true;
 857
 858         if (dfork_size < sizeof(struct xfs_bmdr_block))
 859                 return true;
 860
 861         dfp = XFS_DFORK_PTR(dip, whichfork);
 862         nrecs = be16_to_cpu(dfp->bb_numrecs);
 863         level = be16_to_cpu(dfp->bb_level);
 864
 865         if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
 866                 return true;
 867         if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
 868                 return true;
 869
 870         dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
 871         for (i = 1; i <= nrecs; i++) {
 872                 struct xfs_bmbt_key     *fkp;
 873                 xfs_bmbt_ptr_t          *fpp;
 874                 xfs_fileoff_t           fileoff;
 875                 xfs_fsblock_t           fsbno;
 876
 877                 fkp = xfs_bmdr_key_addr(dfp, i);
 878                 fileoff = be64_to_cpu(fkp->br_startoff);
 879                 if (!xfs_verify_fileoff(sc->mp, fileoff))
 880                         return true;
 881
 882                 fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
 883                 fsbno = be64_to_cpu(*fpp);
 884                 if (!xfs_verify_fsbno(sc->mp, fsbno))
 885                         return true;
 886         }
 887
 888         return false;
 889 }
 890
 891 /*
 892  * Check the data fork for things that will fail the ifork verifiers or the
 893  * ifork formatters.
 894  */
 895 STATIC bool
 896 xrep_dinode_check_dfork(
 897         struct xfs_scrub        *sc,
 898         struct xfs_dinode       *dip,
 899         uint16_t                mode)
 900 {
 901         void                    *dfork_ptr;
 902         int64_t                 data_size;
 903         unsigned int            fmt;
 904         unsigned int            dfork_size;
 905
 906         /*
 907          * Verifier functions take signed int64_t, so check for bogus negative
 908          * values first.
 909          */
 910         data_size = be64_to_cpu(dip->di_size);
 911         if (data_size < 0)
 912                 return true;
 913
 914         fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
 915         switch (mode & S_IFMT) {
 916         case S_IFIFO:
 917         case S_IFCHR:
 918         case S_IFBLK:
 919         case S_IFSOCK:
 920                 if (fmt != XFS_DINODE_FMT_DEV)
 921                         return true;
 922                 break;
 923         case S_IFREG:
 924                 if (fmt == XFS_DINODE_FMT_LOCAL)
 925                         return true;
 926                 fallthrough;
 927         case S_IFLNK:
 928         case S_IFDIR:
 929                 switch (fmt) {
 930                 case XFS_DINODE_FMT_LOCAL:
 931                 case XFS_DINODE_FMT_EXTENTS:
 932                 case XFS_DINODE_FMT_BTREE:
 933                         break;
 934                 default:
 935                         return true;
 936                 }
 937                 break;
 938         default:
 939                 return true;
 940         }
 941
 942         dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
 943         dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
 944
 945         switch (fmt) {
 946         case XFS_DINODE_FMT_DEV:
 947                 break;
 948         case XFS_DINODE_FMT_LOCAL:
 949                 /* dir/symlink structure cannot be larger than the fork */
 950                 if (data_size > dfork_size)
 951                         return true;
 952                 /* directory structure must pass verification. */
 953                 if (S_ISDIR(mode) &&
 954                     xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
 955                         return true;
 956                 /* symlink structure must pass verification. */
 957                 if (S_ISLNK(mode) &&
 958                     xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
 959                         return true;
 960                 break;
 961         case XFS_DINODE_FMT_EXTENTS:
 962                 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
 963                                 XFS_DATA_FORK))
 964                         return true;
 965                 break;
 966         case XFS_DINODE_FMT_BTREE:
 967                 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
 968                                 XFS_DATA_FORK))
 969                         return true;
 970                 break;
 971         default:
 972                 return true;
 973         }
 974
 975         return false;
 976 }
 977
 978 static void
 979 xrep_dinode_set_data_nextents(
 980         struct xfs_dinode       *dip,
 981         xfs_extnum_t            nextents)
 982 {
 983         if (xfs_dinode_has_large_extent_counts(dip))
 984                 dip->di_big_nextents = cpu_to_be64(nextents);
 985         else
 986                 dip->di_nextents = cpu_to_be32(nextents);
 987 }
 988
 989 static void
 990 xrep_dinode_set_attr_nextents(
 991         struct xfs_dinode       *dip,
 992         xfs_extnum_t            nextents)
 993 {
 994         if (xfs_dinode_has_large_extent_counts(dip))
 995                 dip->di_big_anextents = cpu_to_be32(nextents);
 996         else
 997                 dip->di_anextents = cpu_to_be16(nextents);
 998 }
 999
1000 /* Reset the data fork to something sane. */
1001 STATIC void
1002 xrep_dinode_zap_dfork(
1003         struct xrep_inode       *ri,
1004         struct xfs_dinode       *dip,
1005         uint16_t                mode)
1006 {
1007         struct xfs_scrub        *sc = ri->sc;
1008
1009         trace_xrep_dinode_zap_dfork(sc, dip);
1010
1011         ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
1012
1013         xrep_dinode_set_data_nextents(dip, 0);
1014         ri->data_blocks = 0;
1015         ri->rt_blocks = 0;
1016
1017         /* Special files always get reset to DEV */
1018         switch (mode & S_IFMT) {
1019         case S_IFIFO:
1020         case S_IFCHR:
1021         case S_IFBLK:
1022         case S_IFSOCK:
1023                 dip->di_format = XFS_DINODE_FMT_DEV;
1024                 dip->di_size = 0;
1025                 return;
1026         }
1027
1028         /*
1029          * If we have data extents, reset to an empty map and hope the user
1030          * will run the bmapbtd checker next.
1031          */
1032         if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1033                 dip->di_format = XFS_DINODE_FMT_EXTENTS;
1034                 return;
1035         }
1036
1037         /* Otherwise, reset the local format to the minimum. */
1038         switch (mode & S_IFMT) {
1039         case S_IFLNK:
1040                 xrep_dinode_zap_symlink(ri, dip);
1041                 break;
1042         case S_IFDIR:
1043                 xrep_dinode_zap_dir(ri, dip);
1044                 break;
1045         }
1046 }
1047
1048 /*
1049  * Check the attr fork for things that will fail the ifork verifiers or the
1050  * ifork formatters.
1051  */
1052 STATIC bool
1053 xrep_dinode_check_afork(
1054         struct xfs_scrub                *sc,
1055         struct xfs_dinode               *dip)
1056 {
1057         struct xfs_attr_sf_hdr          *afork_ptr;
1058         size_t                          attr_size;
1059         unsigned int                    afork_size;
1060
1061         if (XFS_DFORK_BOFF(dip) == 0)
1062                 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1063                        xfs_dfork_attr_extents(dip) != 0;
1064
1065         afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1066         afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1067
1068         switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1069         case XFS_DINODE_FMT_LOCAL:
1070                 /* Fork has to be large enough to extract the xattr size. */
1071                 if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1072                         return true;
1073
1074                 /* xattr structure cannot be larger than the fork */
1075                 attr_size = be16_to_cpu(afork_ptr->totsize);
1076                 if (attr_size > afork_size)
1077                         return true;
1078
1079                 /* xattr structure must pass verification. */
1080                 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1081         case XFS_DINODE_FMT_EXTENTS:
1082                 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1083                                         XFS_ATTR_FORK))
1084                         return true;
1085                 break;
1086         case XFS_DINODE_FMT_BTREE:
1087                 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1088                                         XFS_ATTR_FORK))
1089                         return true;
1090                 break;
1091         default:
1092                 return true;
1093         }
1094
1095         return false;
1096 }
1097
1098 /*
1099  * Reset the attr fork to empty.  Since the attr fork could have contained
1100  * ACLs, make the file readable only by root.
1101  */
1102 STATIC void
1103 xrep_dinode_zap_afork(
1104         struct xrep_inode       *ri,
1105         struct xfs_dinode       *dip,
1106         uint16_t                mode)
1107 {
1108         struct xfs_scrub        *sc = ri->sc;
1109
1110         trace_xrep_dinode_zap_afork(sc, dip);
1111
1112         ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1113
1114         dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1115         xrep_dinode_set_attr_nextents(dip, 0);
1116         ri->attr_blocks = 0;
1117
1118         /*
1119          * If the data fork is in btree format, removing the attr fork entirely
1120          * might cause verifier failures if the next level down in the bmbt
1121          * could now fit in the data fork area.
1122          */
1123         if (dip->di_format != XFS_DINODE_FMT_BTREE)
1124                 dip->di_forkoff = 0;
1125         dip->di_mode = cpu_to_be16(mode & ~0777);
1126         dip->di_uid = 0;
1127         dip->di_gid = 0;
1128 }
1129
1130 /* Make sure the fork offset is a sensible value. */
1131 STATIC void
1132 xrep_dinode_ensure_forkoff(
1133         struct xrep_inode       *ri,
1134         struct xfs_dinode       *dip,
1135         uint16_t                mode)
1136 {
1137         struct xfs_bmdr_block   *bmdr;
1138         struct xfs_scrub        *sc = ri->sc;
1139         xfs_extnum_t            attr_extents, data_extents;
1140         size_t                  bmdr_minsz = xfs_bmdr_space_calc(1);
1141         unsigned int            lit_sz = XFS_LITINO(sc->mp);
1142         unsigned int            afork_min, dfork_min;
1143
1144         trace_xrep_dinode_ensure_forkoff(sc, dip);
1145
1146         /*
1147          * Before calling this function, xrep_dinode_core ensured that both
1148          * forks actually fit inside their respective literal areas.  If this
1149          * was not the case, the fork was reset to FMT_EXTENTS with zero
1150          * records.  If the rmapbt scan found attr or data fork blocks, this
1151          * will be noted in the dinode_stats, and we must leave enough room
1152          * for the bmap repair code to reconstruct the mapping structure.
1153          *
1154          * First, compute the minimum space required for the attr fork.
1155          */
1156         switch (dip->di_aformat) {
1157         case XFS_DINODE_FMT_LOCAL:
1158                 /*
1159                  * If we still have a shortform xattr structure at all, that
1160                  * means the attr fork area was exactly large enough to fit
1161                  * the sf structure.
1162                  */
1163                 afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1164                 break;
1165         case XFS_DINODE_FMT_EXTENTS:
1166                 attr_extents = xfs_dfork_attr_extents(dip);
1167                 if (attr_extents) {
1168                         /*
1169                          * We must maintain sufficient space to hold the entire
1170                          * extent map array in the data fork.  Note that we
1171                          * previously zapped the fork if it had no chance of
1172                          * fitting in the inode.
1173                          */
1174                         afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1175                 } else if (ri->attr_extents > 0) {
1176                         /*
1177                          * The attr fork thinks it has zero extents, but we
1178                          * found some xattr extents.  We need to leave enough
1179                          * empty space here so that the incore attr fork will
1180                          * get created (and hence trigger the attr fork bmap
1181                          * repairer).
1182                          */
1183                         afork_min = bmdr_minsz;
1184                 } else {
1185                         /* No extents on disk or found in rmapbt. */
1186                         afork_min = 0;
1187                 }
1188                 break;
1189         case XFS_DINODE_FMT_BTREE:
1190                 /* Must have space for btree header and key/pointers. */
1191                 bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1192                 afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1193                 break;
1194         default:
1195                 /* We should never see any other formats. */
1196                 afork_min = 0;
1197                 break;
1198         }
1199
1200         /* Compute the minimum space required for the data fork. */
1201         switch (dip->di_format) {
1202         case XFS_DINODE_FMT_DEV:
1203                 dfork_min = sizeof(__be32);
1204                 break;
1205         case XFS_DINODE_FMT_UUID:
1206                 dfork_min = sizeof(uuid_t);
1207                 break;
1208         case XFS_DINODE_FMT_LOCAL:
1209                 /*
1210                  * If we still have a shortform data fork at all, that means
1211                  * the data fork area was large enough to fit whatever was in
1212                  * there.
1213                  */
1214                 dfork_min = be64_to_cpu(dip->di_size);
1215                 break;
1216         case XFS_DINODE_FMT_EXTENTS:
1217                 data_extents = xfs_dfork_data_extents(dip);
1218                 if (data_extents) {
1219                         /*
1220                          * We must maintain sufficient space to hold the entire
1221                          * extent map array in the data fork.  Note that we
1222                          * previously zapped the fork if it had no chance of
1223                          * fitting in the inode.
1224                          */
1225                         dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1226                 } else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1227                         /*
1228                          * The data fork thinks it has zero extents, but we
1229                          * found some data extents.  We need to leave enough
1230                          * empty space here so that the data fork bmap repair
1231                          * will recover the mappings.
1232                          */
1233                         dfork_min = bmdr_minsz;
1234                 } else {
1235                         /* No extents on disk or found in rmapbt. */
1236                         dfork_min = 0;
1237                 }
1238                 break;
1239         case XFS_DINODE_FMT_BTREE:
1240                 /* Must have space for btree header and key/pointers. */
1241                 bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1242                 dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1243                 break;
1244         default:
1245                 dfork_min = 0;
1246                 break;
1247         }
1248
1249         /*
1250          * Round all values up to the nearest 8 bytes, because that is the
1251          * precision of di_forkoff.
1252          */
1253         afork_min = roundup(afork_min, 8);
1254         dfork_min = roundup(dfork_min, 8);
1255         bmdr_minsz = roundup(bmdr_minsz, 8);
1256
1257         ASSERT(dfork_min <= lit_sz);
1258         ASSERT(afork_min <= lit_sz);
1259
1260         /*
1261          * If the data fork was zapped and we don't have enough space for the
1262          * recovery fork, move the attr fork up.
1263          */
1264         if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1265             xfs_dfork_data_extents(dip) == 0 &&
1266             (ri->data_extents > 0 || ri->rt_extents > 0) &&
1267             bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1268                 if (bmdr_minsz + afork_min > lit_sz) {
1269                         /*
1270                          * The attr for and the stub fork we need to recover
1271                          * the data fork won't both fit.  Zap the attr fork.
1272                          */
1273                         xrep_dinode_zap_afork(ri, dip, mode);
1274                         afork_min = bmdr_minsz;
1275                 } else {
1276                         void    *before, *after;
1277
1278                         /* Otherwise, just slide the attr fork up. */
1279                         before = XFS_DFORK_APTR(dip);
1280                         dip->di_forkoff = bmdr_minsz >> 3;
1281                         after = XFS_DFORK_APTR(dip);
1282                         memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1283                 }
1284         }
1285
1286         /*
1287          * If the attr fork was zapped and we don't have enough space for the
1288          * recovery fork, move the attr fork down.
1289          */
1290         if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1291             xfs_dfork_attr_extents(dip) == 0 &&
1292             ri->attr_extents > 0 &&
1293             bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1294                 if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1295                         /*
1296                          * If the data fork is in btree format then we can't
1297                          * adjust forkoff because that runs the risk of
1298                          * violating the extents/btree format transition rules.
1299                          */
1300                 } else if (bmdr_minsz + dfork_min > lit_sz) {
1301                         /*
1302                          * If we can't move the attr fork, too bad, we lose the
1303                          * attr fork and leak its blocks.
1304                          */
1305                         xrep_dinode_zap_afork(ri, dip, mode);
1306                 } else {
1307                         /*
1308                          * Otherwise, just slide the attr fork down.  The attr
1309                          * fork is empty, so we don't have any old contents to
1310                          * move here.
1311                          */
1312                         dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1313                 }
1314         }
1315 }
1316
1317 /*
1318  * Zap the data/attr forks if we spot anything that isn't going to pass the
1319  * ifork verifiers or the ifork formatters, because we need to get the inode
1320  * into good enough shape that the higher level repair functions can run.
1321  */
1322 STATIC void
1323 xrep_dinode_zap_forks(
1324         struct xrep_inode       *ri,
1325         struct xfs_dinode       *dip)
1326 {
1327         struct xfs_scrub        *sc = ri->sc;
1328         xfs_extnum_t            data_extents;
1329         xfs_extnum_t            attr_extents;
1330         xfs_filblks_t           nblocks;
1331         uint16_t                mode;
1332         bool                    zap_datafork = false;
1333         bool                    zap_attrfork = ri->zap_acls;
1334
1335         trace_xrep_dinode_zap_forks(sc, dip);
1336
1337         mode = be16_to_cpu(dip->di_mode);
1338
1339         data_extents = xfs_dfork_data_extents(dip);
1340         attr_extents = xfs_dfork_attr_extents(dip);
1341         nblocks = be64_to_cpu(dip->di_nblocks);
1342
1343         /* Inode counters don't make sense? */
1344         if (data_extents > nblocks)
1345                 zap_datafork = true;
1346         if (attr_extents > nblocks)
1347                 zap_attrfork = true;
1348         if (data_extents + attr_extents > nblocks)
1349                 zap_datafork = zap_attrfork = true;
1350
1351         if (!zap_datafork)
1352                 zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1353         if (!zap_attrfork)
1354                 zap_attrfork = xrep_dinode_check_afork(sc, dip);
1355
1356         /* Zap whatever's bad. */
1357         if (zap_attrfork)
1358                 xrep_dinode_zap_afork(ri, dip, mode);
1359         if (zap_datafork)
1360                 xrep_dinode_zap_dfork(ri, dip, mode);
1361         xrep_dinode_ensure_forkoff(ri, dip, mode);
1362
1363         /*
1364          * Zero di_nblocks if we don't have any extents at all to satisfy the
1365          * buffer verifier.
1366          */
1367         data_extents = xfs_dfork_data_extents(dip);
1368         attr_extents = xfs_dfork_attr_extents(dip);
1369         if (data_extents + attr_extents == 0)
1370                 dip->di_nblocks = 0;
1371 }
1372
1373 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1374 STATIC int
1375 xrep_dinode_core(
1376         struct xrep_inode       *ri)
1377 {
1378         struct xfs_scrub        *sc = ri->sc;
1379         struct xfs_buf          *bp;
1380         struct xfs_dinode       *dip;
1381         xfs_ino_t               ino = sc->sm->sm_ino;
1382         int                     error;
1383         int                     iget_error;
1384
1385         /* Figure out what this inode had mapped in both forks. */
1386         error = xrep_dinode_count_rmaps(ri);
1387         if (error)
1388                 return error;
1389
1390         /* Read the inode cluster buffer. */
1391         error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1392                         ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1393                         NULL);
1394         if (error)
1395                 return error;
1396
1397         /* Make sure we can pass the inode buffer verifier. */
1398         xrep_dinode_buf(sc, bp);
1399         bp->b_ops = &xfs_inode_buf_ops;
1400
1401         /* Fix everything the verifier will complain about. */
1402         dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1403         xrep_dinode_header(sc, dip);
1404         iget_error = xrep_dinode_mode(ri, dip);
1405         if (iget_error)
1406                 goto write;
1407         xrep_dinode_nlinks(dip);
1408         xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1409         xrep_dinode_size(ri, dip);
1410         xrep_dinode_extsize_hints(sc, dip);
1411         xrep_dinode_zap_forks(ri, dip);
1412
1413 write:
1414         /* Write out the inode. */
1415         trace_xrep_dinode_fixed(sc, dip);
1416         xfs_dinode_calc_crc(sc->mp, dip);
1417         xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1418         xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1419                         ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1420
1421         /*
1422          * In theory, we've fixed the ondisk inode record enough that we should
1423          * be able to load the inode into the cache.  Try to iget that inode
1424          * now while we hold the AGI and the inode cluster buffer and take the
1425          * IOLOCK so that we can continue with repairs without anyone else
1426          * accessing the inode.  If iget fails, we still need to commit the
1427          * changes.
1428          */
1429         if (!iget_error)
1430                 iget_error = xchk_iget(sc, ino, &sc->ip);
1431         if (!iget_error)
1432                 xchk_ilock(sc, XFS_IOLOCK_EXCL);
1433
1434         /*
1435          * Commit the inode cluster buffer updates and drop the AGI buffer that
1436          * we've been holding since scrub setup.  From here on out, repairs
1437          * deal only with the cached inode.
1438          */
1439         error = xrep_trans_commit(sc);
1440         if (error)
1441                 return error;
1442
1443         if (iget_error)
1444                 return iget_error;
1445
1446         error = xchk_trans_alloc(sc, 0);
1447         if (error)
1448                 return error;
1449
1450         error = xrep_ino_dqattach(sc);
1451         if (error)
1452                 return error;
1453
1454         xchk_ilock(sc, XFS_ILOCK_EXCL);
1455         if (ri->ino_sick_mask)
1456                 xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1457         return 0;
1458 }
1459
1460 /* Fix everything xfs_dinode_verify cares about. */
1461 STATIC int
1462 xrep_dinode_problems(
1463         struct xrep_inode       *ri)
1464 {
1465         struct xfs_scrub        *sc = ri->sc;
1466         int                     error;
1467
1468         error = xrep_dinode_core(ri);
1469         if (error)
1470                 return error;
1471
1472         /* We had to fix a totally busted inode, schedule quotacheck. */
1473         if (XFS_IS_UQUOTA_ON(sc->mp))
1474                 xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1475         if (XFS_IS_GQUOTA_ON(sc->mp))
1476                 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1477         if (XFS_IS_PQUOTA_ON(sc->mp))
1478                 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1479
1480         return 0;
1481 }
1482
1483 /*
1484  * Fix problems that the verifiers don't care about.  In general these are
1485  * errors that don't cause problems elsewhere in the kernel that we can easily
1486  * detect, so we don't check them all that rigorously.
1487  */
1488
1489 /* Make sure block and extent counts are ok. */
1490 STATIC int
1491 xrep_inode_blockcounts(
1492         struct xfs_scrub        *sc)
1493 {
1494         struct xfs_ifork        *ifp;
1495         xfs_filblks_t           count;
1496         xfs_filblks_t           acount;
1497         xfs_extnum_t            nextents;
1498         int                     error;
1499
1500         trace_xrep_inode_blockcounts(sc);
1501
1502         /* Set data fork counters from the data fork mappings. */
1503         error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1504                         &nextents, &count);
1505         if (error)
1506                 return error;
1507         if (xfs_is_reflink_inode(sc->ip)) {
1508                 /*
1509                  * data fork blockcount can exceed physical storage if a user
1510                  * reflinks the same block over and over again.
1511                  */
1512                 ;
1513         } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1514                 if (count >= sc->mp->m_sb.sb_rblocks)
1515                         return -EFSCORRUPTED;
1516         } else {
1517                 if (count >= sc->mp->m_sb.sb_dblocks)
1518                         return -EFSCORRUPTED;
1519         }
1520         error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1521         if (error)
1522                 return error;
1523         sc->ip->i_df.if_nextents = nextents;
1524
1525         /* Set attr fork counters from the attr fork mappings. */
1526         ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1527         if (ifp) {
1528                 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1529                                 &nextents, &acount);
1530                 if (error)
1531                         return error;
1532                 if (count >= sc->mp->m_sb.sb_dblocks)
1533                         return -EFSCORRUPTED;
1534                 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1535                                 nextents);
1536                 if (error)
1537                         return error;
1538                 ifp->if_nextents = nextents;
1539         } else {
1540                 acount = 0;
1541         }
1542
1543         sc->ip->i_nblocks = count + acount;
1544         return 0;
1545 }
1546
1547 /* Check for invalid uid/gid/prid. */
1548 STATIC void
1549 xrep_inode_ids(
1550         struct xfs_scrub        *sc)
1551 {
1552         bool                    dirty = false;
1553
1554         trace_xrep_inode_ids(sc);
1555
1556         if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1557                 i_uid_write(VFS_I(sc->ip), 0);
1558                 dirty = true;
1559                 if (XFS_IS_UQUOTA_ON(sc->mp))
1560                         xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1561         }
1562
1563         if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1564                 i_gid_write(VFS_I(sc->ip), 0);
1565                 dirty = true;
1566                 if (XFS_IS_GQUOTA_ON(sc->mp))
1567                         xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1568         }
1569
1570         if (sc->ip->i_projid == -1U) {
1571                 sc->ip->i_projid = 0;
1572                 dirty = true;
1573                 if (XFS_IS_PQUOTA_ON(sc->mp))
1574                         xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1575         }
1576
1577         /* strip setuid/setgid if we touched any of the ids */
1578         if (dirty)
1579                 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1580 }
1581
1582 static inline void
1583 xrep_clamp_timestamp(
1584         struct xfs_inode        *ip,
1585         struct timespec64       *ts)
1586 {
1587         ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1588         *ts = timestamp_truncate(*ts, VFS_I(ip));
1589 }
1590
1591 /* Nanosecond counters can't have more than 1 billion. */
1592 STATIC void
1593 xrep_inode_timestamps(
1594         struct xfs_inode        *ip)
1595 {
1596         struct timespec64       tstamp;
1597         struct inode            *inode = VFS_I(ip);
1598
1599         tstamp = inode_get_atime(inode);
1600         xrep_clamp_timestamp(ip, &tstamp);
1601         inode_set_atime_to_ts(inode, tstamp);
1602
1603         tstamp = inode_get_mtime(inode);
1604         xrep_clamp_timestamp(ip, &tstamp);
1605         inode_set_mtime_to_ts(inode, tstamp);
1606
1607         tstamp = inode_get_ctime(inode);
1608         xrep_clamp_timestamp(ip, &tstamp);
1609         inode_set_ctime_to_ts(inode, tstamp);
1610
1611         xrep_clamp_timestamp(ip, &ip->i_crtime);
1612 }
1613
1614 /* Fix inode flags that don't make sense together. */
1615 STATIC void
1616 xrep_inode_flags(
1617         struct xfs_scrub        *sc)
1618 {
1619         uint16_t                mode;
1620
1621         trace_xrep_inode_flags(sc);
1622
1623         mode = VFS_I(sc->ip)->i_mode;
1624
1625         /* Clear junk flags */
1626         if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1627                 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1628
1629         /* NEWRTBM only applies to realtime bitmaps */
1630         if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1631                 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1632         else
1633                 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1634
1635         /* These only make sense for directories. */
1636         if (!S_ISDIR(mode))
1637                 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1638                                           XFS_DIFLAG_EXTSZINHERIT |
1639                                           XFS_DIFLAG_PROJINHERIT |
1640                                           XFS_DIFLAG_NOSYMLINKS);
1641
1642         /* These only make sense for files. */
1643         if (!S_ISREG(mode))
1644                 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1645                                           XFS_DIFLAG_EXTSIZE);
1646
1647         /* These only make sense for non-rt files. */
1648         if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1649                 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1650
1651         /* Immutable and append only?  Drop the append. */
1652         if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1653             (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1654                 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1655
1656         /* Clear junk flags. */
1657         if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1658                 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1659
1660         /* No reflink flag unless we support it and it's a file. */
1661         if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1662                 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1663
1664         /* DAX only applies to files and dirs. */
1665         if (!(S_ISREG(mode) || S_ISDIR(mode)))
1666                 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1667
1668         /* No reflink files on the realtime device. */
1669         if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1670                 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1671 }
1672
1673 /*
1674  * Fix size problems with block/node format directories.  If we fail to find
1675  * the extent list, just bail out and let the bmapbtd repair functions clean
1676  * up that mess.
1677  */
1678 STATIC void
1679 xrep_inode_blockdir_size(
1680         struct xfs_scrub        *sc)
1681 {
1682         struct xfs_iext_cursor  icur;
1683         struct xfs_bmbt_irec    got;
1684         struct xfs_ifork        *ifp;
1685         xfs_fileoff_t           off;
1686         int                     error;
1687
1688         trace_xrep_inode_blockdir_size(sc);
1689
1690         error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1691         if (error)
1692                 return;
1693
1694         /* Find the last block before 32G; this is the dir size. */
1695         ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1696         off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1697         if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1698                 /* zero-extents directory? */
1699                 return;
1700         }
1701
1702         off = got.br_startoff + got.br_blockcount;
1703         sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1704                         XFS_FSB_TO_B(sc->mp, off));
1705 }
1706
1707 /* Fix size problems with short format directories. */
1708 STATIC void
1709 xrep_inode_sfdir_size(
1710         struct xfs_scrub        *sc)
1711 {
1712         struct xfs_ifork        *ifp;
1713
1714         trace_xrep_inode_sfdir_size(sc);
1715
1716         ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1717         sc->ip->i_disk_size = ifp->if_bytes;
1718 }
1719
1720 /*
1721  * Fix any irregularities in a directory inode's size now that we can iterate
1722  * extent maps and access other regular inode data.
1723  */
1724 STATIC void
1725 xrep_inode_dir_size(
1726         struct xfs_scrub        *sc)
1727 {
1728         trace_xrep_inode_dir_size(sc);
1729
1730         switch (sc->ip->i_df.if_format) {
1731         case XFS_DINODE_FMT_EXTENTS:
1732         case XFS_DINODE_FMT_BTREE:
1733                 xrep_inode_blockdir_size(sc);
1734                 break;
1735         case XFS_DINODE_FMT_LOCAL:
1736                 xrep_inode_sfdir_size(sc);
1737                 break;
1738         }
1739 }
1740
1741 /* Fix extent size hint problems. */
1742 STATIC void
1743 xrep_inode_extsize(
1744         struct xfs_scrub        *sc)
1745 {
1746         /* Fix misaligned extent size hints on a directory. */
1747         if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1748             (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1749             xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1750                 sc->ip->i_extsize = 0;
1751                 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1752         }
1753 }
1754
1755 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1756 STATIC int
1757 xrep_inode_pptr(
1758         struct xfs_scrub        *sc)
1759 {
1760         struct xfs_mount        *mp = sc->mp;
1761         struct xfs_inode        *ip = sc->ip;
1762         struct inode            *inode = VFS_I(ip);
1763
1764         if (!xfs_has_parent(mp))
1765                 return 0;
1766
1767         /*
1768          * Unlinked inodes that cannot be added to the directory tree will not
1769          * have a parent pointer.
1770          */
1771         if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1772                 return 0;
1773
1774         /* Children of the superblock do not have parent pointers. */
1775         if (xchk_inode_is_sb_rooted(ip))
1776                 return 0;
1777
1778         /* Inode already has an attr fork; no further work possible here. */
1779         if (xfs_inode_has_attr_fork(ip))
1780                 return 0;
1781
1782         return xfs_bmap_add_attrfork(sc->tp, ip,
1783                         sizeof(struct xfs_attr_sf_hdr), true);
1784 }
1785
1786 /* Fix any irregularities in an inode that the verifiers don't catch. */
1787 STATIC int
1788 xrep_inode_problems(
1789         struct xfs_scrub        *sc)
1790 {
1791         int                     error;
1792
1793         error = xrep_inode_blockcounts(sc);
1794         if (error)
1795                 return error;
1796         error = xrep_inode_pptr(sc);
1797         if (error)
1798                 return error;
1799         xrep_inode_timestamps(sc->ip);
1800         xrep_inode_flags(sc);
1801         xrep_inode_ids(sc);
1802         /*
1803          * We can now do a better job fixing the size of a directory now that
1804          * we can scan the data fork extents than we could in xrep_dinode_size.
1805          */
1806         if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1807                 xrep_inode_dir_size(sc);
1808         xrep_inode_extsize(sc);
1809
1810         trace_xrep_inode_fixed(sc);
1811         xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1812         return xrep_roll_trans(sc);
1813 }
1814
1815 /*
1816  * Make sure this inode's unlinked list pointers are consistent with its
1817  * link count.
1818  */
1819 STATIC int
1820 xrep_inode_unlinked(
1821         struct xfs_scrub        *sc)
1822 {
1823         unsigned int            nlink = VFS_I(sc->ip)->i_nlink;
1824         int                     error;
1825
1826         /*
1827          * If this inode is linked from the directory tree and on the unlinked
1828          * list, remove it from the unlinked list.
1829          */
1830         if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
1831                 struct xfs_perag        *pag;
1832                 int                     error;
1833
1834                 pag = xfs_perag_get(sc->mp,
1835                                 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
1836                 error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
1837                 xfs_perag_put(pag);
1838                 if (error)
1839                         return error;
1840         }
1841
1842         /*
1843          * If this inode is not linked from the directory tree yet not on the
1844          * unlinked list, put it on the unlinked list.
1845          */
1846         if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
1847                 error = xfs_iunlink(sc->tp, sc->ip);
1848                 if (error)
1849                         return error;
1850         }
1851
1852         return 0;
1853 }
1854
1855 /* Repair an inode's fields. */
1856 int
1857 xrep_inode(
1858         struct xfs_scrub        *sc)
1859 {
1860         int                     error = 0;
1861
1862         /*
1863          * No inode?  That means we failed the _iget verifiers.  Repair all
1864          * the things that the inode verifiers care about, then retry _iget.
1865          */
1866         if (!sc->ip) {
1867                 struct xrep_inode       *ri = sc->buf;
1868
1869                 ASSERT(ri != NULL);
1870
1871                 error = xrep_dinode_problems(ri);
1872                 if (error == -EBUSY) {
1873                         /*
1874                          * Directory scan to recover inode mode encountered a
1875                          * busy inode, so we did not continue repairing things.
1876                          */
1877                         return 0;
1878                 }
1879                 if (error)
1880                         return error;
1881
1882                 /* By this point we had better have a working incore inode. */
1883                 if (!sc->ip)
1884                         return -EFSCORRUPTED;
1885         }
1886
1887         xfs_trans_ijoin(sc->tp, sc->ip, 0);
1888
1889         /* If we found corruption of any kind, try to fix it. */
1890         if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1891             (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1892                 error = xrep_inode_problems(sc);
1893                 if (error)
1894                         return error;
1895         }
1896
1897         /* See if we can clear the reflink flag. */
1898         if (xfs_is_reflink_inode(sc->ip)) {
1899                 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1900                 if (error)
1901                         return error;
1902         }
1903
1904         /* Reconnect incore unlinked list */
1905         error = xrep_inode_unlinked(sc);
1906         if (error)
1907                 return error;
1908
1909         return xrep_defer_finish(sc);
1910 }