// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_health.h"
#include "xfs_exchmaps_item.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_symlink_remote.h"

struct kmem_cache	*xfs_exchmaps_intent_cache;

/* bmbt mappings adjacent to a pair of records. */
struct xfs_exchmaps_adjacent {
	struct xfs_bmbt_irec	left1;
	struct xfs_bmbt_irec	right1;
	struct xfs_bmbt_irec	left2;
	struct xfs_bmbt_irec	right2;
};

#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}
/* Information to reset reflink flag / CoW fork state after an exchange. */

/*
 * If the reflink flag is set on either inode, make sure it has an incore CoW
 * fork, since all reflink inodes must have them.  If there's a CoW fork and
 * it has mappings in it, make sure the inodes are tagged appropriately so
 * that speculative preallocations can be GC'd if we run low on space.
 */
static inline void
xfs_exchmaps_ensure_cowfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*cfork;

	if (xfs_is_reflink_inode(ip))
		xfs_ifork_init_cow(ip);

	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
	if (!cfork)
		return;
	if (cfork->if_bytes > 0)
		xfs_inode_set_cowblocks_tag(ip);
	else
		xfs_inode_clear_cowblocks_tag(ip);
}
/*
 * Adjust the on-disk inode size upwards if needed so that we never add
 * mappings into the file past EOF.  This is crucial so that log recovery
 * won't get confused by the sudden appearance of post-eof mappings.
 */
static void
xfs_exchmaps_update_size(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	xfs_fsize_t		new_isize)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_fsize_t		len;

	if (new_isize < 0)
		return;

	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
		  new_isize);

	if (len <= ip->i_disk_size)
		return;

	trace_xfs_exchmaps_update_inode_size(ip, len);

	ip->i_disk_size = len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
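
/*
 * For example, with 4k fsblocks, exchanging a mapping with br_startoff 16
 * and br_blockcount 4 into a file whose new_isize is 70000 bytes computes
 * len = min(XFS_FSB_TO_B(mp, 20), 70000) = min(81920, 70000) = 70000, so
 * i_disk_size grows to cover the new mapping but never beyond the requested
 * size.
 */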
/* Advance the incore state tracking after exchanging a mapping. */
static inline void
xmi_advance(
	struct xfs_exchmaps_intent	*xmi,
	const struct xfs_bmbt_irec	*irec)
{
	xmi->xmi_startoff1 += irec->br_blockcount;
	xmi->xmi_startoff2 += irec->br_blockcount;
	xmi->xmi_blockcount -= irec->br_blockcount;
}

/* Do we still have more mappings to exchange? */
static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_blockcount > 0;
}

/* Do we have post-operation cleanups to perform? */
static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
				 __XFS_EXCHMAPS_INO2_SHORTFORM);
}
/* Check all mappings to make sure we can actually exchange them. */
int
xfs_exchmaps_check_forks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_ifork		*ifp1, *ifp2;
	int				whichfork = xfs_exchmaps_reqfork(req);

	/* No fork? */
	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
	if (!ifp1 || !ifp2)
		return -EINVAL;

	/* We don't know how to exchange local format forks. */
	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	return 0;
}
#ifdef CONFIG_XFS_QUOTA
/* Log the actual updates to the quota accounting. */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t			ip1_delta = 0, ip2_delta = 0;
	unsigned int		qflag;

	qflag = XFS_IS_REALTIME_INODE(xmi->xmi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
}
#else
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif /* CONFIG_XFS_QUOTA */
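
/*
 * For example, if irec1 maps 8 written blocks in file1 and irec2 maps a
 * hole in file2, the exchange moves 8 blocks from file1 to file2, so
 * ip1_delta ends up at -8 and ip2_delta at +8.  When both mappings are real
 * and (after trimming) the same length, the deltas cancel to zero and
 * neither inode's quota block count changes.
 */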
/* Decide if we want to skip this mapping from file1. */
static bool
xfs_exchmaps_can_skip_mapping(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount	*mp = xmi->xmi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written mappings. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align
	 * with rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = min(irec->br_blockcount,
					  new_end - irec->br_startoff);
		return false;
	}

	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the
	 *   mapping down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				       mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
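
/*
 * For example, with sb_rextsize = 8: an unwritten mapping at br_startoff 10
 * with br_blockcount 20 is not rtx-aligned at its start, so it is trimmed
 * to 6 blocks (ending at the rtx boundary at offset 16) and exchanged.  A
 * later pass covering offsets 16-29 starts rtx-aligned and is longer than
 * one rtx, so it is trimmed to the fully unwritten rtx at offsets 16-23 and
 * skipped, leaving the short 6-block tail at offset 24 to be exchanged.
 */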
/*
 * Walk forward through the file ranges in @xmi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and xmi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 */
static int
xfs_exchmaps_find_mappings(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_exchmaps_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));

	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
		/* Read mapping from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
				xmi->xmi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != xmi->xmi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc mapping
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
			continue;
		}

		/* Read mapping from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != xmi->xmi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc mapping
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only exchange as many blocks as the smaller of the
		 * two mapping lengths.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);

		/* We found something to exchange, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two mappings pointing to the same physical block must not
		 * have different states; that's filesystem corruption.  Move
		 * on to the next mapping if they're both holes or both point
		 * to the same physical space extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(xmi->xmi_ip1,
					xfs_exchmaps_whichfork(xmi));
			xfs_bmap_mark_sick(xmi->xmi_ip2,
					xfs_exchmaps_whichfork(xmi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
/* Exchange these two mappings. */
static void
xfs_exchmaps_one_step(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_exchmaps_whichfork(xmi);

	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We exchange the file offsets between the two
	 * maps and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information exchanged.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);

	/* Make sure we're not adding mappings past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
				xmi->xmi_isize1);
		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
				xmi->xmi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the XMD item, and if *blockcount is nonzero, it
	 * will log a new XMI item for the remainder and call us back.
	 */
	xmi_advance(xmi, irec1);
}
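
/*
 * For example, if irec1 maps file1's offset 100 to dblock 500 and irec2
 * maps file2's offset 300 to dblock 900, both ranges are unmapped, the
 * startoffs are swapped, and the re-map step adds (offset 100, dblock 900)
 * to file1 and (offset 300, dblock 500) to file2 -- the same logical ranges
 * now carry each other's physical space.
 */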
/* Convert inode2's leaf attr fork back to shortform, if possible. */
static int
xfs_exchmaps_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
/* Convert inode2's block dir fork back to shortform, if possible. */
static int
xfs_exchmaps_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	int			size;
	int			error = 0;

	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
		return error;

	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
	if (error)
		return error;

	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
/* Convert inode2's remote symlink target back to shortform, if possible. */
static int
xfs_exchmaps_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_inode	*ip = xmi->xmi_ip2;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char			*buf;
	int			error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmalloc(ip->i_disk_size + 1,
			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);

free:
	kfree(buf);
	return error;
}
/* Clear the reflink flag after an exchange. */
static inline void
xfs_exchmaps_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
/* Finish whatever work might come after an exchange operation. */
static int
xfs_exchmaps_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
		int			error = 0;

		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
			error = xfs_exchmaps_attr_to_sf(tp, xmi);
		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_dir_to_sf(tp, xmi);
		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_link_to_sf(tp, xmi);
		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
		if (error)
			return error;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	return 0;
}
/* Finish one step in a mapping exchange operation, possibly relogging. */
int
xfs_exchmaps_finish_one(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (xmi_has_more_exchange_work(xmi)) {
		/*
		 * If the operation state says that some range of the files
		 * has not yet been exchanged, look for mappings in that
		 * range to exchange.  If we find some mappings, exchange
		 * them.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (xmi_has_more_exchange_work(xmi))
			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * exchange and either we just exchanged the last mappings in
		 * the range or we didn't find anything to exchange, update
		 * the ondisk file sizes.
		 */
		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
		    !xmi_has_more_exchange_work(xmi)) {
			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;

			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
		}
	} else if (xmi_has_postop_work(xmi)) {
		/*
		 * Now that we're finished with the exchange operation,
		 * complete the post-op cleanup work.
		 */
		error = xfs_exchmaps_do_postop_work(tp, xmi);
		if (error)
			return error;
	}

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the exchange work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
	}

	return 0;
}
/*
 * Compute the number of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_exchmaps_bmbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	return howmany_64(req->nr_exchanges,
			  XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
		XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
}
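
/*
 * For example, if a single bmbt block could hold 125 contiguous records
 * (the real figure comes from XFS_MAX_CONTIG_BMAPS_PER_BLOCK) and the
 * estimate calls for 300 exchanges, we reserve howmany_64(300, 125) = 3
 * times XFS_EXTENTADD_SPACE_RES worth of blocks per file, enough to absorb
 * a split for every leaf block we might fill.
 */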
/* Compute the space we should reserve for the rmap btree expansions. */
static inline uint64_t
xfs_exchmaps_rmapbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return 0;

	return howmany_64(req->nr_exchanges,
			  XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
		XFS_RMAPADD_SPACE_RES(mp);
}
/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
static int
xfs_exchmaps_estimate_overhead(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_mount	*mp = req->ip1->i_mount;
	xfs_filblks_t		bmbt_blocks;
	xfs_filblks_t		rmapbt_blocks;
	xfs_filblks_t		resblks = req->resblks;

	/*
	 * Compute the number of bmbt and rmapbt blocks we might need to
	 * handle the estimated number of exchanges.
	 */
	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);

	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);

	/* Make sure the change in file block count doesn't overflow. */
	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
		return -EFBIG;
	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
		return -EFBIG;

	/*
	 * Add together the number of blocks we need to handle btree growth,
	 * then add it to the number of blocks we need to reserve to this
	 * transaction.
	 */
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;

	/* Can't actually reserve more than UINT_MAX blocks. */
	if (resblks > UINT_MAX)
		return -ENOSPC;

	req->resblks = resblks;
	trace_xfs_exchmaps_final_estimate(req);
	return 0;
}
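
/*
 * Note that the bmbt and rmapbt overheads are each added to resblks twice
 * above because there are two files participating in the exchange, and each
 * of them might grow its btrees by that amount.
 */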
/* Decide if we can merge two real mappings. */
static inline bool
xmi_can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Don't merge delalloc reservations either. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state			  == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
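
/*
 * For example, a mapping of (startoff 0, startblock 40, 16 blocks) can
 * merge with (startoff 16, startblock 56, 4 blocks) in the same state: both
 * the logical and physical ranges butt up against each other, and the
 * combined length stays within XFS_MAX_BMBT_EXTLEN.
 */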
/*
 * Decide if we can merge three mappings.  The caller must ensure that none
 * of the three mappings is a hole or a delalloc reservation.
 */
static inline bool
xmi_can_merge_all(
	const struct xfs_bmbt_irec	*l,
	const struct xfs_bmbt_irec	*m,
	const struct xfs_bmbt_irec	*r)
{
	xfs_filblks_t	new_len;

	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
	return new_len <= XFS_MAX_BMBT_EXTLEN;
}
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
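
/*
 * The C* bits describe how the current (old) mapping relates to its
 * neighbors, and the N* bits describe the incoming (new) mapping; e.g.
 * CLEFT_CONTIG | CRIGHT_CONTIG means the old mapping is contiguous with
 * both neighbors, so removing it splits one bmbt record into two.
 */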
/* Estimate the effect of a single exchange on mapping count. */
static inline int
xmi_delta_nextents_step(
	struct xfs_mount		*mp,
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*curr,
	const struct xfs_bmbt_irec	*new,
	const struct xfs_bmbt_irec	*right)
{
	bool				lhole, rhole, chole, nhole;
	unsigned int			state = 0;
	int				ret = 0;

	lhole = left->br_startblock == HOLESTARTBLOCK;
	rhole = right->br_startblock == HOLESTARTBLOCK;
	chole = curr->br_startblock == HOLESTARTBLOCK;
	nhole = new->br_startblock == HOLESTARTBLOCK;
	if (chole)
		state |= CHOLE;

	if (!lhole && !chole && xmi_can_merge(left, curr))
		state |= CLEFT_CONTIG;
	if (!rhole && !chole && xmi_can_merge(curr, right))
		state |= CRIGHT_CONTIG;
	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
	    !xmi_can_merge_all(left, curr, right))
		state &= ~CRIGHT_CONTIG;

	if (nhole)
		state |= NHOLE;

	if (!lhole && !nhole && xmi_can_merge(left, new))
		state |= NLEFT_CONTIG;
	if (!rhole && !nhole && xmi_can_merge(new, right))
		state |= NRIGHT_CONTIG;
	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
	    !xmi_can_merge_all(left, new, right))
		state &= ~NRIGHT_CONTIG;

	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
	case CLEFT_CONTIG | CRIGHT_CONTIG:
		/*
		 * left/curr/right are the same mapping, so deleting curr
		 * causes 2 new mappings to be created.
		 */
		ret += 2;
		break;
	case 0:
		/*
		 * curr is not contiguous with any mapping, so we remove curr
		 * completely
		 */
		ret--;
		break;
	case CHOLE:
		/* hole, do nothing */
		break;
	case CLEFT_CONTIG:
	case CRIGHT_CONTIG:
		/* trim either left or right, no change */
		break;
	}

	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
	case NLEFT_CONTIG | NRIGHT_CONTIG:
		/*
		 * left/curr/right will become the same mapping, so adding
		 * curr causes the deletion of right.
		 */
		ret--;
		break;
	case 0:
		/* new is not contiguous with any mapping */
		ret++;
		break;
	case NHOLE:
		/* hole, do nothing. */
		break;
	case NLEFT_CONTIG:
	case NRIGHT_CONTIG:
		/* new is absorbed into left or right, no change */
		break;
	}

	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
			state);
	return ret;
}
/* Make sure we don't overflow the extent (mapping) counters. */
static inline int
xmi_ensure_delta_nextents(
	struct xfs_exchmaps_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			whichfork = xfs_exchmaps_reqfork(req);
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	uint64_t		new_nextents;
	xfs_extnum_t		max_nextents;

	if (delta < 0)
		return 0;

	/*
	 * It's always an error if the delta causes integer overflow.  delta
	 * needs an explicit cast here to avoid warnings about implicit casts
	 * coded into the overflow check.
	 */
	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
			       &new_nextents))
		return -EFBIG;

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
	    new_nextents > 10)
		return -EFBIG;

	/*
	 * We always promote both inodes to have large extent counts if the
	 * superblock feature is enabled, so we only need to check against the
	 * theoretical maximum.
	 */
	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
					     whichfork);
	if (new_nextents > max_nextents)
		return -EFBIG;

	return 0;
}
/* Find the next mapping after irec. */
static inline int
xmi_next(
	struct xfs_inode		*ip,
	int				bmap_flags,
	const struct xfs_bmbt_irec	*irec,
	struct xfs_bmbt_irec		*nrec)
{
	xfs_fileoff_t			off;
	xfs_filblks_t			blockcount;
	int				nimaps = 1;
	int				error;

	off = irec->br_startoff + irec->br_blockcount;
	blockcount = XFS_MAX_FILEOFF - off;
	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
	if (error)
		return error;
	if (nrec->br_startblock == DELAYSTARTBLOCK ||
	    nrec->br_startoff != off) {
		/*
		 * If we don't get the mapping we want, return a zero-length
		 * mapping, which our estimator function will pretend is a
		 * hole.  We shouldn't get delalloc reservations.
		 */
		nrec->br_startblock = HOLESTARTBLOCK;
		nrec->br_blockcount = 0;
	}

	return 0;
}
int __init
xfs_exchmaps_intent_init_cache(void)
{
	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
			sizeof(struct xfs_exchmaps_intent),
			0, 0, NULL);

	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_exchmaps_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_exchmaps_intent_cache);
	xfs_exchmaps_intent_cache = NULL;
}
/*
 * Decide if we will exchange the reflink flags between the two files after
 * the exchange.  The only time we want to do this is if we're exchanging all
 * mappings under EOF and the inode reflink flags have different states.
 */
static inline bool
xmi_can_exchange_reflink_flags(
	const struct xfs_exchmaps_req	*req,
	unsigned int			reflink_state)
{
	struct xfs_mount	*mp = req->ip1->i_mount;

	if (hweight32(reflink_state) != 1)
		return false;
	if (req->startoff1 != 0 || req->startoff2 != 0)
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
		return false;

	return true;
}
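
/*
 * In other words, if exactly one of the two files is marked reflink and the
 * request covers both files from offset 0 through EOF, clearing the flag on
 * the donor (while xfs_exchmaps_ensure_reflink sets it on the other file)
 * effectively moves the reflink state along with the shared mappings.
 */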
/* Allocate and initialize a new incore intent item from a request. */
struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;
	unsigned int			rs = 0;

	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&xmi->xmi_list);
	xmi->xmi_ip1 = req->ip1;
	xmi->xmi_ip2 = req->ip2;
	xmi->xmi_startoff1 = req->startoff1;
	xmi->xmi_startoff2 = req->startoff2;
	xmi->xmi_blockcount = req->blockcount;
	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;

	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
		return xmi;
	}

	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
		xmi->xmi_isize1 = req->ip2->i_disk_size;
		xmi->xmi_isize2 = req->ip1->i_disk_size;
	}

	/* Record the state of each inode's reflink flag before the op. */
	if (xfs_is_reflink_inode(req->ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(req->ip2))
		rs |= 2;

	/*
	 * Figure out if we're clearing the reflink flags (which effectively
	 * exchanges them) after the operation.
	 */
	if (xmi_can_exchange_reflink_flags(req, rs)) {
		if (rs & 1)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
		if (rs & 2)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;

	return xmi;
}
/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_exchmaps_estimate(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
	xmi = xfs_exchmaps_init_intent(req);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the mapping at each end of the range, and for
	 * each step of the exchange we can split the mapping that we're
	 * working on if the mappings do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (xmi_has_more_exchange_work(xmi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * exchange.  Because we're simulating the exchange, pass in
		 * adj to capture skipped mappings for correct estimation of
		 * bmbt record merges.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!xmi_has_more_exchange_work(xmi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next mappings from both files. */
		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/* Now pretend we exchanged the mappings. */
		if (xmi_can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (xmi_can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		xmi_advance(xmi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the exchange operation will not overflow.
	 */
	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = xmi_ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_exchmaps_initial_estimate(req);
	error = xfs_exchmaps_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
	return error;
}
/* Set the reflink flag before an operation. */
static inline void
xfs_exchmaps_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
/*
 * If either file has shared blocks and we're exchanging data forks, we must
 * flag the other file as having shared blocks so that we get the shared-block
 * rmap functions if we need to fix up the rmaps.
 */
static void
xfs_exchmaps_ensure_reflink(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	unsigned int				rs = 0;

	if (xfs_is_reflink_inode(xmi->xmi_ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(xmi->xmi_ip2))
		rs |= 2;

	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);

	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
}
/* Set the large extent count flag before an operation if needed. */
static inline void
xfs_exchmaps_ensure_large_extent_counts(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	if (xfs_inode_has_large_extent_counts(ip))
		return;

	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
/* Widen the extent counter fields of both inodes if necessary. */
static void
xfs_exchmaps_upgrade_extent_counts(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	if (!xfs_has_large_extent_counts(tp->t_mountp))
		return;

	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
}
/*
 * Schedule an exchange of a range of mappings from one inode to another.
 *
 * The use of file mapping exchange log intent items ensures the operation can
 * be resumed even if the system goes down.  The caller must commit the
 * transaction to start the work.
 *
 * The caller must ensure the inodes are joined to the transaction and ILOCKd;
 * they will still be joined to the transaction at exit.
 */
void
xfs_exchange_mappings(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;

	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);

	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
	ASSERT(xfs_has_exchange_range(tp->t_mountp));

	if (req->blockcount == 0)
		return;

	xmi = xfs_exchmaps_init_intent(req);
	xfs_exchmaps_defer_add(tp, xmi);
	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
}
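
/*
 * A sketch of the expected caller pattern (not part of this file): estimate
 * the work and space reservation first, then allocate a transaction with
 * req->resblks, ILOCK and join both inodes, schedule the exchange, and
 * commit.  Deferred-ops processing relogs the intent item until
 * xfs_exchmaps_finish_one stops returning -EAGAIN.
 *
 *	error = xfs_exchmaps_estimate(&req);
 *	...allocate tp with req.resblks, then ilock and join req.ip1/req.ip2...
 *	xfs_exchange_mappings(tp, &req);
 *	error = xfs_trans_commit(tp);
 */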