fs/xfs/xfs_rmap_item.c

   1 // SPDX-License-Identifier: GPL-2.0+
   2 /*
   3  * Copyright (C) 2016 Oracle.  All Rights Reserved.
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  */
   6 #include "xfs.h"
   7 #include "xfs_fs.h"
   8 #include "xfs_format.h"
   9 #include "xfs_log_format.h"
  10 #include "xfs_trans_resv.h"
  11 #include "xfs_bit.h"
  12 #include "xfs_shared.h"
  13 #include "xfs_mount.h"
  14 #include "xfs_defer.h"
  15 #include "xfs_trans.h"
  16 #include "xfs_trans_priv.h"
  17 #include "xfs_buf_item.h"
  18 #include "xfs_rmap_item.h"
  19 #include "xfs_log.h"
  20 #include "xfs_rmap.h"
  21
  22
  23 kmem_zone_t     *xfs_rui_zone;
  24 kmem_zone_t     *xfs_rud_zone;
  25
  26 static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
  27 {
  28         return container_of(lip, struct xfs_rui_log_item, rui_item);
  29 }
  30
  31 void
  32 xfs_rui_item_free(
  33         struct xfs_rui_log_item *ruip)
  34 {
  35         if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
  36                 kmem_free(ruip);
  37         else
  38                 kmem_zone_free(xfs_rui_zone, ruip);
  39 }
  40
  41 /*
  42  * Freeing the RUI requires that we remove it from the AIL if it has already
  43  * been placed there. However, the RUI may not yet have been placed in the AIL
  44  * when called by xfs_rui_release() from RUD processing due to the ordering of
  45  * committed vs unpin operations in bulk insert operations. Hence the reference
  46  * count to ensure only the last caller frees the RUI.
  47  */
  48 void
  49 xfs_rui_release(
  50         struct xfs_rui_log_item *ruip)
  51 {
  52         ASSERT(atomic_read(&ruip->rui_refcount) > 0);
  53         if (atomic_dec_and_test(&ruip->rui_refcount)) {
  54                 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
  55                 xfs_rui_item_free(ruip);
  56         }
  57 }
  58
  59 STATIC void
  60 xfs_rui_item_size(
  61         struct xfs_log_item     *lip,
  62         int                     *nvecs,
  63         int                     *nbytes)
  64 {
  65         struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
  66
  67         *nvecs += 1;
  68         *nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
  69 }
  70
  71 /*
  72  * This is called to fill in the vector of log iovecs for the
  73  * given rui log item. We use only 1 iovec, and we point that
  74  * at the rui_log_format structure embedded in the rui item.
  75  * It is at this point that we assert that all of the extent
  76  * slots in the rui item have been filled.
  77  */
  78 STATIC void
  79 xfs_rui_item_format(
  80         struct xfs_log_item     *lip,
  81         struct xfs_log_vec      *lv)
  82 {
  83         struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
  84         struct xfs_log_iovec    *vecp = NULL;
  85
  86         ASSERT(atomic_read(&ruip->rui_next_extent) ==
  87                         ruip->rui_format.rui_nextents);
  88
  89         ruip->rui_format.rui_type = XFS_LI_RUI;
  90         ruip->rui_format.rui_size = 1;
  91
  92         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
  93                         xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
  94 }
  95
  96 /*
  97  * Pinning has no meaning for an rui item, so just return.
  98  */
  99 STATIC void
 100 xfs_rui_item_pin(
 101         struct xfs_log_item     *lip)
 102 {
 103 }
 104
 105 /*
 106  * The unpin operation is the last place an RUI is manipulated in the log. It is
 107  * either inserted in the AIL or aborted in the event of a log I/O error. In
 108  * either case, the RUI transaction has been successfully committed to make it
 109  * this far. Therefore, we expect whoever committed the RUI to either construct
 110  * and commit the RUD or drop the RUD's reference in the event of error. Simply
 111  * drop the log's RUI reference now that the log is done with it.
 112  */
 113 STATIC void
 114 xfs_rui_item_unpin(
 115         struct xfs_log_item     *lip,
 116         int                     remove)
 117 {
 118         struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
 119
 120         xfs_rui_release(ruip);
 121 }
 122
 123 /*
 124  * RUI items have no locking or pushing.  However, since RUIs are pulled from
 125  * the AIL when their corresponding RUDs are committed to disk, their situation
 126  * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
 127  * will eventually flush the log.  This should help in getting the RUI out of
 128  * the AIL.
 129  */
 130 STATIC uint
 131 xfs_rui_item_push(
 132         struct xfs_log_item     *lip,
 133         struct list_head        *buffer_list)
 134 {
 135         return XFS_ITEM_PINNED;
 136 }
 137
 138 /*
 139  * The RUI has been either committed or aborted if the transaction has been
 140  * cancelled. If the transaction was cancelled, an RUD isn't going to be
 141  * constructed and thus we free the RUI here directly.
 142  */
 143 STATIC void
 144 xfs_rui_item_unlock(
 145         struct xfs_log_item     *lip)
 146 {
 147         if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 148                 xfs_rui_release(RUI_ITEM(lip));
 149 }
 150
 151 /*
 152  * The RUI is logged only once and cannot be moved in the log, so simply return
 153  * the lsn at which it's been logged.
 154  */
 155 STATIC xfs_lsn_t
 156 xfs_rui_item_committed(
 157         struct xfs_log_item     *lip,
 158         xfs_lsn_t               lsn)
 159 {
 160         return lsn;
 161 }
 162
 163 /*
 164  * The RUI dependency tracking op doesn't do squat.  It can't because
 165  * it doesn't know where the free extent is coming from.  The dependency
 166  * tracking has to be handled by the "enclosing" metadata object.  For
 167  * example, for inodes, the inode is locked throughout the extent freeing
 168  * so the dependency should be recorded there.
 169  */
 170 STATIC void
 171 xfs_rui_item_committing(
 172         struct xfs_log_item     *lip,
 173         xfs_lsn_t               lsn)
 174 {
 175 }
 176
 177 /*
 178  * This is the ops vector shared by all rui log items.
 179  */
 180 static const struct xfs_item_ops xfs_rui_item_ops = {
 181         .iop_size       = xfs_rui_item_size,
 182         .iop_format     = xfs_rui_item_format,
 183         .iop_pin        = xfs_rui_item_pin,
 184         .iop_unpin      = xfs_rui_item_unpin,
 185         .iop_unlock     = xfs_rui_item_unlock,
 186         .iop_committed  = xfs_rui_item_committed,
 187         .iop_push       = xfs_rui_item_push,
 188         .iop_committing = xfs_rui_item_committing,
 189 };
 190
 191 /*
 192  * Allocate and initialize an rui item with the given number of extents.
 193  */
 194 struct xfs_rui_log_item *
 195 xfs_rui_init(
 196         struct xfs_mount                *mp,
 197         uint                            nextents)
 198
 199 {
 200         struct xfs_rui_log_item         *ruip;
 201
 202         ASSERT(nextents > 0);
 203         if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
 204                 ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP);
 205         else
 206                 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
 207
 208         xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
 209         ruip->rui_format.rui_nextents = nextents;
 210         ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
 211         atomic_set(&ruip->rui_next_extent, 0);
 212         atomic_set(&ruip->rui_refcount, 2);
 213
 214         return ruip;
 215 }
 216
 217 /*
 218  * Copy an RUI format buffer from the given buf, and into the destination
 219  * RUI format structure.  The RUI/RUD items were designed not to need any
 220  * special alignment handling.
 221  */
 222 int
 223 xfs_rui_copy_format(
 224         struct xfs_log_iovec            *buf,
 225         struct xfs_rui_log_format       *dst_rui_fmt)
 226 {
 227         struct xfs_rui_log_format       *src_rui_fmt;
 228         uint                            len;
 229
 230         src_rui_fmt = buf->i_addr;
 231         len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
 232
 233         if (buf->i_len != len)
 234                 return -EFSCORRUPTED;
 235
 236         memcpy(dst_rui_fmt, src_rui_fmt, len);
 237         return 0;
 238 }
 239
 240 static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
 241 {
 242         return container_of(lip, struct xfs_rud_log_item, rud_item);
 243 }
 244
 245 STATIC void
 246 xfs_rud_item_size(
 247         struct xfs_log_item     *lip,
 248         int                     *nvecs,
 249         int                     *nbytes)
 250 {
 251         *nvecs += 1;
 252         *nbytes += sizeof(struct xfs_rud_log_format);
 253 }
 254
 255 /*
 256  * This is called to fill in the vector of log iovecs for the
 257  * given rud log item. We use only 1 iovec, and we point that
 258  * at the rud_log_format structure embedded in the rud item.
 259  * It is at this point that we assert that all of the extent
 260  * slots in the rud item have been filled.
 261  */
 262 STATIC void
 263 xfs_rud_item_format(
 264         struct xfs_log_item     *lip,
 265         struct xfs_log_vec      *lv)
 266 {
 267         struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
 268         struct xfs_log_iovec    *vecp = NULL;
 269
 270         rudp->rud_format.rud_type = XFS_LI_RUD;
 271         rudp->rud_format.rud_size = 1;
 272
 273         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
 274                         sizeof(struct xfs_rud_log_format));
 275 }
 276
 277 /*
 278  * Pinning has no meaning for an rud item, so just return.
 279  */
 280 STATIC void
 281 xfs_rud_item_pin(
 282         struct xfs_log_item     *lip)
 283 {
 284 }
 285
 286 /*
 287  * Since pinning has no meaning for an rud item, unpinning does
 288  * not either.
 289  */
 290 STATIC void
 291 xfs_rud_item_unpin(
 292         struct xfs_log_item     *lip,
 293         int                     remove)
 294 {
 295 }
 296
 297 /*
 298  * There isn't much you can do to push on an rud item.  It is simply stuck
 299  * waiting for the log to be flushed to disk.
 300  */
 301 STATIC uint
 302 xfs_rud_item_push(
 303         struct xfs_log_item     *lip,
 304         struct list_head        *buffer_list)
 305 {
 306         return XFS_ITEM_PINNED;
 307 }
 308
 309 /*
 310  * The RUD is either committed or aborted if the transaction is cancelled. If
 311  * the transaction is cancelled, drop our reference to the RUI and free the
 312  * RUD.
 313  */
 314 STATIC void
 315 xfs_rud_item_unlock(
 316         struct xfs_log_item     *lip)
 317 {
 318         struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
 319
 320         if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
 321                 xfs_rui_release(rudp->rud_ruip);
 322                 kmem_zone_free(xfs_rud_zone, rudp);
 323         }
 324 }
 325
 326 /*
 327  * When the rud item is committed to disk, all we need to do is delete our
 328  * reference to our partner rui item and then free ourselves. Since we're
 329  * freeing ourselves we must return -1 to keep the transaction code from
 330  * further referencing this item.
 331  */
 332 STATIC xfs_lsn_t
 333 xfs_rud_item_committed(
 334         struct xfs_log_item     *lip,
 335         xfs_lsn_t               lsn)
 336 {
 337         struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
 338
 339         /*
 340          * Drop the RUI reference regardless of whether the RUD has been
 341          * aborted. Once the RUD transaction is constructed, it is the sole
 342          * responsibility of the RUD to release the RUI (even if the RUI is
 343          * aborted due to log I/O error).
 344          */
 345         xfs_rui_release(rudp->rud_ruip);
 346         kmem_zone_free(xfs_rud_zone, rudp);
 347
 348         return (xfs_lsn_t)-1;
 349 }
 350
 351 /*
 352  * The RUD dependency tracking op doesn't do squat.  It can't because
 353  * it doesn't know where the free extent is coming from.  The dependency
 354  * tracking has to be handled by the "enclosing" metadata object.  For
 355  * example, for inodes, the inode is locked throughout the extent freeing
 356  * so the dependency should be recorded there.
 357  */
 358 STATIC void
 359 xfs_rud_item_committing(
 360         struct xfs_log_item     *lip,
 361         xfs_lsn_t               lsn)
 362 {
 363 }
 364
 365 /*
 366  * This is the ops vector shared by all rud log items.
 367  */
 368 static const struct xfs_item_ops xfs_rud_item_ops = {
 369         .iop_size       = xfs_rud_item_size,
 370         .iop_format     = xfs_rud_item_format,
 371         .iop_pin        = xfs_rud_item_pin,
 372         .iop_unpin      = xfs_rud_item_unpin,
 373         .iop_unlock     = xfs_rud_item_unlock,
 374         .iop_committed  = xfs_rud_item_committed,
 375         .iop_push       = xfs_rud_item_push,
 376         .iop_committing = xfs_rud_item_committing,
 377 };
 378
 379 /*
 380  * Allocate and initialize an rud item with the given number of extents.
 381  */
 382 struct xfs_rud_log_item *
 383 xfs_rud_init(
 384         struct xfs_mount                *mp,
 385         struct xfs_rui_log_item         *ruip)
 386
 387 {
 388         struct xfs_rud_log_item *rudp;
 389
 390         rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
 391         xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
 392         rudp->rud_ruip = ruip;
 393         rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
 394
 395         return rudp;
 396 }
 397
 398 /*
 399  * Process an rmap update intent item that was recovered from the log.
 400  * We need to update the rmapbt.
 401  */
 402 int
 403 xfs_rui_recover(
 404         struct xfs_mount                *mp,
 405         struct xfs_rui_log_item         *ruip)
 406 {
 407         int                             i;
 408         int                             error = 0;
 409         struct xfs_map_extent           *rmap;
 410         xfs_fsblock_t                   startblock_fsb;
 411         bool                            op_ok;
 412         struct xfs_rud_log_item         *rudp;
 413         enum xfs_rmap_intent_type       type;
 414         int                             whichfork;
 415         xfs_exntst_t                    state;
 416         struct xfs_trans                *tp;
 417         struct xfs_btree_cur            *rcur = NULL;
 418
 419         ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));
 420
 421         /*
 422          * First check the validity of the extents described by the
 423          * RUI.  If any are bad, then assume that all are bad and
 424          * just toss the RUI.
 425          */
 426         for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
 427                 rmap = &ruip->rui_format.rui_extents[i];
 428                 startblock_fsb = XFS_BB_TO_FSB(mp,
 429                                    XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
 430                 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
 431                 case XFS_RMAP_EXTENT_MAP:
 432                 case XFS_RMAP_EXTENT_MAP_SHARED:
 433                 case XFS_RMAP_EXTENT_UNMAP:
 434                 case XFS_RMAP_EXTENT_UNMAP_SHARED:
 435                 case XFS_RMAP_EXTENT_CONVERT:
 436                 case XFS_RMAP_EXTENT_CONVERT_SHARED:
 437                 case XFS_RMAP_EXTENT_ALLOC:
 438                 case XFS_RMAP_EXTENT_FREE:
 439                         op_ok = true;
 440                         break;
 441                 default:
 442                         op_ok = false;
 443                         break;
 444                 }
 445                 if (!op_ok || startblock_fsb == 0 ||
 446                     rmap->me_len == 0 ||
 447                     startblock_fsb >= mp->m_sb.sb_dblocks ||
 448                     rmap->me_len >= mp->m_sb.sb_agblocks ||
 449                     (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
 450                         /*
 451                          * This will pull the RUI from the AIL and
 452                          * free the memory associated with it.
 453                          */
 454                         set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
 455                         xfs_rui_release(ruip);
 456                         return -EIO;
 457                 }
 458         }
 459
 460         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
 461                         mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
 462         if (error)
 463                 return error;
 464         rudp = xfs_trans_get_rud(tp, ruip);
 465
 466         for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
 467                 rmap = &ruip->rui_format.rui_extents[i];
 468                 state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
 469                                 XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
 470                 whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
 471                                 XFS_ATTR_FORK : XFS_DATA_FORK;
 472                 switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
 473                 case XFS_RMAP_EXTENT_MAP:
 474                         type = XFS_RMAP_MAP;
 475                         break;
 476                 case XFS_RMAP_EXTENT_MAP_SHARED:
 477                         type = XFS_RMAP_MAP_SHARED;
 478                         break;
 479                 case XFS_RMAP_EXTENT_UNMAP:
 480                         type = XFS_RMAP_UNMAP;
 481                         break;
 482                 case XFS_RMAP_EXTENT_UNMAP_SHARED:
 483                         type = XFS_RMAP_UNMAP_SHARED;
 484                         break;
 485                 case XFS_RMAP_EXTENT_CONVERT:
 486                         type = XFS_RMAP_CONVERT;
 487                         break;
 488                 case XFS_RMAP_EXTENT_CONVERT_SHARED:
 489                         type = XFS_RMAP_CONVERT_SHARED;
 490                         break;
 491                 case XFS_RMAP_EXTENT_ALLOC:
 492                         type = XFS_RMAP_ALLOC;
 493                         break;
 494                 case XFS_RMAP_EXTENT_FREE:
 495                         type = XFS_RMAP_FREE;
 496                         break;
 497                 default:
 498                         error = -EFSCORRUPTED;
 499                         goto abort_error;
 500                 }
 501                 error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
 502                                 rmap->me_owner, whichfork,
 503                                 rmap->me_startoff, rmap->me_startblock,
 504                                 rmap->me_len, state, &rcur);
 505                 if (error)
 506                         goto abort_error;
 507
 508         }
 509
 510         xfs_rmap_finish_one_cleanup(tp, rcur, error);
 511         set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
 512         error = xfs_trans_commit(tp);
 513         return error;
 514
 515 abort_error:
 516         xfs_rmap_finish_one_cleanup(tp, rcur, error);
 517         xfs_trans_cancel(tp);
 518         return error;
 519 }