// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
kmem_zone_t	*xfs_cui_zone;
kmem_zone_t	*xfs_cud_zone;

static const struct xfs_item_ops xfs_cui_item_ops;
static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cui_log_item, cui_item);
}
STATIC void
xfs_cui_item_free(
	struct xfs_cui_log_item	*cuip)
{
	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
		kmem_free(cuip);
	else
		kmem_cache_free(xfs_cui_zone, cuip);
}
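
/*
 * Note that the free path mirrors the allocation in xfs_cui_init():
 * intents with more than XFS_CUI_MAX_FAST_EXTENTS records are
 * heap-allocated and must go back through kmem_free(), while the common
 * small case comes from (and returns to) the xfs_cui_zone slab.
 */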
/*
 * Freeing the CUI requires that we remove it from the AIL if it has already
 * been placed there. However, the CUI may not yet have been placed in the AIL
 * when called by xfs_cui_release() from CUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the CUI.
 */
STATIC void
xfs_cui_release(
	struct xfs_cui_log_item	*cuip)
{
	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
	if (atomic_dec_and_test(&cuip->cui_refcount)) {
		xfs_trans_ail_delete(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_cui_item_free(cuip);
	}
}
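
/*
 * Both references taken in xfs_cui_init() funnel through here: the log's
 * reference is dropped at unpin time and the CUD's reference when the
 * update is finished or cancelled, so whichever caller comes last does
 * the AIL removal and the free.
 */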
STATIC void
xfs_cui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}
/*
 * This is called to fill in the vector of log iovecs for the
 * given cui log item. We use only 1 iovec, and we point that
 * at the cui_log_format structure embedded in the cui item.
 * It is at this point that we assert that all of the extent
 * slots in the cui item have been filled.
 */
STATIC void
xfs_cui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&cuip->cui_next_extent) ==
			cuip->cui_format.cui_nextents);

	cuip->cui_format.cui_type = XFS_LI_CUI;
	cuip->cui_format.cui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}
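
/*
 * The whole log format, including the variable-length array of extent
 * records, lives in one contiguous buffer, which is why a single iovec
 * (recorded as cui_size = 1) is enough to describe it.
 */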
/*
 * The unpin operation is the last place a CUI is manipulated in the log. It is
 * either inserted in the AIL or aborted in the event of a log I/O error. In
 * either case, the CUI transaction has been successfully committed to make it
 * this far. Therefore, we expect whoever committed the CUI to either construct
 * and commit the CUD or drop the CUD's reference in the event of error. Simply
 * drop the log's CUI reference now that the log is done with it.
 */
STATIC void
xfs_cui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	xfs_cui_release(cuip);
}
/*
 * The CUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a CUD isn't going to be
 * constructed and thus we free the CUI here directly.
 */
STATIC void
xfs_cui_item_release(
	struct xfs_log_item	*lip)
{
	xfs_cui_release(CUI_ITEM(lip));
}
/*
 * Allocate and initialize a cui item with the given number of extents.
 */
STATIC struct xfs_cui_log_item *
xfs_cui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_cui_log_item		*cuip;

	ASSERT(nextents > 0);
	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 0);
	else
		cuip = kmem_zone_zalloc(xfs_cui_zone, 0);

	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
	cuip->cui_format.cui_nextents = nextents;
	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
	atomic_set(&cuip->cui_next_extent, 0);
	atomic_set(&cuip->cui_refcount, 2);

	return cuip;
}
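
/*
 * The initial reference count of 2 covers the two paths that call
 * xfs_cui_release(): one reference belongs to the log (dropped at unpin
 * or abort) and one to whoever finishes or cancels the work via the CUD.
 */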
static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cud_log_item, cud_item);
}
STATIC void
xfs_cud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_cud_log_format);
}
/*
 * This is called to fill in the vector of log iovecs for the
 * given cud log item. We use only 1 iovec, and we point that
 * at the cud_log_format structure embedded in the cud item.
 * It is at this point that we assert that all of the extent
 * slots in the cud item have been filled.
 */
STATIC void
xfs_cud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	cudp->cud_format.cud_type = XFS_LI_CUD;
	cudp->cud_format.cud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
			sizeof(struct xfs_cud_log_format));
}
/*
 * The CUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the CUI and free the
 * CUD.
 */
STATIC void
xfs_cud_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	xfs_cui_release(cudp->cud_cuip);
	kmem_cache_free(xfs_cud_zone, cudp);
}
static const struct xfs_item_ops xfs_cud_item_ops = {
	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
	.iop_size	= xfs_cud_item_size,
	.iop_format	= xfs_cud_item_format,
	.iop_release	= xfs_cud_item_release,
};
static struct xfs_cud_log_item *
xfs_trans_get_cud(
	struct xfs_trans		*tp,
	struct xfs_cui_log_item		*cuip)
{
	struct xfs_cud_log_item		*cudp;

	cudp = kmem_zone_zalloc(xfs_cud_zone, 0);
	xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
			  &xfs_cud_item_ops);
	cudp->cud_cuip = cuip;
	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;

	xfs_trans_add_item(tp, &cudp->cud_item);
	return cudp;
}
/*
 * Finish a refcount update and log it to the CUD. Note that the
 * transaction is marked dirty regardless of whether the refcount
 * update succeeds or fails to support the CUI/CUD lifecycle rules.
 */
static int
xfs_trans_log_finish_refcount_update(
	struct xfs_trans		*tp,
	struct xfs_cud_log_item		*cudp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	int				error;

	error = xfs_refcount_finish_one(tp, type, startblock,
			blockcount, new_fsb, new_len, pcur);

	/*
	 * Mark the transaction dirty, even on error. This ensures the
	 * transaction is aborted, which:
	 *
	 * 1.) releases the CUI and frees the CUD
	 * 2.) shuts down the filesystem
	 */
	tp->t_flags |= XFS_TRANS_DIRTY;
	set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);

	return error;
}
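
/*
 * xfs_refcount_finish_one() may stop early if the transaction runs low
 * on reservation; in that case *new_fsb and *new_len describe the
 * unfinished tail of the range so the caller can requeue it (see
 * xfs_refcount_update_finish_item() below).
 */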
/* Sort refcount intents by AG. */
static int
xfs_refcount_update_diff_items(
	void				*priv,
	struct list_head		*a,
	struct list_head		*b)
{
	struct xfs_mount		*mp = priv;
	struct xfs_refcount_intent	*ra;
	struct xfs_refcount_intent	*rb;

	ra = container_of(a, struct xfs_refcount_intent, ri_list);
	rb = container_of(b, struct xfs_refcount_intent, ri_list);
	return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
		XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
}
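
/*
 * Processing the intents in ascending AG order means the per-AG buffers
 * are always locked in the same order, the usual XFS convention for
 * avoiding ABBA deadlocks between transactions.
 */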
/* Set the phys extent flags for this refcount update. */
static void
xfs_trans_set_refcount_flags(
	struct xfs_phys_extent		*refc,
	enum xfs_refcount_intent_type	type)
{
	refc->pe_flags = 0;
	switch (type) {
	case XFS_REFCOUNT_INCREASE:
	case XFS_REFCOUNT_DECREASE:
	case XFS_REFCOUNT_ALLOC_COW:
	case XFS_REFCOUNT_FREE_COW:
		refc->pe_flags |= type;
		break;
	default:
		ASSERT(0);
	}
}
/* Log refcount updates in the intent item. */
STATIC void
xfs_refcount_update_log_item(
	struct xfs_trans		*tp,
	struct xfs_cui_log_item		*cuip,
	struct xfs_refcount_intent	*refc)
{
	uint				next_extent;
	struct xfs_phys_extent		*ext;

	tp->t_flags |= XFS_TRANS_DIRTY;
	set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);

	/*
	 * atomic_inc_return gives us the value after the increment;
	 * we want to use it as an array index so we need to subtract 1 from
	 * it.
	 */
	next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
	ASSERT(next_extent < cuip->cui_format.cui_nextents);
	ext = &cuip->cui_format.cui_extents[next_extent];
	ext->pe_startblock = refc->ri_startblock;
	ext->pe_len = refc->ri_blockcount;
	xfs_trans_set_refcount_flags(ext, refc->ri_type);
}
static struct xfs_log_item *
xfs_refcount_update_create_intent(
	struct xfs_trans		*tp,
	struct list_head		*items,
	unsigned int			count,
	bool				sort)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_cui_log_item		*cuip = xfs_cui_init(mp, count);
	struct xfs_refcount_intent	*refc;

	ASSERT(count > 0);

	xfs_trans_add_item(tp, &cuip->cui_item);
	if (sort)
		list_sort(mp, items, xfs_refcount_update_diff_items);
	list_for_each_entry(refc, items, ri_list)
		xfs_refcount_update_log_item(tp, cuip, refc);
	return &cuip->cui_item;
}
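
/*
 * Called via the deferred-ops machinery: it hands us the list of
 * xfs_refcount_intent items queued for this transaction roll and we
 * log one physical extent record per intent in the new CUI.
 */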
/* Get a CUD so we can process all the deferred refcount updates. */
static struct xfs_log_item *
xfs_refcount_update_create_done(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	unsigned int			count)
{
	return &xfs_trans_get_cud(tp, CUI_ITEM(intent))->cud_item;
}
/* Process a deferred refcount update. */
STATIC int
xfs_refcount_update_finish_item(
	struct xfs_trans		*tp,
	struct xfs_log_item		*done,
	struct list_head		*item,
	struct xfs_btree_cur		**state)
{
	struct xfs_refcount_intent	*refc;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_aglen;
	int				error;

	refc = container_of(item, struct xfs_refcount_intent, ri_list);
	error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done),
			refc->ri_type, refc->ri_startblock, refc->ri_blockcount,
			&new_fsb, &new_aglen, state);

	/* Did we run out of reservation? Requeue what we didn't finish. */
	if (!error && new_aglen > 0) {
		ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
		       refc->ri_type == XFS_REFCOUNT_DECREASE);
		refc->ri_startblock = new_fsb;
		refc->ri_blockcount = new_aglen;
		return -EAGAIN;
	}
	kmem_free(refc);
	return error;
}
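
/*
 * -EAGAIN tells the deferred-ops machinery to keep this intent queued
 * and retry the remaining range after rolling the transaction; any
 * other return value means the intent has been freed here.
 */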
/* Abort all pending CUIs. */
STATIC void
xfs_refcount_update_abort_intent(
	struct xfs_log_item		*intent)
{
	xfs_cui_release(CUI_ITEM(intent));
}
/* Cancel a deferred refcount update. */
STATIC void
xfs_refcount_update_cancel_item(
	struct list_head		*item)
{
	struct xfs_refcount_intent	*refc;

	refc = container_of(item, struct xfs_refcount_intent, ri_list);
	kmem_free(refc);
}
const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
	.create_intent	= xfs_refcount_update_create_intent,
	.abort_intent	= xfs_refcount_update_abort_intent,
	.create_done	= xfs_refcount_update_create_done,
	.finish_item	= xfs_refcount_update_finish_item,
	.finish_cleanup = xfs_refcount_finish_one_cleanup,
	.cancel_item	= xfs_refcount_update_cancel_item,
};
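
/*
 * Lifecycle sketch (illustrative, not part of the original file): a
 * caller queues work with, e.g., xfs_refcount_increase_extent() and
 * later calls xfs_defer_finish().  The defer machinery then invokes
 * ->create_intent to log the CUI, ->finish_item to update the refcount
 * btree and log the CUD, and ->abort_intent/->cancel_item on failure.
 */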
/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 */
STATIC int
xfs_cui_item_recover(
	struct xfs_log_item		*lip,
	struct xfs_trans		*parent_tp)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
	struct xfs_phys_extent		*refc;
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;
	struct xfs_mount		*mp = parent_tp->t_mountp;
	xfs_fsblock_t			startblock_fsb;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	unsigned int			refc_type;
	bool				op_ok;
	bool				requeue_only = false;
	enum xfs_refcount_intent_type	type;
	int				i;
	int				error = 0;

	/*
	 * First check the validity of the extents described by the
	 * CUI. If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		if (!op_ok || startblock_fsb == 0 ||
		    refc->pe_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    refc->pe_len >= mp->m_sb.sb_agblocks ||
		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
			/*
			 * This will pull the CUI from the AIL and
			 * free the memory associated with it.
			 */
			xfs_cui_release(cuip);
			return -EFSCORRUPTED;
		}
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction. All
	 * refcount updates manage reservation usage internally and
	 * dynamically by deferring work that won't fit in the
	 * transaction. Normally, any work that needs to be deferred
	 * gets attached to the same defer_ops that scheduled the
	 * refcount update. However, we're in log recovery here, so we
	 * use the passed-in defer_ops to finish up any work that
	 * doesn't fit. We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Recovery stashes all deferred ops during intent processing and
	 * finishes them on completion. Transfer current dfops state to this
	 * transaction and transfer the result back before we return.
	 */
	xfs_defer_move(tp, parent_tp);
	cudp = xfs_trans_get_cud(tp, cuip);

	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
		switch (refc_type) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			type = refc_type;
			break;
		default:
			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto abort_error;
		}
		if (requeue_only) {
			new_fsb = refc->pe_startblock;
			new_len = refc->pe_len;
		} else
			error = xfs_trans_log_finish_refcount_update(tp, cudp,
				type, refc->pe_startblock, refc->pe_len,
				&new_fsb, &new_len, &rcur);
		if (error)
			goto abort_error;

		/* Requeue what we didn't finish. */
		if (new_len > 0) {
			irec.br_startblock = new_fsb;
			irec.br_blockcount = new_len;
			switch (type) {
			case XFS_REFCOUNT_INCREASE:
				xfs_refcount_increase_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_DECREASE:
				xfs_refcount_decrease_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_ALLOC_COW:
				xfs_refcount_alloc_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			case XFS_REFCOUNT_FREE_COW:
				xfs_refcount_free_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			default:
				ASSERT(0);
			}
			requeue_only = true;
		}
	}

	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	error = xfs_trans_commit(tp);
	return error;

abort_error:
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	xfs_trans_cancel(tp);
	return error;
}
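
/*
 * Once one extent stops short (new_len > 0 above), requeue_only forces
 * every remaining extent to be re-deferred instead of applied here, so
 * the original ordering of the updates is preserved across the requeue.
 */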
STATIC bool
xfs_cui_item_match(
	struct xfs_log_item	*lip,
	uint64_t		intent_id)
{
	return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
}
static const struct xfs_item_ops xfs_cui_item_ops = {
	.iop_size	= xfs_cui_item_size,
	.iop_format	= xfs_cui_item_format,
	.iop_unpin	= xfs_cui_item_unpin,
	.iop_release	= xfs_cui_item_release,
	.iop_recover	= xfs_cui_item_recover,
	.iop_match	= xfs_cui_item_match,
};
/*
 * Copy a CUI format buffer from the given buf, and into the destination
 * CUI format structure. The CUI/CUD items were designed not to need any
 * special alignment handling.
 */
static int
xfs_cui_copy_format(
	struct xfs_log_iovec		*buf,
	struct xfs_cui_log_format	*dst_cui_fmt)
{
	struct xfs_cui_log_format	*src_cui_fmt;
	uint				len;

	src_cui_fmt = buf->i_addr;
	len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents);

	if (buf->i_len == len) {
		memcpy(dst_cui_fmt, src_cui_fmt, len);
		return 0;
	}
	XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
	return -EFSCORRUPTED;
}
/*
 * This routine is called to create an in-core extent refcount update
 * item from the cui format structure which was logged on disk.
 * It allocates an in-core cui, copies the extents from the format
 * structure into it, and adds the cui to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_cui_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	int				error;
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_cui_log_item		*cuip;
	struct xfs_cui_log_format	*cui_formatp;

	cui_formatp = item->ri_buf[0].i_addr;

	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
	error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
	if (error) {
		xfs_cui_item_free(cuip);
		return error;
	}
	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
	/*
	 * Insert the intent into the AIL directly and drop one reference so
	 * that finishing or canceling the work will drop the other.
	 */
	xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn);
	xfs_cui_release(cuip);
	return 0;
}
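
/*
 * If no matching CUD is ever found, the CUI stays in the AIL and
 * recovery will finish the work later through ->iop_recover; if a CUD
 * is found, the pass2 handler below cancels the intent instead.
 */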
const struct xlog_recover_item_ops xlog_cui_item_ops = {
	.item_type		= XFS_LI_CUI,
	.commit_pass2		= xlog_recover_cui_commit_pass2,
};
/*
 * This routine is called when a CUD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding CUI if it
 * was still in the log. To do this it searches the AIL for the CUI with an id
 * equal to that in the CUD format structure. If we find it we drop the CUD
 * reference, which removes the CUI from the AIL and frees it.
 */
STATIC int
xlog_recover_cud_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_cud_log_format	*cud_formatp;

	cud_formatp = item->ri_buf[0].i_addr;
	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}

	xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
	return 0;
}
const struct xlog_recover_item_ops xlog_cud_item_ops = {
	.item_type		= XFS_LI_CUD,
	.commit_pass2		= xlog_recover_cud_commit_pass2,
};