1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
12 #include "xfs_shared.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_trans.h"
16 #include "xfs_trans_priv.h"
17 #include "xfs_refcount_item.h"
19 #include "xfs_refcount.h"
20 #include "xfs_error.h"
22 kmem_zone_t
*xfs_cui_zone
;
23 kmem_zone_t
*xfs_cud_zone
;
25 static inline struct xfs_cui_log_item
*CUI_ITEM(struct xfs_log_item
*lip
)
27 return container_of(lip
, struct xfs_cui_log_item
, cui_item
);
/*
 * Free the memory of a CUI log item.
 *
 * NOTE(review): the function-name line and the statement guarded by the
 * size test are not visible here.  Presumably items with more than
 * XFS_CUI_MAX_FAST_EXTENTS extents are freed another way and only the
 * remaining ones go back to xfs_cui_zone -- confirm against the full file.
 */
32 struct xfs_cui_log_item
*cuip
)
34 if (cuip
->cui_format
.cui_nextents
> XFS_CUI_MAX_FAST_EXTENTS
)
37 kmem_cache_free(xfs_cui_zone
, cuip
);
41 * Freeing the CUI requires that we remove it from the AIL if it has already
42 * been placed there. However, the CUI may not yet have been placed in the AIL
43 * when called by xfs_cui_release() from CUD processing due to the ordering of
44 * committed vs unpin operations in bulk insert operations. Hence the reference
45 * count to ensure only the last caller frees the CUI.
/*
 * Drop one reference on the CUI.  The caller must hold a reference (the
 * count is asserted to be positive).  When the count reaches zero the item
 * is removed from the AIL and freed.
 *
 * NOTE(review): the function-name line is not visible here; presumably this
 * is xfs_cui_release() -- confirm.
 */
49 struct xfs_cui_log_item
*cuip
)
51 ASSERT(atomic_read(&cuip
->cui_refcount
) > 0);
52 if (atomic_dec_and_test(&cuip
->cui_refcount
)) {
/* Last reference gone: take the item off the AIL, then free it. */
53 xfs_trans_ail_remove(&cuip
->cui_item
, SHUTDOWN_LOG_IO_ERROR
);
54 xfs_cui_item_free(cuip
);
/*
 * Add the size of this CUI's log format structure, which depends on its
 * extent count, to *nbytes.
 *
 * NOTE(review): the function-name line and the remaining parameters are not
 * visible here; presumably this is the iop_size handler -- confirm.
 */
61 struct xfs_log_item
*lip
,
65 struct xfs_cui_log_item
*cuip
= CUI_ITEM(lip
);
68 *nbytes
+= xfs_cui_log_format_sizeof(cuip
->cui_format
.cui_nextents
);
72 * This is called to fill in the vector of log iovecs for the
73 * given cui log item. We use only 1 iovec, and we point that
74 * at the cui_log_format structure embedded in the cui item.
75 * It is at this point that we assert that all of the extent
76 * slots in the cui item have been filled.
/*
 * Format a CUI into the log vector: stamp the type and size fields of the
 * embedded cui_format and copy it out as one XLOG_REG_TYPE_CUI_FORMAT region.
 *
 * NOTE(review): the function-name line is not visible here; presumably this
 * is the iop_format handler -- confirm.
 */
80 struct xfs_log_item
*lip
,
81 struct xfs_log_vec
*lv
)
83 struct xfs_cui_log_item
*cuip
= CUI_ITEM(lip
);
84 struct xfs_log_iovec
*vecp
= NULL
;
/* Every extent slot must have been filled before the item is formatted. */
86 ASSERT(atomic_read(&cuip
->cui_next_extent
) ==
87 cuip
->cui_format
.cui_nextents
);
89 cuip
->cui_format
.cui_type
= XFS_LI_CUI
;
90 cuip
->cui_format
.cui_size
= 1;
/* The copied length covers the format header plus cui_nextents extents. */
92 xlog_copy_iovec(lv
, &vecp
, XLOG_REG_TYPE_CUI_FORMAT
, &cuip
->cui_format
,
93 xfs_cui_log_format_sizeof(cuip
->cui_format
.cui_nextents
));
97 * The unpin operation is the last place a CUI is manipulated in the log. It is
98 * either inserted in the AIL or aborted in the event of a log I/O error. In
99 * either case, the CUI transaction has been successfully committed to make it
100 * this far. Therefore, we expect whoever committed the CUI to either construct
101 * and commit the CUD or drop the CUD's reference in the event of error. Simply
102 * drop the log's CUI reference now that the log is done with it.
/*
 * Unpin handler body: resolve the CUI from the log item and drop one
 * reference on it.
 *
 * NOTE(review): the function-name line and the second parameter are not
 * visible here; presumably this is the iop_unpin handler -- confirm.
 */
106 struct xfs_log_item
*lip
,
109 struct xfs_cui_log_item
*cuip
= CUI_ITEM(lip
);
111 xfs_cui_release(cuip
);
115 * The CUI has been either committed or aborted if the transaction has been
116 * cancelled. If the transaction was cancelled, a CUD isn't going to be
117 * constructed and thus we free the CUI here directly.
/*
 * Release handler for a CUI log item: drop one reference on the CUI that
 * embeds lip.
 */
120 xfs_cui_item_release(
121 struct xfs_log_item
*lip
)
123 xfs_cui_release(CUI_ITEM(lip
));
126 static const struct xfs_item_ops xfs_cui_item_ops
= {
127 .iop_size
= xfs_cui_item_size
,
128 .iop_format
= xfs_cui_item_format
,
129 .iop_unpin
= xfs_cui_item_unpin
,
130 .iop_release
= xfs_cui_item_release
,
134 * Allocate and initialize a CUI item with the given number of extents.
/*
 * Allocate a zeroed CUI for nextents extents and initialise its log item,
 * format header and counters.
 *
 * NOTE(review): the function-name line, the nextents parameter, the rest of
 * the kmem_zalloc() call, the else keyword and the return statement are not
 * visible here.  Presumably this is xfs_cui_init() and returns cuip --
 * confirm against the full file.
 */
136 struct xfs_cui_log_item
*
138 struct xfs_mount
*mp
,
142 struct xfs_cui_log_item
*cuip
;
144 ASSERT(nextents
> 0);
/* Oversized items get a heap allocation sized for their extent count. */
145 if (nextents
> XFS_CUI_MAX_FAST_EXTENTS
)
146 cuip
= kmem_zalloc(xfs_cui_log_item_sizeof(nextents
),
/* Otherwise (presumably the else arm) allocate from the CUI zone. */
149 cuip
= kmem_zone_zalloc(xfs_cui_zone
, 0);
151 xfs_log_item_init(mp
, &cuip
->cui_item
, XFS_LI_CUI
, &xfs_cui_item_ops
);
152 cuip
->cui_format
.cui_nextents
= nextents
;
/* The item's own address serves as its id. */
153 cuip
->cui_format
.cui_id
= (uintptr_t)(void *)cuip
;
154 atomic_set(&cuip
->cui_next_extent
, 0);
/* Starts with two references -- presumably one for the log and one for the CUD side; confirm. */
155 atomic_set(&cuip
->cui_refcount
, 2);
160 static inline struct xfs_cud_log_item
*CUD_ITEM(struct xfs_log_item
*lip
)
162 return container_of(lip
, struct xfs_cud_log_item
, cud_item
);
/*
 * Add the fixed size of the CUD log format structure to *nbytes.
 *
 * NOTE(review): the function-name line and the remaining parameters are not
 * visible here; presumably this is the iop_size handler for CUDs -- confirm.
 */
167 struct xfs_log_item
*lip
,
172 *nbytes
+= sizeof(struct xfs_cud_log_format
);
176 * This is called to fill in the vector of log iovecs for the
177 * given cud log item. We use only 1 iovec, and we point that
178 * at the cud_log_format structure embedded in the cud item.
179 * It is at this point that we assert that all of the extent
180 * slots in the cud item have been filled.
/*
 * Format a CUD into the log vector: stamp the type and size fields of the
 * embedded cud_format and copy it out as one XLOG_REG_TYPE_CUD_FORMAT region.
 *
 * NOTE(review): the function-name line is not visible here; presumably this
 * is the iop_format handler for CUDs -- confirm.
 */
184 struct xfs_log_item
*lip
,
185 struct xfs_log_vec
*lv
)
187 struct xfs_cud_log_item
*cudp
= CUD_ITEM(lip
);
188 struct xfs_log_iovec
*vecp
= NULL
;
190 cudp
->cud_format
.cud_type
= XFS_LI_CUD
;
191 cudp
->cud_format
.cud_size
= 1;
/* The CUD format has a fixed size, unlike the CUI format. */
193 xlog_copy_iovec(lv
, &vecp
, XLOG_REG_TYPE_CUD_FORMAT
, &cudp
->cud_format
,
194 sizeof(struct xfs_cud_log_format
));
198 * The CUD is either committed or aborted if the transaction is cancelled. If
199 * the transaction is cancelled, drop our reference to the CUI and free the CUD.
/*
 * Release handler for a CUD log item: drop the reference on the CUI this
 * CUD points at, then return the CUD to its zone.
 */
203 xfs_cud_item_release(
204 struct xfs_log_item
*lip
)
206 struct xfs_cud_log_item
*cudp
= CUD_ITEM(lip
);
/* Release the CUI first; cudp is still valid here because it is freed last. */
208 xfs_cui_release(cudp
->cud_cuip
);
209 kmem_cache_free(xfs_cud_zone
, cudp
);
212 static const struct xfs_item_ops xfs_cud_item_ops
= {
213 .flags
= XFS_ITEM_RELEASE_WHEN_COMMITTED
,
214 .iop_size
= xfs_cud_item_size
,
215 .iop_format
= xfs_cud_item_format
,
216 .iop_release
= xfs_cud_item_release
,
/*
 * Allocate a zeroed CUD, link it to the given CUI, copy the CUI's id into
 * the CUD format and add the CUD to the transaction.
 *
 * NOTE(review): the function-name line, the last argument of
 * xfs_log_item_init() and the return statement are not visible here.
 * Presumably this is xfs_trans_get_cud() and returns cudp -- confirm.
 */
219 static struct xfs_cud_log_item
*
221 struct xfs_trans
*tp
,
222 struct xfs_cui_log_item
*cuip
)
224 struct xfs_cud_log_item
*cudp
;
226 cudp
= kmem_zone_zalloc(xfs_cud_zone
, 0);
227 xfs_log_item_init(tp
->t_mountp
, &cudp
->cud_item
, XFS_LI_CUD
,
/* Remember which CUI this CUD completes, by pointer and by logged id. */
229 cudp
->cud_cuip
= cuip
;
230 cudp
->cud_format
.cud_cui_id
= cuip
->cui_format
.cui_id
;
232 xfs_trans_add_item(tp
, &cudp
->cud_item
);
237 * Finish a refcount update and log it to the CUD. Note that the
238 * transaction is marked dirty regardless of whether the refcount
239 * update succeeds or fails to support the CUI/CUD lifecycle rules.
/*
 * Perform one refcount update through xfs_refcount_finish_one() and then
 * mark both the transaction and the CUD log item dirty, whatever the result.
 *
 * new_fsb, new_len and pcur are passed straight through to
 * xfs_refcount_finish_one().
 *
 * NOTE(review): the return type, the declaration of error and the return
 * statement are not visible here; presumably error is returned -- confirm.
 */
242 xfs_trans_log_finish_refcount_update(
243 struct xfs_trans
*tp
,
244 struct xfs_cud_log_item
*cudp
,
245 enum xfs_refcount_intent_type type
,
246 xfs_fsblock_t startblock
,
247 xfs_extlen_t blockcount
,
248 xfs_fsblock_t
*new_fsb
,
249 xfs_extlen_t
*new_len
,
250 struct xfs_btree_cur
**pcur
)
254 error
= xfs_refcount_finish_one(tp
, type
, startblock
,
255 blockcount
, new_fsb
, new_len
, pcur
);
258 * Mark the transaction dirty, even on error. This ensures the
259 * transaction is aborted, which:
261 * 1.) releases the CUI and frees the CUD
262 * 2.) shuts down the filesystem
264 tp
->t_flags
|= XFS_TRANS_DIRTY
;
265 set_bit(XFS_LI_DIRTY
, &cudp
->cud_item
.li_flags
);
270 /* Sort refcount intents by AG. */
/*
 * Comparison callback for two refcount intents: returns the difference
 * between the AG numbers of their start blocks (a minus b).
 *
 * NOTE(review): the return type and the parameter list (priv, a, b) are not
 * visible here.
 */
272 xfs_refcount_update_diff_items(
277 struct xfs_mount
*mp
= priv
;
278 struct xfs_refcount_intent
*ra
;
279 struct xfs_refcount_intent
*rb
;
/* a and b are the ri_list links embedded in the intents. */
281 ra
= container_of(a
, struct xfs_refcount_intent
, ri_list
);
282 rb
= container_of(b
, struct xfs_refcount_intent
, ri_list
);
283 return XFS_FSB_TO_AGNO(mp
, ra
->ri_startblock
) -
284 XFS_FSB_TO_AGNO(mp
, rb
->ri_startblock
);
/*
 * Create a CUI sized for count extents and attach it to the transaction.
 *
 * NOTE(review): the return type, the count parameter and the return
 * statement are not visible here; presumably the new CUI is returned --
 * confirm.
 */
289 xfs_refcount_update_create_intent(
290 struct xfs_trans
*tp
,
293 struct xfs_cui_log_item
*cuip
;
298 cuip
= xfs_cui_init(tp
->t_mountp
, count
);
299 ASSERT(cuip
!= NULL
);
302 * Get a log_item_desc to point at the new item.
304 xfs_trans_add_item(tp
, &cuip
->cui_item
);
308 /* Set the phys extent flags for this refcount update. */
/*
 * Record the refcount intent type in the flags of a logged physical extent.
 * For the four known intent types the type value is OR-ed into pe_flags.
 *
 * NOTE(review): the return type, the switch statement head, any earlier
 * initialisation of pe_flags and the default arm are not visible here.
 */
310 xfs_trans_set_refcount_flags(
311 struct xfs_phys_extent
*refc
,
312 enum xfs_refcount_intent_type type
)
316 case XFS_REFCOUNT_INCREASE
:
317 case XFS_REFCOUNT_DECREASE
:
318 case XFS_REFCOUNT_ALLOC_COW
:
319 case XFS_REFCOUNT_FREE_COW
:
320 refc
->pe_flags
|= type
;
327 /* Log refcount updates in the intent item. */
/*
 * Copy one refcount intent into the next free extent slot of the CUI and
 * mark the transaction and the CUI log item dirty.
 *
 * NOTE(review): the return type, the intent parameter and the declaration of
 * next_extent are not visible here.
 */
329 xfs_refcount_update_log_item(
330 struct xfs_trans
*tp
,
332 struct list_head
*item
)
334 struct xfs_cui_log_item
*cuip
= intent
;
335 struct xfs_refcount_intent
*refc
;
337 struct xfs_phys_extent
*ext
;
339 refc
= container_of(item
, struct xfs_refcount_intent
, ri_list
);
341 tp
->t_flags
|= XFS_TRANS_DIRTY
;
342 set_bit(XFS_LI_DIRTY
, &cuip
->cui_item
.li_flags
);
345 * atomic_inc_return gives us the value after the increment;
346 * we want to use it as an array index so we need to subtract 1 from
349 next_extent
= atomic_inc_return(&cuip
->cui_next_extent
) - 1;
/* The claimed slot must lie within the extent count the CUI was created with. */
350 ASSERT(next_extent
< cuip
->cui_format
.cui_nextents
);
351 ext
= &cuip
->cui_format
.cui_extents
[next_extent
];
352 ext
->pe_startblock
= refc
->ri_startblock
;
353 ext
->pe_len
= refc
->ri_blockcount
;
354 xfs_trans_set_refcount_flags(ext
, refc
->ri_type
);
357 /* Get a CUD so we can process all the deferred refcount updates. */
/*
 * Create the done item for an intent: returns the result of
 * xfs_trans_get_cud() for the given transaction and intent.
 *
 * NOTE(review): the return type and the remaining parameters are not visible
 * here.
 */
359 xfs_refcount_update_create_done(
360 struct xfs_trans
*tp
,
364 return xfs_trans_get_cud(tp
, intent
);
367 /* Process a deferred refcount update. */
/*
 * Finish one deferred refcount work item.  If the update succeeds but leaves
 * a non-zero remaining length, the intent is rewritten to describe the
 * unfinished part.
 *
 * NOTE(review): the return type, the done_item and state parameters, the
 * declaration of error, part of the argument list and everything after the
 * requeue branch are not visible here.
 */
369 xfs_refcount_update_finish_item(
370 struct xfs_trans
*tp
,
371 struct list_head
*item
,
375 struct xfs_refcount_intent
*refc
;
376 xfs_fsblock_t new_fsb
;
377 xfs_extlen_t new_aglen
;
380 refc
= container_of(item
, struct xfs_refcount_intent
, ri_list
);
381 error
= xfs_trans_log_finish_refcount_update(tp
, done_item
,
385 &new_fsb
, &new_aglen
,
386 (struct xfs_btree_cur
**)state
);
387 /* Did we run out of reservation? Requeue what we didn't finish. */
388 if (!error
&& new_aglen
> 0) {
/* Only increase and decrease intents are expected to leave work over. */
389 ASSERT(refc
->ri_type
== XFS_REFCOUNT_INCREASE
||
390 refc
->ri_type
== XFS_REFCOUNT_DECREASE
);
391 refc
->ri_startblock
= new_fsb
;
392 refc
->ri_blockcount
= new_aglen
;
399 /* Clean up after processing deferred refcounts. */
/*
 * Clean up after a run of deferred refcount updates: state is treated as the
 * btree cursor and handed to xfs_refcount_finish_one_cleanup() together with
 * the error code.
 *
 * NOTE(review): the return type and the state and error parameters are not
 * visible here.
 */
401 xfs_refcount_update_finish_cleanup(
402 struct xfs_trans
*tp
,
406 struct xfs_btree_cur
*rcur
= state
;
408 xfs_refcount_finish_one_cleanup(tp
, rcur
, error
);
411 /* Abort all pending CUIs. */
/*
 * Abort an intent by dropping one reference on the CUI.
 *
 * NOTE(review): the return type and the intent parameter are not visible
 * here.
 */
413 xfs_refcount_update_abort_intent(
416 xfs_cui_release(intent
);
419 /* Cancel a deferred refcount update. */
/*
 * Cancel a deferred refcount work item: resolve the intent that embeds the
 * list link.
 *
 * NOTE(review): the return type and whatever is done with refc afterwards
 * (presumably freeing it) are not visible here -- confirm.
 */
421 xfs_refcount_update_cancel_item(
422 struct list_head
*item
)
424 struct xfs_refcount_intent
*refc
;
426 refc
= container_of(item
, struct xfs_refcount_intent
, ri_list
);
430 const struct xfs_defer_op_type xfs_refcount_update_defer_type
= {
431 .max_items
= XFS_CUI_MAX_FAST_EXTENTS
,
432 .diff_items
= xfs_refcount_update_diff_items
,
433 .create_intent
= xfs_refcount_update_create_intent
,
434 .abort_intent
= xfs_refcount_update_abort_intent
,
435 .log_item
= xfs_refcount_update_log_item
,
436 .create_done
= xfs_refcount_update_create_done
,
437 .finish_item
= xfs_refcount_update_finish_item
,
438 .finish_cleanup
= xfs_refcount_update_finish_cleanup
,
439 .cancel_item
= xfs_refcount_update_cancel_item
,
443 * Process a refcount update intent item that was recovered from the log.
444 * We need to update the refcountbt.
448 struct xfs_trans
*parent_tp
,
449 struct xfs_cui_log_item
*cuip
)
453 unsigned int refc_type
;
454 struct xfs_phys_extent
*refc
;
455 xfs_fsblock_t startblock_fsb
;
457 struct xfs_cud_log_item
*cudp
;
458 struct xfs_trans
*tp
;
459 struct xfs_btree_cur
*rcur
= NULL
;
460 enum xfs_refcount_intent_type type
;
461 xfs_fsblock_t new_fsb
;
462 xfs_extlen_t new_len
;
463 struct xfs_bmbt_irec irec
;
464 bool requeue_only
= false;
465 struct xfs_mount
*mp
= parent_tp
->t_mountp
;
467 ASSERT(!test_bit(XFS_CUI_RECOVERED
, &cuip
->cui_flags
));
470 * First check the validity of the extents described by the
471 * CUI. If any are bad, then assume that all are bad and
474 for (i
= 0; i
< cuip
->cui_format
.cui_nextents
; i
++) {
475 refc
= &cuip
->cui_format
.cui_extents
[i
];
476 startblock_fsb
= XFS_BB_TO_FSB(mp
,
477 XFS_FSB_TO_DADDR(mp
, refc
->pe_startblock
));
478 switch (refc
->pe_flags
& XFS_REFCOUNT_EXTENT_TYPE_MASK
) {
479 case XFS_REFCOUNT_INCREASE
:
480 case XFS_REFCOUNT_DECREASE
:
481 case XFS_REFCOUNT_ALLOC_COW
:
482 case XFS_REFCOUNT_FREE_COW
:
489 if (!op_ok
|| startblock_fsb
== 0 ||
491 startblock_fsb
>= mp
->m_sb
.sb_dblocks
||
492 refc
->pe_len
>= mp
->m_sb
.sb_agblocks
||
493 (refc
->pe_flags
& ~XFS_REFCOUNT_EXTENT_FLAGS
)) {
495 * This will pull the CUI from the AIL and
496 * free the memory associated with it.
498 set_bit(XFS_CUI_RECOVERED
, &cuip
->cui_flags
);
499 xfs_cui_release(cuip
);
500 return -EFSCORRUPTED
;
505 * Under normal operation, refcount updates are deferred, so we
506 * wouldn't be adding them directly to a transaction. All
507 * refcount updates manage reservation usage internally and
508 * dynamically by deferring work that won't fit in the
509 * transaction. Normally, any work that needs to be deferred
510 * gets attached to the same defer_ops that scheduled the
511 * refcount update. However, we're in log recovery here, so we
512 * use the passed in defer_ops to finish up any work that
513 * doesn't fit. We need to reserve enough blocks to handle a
514 * full btree split on either end of the refcount range.
516 error
= xfs_trans_alloc(mp
, &M_RES(mp
)->tr_itruncate
,
517 mp
->m_refc_maxlevels
* 2, 0, XFS_TRANS_RESERVE
, &tp
);
521 * Recovery stashes all deferred ops during intent processing and
522 * finishes them on completion. Transfer current dfops state to this
523 * transaction and transfer the result back before we return.
525 xfs_defer_move(tp
, parent_tp
);
526 cudp
= xfs_trans_get_cud(tp
, cuip
);
528 for (i
= 0; i
< cuip
->cui_format
.cui_nextents
; i
++) {
529 refc
= &cuip
->cui_format
.cui_extents
[i
];
530 refc_type
= refc
->pe_flags
& XFS_REFCOUNT_EXTENT_TYPE_MASK
;
532 case XFS_REFCOUNT_INCREASE
:
533 case XFS_REFCOUNT_DECREASE
:
534 case XFS_REFCOUNT_ALLOC_COW
:
535 case XFS_REFCOUNT_FREE_COW
:
539 XFS_ERROR_REPORT(__func__
, XFS_ERRLEVEL_LOW
, mp
);
540 error
= -EFSCORRUPTED
;
544 new_fsb
= refc
->pe_startblock
;
545 new_len
= refc
->pe_len
;
547 error
= xfs_trans_log_finish_refcount_update(tp
, cudp
,
548 type
, refc
->pe_startblock
, refc
->pe_len
,
549 &new_fsb
, &new_len
, &rcur
);
553 /* Requeue what we didn't finish. */
555 irec
.br_startblock
= new_fsb
;
556 irec
.br_blockcount
= new_len
;
558 case XFS_REFCOUNT_INCREASE
:
559 xfs_refcount_increase_extent(tp
, &irec
);
561 case XFS_REFCOUNT_DECREASE
:
562 xfs_refcount_decrease_extent(tp
, &irec
);
564 case XFS_REFCOUNT_ALLOC_COW
:
565 xfs_refcount_alloc_cow_extent(tp
,
569 case XFS_REFCOUNT_FREE_COW
:
570 xfs_refcount_free_cow_extent(tp
,
581 xfs_refcount_finish_one_cleanup(tp
, rcur
, error
);
582 set_bit(XFS_CUI_RECOVERED
, &cuip
->cui_flags
);
583 xfs_defer_move(parent_tp
, tp
);
584 error
= xfs_trans_commit(tp
);
588 xfs_refcount_finish_one_cleanup(tp
, rcur
, error
);
589 xfs_defer_move(parent_tp
, tp
);
590 xfs_trans_cancel(tp
);