// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_trace.h"

struct kmem_cache	*xfs_cui_cache;
struct kmem_cache	*xfs_cud_cache;

static const struct xfs_item_ops xfs_cui_item_ops;

static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cui_log_item, cui_item);
}
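
/*
 * Free a CUI, releasing its log vector shadow buffer and returning oversized
 * allocations to the heap rather than the slab cache.
 */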
STATIC void
xfs_cui_item_free(
	struct xfs_cui_log_item	*cuip)
{
	kvfree(cuip->cui_item.li_lv_shadow);
	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
		kfree(cuip);
	else
		kmem_cache_free(xfs_cui_cache, cuip);
}

/*
 * Freeing the CUI requires that we remove it from the AIL if it has already
 * been placed there. However, the CUI may not yet have been placed in the
 * AIL when called by xfs_cui_release() from CUD processing due to the
 * ordering of committed vs unpin operations in bulk insert operations.
 * Hence the reference count to ensure only the last caller frees the CUI.
 */
STATIC void
xfs_cui_release(
	struct xfs_cui_log_item	*cuip)
{
	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
	if (!atomic_dec_and_test(&cuip->cui_refcount))
		return;

	xfs_trans_ail_delete(&cuip->cui_item, 0);
	xfs_cui_item_free(cuip);
}
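
/*
 * Report the number of log iovecs and the space needed to log the CUI with
 * all of its extents.
 */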
STATIC void
xfs_cui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the given cui log
 * item. We use only 1 iovec, and we point that at the cui_log_format
 * structure embedded in the cui item. It is at this point that we assert
 * that all of the extent slots in the cui item have been filled.
 */
STATIC void
xfs_cui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&cuip->cui_next_extent) ==
			cuip->cui_format.cui_nextents);

	cuip->cui_format.cui_type = XFS_LI_CUI;
	cuip->cui_format.cui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}

/*
 * The unpin operation is the last place a CUI is manipulated in the log. It
 * is either inserted in the AIL or aborted in the event of a log I/O error.
 * In either case, the CUI transaction has been successfully committed to
 * make it this far. Therefore, we expect whoever committed the CUI to
 * either construct and commit the CUD or drop the CUD's reference in the
 * event of error. Simply drop the log's CUI reference now that the log is
 * done with it.
 */
STATIC void
xfs_cui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	xfs_cui_release(cuip);
}

/*
 * The CUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a CUD isn't going to be
 * constructed and thus we free the CUI here directly.
 */
STATIC void
xfs_cui_item_release(
	struct xfs_log_item	*lip)
{
	xfs_cui_release(CUI_ITEM(lip));
}

/*
 * Allocate and initialize a cui item with the given number of extents.
 */
STATIC struct xfs_cui_log_item *
xfs_cui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_cui_log_item		*cuip;

	ASSERT(nextents > 0);
	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
		cuip = kzalloc(xfs_cui_log_item_sizeof(nextents),
				GFP_KERNEL | __GFP_NOFAIL);
	else
		cuip = kmem_cache_zalloc(xfs_cui_cache,
					 GFP_KERNEL | __GFP_NOFAIL);

	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
	cuip->cui_format.cui_nextents = nextents;
	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
	atomic_set(&cuip->cui_next_extent, 0);
	/* Two references: one dropped at unpin, one dropped with the CUD. */
	atomic_set(&cuip->cui_refcount, 2);

	return cuip;
}

static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cud_log_item, cud_item);
}
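
/*
 * Report the number of log iovecs and the space needed to log the CUD,
 * which is always a single fixed-size structure.
 */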
STATIC void
xfs_cud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_cud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the given cud log
 * item. We use only 1 iovec, and we point that at the cud_log_format
 * structure embedded in the cud item.
 */
STATIC void
xfs_cud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	cudp->cud_format.cud_type = XFS_LI_CUD;
	cudp->cud_format.cud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
			sizeof(struct xfs_cud_log_format));
}

/*
 * The CUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the CUI and free the
 * CUD.
 */
STATIC void
xfs_cud_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	xfs_cui_release(cudp->cud_cuip);
	kvfree(cudp->cud_item.li_lv_shadow);
	kmem_cache_free(xfs_cud_cache, cudp);
}

/* Return the CUI that this CUD completes. */
static struct xfs_log_item *
xfs_cud_item_intent(
	struct xfs_log_item	*lip)
{
	return &CUD_ITEM(lip)->cud_cuip->cui_item;
}

static const struct xfs_item_ops xfs_cud_item_ops = {
	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED |
			  XFS_ITEM_INTENT_DONE,
	.iop_size	= xfs_cud_item_size,
	.iop_format	= xfs_cud_item_format,
	.iop_release	= xfs_cud_item_release,
	.iop_intent	= xfs_cud_item_intent,
};
static inline struct xfs_refcount_intent *ci_entry(const struct list_head *e)
{
	return list_entry(e, struct xfs_refcount_intent, ri_list);
}

/* Sort refcount intents by AG. */
static int
xfs_refcount_update_diff_items(
	void			*priv,
	const struct list_head	*a,
	const struct list_head	*b)
{
	struct xfs_refcount_intent	*ra = ci_entry(a);
	struct xfs_refcount_intent	*rb = ci_entry(b);

	return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
}

/* Log refcount updates in the intent item. */
STATIC void
xfs_refcount_update_log_item(
	struct xfs_trans		*tp,
	struct xfs_cui_log_item		*cuip,
	struct xfs_refcount_intent	*ri)
{
	uint				next_extent;
	struct xfs_phys_extent		*pmap;

	/*
	 * atomic_inc_return gives us the value after the increment;
	 * we want to use it as an array index so we need to subtract 1 from
	 * it.
	 */
	next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
	ASSERT(next_extent < cuip->cui_format.cui_nextents);
	pmap = &cuip->cui_format.cui_extents[next_extent];
	pmap->pe_startblock = ri->ri_startblock;
	pmap->pe_len = ri->ri_blockcount;

	pmap->pe_flags = 0;
	switch (ri->ri_type) {
	case XFS_REFCOUNT_INCREASE:
	case XFS_REFCOUNT_DECREASE:
	case XFS_REFCOUNT_ALLOC_COW:
	case XFS_REFCOUNT_FREE_COW:
		pmap->pe_flags |= ri->ri_type;
		break;
	default:
		ASSERT(0);
	}
}
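
/*
 * Build a CUI covering all the pending refcount updates on the list,
 * optionally sorting the work by AG first so that intents are processed in
 * a consistent AG order.
 */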
static struct xfs_log_item *
xfs_refcount_update_create_intent(
	struct xfs_trans		*tp,
	struct list_head		*items,
	unsigned int			count,
	bool				sort)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_cui_log_item		*cuip = xfs_cui_init(mp, count);
	struct xfs_refcount_intent	*ri;

	ASSERT(count > 0);

	if (sort)
		list_sort(mp, items, xfs_refcount_update_diff_items);
	list_for_each_entry(ri, items, ri_list)
		xfs_refcount_update_log_item(tp, cuip, ri);
	return &cuip->cui_item;
}

/* Get a CUD so we can process all the deferred refcount updates. */
static struct xfs_log_item *
xfs_refcount_update_create_done(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	unsigned int			count)
{
	struct xfs_cui_log_item		*cuip = CUI_ITEM(intent);
	struct xfs_cud_log_item		*cudp;

	cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL);
	xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
			  &xfs_cud_item_ops);
	cudp->cud_cuip = cuip;
	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;

	return &cudp->cud_item;
}

/* Add this deferred CUI to the transaction. */
void
xfs_refcount_defer_add(
	struct xfs_trans		*tp,
	struct xfs_refcount_intent	*ri)
{
	struct xfs_mount		*mp = tp->t_mountp;

	trace_xfs_refcount_defer(mp, ri);

	ri->ri_group = xfs_group_intent_get(mp, ri->ri_startblock, XG_TYPE_AG);
	xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
}

/* Cancel a deferred refcount update. */
STATIC void
xfs_refcount_update_cancel_item(
	struct list_head		*item)
{
	struct xfs_refcount_intent	*ri = ci_entry(item);

	xfs_group_intent_put(ri->ri_group);
	kmem_cache_free(xfs_refcount_intent_cache, ri);
}

/* Process a deferred refcount update. */
STATIC int
xfs_refcount_update_finish_item(
	struct xfs_trans		*tp,
	struct xfs_log_item		*done,
	struct list_head		*item,
	struct xfs_btree_cur		**state)
{
	struct xfs_refcount_intent	*ri = ci_entry(item);
	int				error;

	/* Did we run out of reservation?  Requeue what we didn't finish. */
	error = xfs_refcount_finish_one(tp, ri, state);
	if (!error && ri->ri_blockcount > 0) {
		ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE ||
		       ri->ri_type == XFS_REFCOUNT_DECREASE);
		return -EAGAIN;
	}

	xfs_refcount_update_cancel_item(item);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
STATIC void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/* Abort all pending CUIs. */
STATIC void
xfs_refcount_update_abort_intent(
	struct xfs_log_item		*intent)
{
	xfs_cui_release(CUI_ITEM(intent));
}

/* Is this recovered CUI ok? */
static inline bool
xfs_cui_validate_phys(
	struct xfs_mount		*mp,
	struct xfs_phys_extent		*pmap)
{
	if (!xfs_has_reflink(mp))
		return false;

	if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)
		return false;

	switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
	case XFS_REFCOUNT_INCREASE:
	case XFS_REFCOUNT_DECREASE:
	case XFS_REFCOUNT_ALLOC_COW:
	case XFS_REFCOUNT_FREE_COW:
		break;
	default:
		return false;
	}

	return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len);
}
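
/*
 * Turn a recovered physical extent into an in-core refcount intent and
 * queue it on the deferred-work item.
 */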
static inline void
xfs_cui_recover_work(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp,
	struct xfs_phys_extent		*pmap)
{
	struct xfs_refcount_intent	*ri;

	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
	ri->ri_startblock = pmap->pe_startblock;
	ri->ri_blockcount = pmap->pe_len;
	ri->ri_group = xfs_group_intent_get(mp, pmap->pe_startblock,
			XG_TYPE_AG);

	xfs_defer_add_item(dfp, &ri->ri_list);
}

/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 */
STATIC int
xfs_refcount_recover_work(
	struct xfs_defer_pending	*dfp,
	struct list_head		*capture_list)
{
	struct xfs_trans_res		resv;
	struct xfs_log_item		*lip = dfp->dfp_intent;
	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
	struct xfs_trans		*tp;
	struct xfs_mount		*mp = lip->li_log->l_mp;
	int				i;
	int				error = 0;

	/*
	 * First check the validity of the extents described by the
	 * CUI.  If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		if (!xfs_cui_validate_phys(mp,
					&cuip->cui_format.cui_extents[i])) {
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
					&cuip->cui_format,
					sizeof(cuip->cui_format));
			return -EFSCORRUPTED;
		}

		xfs_cui_recover_work(mp, dfp, &cuip->cui_format.cui_extents[i]);
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction.  All refcount
	 * updates manage reservation usage internally and dynamically by
	 * deferring work that won't fit in the transaction.  Normally, any
	 * work that needs to be deferred gets attached to the same defer_ops
	 * that scheduled the refcount update.  However, we're in log recovery
	 * here, so we use the passed-in defer_ops to finish up any work that
	 * doesn't fit.  We need to reserve enough blocks to handle a full
	 * btree split on either end of the refcount range.
	 */
	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	error = xlog_recover_finish_intent(tp, dfp);
	if (error == -EFSCORRUPTED)
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				&cuip->cui_format,
				sizeof(cuip->cui_format));
	if (error)
		goto abort_error;

	return xfs_defer_ops_capture_and_commit(tp, capture_list);

abort_error:
	xfs_trans_cancel(tp);
	return error;
}

/* Relog an intent item to push the log tail forward. */
static struct xfs_log_item *
xfs_refcount_relog_intent(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	struct xfs_log_item		*done_item)
{
	struct xfs_cui_log_item		*cuip;
	struct xfs_phys_extent		*pmap;
	unsigned int			count;

	count = CUI_ITEM(intent)->cui_format.cui_nextents;
	pmap = CUI_ITEM(intent)->cui_format.cui_extents;

	cuip = xfs_cui_init(tp->t_mountp, count);
	memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap));
	atomic_set(&cuip->cui_next_extent, count);

	return &cuip->cui_item;
}

const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
	.name		= "refcount",
	.max_items	= XFS_CUI_MAX_FAST_EXTENTS,
	.create_intent	= xfs_refcount_update_create_intent,
	.abort_intent	= xfs_refcount_update_abort_intent,
	.create_done	= xfs_refcount_update_create_done,
	.finish_item	= xfs_refcount_update_finish_item,
	.finish_cleanup = xfs_refcount_finish_one_cleanup,
	.cancel_item	= xfs_refcount_update_cancel_item,
	.recover_work	= xfs_refcount_recover_work,
	.relog_intent	= xfs_refcount_relog_intent,
};
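
/* Match an in-AIL CUI by its log format id, for use during recovery. */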
STATIC bool
xfs_cui_item_match(
	struct xfs_log_item	*lip,
	uint64_t		intent_id)
{
	return CUI_ITEM(lip)->cui_format.cui_id == intent_id;
}

static const struct xfs_item_ops xfs_cui_item_ops = {
	.flags		= XFS_ITEM_INTENT,
	.iop_size	= xfs_cui_item_size,
	.iop_format	= xfs_cui_item_format,
	.iop_unpin	= xfs_cui_item_unpin,
	.iop_release	= xfs_cui_item_release,
	.iop_match	= xfs_cui_item_match,
};
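
/* Copy a recovered CUI format buffer into the in-core CUI, one extent at a time. */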
static inline void
xfs_cui_copy_format(
	struct xfs_cui_log_format	*dst,
	const struct xfs_cui_log_format	*src)
{
	unsigned int			i;

	memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents));

	for (i = 0; i < src->cui_nextents; i++)
		memcpy(&dst->cui_extents[i], &src->cui_extents[i],
				sizeof(struct xfs_phys_extent));
}

/*
 * This routine is called to create an in-core extent refcount update item
 * from the cui format structure which was logged on disk.  It allocates an
 * in-core cui, copies the extents from the format structure into it, and
 * adds the cui to the AIL with the given LSN.
 */
STATIC int
xlog_recover_cui_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_cui_log_item		*cuip;
	struct xfs_cui_log_format	*cui_formatp;
	size_t				len;

	cui_formatp = item->ri_buf[0].i_addr;

	if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents);
	if (item->ri_buf[0].i_len != len) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
	xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
	atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);

	xlog_recover_intent_item(log, &cuip->cui_item, lsn,
			&xfs_refcount_update_defer_type);
	return 0;
}

const struct xlog_recover_item_ops xlog_cui_item_ops = {
	.item_type		= XFS_LI_CUI,
	.commit_pass2		= xlog_recover_cui_commit_pass2,
};

/*
 * This routine is called when a CUD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding CUI if
 * it was still in the log. To do this it searches the AIL for the CUI with
 * an id equal to that in the CUD format structure. If we find it we drop
 * the CUD reference, which removes the CUI from the AIL and frees it.
 */
STATIC int
xlog_recover_cud_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_cud_log_format	*cud_formatp;

	cud_formatp = item->ri_buf[0].i_addr;
	if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id);
	return 0;
}

const struct xlog_recover_item_ops xlog_cud_item_ops = {
	.item_type		= XFS_LI_CUD,
	.commit_pass2		= xlog_recover_cud_commit_pass2,
};