// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
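
/*
 * Slab zones used to allocate the in-core CUI and CUD log items.
 */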
kmem_zone_t	*xfs_cui_zone;
kmem_zone_t	*xfs_cud_zone;

static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cui_log_item, cui_item);
}
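
/*
 * Free a CUI item.  Items carrying more than XFS_CUI_MAX_FAST_EXTENTS
 * extents were allocated from the heap; everything else came from the
 * CUI zone.
 */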
void
xfs_cui_item_free(
	struct xfs_cui_log_item	*cuip)
{
	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
		kmem_free(cuip);
	else
		kmem_zone_free(xfs_cui_zone, cuip);
}

/*
 * Freeing the CUI requires that we remove it from the AIL if it has already
 * been placed there. However, the CUI may not yet have been placed in the AIL
 * when called by xfs_cui_release() from CUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the CUI.
 */
void
xfs_cui_release(
	struct xfs_cui_log_item	*cuip)
{
	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
	if (atomic_dec_and_test(&cuip->cui_refcount)) {
		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_cui_item_free(cuip);
	}
}
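
/* Report the number of log iovecs and the log space needed for this CUI. */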
STATIC void
xfs_cui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given cui log item. We use only 1 iovec, and we point that
 * at the cui_log_format structure embedded in the cui item.
 * It is at this point that we assert that all of the extent
 * slots in the cui item have been filled.
 */
STATIC void
xfs_cui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&cuip->cui_next_extent) ==
			cuip->cui_format.cui_nextents);

	cuip->cui_format.cui_type = XFS_LI_CUI;
	cuip->cui_format.cui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
			xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
}

/*
 * Pinning has no meaning for a CUI item, so just return.
 */
STATIC void
xfs_cui_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * The unpin operation is the last place a CUI is manipulated in the log. It is
 * either inserted in the AIL or aborted in the event of a log I/O error. In
 * either case, the CUI transaction has been successfully committed to make it
 * this far. Therefore, we expect whoever committed the CUI to either construct
 * and commit the CUD or drop the CUD's reference in the event of error. Simply
 * drop the log's CUI reference now that the log is done with it.
 */
STATIC void
xfs_cui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_cui_log_item	*cuip = CUI_ITEM(lip);

	xfs_cui_release(cuip);
}

/*
 * CUI items have no locking or pushing.  However, since CUIs are pulled from
 * the AIL when their corresponding CUDs are committed to disk, their situation
 * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
 * will eventually flush the log.  This should help in getting the CUI out of
 * the AIL.
 */
STATIC uint
xfs_cui_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, a CUD isn't going to be
 * constructed and thus we free the CUI here directly.
 */
STATIC void
xfs_cui_item_unlock(
	struct xfs_log_item	*lip)
{
	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
		xfs_cui_release(CUI_ITEM(lip));
}

/*
 * The CUI is logged only once and cannot be moved in the log, so simply return
 * the lsn at which it's been logged.
 */
STATIC xfs_lsn_t
xfs_cui_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	return lsn;
}

/*
 * The CUI dependency tracking op doesn't do squat.  It can't because
 * it doesn't know where the free extent is coming from.  The dependency
 * tracking has to be handled by the "enclosing" metadata object.  For
 * example, for inodes, the inode is locked throughout the extent freeing
 * so the dependency should be recorded there.
 */
STATIC void
xfs_cui_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cui log items.
 */
static const struct xfs_item_ops xfs_cui_item_ops = {
	.iop_size	= xfs_cui_item_size,
	.iop_format	= xfs_cui_item_format,
	.iop_pin	= xfs_cui_item_pin,
	.iop_unpin	= xfs_cui_item_unpin,
	.iop_unlock	= xfs_cui_item_unlock,
	.iop_committed	= xfs_cui_item_committed,
	.iop_push	= xfs_cui_item_push,
	.iop_committing = xfs_cui_item_committing,
};

/*
 * Allocate and initialize a CUI item with the given number of extents.
 */
struct xfs_cui_log_item *
xfs_cui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_cui_log_item		*cuip;

	ASSERT(nextents > 0);
	if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
		cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
				KM_SLEEP);
	else
		cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);

	xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
	cuip->cui_format.cui_nextents = nextents;
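	/*
	 * The CUI id is the address of the in-core item; the matching CUD
	 * records the same id so that log recovery can pair the two.
	 */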
	cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
	atomic_set(&cuip->cui_next_extent, 0);
	atomic_set(&cuip->cui_refcount, 2);

	return cuip;
}

static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_cud_log_item, cud_item);
}
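
/* Report the number of log iovecs and the log space needed for this CUD. */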
STATIC void
xfs_cud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_cud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given cud log item. We use only 1 iovec, and we point that
 * at the cud_log_format structure embedded in the cud item.
 */
STATIC void
xfs_cud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	cudp->cud_format.cud_type = XFS_LI_CUD;
	cudp->cud_format.cud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
			sizeof(struct xfs_cud_log_format));
}

/*
 * Pinning has no meaning for a CUD item, so just return.
 */
STATIC void
xfs_cud_item_pin(
	struct xfs_log_item	*lip)
{
}

/*
 * Since pinning has no meaning for a CUD item, unpinning does not either.
 */
STATIC void
xfs_cud_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
}

/*
 * There isn't much you can do to push on a CUD item.  It is simply stuck
 * waiting for the log to be flushed to disk.
 */
STATIC uint
xfs_cud_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
{
	return XFS_ITEM_PINNED;
}

/*
 * The CUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the CUI and free the
 * CUD.
 */
STATIC void
xfs_cud_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
		xfs_cui_release(cudp->cud_cuip);
		kmem_zone_free(xfs_cud_zone, cudp);
	}
}

/*
 * When the cud item is committed to disk, all we need to do is delete our
 * reference to our partner cui item and then free ourselves. Since we're
 * freeing ourselves we must return -1 to keep the transaction code from
 * further referencing this item.
 */
STATIC xfs_lsn_t
xfs_cud_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);

	/*
	 * Drop the CUI reference regardless of whether the CUD has been
	 * aborted. Once the CUD transaction is constructed, it is the sole
	 * responsibility of the CUD to release the CUI (even if the CUI is
	 * aborted due to log I/O error).
	 */
	xfs_cui_release(cudp->cud_cuip);
	kmem_zone_free(xfs_cud_zone, cudp);

	return (xfs_lsn_t)-1;
}

/*
 * The CUD dependency tracking op doesn't do squat.  It can't because
 * it doesn't know where the free extent is coming from.  The dependency
 * tracking has to be handled by the "enclosing" metadata object.  For
 * example, for inodes, the inode is locked throughout the extent freeing
 * so the dependency should be recorded there.
 */
STATIC void
xfs_cud_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
}

/*
 * This is the ops vector shared by all cud log items.
 */
static const struct xfs_item_ops xfs_cud_item_ops = {
	.iop_size	= xfs_cud_item_size,
	.iop_format	= xfs_cud_item_format,
	.iop_pin	= xfs_cud_item_pin,
	.iop_unpin	= xfs_cud_item_unpin,
	.iop_unlock	= xfs_cud_item_unlock,
	.iop_committed	= xfs_cud_item_committed,
	.iop_push	= xfs_cud_item_push,
	.iop_committing = xfs_cud_item_committing,
};

/*
 * Allocate and initialize a CUD item to match the given CUI item.
 */
struct xfs_cud_log_item *
xfs_cud_init(
	struct xfs_mount		*mp,
	struct xfs_cui_log_item		*cuip)
{
	struct xfs_cud_log_item		*cudp;

	cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
	xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
	cudp->cud_cuip = cuip;
	cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;

	return cudp;
}

/*
 * Process a refcount update intent item that was recovered from the log.
 * We need to update the refcountbt.
 */
int
xfs_cui_recover(
	struct xfs_trans		*parent_tp,
	struct xfs_cui_log_item		*cuip)
{
	int				i;
	int				error = 0;
	unsigned int			refc_type;
	struct xfs_phys_extent		*refc;
	xfs_fsblock_t			startblock_fsb;
	bool				op_ok;
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;
	enum xfs_refcount_intent_type	type;
	xfs_fsblock_t			new_fsb;
	xfs_extlen_t			new_len;
	struct xfs_bmbt_irec		irec;
	bool				requeue_only = false;
	struct xfs_mount		*mp = parent_tp->t_mountp;

	ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
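
	/*
	 * Recovery runs in two passes: validate each extent recorded in the
	 * CUI, then replay the refcount updates in a new transaction,
	 * requeueing any work that cannot be finished here as fresh deferred
	 * items.
	 */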

	/*
	 * First check the validity of the extents described by the
	 * CUI.  If any are bad, then assume that all are bad and
	 * just toss the CUI.
	 */
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		if (!op_ok || startblock_fsb == 0 ||
		    refc->pe_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    refc->pe_len >= mp->m_sb.sb_agblocks ||
		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
			/*
			 * This will pull the CUI from the AIL and
			 * free the memory associated with it.
			 */
			set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
			xfs_cui_release(cuip);
			return -EIO;
		}
	}

	/*
	 * Under normal operation, refcount updates are deferred, so we
	 * wouldn't be adding them directly to a transaction.  All
	 * refcount updates manage reservation usage internally and
	 * dynamically by deferring work that won't fit in the
	 * transaction.  Normally, any work that needs to be deferred
	 * gets attached to the same defer_ops that scheduled the
	 * refcount update.  However, we're in log recovery here, so we
	 * use the passed in defer_ops to finish up any work that
	 * doesn't fit.  We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
	/*
	 * Recovery stashes all deferred ops during intent processing and
	 * finishes them on completion. Transfer current dfops state to this
	 * transaction and transfer the result back before we return.
	 */
	xfs_defer_move(tp, parent_tp);
	cudp = xfs_trans_get_cud(tp, cuip);
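
	/* Replay each refcount update recorded in the CUI. */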
	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
		refc = &cuip->cui_format.cui_extents[i];
		refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
		switch (refc_type) {
		case XFS_REFCOUNT_INCREASE:
		case XFS_REFCOUNT_DECREASE:
		case XFS_REFCOUNT_ALLOC_COW:
		case XFS_REFCOUNT_FREE_COW:
			type = refc_type;
			break;
		default:
			error = -EFSCORRUPTED;
			goto abort_error;
		}
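
		/*
		 * Once an update leaves unfinished work behind, stop applying
		 * updates directly and requeue the remainder as new deferred
		 * items.
		 */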
		if (requeue_only) {
			new_fsb = refc->pe_startblock;
			new_len = refc->pe_len;
		} else
			error = xfs_trans_log_finish_refcount_update(tp, cudp,
				type, refc->pe_startblock, refc->pe_len,
				&new_fsb, &new_len, &rcur);
		if (error)
			goto abort_error;

		/* Requeue what we didn't finish. */
		if (new_len > 0) {
			irec.br_startblock = new_fsb;
			irec.br_blockcount = new_len;
			switch (type) {
			case XFS_REFCOUNT_INCREASE:
				error = xfs_refcount_increase_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_DECREASE:
				error = xfs_refcount_decrease_extent(tp, &irec);
				break;
			case XFS_REFCOUNT_ALLOC_COW:
				error = xfs_refcount_alloc_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			case XFS_REFCOUNT_FREE_COW:
				error = xfs_refcount_free_cow_extent(tp,
						irec.br_startblock,
						irec.br_blockcount);
				break;
			default:
				ASSERT(0);
			}
			if (error)
				goto abort_error;
			requeue_only = true;
		}
	}
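
	/*
	 * All updates processed: release the btree cursor, mark the CUI
	 * recovered, and hand the deferred work back to the parent
	 * transaction before committing.
	 */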
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
	xfs_defer_move(parent_tp, tp);
	error = xfs_trans_commit(tp);
	return error;

abort_error:
	xfs_refcount_finish_one_cleanup(tp, rcur, error);
	xfs_defer_move(parent_tp, tp);
	xfs_trans_cancel(tp);
	return error;
}