// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_rmap_item.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_trace.h"

struct kmem_cache	*xfs_rui_cache;
struct kmem_cache	*xfs_rud_cache;

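/*
 * Note: both caches are created at module init time (see xfs_init_caches());
 * xfs_rui_cache objects are sized to hold XFS_RUI_MAX_FAST_EXTENTS embedded
 * mappings so that small intents avoid a separate heap allocation.
 */
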
static const struct xfs_item_ops xfs_rui_item_ops;

static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_rui_log_item, rui_item);
}

STATIC void
xfs_rui_item_free(
	struct xfs_rui_log_item	*ruip)
{
	kvfree(ruip->rui_item.li_lv_shadow);
	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
		kfree(ruip);
	else
		kmem_cache_free(xfs_rui_cache, ruip);
}

/*
 * Freeing the RUI requires that we remove it from the AIL if it has already
 * been placed there. However, the RUI may not yet have been placed in the AIL
 * when called by xfs_rui_release() from RUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the RUI.
 */
STATIC void
xfs_rui_release(
	struct xfs_rui_log_item	*ruip)
{
	ASSERT(atomic_read(&ruip->rui_refcount) > 0);
	if (!atomic_dec_and_test(&ruip->rui_refcount))
		return;

	xfs_trans_ail_delete(&ruip->rui_item, 0);
	xfs_rui_item_free(ruip);
}

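/*
 * Note on the reference counting above: xfs_rui_init() starts every RUI
 * with a count of 2 -- one reference for the creating transaction (dropped
 * via ->iop_release or RUD processing) and one for the log (dropped at
 * unpin time) -- so whichever path runs last performs the actual free.
 */
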
STATIC void
xfs_rui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given rui log item. We use only 1 iovec, and we point that
 * at the rui_log_format structure embedded in the rui item.
 * It is at this point that we assert that all of the extent
 * slots in the rui item have been filled.
 */
STATIC void
xfs_rui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&ruip->rui_next_extent) ==
			ruip->rui_format.rui_nextents);

	ruip->rui_format.rui_type = XFS_LI_RUI;
	ruip->rui_format.rui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
			xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
}

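/*
 * Together, ->iop_size and ->iop_format implement the usual two-phase log
 * item contract: the size callback reserves iovec slots and buffer space,
 * and the format callback then copies the rui_log_format payload into the
 * space that was reserved.
 */
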
/*
 * The unpin operation is the last place an RUI is manipulated in the log. It
 * is either inserted in the AIL or aborted in the event of a log I/O error.
 * In either case, the RUI transaction has been successfully committed to make
 * it this far. Therefore, we expect whoever committed the RUI to either
 * construct and commit the RUD or drop the RUD's reference in the event of
 * error. Simply drop the log's RUI reference now that the log is done with it.
 */
STATIC void
xfs_rui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);

	xfs_rui_release(ruip);
}

/*
 * The RUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, an RUD isn't going to be
 * constructed and thus we free the RUI here directly.
 */
STATIC void
xfs_rui_item_release(
	struct xfs_log_item	*lip)
{
	xfs_rui_release(RUI_ITEM(lip));
}

/*
 * Allocate and initialize an rui item with the given number of extents.
 */
STATIC struct xfs_rui_log_item *
xfs_rui_init(
	struct xfs_mount		*mp,
	uint				nextents)
{
	struct xfs_rui_log_item		*ruip;

	ASSERT(nextents > 0);
	if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
		ruip = kzalloc(xfs_rui_log_item_sizeof(nextents),
				GFP_KERNEL | __GFP_NOFAIL);
	else
		ruip = kmem_cache_zalloc(xfs_rui_cache,
				GFP_KERNEL | __GFP_NOFAIL);

	xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
	ruip->rui_format.rui_nextents = nextents;
	ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
	atomic_set(&ruip->rui_next_extent, 0);
	atomic_set(&ruip->rui_refcount, 2);

	return ruip;
}

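/*
 * The split allocation above is the fast/slow path pair: intents with up
 * to XFS_RUI_MAX_FAST_EXTENTS mappings fit in the slab cache's embedded
 * extent array, while larger ones fall back to a plain no-fail heap
 * allocation sized by xfs_rui_log_item_sizeof().
 */
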
static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_rud_log_item, rud_item);
}

STATIC void
xfs_rud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_rud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given rud log item. We use only 1 iovec, and we point that
 * at the rud_log_format structure embedded in the rud item.
 */
STATIC void
xfs_rud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	rudp->rud_format.rud_type = XFS_LI_RUD;
	rudp->rud_format.rud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
			sizeof(struct xfs_rud_log_format));
}

/*
 * The RUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the RUI and free the
 * RUD.
 */
STATIC void
xfs_rud_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);

	xfs_rui_release(rudp->rud_ruip);
	kvfree(rudp->rud_item.li_lv_shadow);
	kmem_cache_free(xfs_rud_cache, rudp);
}

static struct xfs_log_item *
xfs_rud_item_intent(
	struct xfs_log_item	*lip)
{
	return &RUD_ITEM(lip)->rud_ruip->rui_item;
}

static const struct xfs_item_ops xfs_rud_item_ops = {
	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED |
			  XFS_ITEM_INTENT_DONE,
	.iop_size	= xfs_rud_item_size,
	.iop_format	= xfs_rud_item_format,
	.iop_release	= xfs_rud_item_release,
	.iop_intent	= xfs_rud_item_intent,
};

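/*
 * XFS_ITEM_RELEASE_WHEN_COMMITTED means the RUD can be released as soon as
 * the transaction holding it commits, since a done item carries no state
 * that needs AIL tracking; XFS_ITEM_INTENT_DONE marks it as the completion
 * half of an RUI/RUD pair.
 */
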
static inline struct xfs_rmap_intent *ri_entry(const struct list_head *e)
{
	return list_entry(e, struct xfs_rmap_intent, ri_list);
}

/* Sort rmap intents by AG. */
static int
xfs_rmap_update_diff_items(
	void			*priv,
	const struct list_head	*a,
	const struct list_head	*b)
{
	struct xfs_rmap_intent	*ra = ri_entry(a);
	struct xfs_rmap_intent	*rb = ri_entry(b);

	return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
}

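/*
 * Sorting by group number matters for locking order: the intents are
 * finished in list order, and taking AGF buffer locks in ascending AG
 * order is what prevents ABBA deadlocks between transactions that touch
 * multiple AGs.
 */
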
/* Log rmap updates in the intent item. */
STATIC void
xfs_rmap_update_log_item(
	struct xfs_trans	*tp,
	struct xfs_rui_log_item	*ruip,
	struct xfs_rmap_intent	*ri)
{
	uint			next_extent;
	struct xfs_map_extent	*map;

	/*
	 * atomic_inc_return gives us the value after the increment;
	 * we want to use it as an array index so we need to subtract 1 from
	 * it.
	 */
	next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
	ASSERT(next_extent < ruip->rui_format.rui_nextents);
	map = &ruip->rui_format.rui_extents[next_extent];
	map->me_owner = ri->ri_owner;
	map->me_startblock = ri->ri_bmap.br_startblock;
	map->me_startoff = ri->ri_bmap.br_startoff;
	map->me_len = ri->ri_bmap.br_blockcount;

	map->me_flags = 0;
	if (ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN)
		map->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
	if (ri->ri_whichfork == XFS_ATTR_FORK)
		map->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
	switch (ri->ri_type) {
	case XFS_RMAP_MAP:
		map->me_flags |= XFS_RMAP_EXTENT_MAP;
		break;
	case XFS_RMAP_MAP_SHARED:
		map->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
		break;
	case XFS_RMAP_UNMAP:
		map->me_flags |= XFS_RMAP_EXTENT_UNMAP;
		break;
	case XFS_RMAP_UNMAP_SHARED:
		map->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
		break;
	case XFS_RMAP_CONVERT:
		map->me_flags |= XFS_RMAP_EXTENT_CONVERT;
		break;
	case XFS_RMAP_CONVERT_SHARED:
		map->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
		break;
	case XFS_RMAP_ALLOC:
		map->me_flags |= XFS_RMAP_EXTENT_ALLOC;
		break;
	case XFS_RMAP_FREE:
		map->me_flags |= XFS_RMAP_EXTENT_FREE;
		break;
	default:
		ASSERT(0);
	}
}

static struct xfs_log_item *
xfs_rmap_update_create_intent(
	struct xfs_trans		*tp,
	struct list_head		*items,
	unsigned int			count,
	bool				sort)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rui_log_item		*ruip = xfs_rui_init(mp, count);
	struct xfs_rmap_intent		*ri;

	ASSERT(count > 0);

	if (sort)
		list_sort(mp, items, xfs_rmap_update_diff_items);
	list_for_each_entry(ri, items, ri_list)
		xfs_rmap_update_log_item(tp, ruip, ri);
	return &ruip->rui_item;
}

/* Get an RUD so we can process all the deferred rmap updates. */
static struct xfs_log_item *
xfs_rmap_update_create_done(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	unsigned int			count)
{
	struct xfs_rui_log_item		*ruip = RUI_ITEM(intent);
	struct xfs_rud_log_item		*rudp;

	rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL);
	xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
			  &xfs_rud_item_ops);
	rudp->rud_ruip = ruip;
	rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;

	return &rudp->rud_item;
}

/* Add this deferred RUI to the transaction. */
void
xfs_rmap_defer_add(
	struct xfs_trans	*tp,
	struct xfs_rmap_intent	*ri)
{
	struct xfs_mount	*mp = tp->t_mountp;

	trace_xfs_rmap_defer(mp, ri);

	ri->ri_group = xfs_group_intent_get(mp, ri->ri_bmap.br_startblock,
			XG_TYPE_AG);
	xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
}

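/*
 * A minimal (hypothetical) caller sketch: to queue a deferred "map" update
 * for a new data fork mapping, a caller would fill out an intent and hand
 * it to xfs_rmap_defer_add():
 *
 *	struct xfs_rmap_intent	*ri;
 *
 *	ri = kmem_cache_alloc(xfs_rmap_intent_cache,
 *			GFP_KERNEL | __GFP_NOFAIL);
 *	ri->ri_type = XFS_RMAP_MAP;
 *	ri->ri_owner = ip->i_ino;
 *	ri->ri_whichfork = XFS_DATA_FORK;
 *	ri->ri_bmap = *imap;
 *	xfs_rmap_defer_add(tp, ri);
 *
 * The real callers live in libxfs/xfs_rmap.c; "ip" and "imap" stand in for
 * the caller's inode and bmap record.
 */
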
/* Cancel a deferred rmap update. */
STATIC void
xfs_rmap_update_cancel_item(
	struct list_head		*item)
{
	struct xfs_rmap_intent		*ri = ri_entry(item);

	xfs_group_intent_put(ri->ri_group);
	kmem_cache_free(xfs_rmap_intent_cache, ri);
}

/* Process a deferred rmap update. */
STATIC int
xfs_rmap_update_finish_item(
	struct xfs_trans		*tp,
	struct xfs_log_item		*done,
	struct list_head		*item,
	struct xfs_btree_cur		**state)
{
	struct xfs_rmap_intent		*ri = ri_entry(item);
	int				error;

	error = xfs_rmap_finish_one(tp, ri, state);

	xfs_rmap_update_cancel_item(item);
	return error;
}

/* Clean up after calling xfs_rmap_finish_one. */
STATIC void
xfs_rmap_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp = NULL;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error && agbp)
		xfs_trans_brelse(tp, agbp);
}

/* Abort all pending RUIs. */
STATIC void
xfs_rmap_update_abort_intent(
	struct xfs_log_item	*intent)
{
	xfs_rui_release(RUI_ITEM(intent));
}

/* Is this recovered RUI ok? */
static inline bool
xfs_rui_validate_map(
	struct xfs_mount		*mp,
	struct xfs_map_extent		*map)
{
	if (!xfs_has_rmapbt(mp))
		return false;

	if (map->me_flags & ~XFS_RMAP_EXTENT_FLAGS)
		return false;

	switch (map->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
	case XFS_RMAP_EXTENT_MAP:
	case XFS_RMAP_EXTENT_MAP_SHARED:
	case XFS_RMAP_EXTENT_UNMAP:
	case XFS_RMAP_EXTENT_UNMAP_SHARED:
	case XFS_RMAP_EXTENT_CONVERT:
	case XFS_RMAP_EXTENT_CONVERT_SHARED:
	case XFS_RMAP_EXTENT_ALLOC:
	case XFS_RMAP_EXTENT_FREE:
		break;
	default:
		return false;
	}

	if (!XFS_RMAP_NON_INODE_OWNER(map->me_owner) &&
	    !xfs_verify_ino(mp, map->me_owner))
		return false;

	if (!xfs_verify_fileext(mp, map->me_startoff, map->me_len))
		return false;

	return xfs_verify_fsbext(mp, map->me_startblock, map->me_len);
}

static inline void
xfs_rui_recover_work(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp,
	const struct xfs_map_extent	*map)
{
	struct xfs_rmap_intent		*ri;

	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);

	switch (map->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
	case XFS_RMAP_EXTENT_MAP:
		ri->ri_type = XFS_RMAP_MAP;
		break;
	case XFS_RMAP_EXTENT_MAP_SHARED:
		ri->ri_type = XFS_RMAP_MAP_SHARED;
		break;
	case XFS_RMAP_EXTENT_UNMAP:
		ri->ri_type = XFS_RMAP_UNMAP;
		break;
	case XFS_RMAP_EXTENT_UNMAP_SHARED:
		ri->ri_type = XFS_RMAP_UNMAP_SHARED;
		break;
	case XFS_RMAP_EXTENT_CONVERT:
		ri->ri_type = XFS_RMAP_CONVERT;
		break;
	case XFS_RMAP_EXTENT_CONVERT_SHARED:
		ri->ri_type = XFS_RMAP_CONVERT_SHARED;
		break;
	case XFS_RMAP_EXTENT_ALLOC:
		ri->ri_type = XFS_RMAP_ALLOC;
		break;
	case XFS_RMAP_EXTENT_FREE:
		ri->ri_type = XFS_RMAP_FREE;
		break;
	default:
		ASSERT(0);
		return;
	}

	ri->ri_owner = map->me_owner;
	ri->ri_whichfork = (map->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
			XFS_ATTR_FORK : XFS_DATA_FORK;
	ri->ri_bmap.br_startblock = map->me_startblock;
	ri->ri_bmap.br_startoff = map->me_startoff;
	ri->ri_bmap.br_blockcount = map->me_len;
	ri->ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
			XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
	ri->ri_group = xfs_group_intent_get(mp, map->me_startblock, XG_TYPE_AG);

	xfs_defer_add_item(dfp, &ri->ri_list);
}

/*
 * Process an rmap update intent item that was recovered from the log.
 * We need to update the rmapbt.
 */
STATIC int
xfs_rmap_recover_work(
	struct xfs_defer_pending	*dfp,
	struct list_head		*capture_list)
{
	struct xfs_trans_res		resv;
	struct xfs_log_item		*lip = dfp->dfp_intent;
	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
	struct xfs_trans		*tp;
	struct xfs_mount		*mp = lip->li_log->l_mp;
	int				i;
	int				error = 0;

	/*
	 * First check the validity of the extents described by the
	 * RUI.  If any are bad, then assume that all are bad and
	 * just toss the RUI.
	 */
	for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
		if (!xfs_rui_validate_map(mp,
					&ruip->rui_format.rui_extents[i])) {
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
					&ruip->rui_format,
					sizeof(ruip->rui_format));
			return -EFSCORRUPTED;
		}

		xfs_rui_recover_work(mp, dfp, &ruip->rui_format.rui_extents[i]);
	}

	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	error = xlog_recover_finish_intent(tp, dfp);
	if (error == -EFSCORRUPTED)
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				&ruip->rui_format,
				sizeof(ruip->rui_format));
	if (error)
		goto abort_error;

	return xfs_defer_ops_capture_and_commit(tp, capture_list);

abort_error:
	xfs_trans_cancel(tp);
	return error;
}

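/*
 * Note the ordering above: every extent is validated and queued as
 * recovery work before any transaction is allocated, so a corrupt RUI is
 * rejected without consuming any log reservation.
 */
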
/* Relog an intent item to push the log tail forward. */
static struct xfs_log_item *
xfs_rmap_relog_intent(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	struct xfs_log_item		*done_item)
{
	struct xfs_rui_log_item		*ruip;
	struct xfs_map_extent		*map;
	unsigned int			count;

	count = RUI_ITEM(intent)->rui_format.rui_nextents;
	map = RUI_ITEM(intent)->rui_format.rui_extents;

	ruip = xfs_rui_init(tp->t_mountp, count);
	memcpy(ruip->rui_format.rui_extents, map, count * sizeof(*map));
	atomic_set(&ruip->rui_next_extent, count);

	return &ruip->rui_item;
}

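/*
 * Relogging copies the pending extents into a brand-new RUI so that the
 * intent is rewritten at the head of the log, while the done_item supplied
 * by the caller retires the old RUI and lets the log tail move past it.
 */
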
const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
	.name		= "rmap",
	.max_items	= XFS_RUI_MAX_FAST_EXTENTS,
	.create_intent	= xfs_rmap_update_create_intent,
	.abort_intent	= xfs_rmap_update_abort_intent,
	.create_done	= xfs_rmap_update_create_done,
	.finish_item	= xfs_rmap_update_finish_item,
	.finish_cleanup = xfs_rmap_finish_one_cleanup,
	.cancel_item	= xfs_rmap_update_cancel_item,
	.recover_work	= xfs_rmap_recover_work,
	.relog_intent	= xfs_rmap_relog_intent,
};

STATIC bool
xfs_rui_item_match(
	struct xfs_log_item	*lip,
	uint64_t		intent_id)
{
	return RUI_ITEM(lip)->rui_format.rui_id == intent_id;
}

static const struct xfs_item_ops xfs_rui_item_ops = {
	.flags		= XFS_ITEM_INTENT,
	.iop_size	= xfs_rui_item_size,
	.iop_format	= xfs_rui_item_format,
	.iop_unpin	= xfs_rui_item_unpin,
	.iop_release	= xfs_rui_item_release,
	.iop_match	= xfs_rui_item_match,
};

static inline void
xfs_rui_copy_format(
	struct xfs_rui_log_format	*dst,
	const struct xfs_rui_log_format	*src)
{
	unsigned int			i;

	memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents));

	for (i = 0; i < src->rui_nextents; i++)
		memcpy(&dst->rui_extents[i], &src->rui_extents[i],
				sizeof(struct xfs_map_extent));
}

/*
 * This routine is called to create an in-core extent rmap update
 * item from the rui format structure which was logged on disk.
 * It allocates an in-core rui, copies the extents from the format
 * structure into it, and adds the rui to the AIL with the given
 * LSN.
 */
STATIC int
xlog_recover_rui_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_rui_log_item		*ruip;
	struct xfs_rui_log_format	*rui_formatp;
	size_t				len;

	rui_formatp = item->ri_buf[0].i_addr;

	if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
	if (item->ri_buf[0].i_len != len) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
	xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
	atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);

	xlog_recover_intent_item(log, &ruip->rui_item, lsn,
			&xfs_rmap_update_defer_type);
	return 0;
}

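/*
 * Note that rui_next_extent is set to the full extent count above: unlike
 * a freshly built RUI, a recovered one arrives with every extent slot
 * already filled in from the on-disk format structure.
 */
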
const struct xlog_recover_item_ops xlog_rui_item_ops = {
	.item_type		= XFS_LI_RUI,
	.commit_pass2		= xlog_recover_rui_commit_pass2,
};

/*
 * This routine is called when an RUD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding RUI if it
 * was still in the log. To do this it searches the AIL for the RUI with an id
 * equal to that in the RUD format structure. If we find it we drop the RUD
 * reference, which removes the RUI from the AIL and frees it.
 */
STATIC int
xlog_recover_rud_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_rud_log_format	*rud_formatp;

	rud_formatp = item->ri_buf[0].i_addr;
	if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
				rud_formatp, item->ri_buf[0].i_len);
		return -EFSCORRUPTED;
	}

	xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id);
	return 0;
}

const struct xlog_recover_item_ops xlog_rud_item_ops = {
	.item_type		= XFS_LI_RUD,
	.commit_pass2		= xlog_recover_rud_commit_pass2,
};