// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
12 #include "xfs_shared.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_inode.h"
16 #include "xfs_trans.h"
17 #include "xfs_trans_priv.h"
18 #include "xfs_exchmaps_item.h"
19 #include "xfs_exchmaps.h"
22 #include "xfs_icache.h"
23 #include "xfs_bmap_btree.h"
24 #include "xfs_trans_space.h"
25 #include "xfs_error.h"
26 #include "xfs_log_priv.h"
27 #include "xfs_log_recover.h"
28 #include "xfs_exchrange.h"
29 #include "xfs_trace.h"
/* Slab caches for the XMI (intent) and XMD (intent-done) log items. */
struct kmem_cache	*xfs_xmi_cache;
struct kmem_cache	*xfs_xmd_cache;
34 static const struct xfs_item_ops xfs_xmi_item_ops
;
36 static inline struct xfs_xmi_log_item
*XMI_ITEM(struct xfs_log_item
*lip
)
38 return container_of(lip
, struct xfs_xmi_log_item
, xmi_item
);
43 struct xfs_xmi_log_item
*xmi_lip
)
45 kvfree(xmi_lip
->xmi_item
.li_lv_shadow
);
46 kmem_cache_free(xfs_xmi_cache
, xmi_lip
);
50 * Freeing the XMI requires that we remove it from the AIL if it has already
51 * been placed there. However, the XMI may not yet have been placed in the AIL
52 * when called by xfs_xmi_release() from XMD processing due to the ordering of
53 * committed vs unpin operations in bulk insert operations. Hence the reference
54 * count to ensure only the last caller frees the XMI.
58 struct xfs_xmi_log_item
*xmi_lip
)
60 ASSERT(atomic_read(&xmi_lip
->xmi_refcount
) > 0);
61 if (atomic_dec_and_test(&xmi_lip
->xmi_refcount
)) {
62 xfs_trans_ail_delete(&xmi_lip
->xmi_item
, 0);
63 xfs_xmi_item_free(xmi_lip
);
70 struct xfs_log_item
*lip
,
75 *nbytes
+= sizeof(struct xfs_xmi_log_format
);
79 * This is called to fill in the vector of log iovecs for the given xmi log
80 * item. We use only 1 iovec, and we point that at the xmi_log_format structure
81 * embedded in the xmi item.
85 struct xfs_log_item
*lip
,
86 struct xfs_log_vec
*lv
)
88 struct xfs_xmi_log_item
*xmi_lip
= XMI_ITEM(lip
);
89 struct xfs_log_iovec
*vecp
= NULL
;
91 xmi_lip
->xmi_format
.xmi_type
= XFS_LI_XMI
;
92 xmi_lip
->xmi_format
.xmi_size
= 1;
94 xlog_copy_iovec(lv
, &vecp
, XLOG_REG_TYPE_XMI_FORMAT
,
96 sizeof(struct xfs_xmi_log_format
));
100 * The unpin operation is the last place an XMI is manipulated in the log. It
101 * is either inserted in the AIL or aborted in the event of a log I/O error. In
102 * either case, the XMI transaction has been successfully committed to make it
103 * this far. Therefore, we expect whoever committed the XMI to either construct
104 * and commit the XMD or drop the XMD's reference in the event of error. Simply
105 * drop the log's XMI reference now that the log is done with it.
109 struct xfs_log_item
*lip
,
112 struct xfs_xmi_log_item
*xmi_lip
= XMI_ITEM(lip
);
114 xfs_xmi_release(xmi_lip
);
118 * The XMI has been either committed or aborted if the transaction has been
119 * cancelled. If the transaction was cancelled, an XMD isn't going to be
120 * constructed and thus we free the XMI here directly.
123 xfs_xmi_item_release(
124 struct xfs_log_item
*lip
)
126 xfs_xmi_release(XMI_ITEM(lip
));
129 /* Allocate and initialize an xmi item. */
130 STATIC
struct xfs_xmi_log_item
*
132 struct xfs_mount
*mp
)
135 struct xfs_xmi_log_item
*xmi_lip
;
137 xmi_lip
= kmem_cache_zalloc(xfs_xmi_cache
, GFP_KERNEL
| __GFP_NOFAIL
);
139 xfs_log_item_init(mp
, &xmi_lip
->xmi_item
, XFS_LI_XMI
, &xfs_xmi_item_ops
);
140 xmi_lip
->xmi_format
.xmi_id
= (uintptr_t)(void *)xmi_lip
;
141 atomic_set(&xmi_lip
->xmi_refcount
, 2);
146 static inline struct xfs_xmd_log_item
*XMD_ITEM(struct xfs_log_item
*lip
)
148 return container_of(lip
, struct xfs_xmd_log_item
, xmd_item
);
153 struct xfs_log_item
*lip
,
158 *nbytes
+= sizeof(struct xfs_xmd_log_format
);
162 * This is called to fill in the vector of log iovecs for the given xmd log
163 * item. We use only 1 iovec, and we point that at the xmd_log_format structure
164 * embedded in the xmd item.
168 struct xfs_log_item
*lip
,
169 struct xfs_log_vec
*lv
)
171 struct xfs_xmd_log_item
*xmd_lip
= XMD_ITEM(lip
);
172 struct xfs_log_iovec
*vecp
= NULL
;
174 xmd_lip
->xmd_format
.xmd_type
= XFS_LI_XMD
;
175 xmd_lip
->xmd_format
.xmd_size
= 1;
177 xlog_copy_iovec(lv
, &vecp
, XLOG_REG_TYPE_XMD_FORMAT
, &xmd_lip
->xmd_format
,
178 sizeof(struct xfs_xmd_log_format
));
182 * The XMD is either committed or aborted if the transaction is cancelled. If
183 * the transaction is cancelled, drop our reference to the XMI and free the
187 xfs_xmd_item_release(
188 struct xfs_log_item
*lip
)
190 struct xfs_xmd_log_item
*xmd_lip
= XMD_ITEM(lip
);
192 xfs_xmi_release(xmd_lip
->xmd_intent_log_item
);
193 kvfree(xmd_lip
->xmd_item
.li_lv_shadow
);
194 kmem_cache_free(xfs_xmd_cache
, xmd_lip
);
197 static struct xfs_log_item
*
199 struct xfs_log_item
*lip
)
201 return &XMD_ITEM(lip
)->xmd_intent_log_item
->xmi_item
;
204 static const struct xfs_item_ops xfs_xmd_item_ops
= {
205 .flags
= XFS_ITEM_RELEASE_WHEN_COMMITTED
|
206 XFS_ITEM_INTENT_DONE
,
207 .iop_size
= xfs_xmd_item_size
,
208 .iop_format
= xfs_xmd_item_format
,
209 .iop_release
= xfs_xmd_item_release
,
210 .iop_intent
= xfs_xmd_item_intent
,
213 /* Log file mapping exchange information in the intent item. */
214 STATIC
struct xfs_log_item
*
215 xfs_exchmaps_create_intent(
216 struct xfs_trans
*tp
,
217 struct list_head
*items
,
221 struct xfs_xmi_log_item
*xmi_lip
;
222 struct xfs_exchmaps_intent
*xmi
;
223 struct xfs_xmi_log_format
*xlf
;
227 xmi
= list_first_entry_or_null(items
, struct xfs_exchmaps_intent
,
230 xmi_lip
= xfs_xmi_init(tp
->t_mountp
);
231 xlf
= &xmi_lip
->xmi_format
;
233 xlf
->xmi_inode1
= xmi
->xmi_ip1
->i_ino
;
234 xlf
->xmi_igen1
= VFS_I(xmi
->xmi_ip1
)->i_generation
;
235 xlf
->xmi_inode2
= xmi
->xmi_ip2
->i_ino
;
236 xlf
->xmi_igen2
= VFS_I(xmi
->xmi_ip2
)->i_generation
;
237 xlf
->xmi_startoff1
= xmi
->xmi_startoff1
;
238 xlf
->xmi_startoff2
= xmi
->xmi_startoff2
;
239 xlf
->xmi_blockcount
= xmi
->xmi_blockcount
;
240 xlf
->xmi_isize1
= xmi
->xmi_isize1
;
241 xlf
->xmi_isize2
= xmi
->xmi_isize2
;
242 xlf
->xmi_flags
= xmi
->xmi_flags
& XFS_EXCHMAPS_LOGGED_FLAGS
;
244 return &xmi_lip
->xmi_item
;
247 STATIC
struct xfs_log_item
*
248 xfs_exchmaps_create_done(
249 struct xfs_trans
*tp
,
250 struct xfs_log_item
*intent
,
253 struct xfs_xmi_log_item
*xmi_lip
= XMI_ITEM(intent
);
254 struct xfs_xmd_log_item
*xmd_lip
;
256 xmd_lip
= kmem_cache_zalloc(xfs_xmd_cache
, GFP_KERNEL
| __GFP_NOFAIL
);
257 xfs_log_item_init(tp
->t_mountp
, &xmd_lip
->xmd_item
, XFS_LI_XMD
,
259 xmd_lip
->xmd_intent_log_item
= xmi_lip
;
260 xmd_lip
->xmd_format
.xmd_xmi_id
= xmi_lip
->xmi_format
.xmi_id
;
262 return &xmd_lip
->xmd_item
;
265 /* Add this deferred XMI to the transaction. */
267 xfs_exchmaps_defer_add(
268 struct xfs_trans
*tp
,
269 struct xfs_exchmaps_intent
*xmi
)
271 trace_xfs_exchmaps_defer(tp
->t_mountp
, xmi
);
273 xfs_defer_add(tp
, &xmi
->xmi_list
, &xfs_exchmaps_defer_type
);
276 static inline struct xfs_exchmaps_intent
*xmi_entry(const struct list_head
*e
)
278 return list_entry(e
, struct xfs_exchmaps_intent
, xmi_list
);
281 /* Cancel a deferred file mapping exchange. */
283 xfs_exchmaps_cancel_item(
284 struct list_head
*item
)
286 struct xfs_exchmaps_intent
*xmi
= xmi_entry(item
);
288 kmem_cache_free(xfs_exchmaps_intent_cache
, xmi
);
291 /* Process a deferred file mapping exchange. */
293 xfs_exchmaps_finish_item(
294 struct xfs_trans
*tp
,
295 struct xfs_log_item
*done
,
296 struct list_head
*item
,
297 struct xfs_btree_cur
**state
)
299 struct xfs_exchmaps_intent
*xmi
= xmi_entry(item
);
303 * Exchange one more mappings between two files. If there's still more
304 * work to do, we want to requeue ourselves after all other pending
305 * deferred operations have finished. This includes all of the dfops
306 * that we queued directly as well as any new ones created in the
307 * process of finishing the others. Doing so prevents us from queuing
308 * a large number of XMI log items in kernel memory, which in turn
309 * prevents us from pinning the tail of the log (while logging those
310 * new XMI items) until the first XMI items can be processed.
312 error
= xfs_exchmaps_finish_one(tp
, xmi
);
313 if (error
!= -EAGAIN
)
314 xfs_exchmaps_cancel_item(item
);
318 /* Abort all pending XMIs. */
320 xfs_exchmaps_abort_intent(
321 struct xfs_log_item
*intent
)
323 xfs_xmi_release(XMI_ITEM(intent
));
326 /* Is this recovered XMI ok? */
329 struct xfs_mount
*mp
,
330 struct xfs_xmi_log_item
*xmi_lip
)
332 struct xfs_xmi_log_format
*xlf
= &xmi_lip
->xmi_format
;
334 if (!xfs_has_exchange_range(mp
))
337 if (xmi_lip
->xmi_format
.__pad
!= 0)
340 if (xlf
->xmi_flags
& ~XFS_EXCHMAPS_LOGGED_FLAGS
)
343 if (!xfs_verify_ino(mp
, xlf
->xmi_inode1
) ||
344 !xfs_verify_ino(mp
, xlf
->xmi_inode2
))
347 if (!xfs_verify_fileext(mp
, xlf
->xmi_startoff1
, xlf
->xmi_blockcount
))
350 return xfs_verify_fileext(mp
, xlf
->xmi_startoff2
, xlf
->xmi_blockcount
);
354 * Use the recovered log state to create a new request, estimate resource
355 * requirements, and create a new incore intent state.
357 STATIC
struct xfs_exchmaps_intent
*
358 xfs_xmi_item_recover_intent(
359 struct xfs_mount
*mp
,
360 struct xfs_defer_pending
*dfp
,
361 const struct xfs_xmi_log_format
*xlf
,
362 struct xfs_exchmaps_req
*req
,
363 struct xfs_inode
**ipp1
,
364 struct xfs_inode
**ipp2
)
366 struct xfs_inode
*ip1
, *ip2
;
367 struct xfs_exchmaps_intent
*xmi
;
371 * Grab both inodes and set IRECOVERY to prevent trimming of post-eof
372 * mappings and freeing of unlinked inodes until we're totally done
373 * processing files. The ondisk format of this new log item contains
374 * file handle information, which is why recovery for other items do
375 * not check the inode generation number.
377 error
= xlog_recover_iget_handle(mp
, xlf
->xmi_inode1
, xlf
->xmi_igen1
,
380 XFS_CORRUPTION_ERROR(__func__
, XFS_ERRLEVEL_LOW
, mp
, xlf
,
382 return ERR_PTR(error
);
385 error
= xlog_recover_iget_handle(mp
, xlf
->xmi_inode2
, xlf
->xmi_igen2
,
388 XFS_CORRUPTION_ERROR(__func__
, XFS_ERRLEVEL_LOW
, mp
, xlf
,
395 req
->startoff1
= xlf
->xmi_startoff1
;
396 req
->startoff2
= xlf
->xmi_startoff2
;
397 req
->blockcount
= xlf
->xmi_blockcount
;
398 req
->flags
= xlf
->xmi_flags
& XFS_EXCHMAPS_PARAMS
;
400 xfs_exchrange_ilock(NULL
, ip1
, ip2
);
401 error
= xfs_exchmaps_estimate(req
);
402 xfs_exchrange_iunlock(ip1
, ip2
);
408 xmi
= xfs_exchmaps_init_intent(req
);
409 xfs_defer_add_item(dfp
, &xmi
->xmi_list
);
416 req
->ip2
= req
->ip1
= NULL
;
417 return ERR_PTR(error
);
420 /* Process a file mapping exchange item that was recovered from the log. */
422 xfs_exchmaps_recover_work(
423 struct xfs_defer_pending
*dfp
,
424 struct list_head
*capture_list
)
426 struct xfs_exchmaps_req req
= { .flags
= 0 };
427 struct xfs_trans_res resv
;
428 struct xfs_exchmaps_intent
*xmi
;
429 struct xfs_log_item
*lip
= dfp
->dfp_intent
;
430 struct xfs_xmi_log_item
*xmi_lip
= XMI_ITEM(lip
);
431 struct xfs_mount
*mp
= lip
->li_log
->l_mp
;
432 struct xfs_trans
*tp
;
433 struct xfs_inode
*ip1
, *ip2
;
436 if (!xfs_xmi_validate(mp
, xmi_lip
)) {
437 XFS_CORRUPTION_ERROR(__func__
, XFS_ERRLEVEL_LOW
, mp
,
438 &xmi_lip
->xmi_format
,
439 sizeof(xmi_lip
->xmi_format
));
440 return -EFSCORRUPTED
;
443 xmi
= xfs_xmi_item_recover_intent(mp
, dfp
, &xmi_lip
->xmi_format
, &req
,
448 trace_xfs_exchmaps_recover(mp
, xmi
);
450 resv
= xlog_recover_resv(&M_RES(mp
)->tr_write
);
451 error
= xfs_trans_alloc(mp
, &resv
, req
.resblks
, 0, 0, &tp
);
455 xfs_exchrange_ilock(tp
, ip1
, ip2
);
457 xfs_exchmaps_ensure_reflink(tp
, xmi
);
458 xfs_exchmaps_upgrade_extent_counts(tp
, xmi
);
459 error
= xlog_recover_finish_intent(tp
, dfp
);
460 if (error
== -EFSCORRUPTED
)
461 XFS_CORRUPTION_ERROR(__func__
, XFS_ERRLEVEL_LOW
, mp
,
462 &xmi_lip
->xmi_format
,
463 sizeof(xmi_lip
->xmi_format
));
468 * Commit transaction, which frees the transaction and saves the inodes
469 * for later replay activities.
471 error
= xfs_defer_ops_capture_and_commit(tp
, capture_list
);
475 xfs_trans_cancel(tp
);
477 xfs_exchrange_iunlock(ip1
, ip2
);
484 /* Relog an intent item to push the log tail forward. */
485 static struct xfs_log_item
*
486 xfs_exchmaps_relog_intent(
487 struct xfs_trans
*tp
,
488 struct xfs_log_item
*intent
,
489 struct xfs_log_item
*done_item
)
491 struct xfs_xmi_log_item
*xmi_lip
;
492 struct xfs_xmi_log_format
*old_xlf
, *new_xlf
;
494 old_xlf
= &XMI_ITEM(intent
)->xmi_format
;
496 xmi_lip
= xfs_xmi_init(tp
->t_mountp
);
497 new_xlf
= &xmi_lip
->xmi_format
;
499 new_xlf
->xmi_inode1
= old_xlf
->xmi_inode1
;
500 new_xlf
->xmi_inode2
= old_xlf
->xmi_inode2
;
501 new_xlf
->xmi_igen1
= old_xlf
->xmi_igen1
;
502 new_xlf
->xmi_igen2
= old_xlf
->xmi_igen2
;
503 new_xlf
->xmi_startoff1
= old_xlf
->xmi_startoff1
;
504 new_xlf
->xmi_startoff2
= old_xlf
->xmi_startoff2
;
505 new_xlf
->xmi_blockcount
= old_xlf
->xmi_blockcount
;
506 new_xlf
->xmi_flags
= old_xlf
->xmi_flags
;
507 new_xlf
->xmi_isize1
= old_xlf
->xmi_isize1
;
508 new_xlf
->xmi_isize2
= old_xlf
->xmi_isize2
;
510 return &xmi_lip
->xmi_item
;
513 const struct xfs_defer_op_type xfs_exchmaps_defer_type
= {
516 .create_intent
= xfs_exchmaps_create_intent
,
517 .abort_intent
= xfs_exchmaps_abort_intent
,
518 .create_done
= xfs_exchmaps_create_done
,
519 .finish_item
= xfs_exchmaps_finish_item
,
520 .cancel_item
= xfs_exchmaps_cancel_item
,
521 .recover_work
= xfs_exchmaps_recover_work
,
522 .relog_intent
= xfs_exchmaps_relog_intent
,
527 struct xfs_log_item
*lip
,
530 return XMI_ITEM(lip
)->xmi_format
.xmi_id
== intent_id
;
533 static const struct xfs_item_ops xfs_xmi_item_ops
= {
534 .flags
= XFS_ITEM_INTENT
,
535 .iop_size
= xfs_xmi_item_size
,
536 .iop_format
= xfs_xmi_item_format
,
537 .iop_unpin
= xfs_xmi_item_unpin
,
538 .iop_release
= xfs_xmi_item_release
,
539 .iop_match
= xfs_xmi_item_match
,
543 * This routine is called to create an in-core file mapping exchange item from
544 * the xmi format structure which was logged on disk. It allocates an in-core
545 * xmi, copies the exchange information from the format structure into it, and
546 * adds the xmi to the AIL with the given LSN.
549 xlog_recover_xmi_commit_pass2(
551 struct list_head
*buffer_list
,
552 struct xlog_recover_item
*item
,
555 struct xfs_mount
*mp
= log
->l_mp
;
556 struct xfs_xmi_log_item
*xmi_lip
;
557 struct xfs_xmi_log_format
*xmi_formatp
;
560 len
= sizeof(struct xfs_xmi_log_format
);
561 if (item
->ri_buf
[0].i_len
!= len
) {
562 XFS_ERROR_REPORT(__func__
, XFS_ERRLEVEL_LOW
, log
->l_mp
);
563 return -EFSCORRUPTED
;
566 xmi_formatp
= item
->ri_buf
[0].i_addr
;
567 if (xmi_formatp
->__pad
!= 0) {
568 XFS_ERROR_REPORT(__func__
, XFS_ERRLEVEL_LOW
, log
->l_mp
);
569 return -EFSCORRUPTED
;
572 xmi_lip
= xfs_xmi_init(mp
);
573 memcpy(&xmi_lip
->xmi_format
, xmi_formatp
, len
);
575 xlog_recover_intent_item(log
, &xmi_lip
->xmi_item
, lsn
,
576 &xfs_exchmaps_defer_type
);
580 const struct xlog_recover_item_ops xlog_xmi_item_ops
= {
581 .item_type
= XFS_LI_XMI
,
582 .commit_pass2
= xlog_recover_xmi_commit_pass2
,
586 * This routine is called when an XMD format structure is found in a committed
587 * transaction in the log. Its purpose is to cancel the corresponding XMI if it
588 * was still in the log. To do this it searches the AIL for the XMI with an id
589 * equal to that in the XMD format structure. If we find it we drop the XMD
590 * reference, which removes the XMI from the AIL and frees it.
593 xlog_recover_xmd_commit_pass2(
595 struct list_head
*buffer_list
,
596 struct xlog_recover_item
*item
,
599 struct xfs_xmd_log_format
*xmd_formatp
;
601 xmd_formatp
= item
->ri_buf
[0].i_addr
;
602 if (item
->ri_buf
[0].i_len
!= sizeof(struct xfs_xmd_log_format
)) {
603 XFS_ERROR_REPORT(__func__
, XFS_ERRLEVEL_LOW
, log
->l_mp
);
604 return -EFSCORRUPTED
;
607 xlog_recover_release_intent(log
, XFS_LI_XMI
, xmd_formatp
->xmd_xmi_id
);
611 const struct xlog_recover_item_ops xlog_xmd_item_ops
= {
612 .item_type
= XFS_LI_XMD
,
613 .commit_pass2
= xlog_recover_xmd_commit_pass2
,