// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * Copyright (C) 2010 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_extent_busy.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_icache.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
struct kmem_cache	*xfs_trans_cache;
#if defined(CONFIG_TRACEPOINTS)
STATIC void
xfs_trans_trace_reservations(
	struct xfs_mount	*mp)
{
	struct xfs_trans_res	*res;
	struct xfs_trans_res	*end_res;
	int			i;

	res = (struct xfs_trans_res *)M_RES(mp);
	end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
	for (i = 0; res < end_res; i++, res++)
		trace_xfs_trans_resv_calc(mp, i, res);
}
#else
# define xfs_trans_trace_reservations(mp)
#endif
/*
 * Initialize the precomputed transaction reservation values
 * in the mount structure.
 */
void
xfs_trans_init(
	struct xfs_mount	*mp)
{
	xfs_trans_resv_calc(mp, M_RES(mp));
	xfs_trans_trace_reservations(mp);
}
/*
 * Free the transaction structure.  If there is more clean up
 * to do when the structure is freed, add it here.
 */
STATIC void
xfs_trans_free(
	struct xfs_trans	*tp)
{
	xfs_extent_busy_sort(&tp->t_busy);
	xfs_extent_busy_clear(&tp->t_busy, false);

	trace_xfs_trans_free(tp, _RET_IP_);
	xfs_trans_clear_context(tp);
	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
		sb_end_intwrite(tp->t_mountp->m_super);
	xfs_trans_free_dqinfo(tp);
	kmem_cache_free(xfs_trans_cache, tp);
}
/*
 * This is called to create a new transaction which will share the
 * permanent log reservation of the given transaction.  The remaining
 * unused block and rt extent reservations are also inherited.  This
 * implies that the original transaction is no longer allowed to allocate
 * blocks.  Locks and log items, however, are not inherited.  They must
 * be added to the new transaction explicitly.
 */
STATIC struct xfs_trans *
xfs_trans_dup(
	struct xfs_trans	*tp)
{
	struct xfs_trans	*ntp;

	trace_xfs_trans_dup(tp, _RET_IP_);

	ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * Initialize the new transaction structure.
	 */
	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
	ntp->t_mountp = tp->t_mountp;
	INIT_LIST_HEAD(&ntp->t_items);
	INIT_LIST_HEAD(&ntp->t_busy);
	INIT_LIST_HEAD(&ntp->t_dfops);
	ntp->t_highest_agno = NULLAGNUMBER;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(tp->t_ticket != NULL);

	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
		       (tp->t_flags & XFS_TRANS_RESERVE) |
		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) |
		       (tp->t_flags & XFS_TRANS_RES_FDBLKS);
	/* We gave our writer reference to the new transaction */
	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);

	ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
	tp->t_blk_res = tp->t_blk_res_used;

	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
	tp->t_rtx_res = tp->t_rtx_res_used;

	xfs_trans_switch_context(tp, ntp);

	/* move deferred ops over to the new tp */
	xfs_defer_move(ntp, tp);

	xfs_trans_dup_dqinfo(tp, ntp);
	return ntp;
}
/*
 * This is called to reserve free disk blocks and log space for the
 * given transaction.  This must be done before allocating any resources
 * within the transaction.
 *
 * This will return ENOSPC if there are not enough blocks available.
 * It will sleep waiting for available log space.
 * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which
 * is used by long running transactions.  If any one of the reservations
 * fails then they will all be backed out.
 *
 * This does not do quota reservations. That typically is done by the
 * caller afterwards.
 */
static int
xfs_trans_reserve(
	struct xfs_trans	*tp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents)
{
	struct xfs_mount	*mp = tp->t_mountp;
	int			error = 0;
	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;

	/*
	 * Attempt to reserve the needed disk blocks by decrementing
	 * the number needed from the number available.  This will
	 * fail if the count would go below zero.
	 */
	if (blocks > 0) {
		error = xfs_dec_fdblocks(mp, blocks, rsvd);
		if (error != 0)
			return -ENOSPC;
		tp->t_blk_res += blocks;
	}

	/*
	 * Reserve the log space needed for this transaction.
	 */
	if (resp->tr_logres > 0) {
		bool	permanent = false;

		ASSERT(tp->t_log_res == 0 ||
		       tp->t_log_res == resp->tr_logres);
		ASSERT(tp->t_log_count == 0 ||
		       tp->t_log_count == resp->tr_logcount);

		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
			permanent = true;
		} else {
			ASSERT(tp->t_ticket == NULL);
			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
		}

		if (tp->t_ticket != NULL) {
			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
			error = xfs_log_regrant(mp, tp->t_ticket);
		} else {
			error = xfs_log_reserve(mp, resp->tr_logres,
						resp->tr_logcount,
						&tp->t_ticket, permanent);
		}

		if (error)
			goto undo_blocks;

		tp->t_log_res = resp->tr_logres;
		tp->t_log_count = resp->tr_logcount;
	}

	/*
	 * Attempt to reserve the needed realtime extents by decrementing
	 * the number needed from the number available.  This will
	 * fail if the count would go below zero.
	 */
	if (rtextents > 0) {
		error = xfs_dec_frextents(mp, rtextents);
		if (error) {
			error = -ENOSPC;
			goto undo_log;
		}
		tp->t_rtx_res += rtextents;
	}

	return 0;

	/*
	 * Error cases jump to one of these labels to undo any
	 * reservations which have already been performed.
	 */
undo_log:
	if (resp->tr_logres > 0) {
		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
		tp->t_ticket = NULL;
		tp->t_log_res = 0;
		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
	}

undo_blocks:
	if (blocks > 0) {
		xfs_add_fdblocks(mp, blocks);
		tp->t_blk_res = 0;
	}
	return error;
}
int
xfs_trans_alloc(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents,
	uint			flags,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	bool			want_retry = true;
	int			error;

	/*
	 * Allocate the handle before we do our freeze accounting and setting up
	 * GFP_NOFS allocation context so that we avoid lockdep false positives
	 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
	 */
retry:
	tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
		sb_start_intwrite(mp->m_super);
	xfs_trans_set_context(tp);

	/*
	 * Zero-reservation ("empty") transactions can't modify anything, so
	 * they're allowed to run while we're frozen.
	 */
	WARN_ON(resp->tr_logres > 0 &&
		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
	ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
	       xfs_has_lazysbcount(mp));

	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
	tp->t_flags = flags;
	tp->t_mountp = mp;
	INIT_LIST_HEAD(&tp->t_items);
	INIT_LIST_HEAD(&tp->t_busy);
	INIT_LIST_HEAD(&tp->t_dfops);
	tp->t_highest_agno = NULLAGNUMBER;

	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
	if (error == -ENOSPC && want_retry) {
		xfs_trans_cancel(tp);

		/*
		 * We weren't able to reserve enough space for the transaction.
		 * Flush the other speculative space allocations to free space.
		 * Do not perform a synchronous scan because callers can hold
		 * other locks.
		 */
		error = xfs_blockgc_flush_all(mp);
		if (error)
			return error;
		want_retry = false;
		goto retry;
	}
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	trace_xfs_trans_alloc(tp, _RET_IP_);

	*tpp = tp;
	return 0;
}
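
/*
 * Illustrative sketch, not part of this file: the usual caller pattern for
 * xfs_trans_alloc() is allocate, join and modify locked objects, then commit
 * (or cancel on error).  The helper name, the tr_ichange reservation choice
 * and the i_disk_size update below are assumptions made for the example only.
 */
#if 0
static int
example_set_inode_size(
	struct xfs_inode	*ip,
	xfs_fsize_t		new_size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	/* no data blocks or rt extents are needed for a core-only change */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	ip->i_disk_size = new_size;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* commit releases the log reservation and unlocks the joined inode */
	return xfs_trans_commit(tp);
}
#endif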
/*
 * Create an empty transaction with no reservation.  This is a defensive
 * mechanism for routines that query metadata without actually modifying them --
 * if the metadata being queried is somehow cross-linked (think a btree block
 * pointer that points higher in the tree), we risk deadlock.  However, blocks
 * grabbed as part of a transaction can be re-grabbed.  The verifiers will
 * notice the corrupt block and the operation will fail back to userspace
 * without deadlocking.
 *
 * Note the zero-length reservation; this transaction MUST be cancelled without
 * any dirty data.
 *
 * Callers should obtain freeze protection to avoid a conflict with fs freezing
 * where we can be grabbing buffers at the same time that freeze is trying to
 * drain the buffer LRU list.
 */
int
xfs_trans_alloc_empty(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp)
{
	struct xfs_trans_res	resv = {0};

	return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
}
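
/*
 * Illustrative sketch, not part of this file: a metadata query allocates an
 * empty transaction, walks the structures, and then cancels it.  The helper
 * name and the use of xfs_inode_count_blocks() are assumptions for the
 * example only.
 */
#if 0
static int
example_count_blocks(
	struct xfs_inode	*ip,
	xfs_filblks_t		*dblocks,
	xfs_filblks_t		*rblocks)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	xfs_inode_count_blocks(tp, ip, dblocks, rblocks);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/* empty transactions must be cancelled, never committed */
	xfs_trans_cancel(tp);
	return 0;
}
#endif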
/*
 * Record the indicated change to the given field for application
 * to the file system's superblock when the transaction commits.
 * For now, just store the change in the transaction structure.
 *
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
 *
 * Because we may not be keeping track of allocated/free inodes and
 * used filesystem blocks in the superblock, we do not mark the
 * superblock dirty in this transaction if we modify these fields.
 * We still need to update the transaction deltas so that they get
 * applied to the incore superblock, but we don't want them to
 * cause the superblock to get locked and logged if these are the
 * only fields in the superblock that the transaction modifies.
 */
void
xfs_trans_mod_sb(
	xfs_trans_t	*tp,
	uint		field,
	int64_t		delta)
{
	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
	xfs_mount_t	*mp = tp->t_mountp;

	switch (field) {
	case XFS_TRANS_SB_ICOUNT:
		tp->t_icount_delta += delta;
		if (xfs_has_lazysbcount(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_IFREE:
		tp->t_ifree_delta += delta;
		if (xfs_has_lazysbcount(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_FDBLOCKS:
		/*
		 * Track the number of blocks allocated in the transaction.
		 * Make sure it does not exceed the number reserved. If so,
		 * shutdown as this can lead to accounting inconsistency.
		 */
		if (delta < 0) {
			tp->t_blk_res_used += (uint)-delta;
			if (tp->t_blk_res_used > tp->t_blk_res)
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		} else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) {
			int64_t	blkres_delta;

			/*
			 * Return freed blocks directly to the reservation
			 * instead of the global pool, being careful not to
			 * overflow the trans counter. This is used to preserve
			 * reservation across chains of transaction rolls that
			 * repeatedly free and allocate blocks.
			 */
			blkres_delta = min_t(int64_t, delta,
					     UINT_MAX - tp->t_blk_res);
			tp->t_blk_res += blkres_delta;
			delta -= blkres_delta;
		}
		tp->t_fdblocks_delta += delta;
		if (xfs_has_lazysbcount(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_RES_FDBLOCKS:
		/*
		 * The allocation has already been applied to the
		 * in-core superblock's counter.  This should only
		 * be applied to the on-disk superblock.
		 */
		tp->t_res_fdblocks_delta += delta;
		if (xfs_has_lazysbcount(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_FREXTENTS:
		/*
		 * Track the number of blocks allocated in the
		 * transaction.  Make sure it does not exceed the
		 * number reserved.
		 */
		if (delta < 0) {
			tp->t_rtx_res_used += (uint)-delta;
			ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
		}
		tp->t_frextents_delta += delta;
		if (xfs_has_rtgroups(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_RES_FREXTENTS:
		/*
		 * The allocation has already been applied to the
		 * in-core superblock's counter.  This should only
		 * be applied to the on-disk superblock.
		 */
		tp->t_res_frextents_delta += delta;
		if (xfs_has_rtgroups(mp))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_DBLOCKS:
		tp->t_dblocks_delta += delta;
		break;
	case XFS_TRANS_SB_AGCOUNT:
		tp->t_agcount_delta += delta;
		break;
	case XFS_TRANS_SB_IMAXPCT:
		tp->t_imaxpct_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSIZE:
		tp->t_rextsize_delta += delta;
		break;
	case XFS_TRANS_SB_RBMBLOCKS:
		tp->t_rbmblocks_delta += delta;
		break;
	case XFS_TRANS_SB_RBLOCKS:
		tp->t_rblocks_delta += delta;
		break;
	case XFS_TRANS_SB_REXTENTS:
		tp->t_rextents_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSLOG:
		tp->t_rextslog_delta += delta;
		break;
	case XFS_TRANS_SB_RGCOUNT:
		tp->t_rgcount_delta += delta;
		break;
	default:
		ASSERT(0);
		return;
	}

	tp->t_flags |= flags;
}
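
/*
 * Illustrative sketch, not part of this file: callers account block count
 * changes against the transaction with signed deltas; freeing passes a
 * positive value, allocating passes a negative one.  The deltas shown are
 * arbitrary example values.
 */
#if 0
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, 2);	/* freed 2 blocks */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -1);	/* allocated 1 block */
#endif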
/*
 * xfs_trans_apply_sb_deltas() is called from the commit code
 * to bring the superblock buffer into the current transaction
 * and modify it as requested by earlier calls to xfs_trans_mod_sb().
 *
 * For now we just look at each field allowed to change and change
 * it if necessary.
 */
STATIC void
xfs_trans_apply_sb_deltas(
	xfs_trans_t	*tp)
{
	struct xfs_dsb	*sbp;
	struct xfs_buf	*bp;
	int		whole = 0;

	bp = xfs_trans_getsb(tp);
	sbp = bp->b_addr;

	/*
	 * Only update the superblock counters if we are logging them
	 */
	if (!xfs_has_lazysbcount((tp->t_mountp))) {
		if (tp->t_icount_delta)
			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
		if (tp->t_ifree_delta)
			be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
		if (tp->t_fdblocks_delta)
			be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
		if (tp->t_res_fdblocks_delta)
			be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
	}

	/*
	 * sb_frextents was added to the lazy sb counters when the rt groups
	 * feature was introduced.  This is possible because we know that all
	 * kernels supporting rtgroups will also recompute frextents from the
	 * rtbitmap.
	 *
	 * For older file systems, updating frextents requires careful handling
	 * because we cannot rely on log recovery in older kernels to recompute
	 * the value from the rtbitmap.  This means that the ondisk frextents
	 * must be consistent with the rtbitmap.
	 *
	 * Therefore, log the frextents change to the ondisk superblock and
	 * update the incore superblock so that future calls to xfs_log_sb
	 * write the correct value ondisk.
	 */
	if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
	    !xfs_has_rtgroups(tp->t_mountp)) {
		struct xfs_mount	*mp = tp->t_mountp;
		int64_t			rtxdelta;

		rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;

		spin_lock(&mp->m_sb_lock);
		be64_add_cpu(&sbp->sb_frextents, rtxdelta);
		mp->m_sb.sb_frextents += rtxdelta;
		spin_unlock(&mp->m_sb_lock);
	}

	if (tp->t_dblocks_delta) {
		be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
		whole = 1;
	}
	if (tp->t_agcount_delta) {
		be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
		whole = 1;
	}
	if (tp->t_imaxpct_delta) {
		sbp->sb_imax_pct += tp->t_imaxpct_delta;
		whole = 1;
	}
	if (tp->t_rextsize_delta) {
		be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);

		/*
		 * Because the ondisk sb records rtgroup size in units of rt
		 * extents, any time we update the rt extent size we have to
		 * recompute the ondisk rtgroup block log.  The incore values
		 * will be recomputed in xfs_trans_unreserve_and_mod_sb.
		 */
		if (xfs_has_rtgroups(tp->t_mountp)) {
			sbp->sb_rgblklog = xfs_compute_rgblklog(
						be32_to_cpu(sbp->sb_rgextents),
						be32_to_cpu(sbp->sb_rextsize));
		}
		whole = 1;
	}
	if (tp->t_rbmblocks_delta) {
		be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
		whole = 1;
	}
	if (tp->t_rblocks_delta) {
		be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
		whole = 1;
	}
	if (tp->t_rextents_delta) {
		be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
		whole = 1;
	}
	if (tp->t_rextslog_delta) {
		sbp->sb_rextslog += tp->t_rextslog_delta;
		whole = 1;
	}
	if (tp->t_rgcount_delta) {
		be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta);
		whole = 1;
	}

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
	if (whole)
		/*
		 * Log the whole thing, the fields are noncontiguous.
		 */
		xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
	else
		/*
		 * Since all the modifiable fields are contiguous, we
		 * can get away with this.
		 */
		xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount),
				  offsetof(struct xfs_dsb, sb_frextents) +
				  sizeof(sbp->sb_frextents) - 1);
}
/*
 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
 * apply superblock counter changes to the in-core superblock.  The
 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
 * applied to the in-core superblock.  The idea is that it has already been
 * done.
 *
 * If we are not logging superblock counters, then the inode allocated/free and
 * used block counts are not updated in the on disk superblock. In this case,
 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
 * still need to update the incore superblock with the changes.
 *
 * Deltas for the inode count are +/-64, hence we use a large batch size of 128
 * so we don't need to take the counter lock on every update.
 */
#define XFS_ICOUNT_BATCH	128

void
xfs_trans_unreserve_and_mod_sb(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	int64_t			blkdelta = tp->t_blk_res;
	int64_t			rtxdelta = tp->t_rtx_res;
	int64_t			idelta = 0;
	int64_t			ifreedelta = 0;

	/*
	 * Calculate the deltas.
	 *
	 * t_fdblocks_delta and t_frextents_delta can be positive or negative:
	 *
	 *  - positive values indicate blocks freed in the transaction.
	 *  - negative values indicate blocks allocated in the transaction
	 *
	 * Negative values can only happen if the transaction has a block
	 * reservation that covers the allocated block.  The end result is
	 * that the calculated delta values must always be positive and we
	 * can only put back previous allocated or reserved blocks here.
	 */
	ASSERT(tp->t_blk_res || tp->t_fdblocks_delta >= 0);
	if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
		blkdelta += tp->t_fdblocks_delta;
		ASSERT(blkdelta >= 0);
	}

	ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0);
	if (xfs_has_rtgroups(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
		rtxdelta += tp->t_frextents_delta;
		ASSERT(rtxdelta >= 0);
	}

	if (xfs_has_lazysbcount(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
		idelta = tp->t_icount_delta;
		ifreedelta = tp->t_ifree_delta;
	}

	/* apply the per-cpu counters */
	if (blkdelta)
		xfs_add_fdblocks(mp, blkdelta);

	if (idelta)
		percpu_counter_add_batch(&mp->m_icount, idelta,
					 XFS_ICOUNT_BATCH);

	if (ifreedelta)
		percpu_counter_add(&mp->m_ifree, ifreedelta);

	if (rtxdelta)
		xfs_add_frextents(mp, rtxdelta);

	if (!(tp->t_flags & XFS_TRANS_SB_DIRTY))
		return;

	/* apply remaining deltas */
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
	mp->m_sb.sb_icount += idelta;
	mp->m_sb.sb_ifree += ifreedelta;
	/*
	 * Do not touch sb_frextents here because it is handled in
	 * xfs_trans_apply_sb_deltas for file systems where it isn't a lazy
	 * counter anyway.
	 */
	mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
	mp->m_sb.sb_agcount += tp->t_agcount_delta;
	mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
	if (tp->t_rextsize_delta)
		xfs_mount_sb_set_rextsize(mp, &mp->m_sb,
				mp->m_sb.sb_rextsize + tp->t_rextsize_delta);
	mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
	mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
	mp->m_sb.sb_rextents += tp->t_rextents_delta;
	mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
	mp->m_sb.sb_rgcount += tp->t_rgcount_delta;
	spin_unlock(&mp->m_sb_lock);

	/*
	 * Debug checks outside of the spinlock so they don't lock up the
	 * machine if they fail.
	 */
	ASSERT(mp->m_sb.sb_imax_pct >= 0);
	ASSERT(mp->m_sb.sb_rextslog >= 0);
}
/* Add the given log item to the transaction's list of log items. */
void
xfs_trans_add_item(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	ASSERT(lip->li_log == tp->t_mountp->m_log);
	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
	ASSERT(list_empty(&lip->li_trans));
	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));

	list_add_tail(&lip->li_trans, &tp->t_items);
	trace_xfs_trans_add_item(tp, _RET_IP_);
}
/*
 * Unlink the log item from the transaction.  The log item is no longer
 * considered dirty in this transaction, as the linked transaction has
 * finished, either by abort or commit completion.
 */
void
xfs_trans_del_item(
	struct xfs_log_item	*lip)
{
	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
	list_del_init(&lip->li_trans);
}
/* Detach and unlock all of the items in a transaction */
static void
xfs_trans_free_items(
	struct xfs_trans	*tp,
	bool			abort)
{
	struct xfs_log_item	*lip, *next;

	trace_xfs_trans_free_items(tp, _RET_IP_);

	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
		xfs_trans_del_item(lip);
		if (abort)
			set_bit(XFS_LI_ABORTED, &lip->li_flags);
		if (lip->li_ops->iop_release)
			lip->li_ops->iop_release(lip);
	}
}
/*
 * Sort transaction items prior to running precommit operations. This will
 * attempt to order the items such that they will always be locked in the same
 * order. Items that have no sort function are moved to the end of the list
 * and so are locked last.
 *
 * This may need refinement as different types of objects add sort functions.
 *
 * Function is more complex than it needs to be because we are comparing 64 bit
 * values and the function only returns 32 bit values.
 */
static int
xfs_trans_precommit_sort(
	void			*priv,
	const struct list_head	*a,
	const struct list_head	*b)
{
	struct xfs_log_item	*lia = container_of(a,
					struct xfs_log_item, li_trans);
	struct xfs_log_item	*lib = container_of(b,
					struct xfs_log_item, li_trans);
	int64_t			diff;

	/*
	 * If both items are non-sortable, leave them alone. If only one is
	 * sortable, move the non-sortable item towards the end of the list.
	 */
	if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort)
		return 0;
	if (!lia->li_ops->iop_sort)
		return 1;
	if (!lib->li_ops->iop_sort)
		return -1;

	diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib);
	if (diff < 0)
		return -1;
	if (diff > 0)
		return 1;
	return 0;
}
/*
 * Run transaction precommit functions.
 *
 * If there is an error in any of the callouts, then stop immediately and
 * trigger a shutdown to abort the transaction. There is no recovery possible
 * from errors at this point as the transaction is dirty....
 */
static int
xfs_trans_run_precommits(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_log_item	*lip, *n;
	int			error = 0;

	/*
	 * Sort the item list to avoid ABBA deadlocks with other transactions
	 * running precommit operations that lock multiple shared items such as
	 * inode cluster buffers.
	 */
	list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort);

	/*
	 * Precommit operations can remove the log item from the transaction
	 * if the log item exists purely to delay modifications until they
	 * can be ordered against other operations. Hence we have to use
	 * list_for_each_entry_safe() here.
	 */
	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
			continue;
		if (lip->li_ops->iop_precommit) {
			error = lip->li_ops->iop_precommit(tp, lip);
			if (error)
				break;
		}
	}
	if (error)
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return error;
}
/*
 * Commit the given transaction to the log.
 *
 * XFS disk error handling mechanism is not based on a typical
 * transaction abort mechanism. Logically after the filesystem
 * gets marked 'SHUTDOWN', we can't let any new transactions
 * be durable - ie. committed to disk - because some metadata might
 * be inconsistent. In such cases, this returns an error, and the
 * caller may assume that all locked objects joined to the transaction
 * have already been unlocked as if the commit had succeeded.
 * Do not reference the transaction structure after this call.
 */
static int
__xfs_trans_commit(
	struct xfs_trans	*tp,
	bool			regrant)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xlog		*log = mp->m_log;
	xfs_csn_t		commit_seq = 0;
	int			error = 0;
	int			sync = tp->t_flags & XFS_TRANS_SYNC;

	trace_xfs_trans_commit(tp, _RET_IP_);

	error = xfs_trans_run_precommits(tp);
	if (error) {
		if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
			xfs_defer_cancel(tp);
		goto out_unreserve;
	}

	/*
	 * Finish deferred items on final commit. Only permanent transactions
	 * should ever have deferred ops.
	 */
	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
		error = xfs_defer_finish_noroll(&tp);
		if (error)
			goto out_unreserve;

		/* Run precommits from final tx in defer chain. */
		error = xfs_trans_run_precommits(tp);
		if (error)
			goto out_unreserve;
	}

	/*
	 * If there is nothing to be logged by the transaction,
	 * then unlock all of the items associated with the
	 * transaction and free the transaction structure.
	 * Also make sure to return any reserved blocks to
	 * the free pool.
	 */
	if (!(tp->t_flags & XFS_TRANS_DIRTY))
		goto out_unreserve;

	/*
	 * We must check against log shutdown here because we cannot abort log
	 * items and leave them dirty, inconsistent and unpinned in memory while
	 * the log is active. This leaves them open to being written back to
	 * disk, and that will lead to on-disk corruption.
	 */
	if (xlog_is_shutdown(log)) {
		error = -EIO;
		goto out_unreserve;
	}

	ASSERT(tp->t_ticket != NULL);

	/*
	 * If we need to update the superblock, then do it now.
	 */
	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
		xfs_trans_apply_sb_deltas(tp);
	xfs_trans_apply_dquot_deltas(tp);

	xlog_cil_commit(log, tp, &commit_seq, regrant);

	xfs_trans_free(tp);

	/*
	 * If the transaction needs to be synchronous, then force the
	 * log out now and wait for it.
	 */
	if (sync) {
		error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
		XFS_STATS_INC(mp, xs_trans_sync);
	} else {
		XFS_STATS_INC(mp, xs_trans_async);
	}

	return error;

out_unreserve:
	xfs_trans_unreserve_and_mod_sb(tp);

	/*
	 * It is indeed possible for the transaction to be not dirty but
	 * the dqinfo portion to be. All that means is that we have some
	 * (non-persistent) quota reservations that need to be unreserved.
	 */
	xfs_trans_unreserve_and_mod_dquots(tp);
	if (tp->t_ticket) {
		if (regrant && !xlog_is_shutdown(log))
			xfs_log_ticket_regrant(log, tp->t_ticket);
		else
			xfs_log_ticket_ungrant(log, tp->t_ticket);
		tp->t_ticket = NULL;
	}
	xfs_trans_free_items(tp, !!error);
	xfs_trans_free(tp);

	XFS_STATS_INC(mp, xs_trans_empty);
	return error;
}
int
xfs_trans_commit(
	struct xfs_trans	*tp)
{
	return __xfs_trans_commit(tp, false);
}
/*
 * Unlock all of the transaction's items and free the transaction.  If the
 * transaction is dirty, we must shut down the filesystem because there is no
 * way to restore them to their previous state.
 *
 * If the transaction has made a log reservation, make sure to release it as
 * well.
 *
 * This is a high level function (equivalent to xfs_trans_commit()) and so can
 * be called after the transaction has effectively been aborted due to the mount
 * being shut down. However, if the mount has not been shut down and the
 * transaction is dirty we will shut the mount down and, in doing so, that
 * guarantees that the log is shut down, too. Hence we don't need to be as
 * careful with shutdown state and dirty items here as we need to be in
 * xfs_trans_commit().
 */
void
xfs_trans_cancel(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xlog		*log = mp->m_log;
	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);

	trace_xfs_trans_cancel(tp, _RET_IP_);

	/*
	 * It's never valid to cancel a transaction with deferred ops attached,
	 * because the transaction is effectively dirty.  Complain about this
	 * loudly before freeing the in-memory defer items and shutting down the
	 * filesystem.
	 */
	if (!list_empty(&tp->t_dfops)) {
		ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
		dirty = true;
		xfs_defer_cancel(tp);
	}

	/*
	 * See if the caller is relying on us to shut down the filesystem. We
	 * only want an error report if there isn't already a shutdown in
	 * progress, so we only need to check against the mount shutdown state
	 * here.
	 */
	if (dirty && !xfs_is_shutdown(mp)) {
		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	}
#ifdef DEBUG
	/* Log items need to be consistent until the log is shut down. */
	if (!dirty && !xlog_is_shutdown(log)) {
		struct xfs_log_item	*lip;

		list_for_each_entry(lip, &tp->t_items, li_trans)
			ASSERT(!xlog_item_is_intent_done(lip));
	}
#endif
	xfs_trans_unreserve_and_mod_sb(tp);
	xfs_trans_unreserve_and_mod_dquots(tp);

	if (tp->t_ticket) {
		xfs_log_ticket_ungrant(log, tp->t_ticket);
		tp->t_ticket = NULL;
	}

	xfs_trans_free_items(tp, dirty);
	xfs_trans_free(tp);
}
/*
 * Roll from one trans in the sequence of PERMANENT transactions to
 * the next: permanent transactions are only flushed out when
 * committed with xfs_trans_commit(), but we still want as soon
 * as possible to let chunks of it go to the log. So we commit the
 * chunk we've been working on and get a new transaction to continue.
 */
int
xfs_trans_roll(
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*trans = *tpp;
	struct xfs_trans_res	tres;
	int			error;

	trace_xfs_trans_roll(trans, _RET_IP_);

	/*
	 * Copy the critical parameters from one trans to the next.
	 */
	tres.tr_logres = trans->t_log_res;
	tres.tr_logcount = trans->t_log_count;

	*tpp = xfs_trans_dup(trans);

	/*
	 * Commit the current transaction.
	 * If this commit failed, then it'd just unlock those items that
	 * are not marked ihold. That also means that a filesystem shutdown
	 * is in progress. The caller takes the responsibility to cancel
	 * the duplicate transaction that gets returned.
	 */
	error = __xfs_trans_commit(trans, true);
	if (error)
		return error;

	/*
	 * Reserve space in the log for the next transaction.
	 * This also pushes items in the "AIL", the list of logged items,
	 * out to disk if they are taking up space at the tail of the log
	 * that we want to use. This requires that either nothing be locked
	 * across this call, or that anything that is locked be logged in
	 * the prior and the next transactions.
	 */
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
	return xfs_trans_reserve(*tpp, &tres, 0, 0);
}
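
/*
 * Illustrative sketch, not part of this file: a long-running change keeps
 * rolling the permanent transaction so each committed chunk can reach the
 * log while the reservation is carried forward.  do_one_chunk() and
 * have_more_work are placeholders for the example only.
 */
#if 0
	while (have_more_work) {
		error = do_one_chunk(tp);		/* placeholder */
		if (error)
			break;
		/* commit this chunk and get a fresh transaction back in tp */
		error = xfs_trans_roll(&tp);
		if (error)
			break;
	}
#endif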
/*
 * Allocate a transaction, lock and join the inode to it, and reserve quota.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The caller is responsible for
 * releasing ILOCK_EXCL if a new transaction is returned.
 */
int
xfs_trans_alloc_inode(
	struct xfs_inode	*ip,
	struct xfs_trans_res	*resv,
	unsigned int		dblocks,
	unsigned int		rblocks,
	bool			force,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	bool			retried = false;
	int			error;

retry:
	error = xfs_trans_alloc(mp, resv, dblocks,
			xfs_extlen_to_rtxlen(mp, rblocks),
			force ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
		xfs_trans_cancel(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_blockgc_free_quota(ip, 0);
		retried = true;
		goto retry;
	}
	if (error)
		goto out_cancel;

	*tpp = tp;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
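
/*
 * Illustrative sketch, not part of this file: a typical write path reserves
 * blocks and quota against the inode, makes its changes, then commits and
 * drops the ILOCK it was handed back.  add_mapping(), resblks and the
 * tr_write reservation are assumptions for the example only.
 */
#if 0
	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0,
			false, &tp);
	if (error)
		return error;

	error = add_mapping(tp, ip);			/* placeholder */
	if (error) {
		xfs_trans_cancel(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return error;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
#endif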
/*
 * Try to reserve more blocks for a transaction.
 *
 * This is for callers that need to attach resources to a transaction, scan
 * those resources to determine the space reservation requirements, and then
 * modify the attached resources.  In other words, online repair.  This can
 * fail due to ENOSPC, so the caller must be able to cancel the transaction
 * without shutting down the fs.
 */
int
xfs_trans_reserve_more(
	struct xfs_trans	*tp,
	unsigned int		blocks,
	unsigned int		rtextents)
{
	struct xfs_trans_res	resv = { };

	return xfs_trans_reserve(tp, &resv, blocks, rtextents);
}
/*
 * Try to reserve more blocks and file quota for a transaction.  Same
 * conditions of usage as xfs_trans_reserve_more.
 */
int
xfs_trans_reserve_more_inode(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	unsigned int		dblocks,
	unsigned int		rblocks,
	bool			force_quota)
{
	struct xfs_trans_res	resv = { };
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		rtx = xfs_extlen_to_rtxlen(mp, rblocks);
	int			error;

	xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);

	error = xfs_trans_reserve(tp, &resv, dblocks, rtx);
	if (error)
		return error;

	if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
		return 0;

	if (tp->t_flags & XFS_TRANS_RESERVE)
		force_quota = true;

	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks,
			force_quota);
	if (!error)
		return 0;

	/* Quota failed, give back the new reservation. */
	xfs_add_fdblocks(mp, dblocks);
	tp->t_blk_res -= dblocks;
	xfs_add_frextents(mp, rtx);
	tp->t_rtx_res -= rtx;
	return error;
}
/*
 * Allocate a transaction in preparation for inode creation by reserving quota
 * against the given dquots.  Callers are not required to hold any inode locks.
 */
int
xfs_trans_alloc_icreate(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*resv,
	struct xfs_dquot	*udqp,
	struct xfs_dquot	*gdqp,
	struct xfs_dquot	*pdqp,
	unsigned int		dblocks,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	bool			retried = false;
	int			error;

retry:
	error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp);
	if (error)
		return error;

	error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
		xfs_trans_cancel(tp);
		xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
		retried = true;
		goto retry;
	}
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	*tpp = tp;
	return 0;
}
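
/*
 * Illustrative sketch, not part of this file: inode creation reserves quota
 * against the prospective owner's dquots before any inode is allocated.  The
 * dquot pointers, resblks and the tr_create reservation are assumptions for
 * the example only.
 */
#if 0
	error = xfs_trans_alloc_icreate(mp, &M_RES(mp)->tr_create, udqp, gdqp,
			pdqp, resblks, &tp);
	if (error)
		return error;
	/* ... allocate the inode and log it before committing ... */
#endif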
/*
 * Allocate a transaction, lock and join the inode to it, and reserve quota
 * in preparation for inode attribute changes that include uid, gid, or prid
 * changes.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The ILOCK will be dropped when the
 * transaction is committed or cancelled.
 */
int
xfs_trans_alloc_ichange(
	struct xfs_inode	*ip,
	struct xfs_dquot	*new_udqp,
	struct xfs_dquot	*new_gdqp,
	struct xfs_dquot	*new_pdqp,
	bool			force,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_dquot	*udqp;
	struct xfs_dquot	*gdqp;
	struct xfs_dquot	*pdqp;
	bool			retried = false;
	int			error;

retry:
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	/*
	 * For each quota type, skip quota reservations if the inode's dquots
	 * now match the ones that came from the caller, or the caller didn't
	 * pass one in.  The inode's dquots can change if we drop the ILOCK to
	 * perform a blockgc scan, so we must preserve the caller's arguments.
	 */
	udqp = (new_udqp != ip->i_udquot) ? new_udqp : NULL;
	gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
	pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
	if (udqp || gdqp || pdqp) {
		xfs_filblks_t	dblocks, rblocks;
		unsigned int	qflags = XFS_QMOPT_RES_REGBLKS;
		bool		isrt = XFS_IS_REALTIME_INODE(ip);

		if (force)
			qflags |= XFS_QMOPT_FORCE_RES;

		if (isrt) {
			error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
			if (error)
				goto out_cancel;
		}

		xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);

		if (isrt)
			rblocks += ip->i_delayed_blks;
		else
			dblocks += ip->i_delayed_blks;

		/*
		 * Reserve enough quota to handle blocks on disk and reserved
		 * for a delayed allocation.  We'll actually transfer the
		 * delalloc reservation between dquots at chown time, even
		 * though that part is only semi-transactional.
		 */
		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
				pdqp, dblocks, 1, qflags);
		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
			xfs_trans_cancel(tp);
			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
			retried = true;
			goto retry;
		}
		if (error)
			goto out_cancel;

		/* Do the same for realtime. */
		qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES);
		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
				pdqp, rblocks, 0, qflags);
		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
			xfs_trans_cancel(tp);
			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
			retried = true;
			goto retry;
		}
		if (error)
			goto out_cancel;
	}

	*tpp = tp;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	return error;
}
/*
 * Allocate a transaction, lock and join the directory and child inodes to it,
 * and reserve quota for a directory update.  If there isn't sufficient space,
 * @dblocks will be set to zero for a reservationless directory update and
 * @nospace_error will be set to a negative errno describing the space
 * constraint we hit.
 *
 * The caller must ensure that the on-disk dquots attached to this inode have
 * already been allocated and initialized.  The ILOCKs will be dropped when the
 * transaction is committed or cancelled.
 *
 * Caller is responsible for unlocking the inodes manually upon return
 */
int
xfs_trans_alloc_dir(
	struct xfs_inode	*dp,
	struct xfs_trans_res	*resv,
	struct xfs_inode	*ip,
	unsigned int		*dblocks,
	struct xfs_trans	**tpp,
	int			*nospace_error)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		resblks;
	bool			retried = false;
	int			error;

retry:
	*nospace_error = 0;
	resblks = *dblocks;
	error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
	if (error == -ENOSPC) {
		*nospace_error = error;
		resblks = 0;
		error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
	}
	if (error)
		return error;

	xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, dp, 0);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_qm_dqattach_locked(dp, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	if (resblks == 0)
		goto done;

	error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
	if (error == -EDQUOT || error == -ENOSPC) {
		if (!retried) {
			xfs_trans_cancel(tp);
			xfs_iunlock(dp, XFS_ILOCK_EXCL);
			if (dp != ip)
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
			xfs_blockgc_free_quota(dp, 0);
			retried = true;
			goto retry;
		}

		*nospace_error = error;
		resblks = 0;
		error = 0;
	}
	if (error)
		goto out_cancel;

done:
	*tpp = tp;
	*dblocks = resblks;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	return error;
}