1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 * Copyright (C) 2010 Red Hat, Inc.
9 #include "xfs_shared.h"
10 #include "xfs_format.h"
11 #include "xfs_log_format.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_da_format.h"
15 #include "xfs_da_btree.h"
16 #include "xfs_inode.h"
17 #include "xfs_bmap_btree.h"
18 #include "xfs_quota.h"
19 #include "xfs_trans.h"
21 #include "xfs_trans_space.h"
22 #include "xfs_rtbitmap.h"
23 #include "xfs_attr_item.h"
30 * A buffer has a format structure overhead in the log in addition
31 * to the data, so we need to take this into account when reserving
32 * space in a transaction for a buffer. Round the space required up
33 * to a multiple of 128 bytes so that we don't change the historical
34 * reservation that has been used for this overhead.
37 xfs_buf_log_overhead(void)
39 return round_up(sizeof(struct xlog_op_header
) +
40 sizeof(struct xfs_buf_log_format
), 128);
44 * Calculate out transaction log reservation per item in bytes.
46 * The nbufs argument is used to indicate the number of items that
47 * will be changed in a transaction. size is used to tell how many
48 * bytes should be reserved per item.
55 return nbufs
* (size
+ xfs_buf_log_overhead());
59 * Per-extent log reservation for the btree changes involved in freeing or
60 * allocating an extent. In classic XFS there were two trees that will be
61 * modified (bnobt + cntbt). With rmap enabled, there are three trees
62 * (rmapbt). The number of blocks reserved is based on the formula:
64 * num trees * ((2 blocks/level * max depth) - 1)
66 * Keep in mind that max depth is calculated separately for each type of tree.
69 xfs_allocfree_block_count(
75 blocks
= num_ops
* 2 * (2 * mp
->m_alloc_maxlevels
- 1);
76 if (xfs_has_rmapbt(mp
))
77 blocks
+= num_ops
* (2 * mp
->m_rmap_maxlevels
- 1);
83 * Per-extent log reservation for refcount btree changes. These are never done
84 * in the same transaction as an allocation or a free, so we compute them
88 xfs_refcountbt_block_count(
92 return num_ops
* (2 * mp
->m_refc_maxlevels
- 1);
96 * Logging inodes is really tricksy. They are logged in memory format,
97 * which means that what we write into the log doesn't directly translate into
98 * the amount of space they use on disk.
100 * Case in point - btree format forks in memory format use more space than the
101 * on-disk format. In memory, the buffer contains a normal btree block header so
102 * the btree code can treat it as though it is just another generic buffer.
103 * However, when we write it to the inode fork, we don't write all of this
104 * header as it isn't needed. e.g. the root is only ever in the inode, so
105 * there's no need for sibling pointers which would waste 16 bytes of space.
107 * Hence when we have an inode with a maximally sized btree format fork, then
108 * amount of information we actually log is greater than the size of the inode
109 * on disk. Hence we need an inode reservation function that calculates all this
110 * correctly. So, we log:
112 * - 4 log op headers for object
113 * - for the ilf, the inode core and 2 forks
114 * - inode log format object
116 * - two inode forks containing bmap btree root blocks.
117 * - the btree data contained by both forks will fit into the inode size,
118 * hence when combined with the inode core above, we have a total of the
120 * - the BMBT headers need to be accounted separately, as they are
121 * additional to the records and pointers that fit inside the inode
126 struct xfs_mount
*mp
,
130 (4 * sizeof(struct xlog_op_header
) +
131 sizeof(struct xfs_inode_log_format
) +
132 mp
->m_sb
.sb_inodesize
+
133 2 * xfs_bmbt_block_len(mp
));
137 * Inode btree record insertion/removal modifies the inode btree and free space
138 * btrees (since the inobt does not use the agfl). This requires the following
141 * the inode btree: max depth * blocksize
142 * the allocation btrees: 2 trees * (max depth - 1) * block size
144 * The caller must account for SB and AG header modifications, etc.
148 struct xfs_mount
*mp
)
150 return xfs_calc_buf_res(M_IGEO(mp
)->inobt_maxlevels
,
151 XFS_FSB_TO_B(mp
, 1)) +
152 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
153 XFS_FSB_TO_B(mp
, 1));
157 * The free inode btree is a conditional feature. The behavior differs slightly
158 * from that of the traditional inode btree in that the finobt tracks records
159 * for inode chunks with at least one free inode. A record can be removed from
160 * the tree during individual inode allocation. Therefore the finobt
161 * reservation is unconditional for both the inode chunk allocation and
162 * individual inode allocation (modify) cases.
164 * Behavior aside, the reservation for finobt modification is equivalent to the
165 * traditional inobt: cover a full finobt shape change plus block allocation.
169 struct xfs_mount
*mp
)
171 if (!xfs_has_finobt(mp
))
174 return xfs_calc_inobt_res(mp
);
178 * Calculate the reservation required to allocate or free an inode chunk. This
181 * the allocation btrees: 2 trees * (max depth - 1) * block size
182 * the inode chunk: m_ino_geo.ialloc_blks * N
184 * The size N of the inode chunk reservation depends on whether it is for
185 * allocation or free and which type of create transaction is in use. An inode
186 * chunk free always invalidates the buffers and only requires reservation for
187 * headers (N == 0). An inode chunk allocation requires a chunk sized
188 * reservation on v4 and older superblocks to initialize the chunk. No chunk
189 * reservation is required for allocation on v5 supers, which use ordered
190 * buffers to initialize.
193 xfs_calc_inode_chunk_res(
194 struct xfs_mount
*mp
,
199 res
= xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
200 XFS_FSB_TO_B(mp
, 1));
202 /* icreate tx uses ordered buffers */
203 if (xfs_has_v3inodes(mp
))
205 size
= XFS_FSB_TO_B(mp
, 1);
208 res
+= xfs_calc_buf_res(M_IGEO(mp
)->ialloc_blks
, size
);
213 * Per-extent log reservation for the btree changes involved in freeing or
214 * allocating a realtime extent. We have to be able to log as many rtbitmap
215 * blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
216 * extents, as well as the realtime summary block.
219 xfs_rtalloc_block_count(
220 struct xfs_mount
*mp
,
221 unsigned int num_ops
)
223 unsigned int rtbmp_blocks
;
226 rtxlen
= xfs_extlen_to_rtxlen(mp
, XFS_MAX_BMBT_EXTLEN
);
227 rtbmp_blocks
= xfs_rtbitmap_blockcount_len(mp
, rtxlen
);
228 return (rtbmp_blocks
+ 1) * num_ops
;
232 * Various log reservation values.
234 * These are based on the size of the file system block because that is what
235 * most transactions manipulate. Each adds in an additional 128 bytes per
236 * item logged to try to account for the overhead of the transaction mechanism.
238 * Note: Most of the reservations underestimate the number of allocation
239 * groups into which they could free extents in the xfs_defer_finish() call.
240 * This is because the number in the worst case is quite high and quite
241 * unusual. In order to fix this we need to change xfs_defer_finish() to free
242 * extents in only a single AG at a time. This will require changes to the
243 * EFI code as well, however, so that the EFI for the extents not freed is
244 * logged again in each transaction. See SGI PV #261917.
246 * Reservation functions here avoid a huge stack in xfs_trans_init due to
247 * register overflow from temporaries in the calculations.
251 * Compute the log reservation required to handle the refcount update
252 * transaction. Refcount updates are always done via deferred log items.
254 * This is calculated as:
255 * Data device refcount updates (t1):
256 * the agfs of the ags containing the blocks: nr_ops * sector size
257 * the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
260 xfs_calc_refcountbt_reservation(
261 struct xfs_mount
*mp
,
264 unsigned int blksz
= XFS_FSB_TO_B(mp
, 1);
266 if (!xfs_has_reflink(mp
))
269 return xfs_calc_buf_res(nr_ops
, mp
->m_sb
.sb_sectsize
) +
270 xfs_calc_buf_res(xfs_refcountbt_block_count(mp
, nr_ops
), blksz
);
274 * In a write transaction we can allocate a maximum of 2
275 * extents. This gives (t1):
276 * the inode getting the new extents: inode size
277 * the inode's bmap btree: max depth * block size
278 * the agfs of the ags from which the extents are allocated: 2 * sector
279 * the superblock free block counter: sector size
280 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
281 * Or, if we're writing to a realtime file (t2):
282 * the inode getting the new extents: inode size
283 * the inode's bmap btree: max depth * block size
284 * the agfs of the ags from which the extents are allocated: 2 * sector
285 * the superblock free block counter: sector size
286 * the realtime bitmap: ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
287 * the realtime summary: 1 block
288 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
289 * And the bmap_finish transaction can free bmap blocks in a join (t3):
290 * the agfs of the ags containing the blocks: 2 * sector size
291 * the agfls of the ags containing the blocks: 2 * sector size
292 * the super block free block counter: sector size
293 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
294 * And any refcount updates that happen in a separate transaction (t4).
297 xfs_calc_write_reservation(
298 struct xfs_mount
*mp
,
301 unsigned int t1
, t2
, t3
, t4
;
302 unsigned int blksz
= XFS_FSB_TO_B(mp
, 1);
304 t1
= xfs_calc_inode_res(mp
, 1) +
305 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
), blksz
) +
306 xfs_calc_buf_res(3, mp
->m_sb
.sb_sectsize
) +
307 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 2), blksz
);
309 if (xfs_has_realtime(mp
)) {
310 t2
= xfs_calc_inode_res(mp
, 1) +
311 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
),
313 xfs_calc_buf_res(3, mp
->m_sb
.sb_sectsize
) +
314 xfs_calc_buf_res(xfs_rtalloc_block_count(mp
, 1), blksz
) +
315 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1), blksz
);
320 t3
= xfs_calc_buf_res(5, mp
->m_sb
.sb_sectsize
) +
321 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 2), blksz
);
324 * In the early days of reflink, we included enough reservation to log
325 * two refcountbt splits for each transaction. The codebase runs
326 * refcountbt updates in separate transactions now, so to compute the
327 * minimum log size, add the refcountbtree splits back to t1 and t3 and
328 * do not account them separately as t4. Reflink did not support
329 * realtime when the reservations were established, so no adjustment to
332 if (for_minlogsize
) {
333 unsigned int adj
= 0;
335 if (xfs_has_reflink(mp
))
336 adj
= xfs_calc_buf_res(
337 xfs_refcountbt_block_count(mp
, 2),
341 return XFS_DQUOT_LOGRES
+ max3(t1
, t2
, t3
);
344 t4
= xfs_calc_refcountbt_reservation(mp
, 1);
345 return XFS_DQUOT_LOGRES
+ max(t4
, max3(t1
, t2
, t3
));
349 xfs_calc_write_reservation_minlogsize(
350 struct xfs_mount
*mp
)
352 return xfs_calc_write_reservation(mp
, true);
356 * In truncating a file we free up to two extents at once. We can modify (t1):
357 * the inode being truncated: inode size
358 * the inode's bmap btree: (max depth + 1) * block size
359 * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
360 * the agf for each of the ags: 4 * sector size
361 * the agfl for each of the ags: 4 * sector size
362 * the super block to reflect the freed blocks: sector size
363 * worst case split in allocation btrees per extent assuming 4 extents:
364 * 4 exts * 2 trees * (2 * max depth - 1) * block size
365 * Or, if it's a realtime file (t3):
366 * the agf for each of the ags: 2 * sector size
367 * the agfl for each of the ags: 2 * sector size
368 * the super block to reflect the freed blocks: sector size
369 * the realtime bitmap:
370 * 2 exts * ((XFS_BMBT_MAX_EXTLEN / rtextsize) / NBBY) bytes
371 * the realtime summary: 2 exts * 1 block
372 * worst case split in allocation btrees per extent assuming 2 extents:
373 * 2 exts * 2 trees * (2 * max depth - 1) * block size
374 * And any refcount updates that happen in a separate transaction (t4).
377 xfs_calc_itruncate_reservation(
378 struct xfs_mount
*mp
,
381 unsigned int t1
, t2
, t3
, t4
;
382 unsigned int blksz
= XFS_FSB_TO_B(mp
, 1);
384 t1
= xfs_calc_inode_res(mp
, 1) +
385 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
) + 1, blksz
);
387 t2
= xfs_calc_buf_res(9, mp
->m_sb
.sb_sectsize
) +
388 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 4), blksz
);
390 if (xfs_has_realtime(mp
)) {
391 t3
= xfs_calc_buf_res(5, mp
->m_sb
.sb_sectsize
) +
392 xfs_calc_buf_res(xfs_rtalloc_block_count(mp
, 2), blksz
) +
393 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 2), blksz
);
399 * In the early days of reflink, we included enough reservation to log
400 * four refcountbt splits in the same transaction as bnobt/cntbt
401 * updates. The codebase runs refcountbt updates in separate
402 * transactions now, so to compute the minimum log size, add the
403 * refcount btree splits back here and do not compute them separately
404 * as t4. Reflink did not support realtime when the reservations were
405 * established, so do not adjust t3.
407 if (for_minlogsize
) {
408 if (xfs_has_reflink(mp
))
409 t2
+= xfs_calc_buf_res(
410 xfs_refcountbt_block_count(mp
, 4),
413 return XFS_DQUOT_LOGRES
+ max3(t1
, t2
, t3
);
416 t4
= xfs_calc_refcountbt_reservation(mp
, 2);
417 return XFS_DQUOT_LOGRES
+ max(t4
, max3(t1
, t2
, t3
));
421 xfs_calc_itruncate_reservation_minlogsize(
422 struct xfs_mount
*mp
)
424 return xfs_calc_itruncate_reservation(mp
, true);
427 static inline unsigned int xfs_calc_pptr_link_overhead(void)
429 return sizeof(struct xfs_attri_log_format
) +
430 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec
)) +
431 xlog_calc_iovec_len(MAXNAMELEN
- 1);
433 static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
435 return sizeof(struct xfs_attri_log_format
) +
436 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec
)) +
437 xlog_calc_iovec_len(MAXNAMELEN
- 1);
439 static inline unsigned int xfs_calc_pptr_replace_overhead(void)
441 return sizeof(struct xfs_attri_log_format
) +
442 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec
)) +
443 xlog_calc_iovec_len(MAXNAMELEN
- 1) +
444 xlog_calc_iovec_len(sizeof(struct xfs_parent_rec
)) +
445 xlog_calc_iovec_len(MAXNAMELEN
- 1);
449 * In renaming a files we can modify:
450 * the five inodes involved: 5 * inode size
451 * the two directory btrees: 2 * (max depth + v2) * dir block size
452 * the two directory bmap btrees: 2 * max depth * block size
453 * And the bmap_finish transaction can free dir and bmap blocks (two sets
454 * of bmap blocks) giving (t2):
455 * the agf for the ags in which the blocks live: 3 * sector size
456 * the agfl for the ags in which the blocks live: 3 * sector size
457 * the superblock for the free block count: sector size
458 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
459 * If parent pointers are enabled (t3), then each transaction in the chain
460 * must be capable of setting or removing the extended attribute
461 * containing the parent information. It must also be able to handle
462 * the three xattr intent items that track the progress of the parent
466 xfs_calc_rename_reservation(
467 struct xfs_mount
*mp
)
469 unsigned int overhead
= XFS_DQUOT_LOGRES
;
470 struct xfs_trans_resv
*resp
= M_RES(mp
);
471 unsigned int t1
, t2
, t3
= 0;
473 t1
= xfs_calc_inode_res(mp
, 5) +
474 xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp
),
475 XFS_FSB_TO_B(mp
, 1));
477 t2
= xfs_calc_buf_res(7, mp
->m_sb
.sb_sectsize
) +
478 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 3),
479 XFS_FSB_TO_B(mp
, 1));
481 if (xfs_has_parent(mp
)) {
482 unsigned int rename_overhead
, exchange_overhead
;
484 t3
= max(resp
->tr_attrsetm
.tr_logres
,
485 resp
->tr_attrrm
.tr_logres
);
488 * For a standard rename, the three xattr intent log items
489 * are (1) replacing the pptr for the source file; (2)
490 * removing the pptr on the dest file; and (3) adding a
491 * pptr for the whiteout file in the src dir.
493 * For an RENAME_EXCHANGE, there are two xattr intent
494 * items to replace the pptr for both src and dest
495 * files. Link counts don't change and there is no
498 * In the worst case we can end up relogging all log
499 * intent items to allow the log tail to move ahead, so
500 * they become overhead added to each transaction in a
503 rename_overhead
= xfs_calc_pptr_replace_overhead() +
504 xfs_calc_pptr_unlink_overhead() +
505 xfs_calc_pptr_link_overhead();
506 exchange_overhead
= 2 * xfs_calc_pptr_replace_overhead();
508 overhead
+= max(rename_overhead
, exchange_overhead
);
511 return overhead
+ max3(t1
, t2
, t3
);
514 static inline unsigned int
515 xfs_rename_log_count(
516 struct xfs_mount
*mp
,
517 struct xfs_trans_resv
*resp
)
519 /* One for the rename, one more for freeing blocks */
520 unsigned int ret
= XFS_RENAME_LOG_COUNT
;
523 * Pre-reserve enough log reservation to handle the transaction
524 * rolling needed to remove or add one parent pointer.
526 if (xfs_has_parent(mp
))
527 ret
+= max(resp
->tr_attrsetm
.tr_logcount
,
528 resp
->tr_attrrm
.tr_logcount
);
534 * For removing an inode from unlinked list at first, we can modify:
535 * the agi hash list and counters: sector size
536 * the on disk inode before ours in the agi hash list: inode cluster size
537 * the on disk inode in the agi hash list: inode cluster size
540 xfs_calc_iunlink_remove_reservation(
541 struct xfs_mount
*mp
)
543 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
544 2 * M_IGEO(mp
)->inode_cluster_size
;
547 static inline unsigned int
549 struct xfs_mount
*mp
,
550 struct xfs_trans_resv
*resp
)
552 unsigned int ret
= XFS_LINK_LOG_COUNT
;
555 * Pre-reserve enough log reservation to handle the transaction
556 * rolling needed to add one parent pointer.
558 if (xfs_has_parent(mp
))
559 ret
+= resp
->tr_attrsetm
.tr_logcount
;
565 * For creating a link to an inode:
566 * the parent directory inode: inode size
567 * the linked inode: inode size
568 * the directory btree could split: (max depth + v2) * dir block size
569 * the directory bmap btree could join or split: (max depth + v2) * blocksize
570 * And the bmap_finish transaction can free some bmap blocks giving:
571 * the agf for the ag in which the blocks live: sector size
572 * the agfl for the ag in which the blocks live: sector size
573 * the superblock for the free block count: sector size
574 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
577 xfs_calc_link_reservation(
578 struct xfs_mount
*mp
)
580 unsigned int overhead
= XFS_DQUOT_LOGRES
;
581 struct xfs_trans_resv
*resp
= M_RES(mp
);
582 unsigned int t1
, t2
, t3
= 0;
584 overhead
+= xfs_calc_iunlink_remove_reservation(mp
);
585 t1
= xfs_calc_inode_res(mp
, 2) +
586 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp
), XFS_FSB_TO_B(mp
, 1));
587 t2
= xfs_calc_buf_res(3, mp
->m_sb
.sb_sectsize
) +
588 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
589 XFS_FSB_TO_B(mp
, 1));
591 if (xfs_has_parent(mp
)) {
592 t3
= resp
->tr_attrsetm
.tr_logres
;
593 overhead
+= xfs_calc_pptr_link_overhead();
596 return overhead
+ max3(t1
, t2
, t3
);
600 * For adding an inode to unlinked list we can modify:
601 * the agi hash list: sector size
602 * the on disk inode: inode cluster size
605 xfs_calc_iunlink_add_reservation(xfs_mount_t
*mp
)
607 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
608 M_IGEO(mp
)->inode_cluster_size
;
611 static inline unsigned int
612 xfs_remove_log_count(
613 struct xfs_mount
*mp
,
614 struct xfs_trans_resv
*resp
)
616 unsigned int ret
= XFS_REMOVE_LOG_COUNT
;
619 * Pre-reserve enough log reservation to handle the transaction
620 * rolling needed to add one parent pointer.
622 if (xfs_has_parent(mp
))
623 ret
+= resp
->tr_attrrm
.tr_logcount
;
629 * For removing a directory entry we can modify:
630 * the parent directory inode: inode size
631 * the removed inode: inode size
632 * the directory btree could join: (max depth + v2) * dir block size
633 * the directory bmap btree could join or split: (max depth + v2) * blocksize
634 * And the bmap_finish transaction can free the dir and bmap blocks giving:
635 * the agf for the ag in which the blocks live: 2 * sector size
636 * the agfl for the ag in which the blocks live: 2 * sector size
637 * the superblock for the free block count: sector size
638 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
641 xfs_calc_remove_reservation(
642 struct xfs_mount
*mp
)
644 unsigned int overhead
= XFS_DQUOT_LOGRES
;
645 struct xfs_trans_resv
*resp
= M_RES(mp
);
646 unsigned int t1
, t2
, t3
= 0;
648 overhead
+= xfs_calc_iunlink_add_reservation(mp
);
650 t1
= xfs_calc_inode_res(mp
, 2) +
651 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp
), XFS_FSB_TO_B(mp
, 1));
652 t2
= xfs_calc_buf_res(4, mp
->m_sb
.sb_sectsize
) +
653 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 2),
654 XFS_FSB_TO_B(mp
, 1));
656 if (xfs_has_parent(mp
)) {
657 t3
= resp
->tr_attrrm
.tr_logres
;
658 overhead
+= xfs_calc_pptr_unlink_overhead();
661 return overhead
+ max3(t1
, t2
, t3
);
665 * For create, break it in to the two cases that the transaction
666 * covers. We start with the modify case - allocation done by modification
667 * of the state of existing inodes - and the allocation case.
671 * For create we can modify:
672 * the parent directory inode: inode size
673 * the new inode: inode size
674 * the inode btree entry: block size
675 * the superblock for the nlink flag: sector size
676 * the directory btree: (max depth + v2) * dir block size
677 * the directory inode's bmap btree: (max depth + v2) * block size
678 * the finobt (record modification and allocation btrees)
681 xfs_calc_create_resv_modify(
682 struct xfs_mount
*mp
)
684 return xfs_calc_inode_res(mp
, 2) +
685 xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
686 (uint
)XFS_FSB_TO_B(mp
, 1) +
687 xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp
), XFS_FSB_TO_B(mp
, 1)) +
688 xfs_calc_finobt_res(mp
);
692 * For icreate we can allocate some inodes giving:
693 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
694 * the superblock for the nlink flag: sector size
695 * the inode chunk (allocation, optional init)
696 * the inobt (record insertion)
697 * the finobt (optional, record insertion)
700 xfs_calc_icreate_resv_alloc(
701 struct xfs_mount
*mp
)
703 return xfs_calc_buf_res(2, mp
->m_sb
.sb_sectsize
) +
704 mp
->m_sb
.sb_sectsize
+
705 xfs_calc_inode_chunk_res(mp
, _ALLOC
) +
706 xfs_calc_inobt_res(mp
) +
707 xfs_calc_finobt_res(mp
);
710 static inline unsigned int
711 xfs_icreate_log_count(
712 struct xfs_mount
*mp
,
713 struct xfs_trans_resv
*resp
)
715 unsigned int ret
= XFS_CREATE_LOG_COUNT
;
718 * Pre-reserve enough log reservation to handle the transaction
719 * rolling needed to add one parent pointer.
721 if (xfs_has_parent(mp
))
722 ret
+= resp
->tr_attrsetm
.tr_logcount
;
728 xfs_calc_icreate_reservation(
729 struct xfs_mount
*mp
)
731 struct xfs_trans_resv
*resp
= M_RES(mp
);
732 unsigned int overhead
= XFS_DQUOT_LOGRES
;
733 unsigned int t1
, t2
, t3
= 0;
735 t1
= xfs_calc_icreate_resv_alloc(mp
);
736 t2
= xfs_calc_create_resv_modify(mp
);
738 if (xfs_has_parent(mp
)) {
739 t3
= resp
->tr_attrsetm
.tr_logres
;
740 overhead
+= xfs_calc_pptr_link_overhead();
743 return overhead
+ max3(t1
, t2
, t3
);
747 xfs_calc_create_tmpfile_reservation(
748 struct xfs_mount
*mp
)
750 uint res
= XFS_DQUOT_LOGRES
;
752 res
+= xfs_calc_icreate_resv_alloc(mp
);
753 return res
+ xfs_calc_iunlink_add_reservation(mp
);
756 static inline unsigned int
758 struct xfs_mount
*mp
,
759 struct xfs_trans_resv
*resp
)
761 unsigned int ret
= XFS_MKDIR_LOG_COUNT
;
764 * Pre-reserve enough log reservation to handle the transaction
765 * rolling needed to add one parent pointer.
767 if (xfs_has_parent(mp
))
768 ret
+= resp
->tr_attrsetm
.tr_logcount
;
774 * Making a new directory is the same as creating a new file.
777 xfs_calc_mkdir_reservation(
778 struct xfs_mount
*mp
)
780 return xfs_calc_icreate_reservation(mp
);
783 static inline unsigned int
784 xfs_symlink_log_count(
785 struct xfs_mount
*mp
,
786 struct xfs_trans_resv
*resp
)
788 unsigned int ret
= XFS_SYMLINK_LOG_COUNT
;
791 * Pre-reserve enough log reservation to handle the transaction
792 * rolling needed to add one parent pointer.
794 if (xfs_has_parent(mp
))
795 ret
+= resp
->tr_attrsetm
.tr_logcount
;
801 * Making a new symplink is the same as creating a new file, but
802 * with the added blocks for remote symlink data which can be up to 1kB in
803 * length (XFS_SYMLINK_MAXLEN).
806 xfs_calc_symlink_reservation(
807 struct xfs_mount
*mp
)
809 return xfs_calc_icreate_reservation(mp
) +
810 xfs_calc_buf_res(1, XFS_SYMLINK_MAXLEN
);
814 * In freeing an inode we can modify:
815 * the inode being freed: inode size
816 * the super block free inode counter, AGF and AGFL: sector size
817 * the on disk inode (agi unlinked list removal)
818 * the inode chunk (invalidated, headers only)
820 * the finobt (record insertion, removal or modification)
822 * Note that the inode chunk res. includes an allocfree res. for freeing of the
823 * inode chunk. This is technically extraneous because the inode chunk free is
824 * deferred (it occurs after a transaction roll). Include the extra reservation
825 * anyways since we've had reports of ifree transaction overruns due to too many
826 * agfl fixups during inode chunk frees.
829 xfs_calc_ifree_reservation(
830 struct xfs_mount
*mp
)
832 return XFS_DQUOT_LOGRES
+
833 xfs_calc_inode_res(mp
, 1) +
834 xfs_calc_buf_res(3, mp
->m_sb
.sb_sectsize
) +
835 xfs_calc_iunlink_remove_reservation(mp
) +
836 xfs_calc_inode_chunk_res(mp
, _FREE
) +
837 xfs_calc_inobt_res(mp
) +
838 xfs_calc_finobt_res(mp
);
842 * When only changing the inode we log the inode and possibly the superblock
843 * We also add a bit of slop for the transaction stuff.
846 xfs_calc_ichange_reservation(
847 struct xfs_mount
*mp
)
849 return XFS_DQUOT_LOGRES
+
850 xfs_calc_inode_res(mp
, 1) +
851 xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
);
856 * Growing the data section of the filesystem.
862 xfs_calc_growdata_reservation(
863 struct xfs_mount
*mp
)
865 return xfs_calc_buf_res(3, mp
->m_sb
.sb_sectsize
) +
866 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
867 XFS_FSB_TO_B(mp
, 1));
871 * Growing the rt section of the filesystem.
872 * In the first set of transactions (ALLOC) we allocate space to the
873 * bitmap or summary files.
874 * superblock: sector size
875 * agf of the ag from which the extent is allocated: sector size
876 * bmap btree for bitmap/summary inode: max depth * blocksize
877 * bitmap/summary inode: inode size
878 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
881 xfs_calc_growrtalloc_reservation(
882 struct xfs_mount
*mp
)
884 return xfs_calc_buf_res(2, mp
->m_sb
.sb_sectsize
) +
885 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
),
886 XFS_FSB_TO_B(mp
, 1)) +
887 xfs_calc_inode_res(mp
, 1) +
888 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
889 XFS_FSB_TO_B(mp
, 1));
893 * Growing the rt section of the filesystem.
894 * In the second set of transactions (ZERO) we zero the new metadata blocks.
895 * one bitmap/summary block: blocksize
898 xfs_calc_growrtzero_reservation(
899 struct xfs_mount
*mp
)
901 return xfs_calc_buf_res(1, mp
->m_sb
.sb_blocksize
);
905 * Growing the rt section of the filesystem.
906 * In the third set of transactions (FREE) we update metadata without
907 * allocating any new blocks.
908 * superblock: sector size
909 * bitmap inode: inode size
910 * summary inode: inode size
911 * one bitmap block: blocksize
912 * summary blocks: new summary size
915 xfs_calc_growrtfree_reservation(
916 struct xfs_mount
*mp
)
918 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
919 xfs_calc_inode_res(mp
, 2) +
920 xfs_calc_buf_res(1, mp
->m_sb
.sb_blocksize
) +
921 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp
, mp
->m_rsumblocks
));
925 * Logging the inode modification timestamp on a synchronous write.
929 xfs_calc_swrite_reservation(
930 struct xfs_mount
*mp
)
932 return xfs_calc_inode_res(mp
, 1);
936 * Logging the inode mode bits when writing a setuid/setgid file
940 xfs_calc_writeid_reservation(
941 struct xfs_mount
*mp
)
943 return xfs_calc_inode_res(mp
, 1);
947 * Converting the inode from non-attributed to attributed.
948 * the inode being converted: inode size
949 * agf block and superblock (for block allocation)
950 * the new block (directory sized)
951 * bmap blocks for the new directory block
955 xfs_calc_addafork_reservation(
956 struct xfs_mount
*mp
)
958 return XFS_DQUOT_LOGRES
+
959 xfs_calc_inode_res(mp
, 1) +
960 xfs_calc_buf_res(2, mp
->m_sb
.sb_sectsize
) +
961 xfs_calc_buf_res(1, mp
->m_dir_geo
->blksize
) +
962 xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp
, XFS_DATA_FORK
) + 1,
963 XFS_FSB_TO_B(mp
, 1)) +
964 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 1),
965 XFS_FSB_TO_B(mp
, 1));
969 * Removing the attribute fork of a file
970 * the inode being truncated: inode size
971 * the inode's bmap btree: max depth * block size
972 * And the bmap_finish transaction can free the blocks and bmap blocks:
973 * the agf for each of the ags: 4 * sector size
974 * the agfl for each of the ags: 4 * sector size
975 * the super block to reflect the freed blocks: sector size
976 * worst case split in allocation btrees per extent assuming 4 extents:
977 * 4 exts * 2 trees * (2 * max depth - 1) * block size
980 xfs_calc_attrinval_reservation(
981 struct xfs_mount
*mp
)
983 return max((xfs_calc_inode_res(mp
, 1) +
984 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_ATTR_FORK
),
985 XFS_FSB_TO_B(mp
, 1))),
986 (xfs_calc_buf_res(9, mp
->m_sb
.sb_sectsize
) +
987 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 4),
988 XFS_FSB_TO_B(mp
, 1))));
992 * Setting an attribute at mount time.
993 * the inode getting the attribute
994 * the superblock for allocations
995 * the agfs extents are allocated from
996 * the attribute btree * max depth
997 * the inode allocation btree
998 * Since attribute transaction space is dependent on the size of the attribute,
999 * the calculation is done partially at mount time and partially at runtime(see
1003 xfs_calc_attrsetm_reservation(
1004 struct xfs_mount
*mp
)
1006 return XFS_DQUOT_LOGRES
+
1007 xfs_calc_inode_res(mp
, 1) +
1008 xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
1009 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH
, XFS_FSB_TO_B(mp
, 1));
1013 * Setting an attribute at runtime, transaction space unit per block.
1014 * the superblock for allocations: sector size
1015 * the inode bmap btree could join or split: max depth * block size
1016 * Since the runtime attribute transaction space is dependent on the total
1017 * blocks needed for the 1st bmap, here we calculate out the space unit for
1018 * one block so that the caller could figure out the total space according
1019 * to the attibute extent length in blocks by:
1020 * ext * M_RES(mp)->tr_attrsetrt.tr_logres
1023 xfs_calc_attrsetrt_reservation(
1024 struct xfs_mount
*mp
)
1026 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
) +
1027 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_ATTR_FORK
),
1028 XFS_FSB_TO_B(mp
, 1));
1032 * Removing an attribute.
1033 * the inode: inode size
1034 * the attribute btree could join: max depth * block size
1035 * the inode bmap btree could join or split: max depth * block size
1036 * And the bmap_finish transaction can free the attr blocks freed giving:
1037 * the agf for the ag in which the blocks live: 2 * sector size
1038 * the agfl for the ag in which the blocks live: 2 * sector size
1039 * the superblock for the free block count: sector size
1040 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
1043 xfs_calc_attrrm_reservation(
1044 struct xfs_mount
*mp
)
1046 return XFS_DQUOT_LOGRES
+
1047 max((xfs_calc_inode_res(mp
, 1) +
1048 xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH
,
1049 XFS_FSB_TO_B(mp
, 1)) +
1050 (uint
)XFS_FSB_TO_B(mp
,
1051 XFS_BM_MAXLEVELS(mp
, XFS_ATTR_FORK
)) +
1052 xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp
, XFS_DATA_FORK
), 0)),
1053 (xfs_calc_buf_res(5, mp
->m_sb
.sb_sectsize
) +
1054 xfs_calc_buf_res(xfs_allocfree_block_count(mp
, 2),
1055 XFS_FSB_TO_B(mp
, 1))));
1059 * Clearing a bad agino number in an agi hash bucket.
1062 xfs_calc_clear_agi_bucket_reservation(
1063 struct xfs_mount
*mp
)
1065 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
);
1069 * Adjusting quota limits.
1070 * the disk quota buffer: sizeof(struct xfs_disk_dquot)
1073 xfs_calc_qm_setqlim_reservation(void)
1075 return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot
));
1079 * Allocating quota on disk if needed.
1080 * the write transaction log space for quota file extent allocation
1081 * the unit of quota allocation: one system block size
1084 xfs_calc_qm_dqalloc_reservation(
1085 struct xfs_mount
*mp
,
1086 bool for_minlogsize
)
1088 return xfs_calc_write_reservation(mp
, for_minlogsize
) +
1090 XFS_FSB_TO_B(mp
, XFS_DQUOT_CLUSTER_SIZE_FSB
) - 1);
1094 xfs_calc_qm_dqalloc_reservation_minlogsize(
1095 struct xfs_mount
*mp
)
1097 return xfs_calc_qm_dqalloc_reservation(mp
, true);
1101 * Syncing the incore super block changes to disk.
1102 * the super block to reflect the changes: sector size
1105 xfs_calc_sb_reservation(
1106 struct xfs_mount
*mp
)
1108 return xfs_calc_buf_res(1, mp
->m_sb
.sb_sectsize
);
1112 * Namespace reservations.
1114 * These get tricky when parent pointers are enabled as we have attribute
1115 * modifications occurring from within these transactions. Rather than confuse
1116 * each of these reservation calculations with the conditional attribute
1117 * reservations, add them here in a clear and concise manner. This requires that
1118 * the attribute reservations have already been calculated.
1120 * Note that we only include the static attribute reservation here; the runtime
1121 * reservation will have to be modified by the size of the attributes being
1122 * added/removed/modified. See the comments on the attribute reservation
1123 * calculations for more details.
1126 xfs_calc_namespace_reservations(
1127 struct xfs_mount
*mp
,
1128 struct xfs_trans_resv
*resp
)
1130 ASSERT(resp
->tr_attrsetm
.tr_logres
> 0);
1132 resp
->tr_rename
.tr_logres
= xfs_calc_rename_reservation(mp
);
1133 resp
->tr_rename
.tr_logcount
= xfs_rename_log_count(mp
, resp
);
1134 resp
->tr_rename
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1136 resp
->tr_link
.tr_logres
= xfs_calc_link_reservation(mp
);
1137 resp
->tr_link
.tr_logcount
= xfs_link_log_count(mp
, resp
);
1138 resp
->tr_link
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1140 resp
->tr_remove
.tr_logres
= xfs_calc_remove_reservation(mp
);
1141 resp
->tr_remove
.tr_logcount
= xfs_remove_log_count(mp
, resp
);
1142 resp
->tr_remove
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1144 resp
->tr_symlink
.tr_logres
= xfs_calc_symlink_reservation(mp
);
1145 resp
->tr_symlink
.tr_logcount
= xfs_symlink_log_count(mp
, resp
);
1146 resp
->tr_symlink
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1148 resp
->tr_create
.tr_logres
= xfs_calc_icreate_reservation(mp
);
1149 resp
->tr_create
.tr_logcount
= xfs_icreate_log_count(mp
, resp
);
1150 resp
->tr_create
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1152 resp
->tr_mkdir
.tr_logres
= xfs_calc_mkdir_reservation(mp
);
1153 resp
->tr_mkdir
.tr_logcount
= xfs_mkdir_log_count(mp
, resp
);
1154 resp
->tr_mkdir
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1158 xfs_trans_resv_calc(
1159 struct xfs_mount
*mp
,
1160 struct xfs_trans_resv
*resp
)
1162 int logcount_adj
= 0;
1165 * The following transactions are logged in physical format and
1166 * require a permanent reservation on space.
1168 resp
->tr_write
.tr_logres
= xfs_calc_write_reservation(mp
, false);
1169 resp
->tr_write
.tr_logcount
= XFS_WRITE_LOG_COUNT
;
1170 resp
->tr_write
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1172 resp
->tr_itruncate
.tr_logres
= xfs_calc_itruncate_reservation(mp
, false);
1173 resp
->tr_itruncate
.tr_logcount
= XFS_ITRUNCATE_LOG_COUNT
;
1174 resp
->tr_itruncate
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1176 resp
->tr_create_tmpfile
.tr_logres
=
1177 xfs_calc_create_tmpfile_reservation(mp
);
1178 resp
->tr_create_tmpfile
.tr_logcount
= XFS_CREATE_TMPFILE_LOG_COUNT
;
1179 resp
->tr_create_tmpfile
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1181 resp
->tr_ifree
.tr_logres
= xfs_calc_ifree_reservation(mp
);
1182 resp
->tr_ifree
.tr_logcount
= XFS_INACTIVE_LOG_COUNT
;
1183 resp
->tr_ifree
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1185 resp
->tr_addafork
.tr_logres
= xfs_calc_addafork_reservation(mp
);
1186 resp
->tr_addafork
.tr_logcount
= XFS_ADDAFORK_LOG_COUNT
;
1187 resp
->tr_addafork
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1189 resp
->tr_attrinval
.tr_logres
= xfs_calc_attrinval_reservation(mp
);
1190 resp
->tr_attrinval
.tr_logcount
= XFS_ATTRINVAL_LOG_COUNT
;
1191 resp
->tr_attrinval
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1193 resp
->tr_attrsetm
.tr_logres
= xfs_calc_attrsetm_reservation(mp
);
1194 resp
->tr_attrsetm
.tr_logcount
= XFS_ATTRSET_LOG_COUNT
;
1195 resp
->tr_attrsetm
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1197 resp
->tr_attrrm
.tr_logres
= xfs_calc_attrrm_reservation(mp
);
1198 resp
->tr_attrrm
.tr_logcount
= XFS_ATTRRM_LOG_COUNT
;
1199 resp
->tr_attrrm
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1201 resp
->tr_growrtalloc
.tr_logres
= xfs_calc_growrtalloc_reservation(mp
);
1202 resp
->tr_growrtalloc
.tr_logcount
= XFS_DEFAULT_PERM_LOG_COUNT
;
1203 resp
->tr_growrtalloc
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1205 resp
->tr_qm_dqalloc
.tr_logres
= xfs_calc_qm_dqalloc_reservation(mp
,
1207 resp
->tr_qm_dqalloc
.tr_logcount
= XFS_WRITE_LOG_COUNT
;
1208 resp
->tr_qm_dqalloc
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1210 xfs_calc_namespace_reservations(mp
, resp
);
1213 * The following transactions are logged in logical format with
1214 * a default log count.
1216 resp
->tr_qm_setqlim
.tr_logres
= xfs_calc_qm_setqlim_reservation();
1217 resp
->tr_qm_setqlim
.tr_logcount
= XFS_DEFAULT_LOG_COUNT
;
1219 resp
->tr_sb
.tr_logres
= xfs_calc_sb_reservation(mp
);
1220 resp
->tr_sb
.tr_logcount
= XFS_DEFAULT_LOG_COUNT
;
1222 /* growdata requires permanent res; it can free space to the last AG */
1223 resp
->tr_growdata
.tr_logres
= xfs_calc_growdata_reservation(mp
);
1224 resp
->tr_growdata
.tr_logcount
= XFS_DEFAULT_PERM_LOG_COUNT
;
1225 resp
->tr_growdata
.tr_logflags
|= XFS_TRANS_PERM_LOG_RES
;
1227 /* The following transaction are logged in logical format */
1228 resp
->tr_ichange
.tr_logres
= xfs_calc_ichange_reservation(mp
);
1229 resp
->tr_fsyncts
.tr_logres
= xfs_calc_swrite_reservation(mp
);
1230 resp
->tr_writeid
.tr_logres
= xfs_calc_writeid_reservation(mp
);
1231 resp
->tr_attrsetrt
.tr_logres
= xfs_calc_attrsetrt_reservation(mp
);
1232 resp
->tr_clearagi
.tr_logres
= xfs_calc_clear_agi_bucket_reservation(mp
);
1233 resp
->tr_growrtzero
.tr_logres
= xfs_calc_growrtzero_reservation(mp
);
1234 resp
->tr_growrtfree
.tr_logres
= xfs_calc_growrtfree_reservation(mp
);
1237 * Add one logcount for BUI items that appear with rmap or reflink,
1238 * one logcount for refcount intent items, and one logcount for rmap
1241 if (xfs_has_reflink(mp
) || xfs_has_rmapbt(mp
))
1243 if (xfs_has_reflink(mp
))
1245 if (xfs_has_rmapbt(mp
))
1248 resp
->tr_itruncate
.tr_logcount
+= logcount_adj
;
1249 resp
->tr_write
.tr_logcount
+= logcount_adj
;
1250 resp
->tr_qm_dqalloc
.tr_logcount
+= logcount_adj
;