1 // SPDX-License-Identifier: GPL-2.0
6 #include "space-info.h"
7 #include "transaction.h"
8 #include "block-group.h"
11 * HOW DO BLOCK RESERVES WORK
13 * Think of block_rsv's as buckets for logically grouped metadata
14 * reservations. Each block_rsv has a ->size and a ->reserved. ->size is
15 * how large we want our block rsv to be, ->reserved is how much space is
16 * currently reserved for this block reserve.
18 * ->failfast exists for the truncate case, and is described below.
23 * Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill
25 * We call into btrfs_reserve_metadata_bytes() with our bytes, which is
26 * accounted for in space_info->bytes_may_use, and then add the bytes to
27 * ->reserved, and ->size in the case of btrfs_block_rsv_add.
29 * ->size is an over-estimation of how much we may use for a particular
33 * Entrance: btrfs_use_block_rsv
35 * When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv()
36 * to determine the appropriate block_rsv to use, and then verify that
37 * ->reserved has enough space for our tree block allocation. Once
38 * successful we subtract fs_info->nodesize from ->reserved.
41 * Entrance: btrfs_block_rsv_release
43 * We are finished with our operation, subtract our individual reservation
44 * from ->size, and then subtract ->size from ->reserved and free up the
45 * excess if there is any.
47 * There is some logic here to refill the delayed refs rsv or the global rsv
48 * as needed, otherwise the excess is subtracted from
49 * space_info->bytes_may_use.
51 * TYPES OF BLOCK RESERVES
53 * BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK
54 * These behave normally, as described above, just within the confines of the
55 * lifetime of their particular operation (transaction for the whole trans
56 * handle lifetime, for example).
59 * It is impossible to properly account for all the space that may be required
60 * to make our extent tree updates. This block reserve acts as an overflow
61 * buffer in case our delayed refs reserve does not reserve enough space to
62 * update the extent tree.
64 * We can steal from this in some cases as well, notably on evict() or
65 * truncate() in order to help users recover from ENOSPC conditions.
68 * The individual item sizes are determined by the per-inode size
69 * calculations, which are described with the delalloc code. This is pretty
70 * straightforward; it's just that the calculation of ->size encodes a lot of
71 * different items, and thus it gets used when updating inodes, inserting file
72 * extents, and inserting checksums.
75 * We keep a running tally of how many delayed refs we have on the system.
76 * We assume each one of these delayed refs is going to use a full
77 * reservation. We use the transaction items and pre-reserve space for every
78 * operation, and use this reservation to refill any gap between ->size and
79 * ->reserved that may exist.
81 * From there it's straightforward, removing a delayed ref means we remove its
82 * count from ->size and free up reservations as necessary. Since this is
83 * the most dynamic block reserve in the system, we will try to refill this
84 * block reserve first with any excess returned by any other block reserve.
87 * This is the fallback block reserve to make us try to reserve space if we
88 * don't have a specific bucket for this allocation. It is mostly used for
89 * updating the device tree and such; since that is a separate pool, we're
90 * content to just reserve space from the space_info on demand.
93 * This is used by things like truncate and iput. We will temporarily
94 * allocate a block reserve, set it to some size, and then truncate bytes
95 * until we have no space left. With ->failfast set we'll simply return
96 * ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try
97 * to make a new reservation. This is because these operations are
98 * unbounded, so we want to do as much work as we can, and then back off and
/*
 * Shrink @block_rsv by @num_bytes and give back whatever it still holds above
 * its new ->size.
 *
 * @fs_info:               filesystem the reserves belong to
 * @block_rsv:             reserve being released from
 * @dest:                  reserve that may absorb the excess; its ->lock is
 *                         taken before its ->reserved is topped up
 * @num_bytes:             amount to drop from ->size; (u64)-1 means "all of
 *                         ->size" and also selects the whole ->qgroup_rsv_size
 * @qgroup_to_release_ret: if non-NULL, receives the qgroup amount computed
 *                         under @block_rsv->lock
 *
 * NOTE(review): the return statement and the guards around the @dest section
 * are not visible in this extract; confirm them against the full file.
 */
102 static u64
block_rsv_release_bytes(struct btrfs_fs_info
*fs_info
,
103 struct btrfs_block_rsv
*block_rsv
,
104 struct btrfs_block_rsv
*dest
, u64 num_bytes
,
105 u64
*qgroup_to_release_ret
)
107 struct btrfs_space_info
*space_info
= block_rsv
->space_info
;
108 u64 qgroup_to_release
= 0;
/* All ->size / ->reserved bookkeeping on @block_rsv happens under its lock. */
111 spin_lock(&block_rsv
->lock
);
112 if (num_bytes
== (u64
)-1) {
113 num_bytes
= block_rsv
->size
;
114 qgroup_to_release
= block_rsv
->qgroup_rsv_size
;
116 block_rsv
->size
-= num_bytes
;
/* From here on num_bytes is the excess: ->reserved above the shrunken ->size. */
117 if (block_rsv
->reserved
>= block_rsv
->size
) {
118 num_bytes
= block_rsv
->reserved
- block_rsv
->size
;
119 block_rsv
->reserved
= block_rsv
->size
;
/* Same clamp for the qgroup counters. */
124 if (block_rsv
->qgroup_rsv_reserved
>= block_rsv
->qgroup_rsv_size
) {
125 qgroup_to_release
= block_rsv
->qgroup_rsv_reserved
-
126 block_rsv
->qgroup_rsv_size
;
127 block_rsv
->qgroup_rsv_reserved
= block_rsv
->qgroup_rsv_size
;
129 qgroup_to_release
= 0;
131 spin_unlock(&block_rsv
->lock
);
/* Move at most the gap (dest->size - dest->reserved) of the excess into @dest. */
136 spin_lock(&dest
->lock
);
140 bytes_to_add
= dest
->size
- dest
->reserved
;
141 bytes_to_add
= min(num_bytes
, bytes_to_add
);
142 dest
->reserved
+= bytes_to_add
;
143 if (dest
->reserved
>= dest
->size
)
145 num_bytes
-= bytes_to_add
;
147 spin_unlock(&dest
->lock
);
/* Call into the space_info accounting; the remaining arguments are not visible here. */
150 btrfs_space_info_free_bytes_may_use(fs_info
,
154 if (qgroup_to_release_ret
)
155 *qgroup_to_release_ret
= qgroup_to_release
;
/*
 * Move @num_bytes of reservation from @src to @dst.
 *
 * The bytes are first taken from @src with btrfs_block_rsv_use_bytes() and
 * then credited to @dst with btrfs_block_rsv_add_bytes(); update_size is
 * forwarded to the latter.
 *
 * NOTE(review): the handling of a failing btrfs_block_rsv_use_bytes() and the
 * return statement are not visible in this extract.
 */
159 int btrfs_block_rsv_migrate(struct btrfs_block_rsv
*src
,
160 struct btrfs_block_rsv
*dst
, u64 num_bytes
,
165 ret
= btrfs_block_rsv_use_bytes(src
, num_bytes
);
169 btrfs_block_rsv_add_bytes(dst
, num_bytes
, update_size
);
/*
 * Reset @rsv to a known-empty state: every field is zeroed and the spinlock
 * is initialized.
 *
 * NOTE(review): where @type is stored is not visible in this extract.
 */
173 void btrfs_init_block_rsv(struct btrfs_block_rsv
*rsv
, unsigned short type
)
175 memset(rsv
, 0, sizeof(*rsv
));
176 spin_lock_init(&rsv
->lock
);
/*
 * Initialize @rsv via btrfs_init_block_rsv() and attach it to the space_info
 * looked up for BTRFS_BLOCK_GROUP_METADATA.
 */
180 void btrfs_init_metadata_block_rsv(struct btrfs_fs_info
*fs_info
,
181 struct btrfs_block_rsv
*rsv
,
184 btrfs_init_block_rsv(rsv
, type
);
185 rsv
->space_info
= btrfs_find_space_info(fs_info
,
186 BTRFS_BLOCK_GROUP_METADATA
);
/*
 * Allocate a block reserve with kmalloc(GFP_NOFS) and initialize it as a
 * metadata reserve of the given type.
 *
 * NOTE(review): the allocation-failure check and the return statement are not
 * visible in this extract.
 */
189 struct btrfs_block_rsv
*btrfs_alloc_block_rsv(struct btrfs_fs_info
*fs_info
,
192 struct btrfs_block_rsv
*block_rsv
;
194 block_rsv
= kmalloc(sizeof(*block_rsv
), GFP_NOFS
);
198 btrfs_init_metadata_block_rsv(fs_info
, block_rsv
, type
);
/*
 * Tear down @rsv: release its entire reservation by passing (u64)-1 with no
 * qgroup out-parameter.
 *
 * NOTE(review): the NULL check on @rsv and the freeing of its memory are not
 * visible in this extract.
 */
202 void btrfs_free_block_rsv(struct btrfs_fs_info
*fs_info
,
203 struct btrfs_block_rsv
*rsv
)
207 btrfs_block_rsv_release(fs_info
, rsv
, (u64
)-1, NULL
);
/*
 * Reserve @num_bytes of metadata space for @block_rsv and grow the reserve.
 *
 * Space is requested through btrfs_reserve_metadata_bytes() with the given
 * @flush mode; the bytes are then credited with update_size == true, so both
 * ->reserved and ->size grow.
 *
 * NOTE(review): the checks around the two calls and the return statement are
 * not visible in this extract.
 */
211 int btrfs_block_rsv_add(struct btrfs_root
*root
,
212 struct btrfs_block_rsv
*block_rsv
, u64 num_bytes
,
213 enum btrfs_reserve_flush_enum flush
)
220 ret
= btrfs_reserve_metadata_bytes(root
, block_rsv
, num_bytes
, flush
);
222 btrfs_block_rsv_add_bytes(block_rsv
, num_bytes
, true);
/*
 * Check, under @block_rsv->lock, whether ->reserved has reached the threshold
 * div_factor(->size, @min_factor).
 *
 * NOTE(review): the value assigned when the threshold is met and the return
 * statement are not visible in this extract; div_factor() is defined
 * elsewhere.
 */
227 int btrfs_block_rsv_check(struct btrfs_block_rsv
*block_rsv
, int min_factor
)
235 spin_lock(&block_rsv
->lock
);
236 num_bytes
= div_factor(block_rsv
->size
, min_factor
);
237 if (block_rsv
->reserved
>= num_bytes
)
239 spin_unlock(&block_rsv
->lock
);
/*
 * Top @block_rsv up towards @min_reserved bytes of ->reserved.
 *
 * The amount to request is worked out under the reserve's lock from
 * @min_reserved and the current ->reserved. The bytes obtained through
 * btrfs_reserve_metadata_bytes() are credited with update_size == false, so
 * ->size is left untouched.
 *
 * NOTE(review): the branch taken when ->reserved already covers @min_reserved,
 * the early exits and the return statement are not visible in this extract.
 */
244 int btrfs_block_rsv_refill(struct btrfs_root
*root
,
245 struct btrfs_block_rsv
*block_rsv
, u64 min_reserved
,
246 enum btrfs_reserve_flush_enum flush
)
254 spin_lock(&block_rsv
->lock
);
255 num_bytes
= min_reserved
;
256 if (block_rsv
->reserved
>= num_bytes
)
259 num_bytes
-= block_rsv
->reserved
;
260 spin_unlock(&block_rsv
->lock
);
265 ret
= btrfs_reserve_metadata_bytes(root
, block_rsv
, num_bytes
, flush
);
267 btrfs_block_rsv_add_bytes(block_rsv
, num_bytes
, false);
/*
 * Release @num_bytes from @block_rsv, choosing which reserve (if any) should
 * absorb the excess, and return what block_rsv_release_bytes() returns.
 *
 * A target is only considered when it shares @block_rsv's space_info (see the
 * comparison below).
 *
 * NOTE(review): the assignments on the delayed_rsv branch and on the
 * space_info-mismatch branch are not visible in this extract.
 */
274 u64
btrfs_block_rsv_release(struct btrfs_fs_info
*fs_info
,
275 struct btrfs_block_rsv
*block_rsv
, u64 num_bytes
,
276 u64
*qgroup_to_release
)
278 struct btrfs_block_rsv
*global_rsv
= &fs_info
->global_block_rsv
;
279 struct btrfs_block_rsv
*delayed_rsv
= &fs_info
->delayed_refs_rsv
;
280 struct btrfs_block_rsv
*target
= NULL
;
283 * If we are the delayed_rsv then push to the global rsv, otherwise dump
284 * into the delayed rsv if it is not full.
286 if (block_rsv
== delayed_rsv
)
288 else if (block_rsv
!= global_rsv
&& !delayed_rsv
->full
)
289 target
= delayed_rsv
;
291 if (target
&& block_rsv
->space_info
!= target
->space_info
)
294 return block_rsv_release_bytes(fs_info
, block_rsv
, target
, num_bytes
,
/*
 * Consume @num_bytes from @block_rsv->reserved, under the reserve's lock, if
 * that much is currently reserved.
 *
 * NOTE(review): the statement run when ->reserved drops below ->size, the
 * failure value and the return statement are not visible in this extract.
 */
298 int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv
*block_rsv
, u64 num_bytes
)
302 spin_lock(&block_rsv
->lock
);
303 if (block_rsv
->reserved
>= num_bytes
) {
304 block_rsv
->reserved
-= num_bytes
;
305 if (block_rsv
->reserved
< block_rsv
->size
)
309 spin_unlock(&block_rsv
->lock
);
/*
 * Credit @num_bytes to @block_rsv->reserved under the reserve's lock.
 *
 * ->size is also increased by @num_bytes on one branch.
 *
 * NOTE(review): the condition guarding the ->size update (presumably
 * @update_size) and the statement run when ->reserved >= ->size are not
 * visible in this extract.
 */
313 void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv
*block_rsv
,
314 u64 num_bytes
, bool update_size
)
316 spin_lock(&block_rsv
->lock
);
317 block_rsv
->reserved
+= num_bytes
;
319 block_rsv
->size
+= num_bytes
;
320 else if (block_rsv
->reserved
>= block_rsv
->size
)
322 spin_unlock(&block_rsv
->lock
);
/*
 * Conditionally move @num_bytes from the global block reserve into @dest.
 *
 * The global reserve's space_info is compared with @dest's before anything is
 * moved. Under the global reserve's lock, the bytes are only taken if
 * ->reserved is at least div_factor(->size, min_factor) + @num_bytes;
 * otherwise the lock is dropped without modifying the reserve. After taking
 * the bytes, ->full is cleared when ->reserved has fallen below ->size, and
 * @dest is credited with update_size == true.
 *
 * NOTE(review): the early-exit statements and return values are not visible
 * in this extract.
 */
325 int btrfs_cond_migrate_bytes(struct btrfs_fs_info
*fs_info
,
326 struct btrfs_block_rsv
*dest
, u64 num_bytes
,
329 struct btrfs_block_rsv
*global_rsv
= &fs_info
->global_block_rsv
;
332 if (global_rsv
->space_info
!= dest
->space_info
)
335 spin_lock(&global_rsv
->lock
);
336 min_bytes
= div_factor(global_rsv
->size
, min_factor
);
337 if (global_rsv
->reserved
< min_bytes
+ num_bytes
) {
338 spin_unlock(&global_rsv
->lock
);
341 global_rsv
->reserved
-= num_bytes
;
342 if (global_rsv
->reserved
< global_rsv
->size
)
343 global_rsv
->full
= 0;
344 spin_unlock(&global_rsv
->lock
);
346 btrfs_block_rsv_add_bytes(dest
, num_bytes
, true);
/*
 * Recompute the target ->size of the global block reserve and bring
 * ->reserved in line with it.
 *
 * The size starts as the sum of btrfs_root_used() for the extent, csum and
 * tree roots, is raised to at least
 * btrfs_calc_insert_metadata_size(fs_info, min_items), and is capped at
 * SZ_512M. With sinfo->lock and then block_rsv->lock held, ->reserved is set
 * to ->size in both directions, with a call to
 * btrfs_space_info_update_bytes_may_use() each time; after shrinking,
 * btrfs_try_granting_tickets() is called. If the reserve is at least as large
 * as the whole space_info, a forced chunk allocation is requested through
 * sinfo->force_alloc.
 *
 * NOTE(review): the remaining arguments of the bytes_may_use updates, the
 * statement run when ->reserved == ->size, and the definition of min_items
 * are not visible in this extract.
 */
350 void btrfs_update_global_block_rsv(struct btrfs_fs_info
*fs_info
)
352 struct btrfs_block_rsv
*block_rsv
= &fs_info
->global_block_rsv
;
353 struct btrfs_space_info
*sinfo
= block_rsv
->space_info
;
358 * The global block rsv is based on the size of the extent tree, the
359 * checksum tree and the root tree. If the fs is empty we want to set
360 * it to a minimal amount for safety.
362 num_bytes
= btrfs_root_used(&fs_info
->extent_root
->root_item
) +
363 btrfs_root_used(&fs_info
->csum_root
->root_item
) +
364 btrfs_root_used(&fs_info
->tree_root
->root_item
);
367 * We at a minimum are going to modify the csum root, the tree root, and
373 * But we also want to reserve enough space so we can do the fallback
374 * global reserve for an unlink, which is an additional 5 items (see the
375 * comment in __unlink_start_trans for what we're modifying.)
377 * But we also need space for the delayed ref updates from the unlink,
378 * so its 10, 5 for the actual operation, and 5 for the delayed ref
383 num_bytes
= max_t(u64
, num_bytes
,
384 btrfs_calc_insert_metadata_size(fs_info
, min_items
));
/* Lock order: space_info first, then the block reserve. */
386 spin_lock(&sinfo
->lock
);
387 spin_lock(&block_rsv
->lock
);
389 block_rsv
->size
= min_t(u64
, num_bytes
, SZ_512M
);
/* Under-reserved: num_bytes becomes the shortfall and ->reserved is raised. */
391 if (block_rsv
->reserved
< block_rsv
->size
) {
392 num_bytes
= block_rsv
->size
- block_rsv
->reserved
;
393 btrfs_space_info_update_bytes_may_use(fs_info
, sinfo
,
395 block_rsv
->reserved
= block_rsv
->size
;
/* Over-reserved: num_bytes becomes the excess and ->reserved is lowered. */
396 } else if (block_rsv
->reserved
> block_rsv
->size
) {
397 num_bytes
= block_rsv
->reserved
- block_rsv
->size
;
398 btrfs_space_info_update_bytes_may_use(fs_info
, sinfo
,
400 block_rsv
->reserved
= block_rsv
->size
;
401 btrfs_try_granting_tickets(fs_info
, sinfo
);
404 if (block_rsv
->reserved
== block_rsv
->size
)
409 if (block_rsv
->size
>= sinfo
->total_bytes
)
410 sinfo
->force_alloc
= CHUNK_ALLOC_FORCE
;
411 spin_unlock(&block_rsv
->lock
);
412 spin_unlock(&sinfo
->lock
);
/*
 * Wire up the filesystem-wide block reserves.
 *
 * The chunk reserve is attached to the SYSTEM space_info; the global, trans,
 * empty, delayed and delayed-refs reserves are attached to the METADATA
 * space_info. Each tree root is then pointed at the reserve it allocates
 * from:
 *   extent root, csum root                  -> delayed_refs_rsv
 *   dev root, tree root, quota root (if any) -> global_block_rsv
 *   chunk root                              -> chunk_block_rsv
 * Finally the global reserve is sized with btrfs_update_global_block_rsv().
 */
415 void btrfs_init_global_block_rsv(struct btrfs_fs_info
*fs_info
)
417 struct btrfs_space_info
*space_info
;
419 space_info
= btrfs_find_space_info(fs_info
, BTRFS_BLOCK_GROUP_SYSTEM
);
420 fs_info
->chunk_block_rsv
.space_info
= space_info
;
422 space_info
= btrfs_find_space_info(fs_info
, BTRFS_BLOCK_GROUP_METADATA
);
423 fs_info
->global_block_rsv
.space_info
= space_info
;
424 fs_info
->trans_block_rsv
.space_info
= space_info
;
425 fs_info
->empty_block_rsv
.space_info
= space_info
;
426 fs_info
->delayed_block_rsv
.space_info
= space_info
;
427 fs_info
->delayed_refs_rsv
.space_info
= space_info
;
429 fs_info
->extent_root
->block_rsv
= &fs_info
->delayed_refs_rsv
;
430 fs_info
->csum_root
->block_rsv
= &fs_info
->delayed_refs_rsv
;
431 fs_info
->dev_root
->block_rsv
= &fs_info
->global_block_rsv
;
432 fs_info
->tree_root
->block_rsv
= &fs_info
->global_block_rsv
;
/* The quota root is optional, so it is only wired up when present. */
433 if (fs_info
->quota_root
)
434 fs_info
->quota_root
->block_rsv
= &fs_info
->global_block_rsv
;
435 fs_info
->chunk_root
->block_rsv
= &fs_info
->chunk_block_rsv
;
437 btrfs_update_global_block_rsv(fs_info
);
/*
 * Release the whole global block reserve by passing (u64)-1, then WARN if any
 * of the trans, chunk, delayed or delayed-refs reserves still has a non-zero
 * ->size or ->reserved.
 *
 * NOTE(review): the final argument of the release call is not visible in this
 * extract.
 */
440 void btrfs_release_global_block_rsv(struct btrfs_fs_info
*fs_info
)
442 btrfs_block_rsv_release(fs_info
, &fs_info
->global_block_rsv
, (u64
)-1,
444 WARN_ON(fs_info
->trans_block_rsv
.size
> 0);
445 WARN_ON(fs_info
->trans_block_rsv
.reserved
> 0);
446 WARN_ON(fs_info
->chunk_block_rsv
.size
> 0);
447 WARN_ON(fs_info
->chunk_block_rsv
.reserved
> 0);
448 WARN_ON(fs_info
->delayed_block_rsv
.size
> 0);
449 WARN_ON(fs_info
->delayed_block_rsv
.reserved
> 0);
450 WARN_ON(fs_info
->delayed_refs_rsv
.reserved
> 0);
451 WARN_ON(fs_info
->delayed_refs_rsv
.size
> 0);
/*
 * Pick the block reserve that an allocation for @root within @trans should
 * draw from.
 *
 * trans->block_rsv is selected for roots with BTRFS_ROOT_SHAREABLE set, for
 * the csum root while trans->adding_csums is set, and for the uuid root.
 * root->block_rsv and &fs_info->empty_block_rsv are the other candidates.
 *
 * NOTE(review): the conditions guarding the root->block_rsv and
 * empty_block_rsv assignments (presumably "still NULL" fallbacks) and the
 * return statement are not visible in this extract.
 */
454 static struct btrfs_block_rsv
*get_block_rsv(
455 const struct btrfs_trans_handle
*trans
,
456 const struct btrfs_root
*root
)
458 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
459 struct btrfs_block_rsv
*block_rsv
= NULL
;
461 if (test_bit(BTRFS_ROOT_SHAREABLE
, &root
->state
) ||
462 (root
== fs_info
->csum_root
&& trans
->adding_csums
) ||
463 (root
== fs_info
->uuid_root
))
464 block_rsv
= trans
->block_rsv
;
467 block_rsv
= root
->block_rsv
;
470 block_rsv
= &fs_info
->empty_block_rsv
;
475 struct btrfs_block_rsv
*btrfs_use_block_rsv(struct btrfs_trans_handle
*trans
,
476 struct btrfs_root
*root
,
479 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
480 struct btrfs_block_rsv
*block_rsv
;
481 struct btrfs_block_rsv
*global_rsv
= &fs_info
->global_block_rsv
;
483 bool global_updated
= false;
485 block_rsv
= get_block_rsv(trans
, root
);
487 if (unlikely(block_rsv
->size
== 0))
490 ret
= btrfs_block_rsv_use_bytes(block_rsv
, blocksize
);
494 if (block_rsv
->failfast
)
497 if (block_rsv
->type
== BTRFS_BLOCK_RSV_GLOBAL
&& !global_updated
) {
498 global_updated
= true;
499 btrfs_update_global_block_rsv(fs_info
);
504 * The global reserve still exists to save us from ourselves, so don't
505 * warn_on if we are short on our delayed refs reserve.
507 if (block_rsv
->type
!= BTRFS_BLOCK_RSV_DELREFS
&&
508 btrfs_test_opt(fs_info
, ENOSPC_DEBUG
)) {
509 static DEFINE_RATELIMIT_STATE(_rs
,
510 DEFAULT_RATELIMIT_INTERVAL
* 10,
511 /*DEFAULT_RATELIMIT_BURST*/ 1);
512 if (__ratelimit(&_rs
))
514 "BTRFS: block rsv returned %d\n", ret
);
517 ret
= btrfs_reserve_metadata_bytes(root
, block_rsv
, blocksize
,
518 BTRFS_RESERVE_NO_FLUSH
);
522 * If we couldn't reserve metadata bytes try and use some from
523 * the global reserve if its space type is the same as the global
526 if (block_rsv
->type
!= BTRFS_BLOCK_RSV_GLOBAL
&&
527 block_rsv
->space_info
== global_rsv
->space_info
) {
528 ret
= btrfs_block_rsv_use_bytes(global_rsv
, blocksize
);