1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2007 Oracle. All rights reserved.
9 #include "inode-item.h"
11 #include "transaction.h"
12 #include "space-info.h"
13 #include "accessors.h"
14 #include "extent-tree.h"
15 #include "file-item.h"
/*
 * Walk the inode refs packed back to back in one leaf item (selected by
 * slot) and test each entry against name->name.
 *
 * Each entry is a struct btrfs_inode_ref header immediately followed by its
 * name bytes, so the cursor advances by sizeof(*ref) plus the name length
 * read from the header.
 *
 * NOTE(review): this view is missing lines (the slot parameter, several
 * local declarations, the tail of the comparison and every return
 * statement), so the exact match condition and the return values are not
 * described here.
 */
17 struct btrfs_inode_ref
*btrfs_find_name_in_backref(const struct extent_buffer
*leaf
,
19 const struct fscrypt_str
*name
)
21 struct btrfs_inode_ref
*ref
;
23 unsigned long name_ptr
;
28 item_size
= btrfs_item_size(leaf
, slot
);
29 ptr
= btrfs_item_ptr_offset(leaf
, slot
);
30 while (cur_offset
< item_size
) {
31 ref
= (struct btrfs_inode_ref
*)(ptr
+ cur_offset
);
32 len
= btrfs_inode_ref_name_len(leaf
, ref
);
/* The name bytes start right after the fixed-size ref header. */
33 name_ptr
= (unsigned long)(ref
+ 1);
/* Step the cursor past this entry: header plus name. */
34 cur_offset
+= len
+ sizeof(*ref
);
37 if (memcmp_extent_buffer(leaf
, name
->name
, name_ptr
,
/*
 * Scan the extended inode refs stored in the item at @slot of @leaf.
 *
 * An entry is tested on three things: its name length must equal
 * @name->len, its parent must equal @ref_objectid, and its name bytes are
 * passed to memcmp_extent_buffer() together with @name->name.
 * Entries are laid out back to back (a struct btrfs_inode_extref followed
 * by its name), so the cursor advances by sizeof(*extref) plus the name
 * length.
 *
 * NOTE(review): lines are missing from this view (local declarations, the
 * end of the comparison, the return statements), so return values are not
 * described here.
 */
44 struct btrfs_inode_extref
*btrfs_find_name_in_ext_backref(
45 const struct extent_buffer
*leaf
, int slot
, u64 ref_objectid
,
46 const struct fscrypt_str
*name
)
48 struct btrfs_inode_extref
*extref
;
50 unsigned long name_ptr
;
55 item_size
= btrfs_item_size(leaf
, slot
);
56 ptr
= btrfs_item_ptr_offset(leaf
, slot
);
59 * Search all extended backrefs in this item. We're only
60 * looking through any collisions so most of the time this is
61 * just going to compare against one buffer. If all is well,
62 * we'll return success and the inode ref object.
64 while (cur_offset
< item_size
) {
65 extref
= (struct btrfs_inode_extref
*) (ptr
+ cur_offset
);
66 name_ptr
= (unsigned long)(&extref
->name
);
67 ref_name_len
= btrfs_inode_extref_name_len(leaf
, extref
);
69 if (ref_name_len
== name
->len
&&
70 btrfs_inode_extref_parent(leaf
, extref
) == ref_objectid
&&
71 (memcmp_extent_buffer(leaf
, name
->name
, name_ptr
,
75 cur_offset
+= ref_name_len
+ sizeof(*extref
);
80 /* Returns NULL if no extref found */
/*
 * Look up the extended inode ref item for (@inode_objectid, @ref_objectid,
 * @name) and return the matching entry inside it.
 *
 * The search key is (inode_objectid, BTRFS_INODE_EXTREF_KEY,
 * btrfs_extref_hash(ref_objectid, name->name, name->len)). The tree is
 * searched with btrfs_search_slot() using @ins_len and cow, and the result
 * comes from btrfs_find_name_in_ext_backref() on the leaf and slot that
 * @path ends up pointing at.
 *
 * NOTE(review): the end of the parameter list, the local declarations and
 * the handling of the search result are not visible in this view.
 */
81 struct btrfs_inode_extref
*
82 btrfs_lookup_inode_extref(struct btrfs_trans_handle
*trans
,
83 struct btrfs_root
*root
,
84 struct btrfs_path
*path
,
85 const struct fscrypt_str
*name
,
86 u64 inode_objectid
, u64 ref_objectid
, int ins_len
,
92 key
.objectid
= inode_objectid
;
93 key
.type
= BTRFS_INODE_EXTREF_KEY
;
94 key
.offset
= btrfs_extref_hash(ref_objectid
, name
->name
, name
->len
);
96 ret
= btrfs_search_slot(trans
, root
, &key
, path
, ins_len
, cow
);
101 return btrfs_find_name_in_ext_backref(path
->nodes
[0], path
->slots
[0],
/*
 * Remove one extended inode ref entry, identified by @name and
 * @ref_objectid, from the extref item of inode @inode_objectid.
 *
 * The item is found with key (inode_objectid, BTRFS_INODE_EXTREF_KEY,
 * btrfs_extref_hash(ref_objectid, name->name, name->len)), searched with
 * ins_len -1 and cow 1. The entry inside the item is located with
 * btrfs_find_name_in_ext_backref(). If the entry is the whole item
 * (del_len == item_size) the item is deleted. Otherwise the bytes after the
 * entry are moved down over it and the item is truncated by del_len.
 * The index of the entry is stored through *index.
 *
 * NOTE(review): lines are missing from this view (the end of the parameter
 * list, some declarations, error checks, labels, the return). The
 * conditions guarding btrfs_abort_transaction(trans, -ENOENT) and the
 * *index store are not visible.
 */
106 static int btrfs_del_inode_extref(struct btrfs_trans_handle
*trans
,
107 struct btrfs_root
*root
,
108 const struct fscrypt_str
*name
,
109 u64 inode_objectid
, u64 ref_objectid
,
112 struct btrfs_path
*path
;
113 struct btrfs_key key
;
114 struct btrfs_inode_extref
*extref
;
115 struct extent_buffer
*leaf
;
117 int del_len
= name
->len
+ sizeof(*extref
);
119 unsigned long item_start
;
122 key
.objectid
= inode_objectid
;
123 key
.type
= BTRFS_INODE_EXTREF_KEY
;
124 key
.offset
= btrfs_extref_hash(ref_objectid
, name
->name
, name
->len
);
126 path
= btrfs_alloc_path();
130 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
137 * Sanity check - did we find the right item for this name?
138 * This should always succeed so error here will make the FS
141 extref
= btrfs_find_name_in_ext_backref(path
->nodes
[0], path
->slots
[0],
144 btrfs_abort_transaction(trans
, -ENOENT
);
149 leaf
= path
->nodes
[0];
150 item_size
= btrfs_item_size(leaf
, path
->slots
[0]);
152 *index
= btrfs_inode_extref_index(leaf
, extref
);
154 if (del_len
== item_size
) {
156 * Common case only one ref in the item, remove the
159 ret
= btrfs_del_item(trans
, root
, path
);
163 ptr
= (unsigned long)extref
;
164 item_start
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
/* Close the gap: slide the bytes that follow the entry down by del_len. */
166 memmove_extent_buffer(leaf
, ptr
, ptr
+ del_len
,
167 item_size
- (ptr
+ del_len
- item_start
));
169 btrfs_truncate_item(trans
, path
, item_size
- del_len
, 1);
172 btrfs_free_path(path
);
/*
 * Remove the inode ref for @name from the BTRFS_INODE_REF_KEY item keyed
 * by (@inode_objectid, @ref_objectid).
 *
 * The item is searched with ins_len -1 and cow 1 and the entry is located
 * with btrfs_find_name_in_backref(). When the entry is the whole item
 * (del_len == item_size) the item is deleted. Otherwise the trailing bytes
 * are moved over the entry and the item is shrunk by sub_item_len, which is
 * computed with the same expression as del_len.
 * The index of the entry is written through @index.
 *
 * After the path is freed, if search_ext_refs is set, the request is
 * forwarded to btrfs_del_inode_extref() with the same arguments.
 *
 * NOTE(review): lines are missing from this view (some declarations, the
 * bodies of the search-result branches, the guard on the *index store,
 * labels and the final return). Where search_ext_refs gets set is not
 * visible.
 */
177 int btrfs_del_inode_ref(struct btrfs_trans_handle
*trans
,
178 struct btrfs_root
*root
, const struct fscrypt_str
*name
,
179 u64 inode_objectid
, u64 ref_objectid
, u64
*index
)
181 struct btrfs_path
*path
;
182 struct btrfs_key key
;
183 struct btrfs_inode_ref
*ref
;
184 struct extent_buffer
*leaf
;
186 unsigned long item_start
;
190 int search_ext_refs
= 0;
191 int del_len
= name
->len
+ sizeof(*ref
);
193 key
.objectid
= inode_objectid
;
194 key
.offset
= ref_objectid
;
195 key
.type
= BTRFS_INODE_REF_KEY
;
197 path
= btrfs_alloc_path();
201 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
206 } else if (ret
< 0) {
210 ref
= btrfs_find_name_in_backref(path
->nodes
[0], path
->slots
[0], name
);
216 leaf
= path
->nodes
[0];
217 item_size
= btrfs_item_size(leaf
, path
->slots
[0]);
220 *index
= btrfs_inode_ref_index(leaf
, ref
);
222 if (del_len
== item_size
) {
223 ret
= btrfs_del_item(trans
, root
, path
);
226 ptr
= (unsigned long)ref
;
227 sub_item_len
= name
->len
+ sizeof(*ref
);
228 item_start
= btrfs_item_ptr_offset(leaf
, path
->slots
[0]);
229 memmove_extent_buffer(leaf
, ptr
, ptr
+ sub_item_len
,
230 item_size
- (ptr
+ sub_item_len
- item_start
));
231 btrfs_truncate_item(trans
, path
, item_size
- sub_item_len
, 1);
233 btrfs_free_path(path
);
235 if (search_ext_refs
) {
237 * No refs were found, or we could not find the
238 * name in our ref array. Find and remove the extended
241 return btrfs_del_inode_extref(trans
, root
, name
,
242 inode_objectid
, ref_objectid
, index
);
249 * Insert an extended inode ref into a tree.
251 * The caller must have checked against BTRFS_LINK_MAX already.
/*
 * Add one extended inode ref entry (name, index, parent @ref_objectid) for
 * inode @inode_objectid.
 *
 * An empty item is inserted at key (inode_objectid, BTRFS_INODE_EXTREF_KEY,
 * btrfs_extref_hash(ref_objectid, name->name, name->len)). When the insert
 * reports -EEXIST, the existing item is checked with
 * btrfs_find_name_in_ext_backref() and grown by ins_len with
 * btrfs_extend_item(). The new entry is then written into the last ins_len
 * bytes of the item: the name_len, index and parent fields followed by the
 * name bytes. Finally the leaf is marked dirty.
 *
 * NOTE(review): lines are missing from this view (the end of the parameter
 * list, the size argument of the insert, the result of the duplicate-name
 * check, other error paths, labels and the return).
 */
253 static int btrfs_insert_inode_extref(struct btrfs_trans_handle
*trans
,
254 struct btrfs_root
*root
,
255 const struct fscrypt_str
*name
,
256 u64 inode_objectid
, u64 ref_objectid
,
259 struct btrfs_inode_extref
*extref
;
261 int ins_len
= name
->len
+ sizeof(*extref
);
263 struct btrfs_path
*path
;
264 struct btrfs_key key
;
265 struct extent_buffer
*leaf
;
267 key
.objectid
= inode_objectid
;
268 key
.type
= BTRFS_INODE_EXTREF_KEY
;
269 key
.offset
= btrfs_extref_hash(ref_objectid
, name
->name
, name
->len
);
271 path
= btrfs_alloc_path();
275 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
277 if (ret
== -EEXIST
) {
278 if (btrfs_find_name_in_ext_backref(path
->nodes
[0],
284 btrfs_extend_item(trans
, path
, ins_len
);
290 leaf
= path
->nodes
[0];
/* Point at the tail of the item: the new entry occupies its last ins_len bytes. */
291 ptr
= (unsigned long)btrfs_item_ptr(leaf
, path
->slots
[0], char);
292 ptr
+= btrfs_item_size(leaf
, path
->slots
[0]) - ins_len
;
293 extref
= (struct btrfs_inode_extref
*)ptr
;
295 btrfs_set_inode_extref_name_len(path
->nodes
[0], extref
, name
->len
);
296 btrfs_set_inode_extref_index(path
->nodes
[0], extref
, index
);
297 btrfs_set_inode_extref_parent(path
->nodes
[0], extref
, ref_objectid
);
299 ptr
= (unsigned long)&extref
->name
;
300 write_extent_buffer(path
->nodes
[0], name
->name
, ptr
, name
->len
);
301 btrfs_mark_buffer_dirty(trans
, path
->nodes
[0]);
304 btrfs_free_path(path
);
308 /* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
/*
 * Add an inode ref (name plus @index) to the BTRFS_INODE_REF_KEY item keyed
 * by (@inode_objectid, @ref_objectid).
 *
 * path->skip_release_on_error is set before the insert. The error branches
 * below still read path->nodes[0] and path->slots[0], so this presumably
 * keeps the path usable after a failed insert; confirm against the
 * btrfs_path documentation.
 *
 * Visible branches after btrfs_insert_empty_item():
 *  - ret == -EEXIST: the name is looked up with
 *    btrfs_find_name_in_backref(). The item is then extended by ins_len and
 *    the new ref is placed at the old end of the item (item start plus
 *    old_size).
 *  - ret < 0 with ret == -EOVERFLOW: the name is looked up as well; how the
 *    result is handled is not visible.
 *  - a further branch whose condition is not visible takes the ref at the
 *    start of the item.
 * In the branches that set up a ref, name_len and index are stored and ptr
 * is set to the byte just past the ref header. The name bytes are then
 * written at ptr and the leaf is marked dirty.
 *
 * After the path is freed, a ret of -EMLINK is retried through
 * btrfs_insert_inode_extref() when the superblock incompat flags contain
 * BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF.
 *
 * NOTE(review): several lines are missing from this view (declarations,
 * branch bodies, labels, the final return).
 */
309 int btrfs_insert_inode_ref(struct btrfs_trans_handle
*trans
,
310 struct btrfs_root
*root
, const struct fscrypt_str
*name
,
311 u64 inode_objectid
, u64 ref_objectid
, u64 index
)
313 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
314 struct btrfs_path
*path
;
315 struct btrfs_key key
;
316 struct btrfs_inode_ref
*ref
;
319 int ins_len
= name
->len
+ sizeof(*ref
);
321 key
.objectid
= inode_objectid
;
322 key
.offset
= ref_objectid
;
323 key
.type
= BTRFS_INODE_REF_KEY
;
325 path
= btrfs_alloc_path();
329 path
->skip_release_on_error
= 1;
330 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
332 if (ret
== -EEXIST
) {
334 ref
= btrfs_find_name_in_backref(path
->nodes
[0], path
->slots
[0],
339 old_size
= btrfs_item_size(path
->nodes
[0], path
->slots
[0]);
340 btrfs_extend_item(trans
, path
, ins_len
);
341 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
342 struct btrfs_inode_ref
);
343 ref
= (struct btrfs_inode_ref
*)((unsigned long)ref
+ old_size
);
344 btrfs_set_inode_ref_name_len(path
->nodes
[0], ref
, name
->len
);
345 btrfs_set_inode_ref_index(path
->nodes
[0], ref
, index
);
346 ptr
= (unsigned long)(ref
+ 1);
348 } else if (ret
< 0) {
349 if (ret
== -EOVERFLOW
) {
350 if (btrfs_find_name_in_backref(path
->nodes
[0],
359 ref
= btrfs_item_ptr(path
->nodes
[0], path
->slots
[0],
360 struct btrfs_inode_ref
);
361 btrfs_set_inode_ref_name_len(path
->nodes
[0], ref
, name
->len
);
362 btrfs_set_inode_ref_index(path
->nodes
[0], ref
, index
);
363 ptr
= (unsigned long)(ref
+ 1);
365 write_extent_buffer(path
->nodes
[0], name
->name
, ptr
, name
->len
);
366 btrfs_mark_buffer_dirty(trans
, path
->nodes
[0]);
369 btrfs_free_path(path
);
371 if (ret
== -EMLINK
) {
372 struct btrfs_super_block
*disk_super
= fs_info
->super_copy
;
373 /* We ran out of space in the ref array. Need to
374 * add an extended ref. */
375 if (btrfs_super_incompat_flags(disk_super
)
376 & BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF
)
377 ret
= btrfs_insert_inode_extref(trans
, root
, name
,
379 ref_objectid
, index
);
/*
 * Insert an empty item, sized for one struct btrfs_inode_item, into @root
 * through @path. The key uses @objectid and BTRFS_INODE_ITEM_KEY.
 *
 * NOTE(review): the key.offset assignment, the declaration of ret and the
 * return statement are not visible in this view.
 */
385 int btrfs_insert_empty_inode(struct btrfs_trans_handle
*trans
,
386 struct btrfs_root
*root
,
387 struct btrfs_path
*path
, u64 objectid
)
389 struct btrfs_key key
;
391 key
.objectid
= objectid
;
392 key
.type
= BTRFS_INODE_ITEM_KEY
;
395 ret
= btrfs_insert_empty_item(trans
, root
, path
, &key
,
396 sizeof(struct btrfs_inode_item
));
/*
 * Search @root for the key in @location, leaving the position in @path.
 *
 * ins_len passed to btrfs_search_slot() is -1 when @mod is negative and 0
 * otherwise.
 *
 * Special case: when the search returns a value greater than 0, @location
 * is a BTRFS_ROOT_ITEM_KEY with offset (u64)-1, and the returned slot is
 * not 0, the key in the previous slot is read and its objectid and type are
 * compared with those of @location.
 *
 * NOTE(review): the declarations of ret, cow and slot, the body executed
 * when the previous key matches, and the return statement are not visible
 * in this view.
 */
400 int btrfs_lookup_inode(struct btrfs_trans_handle
*trans
, struct btrfs_root
401 *root
, struct btrfs_path
*path
,
402 struct btrfs_key
*location
, int mod
)
404 int ins_len
= mod
< 0 ? -1 : 0;
408 struct extent_buffer
*leaf
;
409 struct btrfs_key found_key
;
411 ret
= btrfs_search_slot(trans
, root
, location
, path
, ins_len
, cow
);
412 if (ret
> 0 && location
->type
== BTRFS_ROOT_ITEM_KEY
&&
413 location
->offset
== (u64
)-1 && path
->slots
[0] != 0) {
414 slot
= path
->slots
[0] - 1;
415 leaf
= path
->nodes
[0];
416 btrfs_item_key_to_cpu(leaf
, &found_key
, slot
);
417 if (found_key
.objectid
== location
->objectid
&&
418 found_key
.type
== location
->type
) {
/*
 * Tracing helper for the truncate path. It calls
 * trace_btrfs_truncate_show_fi_inline() when @extent_type is
 * BTRFS_FILE_EXTENT_INLINE. It also contains a call to
 * trace_btrfs_truncate_show_fi_regular(), presumably the else branch.
 *
 * NOTE(review): lines are missing from this view, including whatever sits
 * between the signature and the first test and the remaining arguments of
 * the inline tracepoint call.
 */
426 static inline void btrfs_trace_truncate(const struct btrfs_inode
*inode
,
427 const struct extent_buffer
*leaf
,
428 const struct btrfs_file_extent_item
*fi
,
429 u64 offset
, int extent_type
, int slot
)
433 if (extent_type
== BTRFS_FILE_EXTENT_INLINE
)
434 trace_btrfs_truncate_show_fi_inline(inode
, leaf
, fi
, slot
,
437 trace_btrfs_truncate_show_fi_regular(inode
, leaf
, fi
, offset
);
441 * Remove inode items from a given root.
443 * @trans: A transaction handle.
444 * @root: The root from which to remove items.
445 * @inode: The inode whose items we want to remove.
446 * @control: The btrfs_truncate_control to control how and what we
449 * Remove all keys associated with the inode from the given root that have a key
450 * with a type greater than or equal to @control->min_type. When that type is
451 * BTRFS_EXTENT_DATA_KEY, only remove file extent items that have an offset value
452 * greater than or equal to @control->new_size. If a file extent item that starts
453 * before @control->new_size and ends after it is found, its length is adjusted.
455 * Returns: 0 on success, < 0 on error, and BTRFS_NEED_TRUNCATE_BLOCK when
456 * @control->min_type is BTRFS_EXTENT_DATA_KEY and the caller must truncate the last block.
458 int btrfs_truncate_inode_items(struct btrfs_trans_handle
*trans
,
459 struct btrfs_root
*root
,
460 struct btrfs_truncate_control
*control
)
/*
 * Implementation overview, limited to what the visible lines show.
 *
 * Setup: control->last_size starts at new_size and control->sub_bytes at 0.
 * The path uses READA_BACK readahead and the search key starts at
 * (control->ino, ..., offset (u64)-1), searched with ins_len -1 and cow 1.
 *
 * Per item: the found key is checked against control->ino and
 * control->min_type. For BTRFS_EXTENT_DATA_KEY items, item_end is extended
 * by the inline extent's ram_bytes (the non-inline computation is only
 * partly visible).
 *
 * Non-inline extents: one branch rewrites num_bytes from an ALIGN() of
 * new_size to the sector size and sets clear_start to
 * ALIGN(new_size, sectorsize). Another branch takes num_dec from the
 * extent's num_bytes. In both, control->sub_bytes grows by num_dec when
 * extent_start is not 0.
 *
 * Inline extents: when encryption, other_encoding and compression are all
 * 0 (plus a leading condition that is not visible), ram_bytes is set to
 * new_size - found_key.offset and the item is truncated to the matching
 * inline size. Otherwise, if !del_item, ret becomes
 * BTRFS_NEED_TRUNCATE_BLOCK.
 *
 * Deletion batching: pending_del_slot and pending_del_nr track slots to
 * delete, and the ASSERT requires the current slot to sit directly below
 * the pending one. Batches are removed with btrfs_del_items().
 *
 * Extent refs: when del_item is set, extent_start is not 0 and
 * control->skip_ref_updates is false, a BTRFS_DROP_DELAYED_REF is built and
 * passed to btrfs_free_extent(). A failure there aborts the transaction.
 *
 * Throttling: be_nice is tied to the BTRFS_ROOT_SHAREABLE test near the top
 * (the assignment itself is not visible). It gates the
 * btrfs_should_end_transaction() check and the delayed-refs refill through
 * btrfs_delayed_refs_rsv_refill(BTRFS_RESERVE_NO_FLUSH).
 *
 * Exit: any remaining pending deletions are flushed when ret >= 0, and
 * control->last_size is set to new_size when ret is 0 and last_size is
 * larger than new_size.
 *
 * NOTE(review): many lines are missing from this view (declarations,
 * labels, gotos, braces, several conditions, the return), so control flow
 * between the visible statements is only partly known.
 */
462 struct btrfs_fs_info
*fs_info
= root
->fs_info
;
463 struct btrfs_path
*path
;
464 struct extent_buffer
*leaf
;
465 struct btrfs_file_extent_item
*fi
;
466 struct btrfs_key key
;
467 struct btrfs_key found_key
;
468 u64 new_size
= control
->new_size
;
469 u64 extent_num_bytes
= 0;
470 u64 extent_offset
= 0;
472 u32 found_type
= (u8
)-1;
474 int pending_del_nr
= 0;
475 int pending_del_slot
= 0;
476 int extent_type
= -1;
478 u64 bytes_deleted
= 0;
479 bool be_nice
= false;
481 ASSERT(control
->inode
|| !control
->clear_extent_range
);
482 ASSERT(new_size
== 0 || control
->min_type
== BTRFS_EXTENT_DATA_KEY
);
484 control
->last_size
= new_size
;
485 control
->sub_bytes
= 0;
488 * For shareable roots we want to back off from time to time, this turns
489 * out to be subvolume roots, reloc roots, and data reloc roots.
491 if (test_bit(BTRFS_ROOT_SHAREABLE
, &root
->state
))
494 path
= btrfs_alloc_path();
497 path
->reada
= READA_BACK
;
499 key
.objectid
= control
->ino
;
500 key
.offset
= (u64
)-1;
505 * With a 16K leaf size and 128MiB extents, you can actually queue up a
506 * huge file in a single leaf. Most of the time that bytes_deleted is
507 * > 0, it will be huge by the time we get here
509 if (be_nice
&& bytes_deleted
> SZ_32M
&&
510 btrfs_should_end_transaction(trans
)) {
515 ret
= btrfs_search_slot(trans
, root
, &key
, path
, -1, 1);
521 /* There are no items in the tree for us to truncate, we're done */
522 if (path
->slots
[0] == 0)
528 u64 clear_start
= 0, clear_len
= 0, extent_start
= 0;
529 bool refill_delayed_refs_rsv
= false;
532 leaf
= path
->nodes
[0];
533 btrfs_item_key_to_cpu(leaf
, &found_key
, path
->slots
[0]);
534 found_type
= found_key
.type
;
536 if (found_key
.objectid
!= control
->ino
)
539 if (found_type
< control
->min_type
)
542 item_end
= found_key
.offset
;
543 if (found_type
== BTRFS_EXTENT_DATA_KEY
) {
544 fi
= btrfs_item_ptr(leaf
, path
->slots
[0],
545 struct btrfs_file_extent_item
);
546 extent_type
= btrfs_file_extent_type(leaf
, fi
);
547 if (extent_type
!= BTRFS_FILE_EXTENT_INLINE
)
549 btrfs_file_extent_num_bytes(leaf
, fi
);
550 else if (extent_type
== BTRFS_FILE_EXTENT_INLINE
)
551 item_end
+= btrfs_file_extent_ram_bytes(leaf
, fi
);
553 btrfs_trace_truncate(control
->inode
, leaf
, fi
,
554 found_key
.offset
, extent_type
,
558 if (found_type
> control
->min_type
) {
561 if (item_end
< new_size
)
563 if (found_key
.offset
>= new_size
)
569 /* FIXME, shrink the extent if the ref count is only 1 */
570 if (found_type
!= BTRFS_EXTENT_DATA_KEY
)
573 control
->extents_found
++;
575 if (extent_type
!= BTRFS_FILE_EXTENT_INLINE
) {
578 clear_start
= found_key
.offset
;
579 extent_start
= btrfs_file_extent_disk_bytenr(leaf
, fi
);
582 btrfs_file_extent_num_bytes(leaf
, fi
);
583 extent_num_bytes
= ALIGN(new_size
-
585 fs_info
->sectorsize
);
586 clear_start
= ALIGN(new_size
, fs_info
->sectorsize
);
588 btrfs_set_file_extent_num_bytes(leaf
, fi
,
590 num_dec
= (orig_num_bytes
- extent_num_bytes
);
591 if (extent_start
!= 0)
592 control
->sub_bytes
+= num_dec
;
593 btrfs_mark_buffer_dirty(trans
, leaf
);
596 btrfs_file_extent_disk_num_bytes(leaf
, fi
);
597 extent_offset
= found_key
.offset
-
598 btrfs_file_extent_offset(leaf
, fi
);
600 /* FIXME blocksize != 4096 */
601 num_dec
= btrfs_file_extent_num_bytes(leaf
, fi
);
602 if (extent_start
!= 0)
603 control
->sub_bytes
+= num_dec
;
606 } else if (extent_type
== BTRFS_FILE_EXTENT_INLINE
) {
608 * We can't truncate inline items that have had
612 btrfs_file_extent_encryption(leaf
, fi
) == 0 &&
613 btrfs_file_extent_other_encoding(leaf
, fi
) == 0 &&
614 btrfs_file_extent_compression(leaf
, fi
) == 0) {
615 u32 size
= (u32
)(new_size
- found_key
.offset
);
617 btrfs_set_file_extent_ram_bytes(leaf
, fi
, size
);
618 size
= btrfs_file_extent_calc_inline_size(size
);
619 btrfs_truncate_item(trans
, path
, size
, 1);
620 } else if (!del_item
) {
622 * We have to bail so the last_size is set to
623 * just before this extent.
625 ret
= BTRFS_NEED_TRUNCATE_BLOCK
;
629 * Inline extents are special, we just treat
630 * them as a full sector worth in the file
631 * extent tree just for simplicity sake.
633 clear_len
= fs_info
->sectorsize
;
636 control
->sub_bytes
+= item_end
+ 1 - new_size
;
640 * We only want to clear the file extent range if we're
641 * modifying the actual inode's mapping, which is just the
642 * normal truncate path.
644 if (control
->clear_extent_range
) {
645 ret
= btrfs_inode_clear_file_extent_range(control
->inode
,
646 clear_start
, clear_len
);
648 btrfs_abort_transaction(trans
, ret
);
654 ASSERT(!pending_del_nr
||
655 ((path
->slots
[0] + 1) == pending_del_slot
));
657 control
->last_size
= found_key
.offset
;
658 if (!pending_del_nr
) {
659 /* No pending yet, add ourselves */
660 pending_del_slot
= path
->slots
[0];
662 } else if (path
->slots
[0] + 1 == pending_del_slot
) {
663 /* Hop on the pending chunk */
665 pending_del_slot
= path
->slots
[0];
668 control
->last_size
= new_size
;
672 if (del_item
&& extent_start
!= 0 && !control
->skip_ref_updates
) {
673 struct btrfs_ref ref
= {
674 .action
= BTRFS_DROP_DELAYED_REF
,
675 .bytenr
= extent_start
,
676 .num_bytes
= extent_num_bytes
,
677 .owning_root
= btrfs_root_id(root
),
678 .ref_root
= btrfs_header_owner(leaf
),
681 bytes_deleted
+= extent_num_bytes
;
683 btrfs_init_data_ref(&ref
, control
->ino
, extent_offset
,
684 btrfs_root_id(root
), false);
685 ret
= btrfs_free_extent(trans
, &ref
);
687 btrfs_abort_transaction(trans
, ret
);
690 if (be_nice
&& btrfs_check_space_for_delayed_refs(fs_info
))
691 refill_delayed_refs_rsv
= true;
694 if (found_type
== BTRFS_INODE_ITEM_KEY
)
697 if (path
->slots
[0] == 0 ||
698 path
->slots
[0] != pending_del_slot
||
699 refill_delayed_refs_rsv
) {
700 if (pending_del_nr
) {
701 ret
= btrfs_del_items(trans
, root
, path
,
705 btrfs_abort_transaction(trans
, ret
);
710 btrfs_release_path(path
);
713 * We can generate a lot of delayed refs, so we need to
714 * throttle every once and a while and make sure we're
715 * adding enough space to keep up with the work we are
716 * generating. Since we hold a transaction here we
717 * can't flush, and we don't want to FLUSH_LIMIT because
718 * we could have generated too many delayed refs to
719 * actually allocate, so just bail if we're short and
720 * let the normal reservation dance happen higher up.
722 if (refill_delayed_refs_rsv
) {
723 ret
= btrfs_delayed_refs_rsv_refill(fs_info
,
724 BTRFS_RESERVE_NO_FLUSH
);
736 if (ret
>= 0 && pending_del_nr
) {
739 err
= btrfs_del_items(trans
, root
, path
, pending_del_slot
,
742 btrfs_abort_transaction(trans
, err
);
747 ASSERT(control
->last_size
>= new_size
);
748 if (!ret
&& control
->last_size
> new_size
)
749 control
->last_size
= new_size
;
751 btrfs_free_path(path
);