Btrfs progs v4.17.1
[btrfs-progs-unstable/devel.git] / extent-tree.c
blob5d49af5a901e61caa16ab003a834a04a801a54bd
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <stdint.h>
22 #include <math.h>
23 #include "kerncompat.h"
24 #include "radix-tree.h"
25 #include "ctree.h"
26 #include "disk-io.h"
27 #include "print-tree.h"
28 #include "transaction.h"
29 #include "crc32c.h"
30 #include "volumes.h"
31 #include "free-space-cache.h"
32 #include "utils.h"
/* Kinds of deferred extent-tree work (pending_extent_op.type). */
34 #define PENDING_EXTENT_INSERT 0
35 #define PENDING_EXTENT_DELETE 1
36 #define PENDING_BACKREF_UPDATE 2
/*
 * One queued extent-tree change, processed later in the transaction by
 * finish_current_insert()/del_pending_extents() (declared below).
 */
38 struct pending_extent_op {
/* one of the PENDING_* values above */
39 int type;
/* logical start of the extent the op applies to */
40 u64 bytenr;
41 u64 num_bytes;
/* extent flags to record for the new extent item */
42 u64 flags;
/* first key and level of the tree block — used for metadata extents */
43 struct btrfs_disk_key key;
44 int level;
/* Forward declarations for static helpers defined later in this file. */
47 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
48 u64 root_objectid, u64 generation,
49 u64 flags, struct btrfs_disk_key *key,
50 int level, struct btrfs_key *ins);
51 static int __free_extent(struct btrfs_trans_handle *trans,
52 u64 bytenr, u64 num_bytes, u64 parent,
53 u64 root_objectid, u64 owner_objectid,
54 u64 owner_offset, int refs_to_drop);
55 static int finish_current_insert(struct btrfs_trans_handle *trans);
56 static int del_pending_extents(struct btrfs_trans_handle *trans);
57 static struct btrfs_block_group_cache *
58 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
59 *hint, u64 search_start, int data, int owner);
/*
 * Clear the ranges occupied by superblock mirrors inside @cache from the
 * fs-wide free space cache (EXTENT_DIRTY there marks free space), so the
 * allocator never hands out space that overlaps a superblock copy.
 * Always returns 0; a btrfs_rmap_block() failure aborts via BUG_ON.
 */
61 static int remove_sb_from_cache(struct btrfs_root *root,
62 struct btrfs_block_group_cache *cache)
64 u64 bytenr;
65 u64 *logical;
66 int stripe_len;
67 int i, nr, ret;
68 struct btrfs_fs_info *fs_info = root->fs_info;
69 struct extent_io_tree *free_space_cache;
71 free_space_cache = &fs_info->free_space_cache;
/* Walk every possible superblock mirror location. */
72 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
73 bytenr = btrfs_sb_offset(i);
/* Map this sb copy to the logical stripes inside the block group. */
74 ret = btrfs_rmap_block(fs_info, cache->key.objectid, bytenr,
75 &logical, &nr, &stripe_len);
76 BUG_ON(ret);
77 while (nr--) {
78 clear_extent_dirty(free_space_cache, logical[nr],
79 logical[nr] + stripe_len - 1);
/* btrfs_rmap_block() allocated @logical; caller must free it. */
81 kfree(logical);
83 return 0;
/*
 * Populate the free space cache for @block_group by scanning the extent
 * tree: every gap between extent items inside the group's range is marked
 * EXTENT_DIRTY (i.e. free) in fs_info->free_space_cache.  Superblock
 * mirror ranges are removed again afterwards.  Idempotent: returns early
 * if block_group->cached is already set.
 *
 * NOTE(review): the err label also returns 0, so search errors are
 * silently swallowed — presumably intentional best-effort; confirm.
 */
86 static int cache_block_group(struct btrfs_root *root,
87 struct btrfs_block_group_cache *block_group)
89 struct btrfs_path *path;
90 int ret;
91 struct btrfs_key key;
92 struct extent_buffer *leaf;
93 struct extent_io_tree *free_space_cache;
94 int slot;
95 u64 last;
96 u64 hole_size;
98 if (!block_group)
99 return 0;
/* All extent items live in the extent root, whatever @root was. */
101 root = root->fs_info->extent_root;
102 free_space_cache = &root->fs_info->free_space_cache;
104 if (block_group->cached)
105 return 0;
107 path = btrfs_alloc_path();
108 if (!path)
109 return -ENOMEM;
111 path->reada = READA_FORWARD;
/* Never consider space below the primary superblock as free. */
112 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
113 key.objectid = last;
114 key.offset = 0;
115 key.type = 0;
117 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
118 if (ret < 0)
119 goto err;
/* Walk extent items; @last tracks the end of the previous extent. */
121 while(1) {
122 leaf = path->nodes[0];
123 slot = path->slots[0];
124 if (slot >= btrfs_header_nritems(leaf)) {
125 ret = btrfs_next_leaf(root, path);
126 if (ret < 0)
127 goto err;
128 if (ret == 0) {
129 continue;
130 } else {
131 break;
134 btrfs_item_key_to_cpu(leaf, &key, slot);
135 if (key.objectid < block_group->key.objectid) {
136 goto next;
138 if (key.objectid >= block_group->key.objectid +
139 block_group->key.offset) {
140 break;
143 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
144 key.type == BTRFS_METADATA_ITEM_KEY) {
/* Gap between previous extent end and this one is free space. */
145 if (key.objectid > last) {
146 hole_size = key.objectid - last;
147 set_extent_dirty(free_space_cache, last,
148 last + hole_size - 1);
/* Skinny metadata items store the level, not the length, in offset. */
150 if (key.type == BTRFS_METADATA_ITEM_KEY)
151 last = key.objectid + root->fs_info->nodesize;
152 else
153 last = key.objectid + key.offset;
155 next:
156 path->slots[0]++;
/* Trailing hole between the last extent and the end of the group. */
159 if (block_group->key.objectid +
160 block_group->key.offset > last) {
161 hole_size = block_group->key.objectid +
162 block_group->key.offset - last;
163 set_extent_dirty(free_space_cache, last, last + hole_size - 1);
165 remove_sb_from_cache(root, block_group);
166 block_group->cached = 1;
167 err:
168 btrfs_free_path(path);
169 return 0;
172 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
173 btrfs_fs_info *info,
174 u64 bytenr)
176 struct extent_io_tree *block_group_cache;
177 struct btrfs_block_group_cache *block_group = NULL;
178 u64 ptr;
179 u64 start;
180 u64 end;
181 int ret;
183 bytenr = max_t(u64, bytenr,
184 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
185 block_group_cache = &info->block_group_cache;
186 ret = find_first_extent_bit(block_group_cache,
187 bytenr, &start, &end,
188 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
189 BLOCK_GROUP_SYSTEM);
190 if (ret) {
191 return NULL;
193 ret = get_state_private(block_group_cache, start, &ptr);
194 if (ret)
195 return NULL;
197 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
198 return block_group;
201 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
202 btrfs_fs_info *info,
203 u64 bytenr)
205 struct extent_io_tree *block_group_cache;
206 struct btrfs_block_group_cache *block_group = NULL;
207 u64 ptr;
208 u64 start;
209 u64 end;
210 int ret;
212 block_group_cache = &info->block_group_cache;
213 ret = find_first_extent_bit(block_group_cache,
214 bytenr, &start, &end,
215 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
216 BLOCK_GROUP_SYSTEM);
217 if (ret) {
218 return NULL;
220 ret = get_state_private(block_group_cache, start, &ptr);
221 if (ret)
222 return NULL;
224 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
225 if (block_group->key.objectid <= bytenr && bytenr <
226 block_group->key.objectid + block_group->key.offset)
227 return block_group;
228 return NULL;
231 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
233 return (cache->flags & bits) == bits;
/*
 * Find a free range of at least @num bytes for an allocation of class
 * @data, starting the search at *@start_ret within *@cache_ret.  On
 * success returns 0 and stores the start of the range in *@start_ret;
 * *@cache_ret is updated to the block group that was searched.  Moves on
 * to following block groups (wrapping once back to the original search
 * start) when the current one is read-only, of the wrong type, or full.
 * Returns -ENOENT/-ENOSPC via the out label when nothing fits.
 */
236 static int noinline find_search_start(struct btrfs_root *root,
237 struct btrfs_block_group_cache **cache_ret,
238 u64 *start_ret, int num, int data)
240 int ret;
241 struct btrfs_block_group_cache *cache = *cache_ret;
242 u64 last = *start_ret;
243 u64 start = 0;
244 u64 end = 0;
245 u64 search_start = *start_ret;
246 int wrapped = 0;
248 if (!cache)
249 goto out;
250 again:
/* Make sure this group's free space is in the cache before scanning. */
251 ret = cache_block_group(root, cache);
252 if (ret)
253 goto out;
255 last = max(search_start, cache->key.objectid);
256 if (cache->ro || !block_group_bits(cache, data))
257 goto new_group;
/* EXTENT_DIRTY ranges in free_space_cache are the free areas. */
259 while(1) {
260 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
261 last, &start, &end, EXTENT_DIRTY);
262 if (ret) {
263 goto new_group;
266 start = max(last, start);
267 last = end + 1;
/* Range too small for @num bytes — keep scanning this group. */
268 if (last - start < num) {
269 continue;
/* Range would spill past the end of the block group. */
271 if (start + num > cache->key.objectid + cache->key.offset) {
272 goto new_group;
274 *start_ret = start;
275 return 0;
277 out:
278 *start_ret = last;
279 cache = btrfs_lookup_block_group(root->fs_info, search_start);
280 if (!cache) {
281 printk("Unable to find block group for %llu\n",
282 (unsigned long long)search_start);
283 return -ENOENT;
285 return -ENOSPC;
287 new_group:
/* Advance to the block group after the current one. */
288 last = cache->key.objectid + cache->key.offset;
289 wrapped:
290 cache = btrfs_lookup_first_block_group(root->fs_info, last);
291 if (!cache) {
/* Ran off the end of the fs: wrap back to the original start once. */
292 if (!wrapped) {
293 wrapped = 1;
294 last = search_start;
295 goto wrapped;
297 goto out;
299 *cache_ret = cache;
300 goto again;
303 static int block_group_state_bits(u64 flags)
305 int bits = 0;
306 if (flags & BTRFS_BLOCK_GROUP_DATA)
307 bits |= BLOCK_GROUP_DATA;
308 if (flags & BTRFS_BLOCK_GROUP_METADATA)
309 bits |= BLOCK_GROUP_METADATA;
310 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
311 bits |= BLOCK_GROUP_SYSTEM;
312 return bits;
/*
 * Pick a block group to allocate from.  Preference order: the group
 * containing @search_start, then @hint, then a sweep over all groups of
 * the right type.  A group is acceptable when it is not read-only,
 * matches the @data type bits, and its used + pinned bytes are below a
 * div_factor() threshold of its size; a second full-search pass relaxes
 * the threshold to the whole group size.  Returns NULL if nothing fits.
 */
315 static struct btrfs_block_group_cache *
316 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
317 *hint, u64 search_start, int data, int owner)
319 struct btrfs_block_group_cache *cache;
320 struct extent_io_tree *block_group_cache;
321 struct btrfs_block_group_cache *found_group = NULL;
322 struct btrfs_fs_info *info = root->fs_info;
323 u64 used;
324 u64 last = 0;
325 u64 hint_last;
326 u64 start;
327 u64 end;
328 u64 free_check;
329 u64 ptr;
330 int bit;
331 int ret;
332 int full_search = 0;
333 int factor = 10;
335 block_group_cache = &info->block_group_cache;
/* NOTE(review): no-op since factor is already 10 — historical leftover. */
337 if (!owner)
338 factor = 10;
340 bit = block_group_state_bits(data);
/* First choice: the group containing the caller's search hint. */
342 if (search_start) {
343 struct btrfs_block_group_cache *shint;
344 shint = btrfs_lookup_block_group(info, search_start);
345 if (shint && !shint->ro && block_group_bits(shint, data)) {
346 used = btrfs_block_group_used(&shint->item);
347 if (used + shint->pinned <
348 div_factor(shint->key.offset, factor)) {
349 return shint;
/* Second choice: the caller-supplied hint group. */
353 if (hint && !hint->ro && block_group_bits(hint, data)) {
354 used = btrfs_block_group_used(&hint->item);
355 if (used + hint->pinned <
356 div_factor(hint->key.offset, factor)) {
357 return hint;
359 last = hint->key.objectid + hint->key.offset;
360 hint_last = last;
361 } else {
362 if (hint)
363 hint_last = max(hint->key.objectid, search_start);
364 else
365 hint_last = search_start;
367 last = hint_last;
369 again:
/* Sweep all block groups of the requested type starting at @last. */
370 while(1) {
371 ret = find_first_extent_bit(block_group_cache, last,
372 &start, &end, bit);
373 if (ret)
374 break;
376 ret = get_state_private(block_group_cache, start, &ptr);
377 if (ret)
378 break;
380 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
381 last = cache->key.objectid + cache->key.offset;
382 used = btrfs_block_group_used(&cache->item);
384 if (!cache->ro && block_group_bits(cache, data)) {
/* Full search accepts any group with free space at all. */
385 if (full_search)
386 free_check = cache->key.offset;
387 else
388 free_check = div_factor(cache->key.offset,
389 factor);
391 if (used + cache->pinned < free_check) {
392 found_group = cache;
393 goto found;
396 cond_resched();
/* Nothing matched with the strict threshold; retry permissively. */
398 if (!full_search) {
399 last = search_start;
400 full_search = 1;
401 goto again;
403 found:
404 return found_group;
408 * Back reference rules. Back refs have three main goals:
410 * 1) differentiate between all holders of references to an extent so that
411 * when a reference is dropped we can make sure it was a valid reference
412 * before freeing the extent.
414 * 2) Provide enough information to quickly find the holders of an extent
415 * if we notice a given block is corrupted or bad.
417 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
418 * maintenance. This is actually the same as #2, but with a slightly
419 * different use case.
421 * There are two kinds of back refs. The implicit back refs is optimized
422 * for pointers in non-shared tree blocks. For a given pointer in a block,
423 * back refs of this kind provide information about the block's owner tree
424 * and the pointer's key. This information allows us to find the block by
425 * b-tree searching. The full back refs is for pointers in tree blocks not
426 * referenced by their owner trees. The location of tree block is recorded
427 * in the back refs. Actually the full back refs is generic, and can be
428 * used in all cases the implicit back refs is used. The major shortcoming
429 * of the full back refs is its overhead. Every time a tree block gets
430 * COWed, we have to update back refs entry for all pointers in it.
432 * For a newly allocated tree block, we use implicit back refs for
433 * pointers in it. This means most tree related operations only involve
434 * implicit back refs. For a tree block created in old transaction, the
435 * only way to drop a reference to it is COW it. So we can detect the
436 * event that tree block loses its owner tree's reference and do the
437 * back refs conversion.
439 * When a tree block is COW'd through a tree, there are four cases:
441 * The reference count of the block is one and the tree is the block's
442 * owner tree. Nothing to do in this case.
444 * The reference count of the block is one and the tree is not the
445 * block's owner tree. In this case, full back refs is used for pointers
446 * in the block. Remove these full back refs, add implicit back refs for
447 * every pointers in the new block.
449 * The reference count of the block is greater than one and the tree is
450 * the block's owner tree. In this case, implicit back refs is used for
451 * pointers in the block. Add full back refs for every pointers in the
452 * block, increase lower level extents' reference counts. The original
453 * implicit back refs are entailed to the new block.
455 * The reference count of the block is greater than one and the tree is
456 * not the block's owner tree. Add implicit back refs for every pointer in
457 * the new block, increase lower level extents' reference count.
459 * Back Reference Key composing:
461 * The key objectid corresponds to the first byte in the extent,
462 * The key type is used to differentiate between types of back refs.
463 * There are different meanings of the key offset for different types
464 * of back refs.
466 * File extents can be referenced by:
468 * - multiple snapshots, subvolumes, or different generations in one subvol
469 * - different files inside a single subvolume
470 * - different offsets inside a file (bookend extents in file.c)
472 * The extent ref structure for the implicit back refs has fields for:
474 * - Objectid of the subvolume root
475 * - objectid of the file holding the reference
476 * - original offset in the file
477 * - how many bookend extents
479 * The key offset for the implicit back refs is hash of the first
480 * three fields.
482 * The extent ref structure for the full back refs has field for:
484 * - number of pointers in the tree leaf
486 * The key offset for the full back refs is the first byte of
487 * the tree leaf
489 * When a file extent is allocated, The implicit back refs is used.
490 * the fields are filled in:
492 * (root_key.objectid, inode objectid, offset in file, 1)
494 * When a file extent is removed during file truncation, we find the
495 * corresponding implicit back refs and check the following fields:
497 * (btrfs_header_owner(leaf), inode objectid, offset in file)
499 * Btree extents can be referenced by:
501 * - Different subvolumes
503 * Both the implicit back refs and the full back refs for tree blocks
504 * only consist of key. The key offset for the implicit back refs is
505 * objectid of block's owner tree. The key offset for the full back refs
506 * is the first byte of parent block.
508 * When implicit back refs is used, information about the lowest key and
509 * level of the tree block are required. This information is stored in
510 * tree block info structure.
513 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert a v0 extent item at path->slots[0] into the current extent
 * item format, growing the item by @extra_size bytes for the caller's
 * upcoming inline ref.  When @owner is (u64)-1 the owner is discovered
 * by scanning forward for the extent's EXTENT_REF_V0 item.  Metadata
 * extents additionally gain a btrfs_tree_block_info (level from @owner,
 * first key zeroed — see the FIXMEs below).
 */
514 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
515 struct btrfs_root *root,
516 struct btrfs_path *path,
517 u64 owner, u32 extra_size)
519 struct btrfs_extent_item *item;
520 struct btrfs_extent_item_v0 *ei0;
521 struct btrfs_extent_ref_v0 *ref0;
522 struct btrfs_tree_block_info *bi;
523 struct extent_buffer *leaf;
524 struct btrfs_key key;
525 struct btrfs_key found_key;
526 u32 new_size = sizeof(*item);
527 u64 refs;
528 int ret;
530 leaf = path->nodes[0];
531 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
533 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
534 ei0 = btrfs_item_ptr(leaf, path->slots[0],
535 struct btrfs_extent_item_v0);
536 refs = btrfs_extent_refs_v0(leaf, ei0);
/* Owner unknown: find it in the extent's v0 back reference item. */
538 if (owner == (u64)-1) {
539 while (1) {
540 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
541 ret = btrfs_next_leaf(root, path);
542 if (ret < 0)
543 return ret;
544 BUG_ON(ret > 0);
545 leaf = path->nodes[0];
547 btrfs_item_key_to_cpu(leaf, &found_key,
548 path->slots[0]);
549 BUG_ON(key.objectid != found_key.objectid);
550 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
551 path->slots[0]++;
552 continue;
554 ref0 = btrfs_item_ptr(leaf, path->slots[0],
555 struct btrfs_extent_ref_v0);
556 owner = btrfs_ref_objectid_v0(leaf, ref0);
557 break;
560 btrfs_release_path(path);
/* Tree blocks carry an extra btrfs_tree_block_info after the item. */
562 if (owner < BTRFS_FIRST_FREE_OBJECTID)
563 new_size += sizeof(*bi);
565 new_size -= sizeof(*ei0);
/* Re-find the item, reserving room to grow it in place. */
566 ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
567 if (ret < 0)
568 return ret;
569 BUG_ON(ret);
571 ret = btrfs_extend_item(root, path, new_size);
572 BUG_ON(ret);
574 leaf = path->nodes[0];
575 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
576 btrfs_set_extent_refs(leaf, item, refs);
577 /* FIXME: get real generation */
578 btrfs_set_extent_generation(leaf, item, 0);
579 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
580 btrfs_set_extent_flags(leaf, item,
581 BTRFS_EXTENT_FLAG_TREE_BLOCK |
582 BTRFS_BLOCK_FLAG_FULL_BACKREF);
583 bi = (struct btrfs_tree_block_info *)(item + 1);
584 /* FIXME: get first key of the block */
585 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
586 btrfs_set_tree_block_level(leaf, bi, (int)owner);
587 } else {
588 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
590 btrfs_mark_buffer_dirty(leaf);
591 return 0;
593 #endif
595 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
597 u32 high_crc = ~(u32)0;
598 u32 low_crc = ~(u32)0;
599 __le64 lenum;
601 lenum = cpu_to_le64(root_objectid);
602 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
603 lenum = cpu_to_le64(owner);
604 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
605 lenum = cpu_to_le64(offset);
606 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
608 return ((u64)high_crc << 31) ^ (u64)low_crc;
611 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
612 struct btrfs_extent_data_ref *ref)
614 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
615 btrfs_extent_data_ref_objectid(leaf, ref),
616 btrfs_extent_data_ref_offset(leaf, ref));
619 static int match_extent_data_ref(struct extent_buffer *leaf,
620 struct btrfs_extent_data_ref *ref,
621 u64 root_objectid, u64 owner, u64 offset)
623 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
624 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
625 btrfs_extent_data_ref_offset(leaf, ref) != offset)
626 return 0;
627 return 1;
/*
 * Find the non-inline data back reference item for @bytenr.  With
 * @parent set, looks up the SHARED_DATA_REF keyed by parent (with a v0
 * compat fallback); otherwise walks the EXTENT_DATA_REF items at the
 * hashed key offset looking for an exact (root, owner, offset) match,
 * since different triples can hash to the same key.  Returns 0 with the
 * path positioned on the item, or -ENOENT / a search error.
 */
630 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
631 struct btrfs_root *root,
632 struct btrfs_path *path,
633 u64 bytenr, u64 parent,
634 u64 root_objectid,
635 u64 owner, u64 offset)
637 struct btrfs_key key;
638 struct btrfs_extent_data_ref *ref;
639 struct extent_buffer *leaf;
640 u32 nritems;
641 int ret;
642 int recow;
643 int err = -ENOENT;
645 key.objectid = bytenr;
646 if (parent) {
647 key.type = BTRFS_SHARED_DATA_REF_KEY;
648 key.offset = parent;
649 } else {
650 key.type = BTRFS_EXTENT_DATA_REF_KEY;
651 key.offset = hash_extent_data_ref(root_objectid,
652 owner, offset);
654 again:
655 recow = 0;
656 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
657 if (ret < 0) {
658 err = ret;
659 goto fail;
/* Shared refs are keyed exactly by parent: hit or miss, we are done. */
662 if (parent) {
663 if (!ret)
664 return 0;
665 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
666 key.type = BTRFS_EXTENT_REF_V0_KEY;
667 btrfs_release_path(path);
668 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
669 if (ret < 0) {
670 err = ret;
671 goto fail;
673 if (!ret)
674 return 0;
675 #endif
676 goto fail;
/* Hash collisions possible: scan forward for an exact triple match. */
679 leaf = path->nodes[0];
680 nritems = btrfs_header_nritems(leaf);
681 while (1) {
682 if (path->slots[0] >= nritems) {
683 ret = btrfs_next_leaf(root, path);
684 if (ret < 0)
685 err = ret;
686 if (ret)
687 goto fail;
689 leaf = path->nodes[0];
690 nritems = btrfs_header_nritems(leaf);
/* Crossing a leaf may have re-COWed it; remember to re-search. */
691 recow = 1;
694 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
695 if (key.objectid != bytenr ||
696 key.type != BTRFS_EXTENT_DATA_REF_KEY)
697 goto fail;
699 ref = btrfs_item_ptr(leaf, path->slots[0],
700 struct btrfs_extent_data_ref);
702 if (match_extent_data_ref(leaf, ref, root_objectid,
703 owner, offset)) {
704 if (recow) {
705 btrfs_release_path(path);
706 goto again;
708 err = 0;
709 break;
711 path->slots[0]++;
713 fail:
714 return err;
/*
 * Insert (or bump the count of) a non-inline data back reference for
 * @bytenr.  With @parent set this is a SHARED_DATA_REF item; otherwise
 * an EXTENT_DATA_REF at the hashed key offset, probing successive key
 * offsets on hash collisions until a match or a free slot is found.
 * The path is always released before returning.
 */
717 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
718 struct btrfs_root *root,
719 struct btrfs_path *path,
720 u64 bytenr, u64 parent,
721 u64 root_objectid, u64 owner,
722 u64 offset, int refs_to_add)
724 struct btrfs_key key;
725 struct extent_buffer *leaf;
726 u32 size;
727 u32 num_refs;
728 int ret;
730 key.objectid = bytenr;
731 if (parent) {
732 key.type = BTRFS_SHARED_DATA_REF_KEY;
733 key.offset = parent;
734 size = sizeof(struct btrfs_shared_data_ref);
735 } else {
736 key.type = BTRFS_EXTENT_DATA_REF_KEY;
737 key.offset = hash_extent_data_ref(root_objectid,
738 owner, offset);
739 size = sizeof(struct btrfs_extent_data_ref);
/* -EEXIST means the ref item is already there; we add to its count. */
742 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
743 if (ret && ret != -EEXIST)
744 goto fail;
746 leaf = path->nodes[0];
747 if (parent) {
748 struct btrfs_shared_data_ref *ref;
749 ref = btrfs_item_ptr(leaf, path->slots[0],
750 struct btrfs_shared_data_ref);
751 if (ret == 0) {
752 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
753 } else {
754 num_refs = btrfs_shared_data_ref_count(leaf, ref);
755 num_refs += refs_to_add;
756 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
758 } else {
759 struct btrfs_extent_data_ref *ref;
/* Hash collision: existing item is for a different triple — probe on. */
760 while (ret == -EEXIST) {
761 ref = btrfs_item_ptr(leaf, path->slots[0],
762 struct btrfs_extent_data_ref);
763 if (match_extent_data_ref(leaf, ref, root_objectid,
764 owner, offset))
765 break;
766 btrfs_release_path(path);
768 key.offset++;
769 ret = btrfs_insert_empty_item(trans, root, path, &key,
770 size);
771 if (ret && ret != -EEXIST)
772 goto fail;
774 leaf = path->nodes[0];
776 ref = btrfs_item_ptr(leaf, path->slots[0],
777 struct btrfs_extent_data_ref);
778 if (ret == 0) {
779 btrfs_set_extent_data_ref_root(leaf, ref,
780 root_objectid);
781 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
782 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
783 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
784 } else {
785 num_refs = btrfs_extent_data_ref_count(leaf, ref);
786 num_refs += refs_to_add;
787 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
790 btrfs_mark_buffer_dirty(leaf);
791 ret = 0;
792 fail:
793 btrfs_release_path(path);
794 return ret;
/*
 * Drop @refs_to_drop references from the data back ref item the path is
 * positioned on (EXTENT_DATA_REF, SHARED_DATA_REF, or v0 compat ref).
 * Deletes the item when its count reaches zero, otherwise writes back
 * the decremented count.  BUG()s on an unexpected key type and BUG_ON()s
 * if the item holds fewer refs than we are asked to drop.
 */
797 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
798 struct btrfs_root *root,
799 struct btrfs_path *path,
800 int refs_to_drop)
802 struct btrfs_key key;
803 struct btrfs_extent_data_ref *ref1 = NULL;
804 struct btrfs_shared_data_ref *ref2 = NULL;
805 struct extent_buffer *leaf;
806 u32 num_refs = 0;
807 int ret = 0;
809 leaf = path->nodes[0];
810 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
/* Read the current count from whichever ref flavor this item is. */
812 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
813 ref1 = btrfs_item_ptr(leaf, path->slots[0],
814 struct btrfs_extent_data_ref);
815 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
816 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
817 ref2 = btrfs_item_ptr(leaf, path->slots[0],
818 struct btrfs_shared_data_ref);
819 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
820 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
821 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
822 struct btrfs_extent_ref_v0 *ref0;
823 ref0 = btrfs_item_ptr(leaf, path->slots[0],
824 struct btrfs_extent_ref_v0);
825 num_refs = btrfs_ref_count_v0(leaf, ref0);
826 #endif
827 } else {
828 BUG();
831 BUG_ON(num_refs < refs_to_drop);
832 num_refs -= refs_to_drop;
834 if (num_refs == 0) {
835 ret = btrfs_del_item(trans, root, path);
836 } else {
/* Still referenced: write back the decremented count. */
837 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
838 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
839 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
840 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
841 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
842 else {
843 struct btrfs_extent_ref_v0 *ref0;
844 ref0 = btrfs_item_ptr(leaf, path->slots[0],
845 struct btrfs_extent_ref_v0);
846 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
848 #endif
849 btrfs_mark_buffer_dirty(leaf);
851 return ret;
/*
 * Return the reference count stored in a data back reference.  When
 * @iref is non-NULL it points at an inline ref inside the extent item
 * and its type decides the layout; otherwise the item the path points
 * at is a standalone ref item and the key type decides.  BUG()s on an
 * unexpected key type.
 */
854 static noinline u32 extent_data_ref_count(struct btrfs_path *path,
855 struct btrfs_extent_inline_ref *iref)
857 struct btrfs_key key;
858 struct extent_buffer *leaf;
859 struct btrfs_extent_data_ref *ref1;
860 struct btrfs_shared_data_ref *ref2;
861 u32 num_refs = 0;
863 leaf = path->nodes[0];
864 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
865 if (iref) {
/* Inline ref: the payload immediately follows the inline ref header. */
866 if (btrfs_extent_inline_ref_type(leaf, iref) ==
867 BTRFS_EXTENT_DATA_REF_KEY) {
868 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
869 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
870 } else {
871 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
872 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
874 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
875 ref1 = btrfs_item_ptr(leaf, path->slots[0],
876 struct btrfs_extent_data_ref);
877 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
878 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
879 ref2 = btrfs_item_ptr(leaf, path->slots[0],
880 struct btrfs_shared_data_ref);
881 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
882 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
883 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
884 struct btrfs_extent_ref_v0 *ref0;
885 ref0 = btrfs_item_ptr(leaf, path->slots[0],
886 struct btrfs_extent_ref_v0);
887 num_refs = btrfs_ref_count_v0(leaf, ref0);
888 #endif
889 } else {
890 BUG();
892 return num_refs;
895 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
896 struct btrfs_root *root,
897 struct btrfs_path *path,
898 u64 bytenr, u64 parent,
899 u64 root_objectid)
901 struct btrfs_key key;
902 int ret;
904 key.objectid = bytenr;
905 if (parent) {
906 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
907 key.offset = parent;
908 } else {
909 key.type = BTRFS_TREE_BLOCK_REF_KEY;
910 key.offset = root_objectid;
913 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
914 if (ret > 0)
915 ret = -ENOENT;
916 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
917 if (ret == -ENOENT && parent) {
918 btrfs_release_path(path);
919 key.type = BTRFS_EXTENT_REF_V0_KEY;
920 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
921 if (ret > 0)
922 ret = -ENOENT;
924 #endif
925 return ret;
928 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
929 struct btrfs_root *root,
930 struct btrfs_path *path,
931 u64 bytenr, u64 parent,
932 u64 root_objectid)
934 struct btrfs_key key;
935 int ret;
937 key.objectid = bytenr;
938 if (parent) {
939 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
940 key.offset = parent;
941 } else {
942 key.type = BTRFS_TREE_BLOCK_REF_KEY;
943 key.offset = root_objectid;
946 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
948 btrfs_release_path(path);
949 return ret;
952 static inline int extent_ref_type(u64 parent, u64 owner)
954 int type;
955 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
956 if (parent > 0)
957 type = BTRFS_SHARED_BLOCK_REF_KEY;
958 else
959 type = BTRFS_TREE_BLOCK_REF_KEY;
960 } else {
961 if (parent > 0)
962 type = BTRFS_SHARED_DATA_REF_KEY;
963 else
964 type = BTRFS_EXTENT_DATA_REF_KEY;
966 return type;
/*
 * Find an inline back reference inside the extent item for @bytenr.
 * Tries a skinny METADATA_ITEM key first for tree blocks (falling back
 * to the fat EXTENT_ITEM form on converted filesystems), converts v0
 * items when inserting, then scans the inline refs — which are kept
 * sorted by type, then by parent/root or data-ref hash — for a match.
 *
 * On a hit: returns 0 with *@ref_ret pointing at the inline ref.  On a
 * miss with @insert: returns -ENOENT with *@ref_ret at the insertion
 * position, or -EAGAIN when the item is too big for another inline ref
 * or a standalone ref item already follows (caller should use a keyed
 * ref instead).  Other negatives are search/IO errors.
 */
969 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
970 struct btrfs_root *root,
971 struct btrfs_path *path,
972 struct btrfs_extent_inline_ref **ref_ret,
973 u64 bytenr, u64 num_bytes,
974 u64 parent, u64 root_objectid,
975 u64 owner, u64 offset, int insert)
977 struct btrfs_key key;
978 struct extent_buffer *leaf;
979 struct btrfs_extent_item *ei;
980 struct btrfs_extent_inline_ref *iref;
981 u64 flags;
982 u32 item_size;
983 unsigned long ptr;
984 unsigned long end;
985 int extra_size;
986 int type;
987 int want;
988 int ret;
989 int err = 0;
990 int skinny_metadata =
991 btrfs_fs_incompat(root->fs_info, SKINNY_METADATA);
993 key.objectid = bytenr;
994 key.type = BTRFS_EXTENT_ITEM_KEY;
995 key.offset = num_bytes;
997 want = extent_ref_type(parent, owner);
/* When inserting, reserve room in the leaf for the new inline ref. */
998 if (insert)
999 extra_size = btrfs_extent_inline_ref_size(want);
1000 else
1001 extra_size = -1;
/* Skinny metadata keys encode the level in the offset, not the size. */
1003 if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
1004 key.type = BTRFS_METADATA_ITEM_KEY;
1005 key.offset = owner;
1006 } else if (skinny_metadata) {
1007 skinny_metadata = 0;
1010 again:
1011 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1012 if (ret < 0) {
1013 err = ret;
1014 goto out;
1018 * We may be a newly converted file system which still has the old fat
1019 * extent entries for metadata, so try and see if we have one of those.
1021 if (ret > 0 && skinny_metadata) {
1022 skinny_metadata = 0;
1023 if (path->slots[0]) {
1024 path->slots[0]--;
1025 btrfs_item_key_to_cpu(path->nodes[0], &key,
1026 path->slots[0]);
1027 if (key.objectid == bytenr &&
1028 key.type == BTRFS_EXTENT_ITEM_KEY &&
1029 key.offset == num_bytes)
1030 ret = 0;
1032 if (ret) {
1033 key.type = BTRFS_EXTENT_ITEM_KEY;
1034 key.offset = num_bytes;
1035 btrfs_release_path(path);
1036 goto again;
1040 if (ret) {
1041 printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
1042 return -ENOENT;
1045 BUG_ON(ret);
1047 leaf = path->nodes[0];
1048 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1049 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/* v0 items are smaller than the current format; convert before use. */
1050 if (item_size < sizeof(*ei)) {
1051 if (!insert) {
1052 err = -ENOENT;
1053 goto out;
1055 ret = convert_extent_item_v0(trans, root, path, owner,
1056 extra_size);
1057 if (ret < 0) {
1058 err = ret;
1059 goto out;
1061 leaf = path->nodes[0];
1062 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1064 #endif
1065 if (item_size < sizeof(*ei)) {
1066 printf("Size is %u, needs to be %u, slot %d\n",
1067 (unsigned)item_size,
1068 (unsigned)sizeof(*ei), path->slots[0]);
1069 btrfs_print_leaf(leaf);
1070 return -EINVAL;
1072 BUG_ON(item_size < sizeof(*ei));
1074 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1075 flags = btrfs_extent_flags(leaf, ei);
1077 ptr = (unsigned long)(ei + 1);
1078 end = (unsigned long)ei + item_size;
/* Fat metadata items carry a tree_block_info before the inline refs. */
1080 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1081 ptr += sizeof(struct btrfs_tree_block_info);
1082 BUG_ON(ptr > end);
1083 } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1084 if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
1085 return -EIO;
/* Inline refs are sorted: stop as soon as we pass the wanted slot. */
1089 err = -ENOENT;
1090 while (1) {
1091 if (ptr >= end) {
1092 WARN_ON(ptr > end);
1093 break;
1095 iref = (struct btrfs_extent_inline_ref *)ptr;
1096 type = btrfs_extent_inline_ref_type(leaf, iref);
1097 if (want < type)
1098 break;
1099 if (want > type) {
1100 ptr += btrfs_extent_inline_ref_size(type);
1101 continue;
1104 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1105 struct btrfs_extent_data_ref *dref;
1106 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1107 if (match_extent_data_ref(leaf, dref, root_objectid,
1108 owner, offset)) {
1109 err = 0;
1110 break;
1112 if (hash_extent_data_ref_item(leaf, dref) <
1113 hash_extent_data_ref(root_objectid, owner, offset))
1114 break;
1115 } else {
1116 u64 ref_offset;
1117 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1118 if (parent > 0) {
1119 if (parent == ref_offset) {
1120 err = 0;
1121 break;
1123 if (ref_offset < parent)
1124 break;
1125 } else {
1126 if (root_objectid == ref_offset) {
1127 err = 0;
1128 break;
1130 if (ref_offset < root_objectid)
1131 break;
1134 ptr += btrfs_extent_inline_ref_size(type);
1136 if (err == -ENOENT && insert) {
1137 if (item_size + extra_size >=
1138 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1139 err = -EAGAIN;
1140 goto out;
1143 * To add new inline back ref, we have to make sure
1144 * there is no corresponding back ref item.
1145 * For simplicity, we just do not add new inline back
1146 * ref if there is any back ref item.
1148 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1149 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1150 err = -EAGAIN;
1151 goto out;
1154 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1155 out:
1156 return err;
/*
 * Insert a new inline backref into an existing extent item.
 *
 * @iref points at the insertion position inside the extent item (as
 * found by lookup_inline_extent_backref() with insert == 1).  The item
 * is grown by the size of the new inline ref, any later refs are
 * shifted up, and the new ref is written in place.  The extent item's
 * total refcount is bumped by @refs_to_add.
 *
 * Always returns 0 (failures trip BUG_ON).
 */
static int setup_inline_extent_backref(struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;	/* byte offset of iref within the item */
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/*
	 * Remember the insertion point relative to the start of the item:
	 * the item may move within the leaf when it is extended below.
	 */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(root, path, size);
	BUG_ON(ret);

	/* re-derive the item pointer after the extend */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* shift any inline refs after the insertion point up by 'size' */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1219 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1220 struct btrfs_root *root,
1221 struct btrfs_path *path,
1222 struct btrfs_extent_inline_ref **ref_ret,
1223 u64 bytenr, u64 num_bytes, u64 parent,
1224 u64 root_objectid, u64 owner, u64 offset)
1226 int ret;
1228 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1229 bytenr, num_bytes, parent,
1230 root_objectid, owner, offset, 0);
1231 if (ret != -ENOENT)
1232 return ret;
1234 btrfs_release_path(path);
1235 *ref_ret = NULL;
1237 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1238 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1239 root_objectid);
1240 } else {
1241 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1242 root_objectid, owner, offset);
1244 return ret;
/*
 * Modify the refcount of an inline backref by @refs_to_mod (may be
 * negative).  The extent item's total refcount is adjusted as well.
 * If the individual inline ref's count drops to zero, the ref is
 * removed from the item and the item is shrunk.
 *
 * Always returns 0 (failures trip BUG_ON).
 */
static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_extent_inline_ref *iref,
					int refs_to_mod)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* update the total refcount on the extent item */
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	/* fetch the per-ref count; shared block / tree refs carry exactly 1 */
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* count hit zero: splice this inline ref out of the item */
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
/*
 * Add @refs_to_add references to the extent as an inline backref.
 * If a matching inline ref already exists its count is bumped;
 * if not (-ENOENT from the lookup) a new inline ref is inserted at
 * the position the lookup left in @iref.
 *
 * Returns 0 on success, -EAGAIN when the item is too full for a new
 * inline ref (caller falls back to a keyed backref item), or another
 * negative errno from the lookup.
 */
static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					u64 bytenr, u64 num_bytes, u64 parent,
					u64 root_objectid, u64 owner,
					u64 offset, int refs_to_add)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		/* existing inline ref found: only valid for data extents */
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		ret = update_inline_extent_backref(trans, root, path, iref,
						   refs_to_add);
	} else if (ret == -ENOENT) {
		ret = setup_inline_extent_backref(root, path, iref,
						  parent, root_objectid,
						  owner, offset, refs_to_add);
	}
	return ret;
}
1334 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1335 struct btrfs_root *root,
1336 struct btrfs_path *path,
1337 u64 bytenr, u64 parent, u64 root_objectid,
1338 u64 owner, u64 offset, int refs_to_add)
1340 int ret;
1342 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
1343 ret = insert_extent_data_ref(trans, root, path, bytenr,
1344 parent, root_objectid,
1345 owner, offset, refs_to_add);
1346 } else {
1347 BUG_ON(refs_to_add != 1);
1348 ret = insert_tree_block_ref(trans, root, path, bytenr,
1349 parent, root_objectid);
1351 return ret;
/*
 * Drop @refs_to_drop references from a backref.  An inline ref is
 * decremented in place (and removed when it hits zero); a keyed data
 * ref item has its count reduced; a keyed tree block ref item is
 * deleted outright (metadata refs only ever drop one at a time).
 */
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	BUG_ON(!is_data && refs_to_drop != 1);

	if (iref)
		return update_inline_extent_backref(trans, root, path, iref,
						    -refs_to_drop);
	if (is_data)
		return remove_extent_data_ref(trans, root, path, refs_to_drop);

	return btrfs_del_item(trans, root, path);
}
/*
 * Add one reference to an extent.  Preferred path is an inline
 * backref; when the extent item is too full (-EAGAIN) the extent
 * item's refcount is bumped here and a separate keyed backref item is
 * inserted instead.  Pending extent-tree work is flushed on the way
 * out.  Any failure trips BUG_ON.
 */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_BACK;

	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	/*
	 * -EAGAIN: no room for an inline ref.  The path still points at
	 * the extent item; bump its refcount, then add a keyed backref.
	 */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + 1);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_BACK;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, 1);
	if (ret)
		err = ret;
out:
	btrfs_free_path(path);
	finish_current_insert(trans);
	del_pending_extents(trans);
	BUG_ON(err);
	return err;
}
/*
 * Flush deferred extent-tree work: apply queued extent insertions and
 * process queued deletions.  Always returns 0.
 */
int btrfs_extent_post_op(struct btrfs_trans_handle *trans)
{
	finish_current_insert(trans);
	del_pending_extents(trans);
	return 0;
}
1434 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1435 struct btrfs_fs_info *fs_info, u64 bytenr,
1436 u64 offset, int metadata, u64 *refs, u64 *flags)
1438 struct btrfs_path *path;
1439 int ret;
1440 struct btrfs_key key;
1441 struct extent_buffer *l;
1442 struct btrfs_extent_item *item;
1443 u32 item_size;
1444 u64 num_refs;
1445 u64 extent_flags;
1447 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
1448 offset = fs_info->nodesize;
1449 metadata = 0;
1452 path = btrfs_alloc_path();
1453 if (!path)
1454 return -ENOMEM;
1455 path->reada = READA_BACK;
1457 key.objectid = bytenr;
1458 key.offset = offset;
1459 if (metadata)
1460 key.type = BTRFS_METADATA_ITEM_KEY;
1461 else
1462 key.type = BTRFS_EXTENT_ITEM_KEY;
1464 again:
1465 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
1466 if (ret < 0)
1467 goto out;
1470 * Deal with the fact that we may have mixed SKINNY and normal refs. If
1471 * we didn't find what we wanted check and see if we have a normal ref
1472 * right next to us, or re-search if we are on the edge of the leaf just
1473 * to make sure.
1475 if (ret > 0 && metadata) {
1476 if (path->slots[0]) {
1477 path->slots[0]--;
1478 btrfs_item_key_to_cpu(path->nodes[0], &key,
1479 path->slots[0]);
1480 if (key.objectid == bytenr &&
1481 key.type == BTRFS_EXTENT_ITEM_KEY &&
1482 key.offset == fs_info->nodesize)
1483 ret = 0;
1486 if (ret) {
1487 btrfs_release_path(path);
1488 key.type = BTRFS_EXTENT_ITEM_KEY;
1489 key.offset = fs_info->nodesize;
1490 metadata = 0;
1491 goto again;
1495 if (ret != 0) {
1496 ret = -EIO;
1497 goto out;
1500 l = path->nodes[0];
1501 item_size = btrfs_item_size_nr(l, path->slots[0]);
1502 if (item_size >= sizeof(*item)) {
1503 item = btrfs_item_ptr(l, path->slots[0],
1504 struct btrfs_extent_item);
1505 num_refs = btrfs_extent_refs(l, item);
1506 extent_flags = btrfs_extent_flags(l, item);
1507 } else {
1508 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1509 struct btrfs_extent_item_v0 *ei0;
1510 BUG_ON(item_size != sizeof(*ei0));
1511 ei0 = btrfs_item_ptr(l, path->slots[0],
1512 struct btrfs_extent_item_v0);
1513 num_refs = btrfs_extent_refs_v0(l, ei0);
1514 /* FIXME: this isn't correct for data */
1515 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1516 #else
1517 BUG();
1518 #endif
1520 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1521 if (refs)
1522 *refs = num_refs;
1523 if (flags)
1524 *flags = extent_flags;
1525 out:
1526 btrfs_free_path(path);
1527 return ret;
/*
 * OR @flags into the flags of the extent item for tree block @bytenr
 * at @level.  Handles both skinny METADATA_ITEM keys and classic
 * EXTENT_ITEM keys (retrying with the classic key when the skinny
 * lookup misses), and converts v0 extent items when compiled with
 * BTRFS_COMPAT_EXTENT_TREE_V0.  A block that cannot be found at all
 * trips BUG().
 */
int btrfs_set_block_flags(struct btrfs_trans_handle *trans, u64 bytenr,
			  int level, u64 flags)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 item_size;
	int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	key.objectid = bytenr;
	if (skinny_metadata) {
		key.offset = level;
		key.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		key.offset = fs_info->nodesize;
		key.type = BTRFS_EXTENT_ITEM_KEY;
	}

again:
	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/*
	 * Skinny lookup missed: the block may be recorded as a classic
	 * extent item right before our slot, otherwise re-search with
	 * the classic key.
	 */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.offset == fs_info->nodesize &&
			    key.type == BTRFS_EXTENT_ITEM_KEY)
				ret = 0;
		}
		if (ret) {
			btrfs_release_path(path);
			key.offset = fs_info->nodesize;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			goto again;
		}
	}

	if (ret != 0) {
		btrfs_print_leaf(path->nodes[0]);
		printk("failed to find block number %Lu\n",
		       (unsigned long long)bytenr);
		BUG();
	}

	l = path->nodes[0];
	item_size = btrfs_item_size_nr(l, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*item)) {
		/* old-format item: upgrade it in place first */
		ret = convert_extent_item_v0(trans, fs_info->extent_root, path,
					     (u64)-1, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		item_size = btrfs_item_size_nr(l, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*item));
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	flags |= btrfs_extent_flags(l, item);
	btrfs_set_extent_flags(l, item, flags);
out:
	btrfs_free_path(path);
	finish_current_insert(trans);
	del_pending_extents(trans);
	return ret;
}
/*
 * Walk every extent referenced by @buf and either add (@inc != 0) or
 * drop one reference on it.  Leaves contribute one ref per referenced
 * data extent (inline extents and holes are skipped); nodes contribute
 * one ref per child block.  @record_parent selects full-backref mode,
 * recording buf->start as the parent of each ref.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int record_parent, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* btrfs_inc_extent_ref or btrfs_free_extent, chosen below */
	int (*process_func)(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* leaves of non-reference-counted roots carry no extent refs */
	if (!root->ref_cows && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (record_parent)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf, no extent ref */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr == 0 means a hole */
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/* backrefs are keyed by the extent's first offset */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->fs_info->nodesize;
			/* for tree blocks, owner is the child's level */
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		}
	}
	return 0;
fail:
	WARN_ON(1);
	return ret;
}
/* Add one reference to every extent referenced by @buf. */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, 1);
}
/* Drop one reference from every extent referenced by @buf. */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, 0);
}
/*
 * Write a dirty block group item back into the extent tree, then flush
 * any extent-tree work that the write queued up.  Returns 0 on
 * success, or the first error from the search or the pending-delete
 * processing.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = trans->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	/* the block group item must already exist */
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);
fail:
	/* flush even on error so queued ops are not lost */
	finish_current_insert(trans);
	pending_ret = del_pending_extents(trans);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;
}
1730 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1731 struct btrfs_root *root)
1733 struct extent_io_tree *block_group_cache;
1734 struct btrfs_block_group_cache *cache;
1735 int ret;
1736 struct btrfs_path *path;
1737 u64 last = 0;
1738 u64 start;
1739 u64 end;
1740 u64 ptr;
1742 block_group_cache = &root->fs_info->block_group_cache;
1743 path = btrfs_alloc_path();
1744 if (!path)
1745 return -ENOMEM;
1747 while(1) {
1748 ret = find_first_extent_bit(block_group_cache, last,
1749 &start, &end, BLOCK_GROUP_DIRTY);
1750 if (ret) {
1751 if (last == 0)
1752 break;
1753 last = 0;
1754 continue;
1757 last = end + 1;
1758 ret = get_state_private(block_group_cache, start, &ptr);
1759 BUG_ON(ret);
1761 clear_extent_bits(block_group_cache, start, end,
1762 BLOCK_GROUP_DIRTY);
1764 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1765 ret = write_one_cache_group(trans, path, cache);
1767 btrfs_free_path(path);
1768 return 0;
/*
 * Find the space_info whose type bits (DATA/METADATA/SYSTEM) overlap
 * @flags; profile bits in @flags are masked off first.  Returns NULL
 * when no matching space_info exists yet.
 */
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	list_for_each_entry(found, &info->space_info, list) {
		if (found->flags & flags)
			return found;
	}
	return NULL;
}
/*
 * Release @total_bytes of accounting from the space_info matching
 * @flags, optionally returning the space_info through @space_info.
 * Only empty block groups may be freed: a non-zero @bytes_used yields
 * -ENOTEMPTY.  Returns -ENOENT when no matching space_info exists and
 * -EINVAL when the space_info tracks less than @total_bytes.
 */
static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
			   u64 total_bytes, u64 bytes_used,
			   struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;

	/* only support free block group which is empty */
	if (bytes_used)
		return -ENOTEMPTY;

	found = __find_space_info(fs_info, flags);
	if (!found)
		return -ENOENT;
	if (found->total_bytes < total_bytes) {
		fprintf(stderr,
			"WARNING: bad space info to free %llu only have %llu\n",
			total_bytes, found->total_bytes);
		return -EINVAL;
	}
	found->total_bytes -= total_bytes;
	if (space_info)
		*space_info = found;
	return 0;
}
/*
 * Account @total_bytes/@bytes_used to the space_info matching @flags,
 * creating the space_info if none exists.  *space_info is always set
 * on success.  Returns 0 or -ENOMEM.
 */
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;

	found = __find_space_info(info, flags);
	if (found) {
		found->total_bytes += total_bytes;
		found->bytes_used += bytes_used;
		/* inconsistency is reported but not treated as fatal */
		if (found->total_bytes < found->bytes_used) {
			fprintf(stderr, "warning, bad space info total_bytes "
				"%llu used %llu\n",
				(unsigned long long)found->total_bytes,
				(unsigned long long)found->bytes_used);
		}
		*space_info = found;
		return 0;
	}
	found = kmalloc(sizeof(*found), GFP_NOFS);
	if (!found)
		return -ENOMEM;

	list_add(&found->list, &info->space_info);
	/* only the type bits identify a space_info */
	found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	found->total_bytes = total_bytes;
	found->bytes_used = bytes_used;
	found->bytes_pinned = 0;
	found->full = 0;
	*space_info = found;
	return 0;
}
/*
 * Record which RAID/DUP profile bits are in use for each block group
 * type (data/metadata/system) so later allocations can reuse them.
 */
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
				   BTRFS_BLOCK_GROUP_RAID1 |
				   BTRFS_BLOCK_GROUP_RAID10 |
				   BTRFS_BLOCK_GROUP_RAID5 |
				   BTRFS_BLOCK_GROUP_RAID6 |
				   BTRFS_BLOCK_GROUP_DUP);
	if (extra_flags) {
		if (flags & BTRFS_BLOCK_GROUP_DATA)
			fs_info->avail_data_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_METADATA)
			fs_info->avail_metadata_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
			fs_info->avail_system_alloc_bits |= extra_flags;
	}
}
/*
 * Allocate a new chunk for @flags if the matching space_info is close
 * to full (used + pinned + this allocation above ~70% of total).
 * Respects the avoid_{meta,sys}_chunk_alloc knobs, and marks the
 * space_info full on -ENOSPC instead of failing.  Always returns 0;
 * unexpected allocation errors trip BUG_ON.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 alloc_bytes,
			  u64 flags)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret;

	space_info = __find_space_info(fs_info, flags);
	if (!space_info) {
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	if (space_info->full)
		return 0;

	/* only allocate when usage would exceed ~70% of the total */
	thresh = div_factor(space_info->total_bytes, 7);
	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
	    thresh)
		return 0;

	/*
	 * Avoid allocating given chunk type
	 */
	if (fs_info->avoid_meta_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_METADATA))
		return 0;
	if (fs_info->avoid_sys_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_SYSTEM))
		return 0;

	ret = btrfs_alloc_chunk(trans, fs_info, &start, &num_bytes,
				space_info->flags);
	if (ret == -ENOSPC) {
		space_info->full = 1;
		return 0;
	}

	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, fs_info, 0, space_info->flags,
				     start, num_bytes);
	BUG_ON(ret);
	return 0;
}
1913 static int update_block_group(struct btrfs_fs_info *info, u64 bytenr,
1914 u64 num_bytes, int alloc, int mark_free)
1916 struct btrfs_block_group_cache *cache;
1917 u64 total = num_bytes;
1918 u64 old_val;
1919 u64 byte_in_group;
1920 u64 start;
1921 u64 end;
1923 /* block accounting for super block */
1924 old_val = btrfs_super_bytes_used(info->super_copy);
1925 if (alloc)
1926 old_val += num_bytes;
1927 else
1928 old_val -= num_bytes;
1929 btrfs_set_super_bytes_used(info->super_copy, old_val);
1931 while(total) {
1932 cache = btrfs_lookup_block_group(info, bytenr);
1933 if (!cache) {
1934 return -1;
1936 byte_in_group = bytenr - cache->key.objectid;
1937 WARN_ON(byte_in_group > cache->key.offset);
1938 start = cache->key.objectid;
1939 end = start + cache->key.offset - 1;
1940 set_extent_bits(&info->block_group_cache, start, end,
1941 BLOCK_GROUP_DIRTY);
1943 old_val = btrfs_block_group_used(&cache->item);
1944 num_bytes = min(total, cache->key.offset - byte_in_group);
1946 if (alloc) {
1947 old_val += num_bytes;
1948 cache->space_info->bytes_used += num_bytes;
1949 } else {
1950 old_val -= num_bytes;
1951 cache->space_info->bytes_used -= num_bytes;
1952 if (mark_free) {
1953 set_extent_dirty(&info->free_space_cache,
1954 bytenr, bytenr + num_bytes - 1);
1957 btrfs_set_block_group_used(&cache->item, old_val);
1958 total -= num_bytes;
1959 bytenr += num_bytes;
1961 return 0;
1964 static int update_pinned_extents(struct btrfs_fs_info *fs_info,
1965 u64 bytenr, u64 num, int pin)
1967 u64 len;
1968 struct btrfs_block_group_cache *cache;
1970 if (pin) {
1971 set_extent_dirty(&fs_info->pinned_extents,
1972 bytenr, bytenr + num - 1);
1973 } else {
1974 clear_extent_dirty(&fs_info->pinned_extents,
1975 bytenr, bytenr + num - 1);
1977 while (num > 0) {
1978 cache = btrfs_lookup_block_group(fs_info, bytenr);
1979 if (!cache) {
1980 len = min((u64)fs_info->sectorsize, num);
1981 goto next;
1983 WARN_ON(!cache);
1984 len = min(num, cache->key.offset -
1985 (bytenr - cache->key.objectid));
1986 if (pin) {
1987 cache->pinned += len;
1988 cache->space_info->bytes_pinned += len;
1989 fs_info->total_pinned += len;
1990 } else {
1991 cache->pinned -= len;
1992 cache->space_info->bytes_pinned -= len;
1993 fs_info->total_pinned -= len;
1995 next:
1996 bytenr += len;
1997 num -= len;
1999 return 0;
/*
 * At transaction commit, unpin every range recorded in @unpin and
 * return it to the free space cache.  Always returns 0.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_io_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;
	struct extent_io_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;

	while (1) {
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		/* pin == 0: drop the pinned accounting for this range */
		update_pinned_extents(trans->fs_info, start, end + 1 - start,
				      0);
		clear_extent_dirty(unpin, start, end);
		set_extent_dirty(free_space_cache, start, end);
	}
	return 0;
}
/*
 * Report whether the extent root still has deferred work queued in the
 * extent_ins / pending_del trees.
 *
 * NOTE(review): as written this returns 1 only when the extent_ins
 * lookup succeeds AND the pending_del lookup then also succeeds; if
 * extent_ins is empty, pending_del is never consulted.  "Either tree
 * non-empty" (OR) looks like the intended semantics — verify against
 * the caller before changing.
 */
static int extent_root_pending_ops(struct btrfs_fs_info *info)
{
	u64 start;
	u64 end;
	int ret;

	ret = find_first_extent_bit(&info->extent_ins, 0, &start,
				    &end, EXTENT_LOCKED);
	if (!ret) {
		ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
	}
	return ret == 0;
}
/*
 * Apply all queued extent insertions: for each EXTENT_LOCKED range in
 * the extent_ins tree, take its pending_extent_op and insert the
 * reserved tree block into the extent tree (skinny METADATA_ITEM or
 * classic EXTENT_ITEM key depending on the filesystem features).
 * Only PENDING_EXTENT_INSERT ops are expected here; anything else
 * trips BUG_ON.  Always returns 0.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans)
{
	u64 start;
	u64 end;
	u64 priv;
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct pending_extent_op *extent_op;
	struct btrfs_key key;
	int ret;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	while (1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		/* the op struct is stashed as the range's private data */
		ret = get_state_private(&info->extent_ins, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		if (extent_op->type == PENDING_EXTENT_INSERT) {
			key.objectid = start;
			if (skinny_metadata) {
				key.offset = extent_op->level;
				key.type = BTRFS_METADATA_ITEM_KEY;
			} else {
				key.offset = extent_op->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
			}
			ret = alloc_reserved_tree_block(trans,
				extent_root->root_key.objectid,
				trans->transid,
				extent_op->flags,
				&extent_op->key,
				extent_op->level, &key);
			BUG_ON(ret);
		} else {
			BUG_ON(1);
		}

		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED);
		kfree(extent_op);
	}
	return 0;
}
2090 static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 bytenr,
2091 u64 num_bytes, int is_data)
2093 int err = 0;
2094 struct extent_buffer *buf;
2096 if (is_data)
2097 goto pinit;
2099 buf = btrfs_find_tree_block(trans->fs_info, bytenr, num_bytes);
2100 if (!buf)
2101 goto pinit;
2103 /* we can reuse a block if it hasn't been written
2104 * and it is from this transaction. We can't
2105 * reuse anything from the tree log root because
2106 * it has tiny sub-transactions.
2108 if (btrfs_buffer_uptodate(buf, 0)) {
2109 u64 header_owner = btrfs_header_owner(buf);
2110 u64 header_transid = btrfs_header_generation(buf);
2111 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2112 header_transid == trans->transid &&
2113 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2114 clean_tree_block(buf);
2115 free_extent_buffer(buf);
2116 return 1;
2119 free_extent_buffer(buf);
2120 pinit:
2121 update_pinned_extents(trans->fs_info, bytenr, num_bytes, 1);
2123 BUG_ON(err < 0);
2124 return 0;
/* Pin @num_bytes at @bytenr so the space is not reused until unpinned. */
void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
		      u64 bytenr, u64 num_bytes)
{
	update_pinned_extents(fs_info, bytenr, num_bytes, 1);
}
/* Release the pinned accounting for @num_bytes at @bytenr. */
void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
			u64 bytenr, u64 num_bytes)
{
	update_pinned_extents(fs_info, bytenr, num_bytes, 0);
}
/*
 * remove an extent from the root, returns 0 on success
 *
 * Drops @refs_to_drop references identified by (parent, root_objectid,
 * owner_objectid, owner_offset) from the extent at @bytenr/@num_bytes.
 * If other references remain, only the backref and the refcount are
 * updated; when the last reference goes away the extent item (and a
 * neighbouring keyed backref item, when present) is deleted, the bytes
 * are pinned (or marked free if the block is reusable), and the block
 * group accounting is updated.
 */
static int __free_extent(struct btrfs_trans_handle *trans,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner_objectid,
			 u64 owner_offset, int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root = trans->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	/* debugging hook: report every free to the registered callback */
	if (trans->fs_info->free_extent_hook) {
		trans->fs_info->free_extent_hook(trans->fs_info, bytenr, num_bytes,
						parent, root_objectid, owner_objectid,
						owner_offset, refs_to_drop);
	}
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_BACK;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	if (is_data)
		skinny_metadata = 0;
	BUG_ON(!is_data && refs_to_drop != 1);

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * The extent item usually sits just before the backref
		 * item in the same leaf; scan back a few slots for it.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* drop the keyed backref first, then re-find the item */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data);
			BUG_ON(ret);
			btrfs_release_path(path);

			key.objectid = bytenr;

			if (skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			} else {
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/* a classic item may sit just before us */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0],
						      &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* retry with the classic extent item key */
				skinny_metadata = 0;
				btrfs_release_path(path);
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				printk(KERN_ERR "umm, got %d back from search"
				       ", was looking for %llu\n", ret,
				       (unsigned long long)bytenr);
				btrfs_print_leaf(path->nodes[0]);
			}
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
		       "parent %llu root %llu owner %llu offset %llu\n",
		       (unsigned long long)bytenr,
		       (unsigned long long)parent,
		       (unsigned long long)root_objectid,
		       (unsigned long long)owner_objectid,
		       (unsigned long long)owner_offset);
		ret = -EIO;
		goto fail;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* v0 item: convert in place, then re-find it */
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		BUG_ON(ret < 0);

		btrfs_release_path(path);

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			printk(KERN_ERR "umm, got %d back from search"
			       ", was looking for %llu\n", ret,
			       (unsigned long long)bytenr);
			btrfs_print_leaf(path->nodes[0]);
		}
		BUG_ON(ret);
		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* classic metadata items embed the block level; sanity check */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs < refs_to_drop);
	refs -= refs_to_drop;

	if (refs > 0) {
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data);
			BUG_ON(ret);
		}
	} else {
		/* last reference: delete the extent item itself */
		int mark_free = 0;
		int pin = 1;

		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* delete the adjacent backref item too */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		if (pin) {
			ret = pin_down_bytes(trans, bytenr, num_bytes,
					     is_data);
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		BUG_ON(ret);
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, bytenr, num_bytes);
			BUG_ON(ret);
		}

		update_block_group(trans->fs_info, bytenr, num_bytes, 0,
				   mark_free);
	}
fail:
	btrfs_free_path(path);
	finish_current_insert(trans);
	return ret;
}
2377 * find all the blocks marked as pending in the radix tree and remove
2378 * them from the extent map
2380 static int del_pending_extents(struct btrfs_trans_handle *trans)
2382 int ret;
2383 int err = 0;
2384 u64 start;
2385 u64 end;
2386 u64 priv;
2387 struct extent_io_tree *pending_del;
2388 struct extent_io_tree *extent_ins;
2389 struct pending_extent_op *extent_op;
2390 struct btrfs_fs_info *fs_info = trans->fs_info;
2391 struct btrfs_root *extent_root = fs_info->extent_root;
2393 extent_ins = &extent_root->fs_info->extent_ins;
2394 pending_del = &extent_root->fs_info->pending_del;
2396 while(1) {
2397 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2398 EXTENT_LOCKED);
2399 if (ret)
2400 break;
2402 ret = get_state_private(pending_del, start, &priv);
2403 BUG_ON(ret);
2404 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2406 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED);
2408 if (!test_range_bit(extent_ins, start, end,
2409 EXTENT_LOCKED, 0)) {
2410 ret = __free_extent(trans, start, end + 1 - start, 0,
2411 extent_root->root_key.objectid,
2412 extent_op->level, 0, 1);
2413 kfree(extent_op);
2414 } else {
2415 kfree(extent_op);
2416 ret = get_state_private(extent_ins, start, &priv);
2417 BUG_ON(ret);
2418 extent_op = (struct pending_extent_op *)
2419 (unsigned long)priv;
2421 clear_extent_bits(extent_ins, start, end,
2422 EXTENT_LOCKED);
2424 if (extent_op->type == PENDING_BACKREF_UPDATE)
2425 BUG_ON(1);
2427 kfree(extent_op);
2429 if (ret)
2430 err = ret;
2432 return err;
2436 int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
2437 struct btrfs_root *root,
2438 struct extent_buffer *buf,
2439 u64 parent, int last_ref)
2441 return btrfs_free_extent(trans, root, buf->start, buf->len, parent,
2442 root->root_key.objectid,
2443 btrfs_header_level(buf), 0);
2447 * remove an extent from the root, returns 0 on success
2450 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2451 struct btrfs_root *root,
2452 u64 bytenr, u64 num_bytes, u64 parent,
2453 u64 root_objectid, u64 owner, u64 offset)
2455 struct btrfs_root *extent_root = root->fs_info->extent_root;
2456 int pending_ret;
2457 int ret;
2459 WARN_ON(num_bytes < root->fs_info->sectorsize);
2460 if (root == extent_root) {
2461 struct pending_extent_op *extent_op;
2463 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2464 BUG_ON(!extent_op);
2466 extent_op->type = PENDING_EXTENT_DELETE;
2467 extent_op->bytenr = bytenr;
2468 extent_op->num_bytes = num_bytes;
2469 extent_op->level = (int)owner;
2471 set_extent_bits(&root->fs_info->pending_del,
2472 bytenr, bytenr + num_bytes - 1,
2473 EXTENT_LOCKED);
2474 set_state_private(&root->fs_info->pending_del,
2475 bytenr, (unsigned long)extent_op);
2476 return 0;
2478 ret = __free_extent(trans, bytenr, num_bytes, parent,
2479 root_objectid, owner, offset, 1);
2480 pending_ret = del_pending_extents(trans);
2481 return ret ? ret : pending_ret;
2484 static u64 stripe_align(struct btrfs_root *root, u64 val)
2486 return round_up(val, (u64)root->fs_info->stripesize);
2490 * walks the btree of allocated extents and find a hole of a given size.
2491 * The key ins is changed to record the hole:
2492 * ins->objectid == block start
2493 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2494 * ins->offset == number of blocks
2495 * Any available blocks before search_start are skipped.
2497 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2498 struct btrfs_root *orig_root,
2499 u64 num_bytes, u64 empty_size,
2500 u64 search_start, u64 search_end,
2501 u64 hint_byte, struct btrfs_key *ins,
2502 u64 exclude_start, u64 exclude_nr,
2503 int data)
2505 int ret;
2506 u64 orig_search_start = search_start;
2507 struct btrfs_root * root = orig_root->fs_info->extent_root;
2508 struct btrfs_fs_info *info = root->fs_info;
2509 u64 total_needed = num_bytes;
2510 struct btrfs_block_group_cache *block_group;
2511 int full_scan = 0;
2512 int wrapped = 0;
2514 WARN_ON(num_bytes < info->sectorsize);
2515 ins->type = BTRFS_EXTENT_ITEM_KEY;
2517 search_start = stripe_align(root, search_start);
2519 if (hint_byte) {
2520 block_group = btrfs_lookup_first_block_group(info, hint_byte);
2521 if (!block_group)
2522 hint_byte = search_start;
2523 block_group = btrfs_find_block_group(root, block_group,
2524 hint_byte, data, 1);
2525 } else {
2526 block_group = btrfs_find_block_group(root,
2527 trans->block_group,
2528 search_start, data, 1);
2531 total_needed += empty_size;
2533 check_failed:
2534 search_start = stripe_align(root, search_start);
2535 if (!block_group) {
2536 block_group = btrfs_lookup_first_block_group(info,
2537 search_start);
2538 if (!block_group)
2539 block_group = btrfs_lookup_first_block_group(info,
2540 orig_search_start);
2542 ret = find_search_start(root, &block_group, &search_start,
2543 total_needed, data);
2544 if (ret)
2545 goto new_group;
2547 ins->objectid = search_start;
2548 ins->offset = num_bytes;
2550 if (ins->objectid + num_bytes >
2551 block_group->key.objectid + block_group->key.offset) {
2552 search_start = block_group->key.objectid +
2553 block_group->key.offset;
2554 goto new_group;
2557 if (test_range_bit(&info->extent_ins, ins->objectid,
2558 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
2559 search_start = ins->objectid + num_bytes;
2560 goto new_group;
2563 if (test_range_bit(&info->pinned_extents, ins->objectid,
2564 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2565 search_start = ins->objectid + num_bytes;
2566 goto new_group;
2569 if (info->excluded_extents &&
2570 test_range_bit(info->excluded_extents, ins->objectid,
2571 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2572 search_start = ins->objectid + num_bytes;
2573 goto new_group;
2576 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2577 ins->objectid < exclude_start + exclude_nr)) {
2578 search_start = exclude_start + exclude_nr;
2579 goto new_group;
2582 if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
2583 if (check_crossing_stripes(info, ins->objectid, num_bytes)) {
2584 struct btrfs_block_group_cache *bg_cache;
2585 u64 bg_offset;
2587 bg_cache = btrfs_lookup_block_group(info, ins->objectid);
2588 if (!bg_cache)
2589 goto no_bg_cache;
2590 bg_offset = ins->objectid - bg_cache->key.objectid;
2592 search_start = round_up(
2593 bg_offset + num_bytes, BTRFS_STRIPE_LEN) +
2594 bg_cache->key.objectid;
2595 goto new_group;
2597 no_bg_cache:
2598 block_group = btrfs_lookup_block_group(info, ins->objectid);
2599 if (block_group)
2600 trans->block_group = block_group;
2602 ins->offset = num_bytes;
2603 return 0;
2605 new_group:
2606 block_group = btrfs_lookup_first_block_group(info, search_start);
2607 if (!block_group) {
2608 search_start = orig_search_start;
2609 if (full_scan) {
2610 ret = -ENOSPC;
2611 goto error;
2613 if (wrapped) {
2614 if (!full_scan)
2615 total_needed -= empty_size;
2616 full_scan = 1;
2617 } else
2618 wrapped = 1;
2620 cond_resched();
2621 block_group = btrfs_find_block_group(root, block_group,
2622 search_start, data, 0);
2623 goto check_failed;
2625 error:
2626 return ret;
2629 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2630 struct btrfs_root *root,
2631 u64 num_bytes, u64 empty_size,
2632 u64 hint_byte, u64 search_end,
2633 struct btrfs_key *ins, bool is_data)
2635 int ret;
2636 u64 search_start = 0;
2637 u64 alloc_profile;
2638 u64 profile;
2639 struct btrfs_fs_info *info = root->fs_info;
2641 if (is_data) {
2642 alloc_profile = info->avail_data_alloc_bits &
2643 info->data_alloc_profile;
2644 profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2645 } else if (info->system_allocs == 1 || root == info->chunk_root) {
2646 alloc_profile = info->avail_system_alloc_bits &
2647 info->system_alloc_profile;
2648 profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2649 } else {
2650 alloc_profile = info->avail_metadata_alloc_bits &
2651 info->metadata_alloc_profile;
2652 profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2655 if (root->ref_cows) {
2656 if (!(profile & BTRFS_BLOCK_GROUP_METADATA)) {
2657 ret = do_chunk_alloc(trans, info,
2658 num_bytes,
2659 BTRFS_BLOCK_GROUP_METADATA);
2660 BUG_ON(ret);
2662 ret = do_chunk_alloc(trans, info,
2663 num_bytes + SZ_2M, profile);
2664 BUG_ON(ret);
2667 WARN_ON(num_bytes < info->sectorsize);
2668 ret = find_free_extent(trans, root, num_bytes, empty_size,
2669 search_start, search_end, hint_byte, ins,
2670 trans->alloc_exclude_start,
2671 trans->alloc_exclude_nr, profile);
2672 if (ret < 0)
2673 return ret;
2674 clear_extent_dirty(&info->free_space_cache,
2675 ins->objectid, ins->objectid + ins->offset - 1);
2676 return ret;
2679 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
2680 u64 root_objectid, u64 generation,
2681 u64 flags, struct btrfs_disk_key *key,
2682 int level, struct btrfs_key *ins)
2684 int ret;
2685 struct btrfs_fs_info *fs_info = trans->fs_info;
2686 struct btrfs_extent_item *extent_item;
2687 struct btrfs_tree_block_info *block_info;
2688 struct btrfs_extent_inline_ref *iref;
2689 struct btrfs_path *path;
2690 struct extent_buffer *leaf;
2691 u32 size = sizeof(*extent_item) + sizeof(*iref);
2692 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2694 if (!skinny_metadata)
2695 size += sizeof(*block_info);
2697 path = btrfs_alloc_path();
2698 if (!path)
2699 return -ENOMEM;
2701 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
2702 ins, size);
2703 BUG_ON(ret);
2705 leaf = path->nodes[0];
2706 extent_item = btrfs_item_ptr(leaf, path->slots[0],
2707 struct btrfs_extent_item);
2708 btrfs_set_extent_refs(leaf, extent_item, 1);
2709 btrfs_set_extent_generation(leaf, extent_item, generation);
2710 btrfs_set_extent_flags(leaf, extent_item,
2711 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
2713 if (skinny_metadata) {
2714 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
2715 } else {
2716 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
2717 btrfs_set_tree_block_key(leaf, block_info, key);
2718 btrfs_set_tree_block_level(leaf, block_info, level);
2719 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
2722 btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
2723 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
2725 btrfs_mark_buffer_dirty(leaf);
2726 btrfs_free_path(path);
2728 ret = update_block_group(fs_info, ins->objectid, fs_info->nodesize,
2729 1, 0);
2730 return ret;
2733 static int alloc_tree_block(struct btrfs_trans_handle *trans,
2734 struct btrfs_root *root, u64 num_bytes,
2735 u64 root_objectid, u64 generation,
2736 u64 flags, struct btrfs_disk_key *key,
2737 int level, u64 empty_size, u64 hint_byte,
2738 u64 search_end, struct btrfs_key *ins)
2740 int ret;
2741 ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
2742 hint_byte, search_end, ins, 0);
2743 BUG_ON(ret);
2745 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
2746 struct pending_extent_op *extent_op;
2748 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2749 BUG_ON(!extent_op);
2751 extent_op->type = PENDING_EXTENT_INSERT;
2752 extent_op->bytenr = ins->objectid;
2753 extent_op->num_bytes = ins->offset;
2754 extent_op->level = level;
2755 extent_op->flags = flags;
2756 memcpy(&extent_op->key, key, sizeof(*key));
2758 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2759 ins->objectid + ins->offset - 1,
2760 EXTENT_LOCKED);
2761 set_state_private(&root->fs_info->extent_ins,
2762 ins->objectid, (unsigned long)extent_op);
2763 } else {
2764 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
2765 ins->offset = level;
2766 ins->type = BTRFS_METADATA_ITEM_KEY;
2768 ret = alloc_reserved_tree_block(trans, root_objectid,
2769 generation, flags,
2770 key, level, ins);
2771 finish_current_insert(trans);
2772 del_pending_extents(trans);
2774 return ret;
2778 * helper function to allocate a block for a given tree
2779 * returns the tree buffer or NULL.
2781 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2782 struct btrfs_root *root,
2783 u32 blocksize, u64 root_objectid,
2784 struct btrfs_disk_key *key, int level,
2785 u64 hint, u64 empty_size)
2787 struct btrfs_key ins;
2788 int ret;
2789 struct extent_buffer *buf;
2791 ret = alloc_tree_block(trans, root, blocksize, root_objectid,
2792 trans->transid, 0, key, level,
2793 empty_size, hint, (u64)-1, &ins);
2794 if (ret) {
2795 BUG_ON(ret > 0);
2796 return ERR_PTR(ret);
2799 buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid);
2800 if (!buf) {
2801 btrfs_free_extent(trans, root, ins.objectid, ins.offset,
2802 0, root->root_key.objectid, level, 0);
2803 BUG_ON(1);
2804 return ERR_PTR(-ENOMEM);
2806 btrfs_set_buffer_uptodate(buf);
2807 trans->blocks_used++;
2809 return buf;
#if 0
/* NOTE(review): this whole region is compiled out (#if 0) — kept for
 * historical reference; the free-extent call signatures below predate the
 * current btrfs_free_extent() prototype. */

/* Drop one reference on every data extent referenced from @leaf. */
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *leaf)
{
	u64 leaf_owner;
	u64 leaf_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

	for (i = 0; i < nritems; i++) {
		u64 disk_bytenr;

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf->start, leaf_owner, leaf_generation,
				key.objectid, 0);
		BUG_ON(ret);
	}
	return 0;
}

/* Read ahead up to 32 nearby single-ref child blocks of @node. */
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node,
				     int slot)
{
	u64 bytenr;
	u64 last = 0;
	u32 nritems;
	u32 refs;
	u32 blocksize;
	int ret;
	int i;
	int level;
	int skipped = 0;

	nritems = btrfs_header_nritems(node);
	level = btrfs_header_level(node);
	if (level)
		return;

	for (i = slot; i < nritems && skipped < 32; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		/* Skip blocks far (>32K) from the last one we read. */
		if (last && ((bytenr > last && bytenr - last > SZ_32K) ||
			     (last > bytenr && last - bytenr > SZ_32K))) {
			skipped++;
			continue;
		}
		blocksize = btrfs_level_size(root, level - 1);
		if (i != slot) {
			ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
						      blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				skipped++;
				continue;
			}
		}
		mutex_unlock(&root->fs_info->fs_mutex);
		ret = readahead_tree_block(root, bytenr, blocksize,
					   btrfs_node_ptr_generation(node, i));
		last = bytenr + blocksize;
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
		if (ret)
			break;
	}
}

/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	u64 ptr_gen;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	u32 blocksize;
	int ret;
	u32 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	ret = btrfs_lookup_extent_ref(trans, root,
				      path->nodes[*level]->start,
				      path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while (*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
					      &refs);
		BUG_ON(ret);
		if (refs != 1) {
			/* Shared child: drop our ref and move on. */
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;
			ret = btrfs_free_extent(trans, root, bytenr, blocksize,
						parent->start, root_owner,
						root_gen, *level - 1, 0);
			BUG_ON(ret);
			continue;
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
			free_extent_buffer(next);
			reada_walk_down(root, cur, path->slots[*level]);
			mutex_unlock(&root->fs_info->fs_mutex);
			next = read_tree_block(root, bytenr, blocksize,
					       ptr_gen);
			mutex_lock(&root->fs_info->fs_mutex);
			if (!extent_buffer_uptodate(next)) {
				if (IS_ERR(next))
					ret = PTR_ERR(next);
				else
					ret = -EIO;
				break;
			}
		}
		WARN_ON(*level <= 0);
		if (path->nodes[*level - 1])
			free_extent_buffer(path->nodes[*level - 1]);
		path->nodes[*level - 1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len, parent->start,
				root_owner, root_gen, *level, 0);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

/*
 * helper for dropping snapshots. This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;

			/* More slots to visit: record resume point. */
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			struct extent_buffer *parent;

			if (path->nodes[*level] == root->node)
				parent = path->nodes[*level];
			else
				parent = path->nodes[*level + 1];

			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						parent->start, root_owner,
						root_gen, *level, 0);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	return 1;
}

#endif
3069 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3071 struct btrfs_space_info *sinfo;
3072 struct btrfs_block_group_cache *cache;
3073 u64 start;
3074 u64 end;
3075 u64 ptr;
3076 int ret;
3078 while(1) {
3079 ret = find_first_extent_bit(&info->block_group_cache, 0,
3080 &start, &end, (unsigned int)-1);
3081 if (ret)
3082 break;
3083 ret = get_state_private(&info->block_group_cache, start, &ptr);
3084 if (!ret) {
3085 cache = u64_to_ptr(ptr);
3086 if (cache->free_space_ctl) {
3087 btrfs_remove_free_space_cache(cache);
3088 kfree(cache->free_space_ctl);
3090 kfree(cache);
3092 clear_extent_bits(&info->block_group_cache, start,
3093 end, (unsigned int)-1);
3095 while(1) {
3096 ret = find_first_extent_bit(&info->free_space_cache, 0,
3097 &start, &end, EXTENT_DIRTY);
3098 if (ret)
3099 break;
3100 clear_extent_dirty(&info->free_space_cache, start, end);
3103 while (!list_empty(&info->space_info)) {
3104 sinfo = list_entry(info->space_info.next,
3105 struct btrfs_space_info, list);
3106 list_del_init(&sinfo->list);
3107 kfree(sinfo);
3109 return 0;
3112 static int find_first_block_group(struct btrfs_root *root,
3113 struct btrfs_path *path, struct btrfs_key *key)
3115 int ret;
3116 struct btrfs_key found_key;
3117 struct extent_buffer *leaf;
3118 int slot;
3120 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3121 if (ret < 0)
3122 return ret;
3123 while(1) {
3124 slot = path->slots[0];
3125 leaf = path->nodes[0];
3126 if (slot >= btrfs_header_nritems(leaf)) {
3127 ret = btrfs_next_leaf(root, path);
3128 if (ret == 0)
3129 continue;
3130 if (ret < 0)
3131 goto error;
3132 break;
3134 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3136 if (found_key.objectid >= key->objectid &&
3137 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
3138 return 0;
3139 path->slots[0]++;
3141 ret = -ENOENT;
3142 error:
3143 return ret;
3146 int btrfs_read_block_groups(struct btrfs_root *root)
3148 struct btrfs_path *path;
3149 int ret;
3150 int bit;
3151 struct btrfs_block_group_cache *cache;
3152 struct btrfs_fs_info *info = root->fs_info;
3153 struct btrfs_space_info *space_info;
3154 struct extent_io_tree *block_group_cache;
3155 struct btrfs_key key;
3156 struct btrfs_key found_key;
3157 struct extent_buffer *leaf;
3159 block_group_cache = &info->block_group_cache;
3161 root = info->extent_root;
3162 key.objectid = 0;
3163 key.offset = 0;
3164 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3165 path = btrfs_alloc_path();
3166 if (!path)
3167 return -ENOMEM;
3169 while(1) {
3170 ret = find_first_block_group(root, path, &key);
3171 if (ret > 0) {
3172 ret = 0;
3173 goto error;
3175 if (ret != 0) {
3176 goto error;
3178 leaf = path->nodes[0];
3179 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3181 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3182 if (!cache) {
3183 ret = -ENOMEM;
3184 goto error;
3187 read_extent_buffer(leaf, &cache->item,
3188 btrfs_item_ptr_offset(leaf, path->slots[0]),
3189 sizeof(cache->item));
3190 memcpy(&cache->key, &found_key, sizeof(found_key));
3191 cache->cached = 0;
3192 cache->pinned = 0;
3193 key.objectid = found_key.objectid + found_key.offset;
3194 if (found_key.offset == 0)
3195 key.objectid++;
3196 btrfs_release_path(path);
3199 * Skip 0 sized block group, don't insert them into block
3200 * group cache tree, as its length is 0, it won't get
3201 * freed at close_ctree() time.
3203 if (found_key.offset == 0) {
3204 free(cache);
3205 continue;
3208 cache->flags = btrfs_block_group_flags(&cache->item);
3209 bit = 0;
3210 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3211 bit = BLOCK_GROUP_DATA;
3212 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3213 bit = BLOCK_GROUP_SYSTEM;
3214 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3215 bit = BLOCK_GROUP_METADATA;
3217 set_avail_alloc_bits(info, cache->flags);
3218 if (btrfs_chunk_readonly(info, cache->key.objectid))
3219 cache->ro = 1;
3221 exclude_super_stripes(root, cache);
3223 ret = update_space_info(info, cache->flags, found_key.offset,
3224 btrfs_block_group_used(&cache->item),
3225 &space_info);
3226 BUG_ON(ret);
3227 cache->space_info = space_info;
3229 /* use EXTENT_LOCKED to prevent merging */
3230 set_extent_bits(block_group_cache, found_key.objectid,
3231 found_key.objectid + found_key.offset - 1,
3232 bit | EXTENT_LOCKED);
3233 set_state_private(block_group_cache, found_key.objectid,
3234 (unsigned long)cache);
3236 ret = 0;
3237 error:
3238 btrfs_free_path(path);
3239 return ret;
3242 struct btrfs_block_group_cache *
3243 btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
3244 u64 chunk_offset, u64 size)
3246 int ret;
3247 int bit = 0;
3248 struct btrfs_block_group_cache *cache;
3249 struct extent_io_tree *block_group_cache;
3251 block_group_cache = &fs_info->block_group_cache;
3253 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3254 BUG_ON(!cache);
3255 cache->key.objectid = chunk_offset;
3256 cache->key.offset = size;
3258 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3259 btrfs_set_block_group_used(&cache->item, bytes_used);
3260 btrfs_set_block_group_chunk_objectid(&cache->item,
3261 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
3262 cache->flags = type;
3263 btrfs_set_block_group_flags(&cache->item, type);
3265 exclude_super_stripes(fs_info->extent_root, cache);
3266 ret = update_space_info(fs_info, cache->flags, size, bytes_used,
3267 &cache->space_info);
3268 BUG_ON(ret);
3270 bit = block_group_state_bits(type);
3271 ret = set_extent_bits(block_group_cache, chunk_offset,
3272 chunk_offset + size - 1,
3273 bit | EXTENT_LOCKED);
3274 BUG_ON(ret);
3276 ret = set_state_private(block_group_cache, chunk_offset,
3277 (unsigned long)cache);
3278 BUG_ON(ret);
3279 set_avail_alloc_bits(fs_info, type);
3281 return cache;
3284 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3285 struct btrfs_fs_info *fs_info, u64 bytes_used,
3286 u64 type, u64 chunk_offset, u64 size)
3288 int ret;
3289 struct btrfs_root *extent_root = fs_info->extent_root;
3290 struct btrfs_block_group_cache *cache;
3292 cache = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset,
3293 size);
3294 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3295 sizeof(cache->item));
3296 BUG_ON(ret);
3298 ret = finish_current_insert(trans);
3299 BUG_ON(ret);
3300 ret = del_pending_extents(trans);
3301 BUG_ON(ret);
3303 return 0;
3307 * This is for converter use only.
3309 * In that case, we don't know where are free blocks located.
3310 * Therefore all block group cache entries must be setup properly
3311 * before doing any block allocation.
3313 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
3314 struct btrfs_fs_info *fs_info)
3316 u64 total_bytes;
3317 u64 cur_start;
3318 u64 group_type;
3319 u64 group_size;
3320 u64 group_align;
3321 u64 total_data = 0;
3322 u64 total_metadata = 0;
3323 u64 chunk_objectid;
3324 int ret;
3325 int bit;
3326 struct btrfs_root *extent_root = fs_info->extent_root;
3327 struct btrfs_block_group_cache *cache;
3328 struct extent_io_tree *block_group_cache;
3330 block_group_cache = &fs_info->block_group_cache;
3331 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3332 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
3333 group_align = 64 * fs_info->sectorsize;
3335 cur_start = 0;
3336 while (cur_start < total_bytes) {
3337 group_size = total_bytes / 12;
3338 group_size = min_t(u64, group_size, total_bytes - cur_start);
3339 if (cur_start == 0) {
3340 bit = BLOCK_GROUP_SYSTEM;
3341 group_type = BTRFS_BLOCK_GROUP_SYSTEM;
3342 group_size /= 4;
3343 group_size &= ~(group_align - 1);
3344 group_size = max_t(u64, group_size, SZ_8M);
3345 group_size = min_t(u64, group_size, SZ_32M);
3346 } else {
3347 group_size &= ~(group_align - 1);
3348 if (total_data >= total_metadata * 2) {
3349 group_type = BTRFS_BLOCK_GROUP_METADATA;
3350 group_size = min_t(u64, group_size, SZ_1G);
3351 total_metadata += group_size;
3352 } else {
3353 group_type = BTRFS_BLOCK_GROUP_DATA;
3354 group_size = min_t(u64, group_size,
3355 5ULL * SZ_1G);
3356 total_data += group_size;
3358 if ((total_bytes - cur_start) * 4 < group_size * 5)
3359 group_size = total_bytes - cur_start;
3362 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3363 BUG_ON(!cache);
3365 cache->key.objectid = cur_start;
3366 cache->key.offset = group_size;
3367 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3369 btrfs_set_block_group_used(&cache->item, 0);
3370 btrfs_set_block_group_chunk_objectid(&cache->item,
3371 chunk_objectid);
3372 btrfs_set_block_group_flags(&cache->item, group_type);
3374 cache->flags = group_type;
3376 ret = update_space_info(fs_info, group_type, group_size,
3377 0, &cache->space_info);
3378 BUG_ON(ret);
3379 set_avail_alloc_bits(fs_info, group_type);
3381 set_extent_bits(block_group_cache, cur_start,
3382 cur_start + group_size - 1,
3383 bit | EXTENT_LOCKED);
3384 set_state_private(block_group_cache, cur_start,
3385 (unsigned long)cache);
3386 cur_start += group_size;
3388 /* then insert all the items */
3389 cur_start = 0;
3390 while(cur_start < total_bytes) {
3391 cache = btrfs_lookup_block_group(fs_info, cur_start);
3392 BUG_ON(!cache);
3394 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3395 sizeof(cache->item));
3396 BUG_ON(ret);
3398 finish_current_insert(trans);
3399 ret = del_pending_extents(trans);
3400 BUG_ON(ret);
3402 cur_start = cache->key.objectid + cache->key.offset;
3404 return 0;
3407 int btrfs_update_block_group(struct btrfs_root *root,
3408 u64 bytenr, u64 num_bytes, int alloc,
3409 int mark_free)
3411 return update_block_group(root->fs_info, bytenr, num_bytes,
3412 alloc, mark_free);
3416 * Just remove a block group item in extent tree
3417 * Caller should ensure the block group is empty and all space is pinned.
3418 * Or new tree block/data may be allocated into it.
3420 static int free_block_group_item(struct btrfs_trans_handle *trans,
3421 struct btrfs_fs_info *fs_info,
3422 u64 bytenr, u64 len)
3424 struct btrfs_path *path;
3425 struct btrfs_key key;
3426 struct btrfs_root *root = fs_info->extent_root;
3427 int ret = 0;
3429 key.objectid = bytenr;
3430 key.offset = len;
3431 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3433 path = btrfs_alloc_path();
3434 if (!path)
3435 return -ENOMEM;
3437 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3438 if (ret > 0) {
3439 ret = -ENOENT;
3440 goto out;
3442 if (ret < 0)
3443 goto out;
3445 ret = btrfs_del_item(trans, root, path);
3446 out:
3447 btrfs_free_path(path);
3448 return ret;
3451 static int free_dev_extent_item(struct btrfs_trans_handle *trans,
3452 struct btrfs_fs_info *fs_info,
3453 u64 devid, u64 dev_offset)
3455 struct btrfs_root *root = fs_info->dev_root;
3456 struct btrfs_path *path;
3457 struct btrfs_key key;
3458 int ret;
3460 path = btrfs_alloc_path();
3461 if (!path)
3462 return -ENOMEM;
3464 key.objectid = devid;
3465 key.type = BTRFS_DEV_EXTENT_KEY;
3466 key.offset = dev_offset;
3468 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3469 if (ret < 0)
3470 goto out;
3471 if (ret > 0) {
3472 ret = -ENOENT;
3473 goto out;
3476 ret = btrfs_del_item(trans, root, path);
3477 out:
3478 btrfs_free_path(path);
3479 return ret;
3482 static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
3483 struct btrfs_fs_info *fs_info,
3484 u64 chunk_offset)
3486 struct btrfs_chunk *chunk = NULL;
3487 struct btrfs_root *root= fs_info->chunk_root;
3488 struct btrfs_path *path;
3489 struct btrfs_key key;
3490 u16 num_stripes;
3491 int i;
3492 int ret;
3494 path = btrfs_alloc_path();
3495 if (!path)
3496 return -ENOMEM;
3498 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3499 key.type = BTRFS_CHUNK_ITEM_KEY;
3500 key.offset = chunk_offset;
3502 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
3503 if (ret < 0)
3504 goto out;
3505 if (ret > 0) {
3506 ret = -ENOENT;
3507 goto out;
3509 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3510 struct btrfs_chunk);
3511 num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
3512 for (i = 0; i < num_stripes; i++) {
3513 ret = free_dev_extent_item(trans, fs_info,
3514 btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
3515 btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
3516 if (ret < 0)
3517 goto out;
3519 out:
3520 btrfs_free_path(path);
3521 return ret;
/*
 * Remove the chunk entry matching @key from the superblock's in-memory
 * system chunk array (sys_chunk_array), shrinking the recorded array
 * size accordingly.
 *
 * The array is a packed sequence of (struct btrfs_disk_key,
 * struct btrfs_chunk) pairs; each entry's length depends on its stripe
 * count, so it must be walked entry by entry.
 *
 * Returns 0 if the entry was found and removed, -ENOENT if no matching
 * entry exists, or -EIO if a non-chunk key is encountered (corruption).
 */
static int free_system_chunk_item(struct btrfs_super_block *super,
				  struct btrfs_key *key)
{
	struct btrfs_disk_key *disk_key;
	struct btrfs_key cpu_key;
	u32 array_size = btrfs_super_sys_array_size(super);
	char *ptr = (char *)super->sys_chunk_array;
	int cur = 0;		/* byte offset of the current entry */
	int ret = -ENOENT;

	while (cur < btrfs_super_sys_array_size(super)) {
		struct btrfs_chunk *chunk;
		u32 num_stripes;
		u32 chunk_len;

		disk_key = (struct btrfs_disk_key *)(ptr + cur);
		btrfs_disk_key_to_cpu(&cpu_key, disk_key);
		if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
			/* just in case */
			ret = -EIO;
			goto out;
		}

		/* Entry length = key + chunk item sized by its stripe count */
		chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
		num_stripes = btrfs_stack_chunk_num_stripes(chunk);
		chunk_len = btrfs_chunk_item_size(num_stripes) +
			    sizeof(*disk_key);

		if (key->objectid == cpu_key.objectid &&
		    key->offset == cpu_key.offset &&
		    key->type == cpu_key.type) {
			/* Slide the array tail down over the removed entry */
			memmove(ptr + cur, ptr + cur + chunk_len,
				array_size - cur - chunk_len);
			array_size -= chunk_len;
			btrfs_set_super_sys_array_size(super, array_size);
			ret = 0;
			goto out;
		}

		cur += chunk_len;
	}
out:
	return ret;
}
3569 static int free_chunk_item(struct btrfs_trans_handle *trans,
3570 struct btrfs_fs_info *fs_info,
3571 u64 bytenr)
3573 struct btrfs_path *path;
3574 struct btrfs_key key;
3575 struct btrfs_root *root = fs_info->chunk_root;
3576 struct btrfs_chunk *chunk;
3577 u64 chunk_type;
3578 int ret;
3580 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3581 key.offset = bytenr;
3582 key.type = BTRFS_CHUNK_ITEM_KEY;
3584 path = btrfs_alloc_path();
3585 if (!path)
3586 return -ENOMEM;
3588 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3589 if (ret > 0) {
3590 ret = -ENOENT;
3591 goto out;
3593 if (ret < 0)
3594 goto out;
3595 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3596 struct btrfs_chunk);
3597 chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
3599 ret = btrfs_del_item(trans, root, path);
3600 if (ret < 0)
3601 goto out;
3603 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3604 ret = free_system_chunk_item(fs_info->super_copy, &key);
3605 out:
3606 btrfs_free_path(path);
3607 return ret;
3610 static u64 get_dev_extent_len(struct map_lookup *map)
3612 int div;
3614 switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
3615 case 0: /* Single */
3616 case BTRFS_BLOCK_GROUP_DUP:
3617 case BTRFS_BLOCK_GROUP_RAID1:
3618 div = 1;
3619 break;
3620 case BTRFS_BLOCK_GROUP_RAID5:
3621 div = (map->num_stripes - 1);
3622 break;
3623 case BTRFS_BLOCK_GROUP_RAID6:
3624 div = (map->num_stripes - 2);
3625 break;
3626 case BTRFS_BLOCK_GROUP_RAID10:
3627 div = (map->num_stripes / map->sub_stripes);
3628 break;
3629 default:
3630 /* normally, read chunk security hook should handled it */
3631 BUG_ON(1);
3633 return map->ce.size / div;
3636 /* free block group/chunk related caches */
3637 static int free_block_group_cache(struct btrfs_trans_handle *trans,
3638 struct btrfs_fs_info *fs_info,
3639 u64 bytenr, u64 len)
3641 struct btrfs_block_group_cache *cache;
3642 struct cache_extent *ce;
3643 struct map_lookup *map;
3644 int ret;
3645 int i;
3646 u64 flags;
3648 /* Free block group cache first */
3649 cache = btrfs_lookup_block_group(fs_info, bytenr);
3650 if (!cache)
3651 return -ENOENT;
3652 flags = cache->flags;
3653 if (cache->free_space_ctl) {
3654 btrfs_remove_free_space_cache(cache);
3655 kfree(cache->free_space_ctl);
3657 clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len - 1,
3658 (unsigned int)-1);
3659 ret = free_space_info(fs_info, flags, len, 0, NULL);
3660 if (ret < 0)
3661 goto out;
3662 kfree(cache);
3664 /* Then free mapping info and dev usage info */
3665 ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
3666 if (!ce || ce->start != bytenr) {
3667 ret = -ENOENT;
3668 goto out;
3670 map = container_of(ce, struct map_lookup, ce);
3671 for (i = 0; i < map->num_stripes; i++) {
3672 struct btrfs_device *device;
3674 device = map->stripes[i].dev;
3675 device->bytes_used -= get_dev_extent_len(map);
3676 ret = btrfs_update_device(trans, device);
3677 if (ret < 0)
3678 goto out;
3680 remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
3681 free(map);
3682 out:
3683 return ret;
3686 int btrfs_free_block_group(struct btrfs_trans_handle *trans,
3687 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
3689 struct btrfs_root *extent_root = fs_info->extent_root;
3690 struct btrfs_path *path;
3691 struct btrfs_block_group_item *bgi;
3692 struct btrfs_key key;
3693 int ret = 0;
3695 path = btrfs_alloc_path();
3696 if (!path)
3697 return -ENOMEM;
3699 key.objectid = bytenr;
3700 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3701 key.offset = len;
3703 /* Double check the block group to ensure it's empty */
3704 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3705 if (ret > 0) {
3706 ret = -ENONET;
3707 goto out;
3709 if (ret < 0)
3710 goto out;
3712 bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3713 struct btrfs_block_group_item);
3714 if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
3715 fprintf(stderr,
3716 "WARNING: block group [%llu,%llu) is not empty\n",
3717 bytenr, bytenr + len);
3718 ret = -EINVAL;
3719 goto out;
3721 btrfs_release_path(path);
3724 * Now pin all space in the block group, to prevent further transaction
3725 * allocate space from it.
3726 * Every operation needs a transaction must be in the range.
3728 btrfs_pin_extent(fs_info, bytenr, len);
3730 /* delete block group item and chunk item */
3731 ret = free_block_group_item(trans, fs_info, bytenr, len);
3732 if (ret < 0) {
3733 fprintf(stderr,
3734 "failed to free block group item for [%llu,%llu)\n",
3735 bytenr, bytenr + len);
3736 btrfs_unpin_extent(fs_info, bytenr, len);
3737 goto out;
3740 ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
3741 if (ret < 0) {
3742 fprintf(stderr,
3743 "failed to dev extents belongs to [%llu,%llu)\n",
3744 bytenr, bytenr + len);
3745 btrfs_unpin_extent(fs_info, bytenr, len);
3746 goto out;
3748 ret = free_chunk_item(trans, fs_info, bytenr);
3749 if (ret < 0) {
3750 fprintf(stderr,
3751 "failed to free chunk for [%llu,%llu)\n",
3752 bytenr, bytenr + len);
3753 btrfs_unpin_extent(fs_info, bytenr, len);
3754 goto out;
3757 /* Now release the block_group_cache */
3758 ret = free_block_group_cache(trans, fs_info, bytenr, len);
3759 btrfs_unpin_extent(fs_info, bytenr, len);
3761 out:
3762 btrfs_free_path(path);
3763 return ret;
/*
 * Fixup block accounting. The initial block accounting created by
 * make_block_groups isn't accurate in this case.
 */
3770 int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans)
3772 int ret = 0;
3773 int slot;
3774 u64 start = 0;
3775 u64 bytes_used = 0;
3776 struct btrfs_path path;
3777 struct btrfs_key key;
3778 struct extent_buffer *leaf;
3779 struct btrfs_block_group_cache *cache;
3780 struct btrfs_fs_info *fs_info = trans->fs_info;
3781 struct btrfs_root *root = fs_info->extent_root;
3783 while(extent_root_pending_ops(fs_info)) {
3784 ret = finish_current_insert(trans);
3785 if (ret)
3786 return ret;
3787 ret = del_pending_extents(trans);
3788 if (ret)
3789 return ret;
3792 while(1) {
3793 cache = btrfs_lookup_first_block_group(fs_info, start);
3794 if (!cache)
3795 break;
3796 start = cache->key.objectid + cache->key.offset;
3797 btrfs_set_block_group_used(&cache->item, 0);
3798 cache->space_info->bytes_used = 0;
3799 set_extent_bits(&root->fs_info->block_group_cache,
3800 cache->key.objectid,
3801 cache->key.objectid + cache->key.offset -1,
3802 BLOCK_GROUP_DIRTY);
3805 btrfs_init_path(&path);
3806 key.offset = 0;
3807 key.objectid = 0;
3808 key.type = BTRFS_EXTENT_ITEM_KEY;
3809 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3810 &key, &path, 0, 0);
3811 if (ret < 0)
3812 return ret;
3813 while(1) {
3814 leaf = path.nodes[0];
3815 slot = path.slots[0];
3816 if (slot >= btrfs_header_nritems(leaf)) {
3817 ret = btrfs_next_leaf(root, &path);
3818 if (ret < 0)
3819 return ret;
3820 if (ret > 0)
3821 break;
3822 leaf = path.nodes[0];
3823 slot = path.slots[0];
3825 btrfs_item_key_to_cpu(leaf, &key, slot);
3826 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3827 bytes_used += key.offset;
3828 ret = btrfs_update_block_group(root,
3829 key.objectid, key.offset, 1, 0);
3830 BUG_ON(ret);
3831 } else if (key.type == BTRFS_METADATA_ITEM_KEY) {
3832 bytes_used += fs_info->nodesize;
3833 ret = btrfs_update_block_group(root,
3834 key.objectid, fs_info->nodesize, 1, 0);
3835 if (ret)
3836 goto out;
3838 path.slots[0]++;
3840 btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
3841 ret = 0;
3842 out:
3843 btrfs_release_path(&path);
3844 return ret;
3847 static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path,
3848 u64 *start, u64 *len)
3850 struct btrfs_key key;
3852 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3853 BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY ||
3854 key.type == BTRFS_METADATA_ITEM_KEY));
3855 *start = key.objectid;
3856 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3857 *len = key.offset;
3858 else
3859 *len = root->fs_info->nodesize;
/*
 * Find the first extent item overlapping the range [bytenr, bytenr + len).
 * Return >0 if an overlapping extent is found, with the path pointing to it.
 * Return 0 if there is no overlap.
 * Return <0 on error.
 */
int btrfs_search_overlap_extent(struct btrfs_root *root,
				struct btrfs_path *path, u64 bytenr, u64 len)
{
	struct btrfs_key key;
	u64 cur_start;
	u64 cur_len;
	int ret;

	/*
	 * Sentinel key: EXTENT_DATA keys never appear in the extent tree,
	 * and this type sorts before EXTENT_ITEM/METADATA_ITEM for the
	 * same objectid, so the search positions us between the last
	 * extent starting before @bytenr and the first one starting at
	 * or after it.
	 */
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	/* The sentinel key can never be present in the extent tree. */
	BUG_ON(ret == 0);

	ret = btrfs_previous_extent_item(root, path, 0);
	if (ret < 0)
		return ret;
	/* no previous, check next extent */
	if (ret > 0)
		goto next;
	__get_extent_size(root, path, &cur_start, &cur_len);
	/* Tail overlap: the previous extent reaches into our range */
	if (cur_start + cur_len > bytenr)
		return 1;

next:
	ret = btrfs_next_extent_item(root, path, bytenr + len);
	if (ret < 0)
		return ret;
	/* No next, prev already checked, no overlap */
	if (ret > 0)
		return 0;
	__get_extent_size(root, path, &cur_start, &cur_len);
	/* head overlap: the next extent starts inside our range */
	if (cur_start < bytenr + len)
		return 1;
	return 0;
}
/*
 * Record one file extent for @objectid at file offset @file_pos backed
 * by the data at @disk_bytenr.
 *
 * On entry *@ret_num_bytes is the requested length; on success it is
 * updated to the number of bytes actually recorded (possibly smaller:
 * the request is capped at BTRFS_MAX_EXTENT_SIZE, and when an existing
 * extent item overlaps, only the part covered by that extent is used).
 * The caller (btrfs_record_file_extent) loops until the whole range is
 * recorded.
 *
 * Inserts the file extent item, inserts or reuses the extent item in
 * the extent tree, updates block group accounting, inode nbytes and the
 * backref. Returns 0 on success or a negative errno.
 */
static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 objectid,
				      struct btrfs_inode_item *inode,
				      u64 file_pos, u64 disk_bytenr,
				      u64 *ret_num_bytes)
{
	int ret;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key ins_key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	u64 nbytes;
	u64 extent_num_bytes;
	u64 extent_bytenr;
	u64 extent_offset;
	u64 num_bytes = *ret_num_bytes;

	/*
	 * All supported file systems should not use their 0 extent,
	 * as it's reserved for holes.
	 *
	 * And a hole extent has no size limit, so no need to loop.
	 */
	if (disk_bytenr == 0) {
		ret = btrfs_insert_file_extent(trans, root, objectid,
					       file_pos, disk_bytenr,
					       num_bytes, num_bytes);
		return ret;
	}
	/* Regular extents are capped; the caller loops for the rest. */
	num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* First check for extent overlap */
	ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr,
					  num_bytes);
	if (ret < 0)
		goto fail;
	if (ret > 0) {
		/* Found overlap: reuse the existing extent item. */
		u64 cur_start;
		u64 cur_len;

		__get_extent_size(extent_root, path, &cur_start, &cur_len);
		/*
		 * For the convert case, this extent should be a subset of
		 * the existing one.
		 */
		BUG_ON(disk_bytenr < cur_start);

		extent_bytenr = cur_start;
		extent_num_bytes = cur_len;
		extent_offset = disk_bytenr - extent_bytenr;
	} else {
		/* No overlap, create new extent */
		btrfs_release_path(path);
		ins_key.objectid = disk_bytenr;
		ins_key.offset = num_bytes;
		ins_key.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_insert_empty_item(trans, extent_root, path,
					      &ins_key, sizeof(*ei));
		if (ret == 0) {
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);

			/* Refcount starts at 0; the backref added below
			 * brings it to 1. */
			btrfs_set_extent_refs(leaf, ei, 0);
			btrfs_set_extent_generation(leaf, ei, 0);
			btrfs_set_extent_flags(leaf, ei,
					       BTRFS_EXTENT_FLAG_DATA);
			btrfs_mark_buffer_dirty(leaf);

			ret = btrfs_update_block_group(root, disk_bytenr,
						       num_bytes, 1, 0);
			if (ret)
				goto fail;
		} else if (ret != -EEXIST) {
			/* -EEXIST means the item appeared meanwhile: fine. */
			goto fail;
		}
		/* Flush pending extent tree ops before touching fs tree. */
		btrfs_extent_post_op(trans);
		extent_bytenr = disk_bytenr;
		extent_num_bytes = num_bytes;
		extent_offset = 0;
	}
	/* Insert the file extent item into the fs tree. */
	btrfs_release_path(path);
	ins_key.objectid = objectid;
	ins_key.offset = file_pos;
	ins_key.type = BTRFS_EXTENT_DATA_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &ins_key,
				      sizeof(*fi));
	if (ret)
		goto fail;
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes);
	btrfs_set_file_extent_compression(leaf, fi, 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
	btrfs_mark_buffer_dirty(leaf);

	/* Account the newly referenced bytes on the inode. */
	nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
	btrfs_set_stack_inode_nbytes(inode, nbytes);
	btrfs_release_path(path);

	/* Add the backref for the (possibly shared) extent item. */
	ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes,
				   0, root->root_key.objectid, objectid,
				   file_pos - extent_offset);
	if (ret)
		goto fail;
	ret = 0;
	/* Report how much of the request was actually covered. */
	*ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes);
fail:
	btrfs_free_path(path);
	return ret;
}
/*
 * Record a file extent. Does all the required work, such as inserting the
 * file extent item, inserting the extent item and backref item into the
 * extent tree, and updating block accounting.
 */
4044 int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
4045 struct btrfs_root *root, u64 objectid,
4046 struct btrfs_inode_item *inode,
4047 u64 file_pos, u64 disk_bytenr,
4048 u64 num_bytes)
4050 u64 cur_disk_bytenr = disk_bytenr;
4051 u64 cur_file_pos = file_pos;
4052 u64 cur_num_bytes = num_bytes;
4053 int ret = 0;
4055 while (num_bytes > 0) {
4056 ret = __btrfs_record_file_extent(trans, root, objectid,
4057 inode, cur_file_pos,
4058 cur_disk_bytenr,
4059 &cur_num_bytes);
4060 if (ret < 0)
4061 break;
4062 cur_disk_bytenr += cur_num_bytes;
4063 cur_file_pos += cur_num_bytes;
4064 num_bytes -= cur_num_bytes;
4066 return ret;
4070 static int add_excluded_extent(struct btrfs_root *root,
4071 u64 start, u64 num_bytes)
4073 u64 end = start + num_bytes - 1;
4074 set_extent_bits(&root->fs_info->pinned_extents,
4075 start, end, EXTENT_UPTODATE);
4076 return 0;
4079 void free_excluded_extents(struct btrfs_root *root,
4080 struct btrfs_block_group_cache *cache)
4082 u64 start, end;
4084 start = cache->key.objectid;
4085 end = start + cache->key.offset - 1;
4087 clear_extent_bits(&root->fs_info->pinned_extents,
4088 start, end, EXTENT_UPTODATE);
/*
 * Exclude all superblock copies (and the pre-superblock area of the
 * first block group) that fall inside @cache from allocation, and add
 * their sizes to cache->bytes_super.
 *
 * Returns 0 on success or a negative errno from btrfs_rmap_block() or
 * add_excluded_extent().
 */
int exclude_super_stripes(struct btrfs_root *root,
			  struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	/* The area below the first superblock is never allocatable. */
	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* Map the physical superblock copy back to logical ranges. */
		ret = btrfs_rmap_block(root->fs_info,
				       cache->key.objectid, bytenr,
				       &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			/* Skip ranges entirely outside this block group. */
			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			/* Clamp the range to the block group boundaries. */
			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}
4149 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
4150 struct btrfs_fs_info *info, u64 start, u64 end)
4152 u64 extent_start, extent_end, size, total_added = 0;
4153 int ret;
4155 while (start < end) {
4156 ret = find_first_extent_bit(&info->pinned_extents, start,
4157 &extent_start, &extent_end,
4158 EXTENT_DIRTY | EXTENT_UPTODATE);
4159 if (ret)
4160 break;
4162 if (extent_start <= start) {
4163 start = extent_end + 1;
4164 } else if (extent_start > start && extent_start < end) {
4165 size = extent_start - start;
4166 total_added += size;
4167 ret = btrfs_add_free_space(block_group->free_space_ctl,
4168 start, size);
4169 BUG_ON(ret); /* -ENOMEM or logic error */
4170 start = extent_end + 1;
4171 } else {
4172 break;
4176 if (start < end) {
4177 size = end - start;
4178 total_added += size;
4179 ret = btrfs_add_free_space(block_group->free_space_ctl, start,
4180 size);
4181 BUG_ON(ret); /* -ENOMEM or logic error */
4184 return total_added;