btrfs-progs: use libbtrfsutil for subvol delete
[btrfs-progs-unstable/devel.git] / extent-tree.c
blobe2ae74a7fe66bfbecb64bf27bf7d6994666135fa
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <stdint.h>
22 #include <math.h>
23 #include "kerncompat.h"
24 #include "radix-tree.h"
25 #include "ctree.h"
26 #include "disk-io.h"
27 #include "print-tree.h"
28 #include "transaction.h"
29 #include "crc32c.h"
30 #include "volumes.h"
31 #include "free-space-cache.h"
32 #include "utils.h"
34 #define PENDING_EXTENT_INSERT 0
35 #define PENDING_EXTENT_DELETE 1
36 #define PENDING_BACKREF_UPDATE 2
38 struct pending_extent_op {
39 int type;
40 u64 bytenr;
41 u64 num_bytes;
42 u64 flags;
43 struct btrfs_disk_key key;
44 int level;
47 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root,
49 u64 root_objectid, u64 generation,
50 u64 flags, struct btrfs_disk_key *key,
51 int level, struct btrfs_key *ins);
52 static int __free_extent(struct btrfs_trans_handle *trans,
53 struct btrfs_root *root,
54 u64 bytenr, u64 num_bytes, u64 parent,
55 u64 root_objectid, u64 owner_objectid,
56 u64 owner_offset, int refs_to_drop);
57 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
58 btrfs_root *extent_root);
59 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
60 btrfs_root *extent_root);
61 static struct btrfs_block_group_cache *
62 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
63 *hint, u64 search_start, int data, int owner);
65 static int remove_sb_from_cache(struct btrfs_root *root,
66 struct btrfs_block_group_cache *cache)
68 u64 bytenr;
69 u64 *logical;
70 int stripe_len;
71 int i, nr, ret;
72 struct btrfs_fs_info *fs_info = root->fs_info;
73 struct extent_io_tree *free_space_cache;
75 free_space_cache = &fs_info->free_space_cache;
76 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
77 bytenr = btrfs_sb_offset(i);
78 ret = btrfs_rmap_block(fs_info, cache->key.objectid, bytenr, 0,
79 &logical, &nr, &stripe_len);
80 BUG_ON(ret);
81 while (nr--) {
82 clear_extent_dirty(free_space_cache, logical[nr],
83 logical[nr] + stripe_len - 1);
85 kfree(logical);
87 return 0;
90 static int cache_block_group(struct btrfs_root *root,
91 struct btrfs_block_group_cache *block_group)
93 struct btrfs_path *path;
94 int ret;
95 struct btrfs_key key;
96 struct extent_buffer *leaf;
97 struct extent_io_tree *free_space_cache;
98 int slot;
99 u64 last;
100 u64 hole_size;
102 if (!block_group)
103 return 0;
105 root = root->fs_info->extent_root;
106 free_space_cache = &root->fs_info->free_space_cache;
108 if (block_group->cached)
109 return 0;
111 path = btrfs_alloc_path();
112 if (!path)
113 return -ENOMEM;
115 path->reada = 2;
116 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
117 key.objectid = last;
118 key.offset = 0;
119 key.type = 0;
121 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
122 if (ret < 0)
123 goto err;
125 while(1) {
126 leaf = path->nodes[0];
127 slot = path->slots[0];
128 if (slot >= btrfs_header_nritems(leaf)) {
129 ret = btrfs_next_leaf(root, path);
130 if (ret < 0)
131 goto err;
132 if (ret == 0) {
133 continue;
134 } else {
135 break;
138 btrfs_item_key_to_cpu(leaf, &key, slot);
139 if (key.objectid < block_group->key.objectid) {
140 goto next;
142 if (key.objectid >= block_group->key.objectid +
143 block_group->key.offset) {
144 break;
147 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
148 key.type == BTRFS_METADATA_ITEM_KEY) {
149 if (key.objectid > last) {
150 hole_size = key.objectid - last;
151 set_extent_dirty(free_space_cache, last,
152 last + hole_size - 1);
154 if (key.type == BTRFS_METADATA_ITEM_KEY)
155 last = key.objectid + root->fs_info->nodesize;
156 else
157 last = key.objectid + key.offset;
159 next:
160 path->slots[0]++;
163 if (block_group->key.objectid +
164 block_group->key.offset > last) {
165 hole_size = block_group->key.objectid +
166 block_group->key.offset - last;
167 set_extent_dirty(free_space_cache, last, last + hole_size - 1);
169 remove_sb_from_cache(root, block_group);
170 block_group->cached = 1;
171 err:
172 btrfs_free_path(path);
173 return 0;
176 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
177 btrfs_fs_info *info,
178 u64 bytenr)
180 struct extent_io_tree *block_group_cache;
181 struct btrfs_block_group_cache *block_group = NULL;
182 u64 ptr;
183 u64 start;
184 u64 end;
185 int ret;
187 bytenr = max_t(u64, bytenr,
188 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
189 block_group_cache = &info->block_group_cache;
190 ret = find_first_extent_bit(block_group_cache,
191 bytenr, &start, &end,
192 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
193 BLOCK_GROUP_SYSTEM);
194 if (ret) {
195 return NULL;
197 ret = get_state_private(block_group_cache, start, &ptr);
198 if (ret)
199 return NULL;
201 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
202 return block_group;
205 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
206 btrfs_fs_info *info,
207 u64 bytenr)
209 struct extent_io_tree *block_group_cache;
210 struct btrfs_block_group_cache *block_group = NULL;
211 u64 ptr;
212 u64 start;
213 u64 end;
214 int ret;
216 block_group_cache = &info->block_group_cache;
217 ret = find_first_extent_bit(block_group_cache,
218 bytenr, &start, &end,
219 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
220 BLOCK_GROUP_SYSTEM);
221 if (ret) {
222 return NULL;
224 ret = get_state_private(block_group_cache, start, &ptr);
225 if (ret)
226 return NULL;
228 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
229 if (block_group->key.objectid <= bytenr && bytenr <
230 block_group->key.objectid + block_group->key.offset)
231 return block_group;
232 return NULL;
235 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
237 return (cache->flags & bits) == bits;
240 static int noinline find_search_start(struct btrfs_root *root,
241 struct btrfs_block_group_cache **cache_ret,
242 u64 *start_ret, int num, int data)
244 int ret;
245 struct btrfs_block_group_cache *cache = *cache_ret;
246 u64 last = *start_ret;
247 u64 start = 0;
248 u64 end = 0;
249 u64 search_start = *start_ret;
250 int wrapped = 0;
252 if (!cache)
253 goto out;
254 again:
255 ret = cache_block_group(root, cache);
256 if (ret)
257 goto out;
259 last = max(search_start, cache->key.objectid);
260 if (cache->ro || !block_group_bits(cache, data))
261 goto new_group;
263 while(1) {
264 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
265 last, &start, &end, EXTENT_DIRTY);
266 if (ret) {
267 goto new_group;
270 start = max(last, start);
271 last = end + 1;
272 if (last - start < num) {
273 continue;
275 if (start + num > cache->key.objectid + cache->key.offset) {
276 goto new_group;
278 *start_ret = start;
279 return 0;
281 out:
282 *start_ret = last;
283 cache = btrfs_lookup_block_group(root->fs_info, search_start);
284 if (!cache) {
285 printk("Unable to find block group for %llu\n",
286 (unsigned long long)search_start);
287 return -ENOENT;
289 return -ENOSPC;
291 new_group:
292 last = cache->key.objectid + cache->key.offset;
293 wrapped:
294 cache = btrfs_lookup_first_block_group(root->fs_info, last);
295 if (!cache) {
296 if (!wrapped) {
297 wrapped = 1;
298 last = search_start;
299 goto wrapped;
301 goto out;
303 *cache_ret = cache;
304 goto again;
307 static int block_group_state_bits(u64 flags)
309 int bits = 0;
310 if (flags & BTRFS_BLOCK_GROUP_DATA)
311 bits |= BLOCK_GROUP_DATA;
312 if (flags & BTRFS_BLOCK_GROUP_METADATA)
313 bits |= BLOCK_GROUP_METADATA;
314 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
315 bits |= BLOCK_GROUP_SYSTEM;
316 return bits;
319 static struct btrfs_block_group_cache *
320 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
321 *hint, u64 search_start, int data, int owner)
323 struct btrfs_block_group_cache *cache;
324 struct extent_io_tree *block_group_cache;
325 struct btrfs_block_group_cache *found_group = NULL;
326 struct btrfs_fs_info *info = root->fs_info;
327 u64 used;
328 u64 last = 0;
329 u64 hint_last;
330 u64 start;
331 u64 end;
332 u64 free_check;
333 u64 ptr;
334 int bit;
335 int ret;
336 int full_search = 0;
337 int factor = 10;
339 block_group_cache = &info->block_group_cache;
341 if (!owner)
342 factor = 10;
344 bit = block_group_state_bits(data);
346 if (search_start) {
347 struct btrfs_block_group_cache *shint;
348 shint = btrfs_lookup_block_group(info, search_start);
349 if (shint && !shint->ro && block_group_bits(shint, data)) {
350 used = btrfs_block_group_used(&shint->item);
351 if (used + shint->pinned <
352 div_factor(shint->key.offset, factor)) {
353 return shint;
357 if (hint && !hint->ro && block_group_bits(hint, data)) {
358 used = btrfs_block_group_used(&hint->item);
359 if (used + hint->pinned <
360 div_factor(hint->key.offset, factor)) {
361 return hint;
363 last = hint->key.objectid + hint->key.offset;
364 hint_last = last;
365 } else {
366 if (hint)
367 hint_last = max(hint->key.objectid, search_start);
368 else
369 hint_last = search_start;
371 last = hint_last;
373 again:
374 while(1) {
375 ret = find_first_extent_bit(block_group_cache, last,
376 &start, &end, bit);
377 if (ret)
378 break;
380 ret = get_state_private(block_group_cache, start, &ptr);
381 if (ret)
382 break;
384 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
385 last = cache->key.objectid + cache->key.offset;
386 used = btrfs_block_group_used(&cache->item);
388 if (!cache->ro && block_group_bits(cache, data)) {
389 if (full_search)
390 free_check = cache->key.offset;
391 else
392 free_check = div_factor(cache->key.offset,
393 factor);
395 if (used + cache->pinned < free_check) {
396 found_group = cache;
397 goto found;
400 cond_resched();
402 if (!full_search) {
403 last = search_start;
404 full_search = 1;
405 goto again;
407 found:
408 return found_group;
412 * Back reference rules. Back refs have three main goals:
414 * 1) differentiate between all holders of references to an extent so that
415 * when a reference is dropped we can make sure it was a valid reference
416 * before freeing the extent.
418 * 2) Provide enough information to quickly find the holders of an extent
419 * if we notice a given block is corrupted or bad.
421 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
422 * maintenance. This is actually the same as #2, but with a slightly
423 * different use case.
425 * There are two kinds of back refs. The implicit back refs is optimized
426 * for pointers in non-shared tree blocks. For a given pointer in a block,
427 * back refs of this kind provide information about the block's owner tree
428 * and the pointer's key. These information allow us to find the block by
429 * b-tree searching. The full back refs is for pointers in tree blocks not
430 * referenced by their owner trees. The location of tree block is recorded
431 * in the back refs. Actually the full back refs is generic, and can be
432 * used in all cases the implicit back refs is used. The major shortcoming
433 * of the full back refs is its overhead. Every time a tree block gets
434 * COWed, we have to update back refs entry for all pointers in it.
436 * For a newly allocated tree block, we use implicit back refs for
437 * pointers in it. This means most tree related operations only involve
438 * implicit back refs. For a tree block created in old transaction, the
439 * only way to drop a reference to it is COW it. So we can detect the
440 * event that tree block loses its owner tree's reference and do the
441 * back refs conversion.
443 * When a tree block is COW'd through a tree, there are four cases:
445 * The reference count of the block is one and the tree is the block's
446 * owner tree. Nothing to do in this case.
448 * The reference count of the block is one and the tree is not the
449 * block's owner tree. In this case, full back refs is used for pointers
450 * in the block. Remove these full back refs, add implicit back refs for
451 * every pointers in the new block.
453 * The reference count of the block is greater than one and the tree is
454 * the block's owner tree. In this case, implicit back refs is used for
455 * pointers in the block. Add full back refs for every pointers in the
456 * block, increase lower level extents' reference counts. The original
457 * implicit back refs are entailed to the new block.
459 * The reference count of the block is greater than one and the tree is
460 * not the block's owner tree. Add implicit back refs for every pointer in
461 * the new block, increase lower level extents' reference count.
463 * Back Reference Key composing:
465 * The key objectid corresponds to the first byte in the extent,
466 * The key type is used to differentiate between types of back refs.
467 * There are different meanings of the key offset for different types
468 * of back refs.
470 * File extents can be referenced by:
472 * - multiple snapshots, subvolumes, or different generations in one subvol
473 * - different files inside a single subvolume
474 * - different offsets inside a file (bookend extents in file.c)
476 * The extent ref structure for the implicit back refs has fields for:
478 * - Objectid of the subvolume root
479 * - objectid of the file holding the reference
480 * - original offset in the file
481 * - how many bookend extents
483 * The key offset for the implicit back refs is hash of the first
484 * three fields.
486 * The extent ref structure for the full back refs has field for:
488 * - number of pointers in the tree leaf
490 * The key offset for the implicit back refs is the first byte of
491 * the tree leaf
493 * When a file extent is allocated, The implicit back refs is used.
494 * the fields are filled in:
496 * (root_key.objectid, inode objectid, offset in file, 1)
498 * When a file extent is removed file truncation, we find the
499 * corresponding implicit back refs and check the following fields:
501 * (btrfs_header_owner(leaf), inode objectid, offset in file)
503 * Btree extents can be referenced by:
505 * - Different subvolumes
507 * Both the implicit back refs and the full back refs for tree blocks
508 * only consist of key. The key offset for the implicit back refs is
509 * objectid of block's owner tree. The key offset for the full back refs
510 * is the first byte of parent block.
512 * When implicit back refs is used, information about the lowest key and
513 * level of the tree block are required. These information are stored in
514 * tree block info structure.
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert an old v0 extent item at path->slots[0] into the current extent
 * item format, growing the item by @extra_size and filling in flags and
 * (for tree blocks) the btrfs_tree_block_info trailer.
 *
 * @owner == (u64)-1 means "unknown": the owner is recovered by scanning
 * forward for the extent's EXTENT_REF_V0 item.
 *
 * Returns 0 on success or a negative errno from the tree searches.
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/* scan forward for a v0 ref item to learn the real owner */
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	/* tree blocks carry an extra btrfs_tree_block_info after the item */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif
599 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
601 u32 high_crc = ~(u32)0;
602 u32 low_crc = ~(u32)0;
603 __le64 lenum;
605 lenum = cpu_to_le64(root_objectid);
606 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
607 lenum = cpu_to_le64(owner);
608 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
609 lenum = cpu_to_le64(offset);
610 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
612 return ((u64)high_crc << 31) ^ (u64)low_crc;
615 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
616 struct btrfs_extent_data_ref *ref)
618 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
619 btrfs_extent_data_ref_objectid(leaf, ref),
620 btrfs_extent_data_ref_offset(leaf, ref));
623 static int match_extent_data_ref(struct extent_buffer *leaf,
624 struct btrfs_extent_data_ref *ref,
625 u64 root_objectid, u64 owner, u64 offset)
627 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
628 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
629 btrfs_extent_data_ref_offset(leaf, ref) != offset)
630 return 0;
631 return 1;
634 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
635 struct btrfs_root *root,
636 struct btrfs_path *path,
637 u64 bytenr, u64 parent,
638 u64 root_objectid,
639 u64 owner, u64 offset)
641 struct btrfs_key key;
642 struct btrfs_extent_data_ref *ref;
643 struct extent_buffer *leaf;
644 u32 nritems;
645 int ret;
646 int recow;
647 int err = -ENOENT;
649 key.objectid = bytenr;
650 if (parent) {
651 key.type = BTRFS_SHARED_DATA_REF_KEY;
652 key.offset = parent;
653 } else {
654 key.type = BTRFS_EXTENT_DATA_REF_KEY;
655 key.offset = hash_extent_data_ref(root_objectid,
656 owner, offset);
658 again:
659 recow = 0;
660 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
661 if (ret < 0) {
662 err = ret;
663 goto fail;
666 if (parent) {
667 if (!ret)
668 return 0;
669 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
670 key.type = BTRFS_EXTENT_REF_V0_KEY;
671 btrfs_release_path(path);
672 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
673 if (ret < 0) {
674 err = ret;
675 goto fail;
677 if (!ret)
678 return 0;
679 #endif
680 goto fail;
683 leaf = path->nodes[0];
684 nritems = btrfs_header_nritems(leaf);
685 while (1) {
686 if (path->slots[0] >= nritems) {
687 ret = btrfs_next_leaf(root, path);
688 if (ret < 0)
689 err = ret;
690 if (ret)
691 goto fail;
693 leaf = path->nodes[0];
694 nritems = btrfs_header_nritems(leaf);
695 recow = 1;
698 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
699 if (key.objectid != bytenr ||
700 key.type != BTRFS_EXTENT_DATA_REF_KEY)
701 goto fail;
703 ref = btrfs_item_ptr(leaf, path->slots[0],
704 struct btrfs_extent_data_ref);
706 if (match_extent_data_ref(leaf, ref, root_objectid,
707 owner, offset)) {
708 if (recow) {
709 btrfs_release_path(path);
710 goto again;
712 err = 0;
713 break;
715 path->slots[0]++;
717 fail:
718 return err;
721 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
722 struct btrfs_root *root,
723 struct btrfs_path *path,
724 u64 bytenr, u64 parent,
725 u64 root_objectid, u64 owner,
726 u64 offset, int refs_to_add)
728 struct btrfs_key key;
729 struct extent_buffer *leaf;
730 u32 size;
731 u32 num_refs;
732 int ret;
734 key.objectid = bytenr;
735 if (parent) {
736 key.type = BTRFS_SHARED_DATA_REF_KEY;
737 key.offset = parent;
738 size = sizeof(struct btrfs_shared_data_ref);
739 } else {
740 key.type = BTRFS_EXTENT_DATA_REF_KEY;
741 key.offset = hash_extent_data_ref(root_objectid,
742 owner, offset);
743 size = sizeof(struct btrfs_extent_data_ref);
746 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
747 if (ret && ret != -EEXIST)
748 goto fail;
750 leaf = path->nodes[0];
751 if (parent) {
752 struct btrfs_shared_data_ref *ref;
753 ref = btrfs_item_ptr(leaf, path->slots[0],
754 struct btrfs_shared_data_ref);
755 if (ret == 0) {
756 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
757 } else {
758 num_refs = btrfs_shared_data_ref_count(leaf, ref);
759 num_refs += refs_to_add;
760 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
762 } else {
763 struct btrfs_extent_data_ref *ref;
764 while (ret == -EEXIST) {
765 ref = btrfs_item_ptr(leaf, path->slots[0],
766 struct btrfs_extent_data_ref);
767 if (match_extent_data_ref(leaf, ref, root_objectid,
768 owner, offset))
769 break;
770 btrfs_release_path(path);
772 key.offset++;
773 ret = btrfs_insert_empty_item(trans, root, path, &key,
774 size);
775 if (ret && ret != -EEXIST)
776 goto fail;
778 leaf = path->nodes[0];
780 ref = btrfs_item_ptr(leaf, path->slots[0],
781 struct btrfs_extent_data_ref);
782 if (ret == 0) {
783 btrfs_set_extent_data_ref_root(leaf, ref,
784 root_objectid);
785 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
786 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
787 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
788 } else {
789 num_refs = btrfs_extent_data_ref_count(leaf, ref);
790 num_refs += refs_to_add;
791 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
794 btrfs_mark_buffer_dirty(leaf);
795 ret = 0;
796 fail:
797 btrfs_release_path(path);
798 return ret;
801 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
802 struct btrfs_root *root,
803 struct btrfs_path *path,
804 int refs_to_drop)
806 struct btrfs_key key;
807 struct btrfs_extent_data_ref *ref1 = NULL;
808 struct btrfs_shared_data_ref *ref2 = NULL;
809 struct extent_buffer *leaf;
810 u32 num_refs = 0;
811 int ret = 0;
813 leaf = path->nodes[0];
814 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
816 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
817 ref1 = btrfs_item_ptr(leaf, path->slots[0],
818 struct btrfs_extent_data_ref);
819 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
820 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
821 ref2 = btrfs_item_ptr(leaf, path->slots[0],
822 struct btrfs_shared_data_ref);
823 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
824 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
825 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
826 struct btrfs_extent_ref_v0 *ref0;
827 ref0 = btrfs_item_ptr(leaf, path->slots[0],
828 struct btrfs_extent_ref_v0);
829 num_refs = btrfs_ref_count_v0(leaf, ref0);
830 #endif
831 } else {
832 BUG();
835 BUG_ON(num_refs < refs_to_drop);
836 num_refs -= refs_to_drop;
838 if (num_refs == 0) {
839 ret = btrfs_del_item(trans, root, path);
840 } else {
841 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
842 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
843 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
844 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
845 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
846 else {
847 struct btrfs_extent_ref_v0 *ref0;
848 ref0 = btrfs_item_ptr(leaf, path->slots[0],
849 struct btrfs_extent_ref_v0);
850 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
852 #endif
853 btrfs_mark_buffer_dirty(leaf);
855 return ret;
858 static noinline u32 extent_data_ref_count(struct btrfs_path *path,
859 struct btrfs_extent_inline_ref *iref)
861 struct btrfs_key key;
862 struct extent_buffer *leaf;
863 struct btrfs_extent_data_ref *ref1;
864 struct btrfs_shared_data_ref *ref2;
865 u32 num_refs = 0;
867 leaf = path->nodes[0];
868 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
869 if (iref) {
870 if (btrfs_extent_inline_ref_type(leaf, iref) ==
871 BTRFS_EXTENT_DATA_REF_KEY) {
872 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
873 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
874 } else {
875 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
876 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
878 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
879 ref1 = btrfs_item_ptr(leaf, path->slots[0],
880 struct btrfs_extent_data_ref);
881 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
882 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
883 ref2 = btrfs_item_ptr(leaf, path->slots[0],
884 struct btrfs_shared_data_ref);
885 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
886 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
887 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
888 struct btrfs_extent_ref_v0 *ref0;
889 ref0 = btrfs_item_ptr(leaf, path->slots[0],
890 struct btrfs_extent_ref_v0);
891 num_refs = btrfs_ref_count_v0(leaf, ref0);
892 #endif
893 } else {
894 BUG();
896 return num_refs;
899 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
900 struct btrfs_root *root,
901 struct btrfs_path *path,
902 u64 bytenr, u64 parent,
903 u64 root_objectid)
905 struct btrfs_key key;
906 int ret;
908 key.objectid = bytenr;
909 if (parent) {
910 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
911 key.offset = parent;
912 } else {
913 key.type = BTRFS_TREE_BLOCK_REF_KEY;
914 key.offset = root_objectid;
917 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
918 if (ret > 0)
919 ret = -ENOENT;
920 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
921 if (ret == -ENOENT && parent) {
922 btrfs_release_path(path);
923 key.type = BTRFS_EXTENT_REF_V0_KEY;
924 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
925 if (ret > 0)
926 ret = -ENOENT;
928 #endif
929 return ret;
932 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
933 struct btrfs_root *root,
934 struct btrfs_path *path,
935 u64 bytenr, u64 parent,
936 u64 root_objectid)
938 struct btrfs_key key;
939 int ret;
941 key.objectid = bytenr;
942 if (parent) {
943 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
944 key.offset = parent;
945 } else {
946 key.type = BTRFS_TREE_BLOCK_REF_KEY;
947 key.offset = root_objectid;
950 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
952 btrfs_release_path(path);
953 return ret;
956 static inline int extent_ref_type(u64 parent, u64 owner)
958 int type;
959 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
960 if (parent > 0)
961 type = BTRFS_SHARED_BLOCK_REF_KEY;
962 else
963 type = BTRFS_TREE_BLOCK_REF_KEY;
964 } else {
965 if (parent > 0)
966 type = BTRFS_SHARED_DATA_REF_KEY;
967 else
968 type = BTRFS_EXTENT_DATA_REF_KEY;
970 return type;
973 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
974 struct btrfs_root *root,
975 struct btrfs_path *path,
976 struct btrfs_extent_inline_ref **ref_ret,
977 u64 bytenr, u64 num_bytes,
978 u64 parent, u64 root_objectid,
979 u64 owner, u64 offset, int insert)
981 struct btrfs_key key;
982 struct extent_buffer *leaf;
983 struct btrfs_extent_item *ei;
984 struct btrfs_extent_inline_ref *iref;
985 u64 flags;
986 u32 item_size;
987 unsigned long ptr;
988 unsigned long end;
989 int extra_size;
990 int type;
991 int want;
992 int ret;
993 int err = 0;
994 int skinny_metadata =
995 btrfs_fs_incompat(root->fs_info, SKINNY_METADATA);
997 key.objectid = bytenr;
998 key.type = BTRFS_EXTENT_ITEM_KEY;
999 key.offset = num_bytes;
1001 want = extent_ref_type(parent, owner);
1002 if (insert)
1003 extra_size = btrfs_extent_inline_ref_size(want);
1004 else
1005 extra_size = -1;
1007 if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
1008 key.type = BTRFS_METADATA_ITEM_KEY;
1009 key.offset = owner;
1010 } else if (skinny_metadata) {
1011 skinny_metadata = 0;
1014 again:
1015 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1016 if (ret < 0) {
1017 err = ret;
1018 goto out;
1022 * We may be a newly converted file system which still has the old fat
1023 * extent entries for metadata, so try and see if we have one of those.
1025 if (ret > 0 && skinny_metadata) {
1026 skinny_metadata = 0;
1027 if (path->slots[0]) {
1028 path->slots[0]--;
1029 btrfs_item_key_to_cpu(path->nodes[0], &key,
1030 path->slots[0]);
1031 if (key.objectid == bytenr &&
1032 key.type == BTRFS_EXTENT_ITEM_KEY &&
1033 key.offset == num_bytes)
1034 ret = 0;
1036 if (ret) {
1037 key.type = BTRFS_EXTENT_ITEM_KEY;
1038 key.offset = num_bytes;
1039 btrfs_release_path(path);
1040 goto again;
1044 if (ret) {
1045 printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
1046 return -ENOENT;
1049 BUG_ON(ret);
1051 leaf = path->nodes[0];
1052 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1053 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1054 if (item_size < sizeof(*ei)) {
1055 if (!insert) {
1056 err = -ENOENT;
1057 goto out;
1059 ret = convert_extent_item_v0(trans, root, path, owner,
1060 extra_size);
1061 if (ret < 0) {
1062 err = ret;
1063 goto out;
1065 leaf = path->nodes[0];
1066 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1068 #endif
1069 if (item_size < sizeof(*ei)) {
1070 printf("Size is %u, needs to be %u, slot %d\n",
1071 (unsigned)item_size,
1072 (unsigned)sizeof(*ei), path->slots[0]);
1073 btrfs_print_leaf(root, leaf);
1074 return -EINVAL;
1076 BUG_ON(item_size < sizeof(*ei));
1078 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1079 flags = btrfs_extent_flags(leaf, ei);
1081 ptr = (unsigned long)(ei + 1);
1082 end = (unsigned long)ei + item_size;
1084 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1085 ptr += sizeof(struct btrfs_tree_block_info);
1086 BUG_ON(ptr > end);
1087 } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1088 if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
1089 return -EIO;
1093 err = -ENOENT;
1094 while (1) {
1095 if (ptr >= end) {
1096 WARN_ON(ptr > end);
1097 break;
1099 iref = (struct btrfs_extent_inline_ref *)ptr;
1100 type = btrfs_extent_inline_ref_type(leaf, iref);
1101 if (want < type)
1102 break;
1103 if (want > type) {
1104 ptr += btrfs_extent_inline_ref_size(type);
1105 continue;
1108 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1109 struct btrfs_extent_data_ref *dref;
1110 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1111 if (match_extent_data_ref(leaf, dref, root_objectid,
1112 owner, offset)) {
1113 err = 0;
1114 break;
1116 if (hash_extent_data_ref_item(leaf, dref) <
1117 hash_extent_data_ref(root_objectid, owner, offset))
1118 break;
1119 } else {
1120 u64 ref_offset;
1121 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1122 if (parent > 0) {
1123 if (parent == ref_offset) {
1124 err = 0;
1125 break;
1127 if (ref_offset < parent)
1128 break;
1129 } else {
1130 if (root_objectid == ref_offset) {
1131 err = 0;
1132 break;
1134 if (ref_offset < root_objectid)
1135 break;
1138 ptr += btrfs_extent_inline_ref_size(type);
1140 if (err == -ENOENT && insert) {
1141 if (item_size + extra_size >=
1142 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1143 err = -EAGAIN;
1144 goto out;
1147 * To add new inline back ref, we have to make sure
1148 * there is no corresponding back ref item.
1149 * For simplicity, we just do not add new inline back
1150 * ref if there is any back ref item.
1152 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1153 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1154 err = -EAGAIN;
1155 goto out;
1158 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1159 out:
1160 return err;
/*
 * Insert a new inline back reference into an existing extent item.
 *
 * @iref points at the position inside the extent item where the new ref
 * belongs (as returned by lookup_inline_extent_backref with insert=1).
 * The item is grown by the size of one inline ref of the computed type,
 * later refs are shifted right to make room, and the new ref is written
 * in place.  The extent item's total ref count is bumped by @refs_to_add.
 *
 * Returns 0; allocation/extension failures are fatal (BUG_ON).
 */
static int setup_inline_extent_backref(struct btrfs_root *root,
				       struct btrfs_path *path,
				       struct btrfs_extent_inline_ref *iref,
				       u64 parent, u64 root_objectid,
				       u64 owner, u64 offset, int refs_to_add)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* remember where @iref sits relative to the item start; the item
	 * pointer changes once the item is extended */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	/* shared refs key on parent, normal refs on root objectid */
	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);

	/* shift everything after the insertion point to the right */
	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		/* full data backref: root/objectid/offset/count payload */
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		/* shared data ref: count follows the iref, offset = parent */
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* tree block ref: offset holds the owning root objectid */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1223 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1224 struct btrfs_root *root,
1225 struct btrfs_path *path,
1226 struct btrfs_extent_inline_ref **ref_ret,
1227 u64 bytenr, u64 num_bytes, u64 parent,
1228 u64 root_objectid, u64 owner, u64 offset)
1230 int ret;
1232 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1233 bytenr, num_bytes, parent,
1234 root_objectid, owner, offset, 0);
1235 if (ret != -ENOENT)
1236 return ret;
1238 btrfs_release_path(path);
1239 *ref_ret = NULL;
1241 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1242 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1243 root_objectid);
1244 } else {
1245 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1246 root_objectid, owner, offset);
1248 return ret;
/*
 * Adjust the count of an existing inline back reference by @refs_to_mod
 * (positive to add, negative to drop).
 *
 * The extent item's total ref count is always adjusted.  For data refs
 * the per-ref count is adjusted too; shared block/tree refs implicitly
 * count 1, so only a full drop (refs_to_mod == -1) is legal for them.
 * When a ref's count reaches zero the inline ref is removed by shifting
 * the tail of the item left and truncating the item.
 *
 * Returns 0; truncate failure is fatal (BUG_ON).
 */
static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_extent_inline_ref *iref,
					int refs_to_mod)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		/* block refs have an implicit count of 1 */
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* count hit zero: cut this inline ref out of the item */
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1313 static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1314 struct btrfs_root *root,
1315 struct btrfs_path *path,
1316 u64 bytenr, u64 num_bytes, u64 parent,
1317 u64 root_objectid, u64 owner,
1318 u64 offset, int refs_to_add)
1320 struct btrfs_extent_inline_ref *iref;
1321 int ret;
1323 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1324 bytenr, num_bytes, parent,
1325 root_objectid, owner, offset, 1);
1326 if (ret == 0) {
1327 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1328 ret = update_inline_extent_backref(trans, root, path, iref,
1329 refs_to_add);
1330 } else if (ret == -ENOENT) {
1331 ret = setup_inline_extent_backref(root, path, iref,
1332 parent, root_objectid,
1333 owner, offset, refs_to_add);
1335 return ret;
1338 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1339 struct btrfs_root *root,
1340 struct btrfs_path *path,
1341 u64 bytenr, u64 parent, u64 root_objectid,
1342 u64 owner, u64 offset, int refs_to_add)
1344 int ret;
1346 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
1347 ret = insert_extent_data_ref(trans, root, path, bytenr,
1348 parent, root_objectid,
1349 owner, offset, refs_to_add);
1350 } else {
1351 BUG_ON(refs_to_add != 1);
1352 ret = insert_tree_block_ref(trans, root, path, bytenr,
1353 parent, root_objectid);
1355 return ret;
/*
 * Drop @refs_to_drop references from a back reference.
 *
 * An inline ref (@iref non-NULL) is decremented in place; a keyed data
 * ref has its count lowered or its item deleted; a keyed tree block ref
 * item is simply deleted (metadata always drops exactly one ref).
 */
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	BUG_ON(!is_data && refs_to_drop != 1);

	if (iref)
		return update_inline_extent_backref(trans, root, path, iref,
						    -refs_to_drop);
	if (is_data)
		return remove_extent_data_ref(trans, root, path, refs_to_drop);
	return btrfs_del_item(trans, root, path);
}
/*
 * Add one reference to an extent on behalf of (root_objectid, owner,
 * offset) or, for shared refs, @parent.
 *
 * Fast path: bump an existing inline backref (or create one if there is
 * room).  If the extent item is full (-EAGAIN), bump the item's total
 * ref count here and insert a separate keyed backref item instead.
 * Pending extent-tree ops are flushed before returning; any error is
 * fatal (BUG_ON) in this userspace implementation.
 */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;

	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	/* -EAGAIN: item was full, path still points at the extent item */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + 1);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = 1;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, 1);
	if (ret)
		err = ret;
out:
	btrfs_free_path(path);
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	BUG_ON(err);
	return err;
}
1431 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1432 struct btrfs_root *root)
1434 finish_current_insert(trans, root->fs_info->extent_root);
1435 del_pending_extents(trans, root->fs_info->extent_root);
1436 return 0;
1439 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1440 struct btrfs_root *root, u64 bytenr,
1441 u64 offset, int metadata, u64 *refs, u64 *flags)
1443 struct btrfs_path *path;
1444 int ret;
1445 struct btrfs_key key;
1446 struct extent_buffer *l;
1447 struct btrfs_extent_item *item;
1448 u32 item_size;
1449 u64 num_refs;
1450 u64 extent_flags;
1452 if (metadata &&
1453 !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
1454 offset = root->fs_info->nodesize;
1455 metadata = 0;
1458 path = btrfs_alloc_path();
1459 if (!path)
1460 return -ENOMEM;
1461 path->reada = 1;
1463 key.objectid = bytenr;
1464 key.offset = offset;
1465 if (metadata)
1466 key.type = BTRFS_METADATA_ITEM_KEY;
1467 else
1468 key.type = BTRFS_EXTENT_ITEM_KEY;
1470 again:
1471 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
1472 0, 0);
1473 if (ret < 0)
1474 goto out;
1477 * Deal with the fact that we may have mixed SKINNY and normal refs. If
1478 * we didn't find what we wanted check and see if we have a normal ref
1479 * right next to us, or re-search if we are on the edge of the leaf just
1480 * to make sure.
1482 if (ret > 0 && metadata) {
1483 if (path->slots[0]) {
1484 path->slots[0]--;
1485 btrfs_item_key_to_cpu(path->nodes[0], &key,
1486 path->slots[0]);
1487 if (key.objectid == bytenr &&
1488 key.type == BTRFS_EXTENT_ITEM_KEY &&
1489 key.offset == root->fs_info->nodesize)
1490 ret = 0;
1493 if (ret) {
1494 btrfs_release_path(path);
1495 key.type = BTRFS_EXTENT_ITEM_KEY;
1496 key.offset = root->fs_info->nodesize;
1497 metadata = 0;
1498 goto again;
1502 if (ret != 0) {
1503 ret = -EIO;
1504 goto out;
1507 l = path->nodes[0];
1508 item_size = btrfs_item_size_nr(l, path->slots[0]);
1509 if (item_size >= sizeof(*item)) {
1510 item = btrfs_item_ptr(l, path->slots[0],
1511 struct btrfs_extent_item);
1512 num_refs = btrfs_extent_refs(l, item);
1513 extent_flags = btrfs_extent_flags(l, item);
1514 } else {
1515 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1516 struct btrfs_extent_item_v0 *ei0;
1517 BUG_ON(item_size != sizeof(*ei0));
1518 ei0 = btrfs_item_ptr(l, path->slots[0],
1519 struct btrfs_extent_item_v0);
1520 num_refs = btrfs_extent_refs_v0(l, ei0);
1521 /* FIXME: this isn't correct for data */
1522 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1523 #else
1524 BUG();
1525 #endif
1527 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1528 if (refs)
1529 *refs = num_refs;
1530 if (flags)
1531 *flags = extent_flags;
1532 out:
1533 btrfs_free_path(path);
1534 return ret;
/*
 * OR @flags into the extent item flags of the tree block at @bytenr.
 *
 * Tries a METADATA_ITEM key first on skinny-metadata filesystems and
 * falls back to a normal EXTENT_ITEM key (handling mixed on-disk
 * layouts).  A missing extent item is fatal (BUG).  Pending extent-tree
 * ops are flushed on the way out.
 */
int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  u64 bytenr, int level, u64 flags)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 item_size;
	int skinny_metadata =
		btrfs_fs_incompat(root->fs_info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = 1;

	key.objectid = bytenr;
	if (skinny_metadata) {
		/* skinny keys carry the level instead of the byte size */
		key.offset = level;
		key.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		key.offset = root->fs_info->nodesize;
		key.type = BTRFS_EXTENT_ITEM_KEY;
	}

again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
				0, 0);
	if (ret < 0)
		goto out;

	if (ret > 0 && skinny_metadata) {
		/* no skinny item: maybe a normal one sits just before us,
		 * otherwise retry the search with a normal key */
		skinny_metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.offset == root->fs_info->nodesize &&
			    key.type == BTRFS_EXTENT_ITEM_KEY)
				ret = 0;
		}
		if (ret) {
			btrfs_release_path(path);
			key.offset = root->fs_info->nodesize;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			goto again;
		}
	}

	if (ret != 0) {
		btrfs_print_leaf(root, path->nodes[0]);
		printk("failed to find block number %Lu\n",
		       (unsigned long long)bytenr);
		BUG();
	}

	l = path->nodes[0];
	item_size = btrfs_item_size_nr(l, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*item)) {
		/* v0 extent item: convert in place before touching flags */
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		item_size = btrfs_item_size_nr(l, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*item));
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	flags |= btrfs_extent_flags(l, item);
	btrfs_set_extent_flags(l, item, flags);
out:
	btrfs_free_path(path);
	finish_current_insert(trans, root->fs_info->extent_root);
	del_pending_extents(trans, root->fs_info->extent_root);
	return ret;
}
/*
 * Add (@inc=1) or drop (@inc=0) one reference for every extent that the
 * buffer @buf points to.
 *
 * For a leaf, every regular (non-inline) file extent item contributes a
 * data ref; for a node, every child block pointer contributes a tree
 * block ref.  @record_parent selects shared (parent-keyed) refs, used
 * for full-backref trees.  Leaves of non-reference-counted roots are
 * skipped entirely.
 *
 * Returns 0 on success or the first failing ref operation's error.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int record_parent, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* btrfs_inc_extent_ref or btrfs_free_extent, chosen by @inc */
	int (*process_func)(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	if (!root->ref_cows && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (record_parent)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf, no backref */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* holes have a zero disk bytenr */
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/* backrefs key on the logical file offset of the
			 * start of the on-disk extent */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->fs_info->nodesize;
			/* for tree blocks, "owner" is the child level */
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		}
	}
	return 0;
fail:
	WARN_ON(1);
	return ret;
}
/* Take one reference on every extent referenced from @buf. */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, /*inc*/ 1);
}
/* Drop one reference on every extent referenced from @buf. */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, /*inc*/ 0);
}
1708 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1709 struct btrfs_root *root,
1710 struct btrfs_path *path,
1711 struct btrfs_block_group_cache *cache)
1713 int ret;
1714 int pending_ret;
1715 struct btrfs_root *extent_root = root->fs_info->extent_root;
1716 unsigned long bi;
1717 struct extent_buffer *leaf;
1719 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1720 if (ret < 0)
1721 goto fail;
1722 BUG_ON(ret);
1724 leaf = path->nodes[0];
1725 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1726 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1727 btrfs_mark_buffer_dirty(leaf);
1728 btrfs_release_path(path);
1729 fail:
1730 finish_current_insert(trans, extent_root);
1731 pending_ret = del_pending_extents(trans, extent_root);
1732 if (ret)
1733 return ret;
1734 if (pending_ret)
1735 return pending_ret;
1736 return 0;
1740 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1741 struct btrfs_root *root)
1743 struct extent_io_tree *block_group_cache;
1744 struct btrfs_block_group_cache *cache;
1745 int ret;
1746 struct btrfs_path *path;
1747 u64 last = 0;
1748 u64 start;
1749 u64 end;
1750 u64 ptr;
1752 block_group_cache = &root->fs_info->block_group_cache;
1753 path = btrfs_alloc_path();
1754 if (!path)
1755 return -ENOMEM;
1757 while(1) {
1758 ret = find_first_extent_bit(block_group_cache, last,
1759 &start, &end, BLOCK_GROUP_DIRTY);
1760 if (ret) {
1761 if (last == 0)
1762 break;
1763 last = 0;
1764 continue;
1767 last = end + 1;
1768 ret = get_state_private(block_group_cache, start, &ptr);
1769 BUG_ON(ret);
1771 clear_extent_bits(block_group_cache, start, end,
1772 BLOCK_GROUP_DIRTY);
1774 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1775 ret = write_one_cache_group(trans, root, path, cache);
1777 btrfs_free_path(path);
1778 return 0;
1781 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1782 u64 flags)
1784 struct btrfs_space_info *found;
1786 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
1788 list_for_each_entry(found, &info->space_info, list) {
1789 if (found->flags & flags)
1790 return found;
1792 return NULL;
1796 static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
1797 u64 total_bytes, u64 bytes_used,
1798 struct btrfs_space_info **space_info)
1800 struct btrfs_space_info *found;
1802 /* only support free block group which is empty */
1803 if (bytes_used)
1804 return -ENOTEMPTY;
1806 found = __find_space_info(fs_info, flags);
1807 if (!found)
1808 return -ENOENT;
1809 if (found->total_bytes < total_bytes) {
1810 fprintf(stderr,
1811 "WARNING: bad space info to free %llu only have %llu\n",
1812 total_bytes, found->total_bytes);
1813 return -EINVAL;
1815 found->total_bytes -= total_bytes;
1816 if (space_info)
1817 *space_info = found;
1818 return 0;
1821 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1822 u64 total_bytes, u64 bytes_used,
1823 struct btrfs_space_info **space_info)
1825 struct btrfs_space_info *found;
1827 found = __find_space_info(info, flags);
1828 if (found) {
1829 found->total_bytes += total_bytes;
1830 found->bytes_used += bytes_used;
1831 if (found->total_bytes < found->bytes_used) {
1832 fprintf(stderr, "warning, bad space info total_bytes "
1833 "%llu used %llu\n",
1834 (unsigned long long)found->total_bytes,
1835 (unsigned long long)found->bytes_used);
1837 *space_info = found;
1838 return 0;
1840 found = kmalloc(sizeof(*found), GFP_NOFS);
1841 if (!found)
1842 return -ENOMEM;
1844 list_add(&found->list, &info->space_info);
1845 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
1846 found->total_bytes = total_bytes;
1847 found->bytes_used = bytes_used;
1848 found->bytes_pinned = 0;
1849 found->full = 0;
1850 *space_info = found;
1851 return 0;
1855 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1857 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1858 BTRFS_BLOCK_GROUP_RAID1 |
1859 BTRFS_BLOCK_GROUP_RAID10 |
1860 BTRFS_BLOCK_GROUP_RAID5 |
1861 BTRFS_BLOCK_GROUP_RAID6 |
1862 BTRFS_BLOCK_GROUP_DUP);
1863 if (extra_flags) {
1864 if (flags & BTRFS_BLOCK_GROUP_DATA)
1865 fs_info->avail_data_alloc_bits |= extra_flags;
1866 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1867 fs_info->avail_metadata_alloc_bits |= extra_flags;
1868 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1869 fs_info->avail_system_alloc_bits |= extra_flags;
/*
 * Allocate a new chunk (and block group) of the given type if the
 * existing space of that type is getting full.
 *
 * The threshold is 70% of total bytes (div_factor .. 7); below it, or
 * when the space info is already marked full, nothing happens.  Chunk
 * allocation for metadata/system types can be globally suppressed via
 * the avoid_*_chunk_alloc flags.  -ENOSPC from the chunk allocator just
 * marks the space info full; other failures are fatal (BUG_ON).
 *
 * Always returns 0.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 alloc_bytes,
			  u64 flags)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret;

	space_info = __find_space_info(fs_info, flags);
	if (!space_info) {
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	if (space_info->full)
		return 0;

	/* only allocate once usage (plus the pending allocation) would
	 * cross ~70% of the existing space of this type */
	thresh = div_factor(space_info->total_bytes, 7);
	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
	    thresh)
		return 0;

	/*
	 * Avoid allocating given chunk type
	 */
	if (fs_info->avoid_meta_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_METADATA))
		return 0;
	if (fs_info->avoid_sys_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_SYSTEM))
		return 0;

	ret = btrfs_alloc_chunk(trans, fs_info, &start, &num_bytes,
				space_info->flags);
	if (ret == -ENOSPC) {
		space_info->full = 1;
		return 0;
	}

	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, fs_info, 0, space_info->flags,
				     start, num_bytes);
	BUG_ON(ret);
	return 0;
}
1923 static int update_block_group(struct btrfs_root *root,
1924 u64 bytenr, u64 num_bytes, int alloc,
1925 int mark_free)
1927 struct btrfs_block_group_cache *cache;
1928 struct btrfs_fs_info *info = root->fs_info;
1929 u64 total = num_bytes;
1930 u64 old_val;
1931 u64 byte_in_group;
1932 u64 start;
1933 u64 end;
1935 /* block accounting for super block */
1936 old_val = btrfs_super_bytes_used(info->super_copy);
1937 if (alloc)
1938 old_val += num_bytes;
1939 else
1940 old_val -= num_bytes;
1941 btrfs_set_super_bytes_used(info->super_copy, old_val);
1943 /* block accounting for root item */
1944 old_val = btrfs_root_used(&root->root_item);
1945 if (alloc)
1946 old_val += num_bytes;
1947 else
1948 old_val -= num_bytes;
1949 btrfs_set_root_used(&root->root_item, old_val);
1951 while(total) {
1952 cache = btrfs_lookup_block_group(info, bytenr);
1953 if (!cache) {
1954 return -1;
1956 byte_in_group = bytenr - cache->key.objectid;
1957 WARN_ON(byte_in_group > cache->key.offset);
1958 start = cache->key.objectid;
1959 end = start + cache->key.offset - 1;
1960 set_extent_bits(&info->block_group_cache, start, end,
1961 BLOCK_GROUP_DIRTY);
1963 old_val = btrfs_block_group_used(&cache->item);
1964 num_bytes = min(total, cache->key.offset - byte_in_group);
1966 if (alloc) {
1967 old_val += num_bytes;
1968 cache->space_info->bytes_used += num_bytes;
1969 } else {
1970 old_val -= num_bytes;
1971 cache->space_info->bytes_used -= num_bytes;
1972 if (mark_free) {
1973 set_extent_dirty(&info->free_space_cache,
1974 bytenr, bytenr + num_bytes - 1);
1977 btrfs_set_block_group_used(&cache->item, old_val);
1978 total -= num_bytes;
1979 bytenr += num_bytes;
1981 return 0;
1984 static int update_pinned_extents(struct btrfs_root *root,
1985 u64 bytenr, u64 num, int pin)
1987 u64 len;
1988 struct btrfs_block_group_cache *cache;
1989 struct btrfs_fs_info *fs_info = root->fs_info;
1991 if (pin) {
1992 set_extent_dirty(&fs_info->pinned_extents,
1993 bytenr, bytenr + num - 1);
1994 } else {
1995 clear_extent_dirty(&fs_info->pinned_extents,
1996 bytenr, bytenr + num - 1);
1998 while (num > 0) {
1999 cache = btrfs_lookup_block_group(fs_info, bytenr);
2000 if (!cache) {
2001 len = min((u64)fs_info->sectorsize, num);
2002 goto next;
2004 WARN_ON(!cache);
2005 len = min(num, cache->key.offset -
2006 (bytenr - cache->key.objectid));
2007 if (pin) {
2008 cache->pinned += len;
2009 cache->space_info->bytes_pinned += len;
2010 fs_info->total_pinned += len;
2011 } else {
2012 cache->pinned -= len;
2013 cache->space_info->bytes_pinned -= len;
2014 fs_info->total_pinned -= len;
2016 next:
2017 bytenr += len;
2018 num -= len;
2020 return 0;
2023 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2024 struct btrfs_root *root,
2025 struct extent_io_tree *unpin)
2027 u64 start;
2028 u64 end;
2029 int ret;
2030 struct extent_io_tree *free_space_cache;
2031 free_space_cache = &root->fs_info->free_space_cache;
2033 while(1) {
2034 ret = find_first_extent_bit(unpin, 0, &start, &end,
2035 EXTENT_DIRTY);
2036 if (ret)
2037 break;
2038 update_pinned_extents(root, start, end + 1 - start, 0);
2039 clear_extent_dirty(unpin, start, end);
2040 set_extent_dirty(free_space_cache, start, end);
2042 return 0;
/*
 * Report whether the extent root still has queued work (pending
 * inserts and/or deletes) that finish_current_insert /
 * del_pending_extents would process.
 *
 * NOTE(review): as written this returns true only when the extent_ins
 * tree is non-empty AND (after overwriting ret) the pending_del tree is
 * non-empty; an OR check (`if (ret)` on the second lookup) looks like
 * the intent — verify against the commit-loop callers before changing.
 */
static int extent_root_pending_ops(struct btrfs_fs_info *info)
{
	u64 start;
	u64 end;
	int ret;

	ret = find_first_extent_bit(&info->extent_ins, 0, &start,
				    &end, EXTENT_LOCKED);
	if (!ret) {
		ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
					    EXTENT_LOCKED);
	}
	return ret == 0;
}
/*
 * Drain the queue of pending extent insertions (info->extent_ins).
 *
 * Each EXTENT_LOCKED range in the tree carries a pending_extent_op in
 * its private pointer.  Only PENDING_EXTENT_INSERT ops are expected
 * here; each one materializes a reserved tree block's extent item
 * (skinny METADATA_ITEM or classic EXTENT_ITEM key, per the incompat
 * flag).  Any failure is fatal (BUG_ON).  Always returns 0.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
{
	u64 start;
	u64 end;
	u64 priv;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct pending_extent_op *extent_op;
	struct btrfs_key key;
	int ret;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	while (1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		ret = get_state_private(&info->extent_ins, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		if (extent_op->type == PENDING_EXTENT_INSERT) {
			key.objectid = start;
			if (skinny_metadata) {
				/* skinny keys store the level, not size */
				key.offset = extent_op->level;
				key.type = BTRFS_METADATA_ITEM_KEY;
			} else {
				key.offset = extent_op->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
			}
			ret = alloc_reserved_tree_block(trans, extent_root,
					extent_root->root_key.objectid,
					trans->transid,
					extent_op->flags,
					&extent_op->key,
					extent_op->level, &key);
			BUG_ON(ret);
		} else {
			/* no other op types are ever queued here */
			BUG_ON(1);
		}

		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED);
		kfree(extent_op);
	}
	return 0;
}
2109 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2110 struct btrfs_root *root,
2111 u64 bytenr, u64 num_bytes, int is_data)
2113 int err = 0;
2114 struct extent_buffer *buf;
2116 if (is_data)
2117 goto pinit;
2119 buf = btrfs_find_tree_block(root->fs_info, bytenr, num_bytes);
2120 if (!buf)
2121 goto pinit;
2123 /* we can reuse a block if it hasn't been written
2124 * and it is from this transaction. We can't
2125 * reuse anything from the tree log root because
2126 * it has tiny sub-transactions.
2128 if (btrfs_buffer_uptodate(buf, 0)) {
2129 u64 header_owner = btrfs_header_owner(buf);
2130 u64 header_transid = btrfs_header_generation(buf);
2131 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2132 header_transid == trans->transid &&
2133 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2134 clean_tree_block(NULL, root, buf);
2135 free_extent_buffer(buf);
2136 return 1;
2139 free_extent_buffer(buf);
2140 pinit:
2141 update_pinned_extents(root, bytenr, num_bytes, 1);
2143 BUG_ON(err < 0);
2144 return 0;
2147 void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
2148 u64 bytenr, u64 num_bytes)
2150 update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 1);
2153 void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
2154 u64 bytenr, u64 num_bytes)
2156 update_pinned_extents(fs_info->extent_root, bytenr, num_bytes, 0);
/*
 * remove an extent from the root, returns 0 on success
 *
 * Drops @refs_to_drop references held by (root_objectid, owner_objectid,
 * owner_offset) — or by @parent for shared refs — on the extent at
 * @bytenr.  First the backref (inline or keyed) is located; then either
 * the extent item's count is lowered, or, when the last reference goes
 * away, the extent item (and an adjacent keyed backref, if any) is
 * deleted, the bytes are pinned or returned to the free space cache,
 * data checksums are removed, and block group accounting is updated.
 * Most internal inconsistencies are fatal (BUG_ON).
 */
static int __free_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner_objectid,
			 u64 owner_offset, int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	/* debugging/fsck hook: observe every free before it happens */
	if (root->fs_info->free_extent_hook) {
		root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
						parent, root_objectid,
						owner_objectid,
						owner_offset, refs_to_drop);
	}
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	if (is_data)
		skinny_metadata = 0;
	BUG_ON(!is_data && refs_to_drop != 1);

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/* backref found; scan a few slots back for the extent item
		 * itself, which sorts just before its keyed backrefs */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* drop the keyed backref, then search for the
			 * extent item separately */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data);
			BUG_ON(ret);
			btrfs_release_path(path);

			key.objectid = bytenr;

			if (skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			} else {
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/* mixed layouts: a normal extent item may
				 * sit just before the skinny position */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0],
						      &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* retry with a normal key */
				skinny_metadata = 0;
				btrfs_release_path(path);
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				printk(KERN_ERR "umm, got %d back from search"
				       ", was looking for %llu\n", ret,
				       (unsigned long long)bytenr);
				btrfs_print_leaf(extent_root, path->nodes[0]);
			}
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
		       "parent %llu root %llu owner %llu offset %llu\n",
		       (unsigned long long)bytenr,
		       (unsigned long long)parent,
		       (unsigned long long)root_objectid,
		       (unsigned long long)owner_objectid,
		       (unsigned long long)owner_offset);
		ret = -EIO;
		goto fail;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		/* v0 item: convert it in place, then re-find it */
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		BUG_ON(ret < 0);

		btrfs_release_path(path);

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			printk(KERN_ERR "umm, got %d back from search"
			       ", was looking for %llu\n", ret,
			       (unsigned long long)bytenr);
			btrfs_print_leaf(extent_root, path->nodes[0]);
		}
		BUG_ON(ret);
		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* non-skinny metadata: sanity-check the embedded level */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs < refs_to_drop);
	refs -= refs_to_drop;

	if (refs > 0) {
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data);
			BUG_ON(ret);
		}
	} else {
		/* last reference: delete the extent item entirely */
		int mark_free = 0;
		int pin = 1;

		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* delete the keyed backref together with
				 * the extent item (two adjacent items) */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		if (pin) {
			ret = pin_down_bytes(trans, root, bytenr, num_bytes,
					     is_data);
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		BUG_ON(ret);
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			BUG_ON(ret);
		}

		update_block_group(root, bytenr, num_bytes, 0, mark_free);
	}
fail:
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	return ret;
}
2397 * find all the blocks marked as pending in the radix tree and remove
2398 * them from the extent map
2400 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2401 btrfs_root *extent_root)
2403 int ret;
2404 int err = 0;
2405 u64 start;
2406 u64 end;
2407 u64 priv;
2408 struct extent_io_tree *pending_del;
2409 struct extent_io_tree *extent_ins;
2410 struct pending_extent_op *extent_op;
2412 extent_ins = &extent_root->fs_info->extent_ins;
2413 pending_del = &extent_root->fs_info->pending_del;
2415 while(1) {
2416 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2417 EXTENT_LOCKED);
2418 if (ret)
2419 break;
2421 ret = get_state_private(pending_del, start, &priv);
2422 BUG_ON(ret);
2423 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2425 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED);
2427 if (!test_range_bit(extent_ins, start, end,
2428 EXTENT_LOCKED, 0)) {
2429 ret = __free_extent(trans, extent_root,
2430 start, end + 1 - start, 0,
2431 extent_root->root_key.objectid,
2432 extent_op->level, 0, 1);
2433 kfree(extent_op);
2434 } else {
2435 kfree(extent_op);
2436 ret = get_state_private(extent_ins, start, &priv);
2437 BUG_ON(ret);
2438 extent_op = (struct pending_extent_op *)
2439 (unsigned long)priv;
2441 clear_extent_bits(extent_ins, start, end,
2442 EXTENT_LOCKED);
2444 if (extent_op->type == PENDING_BACKREF_UPDATE)
2445 BUG_ON(1);
2447 kfree(extent_op);
2449 if (ret)
2450 err = ret;
2452 return err;
2456 int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
2457 struct btrfs_root *root,
2458 struct extent_buffer *buf,
2459 u64 parent, int last_ref)
2461 return btrfs_free_extent(trans, root, buf->start, buf->len, parent,
2462 root->root_key.objectid,
2463 btrfs_header_level(buf), 0);
2467 * remove an extent from the root, returns 0 on success
2470 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2471 struct btrfs_root *root,
2472 u64 bytenr, u64 num_bytes, u64 parent,
2473 u64 root_objectid, u64 owner, u64 offset)
2475 struct btrfs_root *extent_root = root->fs_info->extent_root;
2476 int pending_ret;
2477 int ret;
2479 WARN_ON(num_bytes < root->fs_info->sectorsize);
2480 if (root == extent_root) {
2481 struct pending_extent_op *extent_op;
2483 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2484 BUG_ON(!extent_op);
2486 extent_op->type = PENDING_EXTENT_DELETE;
2487 extent_op->bytenr = bytenr;
2488 extent_op->num_bytes = num_bytes;
2489 extent_op->level = (int)owner;
2491 set_extent_bits(&root->fs_info->pending_del,
2492 bytenr, bytenr + num_bytes - 1,
2493 EXTENT_LOCKED);
2494 set_state_private(&root->fs_info->pending_del,
2495 bytenr, (unsigned long)extent_op);
2496 return 0;
2498 ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2499 root_objectid, owner, offset, 1);
2500 pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2501 return ret ? ret : pending_ret;
2504 static u64 stripe_align(struct btrfs_root *root, u64 val)
2506 return round_up(val, (u64)root->fs_info->stripesize);
2510 * walks the btree of allocated extents and find a hole of a given size.
2511 * The key ins is changed to record the hole:
2512 * ins->objectid == block start
2513 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2514 * ins->offset == number of blocks
2515 * Any available blocks before search_start are skipped.
2517 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2518 struct btrfs_root *orig_root,
2519 u64 num_bytes, u64 empty_size,
2520 u64 search_start, u64 search_end,
2521 u64 hint_byte, struct btrfs_key *ins,
2522 u64 exclude_start, u64 exclude_nr,
2523 int data)
2525 int ret;
2526 u64 orig_search_start = search_start;
2527 struct btrfs_root * root = orig_root->fs_info->extent_root;
2528 struct btrfs_fs_info *info = root->fs_info;
2529 u64 total_needed = num_bytes;
2530 struct btrfs_block_group_cache *block_group;
2531 int full_scan = 0;
2532 int wrapped = 0;
2534 WARN_ON(num_bytes < info->sectorsize);
2535 ins->type = BTRFS_EXTENT_ITEM_KEY;
2537 search_start = stripe_align(root, search_start);
2539 if (hint_byte) {
2540 block_group = btrfs_lookup_first_block_group(info, hint_byte);
2541 if (!block_group)
2542 hint_byte = search_start;
2543 block_group = btrfs_find_block_group(root, block_group,
2544 hint_byte, data, 1);
2545 } else {
2546 block_group = btrfs_find_block_group(root,
2547 trans->block_group,
2548 search_start, data, 1);
2551 total_needed += empty_size;
2553 check_failed:
2554 search_start = stripe_align(root, search_start);
2555 if (!block_group) {
2556 block_group = btrfs_lookup_first_block_group(info,
2557 search_start);
2558 if (!block_group)
2559 block_group = btrfs_lookup_first_block_group(info,
2560 orig_search_start);
2562 ret = find_search_start(root, &block_group, &search_start,
2563 total_needed, data);
2564 if (ret)
2565 goto new_group;
2567 ins->objectid = search_start;
2568 ins->offset = num_bytes;
2570 if (ins->objectid + num_bytes >
2571 block_group->key.objectid + block_group->key.offset) {
2572 search_start = block_group->key.objectid +
2573 block_group->key.offset;
2574 goto new_group;
2577 if (test_range_bit(&info->extent_ins, ins->objectid,
2578 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
2579 search_start = ins->objectid + num_bytes;
2580 goto new_group;
2583 if (test_range_bit(&info->pinned_extents, ins->objectid,
2584 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2585 search_start = ins->objectid + num_bytes;
2586 goto new_group;
2589 if (info->excluded_extents &&
2590 test_range_bit(info->excluded_extents, ins->objectid,
2591 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2592 search_start = ins->objectid + num_bytes;
2593 goto new_group;
2596 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2597 ins->objectid < exclude_start + exclude_nr)) {
2598 search_start = exclude_start + exclude_nr;
2599 goto new_group;
2602 if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
2603 if (check_crossing_stripes(info, ins->objectid, num_bytes)) {
2604 struct btrfs_block_group_cache *bg_cache;
2605 u64 bg_offset;
2607 bg_cache = btrfs_lookup_block_group(info, ins->objectid);
2608 if (!bg_cache)
2609 goto no_bg_cache;
2610 bg_offset = ins->objectid - bg_cache->key.objectid;
2612 search_start = round_up(
2613 bg_offset + num_bytes, BTRFS_STRIPE_LEN) +
2614 bg_cache->key.objectid;
2615 goto new_group;
2617 no_bg_cache:
2618 block_group = btrfs_lookup_block_group(info, ins->objectid);
2619 if (block_group)
2620 trans->block_group = block_group;
2622 ins->offset = num_bytes;
2623 return 0;
2625 new_group:
2626 block_group = btrfs_lookup_first_block_group(info, search_start);
2627 if (!block_group) {
2628 search_start = orig_search_start;
2629 if (full_scan) {
2630 ret = -ENOSPC;
2631 goto error;
2633 if (wrapped) {
2634 if (!full_scan)
2635 total_needed -= empty_size;
2636 full_scan = 1;
2637 } else
2638 wrapped = 1;
2640 cond_resched();
2641 block_group = btrfs_find_block_group(root, block_group,
2642 search_start, data, 0);
2643 goto check_failed;
2645 error:
2646 return ret;
2649 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2650 struct btrfs_root *root,
2651 u64 num_bytes, u64 empty_size,
2652 u64 hint_byte, u64 search_end,
2653 struct btrfs_key *ins, bool is_data)
2655 int ret;
2656 u64 search_start = 0;
2657 u64 alloc_profile;
2658 u64 profile;
2659 struct btrfs_fs_info *info = root->fs_info;
2661 if (is_data) {
2662 alloc_profile = info->avail_data_alloc_bits &
2663 info->data_alloc_profile;
2664 profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2665 } else if (info->system_allocs == 1 || root == info->chunk_root) {
2666 alloc_profile = info->avail_system_alloc_bits &
2667 info->system_alloc_profile;
2668 profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2669 } else {
2670 alloc_profile = info->avail_metadata_alloc_bits &
2671 info->metadata_alloc_profile;
2672 profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2675 if (root->ref_cows) {
2676 if (!(profile & BTRFS_BLOCK_GROUP_METADATA)) {
2677 ret = do_chunk_alloc(trans, info,
2678 num_bytes,
2679 BTRFS_BLOCK_GROUP_METADATA);
2680 BUG_ON(ret);
2682 ret = do_chunk_alloc(trans, info,
2683 num_bytes + SZ_2M, profile);
2684 BUG_ON(ret);
2687 WARN_ON(num_bytes < info->sectorsize);
2688 ret = find_free_extent(trans, root, num_bytes, empty_size,
2689 search_start, search_end, hint_byte, ins,
2690 trans->alloc_exclude_start,
2691 trans->alloc_exclude_nr, profile);
2692 if (ret < 0)
2693 return ret;
2694 clear_extent_dirty(&info->free_space_cache,
2695 ins->objectid, ins->objectid + ins->offset - 1);
2696 return ret;
2699 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
2700 struct btrfs_root *root,
2701 u64 root_objectid, u64 generation,
2702 u64 flags, struct btrfs_disk_key *key,
2703 int level, struct btrfs_key *ins)
2705 int ret;
2706 struct btrfs_fs_info *fs_info = root->fs_info;
2707 struct btrfs_extent_item *extent_item;
2708 struct btrfs_tree_block_info *block_info;
2709 struct btrfs_extent_inline_ref *iref;
2710 struct btrfs_path *path;
2711 struct extent_buffer *leaf;
2712 u32 size = sizeof(*extent_item) + sizeof(*iref);
2713 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2715 if (!skinny_metadata)
2716 size += sizeof(*block_info);
2718 path = btrfs_alloc_path();
2719 if (!path)
2720 return -ENOMEM;
2722 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
2723 ins, size);
2724 BUG_ON(ret);
2726 leaf = path->nodes[0];
2727 extent_item = btrfs_item_ptr(leaf, path->slots[0],
2728 struct btrfs_extent_item);
2729 btrfs_set_extent_refs(leaf, extent_item, 1);
2730 btrfs_set_extent_generation(leaf, extent_item, generation);
2731 btrfs_set_extent_flags(leaf, extent_item,
2732 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
2734 if (skinny_metadata) {
2735 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
2736 } else {
2737 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
2738 btrfs_set_tree_block_key(leaf, block_info, key);
2739 btrfs_set_tree_block_level(leaf, block_info, level);
2740 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
2743 btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
2744 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
2746 btrfs_mark_buffer_dirty(leaf);
2747 btrfs_free_path(path);
2749 ret = update_block_group(root, ins->objectid, fs_info->nodesize,
2750 1, 0);
2751 return ret;
2754 static int alloc_tree_block(struct btrfs_trans_handle *trans,
2755 struct btrfs_root *root, u64 num_bytes,
2756 u64 root_objectid, u64 generation,
2757 u64 flags, struct btrfs_disk_key *key,
2758 int level, u64 empty_size, u64 hint_byte,
2759 u64 search_end, struct btrfs_key *ins)
2761 int ret;
2762 ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
2763 hint_byte, search_end, ins, 0);
2764 BUG_ON(ret);
2766 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
2767 struct pending_extent_op *extent_op;
2769 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2770 BUG_ON(!extent_op);
2772 extent_op->type = PENDING_EXTENT_INSERT;
2773 extent_op->bytenr = ins->objectid;
2774 extent_op->num_bytes = ins->offset;
2775 extent_op->level = level;
2776 extent_op->flags = flags;
2777 memcpy(&extent_op->key, key, sizeof(*key));
2779 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2780 ins->objectid + ins->offset - 1,
2781 EXTENT_LOCKED);
2782 set_state_private(&root->fs_info->extent_ins,
2783 ins->objectid, (unsigned long)extent_op);
2784 } else {
2785 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
2786 ins->offset = level;
2787 ins->type = BTRFS_METADATA_ITEM_KEY;
2789 ret = alloc_reserved_tree_block(trans, root, root_objectid,
2790 generation, flags,
2791 key, level, ins);
2792 finish_current_insert(trans, root->fs_info->extent_root);
2793 del_pending_extents(trans, root->fs_info->extent_root);
2795 return ret;
2799 * helper function to allocate a block for a given tree
2800 * returns the tree buffer or NULL.
2802 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2803 struct btrfs_root *root,
2804 u32 blocksize, u64 root_objectid,
2805 struct btrfs_disk_key *key, int level,
2806 u64 hint, u64 empty_size)
2808 struct btrfs_key ins;
2809 int ret;
2810 struct extent_buffer *buf;
2812 ret = alloc_tree_block(trans, root, blocksize, root_objectid,
2813 trans->transid, 0, key, level,
2814 empty_size, hint, (u64)-1, &ins);
2815 if (ret) {
2816 BUG_ON(ret > 0);
2817 return ERR_PTR(ret);
2820 buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid);
2821 if (!buf) {
2822 btrfs_free_extent(trans, root, ins.objectid, ins.offset,
2823 0, root->root_key.objectid, level, 0);
2824 BUG_ON(1);
2825 return ERR_PTR(-ENOMEM);
2827 btrfs_set_buffer_uptodate(buf);
2828 trans->blocks_used++;
2830 return buf;
2833 #if 0
2835 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2836 struct btrfs_root *root,
2837 struct extent_buffer *leaf)
2839 u64 leaf_owner;
2840 u64 leaf_generation;
2841 struct btrfs_key key;
2842 struct btrfs_file_extent_item *fi;
2843 int i;
2844 int nritems;
2845 int ret;
2847 BUG_ON(!btrfs_is_leaf(leaf));
2848 nritems = btrfs_header_nritems(leaf);
2849 leaf_owner = btrfs_header_owner(leaf);
2850 leaf_generation = btrfs_header_generation(leaf);
2852 for (i = 0; i < nritems; i++) {
2853 u64 disk_bytenr;
2855 btrfs_item_key_to_cpu(leaf, &key, i);
2856 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2857 continue;
2858 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2859 if (btrfs_file_extent_type(leaf, fi) ==
2860 BTRFS_FILE_EXTENT_INLINE)
2861 continue;
2863 * FIXME make sure to insert a trans record that
2864 * repeats the snapshot del on crash
2866 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2867 if (disk_bytenr == 0)
2868 continue;
2869 ret = btrfs_free_extent(trans, root, disk_bytenr,
2870 btrfs_file_extent_disk_num_bytes(leaf, fi),
2871 leaf->start, leaf_owner, leaf_generation,
2872 key.objectid, 0);
2873 BUG_ON(ret);
2875 return 0;
2878 static void noinline reada_walk_down(struct btrfs_root *root,
2879 struct extent_buffer *node,
2880 int slot)
2882 u64 bytenr;
2883 u64 last = 0;
2884 u32 nritems;
2885 u32 refs;
2886 u32 blocksize;
2887 int ret;
2888 int i;
2889 int level;
2890 int skipped = 0;
2892 nritems = btrfs_header_nritems(node);
2893 level = btrfs_header_level(node);
2894 if (level)
2895 return;
2897 for (i = slot; i < nritems && skipped < 32; i++) {
2898 bytenr = btrfs_node_blockptr(node, i);
2899 if (last && ((bytenr > last && bytenr - last > SZ_32K) ||
2900 (last > bytenr && last - bytenr > SZ_32K))) {
2901 skipped++;
2902 continue;
2904 blocksize = btrfs_level_size(root, level - 1);
2905 if (i != slot) {
2906 ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2907 blocksize, &refs);
2908 BUG_ON(ret);
2909 if (refs != 1) {
2910 skipped++;
2911 continue;
2914 mutex_unlock(&root->fs_info->fs_mutex);
2915 ret = readahead_tree_block(root, bytenr, blocksize,
2916 btrfs_node_ptr_generation(node, i));
2917 last = bytenr + blocksize;
2918 cond_resched();
2919 mutex_lock(&root->fs_info->fs_mutex);
2920 if (ret)
2921 break;
2926 * helper function for drop_snapshot, this walks down the tree dropping ref
2927 * counts as it goes.
2929 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2930 struct btrfs_root *root,
2931 struct btrfs_path *path, int *level)
2933 u64 root_owner;
2934 u64 root_gen;
2935 u64 bytenr;
2936 u64 ptr_gen;
2937 struct extent_buffer *next;
2938 struct extent_buffer *cur;
2939 struct extent_buffer *parent;
2940 u32 blocksize;
2941 int ret;
2942 u32 refs;
2944 WARN_ON(*level < 0);
2945 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2946 ret = btrfs_lookup_extent_ref(trans, root,
2947 path->nodes[*level]->start,
2948 path->nodes[*level]->len, &refs);
2949 BUG_ON(ret);
2950 if (refs > 1)
2951 goto out;
2954 * walk down to the last node level and free all the leaves
2956 while(*level >= 0) {
2957 WARN_ON(*level < 0);
2958 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2959 cur = path->nodes[*level];
2961 if (btrfs_header_level(cur) != *level)
2962 WARN_ON(1);
2964 if (path->slots[*level] >=
2965 btrfs_header_nritems(cur))
2966 break;
2967 if (*level == 0) {
2968 ret = drop_leaf_ref(trans, root, cur);
2969 BUG_ON(ret);
2970 break;
2972 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2973 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2974 blocksize = btrfs_level_size(root, *level - 1);
2975 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2976 &refs);
2977 BUG_ON(ret);
2978 if (refs != 1) {
2979 parent = path->nodes[*level];
2980 root_owner = btrfs_header_owner(parent);
2981 root_gen = btrfs_header_generation(parent);
2982 path->slots[*level]++;
2983 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
2984 parent->start, root_owner,
2985 root_gen, *level - 1, 1);
2986 BUG_ON(ret);
2987 continue;
2989 next = btrfs_find_tree_block(root, bytenr, blocksize);
2990 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2991 free_extent_buffer(next);
2992 reada_walk_down(root, cur, path->slots[*level]);
2993 mutex_unlock(&root->fs_info->fs_mutex);
2994 next = read_tree_block(root, bytenr, blocksize,
2995 ptr_gen);
2996 mutex_lock(&root->fs_info->fs_mutex);
2997 if (!extent_buffer_uptodate(next)) {
2998 if (IS_ERR(next))
2999 ret = PTR_ERR(next);
3000 else
3001 ret = -EIO;
3002 break;
3005 WARN_ON(*level <= 0);
3006 if (path->nodes[*level-1])
3007 free_extent_buffer(path->nodes[*level-1]);
3008 path->nodes[*level-1] = next;
3009 *level = btrfs_header_level(next);
3010 path->slots[*level] = 0;
3012 out:
3013 WARN_ON(*level < 0);
3014 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3016 if (path->nodes[*level] == root->node) {
3017 root_owner = root->root_key.objectid;
3018 parent = path->nodes[*level];
3019 } else {
3020 parent = path->nodes[*level + 1];
3021 root_owner = btrfs_header_owner(parent);
3024 root_gen = btrfs_header_generation(parent);
3025 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
3026 path->nodes[*level]->len, parent->start,
3027 root_owner, root_gen, *level, 1);
3028 free_extent_buffer(path->nodes[*level]);
3029 path->nodes[*level] = NULL;
3030 *level += 1;
3031 BUG_ON(ret);
3032 return 0;
3036 * helper for dropping snapshots. This walks back up the tree in the path
3037 * to find the first node higher up where we haven't yet gone through
3038 * all the slots
3040 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3041 struct btrfs_root *root,
3042 struct btrfs_path *path, int *level)
3044 u64 root_owner;
3045 u64 root_gen;
3046 struct btrfs_root_item *root_item = &root->root_item;
3047 int i;
3048 int slot;
3049 int ret;
3051 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3052 slot = path->slots[i];
3053 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3054 struct extent_buffer *node;
3055 struct btrfs_disk_key disk_key;
3056 node = path->nodes[i];
3057 path->slots[i]++;
3058 *level = i;
3059 WARN_ON(*level == 0);
3060 btrfs_node_key(node, &disk_key, path->slots[i]);
3061 memcpy(&root_item->drop_progress,
3062 &disk_key, sizeof(disk_key));
3063 root_item->drop_level = i;
3064 return 0;
3065 } else {
3066 struct extent_buffer *parent;
3067 if (path->nodes[*level] == root->node)
3068 parent = path->nodes[*level];
3069 else
3070 parent = path->nodes[*level + 1];
3072 root_owner = btrfs_header_owner(parent);
3073 root_gen = btrfs_header_generation(parent);
3074 ret = btrfs_free_extent(trans, root,
3075 path->nodes[*level]->start,
3076 path->nodes[*level]->len,
3077 parent->start, root_owner,
3078 root_gen, *level, 1);
3079 BUG_ON(ret);
3080 free_extent_buffer(path->nodes[*level]);
3081 path->nodes[*level] = NULL;
3082 *level = i + 1;
3085 return 1;
3088 #endif
3090 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3092 struct btrfs_space_info *sinfo;
3093 struct btrfs_block_group_cache *cache;
3094 u64 start;
3095 u64 end;
3096 u64 ptr;
3097 int ret;
3099 while(1) {
3100 ret = find_first_extent_bit(&info->block_group_cache, 0,
3101 &start, &end, (unsigned int)-1);
3102 if (ret)
3103 break;
3104 ret = get_state_private(&info->block_group_cache, start, &ptr);
3105 if (!ret) {
3106 cache = u64_to_ptr(ptr);
3107 if (cache->free_space_ctl) {
3108 btrfs_remove_free_space_cache(cache);
3109 kfree(cache->free_space_ctl);
3111 kfree(cache);
3113 clear_extent_bits(&info->block_group_cache, start,
3114 end, (unsigned int)-1);
3116 while(1) {
3117 ret = find_first_extent_bit(&info->free_space_cache, 0,
3118 &start, &end, EXTENT_DIRTY);
3119 if (ret)
3120 break;
3121 clear_extent_dirty(&info->free_space_cache, start, end);
3124 while (!list_empty(&info->space_info)) {
3125 sinfo = list_entry(info->space_info.next,
3126 struct btrfs_space_info, list);
3127 list_del_init(&sinfo->list);
3128 kfree(sinfo);
3130 return 0;
3133 static int find_first_block_group(struct btrfs_root *root,
3134 struct btrfs_path *path, struct btrfs_key *key)
3136 int ret;
3137 struct btrfs_key found_key;
3138 struct extent_buffer *leaf;
3139 int slot;
3141 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3142 if (ret < 0)
3143 return ret;
3144 while(1) {
3145 slot = path->slots[0];
3146 leaf = path->nodes[0];
3147 if (slot >= btrfs_header_nritems(leaf)) {
3148 ret = btrfs_next_leaf(root, path);
3149 if (ret == 0)
3150 continue;
3151 if (ret < 0)
3152 goto error;
3153 break;
3155 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3157 if (found_key.objectid >= key->objectid &&
3158 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
3159 return 0;
3160 path->slots[0]++;
3162 ret = -ENOENT;
3163 error:
3164 return ret;
3167 static void account_super_bytes(struct btrfs_fs_info *fs_info,
3168 struct btrfs_block_group_cache *cache)
3170 u64 bytenr;
3171 u64 *logical;
3172 int stripe_len;
3173 int i, nr, ret;
3175 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
3176 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
3177 cache->bytes_super += stripe_len;
3180 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3181 bytenr = btrfs_sb_offset(i);
3182 ret = btrfs_rmap_block(fs_info,
3183 cache->key.objectid, bytenr,
3184 0, &logical, &nr, &stripe_len);
3185 if (ret)
3186 return;
3188 while (nr--) {
3189 u64 start, len;
3191 if (logical[nr] > cache->key.objectid +
3192 cache->key.offset)
3193 continue;
3195 if (logical[nr] + stripe_len <= cache->key.objectid)
3196 continue;
3198 start = logical[nr];
3199 if (start < cache->key.objectid) {
3200 start = cache->key.objectid;
3201 len = (logical[nr] + stripe_len) - start;
3202 } else {
3203 len = min_t(u64, stripe_len,
3204 cache->key.objectid +
3205 cache->key.offset - start);
3208 cache->bytes_super += len;
3211 kfree(logical);
3215 int btrfs_read_block_groups(struct btrfs_root *root)
3217 struct btrfs_path *path;
3218 int ret;
3219 int bit;
3220 struct btrfs_block_group_cache *cache;
3221 struct btrfs_fs_info *info = root->fs_info;
3222 struct btrfs_space_info *space_info;
3223 struct extent_io_tree *block_group_cache;
3224 struct btrfs_key key;
3225 struct btrfs_key found_key;
3226 struct extent_buffer *leaf;
3228 block_group_cache = &info->block_group_cache;
3230 root = info->extent_root;
3231 key.objectid = 0;
3232 key.offset = 0;
3233 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3234 path = btrfs_alloc_path();
3235 if (!path)
3236 return -ENOMEM;
3238 while(1) {
3239 ret = find_first_block_group(root, path, &key);
3240 if (ret > 0) {
3241 ret = 0;
3242 goto error;
3244 if (ret != 0) {
3245 goto error;
3247 leaf = path->nodes[0];
3248 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3250 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3251 if (!cache) {
3252 ret = -ENOMEM;
3253 goto error;
3256 read_extent_buffer(leaf, &cache->item,
3257 btrfs_item_ptr_offset(leaf, path->slots[0]),
3258 sizeof(cache->item));
3259 memcpy(&cache->key, &found_key, sizeof(found_key));
3260 cache->cached = 0;
3261 cache->pinned = 0;
3262 key.objectid = found_key.objectid + found_key.offset;
3263 if (found_key.offset == 0)
3264 key.objectid++;
3265 btrfs_release_path(path);
3268 * Skip 0 sized block group, don't insert them into block
3269 * group cache tree, as its length is 0, it won't get
3270 * freed at close_ctree() time.
3272 if (found_key.offset == 0) {
3273 free(cache);
3274 continue;
3277 cache->flags = btrfs_block_group_flags(&cache->item);
3278 bit = 0;
3279 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3280 bit = BLOCK_GROUP_DATA;
3281 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3282 bit = BLOCK_GROUP_SYSTEM;
3283 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3284 bit = BLOCK_GROUP_METADATA;
3286 set_avail_alloc_bits(info, cache->flags);
3287 if (btrfs_chunk_readonly(info, cache->key.objectid))
3288 cache->ro = 1;
3290 account_super_bytes(info, cache);
3292 ret = update_space_info(info, cache->flags, found_key.offset,
3293 btrfs_block_group_used(&cache->item),
3294 &space_info);
3295 BUG_ON(ret);
3296 cache->space_info = space_info;
3298 /* use EXTENT_LOCKED to prevent merging */
3299 set_extent_bits(block_group_cache, found_key.objectid,
3300 found_key.objectid + found_key.offset - 1,
3301 bit | EXTENT_LOCKED);
3302 set_state_private(block_group_cache, found_key.objectid,
3303 (unsigned long)cache);
3305 ret = 0;
3306 error:
3307 btrfs_free_path(path);
3308 return ret;
3311 struct btrfs_block_group_cache *
3312 btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
3313 u64 chunk_offset, u64 size)
3315 int ret;
3316 int bit = 0;
3317 struct btrfs_block_group_cache *cache;
3318 struct extent_io_tree *block_group_cache;
3320 block_group_cache = &fs_info->block_group_cache;
3322 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3323 BUG_ON(!cache);
3324 cache->key.objectid = chunk_offset;
3325 cache->key.offset = size;
3327 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3328 btrfs_set_block_group_used(&cache->item, bytes_used);
3329 btrfs_set_block_group_chunk_objectid(&cache->item,
3330 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
3331 cache->flags = type;
3332 btrfs_set_block_group_flags(&cache->item, type);
3334 account_super_bytes(fs_info, cache);
3335 ret = update_space_info(fs_info, cache->flags, size, bytes_used,
3336 &cache->space_info);
3337 BUG_ON(ret);
3339 bit = block_group_state_bits(type);
3340 ret = set_extent_bits(block_group_cache, chunk_offset,
3341 chunk_offset + size - 1,
3342 bit | EXTENT_LOCKED);
3343 BUG_ON(ret);
3345 ret = set_state_private(block_group_cache, chunk_offset,
3346 (unsigned long)cache);
3347 BUG_ON(ret);
3348 set_avail_alloc_bits(fs_info, type);
3350 return cache;
3353 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3354 struct btrfs_fs_info *fs_info, u64 bytes_used,
3355 u64 type, u64 chunk_offset, u64 size)
3357 int ret;
3358 struct btrfs_root *extent_root = fs_info->extent_root;
3359 struct btrfs_block_group_cache *cache;
3361 cache = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset,
3362 size);
3363 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3364 sizeof(cache->item));
3365 BUG_ON(ret);
3367 ret = finish_current_insert(trans, extent_root);
3368 BUG_ON(ret);
3369 ret = del_pending_extents(trans, extent_root);
3370 BUG_ON(ret);
3372 return 0;
3376 * This is for converter use only.
3378 * In that case, we don't know where are free blocks located.
3379 * Therefore all block group cache entries must be setup properly
3380 * before doing any block allocation.
3382 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
3383 struct btrfs_fs_info *fs_info)
3385 u64 total_bytes;
3386 u64 cur_start;
3387 u64 group_type;
3388 u64 group_size;
3389 u64 group_align;
3390 u64 total_data = 0;
3391 u64 total_metadata = 0;
3392 u64 chunk_objectid;
3393 int ret;
3394 int bit;
3395 struct btrfs_root *extent_root = fs_info->extent_root;
3396 struct btrfs_block_group_cache *cache;
3397 struct extent_io_tree *block_group_cache;
3399 block_group_cache = &fs_info->block_group_cache;
3400 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3401 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
3402 group_align = 64 * fs_info->sectorsize;
3404 cur_start = 0;
3405 while (cur_start < total_bytes) {
3406 group_size = total_bytes / 12;
3407 group_size = min_t(u64, group_size, total_bytes - cur_start);
3408 if (cur_start == 0) {
3409 bit = BLOCK_GROUP_SYSTEM;
3410 group_type = BTRFS_BLOCK_GROUP_SYSTEM;
3411 group_size /= 4;
3412 group_size &= ~(group_align - 1);
3413 group_size = max_t(u64, group_size, SZ_8M);
3414 group_size = min_t(u64, group_size, SZ_32M);
3415 } else {
3416 group_size &= ~(group_align - 1);
3417 if (total_data >= total_metadata * 2) {
3418 group_type = BTRFS_BLOCK_GROUP_METADATA;
3419 group_size = min_t(u64, group_size, SZ_1G);
3420 total_metadata += group_size;
3421 } else {
3422 group_type = BTRFS_BLOCK_GROUP_DATA;
3423 group_size = min_t(u64, group_size,
3424 5ULL * SZ_1G);
3425 total_data += group_size;
3427 if ((total_bytes - cur_start) * 4 < group_size * 5)
3428 group_size = total_bytes - cur_start;
3431 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3432 BUG_ON(!cache);
3434 cache->key.objectid = cur_start;
3435 cache->key.offset = group_size;
3436 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3438 btrfs_set_block_group_used(&cache->item, 0);
3439 btrfs_set_block_group_chunk_objectid(&cache->item,
3440 chunk_objectid);
3441 btrfs_set_block_group_flags(&cache->item, group_type);
3443 cache->flags = group_type;
3445 ret = update_space_info(fs_info, group_type, group_size,
3446 0, &cache->space_info);
3447 BUG_ON(ret);
3448 set_avail_alloc_bits(fs_info, group_type);
3450 set_extent_bits(block_group_cache, cur_start,
3451 cur_start + group_size - 1,
3452 bit | EXTENT_LOCKED);
3453 set_state_private(block_group_cache, cur_start,
3454 (unsigned long)cache);
3455 cur_start += group_size;
3457 /* then insert all the items */
3458 cur_start = 0;
3459 while(cur_start < total_bytes) {
3460 cache = btrfs_lookup_block_group(fs_info, cur_start);
3461 BUG_ON(!cache);
3463 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3464 sizeof(cache->item));
3465 BUG_ON(ret);
3467 finish_current_insert(trans, extent_root);
3468 ret = del_pending_extents(trans, extent_root);
3469 BUG_ON(ret);
3471 cur_start = cache->key.objectid + cache->key.offset;
3473 return 0;
3476 int btrfs_update_block_group(struct btrfs_root *root,
3477 u64 bytenr, u64 num_bytes, int alloc,
3478 int mark_free)
3480 return update_block_group(root, bytenr, num_bytes,
3481 alloc, mark_free);
3485 * Just remove a block group item in extent tree
3486 * Caller should ensure the block group is empty and all space is pinned.
3487 * Or new tree block/data may be allocated into it.
3489 static int free_block_group_item(struct btrfs_trans_handle *trans,
3490 struct btrfs_fs_info *fs_info,
3491 u64 bytenr, u64 len)
3493 struct btrfs_path *path;
3494 struct btrfs_key key;
3495 struct btrfs_root *root = fs_info->extent_root;
3496 int ret = 0;
3498 key.objectid = bytenr;
3499 key.offset = len;
3500 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3502 path = btrfs_alloc_path();
3503 if (!path)
3504 return -ENOMEM;
3506 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3507 if (ret > 0) {
3508 ret = -ENOENT;
3509 goto out;
3511 if (ret < 0)
3512 goto out;
3514 ret = btrfs_del_item(trans, root, path);
3515 out:
3516 btrfs_free_path(path);
3517 return ret;
3520 static int free_dev_extent_item(struct btrfs_trans_handle *trans,
3521 struct btrfs_fs_info *fs_info,
3522 u64 devid, u64 dev_offset)
3524 struct btrfs_root *root = fs_info->dev_root;
3525 struct btrfs_path *path;
3526 struct btrfs_key key;
3527 int ret;
3529 path = btrfs_alloc_path();
3530 if (!path)
3531 return -ENOMEM;
3533 key.objectid = devid;
3534 key.type = BTRFS_DEV_EXTENT_KEY;
3535 key.offset = dev_offset;
3537 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3538 if (ret < 0)
3539 goto out;
3540 if (ret > 0) {
3541 ret = -ENOENT;
3542 goto out;
3545 ret = btrfs_del_item(trans, root, path);
3546 out:
3547 btrfs_free_path(path);
3548 return ret;
3551 static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
3552 struct btrfs_fs_info *fs_info,
3553 u64 chunk_offset)
3555 struct btrfs_chunk *chunk = NULL;
3556 struct btrfs_root *root= fs_info->chunk_root;
3557 struct btrfs_path *path;
3558 struct btrfs_key key;
3559 u16 num_stripes;
3560 int i;
3561 int ret;
3563 path = btrfs_alloc_path();
3564 if (!path)
3565 return -ENOMEM;
3567 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3568 key.type = BTRFS_CHUNK_ITEM_KEY;
3569 key.offset = chunk_offset;
3571 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
3572 if (ret < 0)
3573 goto out;
3574 if (ret > 0) {
3575 ret = -ENOENT;
3576 goto out;
3578 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3579 struct btrfs_chunk);
3580 num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
3581 for (i = 0; i < num_stripes; i++) {
3582 ret = free_dev_extent_item(trans, fs_info,
3583 btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
3584 btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
3585 if (ret < 0)
3586 goto out;
3588 out:
3589 btrfs_free_path(path);
3590 return ret;
3593 static int free_system_chunk_item(struct btrfs_super_block *super,
3594 struct btrfs_key *key)
3596 struct btrfs_disk_key *disk_key;
3597 struct btrfs_key cpu_key;
3598 u32 array_size = btrfs_super_sys_array_size(super);
3599 char *ptr = (char *)super->sys_chunk_array;
3600 int cur = 0;
3601 int ret = -ENOENT;
3603 while (cur < btrfs_super_sys_array_size(super)) {
3604 struct btrfs_chunk *chunk;
3605 u32 num_stripes;
3606 u32 chunk_len;
3608 disk_key = (struct btrfs_disk_key *)(ptr + cur);
3609 btrfs_disk_key_to_cpu(&cpu_key, disk_key);
3610 if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
3611 /* just in case */
3612 ret = -EIO;
3613 goto out;
3616 chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
3617 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
3618 chunk_len = btrfs_chunk_item_size(num_stripes) +
3619 sizeof(*disk_key);
3621 if (key->objectid == cpu_key.objectid &&
3622 key->offset == cpu_key.offset &&
3623 key->type == cpu_key.type) {
3624 memmove(ptr + cur, ptr + cur + chunk_len,
3625 array_size - cur - chunk_len);
3626 array_size -= chunk_len;
3627 btrfs_set_super_sys_array_size(super, array_size);
3628 ret = 0;
3629 goto out;
3632 cur += chunk_len;
3634 out:
3635 return ret;
3638 static int free_chunk_item(struct btrfs_trans_handle *trans,
3639 struct btrfs_fs_info *fs_info,
3640 u64 bytenr)
3642 struct btrfs_path *path;
3643 struct btrfs_key key;
3644 struct btrfs_root *root = fs_info->chunk_root;
3645 struct btrfs_chunk *chunk;
3646 u64 chunk_type;
3647 int ret;
3649 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3650 key.offset = bytenr;
3651 key.type = BTRFS_CHUNK_ITEM_KEY;
3653 path = btrfs_alloc_path();
3654 if (!path)
3655 return -ENOMEM;
3657 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3658 if (ret > 0) {
3659 ret = -ENOENT;
3660 goto out;
3662 if (ret < 0)
3663 goto out;
3664 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3665 struct btrfs_chunk);
3666 chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
3668 ret = btrfs_del_item(trans, root, path);
3669 if (ret < 0)
3670 goto out;
3672 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3673 ret = free_system_chunk_item(fs_info->super_copy, &key);
3674 out:
3675 btrfs_free_path(path);
3676 return ret;
3679 static u64 get_dev_extent_len(struct map_lookup *map)
3681 int div;
3683 switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
3684 case 0: /* Single */
3685 case BTRFS_BLOCK_GROUP_DUP:
3686 case BTRFS_BLOCK_GROUP_RAID1:
3687 div = 1;
3688 break;
3689 case BTRFS_BLOCK_GROUP_RAID5:
3690 div = (map->num_stripes - 1);
3691 break;
3692 case BTRFS_BLOCK_GROUP_RAID6:
3693 div = (map->num_stripes - 2);
3694 break;
3695 case BTRFS_BLOCK_GROUP_RAID10:
3696 div = (map->num_stripes / map->sub_stripes);
3697 break;
3698 default:
3699 /* normally, read chunk security hook should handled it */
3700 BUG_ON(1);
3702 return map->ce.size / div;
/*
 * Free the in-memory caches for the block group at [bytenr, bytenr + len):
 * the block group cache itself, its free-space cache, its space_info
 * accounting, and the chunk mapping plus per-device usage.
 *
 * Must be called after the on-disk items have been removed; the teardown
 * order below (block group cache first, then mapping/device info) matters.
 *
 * Returns 0 on success, -ENOENT if the cache or mapping entry is missing,
 * or a negative errno from btrfs_update_device().
 */
static int free_block_group_cache(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  u64 bytenr, u64 len)
{
	struct btrfs_block_group_cache *cache;
	struct cache_extent *ce;
	struct map_lookup *map;
	int ret;
	int i;
	u64 flags;

	/* Free block group cache first */
	cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!cache)
		return -ENOENT;
	/* Save the flags: free_space_info() is called after cache is gone */
	flags = cache->flags;
	if (cache->free_space_ctl) {
		btrfs_remove_free_space_cache(cache);
		kfree(cache->free_space_ctl);
	}
	/* Drop every bit recorded for this range in the block group tree */
	clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len - 1,
			  (unsigned int)-1);
	ret = free_space_info(fs_info, flags, len, 0, NULL);
	if (ret < 0)
		goto out;
	kfree(cache);

	/* Then free mapping info and dev usage info */
	ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
	if (!ce || ce->start != bytenr) {
		ret = -ENOENT;
		goto out;
	}
	map = container_of(ce, struct map_lookup, ce);
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device;

		device = map->stripes[i].dev;
		/* Give back the space this chunk consumed on the device */
		device->bytes_used -= get_dev_extent_len(map);
		ret = btrfs_update_device(trans, device);
		if (ret < 0)
			goto out;
	}
	remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
	free(map);
out:
	return ret;
}
3755 int btrfs_free_block_group(struct btrfs_trans_handle *trans,
3756 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
3758 struct btrfs_root *extent_root = fs_info->extent_root;
3759 struct btrfs_path *path;
3760 struct btrfs_block_group_item *bgi;
3761 struct btrfs_key key;
3762 int ret = 0;
3764 path = btrfs_alloc_path();
3765 if (!path)
3766 return -ENOMEM;
3768 key.objectid = bytenr;
3769 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3770 key.offset = len;
3772 /* Double check the block group to ensure it's empty */
3773 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3774 if (ret > 0) {
3775 ret = -ENONET;
3776 goto out;
3778 if (ret < 0)
3779 goto out;
3781 bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3782 struct btrfs_block_group_item);
3783 if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
3784 fprintf(stderr,
3785 "WARNING: block group [%llu,%llu) is not empty\n",
3786 bytenr, bytenr + len);
3787 ret = -EINVAL;
3788 goto out;
3790 btrfs_release_path(path);
3793 * Now pin all space in the block group, to prevent further transaction
3794 * allocate space from it.
3795 * Every operation needs a transaction must be in the range.
3797 btrfs_pin_extent(fs_info, bytenr, len);
3799 /* delete block group item and chunk item */
3800 ret = free_block_group_item(trans, fs_info, bytenr, len);
3801 if (ret < 0) {
3802 fprintf(stderr,
3803 "failed to free block group item for [%llu,%llu)\n",
3804 bytenr, bytenr + len);
3805 btrfs_unpin_extent(fs_info, bytenr, len);
3806 goto out;
3809 ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
3810 if (ret < 0) {
3811 fprintf(stderr,
3812 "failed to dev extents belongs to [%llu,%llu)\n",
3813 bytenr, bytenr + len);
3814 btrfs_unpin_extent(fs_info, bytenr, len);
3815 goto out;
3817 ret = free_chunk_item(trans, fs_info, bytenr);
3818 if (ret < 0) {
3819 fprintf(stderr,
3820 "failed to free chunk for [%llu,%llu)\n",
3821 bytenr, bytenr + len);
3822 btrfs_unpin_extent(fs_info, bytenr, len);
3823 goto out;
3826 /* Now release the block_group_cache */
3827 ret = free_block_group_cache(trans, fs_info, bytenr, len);
3828 btrfs_unpin_extent(fs_info, bytenr, len);
3830 out:
3831 btrfs_free_path(path);
3832 return ret;
/*
 * Fixup block accounting. The initial block accounting created by
 * make_block_groups isn't accurate in this case: re-derive the used-bytes
 * counters of every block group (and the superblock total) by walking all
 * EXTENT_ITEM/METADATA_ITEM entries in the extent tree.
 *
 * Returns 0 on success or a negative errno from tree operations.
 */
int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret = 0;
	int slot;
	u64 start = 0;
	u64 bytes_used = 0;
	struct btrfs_path path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	/* All work below happens in the extent tree */
	root = root->fs_info->extent_root;

	/* Flush pending extent inserts/deletes so the tree walk sees them */
	while(extent_root_pending_ops(fs_info)) {
		ret = finish_current_insert(trans, root);
		if (ret)
			return ret;
		ret = del_pending_extents(trans, root);
		if (ret)
			return ret;
	}

	/* Zero every block group's accounting and mark it dirty */
	while(1) {
		cache = btrfs_lookup_first_block_group(fs_info, start);
		if (!cache)
			break;
		start = cache->key.objectid + cache->key.offset;
		btrfs_set_block_group_used(&cache->item, 0);
		cache->space_info->bytes_used = 0;
		set_extent_bits(&root->fs_info->block_group_cache,
				cache->key.objectid,
				cache->key.objectid + cache->key.offset -1,
				BLOCK_GROUP_DIRTY);
	}

	/* Walk the whole extent tree and re-add every extent's size */
	btrfs_init_path(&path);
	key.offset = 0;
	key.objectid = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, &path, 0, 0);
	if (ret < 0)
		return ret;
	while(1) {
		leaf = path.nodes[0];
		slot = path.slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, &path);
			if (ret < 0)
				return ret;
			if (ret > 0)
				break;
			leaf = path.nodes[0];
			slot = path.slots[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
			/* data extent: length is stored in key.offset */
			bytes_used += key.offset;
			ret = btrfs_update_block_group(root,
				  key.objectid, key.offset, 1, 0);
			BUG_ON(ret);
		} else if (key.type == BTRFS_METADATA_ITEM_KEY) {
			/* metadata (skinny) extent: length is the nodesize */
			bytes_used += fs_info->nodesize;
			ret = btrfs_update_block_group(root,
				  key.objectid, fs_info->nodesize, 1, 0);
			if (ret)
				goto out;
		}
		path.slots[0]++;
	}
	btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
	ret = 0;
out:
	btrfs_release_path(&path);
	return ret;
}
3918 static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path,
3919 u64 *start, u64 *len)
3921 struct btrfs_key key;
3923 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3924 BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY ||
3925 key.type == BTRFS_METADATA_ITEM_KEY));
3926 *start = key.objectid;
3927 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3928 *len = key.offset;
3929 else
3930 *len = root->fs_info->nodesize;
/*
 * Find the first extent item overlapping range [bytenr, bytenr + len).
 *
 * NOTE(review): the previous comment had the return values inverted; the
 * code below (and its caller in __btrfs_record_file_extent) actually use:
 * Return >0 when an overlapping extent item was found, with the path
 * pointing to it.
 * Return 0 when there is no overlap.
 * Return <0 on error.
 */
int btrfs_search_overlap_extent(struct btrfs_root *root,
				struct btrfs_path *path, u64 bytenr, u64 len)
{
	struct btrfs_key key;
	u64 cur_start;
	u64 cur_len;
	int ret;

	/* Position just past bytenr; EXTENT_DATA sorts after extent items */
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	/* The key above can never exist, so an exact match is a bug */
	BUG_ON(ret == 0);

	ret = btrfs_previous_extent_item(root, path, 0);
	if (ret < 0)
		return ret;
	/* no previous, check next extent */
	if (ret > 0)
		goto next;
	__get_extent_size(root, path, &cur_start, &cur_len);
	/* Tail overlap */
	if (cur_start + cur_len > bytenr)
		return 1;

next:
	ret = btrfs_next_extent_item(root, path, bytenr + len);
	if (ret < 0)
		return ret;
	/* No next, prev already checked, no overlap */
	if (ret > 0)
		return 0;
	__get_extent_size(root, path, &cur_start, &cur_len);
	/* head overlap*/
	if (cur_start < bytenr + len)
		return 1;
	return 0;
}
/*
 * Record a single file extent for @objectid at @file_pos backed by
 * @disk_bytenr.
 *
 * On input *ret_num_bytes is the number of bytes the caller wants covered;
 * on output it is the number of bytes actually recorded (may be smaller,
 * capped by BTRFS_MAX_EXTENT_SIZE or by an existing overlapping extent).
 * Inserts the extent item (or reuses an overlapping one), the file extent
 * item, updates the inode's nbytes and adds the backref.
 *
 * Returns 0 on success or a negative errno.
 */
static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root, u64 objectid,
				      struct btrfs_inode_item *inode,
				      u64 file_pos, u64 disk_bytenr,
				      u64 *ret_num_bytes)
{
	int ret;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key ins_key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	u64 nbytes;
	u64 extent_num_bytes;
	u64 extent_bytenr;
	u64 extent_offset;
	u64 num_bytes = *ret_num_bytes;

	/*
	 * All supported file systems should not use extent 0, as it's
	 * reserved for holes.
	 *
	 * And a hole extent has no size limit, so no need to loop.
	 */
	if (disk_bytenr == 0) {
		ret = btrfs_insert_file_extent(trans, root, objectid,
					       file_pos, disk_bytenr,
					       num_bytes, num_bytes);
		return ret;
	}
	num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* First to check extent overlap */
	ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr,
					  num_bytes);
	if (ret < 0)
		goto fail;
	if (ret > 0) {
		/* Found overlap */
		u64 cur_start;
		u64 cur_len;

		__get_extent_size(extent_root, path, &cur_start, &cur_len);
		/*
		 * For convert case, this extent should be a subset of
		 * existing one.
		 */
		BUG_ON(disk_bytenr < cur_start);

		extent_bytenr = cur_start;
		extent_num_bytes = cur_len;
		extent_offset = disk_bytenr - extent_bytenr;
	} else {
		/* No overlap, create new extent */
		btrfs_release_path(path);
		ins_key.objectid = disk_bytenr;
		ins_key.offset = num_bytes;
		ins_key.type = BTRFS_EXTENT_ITEM_KEY;

		ret = btrfs_insert_empty_item(trans, extent_root, path,
					      &ins_key, sizeof(*ei));
		if (ret == 0) {
			leaf = path->nodes[0];
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);

			/* refs start at 0; btrfs_inc_extent_ref adds one */
			btrfs_set_extent_refs(leaf, ei, 0);
			btrfs_set_extent_generation(leaf, ei, 0);
			btrfs_set_extent_flags(leaf, ei,
					       BTRFS_EXTENT_FLAG_DATA);
			btrfs_mark_buffer_dirty(leaf);

			ret = btrfs_update_block_group(root, disk_bytenr,
						       num_bytes, 1, 0);
			if (ret)
				goto fail;
		} else if (ret != -EEXIST) {
			/* -EEXIST means another insert raced us; that's fine */
			goto fail;
		}
		btrfs_extent_post_op(trans, extent_root);
		extent_bytenr = disk_bytenr;
		extent_num_bytes = num_bytes;
		extent_offset = 0;
	}
	/* Insert the file extent item referencing [extent_bytenr, +len) */
	btrfs_release_path(path);
	ins_key.objectid = objectid;
	ins_key.offset = file_pos;
	ins_key.type = BTRFS_EXTENT_DATA_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &ins_key,
				      sizeof(*fi));
	if (ret)
		goto fail;
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, extent_offset);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes);
	btrfs_set_file_extent_compression(leaf, fi, 0);
	btrfs_set_file_extent_encryption(leaf, fi, 0);
	btrfs_set_file_extent_other_encoding(leaf, fi, 0);
	btrfs_mark_buffer_dirty(leaf);

	/* Account the recorded bytes in the inode */
	nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
	btrfs_set_stack_inode_nbytes(inode, nbytes);
	btrfs_release_path(path);

	ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes,
				   0, root->root_key.objectid, objectid,
				   file_pos - extent_offset);
	if (ret)
		goto fail;
	ret = 0;
	/* Tell the caller how much was actually covered */
	*ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes);
fail:
	btrfs_free_path(path);
	return ret;
}
4111 * Record a file extent. Do all the required works, such as inserting
4112 * file extent item, inserting extent item and backref item into extent
4113 * tree and updating block accounting.
4115 int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
4116 struct btrfs_root *root, u64 objectid,
4117 struct btrfs_inode_item *inode,
4118 u64 file_pos, u64 disk_bytenr,
4119 u64 num_bytes)
4121 u64 cur_disk_bytenr = disk_bytenr;
4122 u64 cur_file_pos = file_pos;
4123 u64 cur_num_bytes = num_bytes;
4124 int ret = 0;
4126 while (num_bytes > 0) {
4127 ret = __btrfs_record_file_extent(trans, root, objectid,
4128 inode, cur_file_pos,
4129 cur_disk_bytenr,
4130 &cur_num_bytes);
4131 if (ret < 0)
4132 break;
4133 cur_disk_bytenr += cur_num_bytes;
4134 cur_file_pos += cur_num_bytes;
4135 num_bytes -= cur_num_bytes;
4137 return ret;
4141 static int add_excluded_extent(struct btrfs_root *root,
4142 u64 start, u64 num_bytes)
4144 u64 end = start + num_bytes - 1;
4145 set_extent_bits(&root->fs_info->pinned_extents,
4146 start, end, EXTENT_UPTODATE);
4147 return 0;
4150 void free_excluded_extents(struct btrfs_root *root,
4151 struct btrfs_block_group_cache *cache)
4153 u64 start, end;
4155 start = cache->key.objectid;
4156 end = start + cache->key.offset - 1;
4158 clear_extent_bits(&root->fs_info->pinned_extents,
4159 start, end, EXTENT_UPTODATE);
/*
 * Exclude from allocation every part of @cache that holds superblock
 * copies (and anything below BTRFS_SUPER_INFO_OFFSET), accumulating the
 * excluded byte count in cache->bytes_super.
 *
 * Returns 0 on success or a negative errno.
 */
int exclude_super_stripes(struct btrfs_root *root,
			  struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	/* Everything below the primary superblock offset is reserved */
	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret = add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		if (ret)
			return ret;
	}

	/* Exclude the logical ranges backing each superblock mirror */
	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		/* Map the physical sb copy back to logical addresses */
		ret = btrfs_rmap_block(root->fs_info,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		if (ret)
			return ret;

		while (nr--) {
			u64 start, len;

			/* Stripe entirely after this block group */
			if (logical[nr] > cache->key.objectid +
			    cache->key.offset)
				continue;

			/* Stripe entirely before this block group */
			if (logical[nr] + stripe_len <= cache->key.objectid)
				continue;

			/* Clip the stripe to the block group boundaries */
			start = logical[nr];
			if (start < cache->key.objectid) {
				start = cache->key.objectid;
				len = (logical[nr] + stripe_len) - start;
			} else {
				len = min_t(u64, stripe_len,
					    cache->key.objectid +
					    cache->key.offset - start);
			}

			cache->bytes_super += len;
			ret = add_excluded_extent(root, start, len);
			if (ret) {
				kfree(logical);
				return ret;
			}
		}

		kfree(logical);
	}
	return 0;
}
4220 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
4221 struct btrfs_fs_info *info, u64 start, u64 end)
4223 u64 extent_start, extent_end, size, total_added = 0;
4224 int ret;
4226 while (start < end) {
4227 ret = find_first_extent_bit(&info->pinned_extents, start,
4228 &extent_start, &extent_end,
4229 EXTENT_DIRTY | EXTENT_UPTODATE);
4230 if (ret)
4231 break;
4233 if (extent_start <= start) {
4234 start = extent_end + 1;
4235 } else if (extent_start > start && extent_start < end) {
4236 size = extent_start - start;
4237 total_added += size;
4238 ret = btrfs_add_free_space(block_group->free_space_ctl,
4239 start, size);
4240 BUG_ON(ret); /* -ENOMEM or logic error */
4241 start = extent_end + 1;
4242 } else {
4243 break;
4247 if (start < end) {
4248 size = end - start;
4249 total_added += size;
4250 ret = btrfs_add_free_space(block_group->free_space_ctl, start,
4251 size);
4252 BUG_ON(ret); /* -ENOMEM or logic error */
4255 return total_added;