btrfs-progs: check: Initialize all filed of btrfs_inode_item in insert_inode_item()
[btrfs-progs-unstable/devel.git] / extent-tree.c
blob0643815bd41c679baf441b9856df1a75727e9ac1
1 /*
2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <stdint.h>
22 #include <math.h>
23 #include "kerncompat.h"
24 #include "radix-tree.h"
25 #include "ctree.h"
26 #include "disk-io.h"
27 #include "print-tree.h"
28 #include "transaction.h"
29 #include "crc32c.h"
30 #include "volumes.h"
31 #include "free-space-cache.h"
32 #include "utils.h"
34 #define PENDING_EXTENT_INSERT 0
35 #define PENDING_EXTENT_DELETE 1
36 #define PENDING_BACKREF_UPDATE 2
38 struct pending_extent_op {
39 int type;
40 u64 bytenr;
41 u64 num_bytes;
42 u64 flags;
43 struct btrfs_disk_key key;
44 int level;
47 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
48 struct btrfs_root *root,
49 u64 root_objectid, u64 generation,
50 u64 flags, struct btrfs_disk_key *key,
51 int level, struct btrfs_key *ins);
52 static int __free_extent(struct btrfs_trans_handle *trans,
53 struct btrfs_root *root,
54 u64 bytenr, u64 num_bytes, u64 parent,
55 u64 root_objectid, u64 owner_objectid,
56 u64 owner_offset, int refs_to_drop);
57 static int finish_current_insert(struct btrfs_trans_handle *trans);
58 static int del_pending_extents(struct btrfs_trans_handle *trans);
59 static struct btrfs_block_group_cache *
60 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
61 *hint, u64 search_start, int data, int owner);
63 static int remove_sb_from_cache(struct btrfs_root *root,
64 struct btrfs_block_group_cache *cache)
66 u64 bytenr;
67 u64 *logical;
68 int stripe_len;
69 int i, nr, ret;
70 struct btrfs_fs_info *fs_info = root->fs_info;
71 struct extent_io_tree *free_space_cache;
73 free_space_cache = &fs_info->free_space_cache;
74 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
75 bytenr = btrfs_sb_offset(i);
76 ret = btrfs_rmap_block(fs_info, cache->key.objectid, bytenr,
77 &logical, &nr, &stripe_len);
78 BUG_ON(ret);
79 while (nr--) {
80 clear_extent_dirty(free_space_cache, logical[nr],
81 logical[nr] + stripe_len - 1);
83 kfree(logical);
85 return 0;
88 static int cache_block_group(struct btrfs_root *root,
89 struct btrfs_block_group_cache *block_group)
91 struct btrfs_path *path;
92 int ret;
93 struct btrfs_key key;
94 struct extent_buffer *leaf;
95 struct extent_io_tree *free_space_cache;
96 int slot;
97 u64 last;
98 u64 hole_size;
100 if (!block_group)
101 return 0;
103 root = root->fs_info->extent_root;
104 free_space_cache = &root->fs_info->free_space_cache;
106 if (block_group->cached)
107 return 0;
109 path = btrfs_alloc_path();
110 if (!path)
111 return -ENOMEM;
113 path->reada = READA_FORWARD;
114 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
115 key.objectid = last;
116 key.offset = 0;
117 key.type = 0;
119 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
120 if (ret < 0)
121 goto err;
123 while(1) {
124 leaf = path->nodes[0];
125 slot = path->slots[0];
126 if (slot >= btrfs_header_nritems(leaf)) {
127 ret = btrfs_next_leaf(root, path);
128 if (ret < 0)
129 goto err;
130 if (ret == 0) {
131 continue;
132 } else {
133 break;
136 btrfs_item_key_to_cpu(leaf, &key, slot);
137 if (key.objectid < block_group->key.objectid) {
138 goto next;
140 if (key.objectid >= block_group->key.objectid +
141 block_group->key.offset) {
142 break;
145 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
146 key.type == BTRFS_METADATA_ITEM_KEY) {
147 if (key.objectid > last) {
148 hole_size = key.objectid - last;
149 set_extent_dirty(free_space_cache, last,
150 last + hole_size - 1);
152 if (key.type == BTRFS_METADATA_ITEM_KEY)
153 last = key.objectid + root->fs_info->nodesize;
154 else
155 last = key.objectid + key.offset;
157 next:
158 path->slots[0]++;
161 if (block_group->key.objectid +
162 block_group->key.offset > last) {
163 hole_size = block_group->key.objectid +
164 block_group->key.offset - last;
165 set_extent_dirty(free_space_cache, last, last + hole_size - 1);
167 remove_sb_from_cache(root, block_group);
168 block_group->cached = 1;
169 err:
170 btrfs_free_path(path);
171 return 0;
174 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
175 btrfs_fs_info *info,
176 u64 bytenr)
178 struct extent_io_tree *block_group_cache;
179 struct btrfs_block_group_cache *block_group = NULL;
180 u64 ptr;
181 u64 start;
182 u64 end;
183 int ret;
185 bytenr = max_t(u64, bytenr,
186 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
187 block_group_cache = &info->block_group_cache;
188 ret = find_first_extent_bit(block_group_cache,
189 bytenr, &start, &end,
190 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
191 BLOCK_GROUP_SYSTEM);
192 if (ret) {
193 return NULL;
195 ret = get_state_private(block_group_cache, start, &ptr);
196 if (ret)
197 return NULL;
199 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
200 return block_group;
203 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
204 btrfs_fs_info *info,
205 u64 bytenr)
207 struct extent_io_tree *block_group_cache;
208 struct btrfs_block_group_cache *block_group = NULL;
209 u64 ptr;
210 u64 start;
211 u64 end;
212 int ret;
214 block_group_cache = &info->block_group_cache;
215 ret = find_first_extent_bit(block_group_cache,
216 bytenr, &start, &end,
217 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
218 BLOCK_GROUP_SYSTEM);
219 if (ret) {
220 return NULL;
222 ret = get_state_private(block_group_cache, start, &ptr);
223 if (ret)
224 return NULL;
226 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
227 if (block_group->key.objectid <= bytenr && bytenr <
228 block_group->key.objectid + block_group->key.offset)
229 return block_group;
230 return NULL;
233 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
235 return (cache->flags & bits) == bits;
238 static int noinline find_search_start(struct btrfs_root *root,
239 struct btrfs_block_group_cache **cache_ret,
240 u64 *start_ret, int num, int data)
242 int ret;
243 struct btrfs_block_group_cache *cache = *cache_ret;
244 u64 last = *start_ret;
245 u64 start = 0;
246 u64 end = 0;
247 u64 search_start = *start_ret;
248 int wrapped = 0;
250 if (!cache)
251 goto out;
252 again:
253 ret = cache_block_group(root, cache);
254 if (ret)
255 goto out;
257 last = max(search_start, cache->key.objectid);
258 if (cache->ro || !block_group_bits(cache, data))
259 goto new_group;
261 while(1) {
262 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
263 last, &start, &end, EXTENT_DIRTY);
264 if (ret) {
265 goto new_group;
268 start = max(last, start);
269 last = end + 1;
270 if (last - start < num) {
271 continue;
273 if (start + num > cache->key.objectid + cache->key.offset) {
274 goto new_group;
276 *start_ret = start;
277 return 0;
279 out:
280 *start_ret = last;
281 cache = btrfs_lookup_block_group(root->fs_info, search_start);
282 if (!cache) {
283 printk("Unable to find block group for %llu\n",
284 (unsigned long long)search_start);
285 return -ENOENT;
287 return -ENOSPC;
289 new_group:
290 last = cache->key.objectid + cache->key.offset;
291 wrapped:
292 cache = btrfs_lookup_first_block_group(root->fs_info, last);
293 if (!cache) {
294 if (!wrapped) {
295 wrapped = 1;
296 last = search_start;
297 goto wrapped;
299 goto out;
301 *cache_ret = cache;
302 goto again;
305 static int block_group_state_bits(u64 flags)
307 int bits = 0;
308 if (flags & BTRFS_BLOCK_GROUP_DATA)
309 bits |= BLOCK_GROUP_DATA;
310 if (flags & BTRFS_BLOCK_GROUP_METADATA)
311 bits |= BLOCK_GROUP_METADATA;
312 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
313 bits |= BLOCK_GROUP_SYSTEM;
314 return bits;
317 static struct btrfs_block_group_cache *
318 btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache
319 *hint, u64 search_start, int data, int owner)
321 struct btrfs_block_group_cache *cache;
322 struct extent_io_tree *block_group_cache;
323 struct btrfs_block_group_cache *found_group = NULL;
324 struct btrfs_fs_info *info = root->fs_info;
325 u64 used;
326 u64 last = 0;
327 u64 hint_last;
328 u64 start;
329 u64 end;
330 u64 free_check;
331 u64 ptr;
332 int bit;
333 int ret;
334 int full_search = 0;
335 int factor = 10;
337 block_group_cache = &info->block_group_cache;
339 if (!owner)
340 factor = 10;
342 bit = block_group_state_bits(data);
344 if (search_start) {
345 struct btrfs_block_group_cache *shint;
346 shint = btrfs_lookup_block_group(info, search_start);
347 if (shint && !shint->ro && block_group_bits(shint, data)) {
348 used = btrfs_block_group_used(&shint->item);
349 if (used + shint->pinned <
350 div_factor(shint->key.offset, factor)) {
351 return shint;
355 if (hint && !hint->ro && block_group_bits(hint, data)) {
356 used = btrfs_block_group_used(&hint->item);
357 if (used + hint->pinned <
358 div_factor(hint->key.offset, factor)) {
359 return hint;
361 last = hint->key.objectid + hint->key.offset;
362 hint_last = last;
363 } else {
364 if (hint)
365 hint_last = max(hint->key.objectid, search_start);
366 else
367 hint_last = search_start;
369 last = hint_last;
371 again:
372 while(1) {
373 ret = find_first_extent_bit(block_group_cache, last,
374 &start, &end, bit);
375 if (ret)
376 break;
378 ret = get_state_private(block_group_cache, start, &ptr);
379 if (ret)
380 break;
382 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
383 last = cache->key.objectid + cache->key.offset;
384 used = btrfs_block_group_used(&cache->item);
386 if (!cache->ro && block_group_bits(cache, data)) {
387 if (full_search)
388 free_check = cache->key.offset;
389 else
390 free_check = div_factor(cache->key.offset,
391 factor);
393 if (used + cache->pinned < free_check) {
394 found_group = cache;
395 goto found;
398 cond_resched();
400 if (!full_search) {
401 last = search_start;
402 full_search = 1;
403 goto again;
405 found:
406 return found_group;
410 * Back reference rules. Back refs have three main goals:
412 * 1) differentiate between all holders of references to an extent so that
413 * when a reference is dropped we can make sure it was a valid reference
414 * before freeing the extent.
416 * 2) Provide enough information to quickly find the holders of an extent
417 * if we notice a given block is corrupted or bad.
419 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
420 * maintenance. This is actually the same as #2, but with a slightly
421 * different use case.
423 * There are two kinds of back refs. The implicit back refs is optimized
424 * for pointers in non-shared tree blocks. For a given pointer in a block,
425 * back refs of this kind provide information about the block's owner tree
426 * and the pointer's key. This information allows us to find the block by
427 * b-tree searching. The full back refs is for pointers in tree blocks not
428 * referenced by their owner trees. The location of tree block is recorded
429 * in the back refs. Actually the full back refs is generic, and can be
430 * used in all cases the implicit back refs is used. The major shortcoming
431 * of the full back refs is its overhead. Every time a tree block gets
432 * COWed, we have to update back refs entry for all pointers in it.
434 * For a newly allocated tree block, we use implicit back refs for
435 * pointers in it. This means most tree related operations only involve
436 * implicit back refs. For a tree block created in old transaction, the
437 * only way to drop a reference to it is COW it. So we can detect the
438 * event that tree block loses its owner tree's reference and do the
439 * back refs conversion.
441 * When a tree block is COW'd through a tree, there are four cases:
443 * The reference count of the block is one and the tree is the block's
444 * owner tree. Nothing to do in this case.
446 * The reference count of the block is one and the tree is not the
447 * block's owner tree. In this case, full back refs is used for pointers
448 * in the block. Remove these full back refs, add implicit back refs for
449 * every pointers in the new block.
451 * The reference count of the block is greater than one and the tree is
452 * the block's owner tree. In this case, implicit back refs is used for
453 * pointers in the block. Add full back refs for every pointers in the
454 * block, increase lower level extents' reference counts. The original
455 * implicit back refs are entailed to the new block.
457 * The reference count of the block is greater than one and the tree is
458 * not the block's owner tree. Add implicit back refs for every pointer in
459 * the new block, increase lower level extents' reference count.
461 * Back Reference Key composing:
463 * The key objectid corresponds to the first byte in the extent,
464 * The key type is used to differentiate between types of back refs.
465 * There are different meanings of the key offset for different types
466 * of back refs.
468 * File extents can be referenced by:
470 * - multiple snapshots, subvolumes, or different generations in one subvol
471 * - different files inside a single subvolume
472 * - different offsets inside a file (bookend extents in file.c)
474 * The extent ref structure for the implicit back refs has fields for:
476 * - Objectid of the subvolume root
477 * - objectid of the file holding the reference
478 * - original offset in the file
479 * - how many bookend extents
481 * The key offset for the implicit back refs is hash of the first
482 * three fields.
484 * The extent ref structure for the full back refs has field for:
486 * - number of pointers in the tree leaf
488 * The key offset for the full back refs is the first byte of
489 * the tree leaf
491 * When a file extent is allocated, The implicit back refs is used.
492 * the fields are filled in:
494 * (root_key.objectid, inode objectid, offset in file, 1)
496 * When a file extent is removed file truncation, we find the
497 * corresponding implicit back refs and check the following fields:
499 * (btrfs_header_owner(leaf), inode objectid, offset in file)
501 * Btree extents can be referenced by:
503 * - Different subvolumes
505 * Both the implicit back refs and the full back refs for tree blocks
506 * only consist of key. The key offset for the implicit back refs is
507 * objectid of block's owner tree. The key offset for the full back refs
508 * is the first byte of parent block.
510 * When implicit back refs is used, information about the lowest key and
511 * level of the tree block are required. This information is stored in
512 * tree block info structure.
515 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
516 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
517 struct btrfs_root *root,
518 struct btrfs_path *path,
519 u64 owner, u32 extra_size)
521 struct btrfs_extent_item *item;
522 struct btrfs_extent_item_v0 *ei0;
523 struct btrfs_extent_ref_v0 *ref0;
524 struct btrfs_tree_block_info *bi;
525 struct extent_buffer *leaf;
526 struct btrfs_key key;
527 struct btrfs_key found_key;
528 u32 new_size = sizeof(*item);
529 u64 refs;
530 int ret;
532 leaf = path->nodes[0];
533 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
535 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
536 ei0 = btrfs_item_ptr(leaf, path->slots[0],
537 struct btrfs_extent_item_v0);
538 refs = btrfs_extent_refs_v0(leaf, ei0);
540 if (owner == (u64)-1) {
541 while (1) {
542 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
543 ret = btrfs_next_leaf(root, path);
544 if (ret < 0)
545 return ret;
546 BUG_ON(ret > 0);
547 leaf = path->nodes[0];
549 btrfs_item_key_to_cpu(leaf, &found_key,
550 path->slots[0]);
551 BUG_ON(key.objectid != found_key.objectid);
552 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
553 path->slots[0]++;
554 continue;
556 ref0 = btrfs_item_ptr(leaf, path->slots[0],
557 struct btrfs_extent_ref_v0);
558 owner = btrfs_ref_objectid_v0(leaf, ref0);
559 break;
562 btrfs_release_path(path);
564 if (owner < BTRFS_FIRST_FREE_OBJECTID)
565 new_size += sizeof(*bi);
567 new_size -= sizeof(*ei0);
568 ret = btrfs_search_slot(trans, root, &key, path, new_size, 1);
569 if (ret < 0)
570 return ret;
571 BUG_ON(ret);
573 ret = btrfs_extend_item(root, path, new_size);
574 BUG_ON(ret);
576 leaf = path->nodes[0];
577 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
578 btrfs_set_extent_refs(leaf, item, refs);
579 /* FIXME: get real generation */
580 btrfs_set_extent_generation(leaf, item, 0);
581 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
582 btrfs_set_extent_flags(leaf, item,
583 BTRFS_EXTENT_FLAG_TREE_BLOCK |
584 BTRFS_BLOCK_FLAG_FULL_BACKREF);
585 bi = (struct btrfs_tree_block_info *)(item + 1);
586 /* FIXME: get first key of the block */
587 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
588 btrfs_set_tree_block_level(leaf, bi, (int)owner);
589 } else {
590 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
592 btrfs_mark_buffer_dirty(leaf);
593 return 0;
595 #endif
597 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
599 u32 high_crc = ~(u32)0;
600 u32 low_crc = ~(u32)0;
601 __le64 lenum;
603 lenum = cpu_to_le64(root_objectid);
604 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
605 lenum = cpu_to_le64(owner);
606 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
607 lenum = cpu_to_le64(offset);
608 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
610 return ((u64)high_crc << 31) ^ (u64)low_crc;
613 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
614 struct btrfs_extent_data_ref *ref)
616 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
617 btrfs_extent_data_ref_objectid(leaf, ref),
618 btrfs_extent_data_ref_offset(leaf, ref));
621 static int match_extent_data_ref(struct extent_buffer *leaf,
622 struct btrfs_extent_data_ref *ref,
623 u64 root_objectid, u64 owner, u64 offset)
625 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
626 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
627 btrfs_extent_data_ref_offset(leaf, ref) != offset)
628 return 0;
629 return 1;
632 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
633 struct btrfs_root *root,
634 struct btrfs_path *path,
635 u64 bytenr, u64 parent,
636 u64 root_objectid,
637 u64 owner, u64 offset)
639 struct btrfs_key key;
640 struct btrfs_extent_data_ref *ref;
641 struct extent_buffer *leaf;
642 u32 nritems;
643 int ret;
644 int recow;
645 int err = -ENOENT;
647 key.objectid = bytenr;
648 if (parent) {
649 key.type = BTRFS_SHARED_DATA_REF_KEY;
650 key.offset = parent;
651 } else {
652 key.type = BTRFS_EXTENT_DATA_REF_KEY;
653 key.offset = hash_extent_data_ref(root_objectid,
654 owner, offset);
656 again:
657 recow = 0;
658 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
659 if (ret < 0) {
660 err = ret;
661 goto fail;
664 if (parent) {
665 if (!ret)
666 return 0;
667 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
668 key.type = BTRFS_EXTENT_REF_V0_KEY;
669 btrfs_release_path(path);
670 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
671 if (ret < 0) {
672 err = ret;
673 goto fail;
675 if (!ret)
676 return 0;
677 #endif
678 goto fail;
681 leaf = path->nodes[0];
682 nritems = btrfs_header_nritems(leaf);
683 while (1) {
684 if (path->slots[0] >= nritems) {
685 ret = btrfs_next_leaf(root, path);
686 if (ret < 0)
687 err = ret;
688 if (ret)
689 goto fail;
691 leaf = path->nodes[0];
692 nritems = btrfs_header_nritems(leaf);
693 recow = 1;
696 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
697 if (key.objectid != bytenr ||
698 key.type != BTRFS_EXTENT_DATA_REF_KEY)
699 goto fail;
701 ref = btrfs_item_ptr(leaf, path->slots[0],
702 struct btrfs_extent_data_ref);
704 if (match_extent_data_ref(leaf, ref, root_objectid,
705 owner, offset)) {
706 if (recow) {
707 btrfs_release_path(path);
708 goto again;
710 err = 0;
711 break;
713 path->slots[0]++;
715 fail:
716 return err;
719 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
720 struct btrfs_root *root,
721 struct btrfs_path *path,
722 u64 bytenr, u64 parent,
723 u64 root_objectid, u64 owner,
724 u64 offset, int refs_to_add)
726 struct btrfs_key key;
727 struct extent_buffer *leaf;
728 u32 size;
729 u32 num_refs;
730 int ret;
732 key.objectid = bytenr;
733 if (parent) {
734 key.type = BTRFS_SHARED_DATA_REF_KEY;
735 key.offset = parent;
736 size = sizeof(struct btrfs_shared_data_ref);
737 } else {
738 key.type = BTRFS_EXTENT_DATA_REF_KEY;
739 key.offset = hash_extent_data_ref(root_objectid,
740 owner, offset);
741 size = sizeof(struct btrfs_extent_data_ref);
744 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
745 if (ret && ret != -EEXIST)
746 goto fail;
748 leaf = path->nodes[0];
749 if (parent) {
750 struct btrfs_shared_data_ref *ref;
751 ref = btrfs_item_ptr(leaf, path->slots[0],
752 struct btrfs_shared_data_ref);
753 if (ret == 0) {
754 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
755 } else {
756 num_refs = btrfs_shared_data_ref_count(leaf, ref);
757 num_refs += refs_to_add;
758 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
760 } else {
761 struct btrfs_extent_data_ref *ref;
762 while (ret == -EEXIST) {
763 ref = btrfs_item_ptr(leaf, path->slots[0],
764 struct btrfs_extent_data_ref);
765 if (match_extent_data_ref(leaf, ref, root_objectid,
766 owner, offset))
767 break;
768 btrfs_release_path(path);
770 key.offset++;
771 ret = btrfs_insert_empty_item(trans, root, path, &key,
772 size);
773 if (ret && ret != -EEXIST)
774 goto fail;
776 leaf = path->nodes[0];
778 ref = btrfs_item_ptr(leaf, path->slots[0],
779 struct btrfs_extent_data_ref);
780 if (ret == 0) {
781 btrfs_set_extent_data_ref_root(leaf, ref,
782 root_objectid);
783 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
784 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
785 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
786 } else {
787 num_refs = btrfs_extent_data_ref_count(leaf, ref);
788 num_refs += refs_to_add;
789 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
792 btrfs_mark_buffer_dirty(leaf);
793 ret = 0;
794 fail:
795 btrfs_release_path(path);
796 return ret;
799 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
800 struct btrfs_root *root,
801 struct btrfs_path *path,
802 int refs_to_drop)
804 struct btrfs_key key;
805 struct btrfs_extent_data_ref *ref1 = NULL;
806 struct btrfs_shared_data_ref *ref2 = NULL;
807 struct extent_buffer *leaf;
808 u32 num_refs = 0;
809 int ret = 0;
811 leaf = path->nodes[0];
812 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
814 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
815 ref1 = btrfs_item_ptr(leaf, path->slots[0],
816 struct btrfs_extent_data_ref);
817 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
818 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
819 ref2 = btrfs_item_ptr(leaf, path->slots[0],
820 struct btrfs_shared_data_ref);
821 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
822 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
823 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
824 struct btrfs_extent_ref_v0 *ref0;
825 ref0 = btrfs_item_ptr(leaf, path->slots[0],
826 struct btrfs_extent_ref_v0);
827 num_refs = btrfs_ref_count_v0(leaf, ref0);
828 #endif
829 } else {
830 BUG();
833 BUG_ON(num_refs < refs_to_drop);
834 num_refs -= refs_to_drop;
836 if (num_refs == 0) {
837 ret = btrfs_del_item(trans, root, path);
838 } else {
839 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
840 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
841 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
842 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
843 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
844 else {
845 struct btrfs_extent_ref_v0 *ref0;
846 ref0 = btrfs_item_ptr(leaf, path->slots[0],
847 struct btrfs_extent_ref_v0);
848 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
850 #endif
851 btrfs_mark_buffer_dirty(leaf);
853 return ret;
856 static noinline u32 extent_data_ref_count(struct btrfs_path *path,
857 struct btrfs_extent_inline_ref *iref)
859 struct btrfs_key key;
860 struct extent_buffer *leaf;
861 struct btrfs_extent_data_ref *ref1;
862 struct btrfs_shared_data_ref *ref2;
863 u32 num_refs = 0;
865 leaf = path->nodes[0];
866 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
867 if (iref) {
868 if (btrfs_extent_inline_ref_type(leaf, iref) ==
869 BTRFS_EXTENT_DATA_REF_KEY) {
870 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
871 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
872 } else {
873 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
874 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
876 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
877 ref1 = btrfs_item_ptr(leaf, path->slots[0],
878 struct btrfs_extent_data_ref);
879 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
880 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
881 ref2 = btrfs_item_ptr(leaf, path->slots[0],
882 struct btrfs_shared_data_ref);
883 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
884 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
885 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
886 struct btrfs_extent_ref_v0 *ref0;
887 ref0 = btrfs_item_ptr(leaf, path->slots[0],
888 struct btrfs_extent_ref_v0);
889 num_refs = btrfs_ref_count_v0(leaf, ref0);
890 #endif
891 } else {
892 BUG();
894 return num_refs;
897 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
898 struct btrfs_root *root,
899 struct btrfs_path *path,
900 u64 bytenr, u64 parent,
901 u64 root_objectid)
903 struct btrfs_key key;
904 int ret;
906 key.objectid = bytenr;
907 if (parent) {
908 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
909 key.offset = parent;
910 } else {
911 key.type = BTRFS_TREE_BLOCK_REF_KEY;
912 key.offset = root_objectid;
915 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
916 if (ret > 0)
917 ret = -ENOENT;
918 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
919 if (ret == -ENOENT && parent) {
920 btrfs_release_path(path);
921 key.type = BTRFS_EXTENT_REF_V0_KEY;
922 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
923 if (ret > 0)
924 ret = -ENOENT;
926 #endif
927 return ret;
930 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
931 struct btrfs_root *root,
932 struct btrfs_path *path,
933 u64 bytenr, u64 parent,
934 u64 root_objectid)
936 struct btrfs_key key;
937 int ret;
939 key.objectid = bytenr;
940 if (parent) {
941 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
942 key.offset = parent;
943 } else {
944 key.type = BTRFS_TREE_BLOCK_REF_KEY;
945 key.offset = root_objectid;
948 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
950 btrfs_release_path(path);
951 return ret;
954 static inline int extent_ref_type(u64 parent, u64 owner)
956 int type;
957 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
958 if (parent > 0)
959 type = BTRFS_SHARED_BLOCK_REF_KEY;
960 else
961 type = BTRFS_TREE_BLOCK_REF_KEY;
962 } else {
963 if (parent > 0)
964 type = BTRFS_SHARED_DATA_REF_KEY;
965 else
966 type = BTRFS_EXTENT_DATA_REF_KEY;
968 return type;
971 static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
972 struct btrfs_root *root,
973 struct btrfs_path *path,
974 struct btrfs_extent_inline_ref **ref_ret,
975 u64 bytenr, u64 num_bytes,
976 u64 parent, u64 root_objectid,
977 u64 owner, u64 offset, int insert)
979 struct btrfs_key key;
980 struct extent_buffer *leaf;
981 struct btrfs_extent_item *ei;
982 struct btrfs_extent_inline_ref *iref;
983 u64 flags;
984 u32 item_size;
985 unsigned long ptr;
986 unsigned long end;
987 int extra_size;
988 int type;
989 int want;
990 int ret;
991 int err = 0;
992 int skinny_metadata =
993 btrfs_fs_incompat(root->fs_info, SKINNY_METADATA);
995 key.objectid = bytenr;
996 key.type = BTRFS_EXTENT_ITEM_KEY;
997 key.offset = num_bytes;
999 want = extent_ref_type(parent, owner);
1000 if (insert)
1001 extra_size = btrfs_extent_inline_ref_size(want);
1002 else
1003 extra_size = -1;
1005 if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
1006 key.type = BTRFS_METADATA_ITEM_KEY;
1007 key.offset = owner;
1008 } else if (skinny_metadata) {
1009 skinny_metadata = 0;
1012 again:
1013 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1014 if (ret < 0) {
1015 err = ret;
1016 goto out;
1020 * We may be a newly converted file system which still has the old fat
1021 * extent entries for metadata, so try and see if we have one of those.
1023 if (ret > 0 && skinny_metadata) {
1024 skinny_metadata = 0;
1025 if (path->slots[0]) {
1026 path->slots[0]--;
1027 btrfs_item_key_to_cpu(path->nodes[0], &key,
1028 path->slots[0]);
1029 if (key.objectid == bytenr &&
1030 key.type == BTRFS_EXTENT_ITEM_KEY &&
1031 key.offset == num_bytes)
1032 ret = 0;
1034 if (ret) {
1035 key.type = BTRFS_EXTENT_ITEM_KEY;
1036 key.offset = num_bytes;
1037 btrfs_release_path(path);
1038 goto again;
1042 if (ret) {
1043 printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
1044 return -ENOENT;
1047 BUG_ON(ret);
1049 leaf = path->nodes[0];
1050 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1051 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1052 if (item_size < sizeof(*ei)) {
1053 if (!insert) {
1054 err = -ENOENT;
1055 goto out;
1057 ret = convert_extent_item_v0(trans, root, path, owner,
1058 extra_size);
1059 if (ret < 0) {
1060 err = ret;
1061 goto out;
1063 leaf = path->nodes[0];
1064 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1066 #endif
1067 if (item_size < sizeof(*ei)) {
1068 printf("Size is %u, needs to be %u, slot %d\n",
1069 (unsigned)item_size,
1070 (unsigned)sizeof(*ei), path->slots[0]);
1071 btrfs_print_leaf(leaf);
1072 return -EINVAL;
1074 BUG_ON(item_size < sizeof(*ei));
1076 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1077 flags = btrfs_extent_flags(leaf, ei);
1079 ptr = (unsigned long)(ei + 1);
1080 end = (unsigned long)ei + item_size;
1082 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1083 ptr += sizeof(struct btrfs_tree_block_info);
1084 BUG_ON(ptr > end);
1085 } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1086 if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
1087 return -EIO;
1091 err = -ENOENT;
1092 while (1) {
1093 if (ptr >= end) {
1094 WARN_ON(ptr > end);
1095 break;
1097 iref = (struct btrfs_extent_inline_ref *)ptr;
1098 type = btrfs_extent_inline_ref_type(leaf, iref);
1099 if (want < type)
1100 break;
1101 if (want > type) {
1102 ptr += btrfs_extent_inline_ref_size(type);
1103 continue;
1106 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1107 struct btrfs_extent_data_ref *dref;
1108 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1109 if (match_extent_data_ref(leaf, dref, root_objectid,
1110 owner, offset)) {
1111 err = 0;
1112 break;
1114 if (hash_extent_data_ref_item(leaf, dref) <
1115 hash_extent_data_ref(root_objectid, owner, offset))
1116 break;
1117 } else {
1118 u64 ref_offset;
1119 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1120 if (parent > 0) {
1121 if (parent == ref_offset) {
1122 err = 0;
1123 break;
1125 if (ref_offset < parent)
1126 break;
1127 } else {
1128 if (root_objectid == ref_offset) {
1129 err = 0;
1130 break;
1132 if (ref_offset < root_objectid)
1133 break;
1136 ptr += btrfs_extent_inline_ref_size(type);
1138 if (err == -ENOENT && insert) {
1139 if (item_size + extra_size >=
1140 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1141 err = -EAGAIN;
1142 goto out;
1145 * To add new inline back ref, we have to make sure
1146 * there is no corresponding back ref item.
1147 * For simplicity, we just do not add new inline back
1148 * ref if there is any back ref item.
1150 if (find_next_key(path, &key) == 0 && key.objectid == bytenr &&
1151 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1152 err = -EAGAIN;
1153 goto out;
1156 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1157 out:
1158 return err;
/*
 * Insert a new inline backref of @type (derived from @parent/@owner) into an
 * existing extent item at path->slots[0], at the position @iref points to.
 *
 * The item is grown by the size of the new inline ref, bytes after the
 * insertion point are shifted right, the new ref is written in place and the
 * extent item's total ref count is bumped by @refs_to_add.
 *
 * Caller must have positioned @path on the extent item and computed @iref
 * via lookup_inline_extent_backref() with insert == 1.  Returns 0.
 */
static int setup_inline_extent_backref(struct btrfs_root *root,
				       struct btrfs_path *path,
				       struct btrfs_extent_inline_ref *iref,
				       u64 parent, u64 root_objectid,
				       u64 owner, u64 offset, int refs_to_add)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* remember where inside the item the new ref goes, as an offset,
	 * since extending the item below may move the item's start */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(root, path, size);
	BUG_ON(ret);

	/* re-read the item pointer after the extend */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* shift everything after the insertion point right by @size */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		/* full data backref: root/objectid/offset/count payload */
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		/* shared data backref: parent block in ref offset, count after */
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* BTRFS_TREE_BLOCK_REF_KEY: offset holds owning root id */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1221 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1222 struct btrfs_root *root,
1223 struct btrfs_path *path,
1224 struct btrfs_extent_inline_ref **ref_ret,
1225 u64 bytenr, u64 num_bytes, u64 parent,
1226 u64 root_objectid, u64 owner, u64 offset)
1228 int ret;
1230 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1231 bytenr, num_bytes, parent,
1232 root_objectid, owner, offset, 0);
1233 if (ret != -ENOENT)
1234 return ret;
1236 btrfs_release_path(path);
1237 *ref_ret = NULL;
1239 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1240 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1241 root_objectid);
1242 } else {
1243 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1244 root_objectid, owner, offset);
1246 return ret;
/*
 * Adjust the count of an existing inline backref by @refs_to_mod (may be
 * negative).  The extent item's total ref count is adjusted by the same
 * amount.  If the individual ref's count drops to zero, the inline ref is
 * removed and the item is shrunk in place.
 *
 * @iref must point inside the extent item at path->slots[0].  Returns 0.
 */
static int update_inline_extent_backref(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_extent_inline_ref *iref,
					int refs_to_mod)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* first update the item-wide reference count */
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	/* then read the per-ref count; tree block refs have an implicit
	 * count of 1 and may only ever be dropped entirely */
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		/* ref count hit zero: splice the inline ref out of the item
		 * and truncate the item by its size */
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1311 static int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1312 struct btrfs_root *root,
1313 struct btrfs_path *path,
1314 u64 bytenr, u64 num_bytes, u64 parent,
1315 u64 root_objectid, u64 owner,
1316 u64 offset, int refs_to_add)
1318 struct btrfs_extent_inline_ref *iref;
1319 int ret;
1321 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1322 bytenr, num_bytes, parent,
1323 root_objectid, owner, offset, 1);
1324 if (ret == 0) {
1325 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1326 ret = update_inline_extent_backref(trans, root, path, iref,
1327 refs_to_add);
1328 } else if (ret == -ENOENT) {
1329 ret = setup_inline_extent_backref(root, path, iref,
1330 parent, root_objectid,
1331 owner, offset, refs_to_add);
1333 return ret;
1336 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1337 struct btrfs_root *root,
1338 struct btrfs_path *path,
1339 u64 bytenr, u64 parent, u64 root_objectid,
1340 u64 owner, u64 offset, int refs_to_add)
1342 int ret;
1344 if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
1345 ret = insert_extent_data_ref(trans, root, path, bytenr,
1346 parent, root_objectid,
1347 owner, offset, refs_to_add);
1348 } else {
1349 BUG_ON(refs_to_add != 1);
1350 ret = insert_tree_block_ref(trans, root, path, bytenr,
1351 parent, root_objectid);
1353 return ret;
/*
 * Drop @refs_to_drop references from a backref located by the caller.
 * An inline ref (@iref != NULL) is decremented/removed in place; otherwise
 * the keyed item at @path is decremented (data) or deleted outright (tree
 * block, which always carries exactly one ref).
 */
static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	BUG_ON(!is_data && refs_to_drop != 1);

	if (iref)
		return update_inline_extent_backref(trans, root, path, iref,
						    -refs_to_drop);
	if (is_data)
		return remove_extent_data_ref(trans, root, path, refs_to_drop);

	return btrfs_del_item(trans, root, path);
}
/*
 * Increment the reference count of an existing extent by one.
 *
 * First tries to record the new reference inline in the extent item.  If the
 * item is full (-EAGAIN), the item's total count is bumped where the path
 * already points and a separate keyed backref item is inserted instead.
 * Any hard failure trips the BUG_ON at the end.
 */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_BACK;

	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	/* -EAGAIN: path is positioned on the extent item; bump the total
	 * count here and add the backref as a standalone item below */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + 1);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = READA_BACK;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, 1);
	if (ret)
		err = ret;
out:
	btrfs_free_path(path);
	/* flush deferred extent-tree inserts/deletes queued by this change */
	finish_current_insert(trans);
	del_pending_extents(trans);
	BUG_ON(err);
	return err;
}
/*
 * Flush all deferred extent tree work: pending reserved-block inserts and
 * pending extent deletions.  Always returns 0.
 */
int btrfs_extent_post_op(struct btrfs_trans_handle *trans)
{
	finish_current_insert(trans);
	del_pending_extents(trans);
	return 0;
}
1436 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1437 struct btrfs_fs_info *fs_info, u64 bytenr,
1438 u64 offset, int metadata, u64 *refs, u64 *flags)
1440 struct btrfs_path *path;
1441 int ret;
1442 struct btrfs_key key;
1443 struct extent_buffer *l;
1444 struct btrfs_extent_item *item;
1445 u32 item_size;
1446 u64 num_refs;
1447 u64 extent_flags;
1449 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
1450 offset = fs_info->nodesize;
1451 metadata = 0;
1454 path = btrfs_alloc_path();
1455 if (!path)
1456 return -ENOMEM;
1457 path->reada = READA_BACK;
1459 key.objectid = bytenr;
1460 key.offset = offset;
1461 if (metadata)
1462 key.type = BTRFS_METADATA_ITEM_KEY;
1463 else
1464 key.type = BTRFS_EXTENT_ITEM_KEY;
1466 again:
1467 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
1468 if (ret < 0)
1469 goto out;
1472 * Deal with the fact that we may have mixed SKINNY and normal refs. If
1473 * we didn't find what we wanted check and see if we have a normal ref
1474 * right next to us, or re-search if we are on the edge of the leaf just
1475 * to make sure.
1477 if (ret > 0 && metadata) {
1478 if (path->slots[0]) {
1479 path->slots[0]--;
1480 btrfs_item_key_to_cpu(path->nodes[0], &key,
1481 path->slots[0]);
1482 if (key.objectid == bytenr &&
1483 key.type == BTRFS_EXTENT_ITEM_KEY &&
1484 key.offset == fs_info->nodesize)
1485 ret = 0;
1488 if (ret) {
1489 btrfs_release_path(path);
1490 key.type = BTRFS_EXTENT_ITEM_KEY;
1491 key.offset = fs_info->nodesize;
1492 metadata = 0;
1493 goto again;
1497 if (ret != 0) {
1498 ret = -EIO;
1499 goto out;
1502 l = path->nodes[0];
1503 item_size = btrfs_item_size_nr(l, path->slots[0]);
1504 if (item_size >= sizeof(*item)) {
1505 item = btrfs_item_ptr(l, path->slots[0],
1506 struct btrfs_extent_item);
1507 num_refs = btrfs_extent_refs(l, item);
1508 extent_flags = btrfs_extent_flags(l, item);
1509 } else {
1510 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1511 struct btrfs_extent_item_v0 *ei0;
1512 BUG_ON(item_size != sizeof(*ei0));
1513 ei0 = btrfs_item_ptr(l, path->slots[0],
1514 struct btrfs_extent_item_v0);
1515 num_refs = btrfs_extent_refs_v0(l, ei0);
1516 /* FIXME: this isn't correct for data */
1517 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
1518 #else
1519 BUG();
1520 #endif
1522 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
1523 if (refs)
1524 *refs = num_refs;
1525 if (flags)
1526 *flags = extent_flags;
1527 out:
1528 btrfs_free_path(path);
1529 return ret;
/*
 * OR @flags into the extent flags of the tree block at @bytenr / @level.
 *
 * Searches for the skinny METADATA_ITEM first when the feature is enabled,
 * falling back to the regular EXTENT_ITEM keyed by nodesize (mixed-format
 * filesystems).  A missing extent item is fatal (BUG).  Converts v0 items
 * when compiled with BTRFS_COMPAT_EXTENT_TREE_V0.
 */
int btrfs_set_block_flags(struct btrfs_trans_handle *trans, u64 bytenr,
			  int level, u64 flags)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	u32 item_size;
	int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	key.objectid = bytenr;
	if (skinny_metadata) {
		/* skinny items are keyed by level, not byte length */
		key.offset = level;
		key.type = BTRFS_METADATA_ITEM_KEY;
	} else {
		key.offset = fs_info->nodesize;
		key.type = BTRFS_EXTENT_ITEM_KEY;
	}

again:
	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	/* skinny miss: check the previous slot for a regular EXTENT_ITEM,
	 * then re-search with the regular key if still not found */
	if (ret > 0 && skinny_metadata) {
		skinny_metadata = 0;
		if (path->slots[0]) {
			path->slots[0]--;
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == bytenr &&
			    key.offset == fs_info->nodesize &&
			    key.type == BTRFS_EXTENT_ITEM_KEY)
				ret = 0;
		}
		if (ret) {
			btrfs_release_path(path);
			key.offset = fs_info->nodesize;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			goto again;
		}
	}

	if (ret != 0) {
		btrfs_print_leaf(path->nodes[0]);
		printk("failed to find block number %Lu\n",
		       (unsigned long long)bytenr);
		BUG();
	}

	l = path->nodes[0];
	item_size = btrfs_item_size_nr(l, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* upgrade a v0 item in place before touching its flags */
	if (item_size < sizeof(*item)) {
		ret = convert_extent_item_v0(trans, fs_info->extent_root, path,
					     (u64)-1, 0);
		if (ret < 0)
			goto out;

		l = path->nodes[0];
		item_size = btrfs_item_size_nr(l, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*item));
	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	flags |= btrfs_extent_flags(l, item);
	btrfs_set_extent_flags(l, item, flags);
out:
	btrfs_free_path(path);
	finish_current_insert(trans);
	del_pending_extents(trans);
	return ret;
}
/*
 * Walk every pointer held by @buf and increment (@inc != 0) or decrement
 * the reference count of what it points to: file extent disk ranges for a
 * leaf, child tree blocks for a node.
 *
 * @record_parent selects full-backref style (parent = buf->start) versus
 * rooted backrefs (parent = 0).  Leaves of non-reference-counted roots are
 * skipped entirely.  Returns 0 on success or the first failure from the
 * inc/free callback.
 */
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct extent_buffer *buf,
			   int record_parent, int inc)
{
	u64 bytenr;
	u64 num_bytes;
	u64 parent;
	u64 ref_root;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret = 0;
	/* either btrfs_inc_extent_ref or btrfs_free_extent; both share this
	 * signature */
	int (*process_func)(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    u64, u64, u64, u64, u64, u64);

	ref_root = btrfs_header_owner(buf);
	nritems = btrfs_header_nritems(buf);
	level = btrfs_header_level(buf);

	/* data refs only matter for reference-counted (COW-shared) roots */
	if (!root->ref_cows && level == 0)
		return 0;

	if (inc)
		process_func = btrfs_inc_extent_ref;
	else
		process_func = btrfs_free_extent;

	if (record_parent)
		parent = buf->start;
	else
		parent = 0;

	for (i = 0; i < nritems; i++) {
		cond_resched();
		if (level == 0) {
			btrfs_item_key_to_cpu(buf, &key, i);
			if (key.type != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents have no separate disk allocation */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole */
			if (bytenr == 0)
				continue;

			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
			/* backrefs are keyed by the extent's original file
			 * offset, so subtract the in-extent offset */
			key.offset -= btrfs_file_extent_offset(buf, fi);
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, key.objectid,
					   key.offset);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			num_bytes = root->fs_info->nodesize;
			ret = process_func(trans, root, bytenr, num_bytes,
					   parent, ref_root, level - 1, 0);
			if (ret) {
				WARN_ON(1);
				goto fail;
			}
		}
	}
	return 0;
fail:
	WARN_ON(1);
	return ret;
}
/*
 * Increment refs on everything @buf points to.
 * Thin wrapper around __btrfs_mod_ref() with inc == 1.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, 1);
}
/*
 * Decrement refs on everything @buf points to.
 * Thin wrapper around __btrfs_mod_ref() with inc == 0.
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int record_parent)
{
	return __btrfs_mod_ref(trans, root, buf, record_parent, 0);
}
1701 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1702 struct btrfs_path *path,
1703 struct btrfs_block_group_cache *cache)
1705 int ret;
1706 int pending_ret;
1707 struct btrfs_root *extent_root = trans->fs_info->extent_root;
1708 unsigned long bi;
1709 struct extent_buffer *leaf;
1711 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1712 if (ret < 0)
1713 goto fail;
1714 BUG_ON(ret);
1716 leaf = path->nodes[0];
1717 bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1718 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1719 btrfs_mark_buffer_dirty(leaf);
1720 btrfs_release_path(path);
1721 fail:
1722 finish_current_insert(trans);
1723 pending_ret = del_pending_extents(trans);
1724 if (ret)
1725 return ret;
1726 if (pending_ret)
1727 return pending_ret;
1728 return 0;
1732 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1733 struct btrfs_root *root)
1735 struct extent_io_tree *block_group_cache;
1736 struct btrfs_block_group_cache *cache;
1737 int ret;
1738 struct btrfs_path *path;
1739 u64 last = 0;
1740 u64 start;
1741 u64 end;
1742 u64 ptr;
1744 block_group_cache = &root->fs_info->block_group_cache;
1745 path = btrfs_alloc_path();
1746 if (!path)
1747 return -ENOMEM;
1749 while(1) {
1750 ret = find_first_extent_bit(block_group_cache, last,
1751 &start, &end, BLOCK_GROUP_DIRTY);
1752 if (ret) {
1753 if (last == 0)
1754 break;
1755 last = 0;
1756 continue;
1759 last = end + 1;
1760 ret = get_state_private(block_group_cache, start, &ptr);
1761 BUG_ON(ret);
1763 clear_extent_bits(block_group_cache, start, end,
1764 BLOCK_GROUP_DIRTY);
1766 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1767 ret = write_one_cache_group(trans, path, cache);
1769 btrfs_free_path(path);
1770 return 0;
1773 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1774 u64 flags)
1776 struct btrfs_space_info *found;
1778 flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
1780 list_for_each_entry(found, &info->space_info, list) {
1781 if (found->flags & flags)
1782 return found;
1784 return NULL;
1788 static int free_space_info(struct btrfs_fs_info *fs_info, u64 flags,
1789 u64 total_bytes, u64 bytes_used,
1790 struct btrfs_space_info **space_info)
1792 struct btrfs_space_info *found;
1794 /* only support free block group which is empty */
1795 if (bytes_used)
1796 return -ENOTEMPTY;
1798 found = __find_space_info(fs_info, flags);
1799 if (!found)
1800 return -ENOENT;
1801 if (found->total_bytes < total_bytes) {
1802 fprintf(stderr,
1803 "WARNING: bad space info to free %llu only have %llu\n",
1804 total_bytes, found->total_bytes);
1805 return -EINVAL;
1807 found->total_bytes -= total_bytes;
1808 if (space_info)
1809 *space_info = found;
1810 return 0;
1813 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1814 u64 total_bytes, u64 bytes_used,
1815 struct btrfs_space_info **space_info)
1817 struct btrfs_space_info *found;
1819 found = __find_space_info(info, flags);
1820 if (found) {
1821 found->total_bytes += total_bytes;
1822 found->bytes_used += bytes_used;
1823 if (found->total_bytes < found->bytes_used) {
1824 fprintf(stderr, "warning, bad space info total_bytes "
1825 "%llu used %llu\n",
1826 (unsigned long long)found->total_bytes,
1827 (unsigned long long)found->bytes_used);
1829 *space_info = found;
1830 return 0;
1832 found = kmalloc(sizeof(*found), GFP_NOFS);
1833 if (!found)
1834 return -ENOMEM;
1836 list_add(&found->list, &info->space_info);
1837 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
1838 found->total_bytes = total_bytes;
1839 found->bytes_used = bytes_used;
1840 found->bytes_pinned = 0;
1841 found->full = 0;
1842 *space_info = found;
1843 return 0;
1847 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1849 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1850 BTRFS_BLOCK_GROUP_RAID1 |
1851 BTRFS_BLOCK_GROUP_RAID10 |
1852 BTRFS_BLOCK_GROUP_RAID5 |
1853 BTRFS_BLOCK_GROUP_RAID6 |
1854 BTRFS_BLOCK_GROUP_DUP);
1855 if (extra_flags) {
1856 if (flags & BTRFS_BLOCK_GROUP_DATA)
1857 fs_info->avail_data_alloc_bits |= extra_flags;
1858 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1859 fs_info->avail_metadata_alloc_bits |= extra_flags;
1860 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1861 fs_info->avail_system_alloc_bits |= extra_flags;
/*
 * Allocate a new chunk of type @flags if the corresponding space_info is
 * getting full (used + pinned + the pending @alloc_bytes crosses ~70% of
 * its total).
 *
 * Silently does nothing when the space_info is full, below the threshold,
 * or when allocation of this chunk type is administratively avoided.
 * -ENOSPC from the chunk allocator marks the space_info full and is not an
 * error.  Always returns 0; hard failures BUG.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 alloc_bytes,
			  u64 flags)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret;

	space_info = __find_space_info(fs_info, flags);
	if (!space_info) {
		/* first allocation of this type: create empty accounting */
		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	if (space_info->full)
		return 0;

	/* only allocate when usage would exceed ~70% of the type's space */
	thresh = div_factor(space_info->total_bytes, 7);
	if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
	    thresh)
		return 0;

	/*
	 * Avoid allocating given chunk type
	 */
	if (fs_info->avoid_meta_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_METADATA))
		return 0;
	if (fs_info->avoid_sys_chunk_alloc &&
	    (flags & BTRFS_BLOCK_GROUP_SYSTEM))
		return 0;

	ret = btrfs_alloc_chunk(trans, fs_info, &start, &num_bytes,
				space_info->flags);
	if (ret == -ENOSPC) {
		/* no room for another chunk: remember and carry on */
		space_info->full = 1;
		return 0;
	}

	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, fs_info, 0, space_info->flags,
				     start, num_bytes);
	BUG_ON(ret);
	return 0;
}
/*
 * Update used-bytes accounting for the range [@bytenr, @bytenr + @num_bytes)
 * at every level: superblock, this root's root item, each covered block
 * group item (marked dirty) and its space_info.
 *
 * @alloc selects add versus subtract.  On free, @mark_free additionally
 * returns the range to the in-memory free space cache.  The range may span
 * several block groups.  Returns 0, or -1 if a covered block group is
 * missing from the cache.
 */
static int update_block_group(struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	/* block accounting for super block */
	old_val = btrfs_super_bytes_used(info->super_copy);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_super_bytes_used(info->super_copy, old_val);

	/* block accounting for root item */
	old_val = btrfs_root_used(&root->root_item);
	if (alloc)
		old_val += num_bytes;
	else
		old_val -= num_bytes;
	btrfs_set_root_used(&root->root_item, old_val);

	/* walk the range one block group at a time */
	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		/* the block group item must be written back at commit */
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY);

		old_val = btrfs_block_group_used(&cache->item);
		/* clamp to what remains inside this block group */
		num_bytes = min(total, cache->key.offset - byte_in_group);

		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + num_bytes - 1);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
1976 static int update_pinned_extents(struct btrfs_fs_info *fs_info,
1977 u64 bytenr, u64 num, int pin)
1979 u64 len;
1980 struct btrfs_block_group_cache *cache;
1982 if (pin) {
1983 set_extent_dirty(&fs_info->pinned_extents,
1984 bytenr, bytenr + num - 1);
1985 } else {
1986 clear_extent_dirty(&fs_info->pinned_extents,
1987 bytenr, bytenr + num - 1);
1989 while (num > 0) {
1990 cache = btrfs_lookup_block_group(fs_info, bytenr);
1991 if (!cache) {
1992 len = min((u64)fs_info->sectorsize, num);
1993 goto next;
1995 WARN_ON(!cache);
1996 len = min(num, cache->key.offset -
1997 (bytenr - cache->key.objectid));
1998 if (pin) {
1999 cache->pinned += len;
2000 cache->space_info->bytes_pinned += len;
2001 fs_info->total_pinned += len;
2002 } else {
2003 cache->pinned -= len;
2004 cache->space_info->bytes_pinned -= len;
2005 fs_info->total_pinned -= len;
2007 next:
2008 bytenr += len;
2009 num -= len;
2011 return 0;
/*
 * At commit time, release everything recorded in @unpin: each dirty range
 * is unpinned (accounting updated), cleared from @unpin and handed back to
 * the free space cache.  Always returns 0.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_io_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;
	struct extent_io_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;

	while(1) {
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		/* pin == 0: drop the pinned accounting for this range */
		update_pinned_extents(trans->fs_info, start, end + 1 - start,
				      0);
		clear_extent_dirty(unpin, start, end);
		set_extent_dirty(free_space_cache, start, end);
	}
	return 0;
}
2037 static int extent_root_pending_ops(struct btrfs_fs_info *info)
2039 u64 start;
2040 u64 end;
2041 int ret;
2043 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
2044 &end, EXTENT_LOCKED);
2045 if (!ret) {
2046 ret = find_first_extent_bit(&info->pending_del, 0, &start, &end,
2047 EXTENT_LOCKED);
2049 return ret == 0;
/*
 * Drain the pending-insert queue: for every EXTENT_LOCKED range in
 * info->extent_ins, insert the corresponding reserved tree block's extent
 * item (skinny or regular, depending on the filesystem feature), then clear
 * the range and free its pending_extent_op.
 *
 * Only PENDING_EXTENT_INSERT ops are valid here; anything else BUGs.
 * Always returns 0.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans)
{
	u64 start;
	u64 end;
	u64 priv;
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct pending_extent_op *extent_op;
	struct btrfs_key key;
	int ret;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	while(1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		/* the op struct is stashed as the range's private pointer */
		ret = get_state_private(&info->extent_ins, start, &priv);
		BUG_ON(ret);
		extent_op = (struct pending_extent_op *)(unsigned long)priv;

		if (extent_op->type == PENDING_EXTENT_INSERT) {
			key.objectid = start;
			if (skinny_metadata) {
				key.offset = extent_op->level;
				key.type = BTRFS_METADATA_ITEM_KEY;
			} else {
				key.offset = extent_op->num_bytes;
				key.type = BTRFS_EXTENT_ITEM_KEY;
			}
			ret = alloc_reserved_tree_block(trans, extent_root,
						extent_root->root_key.objectid,
						trans->transid,
						extent_op->flags,
						&extent_op->key,
						extent_op->level, &key);
			BUG_ON(ret);
		} else {
			BUG_ON(1);
		}

		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED);
		kfree(extent_op);
	}
	return 0;
}
/*
 * Pin the range [@bytenr, @bytenr + @num_bytes) so it cannot be reallocated
 * until the transaction commits.
 *
 * As an optimization, a tree block that was created in this very transaction
 * and never written can be cleaned and reused immediately instead of pinned;
 * in that case 1 is returned (caller marks the space free).  Otherwise the
 * range is pinned and 0 is returned.
 */
static int pin_down_bytes(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root,
			  u64 bytenr, u64 num_bytes, int is_data)
{
	int err = 0;
	struct extent_buffer *buf;

	/* data blocks have no in-memory buffer to inspect: always pin */
	if (is_data)
		goto pinit;

	buf = btrfs_find_tree_block(root->fs_info, bytenr, num_bytes);
	if (!buf)
		goto pinit;

	/* we can reuse a block if it hasn't been written
	 * and it is from this transaction. We can't
	 * reuse anything from the tree log root because
	 * it has tiny sub-transactions.
	 */
	if (btrfs_buffer_uptodate(buf, 0)) {
		u64 header_owner = btrfs_header_owner(buf);
		u64 header_transid = btrfs_header_generation(buf);
		if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
		    header_transid == trans->transid &&
		    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
			clean_tree_block(buf);
			free_extent_buffer(buf);
			return 1;
		}
	}
	free_extent_buffer(buf);
pinit:
	update_pinned_extents(trans->fs_info, bytenr, num_bytes, 1);

	BUG_ON(err < 0);
	return 0;
}
/*
 * Pin the given byte range until transaction commit.
 * Thin wrapper around update_pinned_extents() with pin == 1.
 */
void btrfs_pin_extent(struct btrfs_fs_info *fs_info,
		      u64 bytenr, u64 num_bytes)
{
	update_pinned_extents(fs_info, bytenr, num_bytes, 1);
}
/*
 * Unpin the given byte range.
 * Thin wrapper around update_pinned_extents() with pin == 0.
 */
void btrfs_unpin_extent(struct btrfs_fs_info *fs_info,
			u64 bytenr, u64 num_bytes)
{
	update_pinned_extents(fs_info, bytenr, num_bytes, 0);
}
/*
 * remove an extent from the root, returns 0 on success
 *
 * Drops @refs_to_drop references (described by parent/root/owner/offset)
 * from the extent at [@bytenr, @bytenr + @num_bytes).  The matching backref
 * (inline or keyed) is located and removed or decremented; if the extent's
 * total count reaches zero the extent item itself is deleted, the space is
 * pinned (or freed immediately if the block is reusable), data checksums
 * are dropped and block group accounting is updated.
 */
static int __free_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner_objectid,
			 u64 owner_offset, int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	int skinny_metadata =
		btrfs_fs_incompat(extent_root->fs_info, SKINNY_METADATA);

	/* debugging hook used by fsck to trace every extent free */
	if (root->fs_info->free_extent_hook) {
		root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
						parent, root_objectid, owner_objectid,
						owner_offset, refs_to_drop);
	}
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_BACK;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
	/* data extents never use skinny (METADATA_ITEM) keys */
	if (is_data)
		skinny_metadata = 0;
	BUG_ON(!is_data && refs_to_drop != 1);

	ret = lookup_extent_backref(trans, extent_root, path, &iref,
				    bytenr, num_bytes, parent,
				    root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/* backref found; look a few slots back in the same leaf for
		 * the extent item itself (it sorts just before its refs) */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}
			/* don't scan more than a handful of slots back */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
		if (found_extent && item_size < sizeof(*ei))
			found_extent = 0;
#endif
		if (!found_extent) {
			/* the backref is a separate item: remove it first,
			 * then search for the extent item explicitly */
			BUG_ON(iref);
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop,
						    is_data);
			BUG_ON(ret);
			btrfs_release_path(path);

			key.objectid = bytenr;

			if (skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			} else {
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			/* skinny miss: the previous slot may hold the
			 * regular EXTENT_ITEM on mixed filesystems */
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0],
						      &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* retry once with the regular key */
				skinny_metadata = 0;
				btrfs_release_path(path);
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				printk(KERN_ERR "umm, got %d back from search"
				       ", was looking for %llu\n", ret,
				       (unsigned long long)bytenr);
				btrfs_print_leaf(path->nodes[0]);
			}
			BUG_ON(ret);
			extent_slot = path->slots[0];
		}
	} else {
		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
		       "parent %llu root %llu owner %llu offset %llu\n",
		       (unsigned long long)bytenr,
		       (unsigned long long)parent,
		       (unsigned long long)root_objectid,
		       (unsigned long long)owner_objectid,
		       (unsigned long long)owner_offset);
		ret = -EIO;
		goto fail;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, extent_slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* upgrade a v0 extent item in place, then re-find it */
	if (item_size < sizeof(*ei)) {
		BUG_ON(found_extent || extent_slot != path->slots[0]);
		ret = convert_extent_item_v0(trans, extent_root, path,
					     owner_objectid, 0);
		BUG_ON(ret < 0);

		btrfs_release_path(path);

		key.objectid = bytenr;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = num_bytes;

		ret = btrfs_search_slot(trans, extent_root, &key, path,
					-1, 1);
		if (ret) {
			printk(KERN_ERR "umm, got %d back from search"
			       ", was looking for %llu\n", ret,
			       (unsigned long long)bytenr);
			btrfs_print_leaf(path->nodes[0]);
		}
		BUG_ON(ret);
		extent_slot = path->slots[0];
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, extent_slot);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		/* non-skinny tree blocks embed a tree_block_info; sanity
		 * check the recorded level against the owner we were given */
		struct btrfs_tree_block_info *bi;
		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	BUG_ON(refs < refs_to_drop);
	refs -= refs_to_drop;

	if (refs > 0) {
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			BUG_ON(!found_extent);
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop,
						    is_data);
			BUG_ON(ret);
		}
	} else {
		/* last reference gone: delete the extent item (and the
		 * adjacent backref item, when there is one) */
		int mark_free = 0;
		int pin = 1;

		if (found_extent) {
			BUG_ON(is_data && refs_to_drop !=
			       extent_data_ref_count(path, iref));
			if (iref) {
				BUG_ON(path->slots[0] != extent_slot);
			} else {
				/* keyed backref sits right after the extent
				 * item; delete both in one call */
				BUG_ON(path->slots[0] != extent_slot + 1);
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		if (pin) {
			ret = pin_down_bytes(trans, root, bytenr, num_bytes,
					     is_data);
			/* ret > 0: block reusable now, free it immediately */
			if (ret > 0)
				mark_free = 1;
			BUG_ON(ret < 0);
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		BUG_ON(ret);
		btrfs_release_path(path);

		if (is_data) {
			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
			BUG_ON(ret);
		}

		update_block_group(root, bytenr, num_bytes, 0, mark_free);
	}
fail:
	btrfs_free_path(path);
	finish_current_insert(trans);
	return ret;
}
2389 * find all the blocks marked as pending in the radix tree and remove
2390 * them from the extent map
2392 static int del_pending_extents(struct btrfs_trans_handle *trans)
2394 int ret;
2395 int err = 0;
2396 u64 start;
2397 u64 end;
2398 u64 priv;
2399 struct extent_io_tree *pending_del;
2400 struct extent_io_tree *extent_ins;
2401 struct pending_extent_op *extent_op;
2402 struct btrfs_fs_info *fs_info = trans->fs_info;
2403 struct btrfs_root *extent_root = fs_info->extent_root;
2405 extent_ins = &extent_root->fs_info->extent_ins;
2406 pending_del = &extent_root->fs_info->pending_del;
2408 while(1) {
2409 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2410 EXTENT_LOCKED);
2411 if (ret)
2412 break;
2414 ret = get_state_private(pending_del, start, &priv);
2415 BUG_ON(ret);
2416 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2418 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED);
2420 if (!test_range_bit(extent_ins, start, end,
2421 EXTENT_LOCKED, 0)) {
2422 ret = __free_extent(trans, extent_root,
2423 start, end + 1 - start, 0,
2424 extent_root->root_key.objectid,
2425 extent_op->level, 0, 1);
2426 kfree(extent_op);
2427 } else {
2428 kfree(extent_op);
2429 ret = get_state_private(extent_ins, start, &priv);
2430 BUG_ON(ret);
2431 extent_op = (struct pending_extent_op *)
2432 (unsigned long)priv;
2434 clear_extent_bits(extent_ins, start, end,
2435 EXTENT_LOCKED);
2437 if (extent_op->type == PENDING_BACKREF_UPDATE)
2438 BUG_ON(1);
2440 kfree(extent_op);
2442 if (ret)
2443 err = ret;
2445 return err;
2449 int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
2450 struct btrfs_root *root,
2451 struct extent_buffer *buf,
2452 u64 parent, int last_ref)
2454 return btrfs_free_extent(trans, root, buf->start, buf->len, parent,
2455 root->root_key.objectid,
2456 btrfs_header_level(buf), 0);
2460 * remove an extent from the root, returns 0 on success
2463 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2464 struct btrfs_root *root,
2465 u64 bytenr, u64 num_bytes, u64 parent,
2466 u64 root_objectid, u64 owner, u64 offset)
2468 struct btrfs_root *extent_root = root->fs_info->extent_root;
2469 int pending_ret;
2470 int ret;
2472 WARN_ON(num_bytes < root->fs_info->sectorsize);
2473 if (root == extent_root) {
2474 struct pending_extent_op *extent_op;
2476 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2477 BUG_ON(!extent_op);
2479 extent_op->type = PENDING_EXTENT_DELETE;
2480 extent_op->bytenr = bytenr;
2481 extent_op->num_bytes = num_bytes;
2482 extent_op->level = (int)owner;
2484 set_extent_bits(&root->fs_info->pending_del,
2485 bytenr, bytenr + num_bytes - 1,
2486 EXTENT_LOCKED);
2487 set_state_private(&root->fs_info->pending_del,
2488 bytenr, (unsigned long)extent_op);
2489 return 0;
2491 ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2492 root_objectid, owner, offset, 1);
2493 pending_ret = del_pending_extents(trans);
2494 return ret ? ret : pending_ret;
2497 static u64 stripe_align(struct btrfs_root *root, u64 val)
2499 return round_up(val, (u64)root->fs_info->stripesize);
2503 * walks the btree of allocated extents and find a hole of a given size.
2504 * The key ins is changed to record the hole:
2505 * ins->objectid == block start
2506 * ins->flags = BTRFS_EXTENT_ITEM_KEY
2507 * ins->offset == number of blocks
2508 * Any available blocks before search_start are skipped.
2510 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
2511 struct btrfs_root *orig_root,
2512 u64 num_bytes, u64 empty_size,
2513 u64 search_start, u64 search_end,
2514 u64 hint_byte, struct btrfs_key *ins,
2515 u64 exclude_start, u64 exclude_nr,
2516 int data)
2518 int ret;
2519 u64 orig_search_start = search_start;
2520 struct btrfs_root * root = orig_root->fs_info->extent_root;
2521 struct btrfs_fs_info *info = root->fs_info;
2522 u64 total_needed = num_bytes;
2523 struct btrfs_block_group_cache *block_group;
2524 int full_scan = 0;
2525 int wrapped = 0;
2527 WARN_ON(num_bytes < info->sectorsize);
2528 ins->type = BTRFS_EXTENT_ITEM_KEY;
2530 search_start = stripe_align(root, search_start);
2532 if (hint_byte) {
2533 block_group = btrfs_lookup_first_block_group(info, hint_byte);
2534 if (!block_group)
2535 hint_byte = search_start;
2536 block_group = btrfs_find_block_group(root, block_group,
2537 hint_byte, data, 1);
2538 } else {
2539 block_group = btrfs_find_block_group(root,
2540 trans->block_group,
2541 search_start, data, 1);
2544 total_needed += empty_size;
2546 check_failed:
2547 search_start = stripe_align(root, search_start);
2548 if (!block_group) {
2549 block_group = btrfs_lookup_first_block_group(info,
2550 search_start);
2551 if (!block_group)
2552 block_group = btrfs_lookup_first_block_group(info,
2553 orig_search_start);
2555 ret = find_search_start(root, &block_group, &search_start,
2556 total_needed, data);
2557 if (ret)
2558 goto new_group;
2560 ins->objectid = search_start;
2561 ins->offset = num_bytes;
2563 if (ins->objectid + num_bytes >
2564 block_group->key.objectid + block_group->key.offset) {
2565 search_start = block_group->key.objectid +
2566 block_group->key.offset;
2567 goto new_group;
2570 if (test_range_bit(&info->extent_ins, ins->objectid,
2571 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
2572 search_start = ins->objectid + num_bytes;
2573 goto new_group;
2576 if (test_range_bit(&info->pinned_extents, ins->objectid,
2577 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2578 search_start = ins->objectid + num_bytes;
2579 goto new_group;
2582 if (info->excluded_extents &&
2583 test_range_bit(info->excluded_extents, ins->objectid,
2584 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
2585 search_start = ins->objectid + num_bytes;
2586 goto new_group;
2589 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2590 ins->objectid < exclude_start + exclude_nr)) {
2591 search_start = exclude_start + exclude_nr;
2592 goto new_group;
2595 if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
2596 if (check_crossing_stripes(info, ins->objectid, num_bytes)) {
2597 struct btrfs_block_group_cache *bg_cache;
2598 u64 bg_offset;
2600 bg_cache = btrfs_lookup_block_group(info, ins->objectid);
2601 if (!bg_cache)
2602 goto no_bg_cache;
2603 bg_offset = ins->objectid - bg_cache->key.objectid;
2605 search_start = round_up(
2606 bg_offset + num_bytes, BTRFS_STRIPE_LEN) +
2607 bg_cache->key.objectid;
2608 goto new_group;
2610 no_bg_cache:
2611 block_group = btrfs_lookup_block_group(info, ins->objectid);
2612 if (block_group)
2613 trans->block_group = block_group;
2615 ins->offset = num_bytes;
2616 return 0;
2618 new_group:
2619 block_group = btrfs_lookup_first_block_group(info, search_start);
2620 if (!block_group) {
2621 search_start = orig_search_start;
2622 if (full_scan) {
2623 ret = -ENOSPC;
2624 goto error;
2626 if (wrapped) {
2627 if (!full_scan)
2628 total_needed -= empty_size;
2629 full_scan = 1;
2630 } else
2631 wrapped = 1;
2633 cond_resched();
2634 block_group = btrfs_find_block_group(root, block_group,
2635 search_start, data, 0);
2636 goto check_failed;
2638 error:
2639 return ret;
2642 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2643 struct btrfs_root *root,
2644 u64 num_bytes, u64 empty_size,
2645 u64 hint_byte, u64 search_end,
2646 struct btrfs_key *ins, bool is_data)
2648 int ret;
2649 u64 search_start = 0;
2650 u64 alloc_profile;
2651 u64 profile;
2652 struct btrfs_fs_info *info = root->fs_info;
2654 if (is_data) {
2655 alloc_profile = info->avail_data_alloc_bits &
2656 info->data_alloc_profile;
2657 profile = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2658 } else if (info->system_allocs == 1 || root == info->chunk_root) {
2659 alloc_profile = info->avail_system_alloc_bits &
2660 info->system_alloc_profile;
2661 profile = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2662 } else {
2663 alloc_profile = info->avail_metadata_alloc_bits &
2664 info->metadata_alloc_profile;
2665 profile = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2668 if (root->ref_cows) {
2669 if (!(profile & BTRFS_BLOCK_GROUP_METADATA)) {
2670 ret = do_chunk_alloc(trans, info,
2671 num_bytes,
2672 BTRFS_BLOCK_GROUP_METADATA);
2673 BUG_ON(ret);
2675 ret = do_chunk_alloc(trans, info,
2676 num_bytes + SZ_2M, profile);
2677 BUG_ON(ret);
2680 WARN_ON(num_bytes < info->sectorsize);
2681 ret = find_free_extent(trans, root, num_bytes, empty_size,
2682 search_start, search_end, hint_byte, ins,
2683 trans->alloc_exclude_start,
2684 trans->alloc_exclude_nr, profile);
2685 if (ret < 0)
2686 return ret;
2687 clear_extent_dirty(&info->free_space_cache,
2688 ins->objectid, ins->objectid + ins->offset - 1);
2689 return ret;
2692 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
2693 struct btrfs_root *root,
2694 u64 root_objectid, u64 generation,
2695 u64 flags, struct btrfs_disk_key *key,
2696 int level, struct btrfs_key *ins)
2698 int ret;
2699 struct btrfs_fs_info *fs_info = root->fs_info;
2700 struct btrfs_extent_item *extent_item;
2701 struct btrfs_tree_block_info *block_info;
2702 struct btrfs_extent_inline_ref *iref;
2703 struct btrfs_path *path;
2704 struct extent_buffer *leaf;
2705 u32 size = sizeof(*extent_item) + sizeof(*iref);
2706 int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
2708 if (!skinny_metadata)
2709 size += sizeof(*block_info);
2711 path = btrfs_alloc_path();
2712 if (!path)
2713 return -ENOMEM;
2715 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
2716 ins, size);
2717 BUG_ON(ret);
2719 leaf = path->nodes[0];
2720 extent_item = btrfs_item_ptr(leaf, path->slots[0],
2721 struct btrfs_extent_item);
2722 btrfs_set_extent_refs(leaf, extent_item, 1);
2723 btrfs_set_extent_generation(leaf, extent_item, generation);
2724 btrfs_set_extent_flags(leaf, extent_item,
2725 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
2727 if (skinny_metadata) {
2728 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
2729 } else {
2730 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
2731 btrfs_set_tree_block_key(leaf, block_info, key);
2732 btrfs_set_tree_block_level(leaf, block_info, level);
2733 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
2736 btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
2737 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
2739 btrfs_mark_buffer_dirty(leaf);
2740 btrfs_free_path(path);
2742 ret = update_block_group(root, ins->objectid, fs_info->nodesize,
2743 1, 0);
2744 return ret;
2747 static int alloc_tree_block(struct btrfs_trans_handle *trans,
2748 struct btrfs_root *root, u64 num_bytes,
2749 u64 root_objectid, u64 generation,
2750 u64 flags, struct btrfs_disk_key *key,
2751 int level, u64 empty_size, u64 hint_byte,
2752 u64 search_end, struct btrfs_key *ins)
2754 int ret;
2755 ret = btrfs_reserve_extent(trans, root, num_bytes, empty_size,
2756 hint_byte, search_end, ins, 0);
2757 BUG_ON(ret);
2759 if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) {
2760 struct pending_extent_op *extent_op;
2762 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2763 BUG_ON(!extent_op);
2765 extent_op->type = PENDING_EXTENT_INSERT;
2766 extent_op->bytenr = ins->objectid;
2767 extent_op->num_bytes = ins->offset;
2768 extent_op->level = level;
2769 extent_op->flags = flags;
2770 memcpy(&extent_op->key, key, sizeof(*key));
2772 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2773 ins->objectid + ins->offset - 1,
2774 EXTENT_LOCKED);
2775 set_state_private(&root->fs_info->extent_ins,
2776 ins->objectid, (unsigned long)extent_op);
2777 } else {
2778 if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
2779 ins->offset = level;
2780 ins->type = BTRFS_METADATA_ITEM_KEY;
2782 ret = alloc_reserved_tree_block(trans, root, root_objectid,
2783 generation, flags,
2784 key, level, ins);
2785 finish_current_insert(trans);
2786 del_pending_extents(trans);
2788 return ret;
2792 * helper function to allocate a block for a given tree
2793 * returns the tree buffer or NULL.
2795 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2796 struct btrfs_root *root,
2797 u32 blocksize, u64 root_objectid,
2798 struct btrfs_disk_key *key, int level,
2799 u64 hint, u64 empty_size)
2801 struct btrfs_key ins;
2802 int ret;
2803 struct extent_buffer *buf;
2805 ret = alloc_tree_block(trans, root, blocksize, root_objectid,
2806 trans->transid, 0, key, level,
2807 empty_size, hint, (u64)-1, &ins);
2808 if (ret) {
2809 BUG_ON(ret > 0);
2810 return ERR_PTR(ret);
2813 buf = btrfs_find_create_tree_block(root->fs_info, ins.objectid);
2814 if (!buf) {
2815 btrfs_free_extent(trans, root, ins.objectid, ins.offset,
2816 0, root->root_key.objectid, level, 0);
2817 BUG_ON(1);
2818 return ERR_PTR(-ENOMEM);
2820 btrfs_set_buffer_uptodate(buf);
2821 trans->blocks_used++;
2823 return buf;
2826 #if 0
2828 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2829 struct btrfs_root *root,
2830 struct extent_buffer *leaf)
2832 u64 leaf_owner;
2833 u64 leaf_generation;
2834 struct btrfs_key key;
2835 struct btrfs_file_extent_item *fi;
2836 int i;
2837 int nritems;
2838 int ret;
2840 BUG_ON(!btrfs_is_leaf(leaf));
2841 nritems = btrfs_header_nritems(leaf);
2842 leaf_owner = btrfs_header_owner(leaf);
2843 leaf_generation = btrfs_header_generation(leaf);
2845 for (i = 0; i < nritems; i++) {
2846 u64 disk_bytenr;
2848 btrfs_item_key_to_cpu(leaf, &key, i);
2849 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2850 continue;
2851 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2852 if (btrfs_file_extent_type(leaf, fi) ==
2853 BTRFS_FILE_EXTENT_INLINE)
2854 continue;
2856 * FIXME make sure to insert a trans record that
2857 * repeats the snapshot del on crash
2859 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2860 if (disk_bytenr == 0)
2861 continue;
2862 ret = btrfs_free_extent(trans, root, disk_bytenr,
2863 btrfs_file_extent_disk_num_bytes(leaf, fi),
2864 leaf->start, leaf_owner, leaf_generation,
2865 key.objectid, 0);
2866 BUG_ON(ret);
2868 return 0;
2871 static void noinline reada_walk_down(struct btrfs_root *root,
2872 struct extent_buffer *node,
2873 int slot)
2875 u64 bytenr;
2876 u64 last = 0;
2877 u32 nritems;
2878 u32 refs;
2879 u32 blocksize;
2880 int ret;
2881 int i;
2882 int level;
2883 int skipped = 0;
2885 nritems = btrfs_header_nritems(node);
2886 level = btrfs_header_level(node);
2887 if (level)
2888 return;
2890 for (i = slot; i < nritems && skipped < 32; i++) {
2891 bytenr = btrfs_node_blockptr(node, i);
2892 if (last && ((bytenr > last && bytenr - last > SZ_32K) ||
2893 (last > bytenr && last - bytenr > SZ_32K))) {
2894 skipped++;
2895 continue;
2897 blocksize = btrfs_level_size(root, level - 1);
2898 if (i != slot) {
2899 ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2900 blocksize, &refs);
2901 BUG_ON(ret);
2902 if (refs != 1) {
2903 skipped++;
2904 continue;
2907 mutex_unlock(&root->fs_info->fs_mutex);
2908 ret = readahead_tree_block(root, bytenr, blocksize,
2909 btrfs_node_ptr_generation(node, i));
2910 last = bytenr + blocksize;
2911 cond_resched();
2912 mutex_lock(&root->fs_info->fs_mutex);
2913 if (ret)
2914 break;
2919 * helper function for drop_snapshot, this walks down the tree dropping ref
2920 * counts as it goes.
2922 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2923 struct btrfs_root *root,
2924 struct btrfs_path *path, int *level)
2926 u64 root_owner;
2927 u64 root_gen;
2928 u64 bytenr;
2929 u64 ptr_gen;
2930 struct extent_buffer *next;
2931 struct extent_buffer *cur;
2932 struct extent_buffer *parent;
2933 u32 blocksize;
2934 int ret;
2935 u32 refs;
2937 WARN_ON(*level < 0);
2938 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2939 ret = btrfs_lookup_extent_ref(trans, root,
2940 path->nodes[*level]->start,
2941 path->nodes[*level]->len, &refs);
2942 BUG_ON(ret);
2943 if (refs > 1)
2944 goto out;
2947 * walk down to the last node level and free all the leaves
2949 while(*level >= 0) {
2950 WARN_ON(*level < 0);
2951 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2952 cur = path->nodes[*level];
2954 if (btrfs_header_level(cur) != *level)
2955 WARN_ON(1);
2957 if (path->slots[*level] >=
2958 btrfs_header_nritems(cur))
2959 break;
2960 if (*level == 0) {
2961 ret = drop_leaf_ref(trans, root, cur);
2962 BUG_ON(ret);
2963 break;
2965 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2966 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2967 blocksize = btrfs_level_size(root, *level - 1);
2968 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
2969 &refs);
2970 BUG_ON(ret);
2971 if (refs != 1) {
2972 parent = path->nodes[*level];
2973 root_owner = btrfs_header_owner(parent);
2974 root_gen = btrfs_header_generation(parent);
2975 path->slots[*level]++;
2976 ret = btrfs_free_extent(trans, root, bytenr, blocksize,
2977 parent->start, root_owner,
2978 root_gen, *level - 1, 1);
2979 BUG_ON(ret);
2980 continue;
2982 next = btrfs_find_tree_block(root, bytenr, blocksize);
2983 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2984 free_extent_buffer(next);
2985 reada_walk_down(root, cur, path->slots[*level]);
2986 mutex_unlock(&root->fs_info->fs_mutex);
2987 next = read_tree_block(root, bytenr, blocksize,
2988 ptr_gen);
2989 mutex_lock(&root->fs_info->fs_mutex);
2990 if (!extent_buffer_uptodate(next)) {
2991 if (IS_ERR(next))
2992 ret = PTR_ERR(next);
2993 else
2994 ret = -EIO;
2995 break;
2998 WARN_ON(*level <= 0);
2999 if (path->nodes[*level-1])
3000 free_extent_buffer(path->nodes[*level-1]);
3001 path->nodes[*level-1] = next;
3002 *level = btrfs_header_level(next);
3003 path->slots[*level] = 0;
3005 out:
3006 WARN_ON(*level < 0);
3007 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3009 if (path->nodes[*level] == root->node) {
3010 root_owner = root->root_key.objectid;
3011 parent = path->nodes[*level];
3012 } else {
3013 parent = path->nodes[*level + 1];
3014 root_owner = btrfs_header_owner(parent);
3017 root_gen = btrfs_header_generation(parent);
3018 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
3019 path->nodes[*level]->len, parent->start,
3020 root_owner, root_gen, *level, 1);
3021 free_extent_buffer(path->nodes[*level]);
3022 path->nodes[*level] = NULL;
3023 *level += 1;
3024 BUG_ON(ret);
3025 return 0;
3029 * helper for dropping snapshots. This walks back up the tree in the path
3030 * to find the first node higher up where we haven't yet gone through
3031 * all the slots
3033 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3034 struct btrfs_root *root,
3035 struct btrfs_path *path, int *level)
3037 u64 root_owner;
3038 u64 root_gen;
3039 struct btrfs_root_item *root_item = &root->root_item;
3040 int i;
3041 int slot;
3042 int ret;
3044 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3045 slot = path->slots[i];
3046 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3047 struct extent_buffer *node;
3048 struct btrfs_disk_key disk_key;
3049 node = path->nodes[i];
3050 path->slots[i]++;
3051 *level = i;
3052 WARN_ON(*level == 0);
3053 btrfs_node_key(node, &disk_key, path->slots[i]);
3054 memcpy(&root_item->drop_progress,
3055 &disk_key, sizeof(disk_key));
3056 root_item->drop_level = i;
3057 return 0;
3058 } else {
3059 struct extent_buffer *parent;
3060 if (path->nodes[*level] == root->node)
3061 parent = path->nodes[*level];
3062 else
3063 parent = path->nodes[*level + 1];
3065 root_owner = btrfs_header_owner(parent);
3066 root_gen = btrfs_header_generation(parent);
3067 ret = btrfs_free_extent(trans, root,
3068 path->nodes[*level]->start,
3069 path->nodes[*level]->len,
3070 parent->start, root_owner,
3071 root_gen, *level, 1);
3072 BUG_ON(ret);
3073 free_extent_buffer(path->nodes[*level]);
3074 path->nodes[*level] = NULL;
3075 *level = i + 1;
3078 return 1;
3081 #endif
3083 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3085 struct btrfs_space_info *sinfo;
3086 struct btrfs_block_group_cache *cache;
3087 u64 start;
3088 u64 end;
3089 u64 ptr;
3090 int ret;
3092 while(1) {
3093 ret = find_first_extent_bit(&info->block_group_cache, 0,
3094 &start, &end, (unsigned int)-1);
3095 if (ret)
3096 break;
3097 ret = get_state_private(&info->block_group_cache, start, &ptr);
3098 if (!ret) {
3099 cache = u64_to_ptr(ptr);
3100 if (cache->free_space_ctl) {
3101 btrfs_remove_free_space_cache(cache);
3102 kfree(cache->free_space_ctl);
3104 kfree(cache);
3106 clear_extent_bits(&info->block_group_cache, start,
3107 end, (unsigned int)-1);
3109 while(1) {
3110 ret = find_first_extent_bit(&info->free_space_cache, 0,
3111 &start, &end, EXTENT_DIRTY);
3112 if (ret)
3113 break;
3114 clear_extent_dirty(&info->free_space_cache, start, end);
3117 while (!list_empty(&info->space_info)) {
3118 sinfo = list_entry(info->space_info.next,
3119 struct btrfs_space_info, list);
3120 list_del_init(&sinfo->list);
3121 kfree(sinfo);
3123 return 0;
3126 static int find_first_block_group(struct btrfs_root *root,
3127 struct btrfs_path *path, struct btrfs_key *key)
3129 int ret;
3130 struct btrfs_key found_key;
3131 struct extent_buffer *leaf;
3132 int slot;
3134 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3135 if (ret < 0)
3136 return ret;
3137 while(1) {
3138 slot = path->slots[0];
3139 leaf = path->nodes[0];
3140 if (slot >= btrfs_header_nritems(leaf)) {
3141 ret = btrfs_next_leaf(root, path);
3142 if (ret == 0)
3143 continue;
3144 if (ret < 0)
3145 goto error;
3146 break;
3148 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3150 if (found_key.objectid >= key->objectid &&
3151 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
3152 return 0;
3153 path->slots[0]++;
3155 ret = -ENOENT;
3156 error:
3157 return ret;
3160 int btrfs_read_block_groups(struct btrfs_root *root)
3162 struct btrfs_path *path;
3163 int ret;
3164 int bit;
3165 struct btrfs_block_group_cache *cache;
3166 struct btrfs_fs_info *info = root->fs_info;
3167 struct btrfs_space_info *space_info;
3168 struct extent_io_tree *block_group_cache;
3169 struct btrfs_key key;
3170 struct btrfs_key found_key;
3171 struct extent_buffer *leaf;
3173 block_group_cache = &info->block_group_cache;
3175 root = info->extent_root;
3176 key.objectid = 0;
3177 key.offset = 0;
3178 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3179 path = btrfs_alloc_path();
3180 if (!path)
3181 return -ENOMEM;
3183 while(1) {
3184 ret = find_first_block_group(root, path, &key);
3185 if (ret > 0) {
3186 ret = 0;
3187 goto error;
3189 if (ret != 0) {
3190 goto error;
3192 leaf = path->nodes[0];
3193 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3195 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3196 if (!cache) {
3197 ret = -ENOMEM;
3198 goto error;
3201 read_extent_buffer(leaf, &cache->item,
3202 btrfs_item_ptr_offset(leaf, path->slots[0]),
3203 sizeof(cache->item));
3204 memcpy(&cache->key, &found_key, sizeof(found_key));
3205 cache->cached = 0;
3206 cache->pinned = 0;
3207 key.objectid = found_key.objectid + found_key.offset;
3208 if (found_key.offset == 0)
3209 key.objectid++;
3210 btrfs_release_path(path);
3213 * Skip 0 sized block group, don't insert them into block
3214 * group cache tree, as its length is 0, it won't get
3215 * freed at close_ctree() time.
3217 if (found_key.offset == 0) {
3218 free(cache);
3219 continue;
3222 cache->flags = btrfs_block_group_flags(&cache->item);
3223 bit = 0;
3224 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3225 bit = BLOCK_GROUP_DATA;
3226 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3227 bit = BLOCK_GROUP_SYSTEM;
3228 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3229 bit = BLOCK_GROUP_METADATA;
3231 set_avail_alloc_bits(info, cache->flags);
3232 if (btrfs_chunk_readonly(info, cache->key.objectid))
3233 cache->ro = 1;
3235 exclude_super_stripes(root, cache);
3237 ret = update_space_info(info, cache->flags, found_key.offset,
3238 btrfs_block_group_used(&cache->item),
3239 &space_info);
3240 BUG_ON(ret);
3241 cache->space_info = space_info;
3243 /* use EXTENT_LOCKED to prevent merging */
3244 set_extent_bits(block_group_cache, found_key.objectid,
3245 found_key.objectid + found_key.offset - 1,
3246 bit | EXTENT_LOCKED);
3247 set_state_private(block_group_cache, found_key.objectid,
3248 (unsigned long)cache);
3250 ret = 0;
3251 error:
3252 btrfs_free_path(path);
3253 return ret;
3256 struct btrfs_block_group_cache *
3257 btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type,
3258 u64 chunk_offset, u64 size)
3260 int ret;
3261 int bit = 0;
3262 struct btrfs_block_group_cache *cache;
3263 struct extent_io_tree *block_group_cache;
3265 block_group_cache = &fs_info->block_group_cache;
3267 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3268 BUG_ON(!cache);
3269 cache->key.objectid = chunk_offset;
3270 cache->key.offset = size;
3272 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3273 btrfs_set_block_group_used(&cache->item, bytes_used);
3274 btrfs_set_block_group_chunk_objectid(&cache->item,
3275 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
3276 cache->flags = type;
3277 btrfs_set_block_group_flags(&cache->item, type);
3279 exclude_super_stripes(fs_info->extent_root, cache);
3280 ret = update_space_info(fs_info, cache->flags, size, bytes_used,
3281 &cache->space_info);
3282 BUG_ON(ret);
3284 bit = block_group_state_bits(type);
3285 ret = set_extent_bits(block_group_cache, chunk_offset,
3286 chunk_offset + size - 1,
3287 bit | EXTENT_LOCKED);
3288 BUG_ON(ret);
3290 ret = set_state_private(block_group_cache, chunk_offset,
3291 (unsigned long)cache);
3292 BUG_ON(ret);
3293 set_avail_alloc_bits(fs_info, type);
3295 return cache;
3298 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3299 struct btrfs_fs_info *fs_info, u64 bytes_used,
3300 u64 type, u64 chunk_offset, u64 size)
3302 int ret;
3303 struct btrfs_root *extent_root = fs_info->extent_root;
3304 struct btrfs_block_group_cache *cache;
3306 cache = btrfs_add_block_group(fs_info, bytes_used, type, chunk_offset,
3307 size);
3308 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3309 sizeof(cache->item));
3310 BUG_ON(ret);
3312 ret = finish_current_insert(trans);
3313 BUG_ON(ret);
3314 ret = del_pending_extents(trans);
3315 BUG_ON(ret);
3317 return 0;
3321 * This is for converter use only.
3323 * In that case, we don't know where are free blocks located.
3324 * Therefore all block group cache entries must be setup properly
3325 * before doing any block allocation.
3327 int btrfs_make_block_groups(struct btrfs_trans_handle *trans,
3328 struct btrfs_fs_info *fs_info)
3330 u64 total_bytes;
3331 u64 cur_start;
3332 u64 group_type;
3333 u64 group_size;
3334 u64 group_align;
3335 u64 total_data = 0;
3336 u64 total_metadata = 0;
3337 u64 chunk_objectid;
3338 int ret;
3339 int bit;
3340 struct btrfs_root *extent_root = fs_info->extent_root;
3341 struct btrfs_block_group_cache *cache;
3342 struct extent_io_tree *block_group_cache;
3344 block_group_cache = &fs_info->block_group_cache;
3345 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3346 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
3347 group_align = 64 * fs_info->sectorsize;
3349 cur_start = 0;
3350 while (cur_start < total_bytes) {
3351 group_size = total_bytes / 12;
3352 group_size = min_t(u64, group_size, total_bytes - cur_start);
3353 if (cur_start == 0) {
3354 bit = BLOCK_GROUP_SYSTEM;
3355 group_type = BTRFS_BLOCK_GROUP_SYSTEM;
3356 group_size /= 4;
3357 group_size &= ~(group_align - 1);
3358 group_size = max_t(u64, group_size, SZ_8M);
3359 group_size = min_t(u64, group_size, SZ_32M);
3360 } else {
3361 group_size &= ~(group_align - 1);
3362 if (total_data >= total_metadata * 2) {
3363 group_type = BTRFS_BLOCK_GROUP_METADATA;
3364 group_size = min_t(u64, group_size, SZ_1G);
3365 total_metadata += group_size;
3366 } else {
3367 group_type = BTRFS_BLOCK_GROUP_DATA;
3368 group_size = min_t(u64, group_size,
3369 5ULL * SZ_1G);
3370 total_data += group_size;
3372 if ((total_bytes - cur_start) * 4 < group_size * 5)
3373 group_size = total_bytes - cur_start;
3376 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3377 BUG_ON(!cache);
3379 cache->key.objectid = cur_start;
3380 cache->key.offset = group_size;
3381 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3383 btrfs_set_block_group_used(&cache->item, 0);
3384 btrfs_set_block_group_chunk_objectid(&cache->item,
3385 chunk_objectid);
3386 btrfs_set_block_group_flags(&cache->item, group_type);
3388 cache->flags = group_type;
3390 ret = update_space_info(fs_info, group_type, group_size,
3391 0, &cache->space_info);
3392 BUG_ON(ret);
3393 set_avail_alloc_bits(fs_info, group_type);
3395 set_extent_bits(block_group_cache, cur_start,
3396 cur_start + group_size - 1,
3397 bit | EXTENT_LOCKED);
3398 set_state_private(block_group_cache, cur_start,
3399 (unsigned long)cache);
3400 cur_start += group_size;
3402 /* then insert all the items */
3403 cur_start = 0;
3404 while(cur_start < total_bytes) {
3405 cache = btrfs_lookup_block_group(fs_info, cur_start);
3406 BUG_ON(!cache);
3408 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3409 sizeof(cache->item));
3410 BUG_ON(ret);
3412 finish_current_insert(trans);
3413 ret = del_pending_extents(trans);
3414 BUG_ON(ret);
3416 cur_start = cache->key.objectid + cache->key.offset;
3418 return 0;
3421 int btrfs_update_block_group(struct btrfs_root *root,
3422 u64 bytenr, u64 num_bytes, int alloc,
3423 int mark_free)
3425 return update_block_group(root, bytenr, num_bytes,
3426 alloc, mark_free);
3430 * Just remove a block group item in extent tree
3431 * Caller should ensure the block group is empty and all space is pinned.
3432 * Or new tree block/data may be allocated into it.
3434 static int free_block_group_item(struct btrfs_trans_handle *trans,
3435 struct btrfs_fs_info *fs_info,
3436 u64 bytenr, u64 len)
3438 struct btrfs_path *path;
3439 struct btrfs_key key;
3440 struct btrfs_root *root = fs_info->extent_root;
3441 int ret = 0;
3443 key.objectid = bytenr;
3444 key.offset = len;
3445 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3447 path = btrfs_alloc_path();
3448 if (!path)
3449 return -ENOMEM;
3451 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3452 if (ret > 0) {
3453 ret = -ENOENT;
3454 goto out;
3456 if (ret < 0)
3457 goto out;
3459 ret = btrfs_del_item(trans, root, path);
3460 out:
3461 btrfs_free_path(path);
3462 return ret;
3465 static int free_dev_extent_item(struct btrfs_trans_handle *trans,
3466 struct btrfs_fs_info *fs_info,
3467 u64 devid, u64 dev_offset)
3469 struct btrfs_root *root = fs_info->dev_root;
3470 struct btrfs_path *path;
3471 struct btrfs_key key;
3472 int ret;
3474 path = btrfs_alloc_path();
3475 if (!path)
3476 return -ENOMEM;
3478 key.objectid = devid;
3479 key.type = BTRFS_DEV_EXTENT_KEY;
3480 key.offset = dev_offset;
3482 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3483 if (ret < 0)
3484 goto out;
3485 if (ret > 0) {
3486 ret = -ENOENT;
3487 goto out;
3490 ret = btrfs_del_item(trans, root, path);
3491 out:
3492 btrfs_free_path(path);
3493 return ret;
3496 static int free_chunk_dev_extent_items(struct btrfs_trans_handle *trans,
3497 struct btrfs_fs_info *fs_info,
3498 u64 chunk_offset)
3500 struct btrfs_chunk *chunk = NULL;
3501 struct btrfs_root *root= fs_info->chunk_root;
3502 struct btrfs_path *path;
3503 struct btrfs_key key;
3504 u16 num_stripes;
3505 int i;
3506 int ret;
3508 path = btrfs_alloc_path();
3509 if (!path)
3510 return -ENOMEM;
3512 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3513 key.type = BTRFS_CHUNK_ITEM_KEY;
3514 key.offset = chunk_offset;
3516 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
3517 if (ret < 0)
3518 goto out;
3519 if (ret > 0) {
3520 ret = -ENOENT;
3521 goto out;
3523 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3524 struct btrfs_chunk);
3525 num_stripes = btrfs_chunk_num_stripes(path->nodes[0], chunk);
3526 for (i = 0; i < num_stripes; i++) {
3527 ret = free_dev_extent_item(trans, fs_info,
3528 btrfs_stripe_devid_nr(path->nodes[0], chunk, i),
3529 btrfs_stripe_offset_nr(path->nodes[0], chunk, i));
3530 if (ret < 0)
3531 goto out;
3533 out:
3534 btrfs_free_path(path);
3535 return ret;
3538 static int free_system_chunk_item(struct btrfs_super_block *super,
3539 struct btrfs_key *key)
3541 struct btrfs_disk_key *disk_key;
3542 struct btrfs_key cpu_key;
3543 u32 array_size = btrfs_super_sys_array_size(super);
3544 char *ptr = (char *)super->sys_chunk_array;
3545 int cur = 0;
3546 int ret = -ENOENT;
3548 while (cur < btrfs_super_sys_array_size(super)) {
3549 struct btrfs_chunk *chunk;
3550 u32 num_stripes;
3551 u32 chunk_len;
3553 disk_key = (struct btrfs_disk_key *)(ptr + cur);
3554 btrfs_disk_key_to_cpu(&cpu_key, disk_key);
3555 if (cpu_key.type != BTRFS_CHUNK_ITEM_KEY) {
3556 /* just in case */
3557 ret = -EIO;
3558 goto out;
3561 chunk = (struct btrfs_chunk *)(ptr + cur + sizeof(*disk_key));
3562 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
3563 chunk_len = btrfs_chunk_item_size(num_stripes) +
3564 sizeof(*disk_key);
3566 if (key->objectid == cpu_key.objectid &&
3567 key->offset == cpu_key.offset &&
3568 key->type == cpu_key.type) {
3569 memmove(ptr + cur, ptr + cur + chunk_len,
3570 array_size - cur - chunk_len);
3571 array_size -= chunk_len;
3572 btrfs_set_super_sys_array_size(super, array_size);
3573 ret = 0;
3574 goto out;
3577 cur += chunk_len;
3579 out:
3580 return ret;
3583 static int free_chunk_item(struct btrfs_trans_handle *trans,
3584 struct btrfs_fs_info *fs_info,
3585 u64 bytenr)
3587 struct btrfs_path *path;
3588 struct btrfs_key key;
3589 struct btrfs_root *root = fs_info->chunk_root;
3590 struct btrfs_chunk *chunk;
3591 u64 chunk_type;
3592 int ret;
3594 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3595 key.offset = bytenr;
3596 key.type = BTRFS_CHUNK_ITEM_KEY;
3598 path = btrfs_alloc_path();
3599 if (!path)
3600 return -ENOMEM;
3602 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3603 if (ret > 0) {
3604 ret = -ENOENT;
3605 goto out;
3607 if (ret < 0)
3608 goto out;
3609 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
3610 struct btrfs_chunk);
3611 chunk_type = btrfs_chunk_type(path->nodes[0], chunk);
3613 ret = btrfs_del_item(trans, root, path);
3614 if (ret < 0)
3615 goto out;
3617 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3618 ret = free_system_chunk_item(fs_info->super_copy, &key);
3619 out:
3620 btrfs_free_path(path);
3621 return ret;
3624 static u64 get_dev_extent_len(struct map_lookup *map)
3626 int div;
3628 switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
3629 case 0: /* Single */
3630 case BTRFS_BLOCK_GROUP_DUP:
3631 case BTRFS_BLOCK_GROUP_RAID1:
3632 div = 1;
3633 break;
3634 case BTRFS_BLOCK_GROUP_RAID5:
3635 div = (map->num_stripes - 1);
3636 break;
3637 case BTRFS_BLOCK_GROUP_RAID6:
3638 div = (map->num_stripes - 2);
3639 break;
3640 case BTRFS_BLOCK_GROUP_RAID10:
3641 div = (map->num_stripes / map->sub_stripes);
3642 break;
3643 default:
3644 /* normally, read chunk security hook should handled it */
3645 BUG_ON(1);
3647 return map->ce.size / div;
3650 /* free block group/chunk related caches */
3651 static int free_block_group_cache(struct btrfs_trans_handle *trans,
3652 struct btrfs_fs_info *fs_info,
3653 u64 bytenr, u64 len)
3655 struct btrfs_block_group_cache *cache;
3656 struct cache_extent *ce;
3657 struct map_lookup *map;
3658 int ret;
3659 int i;
3660 u64 flags;
3662 /* Free block group cache first */
3663 cache = btrfs_lookup_block_group(fs_info, bytenr);
3664 if (!cache)
3665 return -ENOENT;
3666 flags = cache->flags;
3667 if (cache->free_space_ctl) {
3668 btrfs_remove_free_space_cache(cache);
3669 kfree(cache->free_space_ctl);
3671 clear_extent_bits(&fs_info->block_group_cache, bytenr, bytenr + len - 1,
3672 (unsigned int)-1);
3673 ret = free_space_info(fs_info, flags, len, 0, NULL);
3674 if (ret < 0)
3675 goto out;
3676 kfree(cache);
3678 /* Then free mapping info and dev usage info */
3679 ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, bytenr);
3680 if (!ce || ce->start != bytenr) {
3681 ret = -ENOENT;
3682 goto out;
3684 map = container_of(ce, struct map_lookup, ce);
3685 for (i = 0; i < map->num_stripes; i++) {
3686 struct btrfs_device *device;
3688 device = map->stripes[i].dev;
3689 device->bytes_used -= get_dev_extent_len(map);
3690 ret = btrfs_update_device(trans, device);
3691 if (ret < 0)
3692 goto out;
3694 remove_cache_extent(&fs_info->mapping_tree.cache_tree, ce);
3695 free(map);
3696 out:
3697 return ret;
3700 int btrfs_free_block_group(struct btrfs_trans_handle *trans,
3701 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
3703 struct btrfs_root *extent_root = fs_info->extent_root;
3704 struct btrfs_path *path;
3705 struct btrfs_block_group_item *bgi;
3706 struct btrfs_key key;
3707 int ret = 0;
3709 path = btrfs_alloc_path();
3710 if (!path)
3711 return -ENOMEM;
3713 key.objectid = bytenr;
3714 key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
3715 key.offset = len;
3717 /* Double check the block group to ensure it's empty */
3718 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
3719 if (ret > 0) {
3720 ret = -ENONET;
3721 goto out;
3723 if (ret < 0)
3724 goto out;
3726 bgi = btrfs_item_ptr(path->nodes[0], path->slots[0],
3727 struct btrfs_block_group_item);
3728 if (btrfs_disk_block_group_used(path->nodes[0], bgi)) {
3729 fprintf(stderr,
3730 "WARNING: block group [%llu,%llu) is not empty\n",
3731 bytenr, bytenr + len);
3732 ret = -EINVAL;
3733 goto out;
3735 btrfs_release_path(path);
3738 * Now pin all space in the block group, to prevent further transaction
3739 * allocate space from it.
3740 * Every operation needs a transaction must be in the range.
3742 btrfs_pin_extent(fs_info, bytenr, len);
3744 /* delete block group item and chunk item */
3745 ret = free_block_group_item(trans, fs_info, bytenr, len);
3746 if (ret < 0) {
3747 fprintf(stderr,
3748 "failed to free block group item for [%llu,%llu)\n",
3749 bytenr, bytenr + len);
3750 btrfs_unpin_extent(fs_info, bytenr, len);
3751 goto out;
3754 ret = free_chunk_dev_extent_items(trans, fs_info, bytenr);
3755 if (ret < 0) {
3756 fprintf(stderr,
3757 "failed to dev extents belongs to [%llu,%llu)\n",
3758 bytenr, bytenr + len);
3759 btrfs_unpin_extent(fs_info, bytenr, len);
3760 goto out;
3762 ret = free_chunk_item(trans, fs_info, bytenr);
3763 if (ret < 0) {
3764 fprintf(stderr,
3765 "failed to free chunk for [%llu,%llu)\n",
3766 bytenr, bytenr + len);
3767 btrfs_unpin_extent(fs_info, bytenr, len);
3768 goto out;
3771 /* Now release the block_group_cache */
3772 ret = free_block_group_cache(trans, fs_info, bytenr, len);
3773 btrfs_unpin_extent(fs_info, bytenr, len);
3775 out:
3776 btrfs_free_path(path);
3777 return ret;
3781 * Fixup block accounting. The initial block accounting created by
3782 * make_block_groups isn't accuracy in this case.
3784 int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans)
3786 int ret = 0;
3787 int slot;
3788 u64 start = 0;
3789 u64 bytes_used = 0;
3790 struct btrfs_path path;
3791 struct btrfs_key key;
3792 struct extent_buffer *leaf;
3793 struct btrfs_block_group_cache *cache;
3794 struct btrfs_fs_info *fs_info = trans->fs_info;
3795 struct btrfs_root *root = fs_info->extent_root;
3797 while(extent_root_pending_ops(fs_info)) {
3798 ret = finish_current_insert(trans);
3799 if (ret)
3800 return ret;
3801 ret = del_pending_extents(trans);
3802 if (ret)
3803 return ret;
3806 while(1) {
3807 cache = btrfs_lookup_first_block_group(fs_info, start);
3808 if (!cache)
3809 break;
3810 start = cache->key.objectid + cache->key.offset;
3811 btrfs_set_block_group_used(&cache->item, 0);
3812 cache->space_info->bytes_used = 0;
3813 set_extent_bits(&root->fs_info->block_group_cache,
3814 cache->key.objectid,
3815 cache->key.objectid + cache->key.offset -1,
3816 BLOCK_GROUP_DIRTY);
3819 btrfs_init_path(&path);
3820 key.offset = 0;
3821 key.objectid = 0;
3822 key.type = BTRFS_EXTENT_ITEM_KEY;
3823 ret = btrfs_search_slot(trans, root->fs_info->extent_root,
3824 &key, &path, 0, 0);
3825 if (ret < 0)
3826 return ret;
3827 while(1) {
3828 leaf = path.nodes[0];
3829 slot = path.slots[0];
3830 if (slot >= btrfs_header_nritems(leaf)) {
3831 ret = btrfs_next_leaf(root, &path);
3832 if (ret < 0)
3833 return ret;
3834 if (ret > 0)
3835 break;
3836 leaf = path.nodes[0];
3837 slot = path.slots[0];
3839 btrfs_item_key_to_cpu(leaf, &key, slot);
3840 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
3841 bytes_used += key.offset;
3842 ret = btrfs_update_block_group(root,
3843 key.objectid, key.offset, 1, 0);
3844 BUG_ON(ret);
3845 } else if (key.type == BTRFS_METADATA_ITEM_KEY) {
3846 bytes_used += fs_info->nodesize;
3847 ret = btrfs_update_block_group(root,
3848 key.objectid, fs_info->nodesize, 1, 0);
3849 if (ret)
3850 goto out;
3852 path.slots[0]++;
3854 btrfs_set_super_bytes_used(root->fs_info->super_copy, bytes_used);
3855 ret = 0;
3856 out:
3857 btrfs_release_path(&path);
3858 return ret;
3861 static void __get_extent_size(struct btrfs_root *root, struct btrfs_path *path,
3862 u64 *start, u64 *len)
3864 struct btrfs_key key;
3866 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3867 BUG_ON(!(key.type == BTRFS_EXTENT_ITEM_KEY ||
3868 key.type == BTRFS_METADATA_ITEM_KEY));
3869 *start = key.objectid;
3870 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3871 *len = key.offset;
3872 else
3873 *len = root->fs_info->nodesize;
3877 * Find first overlap extent for range [bytenr, bytenr + len)
3878 * Return 0 for found and point path to it.
3879 * Return >0 for not found.
3880 * Return <0 for err
3882 int btrfs_search_overlap_extent(struct btrfs_root *root,
3883 struct btrfs_path *path, u64 bytenr, u64 len)
3885 struct btrfs_key key;
3886 u64 cur_start;
3887 u64 cur_len;
3888 int ret;
3890 key.objectid = bytenr;
3891 key.type = BTRFS_EXTENT_DATA_KEY;
3892 key.offset = (u64)-1;
3894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3895 if (ret < 0)
3896 return ret;
3897 BUG_ON(ret == 0);
3899 ret = btrfs_previous_extent_item(root, path, 0);
3900 if (ret < 0)
3901 return ret;
3902 /* no previous, check next extent */
3903 if (ret > 0)
3904 goto next;
3905 __get_extent_size(root, path, &cur_start, &cur_len);
3906 /* Tail overlap */
3907 if (cur_start + cur_len > bytenr)
3908 return 1;
3910 next:
3911 ret = btrfs_next_extent_item(root, path, bytenr + len);
3912 if (ret < 0)
3913 return ret;
3914 /* No next, prev already checked, no overlap */
3915 if (ret > 0)
3916 return 0;
3917 __get_extent_size(root, path, &cur_start, &cur_len);
3918 /* head overlap*/
3919 if (cur_start < bytenr + len)
3920 return 1;
3921 return 0;
3924 static int __btrfs_record_file_extent(struct btrfs_trans_handle *trans,
3925 struct btrfs_root *root, u64 objectid,
3926 struct btrfs_inode_item *inode,
3927 u64 file_pos, u64 disk_bytenr,
3928 u64 *ret_num_bytes)
3930 int ret;
3931 struct btrfs_fs_info *info = root->fs_info;
3932 struct btrfs_root *extent_root = info->extent_root;
3933 struct extent_buffer *leaf;
3934 struct btrfs_file_extent_item *fi;
3935 struct btrfs_key ins_key;
3936 struct btrfs_path *path;
3937 struct btrfs_extent_item *ei;
3938 u64 nbytes;
3939 u64 extent_num_bytes;
3940 u64 extent_bytenr;
3941 u64 extent_offset;
3942 u64 num_bytes = *ret_num_bytes;
3945 * All supported file system should not use its 0 extent.
3946 * As it's for hole
3948 * And hole extent has no size limit, no need to loop.
3950 if (disk_bytenr == 0) {
3951 ret = btrfs_insert_file_extent(trans, root, objectid,
3952 file_pos, disk_bytenr,
3953 num_bytes, num_bytes);
3954 return ret;
3956 num_bytes = min_t(u64, num_bytes, BTRFS_MAX_EXTENT_SIZE);
3958 path = btrfs_alloc_path();
3959 if (!path)
3960 return -ENOMEM;
3962 /* First to check extent overlap */
3963 ret = btrfs_search_overlap_extent(extent_root, path, disk_bytenr,
3964 num_bytes);
3965 if (ret < 0)
3966 goto fail;
3967 if (ret > 0) {
3968 /* Found overlap */
3969 u64 cur_start;
3970 u64 cur_len;
3972 __get_extent_size(extent_root, path, &cur_start, &cur_len);
3974 * For convert case, this extent should be a subset of
3975 * existing one.
3977 BUG_ON(disk_bytenr < cur_start);
3979 extent_bytenr = cur_start;
3980 extent_num_bytes = cur_len;
3981 extent_offset = disk_bytenr - extent_bytenr;
3982 } else {
3983 /* No overlap, create new extent */
3984 btrfs_release_path(path);
3985 ins_key.objectid = disk_bytenr;
3986 ins_key.offset = num_bytes;
3987 ins_key.type = BTRFS_EXTENT_ITEM_KEY;
3989 ret = btrfs_insert_empty_item(trans, extent_root, path,
3990 &ins_key, sizeof(*ei));
3991 if (ret == 0) {
3992 leaf = path->nodes[0];
3993 ei = btrfs_item_ptr(leaf, path->slots[0],
3994 struct btrfs_extent_item);
3996 btrfs_set_extent_refs(leaf, ei, 0);
3997 btrfs_set_extent_generation(leaf, ei, 0);
3998 btrfs_set_extent_flags(leaf, ei,
3999 BTRFS_EXTENT_FLAG_DATA);
4000 btrfs_mark_buffer_dirty(leaf);
4002 ret = btrfs_update_block_group(root, disk_bytenr,
4003 num_bytes, 1, 0);
4004 if (ret)
4005 goto fail;
4006 } else if (ret != -EEXIST) {
4007 goto fail;
4009 btrfs_extent_post_op(trans);
4010 extent_bytenr = disk_bytenr;
4011 extent_num_bytes = num_bytes;
4012 extent_offset = 0;
4014 btrfs_release_path(path);
4015 ins_key.objectid = objectid;
4016 ins_key.offset = file_pos;
4017 ins_key.type = BTRFS_EXTENT_DATA_KEY;
4018 ret = btrfs_insert_empty_item(trans, root, path, &ins_key,
4019 sizeof(*fi));
4020 if (ret)
4021 goto fail;
4022 leaf = path->nodes[0];
4023 fi = btrfs_item_ptr(leaf, path->slots[0],
4024 struct btrfs_file_extent_item);
4025 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
4026 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
4027 btrfs_set_file_extent_disk_bytenr(leaf, fi, extent_bytenr);
4028 btrfs_set_file_extent_disk_num_bytes(leaf, fi, extent_num_bytes);
4029 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
4030 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
4031 btrfs_set_file_extent_ram_bytes(leaf, fi, extent_num_bytes);
4032 btrfs_set_file_extent_compression(leaf, fi, 0);
4033 btrfs_set_file_extent_encryption(leaf, fi, 0);
4034 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
4035 btrfs_mark_buffer_dirty(leaf);
4037 nbytes = btrfs_stack_inode_nbytes(inode) + num_bytes;
4038 btrfs_set_stack_inode_nbytes(inode, nbytes);
4039 btrfs_release_path(path);
4041 ret = btrfs_inc_extent_ref(trans, root, extent_bytenr, extent_num_bytes,
4042 0, root->root_key.objectid, objectid,
4043 file_pos - extent_offset);
4044 if (ret)
4045 goto fail;
4046 ret = 0;
4047 *ret_num_bytes = min(extent_num_bytes - extent_offset, num_bytes);
4048 fail:
4049 btrfs_free_path(path);
4050 return ret;
4054 * Record a file extent. Do all the required works, such as inserting
4055 * file extent item, inserting extent item and backref item into extent
4056 * tree and updating block accounting.
4058 int btrfs_record_file_extent(struct btrfs_trans_handle *trans,
4059 struct btrfs_root *root, u64 objectid,
4060 struct btrfs_inode_item *inode,
4061 u64 file_pos, u64 disk_bytenr,
4062 u64 num_bytes)
4064 u64 cur_disk_bytenr = disk_bytenr;
4065 u64 cur_file_pos = file_pos;
4066 u64 cur_num_bytes = num_bytes;
4067 int ret = 0;
4069 while (num_bytes > 0) {
4070 ret = __btrfs_record_file_extent(trans, root, objectid,
4071 inode, cur_file_pos,
4072 cur_disk_bytenr,
4073 &cur_num_bytes);
4074 if (ret < 0)
4075 break;
4076 cur_disk_bytenr += cur_num_bytes;
4077 cur_file_pos += cur_num_bytes;
4078 num_bytes -= cur_num_bytes;
4080 return ret;
4084 static int add_excluded_extent(struct btrfs_root *root,
4085 u64 start, u64 num_bytes)
4087 u64 end = start + num_bytes - 1;
4088 set_extent_bits(&root->fs_info->pinned_extents,
4089 start, end, EXTENT_UPTODATE);
4090 return 0;
4093 void free_excluded_extents(struct btrfs_root *root,
4094 struct btrfs_block_group_cache *cache)
4096 u64 start, end;
4098 start = cache->key.objectid;
4099 end = start + cache->key.offset - 1;
4101 clear_extent_bits(&root->fs_info->pinned_extents,
4102 start, end, EXTENT_UPTODATE);
4105 int exclude_super_stripes(struct btrfs_root *root,
4106 struct btrfs_block_group_cache *cache)
4108 u64 bytenr;
4109 u64 *logical;
4110 int stripe_len;
4111 int i, nr, ret;
4113 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
4114 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
4115 cache->bytes_super += stripe_len;
4116 ret = add_excluded_extent(root, cache->key.objectid,
4117 stripe_len);
4118 if (ret)
4119 return ret;
4122 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
4123 bytenr = btrfs_sb_offset(i);
4124 ret = btrfs_rmap_block(root->fs_info,
4125 cache->key.objectid, bytenr,
4126 &logical, &nr, &stripe_len);
4127 if (ret)
4128 return ret;
4130 while (nr--) {
4131 u64 start, len;
4133 if (logical[nr] > cache->key.objectid +
4134 cache->key.offset)
4135 continue;
4137 if (logical[nr] + stripe_len <= cache->key.objectid)
4138 continue;
4140 start = logical[nr];
4141 if (start < cache->key.objectid) {
4142 start = cache->key.objectid;
4143 len = (logical[nr] + stripe_len) - start;
4144 } else {
4145 len = min_t(u64, stripe_len,
4146 cache->key.objectid +
4147 cache->key.offset - start);
4150 cache->bytes_super += len;
4151 ret = add_excluded_extent(root, start, len);
4152 if (ret) {
4153 kfree(logical);
4154 return ret;
4158 kfree(logical);
4160 return 0;
4163 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
4164 struct btrfs_fs_info *info, u64 start, u64 end)
4166 u64 extent_start, extent_end, size, total_added = 0;
4167 int ret;
4169 while (start < end) {
4170 ret = find_first_extent_bit(&info->pinned_extents, start,
4171 &extent_start, &extent_end,
4172 EXTENT_DIRTY | EXTENT_UPTODATE);
4173 if (ret)
4174 break;
4176 if (extent_start <= start) {
4177 start = extent_end + 1;
4178 } else if (extent_start > start && extent_start < end) {
4179 size = extent_start - start;
4180 total_added += size;
4181 ret = btrfs_add_free_space(block_group->free_space_ctl,
4182 start, size);
4183 BUG_ON(ret); /* -ENOMEM or logic error */
4184 start = extent_end + 1;
4185 } else {
4186 break;
4190 if (start < end) {
4191 size = end - start;
4192 total_added += size;
4193 ret = btrfs_add_free_space(block_group->free_space_ctl, start,
4194 size);
4195 BUG_ON(ret); /* -ENOMEM or logic error */
4198 return total_added;