2 * Copyright 2011, Jérôme Duval, korli@users.berlios.de.
3 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
4 * This file may be used under the terms of the MIT License.
8 //! BTree implementation
16 # define TRACE(x...) dprintf("\33[34mbtrfs:\33[0m " x)
18 # define TRACE(x...) ;
20 # define ERROR(x...) dprintf("\33[34mbtrfs:\33[0m " x)
23 BTree::Node::Node(Volume
* volume
)
33 BTree::Node::Node(Volume
* volume
, off_t block
)
60 block_cache_put(fVolume
->BlockCache(), fBlockNumber
);
67 BTree::Node::SetTo(off_t block
)
71 fNode
= (btrfs_stream
*)block_cache_get(fVolume
->BlockCache(), block
);
76 BTree::Node::SetToWritable(off_t block
, int32 transactionId
, bool empty
)
82 fNode
= (btrfs_stream
*)block_cache_get_empty(fVolume
->BlockCache(),
83 block
, transactionId
);
85 fNode
= (btrfs_stream
*)block_cache_get_writable(fVolume
->BlockCache(),
86 block
, transactionId
);
92 BTree::Node::SearchSlot(const btrfs_key
& key
, int* slot
, btree_traversing type
)
95 //binary search for item slot in a node
96 int entrySize
= sizeof(btrfs_entry
);
99 entrySize
= sizeof(btrfs_index
);
102 int high
= ItemCount();
103 int low
= 0, mid
= 0, comp
= 0;
104 uint8
* base
= (uint8
*)fNode
+ sizeof(btrfs_header
);
105 const btrfs_key
* other
;
107 mid
= (low
+ high
) / 2;
108 other
= (const btrfs_key
*)(base
+ mid
* entrySize
);
109 comp
= key
.Compare(*other
);
116 return B_OK
; //if key is in node
120 // |--item1--|--item2--|--item3--|--etc--|
124 if (type
== BTREE_BACKWARD
) {
128 } else if (type
== BTREE_FORWARD
) {
134 if (type
== BTREE_EXACT
|| *slot
< 0)
135 return B_ENTRY_NOT_FOUND
;
137 TRACE("SearchSlot() found slot %" B_PRId32
" comp %" B_PRId32
"\n",
144 * calculate used space except the header.
145 * type is only for leaf node
146 * type 1: only item space
147 * type 2: only item data space
148 * type 3: both type 1 and 2
151 BTree::Node::_CalculateSpace(uint32 from
, uint32 to
, uint8 type
) const
153 if (to
< from
|| to
>= ItemCount())
157 return sizeof(btrfs_index
) * (to
- from
+ 1);
160 if ((type
& 1) == 1) {
161 result
+= sizeof(btrfs_entry
) * (to
- from
+ 1);
163 if ((type
& 2) == 2) {
164 result
+= Item(from
)->Offset() + Item(from
)->Size()
165 - Item(to
)->Offset();
173 BTree::Node::SpaceUsed() const
176 return _CalculateSpace(0, ItemCount() - 1, 3);
177 return _CalculateSpace(0, ItemCount() - 1, 0);
182 BTree::Node::SpaceLeft() const
184 return fVolume
->BlockSize() - SpaceUsed() - sizeof(btrfs_header
);
189 BTree::Node::_Copy(const Node
* origin
, uint32 at
, uint32 from
, uint32 to
,
192 TRACE("Node::_Copy() at: %d from: %d to: %d length: %d\n",
193 at
, from
, to
, length
);
196 memcpy(Item(at
), origin
->Item(from
), origin
->_CalculateSpace(from
, to
));
197 // Item offset of copied node must be changed to get the
198 // item data offset correctly. length can be zero
199 // but let it there doesn't harm anything.
200 for (uint32 i
= at
; i
- at
<= to
- from
; ++i
)
201 Item(i
)->SetOffset(Item(i
)->Offset() - length
);
203 memcpy(ItemData(at
+ to
- from
), origin
->ItemData(to
),
204 origin
->_CalculateSpace(from
, to
, 2));
206 memcpy(Index(at
), origin
->Index(from
),
207 origin
->_CalculateSpace(from
, to
));
213 BTree::Node::_SpaceCheck(int length
) const
215 // this is a little bit weird here because we can't find
216 // any suitable error code
217 if (length
< 0 && std::abs(length
) >= SpaceUsed())
218 return B_DIRECTORY_NOT_EMPTY
; // not enough data to delete
219 if (length
> 0 && length
>= SpaceLeft())
220 return B_DEVICE_FULL
; // no spare space
226 * copy node header, items and items data
227 * length is size to insert/remove
228 * if node is a internal node, length isnt used
229 * length = 0: Copy a whole
230 * length < 0: removing
231 * length > 0: inserting
234 BTree::Node::Copy(const Node
* origin
, uint32 start
, uint32 end
, int length
)
237 status_t status
= origin
->_SpaceCheck(length
);
241 memcpy(fNode
, origin
->fNode
, sizeof(btrfs_header
));
243 // like removing [0, start - 1] and keeping [start, end]
244 length
= -origin
->_CalculateSpace(0, start
- 1, 2);
245 _Copy(origin
, 0, start
, end
, length
);
246 } else if (length
< 0) {
247 //removing all items in [start, end]
249 _Copy(origin
, 0, 0, start
- 1, 0); // <-- [start,...
250 if (end
+ 1 < origin
->ItemCount()) {
252 // we only care data size
253 length
+= origin
->_CalculateSpace(start
, end
);
254 _Copy(origin
, start
, end
+ 1, origin
->ItemCount() - 1, length
);
257 //inserting in [start, end] - make a hole for later
259 _Copy(origin
, 0, 0, start
- 1, 0);
260 if (start
< origin
->ItemCount()) {
261 length
-= origin
->_CalculateSpace(start
, end
);
262 _Copy(origin
, end
+ 1, start
, origin
->ItemCount() - 1, length
);
270 /* Like copy but here we use memmove on current node.
273 BTree::Node::MoveEntries(uint32 start
, uint32 end
, int length
) const
275 status_t status
= _SpaceCheck(length
);
276 if (status
!= B_OK
|| length
== 0/*B_OK*/)
279 int entrySize
= sizeof(btrfs_entry
);
281 entrySize
= sizeof(btrfs_index
);
283 uint8
* base
= (uint8
*)fNode
+ sizeof(btrfs_header
);
286 // removing [start, end]
287 TRACE("Node::MoveEntries() removing ... start %" B_PRIu32
" end %"
288 B_PRIu32
" length %i\n", start
, end
, length
);
289 length
+= _CalculateSpace(start
, end
- 1);
292 // inserting into [start, end] - make room for later
293 TRACE("Node::MoveEntries() inserting ... start %" B_PRIu32
" end %"
294 B_PRIu32
" length %i\n", start
, end
, length
);
295 length
-= _CalculateSpace(start
, end
- 1);
296 std::swap(start
, end
);
299 if (end
>= ItemCount())
302 int dataSize
= _CalculateSpace(end
, ItemCount() - 1, 2);
303 // moving items/block pointers
304 memmove(base
+ start
* entrySize
, base
+ end
* entrySize
,
305 _CalculateSpace(end
, ItemCount() - 1));
308 int num
= start
- end
;
309 for(uint32 i
= start
; i
< ItemCount() + num
; ++i
)
310 Item(i
)->SetOffset(Item(i
)->Offset() - length
);
312 memmove(ItemData(ItemCount() - 1) - length
, ItemData(ItemCount() - 1),
320 // #pragma mark - BTree::Path implementation
323 BTree::Path::Path(BTree
* tree
)
327 for (int i
= 0; i
< BTRFS_MAX_TREE_DEPTH
; ++i
) {
336 for (int i
= 0; i
< BTRFS_MAX_TREE_DEPTH
; ++i
) {
345 BTree::Path::GetNode(int level
, int* _slot
) const
348 *_slot
= fSlots
[level
];
349 return fNodes
[level
];
354 BTree::Path::SetNode(off_t block
, int slot
)
356 Node
node(fTree
->SystemVolume(), block
);
357 return SetNode(&node
, slot
);
362 BTree::Path::SetNode(const Node
* node
, int slot
)
364 uint8 level
= node
->Level();
365 if (fNodes
[level
] == NULL
) {
366 fNodes
[level
] = new Node(fTree
->SystemVolume(), node
->BlockNum());
367 if (fNodes
[level
] == NULL
)
370 fNodes
[level
]->SetTo(node
->BlockNum());
373 fSlots
[level
] = fNodes
[level
]->ItemCount() - 1;
375 fSlots
[level
] = slot
;
376 return fNodes
[level
];
381 BTree::Path::Move(int level
, int step
)
383 fSlots
[level
] += step
;
384 if (fSlots
[level
] < 0)
386 if (fSlots
[level
] >= fNodes
[level
]->ItemCount())
393 BTree::Path::GetEntry(int slot
, btrfs_key
* _key
, void** _value
, uint32
* _size
,
396 BTree::Node
* leaf
= fNodes
[0];
397 if (slot
< 0 || slot
>= leaf
->ItemCount())
398 return B_ENTRY_NOT_FOUND
;
401 *_key
= leaf
->Item(slot
)->key
;
403 uint32 itemSize
= leaf
->Item(slot
)->Size();
404 if (_value
!= NULL
) {
405 *_value
= malloc(itemSize
);
409 memcpy(*_value
, leaf
->ItemData(slot
), itemSize
);
416 *_offset
= leaf
->Item(slot
)->Offset();
422 // #pragma mark - BTree implementation
425 BTree::BTree(Volume
* volume
)
430 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
434 BTree::BTree(Volume
* volume
, btrfs_stream
* stream
)
439 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
443 BTree::BTree(Volume
* volume
, fsblock_t rootBlock
)
445 fRootBlock(rootBlock
),
448 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
454 // if there are any TreeIterators left, we need to stop them
455 // (can happen when the tree's inode gets deleted while
456 // traversing the tree - a TreeIterator doesn't lock the inode)
457 mutex_lock(&fIteratorLock
);
459 SinglyLinkedList
<TreeIterator
>::Iterator iterator
460 = fIterators
.GetIterator();
461 while (iterator
.HasNext())
462 iterator
.Next()->Stop();
463 mutex_destroy(&fIteratorLock
);
468 btrfs_key::Compare(const btrfs_key
& key
) const
470 if (ObjectID() > key
.ObjectID())
472 if (ObjectID() < key
.ObjectID())
474 if (Type() > key
.Type())
476 if (Type() < key
.Type())
478 if (Offset() > key
.Offset())
480 if (Offset() < key
.Offset())
486 /* Traverse from root to fill in the path along way its finding.
487 * Return current slot at leaf if successful.
490 BTree::Traverse(btree_traversing type
, Path
* path
, const btrfs_key
& key
)
493 TRACE("BTree::Traverse() objectid %" B_PRId64
" type %d offset %"
494 B_PRId64
" \n", key
.ObjectID(), key
.Type(), key
.Offset());
495 fsblock_t physicalBlock
= fRootBlock
;
496 Node
node(fVolume
, physicalBlock
);
498 status_t status
= B_OK
;
500 while (node
.Level() != 0) {
501 TRACE("BTree::Traverse() level %d count %d\n", node
.Level(),
503 status
= node
.SearchSlot(key
, &slot
, BTREE_BACKWARD
);
506 if (path
->SetNode(&node
, slot
) == NULL
)
509 TRACE("BTree::Traverse() getting index %" B_PRIu32
"\n", slot
);
511 status
= fVolume
->FindBlock(node
.Index(slot
)->LogicalAddress(),
513 if (status
!= B_OK
) {
514 ERROR("BTree::Traverse() unmapped block %" B_PRId64
"\n",
515 node
.Index(slot
)->LogicalAddress());
518 node
.SetTo(physicalBlock
);
521 TRACE("BTree::Traverse() dump count %" B_PRId32
"\n", node
.ItemCount());
522 status
= node
.SearchSlot(key
, &slot
, type
);
525 if (path
->SetNode(&node
, slot
) == NULL
)
528 TRACE("BTree::Traverse() found %" B_PRIu32
" %" B_PRIu32
"\n",
529 node
.Item(slot
)->Offset(), node
.Item(slot
)->Size());
534 /*! Searches the key in the tree, and stores the allocated found item in
535 _value, if successful.
536 Returns B_OK when the key could be found, B_ENTRY_NOT_FOUND if not.
537 It can also return other errors to indicate that something went wrong.
540 BTree::_Find(Path
* path
, btrfs_key
& wanted
, void** _value
, uint32
* _size
,
541 uint32
* _offset
, btree_traversing type
) const
543 status_t status
= Traverse(type
, path
, wanted
);
548 status
= path
->GetCurrentEntry(&found
, _value
, _size
, _offset
);
552 if (found
.Type() != wanted
.Type() && wanted
.Type() != BTRFS_KEY_TYPE_ANY
)
553 return B_ENTRY_NOT_FOUND
;
561 BTree::FindNext(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
562 uint32
* _offset
) const
564 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_FORWARD
);
569 BTree::FindPrevious(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
570 uint32
* _offset
) const
572 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_BACKWARD
);
577 BTree::FindExact(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
578 uint32
* _offset
) const
580 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_EXACT
);
585 BTree::PreviousLeaf(Path
* path
) const
587 // TODO: use Traverse() ???
591 // iterate to the root until satisfy the condition
593 node
= path
->GetNode(level
, &slot
);
594 if (node
== NULL
|| slot
!= 0)
599 // the current leaf is already the left most leaf or
600 // path was not initialized
602 return B_ENTRY_NOT_FOUND
;
604 path
->Move(level
, BTREE_BACKWARD
);
605 fsblock_t physicalBlock
;
606 // change all nodes below this level and slot to the ending
608 status_t status
= fVolume
->FindBlock(
609 node
->Index(slot
)->LogicalAddress(), physicalBlock
);
613 node
= path
->SetNode(physicalBlock
, -1);
616 slot
= node
->ItemCount() - 1;
625 BTree::NextLeaf(Path
* path
) const
630 // iterate to the root until satisfy the condition
632 node
= path
->GetNode(level
, &slot
);
633 if (node
== NULL
|| slot
< node
->ItemCount() - 1)
638 // the current leaf is already the right most leaf or
639 // path was not initialized
641 return B_ENTRY_NOT_FOUND
;
643 path
->Move(level
, BTREE_FORWARD
);
644 fsblock_t physicalBlock
;
645 // change all nodes below this level and slot to the beginning
647 status_t status
= fVolume
->FindBlock(
648 node
->Index(slot
)->LogicalAddress(), physicalBlock
);
652 node
= path
->SetNode(physicalBlock
, 0);
664 BTree::SetRoot(off_t logical
, fsblock_t
* block
)
668 //TODO: mapping physical block -> logical address
670 fLogicalRoot
= logical
;
671 if (fVolume
->FindBlock(logical
, fRootBlock
) != B_OK
) {
672 ERROR("Find() unmapped block %" B_PRId64
"\n", fRootBlock
);
681 BTree::_AddIterator(TreeIterator
* iterator
)
683 MutexLocker
_(fIteratorLock
);
684 fIterators
.Add(iterator
);
689 BTree::_RemoveIterator(TreeIterator
* iterator
)
691 MutexLocker
_(fIteratorLock
);
692 fIterators
.Remove(iterator
);
699 TreeIterator::TreeIterator(BTree
* tree
, const btrfs_key
& key
)
703 fIteratorStatus(B_NO_INIT
)
705 tree
->_AddIterator(this);
706 fPath
= new(std::nothrow
) BTree::Path(tree
);
708 fIteratorStatus
= B_NO_MEMORY
;
712 TreeIterator::~TreeIterator()
715 fTree
->_RemoveIterator(this);
723 TreeIterator::Rewind(bool inverse
)
726 fKey
.SetOffset(BTREE_END
);
728 fKey
.SetOffset(BTREE_BEGIN
);
729 fIteratorStatus
= B_NO_INIT
;
733 /*! Iterates through the tree in the specified direction.
736 TreeIterator::_Traverse(btree_traversing direction
)
738 status_t status
= fTree
->Traverse(direction
, fPath
, fKey
);
740 ERROR("TreeIterator::Traverse() Find failed\n");
744 return (fIteratorStatus
= B_OK
);
748 // Like GetEntry in BTree::Path but here we check the type and moving.
750 TreeIterator::_GetEntry(btree_traversing type
, void** _value
, uint32
* _size
,
754 if (fIteratorStatus
== B_NO_INIT
) {
755 status
= _Traverse(type
);
761 if (fIteratorStatus
!= B_OK
)
762 return fIteratorStatus
;
764 int move
= fPath
->Move(0, type
);
766 status
= fTree
->NextLeaf(fPath
);
768 status
= fTree
->PreviousLeaf(fPath
);
773 status
= fPath
->GetCurrentEntry(&found
, _value
, _size
, _offset
);
777 fKey
.SetObjectID(found
.ObjectID());
778 fKey
.SetOffset(found
.Offset());
779 if (fKey
.Type() != found
.Type() && fKey
.Type() != BTRFS_KEY_TYPE_ANY
)
780 return B_ENTRY_NOT_FOUND
;
786 /*! just sets the current key in the iterator.
789 TreeIterator::Find(const btrfs_key
& key
)
791 if (fIteratorStatus
== B_INTERRUPTED
)
792 return fIteratorStatus
;
795 fIteratorStatus
= B_NO_INIT
;
804 fIteratorStatus
= B_INTERRUPTED
;