2 * Copyright 2017, Chế Vũ Gia Hy, cvghy116@gmail.com.
3 * Copyright 2011, Jérôme Duval, korli@users.berlios.de.
4 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
5 * This file may be used under the terms of the MIT License.
9 //! BTree implementation
20 # define TRACE(x...) dprintf("\33[34mbtrfs:\33[0m " x)
22 # define TRACE(x...) ;
24 # define ERROR(x...) dprintf("\33[34mbtrfs:\33[0m " x)
27 BTree::Node::Node(Volume
* volume
)
37 BTree::Node::Node(Volume
* volume
, off_t block
)
64 block_cache_put(fVolume
->BlockCache(), fBlockNumber
);
71 BTree::Node::SetTo(off_t block
)
75 fNode
= (btrfs_stream
*)block_cache_get(fVolume
->BlockCache(), block
);
80 BTree::Node::SetToWritable(off_t block
, int32 transactionId
, bool empty
)
86 fNode
= (btrfs_stream
*)block_cache_get_empty(fVolume
->BlockCache(),
87 block
, transactionId
);
89 fNode
= (btrfs_stream
*)block_cache_get_writable(fVolume
->BlockCache(),
90 block
, transactionId
);
96 BTree::Node::SearchSlot(const btrfs_key
& key
, int* slot
, btree_traversing type
)
99 //binary search for item slot in a node
100 int entrySize
= sizeof(btrfs_entry
);
103 entrySize
= sizeof(btrfs_index
);
106 int high
= ItemCount();
107 int low
= 0, mid
= 0, comp
= 0;
108 uint8
* base
= (uint8
*)fNode
+ sizeof(btrfs_header
);
109 const btrfs_key
* other
;
111 mid
= (low
+ high
) / 2;
112 other
= (const btrfs_key
*)(base
+ mid
* entrySize
);
113 comp
= key
.Compare(*other
);
120 return B_OK
; //if key is in node
124 // |--item1--|--item2--|--item3--|--etc--|
128 if (type
== BTREE_BACKWARD
) {
132 } else if (type
== BTREE_FORWARD
) {
138 if (type
== BTREE_EXACT
|| *slot
< 0)
139 return B_ENTRY_NOT_FOUND
;
141 TRACE("SearchSlot() found slot %" B_PRId32
" comp %" B_PRId32
"\n",
148 * calculate used space except the header.
149 * type is only for leaf node
150 * type 1: only item space
151 * type 2: only item data space
152 * type 3: both type 1 and 2
155 BTree::Node::_CalculateSpace(uint32 from
, uint32 to
, uint8 type
) const
157 if (to
< from
|| to
>= ItemCount())
161 return sizeof(btrfs_index
) * (to
- from
+ 1);
164 if ((type
& 1) == 1) {
165 result
+= sizeof(btrfs_entry
) * (to
- from
+ 1);
167 if ((type
& 2) == 2) {
168 result
+= Item(from
)->Offset() + Item(from
)->Size()
169 - Item(to
)->Offset();
177 BTree::Node::SpaceUsed() const
180 return _CalculateSpace(0, ItemCount() - 1, 3);
181 return _CalculateSpace(0, ItemCount() - 1, 0);
186 BTree::Node::SpaceLeft() const
188 return fVolume
->BlockSize() - SpaceUsed() - sizeof(btrfs_header
);
193 BTree::Node::_Copy(const Node
* origin
, uint32 at
, uint32 from
, uint32 to
,
196 TRACE("Node::_Copy() at: %d from: %d to: %d length: %d\n",
197 at
, from
, to
, length
);
200 memcpy(Item(at
), origin
->Item(from
), origin
->_CalculateSpace(from
, to
));
201 // Item offset of copied node must be changed to get the
202 // item data offset correctly. length can be zero
203 // but let it there doesn't harm anything.
204 for (uint32 i
= at
; i
- at
<= to
- from
; ++i
)
205 Item(i
)->SetOffset(Item(i
)->Offset() - length
);
207 memcpy(ItemData(at
+ to
- from
), origin
->ItemData(to
),
208 origin
->_CalculateSpace(from
, to
, 2));
210 memcpy(Index(at
), origin
->Index(from
),
211 origin
->_CalculateSpace(from
, to
));
217 BTree::Node::_SpaceCheck(int length
) const
219 // this is a little bit weird here because we can't find
220 // any suitable error code
221 if (length
< 0 && std::abs(length
) >= SpaceUsed())
222 return B_DIRECTORY_NOT_EMPTY
; // not enough data to delete
223 if (length
> 0 && length
>= SpaceLeft())
224 return B_DEVICE_FULL
; // no spare space
230 * copy node header, items and items data
231 * length is size to insert/remove
232 * if node is a internal node, length isnt used
233 * length = 0: Copy a whole
234 * length < 0: removing
235 * length > 0: inserting
238 BTree::Node::Copy(const Node
* origin
, uint32 start
, uint32 end
, int length
)
241 status_t status
= origin
->_SpaceCheck(length
);
245 memcpy(fNode
, origin
->fNode
, sizeof(btrfs_header
));
247 // like removing [0, start - 1] and keeping [start, end]
248 length
= -origin
->_CalculateSpace(0, start
- 1, 2);
249 _Copy(origin
, 0, start
, end
, length
);
250 } else if (length
< 0) {
251 //removing all items in [start, end]
253 _Copy(origin
, 0, 0, start
- 1, 0); // <-- [start,...
254 if (end
+ 1 < origin
->ItemCount()) {
256 // we only care data size
257 length
+= origin
->_CalculateSpace(start
, end
);
258 _Copy(origin
, start
, end
+ 1, origin
->ItemCount() - 1, length
);
261 //inserting in [start, end] - make a hole for later
263 _Copy(origin
, 0, 0, start
- 1, 0);
264 if (start
< origin
->ItemCount()) {
265 length
-= origin
->_CalculateSpace(start
, end
);
266 _Copy(origin
, end
+ 1, start
, origin
->ItemCount() - 1, length
);
274 /* Like copy but here we use memmove on current node.
277 BTree::Node::MoveEntries(uint32 start
, uint32 end
, int length
) const
279 status_t status
= _SpaceCheck(length
);
280 if (status
!= B_OK
|| length
== 0/*B_OK*/)
283 int entrySize
= sizeof(btrfs_entry
);
285 entrySize
= sizeof(btrfs_index
);
287 uint8
* base
= (uint8
*)fNode
+ sizeof(btrfs_header
);
290 // removing [start, end]
291 TRACE("Node::MoveEntries() removing ... start %" B_PRIu32
" end %"
292 B_PRIu32
" length %i\n", start
, end
, length
);
293 length
+= _CalculateSpace(start
, end
- 1);
296 // inserting into [start, end] - make room for later
297 TRACE("Node::MoveEntries() inserting ... start %" B_PRIu32
" end %"
298 B_PRIu32
" length %i\n", start
, end
, length
);
299 length
-= _CalculateSpace(start
, end
- 1);
300 std::swap(start
, end
);
303 if (end
>= ItemCount())
306 int dataSize
= _CalculateSpace(end
, ItemCount() - 1, 2);
307 // moving items/block pointers
308 memmove(base
+ start
* entrySize
, base
+ end
* entrySize
,
309 _CalculateSpace(end
, ItemCount() - 1));
312 int num
= start
- end
;
313 for(uint32 i
= start
; i
< ItemCount() + num
; ++i
)
314 Item(i
)->SetOffset(Item(i
)->Offset() - length
);
316 memmove(ItemData(ItemCount() - 1) - length
, ItemData(ItemCount() - 1),
324 // #pragma mark - BTree::Path implementation
327 BTree::Path::Path(BTree
* tree
)
331 for (int i
= 0; i
< BTRFS_MAX_TREE_DEPTH
; ++i
) {
340 for (int i
= 0; i
< BTRFS_MAX_TREE_DEPTH
; ++i
) {
349 BTree::Path::GetNode(int level
, int* _slot
) const
352 *_slot
= fSlots
[level
];
353 return fNodes
[level
];
358 BTree::Path::SetNode(off_t block
, int slot
)
360 Node
node(fTree
->SystemVolume(), block
);
361 return SetNode(&node
, slot
);
366 BTree::Path::SetNode(const Node
* node
, int slot
)
368 uint8 level
= node
->Level();
369 if (fNodes
[level
] == NULL
) {
370 fNodes
[level
] = new Node(fTree
->SystemVolume(), node
->BlockNum());
371 if (fNodes
[level
] == NULL
)
374 fNodes
[level
]->SetTo(node
->BlockNum());
377 fSlots
[level
] = fNodes
[level
]->ItemCount() - 1;
379 fSlots
[level
] = slot
;
380 return fNodes
[level
];
385 BTree::Path::Move(int level
, int step
)
387 fSlots
[level
] += step
;
388 if (fSlots
[level
] < 0)
390 if (fSlots
[level
] >= fNodes
[level
]->ItemCount())
397 BTree::Path::GetEntry(int slot
, btrfs_key
* _key
, void** _value
, uint32
* _size
,
400 BTree::Node
* leaf
= fNodes
[0];
401 if (slot
< 0 || slot
>= leaf
->ItemCount())
402 return B_ENTRY_NOT_FOUND
;
405 *_key
= leaf
->Item(slot
)->key
;
407 uint32 itemSize
= leaf
->Item(slot
)->Size();
408 if (_value
!= NULL
) {
409 *_value
= malloc(itemSize
);
413 memcpy(*_value
, leaf
->ItemData(slot
), itemSize
);
420 *_offset
= leaf
->Item(slot
)->Offset();
427 BTree::Path::SetEntry(int slot
, const btrfs_entry
& entry
, void* value
)
430 return B_ENTRY_NOT_FOUND
;
432 memcpy(fNodes
[0]->Item(slot
), &entry
, sizeof(btrfs_entry
));
433 memcpy(fNodes
[0]->ItemData(slot
), value
, entry
.Size());
439 * Allocate and copy block and do all the changes that it can.
440 * for now, we only copy-on-write tree block,
441 * file data is "nocow" by default.
448 BTree::Path::CopyOnWrite(Transaction
& transaction
, int level
, uint32 start
,
451 Node
* node
= fNodes
[level
];
456 if (transaction
.HasBlock(node
->BlockNum())) {
457 // cow-ed block can not be cow-ed again
458 status
= node
->MoveEntries(start
, start
+ num
- 1, length
);
462 node
->SetGeneration(transaction
.SystemID());
464 node
->SetItemCount(node
->ItemCount() - num
);
466 node
->SetItemCount(node
->ItemCount() + num
);
473 status
= fTree
->SystemVolume()->GetNewBlock(address
, block
);
477 fNodes
[level
] = new(std::nothrow
) BTree::Node(fTree
->SystemVolume());
478 if (fNodes
[level
] == NULL
)
481 fNodes
[level
]->SetToWritable(block
, transaction
.ID(), true);
483 status
= fNodes
[level
]->Copy(node
, start
, start
+ num
- 1, length
);
487 fNodes
[level
]->SetGeneration(transaction
.SystemID());
488 fNodes
[level
]->SetLogicalAddress(address
);
490 fNodes
[level
]->SetItemCount(node
->ItemCount() - num
);
492 fNodes
[level
]->SetItemCount(node
->ItemCount() + num
);
494 fNodes
[level
]->SetItemCount(num
);
496 // change pointer of this node in parent
498 Node
* parentNode
= GetNode(level
+ 1, &parentSlot
);
499 if (parentNode
!= NULL
)
500 parentNode
->Index(parentSlot
)->SetLogicalAddress(address
);
502 if (level
== fTree
->RootLevel())
503 fTree
->SetRoot(fNodes
[level
]);
510 /* Copy-On-Write all internal nodes start from a specific level.
512 * level <= 0: to leaf
514 * path cow-path path cow-path
515 * =================================================
516 * root cow-root root level < 0
518 * n1 cow-n1 ...______
520 * n2 cow-n2 n1 cow-n1
524 * leaf level > 0 leaf cow-leaf
527 BTree::Path::InternalCopy(Transaction
& transaction
, int level
)
529 if (std::abs(level
) >= fTree
->RootLevel())
532 TRACE("Path::InternalCopy() level %i\n", level
);
536 to
= fTree
->RootLevel();
539 to
= std::abs(level
);
546 status
= CopyOnWrite(transaction
, from
, 0, node
->ItemCount(), 0);
556 // #pragma mark - BTree implementation
559 BTree::BTree(Volume
* volume
)
565 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
569 BTree::BTree(Volume
* volume
, btrfs_stream
* stream
)
575 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
579 BTree::BTree(Volume
* volume
, fsblock_t rootBlock
)
581 fRootBlock(rootBlock
),
584 mutex_init(&fIteratorLock
, "btrfs b+tree iterator");
590 // if there are any TreeIterators left, we need to stop them
591 // (can happen when the tree's inode gets deleted while
592 // traversing the tree - a TreeIterator doesn't lock the inode)
593 mutex_lock(&fIteratorLock
);
595 SinglyLinkedList
<TreeIterator
>::Iterator iterator
596 = fIterators
.GetIterator();
597 while (iterator
.HasNext())
598 iterator
.Next()->Stop();
599 mutex_destroy(&fIteratorLock
);
604 btrfs_key::Compare(const btrfs_key
& key
) const
606 if (ObjectID() > key
.ObjectID())
608 if (ObjectID() < key
.ObjectID())
610 if (Type() > key
.Type())
612 if (Type() < key
.Type())
614 if (Offset() > key
.Offset())
616 if (Offset() < key
.Offset())
622 /* Traverse from root to fill in the path along way its finding.
623 * Return current slot at leaf if successful.
626 BTree::Traverse(btree_traversing type
, Path
* path
, const btrfs_key
& key
)
629 TRACE("BTree::Traverse() objectid %" B_PRId64
" type %d offset %"
630 B_PRId64
" \n", key
.ObjectID(), key
.Type(), key
.Offset());
631 fsblock_t physicalBlock
= fRootBlock
;
632 Node
node(fVolume
, physicalBlock
);
634 status_t status
= B_OK
;
636 while (node
.Level() != 0) {
637 TRACE("BTree::Traverse() level %d count %d\n", node
.Level(),
639 status
= node
.SearchSlot(key
, &slot
, BTREE_BACKWARD
);
642 if (path
->SetNode(&node
, slot
) == NULL
)
645 TRACE("BTree::Traverse() getting index %" B_PRIu32
"\n", slot
);
647 status
= fVolume
->FindBlock(node
.Index(slot
)->LogicalAddress(),
649 if (status
!= B_OK
) {
650 ERROR("BTree::Traverse() unmapped block %" B_PRId64
"\n",
651 node
.Index(slot
)->LogicalAddress());
654 node
.SetTo(physicalBlock
);
657 TRACE("BTree::Traverse() dump count %" B_PRId32
"\n", node
.ItemCount());
658 status
= node
.SearchSlot(key
, &slot
, type
);
661 if (path
->SetNode(&node
, slot
) == NULL
)
664 TRACE("BTree::Traverse() found %" B_PRIu32
" %" B_PRIu32
"\n",
665 node
.Item(slot
)->Offset(), node
.Item(slot
)->Size());
670 /*! Searches the key in the tree, and stores the allocated found item in
671 _value, if successful.
672 Returns B_OK when the key could be found, B_ENTRY_NOT_FOUND if not.
673 It can also return other errors to indicate that something went wrong.
676 BTree::_Find(Path
* path
, btrfs_key
& wanted
, void** _value
, uint32
* _size
,
677 uint32
* _offset
, btree_traversing type
) const
679 status_t status
= Traverse(type
, path
, wanted
);
684 status
= path
->GetCurrentEntry(&found
, _value
, _size
, _offset
);
688 if (found
.Type() != wanted
.Type() && wanted
.Type() != BTRFS_KEY_TYPE_ANY
)
689 return B_ENTRY_NOT_FOUND
;
697 BTree::FindNext(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
698 uint32
* _offset
) const
700 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_FORWARD
);
705 BTree::FindPrevious(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
706 uint32
* _offset
) const
708 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_BACKWARD
);
713 BTree::FindExact(Path
* path
, btrfs_key
& key
, void** _value
, uint32
* _size
,
714 uint32
* _offset
) const
716 return _Find(path
, key
, _value
, _size
, _offset
, BTREE_EXACT
);
721 * Insert "num" of consecutive empty entries start with slot of "startKey"
722 * if successful return the starting slot, otherwise return error code.
725 BTree::MakeEntries(Transaction
& transaction
, Path
* path
,
726 const btrfs_key
& startKey
, int num
, int length
)
728 TRACE("BTree::MakeEntries() num %i key (% " B_PRIu64
" %" B_PRIu8
" %"
729 B_PRIu64
")\n", num
, startKey
.ObjectID(), startKey
.Type(),
732 status_t status
= Traverse(BTREE_FORWARD
, path
, startKey
);
737 status
= path
->InternalCopy(transaction
, 1);
741 status
= path
->CopyOnWrite(transaction
, 0, slot
, num
, length
);
742 if (status
== B_DEVICE_FULL
) {
743 // TODO: push data or split node
753 /* MakeEntries and then fill in them.
756 BTree::InsertEntries(Transaction
& transaction
, Path
* path
,
757 btrfs_entry
* entries
, void** data
, int num
)
759 int totalLength
= sizeof(btrfs_entry
) * num
;
760 for (int i
= 0; i
< num
; i
++)
761 totalLength
+= entries
[i
].Size();
763 status_t slot
= MakeEntries(transaction
, path
, entries
[0].key
, num
,
770 path
->GetEntry(slot
- 1, NULL
, NULL
, NULL
, &upperLimit
);
772 upperLimit
= fVolume
->BlockSize() - sizeof(btrfs_header
);
774 TRACE("BTree::InsertEntries() num: %i upper limit %" B_PRIu32
"\n", num
,
776 for (int i
= 0; i
< num
; i
++) {
777 upperLimit
-= entries
[i
].Size();
778 entries
[i
].SetOffset(upperLimit
);
779 path
->SetEntry(slot
+ i
, entries
[i
], data
[i
]);
786 /* Like MakeEntries, but here we remove entries instead.
787 * Removed data stored in _data
788 * May merge those functions into one.
791 BTree::RemoveEntries(Transaction
& transaction
, Path
* path
,
792 const btrfs_key
& startKey
, void** _data
, int num
)
794 TRACE("BTree::RemoveEntries() num %i key (% " B_PRIu64
" %" B_PRIu8
" %"
795 B_PRIu64
")\n", num
, startKey
.ObjectID(), startKey
.Type(),
798 status_t status
= Traverse(BTREE_EXACT
, path
, startKey
);
803 int length
= -sizeof(btrfs_entry
) * num
;
804 for (int i
= 0; i
< num
; i
++) {
806 path
->GetEntry(slot
+ i
, NULL
, &_data
[i
], &itemSize
);
810 status
= path
->InternalCopy(transaction
, 1);
814 status
= path
->CopyOnWrite(transaction
, 0, slot
, num
, length
);
815 if (status
== B_DIRECTORY_NOT_EMPTY
) {
816 // TODO: merge node or push data
824 BTree::PreviousLeaf(Path
* path
) const
826 // TODO: use Traverse() ???
830 // iterate to the root until satisfy the condition
832 node
= path
->GetNode(level
, &slot
);
833 if (node
== NULL
|| slot
!= 0)
838 // the current leaf is already the left most leaf or
839 // path was not initialized
841 return B_ENTRY_NOT_FOUND
;
843 path
->Move(level
, BTREE_BACKWARD
);
844 fsblock_t physicalBlock
;
845 // change all nodes below this level and slot to the ending
847 status_t status
= fVolume
->FindBlock(
848 node
->Index(slot
)->LogicalAddress(), physicalBlock
);
852 node
= path
->SetNode(physicalBlock
, -1);
855 slot
= node
->ItemCount() - 1;
864 BTree::NextLeaf(Path
* path
) const
869 // iterate to the root until satisfy the condition
871 node
= path
->GetNode(level
, &slot
);
872 if (node
== NULL
|| slot
< node
->ItemCount() - 1)
877 // the current leaf is already the right most leaf or
878 // path was not initialized
880 return B_ENTRY_NOT_FOUND
;
882 path
->Move(level
, BTREE_FORWARD
);
883 fsblock_t physicalBlock
;
884 // change all nodes below this level and slot to the beginning
886 status_t status
= fVolume
->FindBlock(
887 node
->Index(slot
)->LogicalAddress(), physicalBlock
);
891 node
= path
->SetNode(physicalBlock
, 0);
902 /* Set root infomation, to use this function root must be valid and
906 BTree::SetRoot(off_t logical
, fsblock_t
* block
)
911 if (fVolume
->FindBlock(logical
, fRootBlock
) != B_OK
) {
912 ERROR("Find() unmapped block %" B_PRId64
"\n", fRootBlock
);
918 read_pos(fVolume
->Device(), fRootBlock
* fVolume
->BlockSize(), &header
,
919 sizeof(btrfs_header
));
920 fRootLevel
= header
.Level();
921 fLogicalRoot
= header
.LogicalAddress();
927 BTree::SetRoot(Node
* root
)
929 fRootBlock
= root
->BlockNum();
930 fLogicalRoot
= root
->LogicalAddress();
931 fRootLevel
= root
->Level();
936 BTree::_AddIterator(TreeIterator
* iterator
)
938 MutexLocker
_(fIteratorLock
);
939 fIterators
.Add(iterator
);
944 BTree::_RemoveIterator(TreeIterator
* iterator
)
946 MutexLocker
_(fIteratorLock
);
947 fIterators
.Remove(iterator
);
954 TreeIterator::TreeIterator(BTree
* tree
, const btrfs_key
& key
)
958 fIteratorStatus(B_NO_INIT
)
960 tree
->_AddIterator(this);
961 fPath
= new(std::nothrow
) BTree::Path(tree
);
963 fIteratorStatus
= B_NO_MEMORY
;
967 TreeIterator::~TreeIterator()
970 fTree
->_RemoveIterator(this);
978 TreeIterator::Rewind(bool inverse
)
981 fKey
.SetOffset(BTREE_END
);
983 fKey
.SetOffset(BTREE_BEGIN
);
984 fIteratorStatus
= B_NO_INIT
;
988 /*! Iterates through the tree in the specified direction.
991 TreeIterator::_Traverse(btree_traversing direction
)
993 status_t status
= fTree
->Traverse(direction
, fPath
, fKey
);
995 ERROR("TreeIterator::Traverse() Find failed\n");
999 return (fIteratorStatus
= B_OK
);
1003 // Like GetEntry in BTree::Path but here we check the type and moving.
1005 TreeIterator::_GetEntry(btree_traversing type
, void** _value
, uint32
* _size
,
1009 if (fIteratorStatus
== B_NO_INIT
) {
1010 status
= _Traverse(type
);
1016 if (fIteratorStatus
!= B_OK
)
1017 return fIteratorStatus
;
1019 int move
= fPath
->Move(0, type
);
1021 status
= fTree
->NextLeaf(fPath
);
1023 status
= fTree
->PreviousLeaf(fPath
);
1028 status
= fPath
->GetCurrentEntry(&found
, _value
, _size
, _offset
);
1032 fKey
.SetObjectID(found
.ObjectID());
1033 fKey
.SetOffset(found
.Offset());
1034 if (fKey
.Type() != found
.Type() && fKey
.Type() != BTRFS_KEY_TYPE_ANY
)
1035 return B_ENTRY_NOT_FOUND
;
1041 /*! just sets the current key in the iterator.
1044 TreeIterator::Find(const btrfs_key
& key
)
1046 if (fIteratorStatus
== B_INTERRUPTED
)
1047 return fIteratorStatus
;
1050 fIteratorStatus
= B_NO_INIT
;
1056 TreeIterator::Stop()
1059 fIteratorStatus
= B_INTERRUPTED
;