btrfs: Attempt to fix GCC2 build.
[haiku.git] / src / bin / bfs_tools / lib / BPlusTree.cpp
blob032b181ea808b7502414a019493e411a0ff484f1
1 /* BPlusTree - BFS B+Tree implementation
2 **
3 ** Copyright 2001-2002 pinc Software. All Rights Reserved.
4 ** Released under the terms of the MIT license.
5 */
8 #include "BPlusTree.h"
9 #include "Inode.h"
10 #include "Stack.h"
11 #include "dump.h"
13 #include <Debug.h>
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
20 #define MAX_NODES_IN_CACHE 20
22 class CacheableNode : public NodeCache::Cacheable
24 public:
25 CacheableNode(off_t offset,bplustree_node *node)
27 fOffset(offset),
28 fNode(node)
32 virtual ~CacheableNode()
34 if (fNode)
35 free(fNode);
38 virtual bool Equals(off_t offset)
40 return offset == fOffset;
43 off_t fOffset;
44 bplustree_node *fNode;
48 NodeCache::NodeCache(BPlusTree *tree)
49 : Cache<off_t>(),
50 fTree(tree)
55 Cache<off_t>::Cacheable *
56 NodeCache::NewCacheable(off_t offset)
58 return new CacheableNode(offset,fTree->Node(offset,false));
62 bplustree_node *
63 NodeCache::Get(off_t offset)
65 CacheableNode *node = (CacheableNode *)Cache<off_t>::Get(offset);
66 return node->fNode;
70 // #pragma mark -
73 BPlusTree::BPlusTree(int32 keyType,int32 nodeSize,bool allowDuplicates)
75 fStream(NULL),
76 fHeader(NULL),
77 fCache(this),
78 fCurrentNodeOffset(BPLUSTREE_NULL)
80 SetTo(keyType,nodeSize,allowDuplicates);
84 BPlusTree::BPlusTree(BPositionIO *stream,bool allowDuplicates)
86 fStream(NULL),
87 fHeader(NULL),
88 fCache(this),
89 fCurrentNodeOffset(BPLUSTREE_NULL)
91 SetTo(stream,allowDuplicates);
95 BPlusTree::BPlusTree()
97 fStream(NULL),
98 fHeader(NULL),
99 fNodeSize(BPLUSTREE_NODE_SIZE),
100 fAllowDuplicates(true),
101 fStatus(B_NO_INIT),
102 fCache(this),
103 fCurrentNodeOffset(BPLUSTREE_NULL)
108 BPlusTree::~BPlusTree()
110 fCache.Clear();
114 void BPlusTree::Initialize(int32 nodeSize)
116 // free old data
117 fCache.Clear(0,true);
119 if (fHeader)
120 free(fHeader);
122 fStream = NULL;
124 fNodeSize = nodeSize;
125 fHeader = (bplustree_header *)malloc(fNodeSize);
126 memset(fHeader,0,fNodeSize);
128 fCurrentNodeOffset = BPLUSTREE_NULL;
132 status_t BPlusTree::SetTo(int32 keyType,int32 nodeSize,bool allowDuplicates)
134 // initializes in-memory B+Tree
136 Initialize(nodeSize);
138 fAllowDuplicates = allowDuplicates;
139 fCache.SetHoldChanges(true);
141 // initialize b+tree header
142 fHeader->magic = BPLUSTREE_MAGIC;
143 fHeader->node_size = fNodeSize;
144 fHeader->max_number_of_levels = 1;
145 fHeader->data_type = keyType;
146 fHeader->root_node_pointer = fNodeSize;
147 fHeader->free_node_pointer = BPLUSTREE_NULL;
148 fHeader->maximum_size = fNodeSize * 2;
150 return fStatus = B_OK;
154 status_t BPlusTree::SetTo(BPositionIO *stream,bool allowDuplicates)
156 // initializes on-disk B+Tree
158 bplustree_header header;
159 ssize_t read = stream->ReadAt(0,&header,sizeof(bplustree_header));
160 if (read < 0)
161 return fStatus = read;
163 // is header valid?
165 stream->Seek(0,SEEK_END);
166 off_t size = stream->Position();
167 stream->Seek(0,SEEK_SET);
169 //dump_bplustree_header(&header);
171 if (header.magic != BPLUSTREE_MAGIC
172 || header.maximum_size != size
173 || (header.root_node_pointer % header.node_size) != 0
174 || !header.IsValidLink(header.root_node_pointer)
175 || !header.IsValidLink(header.free_node_pointer))
176 return fStatus = B_BAD_DATA;
178 fAllowDuplicates = allowDuplicates;
180 if (DataStream *dataStream = dynamic_cast<DataStream *>(stream))
182 uint32 toMode[] = {S_STR_INDEX, S_INT_INDEX, S_UINT_INDEX, S_LONG_LONG_INDEX,
183 S_ULONG_LONG_INDEX, S_FLOAT_INDEX, S_DOUBLE_INDEX};
184 uint32 mode = dataStream->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX
185 | S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX);
187 if (header.data_type > BPLUSTREE_DOUBLE_TYPE
188 || (dataStream->Mode() & S_INDEX_DIR) && toMode[header.data_type] != mode
189 || !dataStream->IsDirectory())
190 return fStatus = B_BAD_TYPE;
192 // although it's in stat.h, the S_ALLOW_DUPS flag is obviously unused
193 fAllowDuplicates = (dataStream->Mode() & (S_INDEX_DIR | 0777)) == S_INDEX_DIR;
195 //printf("allows duplicates? %s\n",fAllowDuplicates ? "yes" : "no");
198 Initialize(header.node_size);
200 memcpy(fHeader,&header,sizeof(bplustree_header));
202 fStream = stream;
204 bplustree_node *node = fCache.Get(header.root_node_pointer);
205 //if (node)
206 // dump_bplustree_node(node);
208 return fStatus = node && CheckNode(node) ? B_OK : B_BAD_DATA;
212 status_t BPlusTree::InitCheck()
214 return fStatus;
218 struct validate_info {
219 off_t offset;
220 off_t from;
221 bool free;
225 status_t
226 BPlusTree::Validate(bool verbose)
228 // validates the on-disk b+tree
229 // (for now only the node integrity is checked)
231 if (!fStream)
232 return B_OK;
234 struct validate_info info;
236 Stack<validate_info> stack;
237 info.offset = fHeader->root_node_pointer;
238 info.from = BPLUSTREE_NULL;
239 info.free = false;
240 stack.Push(info);
242 if (fHeader->free_node_pointer != BPLUSTREE_NULL) {
243 info.offset = fHeader->free_node_pointer;
244 info.free = true;
245 stack.Push(info);
248 char escape[3] = {0x1b, '[', 0};
249 int32 count = 0,freeCount = 0;
251 while (true) {
252 if (!stack.Pop(&info)) {
253 if (verbose) {
254 printf(" %" B_PRId32 " B+tree node(s) processed (%" B_PRId32
255 " free node(s)).\n", count, freeCount);
257 return B_OK;
260 if (!info.free && verbose && ++count % 10 == 0)
261 printf(" %" B_PRId32 "%s1A\n", count, escape);
262 else if (info.free)
263 freeCount++;
265 bplustree_node *node;
266 if ((node = fCache.Get(info.offset)) == NULL
267 || !info.free && !CheckNode(node)) {
268 if (verbose) {
269 fprintf(stderr," B+Tree: Could not get data at position %"
270 B_PRIdOFF " (referenced by node at %" B_PRIdOFF ")\n",
271 info.offset, info.from);
272 if ((node = fCache.Get(info.from)) != NULL)
273 dump_bplustree_node(node,fHeader);
275 return B_BAD_DATA;
278 info.from = info.offset;
280 if (node->overflow_link != BPLUSTREE_NULL && !info.free) {
281 info.offset = node->overflow_link;
282 stack.Push(info);
284 //dump_bplustree_node(node,fHeader);
285 off_t *values = node->Values();
286 for (int32 i = 0;i < node->all_key_count;i++) {
287 info.offset = values[i];
288 stack.Push(info);
290 } else if (node->left_link != BPLUSTREE_NULL && info.free) {
291 info.offset = node->left_link;
292 stack.Push(info);
298 status_t
299 BPlusTree::WriteTo(BPositionIO *stream)
301 ssize_t written = stream->WriteAt(0,fHeader,fNodeSize);
302 if (written < fNodeSize)
303 return written < B_OK ? written : B_IO_ERROR;
305 for (off_t offset = fNodeSize;offset < fHeader->maximum_size;offset += fNodeSize)
307 bplustree_node *node = fCache.Get(offset);
308 if (node == NULL)
309 return B_ERROR;
311 written = stream->WriteAt(offset,node,fNodeSize);
312 if (written < fNodeSize)
313 return written < B_OK ? written : B_IO_ERROR;
315 return stream->SetSize(fHeader->maximum_size);
319 // #pragma mark -
322 void BPlusTree::SetCurrentNode(bplustree_node *node,off_t offset,int8 to)
324 fCurrentNodeOffset = offset;
325 fCurrentKey = to == BPLUSTREE_BEGIN ? -1 : node->all_key_count;
326 fDuplicateNode = BPLUSTREE_NULL;
330 status_t BPlusTree::Goto(int8 to)
332 if (fHeader == NULL)
333 return B_BAD_VALUE;
335 Stack<off_t> stack;
336 if (stack.Push(fHeader->root_node_pointer) < B_OK)
337 return B_NO_MEMORY;
339 bplustree_node *node;
340 off_t pos;
341 while (stack.Pop(&pos) && (node = fCache.Get(pos)) != NULL && CheckNode(node))
343 // is the node a leaf node?
344 if (node->overflow_link == BPLUSTREE_NULL)
346 SetCurrentNode(node,pos,to);
348 return B_OK;
351 if (to == BPLUSTREE_END || node->all_key_count == 0)
352 pos = node->overflow_link;
353 else
355 if (node->all_key_length > fNodeSize
356 || (addr_t)node->Values() > (addr_t)node + fNodeSize
357 - 8 * node->all_key_count)
358 return B_ERROR;
360 pos = *node->Values();
363 if (stack.Push(pos) < B_OK)
364 break;
366 return B_ERROR;
370 status_t BPlusTree::Traverse(int8 direction,void *key,uint16 *keyLength,uint16 maxLength,off_t *value)
372 if (fCurrentNodeOffset == BPLUSTREE_NULL
373 && Goto(direction == BPLUSTREE_FORWARD ? BPLUSTREE_BEGIN : BPLUSTREE_END) < B_OK)
374 return B_ERROR;
376 bplustree_node *node;
378 if (fDuplicateNode != BPLUSTREE_NULL)
380 // regardless of traverse direction the duplicates are always presented in
381 // the same order; since they are all considered as equal, this shouldn't
382 // cause any problems
384 if (!fIsFragment || fDuplicate < fNumDuplicates)
385 node = fCache.Get(bplustree_node::FragmentOffset(fDuplicateNode));
386 else
387 node = NULL;
389 if (node != NULL)
391 if (!fIsFragment && fDuplicate >= fNumDuplicates)
393 // if the node is out of duplicates, we go directly to the next one
394 fDuplicateNode = node->right_link;
395 if (fDuplicateNode != BPLUSTREE_NULL
396 && (node = fCache.Get(fDuplicateNode)) != NULL)
398 fNumDuplicates = node->CountDuplicates(fDuplicateNode,false);
399 fDuplicate = 0;
402 if (fDuplicate < fNumDuplicates)
404 *value = node->DuplicateAt(fDuplicateNode,fIsFragment,fDuplicate++);
405 return B_OK;
408 fDuplicateNode = BPLUSTREE_NULL;
411 off_t savedNodeOffset = fCurrentNodeOffset;
412 if ((node = fCache.Get(fCurrentNodeOffset)) == NULL || !CheckNode(node))
413 return B_ERROR;
415 fCurrentKey += direction;
417 // is the current key in the current node?
418 while ((direction == BPLUSTREE_FORWARD && fCurrentKey >= node->all_key_count)
419 || (direction == BPLUSTREE_BACKWARD && fCurrentKey < 0))
421 fCurrentNodeOffset = direction == BPLUSTREE_FORWARD ? node->right_link : node->left_link;
423 // are there any more nodes?
424 if (fCurrentNodeOffset != BPLUSTREE_NULL)
426 node = fCache.Get(fCurrentNodeOffset);
427 if (!node || !CheckNode(node))
428 return B_ERROR;
430 // reset current key
431 fCurrentKey = direction == BPLUSTREE_FORWARD ? 0 : node->all_key_count;
433 else
435 // there are no nodes left, so turn back to the last key
436 fCurrentNodeOffset = savedNodeOffset;
437 fCurrentKey = direction == BPLUSTREE_FORWARD ? node->all_key_count : -1;
439 return B_ENTRY_NOT_FOUND;
443 if (node->all_key_count == 0)
444 return B_ERROR; //B_ENTRY_NOT_FOUND;
446 uint16 length;
447 uint8 *keyStart = node->KeyAt(fCurrentKey,&length);
449 length = min_c(length,maxLength);
450 memcpy(key,keyStart,length);
452 if (fHeader->data_type == BPLUSTREE_STRING_TYPE) // terminate string type
454 if (length == maxLength)
455 length--;
456 ((char *)key)[length] = '\0';
458 *keyLength = length;
460 off_t offset = node->Values()[fCurrentKey];
462 // duplicate fragments?
463 uint8 type = bplustree_node::LinkType(offset);
464 if (type == BPLUSTREE_DUPLICATE_FRAGMENT || type == BPLUSTREE_DUPLICATE_NODE)
466 fDuplicateNode = offset;
468 node = fCache.Get(bplustree_node::FragmentOffset(fDuplicateNode));
469 if (node == NULL)
470 return B_ERROR;
472 fIsFragment = type == BPLUSTREE_DUPLICATE_FRAGMENT;
474 fNumDuplicates = node->CountDuplicates(offset,fIsFragment);
475 if (fNumDuplicates)
477 // give back first duplicate
478 fDuplicate = 1;
479 offset = node->DuplicateAt(offset,fIsFragment,0);
481 else
483 // shouldn't happen, but we're dealing here with corrupt disks...
484 fDuplicateNode = BPLUSTREE_NULL;
485 offset = 0;
488 *value = offset;
490 return B_OK;
494 int32 BPlusTree::CompareKeys(const void *key1, int keyLength1, const void *key2, int keyLength2)
496 switch (fHeader->data_type)
498 case BPLUSTREE_STRING_TYPE:
500 int len = min_c(keyLength1,keyLength2);
501 int result = strncmp((const char *)key1,(const char *)key2,len);
503 if (result == 0)
504 result = keyLength1 - keyLength2;
506 return result;
509 case BPLUSTREE_INT32_TYPE:
510 return *(int32 *)key1 - *(int32 *)key2;
512 case BPLUSTREE_UINT32_TYPE:
514 if (*(uint32 *)key1 == *(uint32 *)key2)
515 return 0;
516 else if (*(uint32 *)key1 > *(uint32 *)key2)
517 return 1;
519 return -1;
522 case BPLUSTREE_INT64_TYPE:
524 if (*(int64 *)key1 == *(int64 *)key2)
525 return 0;
526 else if (*(int64 *)key1 > *(int64 *)key2)
527 return 1;
529 return -1;
532 case BPLUSTREE_UINT64_TYPE:
534 if (*(uint64 *)key1 == *(uint64 *)key2)
535 return 0;
536 else if (*(uint64 *)key1 > *(uint64 *)key2)
537 return 1;
539 return -1;
542 case BPLUSTREE_FLOAT_TYPE:
544 float result = *(float *)key1 - *(float *)key2;
545 if (result == 0.0f)
546 return 0;
548 return (result < 0.0f) ? -1 : 1;
551 case BPLUSTREE_DOUBLE_TYPE:
553 double result = *(double *)key1 - *(double *)key2;
554 if (result == 0.0)
555 return 0;
557 return (result < 0.0) ? -1 : 1;
560 return 0;
564 status_t BPlusTree::FindKey(bplustree_node *node,uint8 *key,uint16 keyLength,uint16 *index,off_t *next)
566 if (node->all_key_count == 0)
568 if (index)
569 *index = 0;
570 if (next)
571 *next = node->overflow_link;
572 return B_ENTRY_NOT_FOUND;
575 off_t *values = node->Values();
576 int16 saveIndex = 0;
578 // binary search in the key array
579 for (int16 first = 0,last = node->all_key_count - 1;first <= last;)
581 uint16 i = (first + last) >> 1;
583 uint16 searchLength;
584 uint8 *searchKey = node->KeyAt(i,&searchLength);
586 int32 cmp = CompareKeys(key,keyLength,searchKey,searchLength);
587 if (cmp < 0)
589 last = i - 1;
590 saveIndex = i;
592 else if (cmp > 0)
594 saveIndex = first = i + 1;
596 else
598 if (index)
599 *index = i;
600 if (next)
601 *next = values[i];
602 return B_OK;
606 if (index)
607 *index = saveIndex;
608 if (next)
610 if (saveIndex == node->all_key_count)
611 *next = node->overflow_link;
612 else
613 *next = values[saveIndex];
615 return B_ENTRY_NOT_FOUND;
619 status_t BPlusTree::SeekDown(Stack<node_and_key> &stack,uint8 *key,uint16 keyLength)
621 node_and_key nodeAndKey;
622 nodeAndKey.nodeOffset = fHeader->root_node_pointer;
623 nodeAndKey.keyIndex = 0;
625 bplustree_node *node;
626 while ((node = fCache.Get(nodeAndKey.nodeOffset)) != NULL && CheckNode(node)) {
627 // if we are already on leaf level, we're done
628 if (node->overflow_link == BPLUSTREE_NULL) {
629 // put the node on the stack
630 // node that the keyIndex is not properly set here!
631 nodeAndKey.keyIndex = 0;
632 stack.Push(nodeAndKey);
633 return B_OK;
636 off_t nextOffset;
637 status_t status = FindKey(node,key,keyLength,&nodeAndKey.keyIndex,&nextOffset);
639 if (status == B_ENTRY_NOT_FOUND && nextOffset == nodeAndKey.nodeOffset)
640 return B_ERROR;
642 // put the node & the correct keyIndex on the stack
643 stack.Push(nodeAndKey);
645 nodeAndKey.nodeOffset = nextOffset;
647 return B_ERROR;
651 void BPlusTree::InsertKey(bplustree_node *node,uint8 *key,uint16 keyLength,off_t value,uint16 index)
653 // should never happen, but who knows?
654 if (index > node->all_key_count)
655 return;
657 off_t *values = node->Values();
658 uint16 *keyLengths = node->KeyLengths();
659 uint8 *keys = node->Keys();
661 node->all_key_count++;
662 node->all_key_length += keyLength;
664 off_t *newValues = node->Values();
665 uint16 *newKeyLengths = node->KeyLengths();
667 // move values and copy new value into them
668 memmove(newValues + index + 1,values + index,sizeof(off_t) * (node->all_key_count - 1 - index));
669 memmove(newValues,values,sizeof(off_t) * index);
671 newValues[index] = value;
673 // move and update key length index
674 for (uint16 i = node->all_key_count;i-- > index + 1;)
675 newKeyLengths[i] = keyLengths[i - 1] + keyLength;
676 memmove(newKeyLengths,keyLengths,sizeof(uint16) * index);
678 int32 keyStart;
679 newKeyLengths[index] = keyLength + (keyStart = index > 0 ? newKeyLengths[index - 1] : 0);
681 // move keys and copy new key into them
682 int32 size = node->all_key_length - newKeyLengths[index];
683 if (size > 0)
684 memmove(keys + newKeyLengths[index],keys + newKeyLengths[index] - keyLength,size);
686 memcpy(keys + keyStart,key,keyLength);
690 status_t BPlusTree::InsertDuplicate(bplustree_node */*node*/,uint16 /*index*/)
692 printf("DUPLICATE ENTRY!!\n");
694 // /* handle dup keys */
695 // if (dupflg == 0)
696 // {
697 // bt_errno(b) = BT_DUPKEY;
698 // goto bombout;
699 // }
700 // else
701 // {
702 // /* paste new data ptr in page */
703 // /* and write it out again. */
704 // off_t *p;
705 // p = (off_t *)KEYCLD(op->p);
706 // *(p + keyat) = rrn;
707 // op->flags = BT_CHE_DIRTY;
708 // if(bt_wpage(b,op) == BT_ERR ||
709 // bt_wpage(b,kp) == BT_ERR)
710 // return(BT_ERR);
712 // /* mark all as well with tree */
713 // bt_clearerr(b);
714 // return(BT_OK);
715 // }
717 return B_OK;
721 status_t BPlusTree::SplitNode(bplustree_node *node,off_t nodeOffset,uint16 *_keyIndex,uint8 *key,uint16 *_keyLength,off_t *_value)
723 if (*_keyIndex > node->all_key_count + 1)
724 return B_BAD_VALUE;
726 //printf("before (insert \"%s\" (%d bytes) at %d):\n\n",key,*_keyLength,*_keyIndex);
727 //dump_bplustree_node(node,fHeader);
728 //hexdump(node,fNodeSize);
730 off_t otherOffset;
731 bplustree_node *other = fCache.Get(otherOffset = fHeader->maximum_size); //Node(otherOffset = fHeader->maximum_size/*,false*/);
732 if (other == NULL)
733 return B_NO_MEMORY;
735 uint16 *inKeyLengths = node->KeyLengths();
736 off_t *inKeyValues = node->Values();
737 uint8 *inKeys = node->Keys();
738 uint8 *outKeys = other->Keys();
739 uint16 keyIndex = *_keyIndex;
741 // how many keys will fit in one (half) page?
743 // "bytes" is the number of bytes written for the new key,
744 // "bytesBefore" are the bytes before that key
745 // "bytesAfter" are the bytes after the new key, if any
746 int32 bytes = 0,bytesBefore = 0,bytesAfter = 0;
748 size_t size = fNodeSize >> 1;
749 int32 out,in;
750 for (in = out = 0;in < node->all_key_count + 1;) {
751 if (!bytes)
752 bytesBefore = in > 0 ? inKeyLengths[in - 1] : 0;
753 if (in == keyIndex && !bytes) {
754 bytes = *_keyLength;
755 } else {
756 if (keyIndex < out) {
757 bytesAfter = inKeyLengths[in] - bytesBefore;
758 // fix the key lengths for the new node
759 inKeyLengths[in] = bytesAfter + bytesBefore + bytes;
760 } //else
761 in++;
763 out++;
765 if (round_up(sizeof(bplustree_node) + bytesBefore + bytesAfter + bytes) +
766 out * (sizeof(uint16) + sizeof(off_t)) >= size) {
767 // we have found the number of keys in the new node!
768 break;
772 // if the new key was not inserted, set the length of the keys
773 // that can be copied directly
774 if (keyIndex >= out && in > 0)
775 bytesBefore = inKeyLengths[in - 1];
777 if (bytesBefore < 0 || bytesAfter < 0)
778 return B_BAD_DATA;
780 //printf("put %ld keys in the other node (%ld, %ld, %ld) (in = %ld)\n",out,bytesBefore,bytes,bytesAfter,in);
782 other->left_link = node->left_link;
783 other->right_link = nodeOffset;
784 other->all_key_length = bytes + bytesBefore + bytesAfter;
785 other->all_key_count = out;
786 //printf("-> used = %ld (bplustree_node = %ld bytes)\n",other->Used(),sizeof(bplustree_node));
788 uint16 *outKeyLengths = other->KeyLengths();
789 off_t *outKeyValues = other->Values();
790 int32 keys = out > keyIndex ? keyIndex : out;
792 if (bytesBefore) {
793 // copy the keys
794 memcpy(outKeys,inKeys,bytesBefore);
795 memcpy(outKeyLengths,inKeyLengths,keys * sizeof(uint16));
796 memcpy(outKeyValues,inKeyValues,keys * sizeof(off_t));
798 if (bytes) {
799 // copy the newly inserted key
800 memcpy(outKeys + bytesBefore,key,bytes);
801 outKeyLengths[keyIndex] = bytes + bytesBefore;
802 outKeyValues[keyIndex] = *_value;
804 if (bytesAfter) {
805 // copy the keys after the new key
806 memcpy(outKeys + bytesBefore + bytes,inKeys + bytesBefore,bytesAfter);
807 keys = out - keyIndex - 1;
808 memcpy(outKeyLengths + keyIndex + 1,inKeyLengths + keyIndex,keys * sizeof(uint16));
809 memcpy(outKeyValues + keyIndex + 1,inKeyValues + keyIndex,keys * sizeof(off_t));
813 // if the new key was already inserted, we shouldn't use it again
814 if (in != out)
815 keyIndex--;
817 int32 total = bytesBefore + bytesAfter;
819 // if we have split an index node, we have to drop the first key
820 // of the next node (which can also be the new key to insert)
821 if (node->overflow_link != BPLUSTREE_NULL) {
822 if (in == keyIndex) {
823 other->overflow_link = *_value;
824 keyIndex--;
825 } else {
826 other->overflow_link = inKeyValues[in];
827 total = inKeyLengths[in++];
831 // and now the same game for the other page and the rest of the keys
832 // (but with memmove() instead of memcpy(), because they may overlap)
834 bytesBefore = bytesAfter = bytes = 0;
835 out = 0;
836 int32 skip = in;
837 while (in < node->all_key_count + 1) {
838 //printf("in = %ld, keyIndex = %d, bytes = %ld\n",in,keyIndex,bytes);
839 if (in == keyIndex && !bytes) {
840 bytesBefore = in > skip ? inKeyLengths[in - 1] : 0;
841 //printf("bytesBefore = %ld\n",bytesBefore);
842 bytes = *_keyLength;
843 } else if (in < node->all_key_count) {
844 //printf("1.inKeyLength[%ld] = %u\n",in,inKeyLengths[in]);
845 inKeyLengths[in] -= total;
846 if (bytes) {
847 inKeyLengths[in] += bytes;
848 bytesAfter = inKeyLengths[in] - bytesBefore - bytes;
849 //printf("2.inKeyLength[%ld] = %u, bytesAfter = %ld, bytesBefore = %ld\n",in,inKeyLengths[in],bytesAfter,bytesBefore);
852 in++;
853 } else
854 in++;
856 out++;
858 //printf("-> out = %ld, keylen = %ld, %ld bytes total\n",out,bytesBefore,round_up(sizeof(bplustree_node) + bytesBefore + bytesAfter + bytes) +
859 // out * (sizeof(uint16) + sizeof(off_t)));
860 if (in > node->all_key_count && keyIndex < in)
861 break;
864 //printf("bytes = (%ld, %ld, %ld), in = %ld, total = %ld\n",bytesBefore,bytes,bytesAfter,in,total);
866 if (keyIndex >= in && keyIndex - skip < out) {
867 bytesAfter = inKeyLengths[in] - bytesBefore - total;
868 } else if (keyIndex < skip)
869 bytesBefore = node->all_key_length - total;
871 //printf("bytes = (%ld, %ld, %ld), in = %ld, total = %ld\n",bytesBefore,bytes,bytesAfter,in,total);
873 if (bytesBefore < 0 || bytesAfter < 0)
874 return B_BAD_DATA;
876 node->left_link = otherOffset;
877 // right link, and overflow link can stay the same
878 node->all_key_length = bytes + bytesBefore + bytesAfter;
879 node->all_key_count = out - 1;
881 // array positions have changed
882 outKeyLengths = node->KeyLengths();
883 outKeyValues = node->Values();
885 //printf("put %ld keys in the first node (%ld, %ld, %ld) (in = %ld)\n",out,bytesBefore,bytes,bytesAfter,in);
887 // move the keys in the old node: the order is important here,
888 // because we don't want to overwrite any contents
890 keys = keyIndex <= skip ? out : keyIndex - skip;
891 keyIndex -= skip;
893 if (bytesBefore)
894 memmove(inKeys,inKeys + total,bytesBefore);
895 if (bytesAfter)
896 memmove(inKeys + bytesBefore + bytes,inKeys + total + bytesBefore,bytesAfter);
898 if (bytesBefore)
899 memmove(outKeyLengths,inKeyLengths + skip,keys * sizeof(uint16));
900 in = out - keyIndex - 1;
901 if (bytesAfter)
902 memmove(outKeyLengths + keyIndex + 1,inKeyLengths + skip + keyIndex,in * sizeof(uint16));
904 if (bytesBefore)
905 memmove(outKeyValues,inKeyValues + skip,keys * sizeof(off_t));
906 if (bytesAfter)
907 memmove(outKeyValues + keyIndex + 1,inKeyValues + skip + keyIndex,in * sizeof(off_t));
909 if (bytes) {
910 // finally, copy the newly inserted key (don't overwrite anything)
911 memcpy(inKeys + bytesBefore,key,bytes);
912 outKeyLengths[keyIndex] = bytes + bytesBefore;
913 outKeyValues[keyIndex] = *_value;
916 //puts("\n!!!!!!!!!!!!!!! after: !!!!!!!!!!!!!!!\n");
917 //dump_bplustree_node(other,fHeader);
918 //hexdump(other,fNodeSize);
919 //puts("\n");
920 //dump_bplustree_node(node,fHeader);
921 //hexdump(node,fNodeSize);
923 // write the updated nodes back
925 fCache.SetDirty(otherOffset,true);
926 fCache.SetDirty(nodeOffset,true);
928 // update the right link of the node in the left of the new node
929 if (other->left_link != BPLUSTREE_NULL) {
930 bplustree_node *left = fCache.Get(other->left_link);
931 if (left != NULL) {
932 left->right_link = otherOffset;
933 fCache.SetDirty(other->left_link,true);
937 // prepare key to insert in the parent node
939 //printf("skip: %ld, count-1: %u\n",skip,other->all_key_count - 1);
940 uint16 length;
941 uint8 *lastKey = other->KeyAt(other->all_key_count - 1,&length);
942 memcpy(key,lastKey,length);
943 *_keyLength = length;
944 *_value = otherOffset;
946 return B_OK;
950 status_t BPlusTree::Insert(uint8 *key,uint16 keyLength,off_t value)
952 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH)
953 return B_BAD_VALUE;
955 Stack<node_and_key> stack;
956 if (SeekDown(stack,key,keyLength) != B_OK)
957 return B_ERROR;
959 uint8 keyBuffer[BPLUSTREE_MAX_KEY_LENGTH + 1];
961 memcpy(keyBuffer,key,keyLength);
962 keyBuffer[keyLength] = 0;
964 off_t valueToInsert = value;
966 fCurrentNodeOffset = BPLUSTREE_NULL;
968 node_and_key nodeAndKey;
969 bplustree_node *node;
970 uint32 count = 0;
972 while (stack.Pop(&nodeAndKey) && (node = fCache.Get(nodeAndKey.nodeOffset)) != NULL && CheckNode(node))
974 if (count++ == 0) // first round, check for duplicate entries
976 status_t status = FindKey(node,key,keyLength,&nodeAndKey.keyIndex);
977 if (status == B_ERROR)
978 return B_ERROR;
980 // is this a duplicate entry?
981 if (status == B_OK && node->overflow_link == BPLUSTREE_NULL)
983 if (fAllowDuplicates)
984 return InsertDuplicate(node,nodeAndKey.keyIndex);
985 else
986 return B_NAME_IN_USE;
990 // is the node big enough to hold the pair?
991 if (int32(round_up(sizeof(bplustree_node) + node->all_key_length + keyLength)
992 + (node->all_key_count + 1) * (sizeof(uint16) + sizeof(off_t))) < fNodeSize)
994 InsertKey(node,keyBuffer,keyLength,valueToInsert,nodeAndKey.keyIndex);
995 fCache.SetDirty(nodeAndKey.nodeOffset,true);
997 return B_OK;
999 else
1001 // do we need to allocate a new root node? if so, then do
1002 // it now
1003 bplustree_node *rootNode = NULL;
1004 off_t newRoot = BPLUSTREE_NULL;
1005 if (nodeAndKey.nodeOffset == fHeader->root_node_pointer) {
1006 rootNode = fCache.Get(newRoot = fHeader->maximum_size);
1007 if (rootNode == NULL) {
1008 // the tree is most likely dead
1009 return B_NO_MEMORY;
1013 if (SplitNode(node,nodeAndKey.nodeOffset,&nodeAndKey.keyIndex,keyBuffer,&keyLength,&valueToInsert) < B_OK) {
1014 if (newRoot != BPLUSTREE_NULL) {
1015 // free root node
1017 return B_ERROR;
1020 if (newRoot != BPLUSTREE_NULL) {
1021 rootNode->overflow_link = nodeAndKey.nodeOffset;
1023 InsertKey(rootNode,keyBuffer,keyLength,node->left_link,0);
1025 fHeader->root_node_pointer = newRoot;
1026 fHeader->max_number_of_levels++;
1027 // write header
1029 fCache.SetDirty(newRoot,true);
1031 return B_OK;
1036 return B_ERROR;
1040 status_t BPlusTree::Find(uint8 *key,uint16 keyLength,off_t *value)
1042 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH)
1043 return B_BAD_VALUE;
1045 Stack<node_and_key> stack;
1046 if (SeekDown(stack,key,keyLength) != B_OK)
1047 return B_ERROR;
1049 fCurrentNodeOffset = BPLUSTREE_NULL;
1051 node_and_key nodeAndKey;
1052 bplustree_node *node;
1054 if (stack.Pop(&nodeAndKey) && (node = fCache.Get(nodeAndKey.nodeOffset)) != NULL && CheckNode(node))
1056 status_t status = FindKey(node,key,keyLength,&nodeAndKey.keyIndex);
1057 if (status == B_ERROR)
1058 return B_ERROR;
1060 SetCurrentNode(node,nodeAndKey.nodeOffset);
1062 if (status == B_OK && node->overflow_link == BPLUSTREE_NULL)
1064 *value = node->Values()[nodeAndKey.keyIndex];
1065 return B_OK;
1068 return B_ENTRY_NOT_FOUND;
1072 // #pragma mark -
1075 bool
1076 BPlusTree::CheckNode(bplustree_node *node)
1078 if (!fHeader->IsValidLink(node->left_link)
1079 || !fHeader->IsValidLink(node->right_link)
1080 || !fHeader->IsValidLink(node->overflow_link)
1081 || (int8 *)node->Values() + node->all_key_count * sizeof(off_t) > (int8 *)node + fNodeSize)
1082 return false;
1084 return true;
1088 bplustree_node *BPlusTree::Node(off_t nodeOffset,bool check)
1090 /*printf("1: %d,%d,%d\n",
1091 nodeOffset > fHeader->maximum_size - fNodeSize,
1092 nodeOffset < 0 && nodeOffset != BPLUSTREE_NULL,
1093 (nodeOffset % fNodeSize) != 0);
1095 // the super node is always in memory, and shouldn't
1096 // never be taken out of the cache
1097 if (nodeOffset > fHeader->maximum_size /*- fNodeSize*/
1098 || nodeOffset <= 0 && nodeOffset != BPLUSTREE_NULL
1099 || (nodeOffset % fNodeSize) != 0)
1100 return NULL;
1102 bplustree_node *node = (bplustree_node *)malloc(fNodeSize);
1103 if (node == NULL)
1104 return NULL;
1106 if (nodeOffset == BPLUSTREE_NULL || !fStream)
1108 node->left_link = BPLUSTREE_NULL;
1109 node->right_link = BPLUSTREE_NULL;
1110 node->overflow_link = BPLUSTREE_NULL;
1111 node->all_key_count = 0;
1112 node->all_key_length = 0;
1114 else if (fStream && fStream->ReadAt(nodeOffset,node,fNodeSize) < fNodeSize)
1116 free(node);
1117 return NULL;
1120 if (check && node != NULL)
1122 // sanity checks (links, all_key_count)
1123 if (!fHeader->IsValidLink(node->left_link)
1124 || !fHeader->IsValidLink(node->right_link)
1125 || !fHeader->IsValidLink(node->overflow_link)
1126 || (int8 *)node->Values() + node->all_key_count * sizeof(off_t) > (int8 *)node + fNodeSize)
1127 return NULL;
1130 if (!fStream && nodeOffset > fHeader->maximum_size - fNodeSize) {
1131 // do some hacks here
1132 fHeader->maximum_size += fNodeSize;
1135 return node;
1139 void BPlusTree::SetHoldChanges(bool hold)
1141 fCache.SetHoldChanges(hold);
1145 // #pragma mark -
1148 uint8 *bplustree_node::KeyAt(int32 index,uint16 *keyLength) const
1150 if (index < 0 || index > all_key_count)
1151 return NULL;
1153 uint8 *keyStart = Keys();
1154 uint16 *keyLengths = KeyLengths();
1156 *keyLength = keyLengths[index] - (index != 0 ? keyLengths[index - 1] : 0);
1157 if (index > 0)
1158 keyStart += keyLengths[index - 1];
1160 return keyStart;
1164 uint8 bplustree_node::CountDuplicates(off_t offset,bool isFragment) const
1166 // the duplicate fragment handling is currently hard-coded to a node size
1167 // of 1024 bytes - with future versions of BFS, this may be a problem
1169 if (isFragment)
1171 uint32 fragment = 8 * ((uint64)offset & 0x3ff);
1173 return ((off_t *)this)[fragment];
1175 return overflow_link;
1179 off_t bplustree_node::DuplicateAt(off_t offset,bool isFragment,int8 index) const
1181 uint32 start;
1182 if (isFragment)
1183 start = 8 * ((uint64)offset & 0x3ff);
1184 else
1185 start = 2;
1187 return ((off_t *)this)[start + 1 + index];