1 /* non-recursive Red-Black Tree implementation
2 based on the code from http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx
4 #ifndef _KLISP_RBTREE_MODULE_
5 #define _KLISP_RBTREE_MODULE_
18 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
19 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
20 #define KLP_GET32BITS(d) (*((const uint32_t *) (d)))
23 #if !defined (KLP_GET32BITS)
24 #define KLP_GET32BITS(d) (\
26 + ((uint32_t)(d)[1]<<UINT32_C(8)) \
27 + ((uint32_t)(d)[2]<<UINT32_C(16)) \
28 + ((uint32_t)(d)[3]<<UINT32_C(24)) )
32 #define KLP_HASHSIZE(n) ((uint32_t)1<<(n))
33 #define KLP_HASHMASK(n) (KLP_HASHSIZE(n)-1)
37 --------------------------------------------------------------------
38 mix -- mix 3 32-bit values reversibly.
39 For every delta with one or two bits set, and the deltas of all three
40 high bits or all three low bits, whether the original value of a,b,c
41 is almost all zero or is uniformly distributed,
42 * If mix() is run forward or backward, at least 32 bits in a,b,c
43 have at least 1/4 probability of changing.
44 * If mix() is run forward, every bit of c will change between 1/3 and
45 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
46 mix() was built out of 36 single-cycle latency instructions in a
47 structure that could supported 2x parallelism, like so:
55 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
56 of that parallelism. They've also turned some of those single-cycle
57 latency instructions into multi-cycle latency instructions. Still,
58 this is the fastest good hash I could find. There were about 2^^68
59 to choose from. I only looked at a billion or so.
60 --------------------------------------------------------------------
62 #define KLP_MIX(a,b,c) \
64 a -= b; a -= c; a ^= (c>>13); \
65 b -= c; b -= a; b ^= (a<<8); \
66 c -= a; c -= b; c ^= (b>>13); \
67 a -= b; a -= c; a ^= (c>>12); \
68 b -= c; b -= a; b ^= (a<<16); \
69 c -= a; c -= b; c ^= (b>>5); \
70 a -= b; a -= c; a ^= (c>>3); \
71 b -= c; b -= a; b ^= (a<<10); \
72 c -= a; c -= b; c ^= (b>>15); \
76 --------------------------------------------------------------------
77 hash() -- hash a variable-length key into a 32-bit value
78 k : the key (the unaligned variable-length array of bytes)
79 len : the length of the key, counting by bytes
80 initval : can be any 4-byte value
81 Returns a 32-bit value. Every bit of the key affects every bit of
82 the return value. Every 1-bit and 2-bit delta achieves avalanche.
83 About 6*len+35 instructions.
85 The best hash table sizes are powers of 2. There is no need to do
86 mod a prime (mod is sooo slow!). If you need less than 32 bits,
87 use a bitmask. For example, if you need only 10 bits, do
88 h = (h & KLP_HASHMASK(10));
89 In which case, the hash table should have KLP_HASHSIZE(10) elements.
91 If you are hashing n strings (uint8_t **)k, do it like this:
92 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
94 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
95 code any way you wish, private, educational, or commercial. It's free.
97 See http://burtleburtle.net/bob/hash/evahash.html
98 Use for hash table lookup, or anything where one collision in 2^^32 is
99 acceptable. Do NOT use for cryptographic purposes.
100 --------------------------------------------------------------------
103 static uint32_t hashBobJenkinsUpdate (const char *k
, int length
, uint32_t initval
) {
104 register uint32_t a
, b
, c
, len
;
106 /* Set up the internal state */
108 a
= b
= 0x9e3779b9; /* the golden ratio; an arbitrary value */
109 c
= initval
; /* the previous hash value */
110 /*---------------------------------------- handle most of the key */
112 a
+= KLP_GET32BITS(k
);
113 b
+= KLP_GET32BITS(k
+4);
114 c
+= KLP_GET32BITS(k
+8);
118 /*------------------------------------- handle the last 11 bytes */
120 switch (len
) { /* all the case statements fall through */
121 case 11: c
+=((uint32_t)k
[10]<<24);
122 case 10: c
+=((uint32_t)k
[9]<<16);
123 case 9 : c
+=((uint32_t)k
[8]<<8);
124 /* the first byte of c is reserved for the length */
125 case 8 : b
+=((uint32_t)k
[7]<<24);
126 case 7 : b
+=((uint32_t)k
[6]<<16);
127 case 6 : b
+=((uint32_t)k
[5]<<8);
129 case 4 : a
+=((uint32_t)k
[3]<<24);
130 case 3 : a
+=((uint32_t)k
[2]<<16);
131 case 2 : a
+=((uint32_t)k
[1]<<8);
133 /* case 0: nothing left to add */
142 static uint32_t HashBuffer (const char *k, int length) {
143 return hashBobJenkinsUpdate (k, length, 0);
148 static uint32_t HashString (const char *s
) {
149 return hashBobJenkinsUpdate (s
, strlen(s
), 0);
153 /*static TKLispRBTNode *rbtNodePool = NULL;
154 static int rbtNodeCount = 0;*/
155 static TKLispRBTNode
*rbtNodeFree
= NULL
;
158 #define KLispRBTIsRedNode(root) \
159 ( (root) != NULL && ((root)->red) )
162 void KLispRBTCleanup (void) {
166 /*if (!rbtNodePool) return;
167 for (f = 0; f < rbtNodeCount; f++) rbtNodePool[f].value1 = 0;*/
168 while (rbtNodeFree
) {
169 node
= rbtNodeFree
->nextFree
;
170 KLISP_MEMFREE(rbtNodeFree
);
173 /*for (f = 0; f < rbtNodeCount; f++) {
174 if (rbtNodePool[f].value1 != -666) continue;
175 if (rbtNodePool[f].freeIt && rbtNodePool[f].str) KLISP_MEMFREE(rbtNodePool[f].str);
177 KLISP_MEMFREE(rbtNodePool);
179 rbtNodeCount = 0; rbtNodeFree = NULL;*/
183 char *KLispStrDup (const char *str
) {
186 if (str
) s
= KLISP_MEMALLOC((strlen(str
)+1)*sizeof(char)); else s
= KLISP_MEMALLOC(sizeof(char));
188 if (str
) strcpy(s
, str
); else s
[0] = '\0';
194 static void RBTI_FreeNode (TKLispRBTNode
*node
) {
198 KLISP_MEMFREE(node
->str
);
200 /*KLISP_MEMFREE(node);*/
202 rbtNodeFree
= node
->nextFree
;
206 static void RBTI_GrowPool (void) {
207 /* note that nextFree pointers are invalid after the growth! */
212 /*assert(!rbtNodeFree);*/
213 /* mark all free nodes */
214 /*newSz = rbtNodeCount+8192;
215 rn = KLISP_MEMREALLOC(rbtNodePool, (newSz+1)*sizeof(TKLispRBTNode));
216 if (!rn) { fprintf(stderr, "***memory!\n"); abort(); }
218 for (f = rbtNodeCount; f < newSz; f++) {
219 rbtNodePool[f].nextFree = &(rbtNodePool[f+1]);
221 rbtNodePool[newSz-1].nextFree = NULL;
222 rbtNodeFree = &(rbtNodePool[rbtNodeCount]);
223 rbtNodeCount = newSz;*/
224 for (f
= 512; f
; f
--) {
225 rn
= KLISP_MEMALLOC(sizeof(TKLispRBTNode
));
227 rn
->nextFree
= rbtNodeFree
;
233 static TKLispRBTNode
*RBTI_NewNode (const char *str
, int value0
, int value1
, int freeIt
) {
237 /* no free nodes; realloc pool */
238 if (!rbtNodeFree
) RBTI_GrowPool();
242 rbtNodeFree
= rn
->nextFree
;
245 /*memset(rn, 0, sizeof(TKLispRBTNode));*/
246 rn
->red
= 1; /* 1 is red, 0 is black */
247 rn
->link
[0] = rn
->link
[1] = NULL
;
248 if (!freeIt
) rn
->str
= (char *)str
;
250 if (!(rn
->str
= KLispStrDup(str
))) {
251 /*KLISP_MEMFREE(rn);*/
256 rn
->value0
= value0
; rn
->value1
= value1
;
258 rn
->hash
= HashString(rn
->str
);
265 static void RBTI_ClearTreeNodes (TKLispRBTNode
*node
) {
267 RBTI_ClearTreeNodes(node
->link
[0]);
268 RBTI_ClearTreeNodes(node
->link
[1]);
273 void KLispRBTClearTree (TKLispRBTree
*tree
) {
274 if (!tree
|| !tree
->root
) return;
275 RBTI_ClearTreeNodes(tree
->root
);
276 tree
->root
= NULL
; tree
->nodeCount
= 0;
280 void KLispRBTInitTree (TKLispRBTree
*tree
) {
281 memset(tree
, 0, sizeof(TKLispRBTree
));
282 tree
->root
= NULL
; tree
->nodeCount
= 0;
286 TKLispRBTree
*KLispRBTNewTree (void) {
289 tree
= KLISP_MEMALLOC(sizeof(TKLispRBTree
));
290 if (!tree
) return NULL
;
291 memset(tree
, 0, sizeof(TKLispRBTree
));
292 KLispRBTInitTree(tree
);
298 void KLispRBTFreeTree (TKLispRBTree
*tree
) {
300 KLispRBTClearTree(tree
);
305 static TKLispRBTNode
*KLispRBTSingleRotation (TKLispRBTNode
*root
, int dir
) {
308 save
= root
->link
[!dir
];
309 root
->link
[!dir
] = save
->link
[dir
];
310 save
->link
[dir
] = root
;
318 static TKLispRBTNode
*KLispRBTDoubleRotation (TKLispRBTNode
*root
, int dir
) {
319 root
->link
[!dir
] = KLispRBTSingleRotation(root
->link
[!dir
], !dir
);
321 return KLispRBTSingleRotation(root
, dir
);
325 /* dataptr: pointer to data; data will be copied */
326 /* newnode will be non-zero, if new node was generated; newnode can be NULL */
327 TKLispRBTNode
*KLispRBTInsert (TKLispRBTree
*tree
, const char *str
, int value0
, int value1
, int freeIt
, int *newnode
) {
328 TKLispRBTNode
*res
= NULL
;
329 TKLispRBTNode head
= {0}; /* false tree root */
330 TKLispRBTNode
*g
, *t
; /* grandparent & parent */
331 TKLispRBTNode
*p
, *q
; /* iterator & parent */
332 int dir
= 0, last
= -1, cmp
, dir2
;
333 uint32_t strhash
= HashString(str
), qhash
;
335 if (tree
->root
== NULL
) {
336 /* empty tree case */
337 if (newnode
) *newnode
= 1;
338 tree
->root
= RBTI_NewNode(str
, value0
, value1
, freeIt
);
340 /* make root black */
347 if (newnode
) *newnode
= 0;
351 q
= t
->link
[1] = tree
->root
;
352 /* search down the tree */
355 /* insert new node at the bottom */
356 p
->link
[dir
] = q
= RBTI_NewNode(str
, value0
, value1
, freeIt
);
357 if (q
== NULL
) return NULL
;
358 if (newnode
) *newnode
= 1;
361 } else if (KLispRBTIsRedNode(q
->link
[0]) && KLispRBTIsRedNode(q
->link
[1])) {
367 /* fix red violation */
368 if (KLispRBTIsRedNode(q
) && KLispRBTIsRedNode(p
)) {
369 dir2
= t
->link
[1] == g
;
370 if (q
== p
->link
[last
]) t
->link
[dir2
] = KLispRBTSingleRotation(g
, !last
);
371 else t
->link
[dir2
] = KLispRBTDoubleRotation(g
, !last
);
374 qhash
= q
->hash
; cmp
= (qhash
== strhash
)?strcmp(q
->str
, str
):(qhash
<strhash
?-1:1);
379 last
= dir
; dir
= cmp
<0?1:0;
381 if (g
!= NULL
) t
= g
;
387 tree
->root
= head
.link
[1];
388 /* make root black */
391 #ifdef KLISP_RBT_DEBUG
392 KLispRBTAssert(tree
->root
, "insert");
398 #define KLISP_RBT_SWAP(fld,tmp) {tmp=found->fld;found->fld=q->fld;q->fld=tmp;}
399 int KLispRBTDelete (TKLispRBTree
*tree
, const char *str
) {
400 /*TKLispRBTNode *nodetokill = NULL;*/
401 TKLispRBTNode head
= {0}; /* false tree root */
402 TKLispRBTNode
*q
, *p
, *g
, *s
; /* helpers */
403 TKLispRBTNode
*found
= NULL
; /* found item */
404 int dir
= 1, cmp
, last
, dir2
;
406 uint32_t strhash
= HashString(str
), qhash
;
408 #ifdef KLISP_RBT_DEBUG_DEL
409 fprintf(stderr
, "delete: '%s'\n", str
);
411 if (tree
->root
!= NULL
) {
415 q
->link
[1] = tree
->root
;
416 /* search and push a red down */
417 while (q
->link
[dir
] != NULL
) {
422 /*!!!cmp = strcmp(q->str, str);*/
423 qhash
= q
->hash
; cmp
= (qhash
== strhash
)?strcmp(q
->str
, str
):(qhash
<strhash
?-1:1);
424 /* save found node */
425 if (!cmp
) { assert(!found
); found
= q
; }
428 /* push the red node down */
429 if (!KLispRBTIsRedNode(q
) && !KLispRBTIsRedNode(q
->link
[dir
])) {
430 if (KLispRBTIsRedNode(q
->link
[!dir
])) p
= p
->link
[last
] = KLispRBTSingleRotation(q
, dir
);
431 else if (!KLispRBTIsRedNode(q
->link
[!dir
])) {
434 if (!KLispRBTIsRedNode(s
->link
[!last
]) && !KLispRBTIsRedNode(s
->link
[last
])) {
440 dir2
= g
->link
[1] == p
;
441 if (KLispRBTIsRedNode(s
->link
[last
])) g
->link
[dir2
] = KLispRBTDoubleRotation(p
, last
);
442 else if (KLispRBTIsRedNode(s
->link
[!last
])) g
->link
[dir2
] = KLispRBTSingleRotation(p
, last
);
443 /* ensure correct coloring */
444 q
->red
= g
->link
[dir2
]->red
= 1;
445 g
->link
[dir2
]->link
[0]->red
= 0;
446 g
->link
[dir2
]->link
[1]->red
= 0;
452 /* replace and remove if found */
454 #ifdef KLISP_RBT_DEBUG_DEL
455 fprintf(stderr
, "found: '%s'\n", found
->str
);
456 fprintf(stderr
, "q: '%s'\n", q
->str
);
458 p
->link
[p
->link
[1] == q
] = q
->link
[q
->link
[0] == NULL
];
459 /* swap data for found node and q */
460 KLISP_RBT_SWAP(freeIt
, last
);
461 KLISP_RBT_SWAP(str
, tmpstr
);;
462 found
->value0
= q
->value0
;
463 found
->value1
= q
->value1
;
464 found
->hash
= q
->hash
;
466 /*found->str = q->str;*/
469 /* Update root and make it black */
470 tree
->root
= head
.link
[1];
471 if (tree
->root
!= NULL
) tree
->root
->red
= 0;
474 #ifdef KLISP_RBT_DEBUG_DEL
475 fprintf(stderr
, "killing: '%s'\n", found
->str
);
477 RBTI_FreeNode(found
);
479 #ifdef KLISP_RBT_DEBUG
480 KLispRBTAssert(tree
->root
, "delete");
490 TKLispRBTNode
*KLispRBTFind (TKLispRBTree
*tree
, const char *str
) {
491 TKLispRBTNode
*res
= tree
->root
;
494 uint32_t strhash
= HashString(str
), qhash
;
497 /*!!!cmp = strcmp(res->str, str);*/
498 qhash
= res
->hash
; cmp
= (qhash
== strhash
)?strcmp(res
->str
, str
):(qhash
<strhash
?-1:1);
500 res
= res
->link
[cmp
<0?1:0];
502 if (cnt
> tree
->nodeCount
) {
503 fprintf(stderr
, "KLispRBTFind: DIA!\n");
512 void KLispRBTInitWalker (TKLispRBTree
*tree
, TKLispRBTWalker
*walker
) {
514 memset(walker
, 0, sizeof(TKLispRBTWalker
));
515 walker
->tree
= tree
; walker
->curnode
= tree
->root
;
516 walker
->sp
= 0; walker
->phase
= 0;
517 #ifdef KLISP_RBT_DEBUG
523 void KLispRBTDeinitWalker (TKLispRBTWalker
*wdata
) {
527 TKLispRBTWalker
*KLispRBTNewWalker (TKLispRBTree
*tree
) {
528 TKLispRBTWalker
*res
;
530 if (!tree
) return NULL
;
531 res
= KLISP_MEMALLOC(sizeof(TKLispRBTWalker
));
532 if (!res
) return NULL
;
533 KLispRBTInitWalker(tree
, res
);
539 void KLispRBTFreeWalker (TKLispRBTWalker
*wdata
) {
541 KLispRBTDeinitWalker(wdata
);
542 KLISP_MEMFREE(wdata
);
546 /* DO NOT change the tree while walking with this function! */
547 TKLispRBTNode
*KLispRBTWalkerNext (TKLispRBTWalker
*wdata
) {
548 TKLispRBTNode
*node
= wdata
->curnode
;
549 if (!node
) return NULL
;
552 switch (wdata
->phase
) {
553 case 0: /* left node */
554 if (!node
->link
[0]) {
556 wdata
->curnode
= node
;
559 if (sp
>= KLISP_RBT_WALK_STACK_SIZE
) {
560 fprintf(stderr
, "stack overflow!\n");
563 wdata
->stack
[sp
].node
= node
;
564 wdata
->stack
[sp
].phase
= 1;
566 #ifdef KLISP_RBT_DEBUG
567 wdata
->maxsp
= (wdata
->sp
>wdata
->maxsp
)?wdata
->sp
:wdata
->maxsp
;
569 node
= node
->link
[0];
571 case 1: /* current node */
573 wdata
->curnode
= node
;
575 case 2: /* right node */
576 if (!node
->link
[1]) wdata
->phase
= 3; /* go up */
579 if (sp
>= KLISP_RBT_WALK_STACK_SIZE
) {
580 fprintf(stderr
, "stack overflow!\n");
583 wdata
->stack
[sp
].node
= node
;
584 wdata
->stack
[sp
].phase
= 3;
586 #ifdef KLISP_RBT_DEBUG
587 wdata
->maxsp
= (wdata
->sp
>wdata
->maxsp
)?wdata
->sp
:wdata
->maxsp
;
589 node
= node
->link
[1];
596 wdata
->curnode
= NULL
;
600 wdata
->phase
= wdata
->stack
[sp
].phase
;
601 node
= wdata
->stack
[sp
].node
;
610 #ifdef KLISP_RBT_DEBUG
613 If KLispRBTAssert returns 0, the tree is an invalid red black tree.
614 Otherwise it will return the black height of the entire tree.
615 For convenience, a message will also print out telling you which violations occurred. :-)
617 /* uses: KLispRBTIsRedNode */
618 int KLispRBTAssert (TKLispRBTNode
*root
, const char *msg
) {
619 TKLispRBTNode
*ln
, *rn
;
627 /* Consecutive red links */
628 if (KLispRBTIsRedNode(root
)) {
629 if (KLispRBTIsRedNode(ln
) || KLispRBTIsRedNode(rn
)) {
630 fprintf(stderr
, "%s", "red violation\n");
636 lh
= KLispRBTAssert(ln
, msg
); rh
= KLispRBTAssert(rn
, msg
);
638 /* Invalid binary search tree */
640 cmp
= (ln
->hash
== root
->hash
)?strcmp(ln
->str
, root
->str
):(ln
->hash
<root
->hash
?-1:1);
641 if (cmp
>= 0) { fprintf(stderr
, "%s: %s", msg
, "binary tree violation (ln)\n"); abort(); }
644 cmp
= (rn
->hash
== root
->hash
)?strcmp(rn
->str
, root
->str
):(rn
->hash
<root
->hash
?-1:1);
645 if (cmp
<= 0) { fprintf(stderr
, "%s: %s", msg
, "binary tree violation (rn)\n"); abort(); }
648 if ((ln && strcmp(ln->str, root->str) >= 0) ||
649 (rn && strcmp(rn->str, root->str) <= 0)) {
650 fprintf(stderr, "%s: %s", msg, "binary tree violation\n");
654 /* Black height mismatch */
655 if (lh
!= 0 && rh
!= 0 && lh
!= rh
) {
656 fprintf(stderr
, "%s: %s", msg
, "black violation\n");
661 /* Only count black links */
662 if (lh
&& rh
) return KLispRBTIsRedNode(root
)?lh
:lh
+1; else return 0;
667 #undef KLispRBTIsRedNode
670 #endif /* _KLISP_RBTREE_MODULE_ */