i've got a perfectly idiotic bug in syren and searching it in klisp %-)
[syren.git] / src / klisp / klisp_rbtree.c
blob51deadc6a5adc6bdcd20e3266eb58c690b769cc0
1 /* non-recursive Red-Black Tree implementation
2 based on the code from http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_rbtree.aspx
3 */
4 #ifndef _KLISP_RBTREE_MODULE_
5 #define _KLISP_RBTREE_MODULE_
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12 #include <assert.h>
14 #include "klisp.h"
17 #undef KLP_GET32BITS
18 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
19 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
20 #define KLP_GET32BITS(d) (*((const uint32_t *) (d)))
21 #endif
23 #if !defined (KLP_GET32BITS)
24 #define KLP_GET32BITS(d) (\
25 (uint32_t)(d)[0] \
26 + ((uint32_t)(d)[1]<<UINT32_C(8)) \
27 + ((uint32_t)(d)[2]<<UINT32_C(16)) \
28 + ((uint32_t)(d)[3]<<UINT32_C(24)) )
29 #endif
32 #define KLP_HASHSIZE(n) ((uint32_t)1<<(n))
33 #define KLP_HASHMASK(n) (KLP_HASHSIZE(n)-1)
37 --------------------------------------------------------------------
38 mix -- mix 3 32-bit values reversibly.
39 For every delta with one or two bits set, and the deltas of all three
40 high bits or all three low bits, whether the original value of a,b,c
41 is almost all zero or is uniformly distributed,
42 * If mix() is run forward or backward, at least 32 bits in a,b,c
43 have at least 1/4 probability of changing.
44 * If mix() is run forward, every bit of c will change between 1/3 and
45 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
46 mix() was built out of 36 single-cycle latency instructions in a
47 structure that could supported 2x parallelism, like so:
48 a -= b;
49 a -= c; x = (c>>13);
50 b -= c; a ^= x;
51 b -= a; x = (a<<8);
52 c -= a; b ^= x;
53 c -= b; x = (b>>13);
54 ...
55 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
56 of that parallelism. They've also turned some of those single-cycle
57 latency instructions into multi-cycle latency instructions. Still,
58 this is the fastest good hash I could find. There were about 2^^68
59 to choose from. I only looked at a billion or so.
60 --------------------------------------------------------------------
62 #define KLP_MIX(a,b,c) \
63 { \
64 a -= b; a -= c; a ^= (c>>13); \
65 b -= c; b -= a; b ^= (a<<8); \
66 c -= a; c -= b; c ^= (b>>13); \
67 a -= b; a -= c; a ^= (c>>12); \
68 b -= c; b -= a; b ^= (a<<16); \
69 c -= a; c -= b; c ^= (b>>5); \
70 a -= b; a -= c; a ^= (c>>3); \
71 b -= c; b -= a; b ^= (a<<10); \
72 c -= a; c -= b; c ^= (b>>15); \
76 --------------------------------------------------------------------
77 hash() -- hash a variable-length key into a 32-bit value
78 k : the key (the unaligned variable-length array of bytes)
79 len : the length of the key, counting by bytes
80 initval : can be any 4-byte value
81 Returns a 32-bit value. Every bit of the key affects every bit of
82 the return value. Every 1-bit and 2-bit delta achieves avalanche.
83 About 6*len+35 instructions.
85 The best hash table sizes are powers of 2. There is no need to do
86 mod a prime (mod is sooo slow!). If you need less than 32 bits,
87 use a bitmask. For example, if you need only 10 bits, do
88 h = (h & KLP_HASHMASK(10));
89 In which case, the hash table should have KLP_HASHSIZE(10) elements.
91 If you are hashing n strings (uint8_t **)k, do it like this:
92 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
94 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
95 code any way you wish, private, educational, or commercial. It's free.
97 See http://burtleburtle.net/bob/hash/evahash.html
98 Use for hash table lookup, or anything where one collision in 2^^32 is
99 acceptable. Do NOT use for cryptographic purposes.
100 --------------------------------------------------------------------
103 static uint32_t hashBobJenkinsUpdate (const char *k, int length, uint32_t initval) {
104 register uint32_t a, b, c, len;
106 /* Set up the internal state */
107 len = length;
108 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
109 c = initval; /* the previous hash value */
110 /*---------------------------------------- handle most of the key */
111 while (len >= 12) {
112 a += KLP_GET32BITS(k);
113 b += KLP_GET32BITS(k+4);
114 c += KLP_GET32BITS(k+8);
115 KLP_MIX(a,b,c);
116 k += 12; len -= 12;
118 /*------------------------------------- handle the last 11 bytes */
119 c += length;
120 switch (len) { /* all the case statements fall through */
121 case 11: c+=((uint32_t)k[10]<<24);
122 case 10: c+=((uint32_t)k[9]<<16);
123 case 9 : c+=((uint32_t)k[8]<<8);
124 /* the first byte of c is reserved for the length */
125 case 8 : b+=((uint32_t)k[7]<<24);
126 case 7 : b+=((uint32_t)k[6]<<16);
127 case 6 : b+=((uint32_t)k[5]<<8);
128 case 5 : b+=k[4];
129 case 4 : a+=((uint32_t)k[3]<<24);
130 case 3 : a+=((uint32_t)k[2]<<16);
131 case 2 : a+=((uint32_t)k[1]<<8);
132 case 1 : a+=k[0];
133 /* case 0: nothing left to add */
135 KLP_MIX(a,b,c);
137 return c;
142 static uint32_t HashBuffer (const char *k, int length) {
143 return hashBobJenkinsUpdate (k, length, 0);
148 static uint32_t HashString (const char *s) {
149 return hashBobJenkinsUpdate (s, strlen(s), 0);
153 /*static TKLispRBTNode *rbtNodePool = NULL;
154 static int rbtNodeCount = 0;*/
155 static TKLispRBTNode *rbtNodeFree = NULL;
158 #define KLispRBTIsRedNode(root) \
159 ( (root) != NULL && ((root)->red) )
162 void KLispRBTCleanup (void) {
163 TKLispRBTNode *node;
164 /*int f;*/
166 /*if (!rbtNodePool) return;
167 for (f = 0; f < rbtNodeCount; f++) rbtNodePool[f].value1 = 0;*/
168 while (rbtNodeFree) {
169 node = rbtNodeFree->nextFree;
170 KLISP_MEMFREE(rbtNodeFree);
171 rbtNodeFree = node;
173 /*for (f = 0; f < rbtNodeCount; f++) {
174 if (rbtNodePool[f].value1 != -666) continue;
175 if (rbtNodePool[f].freeIt && rbtNodePool[f].str) KLISP_MEMFREE(rbtNodePool[f].str);
177 KLISP_MEMFREE(rbtNodePool);
178 rbtNodePool = NULL;
179 rbtNodeCount = 0; rbtNodeFree = NULL;*/
183 char *KLispStrDup (const char *str) {
184 char *s;
186 if (str) s = KLISP_MEMALLOC((strlen(str)+1)*sizeof(char)); else s = KLISP_MEMALLOC(sizeof(char));
187 if (!s) return NULL;
188 if (str) strcpy(s, str); else s[0] = '\0';
190 return s;
194 static void RBTI_FreeNode (TKLispRBTNode *node) {
195 if (!node) return;
196 if (node->freeIt) {
197 assert(node->str);
198 KLISP_MEMFREE(node->str);
200 /*KLISP_MEMFREE(node);*/
201 node->str = NULL;
202 rbtNodeFree = node->nextFree;
206 static void RBTI_GrowPool (void) {
207 /* note that nextFree pointers are invalid after the growth! */
208 TKLispRBTNode *rn;
209 int f;
210 /*int newSz;*/
212 /*assert(!rbtNodeFree);*/
213 /* mark all free nodes */
214 /*newSz = rbtNodeCount+8192;
215 rn = KLISP_MEMREALLOC(rbtNodePool, (newSz+1)*sizeof(TKLispRBTNode));
216 if (!rn) { fprintf(stderr, "***memory!\n"); abort(); }
217 rbtNodePool = rn;
218 for (f = rbtNodeCount; f < newSz; f++) {
219 rbtNodePool[f].nextFree = &(rbtNodePool[f+1]);
221 rbtNodePool[newSz-1].nextFree = NULL;
222 rbtNodeFree = &(rbtNodePool[rbtNodeCount]);
223 rbtNodeCount = newSz;*/
224 for (f = 512; f; f--) {
225 rn = KLISP_MEMALLOC(sizeof(TKLispRBTNode));
226 if (!rn) return;
227 rn->nextFree = rbtNodeFree;
228 rbtNodeFree = rn;
233 static TKLispRBTNode *RBTI_NewNode (const char *str, int value0, int value1, int freeIt) {
234 TKLispRBTNode *rn;
236 assert(str);
237 /* no free nodes; realloc pool */
238 if (!rbtNodeFree) RBTI_GrowPool();
239 /* get free node */
240 assert(rbtNodeFree);
241 rn = rbtNodeFree;
242 rbtNodeFree = rn->nextFree;
243 rn->nextFree = NULL;
244 if (rn != NULL) {
245 /*memset(rn, 0, sizeof(TKLispRBTNode));*/
246 rn->red = 1; /* 1 is red, 0 is black */
247 rn->link[0] = rn->link[1] = NULL;
248 if (!freeIt) rn->str = (char *)str;
249 else {
250 if (!(rn->str = KLispStrDup(str))) {
251 /*KLISP_MEMFREE(rn);*/
252 RBTI_FreeNode(rn);
253 return NULL;
256 rn->value0 = value0; rn->value1 = value1;
257 rn->freeIt = freeIt;
258 rn->hash = HashString(rn->str);
261 return rn;
265 static void RBTI_ClearTreeNodes (TKLispRBTNode *node) {
266 if (!node) return;
267 RBTI_ClearTreeNodes(node->link[0]);
268 RBTI_ClearTreeNodes(node->link[1]);
269 RBTI_FreeNode(node);
273 void KLispRBTClearTree (TKLispRBTree *tree) {
274 if (!tree || !tree->root) return;
275 RBTI_ClearTreeNodes(tree->root);
276 tree->root = NULL; tree->nodeCount = 0;
280 void KLispRBTInitTree (TKLispRBTree *tree) {
281 memset(tree, 0, sizeof(TKLispRBTree));
282 tree->root = NULL; tree->nodeCount = 0;
286 TKLispRBTree *KLispRBTNewTree (void) {
287 TKLispRBTree *tree;
289 tree = KLISP_MEMALLOC(sizeof(TKLispRBTree));
290 if (!tree) return NULL;
291 memset(tree, 0, sizeof(TKLispRBTree));
292 KLispRBTInitTree(tree);
294 return tree;
298 void KLispRBTFreeTree (TKLispRBTree *tree) {
299 if (!tree) return;
300 KLispRBTClearTree(tree);
301 KLISP_MEMFREE(tree);
305 static TKLispRBTNode *KLispRBTSingleRotation (TKLispRBTNode *root, int dir) {
306 TKLispRBTNode *save;
308 save = root->link[!dir];
309 root->link[!dir] = save->link[dir];
310 save->link[dir] = root;
311 root->red = 1;
312 save->red = 0;
314 return save;
318 static TKLispRBTNode *KLispRBTDoubleRotation (TKLispRBTNode *root, int dir) {
319 root->link[!dir] = KLispRBTSingleRotation(root->link[!dir], !dir);
321 return KLispRBTSingleRotation(root, dir);
325 /* dataptr: pointer to data; data will be copied */
326 /* newnode will be non-zero, if new node was generated; newnode can be NULL */
327 TKLispRBTNode *KLispRBTInsert (TKLispRBTree *tree, const char *str, int value0, int value1, int freeIt, int *newnode) {
328 TKLispRBTNode *res = NULL;
329 TKLispRBTNode head = {0}; /* false tree root */
330 TKLispRBTNode *g, *t; /* grandparent & parent */
331 TKLispRBTNode *p, *q; /* iterator & parent */
332 int dir = 0, last = -1, cmp, dir2;
333 uint32_t strhash = HashString(str), qhash;
335 if (tree->root == NULL) {
336 /* empty tree case */
337 if (newnode) *newnode = 1;
338 tree->root = RBTI_NewNode(str, value0, value1, freeIt);
339 if (tree->root) {
340 /* make root black */
341 tree->root->red = 0;
342 (tree->nodeCount)++;
344 return tree->root;
347 if (newnode) *newnode = 0;
348 /* set up helpers */
349 t = &head;
350 g = p = NULL;
351 q = t->link[1] = tree->root;
352 /* search down the tree */
353 for (;;) {
354 if (q == NULL) {
355 /* insert new node at the bottom */
356 p->link[dir] = q = RBTI_NewNode(str, value0, value1, freeIt);
357 if (q == NULL) return NULL;
358 if (newnode) *newnode = 1;
359 res = q;
360 (tree->nodeCount)++;
361 } else if (KLispRBTIsRedNode(q->link[0]) && KLispRBTIsRedNode(q->link[1])) {
362 /* color flip */
363 q->red = 1;
364 q->link[0]->red = 0;
365 q->link[1]->red = 0;
367 /* fix red violation */
368 if (KLispRBTIsRedNode(q) && KLispRBTIsRedNode(p)) {
369 dir2 = t->link[1] == g;
370 if (q == p->link[last]) t->link[dir2] = KLispRBTSingleRotation(g, !last);
371 else t->link[dir2] = KLispRBTDoubleRotation(g, !last);
373 /* stop if found */
374 qhash = q->hash; cmp = (qhash == strhash)?strcmp(q->str, str):(qhash<strhash?-1:1);
375 if (!cmp) {
376 if (!res) res = q;
377 break;
379 last = dir; dir = cmp<0?1:0;
380 /* update helpers */
381 if (g != NULL) t = g;
382 g = p, p = q;
383 q = q->link[dir];
385 assert(res);
386 /* update root */
387 tree->root = head.link[1];
388 /* make root black */
389 tree->root->red = 0;
391 #ifdef KLISP_RBT_DEBUG
392 KLispRBTAssert(tree->root, "insert");
393 #endif
394 return res;
398 #define KLISP_RBT_SWAP(fld,tmp) {tmp=found->fld;found->fld=q->fld;q->fld=tmp;}
399 int KLispRBTDelete (TKLispRBTree *tree, const char *str) {
400 /*TKLispRBTNode *nodetokill = NULL;*/
401 TKLispRBTNode head = {0}; /* false tree root */
402 TKLispRBTNode *q, *p, *g, *s; /* helpers */
403 TKLispRBTNode *found = NULL; /* found item */
404 int dir = 1, cmp, last, dir2;
405 char *tmpstr;
406 uint32_t strhash = HashString(str), qhash;
408 #ifdef KLISP_RBT_DEBUG_DEL
409 fprintf(stderr, "delete: '%s'\n", str);
410 #endif
411 if (tree->root != NULL) {
412 /* set up helpers */
413 q = &head;
414 g = p = NULL;
415 q->link[1] = tree->root;
416 /* search and push a red down */
417 while (q->link[dir] != NULL) {
418 last = dir;
419 /* update helpers */
420 g = p, p = q;
421 q = q->link[dir];
422 /*!!!cmp = strcmp(q->str, str);*/
423 qhash = q->hash; cmp = (qhash == strhash)?strcmp(q->str, str):(qhash<strhash?-1:1);
424 /* save found node */
425 if (!cmp) { assert(!found); found = q; }
426 /* new dir */
427 dir = cmp<0?1:0;
428 /* push the red node down */
429 if (!KLispRBTIsRedNode(q) && !KLispRBTIsRedNode(q->link[dir])) {
430 if (KLispRBTIsRedNode(q->link[!dir])) p = p->link[last] = KLispRBTSingleRotation(q, dir);
431 else if (!KLispRBTIsRedNode(q->link[!dir])) {
432 s = p->link[!last];
433 if (s != NULL) {
434 if (!KLispRBTIsRedNode(s->link[!last]) && !KLispRBTIsRedNode(s->link[last])) {
435 /* color flip */
436 p->red = 0;
437 s->red = 1;
438 q->red = 1;
439 } else {
440 dir2 = g->link[1] == p;
441 if (KLispRBTIsRedNode(s->link[last])) g->link[dir2] = KLispRBTDoubleRotation(p, last);
442 else if (KLispRBTIsRedNode(s->link[!last])) g->link[dir2] = KLispRBTSingleRotation(p, last);
443 /* ensure correct coloring */
444 q->red = g->link[dir2]->red = 1;
445 g->link[dir2]->link[0]->red = 0;
446 g->link[dir2]->link[1]->red = 0;
452 /* replace and remove if found */
453 if (found != NULL) {
454 #ifdef KLISP_RBT_DEBUG_DEL
455 fprintf(stderr, "found: '%s'\n", found->str);
456 fprintf(stderr, "q: '%s'\n", q->str);
457 #endif
458 p->link[p->link[1] == q] = q->link[q->link[0] == NULL];
459 /* swap data for found node and q */
460 KLISP_RBT_SWAP(freeIt, last);
461 KLISP_RBT_SWAP(str, tmpstr);;
462 found->value0 = q->value0;
463 found->value1 = q->value1;
464 found->hash = q->hash;
465 found = q;
466 /*found->str = q->str;*/
467 /*nodetokill = q;*/
469 /* Update root and make it black */
470 tree->root = head.link[1];
471 if (tree->root != NULL) tree->root->red = 0;
473 if (found) {
474 #ifdef KLISP_RBT_DEBUG_DEL
475 fprintf(stderr, "killing: '%s'\n", found->str);
476 #endif
477 RBTI_FreeNode(found);
478 (tree->nodeCount)--;
479 #ifdef KLISP_RBT_DEBUG
480 KLispRBTAssert(tree->root, "delete");
481 #endif
483 return 1;
486 return 0;
490 TKLispRBTNode *KLispRBTFind (TKLispRBTree *tree, const char *str) {
491 TKLispRBTNode *res = tree->root;
492 int cmp;
493 int cnt = 0;
494 uint32_t strhash = HashString(str), qhash;
496 while (res) {
497 /*!!!cmp = strcmp(res->str, str);*/
498 qhash = res->hash; cmp = (qhash == strhash)?strcmp(res->str, str):(qhash<strhash?-1:1);
499 if (!cmp) break;
500 res = res->link[cmp<0?1:0];
501 cnt++;
502 if (cnt > tree->nodeCount) {
503 fprintf(stderr, "KLispRBTFind: DIA!\n");
504 abort();
508 return res;
512 void KLispRBTInitWalker (TKLispRBTree *tree, TKLispRBTWalker *walker) {
513 if (!tree) return;
514 memset(walker, 0, sizeof(TKLispRBTWalker));
515 walker->tree = tree; walker->curnode = tree->root;
516 walker->sp = 0; walker->phase = 0;
517 #ifdef KLISP_RBT_DEBUG
518 walker->maxsp = 0;
519 #endif
523 void KLispRBTDeinitWalker (TKLispRBTWalker *wdata) {
527 TKLispRBTWalker *KLispRBTNewWalker (TKLispRBTree *tree) {
528 TKLispRBTWalker *res;
530 if (!tree) return NULL;
531 res = KLISP_MEMALLOC(sizeof(TKLispRBTWalker));
532 if (!res) return NULL;
533 KLispRBTInitWalker(tree, res);
535 return res;
539 void KLispRBTFreeWalker (TKLispRBTWalker *wdata) {
540 if (!wdata) return;
541 KLispRBTDeinitWalker(wdata);
542 KLISP_MEMFREE(wdata);
546 /* DO NOT change the tree while walking with this function! */
547 TKLispRBTNode *KLispRBTWalkerNext (TKLispRBTWalker *wdata) {
548 TKLispRBTNode *node = wdata->curnode;
549 if (!node) return NULL;
550 do {
551 int sp = wdata->sp;
552 switch (wdata->phase) {
553 case 0: /* left node */
554 if (!node->link[0]) {
555 wdata->phase = 2;
556 wdata->curnode = node;
557 return node;
559 if (sp >= KLISP_RBT_WALK_STACK_SIZE) {
560 fprintf(stderr, "stack overflow!\n");
561 abort();
563 wdata->stack[sp].node = node;
564 wdata->stack[sp].phase = 1;
565 (wdata->sp)++;
566 #ifdef KLISP_RBT_DEBUG
567 wdata->maxsp = (wdata->sp>wdata->maxsp)?wdata->sp:wdata->maxsp;
568 #endif
569 node = node->link[0];
570 break;
571 case 1: /* current node */
572 wdata->phase = 2;
573 wdata->curnode = node;
574 return node;
575 case 2: /* right node */
576 if (!node->link[1]) wdata->phase = 3; /* go up */
577 else {
578 /* go down */
579 if (sp >= KLISP_RBT_WALK_STACK_SIZE) {
580 fprintf(stderr, "stack overflow!\n");
581 abort();
583 wdata->stack[sp].node = node;
584 wdata->stack[sp].phase = 3;
585 (wdata->sp)++;
586 #ifdef KLISP_RBT_DEBUG
587 wdata->maxsp = (wdata->sp>wdata->maxsp)?wdata->sp:wdata->maxsp;
588 #endif
589 node = node->link[1];
590 wdata->phase = 0;
592 break;
593 case 3:
594 /* go up */
595 if (!sp) {
596 wdata->curnode = NULL;
597 return NULL;
599 (wdata->sp)--; sp--;
600 wdata->phase = wdata->stack[sp].phase;
601 node = wdata->stack[sp].node;
602 break;
604 } while (1);
606 return NULL;
610 #ifdef KLISP_RBT_DEBUG
611 /* debug */
613 If KLispRBTAssert returns 0, the tree is an invalid red black tree.
614 Otherwise it will return the black height of the entire tree.
615 For convenience, a message will also print out telling you which violations occurred. :-)
617 /* uses: KLispRBTIsRedNode */
618 int KLispRBTAssert (TKLispRBTNode *root, const char *msg) {
619 TKLispRBTNode *ln, *rn;
620 int lh, rh;
621 int cmp;
623 if (!root) return 1;
625 ln = root->link[0];
626 rn = root->link[1];
627 /* Consecutive red links */
628 if (KLispRBTIsRedNode(root)) {
629 if (KLispRBTIsRedNode(ln) || KLispRBTIsRedNode(rn)) {
630 fprintf(stderr, "%s", "red violation\n");
631 /*return 0;*/
632 abort();
636 lh = KLispRBTAssert(ln, msg); rh = KLispRBTAssert(rn, msg);
638 /* Invalid binary search tree */
639 if (ln) {
640 cmp = (ln->hash == root->hash)?strcmp(ln->str, root->str):(ln->hash<root->hash?-1:1);
641 if (cmp >= 0) { fprintf(stderr, "%s: %s", msg, "binary tree violation (ln)\n"); abort(); }
643 if (rn) {
644 cmp = (rn->hash == root->hash)?strcmp(rn->str, root->str):(rn->hash<root->hash?-1:1);
645 if (cmp <= 0) { fprintf(stderr, "%s: %s", msg, "binary tree violation (rn)\n"); abort(); }
648 if ((ln && strcmp(ln->str, root->str) >= 0) ||
649 (rn && strcmp(rn->str, root->str) <= 0)) {
650 fprintf(stderr, "%s: %s", msg, "binary tree violation\n");
651 abort();
654 /* Black height mismatch */
655 if (lh != 0 && rh != 0 && lh != rh) {
656 fprintf(stderr, "%s: %s", msg, "black violation\n");
657 abort();
658 /*return 0;*/
661 /* Only count black links */
662 if (lh && rh) return KLispRBTIsRedNode(root)?lh:lh+1; else return 0;
664 #endif
667 #undef KLispRBTIsRedNode
670 #endif /* _KLISP_RBTREE_MODULE_ */