/******************************************************************************
 * skip_cas_adt.c
 *
 * Skip lists, allowing concurrent update by use of CAS primitives.
 *
 * Matt Benjamin <matt@linuxbox.com>
 *
 * Adapts MCAS skip list to use a pointer-key and typed comparison
 * function style (because often, your key isn't an integer).
 *
 * Also, set_for_each (and future additions) allow a set to be iterated.
 * Current set_for_each iteration is unordered.
 *
 * Caution, pointer values 0x0, 0x01, and 0x02 are reserved.  Fortunately,
 * no real pointer is likely to have one of these values.
 *
 * An illustrative usage sketch (disabled with #if 0) appears after
 * osi_cas_skip_lookup(), below.

Copyright (c) 2003, Keir Fraser All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
    * notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above
    * copyright notice, this list of conditions and the following
    * disclaimer in the documentation and/or other materials provided
    * with the distribution.  Neither the name of the Keir Fraser
    * nor the names of its contributors may be used to endorse or
    * promote products derived from this software without specific
    * prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#define __SET_IMPLEMENTATION__

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "portable_defns.h"
#include "ptst.h"
#include "set_queue_adt.h"

// todo: get rid of me
typedef struct {
    unsigned long key;
    unsigned long secret_key;
} harness_ulong_t;

/*
 * SKIP LIST
 */

typedef struct node_st node_t;
typedef struct set_st osi_set_t;
typedef VOLATILE node_t *sh_node_pt;

struct node_st {
    int level;
    // int xxx[1024];  /* XXXX testing gc */
#define LEVEL_MASK     0x0ff
#define READY_FOR_FREE 0x100
    setkey_t k;
    setval_t v;
    sh_node_pt next[1];
};

typedef int (*osi_set_cmp_func) (const void *lhs, const void *rhs);

struct set_st {
    CACHE_PAD(0);
    osi_set_cmp_func cmpf;
    CACHE_PAD(1);
    node_t head;
    CACHE_PAD(2);
};

static int gc_id[NUM_LEVELS];

/*
 * PRIVATE FUNCTIONS
 */

#define compare_keys(s, k1, k2) (s->cmpf((const void*) k1, (const void *) k2))

/*
 * Random level generator. Drop-off rate is 0.5 per level.
 * Returns value 1 <= level <= NUM_LEVELS.
 */
static int
get_level(ptst_t * ptst)
{
    unsigned long r = rand_next(ptst);
    int l = 1;

    r = (r >> 4) & ((1 << (NUM_LEVELS - 1)) - 1);
    while ((r & 1)) {
        l++;
        r >>= 1;
    }
    return (l);
}

/*
 * Allocate a new node, and initialise its @level field.
 * NB. Initialisation will eventually be pushed into garbage collector,
 * because of dependent read reordering.
 */
static node_t *
alloc_node(ptst_t * ptst)
{
    int l;
    node_t *n;

    l = get_level(ptst);
    n = gc_alloc(ptst, gc_id[l - 1]);
    n->level = l;
    return (n);
}

/* Free a node to the garbage collector. */
static void
free_node(ptst_t * ptst, sh_node_pt n)
{
    gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
}

/*
 * Search for first non-deleted node, N, with key >= @k at each level in @l.
 * RETURN VALUES:
 *  Array @pa: @pa[i] is non-deleted predecessor of N at level i
 *  Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
 *  MAIN RETURN VALUE: same as @na[0].
 */
static sh_node_pt
strong_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
                           sh_node_pt * na)
{
    sh_node_pt x, x_next, old_x_next, y, y_next;
    setkey_t y_k;
    int i;

  retry:
    RMB();

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        /* We start our search at previous level's unmarked predecessor. */
        READ_FIELD(x_next, x->next[i]);
        /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
        if (is_marked_ref(x_next))
            goto retry;

        for (y = x_next;; y = y_next) {
            /* Shift over a sequence of marked nodes. */
            for (;;) {
                READ_FIELD(y_next, y->next[i]);
                if (!is_marked_ref(y_next))
                    break;
                y = get_unmarked_ref(y_next);
            }

            READ_FIELD(y_k, y->k);
            if (compare_keys(l, y_k, k) >= 0)
                break;

            /* Update estimate of predecessor at this level. */
            x = y;
            x_next = y_next;
        }

        /* Swing forward pointer over any marked nodes. */
        if (x_next != y) {
            old_x_next = CASPO(&x->next[i], x_next, y);
            if (old_x_next != x_next)
                goto retry;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = y;
    }

    return (y);
}

/* This function does not remove marked nodes. Use it optimistically. */
static sh_node_pt
weak_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
                         sh_node_pt * na)
{
    sh_node_pt x, x_next;
    setkey_t x_next_k;
    int i;

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        for (;;) {
            READ_FIELD(x_next, x->next[i]);
            x_next = get_unmarked_ref(x_next);

            READ_FIELD(x_next_k, x_next->k);
            if (compare_keys(l, x_next_k, k) >= 0)
                break;

            x = x_next;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = x_next;
    }

    return (x_next);
}

/*
 * Mark @x deleted at every level in its list from @level down to level 1.
 * When all forward pointers are marked, node is effectively deleted.
 * Future searches will properly remove node by swinging predecessors'
 * forward pointers.
 */
static void
mark_deleted(sh_node_pt x, int level)
{
    sh_node_pt x_next;

    while (--level >= 0) {
        x_next = x->next[level];
        while (!is_marked_ref(x_next)) {
            x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
        }
        WEAK_DEP_ORDER_WMB();  /* mark in order */
    }
}

static int
check_for_full_delete(sh_node_pt x)
{
    int level = x->level;
    return ((level & READY_FOR_FREE) ||
            (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
}

static void
do_full_delete(ptst_t * ptst, osi_set_t * l, sh_node_pt x, int level)
{
    setkey_t k = x->k;
#ifdef WEAK_MEM_ORDER
    sh_node_pt preds[NUM_LEVELS];
    int i = level;
  retry:
    (void)strong_search_predecessors(l, k, preds, NULL);
    /*
     * Above level 1, references to @x can disappear if a node is inserted
     * immediately before and we see an old value for its forward pointer. This
     * is a conservative way of checking for that situation.
     */
    if (i > 0)
        RMB();
    while (i > 0) {
        node_t *n = get_unmarked_ref(preds[i]->next[i]);
        while (compare_keys(l, n->k, k) < 0) {
            n = get_unmarked_ref(n->next[i]);
            RMB();  /* we don't want refs to @x to "disappear" */
        }
        if (n == x)
            goto retry;
        i--;  /* don't need to check this level again, even if we retry. */
    }
#else
    (void)strong_search_predecessors(l, k, NULL, NULL);
#endif
    free_node(ptst, x);
}

/*
 * PUBLIC FUNCTIONS
 */

/*
 * Called once before any set operations, including set_alloc
 */
void
_init_osi_cas_skip_subsystem(void)
{
    int i;

    for (i = 0; i < NUM_LEVELS; i++) {
        gc_id[i] = gc_add_allocator(sizeof(node_t) + i * sizeof(node_t *),
                                    "cas_skip_level");
    }
}

osi_set_t *
osi_cas_skip_alloc(osi_set_cmp_func cmpf)
{
    osi_set_t *l;
    node_t *n;
    int i;

    n = malloc(sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    memset(n, 0, sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    n->k = (setkey_t *) SENTINEL_KEYMAX;

    /*
     * Set the forward pointers of final node to other than NULL,
     * otherwise READ_FIELD() will continually execute costly barriers.
     * Note use of 0xfe -- that doesn't look like a marked value!
     */
    memset(n->next, 0xfe, NUM_LEVELS * sizeof(node_t *));

    l = malloc(sizeof(*l) + (NUM_LEVELS - 1) * sizeof(node_t *));
    l->cmpf = cmpf;
    l->head.k = (setkey_t *) SENTINEL_KEYMIN;
    l->head.level = NUM_LEVELS;
    for (i = 0; i < NUM_LEVELS; i++) {
        l->head.next[i] = n;
    }

    return (l);
}

setval_t
osi_cas_skip_update(osi_set_t * l, setkey_t k, setval_t v, int overwrite)
{
    setval_t ov, new_ov;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
    sh_node_pt pred, succ, new = NULL, new_next, old_next;
    int i, level;

    ptst = critical_enter();

    succ = weak_search_predecessors(l, k, preds, succs);

  retry:
    ov = NULL;

    if (compare_keys(l, succ->k, k) == 0) {
        /* Already a @k node in the list: update its mapping. */
        new_ov = succ->v;
        do {
            if ((ov = new_ov) == NULL) {
                /* Finish deleting the node, then retry. */
                READ_FIELD(level, succ->level);
                mark_deleted(succ, level & LEVEL_MASK);
                succ = strong_search_predecessors(l, k, preds, succs);
                goto retry;
            }
        } while (overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov));

        if (new != NULL)
            free_node(ptst, new);
        goto out;
    }
#ifdef WEAK_MEM_ORDER
    /* Free node from previous attempt, if this is a retry. */
    if (new != NULL) {
        free_node(ptst, new);
        new = NULL;
    }
#endif

    /* Not in the list, so initialise a new node for insertion. */
    if (new == NULL) {
        new = alloc_node(ptst);
        new->k = k;
        new->v = v;
    }
    level = new->level;

    /* If successors don't change, this saves us some CAS operations. */
    for (i = 0; i < level; i++) {
        new->next[i] = succs[i];
    }

    /* We've committed when we've inserted at level 1. */
    WMB_NEAR_CAS();  /* make sure node fully initialised before inserting */
    old_next = CASPO(&preds[0]->next[0], succ, new);
    if (old_next != succ) {
        succ = strong_search_predecessors(l, k, preds, succs);
        goto retry;
    }

    /* Insert at each of the other levels in turn. */
    i = 1;
    while (i < level) {
        pred = preds[i];
        succ = succs[i];

        /* Someone *can* delete @new under our feet! */
        new_next = new->next[i];
        if (is_marked_ref(new_next))
            goto success;

        /* Ensure forward pointer of new node is up to date. */
        if (new_next != succ) {
            old_next = CASPO(&new->next[i], new_next, succ);
            if (is_marked_ref(old_next))
                goto success;
            assert(old_next == new_next);
        }

        /* Ensure we have unique key values at every level. */
        if (compare_keys(l, succ->k, k) == 0)
            goto new_world_view;

        assert((compare_keys(l, pred->k, k) < 0) &&
               (compare_keys(l, succ->k, k) > 0));

        /* Replumb predecessor's forward pointer. */
        old_next = CASPO(&pred->next[i], succ, new);
        if (old_next != succ) {
          new_world_view:
            RMB();  /* get up-to-date view of the world. */
            (void)strong_search_predecessors(l, k, preds, succs);
            continue;
        }

        /* Succeeded at this level. */
        i++;
    }

  success:
    /* Ensure node is visible at all levels before punting deletion. */
    WEAK_DEP_ORDER_WMB();
    if (check_for_full_delete(new)) {
        MB();  /* make sure we see all marks in @new. */
        do_full_delete(ptst, l, new, level - 1);
    }
  out:
    critical_exit(ptst);
    return (ov);
}

setval_t
osi_cas_skip_remove(osi_set_t * l, setkey_t k)
{
    setval_t v = NULL, new_v;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], x;
    int level, i;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, preds, NULL);
    if (compare_keys(l, x->k, k) > 0)
        goto out;

    READ_FIELD(level, x->level);
    level = level & LEVEL_MASK;

    /* Once we've marked the value field, the node is effectively deleted. */
    new_v = x->v;
    do {
        v = new_v;
        if (v == NULL)
            goto out;
    } while ((new_v = CASPO(&x->v, v, NULL)) != v);

    /* Committed to @x: mark lower-level forward pointers. */
    WEAK_DEP_ORDER_WMB();  /* enforce above as linearisation point */
    mark_deleted(x, level);

    /*
     * We must swing predecessors' pointers, or we can end up with
     * an unbounded number of marked but not fully deleted nodes.
     * Doing this creates a bound equal to number of threads in the system.
     * Furthermore, we can't legitimately call 'free_node' until all shared
     * references are gone.
     */
    for (i = level - 1; i >= 0; i--) {
        if (CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x) {
            if ((i != (level - 1)) || check_for_full_delete(x)) {
                MB();  /* make sure we see node at all levels. */
                do_full_delete(ptst, l, x, i);
            }
            goto out;
        }
    }

    free_node(ptst, x);

  out:
    critical_exit(ptst);
    return (v);
}

setval_t
osi_cas_skip_lookup(osi_set_t * l, setkey_t k)
{
    setval_t v = NULL;
    ptst_t *ptst;
    sh_node_pt x;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, NULL, NULL);
    if (compare_keys(l, x->k, k) == 0)
        READ_FIELD(v, x->v);

    critical_exit(ptst);
    return (v);
}
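
/*
 * Illustrative usage sketch for the pointer-key / comparison-function
 * style described in the header comment.  It is guarded out of
 * compilation, and it assumes names not defined in this file:
 * example_obj_t, example_obj_cmp and example_usage are hypothetical.
 * Note that the search routines pass a stored key, which may be one of
 * the SENTINEL_KEYMIN / SENTINEL_KEYMAX sentinels installed by
 * osi_cas_skip_alloc, as the first comparison argument and the
 * caller's key as the second; the sentinel handling below assumes the
 * sentinels must order below/above every real key.
 */
#if 0
typedef struct {
    unsigned long id;
} example_obj_t;

static int
example_obj_cmp(const void *lhs, const void *rhs)
{
    const example_obj_t *lo, *ro;

    /* lhs may be a sentinel key; never dereference those. */
    if (lhs == (const void *)SENTINEL_KEYMIN)
        return (-1);
    if (lhs == (const void *)SENTINEL_KEYMAX)
        return (1);

    lo = lhs;
    ro = rhs;
    return ((lo->id < ro->id) ? -1 : (lo->id > ro->id) ? 1 : 0);
}

static void
example_usage(void)
{
    osi_set_t *s;
    example_obj_t a = { 42 };
    setval_t v;

    _init_osi_cas_skip_subsystem();  /* once, before any set operations */
    s = osi_cas_skip_alloc(example_obj_cmp);

    /* Key and value are both the object pointer here; set_for_each
     * reports elements by their stored key (a stored k is a v). */
    (void)osi_cas_skip_update(s, (setkey_t)&a, (setval_t)&a, 1 /* overwrite */);

    v = osi_cas_skip_lookup(s, (setkey_t)&a);
    assert(v == (setval_t)&a);

    (void)osi_cas_skip_remove(s, (setkey_t)&a);
}
#endif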

/* Hybrid Set/Queue Operations (Matt) */

/* Iterate over a sequential structure, calling callback_func
 * on each (undeleted) element visited.  (An example callback appears
 * after the function, below.) */

/* Each-element function passed to set_for_each */
typedef void (*osi_set_each_func) (osi_set_t * l, setval_t v, void *arg);
void
osi_cas_skip_for_each(osi_set_t * l, osi_set_each_func each_func, void *arg)
{
    sh_node_pt x, y, x_next, old_x_next;
    setkey_t x_next_k;
    ptst_t *ptst;
    int i;

    ptst = critical_enter();

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        /* must revisit x at each level to see all
         * nodes in the structure */
        y = x;

        for (;;) {
            READ_FIELD(x_next, y->next[i]);
            x_next = get_unmarked_ref(x_next);

            READ_FIELD(x_next_k, x_next->k);
            if (x_next_k == (setkey_t) SENTINEL_KEYMAX)
                break;

            /* in our variation, a (stored) k is a v,
             * ie, x_next_k is x_next_v */
            each_func(l, x_next_k, arg);

            y = x_next;
        }
    }

    critical_exit(ptst);
}
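
/*
 * Example callback for osi_cas_skip_for_each (a sketch, guarded out of
 * compilation; example_count_cb and example_count are hypothetical
 * names).  The value handed to the callback is the stored key pointer,
 * per the comment above.  Iteration is unordered, and because every
 * level is walked an element may be visited once per level at which it
 * is linked, so the count below is of callback invocations rather than
 * of distinct elements.
 */
#if 0
static void
example_count_cb(osi_set_t * l, setval_t v, void *arg)
{
    unsigned long *counter = arg;

    (void)l;
    (void)v;
    (*counter)++;
}

static unsigned long
example_count(osi_set_t * s)
{
    unsigned long counter = 0;

    osi_cas_skip_for_each(s, example_count_cb, &counter);
    return (counter);
}
#endif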