/******************************************************************************
 * skip_cas_func.c
 *
 * Skip lists, allowing concurrent update by use of CAS primitives.
 *
 * Matt Benjamin <matt@linuxbox.com>
 *
 * Adapts MCAS skip list to use a pointer-key and typed comparison
 * function style (because often, your key isn't an integer).
 *
 * Caution, pointer values 0x0, 0x01, and 0x02 are reserved. Fortunately,
 * no real pointer is likely to have one of these values.
 *

Copyright (c) 2003, Keir Fraser. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of Keir Fraser nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
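
/*
 * Usage sketch (illustrative, not part of the original file).  Keys are
 * opaque pointers, ordered by the comparison function handed to
 * set_alloc(); cmp_str and the calling code below are assumptions made
 * for the example, and the GC/ptst subsystems must have been initialised
 * first (e.g. via _init_set_subsystem()):
 *
 *   static int cmp_str(const void *a, const void *b)
 *   {
 *       return strcmp((const char *)a, (const char *)b);
 *   }
 *
 *   set_t *s = set_alloc(cmp_str);
 *   (void)set_update(s, (setkey_t)"grape", (setval_t)v1, 1);
 *   setval_t v = set_lookup(s, (setkey_t)"grape");
 *   (void)set_remove(s, (setkey_t)"grape");
 */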
#define __SET_IMPLEMENTATION__

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "portable_defns.h"
#include "ptst.h"
#include "set_func.h"
/*
 * SKIP LIST
 */

typedef struct node_st node_t;
typedef struct set_st set_t;
typedef VOLATILE node_t *sh_node_pt;
struct node_st {
    int level;
#define LEVEL_MASK     0x0ff
#define READY_FOR_FREE 0x100
    setkey_t k;
    setval_t v;
    sh_node_pt next[1];
};
struct set_st {
    node_t head;
    int (*cmpf) (const void *, const void *);
};
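
/*
 * Illustrative note (not in the original file): the reserved pointer
 * values called out in the header come from the low-order-bit tagging
 * used by is_marked_ref()/get_marked_ref()/get_unmarked_ref() from
 * portable_defns.h.  A minimal sketch of that convention, assuming the
 * usual single-bit mark (the real macros are the authority here):
 *
 *   #define is_marked_ref(p)    ((unsigned long)(p) & 1)
 *   #define get_marked_ref(p)   ((void *)((unsigned long)(p) | 1))
 *   #define get_unmarked_ref(p) ((void *)((unsigned long)(p) & ~1UL))
 */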
static int gc_id[NUM_LEVELS];

/*
 * PRIVATE FUNCTIONS
 */
#define compare_keys(s, k1, k2) (s->cmpf(k1, k2))
/*
 * Random level generator. Drop-off rate is 0.5 per level.
 * Returns a value 1 <= level <= NUM_LEVELS.
 */
static int
get_level(ptst_t * ptst)
{
    unsigned long r = rand_next(ptst);
    int l = 1;
    r = (r >> 4) & ((1 << (NUM_LEVELS - 1)) - 1);
    while ((r & 1)) {
        l++;
        r >>= 1;
    }
    return (l);
}
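
/*
 * Illustrative note (not in the original file): because each extra level
 * is kept with probability 1/2, P(level == k) is roughly 2^-k, so about
 * half of all nodes are level 1, a quarter level 2, and so on; expected
 * node height is ~2 pointers, giving O(log n) expected search cost.
 */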
/*
 * Allocate a new node, and initialise its @level field.
 * NB. Initialisation will eventually be pushed into garbage collector,
 * because of dependent read reordering.
 */
static node_t *
alloc_node(ptst_t * ptst)
{
    int l;
    node_t *n;
    l = get_level(ptst);
    n = gc_alloc(ptst, gc_id[l - 1]);
    n->level = l;
    return (n);
}
/* Free a node to the garbage collector. */
static void
free_node(ptst_t * ptst, sh_node_pt n)
{
    gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
}
/*
 * Search for first non-deleted node, N, with key >= @k at each level in @l.
 * RETURN VALUES:
 *  Array @pa: @pa[i] is non-deleted predecessor of N at level i
 *  Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
 *  MAIN RETURN VALUE: same as @na[0].
 */
static sh_node_pt
strong_search_predecessors(set_t * l, setkey_t k, sh_node_pt * pa,
                           sh_node_pt * na)
{
    sh_node_pt x, x_next, old_x_next, y, y_next;
    setkey_t y_k;
    int i;

retry:
    RMB();

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        /* We start our search at previous level's unmarked predecessor. */
        READ_FIELD(x_next, x->next[i]);
        /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
        if (is_marked_ref(x_next))
            goto retry;

        for (y = x_next;; y = y_next) {
            /* Shift over a sequence of marked nodes. */
            for (;;) {
                READ_FIELD(y_next, y->next[i]);
                if (!is_marked_ref(y_next))
                    break;
                y = get_unmarked_ref(y_next);
            }

            READ_FIELD(y_k, y->k);
            if (compare_keys(l, y_k, k) >= 0)
                break;

            /* Update estimate of predecessor at this level. */
            x = y;
            x_next = y_next;
        }

        /* Swing forward pointer over any marked nodes. */
        if (x_next != y) {
            old_x_next = CASPO(&x->next[i], x_next, y);
            if (old_x_next != x_next)
                goto retry;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = y;
    }

    return (y);
}
/* This function does not remove marked nodes. Use it optimistically. */
static sh_node_pt
weak_search_predecessors(set_t * l, setkey_t k, sh_node_pt * pa,
                         sh_node_pt * na)
{
    sh_node_pt x, x_next;
    setkey_t x_next_k;
    int i;

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        for (;;) {
            READ_FIELD(x_next, x->next[i]);
            x_next = get_unmarked_ref(x_next);

            READ_FIELD(x_next_k, x_next->k);
            if (compare_keys(l, x_next_k, k) >= 0)
                break;

            x = x_next;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = x_next;
    }

    return (x_next);
}
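
/*
 * Illustrative note (not in the original file): the public operations
 * below use the read-only weak_search_predecessors() on the fast path,
 * and fall back to strong_search_predecessors() -- which physically
 * unlinks marked nodes with CAS -- whenever a CAS fails or a half-deleted
 * node is encountered.
 */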
/*
 * Mark @x deleted at every level in its list from @level down to level 1.
 * When all forward pointers are marked, node is effectively deleted.
 * Future searches will properly remove node by swinging predecessors'
 * forward pointers.
 */
static void
mark_deleted(sh_node_pt x, int level)
{
    sh_node_pt x_next;

    while (--level >= 0) {
        x_next = x->next[level];
        while (!is_marked_ref(x_next)) {
            x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
        }
        WEAK_DEP_ORDER_WMB(); /* mark in order */
    }
}
static int
check_for_full_delete(sh_node_pt x)
{
    int level = x->level;
    return ((level & READY_FOR_FREE) ||
            (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
}
static void
do_full_delete(ptst_t * ptst, set_t * l, sh_node_pt x, int level)
{
    setkey_t k = x->k;
#ifdef WEAK_MEM_ORDER
    sh_node_pt preds[NUM_LEVELS];
    int i = level;
retry:
    (void)strong_search_predecessors(l, k, preds, NULL);
    /*
     * Above level 1, references to @x can disappear if a node is inserted
     * immediately before and we see an old value for its forward pointer.
     * This is a conservative way of checking for that situation.
     */
    if (i > 0)
        RMB();
    while (i > 0) {
        node_t *n = get_unmarked_ref(preds[i]->next[i]);
        while (compare_keys(l, n->k, k) < 0) {
            n = get_unmarked_ref(n->next[i]);
            RMB(); /* we don't want refs to @x to "disappear" */
        }
        if (n == x)
            goto retry;
        i--; /* don't need to check this level again, even if we retry. */
    }
#else
    (void)strong_search_predecessors(l, k, NULL, NULL);
#endif
    free_node(ptst, x);
}
/*
 * PUBLIC FUNCTIONS
 */
set_t *
set_alloc(int (*cmpf) (const void *, const void *))
{
    set_t *l;
    node_t *n;
    int i;

    n = malloc(sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    memset(n, 0, sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    n->k = SENTINEL_KEYMAX;

    /*
     * Set the forward pointers of final node to other than NULL,
     * otherwise READ_FIELD() will continually execute costly barriers.
     * Note use of 0xfe -- that doesn't look like a marked value!
     */
    memset(n->next, 0xfe, NUM_LEVELS * sizeof(node_t *));

    l = malloc(sizeof(*l) + (NUM_LEVELS - 1) * sizeof(node_t *));
    l->cmpf = cmpf;
    l->head.k = SENTINEL_KEYMIN;
    l->head.level = NUM_LEVELS;
    for (i = 0; i < NUM_LEVELS; i++) {
        l->head.next[i] = n;
    }

    return (l);
}
setval_t
set_update(set_t * l, setkey_t k, setval_t v, int overwrite)
{
    setval_t ov, new_ov;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
    sh_node_pt pred, succ, new = NULL, new_next, old_next;
    int i, level;

    ptst = critical_enter();

    succ = weak_search_predecessors(l, k, preds, succs);

retry:
    ov = NULL;

    if (compare_keys(l, succ->k, k) == 0) {
        /* Already a @k node in the list: update its mapping. */
        new_ov = succ->v;
        do {
            if ((ov = new_ov) == NULL) {
                /* Finish deleting the node, then retry. */
                READ_FIELD(level, succ->level);
                mark_deleted(succ, level & LEVEL_MASK);
                succ = strong_search_predecessors(l, k, preds, succs);
                goto retry;
            }
        } while (overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov));

        if (new != NULL)
            free_node(ptst, new);
        goto out;
    }

#ifdef WEAK_MEM_ORDER
    /* Free node from previous attempt, if this is a retry. */
    if (new != NULL) {
        free_node(ptst, new);
        new = NULL;
    }
#endif

    /* Not in the list, so initialise a new node for insertion. */
    if (new == NULL) {
        new = alloc_node(ptst);
        new->k = k;
        new->v = v;
    }
    level = new->level;

    /* If successors don't change, this saves us some CAS operations. */
    for (i = 0; i < level; i++) {
        new->next[i] = succs[i];
    }

    /* We've committed when we've inserted at level 1. */
    WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
    old_next = CASPO(&preds[0]->next[0], succ, new);
    if (old_next != succ) {
        succ = strong_search_predecessors(l, k, preds, succs);
        goto retry;
    }

    /* Insert at each of the other levels in turn. */
    i = 1;
    while (i < level) {
        pred = preds[i];
        succ = succs[i];

        /* Someone *can* delete @new under our feet! */
        new_next = new->next[i];
        if (is_marked_ref(new_next))
            goto success;

        /* Ensure forward pointer of new node is up to date. */
        if (new_next != succ) {
            old_next = CASPO(&new->next[i], new_next, succ);
            if (is_marked_ref(old_next))
                goto success;
            assert(old_next == new_next);
        }

        /* Ensure we have unique key values at every level. */
        if (compare_keys(l, succ->k, k) == 0)
            goto new_world_view;

        assert((compare_keys(l, pred->k, k) < 0) &&
               (compare_keys(l, succ->k, k) > 0));

        /* Replumb predecessor's forward pointer. */
        old_next = CASPO(&pred->next[i], succ, new);
        if (old_next != succ) {
          new_world_view:
            RMB(); /* get up-to-date view of the world. */
            (void)strong_search_predecessors(l, k, preds, succs);
            continue;
        }

        /* Succeeded at this level. */
        i++;
    }

success:
    /* Ensure node is visible at all levels before punting deletion. */
    WEAK_DEP_ORDER_WMB();
    if (check_for_full_delete(new)) {
        MB(); /* make sure we see all marks in @new. */
        do_full_delete(ptst, l, new, level - 1);
    }
out:
    critical_exit(ptst);
    return (ov);
}
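
/*
 * Usage sketch (illustrative, not part of the original file): set_update()
 * returns the previous mapping of @k, or NULL if @k was absent.  With
 * overwrite == 0, an existing mapping is left untouched:
 *
 *   setval_t old = set_update(s, key, val, 0);   // insert only if absent
 *   if (old != NULL) {
 *       // key was already present; old is its current value, val ignored
 *   }
 */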
setval_t
set_remove(set_t * l, setkey_t k)
{
    setval_t v = NULL, new_v;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], x;
    int level, i;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, preds, NULL);
    if (compare_keys(l, x->k, k) > 0)
        goto out;

    READ_FIELD(level, x->level);
    level = level & LEVEL_MASK;

    /* Once we've marked the value field, the node is effectively deleted. */
    new_v = x->v;
    do {
        v = new_v;
        if (v == NULL)
            goto out;
    } while ((new_v = CASPO(&x->v, v, NULL)) != v);

    /* Committed to @x: mark lower-level forward pointers. */
    WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
    mark_deleted(x, level);

    /*
     * We must swing predecessors' pointers, or we can end up with
     * an unbounded number of marked but not fully deleted nodes.
     * Doing this creates a bound equal to number of threads in the system.
     * Furthermore, we can't legitimately call 'free_node' until all shared
     * references are gone.
     */
    for (i = level - 1; i >= 0; i--) {
        if (CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x) {
            if ((i != (level - 1)) || check_for_full_delete(x)) {
                MB(); /* make sure we see node at all levels. */
                do_full_delete(ptst, l, x, i);
            }
            goto out;
        }
    }

    free_node(ptst, x);

out:
    critical_exit(ptst);
    return (v);
}
setval_t
set_lookup(set_t * l, setkey_t k)
{
    setval_t v = NULL;
    ptst_t *ptst;
    sh_node_pt x;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, NULL, NULL);
    if (compare_keys(l, x->k, k) == 0)
        READ_FIELD(v, x->v);

    critical_exit(ptst);
    return (v);
}
void
_init_set_subsystem(void)
{
    int i;

    for (i = 0; i < NUM_LEVELS; i++) {
        gc_id[i] = gc_add_allocator(sizeof(node_t) + i * sizeof(node_t *));
    }
}
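
/*
 * Illustrative note (not in the original file): one allocator is
 * registered per level so nodes are sized to their height.  A level-l
 * node carries l forward pointers, one of which is already embedded in
 * struct node_st (next[1]), hence size sizeof(node_t) + (l - 1) *
 * sizeof(node_t *) for gc_id[l - 1], matching alloc_node() above.
 */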