/******************************************************************************
 * skip_cas.c  (pkg-k5-afs_openafs.git: src/mcas/skip_cas.c)
 *
 * Skip lists, allowing concurrent update by use of CAS primitives.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.  Neither the name of the Keir Fraser
 *    nor the names of its contributors may be used to endorse or
 *    promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#define __SET_IMPLEMENTATION__

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "portable_defns.h"
#include "ptst.h"
#include "set.h"
46 * SKIP LIST
49 typedef struct node_st node_t;
50 typedef struct set_st set_t;
51 typedef VOLATILE node_t *sh_node_pt;
53 struct node_st
55 int level;
56 #define LEVEL_MASK 0x0ff
57 #define READY_FOR_FREE 0x100
58 setkey_t k;
59 setval_t v;
60 sh_node_pt next[1];
63 struct set_st
65 node_t head;
68 static int gc_id[NUM_LEVELS];
71 * PRIVATE FUNCTIONS
75 * Random level generator. Drop-off rate is 0.5 per level.
76 * Returns value 1 <= level <= NUM_LEVELS.
78 static int get_level(ptst_t *ptst)
80 unsigned long r = rand_next(ptst);
81 int l = 1;
82 r = (r >> 4) & ((1 << (NUM_LEVELS-1)) - 1);
83 while ( (r & 1) ) { l++; r >>= 1; }
84 return(l);
89 * Allocate a new node, and initialise its @level field.
90 * NB. Initialisation will eventually be pushed into garbage collector,
91 * because of dependent read reordering.
93 static node_t *alloc_node(ptst_t *ptst)
95 int l;
96 node_t *n;
97 l = get_level(ptst);
98 n = gc_alloc(ptst, gc_id[l - 1]);
99 n->level = l;
100 return(n);
104 /* Free a node to the garbage collector. */
105 static void free_node(ptst_t *ptst, sh_node_pt n)
107 gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
112 * Search for first non-deleted node, N, with key >= @k at each level in @l.
113 * RETURN VALUES:
114 * Array @pa: @pa[i] is non-deleted predecessor of N at level i
115 * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
116 * MAIN RETURN VALUE: same as @na[0].
118 static sh_node_pt strong_search_predecessors(
119 set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
121 sh_node_pt x, x_next, old_x_next, y, y_next;
122 setkey_t y_k;
123 int i;
125 retry:
126 RMB();
128 x = &l->head;
129 for ( i = NUM_LEVELS - 1; i >= 0; i-- )
131 /* We start our search at previous level's unmarked predecessor. */
132 READ_FIELD(x_next, x->next[i]);
133 /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
134 if ( is_marked_ref(x_next) ) goto retry;
136 for ( y = x_next; ; y = y_next )
138 /* Shift over a sequence of marked nodes. */
139 for ( ; ; )
141 READ_FIELD(y_next, y->next[i]);
142 if ( !is_marked_ref(y_next) ) break;
143 y = get_unmarked_ref(y_next);
146 READ_FIELD(y_k, y->k);
147 if ( y_k >= k ) break;
149 /* Update estimate of predecessor at this level. */
150 x = y;
151 x_next = y_next;
154 /* Swing forward pointer over any marked nodes. */
155 if ( x_next != y )
157 old_x_next = CASPO(&x->next[i], x_next, y);
158 if ( old_x_next != x_next ) goto retry;
161 if ( pa ) pa[i] = x;
162 if ( na ) na[i] = y;
165 return(y);
169 /* This function does not remove marked nodes. Use it optimistically. */
170 static sh_node_pt weak_search_predecessors(
171 set_t *l, setkey_t k, sh_node_pt *pa, sh_node_pt *na)
173 sh_node_pt x, x_next;
174 setkey_t x_next_k;
175 int i;
177 x = &l->head;
178 for ( i = NUM_LEVELS - 1; i >= 0; i-- )
180 for ( ; ; )
182 READ_FIELD(x_next, x->next[i]);
183 x_next = get_unmarked_ref(x_next);
185 READ_FIELD(x_next_k, x_next->k);
186 if ( x_next_k >= k ) break;
188 x = x_next;
191 if ( pa ) pa[i] = x;
192 if ( na ) na[i] = x_next;
195 return(x_next);
200 * Mark @x deleted at every level in its list from @level down to level 1.
201 * When all forward pointers are marked, node is effectively deleted.
202 * Future searches will properly remove node by swinging predecessors'
203 * forward pointers.
205 static void mark_deleted(sh_node_pt x, int level)
207 sh_node_pt x_next;
209 while ( --level >= 0 )
211 x_next = x->next[level];
212 while ( !is_marked_ref(x_next) )
214 x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
216 WEAK_DEP_ORDER_WMB(); /* mark in order */
221 static int check_for_full_delete(sh_node_pt x)
223 int level = x->level;
224 return ((level & READY_FOR_FREE) ||
225 (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
229 static void do_full_delete(ptst_t *ptst, set_t *l, sh_node_pt x, int level)
231 int k = x->k;
232 #ifdef WEAK_MEM_ORDER
233 sh_node_pt preds[NUM_LEVELS];
234 int i = level;
235 retry:
236 (void)strong_search_predecessors(l, k, preds, NULL);
238 * Above level 1, references to @x can disappear if a node is inserted
239 * immediately before and we see an old value for its forward pointer. This
240 * is a conservative way of checking for that situation.
242 if ( i > 0 ) RMB();
243 while ( i > 0 )
245 node_t *n = get_unmarked_ref(preds[i]->next[i]);
246 while ( n->k < k )
248 n = get_unmarked_ref(n->next[i]);
249 RMB(); /* we don't want refs to @x to "disappear" */
251 if ( n == x ) goto retry;
252 i--; /* don't need to check this level again, even if we retry. */
254 #else
255 (void)strong_search_predecessors(l, k, NULL, NULL);
256 #endif
257 free_node(ptst, x);
262 * PUBLIC FUNCTIONS
265 set_t *set_alloc(void)
267 set_t *l;
268 node_t *n;
269 int i;
271 n = malloc(sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
272 memset(n, 0, sizeof(*n) + (NUM_LEVELS-1)*sizeof(node_t *));
273 n->k = SENTINEL_KEYMAX;
276 * Set the forward pointers of final node to other than NULL,
277 * otherwise READ_FIELD() will continually execute costly barriers.
278 * Note use of 0xfe -- that doesn't look like a marked value!
280 memset(n->next, 0xfe, NUM_LEVELS*sizeof(node_t *));
282 l = malloc(sizeof(*l) + (NUM_LEVELS-1)*sizeof(node_t *));
283 l->head.k = SENTINEL_KEYMIN;
284 l->head.level = NUM_LEVELS;
285 for ( i = 0; i < NUM_LEVELS; i++ )
287 l->head.next[i] = n;
290 return(l);
294 setval_t set_update(set_t *l, setkey_t k, setval_t v, int overwrite)
296 setval_t ov, new_ov;
297 ptst_t *ptst;
298 sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
299 sh_node_pt pred, succ, new = NULL, new_next, old_next;
300 int i, level;
302 k = CALLER_TO_INTERNAL_KEY(k);
304 ptst = critical_enter();
306 succ = weak_search_predecessors(l, k, preds, succs);
308 retry:
309 ov = NULL;
311 if ( succ->k == k )
313 /* Already a @k node in the list: update its mapping. */
314 new_ov = succ->v;
315 do {
316 if ( (ov = new_ov) == NULL )
318 /* Finish deleting the node, then retry. */
319 READ_FIELD(level, succ->level);
320 mark_deleted(succ, level & LEVEL_MASK);
321 succ = strong_search_predecessors(l, k, preds, succs);
322 goto retry;
325 while ( overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov) );
327 if ( new != NULL ) free_node(ptst, new);
328 goto out;
331 #ifdef WEAK_MEM_ORDER
332 /* Free node from previous attempt, if this is a retry. */
333 if ( new != NULL )
335 free_node(ptst, new);
336 new = NULL;
338 #endif
340 /* Not in the list, so initialise a new node for insertion. */
341 if ( new == NULL )
343 new = alloc_node(ptst);
344 new->k = k;
345 new->v = v;
347 level = new->level;
349 /* If successors don't change, this saves us some CAS operations. */
350 for ( i = 0; i < level; i++ )
352 new->next[i] = succs[i];
355 /* We've committed when we've inserted at level 1. */
356 WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
357 old_next = CASPO(&preds[0]->next[0], succ, new);
358 if ( old_next != succ )
360 succ = strong_search_predecessors(l, k, preds, succs);
361 goto retry;
364 /* Insert at each of the other levels in turn. */
365 i = 1;
366 while ( i < level )
368 pred = preds[i];
369 succ = succs[i];
371 /* Someone *can* delete @new under our feet! */
372 new_next = new->next[i];
373 if ( is_marked_ref(new_next) ) goto success;
375 /* Ensure forward pointer of new node is up to date. */
376 if ( new_next != succ )
378 old_next = CASPO(&new->next[i], new_next, succ);
379 if ( is_marked_ref(old_next) ) goto success;
380 assert(old_next == new_next);
383 /* Ensure we have unique key values at every level. */
384 if ( succ->k == k ) goto new_world_view;
385 assert((pred->k < k) && (succ->k > k));
387 /* Replumb predecessor's forward pointer. */
388 old_next = CASPO(&pred->next[i], succ, new);
389 if ( old_next != succ )
391 new_world_view:
392 RMB(); /* get up-to-date view of the world. */
393 (void)strong_search_predecessors(l, k, preds, succs);
394 continue;
397 /* Succeeded at this level. */
398 i++;
401 success:
402 /* Ensure node is visible at all levels before punting deletion. */
403 WEAK_DEP_ORDER_WMB();
404 if ( check_for_full_delete(new) )
406 MB(); /* make sure we see all marks in @new. */
407 do_full_delete(ptst, l, new, level - 1);
409 out:
410 critical_exit(ptst);
411 return(ov);
415 setval_t set_remove(set_t *l, setkey_t k)
417 setval_t v = NULL, new_v;
418 ptst_t *ptst;
419 sh_node_pt preds[NUM_LEVELS], x;
420 int level, i;
422 k = CALLER_TO_INTERNAL_KEY(k);
424 ptst = critical_enter();
426 x = weak_search_predecessors(l, k, preds, NULL);
427 if ( x->k > k ) goto out;
428 READ_FIELD(level, x->level);
429 level = level & LEVEL_MASK;
431 /* Once we've marked the value field, the node is effectively deleted. */
432 new_v = x->v;
433 do {
434 v = new_v;
435 if ( v == NULL ) goto out;
437 while ( (new_v = CASPO(&x->v, v, NULL)) != v );
439 /* Committed to @x: mark lower-level forward pointers. */
440 WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
441 mark_deleted(x, level);
444 * We must swing predecessors' pointers, or we can end up with
445 * an unbounded number of marked but not fully deleted nodes.
446 * Doing this creates a bound equal to number of threads in the system.
447 * Furthermore, we can't legitimately call 'free_node' until all shared
448 * references are gone.
450 for ( i = level - 1; i >= 0; i-- )
452 if ( CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x )
454 if ( (i != (level - 1)) || check_for_full_delete(x) )
456 MB(); /* make sure we see node at all levels. */
457 do_full_delete(ptst, l, x, i);
459 goto out;
463 free_node(ptst, x);
465 out:
466 critical_exit(ptst);
467 return(v);
471 setval_t set_lookup(set_t *l, setkey_t k)
473 setval_t v = NULL;
474 ptst_t *ptst;
475 sh_node_pt x;
477 k = CALLER_TO_INTERNAL_KEY(k);
479 ptst = critical_enter();
481 x = weak_search_predecessors(l, k, NULL, NULL);
482 if ( x->k == k ) READ_FIELD(v, x->v);
484 critical_exit(ptst);
485 return(v);
489 void _init_set_subsystem(void)
491 int i;
493 for ( i = 0; i < NUM_LEVELS; i++ )
495 gc_id[i] = gc_add_allocator(sizeof(node_t) + i*sizeof(node_t *));