1 /******************************************************************************
4 * Skip lists, allowing concurrent update by use of CAS primitives.
6 * Matt Benjamin <matt@linuxbox.com>
8 * Adapts MCAS skip list to use a pointer-key and typed comparison
9 * function style (because often, your key isn't an integer).
11 * Caution, pointer values 0x0, 0x01, and 0x02 are reserved. Fortunately,
12 * no real pointer is likely to have one of these values.
14 Copyright (c) 2003, Keir Fraser All rights reserved.
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions are
20 * Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials provided
25 * with the distribution. Neither the name of the Keir Fraser
26 * nor the names of its contributors may be used to endorse or
27 * promote products derived from this software without specific
28 * prior written permission.
30 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
31 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
32 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
33 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
34 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
35 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
36 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
40 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 #define __SET_IMPLEMENTATION__
48 #include "portable_defns.h"
/* Forward typedefs for the skip-list node and set types.
 * NOTE(review): this extract is fragmentary -- the struct bodies and
 * several surrounding lines are not visible here. */
57 typedef struct node_st node_t
;
58 typedef struct set_st set_t
;
/* Pointer to a node shared between threads (VOLATILE-qualified so that
 * reads/writes are not cached across the CAS-based protocol). */
59 typedef VOLATILE node_t
*sh_node_pt
;
/* Low bits of node->level hold the level (1..NUM_LEVELS); the separate
 * READY_FOR_FREE flag bit marks a node whose full deletion has begun. */
63 #define LEVEL_MASK 0x0ff
64 #define READY_FOR_FREE 0x100
/* Member of struct set_st (struct header not visible in this extract):
 * user-supplied typed key comparator; usage elsewhere in this file
 * (>=0, ==0, <0, >0 tests) implies strcmp-style results. */
72 int (*cmpf
) (const void *, const void *);
/* One garbage-collector allocator id per node level; see
 * _init_set_subsystem() and alloc_node()/free_node(). */
75 static int gc_id
[NUM_LEVELS
];
/*
 * Typed key comparison via the set's user-supplied comparator.
 * strcmp-like result: <0, ==0, >0.
 * No trailing semicolon: this macro is used inside expressions,
 * e.g. `if (compare_keys(l, y_k, k) >= 0)`, and a semicolon in the
 * expansion would be a syntax error there.  Arguments are
 * parenthesized to avoid precedence surprises.
 */
#define compare_keys(s, k1, k2) ((s)->cmpf((k1), (k2)))
84 * Random level generator. Drop-off rate is 0.5 per level.
85 * Returns value 1 <= level <= NUM_LEVELS.
/* NOTE(review): fragmentary extract -- the return type, braces and the
 * lines that derive the final level from @r are not visible here. */
88 get_level(ptst_t
* ptst
)
90 unsigned long r
= rand_next(ptst
);
/* Discard the low 4 PRNG bits, keep NUM_LEVELS-1 candidate bits;
 * the missing lines presumably count bits/zeroes to pick the level. */
92 r
= (r
>> 4) & ((1 << (NUM_LEVELS
- 1)) - 1);
102 * Allocate a new node, and initialise its @level field.
103 * NB. Initialisation will eventually be pushed into garbage collector,
104 * because of dependent read reordering.
/* NOTE(review): fragmentary extract -- return type, braces, the choice
 * of level @l and the @level field initialisation are not visible. */
107 alloc_node(ptst_t
* ptst
)
/* Draw the node from the per-level GC allocator (gc_id[l-1] sizes the
 * node for l forward pointers; see _init_set_subsystem). */
112 n
= gc_alloc(ptst
, gc_id
[l
- 1]);
118 /* Free a node to the garbage collector. */
/* NOTE(review): fragmentary extract -- return type and braces are not
 * visible.  The allocator index is recovered from the node's own level
 * field (masked with LEVEL_MASK to strip the READY_FOR_FREE flag). */
120 free_node(ptst_t
* ptst
, sh_node_pt n
)
122 gc_free(ptst
, (void *)n
, gc_id
[(n
->level
& LEVEL_MASK
) - 1]);
127 * Search for first non-deleted node, N, with key >= @k at each level in @l.
129 * Array @pa: @pa[i] is non-deleted predecessor of N at level i
130 * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
131 * MAIN RETURN VALUE: same as @na[0].
/* NOTE(review): fragmentary extract -- return type, braces, the retry
 * label, declarations of i/y_k, the @pa/@na updates and the return
 * statement are not visible.  This variant physically unlinks marked
 * nodes it passes (via the CASPO below), unlike the "weak" search. */
134 strong_search_predecessors(set_t
* l
, setkey_t k
, sh_node_pt
* pa
,
137 sh_node_pt x
, x_next
, old_x_next
, y
, y_next
;
/* Descend from the topmost level to level 0. */
145 for (i
= NUM_LEVELS
- 1; i
>= 0; i
--) {
146 /* We start our search at previous level's unmarked predecessor. */
147 READ_FIELD(x_next
, x
->next
[i
]);
148 /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
149 if (is_marked_ref(x_next
))
152 for (y
= x_next
;; y
= y_next
) {
153 /* Shift over a sequence of marked nodes. */
155 READ_FIELD(y_next
, y
->next
[i
]);
156 if (!is_marked_ref(y_next
))
158 y
= get_unmarked_ref(y_next
);
161 READ_FIELD(y_k
, y
->k
);
/* Stop at the first node whose key is >= @k at this level. */
162 if (compare_keys(l
, y_k
, k
) >= 0)
165 /* Update estimate of predecessor at this level. */
170 /* Swing forward pointer over any marked nodes. */
172 old_x_next
= CASPO(&x
->next
[i
], x_next
, y
);
/* CAS failure means a concurrent update raced us at this level. */
173 if (old_x_next
!= x_next
)
187 /* This function does not remove marked nodes. Use it optimistically. */
/* NOTE(review): fragmentary extract -- return type, braces, declarations
 * of i/x_next_k, the @pa/@na array updates and the return statement are
 * not visible.  Same contract as strong_search_predecessors, but marked
 * nodes are skipped (get_unmarked_ref) rather than unlinked, so no CAS
 * traffic is generated. */
189 weak_search_predecessors(set_t
* l
, setkey_t k
, sh_node_pt
* pa
,
192 sh_node_pt x
, x_next
;
197 for (i
= NUM_LEVELS
- 1; i
>= 0; i
--) {
199 READ_FIELD(x_next
, x
->next
[i
]);
/* Strip any deletion mark and keep walking -- never unlink here. */
200 x_next
= get_unmarked_ref(x_next
);
202 READ_FIELD(x_next_k
, x_next
->k
);
/* First node with key >= @k terminates the scan at this level. */
203 if (compare_keys(l
, x_next_k
, k
) >= 0)
220 * Mark @x deleted at every level in its list from @level down to level 1.
221 * When all forward pointers are marked, node is effectively deleted.
222 * Future searches will properly remove node by swinging predecessors'
/* NOTE(review): fragmentary extract -- return type, braces, the local
 * declaration of x_next and closing braces are not visible. */
226 mark_deleted(sh_node_pt x
, int level
)
/* Walk levels top-down (level-1 .. 0). */
230 while (--level
>= 0) {
231 x_next
= x
->next
[level
];
/* Loop the CAS until this level's forward pointer carries the mark
 * (a failed CASPO returns the fresh value, which we re-test). */
232 while (!is_marked_ref(x_next
)) {
233 x_next
= CASPO(&x
->next
[level
], x_next
, get_marked_ref(x_next
));
235 WEAK_DEP_ORDER_WMB(); /* mark in order */
/* Atomically try to claim @x for full deletion by setting READY_FOR_FREE
 * in its level word.  Returns nonzero iff the flag was already set, or
 * our CASIO did not read back the expected level (i.e. another thread
 * updated @x->level concurrently).  Callers in this file treat a nonzero
 * result as the cue to run do_full_delete().
 * NOTE(review): fragmentary extract -- return type and braces are not
 * visible. */
241 check_for_full_delete(sh_node_pt x
)
243 int level
= x
->level
;
244 return ((level
& READY_FOR_FREE
) ||
245 (CASIO(&x
->level
, level
, level
| READY_FOR_FREE
) != level
));
/* Physically unlink @x from every level and hand it to the GC.
 * NOTE(review): fragmentary extract -- return type, braces, the
 * declaration/derivation of @k, the retry/loop structure around the
 * level scan, and the final free_node() call are not visible. */
250 do_full_delete(ptst_t
* ptst
, set_t
* l
, sh_node_pt x
, int level
)
253 #ifdef WEAK_MEM_ORDER
254 sh_node_pt preds
[NUM_LEVELS
];
/* Strong search unlinks @x's marked pointers as a side effect. */
257 (void)strong_search_predecessors(l
, k
, preds
, NULL
);
259 * Above level 1, references to @x can disappear if a node is inserted
260 * immediately before and we see an old value for its forward pointer. This
261 * is a conservative way of checking for that situation.
266 node_t
*n
= get_unmarked_ref(preds
[i
]->next
[i
]);
/* Scan forward past smaller keys to confirm no path still reaches @x. */
267 while (compare_keys(l
, n
->k
, k
) < 0) {
268 n
= get_unmarked_ref(n
->next
[i
]);
269 RMB(); /* we don't want refs to @x to "disappear" */
273 i
--; /* don't need to check this level again, even if we retry. */
/* Non-WEAK_MEM_ORDER path: one strong search suffices to unlink @x. */
276 (void)strong_search_predecessors(l
, k
, NULL
, NULL
);
/* Create a new, empty skip-list set using comparator @cmpf.
 * Builds a head sentinel (SENTINEL_KEYMIN, full height, embedded in the
 * set) and a tail sentinel node (SENTINEL_KEYMAX) that all head forward
 * pointers will reference.
 * NOTE(review): fragmentary extract -- return type, braces, declarations
 * of n/l/i, storage of @cmpf into the set, the head->next wiring inside
 * the loop, and the return statement are not visible.
 * NOTE(review): both malloc() results are used unchecked here -- worth
 * confirming against the full source. */
287 set_alloc(int (*cmpf
) (const void *, const void *))
293 n
= malloc(sizeof(*n
) + (NUM_LEVELS
- 1) * sizeof(node_t
*));
294 memset(n
, 0, sizeof(*n
) + (NUM_LEVELS
- 1) * sizeof(node_t
*));
295 n
->k
= SENTINEL_KEYMAX
;
298 * Set the forward pointers of final node to other than NULL,
299 * otherwise READ_FIELD() will continually execute costly barriers.
300 * Note use of 0xfe -- that doesn't look like a marked value!
302 memset(n
->next
, 0xfe, NUM_LEVELS
* sizeof(node_t
*));
304 l
= malloc(sizeof(*l
) + (NUM_LEVELS
- 1) * sizeof(node_t
*));
306 l
->head
.k
= SENTINEL_KEYMIN
;
307 l
->head
.level
= NUM_LEVELS
;
308 for (i
= 0; i
< NUM_LEVELS
; i
++) {
/* Insert or update the mapping for key @k in set @l.
 * If a live node with key @k already exists, its value field is swapped
 * to @v via CASPO (repeatedly, while @overwrite is set and the CAS keeps
 * losing races); if the node is found logically deleted (value NULL),
 * the deletion is completed and the operation retries.  Otherwise a new
 * node is initialised and linked in bottom-up, level by level.
 * NOTE(review): fragmentary extract -- return type, braces, declarations
 * of ptst/i/level/ov/new_ov, the retry labels and gotos, the setting of
 * new->k/new->v/new->level, and the final return/critical_exit are not
 * visible; comments below cover only what is shown. */
317 set_update(set_t
* l
, setkey_t k
, setval_t v
, int overwrite
)
321 sh_node_pt preds
[NUM_LEVELS
], succs
[NUM_LEVELS
];
322 sh_node_pt pred
, succ
, new = NULL
, new_next
, old_next
;
325 ptst
= critical_enter();
/* Optimistic search first: no unlinking CAS traffic. */
327 succ
= weak_search_predecessors(l
, k
, preds
, succs
);
332 if (compare_keys(l
, succ
->k
, k
) == 0) {
333 /* Already a @k node in the list: update its mapping. */
336 if ((ov
= new_ov
) == NULL
) {
337 /* Finish deleting the node, then retry. */
338 READ_FIELD(level
, succ
->level
);
339 mark_deleted(succ
, level
& LEVEL_MASK
);
340 succ
= strong_search_predecessors(l
, k
, preds
, succs
);
343 } while (overwrite
&& ((new_ov
= CASPO(&succ
->v
, ov
, v
)) != ov
));
/* Key already present: discard any node prepared on an earlier pass. */
346 free_node(ptst
, new);
349 #ifdef WEAK_MEM_ORDER
350 /* Free node from previous attempt, if this is a retry. */
352 free_node(ptst
, new);
357 /* Not in the list, so initialise a new node for insertion. */
359 new = alloc_node(ptst
);
365 /* If successors don't change, this saves us some CAS operations. */
366 for (i
= 0; i
< level
; i
++) {
367 new->next
[i
] = succs
[i
];
370 /* We've committed when we've inserted at level 1. */
371 WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
372 old_next
= CASPO(&preds
[0]->next
[0], succ
, new);
/* Bottom-level CAS lost a race: refresh preds/succs and retry. */
373 if (old_next
!= succ
) {
374 succ
= strong_search_predecessors(l
, k
, preds
, succs
);
378 /* Insert at each of the other levels in turn. */
384 /* Someone *can* delete @new under our feet! */
385 new_next
= new->next
[i
];
386 if (is_marked_ref(new_next
))
389 /* Ensure forward pointer of new node is up to date. */
390 if (new_next
!= succ
) {
391 old_next
= CASPO(&new->next
[i
], new_next
, succ
);
392 if (is_marked_ref(old_next
))
394 assert(old_next
== new_next
);
397 /* Ensure we have unique key values at every level. */
398 if (compare_keys(l
, succ
->k
, k
) == 0)
401 assert((compare_keys(l
, pred
->k
, k
) < 0) &&
402 (compare_keys(l
, succ
->k
, k
) > 0));
404 /* Replumb predecessor's forward pointer. */
405 old_next
= CASPO(&pred
->next
[i
], succ
, new);
406 if (old_next
!= succ
) {
408 RMB(); /* get up-to-date view of the world. */
409 (void)strong_search_predecessors(l
, k
, preds
, succs
);
413 /* Succeeded at this level. */
418 /* Ensure node is visible at all levels before punting deletion. */
419 WEAK_DEP_ORDER_WMB();
420 if (check_for_full_delete(new)) {
421 MB(); /* make sure we see all marks in @new. */
422 do_full_delete(ptst
, l
, new, level
- 1);
/* Delete key @k from set @l.
 * Logical deletion CASes the node's value field to NULL (enforced below
 * as the linearisation point), then marks all forward pointers and
 * swings predecessors' pointers to physically unlink the node.
 * NOTE(review): fragmentary extract -- return type, braces, declarations
 * of ptst/i/level/x's early-exit path, the loop that reads @v before the
 * CAS, and the final return of the old value are not visible. */
431 set_remove(set_t
* l
, setkey_t k
)
433 setval_t v
= NULL
, new_v
;
435 sh_node_pt preds
[NUM_LEVELS
], x
;
438 ptst
= critical_enter();
440 x
= weak_search_predecessors(l
, k
, preds
, NULL
);
/* First key >= @k is strictly greater: @k is absent. */
441 if (compare_keys(l
, x
->k
, k
) > 0)
444 READ_FIELD(level
, x
->level
);
445 level
= level
& LEVEL_MASK
;
447 /* Once we've marked the value field, the node is effectively deleted. */
453 } while ((new_v
= CASPO(&x
->v
, v
, NULL
)) != v
);
455 /* Committed to @x: mark lower-level forward pointers. */
456 WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
457 mark_deleted(x
, level
);
460 * We must swing predecessors' pointers, or we can end up with
461 * an unbounded number of marked but not fully deleted nodes.
462 * Doing this creates a bound equal to number of threads in the system.
463 * Furthermore, we can't legitimately call 'free_node' until all shared
464 * references are gone.
466 for (i
= level
- 1; i
>= 0; i
--) {
/* A failed swing means a racing thread touched this level; fall back
 * to the full-delete protocol from this level down. */
467 if (CASPO(&preds
[i
]->next
[i
], x
, get_unmarked_ref(x
->next
[i
])) != x
) {
468 if ((i
!= (level
- 1)) || check_for_full_delete(x
)) {
469 MB(); /* make sure we see node at all levels. */
470 do_full_delete(ptst
, l
, x
, i
);
/* Look up key @k in set @l.
 * Uses the optimistic (weak) search; on an exact key match the node's
 * value is presumably read and returned -- the lines doing so, the
 * return type, braces and declarations of ptst/x/v are not visible in
 * this extract (NOTE(review): confirm against the full source). */
485 set_lookup(set_t
* l
, setkey_t k
)
491 ptst
= critical_enter();
493 x
= weak_search_predecessors(l
, k
, NULL
, NULL
);
494 if (compare_keys(l
, x
->k
, k
) == 0)
/* One-time module initialisation: register one GC allocator per skip
 * level.  A level-(i+1) node carries i extra forward pointers, hence the
 * sizeof(node_t) + i * sizeof(node_t *) size classes feeding gc_id[].
 * NOTE(review): fragmentary extract -- return type, braces and the
 * declaration of i are not visible. */
503 _init_set_subsystem(void)
507 for (i
= 0; i
< NUM_LEVELS
; i
++) {
508 gc_id
[i
] = gc_add_allocator(sizeof(node_t
) + i
* sizeof(node_t
*));