/******************************************************************************
 * Skip lists, allowing concurrent update by use of CAS primitives.
 *
 * Matt Benjamin <matt@linuxbox.com>
 *
 * Adapts the MCAS skip list to use a pointer key and a typed comparison
 * function style (because often, your key isn't an integer).
 *
 * Also, set_for_each (and future additions) allow a set to be iterated.
 * The current set_for_each iteration is unordered.
 *
 * Caution: pointer values 0x0, 0x01, and 0x02 are reserved. Fortunately,
 * no real pointer is likely to have one of these values.
 *
 * Copyright (c) 2003, Keir Fraser. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *
 *  * Neither the name of Keir Fraser nor the names of its contributors
 *    may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
#define __SET_IMPLEMENTATION__

#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "portable_defns.h"
#include "ptst.h"
#include "set_queue_adt.h"
// todo: get rid of me
unsigned long secret_key;
typedef struct node_st node_t;
typedef struct set_st osi_set_t;
typedef VOLATILE node_t *sh_node_pt;
// int xxx[1024]; /* XXXX testing gc */

#define LEVEL_MASK     0x0ff
#define READY_FOR_FREE 0x100

/*
 * The low byte of @level holds the node's height; READY_FOR_FREE flags a
 * node whose deletion has been handed off for physical reclamation.
 */
struct node_st {
    int level;
    setkey_t k;
    setval_t v;
    VOLATILE node_t *next[1];   /* level-sized array, allocated inline */
};

typedef int (*osi_set_cmp_func) (const void *lhs, const void *rhs);

struct set_st {
    osi_set_cmp_func cmpf;
    node_t head;                /* head node; its next[] spans all levels */
};

static int gc_id[NUM_LEVELS];
#define compare_keys(s, k1, k2) ((s)->cmpf((const void *)(k1), (const void *)(k2)))
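/*
 * An illustrative comparator (hypothetical, compiled out): it orders keys
 * that point at unsigned longs. Note that the sentinel key values
 * SENTINEL_KEYMIN/SENTINEL_KEYMAX stored in the head and tail nodes can
 * reach the comparator through compare_keys(), so a defensive comparator
 * orders them explicitly before dereferencing anything.
 */
#if 0
static int
example_cmp_ulong(const void *lhs, const void *rhs)
{
    unsigned long lv, rv;

    /* Reserved sentinel pointers sort below/above every real key. */
    if (lhs == rhs)
        return 0;
    if (lhs == (const void *)SENTINEL_KEYMIN || rhs == (const void *)SENTINEL_KEYMAX)
        return -1;
    if (lhs == (const void *)SENTINEL_KEYMAX || rhs == (const void *)SENTINEL_KEYMIN)
        return 1;

    lv = *(const unsigned long *)lhs;
    rv = *(const unsigned long *)rhs;
    return (lv < rv) ? -1 : (lv > rv) ? 1 : 0;
}
#endif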
/*
 * Random level generator. Drop-off rate is 0.5 per level.
 * Returns value 1 <= level <= NUM_LEVELS.
 */
static int
get_level(ptst_t * ptst)
{
    unsigned long r = rand_next(ptst);
    int l = 1;

    r = (r >> 4) & ((1 << (NUM_LEVELS - 1)) - 1);
    while ((r & 1)) {
        l++;
        r >>= 1;
    }
    return (l);
}
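/*
 * The masked bits give a geometric distribution: P(level == n) = 2^-n
 * (truncated at NUM_LEVELS), so the expected number of forward pointers
 * per node is roughly 2.
 */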
/*
 * Allocate a new node, and initialise its @level field.
 * NB. Initialisation will eventually be pushed into garbage collector,
 * because of dependent read reordering.
 */
static node_t *
alloc_node(ptst_t * ptst)
{
    int l;
    node_t *n;

    l = get_level(ptst);
    n = gc_alloc(ptst, gc_id[l - 1]);
    n->level = l;
    return (n);
}
/* Free a node to the garbage collector. */
static void
free_node(ptst_t * ptst, sh_node_pt n)
{
    gc_free(ptst, (void *)n, gc_id[(n->level & LEVEL_MASK) - 1]);
}
/*
 * Search for first non-deleted node, N, with key >= @k at each level in @l.
 *
 * RETURN VALUES:
 *  Array @pa: @pa[i] is non-deleted predecessor of N at level i
 *  Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
 *  MAIN RETURN VALUE: same as @na[0].
 */
static sh_node_pt
strong_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
                           sh_node_pt * na)
{
    sh_node_pt x, x_next, old_x_next, y, y_next;
    setkey_t y_k;
    int i;

retry:
    RMB();

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        /* We start our search at previous level's unmarked predecessor. */
        READ_FIELD(x_next, x->next[i]);
        /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
        if (is_marked_ref(x_next))
            goto retry;

        for (y = x_next;; y = y_next) {
            /* Shift over a sequence of marked nodes. */
            for (;;) {
                READ_FIELD(y_next, y->next[i]);
                if (!is_marked_ref(y_next))
                    break;
                y = get_unmarked_ref(y_next);
            }

            READ_FIELD(y_k, y->k);
            if (compare_keys(l, y_k, k) >= 0)
                break;

            /* Update estimate of predecessor at this level. */
            x = y;
            x_next = y_next;
        }

        /* Swing forward pointer over any marked nodes. */
        if (x_next != y) {
            old_x_next = CASPO(&x->next[i], x_next, y);
            if (old_x_next != x_next)
                goto retry;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = y;
    }

    return (y);
}
/* This function does not remove marked nodes. Use it optimistically. */
static sh_node_pt
weak_search_predecessors(osi_set_t * l, setkey_t k, sh_node_pt * pa,
                         sh_node_pt * na)
{
    sh_node_pt x, x_next;
    setkey_t x_next_k;
    int i;

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        for (;;) {
            READ_FIELD(x_next, x->next[i]);
            x_next = get_unmarked_ref(x_next);

            READ_FIELD(x_next_k, x_next->k);
            if (compare_keys(l, x_next_k, k) >= 0)
                break;

            x = x_next;
        }

        if (pa)
            pa[i] = x;
        if (na)
            na[i] = x_next;
    }

    return (x_next);
}
/*
 * Mark @x deleted at every level in its list from @level down to level 1.
 * When all forward pointers are marked, node is effectively deleted.
 * Future searches will properly remove node by swinging predecessors'
 * forward pointers.
 */
static void
mark_deleted(sh_node_pt x, int level)
{
    sh_node_pt x_next;

    while (--level >= 0) {
        x_next = x->next[level];
        while (!is_marked_ref(x_next)) {
            x_next = CASPO(&x->next[level], x_next, get_marked_ref(x_next));
        }
        WEAK_DEP_ORDER_WMB();   /* mark in order */
    }
}
/*
 * Deletion of @x must be observed at all levels before it can be freed.
 * The first party to call this claims READY_FOR_FREE and returns zero;
 * a racing second party sees the flag (or loses the CAS) and gets nonzero,
 * telling it to carry out the full delete itself.
 */
static int
check_for_full_delete(sh_node_pt x)
{
    int level = x->level;

    return ((level & READY_FOR_FREE) ||
            (CASIO(&x->level, level, level | READY_FOR_FREE) != level));
}
static void
do_full_delete(ptst_t * ptst, osi_set_t * l, sh_node_pt x, int level)
{
    setkey_t k = x->k;
#ifdef WEAK_MEM_ORDER
    sh_node_pt preds[NUM_LEVELS];
    int i = level;

retry:
    (void)strong_search_predecessors(l, k, preds, NULL);

    /*
     * Above level 1, references to @x can disappear if a node is inserted
     * immediately before and we see an old value for its forward pointer. This
     * is a conservative way of checking for that situation.
     */
    if (i > 0) {
        node_t *n = get_unmarked_ref(preds[i]->next[i]);
        while (compare_keys(l, n->k, k) < 0) {
            n = get_unmarked_ref(n->next[i]);
            RMB();              /* we don't want refs to @x to "disappear" */
        }
        if (n == x)
            goto retry;
        i--;                    /* don't need to check this level again, even if we retry. */
    }
#else
    (void)strong_search_predecessors(l, k, NULL, NULL);
#endif

    free_node(ptst, x);
}
/*
 * PUBLIC FUNCTIONS
 */

/*
 * Called once before any set operations, including set_alloc.
 */
void
_init_osi_cas_skip_subsystem(void)
{
    int i;

    for (i = 0; i < NUM_LEVELS; i++) {
        /* One gc allocator per node height; the tag string is assumed here. */
        gc_id[i] = gc_add_allocator(sizeof(node_t) + i * sizeof(node_t *),
                                    "cas_skip_level");
    }
}
osi_set_t *
osi_cas_skip_alloc(osi_set_cmp_func cmpf)
{
    osi_set_t *l;
    node_t *n;
    int i;

    n = malloc(sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    memset(n, 0, sizeof(*n) + (NUM_LEVELS - 1) * sizeof(node_t *));
    n->k = (setkey_t) SENTINEL_KEYMAX;

    /*
     * Set the forward pointers of final node to other than NULL,
     * otherwise READ_FIELD() will continually execute costly barriers.
     * Note use of 0xfe -- that doesn't look like a marked value!
     */
    memset(n->next, 0xfe, NUM_LEVELS * sizeof(node_t *));

    l = malloc(sizeof(*l) + (NUM_LEVELS - 1) * sizeof(node_t *));
    l->cmpf = cmpf;
    l->head.k = (setkey_t) SENTINEL_KEYMIN;
    l->head.level = NUM_LEVELS;
    for (i = 0; i < NUM_LEVELS; i++) {
        l->head.next[i] = n;
    }

    return (l);
}
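/*
 * Illustrative setup sketch (hypothetical, compiled out): create a set
 * over the example_cmp_ulong comparator sketched earlier. The subsystem
 * initialiser above must run first.
 */
#if 0
static osi_set_t *
example_make_set(void)
{
    _init_osi_cas_skip_subsystem();
    return osi_cas_skip_alloc(example_cmp_ulong);
}
#endif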
setval_t
osi_cas_skip_update(osi_set_t * l, setkey_t k, setval_t v, int overwrite)
{
    setval_t ov, new_ov;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], succs[NUM_LEVELS];
    sh_node_pt pred, succ, new = NULL, new_next, old_next;
    int i, level;

    ptst = critical_enter();

    succ = weak_search_predecessors(l, k, preds, succs);

retry:
    ov = NULL;

    if (compare_keys(l, succ->k, k) == 0) {
        /* Already a @k node in the list: update its mapping. */
        new_ov = succ->v;
        do {
            if ((ov = new_ov) == NULL) {
                /* Finish deleting the node, then retry. */
                READ_FIELD(level, succ->level);
                mark_deleted(succ, level & LEVEL_MASK);
                succ = strong_search_predecessors(l, k, preds, succs);
                goto retry;
            }
        } while (overwrite && ((new_ov = CASPO(&succ->v, ov, v)) != ov));

        if (new != NULL)
            free_node(ptst, new);
        goto out;
    }
#ifdef WEAK_MEM_ORDER
    /* Free node from previous attempt, if this is a retry. */
    if (new != NULL) {
        free_node(ptst, new);
        new = NULL;
    }
#endif

    /* Not in the list, so initialise a new node for insertion. */
    if (new == NULL) {
        new = alloc_node(ptst);
        new->k = k;
        new->v = v;
    }
    level = new->level;

    /* If successors don't change, this saves us some CAS operations. */
    for (i = 0; i < level; i++) {
        new->next[i] = succs[i];
    }

    /* We've committed when we've inserted at level 1. */
    WMB_NEAR_CAS();             /* make sure node fully initialised before inserting */
    old_next = CASPO(&preds[0]->next[0], succ, new);
    if (old_next != succ) {
        succ = strong_search_predecessors(l, k, preds, succs);
        goto retry;
    }

    /* Insert at each of the other levels in turn. */
    i = 1;
    while (i < level) {
        pred = preds[i];
        succ = succs[i];

        /* Someone *can* delete @new under our feet! */
        new_next = new->next[i];
        if (is_marked_ref(new_next))
            goto success;

        /* Ensure forward pointer of new node is up to date. */
        if (new_next != succ) {
            old_next = CASPO(&new->next[i], new_next, succ);
            if (is_marked_ref(old_next))
                goto success;
            assert(old_next == new_next);
        }

        /* Ensure we have unique key values at every level. */
        if (compare_keys(l, succ->k, k) == 0)
            goto new_world_view;
        assert((compare_keys(l, pred->k, k) < 0) &&
               (compare_keys(l, succ->k, k) > 0));

        /* Replumb predecessor's forward pointer. */
        old_next = CASPO(&pred->next[i], succ, new);
        if (old_next != succ) {
          new_world_view:
            RMB();              /* get up-to-date view of the world. */
            (void)strong_search_predecessors(l, k, preds, succs);
            continue;
        }

        /* Succeeded at this level. */
        i++;
    }

success:
    /* Ensure node is visible at all levels before punting deletion. */
    WEAK_DEP_ORDER_WMB();
    if (check_for_full_delete(new)) {
        MB();                   /* make sure we see all marks in @new. */
        do_full_delete(ptst, l, new, level - 1);
    }
out:
    critical_exit(ptst);
    return (ov);
}
setval_t
osi_cas_skip_remove(osi_set_t * l, setkey_t k)
{
    setval_t v = NULL, new_v;
    ptst_t *ptst;
    sh_node_pt preds[NUM_LEVELS], x;
    int level, i;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, preds, NULL);
    if (compare_keys(l, x->k, k) > 0)
        goto out;

    READ_FIELD(level, x->level);
    level = level & LEVEL_MASK;

    /* Once we've marked the value field, the node is effectively deleted. */
    new_v = x->v;
    do {
        v = new_v;
        if (v == NULL)
            goto out;
    } while ((new_v = CASPO(&x->v, v, NULL)) != v);

    /* Committed to @x: mark lower-level forward pointers. */
    WEAK_DEP_ORDER_WMB();       /* enforce above as linearisation point */
    mark_deleted(x, level);

    /*
     * We must swing predecessors' pointers, or we can end up with
     * an unbounded number of marked but not fully deleted nodes.
     * Doing this creates a bound equal to number of threads in the system.
     * Furthermore, we can't legitimately call 'free_node' until all shared
     * references are gone.
     */
    for (i = level - 1; i >= 0; i--) {
        if (CASPO(&preds[i]->next[i], x, get_unmarked_ref(x->next[i])) != x) {
            if ((i != (level - 1)) || check_for_full_delete(x)) {
                MB();           /* make sure we see node at all levels. */
                do_full_delete(ptst, l, x, i);
            }
            goto out;
        }
    }

    free_node(ptst, x);

out:
    critical_exit(ptst);
    return (v);
}
setval_t
osi_cas_skip_lookup(osi_set_t * l, setkey_t k)
{
    setval_t v = NULL;
    ptst_t *ptst;
    sh_node_pt x;

    ptst = critical_enter();

    x = weak_search_predecessors(l, k, NULL, NULL);
    if (compare_keys(l, x->k, k) == 0)
        READ_FIELD(v, x->v);

    critical_exit(ptst);
    return (v);
}
/* Hybrid Set/Queue Operations (Matt) */

/*
 * Iterate over a sequential structure, calling callback_func
 * on each (undeleted) element visited.
 */

/* Each-element function passed to set_for_each */
typedef void (*osi_set_each_func) (osi_set_t * l, setval_t v, void *arg);
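/*
 * An illustrative callback (hypothetical, compiled out): counts visited
 * elements, with @arg pointing at the caller's counter.
 */
#if 0
static void
example_count_cb(osi_set_t * l, setval_t v, void *arg)
{
    (void)l;
    (void)v;
    (*(unsigned long *)arg)++;
}
#endif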
void
osi_cas_skip_for_each(osi_set_t * l, osi_set_each_func each_func, void *arg)
{
    sh_node_pt x, y, x_next;
    setkey_t x_next_k;
    ptst_t *ptst;
    int i;

    ptst = critical_enter();

    x = &l->head;
    for (i = NUM_LEVELS - 1; i >= 0; i--) {
        /* must revisit x at each level to see all
         * nodes in the structure; NB: a node of level > 1 is linked at
         * several levels, so it will be passed to @each_func once per level */
        y = x;
        for (;;) {
            READ_FIELD(x_next, y->next[i]);
            x_next = get_unmarked_ref(x_next);

            READ_FIELD(x_next_k, x_next->k);
            if (x_next_k == (setkey_t) SENTINEL_KEYMAX)
                break;

            /* in our variation, a (stored) k is a v,
             * ie, x_next_k is x_next_v */
            each_func(l, x_next_k, arg);
            y = x_next;
        }
    }

    critical_exit(ptst);
}
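/*
 * End-to-end usage sketch (hypothetical, compiled out), tying the examples
 * above together. Per the MCAS library's setup, the ptst and gc subsystems
 * are initialised before the skip-list subsystem; error handling is omitted.
 */
#if 0
static void
example_usage(void)
{
    osi_set_t *s;
    unsigned long key = 42, count = 0;

    _init_ptst_subsystem();
    _init_gc_subsystem();
    s = example_make_set();     /* calls _init_osi_cas_skip_subsystem() */

    /* Insert; with overwrite == 1 an existing mapping would be replaced. */
    (void)osi_cas_skip_update(s, (setkey_t)&key, (setval_t)"value", 1);

    /* Lookup and removal both return the mapped value, or NULL if absent. */
    assert(osi_cas_skip_lookup(s, (setkey_t)&key) != NULL);

    osi_cas_skip_for_each(s, example_count_cb, &count);

    assert(osi_cas_skip_remove(s, (setkey_t)&key) != NULL);
}
#endif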