/******************************************************************************
 * Skip lists, allowing concurrent update by use of CAS primitives.
 *
 * Copyright (c) 2001-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.  Neither the name of the Keir Fraser
 *    nor the names of its contributors may be used to endorse or
 *    promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
35 #define __SET_IMPLEMENTATION__
40 #include "portable_defns.h"
49 typedef struct node_st node_t
;
50 typedef struct set_st set_t
;
51 typedef VOLATILE node_t
*sh_node_pt
;
56 #define LEVEL_MASK 0x0ff
57 #define READY_FOR_FREE 0x100
68 static int gc_id
[NUM_LEVELS
];
75 * Random level generator. Drop-off rate is 0.5 per level.
76 * Returns value 1 <= level <= NUM_LEVELS.
78 static int get_level(ptst_t
*ptst
)
80 unsigned long r
= rand_next(ptst
);
82 r
= (r
>> 4) & ((1 << (NUM_LEVELS
-1)) - 1);
83 while ( (r
& 1) ) { l
++; r
>>= 1; }
89 * Allocate a new node, and initialise its @level field.
90 * NB. Initialisation will eventually be pushed into garbage collector,
91 * because of dependent read reordering.
93 static node_t
*alloc_node(ptst_t
*ptst
)
98 n
= gc_alloc(ptst
, gc_id
[l
- 1]);
104 /* Free a node to the garbage collector. */
105 static void free_node(ptst_t
*ptst
, sh_node_pt n
)
107 gc_free(ptst
, (void *)n
, gc_id
[(n
->level
& LEVEL_MASK
) - 1]);
112 * Search for first non-deleted node, N, with key >= @k at each level in @l.
114 * Array @pa: @pa[i] is non-deleted predecessor of N at level i
115 * Array @na: @na[i] is N itself, which should be pointed at by @pa[i]
116 * MAIN RETURN VALUE: same as @na[0].
118 static sh_node_pt
strong_search_predecessors(
119 set_t
*l
, setkey_t k
, sh_node_pt
*pa
, sh_node_pt
*na
)
121 sh_node_pt x
, x_next
, old_x_next
, y
, y_next
;
129 for ( i
= NUM_LEVELS
- 1; i
>= 0; i
-- )
131 /* We start our search at previous level's unmarked predecessor. */
132 READ_FIELD(x_next
, x
->next
[i
]);
133 /* If this pointer's marked, so is @pa[i+1]. May as well retry. */
134 if ( is_marked_ref(x_next
) ) goto retry
;
136 for ( y
= x_next
; ; y
= y_next
)
138 /* Shift over a sequence of marked nodes. */
141 READ_FIELD(y_next
, y
->next
[i
]);
142 if ( !is_marked_ref(y_next
) ) break;
143 y
= get_unmarked_ref(y_next
);
146 READ_FIELD(y_k
, y
->k
);
147 if ( y_k
>= k
) break;
149 /* Update estimate of predecessor at this level. */
154 /* Swing forward pointer over any marked nodes. */
157 old_x_next
= CASPO(&x
->next
[i
], x_next
, y
);
158 if ( old_x_next
!= x_next
) goto retry
;
169 /* This function does not remove marked nodes. Use it optimistically. */
170 static sh_node_pt
weak_search_predecessors(
171 set_t
*l
, setkey_t k
, sh_node_pt
*pa
, sh_node_pt
*na
)
173 sh_node_pt x
, x_next
;
178 for ( i
= NUM_LEVELS
- 1; i
>= 0; i
-- )
182 READ_FIELD(x_next
, x
->next
[i
]);
183 x_next
= get_unmarked_ref(x_next
);
185 READ_FIELD(x_next_k
, x_next
->k
);
186 if ( x_next_k
>= k
) break;
192 if ( na
) na
[i
] = x_next
;
200 * Mark @x deleted at every level in its list from @level down to level 1.
201 * When all forward pointers are marked, node is effectively deleted.
202 * Future searches will properly remove node by swinging predecessors'
205 static void mark_deleted(sh_node_pt x
, int level
)
209 while ( --level
>= 0 )
211 x_next
= x
->next
[level
];
212 while ( !is_marked_ref(x_next
) )
214 x_next
= CASPO(&x
->next
[level
], x_next
, get_marked_ref(x_next
));
216 WEAK_DEP_ORDER_WMB(); /* mark in order */
221 static int check_for_full_delete(sh_node_pt x
)
223 int level
= x
->level
;
224 return ((level
& READY_FOR_FREE
) ||
225 (CASIO(&x
->level
, level
, level
| READY_FOR_FREE
) != level
));
229 static void do_full_delete(ptst_t
*ptst
, set_t
*l
, sh_node_pt x
, int level
)
232 #ifdef WEAK_MEM_ORDER
233 sh_node_pt preds
[NUM_LEVELS
];
236 (void)strong_search_predecessors(l
, k
, preds
, NULL
);
238 * Above level 1, references to @x can disappear if a node is inserted
239 * immediately before and we see an old value for its forward pointer. This
240 * is a conservative way of checking for that situation.
245 node_t
*n
= get_unmarked_ref(preds
[i
]->next
[i
]);
248 n
= get_unmarked_ref(n
->next
[i
]);
249 RMB(); /* we don't want refs to @x to "disappear" */
251 if ( n
== x
) goto retry
;
252 i
--; /* don't need to check this level again, even if we retry. */
255 (void)strong_search_predecessors(l
, k
, NULL
, NULL
);
265 set_t
*set_alloc(void)
271 n
= malloc(sizeof(*n
) + (NUM_LEVELS
-1)*sizeof(node_t
*));
272 memset(n
, 0, sizeof(*n
) + (NUM_LEVELS
-1)*sizeof(node_t
*));
273 n
->k
= SENTINEL_KEYMAX
;
276 * Set the forward pointers of final node to other than NULL,
277 * otherwise READ_FIELD() will continually execute costly barriers.
278 * Note use of 0xfe -- that doesn't look like a marked value!
280 memset(n
->next
, 0xfe, NUM_LEVELS
*sizeof(node_t
*));
282 l
= malloc(sizeof(*l
) + (NUM_LEVELS
-1)*sizeof(node_t
*));
283 l
->head
.k
= SENTINEL_KEYMIN
;
284 l
->head
.level
= NUM_LEVELS
;
285 for ( i
= 0; i
< NUM_LEVELS
; i
++ )
294 setval_t
set_update(set_t
*l
, setkey_t k
, setval_t v
, int overwrite
)
298 sh_node_pt preds
[NUM_LEVELS
], succs
[NUM_LEVELS
];
299 sh_node_pt pred
, succ
, new = NULL
, new_next
, old_next
;
302 k
= CALLER_TO_INTERNAL_KEY(k
);
304 ptst
= critical_enter();
306 succ
= weak_search_predecessors(l
, k
, preds
, succs
);
313 /* Already a @k node in the list: update its mapping. */
316 if ( (ov
= new_ov
) == NULL
)
318 /* Finish deleting the node, then retry. */
319 READ_FIELD(level
, succ
->level
);
320 mark_deleted(succ
, level
& LEVEL_MASK
);
321 succ
= strong_search_predecessors(l
, k
, preds
, succs
);
325 while ( overwrite
&& ((new_ov
= CASPO(&succ
->v
, ov
, v
)) != ov
) );
327 if ( new != NULL
) free_node(ptst
, new);
331 #ifdef WEAK_MEM_ORDER
332 /* Free node from previous attempt, if this is a retry. */
335 free_node(ptst
, new);
340 /* Not in the list, so initialise a new node for insertion. */
343 new = alloc_node(ptst
);
349 /* If successors don't change, this saves us some CAS operations. */
350 for ( i
= 0; i
< level
; i
++ )
352 new->next
[i
] = succs
[i
];
355 /* We've committed when we've inserted at level 1. */
356 WMB_NEAR_CAS(); /* make sure node fully initialised before inserting */
357 old_next
= CASPO(&preds
[0]->next
[0], succ
, new);
358 if ( old_next
!= succ
)
360 succ
= strong_search_predecessors(l
, k
, preds
, succs
);
364 /* Insert at each of the other levels in turn. */
371 /* Someone *can* delete @new under our feet! */
372 new_next
= new->next
[i
];
373 if ( is_marked_ref(new_next
) ) goto success
;
375 /* Ensure forward pointer of new node is up to date. */
376 if ( new_next
!= succ
)
378 old_next
= CASPO(&new->next
[i
], new_next
, succ
);
379 if ( is_marked_ref(old_next
) ) goto success
;
380 assert(old_next
== new_next
);
383 /* Ensure we have unique key values at every level. */
384 if ( succ
->k
== k
) goto new_world_view
;
385 assert((pred
->k
< k
) && (succ
->k
> k
));
387 /* Replumb predecessor's forward pointer. */
388 old_next
= CASPO(&pred
->next
[i
], succ
, new);
389 if ( old_next
!= succ
)
392 RMB(); /* get up-to-date view of the world. */
393 (void)strong_search_predecessors(l
, k
, preds
, succs
);
397 /* Succeeded at this level. */
402 /* Ensure node is visible at all levels before punting deletion. */
403 WEAK_DEP_ORDER_WMB();
404 if ( check_for_full_delete(new) )
406 MB(); /* make sure we see all marks in @new. */
407 do_full_delete(ptst
, l
, new, level
- 1);
415 setval_t
set_remove(set_t
*l
, setkey_t k
)
417 setval_t v
= NULL
, new_v
;
419 sh_node_pt preds
[NUM_LEVELS
], x
;
422 k
= CALLER_TO_INTERNAL_KEY(k
);
424 ptst
= critical_enter();
426 x
= weak_search_predecessors(l
, k
, preds
, NULL
);
427 if ( x
->k
> k
) goto out
;
428 READ_FIELD(level
, x
->level
);
429 level
= level
& LEVEL_MASK
;
431 /* Once we've marked the value field, the node is effectively deleted. */
435 if ( v
== NULL
) goto out
;
437 while ( (new_v
= CASPO(&x
->v
, v
, NULL
)) != v
);
439 /* Committed to @x: mark lower-level forward pointers. */
440 WEAK_DEP_ORDER_WMB(); /* enforce above as linearisation point */
441 mark_deleted(x
, level
);
444 * We must swing predecessors' pointers, or we can end up with
445 * an unbounded number of marked but not fully deleted nodes.
446 * Doing this creates a bound equal to number of threads in the system.
447 * Furthermore, we can't legitimately call 'free_node' until all shared
448 * references are gone.
450 for ( i
= level
- 1; i
>= 0; i
-- )
452 if ( CASPO(&preds
[i
]->next
[i
], x
, get_unmarked_ref(x
->next
[i
])) != x
)
454 if ( (i
!= (level
- 1)) || check_for_full_delete(x
) )
456 MB(); /* make sure we see node at all levels. */
457 do_full_delete(ptst
, l
, x
, i
);
471 setval_t
set_lookup(set_t
*l
, setkey_t k
)
477 k
= CALLER_TO_INTERNAL_KEY(k
);
479 ptst
= critical_enter();
481 x
= weak_search_predecessors(l
, k
, NULL
, NULL
);
482 if ( x
->k
== k
) READ_FIELD(v
, x
->v
);
489 void _init_set_subsystem(void)
493 for ( i
= 0; i
< NUM_LEVELS
; i
++ )
495 gc_id
[i
] = gc_add_allocator(sizeof(node_t
) + i
*sizeof(node_t
*));