/******************************************************************************
 * MCAS implemented as described in:
 *  A Practical Multi-Word Compare-and-Swap Operation
 *  Timothy Harris, Keir Fraser and Ian Pratt
 *  Proceedings of the IEEE Symposium on Distributed Computing, Oct 2002
 *
 * Copyright (c) 2002-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *     * Neither the name of Keir Fraser nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
#include <assert.h>
#include <pthread.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/resource.h>

/*
 * The CAS primitives (CASIO, CASPO), memory barriers (MB, RMB_NEAR_CAS,
 * WEAK_DEP_ORDER_RMB, WEAK_DEP_ORDER_WMB) and basic types (bool_t, TRUE,
 * FALSE, MAX_THREADS) are assumed to come from the library's
 * platform-specific portability header, which is not reproduced here.
 */
typedef struct CasDescriptor CasDescriptor_t;
typedef struct CasEntry      CasEntry_t;
typedef struct per_thread_state_t per_thread_state_t;

extern int num_threads;

#define ARENA_SIZE 40960
struct per_thread_state_t
{
    int              id;
    void            *arena;
    void            *arena_lim;
    CasDescriptor_t *next_descriptor;
};

static pthread_key_t mcas_ptst_key;

typedef struct pad128 { char pad[128]; } pad128_t;
/* CAS descriptors. */

#define STATUS_IN_PROGRESS 0
#define STATUS_SUCCEEDED   1
#define STATUS_FAILED      2
#define STATUS_ABORTED     3

struct CasEntry
{
    void **ptr;
    void  *old;
    void  *new;
};

struct CasDescriptor
{
    int              status;
    int              length;
    int              rc; /* reference count */
    CasDescriptor_t *pt[MAX_THREADS];
    CasDescriptor_t *fc; /* free chain */
    CasEntry_t       entries[1];
};
/* Marked pointers. */
typedef unsigned long ptr_int;

#ifndef MARK_IN_PROGRESS
#define MARK_IN_PROGRESS 1
#endif
#ifndef MARK_PTR_TO_CD
#define MARK_PTR_TO_CD   2
#endif

#define get_markedness(p)         (((ptr_int) (p)) & 3)
#define get_unmarked_reference(p) ((void *) (((ptr_int) (p)) & (~3)))
#define get_marked_reference(p,m) ((void *) (((ptr_int) (p)) | m))
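/*
 * Illustrative sketch, not part of the original source: the two low-order
 * bits of a word act as a tag, which assumes descriptors are at least
 * 4-byte aligned.  A reader that loads a tagged value knows the word is
 * currently owned by an in-flight MCAS:
 *
 *     void *v = *some_shared_word;
 *     if ( get_markedness(v) == MARK_PTR_TO_CD )
 *     {
 *         CasDescriptor_t *cd = get_unmarked_reference(v);
 *         // 'cd' is the descriptor of the owning MCAS; help it or retry.
 *     }
 *
 * Conversely, an MCAS installs its descriptor into a word by storing
 * get_marked_reference(cd, MARK_PTR_TO_CD) in place of the old value.
 */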
static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd);
static per_thread_state_t *get_ptst (void);

pad128_t p0; /* I'm worried these important RO vars might be false shared */
static int num_ptrs = 1024;
static int ptr_mult = 1;
static void *ALLOC (int size)
{
    void *a = calloc(1, size);
    if ( a == NULL ) abort();
    return a;
}
static void *ALLOC_ALONE (int size)
{
    int   ps  = sysconf(_SC_PAGESIZE);
    int   req = ps + size + ps;
    char *res = ALLOC(req);
    return (void *)(res + ps);
}
static int next_thread_id = 0;
static per_thread_state_t *ptsts = NULL;
static void new_arena (per_thread_state_t *ptst, int size)
{
    ptst->arena = ALLOC(size);
    if ( !ptst->arena ) abort();
    ptst->arena_lim = (((char *) ptst->arena) + size);
}
static per_thread_state_t *get_ptst (void)
{
    per_thread_state_t *result;

    result = pthread_getspecific(mcas_ptst_key);

    if ( result == NULL )
    {
        int r, my_id;
        int largest = sysconf(_SC_PAGESIZE);

        if ( largest < sizeof (per_thread_state_t) )
            largest = sizeof (per_thread_state_t);

        result = ALLOC(largest);

        /* Claim a unique thread id. */
        do { my_id = next_thread_id; }
        while ( CASIO (&next_thread_id, my_id, my_id + 1) != my_id );

        result->id = my_id;
        new_arena(result, ARENA_SIZE);

        r = pthread_setspecific(mcas_ptst_key, result);
        if ( r != 0 ) abort();
    }

    return result;
}
static void release_descriptor (CasDescriptor_t *cd)
{
    per_thread_state_t *ptst = get_ptst();

    cd->fc = ptst->next_descriptor;
    ptst->next_descriptor = cd;
}
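/*
 * Added commentary, inferred from the code below rather than taken from the
 * original source: a descriptor's 'rc' field packs two things.  The
 * low-order bit is set while the descriptor sits on a thread's free chain,
 * and the remaining bits count live references in steps of two.
 * new_descriptor() adds 1 to clear the free bit, rc_up/rc_down move the
 * count by +/-2, and when the count would reach zero rc_down sets the free
 * bit again and recycles the descriptor via release_descriptor().
 */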
static int rc_delta_descriptor (CasDescriptor_t *cd, int delta)
{
    int rc, new_rc = cd->rc;

    do { rc = new_rc; }
    while ( (new_rc = CASIO (&(cd->rc), rc, rc + delta)) != rc );

    return rc;
}
static void rc_up_descriptor (CasDescriptor_t *cd)
{
    rc_delta_descriptor(cd, 2);
}
static void rc_down_descriptor (CasDescriptor_t *cd)
{
    int old_rc, new_rc, cur_rc = cd->rc;

    do
    {
        old_rc = cur_rc;
        new_rc = old_rc - 2;
        if ( new_rc == 0 ) new_rc = 1; else MB();
    }
    while ( (cur_rc = CASIO(&(cd->rc), old_rc, new_rc)) != old_rc );

    if ( old_rc == 2 )
        release_descriptor(cd);
}
static CasDescriptor_t *new_descriptor (per_thread_state_t *ptst, int length)
{
    CasDescriptor_t  *result;
    CasDescriptor_t **ptr = &(ptst->next_descriptor);
    int i;

    /* Look for a recycled descriptor of the right length on the free chain. */
    result = *ptr;
    while ( (result != NULL) && (result->length != length) )
    {
        ptr    = &(result->fc);
        result = *ptr;
    }

    if ( result == NULL )
    {
        /* None available: carve a fresh descriptor out of the arena. */
        int alloc_size;

        alloc_size = sizeof (CasDescriptor_t) +
            ((length - 1) * sizeof (CasEntry_t));

        result = (CasDescriptor_t *) ptst->arena;
        ptst->arena = ((char *) (ptst->arena)) + alloc_size;

        if ( ptst->arena >= ptst->arena_lim )
        {
            new_arena(ptst, ARENA_SIZE);
            result = (CasDescriptor_t *) ptst->arena;
            ptst->arena = ((char *) (ptst->arena)) + alloc_size;
        }

        for ( i = 0; i < num_threads; i++ )
            result->pt[i] = result;

        result->length = length;
        result->rc     = 1; /* free bit set; no references yet */
    }
    else
    {
        /* Unlink the recycled descriptor from the free chain. */
        *ptr = result->fc;
    }

    assert((result->rc & 1) == 1);
    rc_delta_descriptor(result, 1); /* clears lowest bit */

    assert(result->length == length);

    return result;
}
static void *read_from_cd (void **ptr, CasDescriptor_t *cd, bool_t get_old)
{
    CasEntry_t *ce;
    int         i, n = cd->length;

    for ( i = 0; i < n; i++ )
    {
        ce = &(cd->entries[i]);
        if ( ce->ptr == ptr )
            return get_old ? ce->old : ce->new;
    }

    return NULL;
}
static void *read_barrier_lite (void **ptr)
{
    CasDescriptor_t *cd;
    void *v;
    int   m;

 retry_read_barrier:
    v = *ptr;
    m = get_markedness(v);

    if ( m == MARK_PTR_TO_CD )
    {
        WEAK_DEP_ORDER_RMB();
        cd = get_unmarked_reference(v);

        rc_up_descriptor(cd);
        if ( *ptr != v )
        {
            rc_down_descriptor(cd);
            goto retry_read_barrier;
        }

        v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED));

        rc_down_descriptor(cd);
    }
    else if ( m == MARK_IN_PROGRESS )
    {
        WEAK_DEP_ORDER_RMB();
        cd = *(CasDescriptor_t **)get_unmarked_reference(v);

        rc_up_descriptor(cd);
        if ( *ptr != v )
        {
            rc_down_descriptor(cd);
            goto retry_read_barrier;
        }

        v = read_from_cd(ptr, cd, (cd->status != STATUS_SUCCEEDED));

        rc_down_descriptor(cd);
    }

    return v;
}
static void clean_descriptor (CasDescriptor_t *cd)
{
    int   i;
    int   status = cd->status;
    void *mcd;

    assert(status == STATUS_SUCCEEDED || status == STATUS_FAILED);

    mcd = get_marked_reference(cd, MARK_PTR_TO_CD);

    if ( status == STATUS_SUCCEEDED )
    {
        for ( i = 0; i < cd->length; i++ )
            CASPO(cd->entries[i].ptr, mcd, cd->entries[i].new);
    }
    else
    {
        for ( i = 0; i < cd->length; i++ )
            CASPO(cd->entries[i].ptr, mcd, cd->entries[i].old);
    }
}
static bool_t mcas_fixup (void **ptr, void *value_read)
{
    int m;

 retry_mcas_fixup:
    m = get_markedness(value_read);

    if ( m == MARK_PTR_TO_CD )
    {
        CasDescriptor_t *helpee;
        helpee = get_unmarked_reference(value_read);

        rc_up_descriptor(helpee);
        if ( *ptr != value_read )
        {
            rc_down_descriptor(helpee);
            value_read = *ptr;
            goto retry_mcas_fixup;
        }

        /* Help the owning MCAS to completion before we carry on. */
        mcas0(get_ptst(), helpee);

        rc_down_descriptor(helpee);

        return TRUE;
    }
    else if ( m == MARK_IN_PROGRESS )
    {
        CasDescriptor_t *other_cd;

        WEAK_DEP_ORDER_RMB();
        other_cd = *(CasDescriptor_t **)get_unmarked_reference(value_read);

        rc_up_descriptor(other_cd);
        if ( *ptr != value_read )
        {
            rc_down_descriptor(other_cd);
            value_read = *ptr;
            goto retry_mcas_fixup;
        }

        if ( other_cd->status == STATUS_IN_PROGRESS )
            CASPO(ptr, value_read,
                  get_marked_reference(other_cd, MARK_PTR_TO_CD));
        else
            CASPO(ptr, value_read,
                  read_from_cd(ptr, other_cd, TRUE));

        rc_down_descriptor (other_cd);

        return TRUE;
    }

    return FALSE;
}
static void *read_barrier (void **ptr)
{
    void *v;

    do { v = *ptr; }
    while ( mcas_fixup(ptr, v) );

    return v;
}
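/*
 * Added note, not in the original source: callers that dereference words
 * which may concurrently be targeted by an MCAS read them through
 * read_barrier(), e.g.
 *
 *     node_t *succ = (node_t *) read_barrier((void **)&node->next);
 *
 * so that any descriptor found in the word is helped to completion and a
 * real value is returned.  'node_t' and 'node->next' are hypothetical here.
 */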
static bool_t mcas0 (per_thread_state_t *ptst, CasDescriptor_t *cd)
{
    int     i, n;
    int     desired_status, old_status;
    void   *mcd, *dmcd;
    bool_t  final_success;

    MB(); /* required for sequential consistency */

    if ( cd->status == STATUS_SUCCEEDED )
    {
        clean_descriptor(cd);
        final_success = TRUE;
    }
    else if ( cd->status == STATUS_FAILED )
    {
        clean_descriptor(cd);
        final_success = FALSE;
    }
    else
    {
        /* Attempt to link in all entries in the descriptor. */
        mcd  = get_marked_reference(cd, MARK_PTR_TO_CD);
        dmcd = get_marked_reference(&(cd->pt[ptst->id]), MARK_IN_PROGRESS);

        desired_status = STATUS_SUCCEEDED;

        n = cd->length;
        for ( i = 0; i < n; i++ )
        {
            CasEntry_t *ce = &(cd->entries[i]);
            void *value_read = CASPO(ce->ptr, ce->old, dmcd);

            if ( (value_read != ce->old) &&
                 (value_read != dmcd) &&
                 (value_read != mcd) )
            {
                /* Conflicting value: if it belongs to another operation,
                 * help it and retry this entry; otherwise we have failed. */
                if ( mcas_fixup(ce->ptr, value_read) )
                {
                    i--;
                    continue;
                }
                desired_status = STATUS_FAILED;
                break;
            }

            RMB_NEAR_CAS(); /* ensure check of status occurs after CASPO. */
            if ( cd->status != STATUS_IN_PROGRESS )
            {
                CASPO(ce->ptr, dmcd, ce->old);
                break;
            }

            if ( value_read != mcd )
            {
                value_read = CASPO(ce->ptr, dmcd, mcd);
                assert((value_read == dmcd) ||
                       (value_read == mcd) ||
                       (cd->status != STATUS_IN_PROGRESS));
            }
        }

        /*
         * All your ptrs are belong to us (or we've been helped and are
         * already known to have succeeded or failed). Try to propagate our
         * desired result into the status field.
         *
         * When changing to success, we must have all pointer ownerships
         * globally visible. But we get this without a memory barrier, as
         * 'desired_status' is dependent on the outcome of each CASPO
         * to MARK_IN_PROGRESS.
         *
         * Architectures providing CAS natively all specify that the operation
         * is _indivisible_. That is, the write will be done when the CAS
         * operation completes.
         *
         * Architectures providing LL/SC are even better: any following
         * instruction in program order is control-dependent on the CAS, because
         * CAS may be retried if SC fails. All we need is that SC gets to point
         * of coherency before producing its result: even Alpha provides this!
         */
        WEAK_DEP_ORDER_WMB();
        old_status = CASIO((int *)&cd->status,
                           STATUS_IN_PROGRESS, desired_status);

        /*
         * This ensures final sequential consistency.
         * Also ensures that the status update is visible before cleanup.
         */
        MB();

        clean_descriptor(cd);
        final_success = (cd->status == STATUS_SUCCEEDED);
    }

    return final_success;
}
void mcas_init (void)
{
    int r = pthread_key_create(&mcas_ptst_key, NULL);
    if ( r != 0 ) abort();
}
/***********************************************************************/

bool_t mcas (int n,
             void **ptr, void *old, void *new,
             ...)
{
    va_list             ap;
    int                 i;
    bool_t              result = FALSE;
    CasDescriptor_t    *cd;
    CasEntry_t         *ce;
    per_thread_state_t *ptst = get_ptst();

    cd = new_descriptor(ptst, n);

    cd->status = STATUS_IN_PROGRESS;

    /* Entry 0 comes from the named parameters... */
    ce = &(cd->entries[0]);
    ce->ptr = ptr;
    ce->old = old;
    ce->new = new;

    /* ...and the remaining (ptr, old, new) triples from the va_list. */
    va_start(ap, new);
    for ( i = 1; i < n; i++ )
    {
        ce++;
        ce->ptr = va_arg(ap, void **);
        ce->old = va_arg(ap, void *);
        ce->new = va_arg(ap, void *);
    }
    va_end(ap);

    /* Insertion sort. Fail on non-unique pointers. */
    for ( i = 1, ce = &cd->entries[1]; i < n; i++, ce++ )
    {
        int j;
        CasEntry_t *cei, tmp;
        for ( j = i-1, cei = ce-1; j >= 0; j--, cei-- )
            if ( cei->ptr <= ce->ptr ) break;
        if ( (j >= 0) && (cei->ptr == ce->ptr) ) goto out;
        if ( ++cei != ce )
        {
            tmp = *ce;
            memmove(cei+1, cei, (ce-cei)*sizeof(CasEntry_t));
            *cei = tmp;
        }
    }

    result = mcas0(ptst, cd);
    assert(cd->status != STATUS_IN_PROGRESS);

 out:
    rc_down_descriptor(cd);
    return result;
}
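/*
 * Illustrative usage sketch, not part of the original source.  Assuming the
 * variadic (ptr, old, new) layout collected above, a caller swings two
 * shared pointers in a single atomic step; 'node_a', 'node_b' and the
 * values are hypothetical:
 *
 *     static void *node_a, *node_b;
 *
 *     bool_t swing_both (void *a_old, void *a_new, void *b_old, void *b_new)
 *     {
 *         // mcas_init() must have been called once, before any mcas().
 *         return mcas(2, &node_a, a_old, a_new,
 *                        &node_b, b_old, b_new);
 *     }
 *
 * The call succeeds, and both words take their new values, only if every
 * word still holds its expected old value; otherwise no word is changed.
 * Words that may be targeted by a concurrent mcas() should be read through
 * read_barrier()/read_barrier_lite() rather than a plain load, so that any
 * in-flight descriptor is resolved first.
 */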