/******************************************************************************
 * Lock-free software transactional memory (STM).
 *
 * Copyright (c) 2002-2003, K A Fraser
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution. Neither the name of Keir Fraser
 *    nor the names of its contributors may be used to endorse or
 *    promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************************************************************/
#include <assert.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "portable_defns.h"
#include "ptst.h"
#include "gc.h"
#include "stm.h"
typedef struct stm_blk_st stm_blk;
typedef struct stm_tx_entry_st stm_tx_entry;
typedef struct stm_tx_st stm_tx;
typedef struct stm_st stm;
struct stm_blk_st
{
    void *data;
};

struct stm_tx_entry_st
{
    stm_blk      *b;    /* block this entry refers to */
    void         *old;  /* data pointer we expect to find installed */
    void         *new;  /* private shadow copy (equals 'old' for reads) */
    stm_tx_entry *next; /* next entry, sorted by block address */
};

struct stm_tx_st
{
    int status;
    int rc;
    stm_tx *next_free;
    stm_tx_entry *reads;
    stm_tx_entry *writes;
    stm_tx_entry *alloc_ptr, *check;
    int gc_data_id, blk_size; /* copied from 'stm' structure */
    sigjmp_buf *penv;
};

struct stm_st
{
    int gc_data_id;
    int blk_size;
};
/* Private per-thread state. The array is indexed off ptst->id. */
typedef struct {
    void *arena, *arena_lim;
    stm_tx *next_descriptor;
    stm_tx *cur_tx;
} priv_t;

static priv_t priv_ptst[MAX_THREADS];
static int gc_blk_id;  /* Allocation id for block descriptors. */
static int do_padding; /* Should all allocations be padded to a cache line? */
#define ALLOCATOR_SIZE(_s) (do_padding ? CACHE_LINE_SIZE : (_s))

#define ARENA_SIZE      40960
#define DESCRIPTOR_SIZE  4096
#define TXS_IN_PROGRESS 0
#define TXS_READ_PHASE  1
#define TXS_FAILED      2
#define TXS_SUCCESSFUL  3
#define is_descriptor(_p)     ((unsigned long)(_p) & 1)
#define ptr_to_descriptor(_p) ((stm_tx *)((unsigned long)(_p) & ~1))
#define make_marked_ptr(_p)   ((void *)((unsigned long)(_p) | 1))
/* Is transaction read-only? */
#define read_only(_t)         ((_t)->writes == NULL)
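
/*
 * Illustrative sketch of the tagging scheme above (the variables here are
 * hypothetical, for exposition only): a block's 'data' field normally holds
 * a word-aligned pointer to the current object copy, so its low bit is
 * clear; a committing writer installs its transaction descriptor instead,
 * tagged by setting the low bit.
 *
 *     stm_tx *t = ...;                       // some transaction descriptor
 *     void   *p = make_marked_ptr(t);        // tag the descriptor pointer
 *     assert(is_descriptor(p));              // readers detect the tag ...
 *     assert(ptr_to_descriptor(p) == t);     // ... and recover the descriptor
 */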
bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t);
static void new_arena (priv_t *priv, int size)
{
    priv->arena = malloc(size);
    if ( priv->arena == NULL ) abort();
    priv->arena_lim = (((char *) priv->arena) + size);
}
static void release_descriptor(ptst_t *ptst, stm_tx *t)
{
    stm_tx_entry *ent;
    priv_t *priv = &priv_ptst[ptst->id];

    assert(t->status >= TXS_FAILED);

    t->next_free = priv->next_descriptor;
    priv->next_descriptor = t;

    if ( t->status == TXS_SUCCESSFUL )
    {
        /* Transaction succeeded: the superseded old copies are garbage. */
        for ( ent = t->writes; ent != NULL; ent = ent->next )
        {
            gc_free(ptst, ent->old, t->gc_data_id);
        }
    }
    else
    {
        /* Transaction failed: throw away the unused shadow copies. */
        for ( ent = t->writes; ent != NULL; ent = ent->next )
        {
            gc_unsafe_free(ptst, ent->new, t->gc_data_id);
        }
    }
}
static int rc_delta_descriptor(stm_tx *t, int delta)
{
    int rc, new_rc = t->rc;

    do { rc = new_rc; }
    while ( (new_rc = CASIO (&t->rc, rc, rc + delta)) != rc );

    return rc;
}
static void rc_up_descriptor(stm_tx *t)
{
    rc_delta_descriptor(t, 2);
}
static void rc_down_descriptor(ptst_t *ptst, stm_tx *t)
{
    int old_rc, new_rc, cur_rc = t->rc;

    do {
        old_rc = cur_rc;
        new_rc = old_rc - 2;
        if ( new_rc == 0 ) new_rc = 1;
    }
    while ( (cur_rc = CASIO (&t->rc, old_rc, new_rc)) != old_rc );

    if ( old_rc == 2 ) release_descriptor(ptst, t);
}
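
/*
 * Note on the reference-count encoding used above: the count is kept in
 * units of two, so a live descriptor starts at rc == 2 and rc_up/rc_down
 * add or subtract 2. The low-order bit marks a descriptor that has been
 * handed back to the per-thread free list: rc_down_descriptor leaves
 * rc == 1 when the last reference disappears, and new_descriptor clears
 * that bit again when the descriptor is reused.
 */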
static stm_tx *new_descriptor(priv_t *priv)
{
    stm_tx *t;

    t = priv->next_descriptor;

    if ( t != NULL )
    {
        priv->next_descriptor = t->next_free;
        /* 'Unfree' descriptor, if it was previously freed. */
        if ( (t->rc & 1) == 1 ) rc_delta_descriptor(t, 1);
    }
    else
    {
        t = (stm_tx *) priv->arena;
        priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;

        if ( priv->arena >= priv->arena_lim )
        {
            new_arena(priv, ARENA_SIZE);
            t = (stm_tx *) priv->arena;
            priv->arena = ((char *) (priv->arena)) + DESCRIPTOR_SIZE;
        }

        t->rc = 2;
    }

    return t;
}
static stm_tx_entry *alloc_stm_tx_entry(stm_tx *t)
{
    /* Entries are carved from the space after the descriptor itself. */
    stm_tx_entry *ent = t->alloc_ptr++;
    assert(((unsigned long)t->alloc_ptr - (unsigned long)t) <=
           DESCRIPTOR_SIZE);
    return ent;
}
static stm_tx_entry **search_stm_tx_entry(stm_tx_entry **pnext, stm_blk *b)
{
    stm_tx_entry *next = *pnext;

    /* Entry lists are kept sorted by block address. */
    while ( (next != NULL) && ((unsigned long)next->b < (unsigned long)b) )
    {
        pnext = &next->next;
        next  = *pnext;
    }

    return pnext;
}
static void *read_blk_data(ptst_t *ptst, stm_blk *b)
{
    void *data;
    stm_tx *t;

    for ( ; ; )
    {
        data = b->data;
        if ( !is_descriptor(data) ) return data;

        /* A committing transaction owns the block: help it to finish. */
        t = ptr_to_descriptor(data);
        rc_up_descriptor(t);
        if ( b->data != data )
        {
            rc_down_descriptor(ptst, t);
            continue;
        }

        /*
         * Commit even when we could just read from descriptor, as it gets
         * the descriptor out of the way in future.
         */
        commit_stm_tx(ptst, t);
    }
}
stm *new_stm(ptst_t *ptst, int blk_size)
{
    stm *mem = malloc(CACHE_LINE_SIZE);
    mem->blk_size = blk_size;
    mem->gc_data_id = gc_add_allocator(ALLOCATOR_SIZE(blk_size));
    return mem;
}
void free_stm(ptst_t *ptst, stm *mem)
{
    gc_remove_allocator(mem->gc_data_id);
    free(mem);
}
stm_blk *new_stm_blk(ptst_t *ptst, stm *mem)
{
    stm_blk *b;

    b       = gc_alloc(ptst, gc_blk_id);
    b->data = gc_alloc(ptst, mem->gc_data_id);
    return b;
}
void free_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
{
    /*
     * We have to use read_blk_data(), as some doomed transaction may still
     * install a marked pointer here while in its write phase.
     */
    void *data = read_blk_data(ptst, b);
    assert(!is_descriptor(data));
    gc_free(ptst, data, mem->gc_data_id);
    gc_free(ptst, b,    gc_blk_id);
}
void *init_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
{
    return b->data;
}
int sizeof_stm_blk(ptst_t *ptst, stm *mem, stm_blk *b)
{
    return mem->blk_size;
}
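
/*
 * Illustrative sketch of the block-allocation interface. The 'node_t' type
 * and 'my_ptst' handle are hypothetical placeholders, not part of this file:
 *
 *     stm     *mem = new_stm(my_ptst, sizeof(node_t)); // heap of equal-sized blocks
 *     stm_blk *blk = new_stm_blk(my_ptst, mem);        // allocate one block handle
 *     node_t  *n   = init_stm_blk(my_ptst, mem, blk);  // fill in initial contents
 *     n->key = 0;                                      //   (before the block is shared)
 *     ...
 *     free_stm_blk(my_ptst, mem, blk);                 // release block and its data
 */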
stm_tx *new_stm_tx(ptst_t *ptst, stm *mem, sigjmp_buf *penv)
{
    priv_t *priv = &priv_ptst[ptst->id];
    stm_tx *t;

    if ( priv->cur_tx != NULL ) goto nesting;

    t = new_descriptor(priv);
    t->status = TXS_IN_PROGRESS;
    t->reads = t->writes = NULL;
    t->alloc_ptr = t->check = (stm_tx_entry *)(t + 1);
    t->gc_data_id = mem->gc_data_id;
    t->blk_size = mem->blk_size;
    t->penv = penv;
    priv->cur_tx = t;
    return t;

 nesting:
    fprintf(stderr, "No nesting of transactions is allowed\n");
    return NULL;
}
bool_t commit_stm_tx(ptst_t *ptst, stm_tx *t)
{
    int desired_status, other_status, old_status, new_status, final_status;
    void *marked_tx, *data;
    stm_tx *other;
    stm_tx_entry **other_pent, *ent;
    priv_t *priv = &priv_ptst[ptst->id];

    if ( priv->cur_tx == t ) priv->cur_tx = NULL;

    marked_tx = make_marked_ptr(t);
    desired_status = TXS_FAILED;
    /*
     * PHASE 1: WRITE-CHECKING PHASE.
     */
    if ( (t->status == TXS_IN_PROGRESS) && ((ent = t->writes) != NULL) )
    {
        /* Others should see up-to-date contents of descriptor. */
        WMB();

        do {
            for ( ; ; )
            {
                /* Acquire the block by installing our marked pointer. */
                data = CASPO(&ent->b->data, ent->old, marked_tx);
                if ( (data == ent->old) || (data == marked_tx) ) break;

                if ( !is_descriptor(data) ) goto fail;

                other = ptr_to_descriptor(data);
                rc_up_descriptor(other);
                if ( ent->b->data != data )
                {
                    rc_down_descriptor(ptst, other);
                    continue;
                }

                /* A contending transaction owns the block: help it finish. */
                commit_stm_tx(ptst, other);
            }
        }
        while ( (ent = ent->next) != NULL );

        /* On success we linearise at this point. */
        WEAK_DEP_ORDER_WMB();
    }
    /*
     * PHASE 2: READ-CHECKING PHASE.
     */
    if ( (t->status <= TXS_READ_PHASE) && (t->reads != NULL) )
    {
        CASIO(&t->status, TXS_IN_PROGRESS, TXS_READ_PHASE);

        for ( ent = t->reads; ent != NULL; ent = ent->next )
        {
            for ( ; ; )
            {
                data = ent->b->data;
                if ( data == ent->old ) break;

                /* Someone else made progress at our expense. */
                if ( !is_descriptor(data) ) goto fail;
                other = ptr_to_descriptor(data);

                /*
                 * Descriptor always belongs to a contending operation.
                 * Before continuing, we must increment the reference count.
                 */
                rc_up_descriptor(other);
                if ( ent->b->data != data )
                {
                    rc_down_descriptor(ptst, other);
                    continue;
                }

                /*
                 * What we do now depends on the status of the contending
                 * operation. This is easy for any status other than
                 * TXS_READ_PHASE -- usually we just check against the
                 * appropriate 'old' or 'new' data pointer. Transactions
                 * in their read-checking phase must be aborted, or helped
                 * to completion, depending on relative ordering of the
                 * transaction descriptors.
                 */
                while ( (other_status = other->status) == TXS_READ_PHASE )
                {
                    if ( t < other )
                    {
                        CASIO(&other->status, TXS_READ_PHASE, TXS_FAILED);
                    }
                    else
                    {
                        rc_up_descriptor(other);
                        commit_stm_tx(ptst, other);
                    }
                }

                other_pent = search_stm_tx_entry(&other->writes, ent->b);
                assert(*other_pent != NULL);
                data = (other_status == TXS_SUCCESSFUL) ?
                    (*other_pent)->new : (*other_pent)->old;
                rc_down_descriptor(ptst, other);
                if ( data != ent->old ) goto fail;

                break;
            }
        }
    }
    /* Every read and write checked out: the transaction can succeed. */
    desired_status = TXS_SUCCESSFUL;

 fail:
    if ( read_only(t) )
    {
        /* A very fast path: we can immediately reuse the descriptor. */
        t->next_free = priv->next_descriptor;
        priv->next_descriptor = t;
        return desired_status == TXS_SUCCESSFUL;
    }
    /* Loop until we push the status to a "final decision" value. */
    old_status = t->status;
    while ( old_status <= TXS_READ_PHASE )
    {
        new_status = CASIO(&t->status, old_status, desired_status);
        if ( old_status == new_status ) break;
        old_status = new_status;
    }
    /*
     * PHASE 3: CLEAN-UP. Unmark the written blocks, installing either the
     * new or the old data pointer depending on the final decision.
     */
    final_status = t->status;
    for ( ent = t->writes; ent != NULL; ent = ent->next )
    {
        /* If CAS fails, someone did it for us already. */
        (void)CASPO(&ent->b->data, marked_tx,
                    (final_status == TXS_SUCCESSFUL) ? ent->new : ent->old);
    }

    rc_down_descriptor(ptst, t);
    return final_status == TXS_SUCCESSFUL;
}
bool_t validate_stm_tx(ptst_t *ptst, stm_tx *t)
{
    stm_tx_entry *ent;

    for ( ent = t->reads; ent != NULL; ent = ent->next )
    {
        if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
    }

    for ( ent = t->writes; ent != NULL; ent = ent->next )
    {
        if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
    }

    return TRUE;

 fail:
    t->status = TXS_FAILED;
    return FALSE;
}
void abort_stm_tx(ptst_t *ptst, stm_tx *t)
{
    t->status = TXS_FAILED;
}
void *read_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
{
    stm_tx_entry **pent, *ent;
    sigjmp_buf *penv;
    void *result;

    pent = search_stm_tx_entry(&t->writes, b);
    ent  = *pent;
    if ( (ent != NULL) && (ent->b == b) ) goto found;

    pent = search_stm_tx_entry(&t->reads, b);
    ent  = *pent;
    if ( (ent != NULL) && (ent->b == b) ) goto found;

    /* First access to this block: record it in the read list. */
    ent = alloc_stm_tx_entry(t);
    ent->b    = b;
    ent->old  = read_blk_data(ptst, b);
    ent->new  = ent->old;
    ent->next = *pent;
    *pent     = ent;

    assert(!is_descriptor(ent->new));
    return ent->new;

 found:
    result = ent->new;

    /* Incrementally revalidate one earlier entry per access. */
    ent = t->check;
    if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
    if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
    return result;

 fail:
    penv = t->penv;
    abort_stm_tx(ptst, t);
    commit_stm_tx(ptst, t);
    siglongjmp(*penv, 0);
    return NULL;
}
void *write_stm_blk(ptst_t *ptst, stm_tx *t, stm_blk *b)
{
    stm_tx_entry **r_pent, **w_pent, *ent;
    sigjmp_buf *penv;
    void *result;

    w_pent = search_stm_tx_entry(&t->writes, b);
    ent = *w_pent;
    if ( (ent != NULL) && (ent->b == b) ) goto found;

    r_pent = search_stm_tx_entry(&t->reads, b);
    ent = *r_pent;
    if ( (ent != NULL) && (ent->b == b) )
    {
        /* Promote an existing read entry: unlink it from the read list. */
        *r_pent = ent->next;
    }
    else
    {
        /* First access to this block: set up a fresh entry. */
        ent = alloc_stm_tx_entry(t);
        ent->b   = b;
        ent->old = read_blk_data(ptst, b);
    }

    /* Make a private shadow copy for this transaction to update. */
    ent->new  = gc_alloc(ptst, t->gc_data_id);
    ent->next = *w_pent;
    *w_pent   = ent;
    memcpy(ent->new, ent->old, t->blk_size);

    assert(!is_descriptor(ent->old));
    assert(!is_descriptor(ent->new));
    return ent->new;

 found:
    result = ent->new;

    /* Incrementally revalidate one earlier entry per access. */
    ent = t->check;
    if ( read_blk_data(ptst, ent->b) != ent->old ) goto fail;
    if ( ++t->check == t->alloc_ptr ) t->check = (stm_tx_entry *)(t + 1);
    return result;

 fail:
    penv = t->penv;
    abort_stm_tx(ptst, t);
    commit_stm_tx(ptst, t);
    siglongjmp(*penv, 0);
    return NULL;
}
void remove_from_tx(ptst_t *ptst, stm_tx *t, stm_blk *b)
{
    stm_tx_entry **pent, *ent;
    void *data;

    pent = search_stm_tx_entry(&t->writes, b);
    ent  = *pent;
    if ( (ent != NULL) && (ent->b == b) )
    {
        /* Unlink the write entry and reclaim its unused shadow copy. */
        *pent = ent->next;
        data  = ent->new;
        assert(!is_descriptor(data));
        gc_free(ptst, data, t->gc_data_id);
        return;
    }

    pent = search_stm_tx_entry(&t->reads, b);
    ent  = *pent;
    if ( (ent != NULL) && (ent->b == b) )
    {
        *pent = ent->next;
    }
}
static void handle_fault(int sig)
{
    ptst_t *ptst;
    stm_tx *t;

    ptst = critical_enter();
    t = priv_ptst[ptst->id].cur_tx;
    if ( (t != NULL) && !validate_stm_tx(ptst, t) )
    {
        /* The fault was caused by a doomed transaction: retry it. */
        sigjmp_buf *penv = t->penv;
        commit_stm_tx(ptst, t);
        critical_exit(ptst);
        siglongjmp(*penv, 0);
    }

    fprintf(stderr, "Error: unhandleable SIGSEGV!\n");
    abort();
}
void _init_stm_subsystem(int pad_data)
{
    struct sigaction act;

    do_padding = pad_data;
    gc_blk_id = gc_add_allocator(ALLOCATOR_SIZE(sizeof(stm_blk)));
    memset(priv_ptst, 0, sizeof(priv_ptst));

    /* A doomed transaction may dereference junk data: recover via SIGSEGV. */
    act.sa_handler = handle_fault;
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    sigaction(SIGSEGV, &act, NULL);
}