/*-
 * Public Domain 2014-2016 MongoDB, Inc.
 * Public Domain 2008-2014 WiredTiger, Inc.
 *
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
module iv.follin.rwlock /*is aliced*/;
import iv.alice;
/*
 * Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst:
 *    http://locklessinc.com/articles/locks/
 *
 * Dr. Fuerst further credits:
 *    There exists a form of the ticket lock that is designed for read-write
 *    locks. An example written in assembly was posted to the Linux kernel
 *    mailing list in 2002 by David Howells from RedHat. This was a highly
 *    optimized version of a read-write ticket lock developed at IBM in the
 *    early 90's by Joseph Seigh. Note that a similar (but not identical)
 *    algorithm was published by John Mellor-Crummey and Michael Scott in
 *    their landmark paper "Scalable Reader-Writer Synchronization for
 *    Shared-Memory Multiprocessors".
 *
 * The following is an explanation of this code. First, the underlying lock
 * structure.
 *
 *    struct {
 *      uint16_t writers;   Now serving for writers
 *      uint16_t readers;   Now serving for readers
 *      uint16_t users;     Next available ticket number
 *      uint16_t __notused; Padding
 *    }
 *
 * First, imagine a store's 'take a number' ticket algorithm. A customer takes
 * a unique ticket number and customers are served in ticket order. In the data
 * structure, 'writers' is the next writer to be served, 'readers' is the next
 * reader to be served, and 'users' is the next available ticket number.
 *
 * Next, consider exclusive (write) locks. The 'now serving' number for writers
 * is 'writers'. To lock, 'take a number' and wait until that number is being
 * served; more specifically, atomically copy and increment the current value of
 * 'users', and then wait until 'writers' equals that copied number.
 *
 * Shared (read) locks are similar. Like writers, readers atomically get the
 * next number available. However, instead of waiting for 'writers' to equal
 * their number, they wait for 'readers' to equal their number.
 *
 * This has the effect of queuing lock requests in the order they arrive
 * (incidentally avoiding starvation).
 *
 * Each lock/unlock pair requires incrementing both 'readers' and 'writers'.
 * In the case of a reader, the 'readers' increment happens when the reader
 * acquires the lock (to allow read-lock sharing), and the 'writers' increment
 * happens when the reader releases the lock. In the case of a writer, both
 * 'readers' and 'writers' are incremented when the writer releases the lock.
 *
 * For example, consider the following read (R) and write (W) lock requests:
 *
 *                                            writers readers users
 *                                               0       0      0
 *    R: ticket 0, readers match      OK        0       1      1
 *    R: ticket 1, readers match      OK        0       2      2
 *    R: ticket 2, readers match      OK        0       3      3
 *    W: ticket 3, writers no match   block     0       3      4
 *    R: ticket 2, unlock                       1       3      4
 *    R: ticket 0, unlock                       2       3      4
 *    R: ticket 1, unlock                       3       3      4
 *    W: ticket 3, writers match      OK        3       3      4
 *
 * Note the writer blocks until 'writers' equals its ticket number, and it does
 * not matter whether the readers unlock in order or not.
 *
 * Readers or writers entering the system after the write lock is queued block,
 * and the next ticket holder (reader or writer) will unblock when the writer
 * unlocks. An example, continuing from the last line of the above example:
 *
 *                                            writers readers users
 *    W: ticket 3, writers match      OK        3       3      4
 *    R: ticket 4, readers no match   block     3       3      5
 *    R: ticket 5, readers no match   block     3       3      6
 *    W: ticket 6, writers no match   block     3       3      7
 *    W: ticket 3, unlock                       4       4      7
 *    R: ticket 4, readers match      OK        4       5      7
 *    R: ticket 5, readers match      OK        4       6      7
 *
 * The 'users' field is a 2-byte value, so the available ticket number wraps
 * at 64K requests. If a thread's lock request is not granted until the 'users'
 * field cycles and the same ticket is taken by another thread, we could grant
 * a lock to two separate threads at the same time, and bad things happen: two
 * writer threads, or a reader thread and a writer thread, would run in
 * parallel, and lock waiters could be skipped if the unlocks race. This is
 * unlikely: it only happens if a lock request is blocked by 64K other
 * requests. The fix would be to grow the lock structure fields, but the
 * largest atomic instruction we have is 8 bytes, so the structure has no room
 * to grow. (The small unittest after this comment illustrates the wrap.)
 */

/*
 * !!!
 * Don't modify this structure without understanding the read/write locking
 * functions.
 */
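// A tiny illustration, not part of the original module: tickets live in a
// ushort, so ticket arithmetic is modulo 65536 and, as described above, a
// request blocked behind 64K others would see its ticket number reissued.
unittest {
  ushort ticket = 65535;           // the last distinct ticket value
  ticket = cast(ushort)(ticket+1); // the next 'take a number'...
  assert(ticket == 0);             // ...wraps back to 0: a duplicate ticket
}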
/* Read/write lock */
align(1) shared union TflRWLock {
nothrow @trusted @nogc:
align(1):
  ulong u;
  struct {
  align(1):
    uint wr; /* Writers and readers */
  }
  struct {
  align(1):
    ushort writers; /* Now serving for writers */
    ushort readers; /* Now serving for readers */
    ushort users; /* Next available ticket number */
    //ushort __notused; /* Padding */
  }

  //@disable this (this);
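  // Note on the layout: all three views above alias the same eight bytes.
  // On a little-endian target 'u' packs the fields as
  // writers | (readers << 16) | (users << 32), which is what lets the
  // try* functions swap the whole ticket state with a single 8-byte cas,
  // and lets writeUnlock publish 'writers' and 'readers' together through
  // the 4-byte 'wr' store.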
  // try to get a shared lock, fail immediately if unavailable
  bool tryReadLock () {
    import core.atomic;
    TflRWLock newl = void, old = void;
    newl = old = this;
    /*
     * This read lock can only be granted if the lock was last granted to
     * a reader and there are no readers or writers blocked on the lock,
     * that is, if this thread's ticket would be the next ticket granted.
     * Do the cheap test to see if this can possibly succeed (and confirm
     * the lock is in the correct state to grant this read lock).
     */
    if (old.readers != old.users) return false;
    /*
     * The replacement lock value is a result of allocating a new ticket and
     * incrementing the reader value to match it.
     */
    newl.readers = newl.users = cast(ushort)(old.users+1);
    return (cas(&u, old.u, newl.u) ? true : false);
  }
  // get a shared lock
  void readLock () {
    import core.atomic;
    /*
     * Possibly wrap: if we have more than 64K lockers waiting, the ticket
     * value will wrap and two lockers will simultaneously be granted the
     * lock.
     */
    ushort ticket = cast(ushort)(atomicOp!"+="(users, 1)-1);
    for (int pause_cnt = 0; ticket != atomicLoad(readers); ) {
      /*
       * We failed to get the lock; pause before retrying and, if we've
       * paused enough, sleep so we don't burn CPU to no purpose. This
       * situation happens if there are more threads than cores in the
       * system and we're thrashing on shared resources.
       *
       * Don't sleep long when waiting on a read lock: hopefully we're
       * waiting on another read thread to increment the reader count.
       */
      if (++pause_cnt < 1000) {
        asm nothrow @safe @nogc {
          db 0xf3,0x90; //pause;
        }
      } else {
        // one second is 1_000_000_000 nanoseconds or 1_000_000 microseconds or 1_000 milliseconds
        import core.sys.posix.signal : timespec;
        import core.sys.posix.time : nanosleep;
        timespec ts = void;
        ts.tv_sec = 0;
        ts.tv_nsec = 10*1000; // micro to nano
        nanosleep(&ts, null); // we don't care how much time actually passed
      }
    }
    /*
     * We're the only writer of the readers field, so the update does not
     * need to be atomic. But stupid DMD insists. Alas.
     */
    //++readers;
    atomicOp!"+="(readers, 1);
  }
  // release a shared lock
  void readUnlock () {
    import core.atomic;
    /*
     * Increment the writers value (other readers are doing the same, make
     * sure we don't race).
     */
    cast(void)atomicOp!"+="(writers, 1);
  }
  // try to get an exclusive lock, fail immediately if unavailable
  bool tryWriteLock () {
    import core.atomic;
    TflRWLock newl = void, old = void;
    old = newl = this;
    /*
     * This write lock can only be granted if the lock was last granted to
     * a writer and there are no readers or writers blocked on the lock,
     * that is, if this thread's ticket would be the next ticket granted.
     * Do the cheap test to see if this can possibly succeed (and confirm
     * the lock is in the correct state to grant this write lock).
     */
    if (old.writers != old.users) return false;
    /* The replacement lock value is a result of allocating a new ticket. */
    //++newl.users;
    atomicOp!"+="(newl.users, 1); // Stupid DMD insists. Alas.
    return (cas(&u, old.u, newl.u) ? true : false);
  }
  // wait to get an exclusive lock
  void writeLock () {
    import core.atomic;
    /*
     * Possibly wrap: if we have more than 64K lockers waiting, the ticket
     * value will wrap and two lockers will simultaneously be granted the
     * lock.
     */
    ushort ticket = cast(ushort)(atomicOp!"+="(users, 1)-1);
    for (int pause_cnt = 0; ticket != atomicLoad(writers); ) {
      /*
       * We failed to get the lock; pause before retrying and, if we've
       * paused enough, sleep so we don't burn CPU to no purpose. This
       * situation happens if there are more threads than cores in the
       * system and we're thrashing on shared resources.
       */
      if (++pause_cnt < 1000) {
        asm nothrow @safe @nogc {
          db 0xf3,0x90; //pause;
        }
      } else {
        // one second is 1_000_000_000 nanoseconds or 1_000_000 microseconds or 1_000 milliseconds
        import core.sys.posix.signal : timespec;
        import core.sys.posix.time : nanosleep;
        timespec ts = void;
        ts.tv_sec = 0;
        ts.tv_nsec = 10*1000; // micro to nano
        nanosleep(&ts, null); // we don't care how much time actually passed
      }
    }
  }
  // release an exclusive lock
  void writeUnlock () {
    import core.atomic;
    TflRWLock copy = this;
    /*
     * We're the only writer of the writers/readers fields, so the update
     * does not need to be atomic; we have to update both values at the
     * same time though, otherwise we'd potentially race with the thread
     * next granted the lock.
     *
     * Use a memory barrier to ensure the compiler doesn't mess with these
     * instructions and rework the code in a way that avoids the update as
     * a unit.
     */
    atomicFence();
    //++copy.writers;
    //++copy.readers;
    // Stupid DMD insists. Alas.
    atomicOp!"+="(copy.writers, 1);
    atomicOp!"+="(copy.readers, 1);
    wr = copy.wr;
  }
}
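// A minimal single-threaded smoke test sketch, not part of the original
// module: it exercises the ticket bookkeeping only, not the cross-thread
// blocking behaviour.
unittest {
  TflRWLock lk;               // zero-initialized: writers == readers == users == 0
  assert(lk.tryReadLock());   // readers == users, so the read lock is granted
  assert(!lk.tryWriteLock()); // a reader holds the lock, a writer must wait
  lk.readUnlock();            // increments 'writers', releasing the lock
  assert(lk.tryWriteLock());  // the lock is free again, the writer takes a ticket
  assert(!lk.tryReadLock());  // the writer holds the lock now
  lk.writeUnlock();           // increments both 'writers' and 'readers'
  assert(lk.tryReadLock());   // back to a grantable state
  lk.readUnlock();
}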