2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
10 /* Copyright (C) 1994 Cazamar Systems, Inc. */
12 #include <afs/param.h>
21 #include "osi_internal.h"
23 /* Locking hierarchy for these critical sections:
25 * 1. lock osi_sleepFDCS
26 * 2. lock osi_critSec[i]
27 * 3. lock osi_sleepInfoAllocCS
30 /* file descriptor for iterating over sleeping threads */
31 osi_fdOps_t osi_sleepFDOps
= {
38 * Thread-local storage for sleep Info structures
42 /* critical section serializing contents of all sleep FDs, so that
43 * concurrent GetInfo calls don't damage each other if applied
46 CRITICAL_SECTION osi_sleepFDCS
;
48 /* critical regions used for SleepSched to guarantee atomicity.
49 * protects all sleep info structures while they're in the
52 static CRITICAL_SECTION osi_critSec
[OSI_SLEEPHASHSIZE
];
54 /* the sleep info structure hash table.
55 * all active entries are in here. In addition, deleted entries
56 * may be present, referenced by file descriptors from remote
57 * debuggers; these will have OSI_SLEEPINFO_DELETED set and
60 static osi_sleepInfo_t
*osi_sleepers
[OSI_SLEEPHASHSIZE
];
61 static osi_sleepInfo_t
*osi_sleepersEnd
[OSI_SLEEPHASHSIZE
];
63 /* allocate space for lock operations */
64 osi_lockOps_t
*osi_lockOps
[OSI_NLOCKTYPES
];
66 /* some global statistics */
67 long osi_totalSleeps
= 0;
69 /* critical section protecting sleepInfoFreeListp and all sleep entries in
72 CRITICAL_SECTION osi_sleepInfoAllocCS
;
74 /* sleep entry free list */
75 osi_sleepInfo_t
*osi_sleepInfoFreeListp
;
78 unsigned long osi_bootTime
;
80 /* count of free entries in free list, protected by osi_sleepInfoAllocCS */
81 long osi_sleepInfoCount
=0;
83 /* count of # of allocates of sleep info structures */
84 long osi_sleepInfoAllocs
= 0;
86 /* the sleep bucket lock must be held.
87 * Releases the reference count and frees the structure if the item has
90 void osi_ReleaseSleepInfo(osi_sleepInfo_t
*sp
)
92 if (InterlockedDecrement(&sp
->refCount
) == 0 && (sp
->states
& OSI_SLEEPINFO_DELETED
))
93 osi_FreeSleepInfo(sp
);
96 /* must be called with sleep bucket locked.
97 * Frees the structure if it has a 0 reference count (and removes it
98 * from the hash bucket). Otherwise, we simply mark the item
99 * for deleting when the ref count hits zero.
101 void osi_FreeSleepInfo(osi_sleepInfo_t
*sp
)
105 if (sp
->refCount
> 0) {
106 TlsSetValue(osi_SleepSlot
, NULL
); /* don't reuse me */
107 _InterlockedOr(&sp
->states
, OSI_SLEEPINFO_DELETED
);
111 /* remove from hash if still there */
112 if (sp
->states
& OSI_SLEEPINFO_INHASH
) {
113 idx
= osi_SLEEPHASH(sp
->value
);
114 osi_QRemoveHT((osi_queue_t
**) &osi_sleepers
[idx
], (osi_queue_t
**) &osi_sleepersEnd
[idx
], &sp
->q
);
115 _InterlockedAnd(&sp
->states
, ~OSI_SLEEPINFO_INHASH
);
118 if (sp
->states
& OSI_SLEEPINFO_DELETED
) {
119 EnterCriticalSection(&osi_sleepInfoAllocCS
);
120 sp
->q
.nextp
= (osi_queue_t
*) osi_sleepInfoFreeListp
;
121 osi_sleepInfoFreeListp
= sp
;
122 _InterlockedAnd(&sp
->states
, ~OSI_SLEEPINFO_DELETED
);
123 InterlockedIncrement(&osi_sleepInfoCount
);
124 LeaveCriticalSection(&osi_sleepInfoAllocCS
);
128 /* allocate a new sleep structure from the free list */
129 osi_sleepInfo_t
*osi_AllocSleepInfo()
133 EnterCriticalSection(&osi_sleepInfoAllocCS
);
134 if (!(sp
= osi_sleepInfoFreeListp
)) {
135 sp
= (osi_sleepInfo_t
*) malloc(sizeof(osi_sleepInfo_t
));
136 memset(sp
, 0, sizeof(*sp
));
137 sp
->sema
= CreateSemaphore(NULL
, 0, 65536, NULL
);
138 InterlockedIncrement(&osi_sleepInfoAllocs
);
141 osi_sleepInfoFreeListp
= (osi_sleepInfo_t
*) sp
->q
.nextp
;
142 InterlockedDecrement(&osi_sleepInfoCount
);
144 sp
->tid
= GetCurrentThreadId();
145 LeaveCriticalSection(&osi_sleepInfoAllocCS
);
/*
 * osi_Once: entry gate for one-time initialization.
 *
 * Spins on InterlockedExchange(&argp->atomic, 1) until the previous value
 * was 0, i.e. until this thread owns the once-lock, and then tests
 * argp->done.  On the already-initialized path the lock is dropped again
 * by storing 0 into argp->atomic.
 *
 * NOTE(review): this listing omits the spin-loop body, the body of the
 * "done == 0" branch and the return statements.  Presumably the first
 * caller gets a non-zero result with the lock still held and releases it
 * through osi_EndOnce() -- confirm against the complete source.
 */
150 int osi_Once(osi_once_t
*argp
)
154 while ((i
=InterlockedExchange(&argp
->atomic
, 1)) != 0) {
158 if (argp
->done
== 0) {
163 /* otherwise we've already been initialized, so clear lock and return */
164 InterlockedExchange(&argp
->atomic
, 0);
168 void osi_EndOnce(osi_once_t
*argp
)
170 InterlockedExchange(&argp
->atomic
, 0);
/*
 * osi_TestOnce: report whether one-time initialization is still pending.
 *
 * Takes the once-lock by spinning on InterlockedExchange(&argp->atomic, 1),
 * snapshots argp->done into localDone, releases the lock by storing 0, and
 * returns 1 when done was zero and 0 otherwise.
 *
 * NOTE(review): the local declarations and the spin-loop body are not part
 * of this listing.
 */
173 int osi_TestOnce(osi_once_t
*argp
)
178 while ((i
=InterlockedExchange(&argp
->atomic
, 1)) != 0) {
182 localDone
= argp
->done
;
185 InterlockedExchange(&argp
->atomic
, 0);
187 return (localDone
? 0 : 1);
190 /* Initialize the package, should be called while single-threaded.
191 * Can be safely called multiple times.
192 * Must be called before any osi package calls.
197 static osi_once_t once
;
198 unsigned long remainder
; /* for division output */
201 osi_hyper_t bootTime
;
203 /* check to see if already initialized; if so, claim success */
204 if (!osi_Once(&once
))
207 /* setup boot time values */
208 GetSystemTimeAsFileTime(&fileTime
);
210 /* change the base of the time so it won't be negative for a long time */
211 fileTime
.dwHighDateTime
-= 28000000;
213 bootTime
.HighPart
= fileTime
.dwHighDateTime
;
214 bootTime
.LowPart
= fileTime
.dwLowDateTime
;
215 /* now, bootTime is in 100 nanosecond units, and we'd really rather
216 * have it in 1 second units, units 10,000,000 times bigger.
219 bootTime
= ExtendedLargeIntegerDivide(bootTime
, 10000000, &remainder
);
220 osi_bootTime
= bootTime
.LowPart
;
222 /* initialize thread-local storage for sleep Info structures */
223 osi_SleepSlot
= TlsAlloc();
228 /* initialize critical regions and semaphores */
229 for(i
=0;i
<OSI_SLEEPHASHSIZE
; i
++) {
230 InitializeCriticalSection(&osi_critSec
[i
]);
231 osi_sleepers
[i
] = NULL
;
232 osi_sleepersEnd
[i
] = NULL
;
236 InitializeCriticalSection(&osi_sleepInfoAllocCS
);
238 /* initialize cookie system */
239 InitializeCriticalSection(&osi_sleepFDCS
);
241 /* register the FD type */
242 typep
= osi_RegisterFDType("sleep", &osi_sleepFDOps
, NULL
);
244 /* add formatting info */
245 osi_AddFDFormatInfo(typep
, OSI_DBRPC_REGIONINT
, 0,
246 "Sleep address", OSI_DBRPC_HEX
);
247 osi_AddFDFormatInfo(typep
, OSI_DBRPC_REGIONINT
, 1,
249 osi_AddFDFormatInfo(typep
, OSI_DBRPC_REGIONINT
, 2,
250 "States", OSI_DBRPC_HEX
);
262 void osi_TWait(osi_turnstile_t
*turnp
, int waitFor
, void *patchp
, DWORD
*tidp
, CRITICAL_SECTION
*releasep
)
264 osi_TWaitExt(turnp
, waitFor
, patchp
, tidp
, releasep
, TRUE
);
/*
 * osi_TWaitExt: block the calling thread on a turnstile.
 *
 * Visible steps:
 *   - fetch the thread's sleep info from the osi_SleepSlot TLS slot; the
 *     listing also shows osi_AllocSleepInfo() + TlsSetValue() and a
 *     clearing of sp->states (presumably the "slot empty" and "slot
 *     reused" cases respectively -- the conditionals are not shown);
 *   - record waitFor and patchp (stored in sp->value as a LONG_PTR);
 *   - link the entry on the turnstile with osi_QAddH or osi_QAddT
 *     (presumably selected by prepend -- TODO confirm);
 *   - leave *releasep, then wait on sp->sema with an INFINITE timeout
 *     until WaitForSingleObject reports WAIT_OBJECT_0;
 *   - assert OSI_SLEEPINFO_SIGNALLED, pass the entry to
 *     osi_FreeSleepInfo(), and re-enter *releasep before returning.
 *
 * NOTE(review): several original lines (declarations, conditionals, the
 * use of tidp) are missing from this listing.
 */
267 void osi_TWaitExt(osi_turnstile_t
*turnp
, int waitFor
, void *patchp
, DWORD
*tidp
, CRITICAL_SECTION
*releasep
, int prepend
)
272 sp
= TlsGetValue(osi_SleepSlot
);
274 sp
= osi_AllocSleepInfo();
275 TlsSetValue(osi_SleepSlot
, sp
);
278 _InterlockedAnd(&sp
->states
, 0);
280 sp
->waitFor
= waitFor
;
281 sp
->value
= (LONG_PTR
) patchp
;
285 osi_QAddH((osi_queue_t
**) &turnp
->firstp
, (osi_queue_t
**) &turnp
->lastp
, &sp
->q
);
287 osi_QAddT((osi_queue_t
**) &turnp
->firstp
, (osi_queue_t
**) &turnp
->lastp
, &sp
->q
);
288 LeaveCriticalSection(releasep
);
290 /* now wait for the signal */
293 code
= WaitForSingleObject(sp
->sema
,
294 /* timeout */ INFINITE
);
296 /* if the reason for the wakeup was that we were signalled,
297 * break out, otherwise try again, since the semaphore count is
298 * decreased only when we get WAIT_OBJECT_0 back.
300 if (code
== WAIT_OBJECT_0
) break;
301 } /* while we're waiting */
303 /* we're the only one who should be looking at or changing this
304 * structure after it gets signalled. Sema sp->sema isn't signalled
305 * any longer after we're back from WaitForSingleObject, so we can
306 * free this element directly.
308 osi_assert(sp
->states
& OSI_SLEEPINFO_SIGNALLED
);
310 osi_FreeSleepInfo(sp
);
312 /* reobtain, since caller commonly needs it */
313 EnterCriticalSection(releasep
);
316 /* must be called with a critical section held that guards the turnstile
317 * structure. We remove the sleepInfo structure from the queue so we don't
318 * wake the guy again, but we don't free it because we're still using the
319 * semaphore until the guy waiting wakes up.
/*
 * osi_TSignal: wake one waiter of a turnstile.
 *
 * Visible steps: unlink sp from the turnstile queue (firstp/lastp), set
 * OSI_SLEEPINFO_SIGNALLED in its states, and release its semaphore once.
 * The structure is not freed here.
 *
 * NOTE(review): the lines that declare and select sp (and any handling of
 * an empty turnstile) are missing from this listing.
 */
321 void osi_TSignal(osi_turnstile_t
*turnp
)
329 osi_QRemoveHT((osi_queue_t
**) &turnp
->firstp
, (osi_queue_t
**) &turnp
->lastp
, &sp
->q
);
330 _InterlockedOr(&sp
->states
, OSI_SLEEPINFO_SIGNALLED
);
331 ReleaseSemaphore(sp
->sema
, 1, NULL
);
334 /* like TSignal, only wake *everyone* */
335 void osi_TBroadcast(osi_turnstile_t
*turnp
)
339 while(sp
= turnp
->lastp
) {
340 osi_QRemoveHT((osi_queue_t
**) &turnp
->firstp
, (osi_queue_t
**) &turnp
->lastp
, &sp
->q
);
341 _InterlockedOr(&sp
->states
, OSI_SLEEPINFO_SIGNALLED
);
342 ReleaseSemaphore(sp
->sema
, 1, NULL
);
343 } /* while someone's still asleep */
346 /* special turnstile signal for mutexes and locks. Wakes up only those who
347 * will really be able to lock the lock. The assumption is that everyone who
348 * already can use the lock has already been woken (and is thus not in the
349 * turnstile any longer).
351 * The stillHaveReaders parm is set to 1 if this is a convert from write to read,
352 * indicating that there is still at least one reader, and we should only wake
353 * up other readers. We use it in a tricky manner: we just pretend we already woke
354 * a reader, and that is sufficient to prevent us from waking a writer.
356 * The crit sec. csp is released before the threads are woken, but after they
357 * are removed from the turnstile. It helps ensure that we won't have a spurious
358 * context swap back to us if the release performs a context swap for some reason.
/*
 * osi_TSignalForMLs: turnstile wakeup used by mutexes and rw locks.
 *
 * Visible structure:
 *   - wokeReader starts out as stillHaveReaders;
 *   - waiters are examined from turnp->lastp; each one selected is
 *     unlinked from the turnstile and the lock word behind tsp->value is
 *     patched on its behalf: for OSI_SLEEPINFO_W4WRITE waiters
 *     OSI_LOCKFLAG_EXCL is or'ed in and tsp->tid stored in tsp->tidp[0];
 *     for OSI_SLEEPINFO_W4READ waiters tsp->tid is stored in
 *     tsp->tidp[*sp] when *sp is below OSI_RWLOCK_THREADS;
 *   - selected waiters are chained through q.nextp onto a private list;
 *   - *csp is left, and only then is each chained waiter marked
 *     OSI_SLEEPINFO_SIGNALLED and its semaphore released.  The successor
 *     pointer is read before the release because the sleeper may free the
 *     entry immediately afterwards.
 *
 * NOTE(review): this listing omits several lines (remaining locals, the
 * initialization of wakeupListp, the stop conditions of the scan and
 * possibly an update of the reader count) -- check the full source before
 * relying on the details above.
 */
360 void osi_TSignalForMLs(osi_turnstile_t
*turnp
, int stillHaveReaders
, CRITICAL_SECTION
*csp
)
362 osi_sleepInfo_t
*tsp
; /* a temp */
363 osi_sleepInfo_t
*nsp
; /* a temp */
364 osi_queue_t
*wakeupListp
; /* list of dudes to wakeup after dropping lock */
369 wokeReader
= stillHaveReaders
;
371 while(tsp
= turnp
->lastp
) {
372 /* look at each sleepInfo until we find someone we're not supposed to
375 if (tsp
->waitFor
& OSI_SLEEPINFO_W4WRITE
) {
382 /* otherwise, we will wake this guy. For now, remove from this list
383 * and move to private one, so we can do the wakeup after releasing
386 osi_QRemoveHT((osi_queue_t
**) &turnp
->firstp
, (osi_queue_t
**) &turnp
->lastp
, &tsp
->q
);
388 /* do the patching required for lock obtaining */
389 if (tsp
->waitFor
& OSI_SLEEPINFO_W4WRITE
) {
390 cp
= (void *) tsp
->value
;
391 (*cp
) |= OSI_LOCKFLAG_EXCL
;
392 tsp
->tidp
[0] = tsp
->tid
;
394 else if (tsp
->waitFor
& OSI_SLEEPINFO_W4READ
) {
395 sp
= (void *) tsp
->value
;
397 if ((*sp
) < OSI_RWLOCK_THREADS
)
398 tsp
->tidp
[*sp
] = tsp
->tid
;
403 /* and add to our own list */
404 tsp
->q
.nextp
= wakeupListp
;
405 wakeupListp
= &tsp
->q
;
407 /* now if we woke a writer, we're done, since it is pointless
408 * to wake more than one writer.
414 /* hit end, or found someone we're not supposed to wakeup */
416 LeaveCriticalSection(csp
);
418 /* finally, wakeup everyone we found. Don't free things since the sleeper
419 * will free the sleepInfo structure.
421 for(tsp
= (osi_sleepInfo_t
*) wakeupListp
; tsp
; tsp
= nsp
) {
422 /* pull this out first, since *tsp *could* get freed immediately
423 * after the ReleaseSemaphore, if a context swap occurs.
425 nsp
= (osi_sleepInfo_t
*) tsp
->q
.nextp
;
426 _InterlockedOr(&tsp
->states
, OSI_SLEEPINFO_SIGNALLED
);
427 ReleaseSemaphore(tsp
->sema
, 1, NULL
);
431 /* utility function to atomically (with respect to WakeSched)
432 * release an atomic counter spin lock and sleep on an
434 * Called with no locks held.
/*
 * osi_SleepSpin: sleep on sleepValue after giving up *releasep.
 *
 * Visible steps:
 *   - obtain the thread's sleep info via the osi_SleepSlot TLS slot (the
 *     listing also shows the allocate-and-store path and a clearing of
 *     sp->states; the conditionals between them are not shown);
 *   - record sleepValue and its hash bucket index in the entry;
 *   - under that bucket's critical section, append the entry to the bucket
 *     queue and set OSI_SLEEPINFO_INHASH;
 *   - leave *releasep, bump the osi_totalSleeps statistic and wait on
 *     sp->sema (INFINITE) until WAIT_OBJECT_0 is returned;
 *   - re-enter the bucket critical section, assert
 *     OSI_SLEEPINFO_SIGNALLED, call osi_FreeSleepInfo() and leave the
 *     bucket critical section.
 *
 * No re-acquisition of *releasep appears in the visible lines.
 * NOTE(review): some original lines are missing from this listing;
 * confirm against the complete source.
 */
436 void osi_SleepSpin(LONG_PTR sleepValue
, CRITICAL_SECTION
*releasep
)
440 CRITICAL_SECTION
*csp
;
442 sp
= TlsGetValue(osi_SleepSlot
);
444 sp
= osi_AllocSleepInfo();
445 TlsSetValue(osi_SleepSlot
, sp
);
448 _InterlockedAnd(&sp
->states
, 0);
451 sp
->value
= sleepValue
;
453 sp
->idx
= osi_SLEEPHASH(sleepValue
);
454 csp
= &osi_critSec
[sp
->idx
];
455 EnterCriticalSection(csp
);
456 osi_QAddT((osi_queue_t
**) &osi_sleepers
[sp
->idx
], (osi_queue_t
**) &osi_sleepersEnd
[sp
->idx
], &sp
->q
);
457 _InterlockedOr(&sp
->states
, OSI_SLEEPINFO_INHASH
);
458 LeaveCriticalSection(csp
);
459 LeaveCriticalSection(releasep
);
460 InterlockedIncrement(&osi_totalSleeps
); /* stats */
463 code
= WaitForSingleObject(sp
->sema
,
464 /* timeout */ INFINITE
);
466 /* if the reason for the wakeup was that we were signalled,
467 * break out, otherwise try again, since the semaphore count is
468 * decreased only when we get WAIT_OBJECT_0 back.
470 if (code
== WAIT_OBJECT_0
) break;
474 EnterCriticalSection(csp
);
476 /* must be signalled */
477 osi_assert(sp
->states
& OSI_SLEEPINFO_SIGNALLED
);
479 /* free the sleep structure, must be done under bucket lock
480 * so that we can check reference count and serialize with
481 * those who change it.
483 osi_FreeSleepInfo(sp
);
485 LeaveCriticalSection(csp
);
488 /* utility function to wakeup someone sleeping in SleepSched */
489 void osi_WakeupSpin(LONG_PTR sleepValue
)
492 CRITICAL_SECTION
*csp
;
493 osi_sleepInfo_t
*tsp
;
495 idx
= osi_SLEEPHASH(sleepValue
);
496 csp
= &osi_critSec
[idx
];
497 EnterCriticalSection(csp
);
498 for(tsp
=osi_sleepers
[idx
]; tsp
; tsp
=(osi_sleepInfo_t
*) osi_QNext(&tsp
->q
)) {
499 if ((!(tsp
->states
& (OSI_SLEEPINFO_DELETED
|OSI_SLEEPINFO_SIGNALLED
)))
500 && tsp
->value
== sleepValue
) {
501 _InterlockedOr(&tsp
->states
, OSI_SLEEPINFO_SIGNALLED
);
502 ReleaseSemaphore(tsp
->sema
, 1, NULL
);
505 LeaveCriticalSection(csp
);
508 void osi_Sleep(LONG_PTR sleepVal
)
510 CRITICAL_SECTION
*csp
;
512 /* may as well save some code by using SleepSched again */
513 csp
= &osi_baseAtomicCS
[0];
514 EnterCriticalSection(csp
);
515 osi_SleepSpin(sleepVal
, csp
);
518 void osi_Wakeup(LONG_PTR sleepVal
)
520 /* how do we do osi_Wakeup on a per-lock package type? */
522 osi_WakeupSpin(sleepVal
);
/*
 * osi_SleepFDCreate: create callback for the "sleep" FD type.
 *
 * Visible steps: allocates an osi_sleepFD_t with malloc and zero-fills it.
 *
 * NOTE(review): the rest of the body (how the new FD is handed back
 * through outpp and the return value) is missing from this listing; the
 * malloc result is not checked in the visible lines.
 */
525 long osi_SleepFDCreate(osi_fdType_t
*fdTypep
, osi_fd_t
**outpp
)
529 cp
= (osi_sleepFD_t
*)malloc(sizeof(*cp
));
530 memset((void *) cp
, 0, sizeof(*cp
));
539 long osi_SleepFDClose(osi_fd_t
*cp
)
545 /* called with osi_sleepFDCS locked; returns with same, so that
546 * we know that the sleep info pointed to by the cookie won't change
547 * until the caller releases the lock.
/*
 * osi_AdvanceSleepFD: move a sleep FD cursor to the next sleep info entry.
 *
 * Visible structure: iterates over hash buckets while idx is below
 * OSI_SLEEPHASHSIZE, entering osi_critSec[idx] for the bucket under
 * inspection (oidx remembers which bucket was locked so the same one is
 * left).  When the FD holds no entry (cp->sip is NULL) the scan starts at
 * the bucket head; otherwise the successor of the held entry is read
 * before the held entry is passed to osi_ReleaseSleepInfo().  Entries the
 * cursor lands on get their refCount incremented.
 *
 * NOTE(review): many original lines (how idx is derived from the held
 * entry, the updates of cp->sip, the loop exits) are missing from this
 * listing; treat the summary above as partial.
 */
549 void osi_AdvanceSleepFD(osi_sleepFD_t
*cp
)
551 int idx
; /* index we're dealing with */
552 int oidx
; /* index we locked */
553 osi_sleepInfo_t
*sip
;
554 osi_sleepInfo_t
*nsip
;
556 idx
= 0; /* so we go around once safely */
558 while(idx
< OSI_SLEEPHASHSIZE
) {
559 /* cp->sip should be held */
561 EnterCriticalSection(&osi_critSec
[idx
]);
562 oidx
= idx
; /* remember original index; that's the one we locked */
564 /* if there's a sleep info structure in the FD, it should be held; it
565 * is the one we just processed, so we want to move on to the next.
566 * If not, then we want to process the chain in the bucket idx points
569 if ((sip
= cp
->sip
) == NULL
) {
570 sip
= osi_sleepers
[idx
];
573 InterlockedIncrement(&sip
->refCount
);
576 /* it is safe to release the current sleep info guy now
577 * since we hold the bucket lock. Pull next guy out first,
578 * since if sip is deleted, Release will move him into
581 nsip
= (osi_sleepInfo_t
*) sip
->q
.nextp
;
582 osi_ReleaseSleepInfo(sip
);
586 InterlockedIncrement(&sip
->refCount
);
592 LeaveCriticalSection(&osi_critSec
[oidx
]);
594 /* now, if we advanced to a new sleep info structure, we're
595 * done, otherwise we continue and look at the next hash bucket
596 * until we're out of them.
/*
 * osi_SleepFDGetInfo: GetInfo callback for the "sleep" FD type.
 *
 * Under osi_sleepFDCS, advances the FD with osi_AdvanceSleepFD() and, if
 * the FD then references an entry, copies that entry's value, tid and
 * states into parmsp->idata[0], [1] and [2].  When there is no entry the
 * result code is OSI_DBRPC_EOF.
 *
 * NOTE(review): the declaration of code, its success value, any other
 * fields filled in parmsp and the return statement are missing from this
 * listing.
 */
604 long osi_SleepFDGetInfo(osi_fd_t
*ifdp
, osi_remGetInfoParms_t
*parmsp
)
606 osi_sleepFD_t
*fdp
= (osi_sleepFD_t
*) ifdp
;
607 osi_sleepInfo_t
*sip
;
610 /* now, grab a mutex serializing all iterations over FDs, so that
611 * if the RPC screws up and sends us two calls on the same FD, we don't
612 * crash and burn advancing the same FD concurrently. Probably paranoia,
613 * but you generally shouldn't trust stuff coming over the network.
615 EnterCriticalSection(&osi_sleepFDCS
);
617 /* this next call advances the FD to the next guy, and simultaneously validates
618 * that the info from the network is valid. If it isn't, we do our best to
619 * resynchronize our position, but we might return some info multiple times.
621 osi_AdvanceSleepFD(fdp
);
623 /* now copy out info */
624 if (sip
= fdp
->sip
) { /* one '=' */
625 parmsp
->idata
[0] = sip
->value
;
626 parmsp
->idata
[1] = sip
->tid
;
627 parmsp
->idata
[2] = sip
->states
;
632 else code
= OSI_DBRPC_EOF
;
634 LeaveCriticalSection(&osi_sleepFDCS
);
639 /* finally, DLL-specific code for NT */
640 BOOL APIENTRY
DLLMain(HANDLE inst
, DWORD why
, char *reserved
)
645 /* some misc functions for setting hash table sizes */
/* return true iff x is prime
 *
 * Returns 1 when x is a prime number and 0 otherwise.  0 and 1 are
 * reported as not prime.  Trial division by odd candidates only has to
 * run up to the square root of x; the bound is written as c <= x / c
 * rather than c * c <= x so that it cannot overflow when unsigned long
 * is 32 bits wide.
 */
int osi_IsPrime(unsigned long x)
{
    unsigned long c;

    /* 0 and 1 are not prime */
    if (x < 2)
        return 0;

    /* even numbers aren't prime */
    if ((x & 1) == 0 && x != 2)
        return 0;

    for (c = 3; c <= x / c; c += 2) {
        /* see if x is divisible by c */
        if ((x % c) == 0)
            return 0;	/* yup, it ain't prime */
    }

    /* no divisor up to sqrt(x): x is prime */
    return 1;
}
671 /* return first prime number less than or equal to x */
/*
 * osi_PrimeLessThan: search downward from x for a prime.
 *
 * The visible loop runs c from x down to 2 (it stops once c is no longer
 * greater than 1).
 *
 * NOTE(review): the loop body and the value returned when the loop finds
 * nothing (x below 2) are not part of this listing; presumably each c is
 * tested for primality and the first hit is returned -- confirm.
 */
672 unsigned long osi_PrimeLessThan(unsigned long x
) {
675 for(c
= x
; c
> 1; c
--) {
684 /* return the # of seconds since some fixed date */
685 unsigned long osi_GetBootTime(void)
/* Callback invoked by osi_panic() with the message, file name and line
 * number; NULL until osi_InitPanic() installs one.
 */
static int (*notifFunc)(char *, char *, long) = NULL;

/* Install the panic notification callback.  The pointer is supplied as a
 * void * and stored in notifFunc as given; nothing is called here.
 */
void osi_InitPanic(void *anotifFunc)
{
    notifFunc = anotifFunc;
}
697 void osi_panic(char *msgp
, char *filep
, long line
)
700 (*notifFunc
)(msgp
, filep
, line
);
702 osi_LogPanic(msgp
, filep
, line
);
705 /* get time in seconds since some relatively recent time */
/*
 * osi_Time: current time in whole seconds on the package's private epoch.
 *
 * Reads the system time as a FILETIME (100 ns units), subtracts 28000000
 * from the high 32 bits to rebase it, and divides the 64-bit value by
 * 10,000,000 to get seconds.
 *
 * NOTE(review): two return statements are visible (QuadPart and LowPart);
 * the preprocessor lines that choose between them, and the declaration of
 * fileTime, are missing from this listing.
 */
706 time_t osi_Time(void)
709 unsigned long remainder
;
710 LARGE_INTEGER bootTime
;
712 /* setup boot time values */
713 GetSystemTimeAsFileTime(&fileTime
);
715 /* change the base of the time so it won't be negative for a long time */
716 fileTime
.dwHighDateTime
-= 28000000;
718 bootTime
.HighPart
= fileTime
.dwHighDateTime
;
719 bootTime
.LowPart
= fileTime
.dwLowDateTime
;
720 /* now, bootTime is in 100 nanosecond units, and we'd really rather
721 * have it in 1 second units, units 10,000,000 times bigger.
724 bootTime
= ExtendedLargeIntegerDivide(bootTime
, 10000000, &remainder
);
726 return bootTime
.QuadPart
;
728 return bootTime
.LowPart
;
732 /* get time in seconds since some relatively recent time */
733 void osi_GetTime(long *timesp
)
736 unsigned long remainder
;
737 LARGE_INTEGER bootTime
;
739 /* setup boot time values */
740 GetSystemTimeAsFileTime(&fileTime
);
742 /* change the base of the time so it won't be negative for a long time */
743 fileTime
.dwHighDateTime
-= 28000000;
745 bootTime
.HighPart
= fileTime
.dwHighDateTime
;
746 bootTime
.LowPart
= fileTime
.dwLowDateTime
;
747 /* now, bootTime is in 100 nanosecond units, and we'd really rather
748 * have it in 1 microsecond units, units 10 times bigger.
751 bootTime
= ExtendedLargeIntegerDivide(bootTime
, 10, &remainder
);
752 bootTime
= ExtendedLargeIntegerDivide(bootTime
, 1000000, &remainder
);
753 timesp
[0] = bootTime
.LowPart
; /* seconds */
754 timesp
[1] = remainder
; /* microseconds */