2 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
8 #include "IOSchedulerSimple.h"
18 #include <thread_types.h>
20 #include <util/AutoLock.h>
22 #include "IOSchedulerRoster.h"
25 //#define TRACE_IO_SCHEDULER
26 #ifdef TRACE_IO_SCHEDULER
27 # define TRACE(x...) dprintf(x)
29 # define TRACE(x...) ;
37 IORequestOwner::Dump() const
39 kprintf("IORequestOwner at %p\n", this);
40 kprintf(" team: %" B_PRId32
"\n", team
);
41 kprintf(" thread: %" B_PRId32
"\n", thread
);
42 kprintf(" priority: %" B_PRId32
"\n", priority
);
44 kprintf(" requests:");
45 for (IORequestList::ConstIterator it
= requests
.GetIterator();
46 IORequest
* request
= it
.Next();) {
47 kprintf(" %p", request
);
51 kprintf(" completed requests:");
52 for (IORequestList::ConstIterator it
= completed_requests
.GetIterator();
53 IORequest
* request
= it
.Next();) {
54 kprintf(" %p", request
);
58 kprintf(" operations:");
59 for (IOOperationList::ConstIterator it
= operations
.GetIterator();
60 IOOperation
* operation
= it
.Next();) {
61 kprintf(" %p", operation
);
70 struct IOSchedulerSimple::RequestOwnerHashDefinition
{
71 typedef thread_id KeyType
;
72 typedef IORequestOwner ValueType
;
74 size_t HashKey(thread_id key
) const { return key
; }
75 size_t Hash(const IORequestOwner
* value
) const { return value
->thread
; }
76 bool Compare(thread_id key
, const IORequestOwner
* value
) const
77 { return value
->thread
== key
; }
78 IORequestOwner
*& GetLink(IORequestOwner
* value
) const
79 { return value
->hash_link
; }
82 struct IOSchedulerSimple::RequestOwnerHashTable
83 : BOpenHashTable
<RequestOwnerHashDefinition
, false> {
87 IOSchedulerSimple::IOSchedulerSimple(DMAResource
* resource
)
89 IOScheduler(resource
),
91 fRequestNotifierThread(-1),
92 fOperationArray(NULL
),
93 fAllocatedRequestOwners(NULL
),
96 fPendingOperations(0),
99 mutex_init(&fLock
, "I/O scheduler");
100 B_INITIALIZE_SPINLOCK(&fFinisherLock
);
102 fNewRequestCondition
.Init(this, "I/O new request");
103 fFinishedOperationCondition
.Init(this, "I/O finished operation");
104 fFinishedRequestCondition
.Init(this, "I/O finished request");
109 IOSchedulerSimple::~IOSchedulerSimple()
112 MutexLocker
locker(fLock
);
113 InterruptsSpinLocker
finisherLocker(fFinisherLock
);
116 fNewRequestCondition
.NotifyAll();
117 fFinishedOperationCondition
.NotifyAll();
118 fFinishedRequestCondition
.NotifyAll();
120 finisherLocker
.Unlock();
123 if (fSchedulerThread
>= 0)
124 wait_for_thread(fSchedulerThread
, NULL
);
126 if (fRequestNotifierThread
>= 0)
127 wait_for_thread(fRequestNotifierThread
, NULL
);
129 // destroy our belongings
131 mutex_destroy(&fLock
);
133 while (IOOperation
* operation
= fUnusedOperations
.RemoveHead())
136 delete[] fOperationArray
;
138 delete fRequestOwners
;
139 delete[] fAllocatedRequestOwners
;
144 IOSchedulerSimple::Init(const char* name
)
146 status_t error
= IOScheduler::Init(name
);
150 size_t count
= fDMAResource
!= NULL
? fDMAResource
->BufferCount() : 16;
151 for (size_t i
= 0; i
< count
; i
++) {
152 IOOperation
* operation
= new(std::nothrow
) IOOperation
;
153 if (operation
== NULL
)
156 fUnusedOperations
.Add(operation
);
159 fOperationArray
= new(std::nothrow
) IOOperation
*[count
];
161 if (fDMAResource
!= NULL
)
162 fBlockSize
= fDMAResource
->BlockSize();
166 fAllocatedRequestOwnerCount
= thread_max_threads();
167 fAllocatedRequestOwners
168 = new(std::nothrow
) IORequestOwner
[fAllocatedRequestOwnerCount
];
169 if (fAllocatedRequestOwners
== NULL
)
172 for (int32 i
= 0; i
< fAllocatedRequestOwnerCount
; i
++) {
173 IORequestOwner
& owner
= fAllocatedRequestOwners
[i
];
176 owner
.priority
= B_IDLE_PRIORITY
;
177 fUnusedRequestOwners
.Add(&owner
);
180 fRequestOwners
= new(std::nothrow
) RequestOwnerHashTable
;
181 if (fRequestOwners
== NULL
)
184 error
= fRequestOwners
->Init(fAllocatedRequestOwnerCount
);
188 // TODO: Use a device speed dependent bandwidths!
189 fIterationBandwidth
= fBlockSize
* 8192;
190 fMinOwnerBandwidth
= fBlockSize
* 1024;
191 fMaxOwnerBandwidth
= fBlockSize
* 4096;
194 char buffer
[B_OS_NAME_LENGTH
];
195 strlcpy(buffer
, name
, sizeof(buffer
));
196 strlcat(buffer
, " scheduler ", sizeof(buffer
));
197 size_t nameLength
= strlen(buffer
);
198 snprintf(buffer
+ nameLength
, sizeof(buffer
) - nameLength
, "%" B_PRId32
,
200 fSchedulerThread
= spawn_kernel_thread(&_SchedulerThread
, buffer
,
201 B_NORMAL_PRIORITY
+ 2, (void *)this);
202 if (fSchedulerThread
< B_OK
)
203 return fSchedulerThread
;
205 strlcpy(buffer
, name
, sizeof(buffer
));
206 strlcat(buffer
, " notifier ", sizeof(buffer
));
207 nameLength
= strlen(buffer
);
208 snprintf(buffer
+ nameLength
, sizeof(buffer
) - nameLength
, "%" B_PRId32
,
210 fRequestNotifierThread
= spawn_kernel_thread(&_RequestNotifierThread
,
211 buffer
, B_NORMAL_PRIORITY
+ 2, (void *)this);
212 if (fRequestNotifierThread
< B_OK
)
213 return fRequestNotifierThread
;
215 resume_thread(fSchedulerThread
);
216 resume_thread(fRequestNotifierThread
);
223 IOSchedulerSimple::ScheduleRequest(IORequest
* request
)
225 TRACE("%p->IOSchedulerSimple::ScheduleRequest(%p)\n", this, request
);
227 IOBuffer
* buffer
= request
->Buffer();
229 // TODO: it would be nice to be able to lock the memory later, but we can't
230 // easily do it in the I/O scheduler without being able to asynchronously
231 // lock memory (via another thread or a dedicated call).
233 if (buffer
->IsVirtual()) {
234 status_t status
= buffer
->LockMemory(request
->TeamID(),
236 if (status
!= B_OK
) {
237 request
->SetStatusAndNotify(status
);
242 MutexLocker
locker(fLock
);
244 IORequestOwner
* owner
= _GetRequestOwner(request
->TeamID(),
245 request
->ThreadID(), true);
247 panic("IOSchedulerSimple: Out of request owners!\n");
249 if (buffer
->IsVirtual())
250 buffer
->UnlockMemory(request
->TeamID(), request
->IsWrite());
251 request
->SetStatusAndNotify(B_NO_MEMORY
);
255 bool wasActive
= owner
->IsActive();
256 request
->SetOwner(owner
);
257 owner
->requests
.Add(request
);
259 int32 priority
= thread_get_io_priority(request
->ThreadID());
261 owner
->priority
= priority
;
262 //dprintf(" request %p -> owner %p (thread %ld, active %d)\n", request, owner, owner->thread, wasActive);
265 fActiveRequestOwners
.Add(owner
);
267 IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_REQUEST_SCHEDULED
, this,
270 fNewRequestCondition
.NotifyAll();
277 IOSchedulerSimple::AbortRequest(IORequest
* request
, status_t status
)
285 IOSchedulerSimple::OperationCompleted(IOOperation
* operation
, status_t status
,
286 generic_size_t transferredBytes
)
288 InterruptsSpinLocker
_(fFinisherLock
);
290 // finish operation only once
291 if (operation
->Status() <= 0)
294 operation
->SetStatus(status
);
296 // set the bytes transferred (of the net data)
297 generic_size_t partialBegin
298 = operation
->OriginalOffset() - operation
->Offset();
299 operation
->SetTransferredBytes(
300 transferredBytes
> partialBegin
? transferredBytes
- partialBegin
: 0);
302 fCompletedOperations
.Add(operation
);
303 fFinishedOperationCondition
.NotifyAll();
308 IOSchedulerSimple::Dump() const
310 kprintf("IOSchedulerSimple at %p\n", this);
311 kprintf(" DMA resource: %p\n", fDMAResource
);
313 kprintf(" active request owners:");
314 for (RequestOwnerList::ConstIterator it
315 = fActiveRequestOwners
.GetIterator();
316 IORequestOwner
* owner
= it
.Next();) {
317 kprintf(" %p", owner
);
323 /*! Must not be called with the fLock held. */
325 IOSchedulerSimple::_Finisher()
328 InterruptsSpinLocker
locker(fFinisherLock
);
329 IOOperation
* operation
= fCompletedOperations
.RemoveHead();
330 if (operation
== NULL
)
335 TRACE("IOSchedulerSimple::_Finisher(): operation: %p\n", operation
);
337 bool operationFinished
= operation
->Finish();
339 IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_OPERATION_FINISHED
,
340 this, operation
->Parent(), operation
);
341 // Notify for every time the operation is passed to the I/O hook,
342 // not only when it is fully finished.
344 if (!operationFinished
) {
345 TRACE(" operation: %p not finished yet\n", operation
);
346 MutexLocker
_(fLock
);
347 operation
->SetTransferredBytes(0);
348 operation
->Parent()->Owner()->operations
.Add(operation
);
349 fPendingOperations
--;
353 // notify request and remove operation
354 IORequest
* request
= operation
->Parent();
356 generic_size_t operationOffset
357 = operation
->OriginalOffset() - request
->Offset();
358 request
->OperationFinished(operation
, operation
->Status(),
359 operation
->TransferredBytes() < operation
->OriginalLength(),
360 operation
->Status() == B_OK
361 ? operationOffset
+ operation
->OriginalLength()
364 // recycle the operation
365 MutexLocker
_(fLock
);
366 if (fDMAResource
!= NULL
)
367 fDMAResource
->RecycleBuffer(operation
->Buffer());
369 fPendingOperations
--;
370 fUnusedOperations
.Add(operation
);
372 // If the request is done, we need to perform its notifications.
373 if (request
->IsFinished()) {
374 if (request
->Status() == B_OK
&& request
->RemainingBytes() > 0) {
375 // The request has been processed OK so far, but it isn't really
377 request
->SetUnfinished();
379 // Remove the request from the request owner.
380 IORequestOwner
* owner
= request
->Owner();
381 owner
->requests
.MoveFrom(&owner
->completed_requests
);
382 owner
->requests
.Remove(request
);
383 request
->SetOwner(NULL
);
385 if (!owner
->IsActive()) {
386 fActiveRequestOwners
.Remove(owner
);
387 fUnusedRequestOwners
.Add(owner
);
390 if (request
->HasCallbacks()) {
391 // The request has callbacks that may take some time to
392 // perform, so we hand it over to the request notifier.
393 fFinishedRequests
.Add(request
);
394 fFinishedRequestCondition
.NotifyAll();
396 // No callbacks -- finish the request right now.
397 IOSchedulerRoster::Default()->Notify(
398 IO_SCHEDULER_REQUEST_FINISHED
, this, request
);
399 request
->NotifyFinished();
407 /*! Called with \c fFinisherLock held.
410 IOSchedulerSimple::_FinisherWorkPending()
412 return !fCompletedOperations
.IsEmpty();
417 IOSchedulerSimple::_PrepareRequestOperations(IORequest
* request
,
418 IOOperationList
& operations
, int32
& operationsPrepared
, off_t quantum
,
419 off_t
& usedBandwidth
)
421 //dprintf("IOSchedulerSimple::_PrepareRequestOperations(%p)\n", request);
424 if (fDMAResource
!= NULL
) {
425 while (quantum
>= (off_t
)fBlockSize
&& request
->RemainingBytes() > 0) {
426 IOOperation
* operation
= fUnusedOperations
.RemoveHead();
427 if (operation
== NULL
)
430 status_t status
= fDMAResource
->TranslateNext(request
, operation
,
432 if (status
!= B_OK
) {
433 operation
->SetParent(NULL
);
434 fUnusedOperations
.Add(operation
);
436 // B_BUSY means some resource (DMABuffers or
437 // DMABounceBuffers) was temporarily unavailable. That's OK,
438 // we'll retry later.
439 if (status
== B_BUSY
)
442 AbortRequest(request
, status
);
445 //dprintf(" prepared operation %p\n", operation);
447 off_t bandwidth
= operation
->Length();
448 quantum
-= bandwidth
;
449 usedBandwidth
+= bandwidth
;
451 operations
.Add(operation
);
452 operationsPrepared
++;
455 // TODO: If the device has block size restrictions, we might need to use
457 IOOperation
* operation
= fUnusedOperations
.RemoveHead();
458 if (operation
== NULL
)
461 status_t status
= operation
->Prepare(request
);
462 if (status
!= B_OK
) {
463 operation
->SetParent(NULL
);
464 fUnusedOperations
.Add(operation
);
465 AbortRequest(request
, status
);
469 operation
->SetOriginalRange(request
->Offset(), request
->Length());
470 request
->Advance(request
->Length());
472 off_t bandwidth
= operation
->Length();
473 quantum
-= bandwidth
;
474 usedBandwidth
+= bandwidth
;
476 operations
.Add(operation
);
477 operationsPrepared
++;
485 IOSchedulerSimple::_ComputeRequestOwnerBandwidth(int32 priority
) const
487 // TODO: Use a priority dependent quantum!
488 return fMinOwnerBandwidth
;
493 IOSchedulerSimple::_NextActiveRequestOwner(IORequestOwner
*& owner
,
501 owner
= fActiveRequestOwners
.GetNext(owner
);
503 owner
= fActiveRequestOwners
.Head();
506 quantum
= _ComputeRequestOwnerBandwidth(owner
->priority
);
510 // Wait for new requests owners. First check whether any finisher work
512 InterruptsSpinLocker
finisherLocker(fFinisherLock
);
513 if (_FinisherWorkPending()) {
514 finisherLocker
.Unlock();
515 mutex_unlock(&fLock
);
521 // Wait for new requests.
522 ConditionVariableEntry entry
;
523 fNewRequestCondition
.Add(&entry
);
525 finisherLocker
.Unlock();
526 mutex_unlock(&fLock
);
528 entry
.Wait(B_CAN_INTERRUPT
);
535 struct OperationComparator
{
536 inline bool operator()(const IOOperation
* a
, const IOOperation
* b
)
538 off_t offsetA
= a
->Offset();
539 off_t offsetB
= b
->Offset();
540 return offsetA
< offsetB
541 || (offsetA
== offsetB
&& a
->Length() > b
->Length());
547 IOSchedulerSimple::_SortOperations(IOOperationList
& operations
,
550 // TODO: _Scheduler() could directly add the operations to the array.
551 // move operations to an array and sort it
553 while (IOOperation
* operation
= operations
.RemoveHead())
554 fOperationArray
[count
++] = operation
;
556 std::sort(fOperationArray
, fOperationArray
+ count
, OperationComparator());
558 // move the sorted operations to a temporary list we can work with
559 //dprintf("operations after sorting:\n");
560 IOOperationList sortedOperations
;
561 for (int32 i
= 0; i
< count
; i
++)
563 //dprintf(" %3ld: %p: offset: %lld, length: %lu\n", i, fOperationArray[i], fOperationArray[i]->Offset(), fOperationArray[i]->Length());
564 sortedOperations
.Add(fOperationArray
[i
]);
567 // Sort the operations so that no two adjacent operations overlap. This
568 // might result in several elevator runs.
569 while (!sortedOperations
.IsEmpty()) {
570 IOOperation
* operation
= sortedOperations
.Head();
571 while (operation
!= NULL
) {
572 IOOperation
* nextOperation
= sortedOperations
.GetNext(operation
);
573 if (operation
->Offset() >= lastOffset
) {
574 sortedOperations
.Remove(operation
);
575 //dprintf(" adding operation %p\n", operation);
576 operations
.Add(operation
);
577 lastOffset
= operation
->Offset() + operation
->Length();
580 operation
= nextOperation
;
583 if (!sortedOperations
.IsEmpty())
590 IOSchedulerSimple::_Scheduler()
592 IORequestOwner marker
;
595 MutexLocker
locker(fLock
);
596 fActiveRequestOwners
.Add(&marker
, false);
599 off_t lastOffset
= 0;
601 IORequestOwner
* owner
= NULL
;
604 while (!fTerminating
) {
605 //dprintf("IOSchedulerSimple::_Scheduler(): next iteration: request owner: %p, quantum: %lld\n", owner, quantum);
606 MutexLocker
locker(fLock
);
608 IOOperationList operations
;
609 int32 operationCount
= 0;
610 bool resourcesAvailable
= true;
611 off_t iterationBandwidth
= fIterationBandwidth
;
614 owner
= fActiveRequestOwners
.GetPrevious(&marker
);
616 fActiveRequestOwners
.Remove(&marker
);
619 if (owner
== NULL
|| quantum
< (off_t
)fBlockSize
) {
620 if (!_NextActiveRequestOwner(owner
, quantum
)) {
621 // we've been asked to terminate
626 while (resourcesAvailable
&& iterationBandwidth
>= (off_t
)fBlockSize
) {
627 //dprintf("IOSchedulerSimple::_Scheduler(): request owner: %p (thread %ld)\n",
628 //owner, owner->thread);
629 // Prepare operations for the owner.
631 // There might still be unfinished ones.
632 while (IOOperation
* operation
= owner
->operations
.RemoveHead()) {
633 // TODO: We might actually grant the owner more bandwidth than
635 // TODO: We should make sure that after the first read operation
636 // of a partial write, no other write operation to the same
637 // location is scheduled!
638 operations
.Add(operation
);
640 off_t bandwidth
= operation
->Length();
641 quantum
-= bandwidth
;
642 iterationBandwidth
-= bandwidth
;
644 if (quantum
< (off_t
)fBlockSize
645 || iterationBandwidth
< (off_t
)fBlockSize
) {
650 while (resourcesAvailable
&& quantum
>= (off_t
)fBlockSize
651 && iterationBandwidth
>= (off_t
)fBlockSize
) {
652 IORequest
* request
= owner
->requests
.Head();
653 if (request
== NULL
) {
654 resourcesAvailable
= false;
655 if (operationCount
== 0)
656 panic("no more requests for owner %p (thread %" B_PRId32
")", owner
, owner
->thread
);
661 resourcesAvailable
= _PrepareRequestOperations(request
,
662 operations
, operationCount
, quantum
, bandwidth
);
663 quantum
-= bandwidth
;
664 iterationBandwidth
-= bandwidth
;
665 if (request
->RemainingBytes() == 0 || request
->Status() <= 0) {
666 // If the request has been completed, move it to the
667 // completed list, so we don't pick it up again.
668 owner
->requests
.Remove(request
);
669 owner
->completed_requests
.Add(request
);
673 // Get the next owner.
674 if (resourcesAvailable
)
675 _NextActiveRequestOwner(owner
, quantum
);
678 // If the current owner doesn't have anymore requests, we have to
679 // insert our marker, since the owner will be gone in the next
681 if (owner
->requests
.IsEmpty()) {
682 fActiveRequestOwners
.Insert(owner
, &marker
);
686 if (operations
.IsEmpty())
689 fPendingOperations
= operationCount
;
693 // sort the operations
694 _SortOperations(operations
, lastOffset
);
696 // execute the operations
697 #ifdef TRACE_IO_SCHEDULER
700 while (IOOperation
* operation
= operations
.RemoveHead()) {
701 TRACE("IOSchedulerSimple::_Scheduler(): calling callback for "
702 "operation %ld: %p\n", i
++, operation
);
704 IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_OPERATION_STARTED
,
705 this, operation
->Parent(), operation
);
707 fIOCallback(fIOCallbackData
, operation
);
712 // wait for all operations to finish
713 while (!fTerminating
) {
716 if (fPendingOperations
== 0)
719 // Before waiting first check whether any finisher work has to be
721 InterruptsSpinLocker
finisherLocker(fFinisherLock
);
722 if (_FinisherWorkPending()) {
723 finisherLocker
.Unlock();
729 // wait for finished operations
730 ConditionVariableEntry entry
;
731 fFinishedOperationCondition
.Add(&entry
);
733 finisherLocker
.Unlock();
736 entry
.Wait(B_CAN_INTERRUPT
);
746 IOSchedulerSimple::_SchedulerThread(void *_self
)
748 IOSchedulerSimple
*self
= (IOSchedulerSimple
*)_self
;
749 return self
->_Scheduler();
754 IOSchedulerSimple::_RequestNotifier()
757 MutexLocker
locker(fLock
);
760 IORequest
* request
= fFinishedRequests
.RemoveHead();
762 if (request
== NULL
) {
766 ConditionVariableEntry entry
;
767 fFinishedRequestCondition
.Add(&entry
);
777 IOSchedulerRoster::Default()->Notify(IO_SCHEDULER_REQUEST_FINISHED
,
780 // notify the request
781 request
->NotifyFinished();
784 // never can get here
790 IOSchedulerSimple::_RequestNotifierThread(void *_self
)
792 IOSchedulerSimple
*self
= (IOSchedulerSimple
*)_self
;
793 return self
->_RequestNotifier();
798 IOSchedulerSimple::_GetRequestOwner(team_id team
, thread_id thread
,
802 IORequestOwner
* owner
= fRequestOwners
->Lookup(thread
);
803 if (owner
!= NULL
&& !owner
->IsActive())
804 fUnusedRequestOwners
.Remove(owner
);
805 if (owner
!= NULL
|| !allocate
)
808 // not in table -- allocate an unused one
809 RequestOwnerList existingOwners
;
811 while ((owner
= fUnusedRequestOwners
.RemoveHead()) != NULL
) {
812 if (owner
->thread
< 0 || !Thread::IsAlive(owner
->thread
)) {
813 if (owner
->thread
>= 0)
814 fRequestOwners
->RemoveUnchecked(owner
);
816 owner
->thread
= thread
;
817 owner
->priority
= B_IDLE_PRIORITY
;
818 fRequestOwners
->InsertUnchecked(owner
);
822 existingOwners
.Add(owner
);
825 fUnusedRequestOwners
.MoveFrom(&existingOwners
);