/*
 * Copyright 2013-2014, Paweł Dziepak, pdziepak@quarnos.org.
 * Copyright 2009, Rene Gollent, rene@gollent.com.
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Copyright 2002, Angelo Mottola, a.mottola@libero.it.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! The thread scheduler */


#include <OS.h>

#include <AutoDeleter.h>
#include <cpu.h>
#include <debug.h>
#include <int.h>
#include <kernel.h>
#include <kscheduler.h>
#include <listeners.h>
#include <load_tracking.h>
#include <scheduler_defs.h>
#include <smp.h>
#include <timer.h>
#include <util/Random.h>

#include "scheduler_common.h"
#include "scheduler_cpu.h"
#include "scheduler_locking.h"
#include "scheduler_modes.h"
#include "scheduler_profiler.h"
#include "scheduler_thread.h"
#include "scheduler_tracing.h"


namespace Scheduler {


class ThreadEnqueuer : public ThreadProcessing {
public:
	void		operator()(ThreadData* thread);
};

scheduler_mode gCurrentModeID;
scheduler_mode_operations* gCurrentMode;

bool gSingleCore;
bool gTrackCoreLoad;
bool gTrackCPULoad;

}	// namespace Scheduler

using namespace Scheduler;


static bool sSchedulerEnabled;

SchedulerListenerList gSchedulerListeners;
spinlock gSchedulerListenersLock = B_SPINLOCK_INITIALIZER;

static scheduler_mode_operations* sSchedulerModes[] = {
	&gSchedulerLowLatencyMode,
	&gSchedulerPowerSavingMode,
};

// Since CPU IDs used internally by the kernel bear no relation to the actual
// CPU topology, the following arrays are used to efficiently get the core
// and the package that the CPU in question belongs to.
static int32* sCPUToCore;
static int32* sCPUToPackage;
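
// Illustrative lookup (not in the original source): once
// build_topology_mappings() below has filled these arrays, resolving the
// topology of a CPU is just two array reads. `cpuID` is a hypothetical
// kernel CPU index.
//
//	int32 coreID = sCPUToCore[cpuID];
//	int32 packageID = sCPUToPackage[cpuID];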

static void enqueue(Thread* thread, bool newOne);


void
ThreadEnqueuer::operator()(ThreadData* thread)
{
	enqueue(thread->GetThread(), false);
}


void
scheduler_dump_thread_data(Thread* thread)
{
	thread->scheduler_data->Dump();
}


static void
enqueue(Thread* thread, bool newOne)
{
	SCHEDULER_ENTER_FUNCTION();

	ThreadData* threadData = thread->scheduler_data;

	int32 threadPriority = threadData->GetEffectivePriority();
	T(EnqueueThread(thread, threadPriority));

	CPUEntry* targetCPU = NULL;
	CoreEntry* targetCore = NULL;
	if (thread->pinned_to_cpu > 0) {
		ASSERT(thread->previous_cpu != NULL);
		ASSERT(threadData->Core() != NULL);
		targetCPU = &gCPUEntries[thread->previous_cpu->cpu_num];
	} else if (gSingleCore)
		targetCore = &gCoreEntries[0];
	else if (threadData->Core() != NULL
		&& (!newOne || !threadData->HasCacheExpired())) {
		targetCore = threadData->Rebalance();
	}

	bool rescheduleNeeded = threadData->ChooseCoreAndCPU(targetCore, targetCPU);

	TRACE("enqueueing thread %ld with priority %ld on CPU %ld (core %ld)\n",
		thread->id, threadPriority, targetCPU->fCPUNumber, targetCore->fCoreID);

	threadData->Enqueue();

	// notify listeners
	NotifySchedulerListeners(&SchedulerListener::ThreadEnqueuedInRunQueue,
		thread);

	int32 heapPriority = CPUPriorityHeap::GetKey(targetCPU);
	if (threadPriority > heapPriority
		|| (threadPriority == heapPriority && rescheduleNeeded)) {

		if (targetCPU->ID() == smp_get_current_cpu())
			gCPU[targetCPU->ID()].invoke_scheduler = true;
		else {
			smp_send_ici(targetCPU->ID(), SMP_MSG_RESCHEDULE, 0, 0, 0,
				NULL, SMP_MSG_FLAG_ASYNC);
		}
	}
}
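
// Worked example (illustrative, not from the original source): if the target
// CPU's priority heap key is B_NORMAL_PRIORITY (10) and the enqueued thread's
// effective priority is 15, the check above fires: the local CPU merely sets
// invoke_scheduler, while a remote CPU is poked with an asynchronous
// SMP_MSG_RESCHEDULE ICI, so the higher-priority thread need not wait for the
// running thread's quantum to end.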


/*!	Enqueues the thread into the run queue.
	Note: thread lock must be held when entering this function
*/
void
scheduler_enqueue_in_run_queue(Thread *thread)
{
	ASSERT(!are_interrupts_enabled());
	SCHEDULER_ENTER_FUNCTION();

	SchedulerModeLocker _;

	TRACE("enqueueing new thread %ld with static priority %ld\n", thread->id,
		thread->priority);

	ThreadData* threadData = thread->scheduler_data;

	if (threadData->ShouldCancelPenalty())
		threadData->CancelPenalty();

	enqueue(thread, true);
}


/*!	Sets the priority of a thread.
*/
int32
scheduler_set_thread_priority(Thread *thread, int32 priority)
{
	ASSERT(are_interrupts_enabled());

	InterruptsSpinLocker _(thread->scheduler_lock);
	SchedulerModeLocker modeLocker;

	SCHEDULER_ENTER_FUNCTION();

	ThreadData* threadData = thread->scheduler_data;
	int32 oldPriority = thread->priority;

	TRACE("changing thread %ld priority to %ld (old: %ld, effective: %ld)\n",
		thread->id, priority, oldPriority, threadData->GetEffectivePriority());

	thread->priority = priority;
	threadData->CancelPenalty();

	if (priority == oldPriority)
		return oldPriority;

	if (thread->state != B_THREAD_READY) {
		if (thread->state == B_THREAD_RUNNING) {
			ASSERT(threadData->Core() != NULL);

			ASSERT(thread->cpu != NULL);
			CPUEntry* cpu = &gCPUEntries[thread->cpu->cpu_num];

			CoreCPUHeapLocker _(threadData->Core());
			cpu->UpdatePriority(priority);
		}

		return oldPriority;
	}

	// The thread is in the run queue. We need to remove it and re-insert it at
	// a new position.

	T(RemoveThread(thread));

	// notify listeners
	NotifySchedulerListeners(&SchedulerListener::ThreadRemovedFromRunQueue,
		thread);

	if (threadData->Dequeue())
		enqueue(thread, true);

	return oldPriority;
}


void
scheduler_reschedule_ici()
{
	// This function is called as a result of an incoming ICI.
	// Make sure the reschedule() is invoked.
	get_cpu_struct()->invoke_scheduler = true;
}


static inline void
stop_cpu_timers(Thread* fromThread, Thread* toThread)
{
	SpinLocker teamLocker(&fromThread->team->time_lock);
	SpinLocker threadLocker(&fromThread->time_lock);

	if (fromThread->HasActiveCPUTimeUserTimers()
		|| fromThread->team->HasActiveCPUTimeUserTimers()) {
		user_timer_stop_cpu_timers(fromThread, toThread);
	}
}


static inline void
continue_cpu_timers(Thread* thread, cpu_ent* cpu)
{
	SpinLocker teamLocker(&thread->team->time_lock);
	SpinLocker threadLocker(&thread->time_lock);

	if (thread->HasActiveCPUTimeUserTimers()
		|| thread->team->HasActiveCPUTimeUserTimers()) {
		user_timer_continue_cpu_timers(thread, cpu->previous_thread);
	}
}


static void
thread_resumes(Thread* thread)
{
	cpu_ent* cpu = thread->cpu;

	release_spinlock(&cpu->previous_thread->scheduler_lock);

	// continue CPU time based user timers
	continue_cpu_timers(thread, cpu);

	// notify the user debugger code
	if ((thread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0)
		user_debug_thread_scheduled(thread);
}


void
scheduler_new_thread_entry(Thread* thread)
{
	thread_resumes(thread);

	SpinLocker locker(thread->time_lock);
	thread->last_time = system_time();
}


/*!	Switches the currently running thread.
	This is a service function for scheduler implementations.

	\param fromThread The currently running thread.
	\param toThread The thread to switch to. Must be different from
		\a fromThread.
*/
static inline void
switch_thread(Thread* fromThread, Thread* toThread)
{
	// notify the user debugger code
	if ((fromThread->flags & THREAD_FLAGS_DEBUGGER_INSTALLED) != 0)
		user_debug_thread_unscheduled(fromThread);

	// stop CPU time based user timers
	stop_cpu_timers(fromThread, toThread);

	// update CPU and Thread structures and perform the context switch
	cpu_ent* cpu = fromThread->cpu;
	toThread->previous_cpu = toThread->cpu = cpu;
	fromThread->cpu = NULL;
	cpu->running_thread = toThread;
	cpu->previous_thread = fromThread;

	arch_thread_set_current_thread(toThread);
	arch_thread_context_switch(fromThread, toThread);

	// The use of fromThread below looks weird, but is correct. fromThread had
	// been unscheduled earlier, but is back now. For a thread scheduled the
	// first time the same is done in thread.cpp:common_thread_entry().
	thread_resumes(fromThread);
}


static void
reschedule(int32 nextState)
{
	ASSERT(!are_interrupts_enabled());
	SCHEDULER_ENTER_FUNCTION();

	int32 thisCPU = smp_get_current_cpu();

	CPUEntry* cpu = CPUEntry::GetCPU(thisCPU);
	CoreEntry* core = CoreEntry::GetCore(thisCPU);

	Thread* oldThread = thread_get_current_thread();
	ThreadData* oldThreadData = oldThread->scheduler_data;

	oldThreadData->StopCPUTime();

	SchedulerModeLocker modeLocker;

	TRACE("reschedule(): cpu %ld, current thread = %ld\n", thisCPU,
		oldThread->id);

	oldThread->state = nextState;

	// return time spent in interrupts
	oldThreadData->SetStolenInterruptTime(gCPU[thisCPU].interrupt_time);

	bool enqueueOldThread = false;
	bool putOldThreadAtBack = false;
	switch (nextState) {
		case B_THREAD_RUNNING:
		case B_THREAD_READY:
			enqueueOldThread = true;

			if (!oldThreadData->IsIdle()) {
				oldThreadData->Continues();
				if (oldThreadData->HasQuantumEnded(oldThread->cpu->preempted,
						oldThread->has_yielded)) {
					TRACE("enqueueing thread %ld into run queue priority ="
						" %ld\n", oldThread->id,
						oldThreadData->GetEffectivePriority());
					putOldThreadAtBack = true;
				} else {
					TRACE("putting thread %ld back in run queue priority ="
						" %ld\n", oldThread->id,
						oldThreadData->GetEffectivePriority());
					putOldThreadAtBack = false;
				}
			}
			break;
		case THREAD_STATE_FREE_ON_RESCHED:
			oldThreadData->Dies();
			break;
		default:
			oldThreadData->GoesAway();
			TRACE("not enqueueing thread %ld into run queue next_state = %ld\n",
				oldThread->id, nextState);
			break;
	}

	oldThread->has_yielded = false;

	// select thread with the biggest priority and enqueue back the old thread
	ThreadData* nextThreadData;
	if (gCPU[thisCPU].disabled) {
		if (!oldThreadData->IsIdle()) {
			putOldThreadAtBack = oldThread->pinned_to_cpu == 0;
			oldThreadData->UnassignCore(true);

			CPURunQueueLocker cpuLocker(cpu);
			nextThreadData = cpu->PeekIdleThread();
			cpu->Remove(nextThreadData);
		} else
			nextThreadData = oldThreadData;
	} else {
		nextThreadData
			= cpu->ChooseNextThread(enqueueOldThread ? oldThreadData : NULL,
				putOldThreadAtBack);

		// update CPU heap
		CoreCPUHeapLocker cpuLocker(core);
		cpu->UpdatePriority(nextThreadData->GetEffectivePriority());
	}

	Thread* nextThread = nextThreadData->GetThread();
	ASSERT(!gCPU[thisCPU].disabled || nextThreadData->IsIdle());

	if (nextThread != oldThread) {
		if (enqueueOldThread) {
			if (putOldThreadAtBack)
				enqueue(oldThread, false);
			else
				oldThreadData->PutBack();
		}

		acquire_spinlock(&nextThread->scheduler_lock);
	}

	TRACE("reschedule(): cpu %ld, next thread = %ld\n", thisCPU,
		nextThread->id);

	T(ScheduleThread(nextThread, oldThread));

	// notify listeners
	NotifySchedulerListeners(&SchedulerListener::ThreadScheduled,
		oldThread, nextThread);

	ASSERT(nextThreadData->Core() == core);
	nextThread->state = B_THREAD_RUNNING;
	nextThreadData->StartCPUTime();

	// track CPU activity
	cpu->TrackActivity(oldThreadData, nextThreadData);

	if (nextThread != oldThread || oldThread->cpu->preempted) {
		cpu->StartQuantumTimer(nextThreadData, oldThread->cpu->preempted);

		oldThread->cpu->preempted = false;
		if (!nextThreadData->IsIdle())
			nextThreadData->Continues();
		else
			gCurrentMode->rebalance_irqs(true);
		nextThreadData->StartQuantum();
	}

	modeLocker.Unlock();

	SCHEDULER_EXIT_FUNCTION();

	if (nextThread != oldThread)
		switch_thread(oldThread, nextThread);
}


/*!	Runs the scheduler.
	Note: expects thread spinlock to be held
*/
void
scheduler_reschedule(int32 nextState)
{
	ASSERT(!are_interrupts_enabled());
	SCHEDULER_ENTER_FUNCTION();

	if (!sSchedulerEnabled) {
		Thread* thread = thread_get_current_thread();
		if (thread != NULL && nextState != B_THREAD_READY)
			panic("scheduler_reschedule_no_op() called in non-ready thread");
		return;
	}

	reschedule(nextState);
}


status_t
scheduler_on_thread_create(Thread* thread, bool idleThread)
{
	thread->scheduler_data = new(std::nothrow) ThreadData(thread);
	if (thread->scheduler_data == NULL)
		return B_NO_MEMORY;
	return B_OK;
}


void
scheduler_on_thread_init(Thread* thread)
{
	ASSERT(thread->scheduler_data != NULL);

	if (thread_is_idle_thread(thread)) {
		static int32 sIdleThreadsID;
		int32 cpuID = atomic_add(&sIdleThreadsID, 1);

		thread->previous_cpu = &gCPU[cpuID];
		thread->pinned_to_cpu = 1;

		thread->scheduler_data->Init(CoreEntry::GetCore(cpuID));
	} else
		thread->scheduler_data->Init();
}


void
scheduler_on_thread_destroy(Thread* thread)
{
	delete thread->scheduler_data;
}


/*!	This starts the scheduler. Must be run in the context of the initial idle
	thread. Interrupts must be disabled and will be disabled when returning.
*/
void
scheduler_start()
{
	InterruptsSpinLocker _(thread_get_current_thread()->scheduler_lock);
	SCHEDULER_ENTER_FUNCTION();

	reschedule(B_THREAD_READY);
}


status_t
scheduler_set_operation_mode(scheduler_mode mode)
{
	if (mode != SCHEDULER_MODE_LOW_LATENCY
		&& mode != SCHEDULER_MODE_POWER_SAVING) {
		return B_BAD_VALUE;
	}

	dprintf("scheduler: switching to %s mode\n", sSchedulerModes[mode]->name);

	InterruptsBigSchedulerLocker _;

	gCurrentModeID = mode;
	gCurrentMode = sSchedulerModes[mode];
	gCurrentMode->switch_to_mode();

	ThreadData::ComputeQuantumLengths();

	return B_OK;
}
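
// Minimal usage sketch (illustrative, not part of the original file): switch
// the whole scheduler to power saving, e.g. when the machine goes on battery.
// The caller only needs the mode constant from scheduler_defs.h.
//
//	if (scheduler_set_operation_mode(SCHEDULER_MODE_POWER_SAVING) != B_OK)
//		dprintf("scheduler: power saving mode rejected\n");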


void
scheduler_set_cpu_enabled(int32 cpuID, bool enabled)
{
#if KDEBUG
	if (are_interrupts_enabled())
		panic("scheduler_set_cpu_enabled: called with interrupts enabled");
#endif

	dprintf("scheduler: %s CPU %" B_PRId32 "\n",
		enabled ? "enabling" : "disabling", cpuID);

	InterruptsBigSchedulerLocker _;

	gCurrentMode->set_cpu_enabled(cpuID, enabled);

	CPUEntry* cpu = &gCPUEntries[cpuID];
	CoreEntry* core = cpu->Core();

	ASSERT(core->CPUCount() >= 0);
	if (enabled)
		cpu->Start();
	else {
		cpu->UpdatePriority(B_IDLE_PRIORITY);

		ThreadEnqueuer enqueuer;
		core->RemoveCPU(cpu, enqueuer);
	}

	gCPU[cpuID].disabled = !enabled;

	if (!enabled) {
		cpu->Stop();

		// don't wait until the thread quantum ends
		if (smp_get_current_cpu() != cpuID) {
			smp_send_ici(cpuID, SMP_MSG_RESCHEDULE, 0, 0, 0, NULL,
				SMP_MSG_FLAG_ASYNC);
		}
	}
}


static void
traverse_topology_tree(const cpu_topology_node* node, int packageID, int coreID)
{
	switch (node->level) {
		case CPU_TOPOLOGY_SMT:
			sCPUToCore[node->id] = coreID;
			sCPUToPackage[node->id] = packageID;
			return;

		case CPU_TOPOLOGY_CORE:
			coreID = node->id;
			break;

		case CPU_TOPOLOGY_PACKAGE:
			packageID = node->id;
			break;

		default:
			break;
	}

	for (int32 i = 0; i < node->children_count; i++)
		traverse_topology_tree(node->children[i], packageID, coreID);
}


static status_t
build_topology_mappings(int32& cpuCount, int32& coreCount, int32& packageCount)
{
	cpuCount = smp_get_num_cpus();

	sCPUToCore = new(std::nothrow) int32[cpuCount];
	if (sCPUToCore == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<int32> cpuToCoreDeleter(sCPUToCore);

	sCPUToPackage = new(std::nothrow) int32[cpuCount];
	if (sCPUToPackage == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<int32> cpuToPackageDeleter(sCPUToPackage);

	coreCount = 0;
	for (int32 i = 0; i < cpuCount; i++) {
		if (gCPU[i].topology_id[CPU_TOPOLOGY_SMT] == 0)
			coreCount++;
	}

	packageCount = 0;
	for (int32 i = 0; i < cpuCount; i++) {
		if (gCPU[i].topology_id[CPU_TOPOLOGY_SMT] == 0
			&& gCPU[i].topology_id[CPU_TOPOLOGY_CORE] == 0) {
			packageCount++;
		}
	}

	const cpu_topology_node* root = get_cpu_topology();
	traverse_topology_tree(root, 0, 0);

	cpuToCoreDeleter.Detach();
	cpuToPackageDeleter.Detach();

	return B_OK;
}


static status_t
init()
{
	// create logical processor to core and package mappings
	int32 cpuCount, coreCount, packageCount;
	status_t result = build_topology_mappings(cpuCount, coreCount,
		packageCount);
	if (result != B_OK)
		return result;

	// disable parts of the scheduler logic that are not needed
	gSingleCore = coreCount == 1;
	gTrackCPULoad = increase_cpu_performance(0) == B_OK;
	gTrackCoreLoad = !gSingleCore || gTrackCPULoad;
	dprintf("scheduler switches: single core: %s, cpu load tracking: %s,"
		" core load tracking: %s\n", gSingleCore ? "true" : "false",
		gTrackCPULoad ? "true" : "false",
		gTrackCoreLoad ? "true" : "false");

	gCoreCount = coreCount;
	gPackageCount = packageCount;

	gCPUEntries = new(std::nothrow) CPUEntry[cpuCount];
	if (gCPUEntries == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<CPUEntry> cpuEntriesDeleter(gCPUEntries);

	gCoreEntries = new(std::nothrow) CoreEntry[coreCount];
	if (gCoreEntries == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<CoreEntry> coreEntriesDeleter(gCoreEntries);

	gPackageEntries = new(std::nothrow) PackageEntry[packageCount];
	if (gPackageEntries == NULL)
		return B_NO_MEMORY;
	ArrayDeleter<PackageEntry> packageEntriesDeleter(gPackageEntries);

	new(&gCoreLoadHeap) CoreLoadHeap(coreCount);
	new(&gCoreHighLoadHeap) CoreLoadHeap(coreCount);

	new(&gIdlePackageList) IdlePackageList;

	for (int32 i = 0; i < cpuCount; i++) {
		CoreEntry* core = &gCoreEntries[sCPUToCore[i]];
		PackageEntry* package = &gPackageEntries[sCPUToPackage[i]];

		package->Init(sCPUToPackage[i]);
		core->Init(sCPUToCore[i], package);
		gCPUEntries[i].Init(i, core);

		core->AddCPU(&gCPUEntries[i]);
	}

	packageEntriesDeleter.Detach();
	coreEntriesDeleter.Detach();
	cpuEntriesDeleter.Detach();

	return B_OK;
}


void
scheduler_init()
{
	int32 cpuCount = smp_get_num_cpus();
	dprintf("scheduler_init: found %" B_PRId32 " logical cpu%s and %" B_PRId32
		" cache level%s\n", cpuCount, cpuCount != 1 ? "s" : "",
		gCPUCacheLevelCount, gCPUCacheLevelCount != 1 ? "s" : "");

#ifdef SCHEDULER_PROFILING
	Profiling::Profiler::Initialize();
#endif

	status_t result = init();
	if (result != B_OK)
		panic("scheduler_init: failed to initialize scheduler\n");

	scheduler_set_operation_mode(SCHEDULER_MODE_LOW_LATENCY);

	init_debug_commands();

#if SCHEDULER_TRACING
	add_debugger_command_etc("scheduler", &cmd_scheduler,
		"Analyze scheduler tracing information",
		"<thread>\n"
		"Analyzes scheduler tracing information for a given thread.\n"
		"  <thread>  - ID of the thread.\n", 0);
#endif
}


void
scheduler_enable_scheduling()
{
	sSchedulerEnabled = true;
}


// #pragma mark - SchedulerListener


SchedulerListener::~SchedulerListener()
{
}


// #pragma mark - kernel private


/*!	Add the given scheduler listener. Thread lock must be held.
*/
void
scheduler_add_listener(struct SchedulerListener* listener)
{
	InterruptsSpinLocker _(gSchedulerListenersLock);
	gSchedulerListeners.Add(listener);
}


/*!	Remove the given scheduler listener. Thread lock must be held.
*/
void
scheduler_remove_listener(struct SchedulerListener* listener)
{
	InterruptsSpinLocker _(gSchedulerListenersLock);
	gSchedulerListeners.Remove(listener);
}
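
// Hedged sketch of a listener (the `WakeupCounter` type and its counter are
// hypothetical, not kernel API): derive from SchedulerListener, override the
// hooks this file notifies, and register the object. The hooks run under
// scheduler locks, so they must be fast and must not block.
//
//	struct WakeupCounter : SchedulerListener {
//		int64 fWakeups;
//
//		WakeupCounter() : fWakeups(0) {}
//
//		virtual void ThreadEnqueuedInRunQueue(Thread* thread)
//			{ atomic_add64(&fWakeups, 1); }
//		virtual void ThreadRemovedFromRunQueue(Thread* thread) {}
//		virtual void ThreadScheduled(Thread* oldThread, Thread* newThread) {}
//	};
//
//	static WakeupCounter sWakeupCounter;
//	scheduler_add_listener(&sWakeupCounter);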


// #pragma mark - Syscalls


bigtime_t
_user_estimate_max_scheduling_latency(thread_id id)
{
	syscall_64_bit_return_value();

	// get the thread
	Thread* thread;
	if (id < 0) {
		thread = thread_get_current_thread();
		thread->AcquireReference();
	} else {
		thread = Thread::Get(id);
		if (thread == NULL)
			return 0;
	}
	BReference<Thread> threadReference(thread, true);

#ifdef SCHEDULER_PROFILING
	InterruptsLocker _;
#endif

	ThreadData* threadData = thread->scheduler_data;
	CoreEntry* core = threadData->Core();
	if (core == NULL)
		core = &gCoreEntries[get_random<int32>() % gCoreCount];

	int32 threadCount = core->ThreadCount();
	if (core->CPUCount() > 0)
		threadCount /= core->CPUCount();

	if (threadData->GetEffectivePriority() > 0) {
		threadCount -= threadCount * THREAD_MAX_SET_PRIORITY
				/ threadData->GetEffectivePriority();
	}

	return std::min(std::max(threadCount * gCurrentMode->base_quantum,
			gCurrentMode->minimal_quantum),
		gCurrentMode->maximum_latency);
}


status_t
_user_set_scheduler_mode(int32 mode)
{
	scheduler_mode schedulerMode = static_cast<scheduler_mode>(mode);
	status_t error = scheduler_set_operation_mode(schedulerMode);
	if (error == B_OK)
		cpu_set_scheduler_mode(schedulerMode);
	return error;
}


int32
_user_get_scheduler_mode()
{
	return gCurrentModeID;
}