kernel/scheduler: implement the yield types of the SleepThread SVC

SleepThread calls with a non-positive duration are dispatched to three new Scheduler
operations (0: yield without load balancing, -1: yield with load balancing, -2: yield and wait
for load balancing, which is still unimplemented). ThreadQueueList gains get_first_filter to
find the first queued element that satisfies a predicate, and ThreadPriority gains
THREADPRIO_COUNT for priority range checks. SleepThread now prepares a reschedule on every
CPU core instead of only the current one.

Review notes:
* Template argument lists that were missing from the patch text were restored from usage:
  `template <typename UnaryPredicate>`, `static_cast<u32>` and `static_cast<SleepType>`.
  `std::lock_guard lock(scheduler_mutex)` is kept as written; it is valid with C++17 class
  template argument deduction.
* The GetThreadList context line is assumed to read `std::vector<SharedPtr<Thread>>` --
  confirm against scheduler.h before applying.
* Indentation and blank lines were reconstructed (4-space convention assumed) so that they
  agree with the hunk line counts; use a whitespace-tolerant apply if context does not match.
* The `index` lines are the original ones; the new-side blob hashes no longer describe the
  edited content.
* YieldWithLoadBalancing now skips the yielding core when collecting suggestions, as its
  comment and the header documentation describe.
* GetNextSuggestedThread only returns threads whose priority value is strictly less than the
  yielding thread's, while the documented example uses equal priorities -- confirm which
  comparison is intended.
* The Yield* functions assert on `thread` but sleep GetCurrentThread(); every caller in this
  patch passes GetCurrentThread().

diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index 133122c5f4..e7594db689 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -49,6 +49,25 @@ struct ThreadQueueList {
         return T();
     }
 
+    /**
+     * Returns the first element for which `filter` returns true, visiting queues from `first`
+     * through their `next_nonempty` links and the elements of each queue in order. Returns a
+     * default-constructed T when nothing matches.
+     */
+    template <typename UnaryPredicate>
+    T get_first_filter(UnaryPredicate filter) const {
+        const Queue* cur = first;
+        while (cur != nullptr) {
+            for (const auto& item : cur->data) {
+                if (filter(item))
+                    return item;
+            }
+            cur = cur->next_nonempty;
+        }
+
+        return T();
+    }
+
     T pop_first() {
         Queue* cur = first;
         while (cur != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 5a5f4cef1a..df4d6cf0a0 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
@@ -179,4 +180,73 @@ void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
     ready_queue.prepare(priority);
 }
 
+Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const {
+    std::lock_guard lock(scheduler_mutex);
+
+    const u32 mask = 1U << core;
+    return ready_queue.get_first_filter([mask, maximum_priority](Thread const* thread) {
+        return (thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority;
+    });
+}
+
+void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
+    ASSERT(thread != nullptr);
+    // Avoid yielding if the thread isn't even running.
+    ASSERT(thread->GetStatus() == ThreadStatus::Running);
+
+    // Sanity check that the priority is valid
+    ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
+
+    // Yield this thread -- sleep for zero time and force reschedule to different thread
+    WaitCurrentThread_Sleep();
+    GetCurrentThread()->WakeAfterDelay(0);
+}
+
+void Scheduler::YieldWithLoadBalancing(Thread* thread) {
+    ASSERT(thread != nullptr);
+    const auto priority = thread->GetPriority();
+    const auto core = static_cast<u32>(thread->GetProcessorID());
+
+    // Avoid yielding if the thread isn't even running.
+    ASSERT(thread->GetStatus() == ThreadStatus::Running);
+
+    // Sanity check that the priority is valid
+    ASSERT(priority < THREADPRIO_COUNT);
+
+    // Sleep for zero time to be able to force reschedule to different thread
+    WaitCurrentThread_Sleep();
+    GetCurrentThread()->WakeAfterDelay(0);
+
+    Thread* suggested_thread = nullptr;
+
+    // Search through all of the cpu cores (except this one) for a suggested thread.
+    // Keep the candidate with the best (numerically lowest) priority
+    for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
+        // Only other cores can supply a thread to migrate onto this one
+        if (cur_core == core)
+            continue;
+
+        const auto res =
+            Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(
+                core, priority);
+
+        // If scheduler provides a suggested thread
+        if (res != nullptr) {
+            // And it's better than the current suggested thread (or is the first valid one)
+            if (suggested_thread == nullptr ||
+                suggested_thread->GetPriority() > res->GetPriority()) {
+                suggested_thread = res;
+            }
+        }
+    }
+
+    // If a suggested thread was found, queue that for this core
+    if (suggested_thread != nullptr)
+        suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
+}
+
+void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) {
+    UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index c63032b7d5..97ced4dfc3 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -51,6 +51,79 @@ public:
     /// Sets the priority of a thread in the scheduler
     void SetThreadPriority(Thread* thread, u32 priority);
 
+    /**
+     * Gets the next suggested thread for load balancing: the first thread in this scheduler's
+     * ready queue that may run on `core` (per its affinity mask) and whose priority value is
+     * strictly less than `maximum_priority`. Returns nullptr if there is no such thread.
+     */
+    Thread* GetNextSuggestedThread(u32 core, u32 maximum_priority) const;
+
+    /**
+     * YieldWithoutLoadBalancing -- analogous to normal yield on a system
+     * Moves the thread to the end of the ready queue for its priority, and then reschedules the
+     * system to the new head of the queue.
+     *
+     * Example (Single Core -- but can be extrapolated to multi):
+     * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
+     * Currently Running: ThreadR
+     *
+     * ThreadR calls YieldWithoutLoadBalancing
+     *
+     * ThreadR is moved to the end of ready_queue[prio=0]:
+     * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
+     * Currently Running: Nothing
+     *
+     * System is rescheduled (ThreadA is popped off of queue):
+     * ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
+     * Currently Running: ThreadA
+     *
+     * If the queue is empty at time of call, no yielding occurs. This does not cross between cores
+     * or priorities at all.
+     */
+    void YieldWithoutLoadBalancing(Thread* thread);
+
+    /**
+     * YieldWithLoadBalancing -- yield but with better selection of the new running thread
+     * Moves the current thread to the end of the ready queue for its priority, then selects a
+     * 'suggested thread' (a thread on a different core that could run on this core) from the
+     * scheduler, changes its core, and reschedules the current core to that thread.
+     *
+     * Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
+     * single core):
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant)
+     * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
+     * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
+     *
+     * ThreadQ calls YieldWithLoadBalancing
+     *
+     * ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
+     * Currently Running: None on Core 0 || ThreadP on Core 1
+     *
+     * A list of suggested threads for each core is compiled
+     * Suggested Threads: {ThreadC on Core 1}
+     * If this were quad core (as the switch is), there could be between 0 and 3 threads in this
+     * list. If there is more than one, the thread is selected by highest prio.
+     *
+     * ThreadC is core changed to Core 0:
+     * ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadD
+     * Currently Running: None on Core 0 || ThreadP on Core 1
+     *
+     * System is rescheduled (ThreadC is popped off of queue):
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadD
+     * Currently Running: ThreadC on Core 0 || ThreadP on Core 1
+     *
+     * If no suggested threads can be found this will behave just as normal yield. If there are
+     * multiple candidates for the suggested thread on a core, the highest prio is taken.
+     */
+    void YieldWithLoadBalancing(Thread* thread);
+
+    /// Currently unknown -- asserts as unimplemented on call
+    void YieldAndWaitForLoadBalancing(Thread* thread);
+
     /// Returns a list of all threads managed by the scheduler
     const std::vector<SharedPtr<Thread>>& GetThreadList() const {
         return thread_list;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 5d36792ca7..348a229047 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1208,18 +1208,39 @@ static void ExitThread() {
 static void SleepThread(s64 nanoseconds) {
     LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
 
-    // Don't attempt to yield execution if there are no available threads to run,
-    // this way we avoid a useless reschedule to the idle thread.
-    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
-        return;
+    // Non-positive values are not durations; they select one of the yield types below
+    enum class SleepType : s64 {
+        YieldWithoutLoadBalancing = 0,
+        YieldWithLoadBalancing = -1,
+        YieldAndWaitForLoadBalancing = -2,
+    };
 
-    // Sleep current thread and check for next thread to schedule
-    WaitCurrentThread_Sleep();
+    if (nanoseconds <= 0) {
+        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
+        switch (static_cast<SleepType>(nanoseconds)) {
+        case SleepType::YieldWithoutLoadBalancing:
+            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
+            break;
+        case SleepType::YieldWithLoadBalancing:
+            scheduler.YieldWithLoadBalancing(GetCurrentThread());
+            break;
+        case SleepType::YieldAndWaitForLoadBalancing:
+            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
+            break;
+        default:
+            UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds);
+        }
+    } else {
+        // Sleep current thread and check for next thread to schedule
+        WaitCurrentThread_Sleep();
 
-    // Create an event to wake the thread up after the specified nanosecond delay has passed
-    GetCurrentThread()->WakeAfterDelay(nanoseconds);
+        // Create an event to wake the thread up after the specified nanosecond delay has passed
+        GetCurrentThread()->WakeAfterDelay(nanoseconds);
+    }
 
-    Core::System::GetInstance().PrepareReschedule();
+    // Reschedule all CPU cores
+    for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i)
+        Core::System::GetInstance().CpuCore(i).PrepareReschedule();
 }
 
 /// Wait process wide key atomic
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index d384d50dbc..77aec099ae 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -26,6 +26,7 @@ enum ThreadPriority : u32 {
     THREADPRIO_USERLAND_MAX = 24, ///< Highest thread priority for userland apps
     THREADPRIO_DEFAULT = 44,      ///< Default thread priority for userland apps
     THREADPRIO_LOWEST = 63,       ///< Lowest thread priority
+    THREADPRIO_COUNT = 64,        ///< Total number of possible thread priorities.
 };
 
 enum ThreadProcessorId : s32 {