core: Create a thread for each CPU core, keep in lock-step with a barrier.

2018-05-02 21:26:14 -04:00 · 2018-05-02 21:26:14 -04:00 · 9776ff9179
commit 9776ff9179
parent 5590245930
4 changed files with 94 additions and 18 deletions
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -27,6 +27,13 @@ namespace Core {

 System::~System() = default;

+/// Thread entry point: repeatedly runs `cpu_state` for as long as the system is powered on.
+/// The shared_ptr is taken by value so the running thread holds its own reference to the core.
+static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
+    // Call the accessor on the type. `Core::System().GetInstance()` value-initialized and then
+    // destroyed a throwaway System object on every iteration just to reach the accessor.
+    // NOTE(review): assumes System::GetInstance() is a static member - confirm in core.h.
+    while (Core::System::GetInstance().IsPoweredOn()) {
+        cpu_state->RunLoop(true);
+    }
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
    status = ResultStatus::Success;

@ -109,7 +116,7 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
 }

 void System::PrepareReschedule() {
-    cpu_cores[0]->PrepareReschedule();
+    CurrentCpuCore().PrepareReschedule();
 }

 PerfStats::Results System::GetAndResetPerfStats() {
@ -123,14 +130,13 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {

    current_process = Kernel::Process::Create("main");

-    for (auto& cpu_core : cpu_cores) {
-        cpu_core = std::make_unique<Cpu>();
+    cpu_barrier = std::make_shared<CpuBarrier>();
+    for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index);
    }

    gpu_core = std::make_unique<Tegra::GPU>();
-
    telemetry_session = std::make_unique<Core::TelemetrySession>();
-
    service_manager = std::make_shared<Service::SM::ServiceManager>();

    HW::Init();
@ -142,6 +148,14 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
        return ResultStatus::ErrorVideoCore;
    }

+    // Create threads for CPU cores 1-3, and build thread_to_cpu map
+    // CPU core 0 is run on the main thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
+    for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+        cpu_core_threads[index] = std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
+        thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+    }
+
    NGLOG_DEBUG(Core, "Initialized OK");

    // Reset counters and set time origin to current frame
@ -171,9 +185,15 @@ void System::Shutdown() {
    telemetry_session.reset();
    gpu_core.reset();

+    // Close all CPU/threading state
+    thread_to_cpu.clear();
    for (auto& cpu_core : cpu_cores) {
        cpu_core.reset();
    }
+    for (auto& thread : cpu_core_threads) {
+        thread->join();
+        thread.reset();
+    }

    CoreTiming::Shutdown();

--- a/src/core/core.h
+++ b/src/core/core.h
@ -7,6 +7,7 @@
 #include <array>
 #include <memory>
 #include <string>
+#include <thread>
 #include "common/common_types.h"
 #include "core/core_cpu.h"
 #include "core/hle/kernel/kernel.h"
@ -112,7 +113,7 @@ public:
     * @returns A reference to the emulated CPU.
     */
    ARM_Interface& CPU() {
-        return cpu_cores[0]->CPU();
+        return CurrentCpuCore().CPU();
    }

    Tegra::GPU& GPU() {
@ -120,7 +121,7 @@ public:
    }

    Kernel::Scheduler& Scheduler() {
-        return cpu_cores[0]->Scheduler();
+        return CurrentCpuCore().Scheduler();
    }

    Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
@ -157,6 +158,14 @@ public:
    }

 private:
+    /**
+     * Returns the CPU core registered for the calling host thread.
+     *
+     * The calling thread's std::thread::id is looked up in thread_to_cpu; both the presence of
+     * an entry and a non-null Cpu pointer are asserted before the pointer is dereferenced.
+     *
+     * NOTE(review): the lookup takes no lock, while System::Init() inserts into thread_to_cpu
+     * after the core threads have been started and System::Shutdown() clears it before joining
+     * them - confirm those writes cannot overlap with a call made from a core thread.
+     */
+    Cpu& CurrentCpuCore() {
+        const auto& search = thread_to_cpu.find(std::this_thread::get_id());
+        ASSERT(search != thread_to_cpu.end());
+        ASSERT(search->second);
+        return *search->second;
+    }
+
    /**
     * Initialize the emulated system.
     * @param emu_window Pointer to the host-system window used for video output and keyboard input.
@ -167,14 +176,12 @@ private:

    /// AppLoader used to load the current executing application
    std::unique_ptr<Loader::AppLoader> app_loader;
-
-    std::array<std::unique_ptr<Cpu>, 4> cpu_cores;
    std::unique_ptr<Tegra::GPU> gpu_core;
    std::shared_ptr<Tegra::DebugContext> debug_context;
    Kernel::SharedPtr<Kernel::Process> current_process;
-
-    /// When true, signals that a reschedule should happen
-    bool reschedule_pending{};
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
+    std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;

    /// Service manager
    std::shared_ptr<Service::SM::ServiceManager> service_manager;
@ -186,6 +193,9 @@ private:

    ResultStatus status = ResultStatus::Success;
    std::string status_details = "";
+
+    /// Map of host thread IDs to the CPU core that each host thread runs
+    std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
 };

 inline ARM_Interface& CPU() {
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@ -2,6 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <condition_variable>
+#include <mutex>
+
 #include "common/logging/log.h"
 #ifdef ARCHITECTURE_x86_64
 #include "core/arm/dynarmic/arm_dynarmic.h"
@ -16,7 +19,9 @@

 namespace Core {

-Cpu::Cpu() {
+Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+    : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
+
    if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
        arm_interface = std::make_shared<ARM_Dynarmic>();
@ -32,15 +37,25 @@ Cpu::Cpu() {
 }

 void Cpu::RunLoop(bool tight_loop) {
+    // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
+    cpu_barrier->Rendezvous();
+
    // If we don't have a currently active thread then don't execute instructions,
    // instead advance to the next event and try to yield to the next thread
    if (Kernel::GetCurrentThread() == nullptr) {
-        NGLOG_TRACE(Core, "Idling");
-        CoreTiming::Idle();
-        CoreTiming::Advance();
+        NGLOG_TRACE(Core, "Core-{} idling", core_index);
+
+        if (IsMainCore()) {
+            CoreTiming::Idle();
+            CoreTiming::Advance();
+        }
+
        PrepareReschedule();
    } else {
-        CoreTiming::Advance();
+        if (IsMainCore()) {
+            CoreTiming::Advance();
+        }
+
        if (tight_loop) {
            arm_interface->Run();
        } else {
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@ -4,7 +4,9 @@

 #pragma once

+#include <condition_variable>
 #include <memory>
+#include <mutex>
 #include <string>
 #include "common/common_types.h"

@ -16,9 +18,32 @@ class Scheduler;

 namespace Core {

+constexpr unsigned NUM_CPU_CORES{4};
+
+/**
+ * Reusable rendezvous point for NUM_CPU_CORES threads. Each call to Rendezvous() blocks until
+ * that many callers have arrived, after which all of them are released and the barrier is
+ * re-armed for the next round.
+ */
+class CpuBarrier {
+public:
+    /// Blocks the caller until NUM_CPU_CORES callers (including this one) have arrived.
+    void Rendezvous() {
+        std::unique_lock<std::mutex> lock(mutex);
+
+        --cores_waiting;
+        if (!cores_waiting) {
+            // Last arrival: re-arm the counter, open a new generation and release the waiters.
+            cores_waiting = NUM_CPU_CORES;
+            ++generation;
+            condition.notify_all();
+            return;
+        }
+
+        // Wait for the generation to change rather than for a bare notification. A condition
+        // variable may wake spuriously, and without a predicate such a wakeup would let this
+        // caller through before the remaining callers have arrived.
+        const unsigned arrived_in = generation;
+        condition.wait(lock, [this, arrived_in] { return generation != arrived_in; });
+    }
+
+private:
+    unsigned cores_waiting{NUM_CPU_CORES}; ///< Arrivals still missing in the current round
+    unsigned generation{};                 ///< Incremented each time a round completes
+    std::mutex mutex;                      ///< Guards cores_waiting and generation
+    std::condition_variable condition;     ///< Signalled when a round completes
+};
+
 class Cpu {
 public:
-    Cpu();
+    Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);

    void RunLoop(bool tight_loop = true);

@ -34,13 +59,19 @@ public:
        return *scheduler;
    }

+    /// Returns true when this core has index 0. Cpu::RunLoop() only calls CoreTiming::Idle() and
+    /// CoreTiming::Advance() on that core.
+    bool IsMainCore() const {
+        return core_index == 0;
+    }
+
 private:
    void Reschedule();

    std::shared_ptr<ARM_Interface> arm_interface;
+    std::shared_ptr<CpuBarrier> cpu_barrier;
    std::unique_ptr<Kernel::Scheduler> scheduler;

    bool reschedule_pending{};
+    size_t core_index;
 };

 } // namespace Core