Merge pull request #178 from Subv/command_buffers

GPU: Added a command processor to decode the GPU pushbuffers and forward the commands to their respective engines
2018-02-12 13:51:52 -05:00 · 2018-02-12 13:51:52 -05:00 · be5ba4d952
commit be5ba4d952
parent 890e98a33e 6cddf9d88e
20 changed files with 364 additions and 23 deletions
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@ -139,8 +139,6 @@ add_library(core STATIC
    hle/service/nvdrv/devices/nvmap.h
    hle/service/nvdrv/interface.cpp
    hle/service/nvdrv/interface.h
-    hle/service/nvdrv/memory_manager.cpp
-    hle/service/nvdrv/memory_manager.h
    hle/service/nvdrv/nvdrv.cpp
    hle/service/nvdrv/nvdrv.h
    hle/service/nvdrv/nvmemp.cpp
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -154,6 +154,8 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
        break;
    }

+    gpu_core = std::make_unique<Tegra::GPU>();
+
    telemetry_session = std::make_unique<Core::TelemetrySession>();

    CoreTiming::Init();
--- a/src/core/core.h
+++ b/src/core/core.h
@ -11,6 +11,7 @@
 #include "core/memory.h"
 #include "core/perf_stats.h"
 #include "core/telemetry_session.h"
+#include "video_core/gpu.h"

 class EmuWindow;
 class ARM_Interface;
@ -102,6 +103,10 @@ public:
        return *cpu_core;
    }

+    /// Gets a reference to the emulated GPU. This dereferences gpu_core, which is created in
+    /// System::Init.
+    Tegra::GPU& GPU() {
+        return *gpu_core;
+    }
+
    PerfStats perf_stats;
    FrameLimiter frame_limiter;

@ -138,6 +143,8 @@ private:
    ///< ARM11 CPU core
    std::unique_ptr<ARM_Interface> cpu_core;

+    std::unique_ptr<Tegra::GPU> gpu_core;
+
    /// When true, signals that a reschedule should happen
    bool reschedule_pending{};

--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@ -4,6 +4,7 @@

 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"

@ -44,11 +45,12 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
    LOG_DEBUG(Service_NVDRV, "called, pages=%x, page_size=%x, flags=%x", params.pages,
              params.page_size, params.flags);

+    auto& gpu = Core::System::GetInstance().GPU();
    const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
    if (params.flags & 1) {
-        params.offset = memory_manager->AllocateSpace(params.offset, size, 1);
+        params.offset = gpu.memory_manager->AllocateSpace(params.offset, size, 1);
    } else {
-        params.offset = memory_manager->AllocateSpace(size, params.align);
+        params.offset = gpu.memory_manager->AllocateSpace(size, params.align);
    }

    std::memcpy(output.data(), &params, output.size());
@ -71,10 +73,12 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
    auto object = nvmap_dev->GetObject(params.nvmap_handle);
    ASSERT(object);

+    auto& gpu = Core::System::GetInstance().GPU();
+
    if (params.flags & 1) {
-        params.offset = memory_manager->MapBufferEx(object->addr, params.offset, object->size);
+        params.offset = gpu.memory_manager->MapBufferEx(object->addr, params.offset, object->size);
    } else {
-        params.offset = memory_manager->MapBufferEx(object->addr, object->size);
+        params.offset = gpu.memory_manager->MapBufferEx(object->addr, object->size);
    }

    std::memcpy(output.data(), &params, output.size());
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
-#include "core/hle/service/nvdrv/memory_manager.h"

 namespace Service {
 namespace Nvidia {
@ -20,9 +19,7 @@ class nvmap;

 class nvhost_as_gpu final : public nvdevice {
 public:
-    nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvdevice(), nvmap_dev(std::move(nvmap_dev)) {
-        memory_manager = std::make_shared<MemoryManager>();
-    }
+    /// Creates the device and stores a shared reference to the nvmap device passed in.
+    nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
    ~nvhost_as_gpu() override = default;

    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
@ -101,7 +98,6 @@ private:
    u32 GetVARegions(const std::vector<u8>& input, std::vector<u8>& output);

    std::shared_ptr<nvmap> nvmap_dev;
-    std::shared_ptr<MemoryManager> memory_manager;
 };

 } // namespace Devices
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@ -5,6 +5,7 @@
 #include <map>
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"

 namespace Service {
@ -131,7 +132,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
                params.num_entries * sizeof(IoctlGpfifoEntry));
    for (auto entry : entries) {
        VAddr va_addr = entry.Address();
-        // TODO(ogniK): Process these
+        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
    }
    params.fence_out.id = 0;
    params.fence_out.value = 0;
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@ -4,6 +4,7 @@

 #pragma once

+#include <memory>
 #include <vector>
 #include "common/common_types.h"
 #include "common/swap.h"
@ -12,12 +13,14 @@
 namespace Service {
 namespace Nvidia {
 namespace Devices {
+
+class nvmap;
 constexpr u32 NVGPU_IOCTL_MAGIC('H');
 constexpr u32 NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO(0x8);

 class nvhost_gpu final : public nvdevice {
 public:
-    nvhost_gpu() = default;
+    /// Creates the device and stores a shared reference to the nvmap device passed in.
+    nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {}
    ~nvhost_gpu() override = default;

    u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
@ -132,6 +135,8 @@ private:
    u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output);
    u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output);
    u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output);
+
+    std::shared_ptr<nvmap> nvmap_dev;
 };

 } // namespace Devices
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@ -32,11 +32,11 @@ void InstallInterfaces(SM::ServiceManager& service_manager) {
 Module::Module() {
+    // A single nvmap instance is created here and passed to every device whose constructor takes
+    // it; each device is registered in `devices` under its device path.
    auto nvmap_dev = std::make_shared<Devices::nvmap>();
    devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev);
+    devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev);
    devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>();
    devices["/dev/nvmap"] = nvmap_dev;
    devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev);
    devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>();
-    devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>();
 }

 u32 Module::Open(std::string device_name) {
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@ -1,4 +1,15 @@
 add_library(video_core STATIC
+    command_processor.cpp
+    command_processor.h
+    engines/fermi_2d.cpp
+    engines/fermi_2d.h
+    engines/maxwell_3d.cpp
+    engines/maxwell_3d.h
+    engines/maxwell_compute.cpp
+    engines/maxwell_compute.h
+    gpu.h
+    memory_manager.cpp
+    memory_manager.h
    renderer_base.cpp
    renderer_base.h
    renderer_opengl/gl_resource_manager.h
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@ -0,0 +1,119 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <cstddef>
+#include <memory>
+#include <utility>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/microprofile.h"
+#include "common/vector_math.h"
+#include "core/memory.h"
+#include "core/tracer/recorder.h"
+#include "video_core/command_processor.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_compute.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
+
+namespace Tegra {
+
+/// Method ids that GPU::WriteReg handles itself instead of forwarding them to a bound engine.
+enum class BufferMethods {
+    /// Binds the subchannel of the command to the engine id passed as the argument.
+    BindObject = 0,
+    /// Exclusive upper bound of the buffer method range; methods below it never reach an engine.
+    CountBufferMethods = 0x100,
+};
+
+/// Handles a single method write decoded from a command list.
+///
+/// BindObject binds the subchannel to the engine id held in `value`. Any other method below
+/// CountBufferMethods is logged as unimplemented and dropped. Everything else is forwarded to the
+/// engine currently bound to the subchannel.
+///
+/// @param method Method id taken from the command header.
+/// @param subchannel Subchannel the command was submitted on.
+/// @param value Argument to write to the method.
+void GPU::WriteReg(u32 method, u32 subchannel, u32 value) {
+    LOG_WARNING(HW_GPU, "Processing method %08X on subchannel %u value %08X", method, subchannel,
+                value);
+
+    if (method == static_cast<u32>(BufferMethods::BindObject)) {
+        // Bind the current subchannel to the desired engine id.
+        LOG_DEBUG(HW_GPU, "Binding subchannel %u to engine %u", subchannel, value);
+        ASSERT(bound_engines.find(subchannel) == bound_engines.end());
+        bound_engines[subchannel] = static_cast<EngineID>(value);
+        return;
+    }
+
+    if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+        // TODO(Subv): Research and implement these methods.
+        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+        return;
+    }
+
+    // Look the binding up exactly once and never through operator[]: operator[] default-inserts an
+    // entry for an unbound subchannel, which would leave a bogus engine id in the map and make a
+    // later BindObject on that subchannel trip its "not bound yet" assertion.
+    const auto binding = bound_engines.find(subchannel);
+    ASSERT(binding != bound_engines.end());
+    if (binding == bound_engines.end()) {
+        // Only reachable when the assertion above does not halt execution; drop the write.
+        return;
+    }
+
+    switch (binding->second) {
+    case EngineID::FERMI_TWOD_A:
+        fermi_2d->WriteReg(method, value);
+        break;
+    case EngineID::MAXWELL_B:
+        maxwell_3d->WriteReg(method, value);
+        break;
+    case EngineID::MAXWELL_COMPUTE_B:
+        maxwell_compute->WriteReg(method, value);
+        break;
+    default:
+        UNIMPLEMENTED();
+    }
+}
+
+/// Decodes the command list stored at the given GPU virtual address and issues the register
+/// writes it describes. `size` counts header-sized (32-bit) words; the bound is only checked
+/// before each header is read, not while the arguments of a command are consumed.
+void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
+    // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an
+    // application VAddr.
+    const VAddr list_begin = memory_manager->PhysicalToVirtualAddress(address);
+    const VAddr list_end = list_begin + size * sizeof(CommandHeader);
+    VAddr cursor = list_begin;
+
+    // Reads the word under the cursor and moves the cursor past it.
+    const auto next_word = [&cursor]() {
+        const u32 word = Memory::Read32(cursor);
+        cursor += sizeof(u32);
+        return word;
+    };
+
+    while (cursor < list_end) {
+        const CommandHeader header = {next_word()};
+        const u32 method = header.method.Value();
+        const u32 subchannel = header.subchannel.Value();
+        const u32 arg_count = header.arg_count.Value();
+
+        switch (header.mode.Value()) {
+        case SubmissionMode::IncreasingOld:
+        case SubmissionMode::Increasing:
+            // Argument i is written to method + i.
+            for (u32 i = 0; i < arg_count; ++i) {
+                WriteReg(method + i, subchannel, next_word());
+            }
+            break;
+        case SubmissionMode::NonIncreasingOld:
+        case SubmissionMode::NonIncreasing:
+            // Every argument is written to the same method.
+            for (u32 i = 0; i < arg_count; ++i) {
+                WriteReg(method, subchannel, next_word());
+            }
+            break;
+        case SubmissionMode::IncreaseOnce:
+            ASSERT(header.arg_count.Value() >= 1);
+            // The first argument goes to the original method ...
+            WriteReg(method, subchannel, next_word());
+            // ... and every remaining argument goes to the method right after it.
+            for (u32 i = 1; i < arg_count; ++i) {
+                WriteReg(method + 1, subchannel, next_word());
+            }
+            break;
+        case SubmissionMode::Inline:
+            // No argument words follow: bits 16-28 of the header carry the value as an immediate.
+            WriteReg(method, subchannel, header.inline_data.Value());
+            break;
+        default:
+            UNIMPLEMENTED();
+        }
+    }
+}
+
+} // namespace Tegra
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@ -0,0 +1,39 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+
+namespace Tegra {
+
+/// Selects how the argument words following a command header are mapped onto method ids.
+enum class SubmissionMode : u32 {
+    /// Handled the same way as Increasing by GPU::ProcessCommandList.
+    IncreasingOld = 0,
+    /// Argument i is written to method + i.
+    Increasing = 1,
+    /// Handled the same way as NonIncreasing by GPU::ProcessCommandList.
+    NonIncreasingOld = 2,
+    /// Every argument is written to the same method.
+    NonIncreasing = 3,
+    /// No argument words follow; the value is carried in the header itself (inline_data).
+    Inline = 4,
+    /// The first argument is written to method, every following one to method + 1.
+    IncreaseOnce = 5
+};
+
+/// A single 32-bit command header word as read from a command list.
+union CommandHeader {
+    /// Raw header word.
+    u32 hex;
+
+    /// Method id the arguments are written to.
+    BitField<0, 13, u32> method;
+    /// Subchannel the command is submitted on; it selects the bound engine.
+    BitField<13, 3, u32> subchannel;
+
+    // arg_count and inline_data overlap (same template arguments); mode selects which applies.
+    /// Number of argument words that follow the header (modes other than Inline).
+    BitField<16, 13, u32> arg_count;
+    /// Immediate value used as the only argument in Inline mode.
+    BitField<16, 13, u32> inline_data;
+
+    /// How the arguments are mapped onto method ids.
+    BitField<29, 3, SubmissionMode> mode;
+};
+static_assert(std::is_standard_layout<CommandHeader>::value == true,
+              "CommandHeader does not use standard layout");
+static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
+
+// NOTE(review): this free-function declaration has no matching definition in the visible change;
+// the implementation shown is the member GPU::ProcessCommandList(GPUVAddr, u32). Confirm whether
+// this declaration is still needed.
+void ProcessCommandList(VAddr address, u32 size);
+
+} // namespace Tegra
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@ -0,0 +1,13 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/fermi_2d.h"
+
+namespace Tegra {
+namespace Engines {
+
+// Stub: register writes to the 2D engine are currently accepted and ignored.
+void Fermi2D::WriteReg(u32 method, u32 value) {}
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@ -0,0 +1,22 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// 2D engine. GPU::WriteReg forwards method writes here for subchannels bound to
+/// EngineID::FERMI_TWOD_A.
+class Fermi2D final {
+public:
+    Fermi2D() = default;
+    ~Fermi2D() = default;
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+};
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@ -0,0 +1,13 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/maxwell_3d.h"
+
+namespace Tegra {
+namespace Engines {
+
+// Stub: register writes to the 3D engine are currently accepted and ignored.
+void Maxwell3D::WriteReg(u32 method, u32 value) {}
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@ -0,0 +1,22 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// 3D engine. GPU::WriteReg forwards method writes here for subchannels bound to
+/// EngineID::MAXWELL_B.
+class Maxwell3D final {
+public:
+    Maxwell3D() = default;
+    ~Maxwell3D() = default;
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+};
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/engines/maxwell_compute.cpp
+++ b/src/video_core/engines/maxwell_compute.cpp
@ -0,0 +1,13 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/engines/maxwell_compute.h"
+
+namespace Tegra {
+namespace Engines {
+
+// Stub: register writes to the compute engine are currently accepted and ignored.
+void MaxwellCompute::WriteReg(u32 method, u32 value) {}
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/maxwell_compute.h
@ -0,0 +1,22 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+namespace Engines {
+
+/// Compute engine. GPU::WriteReg forwards method writes here for subchannels bound to
+/// EngineID::MAXWELL_COMPUTE_B.
+class MaxwellCompute final {
+public:
+    MaxwellCompute() = default;
+    ~MaxwellCompute() = default;
+
+    /// Write the value to the register identified by method.
+    void WriteReg(u32 method, u32 value);
+};
+
+} // namespace Engines
+} // namespace Tegra
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@ -0,0 +1,55 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include "common/common_types.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_compute.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+
+/// Engine ids a subchannel can be bound to. The value arrives as the argument of the BindObject
+/// method and is cast to this enum unchecked.
+enum class EngineID {
+    FERMI_TWOD_A = 0x902D, // 2D Engine
+    MAXWELL_B = 0xB197,    // 3D Engine
+    // Dispatched to the compute engine by GPU::WriteReg.
+    MAXWELL_COMPUTE_B = 0xB1C0,
+    // GPU::WriteReg has no case for the two ids below; writes to them hit UNIMPLEMENTED().
+    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+    MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+/// Owns the GPU memory manager and the engines, and routes decoded command list writes to the
+/// engine bound to each subchannel.
+class GPU final {
+public:
+    /// Creates the memory manager and all engines. The initializer order matches the member
+    /// declaration order.
+    GPU()
+        : memory_manager(std::make_unique<MemoryManager>()),
+          maxwell_3d(std::make_unique<Engines::Maxwell3D>()),
+          fermi_2d(std::make_unique<Engines::Fermi2D>()),
+          maxwell_compute(std::make_unique<Engines::MaxwellCompute>()) {}
+    ~GPU() = default;
+
+    /// Decodes the command list stored at the given address in GPU memory and executes it.
+    void ProcessCommandList(GPUVAddr address, u32 size);
+
+    /// GPU address space manager; public so other components can reach it through the GPU.
+    std::unique_ptr<MemoryManager> memory_manager;
+
+private:
+    /// Performs one method write on the engine bound to the given subchannel.
+    void WriteReg(u32 method, u32 subchannel, u32 value);
+
+    /// Engine id bound to each command subchannel, keyed by subchannel number.
+    std::unordered_map<u32, EngineID> bound_engines;
+
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
+};
+
+} // namespace Tegra
--- a/src/core/hle/service/nvdrv/memory_manager.cpp
+++ b/src/core/hle/service/nvdrv/memory_manager.cpp
@ -3,10 +3,9 @@
 // Refer to the license.txt file included.

 #include "common/assert.h"
-#include "core/hle/service/nvdrv/memory_manager.h"
+#include "video_core/memory_manager.h"

-namespace Service {
-namespace Nvidia {
+namespace Tegra {

 PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
    boost::optional<PAddr> paddr = FindFreeBlock(size, align);
@ -108,5 +107,4 @@ VAddr& MemoryManager::PageSlot(PAddr paddr) {
    return (*block)[(paddr >> Memory::PAGE_BITS) & PAGE_BLOCK_MASK];
 }

-} // namespace Nvidia
-} // namespace Service
+} // namespace Tegra
--- a/src/core/hle/service/nvdrv/memory_manager.h
+++ b/src/core/hle/service/nvdrv/memory_manager.h
@ -9,8 +9,10 @@
 #include "common/common_types.h"
 #include "core/memory.h"

-namespace Service {
-namespace Nvidia {
+namespace Tegra {
+
+/// Virtual addresses in the GPU's memory map are 64 bit.
+using GPUVAddr = u64;

 class MemoryManager final {
 public:
@ -44,5 +46,4 @@ private:
    std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
 };

-} // namespace Nvidia
-} // namespace Service
+} // namespace Tegra