From 34d581f2dcffa9f54e96af230a56cb01e8e2fccd Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 16 Dec 2016 21:41:38 -0800 Subject: [PATCH 01/15] VideoCore/Shader: Extract input vertex loading code into function --- src/video_core/command_processor.cpp | 6 ++++-- src/video_core/shader/shader.cpp | 30 ++++++++++++---------------- src/video_core/shader/shader.h | 12 ++++++++--- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index ea58e9f548..36f72393b9 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -149,7 +149,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); - g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1); + shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); + g_state.vs.Run(shader_unit); Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); @@ -283,7 +284,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); + shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); + g_state.vs.Run(shader_unit); // Retrieve vertex from register data output_vertex = shader_unit.output_registers.ToVertex(regs.vs); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 7ae57e6199..8dca9d0cb1 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -76,6 +76,17 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { return ret; } +void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { + // Setup input 
register table + const auto& attribute_register_map = g_state.regs.vs.input_register_map; + + for (int i = 0; i < num_attributes; i++) + registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; + + conditional_code[0] = false; + conditional_code[1] = false; +} + #ifdef ARCHITECTURE_x86_64 static std::unordered_map> shader_map; static const JitShader* jit_shader; @@ -109,21 +120,12 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { +void ShaderSetup::Run(UnitState& state) { auto& config = g_state.regs.vs; auto& setup = g_state.vs; MICROPROFILE_SCOPE(GPU_Shader); - // Setup input register table - const auto& attribute_register_map = config.input_register_map; - - for (int i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; - - state.conditional_code[0] = false; - state.conditional_code[1] = false; - #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { jit_shader->Run(setup, state, config.main_offset); @@ -145,13 +147,7 @@ DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); - const auto& attribute_register_map = config.input_register_map; - for (int i = 0; i < num_attributes; i++) - state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; - - state.conditional_code[0] = false; - state.conditional_code[1] = false; - + state.LoadInputVertex(input, num_attributes); RunInterpreter(setup, state, debug_data, config.main_offset); return debug_data; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2b07759b94..c5d23e0ea6 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -142,6 +142,14 @@ struct 
UnitState { return 0; } } + + /** + * Loads the unit state with an input vertex. + * + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes to load + */ + void LoadInputVertex(const InputVertex& input, int num_attributes); }; /// Clears the shader cache @@ -182,10 +190,8 @@ struct ShaderSetup { /** * Runs the currently setup shader * @param state Shader unit state, must be setup per shader and per shader unit - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes */ - void Run(UnitState& state, const InputVertex& input, int num_attributes); + void Run(UnitState& state); /** * Produce debug information based on the given shader and input vertex From e3caf669b05bc0727053885ee7e6e5c78d655df4 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 16 Dec 2016 21:48:36 -0800 Subject: [PATCH 02/15] VideoCore/Shader: Use self instead of g_state.vs in ShaderSetup --- .../debugger/graphics/graphics_vertex_shader.cpp | 3 +-- src/video_core/shader/shader.cpp | 16 +++++++--------- src/video_core/shader/shader.h | 3 +-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index ff2e7e3635..89512146ea 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -518,8 +518,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({entry_point, "main"}); // Generate debug information - debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, - shader_setup); + debug_data = shader_setup.ProduceDebugInfo(input_vertex, num_attributes, shader_config); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/shader/shader.cpp 
b/src/video_core/shader/shader.cpp index 8dca9d0cb1..868be13604 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -102,8 +102,8 @@ void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = - Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ - Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)); + Common::ComputeHash64(&program_code, sizeof(program_code)) ^ + Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { @@ -122,33 +122,31 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); void ShaderSetup::Run(UnitState& state) { auto& config = g_state.regs.vs; - auto& setup = g_state.vs; MICROPROFILE_SCOPE(GPU_Shader); #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(setup, state, config.main_offset); + jit_shader->Run(*this, state, config.main_offset); } else { DebugData dummy_debug_data; - RunInterpreter(setup, state, dummy_debug_data, config.main_offset); + RunInterpreter(*this, state, dummy_debug_data, config.main_offset); } #else DebugData dummy_debug_data; - RunInterpreter(setup, state, dummy_debug_data, config.main_offset); + RunInterpreter(*this, state, dummy_debug_data, config.main_offset); #endif // ARCHITECTURE_x86_64 } DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, - const Regs::ShaderConfig& config, - const ShaderSetup& setup) { + const Regs::ShaderConfig& config) { UnitState state; DebugData debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(setup, state, debug_data, config.main_offset); + RunInterpreter(*this, state, debug_data, config.main_offset); return debug_data; } diff --git 
a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index c5d23e0ea6..61becb6e5f 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -198,11 +198,10 @@ struct ShaderSetup { * @param input Input vertex into the shader * @param num_attributes The number of vertex shader attributes * @param config Configuration object for the shader pipeline - * @param setup Setup object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - const Regs::ShaderConfig& config, const ShaderSetup& setup); + const Regs::ShaderConfig& config); }; } // namespace Shader From 1e1f9398176e4f1ec608f31f22a576c749a0a723 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 16 Dec 2016 22:30:00 -0800 Subject: [PATCH 03/15] VideoCore/Shader: Use only entry_point as ShaderSetup param This removes all implicit dependency of ShaderState on global PICA state. 
--- .../debugger/graphics/graphics_vertex_shader.cpp | 2 +- src/video_core/command_processor.cpp | 4 ++-- src/video_core/shader/shader.cpp | 16 +++++++++------- src/video_core/shader/shader.h | 4 ++-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index 89512146ea..c556d3b151 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -518,7 +518,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({entry_point, "main"}); // Generate debug information - debug_data = shader_setup.ProduceDebugInfo(input_vertex, num_attributes, shader_config); + debug_data = shader_setup.ProduceDebugInfo(input_vertex, num_attributes, entry_point); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 36f72393b9..fc224c6f20 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -150,7 +150,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); - g_state.vs.Run(shader_unit); + g_state.vs.Run(shader_unit, regs.vs.main_offset); Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); @@ -285,7 +285,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); - g_state.vs.Run(shader_unit); + g_state.vs.Run(shader_unit, regs.vs.main_offset); // Retrieve vertex from register data output_vertex = 
shader_unit.output_registers.ToVertex(regs.vs); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 868be13604..936db05826 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -120,33 +120,35 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState& state) { - auto& config = g_state.regs.vs; +void ShaderSetup::Run(UnitState& state, unsigned int entry_point) { + ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(*this, state, config.main_offset); + jit_shader->Run(*this, state, entry_point); } else { DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, config.main_offset); + RunInterpreter(*this, state, dummy_debug_data, entry_point); } #else DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, config.main_offset); + RunInterpreter(*this, state, dummy_debug_data, entry_point); #endif // ARCHITECTURE_x86_64 } DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, - const Regs::ShaderConfig& config) { + unsigned int entry_point) { + ASSERT(entry_point < 1024); + UnitState state; DebugData debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(*this, state, debug_data, config.main_offset); + RunInterpreter(*this, state, debug_data, entry_point); return debug_data; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 61becb6e5f..d21f481abb 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -191,7 +191,7 @@ struct ShaderSetup { * Runs the currently setup shader * @param state Shader unit state, must be setup per shader and per shader unit */ - void Run(UnitState& 
state); + void Run(UnitState& state, unsigned int entry_point); /** * Produce debug information based on the given shader and input vertex @@ -201,7 +201,7 @@ struct ShaderSetup { * @return Debug information for this shader with regards to the given vertex */ DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - const Regs::ShaderConfig& config); + unsigned int entry_point); }; } // namespace Shader From bd82cffd0bf5d61eab8c7c856bcc284b1a77e33b Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 16 Dec 2016 22:32:35 -0800 Subject: [PATCH 04/15] VideoCore/Shader: Add constness to methods --- src/video_core/shader/shader.cpp | 4 ++-- src/video_core/shader/shader.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 936db05826..ae696533fb 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -120,7 +120,7 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState& state, unsigned int entry_point) { +void ShaderSetup::Run(UnitState& state, unsigned int entry_point) const { ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); @@ -139,7 +139,7 @@ void ShaderSetup::Run(UnitState& state, unsigned int entry_point) { } DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) { + unsigned int entry_point) const { ASSERT(entry_point < 1024); UnitState state; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index d21f481abb..44b9861e99 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -191,7 +191,7 @@ struct ShaderSetup { * Runs the currently setup shader * @param state Shader unit state, must be setup per shader and per shader unit */ - void Run(UnitState& state, unsigned int entry_point); + void Run(UnitState& state, 
unsigned int entry_point) const; /** * Produce debug information based on the given shader and input vertex @@ -201,7 +201,7 @@ struct ShaderSetup { * @return Debug information for this shader with regards to the given vertex */ DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point); + unsigned int entry_point) const; }; } // namespace Shader From dd4a1672a77830a53de61cf0554b34e9e17a2905 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 16 Dec 2016 23:21:26 -0800 Subject: [PATCH 05/15] VideoCore/Shader: Split shader uniform state and shader engine Currently there's only a single dummy implementation, which will be split in a following commit. --- .../graphics/graphics_vertex_shader.cpp | 4 +- src/video_core/command_processor.cpp | 13 +++--- src/video_core/shader/shader.cpp | 44 ++++++++++++++----- src/video_core/shader/shader.h | 17 ++++--- src/video_core/shader/shader_interpreter.h | 1 + 5 files changed, 57 insertions(+), 22 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index c556d3b151..7adc3ad143 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -518,7 +518,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({entry_point, "main"}); // Generate debug information - debug_data = shader_setup.ProduceDebugInfo(input_vertex, num_attributes, entry_point); + auto* shader_engine = Pica::Shader::GetEngine(); + shader_engine->SetupBatch(&shader_setup); + debug_data = shader_engine->ProduceDebugInfo(input_vertex, num_attributes, entry_point); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index fc224c6f20..694c9f169d 100644 --- a/src/video_core/command_processor.cpp 
+++ b/src/video_core/command_processor.cpp @@ -142,15 +142,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { MICROPROFILE_SCOPE(GPU_Drawing); immediate_attribute_id = 0; - Shader::UnitState shader_unit; - g_state.vs.Setup(); + auto* shader_engine = Shader::GetEngine(); + shader_engine->SetupBatch(&g_state.vs); // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); + Shader::UnitState shader_unit; shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); - g_state.vs.Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(shader_unit, regs.vs.main_offset); Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); @@ -244,8 +245,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); + auto* shader_engine = Shader::GetEngine(); Shader::UnitState shader_unit; - g_state.vs.Setup(); + + shader_engine->SetupBatch(&g_state.vs); for (unsigned int index = 0; index < regs.num_vertices; ++index) { // Indexed rendering doesn't use the start offset @@ -285,7 +288,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); - g_state.vs.Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(shader_unit, regs.vs.main_offset); // Retrieve vertex from register data output_vertex = shader_unit.output_registers.ToVertex(regs.vs); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index ae696533fb..d276a1221b 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -87,6 +87,17 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { conditional_code[1] = false; } +class MergedShaderEngine : public ShaderEngine { +public: 
+ void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; + +private: + const ShaderSetup* setup = nullptr; +}; + #ifdef ARCHITECTURE_x86_64 static std::unordered_map> shader_map; static const JitShader* jit_shader; @@ -98,13 +109,17 @@ void ClearCache() { #endif // ARCHITECTURE_x86_64 } -void ShaderSetup::Setup() { +void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) { + setup = setup_; + if (setup == nullptr) + return; + #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - u64 cache_key = - Common::ComputeHash64(&program_code, sizeof(program_code)) ^ - Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); + u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); + u64 cache_key = code_hash ^ swizzle_hash; auto iter = shader_map.find(cache_key); if (iter != shader_map.end()) { jit_shader = iter->second.get(); @@ -120,26 +135,28 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -void ShaderSetup::Run(UnitState& state, unsigned int entry_point) const { +void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(*this, state, entry_point); + jit_shader->Run(*setup, state, entry_point); } else { DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, entry_point); + RunInterpreter(*setup, state, dummy_debug_data, entry_point); } #else DebugData dummy_debug_data; - RunInterpreter(*this, state, dummy_debug_data, entry_point); + RunInterpreter(*setup, state, 
dummy_debug_data, entry_point); #endif // ARCHITECTURE_x86_64 } -DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const { +DebugData MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + ASSERT(setup != nullptr); ASSERT(entry_point < 1024); UnitState state; @@ -148,10 +165,15 @@ DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(*this, state, debug_data, entry_point); + RunInterpreter(*setup, state, debug_data, entry_point); return debug_data; } +ShaderEngine* GetEngine() { + static MergedShaderEngine merged_engine; + return &merged_engine; +} + } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 44b9861e99..899fb26071 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -156,7 +156,6 @@ struct UnitState { void ClearCache(); struct ShaderSetup { - struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are // therefore required to be 16-byte aligned. @@ -180,18 +179,23 @@ struct ShaderSetup { std::array program_code; std::array swizzle_data; +}; + +class ShaderEngine { +public: + virtual ~ShaderEngine() = default; /** * Performs any shader unit setup that only needs to happen once per shader (as opposed to once * per vertex, which would happen within the `Run` function). 
*/ - void Setup(); + virtual void SetupBatch(const ShaderSetup* setup) = 0; /** * Runs the currently setup shader * @param state Shader unit state, must be setup per shader and per shader unit */ - void Run(UnitState& state, unsigned int entry_point) const; + virtual void Run(UnitState& state, unsigned int entry_point) const = 0; /** * Produce debug information based on the given shader and input vertex @@ -200,10 +204,13 @@ struct ShaderSetup { * @param config Configuration object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const; + virtual DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const = 0; }; +// TODO(yuriks): Remove and make it non-global state somewhere +ShaderEngine* GetEngine(); + } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index d31dcd7a68..3237b50b36 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -8,6 +8,7 @@ namespace Pica { namespace Shader { +struct ShaderSetup; struct UnitState; template From 8eefc62833bc8c3052c23f4f0d01d8b60a01925c Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 00:06:23 -0800 Subject: [PATCH 06/15] VideoCore/Shader: Rename shader_jit_x64{ => _compiler}.{cpp,h} --- src/video_core/CMakeLists.txt | 4 ++-- src/video_core/shader/shader.cpp | 2 +- .../{shader_jit_x64.cpp => shader_jit_x64_compiler.cpp} | 2 +- .../shader/{shader_jit_x64.h => shader_jit_x64_compiler.h} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename src/video_core/shader/{shader_jit_x64.cpp => shader_jit_x64_compiler.cpp} (99%) rename src/video_core/shader/{shader_jit_x64.h => shader_jit_x64_compiler.h} (100%) diff --git a/src/video_core/CMakeLists.txt 
b/src/video_core/CMakeLists.txt index 6ca319b59c..36397cce96 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -50,10 +50,10 @@ set(HEADERS if(ARCHITECTURE_x86_64) set(SRCS ${SRCS} - shader/shader_jit_x64.cpp) + shader/shader_jit_x64_compiler.cpp) set(HEADERS ${HEADERS} - shader/shader_jit_x64.h) + shader/shader_jit_x64_compiler.h) endif() create_directory_groups(${SRCS} ${HEADERS}) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index d276a1221b..97c6519d64 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -17,7 +17,7 @@ #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" #ifdef ARCHITECTURE_x86_64 -#include "video_core/shader/shader_jit_x64.h" +#include "video_core/shader/shader_jit_x64_compiler.h" #endif // ARCHITECTURE_x86_64 #include "video_core/video_core.h" diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp similarity index 99% rename from src/video_core/shader/shader_jit_x64.cpp rename to src/video_core/shader/shader_jit_x64_compiler.cpp index c588b778bf..8805433060 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -17,7 +17,7 @@ #include "video_core/pica_state.h" #include "video_core/pica_types.h" #include "video_core/shader/shader.h" -#include "video_core/shader/shader_jit_x64.h" +#include "video_core/shader/shader_jit_x64_compiler.h" using namespace Common::X64; using namespace Xbyak::util; diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64_compiler.h similarity index 100% rename from src/video_core/shader/shader_jit_x64.h rename to src/video_core/shader/shader_jit_x64_compiler.h From 114d6b2f97eb62c7d8c958ebb391b70b026130f9 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 01:21:16 -0800 Subject: [PATCH 07/15] VideoCore/Shader: Split 
interpreter and JIT into separate ShaderEngines --- src/video_core/CMakeLists.txt | 2 + src/video_core/pica.cpp | 2 +- src/video_core/shader/shader.cpp | 106 ++++--------------- src/video_core/shader/shader.h | 5 +- src/video_core/shader/shader_interpreter.cpp | 39 ++++++- src/video_core/shader/shader_interpreter.h | 19 ++-- src/video_core/shader/shader_jit_x64.cpp | 56 ++++++++++ src/video_core/shader/shader_jit_x64.h | 35 ++++++ 8 files changed, 160 insertions(+), 104 deletions(-) create mode 100644 src/video_core/shader/shader_jit_x64.cpp create mode 100644 src/video_core/shader/shader_jit_x64.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 36397cce96..d55b84ce06 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -50,9 +50,11 @@ set(HEADERS if(ARCHITECTURE_x86_64) set(SRCS ${SRCS} + shader/shader_jit_x64.cpp shader/shader_jit_x64_compiler.cpp) set(HEADERS ${HEADERS} + shader/shader_jit_x64.h shader/shader_jit_x64_compiler.h) endif() diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index ce2bd455e6..b4a77c632d 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -499,7 +499,7 @@ void Init() { } void Shutdown() { - Shader::ClearCache(); + Shader::Shutdown(); } template diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 97c6519d64..b30dae476b 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -2,14 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include #include #include -#include -#include -#include -#include "common/bit_field.h" -#include "common/hash.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "video_core/pica.h" @@ -17,7 +11,7 @@ #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" #ifdef ARCHITECTURE_x86_64 -#include "video_core/shader/shader_jit_x64_compiler.h" +#include "video_core/shader/shader_jit_x64.h" #endif // ARCHITECTURE_x86_64 #include "video_core/video_core.h" @@ -87,91 +81,31 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { conditional_code[1] = false; } -class MergedShaderEngine : public ShaderEngine { -public: - void SetupBatch(const ShaderSetup* setup) override; - void Run(UnitState& state, unsigned int entry_point) const override; - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const override; - -private: - const ShaderSetup* setup = nullptr; -}; - -#ifdef ARCHITECTURE_x86_64 -static std::unordered_map> shader_map; -static const JitShader* jit_shader; -#endif // ARCHITECTURE_x86_64 - -void ClearCache() { -#ifdef ARCHITECTURE_x86_64 - shader_map.clear(); -#endif // ARCHITECTURE_x86_64 -} - -void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) { - setup = setup_; - if (setup == nullptr) - return; - -#ifdef ARCHITECTURE_x86_64 - if (VideoCore::g_shader_jit_enabled) { - u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); - u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); - - u64 cache_key = code_hash ^ swizzle_hash; - auto iter = shader_map.find(cache_key); - if (iter != shader_map.end()) { - jit_shader = iter->second.get(); - } else { - auto shader = std::make_unique(); - shader->Compile(); - jit_shader = shader.get(); - shader_map[cache_key] = std::move(shader); - } - } -#endif // ARCHITECTURE_x86_64 -} - MICROPROFILE_DEFINE(GPU_Shader, 
"GPU", "Shader", MP_RGB(50, 50, 240)); -void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const { - ASSERT(setup != nullptr); - ASSERT(entry_point < 1024); - - MICROPROFILE_SCOPE(GPU_Shader); - #ifdef ARCHITECTURE_x86_64 - if (VideoCore::g_shader_jit_enabled) { - jit_shader->Run(*setup, state, entry_point); - } else { - DebugData dummy_debug_data; - RunInterpreter(*setup, state, dummy_debug_data, entry_point); - } -#else - DebugData dummy_debug_data; - RunInterpreter(*setup, state, dummy_debug_data, entry_point); +static std::unique_ptr jit_engine; #endif // ARCHITECTURE_x86_64 -} - -DebugData MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const { - ASSERT(setup != nullptr); - ASSERT(entry_point < 1024); - - UnitState state; - DebugData debug_data; - - // Setup input register table - boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); - state.LoadInputVertex(input, num_attributes); - RunInterpreter(*setup, state, debug_data, entry_point); - return debug_data; -} +static InterpreterEngine interpreter_engine; ShaderEngine* GetEngine() { - static MergedShaderEngine merged_engine; - return &merged_engine; +#ifdef ARCHITECTURE_x86_64 + // TODO(yuriks): Re-initialize on each change rather than being persistent + if (VideoCore::g_shader_jit_enabled) { + if (jit_engine == nullptr) { + jit_engine = std::make_unique(); + } + return jit_engine.get(); + } +#endif // ARCHITECTURE_x86_64 + + return &interpreter_engine; +} + +void Shutdown() { +#ifdef ARCHITECTURE_x86_64 + jit_engine = nullptr; +#endif // ARCHITECTURE_x86_64 } } // namespace Shader diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 899fb26071..2afd1024fe 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -6,7 +6,6 @@ #include #include -#include #include #include #include "common/assert.h" @@ -152,9 +151,6 @@ struct UnitState { void 
LoadInputVertex(const InputVertex& input, int num_attributes); }; -/// Clears the shader cache -void ClearCache(); - struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are @@ -210,6 +206,7 @@ public: // TODO(yuriks): Remove and make it non-global state somewhere ShaderEngine* GetEngine(); +void Shutdown(); } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 20fb9754b3..8e2b8c5483 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -7,10 +7,12 @@ #include #include #include +#include #include #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/vector_math.h" #include "video_core/pica_state.h" #include "video_core/pica_types.h" @@ -37,8 +39,8 @@ struct CallStackElement { }; template -void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned offset) { +static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, + unsigned offset) { // TODO: Is there a maximal size for this? 
boost::container::static_vector call_stack; u32 program_counter = offset; @@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } } -// Explicit instantiation -template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData&, unsigned offset); -template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData&, unsigned offset); +void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) { + setup = setup_; +} + +MICROPROFILE_DECLARE(GPU_Shader); + +void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(entry_point < 1024); + + MICROPROFILE_SCOPE(GPU_Shader); + + DebugData dummy_debug_data; + RunInterpreter(*setup, state, dummy_debug_data, entry_point); +} + +DebugData InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(entry_point < 1024); + + UnitState state; + DebugData debug_data; + + // Setup input register table + boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); + state.LoadInputVertex(input, num_attributes); + RunInterpreter(*setup, state, debug_data, entry_point); + return debug_data; +} } // namespace diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 3237b50b36..43c1ed5ea8 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -4,19 +4,22 @@ #pragma once +#include "video_core/shader/shader.h" + namespace Pica { namespace Shader { -struct ShaderSetup; -struct UnitState; +class InterpreterEngine final : public ShaderEngine { +public: + void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; -template -struct DebugData; - 
-template -void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned offset); +private: + const ShaderSetup* setup = nullptr; +}; } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp new file mode 100644 index 0000000000..fea79538a5 --- /dev/null +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -0,0 +1,56 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/hash.h" +#include "common/microprofile.h" +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_jit_x64.h" +#include "video_core/shader/shader_jit_x64_compiler.h" + +namespace Pica { +namespace Shader { + +JitX64Engine::JitX64Engine() = default; +JitX64Engine::~JitX64Engine() = default; + +void JitX64Engine::SetupBatch(const ShaderSetup* setup_) { + cached_shader = nullptr; + setup = setup_; + if (setup == nullptr) + return; + + u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); + u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); + + u64 cache_key = code_hash ^ swizzle_hash; + auto iter = cache.find(cache_key); + if (iter != cache.end()) { + cached_shader = iter->second.get(); + } else { + auto shader = std::make_unique(); + shader->Compile(); + cached_shader = shader.get(); + cache.emplace_hint(iter, cache_key, std::move(shader)); + } +} + +MICROPROFILE_DECLARE(GPU_Shader); + +void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const { + ASSERT(setup != nullptr); + ASSERT(cached_shader != nullptr); + ASSERT(entry_point < 1024); + + MICROPROFILE_SCOPE(GPU_Shader); + + cached_shader->Run(*setup, state, entry_point); +} + +DebugData JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const { + UNIMPLEMENTED_MSG("Shader tracing/debugging is not 
supported by the JIT."); +} + +} // namespace Shader +} // namespace Pica diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h new file mode 100644 index 0000000000..df18de2c24 --- /dev/null +++ b/src/video_core/shader/shader_jit_x64.h @@ -0,0 +1,35 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "video_core/shader/shader.h" + +namespace Pica { +namespace Shader { + +class JitShader; + +class JitX64Engine final : public ShaderEngine { +public: + JitX64Engine(); + ~JitX64Engine() override; + + void SetupBatch(const ShaderSetup* setup) override; + void Run(UnitState& state, unsigned int entry_point) const override; + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, + unsigned int entry_point) const override; + +private: + const ShaderSetup* setup = nullptr; + + std::unordered_map> cache; + const JitShader* cached_shader = nullptr; +}; + +} // namespace Shader +} // namespace Pica From a9b7752b9d834335410a9e14a99cb943d0850575 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 01:26:07 -0800 Subject: [PATCH 08/15] Debugger: Always use interpreter for shader debugging --- src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index 7adc3ad143..c6f807eb3f 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -18,7 +18,9 @@ #include "citra_qt/util/util.h" #include "video_core/pica.h" #include "video_core/pica_state.h" +#include "video_core/shader/debug_data.h" #include "video_core/shader/shader.h" +#include 
"video_core/shader/shader_interpreter.h" using nihstro::OpCode; using nihstro::Instruction; @@ -518,9 +520,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({entry_point, "main"}); // Generate debug information - auto* shader_engine = Pica::Shader::GetEngine(); - shader_engine->SetupBatch(&shader_setup); - debug_data = shader_engine->ProduceDebugInfo(input_vertex, num_attributes, entry_point); + Pica::Shader::InterpreterEngine shader_engine; + shader_engine.SetupBatch(&shader_setup); + debug_data = shader_engine.ProduceDebugInfo(input_vertex, num_attributes, entry_point); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { From ade7ed7c5fd383e77c4d6949e652e1fd83844233 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 01:30:55 -0800 Subject: [PATCH 09/15] VideoCore/Shader: Move ProduceDebugInfo to InterpreterEngine --- .../debugger/graphics/graphics_vertex_shader.h | 1 + src/video_core/shader/shader.h | 11 ----------- src/video_core/shader/shader_interpreter.h | 11 ++++++++++- src/video_core/shader/shader_jit_x64.cpp | 5 ----- src/video_core/shader/shader_jit_x64.h | 2 -- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h index bedea0bed2..3292573f3b 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h @@ -8,6 +8,7 @@ #include #include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" #include "nihstro/parser_shbin.h" +#include "video_core/shader/debug_data.h" #include "video_core/shader/shader.h" class QLabel; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 2afd1024fe..9d24104876 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -14,7 +14,6 @@ #include "common/vector_math.h" #include 
"video_core/pica.h" #include "video_core/pica_types.h" -#include "video_core/shader/debug_data.h" using nihstro::RegisterType; using nihstro::SourceRegister; @@ -192,16 +191,6 @@ public: * @param state Shader unit state, must be setup per shader and per shader unit */ virtual void Run(UnitState& state, unsigned int entry_point) const = 0; - - /** - * Produce debug information based on the given shader and input vertex - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @param config Configuration object for the shader pipeline - * @return Debug information for this shader with regards to the given vertex - */ - virtual DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const = 0; }; // TODO(yuriks): Remove and make it non-global state somewhere diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 43c1ed5ea8..c3691da700 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -4,6 +4,7 @@ #pragma once +#include "video_core/shader/debug_data.h" #include "video_core/shader/shader.h" namespace Pica { @@ -14,8 +15,16 @@ class InterpreterEngine final : public ShaderEngine { public: void SetupBatch(const ShaderSetup* setup) override; void Run(UnitState& state, unsigned int entry_point) const override; + + /** + * Produce debug information based on the given shader and input vertex + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @param config Configuration object for the shader pipeline + * @return Debug information for this shader with regards to the given vertex + */ DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const override; + unsigned int entry_point) const; private: const ShaderSetup* setup = nullptr; diff --git 
a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index fea79538a5..6d83948e18 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -47,10 +47,5 @@ void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const { cached_shader->Run(*setup, state, entry_point); } -DebugData JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const { - UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT."); -} - } // namespace Shader } // namespace Pica diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index df18de2c24..b260444774 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -21,8 +21,6 @@ public: void SetupBatch(const ShaderSetup* setup) override; void Run(UnitState& state, unsigned int entry_point) const override; - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const override; private: const ShaderSetup* setup = nullptr; From fa4ac279a77871f45733d43fdecf756ff1e7ece0 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 02:29:22 -0800 Subject: [PATCH 10/15] shader_jit_x64: Don't read program from global state --- src/video_core/shader/shader_jit_x64.cpp | 2 +- .../shader/shader_jit_x64_compiler.cpp | 36 +++++++++---------- .../shader/shader_jit_x64_compiler.h | 6 +++- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 6d83948e18..755ae119fa 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -29,7 +29,7 @@ void JitX64Engine::SetupBatch(const ShaderSetup* setup_) { cached_shader = iter->second.get(); } else { auto shader = std::make_unique(); - shader->Compile(); + 
shader->Compile(&setup->program_code, &setup->swizzle_data); cached_shader = shader.get(); cache.emplace_hint(iter, cache_key, std::move(shader)); } diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 8805433060..49806e8c90 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -151,15 +151,6 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b; /// Raw constant for the destination register enable mask that indicates all components are enabled static const u8 NO_DEST_REG_MASK = 0xf; -/** - * Get the vertex shader instruction for a given offset in the current shader program - * @param offset Offset in the current shader program of the instruction - * @return Instruction at the specified offset - */ -static Instruction GetVertexShaderInstruction(size_t offset) { - return {g_state.vs.program_code[offset]}; -} - static void LogCritical(const char* msg) { LOG_CRITICAL(HW_GPU, "%s", msg); } @@ -233,7 +224,7 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe movaps(dest, xword[src_ptr + src_offset_disp]); } - SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; + SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]}; // Generate instructions for source register swizzling as needed u8 sel = swiz.GetRawSelector(src_num); @@ -264,7 +255,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { dest = instr.common.dest.Value(); } - SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]}; + SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]}; size_t dest_offset_disp = UnitState::OutputOffset(dest); @@ -522,7 +513,7 @@ void JitShader::Compile_MIN(Instruction instr) { } void JitShader::Compile_MOVA(Instruction instr) { - SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]}; + SwizzlePattern swiz = {(*swizzle_data)[instr.common.operand_desc_id]}; if 
(!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { return; // NoOp @@ -796,7 +787,7 @@ void JitShader::Compile_NextInstr() { L(instruction_labels[program_counter]); - Instruction instr = GetVertexShaderInstruction(program_counter++); + Instruction instr = {(*program_code)[program_counter++]}; OpCode::Id opcode = instr.opcode.Value(); auto instr_func = instr_table[static_cast(opcode)]; @@ -814,8 +805,8 @@ void JitShader::Compile_NextInstr() { void JitShader::FindReturnOffsets() { return_offsets.clear(); - for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) { - Instruction instr = GetVertexShaderInstruction(offset); + for (size_t offset = 0; offset < program_code->size(); ++offset) { + Instruction instr = {(*program_code)[offset]}; switch (instr.opcode.Value()) { case OpCode::Id::CALL: @@ -833,7 +824,11 @@ void JitShader::FindReturnOffsets() { std::sort(return_offsets.begin(), return_offsets.end()); } -void JitShader::Compile() { +void JitShader::Compile(const std::array* program_code_, + const std::array* swizzle_data_) { + program_code = program_code_; + swizzle_data = swizzle_data_; + // Reset flow control state program = (CompiledShader*)getCurr(); program_counter = 0; @@ -868,17 +863,18 @@ void JitShader::Compile() { jmp(ABI_PARAM3); // Compile entire program - Compile_Block(static_cast(g_state.vs.program_code.size())); + Compile_Block(static_cast(program_code->size())); // Free memory that's no longer needed + program_code = nullptr; + swizzle_data = nullptr; return_offsets.clear(); return_offsets.shrink_to_fit(); ready(); - uintptr_t size = reinterpret_cast(getCurr()) - reinterpret_cast(program); - ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); - LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); + ASSERT_MSG(getSize() <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); + LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", getSize()); } JitShader::JitShader() 
: Xbyak::CodeGenerator(MAX_SHADER_SIZE) {} diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index f37548306a..29e9875ea8 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -38,7 +38,8 @@ public: program(&setup, &state, instruction_labels[offset].getAddress()); } - void Compile(); + void Compile(const std::array* program_code, + const std::array* swizzle_data); void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); @@ -103,6 +104,9 @@ private: */ void FindReturnOffsets(); + const std::array* program_code = nullptr; + const std::array* swizzle_data = nullptr; + /// Mapping of Pica VS instructions to pointers in the emitted code std::array instruction_labels; From 1a2acc3baae01b9469ee97333c2ec4d58c8a0b91 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 02:33:43 -0800 Subject: [PATCH 11/15] Shader: Don't read ShaderSetup from global state --- src/video_core/shader/shader_interpreter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 8e2b8c5483..d1f11142dc 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -75,9 +75,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } }; - const auto& uniforms = g_state.vs.uniforms; - const auto& swizzle_data = g_state.vs.swizzle_data; - const auto& program_code = g_state.vs.program_code; + const auto& uniforms = setup.uniforms; + const auto& swizzle_data = setup.swizzle_data; + const auto& program_code = setup.program_code; // Placeholder for invalid inputs static float24 dummy_vec4_float24[4]; From 9ea5eacf919c8c257f8c5fda65e5fac2b6adee07 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 14:09:02 -0800 Subject: [PATCH 12/15] 
Shader: Initialize conditional_code in interpreter This doesn't belong in LoadInputVertex because it also happens for non-VS invocations. Since it's not used by the JIT it seems adequate to initialize it in the interpreter which is the only thing that cares about them. --- src/video_core/shader/shader.cpp | 3 --- src/video_core/shader/shader_interpreter.cpp | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index b30dae476b..1662b5d387 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -76,9 +76,6 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) { for (int i = 0; i < num_attributes; i++) registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; - - conditional_code[0] = false; - conditional_code[1] = false; } MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index d1f11142dc..ecc2270891 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -45,6 +45,9 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData boost::container::static_vector call_stack; u32 program_counter = offset; + state.conditional_code[0] = false; + state.conditional_code[1] = false; + auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, u8 repeat_count, u8 loop_increment) { // -1 to make sure when incrementing the PC we end up at the correct offset From 6fa3687afc97685101f9ee5c65cf98f505980695 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 14:38:03 -0800 Subject: [PATCH 13/15] Shader: Remove OutputRegisters struct --- src/video_core/command_processor.cpp | 7 ++++--- src/video_core/shader/shader.cpp | 11 ++++++----- 
src/video_core/shader/shader.h | 17 +++++------------ src/video_core/shader/shader_interpreter.cpp | 4 ++-- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 694c9f169d..66d19cba03 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -152,8 +152,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Shader::UnitState shader_unit; shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); shader_engine->Run(shader_unit, regs.vs.main_offset); - Shader::OutputVertex output_vertex = - shader_unit.output_registers.ToVertex(regs.vs); + auto output_vertex = Shader::OutputVertex::FromRegisters( + shader_unit.registers.output, regs, regs.vs.output_mask); // Send to renderer using Pica::Shader::OutputVertex; @@ -291,7 +291,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { shader_engine->Run(shader_unit, regs.vs.main_offset); // Retrieve vertex from register data - output_vertex = shader_unit.output_registers.ToVertex(regs.vs); + output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, + regs, regs.vs.output_mask); if (is_indexed) { vertex_cache[vertex_cache_pos] = output_vertex; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 1662b5d387..2da50bd620 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -19,7 +19,8 @@ namespace Pica { namespace Shader { -OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { +OutputVertex OutputVertex::FromRegisters(Math::Vec4 output_regs[16], const Regs& regs, + u32 output_mask) { // Setup output data OutputVertex ret; // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. 
We need to @@ -27,13 +28,13 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { unsigned index = 0; for (unsigned i = 0; i < 7; ++i) { - if (index >= g_state.regs.vs_output_total) + if (index >= regs.vs_output_total) break; - if ((config.output_mask & (1 << i)) == 0) + if ((output_mask & (1 << i)) == 0) continue; - const auto& output_register_map = g_state.regs.vs_output_attributes[index]; + const auto& output_register_map = regs.vs_output_attributes[index]; u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y, output_register_map.map_z, output_register_map.map_w}; @@ -41,7 +42,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { for (unsigned comp = 0; comp < 4; ++comp) { float24* out = ((float24*)&ret) + semantics[comp]; if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { - *out = value[i][comp]; + *out = output_regs[i][comp]; } else { // Zero output so that attributes which aren't output won't have denormals in them, // which would slow us down later. diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 9d24104876..7d51d0044b 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -73,19 +73,13 @@ struct OutputVertex { ret.Lerp(factor, v1); return ret; } + + static OutputVertex FromRegisters(Math::Vec4 output_regs[16], const Regs& regs, + u32 output_mask); }; static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -struct OutputRegisters { - OutputRegisters() = default; - - alignas(16) Math::Vec4 value[16]; - - OutputVertex ToVertex(const Regs::ShaderConfig& config) const; -}; -static_assert(std::is_pod::value, "Structure is not POD"); - /** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. 
At the present, Citra only implements a @@ -98,11 +92,10 @@ struct UnitState { // required to be 16-byte aligned. alignas(16) Math::Vec4 input[16]; alignas(16) Math::Vec4 temporary[16]; + alignas(16) Math::Vec4 output[16]; } registers; static_assert(std::is_pod::value, "Structure is not POD"); - OutputRegisters output_registers; - bool conditional_code[2]; // Two Address registers and one loop counter @@ -128,7 +121,7 @@ struct UnitState { static size_t OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return offsetof(UnitState, output_registers.value) + + return offsetof(UnitState, registers.output) + reg.GetIndex() * sizeof(Math::Vec4); case RegisterType::Temporary: diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index ecc2270891..a6197c10a4 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -175,7 +175,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData float24* dest = (instr.common.dest.Value() < 0x10) - ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] + ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; @@ -518,7 +518,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData float24* dest = (instr.mad.dest.Value() < 0x10) - ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] + ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] : (instr.mad.dest.Value() < 0x20) ? 
&state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; From 0f642741451e3f75c2f1d64ae9beccaf1437f12c Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 16:06:04 -0800 Subject: [PATCH 14/15] VideoCore/Shader: Move per-batch ShaderEngine state into ShaderSetup --- .../graphics/graphics_vertex_shader.cpp | 5 ++-- src/video_core/command_processor.cpp | 8 +++--- src/video_core/shader/shader.h | 17 ++++++++++--- src/video_core/shader/shader_interpreter.cpp | 16 ++++++------ src/video_core/shader/shader_interpreter.h | 11 +++----- src/video_core/shader/shader_jit_x64.cpp | 25 ++++++++----------- src/video_core/shader/shader_jit_x64.h | 7 ++---- 7 files changed, 43 insertions(+), 46 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index c6f807eb3f..616b34d568 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -521,8 +521,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d // Generate debug information Pica::Shader::InterpreterEngine shader_engine; - shader_engine.SetupBatch(&shader_setup); - debug_data = shader_engine.ProduceDebugInfo(input_vertex, num_attributes, entry_point); + shader_engine.SetupBatch(shader_setup); + debug_data = + shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes, entry_point); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 66d19cba03..c3872d06ca 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -143,7 +143,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_attribute_id = 0; auto* shader_engine = Shader::GetEngine(); - shader_engine->SetupBatch(&g_state.vs); + 
shader_engine->SetupBatch(g_state.vs); // Send to vertex shader if (g_debug_context) @@ -151,7 +151,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { static_cast(&immediate_input)); Shader::UnitState shader_unit; shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); - shader_engine->Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(g_state.vs, shader_unit, regs.vs.main_offset); auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.registers.output, regs, regs.vs.output_mask); @@ -248,7 +248,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { auto* shader_engine = Shader::GetEngine(); Shader::UnitState shader_unit; - shader_engine->SetupBatch(&g_state.vs); + shader_engine->SetupBatch(g_state.vs); for (unsigned int index = 0; index < regs.num_vertices; ++index) { // Indexed rendering doesn't use the start offset @@ -288,7 +288,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); - shader_engine->Run(shader_unit, regs.vs.main_offset); + shader_engine->Run(g_state.vs, shader_unit, regs.vs.main_offset); // Retrieve vertex from register data output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 7d51d0044b..f26d2ba4f2 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -167,6 +167,12 @@ struct ShaderSetup { std::array program_code; std::array swizzle_data; + + /// Data private to ShaderEngines + struct EngineData { + /// Used by the JIT, points to a compiled shader object. 
+ const void* cached_shader = nullptr; + } engine_data; }; class ShaderEngine { @@ -177,13 +183,16 @@ public: * Performs any shader unit setup that only needs to happen once per shader (as opposed to once * per vertex, which would happen within the `Run` function). */ - virtual void SetupBatch(const ShaderSetup* setup) = 0; + virtual void SetupBatch(ShaderSetup& setup) = 0; /** - * Runs the currently setup shader - * @param state Shader unit state, must be setup per shader and per shader unit + * Runs the currently setup shader. + * + * @param setup Shader engine state, must be setup with SetupBatch on each shader change. + * @param state Shader unit state, must be setup with input data before each shader invocation. */ - virtual void Run(UnitState& state, unsigned int entry_point) const = 0; + virtual void Run(const ShaderSetup& setup, UnitState& state, + unsigned int entry_point) const = 0; }; // TODO(yuriks): Remove and make it non-global state somewhere diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index a6197c10a4..e44abbf1d3 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -652,25 +652,23 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } } -void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) { - setup = setup_; -} +void InterpreterEngine::SetupBatch(ShaderSetup& setup) {} MICROPROFILE_DECLARE(GPU_Shader); -void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const { - ASSERT(setup != nullptr); +void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state, + unsigned int entry_point) const { ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); DebugData dummy_debug_data; - RunInterpreter(*setup, state, dummy_debug_data, entry_point); + RunInterpreter(setup, state, dummy_debug_data, entry_point); } -DebugData InterpreterEngine::ProduceDebugInfo(const 
InputVertex& input, int num_attributes, +DebugData InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, + const InputVertex& input, int num_attributes, unsigned int entry_point) const { - ASSERT(setup != nullptr); ASSERT(entry_point < 1024); UnitState state; @@ -679,7 +677,7 @@ DebugData InterpreterEngine::ProduceDebugInfo(const InputVertex& input, in // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(*setup, state, debug_data, entry_point); + RunInterpreter(setup, state, debug_data, entry_point); return debug_data; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index c3691da700..7f94d405f7 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -13,8 +13,8 @@ namespace Shader { class InterpreterEngine final : public ShaderEngine { public: - void SetupBatch(const ShaderSetup* setup) override; - void Run(UnitState& state, unsigned int entry_point) const override; + void SetupBatch(ShaderSetup& setup) override; + void Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const override; /** * Produce debug information based on the given shader and input vertex @@ -23,11 +23,8 @@ public: * @param config Configuration object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ - DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, - unsigned int entry_point) const; - -private: - const ShaderSetup* setup = nullptr; + DebugData ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, + int num_attributes, unsigned int entry_point) const; }; } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 755ae119fa..15c1d60b51 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ 
b/src/video_core/shader/shader_jit_x64.cpp @@ -14,37 +14,32 @@ namespace Shader { JitX64Engine::JitX64Engine() = default; JitX64Engine::~JitX64Engine() = default; -void JitX64Engine::SetupBatch(const ShaderSetup* setup_) { - cached_shader = nullptr; - setup = setup_; - if (setup == nullptr) - return; - - u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code)); - u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data)); +void JitX64Engine::SetupBatch(ShaderSetup& setup) { + u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); + u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); u64 cache_key = code_hash ^ swizzle_hash; auto iter = cache.find(cache_key); if (iter != cache.end()) { - cached_shader = iter->second.get(); + setup.engine_data.cached_shader = iter->second.get(); } else { auto shader = std::make_unique<JitShader>(); - shader->Compile(&setup->program_code, &setup->swizzle_data); - cached_shader = shader.get(); + shader->Compile(&setup.program_code, &setup.swizzle_data); + setup.engine_data.cached_shader = shader.get(); cache.emplace_hint(iter, cache_key, std::move(shader)); } } MICROPROFILE_DECLARE(GPU_Shader); -void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const { - ASSERT(setup != nullptr); - ASSERT(cached_shader != nullptr); +void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const { + ASSERT(setup.engine_data.cached_shader != nullptr); ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); - cached_shader->Run(*setup, state, entry_point); + const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader); + shader->Run(setup, state, entry_point); } } // namespace Shader diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b260444774..bd30f51e24 100644 --- a/src/video_core/shader/shader_jit_x64.h +++
b/src/video_core/shader/shader_jit_x64.h @@ -19,14 +19,11 @@ public: JitX64Engine(); ~JitX64Engine() override; - void SetupBatch(const ShaderSetup* setup) override; - void Run(UnitState& state, unsigned int entry_point) const override; + void SetupBatch(ShaderSetup& setup) override; + void Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const override; private: - const ShaderSetup* setup = nullptr; - std::unordered_map<u64, std::unique_ptr<JitShader>> cache; - const JitShader* cached_shader = nullptr; }; } // namespace Shader From 0e9081b97348c65029c96697443acb0dbbc58756 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sat, 17 Dec 2016 16:16:02 -0800 Subject: [PATCH 15/15] VideoCore/Shader: Move entry_point to SetupBatch --- .../graphics/graphics_vertex_shader.cpp | 5 ++--- src/video_core/command_processor.cpp | 8 ++++---- src/video_core/shader/shader.h | 6 +++--- src/video_core/shader/shader_interpreter.cpp | 19 +++++++++---------- src/video_core/shader/shader_interpreter.h | 6 +++--- src/video_core/shader/shader_jit_x64.cpp | 10 ++++++---- src/video_core/shader/shader_jit_x64.h | 4 ++-- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index 616b34d568..f375241908 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -521,9 +521,8 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d // Generate debug information Pica::Shader::InterpreterEngine shader_engine; - shader_engine.SetupBatch(shader_setup); - debug_data = - shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes, entry_point); + shader_engine.SetupBatch(shader_setup, entry_point); + debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr)
{ diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index c3872d06ca..eb79974a8a 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -143,7 +143,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_attribute_id = 0; auto* shader_engine = Shader::GetEngine(); - shader_engine->SetupBatch(g_state.vs); + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); // Send to vertex shader if (g_debug_context) @@ -151,7 +151,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { static_cast<void*>(&immediate_input)); Shader::UnitState shader_unit; shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1); - shader_engine->Run(g_state.vs, shader_unit, regs.vs.main_offset); + shader_engine->Run(g_state.vs, shader_unit); auto output_vertex = Shader::OutputVertex::FromRegisters( shader_unit.registers.output, regs, regs.vs.output_mask); @@ -248,7 +248,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { auto* shader_engine = Shader::GetEngine(); Shader::UnitState shader_unit; - shader_engine->SetupBatch(g_state.vs); + shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); for (unsigned int index = 0; index < regs.num_vertices; ++index) { // Indexed rendering doesn't use the start offset @@ -288,7 +288,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes()); - shader_engine->Run(g_state.vs, shader_unit, regs.vs.main_offset); + shader_engine->Run(g_state.vs, shader_unit); // Retrieve vertex from register data output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output, diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index f26d2ba4f2..44d9f76c3a 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -170,6 +170,7
@@ struct ShaderSetup { /// Data private to ShaderEngines struct EngineData { + unsigned int entry_point; /// Used by the JIT, points to a compiled shader object. const void* cached_shader = nullptr; } engine_data; @@ -183,7 +184,7 @@ public: * Performs any shader unit setup that only needs to happen once per shader (as opposed to once * per vertex, which would happen within the `Run` function). */ - virtual void SetupBatch(ShaderSetup& setup) = 0; + virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0; /** * Runs the currently setup shader. @@ -191,8 +192,7 @@ public: * @param setup Shader engine state, must be setup with SetupBatch on each shader change. * @param state Shader unit state, must be setup with input data before each shader invocation. */ - virtual void Run(const ShaderSetup& setup, UnitState& state, - unsigned int entry_point) const = 0; + virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0; }; // TODO(yuriks): Remove and make it non-global state somewhere diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index e44abbf1d3..c0c89b8578 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -652,32 +652,31 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } } -void InterpreterEngine::SetupBatch(ShaderSetup& setup) {} +void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { + ASSERT(entry_point < 1024); + setup.engine_data.entry_point = entry_point; +} MICROPROFILE_DECLARE(GPU_Shader); -void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state, - unsigned int entry_point) const { - ASSERT(entry_point < 1024); +void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const { MICROPROFILE_SCOPE(GPU_Shader); DebugData dummy_debug_data; - RunInterpreter(setup, state, dummy_debug_data, entry_point); + RunInterpreter(setup, 
state, dummy_debug_data, setup.engine_data.entry_point); } DebugData InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup, - const InputVertex& input, int num_attributes, - unsigned int entry_point) const { - ASSERT(entry_point < 1024); - + const InputVertex& input, + int num_attributes) const { UnitState state; DebugData debug_data; // Setup input register table boost::fill(state.registers.input, Math::Vec4::AssignToAll(float24::Zero())); state.LoadInputVertex(input, num_attributes); - RunInterpreter(setup, state, debug_data, entry_point); + RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point); return debug_data; } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 7f94d405f7..d6c0e2d8c1 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -13,8 +13,8 @@ namespace Shader { class InterpreterEngine final : public ShaderEngine { public: - void SetupBatch(ShaderSetup& setup) override; - void Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const override; + void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override; + void Run(const ShaderSetup& setup, UnitState& state) const override; /** * Produce debug information based on the given shader and input vertex @@ -24,7 +24,7 @@ public: * @return Debug information for this shader with regards to the given vertex */ DebugData ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input, - int num_attributes, unsigned int entry_point) const; + int num_attributes) const; }; } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 15c1d60b51..0ee0dd9ef7 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -14,7 +14,10 @@ namespace Shader { JitX64Engine::JitX64Engine() = default; JitX64Engine::~JitX64Engine() = default; -void 
JitX64Engine::SetupBatch(ShaderSetup& setup) { +void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { + ASSERT(entry_point < 1024); + setup.engine_data.entry_point = entry_point; + u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data)); @@ -32,14 +35,13 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup) { MICROPROFILE_DECLARE(GPU_Shader); -void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const { +void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const { ASSERT(setup.engine_data.cached_shader != nullptr); - ASSERT(entry_point < 1024); MICROPROFILE_SCOPE(GPU_Shader); const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader); - shader->Run(setup, state, entry_point); + shader->Run(setup, state, setup.engine_data.entry_point); } } // namespace Shader diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index bd30f51e24..078b2cba5d 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -19,8 +19,8 @@ public: JitX64Engine(); ~JitX64Engine() override; - void SetupBatch(ShaderSetup& setup) override; - void Run(const ShaderSetup& setup, UnitState& state, unsigned int entry_point) const override; + void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override; + void Run(const ShaderSetup& setup, UnitState& state) const override; private: std::unordered_map<u64, std::unique_ptr<JitShader>> cache;