From 2bb7ea436dc74f812a8092201dc597ed58ff3c7a Mon Sep 17 00:00:00 2001 From: Liam Date: Sat, 10 Jun 2023 11:40:58 -0400 Subject: [PATCH] shader_recompiler: remove barriers in conditional control flow when device lacks support --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 3 ++ src/shader_recompiler/host_translate_info.h | 2 + .../ir_opt/conditional_barrier_pass.cpp | 44 +++++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_shader_cache.cpp | 1 + .../vulkan_common/vulkan_device.cpp | 2 + src/video_core/vulkan_common/vulkan_device.h | 5 +++ 10 files changed, 65 insertions(+) create mode 100644 src/shader_recompiler/ir_opt/conditional_barrier_pass.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 525b2363c3..2baa64322d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -216,6 +216,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate_program.h host_translate_info.h ir_opt/collect_shader_info_pass.cpp + ir_opt/conditional_barrier_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/dual_vertex_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 17a6d48883..5293823550 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -286,6 +286,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool 0) { + conditional_return_count++; + } + break; + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + if ((conditional_control_flow_count > 0 || conditional_return_count > 0) && + inst.GetOpcode() == IR::Opcode::Barrier) { + LOG_WARNING(Shader, "Barrier within conditional control flow"); + inst.ReplaceOpcode(IR::Opcode::Identity); + } + } + break; + default: + break; + } + } + ASSERT(conditional_control_flow_count == 0); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 1f8f2ba95e..a677bfc653 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -13,6 +13,7 @@ struct HostTranslateInfo; namespace Shader::Optimization { void CollectShaderInfoPass(Environment& env, IR::Program& program); +void ConditionalBarrierPass(IR::Program& program); void ConstantPropagationPass(Environment& env, IR::Program& program); void DeadCodeEliminationPass(IR::Program& program); void GlobalMemoryToStorageBufferPass(IR::Program& program); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 400c219814..03d234f2fd 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -201,6 +201,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)) && !strict_context_required; use_driver_cache = is_nvidia; + supports_conditional_barriers = !is_intel; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index cc0b95f1a5..ad27264e5f 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -188,6 +188,10 @@ public: return strict_context_required; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -233,6 +237,7 @@ private: bool has_bool_ref_bug{}; bool can_report_memory{}; bool strict_context_required{}; + bool supports_conditional_barriers{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ecda29842..183c1a7ea3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -238,6 +238,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_snorm_render_buffer = false, .support_viewport_index_layer = device.HasVertexViewportLayer(), .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), + .support_conditional_barrier = device.SupportsConditionalBarriers(), } { if (use_asynchronous_shaders) { workers = CreateWorkers(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0158b6b0d3..a46f9beed5 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -386,6 +386,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, FormatType::Optimal); + supports_conditional_barriers = !(is_intel_anv || is_intel_windows); + CollectPhysicalMemoryInfo(); CollectToolingInfo(); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d62a103a1b..ccce9429ac 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -580,6 +580,10 @@ public: return properties.properties.limits.maxVertexInputBindings; } + bool SupportsConditionalBarriers() const { + return supports_conditional_barriers; + } + private: /// Checks if the physical device is suitable and configures the object state /// with all necessary info about its properties. @@ -683,6 +687,7 @@ private: bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. + bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool