From 9293c3a0f21b0729ed64fbc417f4102e5e27d009 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Tue, 29 Oct 2019 20:48:18 -0400
Subject: [PATCH] Shader_IR: Fix TLD4 and add Bindless Variant.

This commit fixes an issue where not all 4 results of tld4 were being
written, the color component was defaulted to red, among other things.
It also implements the bindless variant.
---
 src/video_core/engines/shader_bytecode.h | 30 +++++++++++++++++++++-
 src/video_core/shader/decode/texture.cpp | 32 ++++++++++++++++++------
 src/video_core/shader/shader_ir.h        |  4 +--
 3 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d3d05a8665..8f6bc76eb3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1237,6 +1237,32 @@ union Instruction {
         }
     } tld4;
 
+    union {
+        BitField<35, 1, u64> ndv_flag;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<33, 2, u64> info;
+        BitField<37, 2, u64> component;
+
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::NDV:
+                return ndv_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return info == 1;
+            case TextureMiscMode::PTP:
+                return info == 2;
+            default:
+                break;
+            }
+            return false;
+        }
+    } tld4_b;
+
     union {
         BitField<49, 1, u64> nodep_flag;
         BitField<50, 1, u64> dc_flag;
@@ -1590,7 +1616,8 @@ public:
         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
         TLD,    // Texture Load
         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,   // Texture Load 4
+        TLD4,   // Texture Gather 4
+        TLD4_B, // Texture Gather 4 Bindless
         TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
         TMML_B, // Texture Mip Map Level
         TMML,   // Texture Mip Map Level
@@ -1881,6 +1908,7 @@ private:
             INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
             INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index d61e656b75..0599ef34f1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -96,6 +96,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TLD4_B: {
+        is_bindless = true;
+        [[fallthrough]];
+    }
     case OpCode::Id::TLD4: {
         ASSERT(instr.tld4.array == 0);
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
@@ -108,11 +112,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
 
         const auto texture_type = instr.tld4.texture_type.Value();
-        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
         const bool is_array = instr.tld4.array != 0;
-        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
         WriteTexInstructionFloat(
-            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+            bb, instr,
+            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true);
         break;
     }
     case OpCode::Id::TLD4S: {
@@ -359,10 +366,11 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
     return *used_samplers.emplace(entry).first;
 }
 
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+                                        bool is_tld4) {
     u32 dest_elem = 0;
     for (u32 elem = 0; elem < 4; ++elem) {
-        if (!instr.tex.IsComponentEnabled(elem)) {
+        if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) {
             // Skip disabled components
             continue;
         }
@@ -583,7 +591,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 }
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array, bool is_aoffi) {
+                            bool is_array, bool is_aoffi, bool is_bindless) {
     const std::size_t coord_count = GetCoordCount(texture_type);
 
     // If enabled arrays index is always stored in the gpr8 field
@@ -597,6 +605,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     }
 
     u64 parameter_register = instr.gpr20.Value();
+
+    const auto& sampler =
+        is_bindless
+            ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
+            : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+
     std::vector<Node> aoffi;
     if (is_aoffi) {
         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
@@ -607,12 +621,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
         dc = GetRegister(parameter_register++);
     }
 
-    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
+                                       : Immediate(static_cast<u32>(instr.tld4.component));
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+                         element};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1fd44bde1d..7582999a51 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -326,7 +326,7 @@ private:
     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
 
     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                  const Node4& components);
+                                  const Node4& components, bool is_tld4 = false);
 
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                    const Node4& components, bool ignore_mask = false);
@@ -343,7 +343,7 @@ private:
                       bool is_array);
 
     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array, bool is_aoffi);
+                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
 
     Node4 GetTldCode(Tegra::Shader::Instruction instr);
 