From ae8d4b6c0c930178d17bb61bb17270aea133754d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 18 Dec 2019 00:36:21 -0300 Subject: [PATCH] shader/memory: Implement LDG.U8 and unaligned U8 loads LDG can load single bytes instead of full integers or packs of integers. These have the advantage of loading bytes that are not aligned to 4 bytes. To emulate these, this commit gets the byte being referenced (by doing "address & 3") and then uses that to extract the byte from the loaded integer: result = bitfieldExtract(loaded_integer, (address % 4) * 8, 8) --- src/video_core/shader/decode/memory.cpp | 38 +++++++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 78e92f52e7..c934d07193 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -22,7 +22,23 @@ using Tegra::Shader::Register; namespace { -u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { +u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} + +u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::Single: return 1; @@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const auto [real_address_base, base_address, descriptor] = TrackGlobalMemory(bb, instr, false); - const u32 count = GetUniformTypeElementsCount(type); + const u32 count = GetLdgMemorySize(type); if (!real_address_base || !base_address) { // Tracking failed, load zeroes.
for (u32 i = 0; i < count; ++i) { @@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + + if (type == Tegra::Shader::UniformType::UnsignedByte) { + // To handle unaligned loads get the byte used to dereference global memory + // and extract that byte from the loaded uint32. + Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); + byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); + + gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), + Immediate(8)); + } SetTemporary(bb, i, gmem); } + for (u32 i = 0; i < count; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } @@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } - const u32 count = GetUniformTypeElementsCount(type); + const u32 count = GetStgMemorySize(type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);