diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d84caa6db6..70e124dc48 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -171,7 +171,7 @@ public: code.AddLine(fmt::format("case 0x{:x}u: {{", address)); ++code.scope; - VisitBasicBlock(bb); + VisitBlock(bb); --code.scope; code.AddLine('}'); @@ -423,7 +423,7 @@ private: code.AddNewLine(); } - void VisitBasicBlock(const BasicBlock& bb) { + void VisitBlock(const NodeBlock& bb) { for (const Node node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { code.AddLine(expr); @@ -575,7 +575,7 @@ private: code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); ++code.scope; - VisitBasicBlock(conditional->GetCode()); + VisitBlock(conditional->GetCode()); --code.scope; code.AddLine('}'); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 812983a995..740ac31186 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) { return exit_method = ExitMethod::AlwaysReturn; } -BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - BasicBlock basic_block; +NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { + NodeBlock basic_block; for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { pc = DecodeInstr(basic_block, pc); } return basic_block; } -u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { // Ignore sched instructions when generating code. if (IsSchedInstruction(pc, main_offset)) { return pc + 1; @@ -151,39 +151,38 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, "NeverExecute predicate not implemented"); - static const std::map - decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; + static const std::map decoders = { + {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, + {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, + {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, + {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, + {OpCode::Type::Shift, &ShaderIR::DecodeShift}, + {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, + {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, + {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, + {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, + {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, + {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, + {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, + {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, + {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, + {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, + {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, + {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, + {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, + {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, + {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, + {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, + {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, + {OpCode::Type::Video, &ShaderIR::DecodeVideo}, + {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, + }; std::vector tmp_block; if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, bb, pc); + pc = (this->*decoder->second)(tmp_block, pc); } else { - pc = DecodeOther(tmp_block, bb, pc); + pc = DecodeOther(tmp_block, pc); } // Some instructions (like SSY) don't have a predicate field, they are always unconditionally @@ -192,11 +191,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { const auto pred_index = static_cast(instr.pred.pred_index); if (can_be_predicated && pred_index != static_cast(Pred::UnusedIndex)) { - bb.push_back( - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); + const Node conditional = + Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); + global_code.push_back(conditional); + bb.push_back(conditional); } else { for (auto& node : tmp_block) { - bb.push_back(std::move(node)); + global_code.push_back(node); + bb.push_back(node); } } diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 51b8d55d48..3190e2d7c1 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::SubOp; -u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 37eef2bf2c..baee89107f 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7b4f7d284b..c2164ba50c 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 4fd3db54e1..0d139c0d2f 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index cc9a76a194..38bb692d6f 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Pred; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3 return pc; } -void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, +void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc) { constexpr u32 lop_iterations = 32; const Node one = Immediate(1); diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index b26a6e4731..3ed5ccc5a9 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::PredicateResultMode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& return pc; } -void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, - Node op_a, Node op_b, PredicateResultMode predicate_mode, - Pred predicate, bool sets_cc) { +void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, + Node op_b, PredicateResultMode predicate_mode, Pred predicate, + bool sets_cc) { const Node result = [&]() { switch (logic_op) { case LogicOperation::And: diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 0734141b04..6a95dc9281 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 942d6729d9..601d66f1f9 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 728a393a14..a992f73f81 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 52f39d3ff3..0559cc8dea 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 9f9da22781..1bd6755ddb 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index dd3aef6f21..9285b8d05e 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index dfd7cb98f3..7483685556 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -14,7 +14,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 53c44ae5ab..e685126925 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 43a0a9e109..7a07c5ec65 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 16eb3985f7..a3bf17eba5 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index daf97174b2..aad836d244 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3dd26da20e..e006f81388 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -36,7 +36,7 @@ static std::size_t GetCoordCount(TextureType texture_type) { } } -u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -160,7 +160,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { }(); const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = TrackCbuf(addr_register, code, static_cast(code.size())); + const Node base_address = + TrackCbuf(addr_register, global_code, static_cast(global_code.size())); const auto cbuf = std::get_if(base_address); ASSERT(cbuf != nullptr); const auto cbuf_offset_imm = std::get_if(cbuf->GetOffset()); @@ -464,8 +465,7 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, - const Node4& components) { +void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { @@ -480,7 +480,7 @@ void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, } } -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -504,7 +504,7 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, } } -void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, +void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, const Node4& components) { // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half // float instruction). diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c1e5f4efb2..f9502e3d04 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -14,7 +14,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 1717f0653d..83c61680ed 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 8bd15fb001..d0495995da 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index bdb4424a6d..f070e89125 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 6623f8ff95..951e85f44a 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index c3432356de..956c01d9b0 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::VideoType; using Tegra::Shader::VmadShr; -u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 9cb8645008..c348433076 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index d7747103e9..ac5112d78f 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { } } -void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { +void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); } -void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { +void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); } -void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { +void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); } -void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { +void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); } -void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { +void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { SetRegister(bb, Register::ZeroIndex + 1 + id, value); } -void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { if (!sets_cc) { return; } @@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); } -void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { if (!sets_cc) { return; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ef0f3a1066..1d4fbef53e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -39,7 +39,7 @@ using NodeData = PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; using Node = const NodeData*; using Node4 = std::array; -using BasicBlock = std::vector; +using NodeBlock = std::vector; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; @@ -539,7 +539,7 @@ public: Decode(); } - const std::map& GetBasicBlocks() const { + const std::map& GetBasicBlocks() const { return basic_blocks; } @@ -590,7 +590,7 @@ private: ExitMethod Scan(u32 begin, u32 end, std::set& labels); - BasicBlock DecodeRange(u32 begin, u32 end); + NodeBlock DecodeRange(u32 begin, u32 end); /** * Decodes a single instruction from Tegra to IR. @@ -598,33 +598,33 @@ private: * @param pc Program counter. Offset to decode. * @return Next address to decode. */ - u32 DecodeInstr(BasicBlock& bb, u32 pc); + u32 DecodeInstr(NodeBlock& bb, u32 pc); - u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); + u32 DecodeArithmetic(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); + u32 DecodeBfe(NodeBlock& bb, u32 pc); + u32 DecodeBfi(NodeBlock& bb, u32 pc); + u32 DecodeShift(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); + u32 DecodeFfma(NodeBlock& bb, u32 pc); + u32 DecodeHfma2(NodeBlock& bb, u32 pc); + u32 DecodeConversion(NodeBlock& bb, u32 pc); + u32 DecodeMemory(NodeBlock& bb, u32 pc); + u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); + u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeFloatSet(NodeBlock& bb, u32 pc); + u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); + u32 DecodeHalfSet(NodeBlock& bb, u32 pc); + u32 DecodeVideo(NodeBlock& bb, u32 pc); + u32 DecodeXmad(NodeBlock& bb, u32 pc); + u32 DecodeOther(NodeBlock& bb, u32 pc); /// Internalizes node's data and returns a managed pointer to a clone of that node Node StoreNode(NodeData&& node_data); @@ -673,20 +673,20 @@ private: Node GetTemporal(u32 id); /// Sets a register. src value must be a number-evaluated node. - void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); + void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(BasicBlock& bb, u64 dest, Node src); + void SetPredicate(NodeBlock& bb, u64 dest, Node src); /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); + void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); /// Sets a local memory address. address and value must be a number-evaluated node - void SetLocalMemory(BasicBlock& bb, Node address, Node value); + void SetLocalMemory(NodeBlock& bb, Node address, Node value); /// Sets a temporal. Internally it uses a post-RZ register - void SetTemporal(BasicBlock& bb, u32 id, Node value); + void SetTemporal(NodeBlock& bb, u32 id, Node value); /// Sets internal flags from a float - void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); + void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); + void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); /// Conditionally absolute/negated float. Absolute is applied first Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); @@ -727,12 +727,12 @@ private: /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); - void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, @@ -761,16 +761,16 @@ private: Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); - void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, + void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, Tegra::Shader::PredicateResultMode predicate_mode, Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, + void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc); - Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); + Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); - std::pair TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); + std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); template Node Operation(OperationCode code, const T*... operands) { @@ -812,7 +812,8 @@ private: u32 coverage_end{}; std::map, ExitMethod> exit_method_map; - std::map basic_blocks; + std::map basic_blocks; + NodeBlock global_code; std::vector> stored_nodes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d6d29ee9fc..be46353426 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -11,7 +11,7 @@ namespace VideoCommon::Shader { namespace { -std::pair FindOperation(const BasicBlock& code, s64 cursor, +std::pair FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { const Node node = code[cursor]; @@ -19,12 +19,19 @@ std::pair FindOperation(const BasicBlock& code, s64 cursor, if (operation->GetCode() == operation_code) return {node, cursor}; } + if (const auto conditional = std::get_if(node)) { + const auto& code = conditional->GetCode(); + const auto [found, internal_cursor] = + FindOperation(code, static_cast(code.size() - 1), operation_code); + if (found) + return {found, cursor}; + } } return {}; } } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { if (const auto cbuf = std::get_if(tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative(*cbuf->GetOffset()) ? tracked : nullptr; @@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { } return nullptr; } + if (const auto conditional = std::get_if(tracked)) { + const auto& code = conditional->GetCode(); + return TrackCbuf(tracked, code, static_cast(code.size())); + } return nullptr; } -std::pair ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, +std::pair ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) { for (; cursor >= 0; --cursor) { const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);