shader_decode: Improve zero flag implementation

This commit is contained in:
ReinUsesLisp 2018-12-27 16:50:36 -03:00
parent d911740e5d
commit 2d6c064e66
15 changed files with 79 additions and 75 deletions

View File

@ -45,8 +45,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(
instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in FMUL is not implemented");
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@ -75,21 +73,20 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in FADD is not implemented");
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@ -126,9 +123,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in FMNMX is not implemented");
op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
@ -136,9 +130,10 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetRegister(bb, instr.gpr0,
Operation(OperationCode::Select, NO_PRECISE, condition, min, max));
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::RRO_C:

View File

@ -22,24 +22,22 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
break;
}
case OpCode::Id::FMUL32_IMM: {
UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
"Condition codes generation in FMUL32 is not implemented");
Node value =
Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
value = GetSaturatedFloat(value, instr.fmul32.saturate);
SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::FADD32I: {
UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
"Condition codes generation in FADD32I is not implemented");
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
instr.fadd32i.negate_a);
const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
instr.fadd32i.negate_b);
const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}

View File

@ -34,22 +34,20 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in IADD is not implemented");
UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::IADD3_C:
case OpCode::Id::IADD3_R:
case OpCode::Id::IADD3_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in IADD3 is not implemented");
Node op_c = GetRegister(instr.gpr39);
const auto ApplyHeight = [&](IAdd3Height height, Node value) {
@ -100,6 +98,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
}();
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@ -115,6 +114,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@ -139,24 +140,19 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in LOP is not implemented");
if (instr.alu.lop.invert_a)
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
if (instr.alu.lop.invert_b)
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
instr.generates_cc);
break;
}
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in LOP3 is not implemented");
const Node op_c = GetRegister(instr.gpr39);
const Node lut = [&]() {
if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
@ -166,15 +162,13 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
}
}();
WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut);
WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
break;
}
case OpCode::Id::IMNMX_C:
case OpCode::Id::IMNMX_R:
case OpCode::Id::IMNMX_IMM: {
UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in IMNMX is not implemented");
const bool is_signed = instr.imnmx.is_signed;
@ -182,6 +176,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@ -247,7 +243,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
}
void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
Node imm_lut) {
Node imm_lut, bool sets_cc) {
constexpr u32 lop_iterations = 32;
const Node one = Immediate(1);
const Node two = Immediate(2);
@ -284,6 +280,7 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
}
}
SetInternalFlagsFromInteger(bb, value, sets_cc);
SetRegister(bb, dest, value);
}

View File

@ -25,20 +25,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::IADD32I: {
UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
"Condition codes generation in IADD32I is not implemented");
UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::LOP32I: {
UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
"Condition codes generation in LOP32I is not implemented");
if (instr.alu.lop32i.invert_a)
op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
@ -46,8 +43,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
Tegra::Shader::PredicateResultMode::None,
Tegra::Shader::Pred::UnusedIndex);
PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
break;
}
default:
@ -60,7 +56,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
Node op_a, Node op_b, PredicateResultMode predicate_mode,
Pred predicate) {
Pred predicate, bool sets_cc) {
const Node result = [&]() {
switch (logic_op) {
case LogicOperation::And:
@ -77,11 +73,9 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation
}
}();
if (dest != Register::ZeroIndex) {
SetRegister(bb, dest, result);
}
SetInternalFlagsFromInteger(bb, result, sets_cc);
SetRegister(bb, dest, result);
using Tegra::Shader::PredicateResultMode;
// Write the predicate value depending on the predicate mode.
switch (predicate_mode) {
case PredicateResultMode::None:

View File

@ -35,6 +35,7 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
const Node outer_shift =
Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
SetRegister(bb, instr.gpr0, outer_shift);
break;
}

View File

@ -16,8 +16,6 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF(instr.generates_cc);
const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::BFI_IMM_R:
@ -33,6 +31,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
const Node value =
Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
return pc;

View File

@ -33,15 +33,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
}
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
if (instr.generates_cc) {
const Node zero_condition =
SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0));
SetInternalFlag(bb, InternalFlag::Zero, zero_condition);
LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete.");
}
break;
}
case OpCode::Id::I2F_R:
@ -64,6 +57,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
@ -103,6 +97,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
}();
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}

View File

@ -21,8 +21,6 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
instr.ffma.tab5980_1.Value());
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in FFMA is not implemented");
const Node op_a = GetRegister(instr.gpr8);
@ -52,6 +50,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
value = GetSaturatedFloat(value, instr.alu.saturate_d);
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
return pc;

View File

@ -45,13 +45,12 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
const Node value =
Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
SetRegister(bb, instr.gpr0, value);
if (instr.generates_cc) {
const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
SetInternalFlag(bb, InternalFlag::Zero, is_zero);
LOG_WARNING(HW_GPU, "FSET condition code is incomplete");
if (instr.fset.bf) {
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
} else {
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
}
SetRegister(bb, instr.gpr0, value);
return pc;
}

View File

@ -32,6 +32,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
const Node value =
Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
if (instr.pset.bf) {
SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
} else {
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
}
SetRegister(bb, instr.gpr0, value);
return pc;

View File

@ -31,22 +31,20 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
case OpCode::Id::SHR_C:
case OpCode::Id::SHR_R:
case OpCode::Id::SHR_IMM: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in SHR is not implemented");
const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
instr.shift.is_signed, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM:
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in SHL is not implemented");
SetRegister(bb, instr.gpr0,
Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b));
case OpCode::Id::SHL_IMM: {
const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
default:
UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
}

View File

@ -38,9 +38,6 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::VMAD: {
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in VMAD is not implemented");
const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
const Node op_c = GetRegister(instr.gpr39);
@ -53,8 +50,8 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
}
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
}
case OpCode::Id::VSETP: {

View File

@ -86,6 +86,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
}
SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
SetRegister(bb, instr.gpr0, sum);
return pc;

View File

@ -7,6 +7,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@ -356,6 +357,24 @@ void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
SetRegister(bb, Register::ZeroIndex + 1 + id, value);
}
void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
if (!sets_cc) {
return;
}
const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
SetInternalFlag(bb, InternalFlag::Zero, zerop);
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
if (!sets_cc) {
return;
}
const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
SetInternalFlag(bb, InternalFlag::Zero, zerop);
LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
}
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
Immediate(bits));

View File

@ -649,6 +649,11 @@ private:
/// Sets a temporal. Internally it uses a post-RZ register
void SetTemporal(BasicBlock& bb, u32 id, Node value);
/// Sets internal flags from a float
void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
/// Sets internal flags from an integer
void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);
/// Conditionally absolute/negated float. Absolute is applied first
Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
/// Conditionally saturates a float
@ -725,9 +730,9 @@ private:
void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
Tegra::Shader::PredicateResultMode predicate_mode,
Tegra::Shader::Pred predicate);
Tegra::Shader::Pred predicate, bool sets_cc);
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
Node op_c, Node imm_lut);
Node op_c, Node imm_lut, bool sets_cc);
template <typename... T>
Node Operation(OperationCode code, const T*... operands) {