From 6c51f496320f698e123207c09ca61e55180a31b5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 28 Mar 2021 22:23:45 -0400 Subject: [PATCH] shader: Implement FSWZADD --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 8 ++++ .../backend/spirv/emit_context.h | 2 + .../backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_warp.cpp | 16 +++++++ .../frontend/ir/ir_emitter.cpp | 3 ++ .../frontend/ir/ir_emitter.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../impl/floating_point_swizzled_add.cpp | 44 +++++++++++++++++++ .../frontend/maxwell/translate/impl/impl.cpp | 4 ++ .../frontend/maxwell/translate/impl/impl.h | 1 + .../translate/impl/not_implemented.cpp | 4 -- .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/shader_info.h | 1 + 14 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 44ab929b79..5ce420cbf1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_set_predicate.cpp + frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/half_floating_point_helper.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 96d0e9b4d2..7531f8b214 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) { subgroup_local_invocation_id = DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); } + if (info.uses_fswzadd) { + const Id f32_one{Constant(F32[1], 1.0f)}; + const Id f32_minus_one{Constant(F32[1], -1.0f)}; + const Id f32_zero{Constant(F32[1], 0.0f)}; + fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); + fswzadd_lut_b = + ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 1a4e8221aa..ffac39c4f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,6 +103,8 @@ public: Id vertex_index{}; Id base_vertex{}; Id front_face{}; + Id fswzadd_lut_a{}; + Id fswzadd_lut_b{}; Id local_memory{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 02648d769c..3d0c6f7ba4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam Id segmentation_mask); Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 44d8a347fc..cbc5b1c961 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id return SelectValue(ctx, in_range, value, src_thread_id); } +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { + const Id three{ctx.Constant(ctx.U32[1], 3)}; + Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1)); + mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); + mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); + + const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; + const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; + + const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; + const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; + return ctx.OpFAdd(ctx.F32[1], result_a, result_b); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5524724878..505fba46a5 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons const IR::U32& seg_mask) { return Inst(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); } +F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { + return Inst(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); +} } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 17bc32fc83..8f3325738c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -277,6 +277,8 @@ public: const IR::U32& seg_mask); [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, const IR::U32& seg_mask); + [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, + FpControl control = {}); private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fb79e3d8dc..717aa71caa 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -408,3 +408,4 @@ OPCODE(ShuffleIndex, U32, U32, OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) +OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 0000000000..e42921a216 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -0,0 +1,44 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +void TranslatorVisitor::FSWZADD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<28, 8, u64> swizzle; + BitField<38, 1, u64> ndv; + BitField<39, 2, FpRounding> round; + BitField<44, 1, u64> ftz; + BitField<47, 1, u64> cc; + } const fswzadd{insn}; + + if (fswzadd.ndv != 0) { + throw NotImplementedException("FSWZADD NDV"); + } + + const IR::F32 src_a{GetFloatReg8(insn)}; + const IR::F32 src_b{GetFloatReg20(insn)}; + const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; + + const IR::FpControl fp_control{ + .no_contraction{false}, + .rounding{CastFpRounding(fswzadd.round)}, + .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; + F(fswzadd.dest_reg, result); + + if (fswzadd.cc != 0) { + throw NotImplementedException("FSWZADD CC"); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 9bae89c109..30b570ce4d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } +IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { + return ir.BitCast(GetReg8(insn)); +} + IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 54c31deb4e..bf7d1bae87 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -353,6 +353,7 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0057a4739..6a580f8319 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSWZADD(u64) { - ThrowNotImplemented(Opcode::FSWZADD); -} - void TranslatorVisitor::GETCRSPTR(u64) { ThrowNotImplemented(Opcode::GETCRSPTR); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c932c307ba..81090335f2 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SubgroupBallot: info.uses_subgroup_vote = true; break; + case IR::Opcode::FSwizzleAdd: + info.uses_fswzadd = true; + break; default: break; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 9111159f36..4b4006b7f8 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -94,6 +94,7 @@ struct Info { bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; bool uses_subgroup_vote{}; + bool uses_fswzadd{}; IR::Type used_constant_buffer_types{};