Merge pull request #3208 from ReinUsesLisp/vk-shader-decompiler

vk_shader_decompiler: Add tessellation and misc changes
2019-12-10 08:01:41 -04:00 · 2019-12-10 08:01:41 -04:00 · 6edadef96d
commit 6edadef96d
parent f2458106e6 233ed96a5c
11 changed files with 1737 additions and 750 deletions
--- a/externals/sirit
+++ b/externals/sirit
@ -1 +1 @@
-Subproject commit f7c4b07a7e14edb1dcd93bc9879c823423705c2e
+Subproject commit e1a6729df7f11e33f6dc0939b18995a57c8bf3d8
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@ -98,10 +98,11 @@ union Attribute {
        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
+        BitField<31, 1, u64> patch;
        BitField<47, 3, AttributeSize> size;

        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
+            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
        }
    } fmt20;

--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@ -1915,6 +1915,10 @@ private:
        return {};
    }

+    Expression InvocationId(Operation operation) {
+        return {"gl_InvocationID", Type::Int};
+    }
+
    Expression YNegate(Operation operation) {
        return {"y_direction", Type::Float};
    }
@ -2153,6 +2157,7 @@ private:
        &GLSLDecompiler::EmitVertex,
        &GLSLDecompiler::EndPrimitive,

+        &GLSLDecompiler::InvocationId,
        &GLSLDecompiler::YNegate,
        &GLSLDecompiler::LocalInvocationId<0>,
        &GLSLDecompiler::LocalInvocationId<1>,
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@ -5,29 +5,28 @@
 #pragma once

 #include <array>
+#include <bitset>
 #include <memory>
 #include <set>
+#include <type_traits>
 #include <utility>
 #include <vector>

-#include <sirit/sirit.h>
-
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/shader/shader_ir.h"

-namespace VideoCommon::Shader {
-class ShaderIR;
-}
-
 namespace Vulkan {
 class VKDevice;
 }

-namespace Vulkan::VKShader {
+namespace Vulkan {

 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using TexelBufferEntry = VideoCommon::Shader::Sampler;
 using SamplerEntry = VideoCommon::Shader::Sampler;
+using ImageEntry = VideoCommon::Shader::Image;

 constexpr u32 DESCRIPTOR_SET = 0;

@ -46,39 +45,74 @@ private:

 class GlobalBufferEntry {
 public:
-    explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset)
-        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
+    constexpr explicit GlobalBufferEntry(u32 cbuf_index, u32 cbuf_offset, bool is_written)
+        : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_written{is_written} {}

-    u32 GetCbufIndex() const {
+    constexpr u32 GetCbufIndex() const {
        return cbuf_index;
    }

-    u32 GetCbufOffset() const {
+    constexpr u32 GetCbufOffset() const {
        return cbuf_offset;
    }

+    constexpr bool IsWritten() const {
+        return is_written;
+    }
+
 private:
    u32 cbuf_index{};
    u32 cbuf_offset{};
+    bool is_written{};
 };

 struct ShaderEntries {
-    u32 const_buffers_base_binding{};
-    u32 global_buffers_base_binding{};
-    u32 samplers_base_binding{};
+    u32 NumBindings() const {
+        return static_cast<u32>(const_buffers.size() + global_buffers.size() +
+                                texel_buffers.size() + samplers.size() + images.size());
+    }
+
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<GlobalBufferEntry> global_buffers;
+    std::vector<TexelBufferEntry> texel_buffers;
    std::vector<SamplerEntry> samplers;
+    std::vector<ImageEntry> images;
    std::set<u32> attributes;
    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
    std::size_t shader_length{};
-    Sirit::Id entry_function{};
-    std::vector<Sirit::Id> interfaces;
+    bool uses_warps{};
 };

-using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
+struct Specialization final {
+    u32 base_binding{};

-DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
-                           Tegra::Engines::ShaderType stage);
+    // Compute specific
+    std::array<u32, 3> workgroup_size{};
+    u32 shared_memory_size{};

-} // namespace Vulkan::VKShader
+    // Graphics specific
+    Maxwell::PrimitiveTopology primitive_topology{};
+    std::optional<float> point_size{};
+    std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
+
+    // Tessellation specific
+    struct {
+        Maxwell::TessellationPrimitive primitive{};
+        Maxwell::TessellationSpacing spacing{};
+        bool clockwise{};
+    } tessellation;
+};
+// Old gcc versions don't consider this trivially copyable.
+// static_assert(std::is_trivially_copyable_v<Specialization>);
+
+struct SPIRVShader {
+    std::vector<u32> code;
+    ShaderEntries entries;
+};
+
+ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
+
+std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+                           Tegra::Engines::ShaderType stage, const Specialization& specialization);
+
+} // namespace Vulkan
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;

 namespace {
+
 u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
        return 1;
    }
 }
+
 } // Anonymous namespace

 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");

-        u64 next_element = instr.attribute.fmt20.element;
-        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        u64 element = instr.attribute.fmt20.element;
+        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());

-        const auto StoreNextElement = [&](u32 reg_offset) {
-            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
-                                                 next_element, GetRegister(instr.gpr39));
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            Node dest;
+            if (instr.attribute.fmt20.patch) {
+                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
+                dest = MakeNode<PatchNode>(offset);
+            } else {
+                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
+                                          GetRegister(instr.gpr39));
+            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);

            bb.push_back(Operation(OperationCode::Assign, dest, src));

-            // Load the next attribute element into the following register. If the element
-            // to load goes beyond the vec4 size, load the first element of the next
-            // attribute.
-            next_element = (next_element + 1) % 4;
-            next_index = next_index + (next_element == 0 ? 1 : 0);
-        };
-
-        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-            StoreNextElement(reg_offset);
+            // Load the next attribute element into the following register. If the element to load
+            // goes beyond the vec4 size, load the first element of the next attribute.
+            element = (element + 1) % 4;
+            index = index + (element == 0 ? 1 : 0);
        }
-
        break;
    }
    case OpCode::Id::ST_L:
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    case OpCode::Id::MOV_SYS: {
        const Node value = [this, instr] {
            switch (instr.sys20) {
+            case SystemVariable::InvocationId:
+                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@ -38,6 +38,9 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

+    // Signal the backend that this shader uses warp instructions.
+    uses_warps = true;
+
    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@ -172,6 +172,7 @@ enum class OperationCode {
    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

+    InvocationId,       /// () -> int
    YNegate,            /// () -> float
    LocalInvocationIdX, /// () -> uint
    LocalInvocationIdY, /// () -> uint
@ -213,13 +214,14 @@ class PredicateNode;
 class AbufNode;
 class CbufNode;
 class LmemNode;
+class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;

-using NodeData =
-    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
-                 PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
+                              LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
@ -542,6 +544,19 @@ private:
    u32 element{};
 };

+/// Patch memory (used to communicate tessellation stages).
+class PatchNode final {
+public:
+    explicit PatchNode(u32 offset) : offset{offset} {}
+
+    u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 offset{};
+};
+
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@ -137,6 +137,10 @@ public:
        return uses_vertex_id;
    }

+    bool UsesWarps() const {
+        return uses_warps;
+    }
+
    bool HasPhysicalAttributes() const {
        return uses_physical_attributes;
    }
@ -415,6 +419,7 @@ private:
    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
    bool uses_instance_id{};
    bool uses_vertex_id{};
+    bool uses_warps{};

    Tegra::Shader::Header header;
 };
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@ -7,6 +7,7 @@
 #include <variant>

 #include "common/common_types.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"

 namespace VideoCommon::Shader {