diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d7c0080fa6..2afa0916d9 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -659,6 +659,10 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, } } +void CopyBlock(VAddr dest_addr, VAddr src_addr, size_t size) { + CopyBlock(*Core::CurrentProcess(), dest_addr, src_addr, size); +} + boost::optional TryVirtualToPhysicalAddress(const VAddr addr) { if (addr == 0) { return 0; diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index f6a88f031b..2eaece2988 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -24,10 +24,7 @@ namespace Tegra { enum class BufferMethods { BindObject = 0, - SetGraphMacroCode = 0x45, - SetGraphMacroCodeArg = 0x46, - SetGraphMacroEntry = 0x47, - CountBufferMethods = 0x100, + CountBufferMethods = 0x40, }; void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) { @@ -36,28 +33,6 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) "{:08X} remaining params {}", method, subchannel, value, remaining_params); - if (method == static_cast(BufferMethods::SetGraphMacroEntry)) { - // Prepare to upload a new macro, reset the upload counter. - NGLOG_DEBUG(HW_GPU, "Uploading GPU macro {:08X}", value); - current_macro_entry = value; - current_macro_code.clear(); - return; - } - - if (method == static_cast(BufferMethods::SetGraphMacroCodeArg)) { - // Append a new code word to the current macro. - current_macro_code.push_back(value); - - // There are no more params remaining, submit the code to the 3D engine. - if (remaining_params == 0) { - maxwell_3d->SubmitMacroCode(current_macro_entry, std::move(current_macro_code)); - current_macro_entry = InvalidGraphMacroEntry; - current_macro_code.clear(); - } - - return; - } - if (method == static_cast(BufferMethods::BindObject)) { // Bind the current subchannel to the desired engine id. NGLOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 7aab163dca..9019f25045 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -2,12 +2,71 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/textures/decoders.h" namespace Tegra { namespace Engines { -void Fermi2D::WriteReg(u32 method, u32 value) {} +Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} + +void Fermi2D::WriteReg(u32 method, u32 value) { + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Fermi2D register, increase the size of the Regs structure"); + + regs.reg_array[method] = value; + + switch (method) { + case FERMI2D_REG_INDEX(trigger): { + HandleSurfaceCopy(); + break; + } + } +} + +void Fermi2D::HandleSurfaceCopy() { + NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", + static_cast(regs.operation)); + + const GPUVAddr source = regs.src.Address(); + const GPUVAddr dest = regs.dst.Address(); + + // TODO(Subv): Only same-format and same-size copies are allowed for now. + ASSERT(regs.src.format == regs.dst.format); + ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height); + + // TODO(Subv): Only raw copies are implemented. + ASSERT(regs.operation == Regs::Operation::SrcCopy); + + const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); + const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); + + u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); + u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); + + if (regs.src.linear == regs.dst.linear) { + // If the input layout and the output layout are the same, just perform a raw copy. + Memory::CopyBlock(dest_cpu, source_cpu, + src_bytes_per_pixel * regs.dst.width * regs.dst.height); + return; + } + + u8* src_buffer = Memory::GetPointer(source_cpu); + u8* dst_buffer = Memory::GetPointer(dest_cpu); + + if (!regs.src.linear && regs.dst.linear) { + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, src_buffer, dst_buffer, true, + regs.src.block_height); + } else { + // If the input is linear and the output is tiled, swizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, dst_buffer, src_buffer, false, + regs.dst.block_height); + } +} } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8967ddede0..0c5b413cca 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -4,19 +4,106 @@ #pragma once +#include +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" namespace Tegra { namespace Engines { +#define FERMI2D_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) + class Fermi2D final { public: - Fermi2D() = default; + explicit Fermi2D(MemoryManager& memory_manager); ~Fermi2D() = default; /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value); + + struct Regs { + static constexpr size_t NUM_REGS = 0x258; + + struct Surface { + RenderTargetFormat format; + BitField<0, 1, u32> linear; + union { + BitField<0, 4, u32> block_depth; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_width; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 address_high; + u32 address_low; + + GPUVAddr Address() const { + return static_cast((static_cast(address_high) << 32) | + address_low); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + + enum class Operation : u32 { + SrcCopyAnd = 0, + ROPAnd = 1, + Blend = 2, + SrcCopy = 3, + ROP = 4, + SrcCopyPremult = 5, + BlendPremult = 6, + }; + + union { + struct { + INSERT_PADDING_WORDS(0x80); + + Surface dst; + + INSERT_PADDING_WORDS(2); + + Surface src; + + INSERT_PADDING_WORDS(0x15); + + Operation operation; + + INSERT_PADDING_WORDS(0x9); + + // TODO(Subv): This is only a guess. + u32 trigger; + + INSERT_PADDING_WORDS(0x1A3); + }; + std::array reg_array; + }; + } regs{}; + + MemoryManager& memory_manager; + +private: + /// Performs the copy from the source surface to the destination surface as configured in the + /// registers. + void HandleSurfaceCopy(); }; +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(dst, 0x80); +ASSERT_REG_POSITION(src, 0x8C); +ASSERT_REG_POSITION(operation, 0xAB); +ASSERT_REG_POSITION(trigger, 0xB5); +#undef ASSERT_REG_POSITION + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2acbb9cd66..4306b894f7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager), macro_interpreter(*this) {} -void Maxwell3D::SubmitMacroCode(u32 entry, std::vector code) { - uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code); -} - void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { auto macro_code = uploaded_macros.find(method); // The requested macro must have been uploaded already. @@ -37,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector parameters) { } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); // It is an error to write to a register other than the current macro's ARG register before it @@ -68,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { return; } + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Maxwell3D register, increase the size of the Regs structure"); + if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } @@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { regs.reg_array[method] = value; switch (method) { + case MAXWELL3D_REG_INDEX(macros.data): { + ProcessMacroUpload(value); + break; + } case MAXWELL3D_REG_INDEX(code_address.code_address_high): case MAXWELL3D_REG_INDEX(code_address.code_address_low): { // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS @@ -141,6 +141,12 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } } +void Maxwell3D::ProcessMacroUpload(u32 data) { + // Store the uploaded macro code to interpret them when they're called. + auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; + macro.push_back(data); +} + void Maxwell3D::ProcessQueryGet() { GPUVAddr sequence_address = regs.query.QueryAddress(); // Since the sequence address is given as a GPU VAddr, we have to convert it to an application diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a022665eba..5cf62fb016 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -31,7 +31,7 @@ public: /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. struct Regs { - static constexpr size_t NUM_REGS = 0xE36; + static constexpr size_t NUM_REGS = 0xE00; static constexpr size_t NumRenderTargets = 8; static constexpr size_t NumViewports = 16; @@ -322,7 +322,15 @@ public: union { struct { - INSERT_PADDING_WORDS(0x200); + INSERT_PADDING_WORDS(0x45); + + struct { + INSERT_PADDING_WORDS(1); + u32 data; + u32 entry; + } macros; + + INSERT_PADDING_WORDS(0x1B8); struct { u32 address_high; @@ -605,7 +613,7 @@ public: u32 size[MaxShaderStage]; } tex_info_buffers; - INSERT_PADDING_WORDS(0x102); + INSERT_PADDING_WORDS(0xCC); }; std::array reg_array; }; @@ -637,9 +645,6 @@ public: /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value, u32 remaining_params); - /// Uploads the code for a GPU macro program associated with the specified entry. - void SubmitMacroCode(u32 entry, std::vector code); - /// Returns a list of enabled textures for the specified shader stage. std::vector GetStageTextures(Regs::ShaderStage stage) const; @@ -670,6 +675,9 @@ private: */ void CallMacroMethod(u32 method, std::vector parameters); + /// Handles writes to the macro uploading registers. + void ProcessMacroUpload(u32 data); + /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); @@ -687,6 +695,7 @@ private: static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +ASSERT_REG_POSITION(macros, 0x45); ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 9463cd5d66..9eb1439180 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -12,7 +12,7 @@ namespace Tegra { GPU::GPU() { memory_manager = std::make_unique(); maxwell_3d = std::make_unique(*memory_manager); - fermi_2d = std::make_unique(); + fermi_2d = std::make_unique(*memory_manager); maxwell_compute = std::make_unique(); } @@ -22,4 +22,16 @@ const Tegra::Engines::Maxwell3D& GPU::Get3DEngine() const { return *maxwell_3d; } +u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { + ASSERT(format != RenderTargetFormat::NONE); + + switch (format) { + case RenderTargetFormat::RGBA8_UNORM: + case RenderTargetFormat::RGB10_A2_UNORM: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented render target format %u", static_cast(format)); + } +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2888daedcf..f168a51712 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -21,6 +21,9 @@ enum class RenderTargetFormat : u32 { RGBA8_SRGB = 0xD6, }; +/// Returns the number of bytes per pixel of each rendertarget format. +u32 RenderTargetBytesPerPixel(RenderTargetFormat format); + class DebugContext; /** @@ -86,8 +89,6 @@ public: } private: - static constexpr u32 InvalidGraphMacroEntry = 0xFFFFFFFF; - /// Writes a single register in the engine bound to the specified subchannel void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params); @@ -100,11 +101,6 @@ private: std::unique_ptr fermi_2d; /// Compute engine std::unique_ptr maxwell_compute; - - /// Entry of the macro that is currently being uploaded - u32 current_macro_entry = InvalidGraphMacroEntry; - /// Code being uploaded for the current macro - std::vector current_macro_code; }; } // namespace Tegra diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 9c3ae875c2..8b39b2bdfc 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -27,9 +27,8 @@ static u32 GetSwizzleOffset(u32 x, u32 y, u32 image_width, u32 bytes_per_pixel, return address; } -static void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, - u8* swizzled_data, u8* unswizzled_data, bool unswizzle, - u32 block_height) { +void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, + u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { u8* data_ptrs[2]; for (unsigned y = 0; y < height; ++y) { for (unsigned x = 0; x < width; ++x) { diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index a700911cf7..2562c4b060 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -17,6 +17,10 @@ namespace Texture { std::vector UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, u32 block_height = TICEntry::DefaultBlockHeight); +/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. +void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, + u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height); + /** * Decodes an unswizzled texture into a A8R8G8B8 texture. */