From 749aef3dd0ccba7104ac630a59f01fa369c3581d Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 5 Oct 2018 23:09:01 -0400 Subject: [PATCH 1/4] gl_rasterizer_cache: Implement a simpler surface copy using glCopyImageSubData. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ce967c4d65..5fe6befe15 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -559,6 +559,18 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, return true; } +static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + const u32 width{std::min(src_params.width, dst_params.width)}; + const u32 height{std::min(src_params.height, dst_params.height)}; + + glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, + 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, + 0, 0, width, height, 1); +} + static void CopySurface(const Surface& src_surface, const Surface& dst_surface, GLuint copy_pbo_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1041,6 +1053,15 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; + // For compatible surfaces, we can just do fast glCopyImageSubData based copy + if (old_params.target == new_params.target && old_params.type == new_params.type && + old_params.depth == new_params.depth && old_params.depth == 1 && + SurfaceParams::GetFormatBpp(old_params.pixel_format) == + 
SurfaceParams::GetFormatBpp(new_params.pixel_format)) { + FastCopySurface(old_surface, new_surface); + return new_surface; + } + // If the format is the same, just do a framebuffer blit. This is significantly faster than // using PBOs. The is also likely less accurate, as textures will be converted rather than // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate From 011cf77796aee57c23e97876aad65712efd6c92b Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 5 Oct 2018 23:39:03 -0400 Subject: [PATCH 2/4] gl_rasterizer: Add rasterizer cache code to handle accelerated fermi copies. --- src/video_core/rasterizer_interface.h | 11 ++--- .../renderer_opengl/gl_rasterizer.cpp | 10 ++--- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 42 +++++++++++++++++++ .../renderer_opengl/gl_rasterizer_cache.h | 9 ++++ 5 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cd819d69fe..06fc59dbe8 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -33,13 +34,9 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; - /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 - virtual bool AccelerateDisplayTransfer(const void* config) { - return false; - } - - /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 - virtual bool AccelerateTextureCopy(const void* config) { + /// Attempt to use a faster method to perform a surface copy + virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) { return false; } diff --git
a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 60dcdc184b..a3a14efd9b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -616,14 +616,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { InvalidateRegion(addr, size); } -bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { +bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) { MICROPROFILE_SCOPE(OpenGL_Blits); - UNREACHABLE(); - return true; -} - -bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { - UNREACHABLE(); + res_cache.FermiCopySurface(src, dst); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bf954bb5d8..8b6b622419 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -52,8 +52,8 @@ public: void FlushRegion(VAddr addr, u64 size) override; void InvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; - bool AccelerateDisplayTransfer(const void* config) override; - bool AccelerateTextureCopy(const void* config) override; + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; bool AccelerateFill(const void* config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5fe6befe15..56ff83eff0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -143,6 +143,28 @@ static VAddr 
TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { return params; } +/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params{}; + params.addr = TryGetCpuAddr(config.Address()); + params.is_tiled = !config.linear; + params.block_height = params.is_tiled ? config.BlockHeight() : 0, + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.target = SurfaceTarget::Texture2D; + params.depth = 1; + params.size_in_bytes_total = params.SizeInBytesTotal(); + params.size_in_bytes_2d = params.SizeInBytes2D(); + params.max_mip_level = 0; + params.rt = {}; + + return params; +} + static constexpr std::array tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S @@ -1045,6 +1067,26 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { return surface; } +void RasterizerCacheOpenGL::FermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { + + const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); + const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); + + ASSERT(src_params.width == dst_params.width); + ASSERT(src_params.height == dst_params.height); + ASSERT(src_params.pixel_format == dst_params.pixel_format); + ASSERT(src_params.block_height == dst_params.block_height); + ASSERT(src_params.is_tiled == dst_params.is_tiled); + ASSERT(src_params.depth == dst_params.depth); + ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces + 
ASSERT(src_params.target == dst_params.target); + ASSERT(src_params.rt.index == dst_params.rt.index); + + FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); +} + Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params) { // Verify surface is compatible for blitting diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 49025a3fe7..0b4940b3c3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" +#include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -719,6 +720,10 @@ struct SurfaceParams { Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); + /// Creates SurfaceParams for a Fermi2D surface copy + static SurfaceParams CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config); + /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { return std::tie(pixel_format, type, width, height, target, depth) == @@ -837,6 +842,10 @@ public: /// Tries to find a framebuffer using on the provided CPU address Surface TryFindFramebufferSurface(VAddr addr) const; + /// Copies the contents of one surface to another + void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); + private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); From 9aec85d39c0c95419bd086ccac158dbb77c20002 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 5 Oct 2018 23:46:40 -0400 Subject: [PATCH 3/4] fermi_2d: Implement 
simple copies with AccelerateSurfaceCopy. --- src/video_core/engines/fermi_2d.cpp | 48 ++++++++++++++++------------- src/video_core/engines/fermi_2d.h | 8 ++++- src/video_core/gpu.cpp | 2 +- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ea1555c5d5..912e785b9a 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -4,11 +4,13 @@ #include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/rasterizer_interface.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines { -Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} +Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) + : memory_manager(memory_manager), rasterizer{rasterizer} {} void Fermi2D::WriteReg(u32 method, u32 value) { ASSERT_MSG(method < Regs::NUM_REGS, @@ -44,27 +46,31 @@ void Fermi2D::HandleSurfaceCopy() { u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); - if (regs.src.linear == regs.dst.linear) { - // If the input layout and the output layout are the same, just perform a raw copy. - ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); - Memory::CopyBlock(dest_cpu, source_cpu, - src_bytes_per_pixel * regs.dst.width * regs.dst.height); - return; - } + if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { + // TODO(bunnei): The below implementation currently will not get hit, as + // AccelerateSurfaceCopy tries to always copy and will always return success. This should be + // changed once we properly support flushing. - u8* src_buffer = Memory::GetPointer(source_cpu); - u8* dst_buffer = Memory::GetPointer(dest_cpu); - - if (!regs.src.linear && regs.dst.linear) { - // If the input is tiled and the output is linear, deswizzle the input and copy it over. 
- Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, - dst_bytes_per_pixel, src_buffer, dst_buffer, true, - regs.src.BlockHeight()); - } else { - // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, - dst_bytes_per_pixel, dst_buffer, src_buffer, false, - regs.dst.BlockHeight()); + if (regs.src.linear == regs.dst.linear) { + // If the input layout and the output layout are the same, just perform a raw copy. + ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); + Memory::CopyBlock(dest_cpu, source_cpu, + src_bytes_per_pixel * regs.dst.width * regs.dst.height); + return; + } + u8* src_buffer = Memory::GetPointer(source_cpu); + u8* dst_buffer = Memory::GetPointer(dest_cpu); + if (!regs.src.linear && regs.dst.linear) { + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, src_buffer, dst_buffer, true, + regs.src.BlockHeight()); + } else { + // If the input is linear and the output is tiled, swizzle the input and copy it over. 
+ Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, dst_buffer, src_buffer, false, + regs.dst.BlockHeight()); + } } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 021b83eaa0..81d15c62a8 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -12,6 +12,10 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { #define FERMI2D_REG_INDEX(field_name) \ @@ -19,7 +23,7 @@ namespace Tegra::Engines { class Fermi2D final { public: - explicit Fermi2D(MemoryManager& memory_manager); + explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager); ~Fermi2D() = default; /// Write the value to the register identified by method. @@ -94,6 +98,8 @@ public: MemoryManager& memory_manager; private: + VideoCore::RasterizerInterface& rasterizer; + /// Performs the copy from the source surface to the destination surface as configured in the /// registers. void HandleSurfaceCopy(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index baa8b63b79..9ba7e35338 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -25,7 +25,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique(); maxwell_3d = std::make_unique(rasterizer, *memory_manager); - fermi_2d = std::make_unique(*memory_manager); + fermi_2d = std::make_unique(rasterizer, *memory_manager); maxwell_compute = std::make_unique(); maxwell_dma = std::make_unique(*memory_manager); kepler_memory = std::make_unique(*memory_manager); From 2fbb20b2b5bd543770dfd5e48ebf66df16d35996 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 6 Oct 2018 12:06:40 -0400 Subject: [PATCH 4/4] yuzu/yuzu_cmd: Add checks for required extension ARB_copy_image. 
--- src/yuzu/main.cpp | 2 ++ src/yuzu_cmd/emu_window/emu_window_sdl2.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index ad62a82d0a..e11833c5a2 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -485,6 +485,8 @@ QStringList GMainWindow::GetUnsupportedGLExtensions() { unsupported_ext.append("ARB_texture_storage"); if (!GLAD_GL_ARB_multi_bind) unsupported_ext.append("ARB_multi_bind"); + if (!GLAD_GL_ARB_copy_image) + unsupported_ext.append("ARB_copy_image"); // Extensions required to support some texture formats. if (!GLAD_GL_EXT_texture_compression_s3tc) diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index 0733301b28..1550950954 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -98,6 +98,8 @@ bool EmuWindow_SDL2::SupportsRequiredGLExtensions() { unsupported_ext.push_back("ARB_texture_storage"); if (!GLAD_GL_ARB_multi_bind) unsupported_ext.push_back("ARB_multi_bind"); + if (!GLAD_GL_ARB_copy_image) + unsupported_ext.push_back("ARB_copy_image"); // Extensions required to support some texture formats. if (!GLAD_GL_EXT_texture_compression_s3tc)