From dd1aab5446cc043fe001f6cd118954a21edc9ad4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 15 Dec 2018 00:20:00 -0500 Subject: [PATCH] gl_rasterizer: Implement a more accurate fermi 2D copy. - This is a blit, use the blit registers. --- src/video_core/engines/fermi_2d.cpp | 62 ++------ src/video_core/engines/fermi_2d.h | 29 +++- src/video_core/rasterizer_interface.h | 4 +- .../renderer_opengl/gl_rasterizer.cpp | 12 +- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 141 +++++++++++++++++- .../renderer_opengl/gl_rasterizer_cache.h | 4 +- 7 files changed, 188 insertions(+), 68 deletions(-) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 9f1533263e..ec1a572261 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -21,7 +21,9 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; switch (method_call.method) { - case FERMI2D_REG_INDEX(trigger): { + // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, + // so trigger on the second 32-bit write. + case FERMI2D_REG_INDEX(blit_src_y) + 1: { HandleSurfaceCopy(); break; } @@ -32,57 +34,23 @@ void Fermi2D::HandleSurfaceCopy() { LOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", static_cast(regs.operation)); - const GPUVAddr source = regs.src.Address(); - const GPUVAddr dest = regs.dst.Address(); - - // TODO(Subv): Only same-format and same-size copies are allowed for now. - ASSERT(regs.src.format == regs.dst.format); - ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height); - // TODO(Subv): Only raw copies are implemented. ASSERT(regs.operation == Regs::Operation::SrcCopy); - const auto source_cpu = memory_manager.GpuToCpuAddress(source); - const auto dest_cpu = memory_manager.GpuToCpuAddress(dest); - ASSERT_MSG(source_cpu, "Invalid source GPU address"); - ASSERT_MSG(dest_cpu, "Invalid destination GPU address"); + const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; + const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; + const u32 src_blit_x2{ + static_cast((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; + const u32 src_blit_y2{ + static_cast((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; - u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); - u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); + const MathUtil::Rectangle src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; + const MathUtil::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, + regs.blit_dst_x + regs.blit_dst_width, + regs.blit_dst_y + regs.blit_dst_height}; - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) { - // All copies here update the main memory, so mark all rasterizer states as invalid. - Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); - - rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); - // We have to invalidate the destination region to evict any outdated surfaces from the - // cache. We do this before actually writing the new data because the destination address - // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_cpu, - dst_bytes_per_pixel * regs.dst.width * regs.dst.height); - - if (regs.src.linear == regs.dst.linear) { - // If the input layout and the output layout are the same, just perform a raw copy. - ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); - Memory::CopyBlock(*dest_cpu, *source_cpu, - src_bytes_per_pixel * regs.dst.width * regs.dst.height); - return; - } - u8* src_buffer = Memory::GetPointer(*source_cpu); - u8* dst_buffer = Memory::GetPointer(*dest_cpu); - if (!regs.src.linear && regs.dst.linear) { - // If the input is tiled and the output is linear, deswizzle the input and copy it over. - Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, - src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer, - dst_buffer, true, regs.src.BlockHeight(), - regs.src.BlockDepth(), 0); - } else { - // If the input is linear and the output is tiled, swizzle the input and copy it over. - Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, - src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer, - src_buffer, false, regs.dst.BlockHeight(), - regs.dst.BlockDepth(), 0); - } + if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { + UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 50009bf753..c69f74cc59 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -94,12 +94,22 @@ public: Operation operation; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x177); - // TODO(Subv): This is only a guess. - u32 trigger; + u32 blit_control; - INSERT_PADDING_WORDS(0x1A3); + INSERT_PADDING_WORDS(0x8); + + u32 blit_dst_x; + u32 blit_dst_y; + u32 blit_dst_width; + u32 blit_dst_height; + u64 blit_du_dx; + u64 blit_dv_dy; + u64 blit_src_x; + u64 blit_src_y; + + INSERT_PADDING_WORDS(0x21); }; std::array reg_array; }; @@ -122,7 +132,16 @@ private: ASSERT_REG_POSITION(dst, 0x80); ASSERT_REG_POSITION(src, 0x8C); ASSERT_REG_POSITION(operation, 0xAB); -ASSERT_REG_POSITION(trigger, 0xB5); +ASSERT_REG_POSITION(blit_control, 0x223); +ASSERT_REG_POSITION(blit_dst_x, 0x22c); +ASSERT_REG_POSITION(blit_dst_y, 0x22d); +ASSERT_REG_POSITION(blit_dst_width, 0x22e); +ASSERT_REG_POSITION(blit_dst_height, 0x22f); +ASSERT_REG_POSITION(blit_du_dx, 0x230); +ASSERT_REG_POSITION(blit_dv_dy, 0x232); +ASSERT_REG_POSITION(blit_src_x, 0x234); +ASSERT_REG_POSITION(blit_src_y, 0x236); + #undef ASSERT_REG_POSITION } // namespace Tegra::Engines diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 77da135a0d..b2a2237057 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -46,7 +46,9 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst) { + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect) { return false; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 974ca6a20e..12d8761200 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -778,15 +778,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { } bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst) { + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); - - if (Settings::values.use_accurate_gpu_emulation) { - // Skip the accelerated copy and perform a slow but more accurate copy - return false; - } - - res_cache.FermiCopySurface(src, dst); + res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f3b607f4d6..258d622596 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -61,7 +61,9 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, - const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a79eee03ef..f585c88843 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1021,24 +1021,155 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, } } +static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect, GLuint read_fb_handle, + GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, + std::size_t cubemap_face = 0) { + + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + OpenGLState prev_state{OpenGLState::GetCurState()}; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = read_fb_handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); + + u32 buffers{}; + + if (src_params.type == SurfaceType::ColorTexture) { + switch (src_params.target) { + case SurfaceTarget::Texture2D: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + case SurfaceTarget::TextureCubemap: + glFramebufferTexture2D( + GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), + src_surface->Texture().handle, 0); + glFramebufferTexture2D( + GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); + break; + case SurfaceTarget::Texture2DArray: + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + src_surface->Texture().handle, 0, 0); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); + break; + case SurfaceTarget::Texture3D: + glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + SurfaceTargetToGL(src_params.target), + src_surface->Texture().handle, 0, 0); + glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + SurfaceTargetToGL(src_params.target), 0, 0, 0); + break; + default: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + } + + switch (dst_params.target) { + case SurfaceTarget::Texture2D: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + case SurfaceTarget::TextureCubemap: + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), + dst_surface->Texture().handle, 0); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); + break; + case SurfaceTarget::Texture2DArray: + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + dst_surface->Texture().handle, 0, 0); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); + break; + + case SurfaceTarget::Texture3D: + glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + SurfaceTargetToGL(dst_params.target), + dst_surface->Texture().handle, 0, 0); + glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + SurfaceTargetToGL(dst_params.target), 0, 0, 0); + break; + default: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + } + + buffers = GL_COLOR_BUFFER_BIT; + } else if (src_params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (src_params.type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_surface->Texture().handle, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_surface->Texture().handle, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, + dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + + return true; +} + void RasterizerCacheOpenGL::FermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config) { + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const MathUtil::Rectangle& src_rect, const MathUtil::Rectangle& dst_rect) { const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - ASSERT(src_params.width == dst_params.width); - ASSERT(src_params.height == dst_params.height); ASSERT(src_params.pixel_format == dst_params.pixel_format); ASSERT(src_params.block_height == dst_params.block_height); ASSERT(src_params.is_tiled == dst_params.is_tiled); ASSERT(src_params.depth == dst_params.depth); - ASSERT(src_params.depth == 1); // Currently, FastCopySurface only works with 2D surfaces ASSERT(src_params.target == dst_params.target); ASSERT(src_params.rt.index == dst_params.rt.index); - FastCopySurface(GetSurface(src_params, true), GetSurface(dst_params, false)); + auto src_surface = GetSurface(src_params, true); + auto dst_surface = GetSurface(dst_params, true); + + BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, + draw_framebuffer.handle); } void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 490b8252eb..c742f4a93b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -421,7 +421,9 @@ public: /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config); + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const MathUtil::Rectangle& src_rect, + const MathUtil::Rectangle& dst_rect); private: void LoadSurface(const Surface& surface);