From 2ec7fcecb7d1f0bc8f943a3f7cb4d2e215bc4e76 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 03:17:02 +0100 Subject: [PATCH 01/10] Vulkan: implement D24S8 <-> RGBA8 conversions. --- src/video_core/host_shaders/CMakeLists.txt | 2 + .../host_shaders/convert_abgr8_to_d24s8.frag | 17 ++++ .../host_shaders/convert_d24s8_to_abgr8.frag | 21 ++++ src/video_core/renderer_vulkan/blit_image.cpp | 98 +++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.h | 16 +++ .../renderer_vulkan/vk_texture_cache.cpp | 12 +++ 6 files changed, 166 insertions(+) create mode 100644 src/video_core/host_shaders/convert_abgr8_to_d24s8.frag create mode 100644 src/video_core/host_shaders/convert_d24s8_to_abgr8.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index d779a967aa..fd3e414349 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -10,6 +10,8 @@ set(SHADER_FILES astc_decoder.comp block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + convert_abgr8_to_d24s8.frag + convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag new file mode 100644 index 0000000000..f7657e50a6 --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -0,0 +1,17 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uvec4 color = uvec4(texelFetch(color_texture, coord, 0).rgba * (exp2(8) - 1.0f)); + uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; + + gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(color.a); +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag new file mode 100644 index 0000000000..ff3bf82091 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_abgr8.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.r = float(depth >> 16) / (exp2(8) - 1.0); + color.g = float((depth >> 8) & 0x00FF) / (exp2(8) - 1.0); + color.b = float(depth & 0x00FF) / (exp2(8) - 1.0); + color.a = float(stencil) / (exp2(8) - 1.0); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b3884a4f5d..01535d0c0e 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -4,6 +4,8 @@ #include +#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include 
"video_core/host_shaders/full_screen_triangle_vert_spv.h" @@ -354,6 +356,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), + convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -448,6 +452,23 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } +void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d24s8_frag, true); + Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + +void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_abgr8_frag, false); + ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -495,6 +516,54 @@ void BlitImageHelper::Convert(VkPipeline 
pipeline, const Framebuffer* dst_frameb scheduler.InvalidateState(); } +void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift) { + const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkImageView src_depth_view = src_image_view.DepthView(); + const VkImageView src_stencil_view = src_image_view.StencilView(); + const VkSampler sampler = *nearest_sampler; + const VkExtent2D extent{ + .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U), + .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U), + }; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent, up_scale, + down_shift, this](vk::CommandBuffer cmdbuf) { + const VkOffset2D offset{ + .x = 0, + .y = 0, + }; + const VkViewport viewport{ + .x = 0.0f, + .y = 0.0f, + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 0.0f, + }; + const VkRect2D scissor{ + .offset = offset, + .extent = extent, + }; + const PushConstants push_constants{ + .tex_scale = {viewport.width, viewport.height}, + .tex_offset = {0.0f, 0.0f}, + }; + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); + UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, + src_stencil_view); + // TODO: Barriers + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + cmdbuf.SetViewport(0, viewport); + cmdbuf.SetScissor(0, scissor); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + VkPipeline BlitImageHelper::FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key) { const auto it = 
std::ranges::find(blit_color_keys, key); if (it != blit_color_keys.end()) { @@ -636,4 +705,33 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend }); } +void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *module); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = single_texture ? 
*one_texture_pipeline_layout : *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index d77f766788..f754a72944 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,10 +56,19 @@ public: void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); + + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, u32 down_shift); + [[nodiscard]] VkPipeline FindOrEmplaceColorPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); @@ -68,6 +77,9 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); + void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture); + const Device& device; VKScheduler& scheduler; StateTracker& state_tracker; @@ -83,6 +95,8 @@ private: vk::ShaderModule blit_depth_stencil_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; + vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -94,6 +108,8 @@ private: vk::Pipeline 
convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; + vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_d24s8_to_abgr8_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 407fd2a151..6dfd45f31e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -881,6 +881,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD16ToR16(dst, src_view, up_scale, down_shift); } break; + case PixelFormat::A8B8G8R8_UNORM: + case PixelFormat::B8G8R8A8_UNORM: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); @@ -891,6 +897,12 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertR16ToD16(dst, src_view, up_scale, down_shift); } break; + case PixelFormat::S8_UINT_D24_UNORM: + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM) { + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { return blit_image_helper.ConvertR32ToD32(dst, src_view, up_scale, down_shift); From b130f648d7c629411c487722f864c6bafcd2562c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 03:17:54 +0100 Subject: [PATCH 02/10] TextureCache: Fix regression caused by ART and improve blit detection algorithm to be smarter. 
--- src/video_core/texture_cache/texture_cache.h | 9 +++---- src/video_core/texture_cache/util.cpp | 28 +++++++++++++++++--- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 241f71a91f..5ade3ce55d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -475,6 +475,7 @@ void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const BlitImages images = GetBlitImages(dst, src); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); PrepareImage(dst_id, true, false); @@ -1094,12 +1095,8 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } + src_id = FindOrInsertImage(src_info, src_addr); + dst_id = FindOrInsertImage(dst_info, dst_addr); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index ddc9fb13a7..8f9eb387ce 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,17 +1151,37 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { - if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + if (src) { src_info.format = src->info.format; + src_info.num_samples = src->info.num_samples; + src_info.size = src->info.size; } - if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { + if (dst) { dst_info.format = dst->info.format; + dst_info.num_samples = dst->info.num_samples; + dst_info.size = dst->info.size; } if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { - dst_info.format = src->info.format; + if (dst) { + src_info.format = dst_info.format; + } else { + dst_info.format = src->info.format; + } } if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { - src_info.format = dst->info.format; + if (src) { + if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { + dst_info.format = src->info.format; + } + } else { + src_info.format = dst->info.format; + } + } + if (src_info.num_samples > 1) { + dst_info.format = src_info.format; + } + if (dst_info.num_samples > 1) { + src_info.format = dst_info.format; } } From 0ff228405faae92a39167b9aec072e14744eae35 Mon Sep 
17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 05:46:57 +0100 Subject: [PATCH 03/10] TextureCache: force same image format when resolving an image. --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++-- src/video_core/texture_cache/types.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5ade3ce55d..06257f0647 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -759,7 +759,8 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, return ImageId{}; } } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool broken_views = + runtime.HasBrokenTextureViewFormats() || True(options & RelaxedOptions::ForceBrokenViews); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { @@ -1096,7 +1097,12 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( continue; } src_id = FindOrInsertImage(src_info, src_addr); - dst_id = FindOrInsertImage(dst_info, dst_addr); + RelaxedOptions dst_options{}; + if (src_info.num_samples > 1) { + // it's a resolve, we must enforce the same format. + dst_options = RelaxedOptions::ForceBrokenViews; + } + dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 5c274abdf8..5ac27b3a7c 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -54,6 +54,7 @@ enum class RelaxedOptions : u32 { Size = 1 << 0, Format = 1 << 1, Samples = 1 << 2, + ForceBrokenViews = 1 << 3, }; DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) From b805c7bf058c6da04620cf75880509bdf6d5986c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 06:27:44 +0100 Subject: [PATCH 04/10] TextureCache: Implement additional D24S8 conversions. 
--- src/video_core/host_shaders/CMakeLists.txt | 2 ++ .../convert_d24s8_to_b10g11r11.frag | 21 ++++++++++++++++++ .../host_shaders/convert_d24s8_to_r16g16.frag | 21 ++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.cpp | 22 +++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.h | 10 +++++++++ .../renderer_vulkan/vk_texture_cache.cpp | 10 +++++++++ 6 files changed, 86 insertions(+) create mode 100644 src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag create mode 100644 src/video_core/host_shaders/convert_d24s8_to_r16g16.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index fd3e414349..87042195a0 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,6 +12,8 @@ set(SHADER_FILES block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag convert_d24s8_to_abgr8.frag + convert_d24s8_to_b10g11r11.frag + convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag full_screen_triangle.vert diff --git a/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag new file mode 100644 index 0000000000..c743d3a138 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_b10g11r11.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.b = float(depth >> 22) / (exp2(10) - 1.0); + color.g = float((depth >> 11) & 0x00FF) / (exp2(11) - 1.0); + color.r = float(depth & 0x00FF) / (exp2(11) - 1.0); + color.a = 1.0f; +} diff --git a/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag new file mode 100644 index 0000000000..2a9443d3d6 --- /dev/null +++ b/src/video_core/host_shaders/convert_d24s8_to_r16g16.frag @@ -0,0 +1,21 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + color.r = float(depth >> 16) / (exp2(16) - 1.0); + color.g = float((depth >> 16) & 0x00FF) / (exp2(16) - 1.0); + color.b = 0.0f; + color.a = 1.0f; +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 01535d0c0e..12b28aaddd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -6,6 +6,8 @@ #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" +#include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" +#include 
"video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" @@ -358,6 +360,8 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), + convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), + convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) { if (device.IsExtShaderStencilExportSupported()) { @@ -469,6 +473,24 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, down_shift); } +void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_b10g11r11_frag, false); + ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, + up_scale, down_shift); +} + +void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, + ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_r16g16_frag, false); + ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, + up_scale, down_shift); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& 
src_image_view, u32 up_scale, u32 down_shift) { const VkPipelineLayout layout = *one_texture_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index f754a72944..10d24c4b72 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -62,6 +62,12 @@ public: void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + + void ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, + u32 up_scale, u32 down_shift); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -97,6 +103,8 @@ private: vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; + vk::ShaderModule convert_d24s8_to_b10g11r11_frag; + vk::ShaderModule convert_d24s8_to_r16g16_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -110,6 +118,8 @@ private: vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; + vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; + vk::Pipeline convert_d24s8_to_r16g16_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 6dfd45f31e..fd60642713 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -887,6 +887,16 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view, up_scale, down_shift); } break; + case 
PixelFormat::B10G11R11_FLOAT: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToB10G11R11(dst, src_view, up_scale, down_shift); + } + break; + case PixelFormat::R16G16_UNORM: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToR16G16(dst, src_view, up_scale, down_shift); + } + break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { return blit_image_helper.ConvertD32ToR32(dst, src_view, up_scale, down_shift); From 6f896d1fae3d244f83450a485d15e7cebe79abaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 22:23:48 +0100 Subject: [PATCH 05/10] TextureCache: Further fixes on resolve algorithm. --- src/video_core/texture_cache/texture_cache.h | 8 +++---- src/video_core/texture_cache/util.cpp | 25 ++++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 06257f0647..4188f93c5a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1096,13 +1096,13 @@ typename TextureCache

::BlitImages TextureCache

::GetBlitImages( if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { continue; } - src_id = FindOrInsertImage(src_info, src_addr); - RelaxedOptions dst_options{}; + RelaxedOptions find_options{}; if (src_info.num_samples > 1) { // it's a resolve, we must enforce the same format. - dst_options = RelaxedOptions::ForceBrokenViews; + find_options = RelaxedOptions::ForceBrokenViews; } - dst_id = FindOrInsertImage(dst_info, dst_addr, dst_options); + src_id = FindOrInsertImage(src_info, src_addr, find_options); + dst_id = FindOrInsertImage(dst_info, dst_addr, find_options); } while (has_deleted_images); return BlitImages{ .dst_id = dst_id, diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 8f9eb387ce..e4d82631e4 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1151,19 +1151,25 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, const ImageBase* src) { + bool is_resolve = false; + const auto original_src_format = src_info.format; + const auto original_dst_format = dst_info.format; if (src) { - src_info.format = src->info.format; + if (GetFormatType(src->info.format) != SurfaceType::ColorTexture) { + src_info.format = src->info.format; + } + is_resolve = src->info.num_samples > 1; src_info.num_samples = src->info.num_samples; src_info.size = src->info.size; } - if (dst) { + if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { dst_info.format = dst->info.format; - dst_info.num_samples = dst->info.num_samples; - dst_info.size = dst->info.size; } if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { if (dst) { - src_info.format = dst_info.format; + if (GetFormatType(dst->info.format) == SurfaceType::ColorTexture) { + src_info.format = original_src_format; + } } else { dst_info.format = src->info.format; } 
@@ -1171,18 +1177,13 @@ void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { if (src) { if (GetFormatType(src->info.format) == SurfaceType::ColorTexture) { - dst_info.format = src->info.format; + dst_info.format = original_dst_format; } } else { src_info.format = dst->info.format; } } - if (src_info.num_samples > 1) { - dst_info.format = src_info.format; - } - if (dst_info.num_samples > 1) { - src_info.format = dst_info.format; - } + ASSERT(!is_resolve || dst_info.format == src_info.format); } u32 MapSizeBytes(const ImageBase& image) { From 1d5e6a51d7f66cf089d541a009c84c373fd5c6ab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Nov 2021 23:22:44 +0100 Subject: [PATCH 06/10] TextureCache: Add B10G11R11 to D24S8 converter. --- src/video_core/host_shaders/CMakeLists.txt | 1 + .../convert_b10g11r11_to_d24s8.frag | 19 ++++++ src/video_core/renderer_vulkan/blit_image.cpp | 62 +++++++++++++++---- src/video_core/renderer_vulkan/blit_image.h | 12 +++- .../renderer_vulkan/vk_texture_cache.cpp | 3 + 5 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 87042195a0..a2e046f126 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -11,6 +11,7 @@ set(SHADER_FILES block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp convert_abgr8_to_d24s8.frag + convert_b10g11r11_to_d24s8.frag convert_d24s8_to_abgr8.frag convert_d24s8_to_b10g11r11.frag convert_d24s8_to_r16g16.frag diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag new file mode 100644 index 0000000000..b7358c15c6 --- /dev/null +++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag 
@@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).rgba; + uint depth_stencil_unorm = (uint(color.b * (exp2(10) - 1.0f)) << 22) + | (uint(color.g * (exp2(11) - 1.0f)) << 11) + | (uint(color.r * (exp2(11) - 1.0f))); + + gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 12b28aaddd..e70459de5a 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -5,6 +5,7 @@ #include #include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_b10g11r11_to_d24s8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_b10g11r11_frag_spv.h" #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" @@ -359,6 +360,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), + convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), convert_d24s8_to_r16g16_frag(BuildShader(device, 
CONVERT_D24S8_TO_R16G16_FRAG_SPV)), @@ -459,16 +461,25 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), - convert_abgr8_to_d24s8_frag, true); + ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_abgr8_to_d24s8_frag, true); Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } +void BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineDepthTargetEx(convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_b10g11r11_to_d24s8_frag, true); + Convert(*convert_b10g11r11_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_abgr8_frag, false); + ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_abgr8_frag, false); ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -476,8 +487,8 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_b10g11r11_frag, false); + ConvertPipelineColorTargetEx(convert_d24s8_to_b10g11r11_pipeline, 
dst_framebuffer->RenderPass(), + convert_d24s8_to_b10g11r11_frag, false); ConvertDepthStencil(*convert_d24s8_to_b10g11r11_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -485,8 +496,8 @@ void BlitImageHelper::ConvertD24S8ToB10G11R11(const Framebuffer* dst_framebuffer void BlitImageHelper::ConvertD24S8ToR16G16(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - ConvertPipelineEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), - convert_d24s8_to_r16g16_frag, false); + ConvertPipelineColorTargetEx(convert_d24s8_to_r16g16_pipeline, dst_framebuffer->RenderPass(), + convert_d24s8_to_r16g16_frag, false); ConvertDepthStencil(*convert_d24s8_to_r16g16_pipeline, dst_framebuffer, src_image_view, up_scale, down_shift); } @@ -540,7 +551,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { - const VkPipelineLayout layout = *one_texture_pipeline_layout; + const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkImageView src_depth_view = src_image_view.DepthView(); const VkImageView src_stencil_view = src_image_view.StencilView(); const VkSampler sampler = *nearest_sampler; @@ -727,8 +738,37 @@ void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRend }); } -void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture) { +void BlitImageHelper::ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { + if (pipeline) { + return; + } + const std::array stages = MakeStages(*full_screen_vert, *module); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = 
nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = nullptr, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = single_texture ? *one_texture_pipeline_layout : *two_textures_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture) { if (pipeline) { return; } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 10d24c4b72..607964b5e3 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -59,6 +59,9 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -83,8 +86,11 @@ private: void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass); - void ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, - vk::ShaderModule& module, bool single_texture); + void ConvertPipelineColorTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool 
single_texture); + + void ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRenderPass renderpass, + vk::ShaderModule& module, bool single_texture); const Device& device; VKScheduler& scheduler; @@ -102,6 +108,7 @@ private: vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; + vk::ShaderModule convert_b10g11r11_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_b10g11r11_frag; vk::ShaderModule convert_d24s8_to_r16g16_frag; @@ -117,6 +124,7 @@ private: vk::Pipeline convert_d16_to_r16_pipeline; vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; + vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_b10g11r11_pipeline; vk::Pipeline convert_d24s8_to_r16g16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index fd60642713..28a659c0e0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -912,6 +912,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im src_view.format == PixelFormat::B8G8R8A8_UNORM) { return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view, up_scale, down_shift); } + if (src_view.format == PixelFormat::B10G11R11_FLOAT) { + return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { From e02cff2f69f9a90777f87f85f290f83fc04c16ec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 00:02:12 +0100 Subject: [PATCH 07/10] TextureCache: Add R16G16 to D24S8 converter. 
--- src/video_core/host_shaders/CMakeLists.txt | 1 + .../host_shaders/convert_r16g16_to_d24s8.frag | 18 ++++++++++++++++++ src/video_core/renderer_vulkan/blit_image.cpp | 11 +++++++++++ src/video_core/renderer_vulkan/blit_image.h | 5 +++++ .../renderer_vulkan/vk_texture_cache.cpp | 3 +++ 5 files changed, 38 insertions(+) create mode 100644 src/video_core/host_shaders/convert_r16g16_to_d24s8.frag diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index a2e046f126..1c91999d7e 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -17,6 +17,7 @@ set(SHADER_FILES convert_d24s8_to_r16g16.frag convert_depth_to_float.frag convert_float_to_depth.frag + convert_r16g16_to_d24s8.frag full_screen_triangle.vert fxaa.frag fxaa.vert diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag new file mode 100644 index 0000000000..7b1b914f6e --- /dev/null +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#version 450 +// #extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 color = texelFetch(color_texture, coord, 0).rgba; + uint depth_stencil_unorm = uint(color.r * (exp2(16) - 1.0f)) + | (uint(color.g * (exp2(16) - 1.0f)) << 16); + + gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); + // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); +} diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index e70459de5a..28b631f73f 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -11,6 +11,7 @@ #include "video_core/host_shaders/convert_d24s8_to_r16g16_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/convert_r16g16_to_d24s8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" @@ -361,6 +362,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_b10g11r11_to_d24s8_frag(BuildShader(device, CONVERT_B10G11R11_TO_D24S8_FRAG_SPV)), + convert_r16g16_to_d24s8_frag(BuildShader(device, CONVERT_R16G16_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_b10g11r11_frag(BuildShader(device, CONVERT_D24S8_TO_B10G11R11_FRAG_SPV)), convert_d24s8_to_r16g16_frag(BuildShader(device, CONVERT_D24S8_TO_R16G16_FRAG_SPV)), @@ -476,6 +478,15 @@ void
BlitImageHelper::ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer down_shift); } +void BlitImageHelper::ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view, u32 up_scale, + u32 down_shift) { + ConvertPipelineDepthTargetEx(convert_r16g16_to_d24s8_pipeline, dst_framebuffer->RenderPass(), + convert_r16g16_to_d24s8_frag, true); + Convert(*convert_r16g16_to_d24s8_pipeline, dst_framebuffer, src_image_view, up_scale, + down_shift); +} + void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift) { ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(), diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 607964b5e3..cec0953413 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -62,6 +62,9 @@ public: void ConvertB10G11R11ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, u32 up_scale, u32 down_shift); + void ConvertR16G16ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, + u32 up_scale, u32 down_shift); + void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view, u32 up_scale, u32 down_shift); @@ -109,6 +112,7 @@ private: vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; vk::ShaderModule convert_b10g11r11_to_d24s8_frag; + vk::ShaderModule convert_r16g16_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_b10g11r11_frag; vk::ShaderModule convert_d24s8_to_r16g16_frag; @@ -125,6 +129,7 @@ private: vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; vk::Pipeline convert_b10g11r11_to_d24s8_pipeline; + vk::Pipeline convert_r16g16_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline 
convert_d24s8_to_b10g11r11_pipeline; vk::Pipeline convert_d24s8_to_r16g16_pipeline; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 28a659c0e0..af1a110592 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -915,6 +915,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im if (src_view.format == PixelFormat::B10G11R11_FLOAT) { return blit_image_helper.ConvertB10G11R11ToD24S8(dst, src_view, up_scale, down_shift); } + if (src_view.format == PixelFormat::R16G16_UNORM) { + return blit_image_helper.ConvertR16G16ToD24S8(dst, src_view, up_scale, down_shift); + } break; case PixelFormat::D32_FLOAT: if (src_view.format == PixelFormat::R32_FLOAT) { From 0857f82913d0bcf2de4721233f74cd40ecddcdae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 06:15:29 +0100 Subject: [PATCH 08/10] TextureCache: Implement buffer copies on Vulkan. 
--- .../renderer_opengl/gl_texture_cache.cpp | 4 +- .../renderer_opengl/gl_texture_cache.h | 7 +- .../renderer_vulkan/vk_texture_cache.cpp | 174 ++++++++++++++++++ .../renderer_vulkan/vk_texture_cache.h | 11 +- src/video_core/texture_cache/texture_cache.h | 4 +- .../texture_cache/texture_cache_base.h | 2 - 6 files changed, 193 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6956535e5e..e70bbec810 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -526,8 +526,8 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, } } -void TextureCacheRuntime::ConvertImage(Image& dst, Image& src, - std::span copies) { +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span copies) { LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); format_conversion_pass.ConvertImage(dst, src, copies); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 578f8d5231..ad5157d667 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -84,9 +84,13 @@ public: u64 GetDeviceLocalMemory() const; + bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { + return true; + } + void CopyImage(Image& dst, Image& src, std::span copies); - void ConvertImage(Image& dst, Image& src, std::span copies); + void ReinterpretImage(Image& dst, Image& src, std::span copies); void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled) { UNIMPLEMENTED(); @@ -338,7 +342,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool 
HAS_PIXEL_FORMAT_CONVERSIONS = true; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index af1a110592..02215cfc20 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -308,6 +308,19 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { }; } +[[nodiscard]] VkBufferImageCopy MakeBufferImageCopy(const VideoCommon::ImageCopy& copy, bool is_src, + VkImageAspectFlags aspect_mask) noexcept { + return VkBufferImageCopy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = MakeImageSubresourceLayers( + is_src ? copy.src_subresource : copy.dst_subresource, aspect_mask), + .imageOffset = MakeOffset3D(is_src ? copy.src_offset : copy.dst_offset), + .imageExtent = MakeExtent3D(copy.extent), + }; +} + [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { std::vector<VkBufferCopy> result(copies.size()); @@ -754,6 +767,167 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Download); } +bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { + if (VideoCore::Surface::GetFormatType(dst.info.format) == + VideoCore::Surface::SurfaceType::DepthStencil) { + return !device.IsExtShaderStencilExportSupported(); + } + return false; +} + +[[nodiscard]] size_t NextPow2(size_t value) { + return static_cast<size_t>(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); +} + +VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { + const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); + if (buffer_commits[level]) { + return *buffers[level]; + } + const auto new_size = NextPow2(needed_size); + VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; + buffers[level] = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = new_size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); + buffer_commits[level] = std::make_unique<MemoryCommit>( + memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); + return *buffers[level]; +} + +void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, + std::span<const VideoCommon::ImageCopy> copies) { + std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); + std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); + const VkImageAspectFlags src_aspect_mask = src.AspectMask(); + const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); + + std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, true, src_aspect_mask); + }); + std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { + return MakeBufferImageCopy(copy, false, dst_aspect_mask); + }); + const u32 img_bpp = BytesPerBlock(src.info.format); + size_t total_size = 0; + for (const auto& copy : copies) { + total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; + } + const VkBuffer copy_buffer = GetTemporaryBuffer(total_size); + const VkImage dst_image = dst.Handle(); + const VkImage src_image = src.Handle(); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([dst_image, src_image, copy_buffer, src_aspect_mask, dst_aspect_mask, + vk_in_copies, vk_out_copies](vk::CommandBuffer cmdbuf) { + RangedBarrierRange dst_range; + RangedBarrierRange src_range; + for (const VkBufferImageCopy& copy : vk_in_copies) { + src_range.AddLayers(copy.imageSubresource); + } + for (const VkBufferImageCopy& copy : vk_out_copies) { +
dst_range.AddLayers(copy.imageSubresource); + } + static constexpr VkMemoryBarrier READ_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + }; + static constexpr VkMemoryBarrier WRITE_BARRIER{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const std::array pre_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_in_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = src_range.SubresourceRange(src_aspect_mask), + }, + }; + const std::array middle_out_barrier{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + 
.oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + const std::array post_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask), + }, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, pre_barriers); + + cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, + vk_in_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, WRITE_BARRIER, nullptr, middle_in_barrier); + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, READ_BARRIER, {}, middle_out_barrier); + cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_out_copies); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + 0, {}, {}, post_barriers); + }); +} + void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter
filter, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f5f8f9a74a..44e9dcee42 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -61,6 +61,10 @@ public: void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + bool ShouldReinterpret(Image& dst, Image& src); + + void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); + void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view, bool rescaled); bool CanAccelerateImageUpload(Image&) const noexcept { @@ -82,6 +86,8 @@ public: return true; } + [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); + const Device& device; VKScheduler& scheduler; MemoryAllocator& memory_allocator; @@ -90,6 +96,10 @@ public: ASTCDecoderPass& astc_decoder_pass; RenderPassCache& render_pass_cache; const Settings::ResolutionScalingInfo& resolution; + + constexpr static size_t indexing_slots = 8 * sizeof(size_t); + std::array<vk::Buffer, indexing_slots> buffers{}; + std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; }; class Image : public VideoCommon::ImageBase { @@ -316,7 +326,6 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; - static constexpr bool HAS_PIXEL_FORMAT_CONVERSIONS = false; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4188f93c5a..44a0d42ba7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1762,8 +1762,8 @@ void TextureCache

::CopyImage(ImageId dst_id, ImageId src_id, std::vector Date: Sat, 20 Nov 2021 06:17:01 +0100 Subject: [PATCH 09/10] TextureCache: Assure full conversions on depth/stencil write shaders. --- src/video_core/host_shaders/convert_abgr8_to_d24s8.frag | 4 ++-- src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag | 4 ++-- src/video_core/host_shaders/convert_r16g16_to_d24s8.frag | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag index f7657e50a6..4e4ab6a26b 100644 --- a/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_abgr8_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -13,5 +13,5 @@ void main() { uint depth_unorm = (color.r << 16) | (color.g << 8) | color.b; gl_FragDepth = float(depth_unorm) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(color.a); + gl_FragStencilRefARB = int(color.a); } diff --git a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag index b7358c15c6..2999a84cf2 100644 --- a/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_b10g11r11_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. 
#version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -15,5 +15,5 @@ void main() { | (uint(color.r * (exp2(11) - 1.0f))); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } diff --git a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag index 7b1b914f6e..3df70575ec 100644 --- a/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag +++ b/src/video_core/host_shaders/convert_r16g16_to_d24s8.frag @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #version 450 -// #extension GL_ARB_shader_stencil_export : require +#extension GL_ARB_shader_stencil_export : require layout(binding = 0) uniform sampler2D color_texture; @@ -14,5 +14,5 @@ void main() { | (uint(color.g * (exp2(16) - 1.0f)) << 16); gl_FragDepth = float(depth_stencil_unorm >> 8) / (exp2(24.0) - 1.0f); - // gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); + gl_FragStencilRefARB = int(depth_stencil_unorm & 0x00FF); } From da2fe8190518d3266df7f4a48f9b651eaea84d4b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Nov 2021 14:46:19 +0100 Subject: [PATCH 10/10] TextureCache: Refactor and fix linux compiling. 
--- src/common/bit_util.h | 7 +++++++ src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 ++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 64520ca4e6..eef8c1c5a7 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "common/common_types.h" @@ -44,4 +45,10 @@ template return static_cast(log2_f + static_cast((value ^ (1ULL << log2_f)) != 0ULL)); } +template +requires std::is_integral_v +[[nodiscard]] T NextPow2(T value) { + return static_cast(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U))); +} + } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e70bbec810..ecb215a7da 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,7 @@ #include +#include "common/bit_util.h" #include "common/literals.h" #include "common/settings.h" #include "video_core/renderer_opengl/gl_device.h" @@ -397,9 +398,6 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } -[[nodiscard]] u32 NextPow2(u32 value) { - return 1U << (32U - std::countl_zero(value - 1U)); -} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -1308,7 +1306,7 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image, const u32 copy_size = region.width * region.height * region.depth * img_bpp; if (pbo_size < copy_size) { intermediate_pbo.Create(); - pbo_size = NextPow2(copy_size); + pbo_size = Common::NextPow2(copy_size); glNamedBufferData(intermediate_pbo.handle, pbo_size, nullptr, GL_STREAM_COPY); } // Copy from source to PBO diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 
02215cfc20..f194110e52 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -8,6 +8,7 @@ #include #include "common/bit_cast.h" +#include "common/bit_util.h" #include "common/settings.h" #include "video_core/engines/fermi_2d.h" @@ -775,16 +776,12 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { return false; } -[[nodiscard]] size_t NextPow2(size_t value) { - return static_cast(1ULL << ((8U * sizeof(size_t)) - std::countl_zero(value - 1U))); -} - VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); if (buffer_commits[level]) { return *buffers[level]; } - const auto new_size = NextPow2(needed_size); + const auto new_size = Common::NextPow2(needed_size); VkBufferUsageFlags flags = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;