From ce452049d3053662b3a0af6ad26eb267a699c742 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 11:33:55 -0400 Subject: [PATCH 01/10] gl_rasterizer_cache: Keep track of surface 2D size separately from total size. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 60 +++++++++++-------- .../renderer_opengl/gl_rasterizer_cache.h | 18 ++++-- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 24a540258..b11206925 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -71,7 +71,8 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { break; } - params.size_in_bytes = params.SizeInBytes(); + params.size_in_bytes_total = params.SizeInBytesTotal(); + params.size_in_bytes_2d = params.SizeInBytes2D(); return params; } @@ -89,7 +90,8 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.depth = 1; - params.size_in_bytes = params.SizeInBytes(); + params.size_in_bytes_total = params.SizeInBytesTotal(); + params.size_in_bytes_2d = params.SizeInBytes2D(); return params; } @@ -108,7 +110,8 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; - params.size_in_bytes = params.SizeInBytes(); + params.size_in_bytes_total = params.SizeInBytesTotal(); + params.size_in_bytes_2d = params.SizeInBytes2D(); return params; } @@ -585,10 +588,13 @@ void CachedSurface::LoadGLBuffer() { const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); const u32 copy_size = params.width * params.height * bytes_per_pixel; + const std::size_t total_size = copy_size * params.depth; MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); if (params.is_tiled) { + gl_buffer.resize(total_size); + // 
TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do // this for 3D textures, etc. switch (params.target) { @@ -601,13 +607,11 @@ void CachedSurface::LoadGLBuffer() { UNREACHABLE(); } - gl_buffer.resize(static_cast(params.depth) * copy_size); morton_to_gl_fns[static_cast(params.pixel_format)]( params.width, params.block_height, params.height, gl_buffer.data(), copy_size, params.addr); } else { - const u8* const texture_src_data_end{texture_src_data + - (static_cast(params.depth) * copy_size)}; + const u8* const texture_src_data_end{texture_src_data + total_size}; gl_buffer.assign(texture_src_data, texture_src_data_end); } @@ -663,15 +667,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle glCompressedTexImage2D( SurfaceTargetToGL(params.target), 0, tuple.internal_format, static_cast(params.width), static_cast(params.height), 0, - static_cast(params.size_in_bytes), &gl_buffer[buffer_offset]); + static_cast(params.size_in_bytes_2d), &gl_buffer[buffer_offset]); break; case SurfaceParams::SurfaceTarget::Texture3D: case SurfaceParams::SurfaceTarget::Texture2DArray: glCompressedTexImage3D( SurfaceTargetToGL(params.target), 0, tuple.internal_format, static_cast(params.width), static_cast(params.height), - static_cast(params.depth), 0, static_cast(params.size_in_bytes), - &gl_buffer[buffer_offset]); + static_cast(params.depth), 0, + static_cast(params.size_in_bytes_total), &gl_buffer[buffer_offset]); break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", @@ -679,8 +683,8 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle UNREACHABLE(); glCompressedTexImage2D( GL_TEXTURE_2D, 0, tuple.internal_format, static_cast(params.width), - static_cast(params.height), 0, static_cast(params.size_in_bytes), - &gl_buffer[buffer_offset]); + static_cast(params.height), 0, + static_cast(params.size_in_bytes_2d), &gl_buffer[buffer_offset]); } } else { @@ -811,15 
+815,15 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { return surface; } -Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, +Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params) { // Verify surface is compatible for blitting - const auto& params{surface->GetSurfaceParams()}; + const auto& old_params{old_surface->GetSurfaceParams()}; // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; - if (params.pixel_format == new_params.pixel_format || + if (old_params.pixel_format == new_params.pixel_format || !Settings::values.use_accurate_framebuffers) { // If the format is the same, just do a framebuffer blit. This is significantly faster than // using PBOs. The is also likely less accurate, as textures will be converted rather than @@ -833,24 +837,26 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // where pixels are reinterpreted as a new format (without conversion). This code path uses // OpenGL PBOs and is quite slow. 
- auto source_format = GetFormatTuple(params.pixel_format, params.component_type); + auto source_format = GetFormatTuple(old_params.pixel_format, old_params.component_type); auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); - std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); + std::size_t buffer_size = + std::max(old_params.size_in_bytes_total, new_params.size_in_bytes_total); glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); if (source_format.compressed) { - glGetCompressedTextureImage(surface->Texture().handle, 0, - static_cast(params.SizeInBytes()), nullptr); + glGetCompressedTextureImage(old_surface->Texture().handle, 0, + static_cast(old_params.size_in_bytes_total), + nullptr); } else { - glGetTextureImage(surface->Texture().handle, 0, source_format.format, - source_format.type, static_cast(params.SizeInBytes()), - nullptr); + glGetTextureImage(old_surface->Texture().handle, 0, source_format.format, + source_format.type, + static_cast(old_params.size_in_bytes_total), nullptr); } // If the new texture is bigger than the previous one, we need to fill in the rest with data // from the CPU. - if (params.SizeInBytes() < new_params.SizeInBytes()) { + if (old_params.size_in_bytes_total < new_params.size_in_bytes_total) { // Upload the rest of the memory. 
if (new_params.is_tiled) { // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest @@ -860,10 +866,12 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " "reinterpretation but the texture is tiled."); } - std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); + std::size_t remaining_size = + new_params.size_in_bytes_total - old_params.size_in_bytes_total; std::vector data(remaining_size); - Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); - glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, + Memory::ReadBlock(new_params.addr + old_params.size_in_bytes_total, data.data(), + data.size()); + glBufferSubData(GL_PIXEL_PACK_BUFFER, old_params.size_in_bytes_total, remaining_size, data.data()); } @@ -898,7 +906,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(params.target)); + static_cast(new_params.target)); UNREACHABLE(); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 80c5f324b..9df909d01 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -689,13 +689,18 @@ struct SurfaceParams { /// Returns the rectangle corresponding to this surface MathUtil::Rectangle GetRect() const; - /// Returns the size of this surface in bytes, adjusted for compression - std::size_t SizeInBytes() const { + /// Returns the size of this surface as a 2D texture in bytes, adjusted for compression + std::size_t SizeInBytes2D() const { const u32 compression_factor{GetCompressionFactor(pixel_format)}; ASSERT(width % compression_factor == 0); ASSERT(height % compression_factor == 0); return (width / compression_factor) 
* (height / compression_factor) * - GetFormatBpp(pixel_format) * depth / CHAR_BIT; + GetFormatBpp(pixel_format) / CHAR_BIT; + } + + /// Returns the total size of this surface in bytes, adjusted for compression + std::size_t SizeInBytesTotal() const { + return SizeInBytes2D() * depth; } /// Creates SurfaceParams from a texture configuration @@ -725,7 +730,8 @@ struct SurfaceParams { u32 height; u32 depth; u32 unaligned_height; - std::size_t size_in_bytes; + std::size_t size_in_bytes_total; + std::size_t size_in_bytes_2d; SurfaceTarget target; }; @@ -759,7 +765,7 @@ public: } std::size_t GetSizeInBytes() const { - return params.size_in_bytes; + return params.size_in_bytes_total; } const OGLTexture& Texture() const { @@ -822,7 +828,7 @@ private: Surface GetUncachedSurface(const SurfaceParams& params); /// Recreates a surface with new parameters - Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); + Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params); /// Reserves a unique surface that can be reused later void ReserveSurface(const Surface& surface); From fefb003b23ab4a7be28e7bb0e8a8fa9802b3cb1a Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 11:42:28 -0400 Subject: [PATCH 02/10] gl_rasterizer_cache: Workaround for Texture2D -> Texture2DArray scenario. 
--- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_rasterizer_cache.cpp | 18 +++++++++++++++--- .../renderer_opengl/gl_rasterizer_cache.h | 7 +++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1fcd13f04..14d82a7bc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -738,7 +738,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, } texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); - Surface surface = res_cache.GetTextureSurface(texture); + Surface surface = res_cache.GetTextureSurface(texture, entry); if (surface != nullptr) { state.texture_units[current_bindpoint].texture = surface->Texture().handle; state.texture_units[current_bindpoint].target = surface->Target(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index b11206925..00351d743 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -41,7 +41,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { } /*static*/ SurfaceParams SurfaceParams::CreateForTexture( - const Tegra::Texture::FullTextureInfo& config) { + const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { SurfaceParams params{}; params.addr = TryGetCpuAddr(config.tic.Address()); params.is_tiled = config.tic.IsTiled(); @@ -61,8 +61,19 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.depth = 1; break; case SurfaceTarget::Texture3D: + params.depth = config.tic.Depth(); + break; case SurfaceTarget::Texture2DArray: params.depth = config.tic.Depth(); + if (!entry.IsArray()) { + // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of + // one, but sample the texture 
in the shader as if it were not an array texture. This + // probably is valid on hardware, but we still need to write a test to confirm this. In + // emulation, the workaround here is to continue to treat this as a Texture2D. An + // example game that does this is Super Mario Odyssey (in Cloud Kingdom). + ASSERT(params.depth == 1); + params.target = SurfaceTarget::Texture2D; + } break; default: LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast(params.target)); @@ -726,8 +737,9 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { copy_pbo.Create(); } -Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return GetSurface(SurfaceParams::CreateForTexture(config)); +Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const GLShader::SamplerEntry& entry) { + return GetSurface(SurfaceParams::CreateForTexture(config, entry)); } Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9df909d01..6474d9129 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -15,6 +15,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/textures/texture.h" namespace OpenGL { @@ -704,7 +705,8 @@ struct SurfaceParams { } /// Creates SurfaceParams from a texture configuration - static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, + const GLShader::SamplerEntry& entry); /// Creates SurfaceParams from a framebuffer configuration static SurfaceParams CreateForFramebuffer(std::size_t 
index); @@ -806,7 +808,8 @@ public: RasterizerCacheOpenGL(); /// Get a surface based on the texture configuration - Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); + Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const GLShader::SamplerEntry& entry); /// Get the depth surface based on the framebuffer configuration Surface GetDepthBufferSurface(bool preserve_contents); From 2e1cdde994ee5384863ce596f2e613af8078c682 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 11:43:25 -0400 Subject: [PATCH 03/10] gl_rasterizer_cache: Track texture target and depth in the cache. --- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 6474d9129..2aed83bbc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -718,8 +718,9 @@ struct SurfaceParams { /// Checks if surfaces are compatible for caching bool IsCompatibleSurface(const SurfaceParams& other) const { - return std::tie(pixel_format, type, width, height) == - std::tie(other.pixel_format, other.type, other.width, other.height); + return std::tie(pixel_format, type, width, height, target, depth) == + std::tie(other.pixel_format, other.type, other.width, other.height, other.target, + other.depth); } VAddr addr; From a9aa1db5522798c06d3c5708f649758b045e9cde Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 12:07:22 -0400 Subject: [PATCH 04/10] gl_rasterizer_cache: Update BlitTextures to support non-Texture2D ColorTexture surfaces. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 111 ++++++++++++++---- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 00351d743..57af6cdd1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -414,9 +414,12 @@ static constexpr std::array& src_rect, GLuint dst_tex, - const MathUtil::Rectangle& dst_rect, SurfaceType type, - GLuint read_fb_handle, GLuint draw_fb_handle) { +static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, + GLuint read_fb_handle, GLuint draw_fb_handle, std::size_t face = 0) { + + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + OpenGLState prev_state{OpenGLState::GetCurState()}; SCOPE_EXIT({ prev_state.Apply(); }); @@ -427,42 +430,106 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle& src_rec u32 buffers{}; - if (type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, - 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); + if (src_params.type == SurfaceType::ColorTexture) { + switch (src_params.target) { + case SurfaceParams::SurfaceTarget::Texture2D: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), + src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + 
face), 0, + 0); + break; + case SurfaceParams::SurfaceTarget::Texture2DArray: + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + src_surface->Texture().handle, 0, 0); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); + break; + case SurfaceParams::SurfaceTarget::Texture3D: + glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + SurfaceTargetToGL(src_params.target), + src_surface->Texture().handle, 0, 0); + glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + SurfaceTargetToGL(src_params.target), 0, 0, 0); + break; + default: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + } - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); + switch (dst_params.target) { + case SurfaceParams::SurfaceTarget::Texture2D: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), + dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, + 0); + break; + case SurfaceParams::SurfaceTarget::Texture2DArray: + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + dst_surface->Texture().handle, 0, 0); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); + break; + + case 
SurfaceParams::SurfaceTarget::Texture3D: + glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + SurfaceTargetToGL(dst_params.target), + dst_surface->Texture().handle, 0, 0); + glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + SurfaceTargetToGL(dst_params.target), 0, 0, 0); + break; + default: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + break; + } buffers = GL_COLOR_BUFFER_BIT; - } else if (type == SurfaceType::Depth) { + } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + dst_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; - } else if (type == SurfaceType::DepthStencil) { + } else if (src_params.type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_tex, 0); + src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, 
GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_tex, 0); + dst_surface->Texture().handle, 0); buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } - glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, - dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, + const auto& rect{src_params.GetRect()}; + glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom, + rect.right, rect.top, buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); return true; @@ -841,9 +908,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, // using PBOs. The is also likely less accurate, as textures will be converted rather than // reinterpreted. - BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, - params.GetRect(), params.type, read_framebuffer.handle, - draw_framebuffer.handle); + BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); } else { // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy, // where pixels are reinterpreted as a new format (without conversion). This code path uses From 871580dcd8433f2063693c69fe09b4abe6344ff0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 12:11:37 -0400 Subject: [PATCH 05/10] gl_rasterizer_cache: Implement LoadGLBuffer for Texture2DArray. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 57af6cdd1..022561d43 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -679,6 +679,14 @@ void CachedSurface::LoadGLBuffer() { case SurfaceParams::SurfaceTarget::Texture2D: // Pass impl. 
to the fallback code below break; + case SurfaceParams::SurfaceTarget::Texture2DArray: + for (std::size_t index = 0; index < params.depth; ++index) { + const std::size_t offset{index * copy_size}; + morton_to_gl_fns[static_cast(params.pixel_format)]( + params.width, params.block_height, params.height, gl_buffer.data() + offset, + copy_size, params.addr + offset); + } + break; default: LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}", static_cast(params.target)); From ed2e0e85c962c611af0aa6c929a4e56807b6563f Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 12:17:38 -0400 Subject: [PATCH 06/10] gl_rasterizer_cache: Add support for SurfaceTarget::TextureCubemap. --- .../renderer_opengl/gl_rasterizer_cache.cpp | 35 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer_cache.h | 2 ++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 022561d43..bd4330327 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -60,6 +60,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { case SurfaceTarget::Texture2D: params.depth = 1; break; + case SurfaceTarget::TextureCubemap: + params.depth = config.tic.Depth() * 6; + break; case SurfaceTarget::Texture3D: params.depth = config.tic.Depth(); break; @@ -562,6 +565,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) rect.GetWidth()); break; case SurfaceParams::SurfaceTarget::Texture2D: + case SurfaceParams::SurfaceTarget::TextureCubemap: glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format, rect.GetWidth(), rect.GetHeight()); break; @@ -680,6 +684,7 @@ void CachedSurface::LoadGLBuffer() { // Pass impl. 
to the fallback code below break; case SurfaceParams::SurfaceTarget::Texture2DArray: + case SurfaceParams::SurfaceTarget::TextureCubemap: for (std::size_t index = 0; index < params.depth; ++index) { const std::size_t offset{index * copy_size}; morton_to_gl_fns[static_cast(params.pixel_format)]( @@ -724,7 +729,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle // Load data from memory to the surface const GLint x0 = static_cast(rect.left); const GLint y0 = static_cast(rect.bottom); - const std::size_t buffer_offset = + std::size_t buffer_offset = static_cast(static_cast(y0) * params.width + static_cast(x0)) * GetGLBytesPerPixel(params.pixel_format); @@ -763,6 +768,16 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle static_cast(params.depth), 0, static_cast(params.size_in_bytes_total), &gl_buffer[buffer_offset]); break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + for (std::size_t face = 0; face < params.depth; ++face) { + glCompressedTexImage2D(static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), + 0, tuple.internal_format, static_cast(params.width), + static_cast(params.height), 0, + static_cast(params.size_in_bytes_2d), + &gl_buffer[buffer_offset]); + buffer_offset += params.size_in_bytes_2d; + } + break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast(params.target)); @@ -793,6 +808,15 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle static_cast(rect.GetHeight()), params.depth, tuple.format, tuple.type, &gl_buffer[buffer_offset]); break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + for (std::size_t face = 0; face < params.depth; ++face) { + glTexSubImage2D(static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0, + y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + &gl_buffer[buffer_offset]); + buffer_offset += params.size_in_bytes_2d; + } + break; default: 
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast(params.target)); @@ -989,6 +1013,15 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, static_cast(new_params.depth), dest_format.format, dest_format.type, nullptr); break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + for (std::size_t face = 0; face < new_params.depth; ++face) { + glTextureSubImage3D( + new_surface->Texture().handle, 0, 0, 0, static_cast(face), + static_cast(dest_rect.GetWidth()), + static_cast(dest_rect.GetHeight()), static_cast(1), + dest_format.format, dest_format.type, nullptr); + } + break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast(new_params.target)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 2aed83bbc..e2fd0009e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -127,6 +127,8 @@ struct SurfaceParams { case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2DNoMipmap: return SurfaceTarget::Texture2D; + case Tegra::Texture::TextureType::TextureCubemap: + return SurfaceTarget::TextureCubemap; case Tegra::Texture::TextureType::Texture1DArray: return SurfaceTarget::Texture1DArray; case Tegra::Texture::TextureType::Texture2DArray: From 15cc729ebdd9f5053a16e2d9e1e7bab6d62aff8a Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 14 Sep 2018 12:18:06 -0400 Subject: [PATCH 07/10] gl_shader_decompiler: TEXS: Implement TextureType::TextureCube. 
--- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index b3e95187e..320babdb1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2000,6 +2000,14 @@ private: } break; } + case Tegra::Shader::TextureType::TextureCube: { + ASSERT_MSG(!is_array, "Unimplemented"); + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } default: LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", static_cast(texture_type)); From f543b43fd054ae9ec1e4d693a9bd1540e408ddac Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 20 Sep 2018 23:41:25 -0400 Subject: [PATCH 08/10] gl_rasterizer_cache: Implement render to cubemap. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 301 +++++++++++------- .../renderer_opengl/gl_rasterizer_cache.h | 34 ++ src/video_core/textures/texture.h | 2 + 3 files changed, 218 insertions(+), 119 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd4330327..8abbe0113 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -87,6 +87,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.size_in_bytes_total = params.SizeInBytesTotal(); params.size_in_bytes_2d = params.SizeInBytes2D(); + params.max_mip_level = config.tic.max_mip_level + 1; + params.rt = {}; + return params; } @@ -106,6 +109,14 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.depth = 1; params.size_in_bytes_total = params.SizeInBytesTotal(); params.size_in_bytes_2d = params.SizeInBytes2D(); + params.max_mip_level = 0; + + // Render target specific parameters, not used for caching + params.rt.index = static_cast(index); + params.rt.array_mode = config.array_mode; + params.rt.layer_stride = config.layer_stride; + params.rt.base_layer = config.base_layer; + return params; } @@ -126,6 +137,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { params.depth = 1; params.size_in_bytes_total = params.SizeInBytesTotal(); params.size_in_bytes_2d = params.SizeInBytes2D(); + params.max_mip_level = 0; + params.rt = {}; + return params; } @@ -418,7 +432,8 @@ static constexpr std::arrayGetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; @@ -436,34 +451,35 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, if (src_params.type == SurfaceType::ColorTexture) { switch (src_params.target) { case SurfaceParams::SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); + 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; case SurfaceParams::SurfaceTarget::TextureCubemap: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), - src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, - 0); + glFramebufferTexture2D( + GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), + src_surface->Texture().handle, 0); + glFramebufferTexture2D( + GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); break; case SurfaceParams::SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, src_surface->Texture().handle, 0, 0); glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); break; case SurfaceParams::SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, SurfaceTargetToGL(src_params.target), src_surface->Texture().handle, 0, 0); glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, SurfaceTargetToGL(src_params.target), 0, 0, 0); break; default: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, 
GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; @@ -471,35 +487,36 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, switch (dst_params.target) { case SurfaceParams::SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, dst_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; case SurfaceParams::SurfaceTarget::TextureCubemap: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), - dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, - 0); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), + dst_surface->Texture().handle, 0); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); break; case SurfaceParams::SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, dst_surface->Texture().handle, 0, 0); glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); break; case SurfaceParams::SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, SurfaceTargetToGL(dst_params.target), dst_surface->Texture().handle, 0, 0); glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
SurfaceTargetToGL(dst_params.target), 0, 0, 0); break; default: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, dst_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); break; @@ -507,23 +524,27 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, buffers = GL_COLOR_BUFFER_BIT; } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_surface->Texture().handle, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, + GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->Texture().handle, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, + GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + 
dst_attachment, + GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->Texture().handle, 0); @@ -538,6 +559,92 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, return true; } +static void CopySurface(const Surface& src_surface, const Surface& dst_surface, + GLuint copy_pbo_handle, GLenum src_attachment = 0, + GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { + ASSERT_MSG(dst_attachment == 0, "Unimplemented"); + + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + + std::size_t buffer_size = + std::max(src_params.size_in_bytes_total, dst_params.size_in_bytes_total); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); + glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); + if (source_format.compressed) { + glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, + static_cast(src_params.size_in_bytes_total), nullptr); + } else { + glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format, + source_format.type, static_cast(src_params.size_in_bytes_total), + nullptr); + } + // If the new texture is bigger than the previous one, we need to fill in the rest with data + // from the CPU. + if (src_params.size_in_bytes_total < dst_params.size_in_bytes_total) { + // Upload the rest of the memory. + if (dst_params.is_tiled) { + // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest + // of the data in this case. 
Games like Super Mario Odyssey seem to hit this case + // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer + // but it doesn't clear it beforehand, the texture is already full of zeros. + LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " + "reinterpretation but the texture is tiled."); + } + std::size_t remaining_size = + dst_params.size_in_bytes_total - src_params.size_in_bytes_total; + std::vector data(remaining_size); + Memory::ReadBlock(dst_params.addr + src_params.size_in_bytes_total, data.data(), + data.size()); + glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes_total, remaining_size, + data.data()); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + const GLsizei width{static_cast( + std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))}; + const GLsizei height{static_cast( + std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))}; + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + if (dest_format.compressed) { + LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); + UNREACHABLE(); + } else { + switch (dst_params.target) { + case SurfaceParams::SurfaceTarget::Texture1D: + glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format, + dest_format.type, nullptr); + break; + case SurfaceParams::SurfaceTarget::Texture2D: + glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height, + dest_format.format, dest_format.type, nullptr); + break; + case SurfaceParams::SurfaceTarget::Texture3D: + case SurfaceParams::SurfaceTarget::Texture2DArray: + glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height, + static_cast(dst_params.depth), dest_format.format, + dest_format.type, nullptr); + break; + case SurfaceParams::SurfaceTarget::TextureCubemap: + glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, + static_cast(cubemap_face), width, height, 1, + dest_format.format, 
dest_format.type, nullptr); + break; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", + static_cast(dst_params.target)); + UNREACHABLE(); + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } +} + CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_target(SurfaceTargetToGL(params.target)) { texture.Create(); @@ -929,106 +1036,62 @@ Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params) { // Verify surface is compatible for blitting - const auto& old_params{old_surface->GetSurfaceParams()}; + auto old_params{old_surface->GetSurfaceParams()}; // Get a new surface with the new parameters, and blit the previous surface to it Surface new_surface{GetUncachedSurface(new_params)}; - if (old_params.pixel_format == new_params.pixel_format || - !Settings::values.use_accurate_framebuffers) { - // If the format is the same, just do a framebuffer blit. This is significantly faster than - // using PBOs. The is also likely less accurate, as textures will be converted rather than - // reinterpreted. + // If the format is the same, just do a framebuffer blit. This is significantly faster than + // using PBOs. This is also likely less accurate, as textures will be converted rather than + // reinterpreted. When use_accurate_framebuffers setting is enabled, perform a more accurate + // surface copy, where pixels are reinterpreted as a new format (without conversion). This + // code path uses OpenGL PBOs and is quite slow.
+ const bool is_blit{old_params.pixel_format == new_params.pixel_format || + !Settings::values.use_accurate_framebuffers}; - BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); - } else { - // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy, - // where pixels are reinterpreted as a new format (without conversion). This code path uses - // OpenGL PBOs and is quite slow. - - auto source_format = GetFormatTuple(old_params.pixel_format, old_params.component_type); - auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); - - std::size_t buffer_size = - std::max(old_params.size_in_bytes_total, new_params.size_in_bytes_total); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); - glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); - if (source_format.compressed) { - glGetCompressedTextureImage(old_surface->Texture().handle, 0, - static_cast(old_params.size_in_bytes_total), - nullptr); + switch (new_params.target) { + case SurfaceParams::SurfaceTarget::Texture2D: + if (is_blit) { + BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); } else { - glGetTextureImage(old_surface->Texture().handle, 0, source_format.format, - source_format.type, - static_cast(old_params.size_in_bytes_total), nullptr); + CopySurface(old_surface, new_surface, copy_pbo.handle); } - // If the new texture is bigger than the previous one, we need to fill in the rest with data - // from the CPU. - if (old_params.size_in_bytes_total < new_params.size_in_bytes_total) { - // Upload the rest of the memory. - if (new_params.is_tiled) { - // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest - // of the data in this case. 
Games like Super Mario Odyssey seem to hit this case - // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer - // but it doesn't clear it beforehand, the texture is already full of zeros. - LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " - "reinterpretation but the texture is tiled."); + break; + case SurfaceParams::SurfaceTarget::TextureCubemap: { + const u32 byte_stride{old_params.rt.layer_stride * + (SurfaceParams::GetFormatBpp(old_params.pixel_format) / CHAR_BIT)}; + + // This seems to be used for render-to-cubemap texture + const std::size_t size_with_mipmaps{new_params.SizeInBytes2DWithMipmap()}; + ASSERT_MSG(size_with_mipmaps == byte_stride, "Unexpected"); + ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); + ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); + ASSERT_MSG(old_params.width == new_params.width, "Unexpected"); + ASSERT_MSG(old_params.height == new_params.height, "Unexpected"); + ASSERT_MSG(old_params.rt.array_mode == 1, "Unexpected"); + ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); + + for (std::size_t index = 0; index < new_params.depth; ++index) { + Surface face_surface{TryGetReservedSurface(old_params)}; + ASSERT_MSG(face_surface, "Unexpected"); + + if (is_blit) { + BlitSurface(face_surface, new_surface, read_framebuffer.handle, + draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index, + new_params.rt.index, index); + } else { + CopySurface(face_surface, new_surface, copy_pbo.handle, + face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index); } - std::size_t remaining_size = - new_params.size_in_bytes_total - old_params.size_in_bytes_total; - std::vector data(remaining_size); - Memory::ReadBlock(new_params.addr + old_params.size_in_bytes_total, data.data(), - data.size()); - glBufferSubData(GL_PIXEL_PACK_BUFFER, old_params.size_in_bytes_total, remaining_size, - data.data()); - } 
- - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - const auto& dest_rect{new_params.GetRect()}; - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle); - if (dest_format.compressed) { - LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); - UNREACHABLE(); - } else { - switch (new_params.target) { - case SurfaceParams::SurfaceTarget::Texture1D: - glTextureSubImage1D(new_surface->Texture().handle, 0, 0, - static_cast(dest_rect.GetWidth()), dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceParams::SurfaceTarget::Texture2D: - glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, - static_cast(dest_rect.GetWidth()), - static_cast(dest_rect.GetHeight()), dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceParams::SurfaceTarget::Texture3D: - case SurfaceParams::SurfaceTarget::Texture2DArray: - glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0, - static_cast(dest_rect.GetWidth()), - static_cast(dest_rect.GetHeight()), - static_cast(new_params.depth), dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceParams::SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < new_params.depth; ++face) { - glTextureSubImage3D( - new_surface->Texture().handle, 0, 0, 0, static_cast(face), - static_cast(dest_rect.GetWidth()), - static_cast(dest_rect.GetHeight()), static_cast(1), - dest_format.format, dest_format.type, nullptr); - } - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(new_params.target)); - UNREACHABLE(); - } + old_params.addr += size_with_mipmaps; } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + break; + } + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", + static_cast(new_params.target)); + UNREACHABLE(); } return new_surface; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e2fd0009e..51eb9b6dd 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -9,6 +9,7 @@ #include #include +#include "common/alignment.h" #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -706,6 +707,29 @@ struct SurfaceParams { return SizeInBytes2D() * depth; } + /** + * Returns the size in bytes of the 2D surface with mipmaps. Each mipmap level proceeds the + * previous with half the width and half the height. Once the size of the next mip reaches 0, we + * are done. + */ + std::size_t SizeInBytes2DWithMipmap() const { + std::size_t size_in_bytes{}; + auto mip_params{*this}; + for (std::size_t level = 0; level < max_mip_level; level++) { + size_in_bytes += mip_params.SizeInBytes2D(); + + mip_params.width /= 2; + mip_params.height /= 2; + + if (!mip_params.width || !mip_params.height) { + break; + } + } + + // TODO(bunnei): This alignup is unverified, but necessary in games tested (e.g. in SMO) + return Common::AlignUp(size_in_bytes, 0x1000); + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); @@ -738,6 +762,15 @@ struct SurfaceParams { std::size_t size_in_bytes_total; std::size_t size_in_bytes_2d; SurfaceTarget target; + u32 max_mip_level; + + // Render target specific parameters, not used in caching + struct { + u32 index; + u32 array_mode; + u32 layer_stride; + u32 base_layer; + } rt; }; }; // namespace OpenGL @@ -747,6 +780,7 @@ struct SurfaceReserveKey : Common::HashableStruct { static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { SurfaceReserveKey res; res.state = params; + res.state.rt = {}; // Ignore rt config in caching return res; } }; diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c2fb824b2..14aea4838 100644 --- a/src/video_core/textures/texture.h +++ 
b/src/video_core/textures/texture.h @@ -165,6 +165,8 @@ struct TICEntry { // High 16 bits of the pitch value BitField<0, 16, u32> pitch_high; + + BitField<28, 4, u32> max_mip_level; }; union { BitField<0, 16, u32> width_minus_1; From 29782273ecf2d679a9e5043fdc15152e1294625f Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 29 Sep 2018 11:53:18 -0400 Subject: [PATCH 09/10] gl_rasterizer_cache: Add check for array rendering to cubemap texture. --- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 8abbe0113..249b0061a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1061,6 +1061,14 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, const u32 byte_stride{old_params.rt.layer_stride * (SurfaceParams::GetFormatBpp(old_params.pixel_format) / CHAR_BIT)}; + if (old_params.rt.array_mode != 1) { + // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this + // yet (array rendering used as a cubemap texture). + LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode); + UNREACHABLE(); + return new_surface; + } + // This seems to be used for render-to-cubemap texture const std::size_t size_with_mipmaps{new_params.SizeInBytes2DWithMipmap()}; ASSERT_MSG(size_with_mipmaps == byte_stride, "Unexpected"); From df3799a00899a76a0b4adc9f93af403101b2332d Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 30 Sep 2018 14:28:36 -0400 Subject: [PATCH 10/10] gl_rasterizer_cache: Fixes to how we do render to cubemap. - Fixes issues with Splatoon 2. 
--- .../renderer_opengl/gl_rasterizer_cache.cpp | 14 ++++------- .../renderer_opengl/gl_rasterizer_cache.h | 23 ------------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 249b0061a..ce967c4d6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1058,9 +1058,6 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, } break; case SurfaceParams::SurfaceTarget::TextureCubemap: { - const u32 byte_stride{old_params.rt.layer_stride * - (SurfaceParams::GetFormatBpp(old_params.pixel_format) / CHAR_BIT)}; - if (old_params.rt.array_mode != 1) { // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this // yet (array rendering used as a cubemap texture). @@ -1070,15 +1067,14 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, } // This seems to be used for render-to-cubemap texture - const std::size_t size_with_mipmaps{new_params.SizeInBytes2DWithMipmap()}; - ASSERT_MSG(size_with_mipmaps == byte_stride, "Unexpected"); ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected"); ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected"); - ASSERT_MSG(old_params.width == new_params.width, "Unexpected"); - ASSERT_MSG(old_params.height == new_params.height, "Unexpected"); - ASSERT_MSG(old_params.rt.array_mode == 1, "Unexpected"); ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented"); + // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels. + // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild. 
+ const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)}; + for (std::size_t index = 0; index < new_params.depth; ++index) { Surface face_surface{TryGetReservedSurface(old_params)}; ASSERT_MSG(face_surface, "Unexpected"); @@ -1092,7 +1088,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index); } - old_params.addr += size_with_mipmaps; + old_params.addr += byte_stride; } break; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 51eb9b6dd..49025a3fe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -707,29 +707,6 @@ struct SurfaceParams { return SizeInBytes2D() * depth; } - /** - * Returns the size in bytes of the 2D surface with mipmaps. Each mipmap level proceeds the - * previous with half the width and half the height. Once the size of the next mip reaches 0, we - * are done. - */ - std::size_t SizeInBytes2DWithMipmap() const { - std::size_t size_in_bytes{}; - auto mip_params{*this}; - for (std::size_t level = 0; level < max_mip_level; level++) { - size_in_bytes += mip_params.SizeInBytes2D(); - - mip_params.width /= 2; - mip_params.height /= 2; - - if (!mip_params.width || !mip_params.height) { - break; - } - } - - // TODO(bunnei): This alignup is unverified, but necessary in games tested (e.g. in SMO) - return Common::AlignUp(size_in_bytes, 0x1000); - } - /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry);