From 1a1af3fda354f3e81cace3f3f64138bcea1995c6 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Fri, 13 Apr 2018 14:18:37 -0400
Subject: [PATCH 1/2] gl_rasterizer: Implement indexed vertex mode.

---
 src/video_core/engines/maxwell_3d.cpp         |  4 +-
 src/video_core/engines/maxwell_3d.h           | 44 +++++++++++++++-
 .../renderer_opengl/gl_rasterizer.cpp         | 50 ++++++++++++-------
 .../renderer_opengl/gl_rasterizer.h           |  1 -
 .../renderer_opengl/maxwell_to_gl.h           | 16 +++++-
 5 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 98ed11ec5..0e1d6d785 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -165,6 +165,7 @@ void Maxwell3D::ProcessQueryGet() {
 void Maxwell3D::DrawArrays() {
     LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
               regs.vertex_buffer.count);
+    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
 
@@ -176,7 +177,8 @@ void Maxwell3D::DrawArrays() {
         debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
     }
 
-    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
+    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
+    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
 }
 
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1fae41cb2..2b45ffed7 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -248,6 +248,12 @@ public:
             Patches = 0xe,
         };
 
+        enum class IndexFormat : u32 {
+            UnsignedByte = 0x0,  // 1 byte per index (see index_array.FormatSizeInBytes)
+            UnsignedShort = 0x1, // 2 bytes per index
+            UnsignedInt = 0x2,   // 4 bytes per index
+        };
+
         union {
             struct {
                 INSERT_PADDING_WORDS(0x200);
@@ -375,7 +381,47 @@ public:
                     };
                 } draw;
 
-                INSERT_PADDING_WORDS(0x139);
+                INSERT_PADDING_WORDS(0x6B);
+
+                // Index buffer registers consumed by indexed draws.
+                struct {
+                    u32 start_addr_high;
+                    u32 start_addr_low;
+                    u32 end_addr_high;
+                    u32 end_addr_low;
+                    IndexFormat format;
+                    u32 first;
+                    u32 count;
+
+                    /// Returns the size in bytes of a single index in the current format.
+                    unsigned FormatSizeInBytes() const {
+                        switch (format) {
+                        case IndexFormat::UnsignedByte:
+                            return 1;
+                        case IndexFormat::UnsignedShort:
+                            return 2;
+                        case IndexFormat::UnsignedInt:
+                            return 4;
+                        }
+                        UNREACHABLE();
+                        return {}; // Same pattern as the MaxwellToGL helpers: never fall off the end
+                    }
+
+                    /// Joins the high/low start words into one 64-bit GPU virtual address.
+                    GPUVAddr StartAddress() const {
+                        return static_cast<GPUVAddr>(
+                            (static_cast<GPUVAddr>(start_addr_high) << 32) | start_addr_low);
+                    }
+
+                    /// Joins the high/low end words into one 64-bit GPU virtual address.
+                    GPUVAddr EndAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
+                                                     end_addr_low);
+                    }
+                } index_array;
+
+                INSERT_PADDING_WORDS(0xC7);
+
                 struct {
                     u32 query_address_high;
                     u32 query_address_low;
@@ -572,6 +613,7 @@ ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 28abc563a..309ad9af8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -97,7 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL() {
     state.draw.vertex_buffer = stream_buffer->GetHandle();
 
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
-
     state.draw.shader_program = 0;
     state.draw.vertex_array = hw_vao.handle;
     state.Apply();
@@ -128,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
     }
 }
 
-void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
-    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-
-    if (is_indexed) {
-        UNREACHABLE();
-    }
-
-    // TODO(bunnei): Add support for 1+ vertex arrays
-    vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride;
-}
-
 void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     MICROPROFILE_SCOPE(OpenGL_VAO);
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
@@ -150,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
 
     // TODO(bunnei): Add support for 1+ vertex arrays
     const auto& vertex_array{regs.vertex_array[0]};
+    const auto& vertex_array_limit{regs.vertex_array_limit[0]};
     ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
     ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
     for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
@@ -162,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     // to avoid OpenGL errors.
     for (unsigned index = 0; index < 16; ++index) {
         auto& attrib = regs.vertex_attrib_format[index];
+        LOG_DEBUG(HW_GPU, "vertex attrib %d, count=%d, size=%s, type=%s, offset=%d, normalize=%d",
+                  index, attrib.ComponentCount(), attrib.SizeString().c_str(),
+                  attrib.TypeString().c_str(), attrib.offset.Value(), attrib.IsNormalized());
+
         glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
                               attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
                               reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
@@ -170,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     }
 
     // Copy vertex array data
-    const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
+    const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
     const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
     res_cache.FlushRegion(data_addr, data_size, nullptr);
     Memory::ReadBlock(data_addr, array_ptr, data_size);
@@ -333,13 +326,22 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Draw the vertex batch
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-    AnalyzeVertexArray(is_indexed);
+    // Widen before multiplying so a large index count cannot wrap in 32-bit arithmetic.
+    const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
+                                regs.index_array.FormatSizeInBytes()};
+
+    // TODO(bunnei): Add support for 1+ vertex arrays
+    // Reserve exactly what SetupVertexArray() copies (limit - start + 1 bytes); sizing this
+    // from the vertex/index count can be smaller than that copy.
+    vs_input_size = static_cast<GLsizeiptr>(regs.vertex_array_limit[0].LimitAddress() -
+                                            regs.vertex_array[0].StartAddress() + 1);
+
     state.draw.vertex_buffer = stream_buffer->GetHandle();
     state.Apply();
 
     size_t buffer_size = static_cast<size_t>(vs_input_size);
     if (is_indexed) {
-        UNREACHABLE();
+        buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
     }
 
     // Uniform space for the 5 shader stages
@@ -354,9 +352,18 @@ void RasterizerOpenGL::DrawArrays() {
     SetupVertexArray(buffer_ptr, buffer_offset);
     ptr_pos += vs_input_size;
 
+    // If indexed mode, copy the index buffer
     GLintptr index_buffer_offset = 0;
     if (is_indexed) {
-        UNREACHABLE();
+        ptr_pos = Common::AlignUp(ptr_pos, 4);
+
+        const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
+        const VAddr index_data_addr{
+            memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
+        Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
+
+        index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        ptr_pos += index_buffer_size;
     }
 
     SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
@@ -366,11 +373,16 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
+    const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
     if (is_indexed) {
-        UNREACHABLE();
+        const GLint index_min{static_cast<GLint>(regs.index_array.first)};
+        const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
+        glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
+                                      MaxwellToGL::IndexFormat(regs.index_array.format),
+                                      reinterpret_cast<const void*>(index_buffer_offset),
+                                      -index_min);
     } else {
-        glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
-                     regs.vertex_buffer.count);
+        glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
     }
 
     // Disable scissor test
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 548ce0453..fb5d99ba2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -155,7 +155,6 @@ private:
 
     GLsizeiptr vs_input_size;
 
-    void AnalyzeVertexArray(bool is_indexed);
     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
 
     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7909dcfc3..68bb77b4d 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -45,6 +45,20 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
     return {};
 }
 
+inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
+    switch (index_format) {
+    case Maxwell::IndexFormat::UnsignedByte:
+        return GL_UNSIGNED_BYTE;
+    case Maxwell::IndexFormat::UnsignedShort:
+        return GL_UNSIGNED_SHORT;
+    case Maxwell::IndexFormat::UnsignedInt:
+        return GL_UNSIGNED_INT;
+    }
+    LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format=%d", index_format);
+    UNREACHABLE();
+    return {};
+}
+
 inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
     switch (topology) {
     case Maxwell::PrimitiveTopology::Triangles:
@@ -52,7 +66,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
     case Maxwell::PrimitiveTopology::TriangleStrip:
         return GL_TRIANGLE_STRIP;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology);
+    LOG_CRITICAL(Render_OpenGL, "Unimplemented topology=%d", topology);
     UNREACHABLE();
     return {};
 }

From 77bdc49343e3f7e51a5908245f2259dae2d86060 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 16 Apr 2018 21:23:28 -0400
Subject: [PATCH 2/2] gl_rendering: Use NGLOG* for changed code.

---
 src/video_core/renderer_opengl/gl_rasterizer.cpp |  6 +++---
 src/video_core/renderer_opengl/maxwell_to_gl.h   | 15 ++++++++-------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 309ad9af8..75b4031a7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -151,9 +151,9 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     // to avoid OpenGL errors.
     for (unsigned index = 0; index < 16; ++index) {
         auto& attrib = regs.vertex_attrib_format[index];
-        LOG_DEBUG(HW_GPU, "vertex attrib %d, count=%d, size=%s, type=%s, offset=%d, normalize=%d",
-                  index, attrib.ComponentCount(), attrib.SizeString().c_str(),
-                  attrib.TypeString().c_str(), attrib.offset.Value(), attrib.IsNormalized());
+        NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+                    index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+                    attrib.offset.Value(), attrib.IsNormalized());
 
         glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
                               attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 68bb77b4d..632d14b78 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -31,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
             return GL_UNSIGNED_BYTE;
         }
 
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
+        NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
         UNREACHABLE();
         return {};
     }
@@ -40,7 +40,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
         return GL_FLOAT;
     }
 
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
     UNREACHABLE();
     return {};
 }
@@ -54,7 +54,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
     case Maxwell::IndexFormat::UnsignedInt:
         return GL_UNSIGNED_INT;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format=%d", index_format);
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
     UNREACHABLE();
     return {};
 }
@@ -66,7 +66,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
     case Maxwell::PrimitiveTopology::TriangleStrip:
         return GL_TRIANGLE_STRIP;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented topology=%d", topology);
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
     UNREACHABLE();
     return {};
 }
@@ -78,8 +78,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
     case Tegra::Texture::TextureFilter::Nearest:
         return GL_NEAREST;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u",
-                 static_cast<u32>(filter_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
+                   static_cast<u32>(filter_mode));
     UNREACHABLE();
     return {};
 }
@@ -89,7 +89,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
     case Tegra::Texture::WrapMode::ClampToEdge:
         return GL_CLAMP_TO_EDGE;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
+                   static_cast<u32>(wrap_mode));
     UNREACHABLE();
     return {};
 }