From 38c7b20475cb2c718b2d126acf07dd480c9b5038 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 9 Sep 2015 18:30:03 -0400 Subject: [PATCH 01/32] pica: Add pica_types module and move float24 definition. --- src/video_core/CMakeLists.txt | 1 + src/video_core/pica.h | 114 +------------------------------ src/video_core/pica_types.h | 124 ++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 112 deletions(-) create mode 100644 src/video_core/pica_types.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c3d7294d5..4b5d298f3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADERS command_processor.h gpu_debugger.h pica.h + pica_types.h primitive_assembly.h rasterizer.h rasterizer_interface.h diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 2f1b2dec4..b8db7869a 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -16,6 +16,8 @@ #include "common/vector_math.h" #include "common/logging/log.h" +#include "pica_types.h" + namespace Pica { // Returns index corresponding to the Regs member labeled by field_name @@ -1026,118 +1028,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); -struct float24 { - static float24 FromFloat32(float val) { - float24 ret; - ret.value = val; - return ret; - } - - // 16 bit mantissa, 7 bit exponent, 1 bit sign - // TODO: No idea if this works as intended - static float24 FromRawFloat24(u32 hex) { - float24 ret; - if ((hex & 0xFFFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0xFFFF; - u32 exponent = (hex >> 16) & 0x7F; - u32 sign = hex >> 23; - ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); - if (sign) - ret.value = 
-ret.value; - } - return ret; - } - - static float24 Zero() { - return FromFloat32(0.f); - } - - // Not recommended for anything but logging - float ToFloat32() const { - return value; - } - - float24 operator * (const float24& flt) const { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - return Zero(); - return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); - } - - float24 operator / (const float24& flt) const { - return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); - } - - float24 operator + (const float24& flt) const { - return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); - } - - float24 operator - (const float24& flt) const { - return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); - } - - float24& operator *= (const float24& flt) { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - *this = Zero(); - else value *= flt.ToFloat32(); - return *this; - } - - float24& operator /= (const float24& flt) { - value /= flt.ToFloat32(); - return *this; - } - - float24& operator += (const float24& flt) { - value += flt.ToFloat32(); - return *this; - } - - float24& operator -= (const float24& flt) { - value -= flt.ToFloat32(); - return *this; - } - - float24 operator - () const { - return float24::FromFloat32(-ToFloat32()); - } - - bool operator < (const float24& flt) const { - return ToFloat32() < flt.ToFloat32(); - } - - bool operator > (const float24& flt) const { - return ToFloat32() > flt.ToFloat32(); - } - - bool operator >= (const float24& flt) const { - return ToFloat32() >= flt.ToFloat32(); - } - - bool operator <= (const float24& flt) const { - return ToFloat32() <= flt.ToFloat32(); - } - - bool operator == (const float24& flt) const { - return ToFloat32() == flt.ToFloat32(); - } - - bool operator != (const 
float24& flt) const { - return ToFloat32() != flt.ToFloat32(); - } - -private: - // Stored as a regular float, merely for convenience - // TODO: Perform proper arithmetic on this! - float value; -}; -static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); - /// Struct used to describe current Pica state struct State { /// Pica registers diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h new file mode 100644 index 000000000..de798aa81 --- /dev/null +++ b/src/video_core/pica_types.h @@ -0,0 +1,124 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Pica { + +struct float24 { + static float24 FromFloat32(float val) { + float24 ret; + ret.value = val; + return ret; + } + + // 16 bit mantissa, 7 bit exponent, 1 bit sign + // TODO: No idea if this works as intended + static float24 FromRawFloat24(u32 hex) { + float24 ret; + if ((hex & 0xFFFFFF) == 0) { + ret.value = 0; + } else { + u32 mantissa = hex & 0xFFFF; + u32 exponent = (hex >> 16) & 0x7F; + u32 sign = hex >> 23; + ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); + if (sign) + ret.value = -ret.value; + } + return ret; + } + + static float24 Zero() { + return FromFloat32(0.f); + } + + // Not recommended for anything but logging + float ToFloat32() const { + return value; + } + + float24 operator * (const float24& flt) const { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + return Zero(); + return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); + } + + float24 operator / (const float24& flt) const { + return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); + } + + float24 operator + (const float24& flt) const { + return 
float24::FromFloat32(ToFloat32() + flt.ToFloat32()); + } + + float24 operator - (const float24& flt) const { + return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); + } + + float24& operator *= (const float24& flt) { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + *this = Zero(); + else value *= flt.ToFloat32(); + return *this; + } + + float24& operator /= (const float24& flt) { + value /= flt.ToFloat32(); + return *this; + } + + float24& operator += (const float24& flt) { + value += flt.ToFloat32(); + return *this; + } + + float24& operator -= (const float24& flt) { + value -= flt.ToFloat32(); + return *this; + } + + float24 operator - () const { + return float24::FromFloat32(-ToFloat32()); + } + + bool operator < (const float24& flt) const { + return ToFloat32() < flt.ToFloat32(); + } + + bool operator > (const float24& flt) const { + return ToFloat32() > flt.ToFloat32(); + } + + bool operator >= (const float24& flt) const { + return ToFloat32() >= flt.ToFloat32(); + } + + bool operator <= (const float24& flt) const { + return ToFloat32() <= flt.ToFloat32(); + } + + bool operator == (const float24& flt) const { + return ToFloat32() == flt.ToFloat32(); + } + + bool operator != (const float24& flt) const { + return ToFloat32() != flt.ToFloat32(); + } + +private: + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on this! + float value; +}; + +static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); + +} // namespace Pica From 4369767c721e9633fc6cd9c49b6142ff9b2fa8ea Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Sep 2015 18:47:15 -0400 Subject: [PATCH 02/32] pica: Add decodings for distance attenuation and LUT registers. 
--- src/video_core/pica.h | 105 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b8db7869a..81a568e88 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -643,7 +643,110 @@ struct Regs { } } - INSERT_PADDING_WORDS(0xe0); + INSERT_PADDING_WORDS(0x20); + + struct { + union LightColor { + BitField< 0, 10, u32> b; + BitField<10, 10, u32> g; + BitField<20, 10, u32> r; + + Math::Vec3f ToVec3f() const { + return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); + } + }; + + struct LightSrc { + LightColor specular_0; // material.specular_0 * light.specular_0 + LightColor specular_1; // material.specular_1 * light.specular_1 + LightColor diffuse; // material.diffuse * light.diffuse + LightColor ambient; // material.ambient * light.ambient + + struct { + // Encoded as 16-bit floating point + u16 x; + u16 y; + u16 z; + u16 unk; + + INSERT_PADDING_WORDS(0x3); + + // 1.f if 0, otherwise 0.f + BitField<0, 1, u32> w; + } position; + + + BitField<0, 20, u32> dist_atten_bias; + BitField<0, 20, u32> dist_atten_scale; + + INSERT_PADDING_WORDS(0x4); + }; + static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); + + LightSrc light[8]; + LightColor global_ambient; // emission + (material.ambient * lighting.ambient) + INSERT_PADDING_WORDS(0x1); + BitField<0, 3, u32> src_num; // number of enabled lights - 1 + INSERT_PADDING_WORDS(0x1); + + union { + // Each bit specifies whether distance attenuation should be applied for the + // corresponding light + + BitField<24, 1, u32> light_0; + BitField<25, 1, u32> light_1; + BitField<26, 1, u32> light_2; + BitField<27, 1, u32> light_3; + BitField<28, 1, u32> light_4; + BitField<29, 1, u32> light_5; + BitField<30, 1, u32> light_6; + BitField<31, 1, u32> light_7; + + bool IsEnabled(unsigned index) const { + const unsigned enable[] = { light_0, light_1, light_2, light_3, 
light_4, light_5, light_6, light_7 }; + return enable[index] == 0; + } + } dist_atten_enable; + + union { + BitField<0, 8, u32> index; ///< Index at which to set data in the LUT + BitField<8, 5, u32> type; ///< Type of LUT for which to set data + } lut_config; + + BitField<0, 1, u32> disable; + INSERT_PADDING_WORDS(0x1); + + // When data is written to any of these registers, it gets written to the lookup table of + // the selected type at the selected index, specified above in the `lut_config` register. + // With each write, `lut_config.index` is incremented. It does not matter which of these + // registers is written to, the behavior will be the same. + u32 lut_data[8]; + + INSERT_PADDING_WORDS(0x9); + + union { + // There are 8 light enable "slots", corresponding to the total number of lights + // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num' + // above), the first N slots below will be set to integers within the range of 0-7, + // corresponding to the actual light that is enabled for each slot. + + BitField< 0, 3, u32> slot_0; + BitField< 4, 3, u32> slot_1; + BitField< 8, 3, u32> slot_2; + BitField<12, 3, u32> slot_3; + BitField<16, 3, u32> slot_4; + BitField<20, 3, u32> slot_5; + BitField<24, 3, u32> slot_6; + BitField<28, 3, u32> slot_7; + + unsigned GetNum(unsigned index) const { + const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; + return enable_slots[index]; + } + } light_enable; + } lighting; + + INSERT_PADDING_WORDS(0x26); enum class VertexAttributeFormat : u64 { BYTE = 0, From 281bc90ad2afe16853178a56e0127cff8b53eb14 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Sep 2015 18:56:12 -0400 Subject: [PATCH 03/32] pica: Implement fragment lighting LUTs. 
--- src/video_core/command_processor.cpp | 15 +++++++++++++++ src/video_core/pica.h | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 59c75042c..7409534b6 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -464,6 +464,21 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; + lut_config.index = lut_config.index + 1; + break; + } + default: break; } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 81a568e88..b09484de4 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -1156,6 +1156,25 @@ struct State { ShaderSetup vs; ShaderSetup gs; + struct { + union LutEntry { + // Used for raw access + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField< 0, 12, u32> value; + + // Used by HW for efficient interpolation, Citra does not use these + BitField<12, 12, u32> difference; + + float ToFloat() { + return static_cast<float>(value) / 4095.f; + } + }; + + std::array<LutEntry, 256> luts[24]; + } lighting; + /// Current Pica command list struct { const u32* head_ptr; From b0030755708849eb27fe2bf1cc481c5ab905468e Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 9 Sep 2015 22:39:43 -0400 Subject: [PATCH 04/32] pica: Implement decoding of basic fragment 
lighting components. - Diffuse - Distance attenuation - float16/float20 types - Vertex Shader 'view' output --- src/video_core/clipper.cpp | 2 + src/video_core/pica.h | 63 +++++++++++++++++++++++++++----- src/video_core/pica_types.h | 56 ++++++++++++++++++++++++++++ src/video_core/shader/shader.cpp | 6 ++- src/video_core/shader/shader.h | 8 ++-- 5 files changed, 120 insertions(+), 15 deletions(-) diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 5d609da06..3a09d62f4 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -68,6 +68,8 @@ static void InitScreenCoordinates(OutputVertex& vtx) float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; + vtx.view *= inv_w; + vtx.quat *= inv_w; vtx.tc0 *= inv_w; vtx.tc1 *= inv_w; vtx.tc2 *= inv_w; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b09484de4..178a4b83f 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -241,7 +241,8 @@ struct Regs { TextureConfig texture0; INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; - INSERT_PADDING_WORDS(0x2); + BitField<0, 1, u32> fragment_lighting_enable; + INSERT_PADDING_WORDS(0x1); TextureConfig texture1; BitField<0, 4, TextureFormat> texture1_format; INSERT_PADDING_WORDS(0x2); @@ -645,6 +646,22 @@ struct Regs { INSERT_PADDING_WORDS(0x20); + enum class LightingSampler { + Distribution0 = 0, + Distribution1 = 1, + Fresnel = 3, + Blue = 4, + Green = 5, + Red = 6, + SpotlightAttenuation = 8, + DistanceAttenuation = 16, + }; + + enum class LightingLutInput { + NH = 0, // Cosine of the angle between the normal and half-angle vectors + LN = 3, // Cosine of the angle between the light and the normal vectors + }; + struct { union LightColor { BitField< 0, 10, u32> b; @@ -664,17 +681,21 @@ struct Regs { struct { // Encoded as 16-bit floating point - u16 x; - u16 y; - u16 z; - u16 unk; + union { + BitField< 0, 16, u32> x; + BitField<16, 16, u32> y; + }; + union { + BitField< 0, 
16, u32> z; + }; INSERT_PADDING_WORDS(0x3); - // 1.f if 0, otherwise 0.f - BitField<0, 1, u32> w; - } position; - + union { + BitField<0, 1, u32> w; // 1.f if 0, otherwise 0.f + BitField<1, 1, u32> two_sided_diffuse; // when disabled, clamp dot-product to 0 + }; + }; BitField<0, 20, u32> dist_atten_bias; BitField<0, 20, u32> dist_atten_scale; @@ -722,7 +743,27 @@ struct Regs { // registers is written to, the behavior will be the same. u32 lut_data[8]; - INSERT_PADDING_WORDS(0x9); + union { + BitField< 1, 1, u32> d0; + BitField< 5, 1, u32> d1; + BitField< 9, 1, u32> sp; + BitField<13, 1, u32> fr; + BitField<17, 1, u32> rb; + BitField<21, 1, u32> rg; + BitField<25, 1, u32> rr; + } abs_lut_input; + + union { + BitField< 0, 3, u32> d0; + BitField< 4, 3, u32> d1; + BitField< 8, 3, u32> sp; + BitField<12, 3, u32> fr; + BitField<16, 3, u32> rb; + BitField<20, 3, u32> rg; + BitField<24, 3, u32> rr; + } lut_input; + + INSERT_PADDING_WORDS(0x7); union { // There are 8 light enable "slots", corresponding to the total number of lights @@ -1095,6 +1136,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f); ASSERT_REG_POSITION(texture1, 0x91); ASSERT_REG_POSITION(texture1_format, 0x96); ASSERT_REG_POSITION(texture2, 0x99); @@ -1109,6 +1151,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); +ASSERT_REG_POSITION(lighting, 0x140); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index de798aa81..a34421c5d 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -121,4 +121,60 @@ private: 
static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); +struct float16 { + // 10 bit mantissa, 5 bit exponent, 1 bit sign + // TODO: No idea if this works as intended + static float16 FromRawFloat16(u32 hex) { + float16 ret; + if ((hex & 0xFFFF) == 0) { + ret.value = 0; + } else { + u32 mantissa = hex & 0x3FF; + u32 exponent = (hex >> 10) & 0x1F; + u32 sign = (hex >> 15) & 1; + ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f)); + if (sign) + ret.value = -ret.value; + } + return ret; + } + + float ToFloat32() const { + return value; + } + +private: + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on this! + float value; +}; + +struct float20 { + // 12 bit mantissa, 7 bit exponent, 1 bit sign + // TODO: No idea if this works as intended + static float20 FromRawFloat20(u32 hex) { + float20 ret; + if ((hex & 0xFFFFF) == 0) { + ret.value = 0; + } else { + u32 mantissa = hex & 0xFFF; + u32 exponent = (hex >> 12) & 0x7F; + u32 sign = (hex >> 19) & 1; + ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f)); + if (sign) + ret.value = -ret.value; + } + return ret; + } + + float ToFloat32() const { + return value; + } + +private: + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on this! 
+ float value; +}; + } // namespace Pica diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 59f54236b..44c234ed8 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -134,11 +134,13 @@ OutputVertex Run(UnitState<Debug>& state, const InputVertex& input, int num_attr std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } - LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", + LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), + ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1c6fa592c..f068cd93f 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -37,17 +37,19 @@ struct OutputVertex { Math::Vec4<float24> color; Math::Vec2<float24> tc0; Math::Vec2<float24> tc1; - float24 pad[6]; + INSERT_PADDING_WORDS(2); + Math::Vec3<float24> view; + INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; // Padding for optimal alignment - float24 pad2[4]; + INSERT_PADDING_WORDS(4); // Attributes used to store intermediate results // position after perspective divide Math::Vec3<float24> screenpos; - float24 pad3; + INSERT_PADDING_WORDS(1); // Linear interpolation // factor: 0=this, 1=vtx From afbef525163af1b28e5b7493e58383d442762228 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 12 Nov 2015 17:33:21 -0500 Subject: 
[PATCH 05/32] renderer_opengl: Implement diffuse component of HW fragment lighting. --- src/video_core/pica.h | 21 +-- .../renderer_opengl/gl_rasterizer.cpp | 141 ++++++++++++++++++ .../renderer_opengl/gl_rasterizer.h | 61 +++++++- .../renderer_opengl/gl_shader_gen.cpp | 53 ++++++- .../renderer_opengl/gl_shader_util.h | 2 + src/video_core/renderer_opengl/pica_to_gl.h | 7 + 6 files changed, 270 insertions(+), 15 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 178a4b83f..b82ecf68a 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -662,17 +662,18 @@ struct Regs { LN = 3, // Cosine of the angle between the light and the normal vectors }; - struct { - union LightColor { - BitField< 0, 10, u32> b; - BitField<10, 10, u32> g; - BitField<20, 10, u32> r; - - Math::Vec3f ToVec3f() const { - return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); - } - }; + union LightColor { + BitField< 0, 10, u32> b; + BitField<10, 10, u32> g; + BitField<20, 10, u32> r; + + Math::Vec3f ToVec3f() const { + // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component + return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); + } + }; + struct { struct LightSrc { LightColor specular_0; // material.specular_0 * light.specular_0 LightColor specular_1; // material.specular_1 * light.specular_1 diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6441e2586..1e51a7655 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); + 
glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); + + glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); + SetShader(); // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation @@ -283,6 +289,98 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10): + SyncLightDiffuse(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10): + SyncLightAmbient(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10): + SyncLightAmbient(3); + break; + case 
PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): + SyncLightPosition(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10): + SyncLightPosition(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10): + SyncLightPosition(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10): + SyncLightPosition(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10): + SyncLightPosition(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10): + SyncLightPosition(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10): + SyncLightPosition(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10): + SyncLightPosition(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case 
PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): + SyncGlobalAmbient(); + break; + } } @@ -503,6 +601,13 @@ void RasterizerOpenGL::SetShader() { auto& tev_stages = Pica::g_state.regs.GetTevStages(); for (int index = 0; index < tev_stages.size(); ++index) SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (int light_index = 0; light_index < 8; light_index++) { + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + } } void RasterizerOpenGL::SyncFramebuffer() { @@ -683,6 +788,42 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS } } +void RasterizerOpenGL::SyncGlobalAmbient() { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDiffuse(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightAmbient(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightPosition(int light_index) { + std::array<GLfloat, 3> position = { + Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + 
if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncDrawState() { const auto& regs = Pica::g_state.regs; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 569beaa5c..698ca5c4c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -71,6 +71,18 @@ struct PicaShaderConfig { regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; + // Fragment lighting + + res.lighting_enabled = !regs.lighting.disable; + res.num_lights = regs.lighting.src_num + 1; + + for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) { + unsigned num = regs.lighting.light_enable.GetNum(light_index); + res.light_src[light_index].num = num; + res.light_src[light_index].directional = regs.lighting.light[num].w; + res.light_src[light_index].two_sided_diffuse = regs.lighting.light[num].two_sided_diffuse; + } + return res; } @@ -89,6 +101,16 @@ struct PicaShaderConfig { Pica::Regs::CompareFunc alpha_test_func; std::array tev_stages = {}; u8 combiner_buffer_input; + + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enabled; + } light_src[8]; + + bool lighting_enabled; + unsigned num_lights; }; namespace std { @@ -182,6 +204,13 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); } GLfloat position[4]; @@ -189,6 +218,17 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; 
GLfloat tex_coord2[2]; + GLfloat normquat[4]; + GLfloat view[3]; + }; + + struct LightSrc { + std::array<GLfloat, 3> diffuse; + INSERT_PADDING_WORDS(1); + std::array<GLfloat, 3> ambient; + INSERT_PADDING_WORDS(1); + std::array<GLfloat, 3> position; + INSERT_PADDING_WORDS(1); }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned @@ -198,11 +238,14 @@ private: std::array<GLfloat, 4> tev_combiner_buffer_color; GLint alphatest_ref; GLfloat depth_offset; - INSERT_PADDING_BYTES(8); + INSERT_PADDING_WORDS(2); + std::array<GLfloat, 3> lighting_global_ambient; + INSERT_PADDING_WORDS(1); + LightSrc light_src[8]; }; - static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); + static_assert(sizeof(UniformData) == 0x210, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Reconfigure the OpenGL color texture to use the given format and dimensions void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); @@ -249,6 +292,18 @@ private: /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); + /// Syncs the lighting global ambient color to match the PICA register + void SyncGlobalAmbient(); + + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); + + /// Syncs the specified light's ambient color to match the PICA register + void SyncLightAmbient(int light_index); + + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); + /// Syncs the remaining OpenGL drawing state to match the current PICA state void SyncDrawState(); diff --git 
a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 22022f7f4..5bc588b0b 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,8 +32,7 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "primary_color"; break; case Source::PrimaryFragmentColor: - // HACK: Until we implement fragment lighting, use primary_color - out += "primary_color"; + out += "primary_fragment_color"; break; case Source::SecondaryFragmentColor: // HACK: Until we implement fragment lighting, use zero @@ -324,24 +323,67 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 in vec4 primary_color; in vec2 texcoord[3]; +in vec4 normquat; +in vec3 view; out vec4 color; +struct LightSrc { + vec3 diffuse; + vec3 ambient; + vec3 position; +}; + layout (std140) uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; float depth_offset; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; }; uniform sampler2D tex[3]; void main() { +vec4 primary_fragment_color = vec4(0.0); )"; + if (config.lighting_enabled) { + out += "vec3 normal = normalize(vec3(\n"; + out += " 2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n"; + out += " 2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n"; + out += " 1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n"; + out += "vec4 secondary_color = vec4(0.0);\n"; + out += "vec3 diffuse_sum = vec3(0.0);\n"; + out += "vec3 fragment_position = -view;\n"; + + for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { + unsigned num = config.light_src[light_index].num; + + std::string light_vector; + if (config.light_src[light_index].directional) + light_vector = "normalize(-light_src[" + std::to_string(num) + "].position)"; 
+ else + light_vector = "normalize(light_src[" + std::to_string(num) + "].position - fragment_position)"; + + std::string dot_product; + if (config.light_src[light_index].two_sided_diffuse) + dot_product = "abs(dot(" + light_vector + ", normal))"; + else + dot_product = "max(dot(" + light_vector + ", normal), 0.0)"; + + out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * 1.0;\n"; + } + + out += "diffuse_sum += lighting_global_ambient;\n"; + out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; + } + // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (config.alpha_test_func == Regs::CompareFunc::Never) { out += "discard; }"; @@ -369,21 +411,28 @@ void main() { std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out vec4 normquat; +out vec3 view; void main() { primary_color = vert_color; texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; + normquat = vert_normquat; + view = vert_view; gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); } )"; 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 046aae14f..097242f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,8 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, }; /** diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 04c1d1a34..346c9391d 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -183,4 +183,11 @@ inline std::array ColorRGBA8(const u32 color) { } }; } +inline std::array LightColor(const Pica::Regs::LightColor& color) { + return { { color.r / 255.0f, + color.g / 255.0f, + color.b / 255.0f + } }; +} + } // namespace From e9af70eaf3e9d190b2c75c039b004beb71f0e436 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 13 Nov 2015 22:52:20 -0500 Subject: [PATCH 06/32] renderer_opengl: Implement HW fragment lighting LUTs within our default UBO. 
--- src/video_core/pica.h | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 65 +++++++++++++++---- .../renderer_opengl/gl_rasterizer.h | 9 ++- .../renderer_opengl/gl_shader_gen.cpp | 7 ++ 4 files changed, 67 insertions(+), 16 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b82ecf68a..aad9effdc 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -1216,7 +1216,7 @@ struct State { } }; - std::array luts[24]; + std::array, 24> luts; } lighting; /// Current Pica command list diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1e51a7655..80693fa29 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -162,6 +162,13 @@ void RasterizerOpenGL::DrawTriangles() { state.draw.shader_dirty = false; } + for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); index++) { + if (uniform_block_data.lut_dirty[index]) { + SyncLightingLUT(index); + uniform_block_data.lut_dirty[index] = false; + } + } + if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); uniform_block_data.dirty = false; @@ -381,6 +388,21 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncGlobalAmbient(); break; + // Fragment lighting lookup tables + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lut_dirty[lut_config.type] = true; + 
break; + } + } } @@ -593,20 +615,23 @@ void RasterizerOpenGL::SetShader() { unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - // Update uniforms - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = Pica::g_state.regs.GetTevStages(); - for (int index = 0; index < tev_stages.size(); ++index) - SyncTevConstColor(index, tev_stages[index]); - - SyncGlobalAmbient(); - for (int light_index = 0; light_index < 8; light_index++) { - SyncLightDiffuse(light_index); - SyncLightAmbient(light_index); - SyncLightPosition(light_index); + // Update uniforms + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.GetTevStages(); + for (int index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); ++index) + SyncLightingLUT(index); + + SyncGlobalAmbient(); + for (int light_index = 0; light_index < 8; light_index++) { + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + } } } @@ -796,6 +821,20 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } } +void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { + auto& lut = uniform_block_data.data.lighting_lut[lut_index / 4]; + std::array, 256> new_lut; + + for (int offset = 0; offset < new_lut.size(); ++offset) { + new_lut[offset][lut_index & 3] = Pica::g_state.lighting.luts[lut_index][offset].ToFloat(); + } + + if (new_lut != lut) { + lut = new_lut; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncLightDiffuse(int light_index) { auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); if (color != uniform_block_data.data.light_src[light_index].diffuse) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 
698ca5c4c..fa4a78cb1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -242,10 +242,11 @@ private: std::array lighting_global_ambient; INSERT_PADDING_WORDS(1); LightSrc light_src[8]; + std::array, 256>, 6> lighting_lut; }; - static_assert(sizeof(UniformData) == 0x210, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); + static_assert(sizeof(UniformData) == 0x6210, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 32768, "UniformData structure must be less than 32kb"); /// Reconfigure the OpenGL color texture to use the given format and dimensions void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); @@ -295,6 +296,9 @@ private: /// Syncs the lighting global ambient color to match the PICA register void SyncGlobalAmbient(); + /// Syncs the lighting lookup tables + void SyncLightingLUT(unsigned index); + /// Syncs the specified light's diffuse color to match the PICA register void SyncLightDiffuse(int light_index); @@ -346,6 +350,7 @@ private: struct { UniformData data; + bool lut_dirty[24]; bool dirty; } uniform_block_data; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 5bc588b0b..4e02671dd 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -324,6 +324,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { #version 330 core #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 +#define LIGHTING_LUT_SIZE 256 in vec4 primary_color; in vec2 texcoord[3]; @@ -345,6 +346,12 @@ layout (std140) uniform shader_data { float depth_offset; vec3 lighting_global_ambient; LightSrc 
light_src[NUM_LIGHTS]; + vec4 lighting_lut_0[LIGHTING_LUT_SIZE]; + vec4 lighting_lut_1[LIGHTING_LUT_SIZE]; + vec4 lighting_lut_2[LIGHTING_LUT_SIZE]; + vec4 lighting_lut_3[LIGHTING_LUT_SIZE]; + vec4 lighting_lut_4[LIGHTING_LUT_SIZE]; + vec4 lighting_lut_5[LIGHTING_LUT_SIZE]; }; uniform sampler2D tex[3]; From e34fa6365ff87af247b0ae8ed880c4032bcb2ed0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 13 Nov 2015 23:04:19 -0500 Subject: [PATCH 07/32] renderer_opengl: Implement HW fragment lighting distance attenuation. --- .../renderer_opengl/gl_rasterizer.h | 36 +++++++++++-------- .../renderer_opengl/gl_shader_gen.cpp | 19 ++++++++-- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index fa4a78cb1..ba0b05802 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -78,9 +78,13 @@ struct PicaShaderConfig { for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; res.light_src[light_index].num = num; - res.light_src[light_index].directional = regs.lighting.light[num].w; - res.light_src[light_index].two_sided_diffuse = regs.lighting.light[num].two_sided_diffuse; + res.light_src[light_index].directional = light.w; + res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse; + res.light_src[light_index].dist_atten_enabled = regs.lighting.dist_atten_enable.IsEnabled(num); + res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); + res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); } return res; @@ -98,19 +102,23 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func; - 
std::array tev_stages = {}; - u8 combiner_buffer_input; - struct { - unsigned num; - bool directional; - bool two_sided_diffuse; - bool dist_atten_enabled; - } light_src[8]; - - bool lighting_enabled; - unsigned num_lights; + Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; + std::array tev_stages = {}; + u8 combiner_buffer_input = 0; + + struct { + unsigned num = 0; + bool directional = false; + bool two_sided_diffuse = false; + bool dist_atten_enabled = false; + GLfloat dist_atten_scale = 0.0f; + GLfloat dist_atten_bias = 0.0f; + } light_src[8]; + + bool lighting_enabled = false; + unsigned num_lights = 0; + }; }; namespace std { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 4e02671dd..cf99cff76 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -371,12 +371,13 @@ vec4 primary_fragment_color = vec4(0.0); for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { unsigned num = config.light_src[light_index].num; + std::string light_src = "light_src[" + std::to_string(num) + "]"; std::string light_vector; if (config.light_src[light_index].directional) - light_vector = "normalize(-light_src[" + std::to_string(num) + "].position)"; + light_vector = "normalize(-" + light_src + ".position)"; else - light_vector = "normalize(light_src[" + std::to_string(num) + "].position - fragment_position)"; + light_vector = "normalize(" + light_src + ".position - fragment_position)"; std::string dot_product; if (config.light_src[light_index].two_sided_diffuse) @@ -384,7 +385,19 @@ vec4 primary_fragment_color = vec4(0.0); else dot_product = "max(dot(" + light_vector + ", normal), 0.0)"; - out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * 1.0;\n"; + std::string dist_atten = "1.0"; + if 
(config.light_src[light_index].dist_atten_enabled) { + std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale); + std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias); + std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")"; + std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))"; + + unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); + + dist_atten = "lighting_lut_" + std::to_string(lut_num /4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]"; + } + + out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * " + dist_atten + ";\n"; } out += "diffuse_sum += lighting_global_ambient;\n"; From bf89870437ebb0d983cfc20c3ac0490169f59f44 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 14 Nov 2015 23:23:08 -0500 Subject: [PATCH 08/32] renderer_opengl: Initial implementation of basic specular lighting. 
--- src/video_core/pica.h | 8 +- .../renderer_opengl/gl_rasterizer.cpp | 68 +++++++++++++++ .../renderer_opengl/gl_rasterizer.h | 20 +++++ .../renderer_opengl/gl_shader_gen.cpp | 82 ++++++++++++++++--- 4 files changed, 165 insertions(+), 13 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index aad9effdc..c63d87a36 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -659,6 +659,8 @@ struct Regs { enum class LightingLutInput { NH = 0, // Cosine of the angle between the normal and half-angle vectors + VH = 1, // Cosine of the angle between the view and half-angle vectors + NV = 2, // Cosine of the angle between the normal and the view vector LN = 3, // Cosine of the angle between the light and the normal vectors }; @@ -709,7 +711,11 @@ struct Regs { LightColor global_ambient; // emission + (material.ambient * lighting.ambient) INSERT_PADDING_WORDS(0x1); BitField<0, 3, u32> src_num; // number of enabled lights - 1 - INSERT_PADDING_WORDS(0x1); + + union { + BitField< 4, 4, u32> config; + BitField<27, 1, u32> clamp_highlights; + } light_env; union { // Each bit specifies whether distance attenuation should be applied for the diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 80693fa29..c6fb37c53 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -297,6 +297,58 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncCombinerColor(); break; + // Fragment lighting specular 0 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10): + SyncLightSpecular0(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 
3 * 0x10): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10): + SyncLightSpecular0(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10): + SyncLightSpecular1(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10): + SyncLightSpecular1(7); + break; + // Fragment lighting diffuse color case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): SyncLightDiffuse(0); @@ -835,6 +887,22 @@ void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { } } +void RasterizerOpenGL::SyncLightSpecular0(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = 
color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular1(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncLightDiffuse(int light_index) { auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); if (color != uniform_block_data.data.light_src[light_index].diffuse) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ba0b05802..9e93b8b2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,6 +87,10 @@ struct PicaShaderConfig { res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); } + res.lighting_lut.d0_abs = (regs.lighting.abs_lut_input.d0 == 0); + res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); + res.clamp_highlights = regs.lighting.light_env.clamp_highlights; + return res; } @@ -118,6 +122,12 @@ struct PicaShaderConfig { bool lighting_enabled = false; unsigned num_lights = 0; + bool clamp_highlights = false; + + struct { + bool d0_abs = false; + Pica::Regs::LightingLutInput d0_type = Pica::Regs::LightingLutInput::NH; + } lighting_lut; }; }; @@ -231,6 +241,10 @@ private: }; struct LightSrc { + std::array specular_0; + INSERT_PADDING_WORDS(1); + std::array specular_1; + INSERT_PADDING_WORDS(1); std::array diffuse; INSERT_PADDING_WORDS(1); std::array ambient; @@ -316,6 +330,12 @@ private: /// Syncs the specified light's position to match the PICA register void SyncLightPosition(int light_index); + /// Syncs the specified light's specular 0 color to match the PICA register + void 
SyncLightSpecular0(int light_index); + + /// Syncs the specified light's specular 1 color to match the PICA register + void SyncLightSpecular1(int light_index); + /// Syncs the remaining OpenGL drawing state to match the current PICA state void SyncDrawState(); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index cf99cff76..abcc89f1d 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -35,8 +35,7 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "primary_fragment_color"; break; case Source::SecondaryFragmentColor: - // HACK: Until we implement fragment lighting, use zero - out += "vec4(0.0)"; + out += "secondary_fragment_color"; break; case Source::Texture0: out += "texture(tex[0], texcoord[0])"; @@ -334,6 +333,8 @@ in vec3 view; out vec4 color; struct LightSrc { + vec3 specular_0; + vec3 specular_1; vec3 diffuse; vec3 ambient; vec3 position; @@ -358,6 +359,7 @@ uniform sampler2D tex[3]; void main() { vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); )"; if (config.lighting_enabled) { @@ -367,41 +369,97 @@ vec4 primary_fragment_color = vec4(0.0); out += " 1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n"; out += "vec4 secondary_color = vec4(0.0);\n"; out += "vec3 diffuse_sum = vec3(0.0);\n"; + out += "vec3 specular_sum = vec3(0.0);\n"; out += "vec3 fragment_position = -view;\n"; + out += "vec3 light_vector = vec3(0.0);\n"; + out += "float dist_atten = 1.0;\n"; + + // Gets the index into the specified lookup table for specular lighting + auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) { + const std::string half_angle = "normalize(view + light_vector)"; + std::string index; + switch (input) { + case Regs::LightingLutInput::NH: + index = "dot(normal, " + half_angle + ")"; + break; + + case Regs::LightingLutInput::VH: + 
index = std::string("dot(view, " + half_angle + ")"); + break; + + case Regs::LightingLutInput::NV: + index = std::string("dot(normal, view)"); + break; + + case Regs::LightingLutInput::LN: + index = std::string("dot(light_vector, normal)"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + UNIMPLEMENTED(); + break; + } + + if (abs) { + // In the range of [ 0.f, 1.f] + index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + return "clamp(int(" + index + " * 256.0), 0, 255)"; + } else { + // In the range of [-1.f, 1.f] + index = "clamp(" + index + ", -1.0, 1.0)"; + return std::string("uint(int(" + index + " * 127.f) & 0xff)"); + } + + return std::string(); + }; for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { unsigned num = config.light_src[light_index].num; std::string light_src = "light_src[" + std::to_string(num) + "]"; - std::string light_vector; if (config.light_src[light_index].directional) - light_vector = "normalize(-" + light_src + ".position)"; + out += "light_vector = normalize(-" + light_src + ".position);\n"; else - light_vector = "normalize(" + light_src + ".position - fragment_position)"; + out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n"; std::string dot_product; if (config.light_src[light_index].two_sided_diffuse) - dot_product = "abs(dot(" + light_vector + ", normal))"; + dot_product = "abs(dot(light_vector, normal))"; else - dot_product = "max(dot(" + light_vector + ", normal), 0.0)"; + dot_product = "max(dot(light_vector, normal), 0.0)"; - std::string dist_atten = "1.0"; + // Compute distance attenuation value + out += "dist_atten = 1.0;\n"; if (config.light_src[light_index].dist_atten_enabled) { std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale); std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias); std::string lut_index = 
"(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")"; std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))"; - unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); - - dist_atten = "lighting_lut_" + std::to_string(lut_num /4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]"; + const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); + out += "dist_atten = lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "];\n"; } - out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * " + dist_atten + ";\n"; + // Compute primary fragment color (diffuse lighting) function + out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n"; + + // Compute secondary fragment color (specular lighting) function + std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); + const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0; + std::string lut_lookup = "lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]"; + + out += "specular_sum += (" + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n"; + } + + out += "float clamp_highlights = 1.0;\n"; + if (config.clamp_highlights) { + out += "if (dot(light_vector, normal) <= 0.0) clamp_highlights = 0.0;\n"; } out += "diffuse_sum += lighting_global_ambient;\n"; out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; + out += "secondary_fragment_color = vec4(clamp(clamp_highlights * specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; } // Do not do any sort of processing if 
it's obvious we're not going to pass the alpha test From 021cb0bced1d8045f04b85024b97a07a4d0df12f Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 15 Nov 2015 17:43:01 -0500 Subject: [PATCH 09/32] renderer_opengl: Use textures for fragment shader LUTs instead of UBOs. - Gets us LUT interpolation for free. - Some older Intel GPU drivers did not support the big UBOs needed to store the LUTs. --- .../renderer_opengl/gl_rasterizer.cpp | 51 ++++++++++++++----- .../renderer_opengl/gl_rasterizer.h | 10 ++-- .../renderer_opengl/gl_shader_gen.cpp | 18 +++---- src/video_core/renderer_opengl/gl_state.cpp | 8 +++ src/video_core/renderer_opengl/gl_state.h | 4 ++ 5 files changed, 64 insertions(+), 27 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c6fb37c53..6e7d6a40d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -126,6 +126,19 @@ void RasterizerOpenGL::InitObjects() { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + for (size_t i = 0; i < lighting_lut.size(); ++i) { + lighting_lut[i].Create(); + state.lighting_lut[i].texture_1d = lighting_lut[i].handle; + + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d); + + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } + state.Apply(); + ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); } @@ -162,7 +175,7 @@ void RasterizerOpenGL::DrawTriangles() { 
state.draw.shader_dirty = false; } - for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); index++) { + for (unsigned index = 0; index < lighting_lut.size(); index++) { if (uniform_block_data.lut_dirty[index]) { SyncLightingLUT(index); uniform_block_data.lut_dirty[index] = false; @@ -451,7 +464,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lut_dirty[lut_config.type] = true; + uniform_block_data.lut_dirty[lut_config.type / 4] = true; break; } @@ -663,6 +676,20 @@ void RasterizerOpenGL::SetShader() { uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } + // Set the texture samplers to correspond to different lookup table texture units + GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); @@ -675,9 +702,6 @@ void RasterizerOpenGL::SetShader() { for (int index = 0; index < tev_stages.size(); ++index) SyncTevConstColor(index, tev_stages[index]); 
- for (unsigned index = 0; index < Pica::g_state.lighting.luts.size(); ++index) - SyncLightingLUT(index); - SyncGlobalAmbient(); for (int light_index = 0; light_index < 8; light_index++) { SyncLightDiffuse(light_index); @@ -874,16 +898,19 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - auto& lut = uniform_block_data.data.lighting_lut[lut_index / 4]; - std::array, 256> new_lut; + std::array, 256> new_data; - for (int offset = 0; offset < new_lut.size(); ++offset) { - new_lut[offset][lut_index & 3] = Pica::g_state.lighting.luts[lut_index][offset].ToFloat(); + for (unsigned offset = 0; offset < new_data.size(); ++offset) { + new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); + new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); + new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); + new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); } - if (new_lut != lut) { - lut = new_lut; - uniform_block_data.dirty = true; + if (new_data != lighting_lut_data[lut_index]) { + lighting_lut_data[lut_index] = new_data; + glActiveTexture(GL_TEXTURE3 + lut_index); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9e93b8b2f..b50542701 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -264,11 +264,10 @@ private: std::array lighting_global_ambient; INSERT_PADDING_WORDS(1); LightSrc light_src[8]; - std::array, 256>, 6> lighting_lut; }; - static_assert(sizeof(UniformData) == 0x6210, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 32768, "UniformData structure must be 
less than 32kb"); + static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Reconfigure the OpenGL color texture to use the given format and dimensions void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); @@ -378,7 +377,7 @@ private: struct { UniformData data; - bool lut_dirty[24]; + bool lut_dirty[6]; bool dirty; } uniform_block_data; @@ -386,4 +385,7 @@ private: OGLBuffer vertex_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + + std::array lighting_lut; + std::array, 256>, 6> lighting_lut_data; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index abcc89f1d..cb570c4d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -324,6 +324,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 #define LIGHTING_LUT_SIZE 256 +#define FLOAT_255 0.99609375 in vec4 primary_color; in vec2 texcoord[3]; @@ -347,15 +348,10 @@ layout (std140) uniform shader_data { float depth_offset; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; - vec4 lighting_lut_0[LIGHTING_LUT_SIZE]; - vec4 lighting_lut_1[LIGHTING_LUT_SIZE]; - vec4 lighting_lut_2[LIGHTING_LUT_SIZE]; - vec4 lighting_lut_3[LIGHTING_LUT_SIZE]; - vec4 lighting_lut_4[LIGHTING_LUT_SIZE]; - vec4 lighting_lut_5[LIGHTING_LUT_SIZE]; }; uniform sampler2D tex[3]; +uniform sampler1D lut[6]; void main() { vec4 primary_fragment_color = vec4(0.0); @@ -404,11 +400,11 @@ vec4 secondary_fragment_color = vec4(0.0); if (abs) { // In the range of [ 0.f, 1.f] index = config.light_src[light_num].two_sided_diffuse ? 
"abs(" + index + ")" : "max(" + index + ", 0.f)"; - return "clamp(int(" + index + " * 256.0), 0, 255)"; + return "clamp(" + index + ", 0.0, FLOAT_255)"; } else { // In the range of [-1.f, 1.f] index = "clamp(" + index + ", -1.0, 1.0)"; - return std::string("uint(int(" + index + " * 127.f) & 0xff)"); + return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)"; } return std::string(); @@ -435,10 +431,10 @@ vec4 secondary_fragment_color = vec4(0.0); std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale); std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias); std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")"; - std::string clamped_lut_index = "((clamp(int(" + lut_index + " * 256.0), 0, 255)))"; + std::string clamped_lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))"; const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); - out += "dist_atten = lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "];\n"; + out += "dist_atten = texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "];\n"; } // Compute primary fragment color (diffuse lighting) function @@ -447,7 +443,7 @@ vec4 secondary_fragment_color = vec4(0.0); // Compute secondary fragment color (specular lighting) function std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0; - std::string lut_lookup = "lighting_lut_" + std::to_string(lut_num / 4) + "[" + clamped_lut_index + "][" + std::to_string(lut_num & 3) + "]"; + std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]"; out += "specular_sum += (" + 
lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n"; } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index a82372995..ab4b6c7b1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -170,6 +170,14 @@ void OpenGLState::Apply() { } } + // Lighting LUTs + for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { + if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); + } + } + // Framebuffer if (draw.framebuffer != cur_state.draw.framebuffer) { glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b8ab45bb8..e848058d7 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -61,6 +61,10 @@ public: GLuint sampler; // GL_SAMPLER_BINDING } texture_units[3]; + struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } lighting_lut[6]; + struct { GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING From 603b619cbe81ba1fc4dda83dfd88d99e53c95270 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 16 Nov 2015 20:56:28 -0500 Subject: [PATCH 10/32] gl_shader_gen: View vector needs to be normalized when computing half angle vector. 
--- src/video_core/renderer_opengl/gl_shader_gen.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index cb570c4d2..73de94457 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -368,19 +368,19 @@ vec4 secondary_fragment_color = vec4(0.0); out += "vec3 specular_sum = vec3(0.0);\n"; out += "vec3 fragment_position = -view;\n"; out += "vec3 light_vector = vec3(0.0);\n"; + out += "vec3 half_angle_vector = vec3(0.0);\n"; out += "float dist_atten = 1.0;\n"; // Gets the index into the specified lookup table for specular lighting auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) { - const std::string half_angle = "normalize(view + light_vector)"; std::string index; switch (input) { case Regs::LightingLutInput::NH: - index = "dot(normal, " + half_angle + ")"; + index = "dot(normal, half_angle_vector)"; break; case Regs::LightingLutInput::VH: - index = std::string("dot(view, " + half_angle + ")"); + index = std::string("dot(view, half_angle_vector)"); break; case Regs::LightingLutInput::NV: @@ -441,6 +441,7 @@ vec4 secondary_fragment_color = vec4(0.0); out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n"; // Compute secondary fragment color (specular lighting) function + out += "half_angle_vector = normalize(normalize(view) + light_vector);\n"; std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0; std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]"; From bdc72d090458ab8af288304463ea75e975f1327d Mon Sep 17 00:00:00 2001 From: bunnei 
Date: Wed, 18 Nov 2015 21:14:50 -0500 Subject: [PATCH 11/32] gl_shader_gen: Fix bug with lighting where clamp highlights was only applied to last light. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 73de94457..7821170db 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -369,6 +369,7 @@ vec4 secondary_fragment_color = vec4(0.0); out += "vec3 fragment_position = -view;\n"; out += "vec3 light_vector = vec3(0.0);\n"; out += "vec3 half_angle_vector = vec3(0.0);\n"; + out += "float clamp_highlights = 1.0;\n"; out += "float dist_atten = 1.0;\n"; // Gets the index into the specified lookup table for specular lighting @@ -446,17 +447,16 @@ vec4 secondary_fragment_color = vec4(0.0); const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0; std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]"; - out += "specular_sum += (" + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten);\n"; - } + if (config.clamp_highlights) { + out += "clamp_highlights = (dot(light_vector, normal) <= 0.0) ? 
0.0 : 1.0;\n"; + } - out += "float clamp_highlights = 1.0;\n"; - if (config.clamp_highlights) { - out += "if (dot(light_vector, normal) <= 0.0) clamp_highlights = 0.0;\n"; + out += "specular_sum += clamp_highlights * " + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten;\n"; } out += "diffuse_sum += lighting_global_ambient;\n"; out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; - out += "secondary_fragment_color = vec4(clamp(clamp_highlights * specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; + out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; } // Do not do any sort of processing if it's obvious we're not going to pass the alpha test From 5f3bad8fb19004eebc1aec7df295d9c807a64fef Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 18 Nov 2015 21:16:06 -0500 Subject: [PATCH 12/32] gl_shader_gen: Fix directional lights. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7821170db..8bc8e2b36 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -416,7 +416,7 @@ vec4 secondary_fragment_color = vec4(0.0); std::string light_src = "light_src[" + std::to_string(num) + "]"; if (config.light_src[light_index].directional) - out += "light_vector = normalize(-" + light_src + ".position);\n"; + out += "light_vector = normalize(" + light_src + ".position);\n"; else out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n"; From 76f303538b8fd5c4bed1f5878058fb4c18fb045f Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 18 Nov 2015 22:36:01 -0500 Subject: [PATCH 13/32] gl_shader_gen: Reorganize and cleanup lighting code. - No functional difference. 
--- .../renderer_opengl/gl_shader_gen.cpp | 207 +++++++++--------- 1 file changed, 107 insertions(+), 100 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8bc8e2b36..10cb2d065 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -318,6 +318,111 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; } +/// Writes the code to emulate fragment lighting +static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + // Define lighting globals + out += "vec3 diffuse_sum = vec3(0.0);\n"; + out += "vec3 specular_sum = vec3(0.0);\n"; + out += "vec3 light_vector = vec3(0.0);\n"; + + // Convert interpolated quaternion to a GL fragment normal + out += "vec3 normal = normalize(vec3(\n"; + out += " 2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n"; + out += " 2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n"; + out += " 1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n"; + + // Gets the index into the specified lookup table for specular lighting + auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + const std::string half_angle = "normalize(normalize(view) + light_vector)"; + std::string index; + switch (input) { + case Regs::LightingLutInput::NH: + index = "dot(normal, " + half_angle + ")"; + break; + + case Regs::LightingLutInput::VH: + index = std::string("dot(view, " + half_angle + ")"); + break; + + case Regs::LightingLutInput::NV: + index = std::string("dot(normal, view)"); + break; + + case Regs::LightingLutInput::LN: + index = std::string("dot(light_vector, normal)"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + UNIMPLEMENTED(); + break; + } + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = 
config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + return "clamp(" + index + ", 0.0, FLOAT_255)"; + } else { + // LUT index is in the range of (-1.0, 1.0) + index = "clamp(" + index + ", -1.0, 1.0)"; + return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)"; + } + + return std::string(); + }; + + // Gets the lighting lookup table value given the specified sampler and index + auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) { + return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + + lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); + }; + + // Write the code to emulate each enabled light + for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { + unsigned num = config.light_src[light_index].num; + const auto& light_config = config.light_src[light_index]; + std::string light_src = "light_src[" + std::to_string(num) + "]"; + + // Compute light vector (directional or positional) + if (light_config.directional) + out += "light_vector = normalize(" + light_src + ".position);\n"; + else + out += "light_vector = normalize(" + light_src + ".position + view);\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided + std::string dot_product = light_config.two_sided_diffuse ? 
"abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enabled) { + std::string scale = std::to_string(light_config.dist_atten_scale); + std::string bias = std::to_string(light_config.dist_atten_bias); + std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; + lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))"; + const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); + dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index); + } + + // Compute primary fragment color (diffuse lighting) function + out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; + + // If enabled, clamp specular component if lighting result is negative + std::string clamp_highlights = config.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 
0.0 : 1.0)" : "1.0"; + + // Lookup specular distribution 0 LUT value + std::string d0_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); + std::string d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index); + + // Compute secondary fragment color (specular lighting) function + out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n"; + } + + // Sum final lighting result + out += "diffuse_sum += lighting_global_ambient;\n"; + out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; + out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; +} + std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core @@ -358,106 +463,8 @@ vec4 primary_fragment_color = vec4(0.0); vec4 secondary_fragment_color = vec4(0.0); )"; - if (config.lighting_enabled) { - out += "vec3 normal = normalize(vec3(\n"; - out += " 2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n"; - out += " 2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n"; - out += " 1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n"; - out += "vec4 secondary_color = vec4(0.0);\n"; - out += "vec3 diffuse_sum = vec3(0.0);\n"; - out += "vec3 specular_sum = vec3(0.0);\n"; - out += "vec3 fragment_position = -view;\n"; - out += "vec3 light_vector = vec3(0.0);\n"; - out += "vec3 half_angle_vector = vec3(0.0);\n"; - out += "float clamp_highlights = 1.0;\n"; - out += "float dist_atten = 1.0;\n"; - - // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [&](unsigned light_num, Regs::LightingLutInput input, bool abs) { - std::string index; - switch (input) { - case Regs::LightingLutInput::NH: - index = "dot(normal, half_angle_vector)"; - break; - - case Regs::LightingLutInput::VH: - index = std::string("dot(view, 
half_angle_vector)"); - break; - - case Regs::LightingLutInput::NV: - index = std::string("dot(normal, view)"); - break; - - case Regs::LightingLutInput::LN: - index = std::string("dot(light_vector, normal)"); - break; - - default: - LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); - UNIMPLEMENTED(); - break; - } - - if (abs) { - // In the range of [ 0.f, 1.f] - index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; - return "clamp(" + index + ", 0.0, FLOAT_255)"; - } else { - // In the range of [-1.f, 1.f] - index = "clamp(" + index + ", -1.0, 1.0)"; - return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)"; - } - - return std::string(); - }; - - for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { - unsigned num = config.light_src[light_index].num; - std::string light_src = "light_src[" + std::to_string(num) + "]"; - - if (config.light_src[light_index].directional) - out += "light_vector = normalize(" + light_src + ".position);\n"; - else - out += "light_vector = normalize(" + light_src + ".position - fragment_position);\n"; - - std::string dot_product; - if (config.light_src[light_index].two_sided_diffuse) - dot_product = "abs(dot(light_vector, normal))"; - else - dot_product = "max(dot(light_vector, normal), 0.0)"; - - // Compute distance attenuation value - out += "dist_atten = 1.0;\n"; - if (config.light_src[light_index].dist_atten_enabled) { - std::string scale = std::to_string(config.light_src[light_index].dist_atten_scale); - std::string bias = std::to_string(config.light_src[light_index].dist_atten_bias); - std::string lut_index = "(" + scale + " * length(fragment_position - " + light_src + ".position) + " + bias + ")"; - std::string clamped_lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))"; - - const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + num); - out += "dist_atten = 
texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "];\n"; - } - - // Compute primary fragment color (diffuse lighting) function - out += "diffuse_sum += ((light_src[" + std::to_string(num) + "].diffuse * " + dot_product + ") + light_src[" + std::to_string(num) + "].ambient) * dist_atten;\n"; - - // Compute secondary fragment color (specular lighting) function - out += "half_angle_vector = normalize(normalize(view) + light_vector);\n"; - std::string clamped_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); - const unsigned lut_num = (unsigned)Regs::LightingSampler::Distribution0; - std::string lut_lookup = "texture(lut[" + std::to_string(lut_num / 4) + "], " + clamped_lut_index + ")[" + std::to_string(lut_num & 3) + "]"; - - if (config.clamp_highlights) { - out += "clamp_highlights = (dot(light_vector, normal) <= 0.0) ? 0.0 : 1.0;\n"; - } - - out += "specular_sum += clamp_highlights * " + lut_lookup + " * light_src[" + std::to_string(num) + "].specular_0 * dist_atten;\n"; - } - - out += "diffuse_sum += lighting_global_ambient;\n"; - out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; - out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; - } + if (config.lighting_enabled) + WriteLighting(out, config); // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (config.alpha_test_func == Regs::CompareFunc::Never) { From 6878ba7608b14b6508f8de8f0070acdba6bb1837 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 18 Nov 2015 22:55:24 -0500 Subject: [PATCH 14/32] gl_rasterizer: Minor naming refactor on Pica register naming. 
--- src/video_core/pica.h | 33 ++++++++++--------- .../renderer_opengl/gl_rasterizer.h | 10 +++--- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index c63d87a36..1808d4396 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -715,26 +715,29 @@ struct Regs { union { BitField< 4, 4, u32> config; BitField<27, 1, u32> clamp_highlights; - } light_env; + }; union { // Each bit specifies whether distance attenuation should be applied for the // corresponding light - BitField<24, 1, u32> light_0; - BitField<25, 1, u32> light_1; - BitField<26, 1, u32> light_2; - BitField<27, 1, u32> light_3; - BitField<28, 1, u32> light_4; - BitField<29, 1, u32> light_5; - BitField<30, 1, u32> light_6; - BitField<31, 1, u32> light_7; - - bool IsEnabled(unsigned index) const { - const unsigned enable[] = { light_0, light_1, light_2, light_3, light_4, light_5, light_6, light_7 }; - return enable[index] == 0; - } - } dist_atten_enable; + BitField<24, 1, u32> dist_atten_enable_light_0; + BitField<25, 1, u32> dist_atten_enable_light_1; + BitField<26, 1, u32> dist_atten_enable_light_2; + BitField<27, 1, u32> dist_atten_enable_light_3; + BitField<28, 1, u32> dist_atten_enable_light_4; + BitField<29, 1, u32> dist_atten_enable_light_5; + BitField<30, 1, u32> dist_atten_enable_light_6; + BitField<31, 1, u32> dist_atten_enable_light_7; + }; + + bool IsDistAttenEnabled(unsigned index) const { + const unsigned enable[] = { dist_atten_enable_light_0, dist_atten_enable_light_1, + dist_atten_enable_light_2, dist_atten_enable_light_3, + dist_atten_enable_light_4, dist_atten_enable_light_5, + dist_atten_enable_light_6, dist_atten_enable_light_7 }; + return enable[index] == 0; + } union { BitField<0, 8, u32> index; ///< Index at which to set data in the LUT diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b50542701..17bda2d1d 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,16 +80,16 @@ struct PicaShaderConfig { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; res.light_src[light_index].num = num; - res.light_src[light_index].directional = light.w; - res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse; - res.light_src[light_index].dist_atten_enabled = regs.lighting.dist_atten_enable.IsEnabled(num); + res.light_src[light_index].directional = light.w != 0; + res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num); res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); } - res.lighting_lut.d0_abs = (regs.lighting.abs_lut_input.d0 == 0); + res.lighting_lut.d0_abs = regs.lighting.abs_lut_input.d0 == 0; res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); - res.clamp_highlights = regs.lighting.light_env.clamp_highlights; + res.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; } From 6307999116d250a9805c0d7ae2c131407772fc3f Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 18 Nov 2015 23:17:25 -0500 Subject: [PATCH 15/32] pica: Cleanup and add some comments to lighting registers. 
--- src/video_core/pica.h | 36 +++++++++---------- .../renderer_opengl/gl_rasterizer.h | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 1808d4396..48854dda2 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -695,8 +695,8 @@ struct Regs { INSERT_PADDING_WORDS(0x3); union { - BitField<0, 1, u32> w; // 1.f if 0, otherwise 0.f - BitField<1, 1, u32> two_sided_diffuse; // when disabled, clamp dot-product to 0 + BitField<0, 1, u32> directional; + BitField<1, 1, u32> two_sided_diffuse; // 1: GL_TRUE, 0: GL_FALSE; when disabled, clamp dot-product to 0 }; }; @@ -714,21 +714,21 @@ struct Regs { union { BitField< 4, 4, u32> config; - BitField<27, 1, u32> clamp_highlights; + BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE }; union { // Each bit specifies whether distance attenuation should be applied for the // corresponding light - BitField<24, 1, u32> dist_atten_enable_light_0; - BitField<25, 1, u32> dist_atten_enable_light_1; - BitField<26, 1, u32> dist_atten_enable_light_2; - BitField<27, 1, u32> dist_atten_enable_light_3; - BitField<28, 1, u32> dist_atten_enable_light_4; - BitField<29, 1, u32> dist_atten_enable_light_5; - BitField<30, 1, u32> dist_atten_enable_light_6; - BitField<31, 1, u32> dist_atten_enable_light_7; + BitField<24, 1, u32> dist_atten_enable_light_0; // 0: GL_TRUE, 1: GL_FALSE + BitField<25, 1, u32> dist_atten_enable_light_1; // 0: GL_TRUE, 1: GL_FALSE + BitField<26, 1, u32> dist_atten_enable_light_2; // 0: GL_TRUE, 1: GL_FALSE + BitField<27, 1, u32> dist_atten_enable_light_3; // 0: GL_TRUE, 1: GL_FALSE + BitField<28, 1, u32> dist_atten_enable_light_4; // 0: GL_TRUE, 1: GL_FALSE + BitField<29, 1, u32> dist_atten_enable_light_5; // 0: GL_TRUE, 1: GL_FALSE + BitField<30, 1, u32> dist_atten_enable_light_6; // 0: GL_TRUE, 1: GL_FALSE + BitField<31, 1, u32> dist_atten_enable_light_7; // 0: GL_TRUE, 1: GL_FALSE }; bool IsDistAttenEnabled(unsigned index) 
const { @@ -754,13 +754,13 @@ struct Regs { u32 lut_data[8]; union { - BitField< 1, 1, u32> d0; - BitField< 5, 1, u32> d1; - BitField< 9, 1, u32> sp; - BitField<13, 1, u32> fr; - BitField<17, 1, u32> rb; - BitField<21, 1, u32> rg; - BitField<25, 1, u32> rr; + BitField< 1, 1, u32> d0; // 0: GL_TRUE, 1: GL_FALSE + BitField< 5, 1, u32> d1; // 0: GL_TRUE, 1: GL_FALSE + BitField< 9, 1, u32> sp; // 0: GL_TRUE, 1: GL_FALSE + BitField<13, 1, u32> fr; // 0: GL_TRUE, 1: GL_FALSE + BitField<17, 1, u32> rb; // 0: GL_TRUE, 1: GL_FALSE + BitField<21, 1, u32> rg; // 0: GL_TRUE, 1: GL_FALSE + BitField<25, 1, u32> rr; // 0: GL_TRUE, 1: GL_FALSE } abs_lut_input; union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 17bda2d1d..6be161efd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,7 +80,7 @@ struct PicaShaderConfig { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; res.light_src[light_index].num = num; - res.light_src[light_index].directional = light.w != 0; + res.light_src[light_index].directional = light.directional != 0; res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num); res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); From 3d89dacd56064c3c49cd1ae9482a0221f1912f56 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 18 Nov 2015 23:40:18 -0500 Subject: [PATCH 16/32] gl_shader_gen: Refactor lighting config to match Pica register naming. - Also implement D0 LUT enable. 
--- src/video_core/pica.h | 2 + .../renderer_opengl/gl_rasterizer.h | 60 ++++++++++--------- .../renderer_opengl/gl_shader_gen.cpp | 30 +++++----- 3 files changed, 50 insertions(+), 42 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 48854dda2..b1cf072f1 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -718,6 +718,8 @@ struct Regs { }; union { + BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE + // Each bit specifies whether distance attenuation should be applied for the // corresponding light diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6be161efd..2042be786 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -73,23 +73,24 @@ struct PicaShaderConfig { // Fragment lighting - res.lighting_enabled = !regs.lighting.disable; - res.num_lights = regs.lighting.src_num + 1; + res.lighting.enable = !regs.lighting.disable; + res.lighting.src_num = regs.lighting.src_num + 1; - for (unsigned light_index = 0; light_index < res.num_lights; ++light_index) { + for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; - res.light_src[light_index].num = num; - res.light_src[light_index].directional = light.directional != 0; - res.light_src[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; - res.light_src[light_index].dist_atten_enabled = regs.lighting.IsDistAttenEnabled(num); - res.light_src[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); - res.light_src[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); + res.lighting.light[light_index].num = num; + res.lighting.light[light_index].directional = light.directional != 0; + 
res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num); + res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); + res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); } - res.lighting_lut.d0_abs = regs.lighting.abs_lut_input.d0 == 0; - res.lighting_lut.d0_type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); - res.clamp_highlights = regs.lighting.clamp_highlights != 0; + res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0; + res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0; + res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); + res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; } @@ -112,22 +113,25 @@ struct PicaShaderConfig { u8 combiner_buffer_input = 0; struct { - unsigned num = 0; - bool directional = false; - bool two_sided_diffuse = false; - bool dist_atten_enabled = false; - GLfloat dist_atten_scale = 0.0f; - GLfloat dist_atten_bias = 0.0f; - } light_src[8]; - - bool lighting_enabled = false; - unsigned num_lights = 0; - bool clamp_highlights = false; - - struct { - bool d0_abs = false; - Pica::Regs::LightingLutInput d0_type = Pica::Regs::LightingLutInput::NH; - } lighting_lut; + struct { + unsigned num = 0; + bool directional = false; + bool two_sided_diffuse = false; + bool dist_atten_enable = false; + GLfloat dist_atten_scale = 0.0f; + GLfloat dist_atten_bias = 0.0f; + } light[8]; + + bool enable = false; + unsigned src_num = 0; + bool clamp_highlights = false; + + struct { + bool enable = false; + bool abs_input = false; + Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; + } lut_d0; + } lighting; }; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 10cb2d065..a2770cc6e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -360,7 +360,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) - index = config.light_src[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; return "clamp(" + index + ", 0.0, FLOAT_255)"; } else { // LUT index is in the range of (-1.0, 1.0) @@ -378,10 +378,9 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { }; // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < config.num_lights; ++light_index) { - unsigned num = config.light_src[light_index].num; - const auto& light_config = config.light_src[light_index]; - std::string light_src = "light_src[" + std::to_string(num) + "]"; + for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { + const auto& light_config = config.lighting.light[light_index]; + std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; // Compute light vector (directional or positional) if (light_config.directional) @@ -394,12 +393,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; - if (light_config.dist_atten_enabled) { + if (light_config.dist_atten_enable) { std::string scale = std::to_string(light_config.dist_atten_scale); std::string bias = std::to_string(light_config.dist_atten_bias); std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))"; - const unsigned lut_num = 
((unsigned)Regs::LightingSampler::DistanceAttenuation + num); + const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index); } @@ -407,11 +406,14 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; // If enabled, clamp specular component if lighting result is negative - std::string clamp_highlights = config.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; - // Lookup specular distribution 0 LUT value - std::string d0_lut_index = GetLutIndex(num, config.lighting_lut.d0_type, config.lighting_lut.d0_abs); - std::string d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index); + // Lookup specular "distribution 0" LUT value + std::string d0_lut_value = "1.0"; + if (config.lighting.lut_d0.enable) { + std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); + d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index); + } // Compute secondary fragment color (specular lighting) function out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n"; @@ -463,15 +465,15 @@ vec4 primary_fragment_color = vec4(0.0); vec4 secondary_fragment_color = vec4(0.0); )"; - if (config.lighting_enabled) - WriteLighting(out, config); - // Do not do any sort of processing if it's obvious we're not going to pass the alpha test if (config.alpha_test_func == Regs::CompareFunc::Never) { out += "discard; }"; return out; } + if (config.lighting.enable) + WriteLighting(out, config); + out += "vec4 combiner_buffer = 
vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; From 781b0465795fb80404e2790be2d10bfb1f7149aa Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 19 Nov 2015 19:00:42 -0500 Subject: [PATCH 17/32] gl_shader_gen: Add support for D0 LUT scaling. --- src/video_core/pica.h | 68 ++++++++++++++++++- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/gl_shader_gen.cpp | 2 +- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b1cf072f1..5d27da5d1 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -657,6 +657,44 @@ struct Regs { DistanceAttenuation = 16, }; + /** + * Pica fragment lighting supports using different LUTs for each lighting component: + * Reflectance R, G, and B channels, distribution function for specular components 0 and 1, + * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel + * (or whether a channel is enabled at all) is specified by various pre-defined lighting + * configurations. With configurations that require more LUTs, more cycles are required on HW to + * perform lighting computations. 
+ */ + enum class LightingConfig { + Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight + Config1 = 1, ///< Reflect Red, Fresnel, Spotlight + Config2 = 2, ///< Reflect Red, Distribution 0/1 + Config3 = 3, ///< Distribution 0/1, Fresnel + Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight + Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight + Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight + Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight + ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration + }; + + /// Selects which lighting components are affected by fresnel + enum class LightingFresnelSelector { + None = 0, ///< Fresnel is disabled + PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel + SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel + Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel + }; + + /// Factor used to scale the output of a lighting LUT + enum class LightingScale { + Scale1 = 0, ///< Scale is 1x + Scale2 = 1, ///< Scale is 2x + Scale4 = 2, ///< Scale is 4x + Scale8 = 3, ///< Scale is 8x + Scale1_4 = 6, ///< Scale is 0.25x + Scale1_2 = 7, ///< Scale is 0.5x + }; + enum class LightingLutInput { NH = 0, // Cosine of the angle between the normal and half-angle vectors VH = 1, // Cosine of the angle between the view and half-angle vectors @@ -775,7 +813,35 @@ struct Regs { BitField<24, 3, u32> rr; } lut_input; - INSERT_PADDING_WORDS(0x7); + union { + BitField< 0, 3, LightingScale> d0; + BitField< 4, 3, LightingScale> d1; + BitField< 8, 3, LightingScale> sp; + BitField<12, 3, LightingScale> fr; + BitField<16, 3, LightingScale> rb; + BitField<20, 3, LightingScale> rg; + BitField<24, 3, LightingScale> rr; + + static float GetScale(LightingScale scale) { + switch (scale) { + case LightingScale::Scale1: + return 
1.0f; + case LightingScale::Scale2: + return 2.0f; + case LightingScale::Scale4: + return 4.0f; + case LightingScale::Scale8: + return 8.0f; + case LightingScale::Scale1_4: + return 0.25f; + case LightingScale::Scale1_2: + return 0.5f; + } + return 0.0f; + } + } lut_scale; + + INSERT_PADDING_WORDS(0x6); union { // There are 8 light enable "slots", corresponding to the total number of lights diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2042be786..72ded8f22 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -90,6 +90,7 @@ struct PicaShaderConfig { res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0; res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0; res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); + res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; @@ -130,7 +131,8 @@ struct PicaShaderConfig { bool enable = false; bool abs_input = false; Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; - } lut_d0; + float scale = 1.0f; + } lut_d0, lut_d1, lut_fr; } lighting; }; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index a2770cc6e..9044a3813 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -412,7 +412,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { std::string d0_lut_value = "1.0"; if (config.lighting.lut_d0.enable) { std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); - d0_lut_value = GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index); + d0_lut_value = "(" + 
std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")"; } // Compute secondary fragment color (specular lighting) function From 0e67c21c9e5bb0e213d3b13bdd7592ff2a44a31c Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 19 Nov 2015 22:42:06 -0500 Subject: [PATCH 18/32] gl_shader_gen: Implement fragment lighting specular 1 component. --- src/video_core/pica.h | 27 +++++++++++++------ .../renderer_opengl/gl_rasterizer.h | 8 ++++++ .../renderer_opengl/gl_shader_gen.cpp | 17 +++++++++--- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5d27da5d1..83af6a127 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -713,6 +713,16 @@ struct Regs { } }; + static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { + switch (sampler) { + case LightingSampler::Distribution0: + return (config != LightingConfig::Config1); + case LightingSampler::Distribution1: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + } + return false; + } + struct { struct LightSrc { LightColor specular_0; // material.specular_0 * light.specular_0 @@ -751,12 +761,13 @@ struct Regs { BitField<0, 3, u32> src_num; // number of enabled lights - 1 union { - BitField< 4, 4, u32> config; + BitField< 4, 4, LightingConfig> config; BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE }; union { BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE + BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE // Each bit specifies whether distance attenuation should be applied for the // corresponding light @@ -804,13 +815,13 @@ struct Regs { } abs_lut_input; union { - BitField< 0, 3, u32> d0; - BitField< 4, 3, u32> d1; - BitField< 8, 3, u32> sp; - BitField<12, 3, u32> fr; - BitField<16, 3, u32> rb; - BitField<20, 3, u32> rg; - BitField<24, 3, 
u32> rr; + BitField< 0, 3, LightingLutInput> d0; + BitField< 4, 3, LightingLutInput> d1; + BitField< 8, 3, LightingLutInput> sp; + BitField<12, 3, LightingLutInput> fr; + BitField<16, 3, LightingLutInput> rb; + BitField<20, 3, LightingLutInput> rg; + BitField<24, 3, LightingLutInput> rr; } lut_input; union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 72ded8f22..788618ed2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -91,6 +91,13 @@ struct PicaShaderConfig { res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0; res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0; + res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0; + res.lighting.lut_d1.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value(); + res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + res.lighting.config = regs.lighting.config; res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; @@ -126,6 +133,7 @@ struct PicaShaderConfig { bool enable = false; unsigned src_num = 0; bool clamp_highlights = false; + Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; struct { bool enable = false; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9044a3813..4f8b675bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -408,15 +408,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // If enabled, clamp specular component if lighting result is negative std::string clamp_highlights = 
config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; - // Lookup specular "distribution 0" LUT value + // Specular 0 component std::string d0_lut_value = "1.0"; - if (config.lighting.lut_d0.enable) { + if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")"; } + std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + std::string d1_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, d1_lut_index) + ")"; + } + std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)"; // Compute secondary fragment color (specular lighting) function - out += "specular_sum += " + clamp_highlights + " * " + d0_lut_value + " * " + light_src + ".specular_0 * " + dist_atten + ";\n"; + out += "specular_sum += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; } // Sum final lighting result From c37de30cfc21cd6d742eed27a996a273f5ec2ca1 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 23 Nov 2015 20:26:09 -0500 Subject: [PATCH 19/32] gl_shader_gen: Implement fragment lighting fresnel effect. 
--- src/video_core/pica.h | 5 +++ .../renderer_opengl/gl_rasterizer.h | 7 ++++ .../renderer_opengl/gl_shader_gen.cpp | 35 ++++++++++++++----- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 83af6a127..76db51038 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -713,12 +713,15 @@ struct Regs { } }; + /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { switch (sampler) { case LightingSampler::Distribution0: return (config != LightingConfig::Config1); case LightingSampler::Distribution1: return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + case LightingSampler::Fresnel: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); } return false; } @@ -761,6 +764,7 @@ struct Regs { BitField<0, 3, u32> src_num; // number of enabled lights - 1 union { + BitField< 2, 2, LightingFresnelSelector> fresnel_selector; BitField< 4, 4, LightingConfig> config; BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE }; @@ -768,6 +772,7 @@ struct Regs { union { BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE + BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE // Each bit specifies whether distance attenuation should be applied for the // corresponding light diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 788618ed2..1d4d73ae1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -97,7 +97,13 @@ struct PicaShaderConfig { res.lighting.lut_d1.type = 
(Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value(); res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0; + res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0; + res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value(); + res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + res.lighting.config = regs.lighting.config; + res.lighting.fresnel_selector = regs.lighting.fresnel_selector; res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; @@ -134,6 +140,7 @@ struct PicaShaderConfig { unsigned src_num = 0; bool clamp_highlights = false; Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; + Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; struct { bool enable = false; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 4f8b675bf..6487172b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -321,8 +321,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Define lighting globals - out += "vec3 diffuse_sum = vec3(0.0);\n"; - out += "vec3 specular_sum = vec3(0.0);\n"; + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"; + out += "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"; out += "vec3 light_vector = vec3(0.0);\n"; // Convert interpolated quaternion to a GL fragment normal @@ -402,9 +402,6 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index); } - // Compute primary fragment 
color (diffuse lighting) function - out += "diffuse_sum += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; - // If enabled, clamp specular component if lighting result is negative std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; @@ -426,14 +423,34 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)"; + // Fresnel + if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string fr_lut_index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); + std::string fr_lut_value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, fr_lut_index) + ")"; + + // Enabled for difffuse lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha) + out += "diffuse_sum.a *= " + fr_lut_value + ";\n"; + + // Enabled for the specular lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha) + out += "specular_sum.a *= " + fr_lut_value + ";\n"; + } + + // Compute primary fragment color (diffuse lighting) function + out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; + // Compute secondary fragment color (specular lighting) function - out += "specular_sum += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; + out += 
"specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; } // Sum final lighting result - out += "diffuse_sum += lighting_global_ambient;\n"; - out += "primary_fragment_color = vec4(clamp(diffuse_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; - out += "secondary_fragment_color = vec4(clamp(specular_sum, vec3(0.0), vec3(1.0)), 1.0);\n"; + out += "diffuse_sum.rgb += lighting_global_ambient;\n"; + out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"; + out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; } std::string GenerateFragmentShader(const PicaShaderConfig& config) { From 01b407638cdee1b2435018d730a698aa1c65d6a4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 24 Nov 2015 22:59:14 -0500 Subject: [PATCH 20/32] gl_shader_gen: View should be normalized. --- src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 6487172b4..4f87c5846 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -341,11 +341,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { break; case Regs::LightingLutInput::VH: - index = std::string("dot(view, " + half_angle + ")"); + index = std::string("dot(normalize(view), " + half_angle + ")"); break; case Regs::LightingLutInput::NV: - index = std::string("dot(normal, view)"); + index = std::string("dot(normal, normalize(view))"); break; case Regs::LightingLutInput::LN: From 348c9c9ff32a493a005da72dba1832da118e4b0b Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 25 Nov 2015 20:25:02 -0500 Subject: [PATCH 21/32] gl_shader_gen: Implement lighting red, green, and blue reflection. 
--- src/video_core/pica.h | 18 +++++- .../renderer_opengl/gl_rasterizer.h | 18 +++++- .../renderer_opengl/gl_shader_gen.cpp | 62 ++++++++++++++----- 3 files changed, 77 insertions(+), 21 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 76db51038..267070e45 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -650,9 +650,9 @@ struct Regs { Distribution0 = 0, Distribution1 = 1, Fresnel = 3, - Blue = 4, - Green = 5, - Red = 6, + ReflectBlue = 4, + ReflectGreen = 5, + ReflectRed = 6, SpotlightAttenuation = 8, DistanceAttenuation = 16, }; @@ -718,10 +718,19 @@ struct Regs { switch (sampler) { case LightingSampler::Distribution0: return (config != LightingConfig::Config1); + case LightingSampler::Distribution1: return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + case LightingSampler::Fresnel: return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); + + case LightingSampler::ReflectRed: + return (config != LightingConfig::Config3); + + case LightingSampler::ReflectGreen: + case LightingSampler::ReflectBlue: + return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); } return false; } @@ -773,6 +782,9 @@ struct Regs { BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE + BitField<20, 1, u32> lut_enable_rr; // 0: GL_TRUE, 1: GL_FALSE + BitField<21, 1, u32> lut_enable_rg; // 0: GL_TRUE, 1: GL_FALSE + BitField<22, 1, u32> lut_enable_rb; // 0: GL_TRUE, 1: GL_FALSE // Each bit specifies whether distance attenuation should be applied for the // corresponding light diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1d4d73ae1..62a4d8953 100644 
--- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -102,6 +102,21 @@ struct PicaShaderConfig { res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value(); res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0; + res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0; + res.lighting.lut_rr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rr.Value(); + res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 0; + res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0; + res.lighting.lut_rg.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rg.Value(); + res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0; + res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0; + res.lighting.lut_rb.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rb.Value(); + res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + res.lighting.config = regs.lighting.config; res.lighting.fresnel_selector = regs.lighting.fresnel_selector; res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; @@ -139,6 +154,7 @@ struct PicaShaderConfig { bool enable = false; unsigned src_num = 0; bool clamp_highlights = false; + Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; @@ -147,7 +163,7 @@ struct PicaShaderConfig { bool abs_input = false; Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; float scale = 1.0f; - } lut_d0, lut_d1, lut_fr; + } lut_d0, lut_d1, lut_fr, 
lut_rr, lut_rg, lut_rb; } lighting; }; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 4f87c5846..984aef586 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -321,9 +321,10 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Define lighting globals - out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"; - out += "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"; - out += "vec3 light_vector = vec3(0.0);\n"; + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 light_vector = vec3(0.0);\n" + "vec3 refl_value = vec3(0.0);\n"; // Convert interpolated quaternion to a GL fragment normal out += "vec3 normal = normalize(vec3(\n"; @@ -396,10 +397,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (light_config.dist_atten_enable) { std::string scale = std::to_string(light_config.dist_atten_scale); std::string bias = std::to_string(light_config.dist_atten_bias); - std::string lut_index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; - lut_index = "((clamp(" + lut_index + ", 0.0, FLOAT_255)))"; + std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; + index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); - dist_atten = GetLutValue((Regs::LightingSampler)lut_num, lut_index); + dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); } // If enabled, clamp specular component if lighting result is negative @@ -409,35 +410,62 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& 
config) { std::string d0_lut_value = "1.0"; if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string d0_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, d0_lut_index) + ")"; + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + out += "refl_value.r = " + value + ";\n"; + } else { + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + out += "refl_value.g = " + value + ";\n"; + } else { + out += "refl_value.g = 
refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + out += "refl_value.b = " + value + ";\n"; + } else { + out += "refl_value.b = refl_value.r;\n"; + } + // Specular 1 component std::string d1_lut_value = "1.0"; if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string d1_lut_index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, d1_lut_index) + ")"; + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; } - std::string specular_1 = "(" + d1_lut_value + " * " + light_src + ".specular_1)"; + std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; // Fresnel if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string fr_lut_index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); - std::string fr_lut_value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + 
GetLutValue(Regs::LightingSampler::Fresnel, fr_lut_index) + ")"; + std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; // Enabled for difffuse lighting alpha component if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha) - out += "diffuse_sum.a *= " + fr_lut_value + ";\n"; + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "diffuse_sum.a *= " + value + ";\n"; // Enabled for the specular lighting alpha component if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::BothAlpha) - out += "specular_sum.a *= " + fr_lut_value + ";\n"; + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "specular_sum.a *= " + value + ";\n"; } // Compute primary fragment color (diffuse lighting) function From 449902b5583d6a2dbb1e4aea9802da5ad2493981 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 25 Nov 2015 20:30:27 -0500 Subject: [PATCH 22/32] gl_shader_gen: Fix bug in LUT range (should within range [0, 255] not [0, 256]). 
--- src/video_core/renderer_opengl/gl_shader_gen.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 984aef586..d59f2054b 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -362,11 +362,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; - return "clamp(" + index + ", 0.0, FLOAT_255)"; + return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; } else { // LUT index is in the range of (-1.0, 1.0) index = "clamp(" + index + ", -1.0, 1.0)"; - return "clamp(((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0, 0.0, FLOAT_255)"; + return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; } return std::string(); @@ -487,7 +487,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 #define LIGHTING_LUT_SIZE 256 -#define FLOAT_255 0.99609375 +#define FLOAT_255 (255.0 / 256.0) in vec4 primary_color; in vec2 texcoord[3]; From 9dfb223d26a7d700e38a4c0eec9d32d78c42f91d Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 25 Nov 2015 20:49:48 -0500 Subject: [PATCH 23/32] gl_rasterizer: Initial implementation of bump mapping. 
--- src/video_core/pica.h | 9 ++++++ .../renderer_opengl/gl_rasterizer.h | 6 ++++ .../renderer_opengl/gl_shader_gen.cpp | 32 ++++++++++++++++--- 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 267070e45..809b16d2b 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -702,6 +702,12 @@ struct Regs { LN = 3, // Cosine of the angle between the light and the normal vectors }; + enum class LightingBumpMode : u32 { + None = 0, + NormalMap = 1, + TangentMap = 2, + }; + union LightColor { BitField< 0, 10, u32> b; BitField<10, 10, u32> g; @@ -775,7 +781,10 @@ struct Regs { union { BitField< 2, 2, LightingFresnelSelector> fresnel_selector; BitField< 4, 4, LightingConfig> config; + BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE + BitField<28, 2, LightingBumpMode> bump_mode; // 1: GL_TRUE, 0: GL_FALSE + BitField<30, 1, u32> bump_renorm; // 0: GL_TRUE, 1: GL_FALSE }; union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 62a4d8953..d7eac5213 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -119,6 +119,9 @@ struct PicaShaderConfig { res.lighting.config = regs.lighting.config; res.lighting.fresnel_selector = regs.lighting.fresnel_selector; + res.lighting.bump_mode = regs.lighting.bump_mode; + res.lighting.bump_selector = regs.lighting.bump_selector; + res.lighting.bump_renorm = regs.lighting.bump_renorm == 0; res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; @@ -153,6 +156,9 @@ struct PicaShaderConfig { bool enable = false; unsigned src_num = 0; + Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; + unsigned bump_selector = 0; + bool bump_renorm = false; bool clamp_highlights = false; Pica::Regs::LightingConfig 
config = Pica::Regs::LightingConfig::Config0; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index d59f2054b..ee4b54ab9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -326,11 +326,28 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "vec3 light_vector = vec3(0.0);\n" "vec3 refl_value = vec3(0.0);\n"; - // Convert interpolated quaternion to a GL fragment normal - out += "vec3 normal = normalize(vec3(\n"; - out += " 2.f*(normquat.x*normquat.z + normquat.y*normquat.w),\n"; - out += " 2.f*(normquat.y*normquat.z + normquat.x*normquat.w),\n"; - out += " 1.f - 2.f*(normquat.x*normquat.x + normquat.y*normquat.y)));\n"; + // Compute fragment normals + if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture + std::string bump_selector = std::to_string(config.lighting.bump_selector); + out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; + + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result + if (config.lighting.bump_renorm) { + std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; + } + } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + // Bump mapping is enabled using a tangent map + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); + UNIMPLEMENTED(); + } else { + // No bump mapping - surface local normal is just a unit normal + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace + out += "vec3 
normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; // Gets the index into the specified lookup table for specular lighting auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { @@ -516,6 +533,11 @@ layout (std140) uniform shader_data { uniform sampler2D tex[3]; uniform sampler1D lut[6]; +// Rotate the vector v by the quaternion q +vec3 quaternion_rotate(vec4 q, vec3 v) { + return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); +} + void main() { vec4 primary_fragment_color = vec4(0.0); vec4 secondary_fragment_color = vec4(0.0); From c229503f4a01b390f348d9f6c742921e7fc1ed48 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 14 Dec 2015 21:14:54 -0500 Subject: [PATCH 24/32] gl_rasterizer: Fix PicaShaderConfig on GCC. --- .../renderer_opengl/gl_rasterizer.h | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d7eac5213..111448b70 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -139,39 +139,37 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; - struct { - Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; - std::array tev_stages = {}; - u8 combiner_buffer_input = 0; + Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; + std::array tev_stages = {}; + u8 combiner_buffer_input = 0; + struct { struct { - struct { - unsigned num = 0; - bool directional = false; - bool two_sided_diffuse = false; - bool dist_atten_enable = false; - GLfloat dist_atten_scale = 0.0f; - GLfloat dist_atten_bias = 0.0f; - } light[8]; + unsigned num = 0; + bool directional = false; + bool two_sided_diffuse = false; + bool dist_atten_enable = false; + GLfloat dist_atten_scale = 0.0f; + GLfloat dist_atten_bias = 0.0f; + } light[8]; + + bool enable = false; + 
unsigned src_num = 0; + Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; + unsigned bump_selector = 0; + bool bump_renorm = false; + bool clamp_highlights = false; + + Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; + Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + struct { bool enable = false; - unsigned src_num = 0; - Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; - unsigned bump_selector = 0; - bool bump_renorm = false; - bool clamp_highlights = false; - - Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; - Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; - - struct { - bool enable = false; - bool abs_input = false; - Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; - float scale = 1.0f; - } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; - }; + bool abs_input = false; + Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; + float scale = 1.0f; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; }; namespace std { From 310a1c30ca430013621df77cf3e6a1a6d4513b98 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 14 Dec 2015 22:14:29 -0500 Subject: [PATCH 25/32] gl_rasterizer: Remove unnecessary casts. 
--- src/video_core/renderer_opengl/gl_rasterizer.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 111448b70..4e681f9ea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -89,32 +89,32 @@ struct PicaShaderConfig { res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0; res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0; - res.lighting.lut_d0.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d0.Value(); + res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0; res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0; - res.lighting.lut_d1.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.d1.Value(); + res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0; res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0; - res.lighting.lut_fr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.fr.Value(); + res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0; res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0; - res.lighting.lut_rr.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rr.Value(); + res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 
0; res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0; - res.lighting.lut_rg.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rg.Value(); + res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0; res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0; - res.lighting.lut_rb.type = (Pica::Regs::LightingLutInput)regs.lighting.lut_input.rb.Value(); + res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); res.lighting.config = regs.lighting.config; From d171822dcecc7b234d63147270d21307605a6347 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 16 Dec 2015 18:49:20 -0500 Subject: [PATCH 26/32] command_processor: Add an assertion to ensure LUTs are not written past their boundaries. --- src/video_core/command_processor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 7409534b6..6540ccb26 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -474,6 +474,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; + + ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); + g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; lut_config.index = lut_config.index + 1; break; From a949fd5f2560b94dc8e8571497d0cfbebdb6bed7 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 16 Dec 2015 23:23:50 -0500 Subject: [PATCH 27/32] pica_types: Replace float24/20/16 with a template class. 
--- src/video_core/clipper.cpp | 8 +- src/video_core/command_processor.cpp | 16 +- src/video_core/pica_types.h | 156 +++++++----------- .../renderer_opengl/gl_rasterizer.cpp | 14 +- .../renderer_opengl/gl_rasterizer.h | 4 +- 5 files changed, 82 insertions(+), 116 deletions(-) diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 3a09d62f4..a385589d2 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -59,12 +59,12 @@ static void InitScreenCoordinates(OutputVertex& vtx) } viewport; const auto& regs = g_state.regs; - viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); - viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); + viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); + viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast(regs.viewport_corner.y)); - viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); - viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); + viewport.zscale = float24::FromRaw(regs.viewport_depth_range); + viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 6540ccb26..5dfedfe31 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Math::Vec4& attribute = g_state.vs.default_attributes[setup.index]; // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = 
float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), @@ -418,10 +418,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); } else { // TODO: Untested - uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); } LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index a34421c5d..53f61f287 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -4,35 +4,51 @@ #pragma once 
+#include + #include "common/common_types.h" namespace Pica { -struct float24 { - static float24 FromFloat32(float val) { - float24 ret; +/** + * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision + * floating point. + * + * When decoding, format is as follows: + * - The first `M` bits are the mantissa + * - The next `E` bits are the exponent + * - The last bit is the sign bit + * + * @todo Verify on HW if this conversion is sufficently accurate. + */ +template +struct Float { +public: + static Float FromFloat32(float val) { + Float ret; ret.value = val; return ret; } - // 16 bit mantissa, 7 bit exponent, 1 bit sign - // TODO: No idea if this works as intended - static float24 FromRawFloat24(u32 hex) { - float24 ret; - if ((hex & 0xFFFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0xFFFF; - u32 exponent = (hex >> 16) & 0x7F; - u32 sign = hex >> 23; - ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); - if (sign) - ret.value = -ret.value; - } - return ret; + static Float FromRaw(u32 hex) { + Float res; + + const int width = M + E + 1; + const int bias = 128 - (1 << (E - 1)); + const int exponent = (hex >> M) & ((1 << E) - 1); + const unsigned mantissa = hex & ((1 << M) - 1); + + if (hex & ((1 << (width - 1)) - 1)) + hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23); + else + hex = ((hex >> (E + M)) << 31); + + std::memcpy(&res.value, &hex, sizeof(float)); + + return res; } - static float24 Zero() { + static Float Zero() { return FromFloat32(0.f); } @@ -41,27 +57,27 @@ struct float24 { return value; } - float24 operator * (const float24& flt) const { + Float operator * (const Float& flt) const { if ((this->value == 0.f && !std::isnan(flt.value)) || (flt.value == 0.f && !std::isnan(this->value))) // PICA gives 0 instead of NaN when multiplying by inf return Zero(); - return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); + 
return Float::FromFloat32(ToFloat32() * flt.ToFloat32()); } - float24 operator / (const float24& flt) const { - return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); + Float operator / (const Float& flt) const { + return Float::FromFloat32(ToFloat32() / flt.ToFloat32()); } - float24 operator + (const float24& flt) const { - return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); + Float operator + (const Float& flt) const { + return Float::FromFloat32(ToFloat32() + flt.ToFloat32()); } - float24 operator - (const float24& flt) const { - return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); + Float operator - (const Float& flt) const { + return Float::FromFloat32(ToFloat32() - flt.ToFloat32()); } - float24& operator *= (const float24& flt) { + Float& operator *= (const Float& flt) { if ((this->value == 0.f && !std::isnan(flt.value)) || (flt.value == 0.f && !std::isnan(this->value))) // PICA gives 0 instead of NaN when multiplying by inf @@ -70,111 +86,61 @@ struct float24 { return *this; } - float24& operator /= (const float24& flt) { + Float& operator /= (const Float& flt) { value /= flt.ToFloat32(); return *this; } - float24& operator += (const float24& flt) { + Float& operator += (const Float& flt) { value += flt.ToFloat32(); return *this; } - float24& operator -= (const float24& flt) { + Float& operator -= (const Float& flt) { value -= flt.ToFloat32(); return *this; } - float24 operator - () const { - return float24::FromFloat32(-ToFloat32()); + Float operator - () const { + return Float::FromFloat32(-ToFloat32()); } - bool operator < (const float24& flt) const { + bool operator < (const Float& flt) const { return ToFloat32() < flt.ToFloat32(); } - bool operator > (const float24& flt) const { + bool operator > (const Float& flt) const { return ToFloat32() > flt.ToFloat32(); } - bool operator >= (const float24& flt) const { + bool operator >= (const Float& flt) const { return ToFloat32() >= flt.ToFloat32(); } - bool operator <= (const float24& flt) 
const { + bool operator <= (const Float& flt) const { return ToFloat32() <= flt.ToFloat32(); } - bool operator == (const float24& flt) const { + bool operator == (const Float& flt) const { return ToFloat32() == flt.ToFloat32(); } - bool operator != (const float24& flt) const { + bool operator != (const Float& flt) const { return ToFloat32() != flt.ToFloat32(); } private: - // Stored as a regular float, merely for convenience - // TODO: Perform proper arithmetic on this! - float value; -}; - -static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); - -struct float16 { - // 10 bit mantissa, 5 bit exponent, 1 bit sign - // TODO: No idea if this works as intended - static float16 FromRawFloat16(u32 hex) { - float16 ret; - if ((hex & 0xFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0x3FF; - u32 exponent = (hex >> 10) & 0x1F; - u32 sign = (hex >> 15) & 1; - ret.value = std::pow(2.0f, (float)exponent - 15.0f) * (1.0f + mantissa * std::pow(2.0f, -10.f)); - if (sign) - ret.value = -ret.value; - } - return ret; - } - - float ToFloat32() const { - return value; - } + static const unsigned MASK = (1 << (M + E + 1)) - 1; + static const unsigned MANTISSA_MASK = (1 << M) - 1; + static const unsigned EXPONENT_MASK = (1 << E) - 1; -private: // Stored as a regular float, merely for convenience // TODO: Perform proper arithmetic on this! 
float value; }; -struct float20 { - // 12 bit mantissa, 7 bit exponent, 1 bit sign - // TODO: No idea if this works as intended - static float20 FromRawFloat20(u32 hex) { - float20 ret; - if ((hex & 0xFFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0xFFF; - u32 exponent = (hex >> 12) & 0x7F; - u32 sign = (hex >> 19) & 1; - ret.value = std::pow(2.0f, (float)exponent - 63.0f) * (1.0f + mantissa * std::pow(2.0f, -12.f)); - if (sign) - ret.value = -ret.value; - } - return ret; - } - - float ToFloat32() const { - return value; - } - -private: - // Stored as a regular float, merely for convenience - // TODO: Perform proper arithmetic on this! - float value; -}; +using float24 = Float<16, 7>; +using float20 = Float<12, 7>; +using float16 = Float<10, 5>; } // namespace Pica diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6e7d6a40d..d70d62ede 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -810,8 +810,8 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncDepthModifiers() { - float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; // TODO: Implement scale modifier uniform_block_data.data.depth_offset = depth_offset; @@ -948,9 +948,9 @@ void RasterizerOpenGL::SyncLightAmbient(int light_index) { void RasterizerOpenGL::SyncLightPosition(int light_index) { std::array position = { - Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), - 
Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), - Pica::float16::FromRawFloat16(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; if (position != uniform_block_data.data.light_src[light_index].position) { uniform_block_data.data.light_src[light_index].position = position; @@ -962,8 +962,8 @@ void RasterizerOpenGL::SyncDrawState() { const auto& regs = Pica::g_state.regs; // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; + GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; // OpenGL uses different y coordinates, so negate corner offset and flip origin // TODO: Ensure viewport_corner.x should not be negated or origin flipped diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4e681f9ea..b9c1d61bd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -83,8 +83,8 @@ struct PicaShaderConfig { res.lighting.light[light_index].directional = light.directional != 0; res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num); - res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRawFloat20(light.dist_atten_bias).ToFloat32(); - res.lighting.light[light_index].dist_atten_scale = 
Pica::float20::FromRawFloat20(light.dist_atten_scale).ToFloat32(); + res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0; From b694423d09b618f245306d069b60cec44958565d Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 28 Dec 2015 21:03:53 -0500 Subject: [PATCH 28/32] pica_types: Fix typo in docstring. --- src/video_core/pica_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h index 53f61f287..ecf45654b 100644 --- a/src/video_core/pica_types.h +++ b/src/video_core/pica_types.h @@ -19,7 +19,7 @@ namespace Pica { * - The next `E` bits are the exponent * - The last bit is the sign bit * - * @todo Verify on HW if this conversion is sufficently accurate. + * @todo Verify on HW if this conversion is sufficiently accurate. */ template struct Float { From 8e9318f20a6fbcd511cf0f1b06b041ea1663467f Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 28 Jan 2016 23:29:33 -0500 Subject: [PATCH 29/32] gl_rasterizer: Fix issue with interpolation of opposite quaternions. --- .../renderer_opengl/gl_rasterizer.cpp | 28 +++++++++++++++++-- .../renderer_opengl/gl_rasterizer.h | 8 +++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d70d62ede..6ed67efeb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -158,12 +158,34 @@ void RasterizerOpenGL::Reset() { res_cache.InvalidateAll(); } +/** + * This is a helper function to resolve an issue with opposite quaternions being interpolated by + * OpenGL. 
See below for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(-Q1, Q2) positive. + * + * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This + * should be correct for nearly all cases, however a more correct implementation (but less trivial + * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions + * manually using two Lerps, and doing this correction before each Lerp. + */ +static bool AreQuaternionsOpposite(Math::Vec4 qa, Math::Vec4 qb) { + Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; + Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + + return (Math::Dot(a, b) < 0.f); +} + void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0); - vertex_batch.emplace_back(v1); - vertex_batch.emplace_back(v2); + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); } void RasterizerOpenGL::DrawTriangles() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b9c1d61bd..99266854c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -248,7 +248,7 @@ private: /// Structure that the hardware rendered vertices are composed of
struct HardwareVertex { - HardwareVertex(const Pica::Shader::OutputVertex& v) { + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); position[2] = v.pos.z.ToFloat32(); @@ -270,6 +270,12 @@ private: view[0] = v.view.x.ToFloat32(); view[1] = v.view.y.ToFloat32(); view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + for (float& x : normquat) { + x = -x; + } + } } GLfloat position[4]; From aaa7beeda8be312294a32e620a172c33cb231866 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 4 Feb 2016 00:03:20 -0500 Subject: [PATCH 30/32] renderer_opengl: Use GLvec3/GLvec4 aliases for commonly used types. --- .../renderer_opengl/gl_rasterizer.cpp | 4 ++-- .../renderer_opengl/gl_rasterizer.h | 19 ++++++++++--------- src/video_core/renderer_opengl/pica_to_gl.h | 5 ++++- .../renderer_opengl/renderer_opengl.cpp | 4 ++-- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6ed67efeb..b7d19bf94 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -920,7 +920,7 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - std::array, 256> new_data; + std::array new_data; for (unsigned offset = 0; offset < new_data.size(); ++offset) { new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); @@ -969,7 +969,7 @@ void RasterizerOpenGL::SyncLightAmbient(int light_index) { } void RasterizerOpenGL::SyncLightPosition(int light_index) { - std::array position = { + GLvec3 position = { Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), 
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 99266854c..e7fec30cf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,6 +17,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/shader/shader_interpreter.h" /** @@ -288,27 +289,27 @@ private: }; struct LightSrc { - std::array specular_0; + GLvec3 specular_0; INSERT_PADDING_WORDS(1); - std::array specular_1; + GLvec3 specular_1; INSERT_PADDING_WORDS(1); - std::array diffuse; + GLvec3 diffuse; INSERT_PADDING_WORDS(1); - std::array ambient; + GLvec3 ambient; INSERT_PADDING_WORDS(1); - std::array position; + GLvec3 position; INSERT_PADDING_WORDS(1); }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned struct UniformData { // A vec4 color for each of the six tev stages - std::array const_color[6]; - std::array tev_combiner_buffer_color; + GLvec4 const_color[6]; + GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; GLfloat depth_offset; INSERT_PADDING_WORDS(2); - std::array lighting_global_ambient; + GLvec3 lighting_global_ambient; INSERT_PADDING_WORDS(1); LightSrc light_src[8]; }; @@ -434,5 +435,5 @@ private: OGLFramebuffer framebuffer; std::array lighting_lut; - std::array, 256>, 6> lighting_lut_data; + std::array, 6> lighting_lut_data; }; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 346c9391d..3d6c4e9e5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -10,6 +10,9 @@ #include "video_core/pica.h" +using GLvec3 = std::array; +using GLvec4 = std::array; + 
namespace PicaToGL { inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { @@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { return stencil_op_table[(unsigned)action]; } -inline std::array ColorRGBA8(const u32 color) { +inline GLvec4 ColorRGBA8(const u32 color) { return { { (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a6a38f0af..ca3a6a6b4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -81,8 +81,8 @@ struct ScreenRectVertex { * The projection part of the matrix is trivial, hence these operations are represented * by a 3x2 matrix. */ -static std::array MakeOrthographicMatrix(const float width, const float height) { - std::array matrix; +static std::array MakeOrthographicMatrix(const float width, const float height) { + std::array matrix; matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; From c4d318f6915702e09866442f78d78747251779cb Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 4 Feb 2016 00:13:17 -0500 Subject: [PATCH 31/32] gl_rasterizer: Use alignas(16) instead of explicit padding. 
--- .../renderer_opengl/gl_rasterizer.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e7fec30cf..208a7bcb6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -289,16 +289,11 @@ private: }; struct LightSrc { - GLvec3 specular_0; - INSERT_PADDING_WORDS(1); - GLvec3 specular_1; - INSERT_PADDING_WORDS(1); - GLvec3 diffuse; - INSERT_PADDING_WORDS(1); - GLvec3 ambient; - INSERT_PADDING_WORDS(1); - GLvec3 position; - INSERT_PADDING_WORDS(1); + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned @@ -308,9 +303,7 @@ private: GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; GLfloat depth_offset; - INSERT_PADDING_WORDS(2); - GLvec3 lighting_global_ambient; - INSERT_PADDING_WORDS(1); + alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; }; From 19557aaab3434a9a6e9b6730a76923de053084cd Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 4 Feb 2016 21:51:56 -0500 Subject: [PATCH 32/32] pica: Cleanup lighting register definitions and documentation. 
--- src/video_core/pica.h | 69 ++++++++++--------- .../renderer_opengl/gl_rasterizer.h | 30 ++++---- 2 files changed, 51 insertions(+), 48 deletions(-) diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 809b16d2b..9077b1725 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -762,7 +762,7 @@ struct Regs { union { BitField<0, 1, u32> directional; - BitField<1, 1, u32> two_sided_diffuse; // 1: GL_TRUE, 0: GL_FALSE; when disabled, clamp dot-product to 0 + BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 }; }; @@ -774,46 +774,46 @@ struct Regs { static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); LightSrc light[8]; - LightColor global_ambient; // emission + (material.ambient * lighting.ambient) + LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) INSERT_PADDING_WORDS(0x1); - BitField<0, 3, u32> src_num; // number of enabled lights - 1 + BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 union { BitField< 2, 2, LightingFresnelSelector> fresnel_selector; BitField< 4, 4, LightingConfig> config; BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 - BitField<27, 1, u32> clamp_highlights; // 1: GL_TRUE, 0: GL_FALSE - BitField<28, 2, LightingBumpMode> bump_mode; // 1: GL_TRUE, 0: GL_FALSE - BitField<30, 1, u32> bump_renorm; // 0: GL_TRUE, 1: GL_FALSE + BitField<27, 1, u32> clamp_highlights; + BitField<28, 2, LightingBumpMode> bump_mode; + BitField<30, 1, u32> disable_bump_renorm; }; union { - BitField<16, 1, u32> lut_enable_d0; // 0: GL_TRUE, 1: GL_FALSE - BitField<17, 1, u32> lut_enable_d1; // 0: GL_TRUE, 1: GL_FALSE - BitField<19, 1, u32> lut_enable_fr; // 0: GL_TRUE, 1: GL_FALSE - BitField<20, 1, u32> lut_enable_rr; // 0: GL_TRUE, 1: GL_FALSE - BitField<21, 1, u32> lut_enable_rg; // 0: GL_TRUE, 1: GL_FALSE - BitField<22, 1, u32> lut_enable_rb; // 0: GL_TRUE, 1: GL_FALSE + BitField<16, 1, u32> 
disable_lut_d0; + BitField<17, 1, u32> disable_lut_d1; + BitField<19, 1, u32> disable_lut_fr; + BitField<20, 1, u32> disable_lut_rr; + BitField<21, 1, u32> disable_lut_rg; + BitField<22, 1, u32> disable_lut_rb; // Each bit specifies whether distance attenuation should be applied for the // corresponding light - BitField<24, 1, u32> dist_atten_enable_light_0; // 0: GL_TRUE, 1: GL_FALSE - BitField<25, 1, u32> dist_atten_enable_light_1; // 0: GL_TRUE, 1: GL_FALSE - BitField<26, 1, u32> dist_atten_enable_light_2; // 0: GL_TRUE, 1: GL_FALSE - BitField<27, 1, u32> dist_atten_enable_light_3; // 0: GL_TRUE, 1: GL_FALSE - BitField<28, 1, u32> dist_atten_enable_light_4; // 0: GL_TRUE, 1: GL_FALSE - BitField<29, 1, u32> dist_atten_enable_light_5; // 0: GL_TRUE, 1: GL_FALSE - BitField<30, 1, u32> dist_atten_enable_light_6; // 0: GL_TRUE, 1: GL_FALSE - BitField<31, 1, u32> dist_atten_enable_light_7; // 0: GL_TRUE, 1: GL_FALSE + BitField<24, 1, u32> disable_dist_atten_light_0; + BitField<25, 1, u32> disable_dist_atten_light_1; + BitField<26, 1, u32> disable_dist_atten_light_2; + BitField<27, 1, u32> disable_dist_atten_light_3; + BitField<28, 1, u32> disable_dist_atten_light_4; + BitField<29, 1, u32> disable_dist_atten_light_5; + BitField<30, 1, u32> disable_dist_atten_light_6; + BitField<31, 1, u32> disable_dist_atten_light_7; }; - bool IsDistAttenEnabled(unsigned index) const { - const unsigned enable[] = { dist_atten_enable_light_0, dist_atten_enable_light_1, - dist_atten_enable_light_2, dist_atten_enable_light_3, - dist_atten_enable_light_4, dist_atten_enable_light_5, - dist_atten_enable_light_6, dist_atten_enable_light_7 }; - return enable[index] == 0; + bool IsDistAttenDisabled(unsigned index) const { + const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1, + disable_dist_atten_light_2, disable_dist_atten_light_3, + disable_dist_atten_light_4, disable_dist_atten_light_5, + disable_dist_atten_light_6, disable_dist_atten_light_7 }; + return 
disable[index] != 0; } union { @@ -830,14 +830,17 @@ struct Regs { // registers is written to, the behavior will be the same. u32 lut_data[8]; + // These are used to specify if absolute (abs) value should be used for each LUT index. When + // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in + // the range of (0.0, 1.0). union { - BitField< 1, 1, u32> d0; // 0: GL_TRUE, 1: GL_FALSE - BitField< 5, 1, u32> d1; // 0: GL_TRUE, 1: GL_FALSE - BitField< 9, 1, u32> sp; // 0: GL_TRUE, 1: GL_FALSE - BitField<13, 1, u32> fr; // 0: GL_TRUE, 1: GL_FALSE - BitField<17, 1, u32> rb; // 0: GL_TRUE, 1: GL_FALSE - BitField<21, 1, u32> rg; // 0: GL_TRUE, 1: GL_FALSE - BitField<25, 1, u32> rr; // 0: GL_TRUE, 1: GL_FALSE + BitField< 1, 1, u32> disable_d0; + BitField< 5, 1, u32> disable_d1; + BitField< 9, 1, u32> disable_sp; + BitField<13, 1, u32> disable_fr; + BitField<17, 1, u32> disable_rb; + BitField<21, 1, u32> disable_rg; + BitField<25, 1, u32> disable_rr; } abs_lut_input; union { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 208a7bcb6..fef5f5331 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -75,7 +75,7 @@ struct PicaShaderConfig { // Fragment lighting res.lighting.enable = !regs.lighting.disable; - res.lighting.src_num = regs.lighting.src_num + 1; + res.lighting.src_num = regs.lighting.num_lights + 1; for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); @@ -83,38 +83,38 @@ struct PicaShaderConfig { res.lighting.light[light_index].num = num; res.lighting.light[light_index].directional = light.directional != 0; res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; - res.lighting.light[light_index].dist_atten_enable = regs.lighting.IsDistAttenEnabled(num); + 
res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } - res.lighting.lut_d0.enable = regs.lighting.lut_enable_d0 == 0; - res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.d0 == 0; + res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - res.lighting.lut_d1.enable = regs.lighting.lut_enable_d1 == 0; - res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.d1 == 0; + res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - res.lighting.lut_fr.enable = regs.lighting.lut_enable_fr == 0; - res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.fr == 0; + res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - res.lighting.lut_rr.enable = regs.lighting.lut_enable_rr == 0; - res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.rr == 0; + res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); res.lighting.lut_rr.scale = 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - res.lighting.lut_rg.enable = regs.lighting.lut_enable_rg == 0; - res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.rg == 0; + res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - res.lighting.lut_rb.enable = regs.lighting.lut_enable_rb == 0; - res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.rb == 0; + res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); @@ -122,7 +122,7 @@ struct PicaShaderConfig { res.lighting.fresnel_selector = regs.lighting.fresnel_selector; res.lighting.bump_mode = regs.lighting.bump_mode; res.lighting.bump_selector = regs.lighting.bump_selector; - res.lighting.bump_renorm = regs.lighting.bump_renorm == 0; + res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res;