From d885abc528df3758dce229ad3fda67c3b0d10f75 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Thu, 26 Sep 2019 01:40:55 +1000 Subject: [PATCH] GTE: More cleanups --- src/pse/gte.cpp | 108 ++++++++++++++---------------------------------- src/pse/gte.h | 18 ++++---- src/pse/gte.inl | 57 +++++++++++-------------- 3 files changed, 62 insertions(+), 121 deletions(-) diff --git a/src/pse/gte.cpp b/src/pse/gte.cpp index 7ff676bcd..813abb454 100644 --- a/src/pse/gte.cpp +++ b/src/pse/gte.cpp @@ -316,44 +316,6 @@ void Core::ExecuteInstruction(Instruction inst) } } -void Core::SetMAC(u32 index, s64 value) -{ - if (value < INT64_C(-2147483648)) - m_regs.FLAG.SetMACUnderflow(index); - else if (value > INT64_C(2147483647)) - m_regs.FLAG.SetMACOverflow(index); - - m_regs.dr32[24 + index] = Truncate32(static_cast(value)); -} - -void Core::SetIR(u32 index, s32 value, bool lm) -{ - if (lm && value < 0) - { - m_regs.FLAG.SetIRSaturated(index); - m_regs.dr32[8 + index] = 0; - return; - } - - // saturate to -32768..32767 - if (!lm && value < -32768) - { - m_regs.FLAG.SetIRSaturated(index); - m_regs.dr32[8 + index] = static_cast(-1); - return; - } - - if (value > 32767) - { - m_regs.FLAG.SetIRSaturated(index); - m_regs.dr32[8 + index] = UINT32_C(0x7FFF); - return; - } - - // store the sign extension in the padding bits - m_regs.dr32[8 + index] = value; -} - void Core::SetOTZ(s32 value) { if (value < 0) @@ -455,9 +417,9 @@ void Core::RTPS(const s16 V[3], bool sf, bool lm) // MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh // MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh // MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h - const s32 Sx = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 16); - const s32 Sy = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 16); - const s32 Sz = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 12); + const s32 Sx = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 16)); + const s32 Sy = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 16)); + const s32 Sz = s32(TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 12)); PushSXY(Sx, Sy); TruncateAndSetIR<0>(Sz, true); } @@ -486,11 +448,10 @@ void Core::Execute_NCLIP(Instruction inst) // MAC0 = SX0*SY1 + SX1*SY2 + SX2*SY0 - SX0*SY2 - SX1*SY0 - SX2*SY1 m_regs.FLAG.Clear(); - const s64 MAC0x = s64(m_regs.SXY0[0]) * s64(m_regs.SXY1[1]) + s64(m_regs.SXY1[0]) * s64(m_regs.SXY2[1]) + - s64(m_regs.SXY2[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY0[0]) * s64(m_regs.SXY2[1]) - - s64(m_regs.SXY1[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY2[0]) * s64(m_regs.SXY1[1]); - - SetMAC(0, MAC0x); + TruncateAndSetMAC<0>(s64(m_regs.SXY0[0]) * s64(m_regs.SXY1[1]) + s64(m_regs.SXY1[0]) * s64(m_regs.SXY2[1]) + + s64(m_regs.SXY2[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY0[0]) * s64(m_regs.SXY2[1]) - + s64(m_regs.SXY1[0]) * s64(m_regs.SXY0[1]) - s64(m_regs.SXY2[0]) * s64(m_regs.SXY1[1]), + 0); m_regs.FLAG.UpdateError(); } @@ -499,15 +460,16 @@ void Core::Execute_SQR(Instruction inst) { m_regs.FLAG.Clear(); - const u8 shift = inst.sf ? 12 : 0; - SetMAC(1, (s32(m_regs.IR1) * s32(m_regs.IR1)) >> shift); - SetMAC(2, (s32(m_regs.IR2) * s32(m_regs.IR2)) >> shift); - SetMAC(3, (s32(m_regs.IR3) * s32(m_regs.IR3)) >> shift); + // 32-bit multiply for speed - 16x16 isn't >32bit, and we know it won't overflow/underflow. + const u8 shift = inst.GetShift(); + m_regs.MAC1 = (s32(m_regs.IR1) * s32(m_regs.IR1)) >> shift; + m_regs.MAC2 = (s32(m_regs.IR2) * s32(m_regs.IR2)) >> shift; + m_regs.MAC3 = (s32(m_regs.IR3) * s32(m_regs.IR3)) >> shift; const bool lm = inst.lm; - SetIR(1, m_regs.MAC1, lm); - SetIR(2, m_regs.MAC2, lm); - SetIR(3, m_regs.MAC3, lm); + TruncateAndSetIR<1>(m_regs.MAC1, lm); + TruncateAndSetIR<2>(m_regs.MAC2, lm); + TruncateAndSetIR<3>(m_regs.MAC3, lm); m_regs.FLAG.UpdateError(); } @@ -516,10 +478,10 @@ void Core::Execute_AVSZ3(Instruction inst) { m_regs.FLAG.Clear(); - const s64 MAC0 = static_cast(m_regs.ZSF3) * - static_cast(ZeroExtend32(m_regs.SZ1) + ZeroExtend32(m_regs.SZ2) + ZeroExtend32(m_regs.SZ3)); - SetMAC(0, MAC0); - SetOTZ(static_cast(MAC0 / 0x1000)); + const s64 result = + TruncateAndSetMAC<0>(s64(m_regs.ZSF3) * s32(u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0); + TruncateAndSetMAC<0>(result, 0); + SetOTZ(s32(result >> 12)); m_regs.FLAG.UpdateError(); } @@ -528,30 +490,19 @@ void Core::Execute_AVSZ4(Instruction inst) { m_regs.FLAG.Clear(); - const s64 MAC0 = - static_cast(m_regs.ZSF4) * static_cast(ZeroExtend32(m_regs.SZ0) + ZeroExtend32(m_regs.SZ1) + - ZeroExtend32(m_regs.SZ2) + ZeroExtend32(m_regs.SZ3)); - SetMAC(0, MAC0); - SetOTZ(static_cast(MAC0 / 0x1000)); + const s64 result = TruncateAndSetMAC<0>( + s64(m_regs.ZSF4) * s32(u32(m_regs.SZ0) + u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0); + TruncateAndSetMAC<0>(result, 0); + SetOTZ(s32(result >> 12)); m_regs.FLAG.UpdateError(); } -s64 Core::VecDot(const s16 A[3], const s16 B[3]) -{ - return s64(s32(A[0]) * s32(B[0])) + s64(s32(A[1]) * s32(B[1])) + s64(s32(A[2]) * s32(B[2])); -} - -s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z) -{ - return s64(s32(A[0]) * s32(B_x)) + s64(s32(A[1]) * s32(B_y)) + s64(s32(A[2]) * s32(B_z)); -} - void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) { #define dot3(i) \ TruncateAndSetMAC( \ - TruncateMAC(TruncateMAC(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \ + CheckMACResult(CheckMACResult(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \ s64(s32(M[i][2]) * s32(Vz)), \ shift) @@ -569,11 +520,12 @@ void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz void Core::MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm) { #define dot3(i) \ - TruncateAndSetMAC((s64(T[i]) << 12) + \ - TruncateMAC(TruncateMAC(TruncateMAC(s64(s32(M[i][0]) * s32(Vx))) + \ - s64(s32(M[i][1]) * s32(Vy))) + \ - s64(s32(M[i][2]) * s32(Vz))), \ - shift) + TruncateAndSetMAC( \ + (s64(T[i]) << 12) + \ + CheckMACResult( \ + CheckMACResult(CheckMACResult(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \ + s64(s32(M[i][2]) * s32(Vz))), \ + shift) dot3(0); dot3(1); diff --git a/src/pse/gte.h b/src/pse/gte.h index fe977736c..ef4af00a7 100644 --- a/src/pse/gte.h +++ b/src/pse/gte.h @@ -26,8 +26,8 @@ public: void ExecuteInstruction(Instruction inst); private: - static constexpr s64 MAC0_MIN_VALUE = -(INT64_C(1) << 43); - static constexpr s64 MAC0_MAX_VALUE = (INT64_C(1) << 43) - 1; + static constexpr s64 MAC0_MIN_VALUE = -(INT64_C(1) << 31); + static constexpr s64 MAC0_MAX_VALUE = (INT64_C(1) << 31) - 1; static constexpr s64 MAC123_MIN_VALUE = -(INT64_C(1) << 43); static constexpr s64 MAC123_MAX_VALUE = (INT64_C(1) << 43) - 1; static constexpr s32 IR0_MIN_VALUE = 0x0000; @@ -35,28 +35,24 @@ private: static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15); static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1; + // Checks for underflow/overflow. Returns the value untouched so it can be threaded through an expression. template - s64 TruncateMAC(s64 value); + s64 CheckMACResult(s64 value); template - s32 TruncateAndSetMAC(s64 value, u8 shift); + s64 TruncateAndSetMAC(s64 value, u8 shift); template - u8 TruncateRGB(s32 value); + s16 TruncateAndSetIR(s32 value, bool lm); template - s16 TruncateAndSetIR(s32 value, bool lm); + u8 TruncateRGB(s32 value); - void SetMAC(u32 index, s64 value); - void SetIR(u32 index, s32 value, bool lm); void SetOTZ(s32 value); void PushSXY(s32 x, s32 y); void PushSZ(s32 value); void PushRGB(u8 r, u8 g, u8 b, u8 c); - s64 VecDot(const s16 A[3], const s16 B[3]); - s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z); - // 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3] void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm); diff --git a/src/pse/gte.inl b/src/pse/gte.inl index d9c900872..9c7e201d7 100644 --- a/src/pse/gte.inl +++ b/src/pse/gte.inl @@ -1,25 +1,7 @@ #include "gte.h" template -u8 GTE::Core::TruncateRGB(s32 value) -{ - if (value < 0 || value > 0xFF) - { - if constexpr (index == 0) - m_regs.FLAG.color_r_saturated = true; - else if constexpr (index == 1) - m_regs.FLAG.color_g_saturated = true; - else - m_regs.FLAG.color_b_saturated = true; - - value = (value < 0) ? 0 : 0xFF; - } - - return static_cast(value); -} - -template -s64 GTE::Core::TruncateMAC(s64 value) +s64 GTE::Core::CheckMACResult(s64 value) { constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE; constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE; @@ -33,8 +15,6 @@ s64 GTE::Core::TruncateMAC(s64 value) m_regs.FLAG.mac2_underflow = true; else if constexpr (index == 3) m_regs.FLAG.mac3_underflow = true; - - return MIN_VALUE; } else if (value > MAX_VALUE) { @@ -46,26 +26,21 @@ s64 GTE::Core::TruncateMAC(s64 value) m_regs.FLAG.mac2_overflow = true; else if constexpr (index == 3) m_regs.FLAG.mac3_overflow = true; - - return MAX_VALUE; - } - else - { - return value; } + + return value; } template -s32 GTE::Core::TruncateAndSetMAC(s64 value, u8 shift) +s64 GTE::Core::TruncateAndSetMAC(s64 value, u8 shift) { - value = TruncateMAC(value); + value = CheckMACResult(value); // shift should be done before storing to avoid losing precision value >>= shift; - const s32 value32 = static_cast(value); - m_regs.dr32[24 + index] = value32; - return value32; + m_regs.dr32[24 + index] = Truncate32(static_cast(value)); + return value; } template @@ -103,3 +78,21 @@ s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm) m_regs.dr32[8 + index] = value; return static_cast(value); } + +template +u8 GTE::Core::TruncateRGB(s32 value) +{ + if (value < 0 || value > 0xFF) + { + if constexpr (index == 0) + m_regs.FLAG.color_r_saturated = true; + else if constexpr (index == 1) + m_regs.FLAG.color_g_saturated = true; + else + m_regs.FLAG.color_b_saturated = true; + + value = (value < 0) ? 0 : 0xFF; + } + + return static_cast(value); +}