From 8772c336ec036ec1d14d424c72261574457c64e1 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 14 Feb 2025 23:34:10 +1000 Subject: [PATCH] CPU: Don't stall until GTE completion on write Apparently the real CPU does not stall on GTE register writes while a GTE command is still executing, and some very optimized code can run slower if we stall early. Obviously, if you write to a register that is being read by the in-flight command, things are going to break. --- src/core/cpu_core.cpp | 8 +++++--- src/core/cpu_recompiler.cpp | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index f11175f0b..ed921e82e 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -1851,8 +1851,6 @@ restart_instruction: return; } - StallUntilGTEComplete(); - if (inst.cop.IsCommonInstruction()) { // TODO: Combine with cop0. @@ -1860,6 +1858,8 @@ restart_instruction: { case CopCommonInstruction::cfcn: { + StallUntilGTEComplete(); + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue()) + 32); WriteRegDelayed(inst.r.rt, value); @@ -1880,6 +1880,8 @@ restart_instruction: case CopCommonInstruction::mfcn: { + StallUntilGTEComplete(); + const u32 value = GTE::ReadRegister(static_cast(inst.r.rd.GetValue())); WriteRegDelayed(inst.r.rt, value); @@ -1906,6 +1908,7 @@ restart_instruction: } else { + StallUntilGTEComplete(); GTE::ExecuteInstruction(inst.bits); } } @@ -1925,7 +1928,6 @@ restart_instruction: if (!ReadMemoryWord(addr, &value)) return; - StallUntilGTEComplete(); GTE::WriteRegister(ZeroExtend32(static_cast(inst.i.rt.GetValue())), value); if constexpr (pgxp_mode >= PGXPMode::Memory) diff --git a/src/core/cpu_recompiler.cpp b/src/core/cpu_recompiler.cpp index bff8a4615..0983a2939 100644 --- a/src/core/cpu_recompiler.cpp +++ b/src/core/cpu_recompiler.cpp @@ -1301,8 +1301,8 @@ void CPU::Recompiler::Recompiler::CompileInstruction() { case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; 
case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break; - case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; - case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_GTE_STALL | TF_READS_T | TF_PGXP_WITHOUT_CPU); break; + case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_READS_T | TF_PGXP_WITHOUT_CPU); break; + case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_READS_T | TF_PGXP_WITHOUT_CPU); break; default: Compile_Fallback(); break; } } @@ -1314,7 +1314,7 @@ void CPU::Recompiler::Recompiler::CompileInstruction() } break; - case InstructionOp::lwc2: CompileLoadStoreTemplate(&Recompiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_GTE_STALL | TF_READS_S | TF_LOAD_DELAY); break; + case InstructionOp::lwc2: CompileLoadStoreTemplate(&Recompiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_READS_S | TF_LOAD_DELAY); break; case InstructionOp::swc2: CompileLoadStoreTemplate(&Recompiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break; // swc0/lwc0/cop1/cop3 are essentially no-ops