From 5ff9783626c74e8e71887afdb473fefd338e1b10 Mon Sep 17 00:00:00 2001
From: Connor McLaughlin
Date: Fri, 23 Jul 2021 22:59:24 +1000
Subject: [PATCH] CPU/Recompiler/ARM: Optimize icache check

---
Notes (review; this section is ignored when the patch is applied):
    What the change does, as visible in the hunks below:
    * Adds AArch32 and AArch64 definitions of
      CodeGenerator::EmitICacheCheckAndUpdate().
    * For PCs in KSEG1 or above, m_block->uncached_fetch_ticks is added to
      State::pending_ticks through EmitAddCPUStructField().
    * Otherwise pending_ticks is loaded once into r0/w0 and stored back once
      after the loop. For every icache line of the block with a positive
      fill cost, the emitted code compares State::icache_tags[line] with the
      expected tag held in r1/w1; on a mismatch it stores the new tag and
      calls EmitAdd(0, 0, fill_ticks) (presumably host register 0, i.e. the
      ticks register - confirm against EmitAdd).
    * The generic implementation is compiled out with "#if 0".

    NOTE(review): "continue" on fill_ticks <= 0 also skips the emitted
    add of ICACHE_LINE_SIZE to current_tag_reg, while the host-side
    current_pc still advances in the for-increment. If a skipped line can be
    followed by a checked line inside one block, the tag register would lag
    behind current_pc. Whether that can happen depends on
    GetICacheFillTicks(), which is not part of this patch - please confirm.

    NOTE(review): the "<u32>" arguments of static_cast are assumed from the
    enclosing Value::FromConstantU32() calls - confirm against the upstream
    commit. The author e-mail address is missing from the From: header.

 .../cpu_recompiler_code_generator_aarch32.cpp | 42 ++++++++++++++++++
 .../cpu_recompiler_code_generator_aarch64.cpp | 43 +++++++++++++++++++
 .../cpu_recompiler_code_generator_generic.cpp |  2 +-
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/src/core/cpu_recompiler_code_generator_aarch32.cpp b/src/core/cpu_recompiler_code_generator_aarch32.cpp
index be0fb37f1..13931cbda 100644
--- a/src/core/cpu_recompiler_code_generator_aarch32.cpp
+++ b/src/core/cpu_recompiler_code_generator_aarch32.cpp
@@ -1749,6 +1749,48 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
   m_emit->Bind(&skip_cancel);
 }
 
+void CodeGenerator::EmitICacheCheckAndUpdate()
+{
+  if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
+  {
+    EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
+  }
+  else
+  {
+    const auto& ticks_reg = a32::r0;
+    const auto& current_tag_reg = a32::r1;
+    const auto& existing_tag_reg = a32::r2;
+
+    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
+    m_emit->ldr(ticks_reg, a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
+    m_emit->Mov(current_tag_reg, current_pc);
+
+    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
+    {
+      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
+      if (fill_ticks <= 0)
+        continue;
+
+      const u32 line = GetICacheLine(current_pc);
+      const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
+
+      a32::Label cache_hit;
+      m_emit->ldr(existing_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset));
+      m_emit->cmp(existing_tag_reg, current_tag_reg);
+      m_emit->B(a32::eq, &cache_hit);
+
+      m_emit->str(current_tag_reg, a32::MemOperand(GetCPUPtrReg(), offset));
+      EmitAdd(0, 0, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false);
+      m_emit->Bind(&cache_hit);
+
+      if (i != (m_block->icache_line_count - 1))
+        m_emit->add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
+    }
+
+    m_emit->str(ticks_reg, a32::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
+  }
+}
+
 void CodeGenerator::EmitStallUntilGTEComplete()
 {
   static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
diff --git a/src/core/cpu_recompiler_code_generator_aarch64.cpp b/src/core/cpu_recompiler_code_generator_aarch64.cpp
index d62e3026b..e58a79800 100644
--- a/src/core/cpu_recompiler_code_generator_aarch64.cpp
+++ b/src/core/cpu_recompiler_code_generator_aarch64.cpp
@@ -1936,6 +1936,49 @@ void CodeGenerator::EmitCancelInterpreterLoadDelayForReg(Reg reg)
   m_emit->Bind(&skip_cancel);
 }
 
+void CodeGenerator::EmitICacheCheckAndUpdate()
+{
+  if (GetSegmentForAddress(m_pc) >= Segment::KSEG1)
+  {
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(m_block->uncached_fetch_ticks)));
+  }
+  else
+  {
+    const auto& ticks_reg = a64::w0;
+    const auto& current_tag_reg = a64::w1;
+    const auto& existing_tag_reg = a64::w2;
+
+    VirtualMemoryAddress current_pc = m_pc & ICACHE_TAG_ADDRESS_MASK;
+    m_emit->Ldr(ticks_reg, a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
+    m_emit->Mov(current_tag_reg, current_pc);
+
+    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
+    {
+      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
+      if (fill_ticks <= 0)
+        continue;
+
+      const u32 line = GetICacheLine(current_pc);
+      const u32 offset = offsetof(State, icache_tags) + (line * sizeof(u32));
+
+      a64::Label cache_hit;
+      m_emit->Ldr(existing_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
+      m_emit->Cmp(existing_tag_reg, current_tag_reg);
+      m_emit->B(&cache_hit, a64::eq);
+
+      m_emit->Str(current_tag_reg, a64::MemOperand(GetCPUPtrReg(), offset));
+      EmitAdd(0, 0, Value::FromConstantU32(static_cast<u32>(fill_ticks)), false);
+      m_emit->Bind(&cache_hit);
+
+      if (i != (m_block->icache_line_count - 1))
+        m_emit->Add(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
+    }
+
+    m_emit->Str(ticks_reg, a64::MemOperand(GetCPUPtrReg(), offsetof(State, pending_ticks)));
+  }
+}
+
 void CodeGenerator::EmitStallUntilGTEComplete()
 {
   static_assert(offsetof(State, pending_ticks) + sizeof(u32) == offsetof(State, gte_completion_tick));
diff --git a/src/core/cpu_recompiler_code_generator_generic.cpp b/src/core/cpu_recompiler_code_generator_generic.cpp
index 1bdf3215f..c1043905c 100644
--- a/src/core/cpu_recompiler_code_generator_generic.cpp
+++ b/src/core/cpu_recompiler_code_generator_generic.cpp
@@ -160,7 +160,7 @@ void CodeGenerator::EmitStoreGuestMemory(const CodeBlockInstruction& cbi, const
   }
 }
 
-#ifndef CPU_X64
+#if 0 // Not used
 void CodeGenerator::EmitICacheCheckAndUpdate()
 {