System: Refactor main loop

Reduces JIT exits.
Improves runahead performance.
pull/2995/head
Stenzek 2 years ago
parent 4ebd34fcb3
commit 5b980dafa5

@ -239,6 +239,8 @@ elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm" OR "${CMAKE_SYSTEM_PROCESSOR}"
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -marm -march=armv7-a")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -march=armv7-a")
endif()
elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "riscv64")
set(CPU_ARCH "riscv64")
else()
message(FATAL_ERROR "Unknown system processor: ${CMAKE_SYSTEM_PROCESSOR}")
endif()

@ -88,4 +88,7 @@ if(${CPU_ARCH} STREQUAL "aarch64")
)
endif()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("Enabling vixl debug assertions")
target_compile_definitions(vixl PUBLIC VIXL_DEBUG)
endif()

@ -12,6 +12,8 @@ add_library(common
dimensional_array.h
error.cpp
error.h
fastjmp.cpp
fastjmp.h
fifo_queue.h
file_system.cpp
file_system.h
@ -97,6 +99,14 @@ if(WIN32)
windows_headers.h
)
target_link_libraries(common PRIVATE d3dcompiler.lib)
if(${CPU_ARCH} STREQUAL "x64")
enable_language(ASM_MASM)
target_sources(common PRIVATE fastjmp_x86.asm)
elseif(${CPU_ARCH} STREQUAL "aarch32" OR ${CPU_ARCH} STREQUAL "aarch64")
enable_language(ASM_MARMASM)
target_sources(common PRIVATE fastjmp_arm.asm)
endif()
endif()
if(NOT WIN32 AND NOT ANDROID)

@ -23,6 +23,7 @@
<ClInclude Include="dimensional_array.h" />
<ClInclude Include="easing.h" />
<ClInclude Include="error.h" />
<ClInclude Include="fastjmp.h" />
<ClInclude Include="fifo_queue.h" />
<ClInclude Include="file_system.h" />
<ClInclude Include="gl\context.h">
@ -123,6 +124,7 @@
<ClCompile Include="d3d12\stream_buffer.cpp" />
<ClCompile Include="d3d12\texture.cpp" />
<ClCompile Include="d3d12\util.cpp" />
<ClCompile Include="fastjmp.cpp" />
<ClCompile Include="file_system.cpp" />
<ClCompile Include="gl\context.cpp">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
@ -192,6 +194,16 @@
<Natvis Include="bitfield.natvis" />
</ItemGroup>
<ItemGroup>
<MARMASM Include="fastjmp_arm.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</MARMASM>
<MASM Include="fastjmp_x86.asm">
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Platform)'!='Win32' And '$(Platform)'!='x64'">true</ExcludedFromBuild>
<PreprocessorDefinitions Condition="'$(Platform)'=='Win32'">_M_X86_32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(Platform)'=='x64'">_M_X86_64;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</MASM>
<None Include="vulkan\entry_points.inl">
<ExcludedFromBuild Condition="'$(Platform)'=='ARM64'">true</ExcludedFromBuild>
</None>
@ -219,9 +231,17 @@
<Project>{73ee0c55-6ffe-44e7-9c12-baa52434a797}</Project>
</ProjectReference>
</ItemGroup>
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.targets" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
</ImportGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{EE054E08-3799-4A59-A422-18259C105FFD}</ProjectGuid>
</PropertyGroup>
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath)\BuildCustomizations\marmasm.props" />
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.props" />
</ImportGroup>
<Import Project="..\..\dep\msvc\vsprops\StaticLibrary.props" />
<Import Project="common.props" />
<ItemDefinitionGroup>

@ -129,6 +129,7 @@
<ClInclude Include="build_timestamp.h" />
<ClInclude Include="sha1_digest.h" />
<ClInclude Include="gpu_texture.h" />
<ClInclude Include="fastjmp.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="gl\program.cpp">
@ -234,6 +235,7 @@
<ClCompile Include="threading.cpp" />
<ClCompile Include="sha1_digest.cpp" />
<ClCompile Include="gpu_texture.cpp" />
<ClCompile Include="fastjmp.cpp" />
</ItemGroup>
<ItemGroup>
<Natvis Include="bitfield.natvis" />
@ -260,4 +262,10 @@
<Filter>vulkan</Filter>
</None>
</ItemGroup>
<ItemGroup>
<MASM Include="fastjmp_x86.asm" />
</ItemGroup>
<ItemGroup>
<MARMASM Include="fastjmp_arm.asm" />
</ItemGroup>
</Project>

@ -0,0 +1,166 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#ifndef _WIN32
#include "fastjmp.h"
#if defined(__APPLE__)
#define PREFIX "_"
#else
#define PREFIX ""
#endif
#if defined(__x86_64__)
asm("\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t" PREFIX "fastjmp_set:"
R"(
movq 0(%rsp), %rax
movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set
addq $8, %rdx
movq %rax, 0(%rdi) # actually rip
movq %rbx, 8(%rdi)
movq %rdx, 16(%rdi) # actually rsp
movq %rbp, 24(%rdi)
movq %r12, 32(%rdi)
movq %r13, 40(%rdi)
movq %r14, 48(%rdi)
movq %r15, 56(%rdi)
xorl %eax, %eax
ret
)"
"\t" PREFIX "fastjmp_jmp:"
R"(
movl %esi, %eax
movq 0(%rdi), %rdx # actually rip
movq 8(%rdi), %rbx
movq 16(%rdi), %rsp # actually rsp
movq 24(%rdi), %rbp
movq 32(%rdi), %r12
movq 40(%rdi), %r13
movq 48(%rdi), %r14
movq 56(%rdi), %r15
jmp *%rdx
)");
#elif defined(__aarch64__)
asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.align 16\n"
"\t" PREFIX "fastjmp_set:" R"(
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
)"
".align 16\n"
"\t" PREFIX "fastjmp_jmp:" R"(
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
)");
#elif defined(__riscv) && __riscv_xlen == 64
asm(
"\t.global " PREFIX "fastjmp_set\n"
"\t.global " PREFIX "fastjmp_jmp\n"
"\t.text\n"
"\t.align 16\n"
"\t" PREFIX "fastjmp_set:" R"(
sd sp, 0(a0)
sd s0, 8(a0)
sd s1, 16(a0)
sd s2, 24(a0)
sd s3, 32(a0)
sd s4, 40(a0)
sd s5, 48(a0)
sd s6, 56(a0)
sd s7, 64(a0)
sd s8, 72(a0)
sd s9, 80(a0)
sd s10, 88(a0)
sd s11, 96(a0)
fsd fs0, 104(a0)
fsd fs1, 112(a0)
fsd fs2, 120(a0)
fsd fs3, 128(a0)
fsd fs4, 136(a0)
fsd fs5, 144(a0)
fsd fs6, 152(a0)
fsd fs7, 160(a0)
fsd fs8, 168(a0)
fsd fs9, 176(a0)
fsd fs10, 184(a0)
fsd fs11, 192(a0)
sd ra, 208(a0)
li a0, 0
jr ra
)"
".align 16\n"
"\t" PREFIX "fastjmp_jmp:" R"(
ld ra, 208(a0)
fld fs11, 192(a0)
fld fs10, 184(a0)
fld fs9, 176(a0)
fld fs8, 168(a0)
fld fs7, 160(a0)
fld fs6, 152(a0)
fld fs5, 144(a0)
fld fs4, 136(a0)
fld fs3, 128(a0)
fld fs2, 120(a0)
fld fs1, 112(a0)
fld fs0, 104(a0)
ld s11, 96(a0)
ld s10, 88(a0)
ld s9, 80(a0)
ld s8, 72(a0)
ld s7, 64(a0)
ld s6, 56(a0)
ld s5, 48(a0)
ld s4, 40(a0)
ld s3, 32(a0)
ld s2, 24(a0)
ld s1, 16(a0)
ld s0, 8(a0)
ld sp, 0(a0)
mv a0, a1
jr ra
)");
#else
#error Unknown platform.
#endif
#endif // __WIN32

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include "types.h"
#include <cstddef>
#include <cstdint>
struct fastjmp_buf
{
#if defined(_WIN32) && defined(_M_AMD64)
static constexpr std::size_t BUF_SIZE = 240;
#elif defined(_M_ARM64) || defined(__aarch64__)
static constexpr std::size_t BUF_SIZE = 168;
#elif defined(__x86_64__)
static constexpr std::size_t BUF_SIZE = 64;
#elif defined(_M_IX86) || defined(__i386__)
static constexpr std::size_t BUF_SIZE = 24;
#elif defined(__riscv) && __riscv_xlen == 64
static constexpr std::size_t BUF_SIZE = 208;
#else
#error Unknown architecture.
#endif
alignas(16) std::uint8_t buf[BUF_SIZE];
};
extern "C" {
int fastjmp_set(fastjmp_buf* buf);
[[noreturn]] void fastjmp_jmp(const fastjmp_buf* buf, int ret);
}

@ -0,0 +1,47 @@
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "ksarm64.h"
EXPORT fastjmp_set
EXPORT fastjmp_jmp
TEXTAREA
; void fastjmp_set(fastjmp_buf*)
LEAF_ENTRY fastjmp_set
mov x16, sp
stp x16, x30, [x0]
stp x19, x20, [x0, #16]
stp x21, x22, [x0, #32]
stp x23, x24, [x0, #48]
stp x25, x26, [x0, #64]
stp x27, x28, [x0, #80]
str x29, [x0, #96]
stp d8, d9, [x0, #112]
stp d10, d11, [x0, #128]
stp d12, d13, [x0, #144]
stp d14, d15, [x0, #160]
mov w0, wzr
br x30
LEAF_END
; void fastjmp_jmp(fastjmp_buf*, int)
LEAF_ENTRY fastjmp_jmp
ldp x16, x30, [x0]
mov sp, x16
ldp x19, x20, [x0, #16]
ldp x21, x22, [x0, #32]
ldp x23, x24, [x0, #48]
ldp x25, x26, [x0, #64]
ldp x27, x28, [x0, #80]
ldr x29, [x0, #96]
ldp d8, d9, [x0, #112]
ldp d10, d11, [x0, #128]
ldp d12, d13, [x0, #144]
ldp d14, d15, [x0, #160]
mov w0, w1
br x30
LEAF_END
END

@ -0,0 +1,119 @@
; SPDX-FileCopyrightText: 2021 Connor McLaughlin <stenzek@gmail.com>
; SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
IFDEF _M_X86_32
; -----------------------------------------
; 32-bit X86
; -----------------------------------------
.386
.model flat
_TEXT SEGMENT
PUBLIC @fastjmp_set@4
PUBLIC @fastjmp_jmp@8
; void fastjmp_set(fastjmp_buf*)
@fastjmp_set@4 PROC
mov eax, dword ptr [esp]
mov edx, esp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
add edx, 4
mov dword ptr [ecx], eax ; actually eip
mov dword ptr [ecx + 4], ebx
mov dword ptr [ecx + 8], edx ; actually esp
mov dword ptr [ecx + 12], ebp
mov dword ptr [ecx + 16], esi
mov dword ptr [ecx + 20], edi
xor eax, eax
ret
@fastjmp_set@4 ENDP
; void __fastcall fastjmp_jmp(fastjmp_buf*, int)
@fastjmp_jmp@8 PROC
mov eax, edx ; return code
mov edx, dword ptr [ecx + 0]
mov ebx, dword ptr [ecx + 4]
mov esp, dword ptr [ecx + 8]
mov ebp, dword ptr [ecx + 12]
mov esi, dword ptr [ecx + 16]
mov edi, dword ptr [ecx + 20]
jmp edx
@fastjmp_jmp@8 ENDP
_TEXT ENDS
ENDIF ; _M_X86_32
IFDEF _M_X86_64
; -----------------------------------------
; 64-bit X86
; -----------------------------------------
_TEXT SEGMENT
PUBLIC fastjmp_set
PUBLIC fastjmp_jmp
; void fastjmp_set(fastjmp_buf*)
fastjmp_set PROC
mov rax, qword ptr [rsp]
mov rdx, rsp ; fixup stack pointer, so it doesn't include the call to fastjmp_set
add rdx, 8
mov qword ptr [rcx], rax ; actually rip
mov qword ptr [rcx + 8], rbx
mov qword ptr [rcx + 16], rdx ; actually rsp
mov qword ptr [rcx + 24], rbp
mov qword ptr [rcx + 32], rsi
mov qword ptr [rcx + 40], rdi
mov qword ptr [rcx + 48], r12
mov qword ptr [rcx + 56], r13
mov qword ptr [rcx + 64], r14
mov qword ptr [rcx + 72], r15
movaps xmmword ptr [rcx + 80], xmm6
movaps xmmword ptr [rcx + 96], xmm7
movaps xmmword ptr [rcx + 112], xmm8
add rcx, 112 ; split to two batches to fit displacement in a single byte
movaps xmmword ptr [rcx + 16], xmm9
movaps xmmword ptr [rcx + 32], xmm10
movaps xmmword ptr [rcx + 48], xmm11
movaps xmmword ptr [rcx + 64], xmm12
movaps xmmword ptr [rcx + 80], xmm13
movaps xmmword ptr [rcx + 96], xmm14
movaps xmmword ptr [rcx + 112], xmm15
xor eax, eax
ret
fastjmp_set ENDP
; void fastjmp_jmp(fastjmp_buf*, int)
fastjmp_jmp PROC
mov eax, edx ; return code
mov rdx, qword ptr [rcx + 0] ; actually rip
mov rbx, qword ptr [rcx + 8]
mov rsp, qword ptr [rcx + 16]
mov rbp, qword ptr [rcx + 24]
mov rsi, qword ptr [rcx + 32]
mov rdi, qword ptr [rcx + 40]
mov r12, qword ptr [rcx + 48]
mov r13, qword ptr [rcx + 56]
mov r14, qword ptr [rcx + 64]
mov r15, qword ptr [rcx + 72]
movaps xmm6, xmmword ptr [rcx + 80]
movaps xmm7, xmmword ptr [rcx + 96]
movaps xmm8, xmmword ptr [rcx + 112]
add rcx, 112 ; split to two batches to fit displacement in a single byte
movaps xmm9, xmmword ptr [rcx + 16]
movaps xmm10, xmmword ptr [rcx + 32]
movaps xmm11, xmmword ptr [rcx + 48]
movaps xmm12, xmmword ptr [rcx + 64]
movaps xmm13, xmmword ptr [rcx + 80]
movaps xmm14, xmmword ptr [rcx + 96]
movaps xmm15, xmmword ptr [rcx + 112]
jmp rdx
fastjmp_jmp ENDP
_TEXT ENDS
ENDIF ; _M_X86_64
END

@ -27,6 +27,8 @@
#define CPU_AARCH64 1
#elif defined(__arm__)
#define CPU_AARCH32 1
#elif defined(__riscv) && __riscv_xlen == 64
#define CPU_RISCV64 1
#else
#error Unknown architecture.
#endif

@ -65,6 +65,13 @@ char (&__countof_ArraySizeHelper(T (&array)[N]))[N];
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif
// [[noreturn]] which can be used on function pointers.
#ifdef _MSC_VER
// __declspec(noreturn) produces error C3829.
#define NORETURN_FUNCTION_POINTER
#else
#define NORETURN_FUNCTION_POINTER __attribute__((noreturn))
#endif
// disable warnings that show up at warning level 4
// TODO: Move to build system instead

@ -21,6 +21,7 @@
#include "sio.h"
#include "spu.h"
#include "timers.h"
#include "timing_event.h"
#include "util/state_wrapper.h"
#include <cstdio>
#include <tuple>
@ -1418,7 +1419,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address)
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks)
{
VirtualMemoryAddress current_pc = g_state.regs.pc & ICACHE_TAG_ADDRESS_MASK;
VirtualMemoryAddress current_pc = g_state.pc & ICACHE_TAG_ADDRESS_MASK;
if (IsCachedAddress(current_pc))
{
TickCount ticks = 0;
@ -1541,10 +1542,20 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address,
}
template<MemoryAccessType type, MemoryAccessSize size>
static ALWAYS_INLINE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value)
static ALWAYS_INLINE_RELEASE TickCount DoMemoryAccess(VirtualMemoryAddress address, u32& value)
{
using namespace Bus;
#if 0
if (type == MemoryAccessType::Write && address == 0x80113028)
{
if ((TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks) == 5051485)
__debugbreak();
Log_WarningPrintf("VAL %08X @ %u", value, (TimingEvents::GetGlobalTickCounter() + CPU::g_state.pending_ticks));
}
#endif
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1723,9 +1734,9 @@ static bool DoAlignmentCheck(VirtualMemoryAddress address)
bool FetchInstruction()
{
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc;
const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1764,16 +1775,16 @@ bool FetchInstruction()
}
}
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits);
g_state.pc = g_state.npc;
g_state.npc += sizeof(g_state.next_instruction.bits);
return true;
}
bool FetchInstructionForInterpreterFallback()
{
DebugAssert(Common::IsAlignedPow2(g_state.regs.npc, 4));
DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
const PhysicalMemoryAddress address = g_state.regs.npc;
const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29)
{
case 0x00: // KUSEG 0M-512M
@ -1801,8 +1812,8 @@ bool FetchInstructionForInterpreterFallback()
}
}
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += sizeof(g_state.next_instruction.bits);
g_state.pc = g_state.npc;
g_state.npc += sizeof(g_state.next_instruction.bits);
return true;
}

@ -196,6 +196,8 @@
<Import Project="core.props" />
<ItemDefinitionGroup>
<ClCompile>
<PreprocessorDefinitions>ZYDIS_DISABLE_ENCODER;ZYDIS_DISABLE_AVX512;ZYDIS_DISABLE_KNC;ZYDIS_STATIC_BUILD;ZYCORE_STATIC_BUILD;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Platform)'=='x64'">$(SolutionDir)dep\zydis\include;$(SolutionDir)dep\zydis\dependencies\zycore\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName>
</ClCompile>
</ItemDefinitionGroup>

@ -8,6 +8,7 @@
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "cpu_recompiler_types.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"
@ -17,6 +18,8 @@ Log_SetChannel(CPU::CodeCache);
#include "cpu_recompiler_code_generator.h"
#endif
#include <zlib.h>
namespace CPU::CodeCache {
static constexpr bool USE_BLOCK_LINKING = true;
@ -50,6 +53,10 @@ alignas(Recompiler::CODE_STORAGE_ALIGNMENT) static u8
#endif
static JitCodeBuffer s_code_buffer;
#endif
#ifdef WITH_RECOMPILER
static FastMapTable s_fast_map[FAST_MAP_TABLE_COUNT];
static std::unique_ptr<CodeBlock::HostCodePointer[]> s_fast_map_pointers;
@ -253,12 +260,19 @@ void Initialize()
{
Panic("Failed to initialize code space");
}
}
#endif
AllocateFastMap();
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
AllocateFastMap();
CompileDispatcher();
ResetFastMap();
}
@ -293,22 +307,13 @@ void Shutdown()
}
template<PGXPMode pgxp_mode>
static void ExecuteImpl()
[[noreturn]] static void ExecuteImpl()
{
CodeBlockKey next_block_key;
g_using_interpreter = false;
g_state.frame_done = false;
while (!g_state.frame_done)
for (;;)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
next_block_key = GetNextBlockKey();
while (g_state.pending_ticks < g_state.downcount)
@ -384,27 +389,10 @@ static void ExecuteImpl()
}
}
}
TimingEvents::RunEvents();
}
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
}
void Execute()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU>();
else
ExecuteImpl<PGXPMode::Memory>();
}
else
{
ExecuteImpl<PGXPMode::Disabled>();
}
g_state.npc = g_state.pc;
}
#ifdef WITH_RECOMPILER
@ -430,21 +418,15 @@ FastMapTable* GetFastMapPointer()
return s_fast_map;
}
void ExecuteRecompiler()
[[noreturn]] static void ExecuteRecompiler()
{
g_using_interpreter = false;
g_state.frame_done = false;
#if 0
while (!g_state.frame_done)
for (;;)
{
if (HasPendingInterrupt())
{
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
DispatchInterrupt();
}
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount)
{
@ -452,18 +434,50 @@ void ExecuteRecompiler()
LogCurrentState();
#endif
const u32 pc = g_state.regs.pc;
const u32 pc = g_state.pc;
s_single_block_asm_dispatcher(s_fast_map[pc >> 16][pc >> 2]);
}
TimingEvents::RunEvents();
}
#else
s_asm_dispatcher();
#endif
}
// in case we switch to interpreter...
g_state.regs.npc = g_state.regs.pc;
#endif
[[noreturn]] void Execute()
{
switch (g_settings.cpu_execution_mode)
{
#ifdef WITH_RECOMPILER
case CPUExecutionMode::Recompiler:
ExecuteRecompiler();
break;
#endif
default:
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU>();
else
ExecuteImpl<PGXPMode::Memory>();
}
else
{
ExecuteImpl<PGXPMode::Disabled>();
}
}
break;
}
}
#if defined(WITH_RECOMPILER)
JitCodeBuffer& GetCodeBuffer()
{
return s_code_buffer;
}
#endif
@ -473,13 +487,14 @@ void Reinitialize()
ClearState();
#ifdef WITH_RECOMPILER
ShutdownFastmem();
#endif
#if defined(WITH_RECOMPILER)
s_code_buffer.Destroy();
if (g_settings.IsUsingRecompiler())
{
#ifdef USE_STATIC_CODE_BUFFER
if (!s_code_buffer.Initialize(s_code_storage, sizeof(s_code_storage), RECOMPILER_FAR_CODE_CACHE_SIZE,
RECOMPILER_GUARD_SIZE))
@ -489,7 +504,12 @@ void Reinitialize()
{
Panic("Failed to initialize code space");
}
}
#endif
#ifdef WITH_RECOMPILER
if (g_settings.IsUsingRecompiler())
{
if (g_settings.IsUsingFastmem() && !InitializeFastmem())
Panic("Failed to initialize fastmem");
@ -509,25 +529,40 @@ void Flush()
#endif
}
#ifndef _MSC_VER
void __debugbreak() {}
#endif
void LogCurrentState()
{
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) == 2546728915)
__debugbreak();
#endif
#if 0
if ((TimingEvents::GetGlobalTickCounter() + GetPendingTicks()) < 2546729174)
return;
#endif
const auto& regs = g_state.regs;
WriteToExecutionLog("tick=%u pc=%08X zero=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
"ldv=%08X\n",
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), regs.pc, regs.zero, regs.at, regs.v0,
regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8,
regs.t9, regs.k0, regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value);
WriteToExecutionLog(
"tick=%u dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X "
"t1=%08X t2=%08X t3=%08X t4=%08X t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X "
"s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X ldr=%s "
"ldv=%08X cause=%08X sr=%08X gte=%08X\n",
TimingEvents::GetGlobalTickCounter() + GetPendingTicks(), g_state.pending_ticks, g_state.downcount, g_state.pc,
regs.at, regs.v0, regs.v1, regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5,
regs.t6, regs.t7, regs.s0, regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0,
regs.k1, regs.gp, regs.sp, regs.fp, regs.ra,
(g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
(g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
}
CodeBlockKey GetNextBlockKey()
{
CodeBlockKey key = {};
key.SetPC(g_state.regs.pc);
key.SetPC(g_state.pc);
key.user_mode = InUserMode();
return key;
}
@ -836,7 +871,7 @@ void FastCompileBlockFunction()
void InvalidCodeFunction()
{
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.regs.pc);
Log_ErrorPrintf("Trying to execute invalid code at 0x%08X", g_state.pc);
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
@ -1249,7 +1284,7 @@ void CPU::Recompiler::Thunks::ResolveBranch(CodeBlock* block, void* host_pc, voi
void CPU::Recompiler::Thunks::LogPC(u32 pc)
{
#if 0
#if 1
CPU::CodeCache::LogCurrentState();
#endif
#if 0

@ -121,14 +121,17 @@ using FastMapTable = CodeBlock::HostCodePointer*;
void Initialize();
void Shutdown();
void Execute();
[[noreturn]] void Execute();
#ifdef WITH_RECOMPILER
using DispatcherFunction = void (*)();
using SingleBlockDispatcherFunction = void (*)(const CodeBlock::HostCodePointer);
FastMapTable* GetFastMapPointer();
void ExecuteRecompiler();
#endif
#if defined(WITH_RECOMPILER)
JitCodeBuffer& GetCodeBuffer();
#endif
/// Flushes the code cache, forcing all blocks to be recompiled.

@ -4,6 +4,7 @@
#include "cpu_core.h"
#include "bus.h"
#include "common/align.h"
#include "common/fastjmp.h"
#include "common/file_system.h"
#include "common/log.h"
#include "cpu_core_private.h"
@ -29,9 +30,10 @@ static void Branch(u32 target);
static void FlushPipeline();
State g_state;
bool g_using_interpreter = false;
bool TRACE_EXECUTION = false;
static fastjmp_buf s_jmp_buf;
static std::FILE* s_log_file = nullptr;
static bool s_log_file_opened = false;
static bool s_trace_to_log = false;
@ -41,6 +43,7 @@ static std::vector<Breakpoint> s_breakpoints;
static u32 s_breakpoint_counter = 1;
static u32 s_last_breakpoint_check_pc = INVALID_BREAKPOINT_PC;
static bool s_single_step = false;
static bool s_single_step_done = false;
bool IsTraceEnabled()
{
@ -134,6 +137,7 @@ void Reset()
GTE::Reset();
// TODO: This consumes cycles...
SetPC(RESET_VECTOR);
}
@ -141,7 +145,9 @@ bool DoState(StateWrapper& sw)
{
sw.Do(&g_state.pending_ticks);
sw.Do(&g_state.downcount);
sw.DoArray(g_state.regs.r, countof(g_state.regs.r));
sw.DoArray(g_state.regs.r, static_cast<u32>(Reg::count));
sw.Do(&g_state.pc);
sw.Do(&g_state.npc);
sw.Do(&g_state.cop0_regs.BPC);
sw.Do(&g_state.cop0_regs.BDA);
sw.Do(&g_state.cop0_regs.TAR);
@ -161,11 +167,23 @@ bool DoState(StateWrapper& sw)
sw.Do(&g_state.next_instruction_is_branch_delay_slot);
sw.Do(&g_state.branch_was_taken);
sw.Do(&g_state.exception_raised);
sw.Do(&g_state.interrupt_delay);
if (sw.GetVersion() < 59)
{
bool interrupt_delay;
sw.Do(&interrupt_delay);
}
sw.Do(&g_state.load_delay_reg);
sw.Do(&g_state.load_delay_value);
sw.Do(&g_state.next_load_delay_reg);
sw.Do(&g_state.next_load_delay_value);
// Compatibility with old states.
if (sw.GetVersion() < 59)
{
g_state.load_delay_reg = static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
g_state.next_load_delay_reg = static_cast<Reg>(std::min(static_cast<u8>(g_state.load_delay_reg), static_cast<u8>(Reg::count)));
}
sw.Do(&g_state.cache_control.bits);
sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
@ -203,7 +221,7 @@ void UpdateFastmemBase()
ALWAYS_INLINE_RELEASE void SetPC(u32 new_pc)
{
DebugAssert(Common::IsAlignedPow2(new_pc, 4));
g_state.regs.npc = new_pc;
g_state.npc = new_pc;
FlushPipeline();
}
@ -217,7 +235,7 @@ ALWAYS_INLINE_RELEASE void Branch(u32 target)
return;
}
g_state.regs.npc = target;
g_state.npc = target;
g_state.branch_was_taken = true;
}
@ -257,14 +275,14 @@ ALWAYS_INLINE_RELEASE static void RaiseException(u32 CAUSE_bits, u32 EPC, u32 ve
// TAR is set to the address which was being fetched in this instruction, or the next instruction to execute if the
// exception hadn't occurred in the delay slot.
g_state.cop0_regs.EPC -= UINT32_C(4);
g_state.cop0_regs.TAR = g_state.regs.pc;
g_state.cop0_regs.TAR = g_state.pc;
}
// current -> previous, switch to kernel mode and disable interrupts
g_state.cop0_regs.sr.mode_bits <<= 2;
// flush the pipeline - we don't want to execute the previously fetched instruction
g_state.regs.npc = vector;
g_state.npc = vector;
g_state.exception_raised = true;
FlushPipeline();
}
@ -299,7 +317,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
if (PCDrv::HandleSyscall(instruction_bits, g_state.regs))
{
// immediately return
g_state.regs.npc = EPC + 4;
g_state.npc = EPC + 4;
FlushPipeline();
return;
}
@ -311,16 +329,7 @@ void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits)
void SetExternalInterrupt(u8 bit)
{
g_state.cop0_regs.cause.Ip |= static_cast<u8>(1u << bit);
if (g_settings.cpu_execution_mode == CPUExecutionMode::Interpreter)
{
g_state.interrupt_delay = 1;
}
else
{
g_state.interrupt_delay = 0;
CheckForPendingInterrupt();
}
CheckForPendingInterrupt();
}
void ClearExternalInterrupt(u8 bit)
@ -331,9 +340,7 @@ void ClearExternalInterrupt(u8 bit)
ALWAYS_INLINE_RELEASE static void UpdateLoadDelay()
{
// the old value is needed in case the delay slot instruction overwrites the same register
if (g_state.load_delay_reg != Reg::count)
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = g_state.next_load_delay_reg;
g_state.load_delay_value = g_state.next_load_delay_value;
g_state.next_load_delay_reg = Reg::count;
@ -343,16 +350,13 @@ ALWAYS_INLINE_RELEASE static void FlushPipeline()
{
// loads are flushed
g_state.next_load_delay_reg = Reg::count;
if (g_state.load_delay_reg != Reg::count)
{
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
}
g_state.regs.r[static_cast<u8>(g_state.load_delay_reg)] = g_state.load_delay_value;
g_state.load_delay_reg = Reg::count;
// not in a branch delay slot
g_state.branch_was_taken = false;
g_state.next_instruction_is_branch_delay_slot = false;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
// prefetch the next instruction
FetchInstruction();
@ -649,8 +653,8 @@ const std::array<DebuggerRegisterListEntry, NUM_DEBUGGER_REGISTER_LIST_ENTRIES>
{"ra", &CPU::g_state.regs.ra},
{"hi", &CPU::g_state.regs.hi},
{"lo", &CPU::g_state.regs.lo},
{"pc", &CPU::g_state.regs.pc},
{"npc", &CPU::g_state.regs.npc},
{"pc", &CPU::g_state.pc},
{"npc", &CPU::g_state.npc},
{"COP0_SR", &CPU::g_state.cop0_regs.sr.bits},
{"COP0_CAUSE", &CPU::g_state.cop0_regs.cause.bits},
@ -1111,7 +1115,7 @@ restart_instruction:
{
g_state.next_instruction_is_branch_delay_slot = true;
const u32 target = ReadReg(inst.r.rs);
WriteReg(inst.r.rd, g_state.regs.npc);
WriteReg(inst.r.rd, g_state.npc);
Branch(target);
}
break;
@ -1267,7 +1271,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, sxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, sxvalue);
}
break;
@ -1285,7 +1289,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, sxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, sxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, sxvalue);
}
break;
@ -1302,7 +1306,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, value, addr);
PGXP::CPU_LW(inst.bits, addr, value);
}
break;
@ -1320,7 +1324,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LBx(inst.bits, zxvalue, addr);
PGXP::CPU_LBx(inst.bits, addr, zxvalue);
}
break;
@ -1338,7 +1342,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, zxvalue);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LHx(inst.bits, zxvalue, addr);
PGXP::CPU_LHx(inst.bits, addr, zxvalue);
}
break;
@ -1372,7 +1376,7 @@ restart_instruction:
WriteRegDelayed(inst.i.rt, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LW(inst.bits, new_value, addr);
PGXP::CPU_LW(inst.bits, addr, new_value);
}
break;
@ -1386,7 +1390,7 @@ restart_instruction:
WriteMemoryByte(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SB(inst.bits, Truncate8(value), addr);
PGXP::CPU_SB(inst.bits, addr, value);
}
break;
@ -1400,7 +1404,7 @@ restart_instruction:
WriteMemoryHalfWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SH(inst.bits, Truncate16(value), addr);
PGXP::CPU_SH(inst.bits, addr, value);
}
break;
@ -1414,7 +1418,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, value, addr);
PGXP::CPU_SW(inst.bits, addr, value);
}
break;
@ -1447,22 +1451,22 @@ restart_instruction:
WriteMemoryWord(aligned_addr, new_value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SW(inst.bits, new_value, addr);
PGXP::CPU_SW(inst.bits, aligned_addr, new_value);
}
break;
case InstructionOp::j:
{
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
case InstructionOp::jal:
{
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
g_state.next_instruction_is_branch_delay_slot = true;
Branch((g_state.regs.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
Branch((g_state.pc & UINT32_C(0xF0000000)) | (inst.j.target << 2));
}
break;
@ -1472,7 +1476,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) == ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1481,7 +1485,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (ReadReg(inst.i.rs) != ReadReg(inst.i.rt));
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1490,7 +1494,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) > 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1499,7 +1503,7 @@ restart_instruction:
g_state.next_instruction_is_branch_delay_slot = true;
const bool branch = (static_cast<s32>(ReadReg(inst.i.rs)) <= 0);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1515,10 +1519,10 @@ restart_instruction:
// register is still linked even if the branch isn't taken
const bool link = (rt & u8(0x1E)) == u8(0x10);
if (link)
WriteReg(Reg::ra, g_state.regs.npc);
WriteReg(Reg::ra, g_state.npc);
if (branch)
Branch(g_state.regs.pc + (inst.i.imm_sext32() << 2));
Branch(g_state.pc + (inst.i.imm_sext32() << 2));
}
break;
@ -1610,7 +1614,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1620,7 +1624,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()) + 32, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_CTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1630,7 +1634,7 @@ restart_instruction:
WriteRegDelayed(inst.r.rt, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MFC2(inst.bits, value, value);
PGXP::CPU_MFC2(inst.bits, value);
}
break;
@ -1640,7 +1644,7 @@ restart_instruction:
GTE::WriteRegister(static_cast<u32>(inst.r.rd.GetValue()), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_MTC2(inst.bits, value, value);
PGXP::CPU_MTC2(inst.bits, value);
}
break;
@ -1674,7 +1678,7 @@ restart_instruction:
GTE::WriteRegister(ZeroExtend32(static_cast<u8>(inst.i.rt.GetValue())), value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_LWC2(inst.bits, value, addr);
PGXP::CPU_LWC2(inst.bits, addr, value);
}
break;
@ -1694,7 +1698,7 @@ restart_instruction:
WriteMemoryWord(addr, value);
if constexpr (pgxp_mode >= PGXPMode::Memory)
PGXP::CPU_SWC2(inst.bits, value, addr);
PGXP::CPU_SWC2(inst.bits, addr, value);
}
break;
@ -1734,7 +1738,7 @@ void DispatchInterrupt()
{
// If the instruction we're about to execute is a GTE instruction, delay dispatching the interrupt until the next
// instruction. For some reason, if we don't do this, we end up with incorrectly sorted polygons and flickering..
SafeReadInstruction(g_state.regs.pc, &g_state.next_instruction.bits);
SafeReadInstruction(g_state.pc, &g_state.next_instruction.bits);
if (g_state.next_instruction.op == InstructionOp::cop2 && !g_state.next_instruction.cop.IsCommonInstruction())
{
StallUntilGTEComplete();
@ -1745,7 +1749,10 @@ void DispatchInterrupt()
RaiseException(
Cop0Registers::CAUSE::MakeValueForException(Exception::INT, g_state.next_instruction_is_branch_delay_slot,
g_state.branch_was_taken, g_state.next_instruction.cop.cop_n),
g_state.regs.pc);
g_state.pc);
// Fix up downcount, the pending IRQ set it to zero.
TimingEvents::UpdateCPUDowncount();
}
void UpdateDebugDispatcherFlag()
@ -1763,14 +1770,16 @@ void UpdateDebugDispatcherFlag()
Log_DevPrintf("%s debug dispatcher", use_debug_dispatcher ? "Now using" : "No longer using");
g_state.use_debug_dispatcher = use_debug_dispatcher;
ForceDispatcherExit();
ExitExecution();
}
void ForceDispatcherExit()
void ExitExecution()
{
// zero the downcount so we break out and switch
g_state.downcount = 0;
g_state.frame_done = true;
// can't exit while running events without messing things up
if (TimingEvents::IsRunningEvents())
TimingEvents::SetFrameDone();
else
fastjmp_jmp(&s_jmp_buf, 1);
}
bool HasAnyBreakpoints()
@ -1869,7 +1878,7 @@ void ClearBreakpoints()
bool AddStepOverBreakpoint()
{
u32 bp_pc = g_state.regs.pc;
u32 bp_pc = g_state.pc;
Instruction inst;
if (!SafeReadInstruction(bp_pc, &inst.bits))
@ -1880,7 +1889,7 @@ bool AddStepOverBreakpoint()
if (!IsCallInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(Host::TranslateString("DebuggerMessage", "0x%08X is not a call instruction."),
g_state.regs.pc);
g_state.pc);
return false;
}
@ -1890,7 +1899,7 @@ bool AddStepOverBreakpoint()
if (IsBranchInstruction(inst))
{
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.regs.pc);
Host::TranslateString("DebuggerMessage", "Can't step over double branch at 0x%08X"), g_state.pc);
return false;
}
@ -1905,7 +1914,7 @@ bool AddStepOverBreakpoint()
bool AddStepOutBreakpoint(u32 max_instructions_to_search)
{
// find the branch-to-ra instruction.
u32 ret_pc = g_state.regs.pc;
u32 ret_pc = g_state.pc;
for (u32 i = 0; i < max_instructions_to_search; i++)
{
ret_pc += sizeof(Instruction);
@ -1929,21 +1938,24 @@ bool AddStepOutBreakpoint(u32 max_instructions_to_search)
Host::ReportFormattedDebuggerMessage(
Host::TranslateString("DebuggerMessage", "No return instruction found after %u instructions for step-out at %08X."),
max_instructions_to_search, g_state.regs.pc);
max_instructions_to_search, g_state.pc);
return false;
}
ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
{
const u32 pc = g_state.regs.pc;
const u32 pc = g_state.pc;
// single step - we want to break out after this instruction, so set a pending exit
// the bp check happens just before execution, so this is fine
if (s_single_step)
{
ForceDispatcherExit();
s_single_step = false;
if (s_single_step_done)
ExitExecution();
else
s_single_step_done = true;
s_last_breakpoint_check_pc = pc;
return false;
}
@ -2004,19 +2016,14 @@ ALWAYS_INLINE_RELEASE static bool BreakpointCheck()
}
template<PGXPMode pgxp_mode, bool debug>
static void ExecuteImpl()
[[noreturn]] static void ExecuteImpl()
{
g_using_interpreter = true;
g_state.frame_done = false;
while (!g_state.frame_done)
for (;;)
{
TimingEvents::UpdateCPUDowncount();
TimingEvents::RunEvents();
while (g_state.pending_ticks < g_state.downcount)
{
if (HasPendingInterrupt() && !g_state.interrupt_delay)
DispatchInterrupt();
if constexpr (debug)
{
Cop0ExecutionBreakpointCheck();
@ -2028,12 +2035,11 @@ static void ExecuteImpl()
}
}
g_state.interrupt_delay = false;
g_state.pending_ticks++;
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2065,46 +2071,74 @@ static void ExecuteImpl()
// next load delay
UpdateLoadDelay();
}
TimingEvents::RunEvents();
}
}
void Execute()
static void ExecuteDebug()
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
ExecuteImpl<PGXPMode::CPU, true>();
else
ExecuteImpl<PGXPMode::Memory, false>();
ExecuteImpl<PGXPMode::Memory, true>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
ExecuteImpl<PGXPMode::Disabled, true>();
}
}
void ExecuteDebug()
void Execute()
{
if (g_settings.gpu_pgxp_enable)
const CPUExecutionMode exec_mode = g_settings.cpu_execution_mode;
const bool use_debug_dispatcher = g_state.use_debug_dispatcher;
if (fastjmp_set(&s_jmp_buf) != 0)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, true>();
else
ExecuteImpl<PGXPMode::Memory, true>();
// Before we return, set npc to pc so that we can switch from recs to int.
if (exec_mode != CPUExecutionMode::Interpreter && !use_debug_dispatcher)
g_state.npc = g_state.pc;
return;
}
else
if (use_debug_dispatcher)
{
ExecuteImpl<PGXPMode::Disabled, true>();
ExecuteDebug();
return;
}
switch (exec_mode)
{
case CPUExecutionMode::Recompiler:
case CPUExecutionMode::CachedInterpreter:
CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
{
if (g_settings.gpu_pgxp_enable)
{
if (g_settings.gpu_pgxp_cpu)
ExecuteImpl<PGXPMode::CPU, false>();
else
ExecuteImpl<PGXPMode::Memory, false>();
}
else
{
ExecuteImpl<PGXPMode::Disabled, false>();
}
}
break;
}
}
void SingleStep()
{
s_single_step = true;
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.regs.pc);
if (fastjmp_set(&s_jmp_buf) == 0)
ExecuteDebug();
Host::ReportFormattedDebuggerMessage("Stepped to 0x%08X.", g_state.pc);
}
namespace CodeCache {
@ -2113,8 +2147,8 @@ template<PGXPMode pgxp_mode>
void InterpretCachedBlock(const CodeBlock& block)
{
// set up the state so we've already fetched the instruction
DebugAssert(g_state.regs.pc == block.GetPC());
g_state.regs.npc = block.GetPC() + 4;
DebugAssert(g_state.pc == block.GetPC());
g_state.npc = block.GetPC() + 4;
for (const CodeBlockInstruction& cbi : block.instructions)
{
@ -2129,8 +2163,8 @@ void InterpretCachedBlock(const CodeBlock& block)
g_state.exception_raised = false;
// update pc
g_state.regs.pc = g_state.regs.npc;
g_state.regs.npc += 4;
g_state.pc = g_state.npc;
g_state.npc += 4;
// execute the instruction we previously fetched
ExecuteInstruction<pgxp_mode, false>();
@ -2153,7 +2187,7 @@ template void InterpretCachedBlock<PGXPMode::CPU>(const CodeBlock& block);
template<PGXPMode pgxp_mode>
void InterpretUncachedBlock()
{
g_state.regs.npc = g_state.regs.pc;
g_state.npc = g_state.pc;
if (!FetchInstructionForInterpreterFallback())
return;
@ -2166,7 +2200,7 @@ void InterpretUncachedBlock()
// now executing the instruction we previously fetched
g_state.current_instruction.bits = g_state.next_instruction.bits;
g_state.current_instruction_pc = g_state.regs.pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = g_state.next_instruction_is_branch_delay_slot;
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.next_instruction_is_branch_delay_slot = false;
@ -2182,7 +2216,7 @@ void InterpretUncachedBlock()
}
else
{
g_state.regs.pc = g_state.regs.npc;
g_state.pc = g_state.npc;
}
// execute the instruction we previously fetched

@ -56,7 +56,9 @@ struct State
Registers regs = {};
Cop0Registers cop0_regs = {};
Instruction next_instruction = {};
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
// address of the instruction currently being executed
Instruction current_instruction = {};
@ -66,15 +68,14 @@ struct State
bool next_instruction_is_branch_delay_slot = false;
bool branch_was_taken = false;
bool exception_raised = false;
bool interrupt_delay = false;
bool frame_done = false;
// load delays
Reg load_delay_reg = Reg::count;
u32 load_delay_value = 0;
Reg next_load_delay_reg = Reg::count;
u32 load_delay_value = 0;
u32 next_load_delay_value = 0;
Instruction next_instruction = {};
CacheControl cache_control{0};
// GTE registers are stored here so we can access them on ARM with a single instruction
@ -95,7 +96,6 @@ struct State
};
extern State g_state;
extern bool g_using_interpreter;
void Initialize();
void Shutdown();
@ -106,38 +106,37 @@ void UpdateFastmemBase();
/// Executes interpreter loop.
void Execute();
void ExecuteDebug();
void SingleStep();
// Forces an early exit from the CPU dispatcher.
void ForceDispatcherExit();
void ExitExecution();
ALWAYS_INLINE Registers& GetRegs()
ALWAYS_INLINE static Registers& GetRegs()
{
return g_state.regs;
}
ALWAYS_INLINE TickCount GetPendingTicks()
ALWAYS_INLINE static TickCount GetPendingTicks()
{
return g_state.pending_ticks;
}
ALWAYS_INLINE void ResetPendingTicks()
ALWAYS_INLINE static void ResetPendingTicks()
{
g_state.gte_completion_tick =
(g_state.pending_ticks < g_state.gte_completion_tick) ? (g_state.gte_completion_tick - g_state.pending_ticks) : 0;
g_state.pending_ticks = 0;
}
ALWAYS_INLINE void AddPendingTicks(TickCount ticks)
ALWAYS_INLINE static void AddPendingTicks(TickCount ticks)
{
g_state.pending_ticks += ticks;
}
// state helpers
ALWAYS_INLINE bool InUserMode()
ALWAYS_INLINE static bool InUserMode()
{
return g_state.cop0_regs.sr.KUc;
}
ALWAYS_INLINE bool InKernelMode()
ALWAYS_INLINE static bool InKernelMode()
{
return !g_state.cop0_regs.sr.KUc;
}

@ -12,13 +12,13 @@ void RaiseException(Exception excode);
void RaiseException(u32 CAUSE_bits, u32 EPC);
void RaiseBreakException(u32 CAUSE_bits, u32 EPC, u32 instruction_bits);
ALWAYS_INLINE bool HasPendingInterrupt()
ALWAYS_INLINE static bool HasPendingInterrupt()
{
return g_state.cop0_regs.sr.IEc &&
(((g_state.cop0_regs.cause.bits & g_state.cop0_regs.sr.bits) & (UINT32_C(0xFF) << 8)) != 0);
}
ALWAYS_INLINE void CheckForPendingInterrupt()
ALWAYS_INLINE static void CheckForPendingInterrupt()
{
if (HasPendingInterrupt())
g_state.downcount = 0;
@ -28,36 +28,36 @@ void DispatchInterrupt();
void UpdateDebugDispatcherFlag();
// icache stuff
ALWAYS_INLINE bool IsCachedAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static bool IsCachedAddress(VirtualMemoryAddress address)
{
// KUSEG, KSEG0
return (address >> 29) <= 4;
}
ALWAYS_INLINE u32 GetICacheLine(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheLine(VirtualMemoryAddress address)
{
return ((address >> 4) & 0xFFu);
}
ALWAYS_INLINE u32 GetICacheLineOffset(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheLineOffset(VirtualMemoryAddress address)
{
return (address & (ICACHE_LINE_SIZE - 1));
}
ALWAYS_INLINE u32 GetICacheTagForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheTagForAddress(VirtualMemoryAddress address)
{
return (address & ICACHE_TAG_ADDRESS_MASK);
}
ALWAYS_INLINE u32 GetICacheFillTagForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheFillTagForAddress(VirtualMemoryAddress address)
{
static const u32 invalid_bits[4] = {0, 1, 3, 7};
return GetICacheTagForAddress(address) | invalid_bits[(address >> 2) & 0x03u];
}
ALWAYS_INLINE u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static u32 GetICacheTagMaskForAddress(VirtualMemoryAddress address)
{
static const u32 mask[4] = {ICACHE_TAG_ADDRESS_MASK | 1, ICACHE_TAG_ADDRESS_MASK | 2, ICACHE_TAG_ADDRESS_MASK | 4,
ICACHE_TAG_ADDRESS_MASK | 8};
return mask[(address >> 2) & 0x03u];
}
ALWAYS_INLINE bool CompareICacheTag(VirtualMemoryAddress address)
ALWAYS_INLINE static bool CompareICacheTag(VirtualMemoryAddress address)
{
const u32 line = GetICacheLine(address);
return ((g_state.icache_tags[line] & GetICacheTagMaskForAddress(address)) == GetICacheTagForAddress(address));
@ -68,7 +68,7 @@ TickCount GetICacheFillTicks(VirtualMemoryAddress address);
u32 FillICache(VirtualMemoryAddress address);
void CheckAndUpdateICacheTags(u32 line_count, TickCount uncached_ticks);
ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address)
ALWAYS_INLINE static Segment GetSegmentForAddress(VirtualMemoryAddress address)
{
switch ((address >> 29))
{
@ -91,12 +91,12 @@ ALWAYS_INLINE Segment GetSegmentForAddress(VirtualMemoryAddress address)
}
}
ALWAYS_INLINE PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address)
ALWAYS_INLINE static constexpr PhysicalMemoryAddress VirtualAddressToPhysical(VirtualMemoryAddress address)
{
return (address & PHYSICAL_MEMORY_ADDRESS_MASK);
}
ALWAYS_INLINE VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment)
ALWAYS_INLINE static VirtualMemoryAddress PhysicalAddressToVirtual(PhysicalMemoryAddress address, Segment segment)
{
static constexpr std::array<VirtualMemoryAddress, 4> bases = {{0x00000000, 0x80000000, 0xA0000000, 0xE0000000}};
return bases[static_cast<u32>(segment)] | address;
@ -115,12 +115,12 @@ bool WriteMemoryWord(VirtualMemoryAddress addr, u32 value);
void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size, TickCount* read_ticks);
void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize size);
ALWAYS_INLINE void AddGTETicks(TickCount ticks)
ALWAYS_INLINE static void AddGTETicks(TickCount ticks)
{
g_state.gte_completion_tick = g_state.pending_ticks + ticks + 1;
}
ALWAYS_INLINE void StallUntilGTEComplete()
ALWAYS_INLINE static void StallUntilGTEComplete()
{
g_state.pending_ticks =
(g_state.gte_completion_tick > g_state.pending_ticks) ? g_state.gte_completion_tick : g_state.pending_ticks;

@ -1156,7 +1156,7 @@ Value CodeGenerator::GetCurrentInstructionPC(u32 offset /* = 0 */)
void CodeGenerator::WriteNewPC(const Value& value, bool commit)
{
// TODO: This _could_ be moved into the register cache, but would it gain anything?
EmitStoreGuestRegister(Reg::pc, value);
EmitStoreCPUStructField(offsetof(CPU::State, pc), value);
if (commit)
{
m_pc_valid = value.IsConstant();
@ -1450,7 +1450,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_8);
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lb));
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LBx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
{
@ -1468,7 +1468,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
ConvertValueSizeInPlace(&result, RegSize_32, (cbi.instruction.op == InstructionOp::lh));
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LHx, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
{
@ -1483,7 +1483,7 @@ bool CodeGenerator::Compile_Load(const CodeBlockInstruction& cbi)
{
result = EmitLoadGuestMemory(cbi, address, address_spec, RegSize_32);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), result, address);
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, result);
if (address_spec)
value_spec = SpeculativeReadMemory(*address_spec);
@ -1522,10 +1522,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sb:
{
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_8), address);
}
EmitFunctionCall(nullptr, PGXP::CPU_SB, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_8, value);
@ -1553,10 +1550,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sh:
{
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits),
value.ViewAsSize(RegSize_16), address);
}
EmitFunctionCall(nullptr, PGXP::CPU_SH, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_16, value);
@ -1584,7 +1578,7 @@ bool CodeGenerator::Compile_Store(const CodeBlockInstruction& cbi)
case InstructionOp::sw:
{
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, value);
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, value);
@ -1688,7 +1682,7 @@ bool CodeGenerator::Compile_LoadLeftRight(const CodeBlockInstruction& cbi)
shift.ReleaseAndClear();
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
EmitFunctionCall(nullptr, PGXP::CPU_LW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.i.rt, std::move(mem));
@ -1751,7 +1745,7 @@ bool CodeGenerator::Compile_StoreLeftRight(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, address_spec, RegSize_32, mem);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), mem, address);
EmitFunctionCall(nullptr, PGXP::CPU_SW, Value::FromConstantU32(cbi.instruction.bits), address, mem);
InstructionEpilogue(cbi);
return true;
@ -2950,7 +2944,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_LWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
}
else
{
@ -2958,7 +2952,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
EmitStoreGuestMemory(cbi, address, spec_address, RegSize_32, value);
if (g_settings.gpu_pgxp_enable)
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), value, address);
EmitFunctionCall(nullptr, PGXP::CPU_SWC2, Value::FromConstantU32(cbi.instruction.bits), address, value);
SpeculativeValue spec_base = SpeculativeReadReg(cbi.instruction.i.rs);
if (spec_base)
@ -2988,11 +2982,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
// PGXP done first here before ownership is transferred.
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::cfcn) ? PGXP::CPU_CFC2 : PGXP::CPU_MFC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
EmitFunctionCall(nullptr, PGXP::CPU_MFC2, Value::FromConstantU32(cbi.instruction.bits), value);
m_register_cache.WriteGuestRegisterDelayed(cbi.instruction.r.rt, std::move(value));
SpeculativeWriteReg(cbi.instruction.r.rt, std::nullopt);
@ -3014,11 +3004,7 @@ bool CodeGenerator::Compile_cop2(const CodeBlockInstruction& cbi)
DoGTERegisterWrite(reg, value);
if (g_settings.gpu_pgxp_enable)
{
EmitFunctionCall(
nullptr, (cbi.instruction.cop.CommonOp() == CopCommonInstruction::ctcn) ? PGXP::CPU_CTC2 : PGXP::CPU_MTC2,
Value::FromConstantU32(cbi.instruction.bits), value, value);
}
EmitFunctionCall(nullptr, PGXP::CPU_MTC2, Value::FromConstantU32(cbi.instruction.bits), value);
InstructionEpilogue(cbi);
return true;

@ -32,9 +32,6 @@ constexpr u32 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u32 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s32 GetPCDisplacement(const void* current, const void* target)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -201,10 +198,7 @@ void CodeGenerator::EmitEndBlock(bool free_registers /* = true */, bool emit_ret
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
if (emit_return)
{
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr);
}
}
void CodeGenerator::EmitExceptionExit()
@ -219,7 +213,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->add(a32::sp, a32::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->bx(a32::lr);
}
@ -2072,64 +2065,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state);
a32::Label frame_done_loop;
a32::Label exit_dispatcher;
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, frame_done)));
m_emit->tst(a32::r0, 1);
m_emit->b(a32::ne, &exit_dispatcher);
// r0 <- sr
a32::Label no_interrupt;
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tst(a32::r0, 1);
m_emit->b(a32::eq, &no_interrupt);
// r1 <- cause
// r0 (sr) & cause
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a32::r0, a32::r0, a32::r1);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a32::r0, 0xFF00);
m_emit->b(a32::eq, &no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// r0 <- head event->downcount
// downcount <- r0
EmitLoadGlobalAddress(0, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0));
m_emit->ldr(a32::r0, a32::MemOperand(a32::r0, offsetof(TimingEvent, m_downcount)));
m_emit->str(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
a32::Label event_test;
m_emit->b(&event_test);
// main dispatch loop
a32::Label main_loop;
m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// r0 <- pending_ticks
// r1 <- downcount
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// while downcount < pending_ticks
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::ge, &downcount_hit);
// time to lookup the block
// r0 <- pc
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, regs.pc)));
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pc)));
// r1 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(2, CodeCache::GetFastMapPointer());
@ -2140,21 +2085,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a32::r0, a32::MemOperand(a32::r1, a32::r0));
m_emit->blx(a32::r0);
// end while
m_emit->Bind(&downcount_hit);
// check events then for frame done
// r0 <- pending_ticks
// r1 <- downcount
m_emit->ldr(a32::r0, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(1, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1));
m_emit->ldr(a32::r1, a32::MemOperand(a32::r1, offsetof(TimingEvent, m_downcount)));
m_emit->ldr(a32::r1, a32::MemOperand(GetHostReg32(RCPUPTR), offsetof(State, downcount)));
// while downcount < pending_ticks
a32::Label downcount_hit;
m_emit->cmp(a32::r0, a32::r1);
m_emit->b(a32::lt, &frame_done_loop);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop);
m_emit->b(a32::lt, &main_loop);
// all done
m_emit->Bind(&exit_dispatcher);
// end while
m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&main_loop);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);

@ -30,9 +30,6 @@ constexpr u64 FUNCTION_CALLER_SAVED_SPACE_RESERVE = 144; // 18 registers -> 224
constexpr u64 FUNCTION_STACK_SIZE =
FUNCTION_CALLEE_SAVED_SPACE_RESERVE + FUNCTION_CALLER_SAVED_SPACE_RESERVE + FUNCTION_CALL_SHADOW_SPACE;
// PC we return to after the end of the block
static void* s_dispatcher_return_address;
static s64 GetPCDisplacement(const void* current, const void* target)
{
Assert(Common::IsAlignedPow2(reinterpret_cast<size_t>(current), 4));
@ -256,7 +253,6 @@ void CodeGenerator::EmitExceptionExit()
m_register_cache.PopCalleeSavedRegisters(false);
m_emit->Add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);
// m_emit->b(GetPCDisplacement(GetCurrentCodePointer(), s_dispatcher_return_address));
m_emit->Ret();
}
@ -2278,62 +2274,16 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(RCPUPTR, &g_state);
a64::Label frame_done_loop;
a64::Label exit_dispatcher;
m_emit->Bind(&frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->ldrb(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, frame_done)));
m_emit->tbnz(a64::w8, 0, &exit_dispatcher);
// x8 <- sr
a64::Label no_interrupt;
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.sr.bits)));
// if Iec == 0 then goto no_interrupt
m_emit->tbz(a64::w8, 0, &no_interrupt);
// x9 <- cause
// x8 (sr) & cause
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, cop0_regs.cause.bits)));
m_emit->and_(a64::w8, a64::w8, a64::w9);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->tst(a64::w8, 0xFF00);
m_emit->b(&no_interrupt, a64::eq);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->Bind(&no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// x8 <- head event->downcount
// downcount <- x8
EmitLoadGlobalAddress(8, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x8, a64::MemOperand(a64::x8));
m_emit->ldr(a64::w8, a64::MemOperand(a64::x8, offsetof(TimingEvent, m_downcount)));
m_emit->str(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
a64::Label event_test;
m_emit->b(&event_test);
// main dispatch loop
a64::Label main_loop;
m_emit->Bind(&main_loop);
s_dispatcher_return_address = GetCurrentCodePointer();
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while downcount < pending_ticks
a64::Label downcount_hit;
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&downcount_hit, a64::ge);
// time to lookup the block
// w8 <- pc
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, regs.pc)));
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pc)));
// x9 <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(10, CodeCache::GetFastMapPointer());
@ -2345,21 +2295,20 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
m_emit->ldr(a64::x8, a64::MemOperand(a64::x9, a64::x8, a64::LSL, 3));
m_emit->blr(a64::x8);
// end while
m_emit->Bind(&downcount_hit);
// check events then for frame done
// w8 <- pending_ticks
// w9 <- downcount
m_emit->ldr(a64::w8, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, pending_ticks)));
EmitLoadGlobalAddress(9, TimingEvents::GetHeadEventPtr());
m_emit->ldr(a64::x9, a64::MemOperand(a64::x9));
m_emit->ldr(a64::w9, a64::MemOperand(a64::x9, offsetof(TimingEvent, m_downcount)));
m_emit->ldr(a64::w9, a64::MemOperand(GetHostReg64(RCPUPTR), offsetof(State, downcount)));
// while downcount < pending_ticks
m_emit->cmp(a64::w8, a64::w9);
m_emit->b(&frame_done_loop, a64::lt);
m_emit->b(&main_loop, a64::lt);
m_emit->Bind(&event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->b(&frame_done_loop);
m_emit->b(&main_loop);
// all done
m_emit->Bind(&exit_dispatcher);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->add(a64::sp, a64::sp, FUNCTION_STACK_SIZE);

@ -3024,59 +3024,17 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
EmitLoadGlobalAddress(Xbyak::Operand::RBP, &g_state);
Xbyak::Label frame_done_loop;
Xbyak::Label exit_dispatcher;
m_emit->L(frame_done_loop);
// if frame_done goto exit_dispatcher
m_emit->test(m_emit->byte[m_emit->rbp + offsetof(State, frame_done)], 1);
m_emit->jnz(exit_dispatcher, Xbyak::CodeGenerator::T_NEAR);
// eax <- sr
Xbyak::Label no_interrupt;
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.sr.bits)]);
// if Iec == 0 then goto no_interrupt
m_emit->test(m_emit->eax, 1);
m_emit->jz(no_interrupt);
// sr & cause
m_emit->and_(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, cop0_regs.cause.bits)]);
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
m_emit->test(m_emit->eax, 0xFF00);
m_emit->jz(no_interrupt);
// we have an interrupt
EmitCall(reinterpret_cast<const void*>(&DispatchInterrupt));
// no interrupt or we just serviced it
m_emit->L(no_interrupt);
// TimingEvents::UpdateCPUDowncount:
// eax <- head event->downcount
// downcount <- eax
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->mov(m_emit->dword[m_emit->rbp + offsetof(State, downcount)], m_emit->eax);
Xbyak::Label event_test;
m_emit->jmp(event_test);
// main dispatch loop
Xbyak::Label main_loop;
m_emit->align(16);
m_emit->L(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jge(downcount_hit);
// time to lookup the block
// eax <- pc
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, regs.pc)]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pc)]);
// rcx <- s_fast_map[pc >> 16]
EmitLoadGlobalAddress(Xbyak::Operand::RBX, CodeCache::GetFastMapPointer());
@ -3087,22 +3045,19 @@ CodeCache::DispatcherFunction CodeGenerator::CompileDispatcher()
// call(rcx[pc * 2]) (fast_map[pc >> 2])
m_emit->call(m_emit->qword[m_emit->rcx + m_emit->rax * 2]);
m_emit->jmp(main_loop);
// eax <- pending_ticks
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
// end while
m_emit->L(downcount_hit);
// while eax < downcount
Xbyak::Label downcount_hit;
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, downcount)]);
m_emit->jl(main_loop);
// check events then for frame done
EmitLoadGlobalAddress(Xbyak::Operand::RAX, TimingEvents::GetHeadEventPtr());
m_emit->mov(m_emit->rax, m_emit->qword[m_emit->rax]);
m_emit->mov(m_emit->eax, m_emit->dword[m_emit->rax + offsetof(TimingEvent, m_downcount)]);
m_emit->cmp(m_emit->eax, m_emit->dword[m_emit->rbp + offsetof(State, pending_ticks)]);
m_emit->jg(frame_done_loop);
m_emit->L(event_test);
EmitCall(reinterpret_cast<const void*>(&TimingEvents::RunEvents));
m_emit->jmp(frame_done_loop);
m_emit->jmp(main_loop);
// all done
m_emit->L(exit_dispatcher);
RestoreStackAfterCall(stack_adjust);
m_register_cache.PopCalleeSavedRegisters(true);
m_emit->ret();

@ -130,6 +130,13 @@ constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128;
// Alignment of code stoarge.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#elif defined(CPU_RISCV64)
using HostReg = unsigned;
// Alignment of code stoarge.
constexpr u32 CODE_STORAGE_ALIGNMENT = 4096;
#else
using HostReg = int;

@ -60,12 +60,8 @@ enum class Reg : u8
sp,
fp,
ra,
// not accessible to instructions
hi,
lo,
pc,
npc,
count
};
@ -213,6 +209,7 @@ union Instruction
}
ALWAYS_INLINE Cop0Instruction Cop0Op() const { return static_cast<Cop0Instruction>(bits & UINT32_C(0x3F)); }
ALWAYS_INLINE u32 Cop2Index() const { return ((bits >> 11) & 0x1F) | ((bits >> 17) & 0x20); }
} cop;
bool IsCop2Instruction() const
@ -240,7 +237,7 @@ struct Registers
{
union
{
u32 r[static_cast<u8>(Reg::count)];
u32 r[static_cast<u8>(Reg::count) + 1]; // +1 for the dummy load delay write slot
struct
{
@ -276,12 +273,8 @@ struct Registers
u32 sp; // r29
u32 fp; // r30
u32 ra; // r31
// not accessible to instructions
u32 hi;
u32 lo;
u32 pc; // at execution time: the address of the next instruction to execute (already fetched)
u32 npc; // at execution time: the address of the next instruction to fetch
};
};
};

@ -107,7 +107,7 @@ static const std::array<u32*, 38> REGISTERS {
&CPU::g_state.regs.hi,
&CPU::g_state.cop0_regs.BadVaddr,
&CPU::g_state.cop0_regs.cause.bits,
&CPU::g_state.regs.pc,
&CPU::g_state.pc,
};
/// Number of registers in GDB remote protocol for MIPS III.

@ -901,9 +901,10 @@ void GPU::CRTCTickEvent(TickCount ticks)
InterruptController::InterruptRequest(InterruptController::IRQ::VBLANK);
// flush any pending draws and "scan out" the image
// TODO: move present in here I guess
FlushRender();
UpdateDisplay();
System::FrameDone();
TimingEvents::SetFrameDone();
// switch fields early. this is needed so we draw to the correct one.
if (m_GPUSTAT.InInterleaved480iMode())

@ -4,13 +4,13 @@
#include "gte.h"
#include "common/assert.h"
#include "common/bitutils.h"
#include "util/state_wrapper.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "host_display.h"
#include "pgxp.h"
#include "settings.h"
#include "timing_event.h"
#include "util/state_wrapper.h"
#include <algorithm>
#include <array>
#include <numeric>
@ -471,11 +471,12 @@ ALWAYS_INLINE static u32 UNRDivide(u32 lhs, u32 rhs)
return std::min<u32>(0x1FFFF, result);
}
static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
static void MulMatVec(const s16* M_, const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][0]) * s64(Vx)) + (s64(M[i][1]) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 0)) * s64(Vx)) + (s64(M(i, 1)) * s64(Vy))) + \
(s64(M(i, 2)) * s64(Vz)), \
shift, lm)
dot3(0);
@ -483,15 +484,17 @@ static void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 V
dot3(2);
#undef dot3
#undef M
}
static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
static void MulMatVec(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
TruncateAndSetMACAndIR<i + 1>( \
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx))) + \
(s64(M[i][1]) * s64(Vy))) + \
(s64(M[i][2]) * s64(Vz)), \
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx))) + \
(s64(M(i, 1)) * s64(Vy))) + \
(s64(M(i, 2)) * s64(Vz)), \
shift, lm)
dot3(0);
@ -499,19 +502,20 @@ static void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16
dot3(2);
#undef dot3
#undef M
}
static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift,
bool lm)
static void MulMatVecBuggy(const s16* M_, const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
{
#define M(i, j) M_[((i)*3) + (j)]
#define dot3(i) \
do \
{ \
TruncateAndSetIR<i + 1>(static_cast<s32>(SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>( \
(s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx)))) >> \
(s64(T[i]) << 12) + (s64(M(i, 0)) * s64(Vx)))) >> \
shift), \
false); \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][1]) * s64(Vy))) + (s64(M[i][2]) * s64(Vz)), \
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M(i, 1)) * s64(Vy))) + (s64(M(i, 2)) * s64(Vz)), \
shift, lm); \
} while (0)
@ -520,82 +524,50 @@ static void MulMatVecBuggy(const s16 M[3][3], const s32 T[3], const s16 Vx, cons
dot3(2);
#undef dot3
#undef M
}
static void Execute_MVMVA(Instruction inst)
{
REGS.FLAG.Clear();
// TODO: Remove memcpy..
s16 M[3][3];
switch (inst.mvmva_multiply_matrix)
static constexpr const s16* M_lookup[4] = {&REGS.RT[0][0], &REGS.LLM[0][0], &REGS.LCM[0][0], nullptr};
static constexpr const s16* V_lookup[4][3] = {
{&REGS.V0[0], &REGS.V0[1], &REGS.V0[2]},
{&REGS.V1[0], &REGS.V1[1], &REGS.V1[2]},
{&REGS.V2[0], &REGS.V2[1], &REGS.V2[2]},
{&REGS.IR1, &REGS.IR2, &REGS.IR3},
};
static constexpr const s32 zero_T[3] = {};
static constexpr const s32* T_lookup[4] = {REGS.TR, REGS.BK, REGS.FC, zero_T};
const s16* M = M_lookup[inst.mvmva_multiply_matrix];
const s16* const* const V = V_lookup[inst.mvmva_multiply_vector];
const s32* const T = T_lookup[inst.mvmva_translation_vector];
s16 buggy_M[3][3];
if (!M)
{
case 0:
std::memcpy(M, REGS.RT, sizeof(s16) * 3 * 3);
break;
case 1:
std::memcpy(M, REGS.LLM, sizeof(s16) * 3 * 3);
break;
case 2:
std::memcpy(M, REGS.LCM, sizeof(s16) * 3 * 3);
break;
default:
{
// buggy
M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
M[0][2] = REGS.IR0;
M[1][0] = REGS.RT[0][2];
M[1][1] = REGS.RT[0][2];
M[1][2] = REGS.RT[0][2];
M[2][0] = REGS.RT[1][1];
M[2][1] = REGS.RT[1][1];
M[2][2] = REGS.RT[1][1];
}
break;
// buggy
buggy_M[0][0] = -static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
buggy_M[0][1] = static_cast<s16>(ZeroExtend16(REGS.RGBC[0]) << 4);
buggy_M[0][2] = REGS.IR0;
buggy_M[1][0] = REGS.RT[0][2];
buggy_M[1][1] = REGS.RT[0][2];
buggy_M[1][2] = REGS.RT[0][2];
buggy_M[2][0] = REGS.RT[1][1];
buggy_M[2][1] = REGS.RT[1][1];
buggy_M[2][2] = REGS.RT[1][1];
M = &buggy_M[0][0];
}
s16 Vx, Vy, Vz;
switch (inst.mvmva_multiply_vector)
{
case 0:
Vx = REGS.V0[0];
Vy = REGS.V0[1];
Vz = REGS.V0[2];
break;
case 1:
Vx = REGS.V1[0];
Vy = REGS.V1[1];
Vz = REGS.V1[2];
break;
case 2:
Vx = REGS.V2[0];
Vy = REGS.V2[1];
Vz = REGS.V2[2];
break;
default:
Vx = REGS.IR1;
Vy = REGS.IR2;
Vz = REGS.IR3;
break;
}
static const s32 zero_T[3] = {};
switch (inst.mvmva_translation_vector)
{
case 0:
MulMatVec(M, REGS.TR, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 1:
MulMatVec(M, REGS.BK, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
case 2:
MulMatVecBuggy(M, REGS.FC, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
default:
MulMatVec(M, zero_T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
break;
}
const s16 Vx = *V[0];
const s16 Vy = *V[1];
const s16 Vz = *V[2];
if (inst.mvmva_translation_vector != 2)
MulMatVec(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
else
MulMatVecBuggy(M, T, Vx, Vy, Vz, inst.GetShift(), inst.lm);
REGS.FLAG.UpdateError();
}
@ -874,10 +846,10 @@ static ALWAYS_INLINE void InterpolateColor(s64 in_MAC1, s64 in_MAC2, s64 in_MAC3
static void NCS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
PushRGBFromMAC();
@ -909,10 +881,10 @@ static void Execute_NCT(Instruction inst)
static void NCCS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
@ -950,10 +922,10 @@ static void Execute_NCCT(Instruction inst)
static void NCDS(const s16 V[3], u8 shift, bool lm)
{
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
MulMatVec(REGS.LLM, V[0], V[1], V[2], shift, lm);
MulMatVec(&REGS.LLM[0][0], V[0], V[1], V[2], shift, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
@ -999,7 +971,7 @@ static void Execute_CC(Instruction inst)
const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12)
@ -1021,7 +993,7 @@ static void Execute_CDP(Instruction inst)
const bool lm = inst.lm;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
MulMatVec(REGS.LCM, REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
MulMatVec(&REGS.LCM[0][0], REGS.BK, REGS.IR1, REGS.IR2, REGS.IR3, shift, lm);
// No need to assign these to MAC[1-3], as it'll never overflow.
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4

@ -108,8 +108,7 @@ static PGXP_value CP0_reg[32];
#define CPU_Lo CPU_reg[33]
// GTE registers
static PGXP_value GTE_data_reg[32];
static PGXP_value GTE_ctrl_reg[32];
static PGXP_value GTE_regs[64];
static PGXP_value* Mem = nullptr;
static PGXP_value* vertexCache = nullptr;
@ -274,8 +273,7 @@ void Initialize()
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
if (!Mem)
{
@ -306,8 +304,7 @@ void Reset()
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
if (Mem)
std::memset(Mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE);
@ -329,8 +326,7 @@ void Shutdown()
Mem = nullptr;
}
std::memset(GTE_data_reg, 0, sizeof(GTE_data_reg));
std::memset(GTE_ctrl_reg, 0, sizeof(GTE_ctrl_reg));
std::memset(GTE_regs, 0, sizeof(GTE_regs));
std::memset(CPU_reg, 0, sizeof(CPU_reg));
std::memset(CP0_reg, 0, sizeof(CP0_reg));
@ -344,18 +340,19 @@ void Shutdown()
#define rt(_instr) ((_instr >> 16) & 0x1F) // The rt part of the instruction register
#define rs(_instr) ((_instr >> 21) & 0x1F) // The rs part of the instruction register
#define imm(_instr) (_instr & 0xFFFF) // The immediate part of the instruction register
#define cop2idx(_instr) (((_instr >> 11) & 0x1F) | ((_instr >> 17) & 0x20))
#define SX0 (GTE_data_reg[12].x)
#define SY0 (GTE_data_reg[12].y)
#define SX1 (GTE_data_reg[13].x)
#define SY1 (GTE_data_reg[13].y)
#define SX2 (GTE_data_reg[14].x)
#define SY2 (GTE_data_reg[14].y)
#define SX0 (GTE_regs[12].x)
#define SY0 (GTE_regs[12].y)
#define SX1 (GTE_regs[13].x)
#define SY1 (GTE_regs[13].y)
#define SX2 (GTE_regs[14].x)
#define SY2 (GTE_regs[14].y)
#define SXY0 (GTE_data_reg[12])
#define SXY1 (GTE_data_reg[13])
#define SXY2 (GTE_data_reg[14])
#define SXYP (GTE_data_reg[15])
#define SXY0 (GTE_regs[12])
#define SXY1 (GTE_regs[13])
#define SXY2 (GTE_regs[14])
#define SXYP (GTE_regs[15])
void GTE_PushSXYZ2f(float x, float y, float z, u32 v)
{
@ -428,49 +425,35 @@ static void PGXP_MTC2_int(PGXP_value value, u32 reg)
return;
}
GTE_data_reg[reg] = value;
GTE_regs[reg] = value;
}
////////////////////////////////////
// Data transfer tracking
////////////////////////////////////
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal)
void CPU_MFC2(u32 instr, u32 rdVal)
{
// CPU[Rt] = GTE_D[Rd]
Validate(&GTE_data_reg[rd(instr)], rdVal);
CPU_reg[rt(instr)] = GTE_data_reg[rd(instr)];
CPU_reg[rt(instr)].value = rtVal;
const u32 idx = cop2idx(instr);
Validate(&GTE_regs[idx], rdVal);
CPU_reg[rt(instr)] = GTE_regs[idx];
CPU_reg[rt(instr)].value = rdVal;
}
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal)
void CPU_MTC2(u32 instr, u32 rtVal)
{
// GTE_D[Rd] = CPU[Rt]
const u32 idx = cop2idx(instr);
Validate(&CPU_reg[rt(instr)], rtVal);
PGXP_MTC2_int(CPU_reg[rt(instr)], rd(instr));
GTE_data_reg[rd(instr)].value = rdVal;
}
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal)
{
// CPU[Rt] = GTE_C[Rd]
Validate(&GTE_ctrl_reg[rd(instr)], rdVal);
CPU_reg[rt(instr)] = GTE_ctrl_reg[rd(instr)];
CPU_reg[rt(instr)].value = rtVal;
}
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal)
{
// GTE_C[Rd] = CPU[Rt]
Validate(&CPU_reg[rt(instr)], rtVal);
GTE_ctrl_reg[rd(instr)] = CPU_reg[rt(instr)];
GTE_ctrl_reg[rd(instr)].value = rdVal;
PGXP_MTC2_int(CPU_reg[rt(instr)], idx);
GTE_regs[idx].value = rtVal;
}
////////////////////////////////////
// Memory Access
////////////////////////////////////
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
void CPU_LWC2(u32 instr, u32 addr, u32 rtVal)
{
// GTE_D[Rt] = Mem[addr]
PGXP_value val;
@ -478,11 +461,11 @@ void CPU_LWC2(u32 instr, u32 rtVal, u32 addr)
PGXP_MTC2_int(val, rt(instr));
}
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr)
void CPU_SWC2(u32 instr, u32 addr, u32 rtVal)
{
// Mem[addr] = GTE_D[Rt]
Validate(&GTE_data_reg[rt(instr)], rtVal);
WriteMem(&GTE_data_reg[rt(instr)], addr);
Validate(&GTE_regs[rt(instr)], rtVal);
WriteMem(&GTE_regs[rt(instr)], addr);
}
ALWAYS_INLINE_RELEASE void PGXP_CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex)
@ -575,29 +558,29 @@ bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, f
#define imm_sext(_instr) \
static_cast<s32>(static_cast<s16>(_instr & 0xFFFF)) // The immediate part of the instruction register
void CPU_LW(u32 instr, u32 rtVal, u32 addr)
void CPU_LW(u32 instr, u32 addr, u32 rtVal)
{
// Rt = Mem[Rs + Im]
ValidateAndCopyMem(&CPU_reg[rt(instr)], addr, rtVal);
}
void CPU_LBx(u32 instr, u32 rtVal, u32 addr)
void CPU_LBx(u32 instr, u32 addr, u32 rtVal)
{
CPU_reg[rt(instr)] = PGXP_value_invalid;
}
void CPU_LHx(u32 instr, u32 rtVal, u32 addr)
void CPU_LHx(u32 instr, u32 addr, u32 rtVal)
{
// Rt = Mem[Rs + Im] (sign/zero extended)
ValidateAndCopyMem16(&CPU_reg[rt(instr)], addr, rtVal, 1);
}
void CPU_SB(u32 instr, u8 rtVal, u32 addr)
void CPU_SB(u32 instr, u32 addr, u32 rtVal)
{
WriteMem(&PGXP_value_invalid, addr);
}
void CPU_SH(u32 instr, u16 rtVal, u32 addr)
void CPU_SH(u32 instr, u32 addr, u32 rtVal)
{
PGXP_value* val = &CPU_reg[rt(instr)];
@ -606,7 +589,7 @@ void CPU_SH(u32 instr, u16 rtVal, u32 addr)
WriteMem16(val, addr);
}
void CPU_SW(u32 instr, u32 rtVal, u32 addr)
void CPU_SW(u32 instr, u32 addr, u32 rtVal)
{
// Mem[Rs + Im] = Rt
PGXP_value* val = &CPU_reg[rt(instr)];
@ -1587,10 +1570,10 @@ void CPU_MFHI(u32 instr, u32 hiVal)
CPU_reg[rd(instr)] = CPU_Hi;
}
void CPU_MTHI(u32 instr, u32 rdVal)
void CPU_MTHI(u32 instr, u32 rsVal)
{
// Hi = Rd
Validate(&CPU_reg[rd(instr)], rdVal);
Validate(&CPU_reg[rs(instr)], rsVal);
CPU_Hi = CPU_reg[rd(instr)];
}
@ -1603,10 +1586,10 @@ void CPU_MFLO(u32 instr, u32 loVal)
CPU_reg[rd(instr)] = CPU_Lo;
}
void CPU_MTLO(u32 instr, u32 rdVal)
void CPU_MTLO(u32 instr, u32 rsVal)
{
// Lo = Rd
Validate(&CPU_reg[rd(instr)], rdVal);
Validate(&CPU_reg[rs(instr)], rsVal);
CPU_Lo = CPU_reg[rd(instr)];
}

@ -34,24 +34,22 @@ int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2);
float GTE_NCLIP();
// Data transfer tracking
void CPU_MFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
void CPU_CFC2(u32 instr, u32 rtVal, u32 rdVal); // copy GTE ctrl reg to GPR reg (CFC2)
void CPU_CTC2(u32 instr, u32 rdVal, u32 rtVal); // copy GPR reg to GTE ctrl reg (CTC2)
void CPU_MFC2(u32 instr, u32 rdVal); // copy GTE data reg to GPR reg (MFC2)
void CPU_MTC2(u32 instr, u32 rtVal); // copy GPR reg to GTE data reg (MTC2)
// Memory Access
void CPU_LWC2(u32 instr, u32 rtVal, u32 addr); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 rtVal, u32 addr); // copy GTE reg to memory
void CPU_LWC2(u32 instr, u32 addr, u32 rtVal); // copy memory to GTE reg
void CPU_SWC2(u32 instr, u32 addr, u32 rtVal); // copy GTE reg to memory
bool GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
float* out_w);
// -- CPU functions
void CPU_LW(u32 instr, u32 rtVal, u32 addr);
void CPU_LHx(u32 instr, u32 rtVal, u32 addr);
void CPU_LBx(u32 instr, u32 rtVal, u32 addr);
void CPU_SB(u32 instr, u8 rtVal, u32 addr);
void CPU_SH(u32 instr, u16 rtVal, u32 addr);
void CPU_SW(u32 instr, u32 rtVal, u32 addr);
void CPU_LW(u32 instr, u32 addr, u32 rtVal);
void CPU_LHx(u32 instr, u32 addr, u32 rtVal);
void CPU_LBx(u32 instr, u32 addr, u32 rtVal);
void CPU_SB(u32 instr, u32 addr, u32 rtVal);
void CPU_SH(u32 instr, u32 addr, u32 rtVal);
void CPU_SW(u32 instr, u32 addr, u32 rtVal);
void CPU_MOVE(u32 rd_and_rs, u32 rsVal);
// Arithmetic with immediate value
@ -93,9 +91,9 @@ void CPU_SRAV(u32 instr, u32 rtVal, u32 rsVal);
// Move registers
void CPU_MFHI(u32 instr, u32 hiVal);
void CPU_MTHI(u32 instr, u32 rdVal);
void CPU_MTHI(u32 instr, u32 rsVal);
void CPU_MFLO(u32 instr, u32 loVal);
void CPU_MTLO(u32 instr, u32 rdVal);
void CPU_MTLO(u32 instr, u32 rsVal);
// CP0 Data transfer tracking
void CPU_MFC0(u32 instr, u32 rdVal);

@ -5,7 +5,7 @@
#include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 58;
static constexpr u32 SAVE_STATE_VERSION = 59;
static constexpr u32 SAVE_STATE_MINIMUM_VERSION = 42;
static_assert(SAVE_STATE_VERSION >= SAVE_STATE_MINIMUM_VERSION);

@ -103,20 +103,21 @@ static void DestroySystem();
static std::string GetMediaPathFromSaveState(const char* path);
static bool DoLoadState(ByteStream* stream, bool force_software_renderer, bool update_display);
static bool DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display, bool is_memory_state);
static void DoRunFrame();
static bool CreateGPU(GPURenderer renderer);
static bool SaveUndoLoadState();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
static void Throttle();
static void SetRewinding(bool enabled);
static bool SaveRewindState();
static void DoRewind();
static void SaveRunaheadState();
static void DoRunahead();
static void DoMemorySaveStates();
static bool DoRunahead();
static bool Initialize(bool force_software_renderer);
static bool FastForwardToFirstFrame();
static bool UpdateGameSettingsLayer();
static void UpdateRunningGame(const char* path, CDImage* image, bool booting);
@ -149,12 +150,16 @@ static std::string s_running_game_serial;
static std::string s_running_game_title;
static System::GameHash s_running_game_hash;
static bool s_running_unknown_game;
static bool s_was_fast_booted;
static float s_throttle_frequency = 60.0f;
static float s_target_speed = 1.0f;
static Common::Timer::Value s_frame_period = 0;
static Common::Timer::Value s_next_frame_time = 0;
static bool s_last_frame_skipped = false;
static bool s_system_executing = false;
static bool s_system_interrupted = false;
static bool s_frame_step_request = false;
static bool s_fast_forward_enabled = false;
static bool s_turbo_enabled = false;
@ -208,6 +213,7 @@ static bool s_rewinding_first_save = false;
static std::deque<MemorySaveState> s_runahead_states;
static bool s_runahead_replay_pending = false;
static u32 s_runahead_frames = 0;
static u32 s_runahead_replay_frames = 0;
static TinyString GetTimestampStringForFileName()
{
@ -227,9 +233,6 @@ void System::SetState(State new_state)
Assert(s_state == State::Paused || s_state == State::Running);
Assert(new_state == State::Paused || new_state == State::Running);
s_state = new_state;
if (new_state == State::Paused)
CPU::ForceDispatcherExit();
}
bool System::IsRunning()
@ -237,6 +240,11 @@ bool System::IsRunning()
return s_state == State::Running;
}
bool System::IsExecutionInterrupted()
{
return s_state != State::Running || s_system_interrupted;
}
bool System::IsPaused()
{
return s_state == State::Paused;
@ -304,18 +312,6 @@ u32 System::GetInternalFrameNumber()
return s_internal_frame_number;
}
void System::FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
CPU::g_state.downcount = 0;
}
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
}
const std::string& System::GetDiscPath()
{
return s_running_game_path;
@ -340,6 +336,11 @@ bool System::IsRunningUnknownGame()
return s_running_unknown_game;
}
bool System::WasFastBooted()
{
return s_was_fast_booted;
}
const BIOS::ImageInfo* System::GetBIOSImageInfo()
{
return s_bios_image_info;
@ -529,7 +530,7 @@ bool System::GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash
pos++;
}
}
if (out_id)
{
if (id.empty())
@ -644,7 +645,7 @@ std::string System::GetExecutableNameForImage(CDImage* cdi, bool strip_subdirect
}
bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name,
std::vector<u8>* out_executable_data)
std::vector<u8>* out_executable_data)
{
ISOReader iso;
if (!iso.Open(cdi, 1))
@ -653,7 +654,8 @@ bool System::ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_n
return ReadExecutableFromImage(iso, out_executable_name, out_executable_data);
}
bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name, std::vector<u8>* out_executable_data)
bool System::ReadExecutableFromImage(ISOReader& iso, std::string* out_executable_name,
std::vector<u8>* out_executable_data)
{
const std::string executable_path = GetExecutableNameForImage(iso, false);
Log_DevPrintf("Executable path: '%s'", executable_path.c_str());
@ -886,7 +888,11 @@ void System::ApplySettings(bool display_osd_messages)
Host::CheckForSettingsChanges(old_config);
if (IsValid())
{
ResetPerformanceCounters();
if (s_system_executing)
s_system_interrupted = true;
}
}
bool System::ReloadGameSettings(bool display_osd_messages)
@ -1304,9 +1310,15 @@ bool System::BootSystem(SystemBootParameters parameters)
g_settings.bios_patch_fast_boot))
{
if (s_bios_image_info && s_bios_image_info->patch_compatible)
{
// TODO: Fast boot without patches...
BIOS::PatchBIOSFastBoot(Bus::g_bios, Bus::BIOS_SIZE);
s_was_fast_booted = true;
}
else
{
Log_ErrorPrintf("Not patching fast boot, as BIOS is not patch compatible.");
}
}
// Good to go.
@ -1346,6 +1358,9 @@ bool System::BootSystem(SystemBootParameters parameters)
if (parameters.load_image_to_ram || g_settings.cdrom_load_image_to_ram)
CDROM::PrecacheMedia();
if (parameters.fast_forward_to_first_frame)
FastForwardToFirstFrame();
if (g_settings.audio_dump_on_boot)
StartDumpingAudio();
@ -1370,6 +1385,10 @@ bool System::Initialize(bool force_software_renderer)
s_turbo_enabled = false;
s_fast_forward_enabled = false;
s_rewind_load_frequency = -1;
s_rewind_load_counter = -1;
s_rewinding_first_save = true;
s_average_frame_time_accumulator = 0.0f;
s_minimum_frame_time_accumulator = 0.0f;
s_maximum_frame_time_accumulator = 0.0f;
@ -1488,6 +1507,7 @@ bool System::Initialize(bool force_software_renderer)
void System::DestroySystem()
{
DebugAssert(!s_system_executing);
if (s_state == State::Shutdown)
return;
@ -1528,6 +1548,10 @@ void System::DestroySystem()
s_bios_hash = {};
s_bios_image_info = nullptr;
s_was_fast_booted = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnSystemDestroyed();
}
@ -1539,8 +1563,6 @@ void System::ClearRunningGame()
s_running_game_title.clear();
s_running_game_hash = 0;
s_running_unknown_game = false;
s_cheat_list.reset();
s_state = State::Shutdown;
Host::OnGameChanged(s_running_game_path, s_running_game_serial, s_running_game_title);
@ -1549,25 +1571,124 @@ void System::ClearRunningGame()
#endif
}
bool System::FastForwardToFirstFrame()
{
// If we're taking more than 60 seconds to load the game, oof..
static constexpr u32 MAX_FRAMES_TO_SKIP = 30 * 60;
const u32 current_frame_number = s_frame_number;
const u32 current_internal_frame_number = s_internal_frame_number;
SPU::SetAudioOutputMuted(true);
while (s_internal_frame_number == current_internal_frame_number &&
(s_frame_number - current_frame_number) <= MAX_FRAMES_TO_SKIP)
{
Panic("Fixme");
// System::RunFrame();
}
SPU::SetAudioOutputMuted(false);
return (s_internal_frame_number != current_internal_frame_number);
}
void System::Execute()
{
while (System::IsRunning())
for (;;)
{
if (s_display_all_frames)
System::RunFrame();
else
System::RunFrames();
switch (s_state)
{
case State::Running:
{
s_system_executing = true;
// this can shut us down
Host::PumpMessagesOnCPUThread();
if (!IsValid())
return;
// TODO: Purge reset/restore
g_gpu->RestoreGraphicsAPIState();
if (s_rewind_load_counter >= 0)
DoRewind();
else
CPU::Execute();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
continue;
}
case State::Stopping:
{
DestroySystem();
return;
}
case State::Paused:
default:
return;
}
}
}
void System::FrameDone()
{
s_frame_number++;
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
if (s_frame_step_request)
if (s_frame_step_request)
{
s_frame_step_request = false;
PauseSystem(true);
}
// Save states for rewind and runahead.
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{
s_frame_step_request = false;
PauseSystem(true);
SaveRewindState();
s_rewind_save_counter = s_rewind_save_frequency;
}
else
{
s_rewind_save_counter--;
}
}
else if (s_runahead_frames > 0)
{
// We don't want to poll during replay, because otherwise we'll lose frames.
if (s_runahead_replay_frames == 0)
{
// For runahead, poll input early, that way we can use the remainder of this frame to replay.
// *technically* this means higher input latency (by less than a frame), but runahead itself
// counter-acts that.
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
if (DoRunahead())
{
// running ahead, get it done as soon as possible
return;
}
SaveRunaheadState();
}
const Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time < s_next_frame_time || s_display_all_frames || s_last_frame_skipped)
{
s_last_frame_skipped = false;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
const bool skip_present = g_host_display->ShouldSkipDisplayingFrame();
Host::RenderDisplay(skip_present);
@ -1577,14 +1698,109 @@ void System::Execute()
s_presents_since_last_update++;
}
if (s_throttler_enabled)
System::Throttle();
g_gpu->RestoreGraphicsAPIState();
}
else if (current_time >= s_next_frame_time)
{
Log_DebugPrintf("Skipping displaying frame");
s_last_frame_skipped = true;
}
if (s_throttler_enabled && !IsExecutionInterrupted())
Throttle();
// Input poll already done above
if (s_runahead_frames == 0)
{
Host::PumpMessagesOnCPUThread();
if (IsExecutionInterrupted())
{
s_system_interrupted = false;
CPU::ExitExecution();
return;
}
}
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
System::UpdatePerformanceCounters();
}
void System::SetThrottleFrequency(float frequency)
{
if (s_throttle_frequency == frequency)
return;
s_throttle_frequency = frequency;
UpdateThrottlePeriod();
}
void System::UpdateThrottlePeriod()
{
if (s_target_speed > std::numeric_limits<double>::epsilon())
{
const double target_speed = std::max(static_cast<double>(s_target_speed), std::numeric_limits<double>::epsilon());
s_frame_period =
Common::Timer::ConvertSecondsToValue(1.0 / (static_cast<double>(s_throttle_frequency) * target_speed));
}
else
{
s_frame_period = 1;
}
ResetThrottler();
}
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue() + s_frame_period;
}
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
System::UpdatePerformanceCounters();
void System::Throttle()
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period + s_frame_period;
return;
}
// Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery.
// Linux also seems to do a much better job of waking up at the requested time.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
s_next_frame_time += s_frame_period;
}
void System::SingleStepCPU()
{
s_frame_timer.Reset();
s_system_executing = true;
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
SPU::GeneratePendingSamples();
g_gpu->ResetGraphicsAPIState();
s_system_executing = false;
}
void System::IncrementInternalFrameNumber()
{
s_internal_frame_number++;
}
void System::RecreateSystem()
@ -2163,159 +2379,11 @@ bool System::InternalSaveState(ByteStream* state, u32 screenshot_size /* = 256 *
return true;
}
void System::SingleStepCPU()
{
const u32 old_frame_number = s_frame_number;
s_frame_timer.Reset();
g_gpu->RestoreGraphicsAPIState();
CPU::SingleStep();
SPU::GeneratePendingSamples();
if (s_frame_number != old_frame_number && s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::DoRunFrame()
{
g_gpu->RestoreGraphicsAPIState();
if (CPU::g_state.use_debug_dispatcher)
{
CPU::ExecuteDebug();
}
else
{
switch (g_settings.cpu_execution_mode)
{
case CPUExecutionMode::Recompiler:
#ifdef WITH_RECOMPILER
CPU::CodeCache::ExecuteRecompiler();
#else
CPU::CodeCache::Execute();
#endif
break;
case CPUExecutionMode::CachedInterpreter:
CPU::CodeCache::Execute();
break;
case CPUExecutionMode::Interpreter:
default:
CPU::Execute();
break;
}
}
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
SPU::GeneratePendingSamples();
if (s_cheat_list)
s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState();
}
void System::RunFrame()
{
if (s_rewind_load_counter >= 0)
{
DoRewind();
return;
}
if (s_runahead_frames > 0)
DoRunahead();
DoRunFrame();
s_next_frame_time += s_frame_period;
if (s_memory_saves_enabled)
DoMemorySaveStates();
}
float System::GetTargetSpeed()
{
return s_target_speed;
}
void System::SetThrottleFrequency(float frequency)
{
s_throttle_frequency = frequency;
UpdateThrottlePeriod();
}
void System::UpdateThrottlePeriod()
{
if (s_target_speed > std::numeric_limits<double>::epsilon())
{
const double target_speed = std::max(static_cast<double>(s_target_speed), std::numeric_limits<double>::epsilon());
s_frame_period =
Common::Timer::ConvertSecondsToValue(1.0 / (static_cast<double>(s_throttle_frequency) * target_speed));
}
else
{
s_frame_period = 1;
}
ResetThrottler();
}
void System::ResetThrottler()
{
s_next_frame_time = Common::Timer::GetCurrentValue();
}
void System::Throttle()
{
// If we're running too slow, advance the next frame time based on the time we lost. Effectively skips
// running those frames at the intended time, because otherwise if we pause in the debugger, we'll run
// hundreds of frames when we resume.
Common::Timer::Value current_time = Common::Timer::GetCurrentValue();
if (current_time > s_next_frame_time)
{
const Common::Timer::Value diff = static_cast<s64>(current_time) - static_cast<s64>(s_next_frame_time);
s_next_frame_time += (diff / s_frame_period) * s_frame_period;
return;
}
// Use a spinwait if we undersleep for all platforms except android.. don't want to burn battery.
// Linux also seems to do a much better job of waking up at the requested time.
#if !defined(__linux__) && !defined(__ANDROID__)
Common::Timer::SleepUntil(s_next_frame_time, g_settings.display_all_frames);
#else
Common::Timer::SleepUntil(s_next_frame_time, false);
#endif
}
void System::RunFrames()
{
// If we're running more than this in a single loop... we're in for a bad time.
const u32 max_frames_to_run = 2;
u32 frames_run = 0;
Common::Timer::Value value = Common::Timer::GetCurrentValue();
while (frames_run < max_frames_to_run)
{
if (value < s_next_frame_time)
break;
RunFrame();
frames_run++;
value = Common::Timer::GetCurrentValue();
}
if (frames_run != 1)
Log_VerbosePrintf("Ran %u frames in a single host frame", frames_run);
}
void System::UpdatePerformanceCounters()
{
const float frame_time = static_cast<float>(s_frame_timer.GetTimeMillisecondsAndReset());
@ -3625,18 +3693,22 @@ void System::SetRewinding(bool enabled)
{
if (enabled)
{
const bool was_enabled = IsRewinding();
// Try to rewind at the replay speed, or one per second maximum.
const float load_frequency = std::min(g_settings.rewind_save_frequency, 1.0f);
s_rewind_load_frequency = static_cast<s32>(std::ceil(load_frequency * s_throttle_frequency));
s_rewind_load_counter = 0;
if (!was_enabled && s_system_executing)
s_system_interrupted = true;
}
else
{
s_rewind_load_frequency = -1;
s_rewind_load_counter = -1;
s_rewinding_first_save = true;
}
s_rewinding_first_save = true;
}
void System::DoRewind()
@ -3655,6 +3727,15 @@ void System::DoRewind()
}
s_next_frame_time += s_frame_period;
// TODO: Purge reset/restore
g_gpu->ResetGraphicsAPIState();
Host::RenderDisplay(false);
g_gpu->RestoreGraphicsAPIState();
Host::PumpMessagesOnCPUThread();
Throttle();
}
void System::SaveRunaheadState()
@ -3676,84 +3757,70 @@ void System::SaveRunaheadState()
s_runahead_states.push_back(std::move(mss));
}
void System::DoRunahead()
bool System::DoRunahead()
{
#ifdef PROFILE_MEMORY_SAVE_STATES
Common::Timer timer;
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
static Common::Timer replay_timer;
#endif
if (s_runahead_replay_pending)
{
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead starting at frame %u", s_frame_number);
replay_timer.Reset();
#endif
// we need to replay and catch up - load the state,
s_runahead_replay_pending = false;
if (s_runahead_states.empty() || !LoadMemoryState(s_runahead_states.front()))
{
s_runahead_states.clear();
return;
return false;
}
// figure out how many frames we need to run to catch up
s_runahead_replay_frames = static_cast<u32>(s_runahead_states.size());
// and throw away all the states, forcing us to catch up below
// TODO: can we leave one frame here and run, avoiding the extra save?
s_runahead_states.clear();
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds());
#endif
}
// run the frames with no audio
s32 frames_to_run = static_cast<s32>(s_runahead_frames) - static_cast<s32>(s_runahead_states.size());
if (frames_to_run > 0)
{
Common::Timer timer2;
#ifdef PROFILE_MEMORY_SAVE_STATES
const s32 temp = frames_to_run;
#endif
// run the frames with no audio
SPU::SetAudioOutputMuted(true);
while (frames_to_run > 0)
{
DoRunFrame();
SaveRunaheadState();
frames_to_run--;
}
SPU::SetAudioOutputMuted(false);
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", temp, timer2.GetTimeMilliseconds());
Log_VerbosePrintf("Rewound to frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif
// we don't want to save the frame we just loaded. but we are "one frame ahead", because the frame we just tossed
// was never saved, so return but don't decrement the counter
return true;
}
else
else if (s_runahead_replay_frames == 0)
{
// save this frame
return false;
}
s_runahead_replay_frames--;
if (s_runahead_replay_frames > 0)
{
// keep running ahead
SaveRunaheadState();
return true;
}
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, timer.GetTimeMilliseconds());
Log_VerbosePrintf("Running %d frames to catch up took %.2f ms", s_runahead_frames,
replay_timer.GetTimeMilliseconds());
#endif
}
void System::DoMemorySaveStates()
{
if (s_rewind_save_counter >= 0)
{
if (s_rewind_save_counter == 0)
{
SaveRewindState();
s_rewind_save_counter = s_rewind_save_frequency;
}
else
{
s_rewind_save_counter--;
}
}
// we're all caught up. this frame gets saved in DoMemoryStates().
SPU::SetAudioOutputMuted(false);
if (s_runahead_frames > 0)
SaveRunaheadState();
#ifdef PROFILE_MEMORY_SAVE_STATES
Log_DevPrintf("runahead ending at frame %u, took %.2f ms", s_frame_number, replay_timer.GetTimeMilliseconds());
#endif
return false;
}
void System::SetRunaheadReplayFlag()
@ -3776,7 +3843,10 @@ void System::ShutdownSystem(bool save_resume_state)
if (save_resume_state)
SaveResumeState();
DestroySystem();
if (s_system_executing)
s_state = State::Stopping;
else
DestroySystem();
}
bool System::CanUndoLoadState()

@ -42,6 +42,7 @@ struct SystemBootParameters
u32 media_playlist_index = 0;
bool load_image_to_ram = false;
bool force_software_renderer = false;
bool fast_forward_to_first_frame = false;
};
struct SaveStateInfo
@ -85,7 +86,8 @@ enum class State
Shutdown,
Starting,
Running,
Paused
Paused,
Stopping,
};
using GameHash = u64;
@ -110,7 +112,6 @@ ConsoleRegion GetConsoleRegionForDiscRegion(DiscRegion region);
std::string GetExecutableNameForImage(CDImage* cdi, bool strip_subdirectories);
bool ReadExecutableFromImage(CDImage* cdi, std::string* out_executable_name, std::vector<u8>* out_executable_data);
bool IsValidGameImage(CDImage* cdi);
std::string GetGameHashId(GameHash hash);
bool GetGameDetailsFromImage(CDImage* cdi, std::string* out_id, GameHash* out_hash);
DiscRegion GetRegionForSerial(std::string_view serial);
@ -129,6 +130,7 @@ std::string GetInputProfilePath(const std::string_view& name);
State GetState();
void SetState(State new_state);
bool IsRunning();
bool IsExecutionInterrupted();
bool IsPaused();
bool IsShutdown();
bool IsValid();
@ -176,14 +178,15 @@ bool InjectEXEFromBuffer(const void* buffer, u32 buffer_size, bool patch_loader
u32 GetFrameNumber();
u32 GetInternalFrameNumber();
void FrameDone();
void IncrementInternalFrameNumber();
void FrameDone();
const std::string& GetDiscPath();
const std::string& GetGameSerial();
const std::string& GetGameTitle();
GameHash GetGameHash();
bool IsRunningUnknownGame();
bool WasFastBooted();
const BIOS::ImageInfo* GetBIOSImageInfo();
const BIOS::Hash& GetBIOSHash();
@ -237,8 +240,6 @@ void RecreateSystem();
bool RecreateGPU(GPURenderer renderer, bool force_recreate_display = false, bool update_display = true);
void SingleStepCPU();
void RunFrame();
void RunFrames();
/// Sets target emulation speed.
float GetTargetSpeed();
@ -250,9 +251,6 @@ void SetThrottleFrequency(float frequency);
void UpdateThrottlePeriod();
void ResetThrottler();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
void Throttle();
void UpdatePerformanceCounters();
void ResetPerformanceCounters();

@ -17,6 +17,7 @@ static TimingEvent* s_active_events_tail;
static TimingEvent* s_current_event = nullptr;
static u32 s_active_event_count = 0;
static u32 s_global_tick_counter = 0;
static bool s_frame_done = false;
u32 GetGlobalTickCounter()
{
@ -51,10 +52,7 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
void UpdateCPUDowncount()
{
if (!CPU::g_state.frame_done && (!CPU::HasPendingInterrupt() || CPU::g_using_interpreter))
{
CPU::g_state.downcount = s_active_events_head->GetDowncount();
}
CPU::g_state.downcount = CPU::HasPendingInterrupt() ? 0 : s_active_events_head->GetDowncount();
}
TimingEvent** GetHeadEventPtr()
@ -260,48 +258,76 @@ static TimingEvent* FindActiveEvent(const char* name)
return nullptr;
}
bool IsRunningEvents()
{
return (s_current_event != nullptr);
}
void SetFrameDone()
{
s_frame_done = true;
CPU::g_state.downcount = 0;
}
void RunEvents()
{
DebugAssert(!s_current_event);
TickCount pending_ticks = CPU::GetPendingTicks();
CPU::ResetPendingTicks();
while (pending_ticks > 0)
do
{
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount());
s_global_tick_counter += static_cast<u32>(time);
pending_ticks -= time;
if (CPU::HasPendingInterrupt())
CPU::DispatchInterrupt();
// Apply downcount to all events.
// This will result in a negative downcount for those events which are late.
for (TimingEvent* event = s_active_events_head; event; event = event->next)
TickCount pending_ticks = CPU::GetPendingTicks();
if (pending_ticks >= s_active_events_head->GetDowncount())
{
event->m_downcount -= time;
event->m_time_since_last_run += time;
CPU::ResetPendingTicks();
do
{
const TickCount time = std::min(pending_ticks, s_active_events_head->GetDowncount());
s_global_tick_counter += static_cast<u32>(time);
pending_ticks -= time;
// Apply downcount to all events.
// This will result in a negative downcount for those events which are late.
for (TimingEvent* event = s_active_events_head; event; event = event->next)
{
event->m_downcount -= time;
event->m_time_since_last_run += time;
}
// Now we can actually run the callbacks.
while (s_active_events_head->m_downcount <= 0)
{
// move it to the end, since that'll likely be its new position
TimingEvent* event = s_active_events_head;
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
if (event->m_active)
SortEvent(event);
}
} while (pending_ticks > 0);
s_current_event = nullptr;
}
// Now we can actually run the callbacks.
while (s_active_events_head->m_downcount <= 0)
if (s_frame_done)
{
// move it to the end, since that'll likely be its new position
TimingEvent* event = s_active_events_head;
s_current_event = event;
// Factor late time into the time for the next invocation.
const TickCount ticks_late = -event->m_downcount;
const TickCount ticks_to_execute = event->m_time_since_last_run;
event->m_downcount += event->m_interval;
event->m_time_since_last_run = 0;
// The cycles_late is only an indicator, it doesn't modify the cycles to execute.
event->m_callback(event->m_callback_param, ticks_to_execute, ticks_late);
if (event->m_active)
SortEvent(event);
s_frame_done = false;
System::FrameDone();
}
}
s_current_event = nullptr;
UpdateCPUDowncount();
UpdateCPUDowncount();
} while (CPU::GetPendingTicks() >= CPU::g_state.downcount);
}
bool DoState(StateWrapper& sw)
@ -347,7 +373,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&last_event_run_time);
}
Log_DevPrintf("Loaded %u events from save state.", event_count);
Log_DebugPrintf("Loaded %u events from save state.", event_count);
SortEvents();
}
else
@ -364,7 +390,7 @@ bool DoState(StateWrapper& sw)
sw.Do(&event->m_interval);
}
Log_DevPrintf("Wrote %u events to save state.", s_active_event_count);
Log_DebugPrintf("Wrote %u events to save state.", s_active_event_count);
}
return !sw.HasError();
@ -407,6 +433,8 @@ void TimingEvent::Delay(TickCount ticks)
DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
void TimingEvent::Schedule(TickCount ticks)
@ -426,7 +454,11 @@ void TimingEvent::Schedule(TickCount ticks)
// Event is already active, so we leave the time since last run alone, and just modify the downcount.
// If this is a call from an IO handler for example, re-sort the event queue.
if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
}
}
@ -451,7 +483,11 @@ void TimingEvent::Reset()
m_downcount = m_interval;
m_time_since_last_run = 0;
if (TimingEvents::s_current_event != this)
{
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
}
void TimingEvent::InvokeEarly(bool force /* = false */)
@ -471,6 +507,8 @@ void TimingEvent::InvokeEarly(bool force /* = false */)
// Since we've changed the downcount, we need to re-sort the events.
DebugAssert(TimingEvents::s_current_event != this);
TimingEvents::SortEvent(this);
if (TimingEvents::s_active_events_head == this)
TimingEvents::UpdateCPUDowncount();
}
void TimingEvent::Activate()

@ -93,6 +93,8 @@ std::unique_ptr<TimingEvent> CreateTimingEvent(std::string name, TickCount perio
/// Serialization.
bool DoState(StateWrapper& sw);
bool IsRunningEvents();
void SetFrameDone();
void RunEvents();
void UpdateCPUDowncount();

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "win32_nogui_platform.h"
@ -161,7 +161,7 @@ std::optional<WindowInfo> Win32NoGUIPlatform::GetPlatformWindowInfo()
return std::nullopt;
RECT rc = {};
GetWindowRect(m_hwnd, &rc);
GetClientRect(m_hwnd, &rc);
WindowInfo wi;
wi.surface_width = static_cast<u32>(rc.right - rc.left);
@ -338,7 +338,7 @@ LRESULT CALLBACK Win32NoGUIPlatform::WndProc(HWND hwnd, UINT msg, WPARAM wParam,
const WCHAR utf16[1] = {static_cast<WCHAR>(wParam)};
char utf8[8] = {};
const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, utf16, static_cast<int>(std::size(utf16)), utf8,
static_cast<int>(sizeof(utf8)) - 1, nullptr, nullptr);
static_cast<int>(sizeof(utf8) - 1), nullptr, nullptr);
if (utf8_len > 0)
{
utf8[utf8_len] = 0;
@ -439,4 +439,4 @@ std::unique_ptr<NoGUIPlatform> NoGUIPlatform::CreateWin32Platform()
return {};
return ret;
}
}

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "x11_nogui_platform.h"
@ -24,7 +24,7 @@ X11NoGUIPlatform::~X11NoGUIPlatform()
bool X11NoGUIPlatform::Initialize()
{
const int res = XInitThreads();
if (res != 0)
if (res == 0)
Log_WarningPrintf("XInitThreads() returned %d, things might not be stable.", res);
m_display = XOpenDisplay(nullptr);
@ -34,6 +34,7 @@ bool X11NoGUIPlatform::Initialize()
return false;
}
InitializeKeyMap();
return true;
}
@ -68,7 +69,7 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title)
window_height, 0, 0, BlackPixel(m_display, 0));
if (!m_window)
{
Log_ErrorPrintf("Failed to create X window");
Log_ErrorPrint("Failed to create X window");
return false;
}
@ -92,7 +93,6 @@ bool X11NoGUIPlatform::CreatePlatformWindow(std::string title)
XMapRaised(m_display, m_window);
XFlush(m_display);
XSync(m_display, True);
InitializeKeyMap();
}
ProcessXEvents();
@ -144,15 +144,15 @@ void X11NoGUIPlatform::InitializeKeyMap()
if (keysym == NoSymbol)
continue;
KeySym upper_sym;
XConvertCase(keysym, &keysym, &upper_sym);
KeySym upper_keysym;
XConvertCase(keysym, &keysym, &upper_keysym);
// Would this fail?
const char* keyname = XKeysymToString(keysym);
const char* keyname = XKeysymToString(upper_keysym);
if (!keyname)
continue;
m_key_map.emplace(static_cast<s32>(keysym), keyname);
m_key_map.emplace(static_cast<s32>(upper_keysym), keyname);
}
}
@ -160,7 +160,7 @@ std::optional<u32> X11NoGUIPlatform::ConvertHostKeyboardStringToCode(const std::
{
for (const auto& it : m_key_map)
{
if (StringUtil::Strncasecmp(it.second.c_str(), str.data(), str.length()) == 0)
if (str == it.second)
return it.first;
}
@ -175,7 +175,7 @@ std::optional<std::string> X11NoGUIPlatform::ConvertHostKeyboardCodeToString(u32
void X11NoGUIPlatform::ProcessXEvents()
{
XLockDisplay(m_display);
XDisplayLocker locker(m_display);
for (int num_events = XPending(m_display); num_events > 0; num_events--)
{
@ -186,19 +186,30 @@ void X11NoGUIPlatform::ProcessXEvents()
case KeyPress:
case KeyRelease:
{
const KeySym sym = XLookupKeysym(&event.xkey, 0);
KeySym sym = XLookupKeysym(&event.xkey, 0);
if (sym != NoSymbol)
NoGUIHost::ProcessPlatformKeyEvent(static_cast<s32>(sym), (event.type == KeyPress));
{
KeySym upper_sym = sym;
XConvertCase(sym, &sym, &upper_sym);
NoGUIHost::ProcessPlatformKeyEvent(static_cast<s32>(upper_sym), (event.type == KeyPress));
}
}
break;
case ButtonPress:
case ButtonRelease:
{
if (event.xbutton.button >= Button1)
if (event.xbutton.button >= Button4 && event.xbutton.button <= Button5)
{
NoGUIHost::ProcessPlatformMouseButtonEvent(static_cast<s32>(event.xbutton.button - Button1),
event.type == ButtonPress);
// Button 4/5 are mouse wheel events on X, apparently...
NoGUIHost::ProcessPlatformMouseWheelEvent(0.0f, (event.xbutton.button == Button4) ? 1.0f : -1.0f);
}
else if (event.xbutton.button >= Button1)
{
// Swap middle and right buttons.
const u32 xbutton = event.xbutton.button;
const u32 mapped_button = (xbutton == Button3) ? 1 : (xbutton == Button2 ? 2 : (xbutton - Button1));
NoGUIHost::ProcessPlatformMouseButtonEvent(mapped_button, event.type == ButtonPress);
}
}
break;
@ -241,8 +252,6 @@ void X11NoGUIPlatform::ProcessXEvents()
break;
}
}
XUnlockDisplay(m_display);
}
void X11NoGUIPlatform::RunMessageLoop()

@ -58,13 +58,13 @@ void DebuggerWindow::refreshAll()
m_stack_model->invalidateView();
m_ui.memoryView->repaint();
m_code_model->setPC(CPU::g_state.regs.pc);
m_code_model->setPC(CPU::g_state.pc);
scrollToPC();
}
void DebuggerWindow::scrollToPC()
{
return scrollToCodeAddress(CPU::g_state.regs.pc);
return scrollToCodeAddress(CPU::g_state.pc);
}
void DebuggerWindow::scrollToCodeAddress(VirtualMemoryAddress address)

@ -95,10 +95,10 @@ bool RegTestHost::InitializeConfig()
si.SetStringValue("MemoryCards", "Card2Type", Settings::GetMemoryCardTypeName(MemoryCardType::None));
si.SetStringValue("ControllerPorts", "MultitapMode", Settings::GetMultitapModeName(MultitapMode::Disabled));
si.SetStringValue("Audio", "Backend", Settings::GetAudioBackendName(AudioBackend::Null));
si.SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(LOGLEVEL_VERBOSE));
si.SetBoolValue("Logging", "LogToConsole", true);
si.SetBoolValue("Main", "ApplyGameSettings", false); // don't want game settings interfering
si.SetBoolValue("BIOS", "PatchFastBoot", true); // no point validating the bios intro..
si.SetFloatValue("Main", "EmulationSpeed", 0.0f);
// disable all sources
for (u32 i = 0; i < static_cast<u32>(InputSourceType::Count); i++)
@ -251,7 +251,9 @@ void Host::OnGameChanged(const std::string& disc_path, const std::string& game_s
void Host::PumpMessagesOnCPUThread()
{
//
s_frames_to_run--;
if (s_frames_to_run == 0)
System::ShutdownSystem(false);
}
void Host::RunOnCPUThread(std::function<void()> function, bool block /* = false */)
@ -496,6 +498,7 @@ bool RegTestHost::ParseCommandLineParameters(int argc, char* argv[], std::option
}
Log::SetConsoleOutputParams(true, nullptr, level.value());
s_base_settings_interface->SetStringValue("Logging", "LogLevel", Settings::GetLogLevelName(level.value()));
continue;
}
else if (CHECK_ARG_PARAM("-renderer"))
@ -577,16 +580,7 @@ int main(int argc, char* argv[])
}
Log_InfoPrintf("Running for %d frames...", s_frames_to_run);
for (u32 frame = 0; frame < s_frames_to_run; frame++)
{
System::RunFrame();
Host::RenderDisplay(false);
System::UpdatePerformanceCounters();
}
Log_InfoPrintf("All done, shutting down system.");
System::ShutdownSystem(false);
System::Execute();
Log_InfoPrintf("Exiting with success.");
result = 0;

@ -2690,7 +2690,7 @@ void FullscreenUI::DrawInterfaceSettingsPage()
DrawToggleSetting(bsi, ICON_FA_SPINNER " Show GPU Usage",
"Shows the host's GPU usage in the top-right corner of the display.", "Display", "ShowGPU", false);
DrawToggleSetting(bsi, ICON_FA_RULER_HORIZONTAL " Show Frame Times",
"Shows a visual history of frame times in the upper-left corner of the display.", "EmuCore/GS",
"Shows a visual history of frame times in the upper-left corner of the display.", "Display",
"ShowFrameTimes", false);
DrawToggleSetting(bsi, ICON_FA_RULER_VERTICAL " Show Resolution",
"Shows the current rendering resolution of the system in the top-right corner of the display.",

@ -209,7 +209,7 @@ void JitCodeBuffer::CommitCode(u32 length)
if (length == 0)
return;
#if defined(CPU_AARCH32) || defined(CPU_AARCH64)
#if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64)
// ARM instruction and data caches are not coherent, we need to flush after every block.
FlushInstructionCache(m_free_code_ptr, length);
#endif
@ -224,7 +224,7 @@ void JitCodeBuffer::CommitFarCode(u32 length)
if (length == 0)
return;
#if defined(CPU_AARCH32) || defined(CPU_AARCH64)
#if defined(CPU_AARCH32) || defined(CPU_AARCH64) || defined(CPU_RISCV64)
// ARM instruction and data caches are not coherent, we need to flush after every block.
FlushInstructionCache(m_free_far_code_ptr, length);
#endif

@ -21,6 +21,15 @@ public:
ALWAYS_INLINE u8* GetCodePointer() const { return m_code_ptr; }
ALWAYS_INLINE u32 GetTotalSize() const { return m_total_size; }
ALWAYS_INLINE float GetUsedPct() const
{
return (static_cast<float>(m_code_used) / static_cast<float>(m_code_size)) * 100.0f;
}
ALWAYS_INLINE float GetFarUsedPct() const
{
return (static_cast<float>(m_far_code_used) / static_cast<float>(m_far_code_size)) * 100.0f;
}
ALWAYS_INLINE u32 GetTotalUsed() const { return m_code_used + m_far_code_used; }
ALWAYS_INLINE u8* GetFreeCodePointer() const { return m_free_code_ptr; }
ALWAYS_INLINE u32 GetFreeCodeSpace() const { return static_cast<u32>(m_code_size - m_code_used); }

@ -81,6 +81,14 @@ static bool IsStoreInstruction(const void* ptr)
return false;
}
}
#elif defined(CPU_RISCV64)
static bool IsStoreInstruction(const void* ptr)
{
u32 bits;
std::memcpy(&bits, ptr, sizeof(bits));
return ((bits & 0x7Fu) == 0b0100011u);
}
#endif
#if defined(_WIN32) && (defined(CPU_X64) || defined(CPU_AARCH64))
@ -143,6 +151,9 @@ static void SIGSEGVHandler(int sig, siginfo_t* info, void* ctx)
#elif defined(CPU_AARCH64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.pc);
const bool is_write = IsStoreInstruction(exception_pc);
#elif defined(CPU_RISCV64)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.__gregs[REG_PC]);
const bool is_write = IsStoreInstruction(exception_pc);
#else
void* const exception_pc = nullptr;
const bool is_write = false;

Loading…
Cancel
Save