From 717394c980509ee68608e6378cf58162adb5edaa Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Tue, 19 Feb 2019 09:44:33 +0100 Subject: [PATCH] video_core/dma_pusher: Fetch the full list of headers at once. Fetching every u32 from memory leads to a big overhead. So let's fetch all of them as a block if possible. This reduces the Memory::* calls by the dma_pusher by a factor of 10. --- src/video_core/dma_pusher.cpp | 104 ++++++++++++++++++---------------- src/video_core/dma_pusher.h | 2 + 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index eb9bf1878..654e4d9aa 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -38,59 +38,67 @@ bool DmaPusher::Step() { const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); ASSERT_MSG(address, "Invalid GPU address"); - const CommandHeader command_header{Memory::Read32(*address)}; + GPUVAddr size = dma_put - dma_get; + ASSERT_MSG(size % sizeof(CommandHeader) == 0, "Invalid aligned GPU addresses"); + command_headers.resize(size / sizeof(CommandHeader)); + + Memory::ReadBlock(*address, command_headers.data(), size); + + for (const CommandHeader& command_header : command_headers) { + + // now, see if we're in the middle of a command + if (dma_state.length_pending) { + // Second word of long non-inc methods command - method count + dma_state.length_pending = 0; + dma_state.method_count = command_header.method_count_; + } else if (dma_state.method_count) { + // Data word of methods command + CallMethod(command_header.argument); + + if (!dma_state.non_incrementing) { + dma_state.method++; + } + + if (dma_increment_once) { + dma_state.non_incrementing = true; + } + + dma_state.method_count--; + } else { + // No command active - this is the first word of a new one + switch (command_header.mode) { + case SubmissionMode::Increasing: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = false; 
break; + case SubmissionMode::NonIncreasing: + SetState(command_header); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::Inline: + dma_state.method = command_header.method; + dma_state.subchannel = command_header.subchannel; + CallMethod(command_header.arg_count); + dma_state.non_incrementing = true; + dma_increment_once = false; + break; + case SubmissionMode::IncreaseOnce: + SetState(command_header); + dma_state.non_incrementing = false; + dma_increment_once = true; + break; + } + } + } - dma_get += sizeof(u32); + dma_get = dma_put; if (!non_main) { + // TODO (degasus): This is dead code, as dma_mget is never read. dma_mget = dma_get; } - - // now, see if we're in the middle of a command - if (dma_state.length_pending) { - // Second word of long non-inc methods command - method count - dma_state.length_pending = 0; - dma_state.method_count = command_header.method_count_; - } else if (dma_state.method_count) { - // Data word of methods command - CallMethod(command_header.argument); - - if (!dma_state.non_incrementing) { - dma_state.method++; - } - - if (dma_increment_once) { - dma_state.non_incrementing = true; - } - - dma_state.method_count--; - } else { - // No command active - this is the first word of a new one - switch (command_header.mode) { - case SubmissionMode::Increasing: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = false; - break; - case SubmissionMode::NonIncreasing: - SetState(command_header); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::Inline: - dma_state.method = command_header.method; - dma_state.subchannel = command_header.subchannel; - CallMethod(command_header.arg_count); - dma_state.non_incrementing = true; - dma_increment_once = false; - break; - case SubmissionMode::IncreaseOnce: - SetState(command_header); - dma_state.non_incrementing = false; - dma_increment_once = true; - break; - } - } } 
else if (ib_enable && !dma_pushbuffer.empty()) { // Current pushbuffer empty, but we have more IB entries to read const CommandList& command_list{dma_pushbuffer.front()}; diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1097e5c49..14b23b1d7 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -75,6 +75,8 @@ private: GPU& gpu; + std::vector command_headers; ///< Buffer for list of commands fetched at once + std::queue dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer