mirror of https://github.com/yuzu-mirror/yuzu
				
				
				
			Merge pull request #4273 from ogniK5377/async-shaders-prod
video_core: Add asynchronous shader decompilation and compilation
pull/8/head
						commit
						90cbcaa44a
					
				@ -0,0 +1,181 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <condition_variable>
 | 
			
		||||
#include <mutex>
 | 
			
		||||
#include <thread>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/renderer_base.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
 | 
			
		||||
#include "video_core/shader/async_shaders.h"
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon::Shader {
 | 
			
		||||
 | 
			
		||||
AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
 | 
			
		||||
 | 
			
		||||
/// Shuts the worker threads down through KillWorkers() when the object is destroyed.
AsyncShaders::~AsyncShaders() {
    this->KillWorkers();
}
 | 
			
		||||
 | 
			
		||||
void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
 | 
			
		||||
    // If we're already have workers queued or don't want to queue workers, ignore
 | 
			
		||||
    if (num_workers == worker_threads.size() || num_workers == 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // If workers already exist, clear them
 | 
			
		||||
    if (!worker_threads.empty()) {
 | 
			
		||||
        FreeWorkers();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Create workers
 | 
			
		||||
    for (std::size_t i = 0; i < num_workers; i++) {
 | 
			
		||||
        context_list.push_back(emu_window.CreateSharedContext());
 | 
			
		||||
        worker_threads.push_back(std::move(
 | 
			
		||||
            std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void AsyncShaders::FreeWorkers() {
 | 
			
		||||
    // Mark all threads to quit
 | 
			
		||||
    is_thread_exiting.store(true);
 | 
			
		||||
    cv.notify_all();
 | 
			
		||||
    for (auto& thread : worker_threads) {
 | 
			
		||||
        thread.join();
 | 
			
		||||
    }
 | 
			
		||||
    // Clear our shared contexts
 | 
			
		||||
    context_list.clear();
 | 
			
		||||
 | 
			
		||||
    // Clear our worker threads
 | 
			
		||||
    worker_threads.clear();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void AsyncShaders::KillWorkers() {
 | 
			
		||||
    is_thread_exiting.store(true);
 | 
			
		||||
    for (auto& thread : worker_threads) {
 | 
			
		||||
        thread.detach();
 | 
			
		||||
    }
 | 
			
		||||
    // Clear our shared contexts
 | 
			
		||||
    context_list.clear();
 | 
			
		||||
 | 
			
		||||
    // Clear our worker threads
 | 
			
		||||
    worker_threads.clear();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool AsyncShaders::HasWorkQueued() {
 | 
			
		||||
    return !pending_queue.empty();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool AsyncShaders::HasCompletedWork() {
 | 
			
		||||
    std::shared_lock lock{completed_mutex};
 | 
			
		||||
    return !finished_work.empty();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Heuristic deciding whether the shader for the current draw may be built asynchronously.
/// Returns true when it may, false when it has to be built synchronously.
bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
    const auto& regs = gpu.Maxwell3D().regs;

    // Anything drawn with depth enabled is assumed not to be a one-off render, so its shader is
    // allowed to arrive late.
    if (regs.zeta_enable) {
        return true;
    }

    // A very small index or vertex count is assumed to be a full screen quad. Shaders for those
    // are usually run a single time to build a texture, so they cannot be built async.
    const bool looks_like_fullscreen_quad =
        regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6;
    return !looks_like_fullscreen_quad;
}
 | 
			
		||||
 | 
			
		||||
std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
 | 
			
		||||
    std::vector<AsyncShaders::Result> results;
 | 
			
		||||
    {
 | 
			
		||||
        std::unique_lock lock{completed_mutex};
 | 
			
		||||
        results.assign(std::make_move_iterator(finished_work.begin()),
 | 
			
		||||
                       std::make_move_iterator(finished_work.end()));
 | 
			
		||||
        finished_work.clear();
 | 
			
		||||
    }
 | 
			
		||||
    return results;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Packs the given shader description into a work item, appends it to the pending queue and
/// wakes one worker thread.
///
/// The backend is GLASM when the device reports assembly shader usage and OpenGL otherwise.
/// `code` and `code_b` are moved into the work item; the device, compiler settings and registry
/// are copied into it.
void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     Tegra::Engines::ShaderType shader_type, u64 uid,
                                     std::vector<u64> code, std::vector<u64> code_b,
                                     u32 main_offset,
                                     VideoCommon::Shader::CompilerSettings compiler_settings,
                                     const VideoCommon::Shader::Registry& registry,
                                     VAddr cpu_addr) {
    const Backend backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;

    // Field order follows the declaration of WorkerParams.
    WorkerParams job{
        backend,     device,
        shader_type, uid,
        std::move(code), std::move(code_b),
        main_offset, compiler_settings,
        registry,    cpu_addr,
    };

    // The lock is held until the end of the function, i.e. also while notifying.
    std::unique_lock queue_lock{queue_mutex};
    pending_queue.push_back(std::move(job));
    cv.notify_one();
}
 | 
			
		||||
 | 
			
		||||
/// Body of one worker thread: waits for queued shaders, builds them using `context` and stores
/// the outcome in finished_work. Returns once is_thread_exiting is observed as set.
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
        std::unique_lock queue_lock{queue_mutex};
        cv.wait(queue_lock, [this] { return HasWorkQueued() || is_thread_exiting; });
        if (is_thread_exiting) {
            return;
        }

        // Defensive re-check while still holding the lock: with nothing to take, wait again.
        if (pending_queue.empty()) {
            continue;
        }

        // Pull work from queue, then release the lock so other workers can take theirs while
        // this one is busy building.
        WorkerParams work = std::move(pending_queue.front());
        pending_queue.pop_front();
        queue_lock.unlock();

        const bool wants_source_program = work.backend == AsyncShaders::Backend::OpenGL;
        const bool wants_assembly_program = work.backend == AsyncShaders::Backend::GLASM;
        if (!wants_source_program && !wants_assembly_program) {
            // Not a backend this worker knows how to build; the item is dropped.
            continue;
        }

        const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
        // Hold the graphics context for the remainder of this iteration.
        const auto scope = context->Acquire();
        auto program =
            OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);

        // The code vectors are only moved out of `work` after the build above is done.
        Result result{};
        result.uid = work.uid;
        result.cpu_address = work.cpu_address;
        result.backend = work.backend;
        result.shader_type = work.shader_type;
        result.code = std::move(work.code);
        result.code_b = std::move(work.code_b);
        if (wants_source_program) {
            result.program.opengl = std::move(program->source_program);
        } else {
            result.program.glasm = std::move(program->assembly_program);
        }

        {
            std::unique_lock complete_lock(completed_mutex);
            finished_work.push_back(std::move(result));
        }
    }
}
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCommon::Shader
 | 
			
		||||
@ -0,0 +1,109 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <condition_variable>
 | 
			
		||||
#include <deque>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <shared_mutex>
 | 
			
		||||
#include <thread>
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_device.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
 | 
			
		||||
 | 
			
		||||
namespace Core::Frontend {
 | 
			
		||||
class EmuWindow;
 | 
			
		||||
class GraphicsContext;
 | 
			
		||||
} // namespace Core::Frontend
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
class GPU;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon::Shader {
 | 
			
		||||
 | 
			
		||||
class AsyncShaders {
 | 
			
		||||
public:
 | 
			
		||||
    enum class Backend {
 | 
			
		||||
        OpenGL,
 | 
			
		||||
        GLASM,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    struct ResultPrograms {
 | 
			
		||||
        OpenGL::OGLProgram opengl;
 | 
			
		||||
        OpenGL::OGLAssemblyProgram glasm;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    struct Result {
 | 
			
		||||
        u64 uid;
 | 
			
		||||
        VAddr cpu_address;
 | 
			
		||||
        Backend backend;
 | 
			
		||||
        ResultPrograms program;
 | 
			
		||||
        std::vector<u64> code;
 | 
			
		||||
        std::vector<u64> code_b;
 | 
			
		||||
        Tegra::Engines::ShaderType shader_type;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
 | 
			
		||||
    ~AsyncShaders();
 | 
			
		||||
 | 
			
		||||
    /// Start up shader worker threads
 | 
			
		||||
    void AllocateWorkers(std::size_t num_workers);
 | 
			
		||||
 | 
			
		||||
    /// Clear the shader queue and kill all worker threads
 | 
			
		||||
    void FreeWorkers();
 | 
			
		||||
 | 
			
		||||
    // Force end all threads
 | 
			
		||||
    void KillWorkers();
 | 
			
		||||
 | 
			
		||||
    /// Check to see if any shaders have actually been compiled
 | 
			
		||||
    bool HasCompletedWork();
 | 
			
		||||
 | 
			
		||||
    /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
 | 
			
		||||
    /// every shader async as some shaders are only built and executed once. We try to "guess" which
 | 
			
		||||
    /// shader would be used only once
 | 
			
		||||
    bool IsShaderAsync(const Tegra::GPU& gpu) const;
 | 
			
		||||
 | 
			
		||||
    /// Pulls completed compiled shaders
 | 
			
		||||
    std::vector<Result> GetCompletedWork();
 | 
			
		||||
 | 
			
		||||
    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
 | 
			
		||||
                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
 | 
			
		||||
                           VideoCommon::Shader::CompilerSettings compiler_settings,
 | 
			
		||||
                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
 | 
			
		||||
 | 
			
		||||
    /// Check our worker queue to see if we have any work queued already
 | 
			
		||||
    bool HasWorkQueued();
 | 
			
		||||
 | 
			
		||||
    struct WorkerParams {
 | 
			
		||||
        AsyncShaders::Backend backend;
 | 
			
		||||
        OpenGL::Device device;
 | 
			
		||||
        Tegra::Engines::ShaderType shader_type;
 | 
			
		||||
        u64 uid;
 | 
			
		||||
        std::vector<u64> code;
 | 
			
		||||
        std::vector<u64> code_b;
 | 
			
		||||
        u32 main_offset;
 | 
			
		||||
        VideoCommon::Shader::CompilerSettings compiler_settings;
 | 
			
		||||
        VideoCommon::Shader::Registry registry;
 | 
			
		||||
        VAddr cpu_address;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    std::condition_variable cv;
 | 
			
		||||
    std::mutex queue_mutex;
 | 
			
		||||
    std::shared_mutex completed_mutex;
 | 
			
		||||
    std::atomic<bool> is_thread_exiting{};
 | 
			
		||||
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
 | 
			
		||||
    std::vector<std::thread> worker_threads;
 | 
			
		||||
    std::deque<WorkerParams> pending_queue;
 | 
			
		||||
    std::vector<AsyncShaders::Result> finished_work;
 | 
			
		||||
    Core::Frontend::EmuWindow& emu_window;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCommon::Shader
 | 
			
		||||
@ -0,0 +1,42 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include "video_core/shader_notify.h"
 | 
			
		||||
 | 
			
		||||
using namespace std::chrono_literals;
 | 
			
		||||
 | 
			
		||||
namespace VideoCore {
 | 
			
		||||
namespace {
/// Interval that GetShadersBuilding() compares against the time elapsed since `last_update`
/// before it refreshes its cached count.
constexpr std::chrono::milliseconds UPDATE_TICK{32};
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
/// Defaulted constructor, defined out of line in this translation unit.
ShaderNotify::ShaderNotify() = default;
 | 
			
		||||
/// Defaulted destructor, defined out of line in this translation unit.
ShaderNotify::~ShaderNotify() = default;
 | 
			
		||||
 | 
			
		||||
std::size_t ShaderNotify::GetShadersBuilding() {
 | 
			
		||||
    const auto now = std::chrono::high_resolution_clock::now();
 | 
			
		||||
    const auto diff = now - last_update;
 | 
			
		||||
    if (diff > UPDATE_TICK) {
 | 
			
		||||
        std::shared_lock lock(mutex);
 | 
			
		||||
        last_updated_count = accurate_count;
 | 
			
		||||
    }
 | 
			
		||||
    return last_updated_count;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::size_t ShaderNotify::GetShadersBuildingAccurate() {
 | 
			
		||||
    std::shared_lock lock{mutex};
 | 
			
		||||
    return accurate_count;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ShaderNotify::MarkShaderComplete() {
 | 
			
		||||
    std::unique_lock lock{mutex};
 | 
			
		||||
    accurate_count--;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void ShaderNotify::MarkSharderBuilding() {
 | 
			
		||||
    std::unique_lock lock{mutex};
 | 
			
		||||
    accurate_count++;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace VideoCore
 | 
			
		||||
@ -0,0 +1,29 @@
 | 
			
		||||
// Copyright 2020 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <chrono>
 | 
			
		||||
#include <shared_mutex>
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
 | 
			
		||||
namespace VideoCore {
 | 
			
		||||
/// Keeps count of the shaders that are currently being built.
class ShaderNotify {
public:
    ShaderNotify();
    ~ShaderNotify();

    /// Returns the cached number of shaders being built
    std::size_t GetShadersBuilding();

    /// Returns the current number of shaders being built, read under the mutex
    std::size_t GetShadersBuildingAccurate();

    /// Records that a shader has finished building
    void MarkShaderComplete();

    /// Records that a shader has started building
    void MarkSharderBuilding();

private:
    std::size_t last_updated_count{0}; ///< Value handed out by GetShadersBuilding()
    std::size_t accurate_count{0};     ///< Counter changed by the Mark* functions
    std::shared_mutex mutex;           ///< Locked around accesses to accurate_count
    std::chrono::high_resolution_clock::time_point last_update{};
};
 | 
			
		||||
} // namespace VideoCore
 | 
			
		||||
					Loading…
					
					
				
		Reference in New Issue