gl_shader_cache: Specialize local memory size for compute shaders

Local memory size in compute shaders was stubbed with an arbitrary size.
This commit specializes local memory size from guest GPU parameters.
pull/8/head
ReinUsesLisp 6 years ago
parent dbeb523879
commit 287ae2b9e8
No known key found for this signature in database
GPG Key ID: 2DFC508897B39CFE

@ -178,7 +178,12 @@ public:
BitField<24, 5, u32> gpr_alloc;
};
INSERT_PADDING_WORDS(0x11);
union {
BitField<0, 20, u32> local_crs_alloc;
BitField<24, 5, u32> sass_version;
};
INSERT_PADDING_WORDS(0x10);
} launch_description{};
struct {

@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
launch_desc.block_dim_z, launch_desc.shared_alloc);
launch_desc.block_dim_z, launch_desc.shared_alloc,
launch_desc.local_pos_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;

@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
source += fmt::format("shared uint smem[{}];",
Common::AlignUp(variant.shared_memory_size, 4) / 4);
}
if (variant.local_memory_size > 0) {
source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
Common::AlignUp(variant.local_memory_size, 4) / 4);
}
}
source += '\n';

@ -510,10 +510,14 @@ private:
}
void DeclareLocalMemory() {
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
// specialization time.
const u64 local_memory_size =
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
if (stage == ProgramType::Compute) {
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
code.AddLine("#endif");
return;
}
const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
@ -851,9 +855,6 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
return {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
@ -1228,9 +1229,6 @@ private:
}
target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};

@ -52,11 +52,11 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
constexpr u32 NativeVersion = 8;
constexpr u32 NativeVersion = 9;
// Making sure sizes don't change by accident
static_assert(sizeof(BaseBindings) == 16);
static_assert(sizeof(ProgramVariant) == 32);
static_assert(sizeof(ProgramVariant) == 36);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};

@ -64,10 +64,10 @@ struct ProgramVariant final {
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
/// Compute constructor.
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
u32 shared_memory_size) noexcept
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
shared_memory_size{shared_memory_size} {}
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
BaseBindings base_bindings{};
@ -78,12 +78,13 @@ struct ProgramVariant final {
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
rhs.block_x, rhs.block_y, rhs.block_z,
rhs.shared_memory_size);
shared_memory_size, local_memory_size) ==
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
(static_cast<std::size_t>(variant.shared_memory_size) << 16);
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
(static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};

Loading…
Cancel
Save