mirror of https://github.com/yuzu-mirror/yuzu
shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available
parent
84298ce191
commit
e860870dd2
@ -0,0 +1,175 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
|
||||
}
|
||||
|
||||
Id Word(EmitContext& ctx, Id offset) {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
|
||||
std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
|
||||
const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
|
||||
const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
|
||||
const Id count_id{ctx.Constant(ctx.U32[1], count)};
|
||||
return {bit, count_id};
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
|
||||
} else {
|
||||
const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
|
||||
return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
|
||||
return ctx.OpLoad(ctx.U32[1], pointer);
|
||||
} else {
|
||||
return Word(ctx, offset);
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
return ctx.OpLoad(ctx.U32[2], pointer);
|
||||
} else {
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
|
||||
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
|
||||
ctx.OpLoad(ctx.U32[1], rhs_pointer));
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
return ctx.OpLoad(ctx.U32[4], pointer);
|
||||
}
|
||||
const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
|
||||
std::array<Id, 4> values{};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index
|
||||
: ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
values[i] = ctx.OpLoad(ctx.U32[1], pointer);
|
||||
}
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], values);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{
|
||||
ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
|
||||
} else {
|
||||
ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
|
||||
ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
|
||||
} else {
|
||||
ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
|
||||
Id pointer{};
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
|
||||
} else {
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
|
||||
}
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
|
||||
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
|
||||
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
|
||||
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
|
||||
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
|
||||
void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
|
||||
if (ctx.profile.support_explicit_workgroup_layout) {
|
||||
const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
|
||||
ctx.OpStore(pointer, value);
|
||||
return;
|
||||
}
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 2U)};
|
||||
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
|
||||
for (u32 i = 0; i < 4; ++i) {
|
||||
const Id index{i == 0 ? base_index
|
||||
: ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
|
||||
const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
|
||||
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
@ -0,0 +1,197 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
enum class Size : u64 {
|
||||
U8,
|
||||
S8,
|
||||
U16,
|
||||
S16,
|
||||
B32,
|
||||
B64,
|
||||
B128,
|
||||
};
|
||||
|
||||
IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<8, 8, IR::Reg> offset_reg;
|
||||
BitField<20, 24, u64> absolute_offset;
|
||||
BitField<20, 24, s64> relative_offset;
|
||||
} const encoding{insn};
|
||||
|
||||
if (encoding.offset_reg == IR::Reg::RZ) {
|
||||
return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
|
||||
} else {
|
||||
const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
|
||||
return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<int, bool> GetSize(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<48, 3, Size> size;
|
||||
} const encoding{insn};
|
||||
|
||||
const Size nnn = encoding.size;
|
||||
switch (encoding.size) {
|
||||
case Size::U8:
|
||||
return {8, false};
|
||||
case Size::S8:
|
||||
return {8, true};
|
||||
case Size::U16:
|
||||
return {16, false};
|
||||
case Size::S16:
|
||||
return {16, true};
|
||||
case Size::B32:
|
||||
return {32, false};
|
||||
case Size::B64:
|
||||
return {64, false};
|
||||
case Size::B128:
|
||||
return {128, false};
|
||||
default:
|
||||
throw NotImplementedException("Invalid size {}", encoding.size.Value());
|
||||
}
|
||||
}
|
||||
|
||||
IR::Reg Reg(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<0, 8, IR::Reg> reg;
|
||||
} const encoding{insn};
|
||||
|
||||
return encoding.reg;
|
||||
}
|
||||
|
||||
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
|
||||
return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
|
||||
}
|
||||
|
||||
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
|
||||
return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::LDL(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
|
||||
|
||||
const IR::Reg dest{Reg(insn)};
|
||||
const auto [bit_size, is_signed]{GetSize(insn)};
|
||||
switch (bit_size) {
|
||||
case 8: {
|
||||
const IR::U32 bit{ByteOffset(ir, offset)};
|
||||
X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
const IR::U32 bit{ShortOffset(ir, offset)};
|
||||
X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||
}
|
||||
X(dest, ir.LoadLocal(word_offset));
|
||||
for (int i = 1; i < bit_size / 32; ++i) {
|
||||
X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::LDS(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::Reg dest{Reg(insn)};
|
||||
const auto [bit_size, is_signed]{GetSize(insn)};
|
||||
const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
case 16:
|
||||
case 32:
|
||||
X(dest, IR::U32{value});
|
||||
break;
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(dest, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned destination register {}", dest);
|
||||
}
|
||||
for (int element = 0; element < bit_size / 32; ++element) {
|
||||
X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STL(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};
|
||||
|
||||
const IR::Reg reg{Reg(insn)};
|
||||
const IR::U32 src{X(reg)};
|
||||
const int bit_size{GetSize(insn).first};
|
||||
switch (bit_size) {
|
||||
case 8: {
|
||||
const IR::U32 bit{ByteOffset(ir, offset)};
|
||||
const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
|
||||
ir.WriteLocal(word_offset, value);
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
const IR::U32 bit{ShortOffset(ir, offset)};
|
||||
const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
|
||||
ir.WriteLocal(word_offset, value);
|
||||
break;
|
||||
}
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
if (!IR::IsAligned(reg, bit_size / 32)) {
|
||||
throw NotImplementedException("Unaligned source register");
|
||||
}
|
||||
ir.WriteLocal(word_offset, src);
|
||||
for (int i = 1; i < bit_size / 32; ++i) {
|
||||
ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void TranslatorVisitor::STS(u64 insn) {
|
||||
const IR::U32 offset{Offset(*this, insn)};
|
||||
const IR::Reg reg{Reg(insn)};
|
||||
const int bit_size{GetSize(insn).first};
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
case 16:
|
||||
case 32:
|
||||
ir.WriteShared(bit_size, offset, X(reg));
|
||||
break;
|
||||
case 64:
|
||||
if (!IR::IsAligned(reg, 2)) {
|
||||
throw NotImplementedException("Unaligned source register {}", reg);
|
||||
}
|
||||
ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
|
||||
break;
|
||||
case 128: {
|
||||
if (!IR::IsAligned(reg, 2)) {
|
||||
throw NotImplementedException("Unaligned source register {}", reg);
|
||||
}
|
||||
const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
|
||||
ir.WriteShared(128, offset, vector);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
Loading…
Reference in New Issue