mirror of https://github.com/yuzu-mirror/yuzu
shader: Implement ATOM/S and RED
parent 479ca00071
commit 3db2b3effa
@@ -0,0 +1,528 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <bit>
#include <utility>

#include "shader_recompiler/backend/spirv/emit_spirv.h"

namespace Shader::Backend::SPIRV {
namespace {

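// GetSharedPointer and StorageIndex below translate the byte offsets carried by the IR into
// indices of the 32-bit words that back shared memory and storage buffers: shared offsets are
// shifted right by two (a divide by sizeof(u32)), and storage offsets are divided by the element
// size, with the shift-based path used when the offset is not an immediate.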
Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
    const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
    const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))};
    return ctx.profile.support_explicit_workgroup_layout
               ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
               : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
}

Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
    if (offset.IsImmediate()) {
        const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
        return ctx.Constant(ctx.U32[1], imm_offset);
    }
    const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
    const Id index{ctx.Def(offset)};
    if (shift == 0) {
        return index;
    }
    const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
    return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
}

Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                     u32 index_offset = 0) {
    // TODO: Support reinterpreting bindings, guaranteed to be aligned
    if (!binding.IsImmediate()) {
        throw NotImplementedException("Dynamic storage buffer indexing");
    }
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))};
    return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index);
}

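// GetAtomicArgs centralizes the scope/semantics pair passed to every OpAtomic* below:
// Device scope with relaxed (None) memory semantics, so no extra ordering is imposed here.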
std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) {
    const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))};
    const Id semantics{ctx.u32_zero_value};
    return {scope, semantics};
}

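// LoadU64 and StoreResult implement the non-atomic 64-bit fallback: a 64-bit value is
// reassembled from (or split into) two consecutive 32-bit words reached through two pointers.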
Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) {
    const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
    const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
    const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)};
    return ctx.OpBitcast(ctx.U64, original_composite);
}

void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) {
    const Id composite{ctx.OpBitcast(ctx.U32[2], result)};
    ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0));
    ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1));
}
} // Anonymous namespace

Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
}

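// INC and DEC are not plain SPIR-V atomics: the hardware forms wrap around the second operand,
// so they are routed to compare-and-swap helper functions owned by the context
// (increment_cas_shared / decrement_cas_shared and their SSBO equivalents). A rough, purely
// illustrative sketch of what such a helper is assumed to do for INC (atomicCompareExchange is
// a hypothetical name standing in for the emitted CAS loop):
//
//     u32 expected = data[index];
//     for (;;) {
//         const u32 desired = expected >= value ? 0u : expected + 1u;
//         const u32 actual = atomicCompareExchange(data[index], expected, desired);
//         if (actual == expected) {
//             return expected;
//         }
//         expected = actual;
//     }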
Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value,
                              ctx.shared_memory_u32);
}

Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value,
                              ctx.shared_memory_u32);
}

Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer{GetSharedPointer(ctx, pointer_offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
}

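// The 64-bit variants first try a native OpAtomic* on a 64-bit pointer when the profile reports
// int64 atomic support. Otherwise they fall back to a plain load/modify/store of two 32-bit
// words, which is not atomic; the disabled LOG_WARNING marks that caveat.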
Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) {
    const Id pointer_1{GetSharedPointer(ctx, pointer_offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    StoreResult(ctx, pointer_1, pointer_2, value);
    return original_value;
}

Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
}

Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
}

Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                               Id value) {
    const Id pointer{GetStoragePointer(ctx, binding, offset)};
    const auto [scope, semantics]{GetAtomicArgs(ctx)};
    return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
}

Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpIAdd(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpSMin(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpUMin(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpSMax(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpUMax(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                         Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                          Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)};
    StoreResult(ctx, pointer_1, pointer_2, result);
    return original_value;
}

Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                               Id value) {
    const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
    if (ctx.profile.support_int64_atomics) {
        const auto [scope, semantics]{GetAtomicArgs(ctx)};
        return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
    }
    // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
    const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
    const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
    StoreResult(ctx, pointer_1, pointer_2, value);
    return original_value;
}

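// Floating-point adds, mins and maxes have no core SPIR-V atomic opcode, so they are also
// emitted as calls to CAS helper functions owned by the context (f32_add_cas, f16x2_*_cas,
// f32x2_*_cas). The f16x2 results are bitcast back to a u32 and the f32x2 results are packed
// with OpPackHalf2x16, since the IR passes these values around as packed 32-bit words.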
Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                           Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
}

Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
    return ctx.OpBitcast(ctx.U32[1], result);
}

Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
    return ctx.OpPackHalf2x16(ctx.U32[1], result);
}

Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
    return ctx.OpBitcast(ctx.U32[1], result);
}

Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
    return ctx.OpPackHalf2x16(ctx.U32[1], result);
}

Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
    return ctx.OpBitcast(ctx.U32[1], result);
}

Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
                             Id value) {
    const Id ssbo{ctx.ssbos[binding.U32()]};
    const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
    const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
    return ctx.OpPackHalf2x16(ctx.U32[1], result);
}

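// The EmitGlobalAtomic* entry points below are intentionally stubbed: global memory accesses are
// expected to have been lowered to storage buffer (or other) operations by earlier passes, so
// reaching one of them at code generation time is treated as unimplemented.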
Id EmitGlobalAtomicIAdd32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMin32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMin32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMax32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMax32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicInc32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicDec32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAnd32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicOr32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicXor32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicExchange32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicIAdd64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMin64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMin64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicSMax64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicUMax64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicInc64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicDec64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAnd64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicOr64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicXor64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicExchange64(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAddF32(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAddF16x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicAddF32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicMinF16x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicMinF32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

} // namespace Shader::Backend::SPIRV
@@ -0,0 +1,222 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"

namespace Shader::Maxwell {
namespace {
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
    SAFEADD,
};

enum class AtomSize : u64 {
    U32,
    S32,
    U64,
    F32,
    F16x2,
    S64,
};

IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.GlobalAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.GlobalAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.GlobalAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.GlobalAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.GlobalAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.GlobalAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.GlobalAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atom Operation {}", op);
    }
}

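// The two FpControl blocks below differ only in flush-to-zero behaviour: packed f16x2 operands
// leave denormal handling up to the implementation (DontCare), while scalar f32 adds request
// FTZ, in line with how f32 arithmetic is usually handled for the guest GPU. Both use
// round-to-nearest and allow contraction.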
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    static constexpr IR::FpControl f16_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::DontCare},
    };
    static constexpr IR::FpControl f32_control{
        .no_contraction{false},
        .rounding{IR::FpRounding::RN},
        .fmz_mode{IR::FmzMode::FTZ},
    };
    switch (op) {
    case AtomOp::ADD:
        return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
                                     : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
    case AtomOp::MIN:
        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
    case AtomOp::MAX:
        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
    default:
        throw NotImplementedException("FP Atom Operation {}", op);
    }
}

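// AtomOffset rebuilds the effective global memory address: the base register (zero-extended to
// 64 bits when the E bit is clear, or read as a 64-bit register pair when it is set) plus a
// signed 20-bit immediate. When the base register is RZ, the immediate is instead treated as an
// absolute, unsigned address.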
IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<28, 20, s64> addr_offset;
        BitField<28, 20, u64> rz_addr_offset;
        BitField<48, 1, u64> e;
    } const mem{insn};

    const IR::U64 address{[&]() -> IR::U64 {
        if (mem.e == 0) {
            return v.ir.UConvert(64, v.X(mem.addr_reg));
        }
        return v.L(mem.addr_reg);
    }()};
    const u64 addr_offset{[&]() -> u64 {
        if (mem.addr_reg == IR::Reg::RZ) {
            // When RZ is used, the address is an absolute address
            return static_cast<u64>(mem.rz_addr_offset.Value());
        } else {
            return static_cast<u64>(mem.addr_offset.Value());
        }
    }()};
    return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
}

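// AtomOpNotApplicable filters size/op combinations the instruction does not support (for
// example INC/DEC on S32 or U64, or anything but ADD on F32). For those encodings ATOM degrades
// to a plain global load so the destination register still receives the current memory value,
// and RED becomes a no-op.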
bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
    // TODO: SAFEADD
    switch (size) {
    case AtomSize::S32:
    case AtomSize::U64:
        return (op == AtomOp::INC || op == AtomOp::DEC);
    case AtomSize::S64:
        return !(op == AtomOp::MIN || op == AtomOp::MAX);
    case AtomSize::F32:
        return op != AtomOp::ADD;
    case AtomSize::F16x2:
        return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
    default:
        return false;
    }
}

IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F32:
    case AtomSize::F16x2:
        return ir.LoadGlobal32(offset);
    case AtomSize::U64:
    case AtomSize::S64:
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    default:
        throw NotImplementedException("Atom Size {}", size);
    }
}

void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F16x2:
        return v.X(dest_reg, IR::U32{result});
    case AtomSize::U64:
    case AtomSize::S64:
        return v.L(dest_reg, IR::U64{result});
    case AtomSize::F32:
        return v.F(dest_reg, IR::F32{result});
    default:
        break;
    }
}
} // Anonymous namespace

void TranslatorVisitor::ATOM(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<49, 3, AtomSize> size;
        BitField<52, 4, AtomOp> op;
    } const atom{insn};

    const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
    const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
    const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
    const IR::U64 offset{AtomOffset(*this, insn)};
    IR::Value result;

    if (AtomOpNotApplicable(atom.size, atom.op)) {
        result = LoadGlobal(ir, offset, atom.size);
    } else if (!is_integer) {
        if (atom.size == AtomSize::F32) {
            result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
        } else {
            const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
            result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
        }
    } else if (size_64) {
        result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
    } else {
        result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
    }
    StoreResult(*this, atom.dest_reg, result, atom.size);
}

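// RED is the reduction-only form of ATOM: it decodes the operand register, size and op from
// different bit positions, performs the same global atomic operation, and discards the old
// value instead of writing it to a destination register.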
void TranslatorVisitor::RED(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_reg_b;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 3, AtomSize> size;
        BitField<23, 3, AtomOp> op;
    } const red{insn};

    if (AtomOpNotApplicable(red.size, red.op)) {
        return;
    }
    const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
    const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
    const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
    const IR::U64 offset{AtomOffset(*this, insn)};
    if (!is_integer) {
        if (red.size == AtomSize::F32) {
            ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
        } else {
            const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
            ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
        }
    } else if (size_64) {
        ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
    } else {
        ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
    }
}

} // namespace Shader::Maxwell
@@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"

namespace Shader::Maxwell {
namespace {
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
};

enum class AtomsSize : u64 {
    U32,
    S32,
    U64,
};

IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.SharedAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.SharedAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.SharedAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.SharedAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.SharedAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.SharedAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.SharedAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atoms Operation {}", op);
    }
}

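// AtomsOffset builds the shared memory byte offset: the 22-bit immediate is shifted left by two
// (word-aligned), and it is either used on its own when the offset register is RZ or added to
// the 32-bit offset register as a signed displacement.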
IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<30, 22, u64> absolute_offset;
        BitField<30, 22, s64> relative_offset;
    } const encoding{insn};

    if (encoding.offset_reg == IR::Reg::RZ) {
        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
    } else {
        const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
    }
}

void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
    switch (size) {
    case AtomsSize::U32:
    case AtomsSize::S32:
        return v.X(dest_reg, IR::U32{result});
    case AtomsSize::U64:
        return v.L(dest_reg, IR::U64{result});
    default:
        break;
    }
}
} // Anonymous namespace

void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};

    const bool size_64{atoms.size == AtomsSize::U64};
    if (size_64 && atoms.op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
    }
    const bool is_signed{atoms.size == AtomsSize::S32};
    const IR::U32 offset{AtomsOffset(*this, insn)};

    IR::Value result;
    if (size_64) {
        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
    } else {
        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
    }
    StoreResult(*this, atoms.dest_reg, result, atoms.size);
}

} // namespace Shader::Maxwell