mirror of https://github.com/yuzu-mirror/yuzu
Merge pull request #7219 from FernandoS27/aristotles-right-testicle
Project A.R.T. Advanced Rendering Techniquespull/8/head
commit
71313509f7
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
@ -0,0 +1,327 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/settings.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/ir/program.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
/// Tells whether the rescaling pass patches accesses to the given texture type.
/// Only plain 2D and 2D-array color textures qualify; every other type yields false.
[[nodiscard]] bool IsTextureTypeRescalable(TextureType type) {
    return type == TextureType::Color2D || type == TextureType::ColorArray2D;
}
|
||||
|
||||
/// Marking pre-pass for subgroup shuffle instructions.
///
/// Matches ShuffleIndex/ShuffleUp/ShuffleDown/ShuffleButterfly whose first argument is a
/// BitCastU32F32 of a GetAttribute reading PositionX or PositionY. For such a chain:
///  - the GetAttribute is tagged with the flag value 0xDEADBEEF (Visit leaves tagged
///    position reads unpatched), and
///  - a copy of the shuffle is inserted in front of it, multiplied by the reciprocal of the
///    resolution down factor, and every use of the original shuffle is redirected to that
///    product.
/// Anything that does not match the pattern is left untouched.
void VisitMark(IR::Block& block, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    const bool is_shuffle{opcode == IR::Opcode::ShuffleIndex || opcode == IR::Opcode::ShuffleUp ||
                          opcode == IR::Opcode::ShuffleDown ||
                          opcode == IR::Opcode::ShuffleButterfly};
    if (!is_shuffle) {
        return;
    }
    // The shuffled operand must come from an instruction, not a constant.
    const IR::Value shuffled{inst.Arg(0)};
    if (shuffled.IsImmediate()) {
        return;
    }
    const IR::Inst* const cast{shuffled.InstRecursive()};
    if (cast->GetOpcode() != IR::Opcode::BitCastU32F32) {
        return;
    }
    const IR::Value cast_source{cast->Arg(0)};
    if (cast_source.IsImmediate()) {
        return;
    }
    IR::Inst* const producer{cast_source.InstRecursive()};
    if (producer->GetOpcode() != IR::Opcode::GetAttribute) {
        return;
    }
    const IR::Attribute attribute{producer->Arg(0).Attribute()};
    if (attribute != IR::Attribute::PositionX && attribute != IR::Attribute::PositionY) {
        return;
    }
    // Tag the attribute read so the main visitor does not rescale it again.
    producer->SetFlags<u32>(0xDEADBEEF);

    // Rescale the shuffle result itself: clone the shuffle, then multiply by 1 / down_factor.
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, insertion_point};
    const IR::F32 shuffle_copy{&*block.PrependNewInst(insertion_point, inst)};
    const IR::F32 down_factor{ir.ResolutionDownFactor()};
    const IR::F32 up_factor{ir.FPRecip(down_factor)};
    const IR::Value rescaled{ir.FPMul(shuffle_copy, up_factor)};
    inst.ReplaceUsesWith(rescaled);
}
|
||||
|
||||
/// Replaces every use of a position attribute read with the same attribute multiplied by
/// the resolution down factor. The replacement instructions are emitted right before `inst`.
void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, insertion_point};
    const IR::F32 factor{ir.ResolutionDownFactor()};
    const IR::Attribute attribute{inst.Arg(0).Attribute()};
    const IR::F32 raw_coord{ir.GetAttribute(attribute)};
    const IR::F32 rescaled_coord{ir.FPMul(raw_coord, factor)};
    inst.ReplaceUsesWith(rescaled_coord);
}
|
||||
|
||||
/// Rewrites the value operand (argument 1) of a point-size attribute write so that it is
/// multiplied by the reciprocal of the resolution down factor.
void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, insertion_point};
    const IR::F32 original_size{inst.Arg(1)};
    const IR::F32 down_factor{ir.ResolutionDownFactor()};
    const IR::F32 inverse_factor{ir.FPRecip(down_factor)};
    const IR::F32 new_size{ir.FPMul(original_size, inverse_factor)};
    inst.SetArg(1, new_size);
}
|
||||
|
||||
/// Emits IR that yields `value * up_scale >> down_shift` when `is_scaled` is true and the
/// untouched `value` otherwise. The multiply is skipped when up_scale is 1 and the
/// (arithmetic) shift is skipped when down_shift is 0, both read from the resolution settings.
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 multiplier = Settings::values.resolution_info.up_scale;
    const u32 shift = Settings::values.resolution_info.down_shift;
    IR::U32 result{value};
    if (multiplier != 1) {
        result = ir.IMul(result, ir.Imm32(multiplier));
    }
    if (shift != 0) {
        result = ir.ShiftRightArithmetic(result, ir.Imm32(shift));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
|
||||
|
||||
/// Emits IR that rescales an integer coordinate while keeping the sub-pixel position taken
/// from the given position attribute:
///
///   result = float(value) * up_factor + (attrib - up_factor * floor(attrib * down_factor))
///
/// converted back to an unsigned integer. When `is_scaled` is false the original `value`
/// is selected instead.
[[nodiscard]] IR::U32 SubScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value,
                               const IR::Attribute attrib) {
    const IR::F32 up_factor{ir.Imm32(Settings::values.resolution_info.up_factor)};
    const auto value_as_float{ir.ConvertUToF(32, 32, value)};
    const IR::F32 base{ir.FPMul(value_as_float, up_factor)};
    const IR::F32 frag_coord{ir.GetAttribute(attrib)};
    const IR::F32 down_factor{ir.Imm32(Settings::values.resolution_info.down_factor)};
    // Position of the pixel's origin in rescaled space.
    const auto native_coord{ir.FPMul(frag_coord, down_factor)};
    const auto native_pixel{ir.FPFloor(native_coord)};
    const IR::F32 pixel_origin{ir.FPMul(up_factor, native_pixel)};
    // Offset of the current fragment inside that pixel.
    const auto negated_origin{ir.FPNeg(pixel_origin)};
    const auto sub_pixel{ir.FPAdd(frag_coord, negated_origin)};
    const IR::F16F32F64 deviation{ir.FPAdd(base, sub_pixel)};
    const auto rescaled{ir.ConvertFToU(32, deviation)};
    return IR::U32{ir.Select(is_scaled, rescaled, value)};
}
|
||||
|
||||
/// Inverse of Scale: emits IR that yields `(value << down_shift) / up_scale` when
/// `is_scaled` is true and the untouched `value` otherwise. The shift is skipped when
/// down_shift is 0 and the division is skipped when up_scale is 1.
[[nodiscard]] IR::U32 DownScale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
    const u32 shift = Settings::values.resolution_info.down_shift;
    const u32 divisor = Settings::values.resolution_info.up_scale;
    IR::U32 result{value};
    if (shift != 0) {
        result = ir.ShiftLeftLogical(result, ir.Imm32(shift));
    }
    if (divisor != 1) {
        result = ir.IDiv(result, ir.Imm32(divisor));
    }
    return IR::U32{ir.Select(is_scaled, result, value)};
}
|
||||
|
||||
/// Patches an ImageQueryDimensions result for 2D and 2D-array textures: a copy of the query
/// is inserted before `inst`, its first two components (width, height) are passed through
/// DownScale, and all uses of `inst` are redirected to a composite made of the down-scaled
/// width/height plus the unchanged components 2 and 3. Other texture types are not patched.
void PatchImageQueryDimensions(IR::Block& block, IR::Inst& inst) {
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter ir{block, insertion_point};
    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(tex_info.descriptor_index))};

    const TextureType type = tex_info.type;
    if (type != TextureType::Color2D && type != TextureType::ColorArray2D) {
        // Nothing to patch here
        return;
    }
    const IR::Value query{&*block.PrependNewInst(insertion_point, inst)};
    const IR::U32 raw_width{ir.CompositeExtract(query, 0)};
    const IR::U32 width{DownScale(ir, is_scaled, raw_width)};
    const IR::U32 raw_height{ir.CompositeExtract(query, 1)};
    const IR::U32 height{DownScale(ir, is_scaled, raw_height)};
    const IR::Value patched{ir.CompositeConstruct(
        width, height, ir.CompositeExtract(query, 2), ir.CompositeExtract(query, 3))};
    inst.ReplaceUsesWith(patched);
}
|
||||
|
||||
/// Rescales the x and y components of the integer composite stored in argument `index` of
/// `inst` using Scale. Does nothing when that argument is empty. For Color2D the argument
/// becomes (x, y); for ColorArray2D the third component is carried over unchanged. For any
/// other texture type the scaled values are computed but the argument is left as is.
void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled,
                           size_t index) {
    const IR::Value vector{inst.Arg(index)};
    if (vector.IsEmpty()) {
        return;
    }
    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
    const IR::U32 raw_x{ir.CompositeExtract(vector, 0)};
    const IR::U32 scaled_x{Scale(ir, is_scaled, raw_x)};
    const IR::U32 raw_y{ir.CompositeExtract(vector, 1)};
    const IR::U32 scaled_y{Scale(ir, is_scaled, raw_y)};

    const TextureType type = tex_info.type;
    if (type == TextureType::Color2D) {
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y));
    } else if (type == TextureType::ColorArray2D) {
        const IR::U32 layer{ir.CompositeExtract(vector, 2)};
        inst.SetArg(index, ir.CompositeConstruct(scaled_x, scaled_y, layer));
    }
    // Every other texture type: nothing to patch here
}
|
||||
|
||||
/// Rescales the x and y components of the coordinate operand (argument 1) of `inst` with
/// SubScale, using PositionX for x and PositionY for y. For Color2D the operand becomes
/// (x, y); for ColorArray2D the third component is carried over unchanged. For any other
/// texture type the operand is left as is.
void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) {
    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
    const IR::Value coords{inst.Arg(1)};
    const IR::U32 raw_x{ir.CompositeExtract(coords, 0)};
    const IR::U32 raw_y{ir.CompositeExtract(coords, 1)};

    const IR::U32 new_x{SubScale(ir, is_scaled, raw_x, IR::Attribute::PositionX)};
    const IR::U32 new_y{SubScale(ir, is_scaled, raw_y, IR::Attribute::PositionY)};

    const TextureType type = tex_info.type;
    if (type == TextureType::Color2D) {
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y));
    } else if (type == TextureType::ColorArray2D) {
        const IR::U32 layer{ir.CompositeExtract(coords, 2)};
        inst.SetArg(1, ir.CompositeConstruct(new_x, new_y, layer));
    }
    // Every other texture type: nothing to patch here
}
|
||||
|
||||
/// ImageFetch patch used for fragment shaders: for rescalable texture types, the coordinate
/// operand is rescaled with SubScaleCoord and the offset operand (argument 2) with
/// ScaleIntegerComposite, both conditioned on the texture's "is scaled" flag.
void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(tex_info.type)) {
        const IR::U1 scaled_flag{ir.IsTextureScaled(ir.Imm32(tex_info.descriptor_index))};
        SubScaleCoord(ir, inst, scaled_flag);
        // Scale ImageFetch offset
        ScaleIntegerComposite(ir, inst, scaled_flag, 2);
    }
}
|
||||
|
||||
/// ImageRead patch used for fragment shaders: for rescalable texture types, the coordinate
/// operand is rescaled with SubScaleCoord, conditioned on the image's "is scaled" flag.
void SubScaleImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto image_info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(image_info.type)) {
        const IR::U1 scaled_flag{ir.IsImageScaled(ir.Imm32(image_info.descriptor_index))};
        SubScaleCoord(ir, inst, scaled_flag);
    }
}
|
||||
|
||||
/// ImageFetch patch used for non-fragment stages: for rescalable texture types, both the
/// coordinate operand (argument 1) and the offset operand (argument 2) are rescaled with
/// ScaleIntegerComposite, conditioned on the texture's "is scaled" flag.
void PatchImageFetch(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto tex_info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(tex_info.type)) {
        const IR::U1 scaled_flag{ir.IsTextureScaled(ir.Imm32(tex_info.descriptor_index))};
        ScaleIntegerComposite(ir, inst, scaled_flag, 1);
        // Scale ImageFetch offset
        ScaleIntegerComposite(ir, inst, scaled_flag, 2);
    }
}
|
||||
|
||||
/// ImageRead patch used for non-fragment stages: for rescalable texture types, the
/// coordinate operand (argument 1) is rescaled with ScaleIntegerComposite, conditioned on
/// the image's "is scaled" flag.
void PatchImageRead(IR::Block& block, IR::Inst& inst) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const auto image_info{inst.Flags<IR::TextureInstInfo>()};
    if (IsTextureTypeRescalable(image_info.type)) {
        const IR::U1 scaled_flag{ir.IsImageScaled(ir.Imm32(image_info.descriptor_index))};
        ScaleIntegerComposite(ir, inst, scaled_flag, 1);
    }
}
|
||||
|
||||
/// Main rescaling visitor; dispatches on the instruction opcode:
///  - GetAttribute of PositionX/PositionY: patched with PatchFragCoord, but only in fragment
///    shaders and only if the instruction does not carry the 0xDEADBEEF flag value.
///  - SetAttribute of PointSize: patched with PatchPointSize unless flagged 0xDEADBEEF.
///  - ImageQueryDimensions: always handed to PatchImageQueryDimensions.
///  - ImageFetch / ImageRead: the SubScale* variant in fragment shaders, the Patch* variant
///    in every other stage.
/// All other instructions are ignored.
void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
    const bool in_fragment_stage{program.stage == Stage::Fragment};
    switch (inst.GetOpcode()) {
    case IR::Opcode::GetAttribute: {
        const IR::Attribute attribute{inst.Arg(0).Attribute()};
        const bool reads_position_xy{attribute == IR::Attribute::PositionX ||
                                     attribute == IR::Attribute::PositionY};
        if (reads_position_xy && in_fragment_stage && inst.Flags<u32>() != 0xDEADBEEF) {
            PatchFragCoord(block, inst);
        }
        return;
    }
    case IR::Opcode::SetAttribute: {
        const IR::Attribute attribute{inst.Arg(0).Attribute()};
        if (attribute == IR::Attribute::PointSize && inst.Flags<u32>() != 0xDEADBEEF) {
            PatchPointSize(block, inst);
        }
        return;
    }
    case IR::Opcode::ImageQueryDimensions:
        PatchImageQueryDimensions(block, inst);
        return;
    case IR::Opcode::ImageFetch:
        if (in_fragment_stage) {
            SubScaleImageFetch(block, inst);
        } else {
            PatchImageFetch(block, inst);
        }
        return;
    case IR::Opcode::ImageRead:
        if (in_fragment_stage) {
            SubScaleImageRead(block, inst);
        } else {
            PatchImageRead(block, inst);
        }
        return;
    default:
        return;
    }
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Entry point of the rescaling pass. Walks every instruction of every block in
/// `program.post_order_blocks`: fragment shaders first get a full VisitMark sweep, then all
/// stages get a Visit sweep.
void RescalingPass(IR::Program& program) {
    // Shared traversal used by both sweeps.
    const auto for_each_instruction{[&program](auto&& visitor) {
        for (IR::Block* const block : program.post_order_blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                visitor(*block, inst);
            }
        }
    }};

    if (program.stage == Stage::Fragment) {
        for_each_instruction([](IR::Block& block, IR::Inst& inst) { VisitMark(block, inst); });
    }
    for_each_instruction(
        [&program](IR::Block& block, IR::Inst& inst) { Visit(program, block, inst); });
}
|
||||
|
||||
} // namespace Shader::Optimization
|
@ -0,0 +1,116 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
//!#version 460 core
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
#extension GL_ARB_shading_language_420pack : enable
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_EXT_shader_explicit_arithmetic_types : require
|
||||
|
||||
// FidelityFX Super Resolution Sample
|
||||
//
|
||||
// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
layout( push_constant ) uniform constants {
|
||||
uvec4 Const0;
|
||||
uvec4 Const1;
|
||||
uvec4 Const2;
|
||||
uvec4 Const3;
|
||||
};
|
||||
|
||||
layout(set=0,binding=0) uniform sampler2D InputTexture;
|
||||
layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
|
||||
|
||||
#define A_GPU 1
|
||||
#define A_GLSL 1
|
||||
|
||||
// Texture access callbacks consumed by ffx_fsr1.h.
// Without YUZU_USE_FP16 the *F variants are provided; with it, A_HALF is defined before
// including ffx_a.h and the *H variants are provided instead.
#ifndef YUZU_USE_FP16
#include "ffx_a.h"

#if USE_EASU
#define FSR_EASU_F 1
// Gather one channel (0 = red, 1 = green, 2 = blue) of the input texture around p.
AF4 FsrEasuRF(AF2 p) {
    return textureGather(InputTexture, p, 0);
}
AF4 FsrEasuGF(AF2 p) {
    return textureGather(InputTexture, p, 1);
}
AF4 FsrEasuBF(AF2 p) {
    return textureGather(InputTexture, p, 2);
}
#endif
#if USE_RCAS
#define FSR_RCAS_F 1
// Fetch a single texel of mip level 0.
AF4 FsrRcasLoadF(ASU2 p) {
    return texelFetch(InputTexture, p, 0);
}
// Input transform hook; intentionally a no-op.
void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {
}
#endif
#else
#define A_HALF
#include "ffx_a.h"

#if USE_EASU
#define FSR_EASU_H 1
// Gather one channel (0 = red, 1 = green, 2 = blue) of the input texture around p.
AH4 FsrEasuRH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 0));
}
AH4 FsrEasuGH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 1));
}
AH4 FsrEasuBH(AF2 p) {
    return AH4(textureGather(InputTexture, p, 2));
}
#endif
#if USE_RCAS
#define FSR_RCAS_H 1
// Fetch a single texel of mip level 0.
AH4 FsrRcasLoadH(ASW2 p) {
    return AH4(texelFetch(InputTexture, ASU2(p), 0));
}
// Input transform hook; intentionally a no-op.
void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {
}
#endif
#endif
|
||||
|
||||
#include "ffx_fsr1.h"
|
||||
|
||||
// Runs the filter selected at compile time (USE_BILINEAR, USE_EASU or USE_RCAS) for one
// output pixel and stores the result in OutputTexture at `pos`.
void CurrFilter(AU2 pos) {
#if USE_BILINEAR
    AF2 sample_uv = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) +
                    AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
    imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, sample_uv, 0.0));
#endif

#if USE_EASU
#ifdef YUZU_USE_FP16
    AH3 upscaled;
    FsrEasuH(upscaled, pos, Const0, Const1, Const2, Const3);
    imageStore(OutputTexture, ASU2(pos), AH4(upscaled, 1));
#else
    AF3 upscaled;
    FsrEasuF(upscaled, pos, Const0, Const1, Const2, Const3);
    imageStore(OutputTexture, ASU2(pos), AF4(upscaled, 1));
#endif
#endif

#if USE_RCAS
#ifdef YUZU_USE_FP16
    AH3 sharpened;
    FsrRcasH(sharpened.r, sharpened.g, sharpened.b, pos, Const0);
    imageStore(OutputTexture, ASU2(pos), AH4(sharpened, 1));
#else
    AF3 sharpened;
    FsrRcasF(sharpened.r, sharpened.g, sharpened.b, pos, Const0);
    imageStore(OutputTexture, ASU2(pos), AF4(sharpened, 1));
#endif
#endif
}
|
||||
|
||||
layout(local_size_x=64) in;
|
||||
void main() {
    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
    // Each invocation then filters four pixels of the work group's 16x16 tile, one per
    // 8x8 quadrant, visited in the order (0,0), (8,0), (8,8), (0,8).
    AU2 origin = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.xy << 4u);
    CurrFilter(origin);
    CurrFilter(origin + AU2(8u, 0u));
    CurrFilter(origin + AU2(8u, 8u));
    CurrFilter(origin + AU2(0u, 8u));
}
|
@ -0,0 +1,76 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Source code is adapted from
|
||||
// https://www.geeks3d.com/20110405/fxaa-fast-approximate-anti-aliasing-demo-glsl-opengl-test-radeon-geforce/3/
|
||||
|
||||
#version 460
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 1
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
|
||||
#endif
|
||||
|
||||
layout (location = 0) in vec4 posPos;
|
||||
|
||||
layout (location = 0) out vec4 frag_color;
|
||||
|
||||
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||
|
||||
const float FXAA_SPAN_MAX = 8.0;
|
||||
const float FXAA_REDUCE_MUL = 1.0 / 8.0;
|
||||
const float FXAA_REDUCE_MIN = 1.0 / 128.0;
|
||||
|
||||
#define FxaaTexLod0(t, p) textureLod(t, p, 0.0)
|
||||
#define FxaaTexOff(t, p, o) textureLodOffset(t, p, 0.0, o)
|
||||
|
||||
// FXAA resolve for one fragment.
// posPos.xy is the fragment's texture coordinate and posPos.zw the shifted coordinate used
// as the north-west corner of the 2x2 neighbourhood. Returns the anti-aliased RGB color.
vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
    // Neighbourhood colors: four corners around posPos.zw plus the centre sample.
    vec3 colorNW = FxaaTexLod0(tex, posPos.zw).xyz;
    vec3 colorNE = FxaaTexOff(tex, posPos.zw, ivec2(1,0)).xyz;
    vec3 colorSW = FxaaTexOff(tex, posPos.zw, ivec2(0,1)).xyz;
    vec3 colorSE = FxaaTexOff(tex, posPos.zw, ivec2(1,1)).xyz;
    vec3 colorM = FxaaTexLod0(tex, posPos.xy).xyz;

    // Per-sample luminance.
    vec3 lumaWeights = vec3(0.299, 0.587, 0.114);
    float yNW = dot(colorNW, lumaWeights);
    float yNE = dot(colorNE, lumaWeights);
    float ySW = dot(colorSW, lumaWeights);
    float ySE = dot(colorSE, lumaWeights);
    float yM = dot(colorM, lumaWeights);

    // Local luminance range.
    float yMin = min(yM, min(min(yNW, yNE), min(ySW, ySE)));
    float yMax = max(yM, max(max(yNW, yNE), max(ySW, ySE)));

    // Edge direction estimated from the luminance gradient.
    vec2 edgeDir = vec2(-((yNW + yNE) - (ySW + ySE)),
                        ((yNW + ySW) - (yNE + ySE)));

    // Normalize by the smaller axis, clamp the span and convert to texture coordinates.
    float reduce = max((yNW + yNE + ySW + ySE) * (0.25 * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN);
    float invDirMin = 1.0/(min(abs(edgeDir.x), abs(edgeDir.y)) + reduce);
    edgeDir = clamp(edgeDir * invDirMin, vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
                    vec2( FXAA_SPAN_MAX, FXAA_SPAN_MAX)) / textureSize(tex, 0);

    // Two-tap blur along the edge, then a wider four-tap blur.
    vec3 narrow = (1.0 / 2.0) * (
        FxaaTexLod0(tex, posPos.xy + edgeDir * (1.0 / 3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + edgeDir * (2.0 / 3.0 - 0.5)).xyz);
    vec3 wide = narrow * (1.0 / 2.0) + (1.0 / 4.0) * (
        FxaaTexLod0(tex, posPos.xy + edgeDir * (0.0 / 3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + edgeDir * (3.0 / 3.0 - 0.5)).xyz);

    // Fall back to the narrow blur when the wide one leaves the local luminance range.
    float yWide = dot(wide, lumaWeights);
    bool outsideRange = (yWide < yMin) || (yWide > yMax);
    return outsideRange ? narrow : wide;
}
|
||||
|
||||
// Writes the FXAA-filtered color with alpha forced to 1.0.
void main() {
    vec3 filtered = FxaaPixelShader(posPos, input_texture);
    frag_color = vec4(filtered, 1.0);
}
|
@ -0,0 +1,38 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460
|
||||
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
};
|
||||
|
||||
const vec2 vertices[4] =
|
||||
vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
|
||||
|
||||
layout (location = 0) out vec4 posPos;
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
#define VERTEX_ID gl_VertexIndex
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
#define VERTEX_ID gl_VertexID
|
||||
|
||||
#endif
|
||||
|
||||
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||
|
||||
const float FXAA_SUBPIX_SHIFT = 0;
|
||||
|
||||
// Emits one corner of a full-screen quad and the two texture coordinates used by the FXAA
// fragment shader: posPos.xy is the plain coordinate, posPos.zw is the coordinate shifted
// by (0.5 + FXAA_SUBPIX_SHIFT) texels towards the origin.
void main() {
    vec2 corner = vertices[VERTEX_ID];
    gl_Position = vec4(corner, 0.0, 1.0);

    vec2 uv = (corner + 1.0) / 2.0;
    vec2 shift = (0.5 + FXAA_SUBPIX_SHIFT) / textureSize(input_texture, 0);
    posPos = vec4(uv, uv - shift);
}
|
@ -0,0 +1,130 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2020 BreadFish64
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
// Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
|
||||
|
||||
//! #version 460
|
||||
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
|
||||
#ifdef YUZU_USE_FP16
|
||||
|
||||
#extension GL_AMD_gpu_shader_half_float : enable
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
|
||||
#define lfloat float16_t
|
||||
#define lvec2 f16vec2
|
||||
#define lvec3 f16vec3
|
||||
#define lvec4 f16vec4
|
||||
|
||||
#else
|
||||
|
||||
#define lfloat float
|
||||
#define lvec2 vec2
|
||||
#define lvec3 vec3
|
||||
#define lvec4 vec4
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 1
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
|
||||
#endif
|
||||
|
||||
layout (location = 0) in vec2 tex_coord;
|
||||
|
||||
layout (location = 0) out vec4 frag_color;
|
||||
|
||||
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
|
||||
|
||||
const bool ignore_alpha = true;
|
||||
|
||||
// Distance between two colors, measured as the length of their difference expressed in
// YCbCr. When ignore_alpha is false the alpha channels are folded into the result as well.
lfloat ColorDist1(lvec4 a, lvec4 b) {
    // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
    const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
    const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
    const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);

    lvec4 delta = a - b;
    lfloat luma = dot(delta.rgb, K);
    lvec3 ycbcr = lvec3(luma, scaleB * (delta.b - luma), scaleR * (delta.r - luma));
    lfloat dist = length(ycbcr);

    if (!ignore_alpha) {
        return sqrt(a.a * b.a * dist * dist + delta.a * delta.a);
    }
    return dist;
}
|
||||
|
||||
// Color distance from `ref` to each of A, B, C and D, packed as (x, y, z, w).
lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
    lvec4 dist;
    dist.x = ColorDist1(ref, A);
    dist.y = ColorDist1(ref, B);
    dist.z = ColorDist1(ref, C);
    dist.w = ColorDist1(ref, D);
    return dist;
}
|
||||
|
||||
// ScaleForce filter: samples the 3x3 neighbourhood of tex_coord, measures how different the
// centre texel is from each neighbour, and re-samples the texture at a position shifted
// according to those differences. Returns the centre texel unchanged when all neighbours
// are identical to it.
vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
    // 3x3 neighbourhood, bottom row first.
    lvec4 bottom_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
    lvec4 bottom_mid = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
    lvec4 bottom_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
    lvec4 mid_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
    lvec4 centre = lvec4(texture(tex, tex_coord));
    lvec4 mid_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
    lvec4 top_left = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
    lvec4 top_mid = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
    lvec4 top_right = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));

    lvec4 dist_upper = ColorDist(centre, top_left, top_mid, top_right, mid_right);
    lvec4 dist_lower = ColorDist(centre, bottom_right, bottom_mid, bottom_left, mid_left);

    // Calculate how different the centre is from the texels around it
    const lfloat plus_weight = lfloat(1.5);
    const lfloat cross_weight = lfloat(1.5);
    lvec4 weights = lvec4(cross_weight, plus_weight, cross_weight, plus_weight);
    lfloat total_dist = dot(dist_upper + dist_lower, weights);

    if (total_dist == lfloat(0.0)) {
        return centre;
    }

    // Add together all the distances with direction taken into account
    lvec4 signed_dist = dist_upper - dist_lower;
    lvec2 total_offset = signed_dist.wy * plus_weight +
                         (signed_dist.zz + lvec2(-signed_dist.x, signed_dist.x)) * cross_weight;

    // When the image has thin points, they tend to split apart.
    // This is because the texels all around are different and total_offset reaches into
    // clear areas.
    // This works pretty well to keep the offset in bounds for these cases.
    lfloat limit = length(total_offset) / total_dist;
    vec2 texel_shift = vec2(clamp(total_offset, -limit, limit)) / textureSize(tex, 0);

    return texture(tex, tex_coord - texel_shift);
}
|
||||
|
||||
// Outputs the ScaleForce-filtered color for this fragment.
void main() {
    vec4 filtered = Scaleforce(input_texture, tex_coord);
    frag_color = filtered;
}
|
@ -0,0 +1,67 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 1
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
layout (location = 0) in vec2 frag_tex_coord;
|
||||
|
||||
layout (location = 0) out vec4 color;
|
||||
|
||||
layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
|
||||
|
||||
// Evaluates the four cubic weights for fractional position v, derived from the cubes of
// (1 - v), (2 - v), (3 - v) and scaled by 1/6. The fourth weight is chosen so that the
// weights sum to one before rounding.
vec4 cubic(float v) {
    vec4 shifted = vec4(1.0, 2.0, 3.0, 4.0) - v;
    vec4 cubed = shifted * shifted * shifted;
    float w0 = cubed.x;
    float w1 = cubed.y - 4.0 * cubed.x;
    float w2 = cubed.z - 4.0 * cubed.y + 6.0 * cubed.x;
    float w3 = 6.0 - w0 - w1 - w2;
    return vec4(w0, w1, w2, w3) * (1.0 / 6.0);
}
|
||||
|
||||
// Bicubic texture lookup built from four texture() fetches: the cubic weights are folded
// into the fetch positions, and the four results are blended with two nested mixes.
vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
    vec2 size = textureSize(textureSampler, 0);
    vec2 texel = 1.0 / size;

    // Work in texel space, split into integer base and fractional part.
    vec2 pixel = texCoords * size - 0.5;
    vec2 frac_part = fract(pixel);
    vec2 base = pixel - frac_part;

    vec4 weights_x = cubic(frac_part.x);
    vec4 weights_y = cubic(frac_part.y);

    vec4 centres = base.xxyy + vec2(-0.5, +1.5).xyxy;

    // Pairwise weight sums: (x0 + x1, x2 + x3, y0 + y1, y2 + y3).
    vec4 sums = vec4(weights_x.xz + weights_x.yw, weights_y.xz + weights_y.yw);
    vec4 taps = centres + vec4(weights_x.yw, weights_y.yw) / sums;

    taps *= texel.xxyy;

    vec4 tap_x0y0 = texture(textureSampler, taps.xz);
    vec4 tap_x1y0 = texture(textureSampler, taps.yz);
    vec4 tap_x0y1 = texture(textureSampler, taps.xw);
    vec4 tap_x1y1 = texture(textureSampler, taps.yw);

    float blend_x = sums.x / (sums.x + sums.y);
    float blend_y = sums.z / (sums.z + sums.w);

    vec4 row_y1 = mix(tap_x1y1, tap_x0y1, blend_x);
    vec4 row_y0 = mix(tap_x1y0, tap_x0y0, blend_x);
    return mix(row_y1, row_y0, blend_y);
}
|
||||
|
||||
// Outputs the bicubic-filtered RGB with alpha forced to 1.0.
void main() {
    vec3 filtered = textureBicubic(color_texture, frag_tex_coord).rgb;
    color = vec4(filtered, 1.0f);
}
|
@ -0,0 +1,70 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Code adapted from the following sources:
|
||||
// - https://learnopengl.com/Advanced-Lighting/Bloom
|
||||
// - https://www.rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling/
|
||||
|
||||
#version 460 core
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 1
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#define BINDING_COLOR_TEXTURE 0
|
||||
|
||||
#endif
|
||||
|
||||
// Texture coordinate read by main() and forwarded to the blur helpers.
layout(location = 0) in vec2 frag_tex_coord;
|
||||
|
||||
// Final blurred color written by main().
layout(location = 0) out vec4 color;
|
||||
|
||||
// Source image to blur; the binding index is selected by the VULKAN #ifdef above.
layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture;
|
||||
|
||||
// Tap distances from the center sample. The blur helpers multiply them by `norm`
// (1 / textureSize, as passed from main()), so they are expressed in texels.
// Index 0 is the center tap; the helpers only iterate over indices 1 and 2.
const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308);
|
||||
// Tap weights. weight[0] is applied to the center sample in main(); weight[1] and
// weight[2] are applied to both the +offset and -offset taps, so
// weight[0] + 2 * (weight[1] + weight[2]) sums to 1.
const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703);
|
||||
|
||||
// Accumulates the weighted off-center taps along the Y axis (the center tap is
// not included). `norm` scales the texel offsets into texture coordinates.
vec4 blurVertical(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(0.0, offset[tap]) * norm;
        sum += texture(textureSampler, vec2(coord) + delta) * weight[tap];
        sum += texture(textureSampler, vec2(coord) - delta) * weight[tap];
    }
    return sum;
}
|
||||
|
||||
// Accumulates the weighted off-center taps along the X axis (the center tap is
// not included). `norm` scales the texel offsets into texture coordinates.
vec4 blurHorizontal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(offset[tap], 0.0) * norm;
        sum += texture(textureSampler, vec2(coord) + delta) * weight[tap];
        sum += texture(textureSampler, vec2(coord) - delta) * weight[tap];
    }
    return sum;
}
|
||||
|
||||
// Accumulates the weighted off-center taps along a diagonal (the center tap is
// not included). The same offset is used on both axes; the sign of each `norm`
// component picks which diagonal is walked.
vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
    vec4 sum = vec4(0.0f);
    for (int tap = 1; tap < 3; tap++) {
        vec2 delta = vec2(offset[tap], offset[tap]) * norm;
        sum += texture(textureSampler, vec2(coord) + delta) * weight[tap];
        sum += texture(textureSampler, vec2(coord) - delta) * weight[tap];
    }
    return sum;
}
|
||||
|
||||
// Blurs `color_texture` by averaging four directional blurs (horizontal, vertical
// and both diagonals) and adding the weighted center sample. Alpha is forced to 1.
void main() {
    vec3 center = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
    vec2 texel_size = 1.0f / textureSize(color_texture, 0);

    // TODO(Blinkhawk): This code can be optimized through shader group instructions.
    vec3 blur_h = blurHorizontal(color_texture, frag_tex_coord, texel_size).rgb;
    vec3 blur_v = blurVertical(color_texture, frag_tex_coord, texel_size).rgb;
    vec3 blur_d0 = blurDiagonal(color_texture, frag_tex_coord, texel_size).rgb;
    vec3 blur_d1 =
        blurDiagonal(color_texture, frag_tex_coord, texel_size * vec2(1.0, -1.0)).rgb;

    // Equal-weight average of the axis-aligned pair and the diagonal pair.
    vec3 axis_avg = mix(blur_h, blur_v, 0.5f);
    vec3 diagonal_avg = mix(blur_d0, blur_d1, 0.5f);
    vec3 blurred = mix(axis_avg, diagonal_avg, 0.5f);

    color = vec4(blurred + center, 1.0f);
}
|
@ -0,0 +1,11 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define YUZU_USE_FP16
|
||||
#define USE_EASU 1
|
||||
|
||||
#include "fidelityfx_fsr.comp"
|
@ -0,0 +1,10 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define USE_EASU 1
|
||||
|
||||
#include "fidelityfx_fsr.comp"
|
@ -0,0 +1,11 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define YUZU_USE_FP16
|
||||
#define USE_RCAS 1
|
||||
|
||||
#include "fidelityfx_fsr.comp"
|
@ -0,0 +1,10 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 460 core
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define USE_RCAS 1
|
||||
|
||||
#include "fidelityfx_fsr.comp"
|
@ -0,0 +1,7 @@
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define YUZU_USE_FP16
|
||||
|
||||
#include "opengl_present_scaleforce.frag"
|
@ -0,0 +1,5 @@
|
||||
#version 460
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "opengl_present_scaleforce.frag"
|
@ -0,0 +1,553 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cmath>
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
|
||||
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_fsr.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
// Reimplementations of the constant generating functions in ffx_fsr1.h
|
||||
// GCC generated a lot of warnings when using the official header.
|
||||
// Table-driven conversion of the bit pattern of `f` into a 16-bit value returned in a u32.
// The top 9 bits of the float (sign bit plus 8-bit exponent field) select an entry in `base`
// and in `shift`; the result is base[i] plus the 23-bit mantissa shifted right by shift[i].
// NOTE(review): the tables look like a float32 -> float16 bit conversion that saturates to
// 0x7bff / 0xfbff for large magnitudes and flushes tiny values to 0x0000 / 0x8000 - confirm
// against the AU1_AH1_AF1 helper in AMD's FidelityFX headers.
u32 AU1_AH1_AF1(f32 f) {
|
||||
// Additive base per index: entries 0..255 cover a clear sign bit, 256..511 a set sign bit.
static constexpr u32 base[512]{
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040,
|
||||
0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
|
||||
0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
|
||||
0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
|
||||
0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008,
|
||||
0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
|
||||
0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
|
||||
0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
|
||||
0xf000, 0xf400, 0xf800, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
|
||||
};
|
||||
// Right shift applied to the 23-bit mantissa per index. 0x18 (24) discards the whole
// mantissa; 0x0d (13) keeps its top 10 bits.
static constexpr s8 shift[512]{
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16,
|
||||
0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
|
||||
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
|
||||
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
|
||||
0x18, 0x18,
|
||||
};
|
||||
// Reinterpret the float as its raw 32-bit pattern.
const u32 u = Common::BitCast<u32>(f);
|
||||
// Top 9 bits (sign bit + exponent field) form the table index, 0..511.
const u32 i = u >> 23;
|
||||
// 0x7fffff masks the 23 mantissa bits.
return base[i] + ((u & 0x7fffff) >> shift[i]);
|
||||
}
|
||||
|
||||
// Converts both elements of `a` with AU1_AH1_AF1 and packs them into a single u32:
// a[0] contributes the low 16 bits and a[1] is shifted into the high 16 bits.
u32 AU1_AH2_AF2(f32 a[2]) {
    const u32 low_half = AU1_AH1_AF1(a[0]);
    const u32 high_half = AU1_AH1_AF1(a[1]);
    return low_half + (high_half << 16);
}
|
||||
|
||||
// Fills the four 4-word EASU constant vectors with the raw bit patterns of floats derived
// from the input viewport size, the input image size and the output size (all in pixels).
//   con0: viewport/output scale (x, y) and 0.5 * scale - 0.5 (z, w)
//   con1..con3: multiples of the reciprocal input image size; con3.zw are zero.
void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4], f32 inputViewportInPixelsX,
                f32 inputViewportInPixelsY, f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
                f32 outputSizeInPixelsX, f32 outputSizeInPixelsY) {
    const auto as_bits = [](f32 value) { return Common::BitCast<u32>(value); };

    // 1 / input size is used by several constants; compute it once.
    const f32 inv_input_x = 1.0f / inputSizeInPixelsX;
    const f32 inv_input_y = 1.0f / inputSizeInPixelsY;

    con0[0] = as_bits(inputViewportInPixelsX / outputSizeInPixelsX);
    con0[1] = as_bits(inputViewportInPixelsY / outputSizeInPixelsY);
    con0[2] = as_bits(0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f);
    con0[3] = as_bits(0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f);

    con1[0] = as_bits(inv_input_x);
    con1[1] = as_bits(inv_input_y);
    con1[2] = as_bits(inv_input_x);
    con1[3] = as_bits(-1.0f / inputSizeInPixelsY);

    con2[0] = as_bits(-1.0f / inputSizeInPixelsX);
    con2[1] = as_bits(2.0f / inputSizeInPixelsY);
    con2[2] = as_bits(inv_input_x);
    con2[3] = as_bits(2.0f / inputSizeInPixelsY);

    con3[0] = as_bits(0.0f / inputSizeInPixelsX);
    con3[1] = as_bits(4.0f / inputSizeInPixelsY);
    con3[2] = 0;
    con3[3] = 0;
}
|
||||
|
||||
// Same as FsrEasuCon, except that con0.zw additionally include the input offset (in pixels),
// so a sub-rectangle of the input image can be used as the source.
void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
                      f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
                      f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
                      f32 outputSizeInPixelsY, f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
    // Start from the offset-free constants...
    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
               inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);

    // ...then overwrite con0.zw with the offset-adjusted values.
    const f32 shifted_x =
        0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f + inputOffsetInPixelsX;
    const f32 shifted_y =
        0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f + inputOffsetInPixelsY;
    con0[2] = Common::BitCast<u32>(shifted_x);
    con0[3] = Common::BitCast<u32>(shifted_y);
}
|
||||
|
||||
// Fills the 4-word RCAS constant vector from `sharpness`.
// The value is first mapped through exp2(-sharpness); con[0] receives its 32-bit float bits,
// con[1] receives the same value converted and packed twice via AU1_AH2_AF2, and con[2..3]
// are zeroed.
void FsrRcasCon(u32* con, f32 sharpness) {
    const f32 scaled = std::exp2f(-sharpness);
    f32 packed_input[2]{scaled, scaled};

    con[0] = Common::BitCast<u32>(scaled);
    con[1] = AU1_AH2_AF2(packed_input);
    con[2] = 0;
    con[3] = 0;
}
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Stores the construction parameters and creates every Vulkan object the FSR passes need.
/// The creation order matters: the sampler must exist before the descriptor set layout
/// (which references it as an immutable sampler), the pool and layout before the descriptor
/// sets, the layout before the pipeline layout, and shaders plus pipeline layout before the
/// pipelines.
FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
         VkExtent2D output_size_)
    : device{device_},
      memory_allocator{memory_allocator_},
      image_count{image_count_},
      output_size{output_size_} {
    // Resources without dependencies on other FSR objects.
    CreateImages();
    CreateSampler();
    CreateShaders();

    // Descriptor machinery.
    CreateDescriptorPool();
    CreateDescriptorSetLayout();
    CreateDescriptorSets();

    // Pipelines.
    CreatePipelineLayout();
    CreatePipeline();
}
|
||||
|
||||
/// Records the two FSR compute passes for one frame and returns the view holding the result.
///
/// Pass 1 (EASU) samples `image_view` and writes images[image_index]; pass 2 (RCAS) samples
/// that image and writes images[image_count + image_index].
///
/// @param scheduler          Scheduler the command recording lambda is submitted to.
/// @param image_index        Index selecting this frame's images and descriptor sets.
/// @param image_view         Source image view sampled by the EASU pass.
/// @param input_image_extent Full size of the source image, in pixels.
/// @param crop_rect          Region of the source image to upscale.
/// @return View of images[image_count + image_index], the RCAS output.
VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
                      VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {

    UpdateDescriptorSet(image_index, image_view);

    // crop_rect is captured by value, so the lambda does not depend on the caller's reference.
    scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
        // Template for all barriers below: color aspect, single mip/layer, GENERAL layout.
        const VkImageMemoryBarrier base_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = 0,
            .dstAccessMask = 0,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = {},
            .subresourceRange =
                {
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .baseMipLevel = 0,
                    .levelCount = 1,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                },
        };

        // ---- Pass 1: EASU ----
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);

        // Four 4-word constant vectors (con0..con3) laid out back to back.
        std::array<u32, 4 * 4> push_constants;
        FsrEasuConOffset(
            push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
            push_constants.data() + 12,

            static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()),
            static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height),
            static_cast<f32>(output_size.width), static_cast<f32>(output_size.height),
            static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top));
        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);

        {
            // Previous contents of the EASU target are discarded (UNDEFINED -> GENERAL).
            VkImageMemoryBarrier fsr_write_barrier = base_barrier;
            // Fixed: this statement previously ended with ',' and was fused with the next
            // assignment through the comma operator.
            fsr_write_barrier.image = *images[image_index];
            fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier);
        }

        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                  descriptor_sets[image_index * 2], {});
        // The dispatch covers the output in 16x16 pixel tiles.
        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
                        Common::DivCeil(output_size.height, 16u), 1);

        // ---- Pass 2: RCAS ----
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);

        // Only the first four words are rewritten; the full array is pushed again.
        FsrRcasCon(push_constants.data(), 0.25f);
        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);

        {
            std::array<VkImageMemoryBarrier, 2> barriers;
            auto& fsr_read_barrier = barriers[0];
            auto& blit_write_barrier = barriers[1];

            // Make the EASU writes visible to the RCAS reads.
            fsr_read_barrier = base_barrier;
            fsr_read_barrier.image = *images[image_index];
            fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

            // Previous contents of the RCAS target are discarded (UNDEFINED -> GENERAL).
            blit_write_barrier = base_barrier;
            blit_write_barrier.image = *images[image_count + image_index];
            blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers);
        }

        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
                                  descriptor_sets[image_index * 2 + 1], {});
        cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u),
                        Common::DivCeil(output_size.height, 16u), 1);

        {
            // Make the RCAS writes visible to fragment shader reads of the result.
            std::array<VkImageMemoryBarrier, 1> barriers;
            auto& blit_read_barrier = barriers[0];

            blit_read_barrier = base_barrier;
            blit_read_barrier.image = *images[image_count + image_index];
            blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
            blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers);
        }
    });

    return *image_views[image_count + image_index];
}
|
||||
|
||||
void FSR::CreateDescriptorPool() {
|
||||
const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
|
||||
{
|
||||
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = static_cast<u32>(image_count * 2),
|
||||
},
|
||||
{
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = static_cast<u32>(image_count * 2),
|
||||
},
|
||||
}};
|
||||
|
||||
const VkDescriptorPoolCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
.maxSets = static_cast<u32>(image_count * 2),
|
||||
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
|
||||
.pPoolSizes = pool_sizes.data(),
|
||||
};
|
||||
descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
|
||||
}
|
||||
|
||||
void FSR::CreateDescriptorSetLayout() {
|
||||
const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{
|
||||
{
|
||||
.binding = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = sampler.address(),
|
||||
},
|
||||
{
|
||||
.binding = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = sampler.address(),
|
||||
},
|
||||
}};
|
||||
|
||||
const VkDescriptorSetLayoutCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = static_cast<u32>(layout_bindings.size()),
|
||||
.pBindings = layout_bindings.data(),
|
||||
};
|
||||
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
|
||||
}
|
||||
|
||||
void FSR::CreateDescriptorSets() {
|
||||
const u32 sets = static_cast<u32>(image_count * 2);
|
||||
const std::vector layouts(sets, *descriptor_set_layout);
|
||||
|
||||
const VkDescriptorSetAllocateInfo ai{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.descriptorPool = *descriptor_pool,
|
||||
.descriptorSetCount = sets,
|
||||
.pSetLayouts = layouts.data(),
|
||||
};
|
||||
|
||||
descriptor_sets = descriptor_pool.Allocate(ai);
|
||||
}
|
||||
|
||||
void FSR::CreateImages() {
|
||||
images.resize(image_count * 2);
|
||||
image_views.resize(image_count * 2);
|
||||
buffer_commits.resize(image_count * 2);
|
||||
|
||||
for (size_t i = 0; i < image_count * 2; ++i) {
|
||||
images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
.extent =
|
||||
{
|
||||
.width = output_size.width,
|
||||
.height = output_size.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
});
|
||||
buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
|
||||
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = *images[i],
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = VK_FORMAT_R16G16B16A16_SFLOAT,
|
||||
.components =
|
||||
{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void FSR::CreatePipelineLayout() {
|
||||
VkPushConstantRange push_const{
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.offset = 0,
|
||||
.size = sizeof(std::array<u32, 4 * 4>),
|
||||
};
|
||||
VkPipelineLayoutCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = descriptor_set_layout.address(),
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_const,
|
||||
};
|
||||
|
||||
pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
|
||||
}
|
||||
|
||||
void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
|
||||
const auto fsr_image_view = *image_views[image_index];
|
||||
const auto blit_image_view = *image_views[image_count + image_index];
|
||||
|
||||
const VkDescriptorImageInfo image_info{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
const VkDescriptorImageInfo fsr_image_info{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = fsr_image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
const VkDescriptorImageInfo blit_image_info{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = blit_image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
|
||||
VkWriteDescriptorSet sampler_write{
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.pNext = nullptr,
|
||||
.dstSet = descriptor_sets[image_index * 2],
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.pImageInfo = &image_info,
|
||||
.pBufferInfo = nullptr,
|
||||
.pTexelBufferView = nullptr,
|
||||
};
|
||||
|
||||
VkWriteDescriptorSet output_write{
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.pNext = nullptr,
|
||||
.dstSet = descriptor_sets[image_index * 2],
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.pImageInfo = &fsr_image_info,
|
||||
.pBufferInfo = nullptr,
|
||||
.pTexelBufferView = nullptr,
|
||||
};
|
||||
|
||||
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
|
||||
|
||||
sampler_write.dstSet = descriptor_sets[image_index * 2 + 1];
|
||||
sampler_write.pImageInfo = &fsr_image_info;
|
||||
output_write.dstSet = descriptor_sets[image_index * 2 + 1];
|
||||
output_write.pImageInfo = &blit_image_info;
|
||||
|
||||
device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {});
|
||||
}
|
||||
|
||||
void FSR::CreateSampler() {
|
||||
const VkSamplerCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.magFilter = VK_FILTER_LINEAR,
|
||||
.minFilter = VK_FILTER_LINEAR,
|
||||
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
|
||||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
.mipLodBias = 0.0f,
|
||||
.anisotropyEnable = VK_FALSE,
|
||||
.maxAnisotropy = 0.0f,
|
||||
.compareEnable = VK_FALSE,
|
||||
.compareOp = VK_COMPARE_OP_NEVER,
|
||||
.minLod = 0.0f,
|
||||
.maxLod = 0.0f,
|
||||
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK,
|
||||
.unnormalizedCoordinates = VK_FALSE,
|
||||
};
|
||||
|
||||
sampler = device.GetLogical().CreateSampler(ci);
|
||||
}
|
||||
|
||||
void FSR::CreateShaders() {
|
||||
if (device.IsFloat16Supported()) {
|
||||
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
|
||||
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
|
||||
} else {
|
||||
easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
|
||||
rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
|
||||
}
|
||||
}
|
||||
|
||||
void FSR::CreatePipeline() {
|
||||
VkPipelineShaderStageCreateInfo shader_stage_easu{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *easu_shader,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
};
|
||||
|
||||
VkPipelineShaderStageCreateInfo shader_stage_rcas{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *rcas_shader,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo pipeline_ci_easu{
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = shader_stage_easu,
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo pipeline_ci_rcas{
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = shader_stage_rcas,
|
||||
.layout = *pipeline_layout,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
};
|
||||
|
||||
easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu);
|
||||
rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
@ -0,0 +1,54 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class VKScheduler;
|
||||
|
||||
class FSR {
|
||||
public:
|
||||
explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
|
||||
VkExtent2D output_size);
|
||||
VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
|
||||
VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
|
||||
|
||||
private:
|
||||
void CreateDescriptorPool();
|
||||
void CreateDescriptorSetLayout();
|
||||
void CreateDescriptorSets();
|
||||
void CreateImages();
|
||||
void CreateSampler();
|
||||
void CreateShaders();
|
||||
void CreatePipeline();
|
||||
void CreatePipelineLayout();
|
||||
|
||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
size_t image_count;
|
||||
VkExtent2D output_size;
|
||||
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::DescriptorSetLayout descriptor_set_layout;
|
||||
vk::DescriptorSets descriptor_sets;
|
||||
vk::PipelineLayout pipeline_layout;
|
||||
vk::ShaderModule easu_shader;
|
||||
vk::ShaderModule rcas_shader;
|
||||
vk::Pipeline easu_pipeline;
|
||||
vk::Pipeline rcas_pipeline;
|
||||
vk::Sampler sampler;
|
||||
std::vector<vk::Image> images;
|
||||
std::vector<vk::ImageView> image_views;
|
||||
std::vector<MemoryCommit> buffer_commits;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue