Compare commits

...

4 Commits

Author SHA1 Message Date
gdkchan
fc26189fe1 Eliminate redundant multiplications by gl_FragCoord.w on the shader (#4578)
* Eliminate redundant multiplications by gl_FragCoord.w on the shader

* Shader cache version bump
2023-05-19 11:52:31 -03:00
dependabot[bot]
a40c90e7dd nuget: bump DynamicData from 7.13.5 to 7.13.8 (#5001)
Bumps [DynamicData](https://github.com/reactiveui/DynamicData) from 7.13.5 to 7.13.8.
- [Release notes](https://github.com/reactiveui/DynamicData/releases)
- [Changelog](https://github.com/reactivemarbles/DynamicData/blob/main/ReleaseNotes.md)
- [Commits](https://github.com/reactiveui/DynamicData/compare/7.13.5...7.13.8)

---
updated-dependencies:
- dependency-name: DynamicData
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-05-19 06:52:44 +02:00
gdkchan
f864a49014 Fix Vulkan blit-like operations swizzle (#5003) 2023-05-18 18:16:03 -03:00
riperiperi
ecbf303266 GPU: Avoid using garbage size for non-cb0 storage buffers (#4999)
* GPU: Avoid using garbage size for non-cb0 storage buffers

In the depths area, Tears of the Kingdom uses a global memory access with address on constant buffer slot 6. This isn't standard and thus doesn't actually have a size 8 bytes after it, so we were reading back a garbage size that ended up very large (at least in version 1.1.0), and would synchronize a lot of data per frame.

This PR makes storage buffers created from addresses outside constant buffer slot 0 get their size as the number of bytes remaining in the GPU mapping starting at the given virtual address. This should bound the buffer to a reasonable size, and ideally stop it crossing into other memory.

* Limit max size

* Add TODO

* Feedback
2023-05-18 08:56:34 +02:00
9 changed files with 187 additions and 10 deletions

View File

@@ -13,7 +13,7 @@
<PackageVersion Include="CommandLineParser" Version="2.9.1" />
<PackageVersion Include="Concentus" Version="1.1.7" />
<PackageVersion Include="DiscordRichPresence" Version="1.1.3.18" />
<PackageVersion Include="DynamicData" Version="7.13.5" />
<PackageVersion Include="DynamicData" Version="7.13.8" />
<PackageVersion Include="FluentAvaloniaUI" Version="1.4.5" />
<PackageVersion Include="GtkSharp.Dependencies" Version="1.1.1" />
<PackageVersion Include="GtkSharp.Dependencies.osx" Version="0.0.5" />

View File

@@ -23,6 +23,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
public const int PrimitiveRestartStateIndex = 12;
public const int RenderTargetStateIndex = 27;
private const ulong MaxUnknownStorageSize = 0x100000;
private readonly GpuContext _context;
private readonly GpuChannel _channel;
private readonly DeviceStateWithShadow<ThreedClassState> _state;
@@ -356,7 +358,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
uint size;
if (sb.SbCbSlot == 0)
{
// Only trust the SbDescriptor size if it comes from slot 0.
size = (uint)sbDescriptor.Size;
}
else
{
// TODO: Use full mapped size and somehow speed up buffer sync.
size = (uint)_channel.MemoryManager.GetMappedSize(sbDescriptor.PackAddress(), MaxUnknownStorageSize);
}
_channel.BufferManager.SetGraphicsStorageBuffer(stage, sb.Slot, sbDescriptor.PackAddress(), size, sb.Flags);
}
}
}

View File

@@ -637,6 +637,33 @@ namespace Ryujinx.Graphics.Gpu.Memory
return UnpackPaFromPte(pte) + (va & PageMask);
}
/// <summary>
/// Translates a GPU virtual address and returns the number of bytes that are mapped after it.
/// </summary>
/// <param name="va">GPU virtual address to be translated</param>
/// <param name="maxSize">Maximum size in bytes to scan</param>
/// <returns>Number of bytes, 0 if unmapped</returns>
public ulong GetMappedSize(ulong va, ulong maxSize)
{
if (!ValidateAddress(va))
{
return 0;
}
ulong startVa = va;
ulong endVa = va + maxSize;
ulong pte = GetPte(va);
while (pte != PteUnmapped && va < endVa)
{
va += PageSize - (va & PageMask);
pte = GetPte(va);
}
return Math.Min(maxSize, va - startVa);
}
/// <summary>
/// Gets the kind of a given memory page.
/// This might indicate the type of resource that can be allocated on the page, and also texture tiling.

View File

@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 4892;
private const uint CodeGenVersion = 4578;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View File

@@ -20,6 +20,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
BindlessToIndexed.RunPass(blocks[blkIndex], config);
BindlessElimination.RunPass(blocks[blkIndex], config);
// FragmentCoord only exists on fragment shaders, so we don't need to check other stages.
if (config.Stage == ShaderStage.Fragment)
{
EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
}
}
config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
@@ -281,6 +287,75 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return modified;
}
private static void EliminateMultiplyByFragmentCoordW(BasicBlock block)
{
foreach (INode node in block.Operations)
{
if (node is Operation operation)
{
EliminateMultiplyByFragmentCoordW(operation);
}
}
}
private static void EliminateMultiplyByFragmentCoordW(Operation operation)
{
// We're looking for the pattern:
// y = x * gl_FragCoord.w
// v = y * (1.0 / gl_FragCoord.w)
// Then we transform it into:
// v = x
// This pattern is common on fragment shaders due to the way how perspective correction is done.
// We are expecting a multiplication by the reciprocal of gl_FragCoord.w.
if (operation.Inst != (Instruction.FP32 | Instruction.Multiply))
{
return;
}
Operand lhs = operation.GetSource(0);
Operand rhs = operation.GetSource(1);
// Check LHS of the the main multiplication operation. We expect an input being multiplied by gl_FragCoord.w.
if (!(lhs.AsgOp is Operation attrMulOp) || attrMulOp.Inst != (Instruction.FP32 | Instruction.Multiply))
{
return;
}
Operand attrMulLhs = attrMulOp.GetSource(0);
Operand attrMulRhs = attrMulOp.GetSource(1);
// LHS should be any input, RHS should be exactly gl_FragCoord.w.
if (!Utils.IsInputLoad(attrMulLhs.AsgOp) || !Utils.IsInputLoad(attrMulRhs.AsgOp, IoVariable.FragmentCoord, 3))
{
return;
}
// RHS of the main multiplication should be a reciprocal operation (1.0 / x).
if (!(rhs.AsgOp is Operation reciprocalOp) || reciprocalOp.Inst != (Instruction.FP32 | Instruction.Divide))
{
return;
}
Operand reciprocalLhs = reciprocalOp.GetSource(0);
Operand reciprocalRhs = reciprocalOp.GetSource(1);
// Check if the divisor is a constant equal to 1.0.
if (reciprocalLhs.Type != OperandType.Constant || reciprocalLhs.AsFloat() != 1.0f)
{
return;
}
// Check if the dividend is gl_FragCoord.w.
if (!Utils.IsInputLoad(reciprocalRhs.AsgOp, IoVariable.FragmentCoord, 3))
{
return;
}
// If everything matches, we can replace the operation with the input load result.
operation.TurnIntoCopy(attrMulLhs);
}
private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
{
// Remove a node from the nodes list, and also remove itself

View File

@@ -4,6 +4,35 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class Utils
{
public static bool IsInputLoad(INode node)
{
return (node is Operation operation) &&
operation.Inst == Instruction.Load &&
operation.StorageKind == StorageKind.Input;
}
public static bool IsInputLoad(INode node, IoVariable ioVariable, int elemIndex)
{
if (!(node is Operation operation) ||
operation.Inst != Instruction.Load ||
operation.StorageKind != StorageKind.Input ||
operation.SourcesCount != 2)
{
return false;
}
Operand ioVariableSrc = operation.GetSource(0);
if (ioVariableSrc.Type != OperandType.Constant || (IoVariable)ioVariableSrc.Value != ioVariable)
{
return false;
}
Operand elemIndexSrc = operation.GetSource(1);
return elemIndexSrc.Type == OperandType.Constant && elemIndexSrc.Value == elemIndex;
}
private static Operation FindBranchSource(BasicBlock block)
{
foreach (BasicBlock sourceBlock in block.Predecessors)

View File

@@ -228,7 +228,12 @@ namespace Ryujinx.Graphics.Vulkan
SignalDirty(DirtyFlags.Storage);
}
public void SetTextureAndSampler(CommandBufferScoped cbs, ShaderStage stage, int binding, ITexture texture, ISampler sampler)
public void SetTextureAndSampler(
CommandBufferScoped cbs,
ShaderStage stage,
int binding,
ITexture texture,
ISampler sampler)
{
if (texture is TextureBuffer textureBuffer)
{
@@ -251,6 +256,28 @@ namespace Ryujinx.Graphics.Vulkan
SignalDirty(DirtyFlags.Texture);
}
public void SetTextureAndSamplerIdentitySwizzle(
CommandBufferScoped cbs,
ShaderStage stage,
int binding,
ITexture texture,
ISampler sampler)
{
if (texture is TextureView view)
{
view.Storage.InsertWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, stage.ConvertToPipelineStageFlags());
_textureRefs[binding] = view.GetIdentityImageView();
_samplerRefs[binding] = ((SamplerHolder)sampler)?.GetSampler();
SignalDirty(DirtyFlags.Texture);
}
else
{
SetTextureAndSampler(cbs, stage, binding, texture, sampler);
}
}
public void SetUniformBuffers(CommandBuffer commandBuffer, ReadOnlySpan<BufferAssignment> buffers)
{
for (int i = 0; i < buffers.Length; i++)

View File

@@ -415,7 +415,7 @@ namespace Ryujinx.Graphics.Vulkan
var sampler = linearFilter ? _samplerLinear : _samplerNearest;
_pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, sampler);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Fragment, 0, src, sampler);
Span<float> region = stackalloc float[RegionBufferSize / sizeof(float)];
@@ -625,7 +625,7 @@ namespace Ryujinx.Graphics.Vulkan
private void BlitDepthStencilDraw(TextureView src, bool isDepth)
{
_pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, _samplerNearest);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Fragment, 0, src, _samplerNearest);
if (isDepth)
{
@@ -1037,7 +1037,7 @@ namespace Ryujinx.Graphics.Vulkan
var srcView = Create2DLayerView(src, srcLayer + z, srcLevel + l, srcFormat);
var dstView = Create2DLayerView(dst, dstLayer + z, dstLevel + l);
_pipeline.SetTextureAndSampler(ShaderStage.Compute, 0, srcView, null);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Compute, 0, srcView, null);
_pipeline.SetImage(0, dstView, dstFormat);
int dispatchX = (Math.Min(srcView.Info.Width, dstView.Info.Width) + 31) / 32;
@@ -1177,7 +1177,7 @@ namespace Ryujinx.Graphics.Vulkan
var srcView = Create2DLayerView(src, srcLayer + z, 0, format);
var dstView = Create2DLayerView(dst, dstLayer + z, 0);
_pipeline.SetTextureAndSampler(ShaderStage.Compute, 0, srcView, null);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Compute, 0, srcView, null);
_pipeline.SetImage(0, dstView, format);
_pipeline.DispatchCompute(dispatchX, dispatchY, 1);
@@ -1313,7 +1313,7 @@ namespace Ryujinx.Graphics.Vulkan
var srcView = Create2DLayerView(src, srcLayer + z, 0, format);
var dstView = Create2DLayerView(dst, dstLayer + z, 0);
_pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, srcView, null);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Fragment, 0, srcView, null);
_pipeline.SetRenderTarget(
((TextureView)dstView).GetView(format).GetImageViewForAttachment(),
(uint)dst.Width,
@@ -1384,7 +1384,7 @@ namespace Ryujinx.Graphics.Vulkan
private void CopyMSAspectDraw(TextureView src, bool fromMS, bool isDepth)
{
_pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, _samplerNearest);
_pipeline.SetTextureAndSamplerIdentitySwizzle(ShaderStage.Fragment, 0, src, _samplerNearest);
if (isDepth)
{

View File

@@ -1098,6 +1098,11 @@ namespace Ryujinx.Graphics.Vulkan
_descriptorSetUpdater.SetTextureAndSampler(Cbs, stage, binding, texture, sampler);
}
public void SetTextureAndSamplerIdentitySwizzle(ShaderStage stage, int binding, ITexture texture, ISampler sampler)
{
_descriptorSetUpdater.SetTextureAndSamplerIdentitySwizzle(Cbs, stage, binding, texture, sampler);
}
public void SetTransformFeedbackBuffers(ReadOnlySpan<BufferRange> buffers)
{
PauseTransformFeedbackInternal();