Compare commits

...

4 Commits

Author SHA1 Message Date
Ikko Eltociear Ashimine
f8ec878796 Fix typo in TextureBindingsManager.cs (#4798)
accomodate -> accommodate
2023-05-05 22:17:36 +02:00
Skyth (Asilkan)
9ff21f9ab6 Use ToLowerInvariant when detecting GPU vendor. (#4815) 2023-05-05 16:35:59 +00:00
gdkchan
aa021085cf Allow any shader SSBO constant buffer slot and offset (#2237)
* Allow any shader SSBO constant buffer slot and offset

* Fix slot value passed to SetUsedStorageBuffer on fallback case

* Shader cache version

* Ensure that the storage buffer source constant buffer offset is word aligned

* Fix FirstBinding on GetUniformBufferDescriptors
2023-05-05 14:20:20 +00:00
riperiperi
1f5d881860 GPU: Allow granular buffer updates from the constant buffer updater (#4749)
* GPU: Allow granular buffer updates from the constant buffer updater

Sometimes, constant buffer updates can't be avoided, either due to a cb0 access that cannot be eliminated, or the game updating a buffer between draws to the detriment of everyone.

To avoid uploading the full 4096 bytes each time, this PR remembers the offset and size containing all constant buffer updates since the last sync. It will then upload that range after sync.

* Allow clearing the dirty range

* Always use precise

Might want to not do this if distance between the existing range and new one is too high.

* Use old force dirty mechanism when distance between regions is too great

* Update src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Fix inheritance of _dirtyStart and _dirtyEnd

---------

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2023-05-05 13:47:15 +00:00
13 changed files with 303 additions and 62 deletions

View File

@@ -157,11 +157,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(sb.SbCbSlot);
sbDescAddress += (ulong)sb.SbCbOffset * 4;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);

View File

@@ -351,11 +351,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, 0);
int sbDescOffset = 0x110 + stage * 0x100 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
ulong sbDescAddress = _channel.BufferManager.GetGraphicsUniformBufferAddress(stage, sb.SbCbSlot);
sbDescAddress += (ulong)sb.SbCbOffset * 4;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);

View File

@@ -537,7 +537,7 @@ namespace Ryujinx.Graphics.Gpu.Image
if (hostTexture != null && texture.Target == Target.TextureBuffer)
{
// Ensure that the buffer texture is using the correct buffer as storage.
// Buffers are frequently re-created to accomodate larger data, so we need to re-bind
// Buffers are frequently re-created to accommodate larger data, so we need to re-bind
// to ensure we're not using an old buffer that was already deleted.
_channel.BufferManager.SetBufferTextureStorage(stage, hostTexture, texture.Range.GetSubRange(0).Address, texture.Size, bindingInfo, bindingInfo.Format, false);
@@ -666,7 +666,7 @@ namespace Ryujinx.Graphics.Gpu.Image
if (hostTexture != null && texture.Target == Target.TextureBuffer)
{
// Ensure that the buffer texture is using the correct buffer as storage.
// Buffers are frequently re-created to accomodate larger data, so we need to re-bind
// Buffers are frequently re-created to accommodate larger data, so we need to re-bind
// to ensure we're not using an old buffer that was already deleted.
Format format = bindingInfo.Format;
@@ -879,4 +879,4 @@ namespace Ryujinx.Graphics.Gpu.Image
Array.Clear(_imageState);
}
}
}
}

View File

@@ -68,6 +68,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
private int _referenceCount = 1;
private ulong _dirtyStart = ulong.MaxValue;
private ulong _dirtyEnd = ulong.MaxValue;
/// <summary>
/// Creates a new instance of the buffer.
/// </summary>
@@ -221,6 +224,26 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
_sequenceNumber = _context.SequenceNumber;
_dirtyStart = ulong.MaxValue;
}
}
if (_dirtyStart != ulong.MaxValue)
{
ulong end = address + size;
if (end > _dirtyStart && address < _dirtyEnd)
{
if (_modifiedRanges != null)
{
_modifiedRanges.ExcludeModifiedRegions(_dirtyStart, _dirtyEnd - _dirtyStart, _loadDelegate);
}
else
{
LoadRegion(_dirtyStart, _dirtyEnd - _dirtyStart);
}
_dirtyStart = ulong.MaxValue;
}
}
}
@@ -291,7 +314,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
/// Inherit modified ranges from another buffer.
/// Inherit modified and dirty ranges from another buffer.
/// </summary>
/// <param name="from">The buffer to inherit from</param>
public void InheritModifiedRanges(Buffer from)
@@ -320,6 +343,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
_modifiedRanges.InheritRanges(from._modifiedRanges, registerRangeAction);
}
if (from._dirtyStart != ulong.MaxValue)
{
ForceDirty(from._dirtyStart, from._dirtyEnd - from._dirtyStart);
}
}
/// <summary>
@@ -338,6 +366,44 @@ namespace Ryujinx.Graphics.Gpu.Memory
return false;
}
/// <summary>
/// Trims the pending dirty range so that it no longer covers the given region.
/// The range is only shrunk from one of its ends; a region that lies strictly inside
/// the dirty range leaves it unchanged.
/// </summary>
/// <param name="address">Start address of the modified region</param>
/// <param name="size">Size of the modified region</param>
private void ClearDirty(ulong address, ulong size)
{
    if (_dirtyStart == ulong.MaxValue)
    {
        // There is no pending dirty range.
        return;
    }

    ulong regionEnd = address + size;

    if (regionEnd <= _dirtyStart || address >= _dirtyEnd)
    {
        // The region does not overlap the dirty range.
        return;
    }

    if (address <= _dirtyStart)
    {
        // The region covers the start of the dirty range: move the start forward,
        // or drop the range entirely when the region also reaches its end.
        _dirtyStart = regionEnd < _dirtyEnd ? regionEnd : ulong.MaxValue;
    }
    else if (regionEnd >= _dirtyEnd)
    {
        // The region covers the end of the dirty range: pull the end back.
        _dirtyEnd = address;
    }

    // Otherwise the region is strictly inside the dirty range, which is kept as is.
}
/// <summary>
/// Indicate that a region of the buffer was modified, and must be loaded from memory.
/// </summary>
@@ -357,6 +423,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
mSize = maxSize;
}
ClearDirty(mAddress, mSize);
if (_modifiedRanges != null)
{
_modifiedRanges.ExcludeModifiedRegions(mAddress, mSize, _loadDelegate);
@@ -380,14 +448,12 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
/// <summary>
/// Force a region of the buffer to be dirty. Avoids reprotection and nullifies sequence number check.
/// Force a region of the buffer to be dirty within the memory tracking. Avoids reprotection and nullifies sequence number check.
/// </summary>
/// <param name="mAddress">Start address of the modified region</param>
/// <param name="mSize">Size of the region to force dirty</param>
public void ForceDirty(ulong mAddress, ulong mSize)
private void ForceTrackingDirty(ulong mAddress, ulong mSize)
{
_modifiedRanges?.Clear(mAddress, mSize);
if (_useGranular)
{
_memoryTrackingGranular.ForceDirty(mAddress, mSize);
@@ -399,6 +465,39 @@ namespace Ryujinx.Graphics.Gpu.Memory
}
}
/// <summary>
/// Forces a region of the buffer to be dirty.
/// The region is removed from the modified ranges, then either merged into the pending
/// dirty range or, when it is a page or more away from that range, forwarded to the
/// memory tracking through <see cref="ForceTrackingDirty"/>.
/// </summary>
/// <param name="mAddress">Start address of the modified region</param>
/// <param name="mSize">Size of the region to force dirty</param>
public void ForceDirty(ulong mAddress, ulong mSize)
{
    _modifiedRanges?.Clear(mAddress, mSize);

    ulong mEnd = mAddress + mSize;

    if (_dirtyStart == ulong.MaxValue)
    {
        // No pending dirty range yet, this region becomes the range.
        _dirtyStart = mAddress;
        _dirtyEnd = mEnd;

        return;
    }

    // Signed distances between the new region and the existing range, on either side.
    long pageSize = (long)MemoryManager.PageSize;
    long gapAfterRange = (long)(mAddress - _dirtyEnd);
    long gapBeforeRange = (long)(_dirtyStart - mEnd);

    if (gapAfterRange >= pageSize || gapBeforeRange >= pageSize)
    {
        // The new region is at least a page away from the existing range,
        // so it is not merged into it.
        ForceTrackingDirty(mAddress, mSize);

        return;
    }

    // Close enough: grow the existing range so that it also covers the new region.
    _dirtyStart = Math.Min(_dirtyStart, mAddress);
    _dirtyEnd = Math.Max(_dirtyEnd, mEnd);
}
/// <summary>
/// Performs copy of all the buffer data from one buffer to another.
/// </summary>

View File

@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 4735;
private const uint CodeGenVersion = 2237;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View File

@@ -104,7 +104,7 @@ namespace Ryujinx.Graphics.OpenGL
private static GpuVendor GetGpuVendor()
{
string vendor = GL.GetString(StringName.Vendor).ToLower();
string vendor = GL.GetString(StringName.Vendor).ToLowerInvariant();
if (vendor == "nvidia corporation")
{
@@ -112,7 +112,7 @@ namespace Ryujinx.Graphics.OpenGL
}
else if (vendor == "intel")
{
string renderer = GL.GetString(StringName.Renderer).ToLower();
string renderer = GL.GetString(StringName.Renderer).ToLowerInvariant();
return renderer.Contains("mesa") ? GpuVendor.IntelUnix : GpuVendor.IntelWindows;
}

View File

@@ -5,13 +5,27 @@ namespace Ryujinx.Graphics.Shader
// New fields should be added to the end of the struct to keep disk shader cache compatibility.
public readonly int Binding;
public readonly int Slot;
public readonly byte Slot;
public readonly byte SbCbSlot;
public readonly ushort SbCbOffset;
public BufferUsageFlags Flags;
public BufferDescriptor(int binding, int slot)
{
Binding = binding;
Slot = slot;
Slot = (byte)slot;
SbCbSlot = 0;
SbCbOffset = 0;
Flags = BufferUsageFlags.None;
}
/// <summary>
/// Creates a new buffer descriptor that also carries a constant buffer slot and offset pair.
/// </summary>
/// <param name="binding">Value stored in <see cref="Binding"/></param>
/// <param name="slot">Value stored in <see cref="Slot"/>, truncated to a byte</param>
/// <param name="sbCbSlot">Value stored in <see cref="SbCbSlot"/>, truncated to a byte</param>
/// <param name="sbCbOffset">Value stored in <see cref="SbCbOffset"/>, truncated to 16 bits</param>
public BufferDescriptor(int binding, int slot, int sbCbSlot, int sbCbOffset)
{
    // The usage flags always start out empty.
    Flags = BufferUsageFlags.None;

    Binding = binding;
    Slot = (byte)slot;

    SbCbSlot = (byte)sbCbSlot;
    SbCbOffset = (ushort)sbCbOffset;
}

View File

@@ -6,7 +6,7 @@ namespace Ryujinx.Graphics.Shader
/// Flags that indicate how a buffer will be used in a shader.
/// </summary>
[Flags]
public enum BufferUsageFlags
public enum BufferUsageFlags : byte
{
None = 0,

View File

@@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Translation
public const int UbeDescsSize = StorageDescSize * UbeMaxCount;
public const int UbeFirstCbuf = 8;
public const int DriverReservedCb = 0;
public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
{
return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) ||

View File

@@ -8,6 +8,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class GlobalToStorage
{
/// <summary>
/// Result of a search for the constant buffer location (slot and offset) of an operand.
/// A slot of -1 marks the "not found" result.
/// </summary>
private struct SearchResult
{
    /// <summary>
    /// Result used when the search found nothing (slot -1, offset 0).
    /// </summary>
    public static SearchResult NotFound
    {
        get { return new SearchResult(-1, 0); }
    }

    /// <summary>
    /// Constant buffer slot, or -1 when nothing was found.
    /// </summary>
    public int SbCbSlot { get; }

    /// <summary>
    /// Constant buffer offset.
    /// </summary>
    public int SbCbOffset { get; }

    /// <summary>
    /// True when this result holds a location, false for the "not found" result.
    /// </summary>
    public bool Found
    {
        get { return SbCbSlot != -1; }
    }

    /// <summary>
    /// Creates a new search result.
    /// </summary>
    /// <param name="sbCbSlot">Constant buffer slot, -1 meaning "not found"</param>
    /// <param name="sbCbOffset">Constant buffer offset</param>
    public SearchResult(int sbCbSlot, int sbCbOffset)
    {
        SbCbSlot = sbCbSlot;
        SbCbOffset = sbCbOffset;
    }
}
public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
{
int sbStart = GetStorageBaseCbOffset(config.Stage);
@@ -49,30 +63,33 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
Operand source = operation.GetSource(0);
int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
if (storageIndex >= 0)
var result = SearchForStorageBase(config, block, source);
if (!result.Found)
{
// Storage buffers are implemented using global memory access.
// If we know from where the base address of the access is loaded,
// we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage
// buffer access.
node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
continue;
}
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
if (config.Stage == ShaderStage.Compute &&
operation.Inst == Instruction.LoadGlobal &&
result.SbCbSlot == DriverReservedCb &&
result.SbCbOffset >= UbeBaseOffset &&
result.SbCbOffset < UbeBaseOffset + UbeDescsSize)
{
// Here we effectively try to replace a LDG instruction with LDC.
// The hardware only supports a limited amount of constant buffers
// so NVN "emulates" more constant buffers using global memory access.
// Here we try to replace the global access back to a constant buffer
// load.
storageIndex = SearchForStorageBase(block, source, ubeStart, ubeStart + ubeEnd);
if (storageIndex >= 0)
{
node = ReplaceLdgWithLdc(node, config, storageIndex);
}
node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize);
}
else
{
// Storage buffers are implemented using global memory access.
// If we know from where the base address of the access is loaded,
// we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage
// buffer access.
node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset));
}
}
}
@@ -159,7 +176,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (byteOffset == null)
{
Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
(int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex);
Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset);
Operand baseAddrTrunc = Local();
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
@@ -360,20 +379,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return node;
}
private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress)
{
globalAddress = Utils.FindLastOperation(globalAddress, block);
if (globalAddress.Type == OperandType.ConstantBuffer)
{
return GetStorageIndex(globalAddress, sbStart, sbEnd);
return GetStorageIndex(config, globalAddress);
}
Operation operation = globalAddress.AsgOp as Operation;
if (operation == null || operation.Inst != Instruction.Add)
{
return -1;
return SearchResult.NotFound;
}
Operand src1 = operation.GetSource(0);
@@ -382,34 +401,65 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
(src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
{
Operand baseAddr;
if (src1.Type == OperandType.LocalVariable)
{
operation = Utils.FindLastOperation(src1, block).AsgOp as Operation;
baseAddr = Utils.FindLastOperation(src1, block);
}
else
{
operation = Utils.FindLastOperation(src2, block).AsgOp as Operation;
baseAddr = Utils.FindLastOperation(src2, block);
}
var result = GetStorageIndex(config, baseAddr);
if (result.Found)
{
return result;
}
operation = baseAddr.AsgOp as Operation;
if (operation == null || operation.Inst != Instruction.Add)
{
return -1;
return SearchResult.NotFound;
}
}
var selectedResult = SearchResult.NotFound;
for (int index = 0; index < operation.SourcesCount; index++)
{
Operand source = operation.GetSource(index);
int storageIndex = GetStorageIndex(source, sbStart, sbEnd);
var result = GetStorageIndex(config, source);
if (storageIndex != -1)
// If we already have a result, we give preference to the ones from
// the driver reserved constant buffer, as those are the ones that
// contain the base address.
if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb))
{
return storageIndex;
selectedResult = result;
}
}
return -1;
return selectedResult;
}
/// <summary>
/// Builds a search result from a constant buffer operand.
/// </summary>
/// <param name="config">Shader configuration (not read by this method)</param>
/// <param name="operand">Operand to inspect</param>
/// <returns>
/// The slot and offset of the operand when it is a constant buffer operand whose offset has
/// its two lowest bits clear, <see cref="SearchResult.NotFound"/> otherwise
/// </returns>
private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand)
{
    if (operand.Type != OperandType.ConstantBuffer)
    {
        return SearchResult.NotFound;
    }

    int cbufSlot = operand.GetCbufSlot();
    int cbufOffset = operand.GetCbufOffset();

    // Offsets that are not a multiple of 4 are rejected.
    bool isAligned = (cbufOffset & 3) == 0;

    return isAligned ? new SearchResult(cbufSlot, cbufOffset) : SearchResult.NotFound;
}
private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)

View File

@@ -68,7 +68,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
ConstantFolding.RunPass(operation);
Simplification.RunPass(operation);
if (DestIsLocalVar(operation))

View File

@@ -110,9 +110,9 @@ namespace Ryujinx.Graphics.Shader.Translation
Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
{
baseAddrLow = Cbuf(0, cbOffset);
Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
Operand size = Cbuf(0, cbOffset + 2);
baseAddrLow = Cbuf(DriverReservedCb, cbOffset);
Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1);
Operand size = Cbuf(DriverReservedCb, cbOffset + 2);
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
@@ -134,9 +134,10 @@ namespace Ryujinx.Graphics.Shader.Translation
sbUseMask &= ~(1 << slot);
config.SetUsedStorageBuffer(slot, isWrite);
int cbOffset = GetStorageCbOffset(config.Stage, slot);
slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset);
config.SetUsedStorageBuffer(slot, isWrite);
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);

View File

@@ -125,6 +125,9 @@ namespace Ryujinx.Graphics.Shader.Translation
private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
private readonly Dictionary<int, int> _sbSlots;
private readonly Dictionary<int, int> _sbSlotsReverse;
private BufferDescriptor[] _cachedConstantBufferDescriptors;
private BufferDescriptor[] _cachedStorageBufferDescriptors;
private TextureDescriptor[] _cachedTextureDescriptors;
@@ -152,6 +155,9 @@ namespace Ryujinx.Graphics.Shader.Translation
_usedTextures = new Dictionary<TextureInfo, TextureMeta>();
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
_sbSlots = new Dictionary<int, int>();
_sbSlotsReverse = new Dictionary<int, int>();
}
public ShaderConfig(
@@ -770,9 +776,8 @@ namespace Ryujinx.Graphics.Shader.Translation
usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
}
return _cachedConstantBufferDescriptors = GetBufferDescriptors(
return _cachedConstantBufferDescriptors = GetUniformBufferDescriptors(
usedMask,
0,
UsedFeatures.HasFlag(FeatureFlags.CbIndexing),
out _firstConstantBufferBinding,
GpuAccessor.QueryBindingConstantBuffer);
@@ -785,7 +790,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return _cachedStorageBufferDescriptors;
}
return _cachedStorageBufferDescriptors = GetBufferDescriptors(
return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors(
_usedStorageBuffers,
_usedStorageBuffersWrite,
true,
@@ -793,7 +798,48 @@ namespace Ryujinx.Graphics.Shader.Translation
GpuAccessor.QueryBindingStorageBuffer);
}
private static BufferDescriptor[] GetBufferDescriptors(
/// <summary>
/// Creates one buffer descriptor for each bit set on the used mask, in ascending slot order.
/// </summary>
/// <param name="usedMask">Bit mask where each set bit is a used slot</param>
/// <param name="isArray">When true, bindings are also requested for the unused slots that sit before each used slot</param>
/// <param name="firstBinding">First binding returned by the callback, or 0 if the callback was never called</param>
/// <param name="getBindingCallback">Callback that returns the binding for a given slot</param>
/// <returns>The descriptors, one per used slot</returns>
private static BufferDescriptor[] GetUniformBufferDescriptors(int usedMask, bool isArray, out int firstBinding, Func<int, int> getBindingCallback)
{
    firstBinding = 0;

    bool firstBindingAssigned = false;
    int previousSlot = -1;
    int remainingMask = usedMask;

    var result = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];

    for (int index = 0; index < result.Length; index++)
    {
        // Take the lowest slot that is still pending and remove it from the mask.
        int slot = BitOperations.TrailingZeroCount(remainingMask);
        remainingMask &= ~(1 << slot);

        if (isArray)
        {
            // The array entries in between also consume bindings, even if they are unused.
            for (int gapSlot = previousSlot + 1; gapSlot < slot; gapSlot++)
            {
                int gapBinding = getBindingCallback(gapSlot);

                if (!firstBindingAssigned)
                {
                    firstBinding = gapBinding;
                    firstBindingAssigned = true;
                }
            }
        }

        previousSlot = slot;

        int binding = getBindingCallback(slot);
        result[index] = new BufferDescriptor(binding, slot);

        if (!firstBindingAssigned)
        {
            firstBinding = binding;
            firstBindingAssigned = true;
        }
    }

    return result;
}
private BufferDescriptor[] GetStorageBufferDescriptors(
int usedMask,
int writtenMask,
bool isArray,
@@ -827,7 +873,9 @@ namespace Ryujinx.Graphics.Shader.Translation
lastSlot = slot;
descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot);
(int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot);
descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset);
if (!hasFirstBinding)
{
@@ -924,6 +972,40 @@ namespace Ryujinx.Graphics.Shader.Translation
return FindDescriptorIndex(GetImageDescriptors(), texOp);
}
/// <summary>
/// Gets the slot assigned to a constant buffer slot and offset pair,
/// assigning the next free slot the first time a pair is seen.
/// </summary>
/// <param name="sbCbSlot">Constant buffer slot of the pair</param>
/// <param name="sbCbOffset">Constant buffer offset of the pair</param>
/// <returns>The slot assigned to the pair</returns>
public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset)
{
    int packedKey = PackSbCbInfo(sbCbSlot, sbCbOffset);

    if (_sbSlots.TryGetValue(packedKey, out int existingSlot))
    {
        return existingSlot;
    }

    // New pair: slots are handed out sequentially, and the reverse map is kept in sync
    // so that the pair can be recovered from the slot later.
    int newSlot = _sbSlots.Count;

    _sbSlots.Add(packedKey, newSlot);
    _sbSlotsReverse.Add(newSlot, packedKey);

    return newSlot;
}
/// <summary>
/// Gets the constant buffer slot and offset pair that a slot was assigned to.
/// </summary>
/// <param name="slot">Slot to look up</param>
/// <returns>The constant buffer slot and offset, in this order</returns>
/// <exception cref="ArgumentException">Thrown when no pair was registered for <paramref name="slot"/></exception>
public (int, int) GetSbCbInfo(int slot)
{
    if (!_sbSlotsReverse.TryGetValue(slot, out int packedKey))
    {
        throw new ArgumentException($"Invalid slot {slot}.", nameof(slot));
    }

    return UnpackSbCbInfo(packedKey);
}
/// <summary>
/// Packs a constant buffer slot and offset pair into a single integer key.
/// The offset is placed on the low bits, and the slot is shifted up by 16 bits.
/// </summary>
/// <param name="sbCbSlot">Constant buffer slot</param>
/// <param name="sbCbOffset">Constant buffer offset</param>
/// <returns>The packed key</returns>
private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
{
    int slotBits = sbCbSlot << 16;

    return slotBits | sbCbOffset;
}
/// <summary>
/// Splits a packed key back into its constant buffer slot and offset pair.
/// </summary>
/// <param name="key">Packed key, with the offset on the low 16 bits and the slot on the 8 bits above them</param>
/// <returns>The constant buffer slot and offset, in this order</returns>
private static (int, int) UnpackSbCbInfo(int key)
{
    byte sbCbSlot = (byte)(key >> 16);
    ushort sbCbOffset = (ushort)key;

    return (sbCbSlot, sbCbOffset);
}
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
{
return new ShaderProgramInfo(