Compare commits

...

5 Commits

Author SHA1 Message Date
795539bc82 Vulkan: Use staging buffer for temporary constants (#6168)
* Vulkan: Use staging buffer for temporary constants

Helper shaders and post processing effects typically need some parameters to tell them what to do, which we pass via constant buffers that are created and destroyed each time.

This can vary in cost between different Vulkan drivers. It shows up on profiles on mesa and MoltenVK, so it's worth avoiding. Some games only do it once (BlitColor for present), others multiple times. It's also done for post processing filters and FSR upscaling, which creates two buffers.

For mirrors, I added the ability to reserve a range on the staging buffer for use as any type of binding. This PR allows these constant buffers to be instead temporarily allocated on the staging buffer, skipping allocation and buffer management costs entirely.

Two temporary allocations do remain:
- DrawTexture, because it doesn't have access to the command buffer scope
- Index buffer indirect conversion, because one of them is a storage buffer and thus is a little more complicated.

There's a small cost in that the uniform buffer takes up more space due to alignment requirements. At worst that's 256 bytes (on a GTX 1070) but more modern GPUs should have a better time.

Worth testing across different games and post effects to make sure they still work.

* Use temporary buffer for ConvertIndexBufferIndirect

* Simplify alignment passing for now

* Fix shader params length for CopyIncompatibleFormats

* Set data for helper shaders without overlap checks

The data is in the staging buffer, so its usage range is guarded using that.
2024-01-25 19:29:53 +01:00
dd2e851e95 OpenTK (#6143) 2024-01-25 19:25:47 +01:00
2ca70eb9a0 Implement SQSHL (immediate) CPU instruction (#6155)
* Implement SQSHL (immediate) CPU instruction

* Fix test
2024-01-24 23:50:43 +01:00
6575952432 Vulkan: Enumerate Query Pool properly (#6167)
Turns out that ElementAt for Queue<T> runs the default implementation as it doesn't implement IList, which enumerates elements of the queue up to the given index. This code was creating `count` enumerators and iterating way more queue items than it needed to at higher counts. The solution is just to use one enumerator and break out of the loop when we get the count that we need.

3.5% of backend time was being spent _just_ enumerating at the usual spot in SMO.
2024-01-24 19:33:52 -03:00
9a28ba72b1 Use unix timestamps on GetFileTimeStampRaw (#6169) 2024-01-24 19:26:59 -03:00
14 changed files with 407 additions and 91 deletions

View File

@ -28,10 +28,10 @@
<PackageVersion Include="NetCoreServer" Version="7.0.0" />
<PackageVersion Include="NUnit" Version="3.13.3" />
<PackageVersion Include="NUnit3TestAdapter" Version="4.1.0" />
<PackageVersion Include="OpenTK.Core" Version="4.8.1" />
<PackageVersion Include="OpenTK.Graphics" Version="4.8.1" />
<PackageVersion Include="OpenTK.Audio.OpenAL" Version="4.8.1" />
<PackageVersion Include="OpenTK.Windowing.GraphicsLibraryFramework" Version="4.8.1" />
<PackageVersion Include="OpenTK.Core" Version="4.8.2" />
<PackageVersion Include="OpenTK.Graphics" Version="4.8.2" />
<PackageVersion Include="OpenTK.Audio.OpenAL" Version="4.8.2" />
<PackageVersion Include="OpenTK.Windowing.GraphicsLibraryFramework" Version="4.8.2" />
<PackageVersion Include="Ryujinx.Audio.OpenAL.Dependencies" Version="1.21.0.1" />
<PackageVersion Include="Ryujinx.Graphics.Nvdec.Dependencies" Version="5.0.1-build13" />
<PackageVersion Include="Ryujinx.Graphics.Vulkan.Dependencies.MoltenVK" Version="1.2.0" />

View File

@ -517,7 +517,10 @@ namespace ARMeilleure.Decoders
SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, OpCodeSimdShImm.Create);
SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, OpCodeSimdShImm.Create);
SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, OpCodeSimdShImm.Create);
SetA64("010111110>>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Si, InstEmit.Sqshl_Si, OpCodeSimdShImm.Create);
SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, OpCodeSimdReg.Create);
SetA64("0000111100>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Vi, InstEmit.Sqshl_Vi, OpCodeSimdShImm.Create);
SetA64("010011110>>>>xxx011101xxxxxxxxxx", InstName.Sqshl_Vi, InstEmit.Sqshl_Vi, OpCodeSimdShImm.Create);
SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, OpCodeSimdShImm.Create);
SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, OpCodeSimdShImm.Create);
SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, InstEmit.Sqshrun_S, OpCodeSimdShImm.Create);

View File

@ -116,7 +116,7 @@ namespace ARMeilleure.Instructions
}
else if (shift >= eSize)
{
if ((op.RegisterSize == RegisterSize.Simd64))
if (op.RegisterSize == RegisterSize.Simd64)
{
Operand res = context.VectorZeroUpper64(GetVec(op.Rd));
@ -359,6 +359,16 @@ namespace ARMeilleure.Instructions
}
}
// SQSHL (immediate), scalar form: requests a signed, saturating, single-element shift left.
public static void Sqshl_Si(ArmEmitterContext context)
{
    const ShlRegFlags ScalarSignedSaturating = ShlRegFlags.Signed | ShlRegFlags.Scalar | ShlRegFlags.Saturating;

    EmitShlImmOp(context, signedDst: true, flags: ScalarSignedSaturating);
}
// SQSHL (immediate), vector form: requests a signed, saturating shift left over every element.
public static void Sqshl_Vi(ArmEmitterContext context)
{
    const ShlRegFlags SignedSaturating = ShlRegFlags.Signed | ShlRegFlags.Saturating;

    EmitShlImmOp(context, signedDst: true, flags: SignedSaturating);
}
public static void Sqshrn_S(ArmEmitterContext context)
{
if (Optimizations.UseAdvSimd)
@ -1593,6 +1603,99 @@ namespace ARMeilleure.Instructions
Saturating = 1 << 3,
}
// Emits a shift-left-by-immediate for the current SIMD shift-immediate opcode.
// Each element is extracted from Rn, shifted (with or without saturation, depending on flags),
// inserted into a zero-initialised vector, and the assembled vector is finally copied to Rd.
//   context   - emitter context; its current opcode is cast to OpCodeSimdShImm.
//   signedDst - forwarded unchanged to the saturating helper.
//   flags     - Scalar limits the operation to one element, Signed selects signed extraction,
//               Saturating selects the saturating helper.
private static void EmitShlImmOp(ArmEmitterContext context, bool signedDst, ShlRegFlags flags = ShlRegFlags.None)
{
    bool isScalar = (flags & ShlRegFlags.Scalar) == ShlRegFlags.Scalar;
    bool isSigned = (flags & ShlRegFlags.Signed) == ShlRegFlags.Signed;
    bool isSaturating = (flags & ShlRegFlags.Saturating) == ShlRegFlags.Saturating;

    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand result = context.VectorZero();

    // Scalar forms touch a single element; vector forms cover every element in the register.
    int elemCount = isScalar ? 1 : op.GetBytesCount() >> op.Size;

    for (int i = 0; i < elemCount; i++)
    {
        Operand source = EmitVectorExtract(context, op.Rn, i, op.Size, isSigned);

        Operand shifted;

        if (isSaturating)
        {
            shifted = EmitShlImmSatQ(context, source, GetImmShl(op), op.Size, isSigned, signedDst);
        }
        else
        {
            shifted = EmitShlImm(context, source, GetImmShl(op), op.Size);
        }

        result = EmitVectorInsert(context, result, shifted, i, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
// Emits a non-saturating left shift of one element by an immediate amount.
//   context  - emitter context used to allocate the result local and emit the shift.
//   op       - element value; asserted to be an I64 operand.
//   shiftLsB - shift amount in bits.
//   size     - element size selector; the element is (8 << size) bits wide.
// Returns a local holding (op << shiftLsB) when the shift is smaller than the element width,
// or zero when the shift is at least the element width.
private static Operand EmitShlImm(ArmEmitterContext context, Operand op, int shiftLsB, int size)
{
    int eSize = 8 << size;

    Debug.Assert(op.Type == OperandType.I64);
    Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);

    Operand res = context.AllocateLocal(OperandType.I64);

    // The original condition was inverted: it shifted only when shiftLsB >= eSize and
    // produced zero for every in-range shift amount.
    if (shiftLsB < eSize)
    {
        Operand shl = context.ShiftLeft(op, Const(shiftLsB));

        context.Copy(res, shl);
    }
    else
    {
        // Shifting by the element width or more pushes every bit out of the element.
        Operand zeroL = Const(0L);

        context.Copy(res, zeroL);
    }

    return res;
}
// Emits a saturating left shift of one element by an immediate amount.
//   context   - emitter context used to emit the IR.
//   op        - element value; asserted to be an I64 operand.
//   shiftLsB  - shift amount in bits.
//   size      - element size selector; the element is (8 << size) bits wide.
//   signedSrc - selects the signed or unsigned helper/shift variant on every path.
//   signedDst - forwarded to the *SrcSatQ helpers on the narrow-element path.
// Returns the local that receives the result on whichever path is taken.
private static Operand EmitShlImmSatQ(ArmEmitterContext context, Operand op, int shiftLsB, int size, bool signedSrc, bool signedDst)
{
int eSize = 8 << size;
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64);
// Join point; only the 64-bit path below branches to it.
Operand lblEnd = Label();
// The result local starts out as a copy of the unshifted input.
Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op);
if (shiftLsB >= eSize)
{
// Shift amount is at least the element width: the result is taken from the *SignSatQ helper.
// NOTE(review): presumably that helper saturates according to the sign of op (and leaves zero as zero) - confirm against its definition.
context.Copy(res, signedSrc
? EmitSignedSignSatQ(context, op, size)
: EmitUnsignedSignSatQ(context, op, size));
}
else
{
Operand shl = context.ShiftLeft(op, Const(shiftLsB));
if (eSize == 64)
{
// 64-bit elements: shift the result back (arithmetic when signedSrc, logical otherwise)
// and compare it with the original value.
Operand sarOrShr = signedSrc
? context.ShiftRightSI(shl, Const(shiftLsB))
: context.ShiftRightUI(shl, Const(shiftLsB));
// Tentatively keep the shifted value; if the round trip reproduces op, skip the fallback below.
context.Copy(res, shl);
context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal);
// Round trip did not reproduce op: overwrite the result with the *SignSatQ helper's value.
context.Copy(res, signedSrc
? EmitSignedSignSatQ(context, op, size)
: EmitUnsignedSignSatQ(context, op, size));
}
else
{
// Narrower elements: the shifted I64 value is handed to the *SrcSatQ helper.
// NOTE(review): presumably that helper clamps the value to the element's range - confirm against its definition.
context.Copy(res, signedSrc
? EmitSignedSrcSatQ(context, shl, size, signedDst)
: EmitUnsignedSrcSatQ(context, shl, size, signedDst));
}
}
context.MarkLabel(lblEnd);
return res;
}
private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None)
{
bool scalar = flags.HasFlag(ShlRegFlags.Scalar);

View File

@ -384,7 +384,9 @@ namespace ARMeilleure.Instructions
Sqrshrn_V,
Sqrshrun_S,
Sqrshrun_V,
Sqshl_Si,
Sqshl_V,
Sqshl_Vi,
Sqshrn_S,
Sqshrn_V,
Sqshrun_S,

View File

@ -4,6 +4,7 @@ using Silk.NET.Vulkan;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using VkBuffer = Silk.NET.Vulkan.Buffer;
using VkFormat = Silk.NET.Vulkan.Format;
@ -384,7 +385,7 @@ namespace Ryujinx.Graphics.Vulkan
var baseData = new Span<byte>((void*)(_map + offset), size);
var modData = _pendingData.AsSpan(offset, size);
StagingBufferReserved? newMirror = _gd.BufferManager.StagingBuffer.TryReserveData(cbs, size, (int)_gd.Capabilities.MinResourceAlignment);
StagingBufferReserved? newMirror = _gd.BufferManager.StagingBuffer.TryReserveData(cbs, size);
if (newMirror != null)
{
@ -838,6 +839,11 @@ namespace Ryujinx.Graphics.Vulkan
}
}
/// <summary>
/// Typed convenience overload: views the elements of <paramref name="data"/> as raw bytes
/// and forwards them, together with <paramref name="offset"/>, to the byte-span overload.
/// </summary>
/// <typeparam name="T">Unmanaged element type of the source span</typeparam>
/// <param name="offset">Offset passed through to the byte-span overload</param>
/// <param name="data">Elements to write</param>
public unsafe void SetDataUnchecked<T>(int offset, ReadOnlySpan<T> data) where T : unmanaged
{
    ReadOnlySpan<byte> rawBytes = MemoryMarshal.AsBytes(data);

    SetDataUnchecked(offset, rawBytes);
}
public void SetDataInline(CommandBufferScoped cbs, Action endRenderPass, int dstOffset, ReadOnlySpan<byte> data)
{
if (!TryPushData(cbs, endRenderPass, dstOffset, data))

View File

@ -9,6 +9,36 @@ using VkFormat = Silk.NET.Vulkan.Format;
namespace Ryujinx.Graphics.Vulkan
{
/// <summary>
/// A temporary buffer range paired with the holder that backs it.
/// Disposing deletes the buffer handle through the owning <see cref="BufferManager"/>,
/// unless the range was flagged as reserved, in which case disposal does nothing.
/// </summary>
readonly struct ScopedTemporaryBuffer : IDisposable
{
    private readonly BufferManager _bufferManager;
    private readonly bool _isReserved;

    /// <summary>Handle, offset and size describing the range.</summary>
    public readonly BufferRange Range;

    /// <summary>Holder supplied at construction for this range.</summary>
    public readonly BufferHolder Holder;

    /// <summary>Shortcut for the handle stored in <see cref="Range"/>.</summary>
    public BufferHandle Handle
    {
        get { return Range.Handle; }
    }

    /// <summary>Shortcut for the offset stored in <see cref="Range"/>.</summary>
    public int Offset
    {
        get { return Range.Offset; }
    }

    /// <summary>
    /// Creates a scoped temporary buffer.
    /// </summary>
    /// <param name="bufferManager">Manager used to delete the handle on dispose</param>
    /// <param name="holder">Holder exposed through <see cref="Holder"/></param>
    /// <param name="handle">Handle stored in the range</param>
    /// <param name="offset">Offset stored in the range</param>
    /// <param name="size">Size stored in the range</param>
    /// <param name="isReserved">When true, <see cref="Dispose"/> leaves the handle alone</param>
    public ScopedTemporaryBuffer(BufferManager bufferManager, BufferHolder holder, BufferHandle handle, int offset, int size, bool isReserved)
    {
        _isReserved = isReserved;
        _bufferManager = bufferManager;
        Holder = holder;
        Range = new BufferRange(handle, offset, size);
    }

    /// <summary>
    /// Deletes the handle via the buffer manager when the range is not reserved.
    /// </summary>
    public void Dispose()
    {
        if (_isReserved)
        {
            return;
        }

        _bufferManager.Delete(Range.Handle);
    }
}
class BufferManager : IDisposable
{
public const MemoryPropertyFlags DefaultBufferMemoryFlags =
@ -238,6 +268,23 @@ namespace Ryujinx.Graphics.Vulkan
return Unsafe.As<ulong, BufferHandle>(ref handle64);
}
/// <summary>
/// Obtains a temporary buffer of the requested size: first by trying to reserve a range on the
/// staging buffer, and, when that yields nothing, by creating a dedicated buffer instead.
/// </summary>
/// <param name="gd">Renderer used when a dedicated buffer has to be created</param>
/// <param name="cbs">Command buffer passed to the staging buffer reservation</param>
/// <param name="size">Requested size</param>
/// <returns>A scoped buffer; it is flagged as reserved only when it came from the staging buffer</returns>
public ScopedTemporaryBuffer ReserveOrCreate(VulkanRenderer gd, CommandBufferScoped cbs, int size)
{
    if (StagingBuffer.TryReserveData(cbs, size) is StagingBufferReserved reserved)
    {
        return new ScopedTemporaryBuffer(this, reserved.Buffer, StagingBuffer.Handle, reserved.Offset, reserved.Size, isReserved: true);
    }

    // No staging range was available: fall back to a dedicated temporary buffer.
    BufferHandle tempHandle = CreateWithHandle(gd, size, out BufferHolder tempHolder);

    return new ScopedTemporaryBuffer(this, tempHolder, tempHandle, 0, size, isReserved: false);
}
public unsafe MemoryRequirements GetHostImportedUsageRequirements(VulkanRenderer gd)
{
var usage = HostImportedBufferUsageFlags;
@ -635,13 +682,14 @@ namespace Ryujinx.Graphics.Vulkan
{
if (disposing)
{
StagingBuffer.Dispose();
foreach (BufferHolder buffer in _buffers)
{
buffer.Dispose();
}
_buffers.Clear();
StagingBuffer.Dispose();
}
}

View File

@ -142,19 +142,18 @@ namespace Ryujinx.Graphics.Vulkan.Effects
};
int rangeSize = dimensionsBuffer.Length * sizeof(float);
var bufferHandle = _renderer.BufferManager.CreateWithHandle(_renderer, rangeSize);
_renderer.BufferManager.SetData(bufferHandle, 0, dimensionsBuffer);
using var buffer = _renderer.BufferManager.ReserveOrCreate(_renderer, cbs, rangeSize);
buffer.Holder.SetDataUnchecked(buffer.Offset, dimensionsBuffer);
ReadOnlySpan<float> sharpeningBuffer = stackalloc float[] { 1.5f - (Level * 0.01f * 1.5f) };
var sharpeningBufferHandle = _renderer.BufferManager.CreateWithHandle(_renderer, sizeof(float));
_renderer.BufferManager.SetData(sharpeningBufferHandle, 0, sharpeningBuffer);
ReadOnlySpan<float> sharpeningBufferData = stackalloc float[] { 1.5f - (Level * 0.01f * 1.5f) };
using var sharpeningBuffer = _renderer.BufferManager.ReserveOrCreate(_renderer, cbs, sizeof(float));
sharpeningBuffer.Holder.SetDataUnchecked(sharpeningBuffer.Offset, sharpeningBufferData);
int threadGroupWorkRegionDim = 16;
int dispatchX = (width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
int dispatchY = (height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
var bufferRanges = new BufferRange(bufferHandle, 0, rangeSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, bufferRanges) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) });
_pipeline.SetImage(0, _intermediaryTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format));
_pipeline.DispatchCompute(dispatchX, dispatchY, 1);
_pipeline.ComputeBarrier();
@ -162,16 +161,12 @@ namespace Ryujinx.Graphics.Vulkan.Effects
// Sharpening pass
_pipeline.SetProgram(_sharpeningProgram);
_pipeline.SetTextureAndSampler(ShaderStage.Compute, 1, _intermediaryTexture, _sampler);
var sharpeningRange = new BufferRange(sharpeningBufferHandle, 0, sizeof(float));
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(4, sharpeningRange) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(4, sharpeningBuffer.Range) });
_pipeline.SetImage(0, destinationTexture);
_pipeline.DispatchCompute(dispatchX, dispatchY, 1);
_pipeline.ComputeBarrier();
_pipeline.Finish();
_renderer.BufferManager.Delete(bufferHandle);
_renderer.BufferManager.Delete(sharpeningBufferHandle);
}
}
}

View File

@ -66,12 +66,11 @@ namespace Ryujinx.Graphics.Vulkan.Effects
ReadOnlySpan<float> resolutionBuffer = stackalloc float[] { view.Width, view.Height };
int rangeSize = resolutionBuffer.Length * sizeof(float);
var bufferHandle = _renderer.BufferManager.CreateWithHandle(_renderer, rangeSize);
using var buffer = _renderer.BufferManager.ReserveOrCreate(_renderer, cbs, rangeSize);
_renderer.BufferManager.SetData(bufferHandle, 0, resolutionBuffer);
buffer.Holder.SetDataUnchecked(buffer.Offset, resolutionBuffer);
var bufferRanges = new BufferRange(bufferHandle, 0, rangeSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, bufferRanges) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) });
var dispatchX = BitUtils.DivRoundUp(view.Width, IPostProcessingEffect.LocalGroupSize);
var dispatchY = BitUtils.DivRoundUp(view.Height, IPostProcessingEffect.LocalGroupSize);
@ -79,7 +78,6 @@ namespace Ryujinx.Graphics.Vulkan.Effects
_pipeline.SetImage(0, _texture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format));
_pipeline.DispatchCompute(dispatchX, dispatchY, 1);
_renderer.BufferManager.Delete(bufferHandle);
_pipeline.ComputeBarrier();
_pipeline.Finish();

View File

@ -215,11 +215,10 @@ namespace Ryujinx.Graphics.Vulkan.Effects
ReadOnlySpan<float> resolutionBuffer = stackalloc float[] { view.Width, view.Height };
int rangeSize = resolutionBuffer.Length * sizeof(float);
var bufferHandle = _renderer.BufferManager.CreateWithHandle(_renderer, rangeSize);
using var buffer = _renderer.BufferManager.ReserveOrCreate(_renderer, cbs, rangeSize);
_renderer.BufferManager.SetData(bufferHandle, 0, resolutionBuffer);
var bufferRanges = new BufferRange(bufferHandle, 0, rangeSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, bufferRanges) });
buffer.Holder.SetDataUnchecked(buffer.Offset, resolutionBuffer);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(2, buffer.Range) });
_pipeline.SetImage(0, _edgeOutputTexture, FormatTable.ConvertRgba8SrgbToUnorm(view.Info.Format));
_pipeline.DispatchCompute(dispatchX, dispatchY, 1);
_pipeline.ComputeBarrier();
@ -245,8 +244,6 @@ namespace Ryujinx.Graphics.Vulkan.Effects
_pipeline.Finish();
_renderer.BufferManager.Delete(bufferHandle);
return _outputTexture;
}

View File

@ -430,11 +430,11 @@ namespace Ryujinx.Graphics.Vulkan
(region[2], region[3]) = (region[3], region[2]);
}
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, RegionBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, RegionBufferSize);
gd.BufferManager.SetData<float>(bufferHandle, 0, region);
buffer.Holder.SetDataUnchecked<float>(buffer.Offset, region);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, new BufferRange(bufferHandle, 0, RegionBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, buffer.Range) });
Span<Viewport> viewports = stackalloc Viewport[1];
@ -490,8 +490,6 @@ namespace Ryujinx.Graphics.Vulkan
}
_pipeline.Finish(gd, cbs);
gd.BufferManager.Delete(bufferHandle);
}
private void BlitDepthStencil(
@ -527,11 +525,11 @@ namespace Ryujinx.Graphics.Vulkan
(region[2], region[3]) = (region[3], region[2]);
}
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, RegionBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, RegionBufferSize);
gd.BufferManager.SetData<float>(bufferHandle, 0, region);
buffer.Holder.SetDataUnchecked<float>(buffer.Offset, region);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, new BufferRange(bufferHandle, 0, RegionBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, buffer.Range) });
Span<Viewport> viewports = stackalloc Viewport[1];
@ -582,8 +580,6 @@ namespace Ryujinx.Graphics.Vulkan
}
_pipeline.Finish(gd, cbs);
gd.BufferManager.Delete(bufferHandle);
}
private static TextureView CreateDepthOrStencilView(TextureView depthStencilTexture, DepthStencilMode depthStencilMode)
@ -681,11 +677,11 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetCommandBuffer(cbs);
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ClearColorBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ClearColorBufferSize);
gd.BufferManager.SetData(bufferHandle, 0, clearColor);
buffer.Holder.SetDataUnchecked(buffer.Offset, clearColor);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, new BufferRange(bufferHandle, 0, ClearColorBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, buffer.Range) });
Span<Viewport> viewports = stackalloc Viewport[1];
@ -721,8 +717,6 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
_pipeline.Draw(4, 1, 0, 0);
_pipeline.Finish();
gd.BufferManager.Delete(bufferHandle);
}
public void Clear(
@ -745,11 +739,11 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetCommandBuffer(cbs);
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ClearColorBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ClearColorBufferSize);
gd.BufferManager.SetData<float>(bufferHandle, 0, stackalloc float[] { depthValue });
buffer.Holder.SetDataUnchecked<float>(buffer.Offset, stackalloc float[] { depthValue });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, new BufferRange(bufferHandle, 0, ClearColorBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, buffer.Range) });
Span<Viewport> viewports = stackalloc Viewport[1];
@ -771,8 +765,6 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetStencilTest(CreateStencilTestDescriptor(stencilMask != 0, stencilValue, 0xff, stencilMask));
_pipeline.Draw(4, 1, 0, 0);
_pipeline.Finish();
gd.BufferManager.Delete(bufferHandle);
}
public void DrawTexture(
@ -878,13 +870,13 @@ namespace Ryujinx.Graphics.Vulkan
shaderParams[2] = size;
shaderParams[3] = srcOffset;
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
gd.BufferManager.SetData<int>(bufferHandle, 0, shaderParams);
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
_pipeline.SetCommandBuffer(cbs);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(bufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, buffer.Range) });
Span<Auto<DisposableBuffer>> sbRanges = new Auto<DisposableBuffer>[2];
@ -896,8 +888,6 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetProgram(_programStrideChange);
_pipeline.DispatchCompute(1 + elems / ConvertElementsPerWorkgroup, 1, 1);
gd.BufferManager.Delete(bufferHandle);
_pipeline.Finish(gd, cbs);
}
else
@ -1025,7 +1015,7 @@ namespace Ryujinx.Graphics.Vulkan
{
const int ParamsBufferSize = 4;
Span<int> shaderParams = stackalloc int[sizeof(int)];
Span<int> shaderParams = stackalloc int[ParamsBufferSize / sizeof(int)];
int srcBpp = src.Info.BytesPerPixel;
int dstBpp = dst.Info.BytesPerPixel;
@ -1034,9 +1024,9 @@ namespace Ryujinx.Graphics.Vulkan
shaderParams[0] = BitOperations.Log2((uint)ratio);
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
gd.BufferManager.SetData<int>(bufferHandle, 0, shaderParams);
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
TextureView.InsertImageBarrier(
gd.Api,
@ -1064,7 +1054,7 @@ namespace Ryujinx.Graphics.Vulkan
var srcFormat = GetFormat(componentSize, srcBpp / componentSize);
var dstFormat = GetFormat(componentSize, dstBpp / componentSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(bufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, buffer.Range) });
for (int l = 0; l < levels; l++)
{
@ -1093,8 +1083,6 @@ namespace Ryujinx.Graphics.Vulkan
}
}
gd.BufferManager.Delete(bufferHandle);
_pipeline.Finish(gd, cbs);
TextureView.InsertImageBarrier(
@ -1128,9 +1116,9 @@ namespace Ryujinx.Graphics.Vulkan
(shaderParams[0], shaderParams[1]) = GetSampleCountXYLog2(samples);
(shaderParams[2], shaderParams[3]) = GetSampleCountXYLog2((int)TextureStorage.ConvertToSampleCountFlags(gd.Capabilities.SupportedSampleCounts, (uint)samples));
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
gd.BufferManager.SetData<int>(bufferHandle, 0, shaderParams);
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
TextureView.InsertImageBarrier(
gd.Api,
@ -1147,7 +1135,7 @@ namespace Ryujinx.Graphics.Vulkan
1);
_pipeline.SetCommandBuffer(cbs);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(bufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, buffer.Range) });
if (isDepthOrStencil)
{
@ -1226,8 +1214,6 @@ namespace Ryujinx.Graphics.Vulkan
}
}
gd.BufferManager.Delete(bufferHandle);
_pipeline.Finish(gd, cbs);
TextureView.InsertImageBarrier(
@ -1261,9 +1247,9 @@ namespace Ryujinx.Graphics.Vulkan
(shaderParams[0], shaderParams[1]) = GetSampleCountXYLog2(samples);
(shaderParams[2], shaderParams[3]) = GetSampleCountXYLog2((int)TextureStorage.ConvertToSampleCountFlags(gd.Capabilities.SupportedSampleCounts, (uint)samples));
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
gd.BufferManager.SetData<int>(bufferHandle, 0, shaderParams);
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
TextureView.InsertImageBarrier(
gd.Api,
@ -1299,7 +1285,7 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetViewports(viewports);
_pipeline.SetPrimitiveTopology(PrimitiveTopology.TriangleStrip);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(bufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, buffer.Range) });
if (isDepthOrStencil)
{
@ -1364,8 +1350,6 @@ namespace Ryujinx.Graphics.Vulkan
}
}
gd.BufferManager.Delete(bufferHandle);
_pipeline.Finish(gd, cbs);
TextureView.InsertImageBarrier(
@ -1616,10 +1600,11 @@ namespace Ryujinx.Graphics.Vulkan
pattern.OffsetIndex.CopyTo(shaderParams[..pattern.OffsetIndex.Length]);
var patternBufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize, out var patternBuffer);
using var patternScoped = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
var patternBuffer = patternScoped.Holder;
var patternBufferAuto = patternBuffer.GetBuffer();
gd.BufferManager.SetData<int>(patternBufferHandle, 0, shaderParams);
patternBuffer.SetDataUnchecked<int>(patternScoped.Offset, shaderParams);
_pipeline.SetCommandBuffer(cbs);
@ -1635,7 +1620,8 @@ namespace Ryujinx.Graphics.Vulkan
indirectDataSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, drawCountBufferAligned) });
_pipeline.SetStorageBuffers(1, new[] { srcIndirectBuffer.GetBuffer(), dstIndirectBuffer.GetBuffer(), patternBuffer.GetBuffer() });
_pipeline.SetStorageBuffers(1, new[] { srcIndirectBuffer.GetBuffer(), dstIndirectBuffer.GetBuffer() });
_pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(3, patternScoped.Range) });
_pipeline.SetProgram(_programConvertIndirectData);
_pipeline.DispatchCompute(1, 1, 1);
@ -1643,12 +1629,12 @@ namespace Ryujinx.Graphics.Vulkan
BufferHolder.InsertBufferBarrier(
gd,
cbs.CommandBuffer,
patternBufferAuto.Get(cbs, ParamsIndirectDispatchOffset, ParamsIndirectDispatchSize).Value,
patternBufferAuto.Get(cbs, patternScoped.Offset + ParamsIndirectDispatchOffset, ParamsIndirectDispatchSize).Value,
AccessFlags.ShaderWriteBit,
AccessFlags.IndirectCommandReadBit,
PipelineStageFlags.ComputeShaderBit,
PipelineStageFlags.DrawIndirectBit,
ParamsIndirectDispatchOffset,
patternScoped.Offset + ParamsIndirectDispatchOffset,
ParamsIndirectDispatchSize);
BufferHolder.InsertBufferBarrier(
@ -1662,11 +1648,11 @@ namespace Ryujinx.Graphics.Vulkan
0,
convertedCount * outputIndexSize);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(patternBufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(patternScoped.Handle, patternScoped.Offset, ParamsBufferSize)) });
_pipeline.SetStorageBuffers(1, new[] { srcIndexBuffer.GetBuffer(), dstIndexBuffer.GetBuffer() });
_pipeline.SetProgram(_programConvertIndexBuffer);
_pipeline.DispatchComputeIndirect(patternBufferAuto, ParamsIndirectDispatchOffset);
_pipeline.DispatchComputeIndirect(patternBufferAuto, patternScoped.Offset + ParamsIndirectDispatchOffset);
BufferHolder.InsertBufferBarrier(
gd,
@ -1679,8 +1665,6 @@ namespace Ryujinx.Graphics.Vulkan
0,
convertedCount * outputIndexSize);
gd.BufferManager.Delete(patternBufferHandle);
_pipeline.Finish(gd, cbs);
}
@ -1726,13 +1710,13 @@ namespace Ryujinx.Graphics.Vulkan
shaderParams[0] = pixelCount;
shaderParams[1] = dstOffset;
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, ParamsBufferSize);
using var buffer = gd.BufferManager.ReserveOrCreate(gd, cbs, ParamsBufferSize);
gd.BufferManager.SetData<int>(bufferHandle, 0, shaderParams);
buffer.Holder.SetDataUnchecked<int>(buffer.Offset, shaderParams);
_pipeline.SetCommandBuffer(cbs);
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, new BufferRange(bufferHandle, 0, ParamsBufferSize)) });
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, buffer.Range) });
Span<Auto<DisposableBuffer>> sbRanges = new Auto<DisposableBuffer>[2];
@ -1744,8 +1728,6 @@ namespace Ryujinx.Graphics.Vulkan
_pipeline.SetProgram(_programConvertD32S8ToD24S8);
_pipeline.DispatchCompute(1 + inSize / ConvertElementsPerWorkgroup, 1, 1);
gd.BufferManager.Delete(bufferHandle);
_pipeline.Finish(gd, cbs);
BufferHolder.InsertBufferBarrier(

View File

@ -67,9 +67,18 @@ namespace Ryujinx.Graphics.Vulkan.Queries
lock (_queryPool)
{
count = Math.Min(count, _queryPool.Count);
for (int i = 0; i < count; i++)
if (count > 0)
{
_queryPool.ElementAt(i).PoolReset(cmd, ResetSequence);
foreach (BufferedQuery query in _queryPool)
{
query.PoolReset(cmd, ResetSequence);
if (--count == 0)
{
break;
}
}
}
}
}

View File

@ -1,5 +1,6 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using System;
using System.Collections.Generic;
using System.Diagnostics;
@ -29,6 +30,9 @@ namespace Ryujinx.Graphics.Vulkan
private readonly VulkanRenderer _gd;
private readonly BufferHolder _buffer;
private readonly int _resourceAlignment;
public readonly BufferHandle Handle;
private readonly struct PendingCopy
{
@ -48,9 +52,10 @@ namespace Ryujinx.Graphics.Vulkan
public StagingBuffer(VulkanRenderer gd, BufferManager bufferManager)
{
_gd = gd;
_buffer = bufferManager.Create(gd, BufferSize);
Handle = bufferManager.CreateWithHandle(gd, BufferSize, out _buffer);
_pendingCopies = new Queue<PendingCopy>();
_freeSize = BufferSize;
_resourceAlignment = (int)gd.Capabilities.MinResourceAlignment;
}
public void PushData(CommandBufferPool cbp, CommandBufferScoped? cbs, Action endRenderPass, BufferHolder dst, int dstOffset, ReadOnlySpan<byte> data)
@ -197,7 +202,7 @@ namespace Ryujinx.Graphics.Vulkan
/// Reserve a range on the staging buffer for the current command buffer and upload data to it.
/// </summary>
/// <param name="cbs">Command buffer to reserve the data on</param>
/// <param name="data">The data to upload</param>
/// <param name="size">The minimum size the reserved data requires</param>
/// <param name="alignment">The required alignment for the buffer offset</param>
/// <returns>The reserved range of the staging buffer</returns>
public unsafe StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size, int alignment)
@ -223,6 +228,18 @@ namespace Ryujinx.Graphics.Vulkan
return ReserveDataImpl(cbs, size, alignment);
}
/// <summary>
/// Reserve a range on the staging buffer for the current command buffer.
/// Forwards to the alignment-taking overload using the alignment stored in <c>_resourceAlignment</c>.
/// </summary>
/// <param name="cbs">Command buffer to reserve the data on</param>
/// <param name="size">The minimum size the reserved data requires</param>
/// <returns>The reserved range of the staging buffer, as produced by the alignment-taking overload</returns>
public unsafe StagingBufferReserved? TryReserveData(CommandBufferScoped cbs, int size) =>
    TryReserveData(cbs, size, _resourceAlignment);
private bool WaitFreeCompleted(CommandBufferPool cbp)
{
if (_pendingCopies.TryPeek(out var pc))
@ -263,7 +280,7 @@ namespace Ryujinx.Graphics.Vulkan
{
if (disposing)
{
_buffer.Dispose();
_gd.BufferManager.Delete(Handle);
while (_pendingCopies.TryDequeue(out var pc))
{

View File

@ -186,7 +186,12 @@ namespace Ryujinx.HLE.FileSystem
public void InitializeFsServer(LibHac.Horizon horizon, out HorizonClient fsServerClient)
{
LocalFileSystem serverBaseFs = new(AppDataManager.BaseDirPath);
LocalFileSystem serverBaseFs = new(useUnixTimeStamps: true);
Result result = serverBaseFs.Initialize(AppDataManager.BaseDirPath, LocalFileSystem.PathMode.DefaultCaseSensitivity, ensurePathExists: true);
if (result.IsFailure())
{
throw new HorizonResultException(result, "Error creating LocalFileSystem.");
}
fsServerClient = horizon.CreatePrivilegedHorizonClient();
var fsServer = new FileSystemServer(fsServerClient);

View File

@ -311,6 +311,46 @@ namespace Ryujinx.Tests.Cpu
};
}
// Base opcode for the scalar D-register shift-left-immediate test; the test ORs further fields into it.
private static uint[] _ShlImm_S_D_()
{
    uint[] baseOpcodes =
    {
        0x5F407400u, // SQSHL D0, D0, #0
    };

    return baseOpcodes;
}
// Base opcode for the 8B/16B vector shift-left-immediate test; the test ORs further fields into it.
private static uint[] _ShlImm_V_8B_16B_()
{
    uint[] baseOpcodes =
    {
        0x0F087400u, // SQSHL V0.8B, V0.8B, #0
    };

    return baseOpcodes;
}
// Base opcode for the 4H/8H vector shift-left-immediate test; the test ORs further fields into it.
private static uint[] _ShlImm_V_4H_8H_()
{
    uint[] baseOpcodes =
    {
        0x0F107400u, // SQSHL V0.4H, V0.4H, #0
    };

    return baseOpcodes;
}
// Base opcode for the 2S/4S vector shift-left-immediate test; the test ORs further fields into it.
private static uint[] _ShlImm_V_2S_4S_()
{
    uint[] baseOpcodes =
    {
        0x0F207400u, // SQSHL V0.2S, V0.2S, #0
    };

    return baseOpcodes;
}
// Base opcode for the 2D vector shift-left-immediate test; the test ORs further fields into it.
private static uint[] _ShlImm_V_2D_()
{
    uint[] baseOpcodes =
    {
        0x4F407400u, // SQSHL V0.2D, V0.2D, #0
    };

    return baseOpcodes;
}
private static uint[] _ShrImm_Sri_S_D_()
{
return new[]
@ -813,6 +853,117 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
// Scalar D-register shift-left-immediate: builds the opcode from the base encoding plus the
// register and immediate fields, runs it once, and compares the outcome against Unicorn.
[Test, Pairwise]
public void ShlImm_S_D([ValueSource(nameof(_ShlImm_S_D_))] uint opcodes,
                       [Values(0u)] uint rd,
                       [Values(1u, 0u)] uint rn,
                       [ValueSource(nameof(_1D_))] ulong z,
                       [ValueSource(nameof(_1D_))] ulong a,
                       [Values(1u, 64u)] uint shift)
{
    // 7-bit immediate field, placed at bit 16 of the opcode.
    uint immField = (64 + shift) & 0x7F;
    uint regFields = ((rn & 31) << 5) | ((rd & 31) << 0);

    opcodes |= regFields;
    opcodes |= immField << 16;

    V128 initialV0 = MakeVectorE0E1(z, z);
    V128 initialV1 = MakeVectorE0(a);

    SingleOpcode(opcodes, v0: initialV0, v1: initialV1);

    CompareAgainstUnicorn();
}
// 8B/16B vector shift-left-immediate: builds the opcode from the base encoding plus the
// register, immediate and Q fields, runs it once, and compares the outcome against Unicorn.
[Test, Pairwise]
public void ShlImm_V_8B_16B([ValueSource(nameof(_ShlImm_V_8B_16B_))] uint opcodes,
                            [Values(0u)] uint rd,
                            [Values(1u, 0u)] uint rn,
                            [ValueSource(nameof(_8B_))] ulong z,
                            [ValueSource(nameof(_8B_))] ulong a,
                            [Values(1u, 8u)] uint shift,
                            [Values(0b0u, 0b1u)] uint q) // <8B, 16B>
{
    // 7-bit immediate field, placed at bit 16 of the opcode.
    uint immField = (8 + shift) & 0x7F;
    uint regFields = ((rn & 31) << 5) | ((rd & 31) << 0);

    opcodes |= regFields;
    opcodes |= immField << 16;
    opcodes |= (q & 1) << 30;

    V128 initialV0 = MakeVectorE0E1(z, z);
    // The upper half of the source is only populated when q is 1.
    V128 initialV1 = MakeVectorE0E1(a, a * q);

    SingleOpcode(opcodes, v0: initialV0, v1: initialV1);

    CompareAgainstUnicorn();
}
// 4H/8H vector shift-left-immediate: builds the opcode from the base encoding plus the
// register, immediate and Q fields, runs it once, and compares the outcome against Unicorn.
[Test, Pairwise]
public void ShlImm_V_4H_8H([ValueSource(nameof(_ShlImm_V_4H_8H_))] uint opcodes,
                           [Values(0u)] uint rd,
                           [Values(1u, 0u)] uint rn,
                           [ValueSource(nameof(_4H_))] ulong z,
                           [ValueSource(nameof(_4H_))] ulong a,
                           [Values(1u, 16u)] uint shift,
                           [Values(0b0u, 0b1u)] uint q) // <4H, 8H>
{
    // 7-bit immediate field, placed at bit 16 of the opcode.
    uint immField = (16 + shift) & 0x7F;
    uint regFields = ((rn & 31) << 5) | ((rd & 31) << 0);

    opcodes |= regFields;
    opcodes |= immField << 16;
    opcodes |= (q & 1) << 30;

    V128 initialV0 = MakeVectorE0E1(z, z);
    // The upper half of the source is only populated when q is 1.
    V128 initialV1 = MakeVectorE0E1(a, a * q);

    SingleOpcode(opcodes, v0: initialV0, v1: initialV1);

    CompareAgainstUnicorn();
}
// 2S/4S vector shift-left-immediate: builds the opcode from the base encoding plus the
// register, immediate and Q fields, runs it once, and compares the outcome against Unicorn.
[Test, Pairwise]
public void ShlImm_V_2S_4S([ValueSource(nameof(_ShlImm_V_2S_4S_))] uint opcodes,
                           [Values(0u)] uint rd,
                           [Values(1u, 0u)] uint rn,
                           [ValueSource(nameof(_2S_))] ulong z,
                           [ValueSource(nameof(_2S_))] ulong a,
                           [Values(1u, 32u)] uint shift,
                           [Values(0b0u, 0b1u)] uint q) // <2S, 4S>
{
    // 7-bit immediate field, placed at bit 16 of the opcode.
    uint immField = (32 + shift) & 0x7F;
    uint regFields = ((rn & 31) << 5) | ((rd & 31) << 0);

    // Bit 30 is set when q is set, or when bit 6 of the immediate field is set.
    uint bit30 = (q | (immField >> 6)) & 1;

    opcodes |= regFields;
    opcodes |= immField << 16;
    opcodes |= bit30 << 30;

    V128 initialV0 = MakeVectorE0E1(z, z);
    // The upper half of the source is only populated when q is 1.
    V128 initialV1 = MakeVectorE0E1(a, a * q);

    SingleOpcode(opcodes, v0: initialV0, v1: initialV1);

    CompareAgainstUnicorn();
}
// 2D vector shift-left-immediate: builds the opcode from the base encoding plus the
// register and immediate fields, runs it once, and compares the outcome against Unicorn.
[Test, Pairwise]
public void ShlImm_V_2D([ValueSource(nameof(_ShlImm_V_2D_))] uint opcodes,
                        [Values(0u)] uint rd,
                        [Values(1u, 0u)] uint rn,
                        [ValueSource(nameof(_1D_))] ulong z,
                        [ValueSource(nameof(_1D_))] ulong a,
                        [Values(1u, 64u)] uint shift)
{
    // 7-bit immediate field, placed at bit 16 of the opcode.
    uint immField = (64 + shift) & 0x7F;
    uint regFields = ((rn & 31) << 5) | ((rd & 31) << 0);

    opcodes |= regFields;
    opcodes |= immField << 16;

    V128 initialV0 = MakeVectorE0E1(z, z);
    V128 initialV1 = MakeVectorE0E1(a, a);

    SingleOpcode(opcodes, v0: initialV0, v1: initialV1);

    CompareAgainstUnicorn();
}
[Test, Pairwise]
public void ShrImm_Sri_S_D([ValueSource(nameof(_ShrImm_Sri_S_D_))] uint opcodes,
[Values(0u)] uint rd,