Compare commits

..

3 Commits

Author SHA1 Message Date
TSRBerry
022d495335 am: Stub GetSaveDataSizeMax (#3857)
* am: Stub GetSaveDataSizeMax()

* am: Remove todo comment for GetSaveDataSizeMax()

* am: saveDataSize & journalDataSize should be of type long

* am: Add explanation for returning default values in GetSaveDataSizeMax()
2022-11-18 03:29:01 +00:00
Berkan Diler
c1372ed775 Use ReadOnlySpan<byte> compiler optimization in more places (#3853)
* Use ReadOnlySpan<byte> compiler optimization in more places

* Revert changes in ShaderBinaries.cs

* Remove unused using;

* Use ReadOnlySpan<byte> compiler optimization in more places
2022-11-18 03:10:44 +00:00
riperiperi
a16682cfd3 Allow _volatile to be set from MultiRegionHandle checks again (#3830)
* Allow _volatile to be set from MultiRegionHandle checks again

Tracking handles have a `_volatile` flag which indicates that the resource being tracked is modified every time it is used under a new sequence number. This is used to reduce the time spent reprotecting memory for tracking writes to commonly modified buffers, like constant buffers.

This optimisation works by detecting if a buffer is modified every time a check happens. If a buffer is checked but it is not dirty, then that data is likely not modified every sequence number, and should use memory protection for write tracking. If the opposite is the case all the time, it is faster to just assume it's dirty as we'd just be wasting time protecting the memory.

The new MultiRegionBitmap could not notify handles that they had been checked as part of the fast bitmap lookup, so bindings larger than 4096 bytes wouldn't trigger it at all. This meant that they would be subject to a ton of reprotection if they were modified often.

This does mean there are two separate sources for a _volatile set: VolatileOrDirty + _checkCount, and the bitmap check. These shouldn't interfere with each other, though.

This fixes performance regressions from #3775 in Pokemon Sword, and hopefully Yu-Gi-Oh! RUSH DUEL: Dawn of the Battle Royale. May affect other games.

* Fix stupid mistake
2022-11-18 02:54:20 +00:00
6 changed files with 66 additions and 38 deletions

View File

@@ -1,15 +1,11 @@
using System;
using System.Numerics;
namespace ARMeilleure.Common
{
static class BitUtils
{
private static readonly sbyte[] HbsNibbleLut;
static BitUtils()
{
HbsNibbleLut = new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
}
private static ReadOnlySpan<sbyte> HbsNibbleLut => new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
public static long FillWithOnes(int bits)
{

View File

@@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
short dqv = dq[0];
ReadOnlySpan<byte> cat6Prob = (xd.Bd == 12)
? Luts.Vp9Cat6ProbHigh12
: (xd.Bd == 10) ? new ReadOnlySpan<byte>(Luts.Vp9Cat6ProbHigh12).Slice(2) : Luts.Vp9Cat6Prob;
: (xd.Bd == 10) ? Luts.Vp9Cat6ProbHigh12.Slice(2) : Luts.Vp9Cat6Prob;
int cat6Bits = (xd.Bd == 12) ? 18 : (xd.Bd == 10) ? 16 : 14;
// Keep value, range, and count as locals. The compiler produces better
// results with the locals than using r directly.

View File

@@ -1,14 +1,12 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Nvdec.Vp9.Types;
using System;
namespace Ryujinx.Graphics.Nvdec.Vp9
{
internal static class Luts
{
public static readonly byte[] SizeGroupLookup = new byte[]
{
0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3
};
public static ReadOnlySpan<byte> SizeGroupLookup => new byte[] { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 };
public static readonly BlockSize[][] SubsizeLookup = new BlockSize[][]
{
@@ -1070,18 +1068,18 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
-(sbyte)MvClassType.MvClass10,
};
public static readonly sbyte[] Vp9MvFPTree = new sbyte[] { -0, 2, -1, 4, -2, -3 };
public static ReadOnlySpan<sbyte> Vp9MvFPTree => new sbyte[] { -0, 2, -1, 4, -2, -3 };
// Entropy
public static readonly byte[] Vp9Cat1Prob = new byte[] { 159 };
public static readonly byte[] Vp9Cat2Prob = new byte[] { 165, 145 };
public static readonly byte[] Vp9Cat3Prob = new byte[] { 173, 148, 140 };
public static readonly byte[] Vp9Cat4Prob = new byte[] { 176, 155, 140, 135 };
public static readonly byte[] Vp9Cat5Prob = new byte[] { 180, 157, 141, 134, 130 };
public static readonly byte[] Vp9Cat6Prob = new byte[] { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
public static ReadOnlySpan<byte> Vp9Cat1Prob => new byte[] { 159 };
public static ReadOnlySpan<byte> Vp9Cat2Prob => new byte[] { 165, 145 };
public static ReadOnlySpan<byte> Vp9Cat3Prob => new byte[] { 173, 148, 140 };
public static ReadOnlySpan<byte> Vp9Cat4Prob => new byte[] { 176, 155, 140, 135 };
public static ReadOnlySpan<byte> Vp9Cat5Prob => new byte[] { 180, 157, 141, 134, 130 };
public static ReadOnlySpan<byte> Vp9Cat6Prob => new byte[] { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
public static readonly byte[] Vp9Cat6ProbHigh12 = new byte[]
public static ReadOnlySpan<byte> Vp9Cat6ProbHigh12 => new byte[]
{
255, 255, 255, 255, 254, 254, 54, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
};
@@ -1131,12 +1129,12 @@ namespace Ryujinx.Graphics.Nvdec.Vp9
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
private static readonly byte[] Vp9CoefbandTrans4X4 = new byte[]
private static ReadOnlySpan<byte> Vp9CoefbandTrans4X4 => new byte[]
{
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
};
public static byte[] get_band_translate(TxSize txSize)
public static ReadOnlySpan<byte> get_band_translate(TxSize txSize)
{
return txSize == TxSize.Tx4x4 ? Vp9CoefbandTrans4X4 : Vp9CoefbandTrans8X8Plus;
}

View File

@@ -26,8 +26,8 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletOE.ApplicationProxyService.Applicati
{
class IApplicationFunctions : IpcService
{
private ulong _defaultSaveDataSize = 200000000;
private ulong _defaultJournalSaveDataSize = 200000000;
private long _defaultSaveDataSize = 200000000;
private long _defaultJournalSaveDataSize = 200000000;
private KEvent _gpuErrorDetectedSystemEvent;
private KEvent _friendInvitationStorageChannelEvent;
@@ -203,13 +203,13 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletOE.ApplicationProxyService.Applicati
}
[CommandHipc(25)] // 3.0.0+
// ExtendSaveData(u8 save_data_type, nn::account::Uid, u64 save_size, u64 journal_size) -> u64 result_code
// ExtendSaveData(u8 save_data_type, nn::account::Uid, s64 save_size, s64 journal_size) -> u64 result_code
public ResultCode ExtendSaveData(ServiceCtx context)
{
SaveDataType saveDataType = (SaveDataType)context.RequestData.ReadUInt64();
Uid userId = context.RequestData.ReadStruct<Uid>();
ulong saveDataSize = context.RequestData.ReadUInt64();
ulong journalSize = context.RequestData.ReadUInt64();
long saveDataSize = context.RequestData.ReadInt64();
long journalSize = context.RequestData.ReadInt64();
// NOTE: Service calls nn::fs::ExtendApplicationSaveData.
// Since LibHac currently doesn't support this method, we can stub it for now.
@@ -225,7 +225,7 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletOE.ApplicationProxyService.Applicati
}
[CommandHipc(26)] // 3.0.0+
// GetSaveDataSize(u8 save_data_type, nn::account::Uid) -> (u64 save_size, u64 journal_size)
// GetSaveDataSize(u8 save_data_type, nn::account::Uid) -> (s64 save_size, s64 journal_size)
public ResultCode GetSaveDataSize(ServiceCtx context)
{
SaveDataType saveDataType = (SaveDataType)context.RequestData.ReadUInt64();
@@ -268,6 +268,23 @@ namespace Ryujinx.HLE.HOS.Services.Am.AppletOE.ApplicationProxyService.Applicati
return ResultCode.Success;
}
[CommandHipc(28)] // 11.0.0+
// GetSaveDataSizeMax() -> (s64 save_size_max, s64 journal_size_max)
public ResultCode GetSaveDataSizeMax(ServiceCtx context)
{
// NOTE: We are currently using a stub for GetSaveDataSize() which returns the default values.
// For this method we shouldn't return anything lower than that, but since we aren't interacting
// with fs to get the actual sizes, we return the default values here as well.
// This also helps in case ExtendSaveData() has been executed and the default values were modified.
context.ResponseData.Write(_defaultSaveDataSize);
context.ResponseData.Write(_defaultJournalSaveDataSize);
Logger.Stub?.PrintStub(LogClass.ServiceAm);
return ResultCode.Success;
}
[CommandHipc(30)]
// BeginBlockingHomeButtonShortAndLongPressed()
public ResultCode BeginBlockingHomeButtonShortAndLongPressed(ServiceCtx context)

View File

@@ -25,6 +25,7 @@ namespace Ryujinx.Memory.Tracking
private int _sequenceNumber;
private BitMap _sequenceNumberBitmap;
private BitMap _dirtyCheckedBitmap;
private int _uncheckedHandles;
public bool Dirty { get; private set; } = true;
@@ -36,6 +37,7 @@ namespace Ryujinx.Memory.Tracking
_dirtyBitmap = new ConcurrentBitmap(_handles.Length, true);
_sequenceNumberBitmap = new BitMap(_handles.Length);
_dirtyCheckedBitmap = new BitMap(_handles.Length);
int i = 0;
@@ -246,16 +248,18 @@ namespace Ryujinx.Memory.Tracking
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ParseDirtyBits(long dirtyBits, long mask, int index, long[] seqMasks, ref int baseBit, ref int prevHandle, ref ulong rgStart, ref ulong rgSize, Action<ulong, ulong> modifiedAction)
private void ParseDirtyBits(long dirtyBits, long mask, int index, long[] seqMasks, long[] checkMasks, ref int baseBit, ref int prevHandle, ref ulong rgStart, ref ulong rgSize, Action<ulong, ulong> modifiedAction)
{
long seqMask = mask & ~seqMasks[index];
long checkMask = (~dirtyBits) & seqMask;
dirtyBits &= seqMask;
while (dirtyBits != 0)
{
int bit = BitOperations.TrailingZeroCount(dirtyBits);
long bitValue = 1L << bit;
dirtyBits &= ~(1L << bit);
dirtyBits &= ~bitValue;
int handleIndex = baseBit + bit;
@@ -273,11 +277,14 @@ namespace Ryujinx.Memory.Tracking
}
rgSize += handle.Size;
handle.Reprotect();
handle.Reprotect(false, (checkMasks[index] & bitValue) == 0);
checkMasks[index] &= ~bitValue;
prevHandle = handleIndex;
}
checkMasks[index] |= checkMask;
seqMasks[index] |= mask;
_uncheckedHandles -= BitOperations.PopCount((ulong)seqMask);
@@ -328,6 +335,7 @@ namespace Ryujinx.Memory.Tracking
ulong rgSize = 0;
long[] seqMasks = _sequenceNumberBitmap.Masks;
long[] checkedMasks = _dirtyCheckedBitmap.Masks;
long[] masks = _dirtyBitmap.Masks;
int startIndex = startHandle >> ConcurrentBitmap.IntShift;
@@ -345,20 +353,20 @@ namespace Ryujinx.Memory.Tracking
if (startIndex == endIndex)
{
ParseDirtyBits(startValue, startMask & endMask, startIndex, seqMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
ParseDirtyBits(startValue, startMask & endMask, startIndex, seqMasks, checkedMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
}
else
{
ParseDirtyBits(startValue, startMask, startIndex, seqMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
ParseDirtyBits(startValue, startMask, startIndex, seqMasks, checkedMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
for (int i = startIndex + 1; i < endIndex; i++)
{
ParseDirtyBits(Volatile.Read(ref masks[i]), -1L, i, seqMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
ParseDirtyBits(Volatile.Read(ref masks[i]), -1L, i, seqMasks, checkedMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
}
long endValue = Volatile.Read(ref masks[endIndex]);
ParseDirtyBits(endValue, endMask, endIndex, seqMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
ParseDirtyBits(endValue, endMask, endIndex, seqMasks, checkedMasks, ref baseBit, ref prevHandle, ref rgStart, ref rgSize, modifiedAction);
}
if (rgSize != 0)

View File

@@ -263,15 +263,15 @@ namespace Ryujinx.Memory.Tracking
/// </summary>
public void ForceDirty()
{
_checkCount++;
Dirty = true;
}
/// <summary>
/// Consume the dirty flag for this handle, and reprotect so it can be set on the next write.
/// </summary>
public void Reprotect(bool asDirty = false)
/// <param name="asDirty">True if the handle should be reprotected as dirty, rather than have it cleared</param>
/// <param name="consecutiveCheck">True if this reprotect is the result of consecutive dirty checks</param>
public void Reprotect(bool asDirty, bool consecutiveCheck = false)
{
if (_volatile) return;
@@ -296,7 +296,7 @@ namespace Ryujinx.Memory.Tracking
}
else if (!asDirty)
{
if (_checkCount > 0 && _checkCount < CheckCountForInfrequent)
if (consecutiveCheck || (_checkCount > 0 && _checkCount < CheckCountForInfrequent))
{
if (++_volatileCount >= VolatileThreshold && _preAction == null)
{
@@ -313,6 +313,15 @@ namespace Ryujinx.Memory.Tracking
}
}
/// <summary>
/// Consume the dirty flag for this handle, and reprotect so it can be set on the next write.
/// </summary>
/// <param name="asDirty">True if the handle should be reprotected as dirty, rather than have it cleared</param>
public void Reprotect(bool asDirty = false)
{
Reprotect(asDirty, false);
}
/// <summary>
/// Register an action to perform when the tracked region is read or written.
/// The action is automatically removed after it runs.