Compare commits

...

4 Commits

Author SHA1 Message Date
6ed613a6e6 Fix vote and shuffle shader instructions on AMD GPUs (#5540)
* Move shuffle handling out of the backend to a transform pass

* Handle subgroup sizes higher than 32

* Stop using the subgroup size control extension

* Make GenerateShuffleFunction static

* Shader cache version bump
2023-08-16 21:31:07 -03:00
64079c034c Prefer jagged arrays over multidimensional (#5562)
* fix CA1814

* Update .editorconfig

removing .editorconfig rule
2023-08-16 23:24:44 +02:00
17354d59d1 Declare and use gl_PerVertex block for VTG per-vertex built-ins (#5576)
* Declare and use gl_PerVertex block for VTG per-vertex built-ins

* Shader cache version bump
2023-08-16 23:16:25 +02:00
0c445184c1 Vulkan: Periodically free regions of the staging buffer (#5572)
* Vulkan: Periodically free regions of the staging buffer

There was an edge case where a game could submit tens of thousands of small copies over the course of over half a minute to unique fences. This could result in a large stutter when the staging buffer became full and it tried to check and free thousands of completed fences.

This became visible with some games and mirrors on Windows, as they don't submit any buffer data via the staging buffer, but may submit copies of the support buffer.

This change makes the Vulkan backend check for staging buffer completion on each command buffer submit, so it can't get backed up with 1000s of copies to check.

* Add comment
2023-08-16 23:06:46 +02:00
41 changed files with 628 additions and 272 deletions

View File

@ -52,6 +52,7 @@ namespace Ryujinx.Graphics.GAL
public readonly int MaximumComputeSharedMemorySize;
public readonly float MaximumSupportedAnisotropy;
public readonly int ShaderSubgroupSize;
public readonly int StorageBufferOffsetAlignment;
public readonly int GatherBiasPrecision;
@ -101,6 +102,7 @@ namespace Ryujinx.Graphics.GAL
uint maximumImagesPerStage,
int maximumComputeSharedMemorySize,
float maximumSupportedAnisotropy,
int shaderSubgroupSize,
int storageBufferOffsetAlignment,
int gatherBiasPrecision)
{
@ -148,6 +150,7 @@ namespace Ryujinx.Graphics.GAL
MaximumImagesPerStage = maximumImagesPerStage;
MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
ShaderSubgroupSize = shaderSubgroupSize;
StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
GatherBiasPrecision = gatherBiasPrecision;
}

View File

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 5529;
private const uint CodeGenVersion = 5540;
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";

View File

@ -137,6 +137,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
public int QueryHostStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment;
public int QueryHostSubgroupSize() => _context.Capabilities.ShaderSubgroupSize;
public bool QueryHostSupportsBgraFormat() => _context.Capabilities.SupportsBgraFormat;
public bool QueryHostSupportsFragmentShaderInterlock() => _context.Capabilities.SupportsFragmentShaderInterlock;

View File

@ -7,5 +7,6 @@
public const int MaxVertexAttribs = 16;
public const int MaxVertexBuffers = 16;
public const int MaxTransformFeedbackBuffers = 4;
public const int MaxSubgroupSize = 64;
}
}

View File

@ -175,6 +175,7 @@ namespace Ryujinx.Graphics.OpenGL
maximumImagesPerStage: 8,
maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
shaderSubgroupSize: Constants.MaxSubgroupSize,
storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment,
gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan.
}

View File

@ -25,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable");
context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable");
context.AppendLine("#extension GL_KHR_shader_subgroup_shuffle : enable");
}
context.AppendLine("#extension GL_ARB_shader_group_vote : enable");
@ -201,26 +202,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");

View File

@ -5,10 +5,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static string MultiplyHighS32 = "Helper_MultiplyHighS32";
public static string MultiplyHighU32 = "Helper_MultiplyHighU32";
public static string Shuffle = "Helper_Shuffle";
public static string ShuffleDown = "Helper_ShuffleDown";
public static string ShuffleUp = "Helper_ShuffleUp";
public static string ShuffleXor = "Helper_ShuffleXor";
public static string SwizzleAdd = "Helper_SwizzleAdd";
}
}

View File

@ -1,11 +0,0 @@
float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = (index & ~segMask) | minThreadId;
valid = srcThreadId <= maxThreadId;
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View File

@ -1,11 +0,0 @@
float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = $SUBGROUP_INVOCATION$ + index;
valid = srcThreadId <= maxThreadId;
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View File

@ -1,9 +0,0 @@
float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
{
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint srcThreadId = $SUBGROUP_INVOCATION$ - index;
valid = int(srcThreadId) >= int(minThreadId);
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View File

@ -1,11 +0,0 @@
float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = $SUBGROUP_INVOCATION$ ^ index;
valid = srcThreadId <= maxThreadId;
float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x;
}

View File

@ -9,6 +9,7 @@ using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenFSI;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenPacking;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenShuffle;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenVector;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
@ -174,6 +175,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
case Instruction.PackHalf2x16:
return PackHalf2x16(context, operation);
case Instruction.Shuffle:
return Shuffle(context, operation);
case Instruction.Store:
return Store(context, operation);

View File

@ -13,14 +13,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
AggregateType dstType = GetSrcVarType(operation.Inst, 0);
string arg = GetSoureExpr(context, operation.GetSource(0), dstType);
char component = "xyzw"[operation.Index];
if (context.HostCapabilities.SupportsShaderBallot)
{
return $"unpackUint2x32(ballotARB({arg})).x";
return $"unpackUint2x32(ballotARB({arg})).{component}";
}
else
{
return $"subgroupBallot({arg}).x";
return $"subgroupBallot({arg}).{component}";
}
}
}

View File

@ -108,10 +108,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
Add(Instruction.Shuffle, InstType.CallQuaternary, HelperFunctionNames.Shuffle);
Add(Instruction.ShuffleDown, InstType.CallQuaternary, HelperFunctionNames.ShuffleDown);
Add(Instruction.ShuffleUp, InstType.CallQuaternary, HelperFunctionNames.ShuffleUp);
Add(Instruction.ShuffleXor, InstType.CallQuaternary, HelperFunctionNames.ShuffleXor);
Add(Instruction.Shuffle, InstType.Special);
Add(Instruction.ShuffleDown, InstType.CallBinary, "subgroupShuffleDown");
Add(Instruction.ShuffleUp, InstType.CallBinary, "subgroupShuffleUp");
Add(Instruction.ShuffleXor, InstType.CallBinary, "subgroupShuffleXor");
Add(Instruction.Sine, InstType.CallUnary, "sin");
Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt");
Add(Instruction.Store, InstType.Special);

View File

@ -0,0 +1,25 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
static class InstGenShuffle
{
public static string Shuffle(CodeGenContext context, AstOperation operation)
{
string value = GetSoureExpr(context, operation.GetSource(0), AggregateType.FP32);
string index = GetSoureExpr(context, operation.GetSource(1), AggregateType.U32);
if (context.HostCapabilities.SupportsShaderBallot)
{
return $"readInvocationARB({value}, {index})";
}
else
{
return $"subgroupShuffle({value}, {index})";
}
}
}
}

View File

@ -348,12 +348,98 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
}
}
}
else if (IoMap.IsPerVertexBuiltIn(ioDefinition.IoVariable))
{
continue;
}
bool isOutput = ioDefinition.StorageKind.IsOutput();
bool isPerPatch = ioDefinition.StorageKind.IsPerPatch();
DeclareInputOrOutput(context, ioDefinition, isOutput, isPerPatch, iq, firstLocation);
}
DeclarePerVertexBlock(context);
}
private static void DeclarePerVertexBlock(CodeGenContext context)
{
if (context.Definitions.Stage.IsVtg())
{
if (context.Definitions.Stage != ShaderStage.Vertex)
{
var perVertexInputStructType = CreatePerVertexStructType(context);
int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.InputVertices : 32;
var perVertexInputArrayType = context.TypeArray(perVertexInputStructType, context.Constant(context.TypeU32(), arraySize));
var perVertexInputPointerType = context.TypePointer(StorageClass.Input, perVertexInputArrayType);
var perVertexInputVariable = context.Variable(perVertexInputPointerType, StorageClass.Input);
context.Name(perVertexInputVariable, "gl_in");
context.AddGlobalVariable(perVertexInputVariable);
context.Inputs.Add(new IoDefinition(StorageKind.Input, IoVariable.Position), perVertexInputVariable);
}
var perVertexOutputStructType = CreatePerVertexStructType(context);
void DecorateTfo(IoVariable ioVariable, int fieldIndex)
{
if (context.Definitions.TryGetTransformFeedbackOutput(ioVariable, 0, 0, out var transformFeedbackOutput))
{
context.MemberDecorate(perVertexOutputStructType, fieldIndex, Decoration.XfbBuffer, (LiteralInteger)transformFeedbackOutput.Buffer);
context.MemberDecorate(perVertexOutputStructType, fieldIndex, Decoration.XfbStride, (LiteralInteger)transformFeedbackOutput.Stride);
context.MemberDecorate(perVertexOutputStructType, fieldIndex, Decoration.Offset, (LiteralInteger)transformFeedbackOutput.Offset);
}
}
DecorateTfo(IoVariable.Position, 0);
DecorateTfo(IoVariable.PointSize, 1);
DecorateTfo(IoVariable.ClipDistance, 2);
SpvInstruction perVertexOutputArrayType;
if (context.Definitions.Stage == ShaderStage.TessellationControl)
{
int arraySize = context.Definitions.ThreadsPerInputPrimitive;
perVertexOutputArrayType = context.TypeArray(perVertexOutputStructType, context.Constant(context.TypeU32(), arraySize));
}
else
{
perVertexOutputArrayType = perVertexOutputStructType;
}
var perVertexOutputPointerType = context.TypePointer(StorageClass.Output, perVertexOutputArrayType);
var perVertexOutputVariable = context.Variable(perVertexOutputPointerType, StorageClass.Output);
context.AddGlobalVariable(perVertexOutputVariable);
context.Outputs.Add(new IoDefinition(StorageKind.Output, IoVariable.Position), perVertexOutputVariable);
}
}
private static SpvInstruction CreatePerVertexStructType(CodeGenContext context)
{
var vec4FloatType = context.TypeVector(context.TypeFP32(), 4);
var floatType = context.TypeFP32();
var array8FloatType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), 8));
var array1FloatType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), 1));
var perVertexStructType = context.TypeStruct(true, vec4FloatType, floatType, array8FloatType, array1FloatType);
context.Name(perVertexStructType, "gl_PerVertex");
context.MemberName(perVertexStructType, 0, "gl_Position");
context.MemberName(perVertexStructType, 1, "gl_PointSize");
context.MemberName(perVertexStructType, 2, "gl_ClipDistance");
context.MemberName(perVertexStructType, 3, "gl_CullDistance");
context.Decorate(perVertexStructType, Decoration.Block);
context.MemberDecorate(perVertexStructType, 0, Decoration.BuiltIn, (LiteralInteger)BuiltIn.Position);
context.MemberDecorate(perVertexStructType, 1, Decoration.BuiltIn, (LiteralInteger)BuiltIn.PointSize);
context.MemberDecorate(perVertexStructType, 2, Decoration.BuiltIn, (LiteralInteger)BuiltIn.ClipDistance);
context.MemberDecorate(perVertexStructType, 3, Decoration.BuiltIn, (LiteralInteger)BuiltIn.CullDistance);
return perVertexStructType;
}
private static void DeclareInputOrOutput(

View File

@ -231,7 +231,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source));
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0);
var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index);
return new OperationResult(AggregateType.U32, mask);
}
@ -1100,117 +1100,40 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.IAdd(context.TypeU32(), threadId, index);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleDown(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var srcThreadId = context.ISub(context.TypeU32(), threadId, index);
var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleUp(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation)
{
var x = context.GetFP32(operation.GetSource(0));
var value = context.GetFP32(operation.GetSource(0));
var index = context.GetU32(operation.GetSource(1));
var mask = context.GetU32(operation.GetSource(2));
var const31 = context.Constant(context.TypeU32(), 31);
var const8 = context.Constant(context.TypeU32(), 8);
var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
var notSegMask = context.Not(context.TypeU32(), segMask);
var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);
var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index);
var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
var result = context.Select(context.TypeFP32(), valid, value, x);
var validLocal = (AstOperand)operation.GetSource(3);
context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));
var result = context.GroupNonUniformShuffleXor(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index);
return new OperationResult(AggregateType.FP32, result);
}
@ -1788,6 +1711,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
StorageClass storageClass;
SpvInstruction baseObj;
int srcIndex = 0;
IoVariable? perVertexBuiltIn = null;
switch (storageKind)
{
@ -1881,6 +1805,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
else
{
(_, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
if (IoMap.IsPerVertexBuiltIn(ioVariable))
{
perVertexBuiltIn = ioVariable;
ioVariable = IoVariable.Position;
}
}
varType &= AggregateType.ElementTypeMask;
@ -1902,6 +1832,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
int inputsCount = (isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex;
if (perVertexBuiltIn.HasValue)
{
int fieldIndex = IoMap.GetPerVertexStructFieldIndex(perVertexBuiltIn.Value);
var indexes = new SpvInstruction[inputsCount + 1];
int index = 0;
if (IoMap.IsPerVertexArrayBuiltIn(storageKind, context.Definitions.Stage))
{
indexes[index++] = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
indexes[index++] = context.Constant(context.TypeS32(), fieldIndex);
}
else
{
indexes[index++] = context.Constant(context.TypeS32(), fieldIndex);
}
for (; index < inputsCount + 1; srcIndex++, index++)
{
indexes[index] = context.Get(AggregateType.S32, operation.GetSource(srcIndex));
}
return context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, indexes);
}
if (operation.Inst == Instruction.AtomicCompareAndSwap)
{
inputsCount--;

View File

@ -1,5 +1,6 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Spv.Specification;
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
@ -80,5 +81,43 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
return false;
}
public static bool IsPerVertexBuiltIn(IoVariable ioVariable)
{
switch (ioVariable)
{
case IoVariable.Position:
case IoVariable.PointSize:
case IoVariable.ClipDistance:
return true;
}
return false;
}
public static bool IsPerVertexArrayBuiltIn(StorageKind storageKind, ShaderStage stage)
{
if (storageKind == StorageKind.Output)
{
return stage == ShaderStage.TessellationControl;
}
else
{
return stage == ShaderStage.TessellationControl ||
stage == ShaderStage.TessellationEvaluation ||
stage == ShaderStage.Geometry;
}
}
public static int GetPerVertexStructFieldIndex(IoVariable ioVariable)
{
return ioVariable switch
{
IoVariable.Position => 0,
IoVariable.PointSize => 1,
IoVariable.ClipDistance => 2,
_ => throw new ArgumentException($"Invalid built-in variable {ioVariable}.")
};
}
}
}

View File

@ -28,12 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
_poolLock = new object();
}
private const HelperFunctionsMask NeedsInvocationIdMask =
HelperFunctionsMask.Shuffle |
HelperFunctionsMask.ShuffleDown |
HelperFunctionsMask.ShuffleUp |
HelperFunctionsMask.ShuffleXor |
HelperFunctionsMask.SwizzleAdd;
private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd;
public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters)
{

View File

@ -307,6 +307,9 @@ namespace Ryujinx.Graphics.Shader.Decoders
case InstName.Sts:
context.SetUsedFeature(FeatureFlags.SharedMemory);
break;
case InstName.Shfl:
context.SetUsedFeature(FeatureFlags.Shuffle);
break;
}
block.OpCodes.Add(op);

View File

@ -194,6 +194,15 @@ namespace Ryujinx.Graphics.Shader
return 16;
}
/// <summary>
/// Queries host shader subgroup size.
/// </summary>
/// <returns>Host shader subgroup size in invocations</returns>
int QueryHostSubgroupSize()
{
return 32;
}
/// <summary>
/// Queries host support for texture formats with BGRA component order (such as BGRA8).
/// </summary>

View File

@ -76,7 +76,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
switch (op.SReg)
{
case SReg.LaneId:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
src = EmitLoadSubgroupLaneId(context);
break;
case SReg.InvocationId:
@ -146,19 +146,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
break;
case SReg.EqMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupEqMask, null, Const(0));
src = EmitLoadSubgroupMask(context, IoVariable.SubgroupEqMask);
break;
case SReg.LtMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLtMask, null, Const(0));
src = EmitLoadSubgroupMask(context, IoVariable.SubgroupLtMask);
break;
case SReg.LeMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLeMask, null, Const(0));
src = EmitLoadSubgroupMask(context, IoVariable.SubgroupLeMask);
break;
case SReg.GtMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupGtMask, null, Const(0));
src = EmitLoadSubgroupMask(context, IoVariable.SubgroupGtMask);
break;
case SReg.GeMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupGeMask, null, Const(0));
src = EmitLoadSubgroupMask(context, IoVariable.SubgroupGeMask);
break;
default:
@ -169,6 +169,52 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(GetDest(op.Dest), src);
}
private static Operand EmitLoadSubgroupLaneId(EmitterContext context)
{
if (context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize() <= 32)
{
return context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
}
return context.BitwiseAnd(context.Load(StorageKind.Input, IoVariable.SubgroupLaneId), Const(0x1f));
}
private static Operand EmitLoadSubgroupMask(EmitterContext context, IoVariable ioVariable)
{
int subgroupSize = context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize();
if (subgroupSize <= 32)
{
return context.Load(StorageKind.Input, ioVariable, null, Const(0));
}
else if (subgroupSize == 64)
{
Operand laneId = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
Operand low = context.Load(StorageKind.Input, ioVariable, null, Const(0));
Operand high = context.Load(StorageKind.Input, ioVariable, null, Const(1));
return context.ConditionalSelect(context.BitwiseAnd(laneId, Const(32)), high, low);
}
else
{
Operand laneId = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
Operand element = context.ShiftRightU32(laneId, Const(5));
Operand res = context.Load(StorageKind.Input, ioVariable, null, Const(0));
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(1)),
context.Load(StorageKind.Input, ioVariable, null, Const(1)), res);
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(2)),
context.Load(StorageKind.Input, ioVariable, null, Const(2)), res);
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(3)),
context.Load(StorageKind.Input, ioVariable, null, Const(3)), res);
return res;
}
}
public static void SelR(EmitterContext context)
{
InstSelR op = context.GetOp<InstSelR>();

View File

@ -10,10 +10,10 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private static readonly int[,] _maskLut = new int[,]
private static readonly int[][] _maskLut = new int[][]
{
{ 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 },
{ 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 },
new int[] { 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 },
new int[] { 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 },
};
public const bool Sample1DAs2D = true;
@ -605,7 +605,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
Operand[] rd1 = new Operand[2] { ConstF(0), ConstF(0) };
int handle = imm;
int componentMask = _maskLut[dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1, writeMask];
int componentMask = _maskLut[dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1][writeMask];
int componentsCount = BitOperations.PopCount((uint)componentMask);

View File

@ -50,20 +50,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
InstVote op = context.GetOp<InstVote>();
Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
Operand res = null;
switch (op.VoteMode)
{
case VoteMode.All:
res = context.VoteAll(pred);
break;
case VoteMode.Any:
res = context.VoteAny(pred);
break;
case VoteMode.Eq:
res = context.VoteAllEqual(pred);
break;
}
Operand res = EmitVote(context, op.VoteMode, pred);
if (res != null)
{
@ -76,7 +63,81 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (op.Dest != RegisterConsts.RegisterZeroIndex)
{
context.Copy(GetDest(op.Dest), context.Ballot(pred));
context.Copy(GetDest(op.Dest), EmitBallot(context, pred));
}
}
private static Operand EmitVote(EmitterContext context, VoteMode voteMode, Operand pred)
{
int subgroupSize = context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize();
if (subgroupSize <= 32)
{
return voteMode switch
{
VoteMode.All => context.VoteAll(pred),
VoteMode.Any => context.VoteAny(pred),
VoteMode.Eq => context.VoteAllEqual(pred),
_ => null,
};
}
// Emulate vote with ballot masks.
// We do that when the GPU thread count is not 32,
// since the shader code assumes it is 32.
// allInvocations => ballot(pred) == ballot(true),
// anyInvocation => ballot(pred) != 0,
// allInvocationsEqual => ballot(pred) == balot(true) || ballot(pred) == 0
Operand ballotMask = EmitBallot(context, pred);
Operand AllTrue() => context.ICompareEqual(ballotMask, EmitBallot(context, Const(IrConsts.True)));
return voteMode switch
{
VoteMode.All => AllTrue(),
VoteMode.Any => context.ICompareNotEqual(ballotMask, Const(0)),
VoteMode.Eq => context.BitwiseOr(AllTrue(), context.ICompareEqual(ballotMask, Const(0))),
_ => null,
};
}
private static Operand EmitBallot(EmitterContext context, Operand pred)
{
int subgroupSize = context.TranslatorContext.GpuAccessor.QueryHostSubgroupSize();
if (subgroupSize <= 32)
{
return context.Ballot(pred, 0);
}
else if (subgroupSize == 64)
{
// TODO: Add support for vector destination and do that with a single operation.
Operand laneId = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
Operand low = context.Ballot(pred, 0);
Operand high = context.Ballot(pred, 1);
return context.ConditionalSelect(context.BitwiseAnd(laneId, Const(32)), high, low);
}
else
{
// TODO: Add support for vector destination and do that with a single operation.
Operand laneId = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
Operand element = context.ShiftRightU32(laneId, Const(5));
Operand res = context.Ballot(pred, 0);
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(1)),
context.Ballot(pred, 1), res);
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(2)),
context.Ballot(pred, 2), res);
res = context.ConditionalSelect(
context.ICompareEqual(element, Const(3)),
context.Ballot(pred, 3), res);
return res;
}
}
}

View File

@ -12,10 +12,6 @@
<ItemGroup>
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
</ItemGroup>

View File

@ -23,5 +23,18 @@ namespace Ryujinx.Graphics.Shader
{
return stage == ShaderStage.Vertex || stage == ShaderStage.Fragment || stage == ShaderStage.Compute;
}
/// <summary>
/// Checks if the shader stage is vertex, tessellation or geometry.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <returns>True if the shader stage is vertex, tessellation or geometry, false otherwise</returns>
public static bool IsVtg(this ShaderStage stage)
{
return stage == ShaderStage.Vertex ||
stage == ShaderStage.TessellationControl ||
stage == ShaderStage.TessellationEvaluation ||
stage == ShaderStage.Geometry;
}
}
}

View File

@ -7,10 +7,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
{
MultiplyHighS32 = 1 << 2,
MultiplyHighU32 = 1 << 3,
Shuffle = 1 << 4,
ShuffleDown = 1 << 5,
ShuffleUp = 1 << 6,
ShuffleXor = 1 << 7,
SwizzleAdd = 1 << 10,
FSI = 1 << 11,
}

View File

@ -109,14 +109,15 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.PackDouble2x32, AggregateType.FP64, AggregateType.U32, AggregateType.U32);
Add(Instruction.PackHalf2x16, AggregateType.U32, AggregateType.FP32, AggregateType.FP32);
Add(Instruction.ReciprocalSquareRoot, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.Return, AggregateType.Void, AggregateType.U32);
Add(Instruction.Round, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.ShiftLeft, AggregateType.S32, AggregateType.S32, AggregateType.S32);
Add(Instruction.ShiftRightS32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
Add(Instruction.ShiftRightU32, AggregateType.U32, AggregateType.U32, AggregateType.S32);
Add(Instruction.Shuffle, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
Add(Instruction.ShuffleDown, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
Add(Instruction.ShuffleUp, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
Add(Instruction.ShuffleXor, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
Add(Instruction.Shuffle, AggregateType.FP32, AggregateType.FP32, AggregateType.U32);
Add(Instruction.ShuffleDown, AggregateType.FP32, AggregateType.FP32, AggregateType.U32);
Add(Instruction.ShuffleUp, AggregateType.FP32, AggregateType.FP32, AggregateType.U32);
Add(Instruction.ShuffleXor, AggregateType.FP32, AggregateType.FP32, AggregateType.U32);
Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar);
Add(Instruction.Store, AggregateType.Void);
@ -131,7 +132,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.VoteAll, AggregateType.Bool, AggregateType.Bool);
Add(Instruction.VoteAllEqual, AggregateType.Bool, AggregateType.Bool);
Add(Instruction.VoteAny, AggregateType.Bool, AggregateType.Bool);
#pragma warning restore IDE0055v
#pragma warning restore IDE0055
}
private static void Add(Instruction inst, AggregateType destType, params AggregateType[] srcTypes)

View File

@ -282,18 +282,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
case Instruction.MultiplyHighU32:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32;
break;
case Instruction.Shuffle:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle;
break;
case Instruction.ShuffleDown:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown;
break;
case Instruction.ShuffleUp:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp;
break;
case Instruction.ShuffleXor:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
break;
case Instruction.SwizzleAdd:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
break;

View File

@ -112,9 +112,13 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand Ballot(this EmitterContext context, Operand a)
public static Operand Ballot(this EmitterContext context, Operand a, int index)
{
return context.Add(Instruction.Ballot, Local(), a);
Operand dest = Local();
context.Add(new Operation(Instruction.Ballot, index, dest, a));
return dest;
}
public static Operand Barrier(this EmitterContext context)
@ -782,21 +786,41 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.ShiftRightU32, Local(), a, b);
}
public static Operand Shuffle(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.Shuffle, Local(), a, b);
}
public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);
}
public static Operand ShuffleDown(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.ShuffleDown, Local(), a, b);
}
public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);
}
public static Operand ShuffleUp(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.ShuffleUp, Local(), a, b);
}
public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);
}
public static Operand ShuffleXor(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.ShuffleXor, Local(), a, b);
}
public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);

View File

@ -18,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.Translation
InstanceId = 1 << 3,
DrawParameters = 1 << 4,
RtLayer = 1 << 5,
Shuffle = 1 << 6,
FixedFuncAttr = 1 << 9,
LocalMemory = 1 << 10,
SharedMemory = 1 << 11,

View File

@ -56,6 +56,20 @@ namespace Ryujinx.Graphics.Shader.Translation
return functionId;
}
public int GetOrCreateShuffleFunctionId(HelperFunctionName functionName, int subgroupSize)
{
if (_functionIds.TryGetValue((int)functionName, out int functionId))
{
return functionId;
}
Function function = GenerateShuffleFunction(functionName, subgroupSize);
functionId = AddFunction(function);
_functionIds.Add((int)functionName, functionId);
return functionId;
}
private Function GenerateFunction(HelperFunctionName functionName)
{
return functionName switch
@ -216,6 +230,137 @@ namespace Ryujinx.Graphics.Shader.Translation
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, $"SharedStore{bitSize}_{id}", false, 2, 0);
}
private static Function GenerateShuffleFunction(HelperFunctionName functionName, int subgroupSize)
{
return functionName switch
{
HelperFunctionName.Shuffle => GenerateShuffle(subgroupSize),
HelperFunctionName.ShuffleDown => GenerateShuffleDown(subgroupSize),
HelperFunctionName.ShuffleUp => GenerateShuffleUp(subgroupSize),
HelperFunctionName.ShuffleXor => GenerateShuffleXor(subgroupSize),
_ => throw new ArgumentException($"Invalid function name {functionName}"),
};
}
private static Function GenerateShuffle(int subgroupSize)
{
EmitterContext context = new();
Operand value = Argument(0);
Operand index = Argument(1);
Operand mask = Argument(2);
Operand clamp = context.BitwiseAnd(mask, Const(0x1f));
Operand segMask = context.BitwiseAnd(context.ShiftRightU32(mask, Const(8)), Const(0x1f));
Operand minThreadId = context.BitwiseAnd(GenerateLoadSubgroupLaneId(context, subgroupSize), segMask);
Operand maxThreadId = context.BitwiseOr(context.BitwiseAnd(clamp, context.BitwiseNot(segMask)), minThreadId);
Operand srcThreadId = context.BitwiseOr(context.BitwiseAnd(index, context.BitwiseNot(segMask)), minThreadId);
Operand valid = context.ICompareLessOrEqualUnsigned(srcThreadId, maxThreadId);
context.Copy(Argument(3), valid);
Operand result = context.Shuffle(value, GenerateSubgroupShuffleIndex(context, srcThreadId, subgroupSize));
context.Return(context.ConditionalSelect(valid, result, value));
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "Shuffle", true, 3, 1);
}
private static Function GenerateShuffleDown(int subgroupSize)
{
EmitterContext context = new();
Operand value = Argument(0);
Operand index = Argument(1);
Operand mask = Argument(2);
Operand clamp = context.BitwiseAnd(mask, Const(0x1f));
Operand segMask = context.BitwiseAnd(context.ShiftRightU32(mask, Const(8)), Const(0x1f));
Operand laneId = GenerateLoadSubgroupLaneId(context, subgroupSize);
Operand minThreadId = context.BitwiseAnd(laneId, segMask);
Operand maxThreadId = context.BitwiseOr(context.BitwiseAnd(clamp, context.BitwiseNot(segMask)), minThreadId);
Operand srcThreadId = context.IAdd(laneId, index);
Operand valid = context.ICompareLessOrEqualUnsigned(srcThreadId, maxThreadId);
context.Copy(Argument(3), valid);
Operand result = context.Shuffle(value, GenerateSubgroupShuffleIndex(context, srcThreadId, subgroupSize));
context.Return(context.ConditionalSelect(valid, result, value));
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ShuffleDown", true, 3, 1);
}
private static Function GenerateShuffleUp(int subgroupSize)
{
EmitterContext context = new();
Operand value = Argument(0);
Operand index = Argument(1);
Operand mask = Argument(2);
Operand segMask = context.BitwiseAnd(context.ShiftRightU32(mask, Const(8)), Const(0x1f));
Operand laneId = GenerateLoadSubgroupLaneId(context, subgroupSize);
Operand minThreadId = context.BitwiseAnd(laneId, segMask);
Operand srcThreadId = context.ISubtract(laneId, index);
Operand valid = context.ICompareGreaterOrEqual(srcThreadId, minThreadId);
context.Copy(Argument(3), valid);
Operand result = context.Shuffle(value, GenerateSubgroupShuffleIndex(context, srcThreadId, subgroupSize));
context.Return(context.ConditionalSelect(valid, result, value));
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ShuffleUp", true, 3, 1);
}
private static Function GenerateShuffleXor(int subgroupSize)
{
EmitterContext context = new();
Operand value = Argument(0);
Operand index = Argument(1);
Operand mask = Argument(2);
Operand clamp = context.BitwiseAnd(mask, Const(0x1f));
Operand segMask = context.BitwiseAnd(context.ShiftRightU32(mask, Const(8)), Const(0x1f));
Operand laneId = GenerateLoadSubgroupLaneId(context, subgroupSize);
Operand minThreadId = context.BitwiseAnd(laneId, segMask);
Operand maxThreadId = context.BitwiseOr(context.BitwiseAnd(clamp, context.BitwiseNot(segMask)), minThreadId);
Operand srcThreadId = context.BitwiseExclusiveOr(laneId, index);
Operand valid = context.ICompareLessOrEqualUnsigned(srcThreadId, maxThreadId);
context.Copy(Argument(3), valid);
Operand result = context.Shuffle(value, GenerateSubgroupShuffleIndex(context, srcThreadId, subgroupSize));
context.Return(context.ConditionalSelect(valid, result, value));
return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ShuffleXor", true, 3, 1);
}
private static Operand GenerateLoadSubgroupLaneId(EmitterContext context, int subgroupSize)
{
if (subgroupSize <= 32)
{
return context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
}
return context.BitwiseAnd(context.Load(StorageKind.Input, IoVariable.SubgroupLaneId), Const(0x1f));
}
private static Operand GenerateSubgroupShuffleIndex(EmitterContext context, Operand srcThreadId, int subgroupSize)
{
if (subgroupSize <= 32)
{
return srcThreadId;
}
return context.BitwiseOr(
context.BitwiseAnd(context.Load(StorageKind.Input, IoVariable.SubgroupLaneId), Const(0x60)),
srcThreadId);
}
private Function GenerateTexelFetchScaleFunction()
{
EmitterContext context = new();

View File

@ -2,12 +2,18 @@ namespace Ryujinx.Graphics.Shader.Translation
{
enum HelperFunctionName
{
Invalid,
ConvertDoubleToFloat,
ConvertFloatToDouble,
SharedAtomicMaxS32,
SharedAtomicMinS32,
SharedStore8,
SharedStore16,
Shuffle,
ShuffleDown,
ShuffleUp,
ShuffleXor,
TexelFetchScale,
TextureSizeUnscale,
}

View File

@ -0,0 +1,52 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
class ShufflePass : ITransformPass
{
public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
{
return usedFeatures.HasFlag(FeatureFlags.Shuffle);
}
public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
{
Operation operation = (Operation)node.Value;
HelperFunctionName functionName = operation.Inst switch
{
Instruction.Shuffle => HelperFunctionName.Shuffle,
Instruction.ShuffleDown => HelperFunctionName.ShuffleDown,
Instruction.ShuffleUp => HelperFunctionName.ShuffleUp,
Instruction.ShuffleXor => HelperFunctionName.ShuffleXor,
_ => HelperFunctionName.Invalid,
};
if (functionName == HelperFunctionName.Invalid || operation.SourcesCount != 3 || operation.DestsCount != 2)
{
return node;
}
int functionId = context.Hfm.GetOrCreateShuffleFunctionId(functionName, context.GpuAccessor.QueryHostSubgroupSize());
Operand result = operation.GetDest(0);
Operand valid = operation.GetDest(1);
Operand value = operation.GetSource(0);
Operand index = operation.GetSource(1);
Operand mask = operation.GetSource(2);
operation.Dest = null;
Operand[] callArgs = new Operand[] { Const(functionId), value, index, mask, valid };
LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs));
Utils.DeleteNode(node, operation);
return newNode;
}
}
}

View File

@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
RunPass<TexturePass>(context);
RunPass<SharedStoreSmallIntCas>(context);
RunPass<SharedAtomicSignedCas>(context);
RunPass<ShufflePass>(context);
}
private static void RunPass<T>(TransformContext context) where T : ITransformPass

View File

@ -25,7 +25,6 @@ namespace Ryujinx.Graphics.Vulkan
public readonly bool SupportsIndirectParameters;
public readonly bool SupportsFragmentShaderInterlock;
public readonly bool SupportsGeometryShaderPassthrough;
public readonly bool SupportsSubgroupSizeControl;
public readonly bool SupportsShaderFloat64;
public readonly bool SupportsShaderInt8;
public readonly bool SupportsShaderStencilExport;
@ -45,9 +44,7 @@ namespace Ryujinx.Graphics.Vulkan
public readonly bool SupportsViewportArray2;
public readonly bool SupportsHostImportedMemory;
public readonly bool SupportsDepthClipControl;
public readonly uint MinSubgroupSize;
public readonly uint MaxSubgroupSize;
public readonly ShaderStageFlags RequiredSubgroupSizeStages;
public readonly uint SubgroupSize;
public readonly SampleCountFlags SupportedSampleCounts;
public readonly PortabilitySubsetFlags PortabilitySubset;
public readonly uint VertexBufferAlignment;
@ -64,7 +61,6 @@ namespace Ryujinx.Graphics.Vulkan
bool supportsIndirectParameters,
bool supportsFragmentShaderInterlock,
bool supportsGeometryShaderPassthrough,
bool supportsSubgroupSizeControl,
bool supportsShaderFloat64,
bool supportsShaderInt8,
bool supportsShaderStencilExport,
@ -84,9 +80,7 @@ namespace Ryujinx.Graphics.Vulkan
bool supportsViewportArray2,
bool supportsHostImportedMemory,
bool supportsDepthClipControl,
uint minSubgroupSize,
uint maxSubgroupSize,
ShaderStageFlags requiredSubgroupSizeStages,
uint subgroupSize,
SampleCountFlags supportedSampleCounts,
PortabilitySubsetFlags portabilitySubset,
uint vertexBufferAlignment,
@ -102,7 +96,6 @@ namespace Ryujinx.Graphics.Vulkan
SupportsIndirectParameters = supportsIndirectParameters;
SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
SupportsShaderFloat64 = supportsShaderFloat64;
SupportsShaderInt8 = supportsShaderInt8;
SupportsShaderStencilExport = supportsShaderStencilExport;
@ -122,9 +115,7 @@ namespace Ryujinx.Graphics.Vulkan
SupportsViewportArray2 = supportsViewportArray2;
SupportsHostImportedMemory = supportsHostImportedMemory;
SupportsDepthClipControl = supportsDepthClipControl;
MinSubgroupSize = minSubgroupSize;
MaxSubgroupSize = maxSubgroupSize;
RequiredSubgroupSizeStages = requiredSubgroupSizeStages;
SubgroupSize = subgroupSize;
SupportedSampleCounts = supportedSampleCounts;
PortabilitySubset = portabilitySubset;
VertexBufferAlignment = vertexBufferAlignment;

View File

@ -352,11 +352,6 @@ namespace Ryujinx.Graphics.Vulkan
return pipeline;
}
if (gd.Capabilities.SupportsSubgroupSizeControl)
{
UpdateStageRequiredSubgroupSizes(gd, 1);
}
var pipelineCreateInfo = new ComputePipelineCreateInfo
{
SType = StructureType.ComputePipelineCreateInfo,
@ -616,11 +611,6 @@ namespace Ryujinx.Graphics.Vulkan
PDynamicStates = dynamicStates,
};
if (gd.Capabilities.SupportsSubgroupSizeControl)
{
UpdateStageRequiredSubgroupSizes(gd, (int)StagesCount);
}
var pipelineCreateInfo = new GraphicsPipelineCreateInfo
{
SType = StructureType.GraphicsPipelineCreateInfo,
@ -659,19 +649,6 @@ namespace Ryujinx.Graphics.Vulkan
return pipeline;
}
private readonly unsafe void UpdateStageRequiredSubgroupSizes(VulkanRenderer gd, int count)
{
for (int index = 0; index < count; index++)
{
bool canUseExplicitSubgroupSize =
(gd.Capabilities.RequiredSubgroupSizeStages & Stages[index].Stage) != 0 &&
gd.Capabilities.MinSubgroupSize <= RequiredSubgroupSize &&
gd.Capabilities.MaxSubgroupSize >= RequiredSubgroupSize;
Stages[index].PNext = canUseExplicitSubgroupSize ? StageRequiredSubgroupSizes.Pointer + index : null;
}
}
private void UpdateVertexAttributeDescriptions(VulkanRenderer gd)
{
// Vertex attributes exceeding the stride are invalid.

View File

@ -246,7 +246,7 @@ namespace Ryujinx.Graphics.Vulkan
return true;
}
private void FreeCompleted()
public void FreeCompleted()
{
FenceHolder signalledFence = null;
while (_pendingCopies.TryPeek(out var pc) && (pc.Fence == signalledFence || pc.Fence.IsSignaled()))

View File

@ -37,7 +37,6 @@ namespace Ryujinx.Graphics.Vulkan
"VK_EXT_shader_stencil_export",
"VK_KHR_shader_float16_int8",
"VK_EXT_shader_subgroup_ballot",
"VK_EXT_subgroup_size_control",
"VK_NV_geometry_shader_passthrough",
"VK_NV_viewport_array2",
"VK_EXT_depth_clip_control",

View File

@ -151,6 +151,14 @@ namespace Ryujinx.Graphics.Vulkan
SType = StructureType.PhysicalDeviceProperties2,
};
PhysicalDeviceSubgroupProperties propertiesSubgroup = new()
{
SType = StructureType.PhysicalDeviceSubgroupProperties,
PNext = properties2.PNext,
};
properties2.PNext = &propertiesSubgroup;
PhysicalDeviceBlendOperationAdvancedPropertiesEXT propertiesBlendOperationAdvanced = new()
{
SType = StructureType.PhysicalDeviceBlendOperationAdvancedPropertiesExt,
@ -164,18 +172,6 @@ namespace Ryujinx.Graphics.Vulkan
properties2.PNext = &propertiesBlendOperationAdvanced;
}
PhysicalDeviceSubgroupSizeControlPropertiesEXT propertiesSubgroupSizeControl = new()
{
SType = StructureType.PhysicalDeviceSubgroupSizeControlPropertiesExt,
};
bool supportsSubgroupSizeControl = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_subgroup_size_control");
if (supportsSubgroupSizeControl)
{
properties2.PNext = &propertiesSubgroupSizeControl;
}
bool supportsTransformFeedback = _physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName);
PhysicalDeviceTransformFeedbackPropertiesEXT propertiesTransformFeedback = new()
@ -315,7 +311,6 @@ namespace Ryujinx.Graphics.Vulkan
_physicalDevice.IsDeviceExtensionPresent(KhrDrawIndirectCount.ExtensionName),
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
supportsSubgroupSizeControl,
features2.Features.ShaderFloat64,
featuresShaderInt8.ShaderInt8,
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
@ -335,9 +330,7 @@ namespace Ryujinx.Graphics.Vulkan
_physicalDevice.IsDeviceExtensionPresent("VK_NV_viewport_array2"),
_physicalDevice.IsDeviceExtensionPresent(ExtExternalMemoryHost.ExtensionName),
supportsDepthClipControl && featuresDepthClipControl.DepthClipControl,
propertiesSubgroupSizeControl.MinSubgroupSize,
propertiesSubgroupSizeControl.MaxSubgroupSize,
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
propertiesSubgroup.SubgroupSize,
supportedSampleCounts,
portabilityFlags,
vertexBufferAlignment,
@ -475,6 +468,9 @@ namespace Ryujinx.Graphics.Vulkan
internal void RegisterFlush()
{
SyncManager.RegisterFlush();
// Periodically free unused regions of the staging buffer to avoid doing it all at once.
BufferManager.StagingBuffer.FreeCompleted();
}
public PinnedSpan<byte> GetBufferData(BufferHandle buffer, int offset, int size)
@ -620,6 +616,7 @@ namespace Ryujinx.Graphics.Vulkan
maximumImagesPerStage: Constants.MaxImagesPerStage,
maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
shaderSubgroupSize: (int)Capabilities.SubgroupSize,
storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
}

View File

@ -28,6 +28,7 @@ namespace Spv.Generator
// In the declaration block.
private readonly Dictionary<TypeDeclarationKey, Instruction> _typeDeclarations;
private readonly List<Instruction> _typeDeclarationsList;
// In the declaration block.
private readonly List<Instruction> _globals;
// In the declaration block.
@ -54,6 +55,7 @@ namespace Spv.Generator
_debug = new List<Instruction>();
_annotations = new List<Instruction>();
_typeDeclarations = new Dictionary<TypeDeclarationKey, Instruction>();
_typeDeclarationsList = new List<Instruction>();
_constants = new Dictionary<ConstantKey, Instruction>();
_globals = new List<Instruction>();
_functionsDeclarations = new List<Instruction>();
@ -126,7 +128,8 @@ namespace Spv.Generator
instruction.SetId(GetNewId());
_typeDeclarations.Add(key, instruction);
_typeDeclarations[key] = instruction;
_typeDeclarationsList.Add(instruction);
}
public void AddEntryPoint(ExecutionModel executionModel, Instruction function, string name, params Instruction[] interfaces)
@ -330,7 +333,7 @@ namespace Spv.Generator
// Ensure that everything is in the right order in the declarations section.
List<Instruction> declarations = new();
declarations.AddRange(_typeDeclarations.Values);
declarations.AddRange(_typeDeclarationsList);
declarations.AddRange(_globals);
declarations.AddRange(_constants.Values);
declarations.Sort((Instruction x, Instruction y) => x.Id.CompareTo(y.Id));