Add a pass to turn global memory accesses into storage accesses, and do all storage-related transformations on the IR

This commit is contained in:
gdk
2019-11-30 23:53:09 -03:00
committed by Thog
parent 396768f3b4
commit 6a98c643ca
28 changed files with 532 additions and 282 deletions

View File

@ -6,54 +6,54 @@ namespace Ryujinx.Graphics.Shader.Translation
{
static class EmitterContextInsts
{
public static Operand AtomicAdd(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicAdd(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicAdd | mr, Local(), a, b);
return context.Add(Instruction.AtomicAdd | mr, Local(), a, b, c);
}
public static Operand AtomicAnd(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicAnd(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicAnd | mr, Local(), a, b);
return context.Add(Instruction.AtomicAnd | mr, Local(), a, b, c);
}
public static Operand AtomicCompareAndSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
public static Operand AtomicCompareAndSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c, Operand d)
{
return context.Add(Instruction.AtomicCompareAndSwap | mr, Local(), a, b, c);
return context.Add(Instruction.AtomicCompareAndSwap | mr, Local(), a, b, c, d);
}
public static Operand AtomicMaxS32(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicMaxS32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicMaxS32 | mr, Local(), a, b);
return context.Add(Instruction.AtomicMaxS32 | mr, Local(), a, b, c);
}
public static Operand AtomicMaxU32(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicMaxU32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicMaxU32 | mr, Local(), a, b);
return context.Add(Instruction.AtomicMaxU32 | mr, Local(), a, b, c);
}
public static Operand AtomicMinS32(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicMinS32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicMinS32 | mr, Local(), a, b);
return context.Add(Instruction.AtomicMinS32 | mr, Local(), a, b, c);
}
public static Operand AtomicMinU32(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicMinU32(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicMinU32 | mr, Local(), a, b);
return context.Add(Instruction.AtomicMinU32 | mr, Local(), a, b, c);
}
public static Operand AtomicOr(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicOr(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicOr | mr, Local(), a, b);
return context.Add(Instruction.AtomicOr | mr, Local(), a, b, c);
}
public static Operand AtomicSwap(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicSwap(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicSwap | mr, Local(), a, b);
return context.Add(Instruction.AtomicSwap | mr, Local(), a, b, c);
}
public static Operand AtomicXor(this EmitterContext context, Instruction mr, Operand a, Operand b)
public static Operand AtomicXor(this EmitterContext context, Instruction mr, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.AtomicXor | mr, Local(), a, b);
return context.Add(Instruction.AtomicXor | mr, Local(), a, b, c);
}
public static Operand Ballot(this EmitterContext context, Operand a)
@ -461,9 +461,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.LoadConstant, Local(), a, b);
}
public static Operand LoadGlobal(this EmitterContext context, Operand a)
public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.LoadGlobal, Local(), a);
return context.Add(Instruction.LoadGlobal, Local(), a, b);
}
public static Operand LoadLocal(this EmitterContext context, Operand a)
@ -523,9 +523,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.ShuffleXor, Local(), a, b, c);
}
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b)
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal, null, a, b);
return context.Add(Instruction.StoreGlobal, null, a, b, c);
}
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)

View File

@ -0,0 +1,46 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
namespace Ryujinx.Graphics.Shader.Translation
{
/// <summary>
/// Shared helpers for recognising global memory instructions and for locating the
/// storage buffer descriptors inside the constant buffer.
/// </summary>
static class GlobalMemory
{
    // All sizes and offsets below are expressed in words.
    private const int StorageDescsBaseOffset = 0x44;

    public const int StorageDescSize = 4;
    public const int StorageMaxCount = 16;
    public const int StorageDescsSize = StorageDescSize * StorageMaxCount;

    /// <summary>
    /// Tells whether an instruction accesses global memory: a global load, a global
    /// store, or an atomic whose memory region bits select the global region.
    /// </summary>
    /// <param name="inst">Instruction to classify</param>
    /// <returns>True if the instruction uses global memory, false otherwise</returns>
    public static bool UsesGlobalMemory(Instruction inst)
    {
        if (inst == Instruction.LoadGlobal || inst == Instruction.StoreGlobal)
        {
            return true;
        }

        return inst.IsAtomic() && IsGlobalMr(inst);
    }

    // Checks the memory region bits of the instruction against the global region.
    private static bool IsGlobalMr(Instruction inst) => (inst & Instruction.MrMask) == Instruction.MrGlobal;

    /// <summary>
    /// Gets the constant buffer offset (in words) of the descriptor for a given slot.
    /// </summary>
    /// <param name="stage">Shader stage that owns the descriptor table</param>
    /// <param name="slot">Index of the descriptor inside the stage table</param>
    /// <returns>Stage table base offset plus <paramref name="slot"/> descriptors</returns>
    public static int GetStorageCbOffset(ShaderStage stage, int slot)
    {
        int tableBase = GetStorageBaseCbOffset(stage);

        return tableBase + slot * StorageDescSize;
    }

    /// <summary>
    /// Gets the constant buffer offset (in words) where the descriptor table of a stage starts.
    /// </summary>
    /// <param name="stage">Shader stage to query</param>
    /// <returns>The table base offset, or 0 for a stage that is not listed below</returns>
    public static int GetStorageBaseCbOffset(ShaderStage stage)
    {
        int tableIndex;

        switch (stage)
        {
            case ShaderStage.Vertex:
                tableIndex = 0;
                break;

            case ShaderStage.TessellationControl:
                tableIndex = 1;
                break;

            // Compute resolves to the same table index as tessellation evaluation.
            // NOTE(review): presumably intentional because compute uses a separate
            // constant buffer layout - confirm against the caller.
            case ShaderStage.Compute:
            case ShaderStage.TessellationEvaluation:
                tableIndex = 2;
                break;

            case ShaderStage.Geometry:
                tableIndex = 3;
                break;

            case ShaderStage.Fragment:
                tableIndex = 4;
                break;

            default:
                return 0;
        }

        return StorageDescsBaseOffset + tableIndex * StorageDescsSize;
    }
}
}

View File

@ -0,0 +1,121 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
namespace Ryujinx.Graphics.Shader.Translation
{
/// <summary>
/// IR pass that rewrites every remaining global memory operation into a storage
/// operation, selecting the storage slot with code emitted into the IR itself.
/// </summary>
static class Lowering
{
    /// <summary>
    /// Visits every operation of every block and lowers the ones that use global memory.
    /// </summary>
    /// <param name="blocks">Blocks whose operation lists are rewritten in place</param>
    /// <param name="config">Shader configuration (stage and host capabilities are read)</param>
    public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
    {
        for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
        {
            BasicBlock block = blocks[blkIndex];

            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
            {
                if (!(node.Value is Operation operation))
                {
                    continue;
                }

                if (UsesGlobalMemory(operation.Inst))
                {
                    // LowerGlobal returns the replacement node, so iteration resumes after it.
                    node = LowerGlobal(node, config);
                }
            }
        }
    }

    /// <summary>
    /// Replaces one global memory operation with the equivalent storage operation.
    /// Sources 0 and 1 of the original (address low and high words) become the storage
    /// slot and the word offset into it; any remaining sources are carried over unchanged.
    /// </summary>
    /// <param name="node">List node holding the global memory operation</param>
    /// <param name="config">Shader configuration (stage and host capabilities are read)</param>
    /// <returns>The node of the newly inserted storage operation</returns>
    private static LinkedListNode<INode> LowerGlobal(LinkedListNode<INode> node, ShaderConfig config)
    {
        Operation operation = (Operation)node.Value;

        Operation storageOp;

        // Emits a new operation right before the one being lowered and returns its result.
        Operand PrependOperation(Instruction inst, params Operand[] sources)
        {
            Operand local = Local();

            node.List.AddBefore(node, new Operation(inst, local, sources));

            return local;
        }

        Operand addrLow  = operation.GetSource(0);
        Operand addrHigh = operation.GetSource(1);

        // Fall back to base address 0 / slot 0 when no descriptor range matches.
        Operand sbBaseAddrLow = Const(0);
        Operand sbSlot        = Const(0);

        // Emit a range check against every descriptor; the selects are chained so the
        // last slot whose range contains the address provides the final values.
        for (int slot = 0; slot < StorageMaxCount; slot++)
        {
            int cbOffset = GetStorageCbOffset(config.Stage, slot);

            // Descriptor words read from constant buffer 0: base low, base high, size.
            Operand baseAddrLow  = Cbuf(0, cbOffset);
            Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
            Operand size         = Cbuf(0, cbOffset + 2);

            Operand offset = PrependOperation(Instruction.Subtract,       addrLow, baseAddrLow);
            Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);

            Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);

            // NOTE(review): presumably a true comparison yields all ones (-1), so this add
            // propagates the borrow of the low word subtraction into the high word - confirm.
            Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);

            Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);

            Operand inRange = PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);

            sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
            sbSlot        = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
        }

        // Truncate the base address with the host alignment rather than a fixed 64 bytes,
        // matching what the GlobalToStorage pass does for the same computation.
        // Assumes the alignment is a power of two so its negation is a valid mask - TODO confirm.
        Operand alignMask = Const(-config.Capabilities.StorageBufferOffsetAlignment);

        Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd,    sbBaseAddrLow, alignMask);
        Operand byteOffset    = PrependOperation(Instruction.Subtract,      addrLow,       baseAddrTrunc);
        Operand wordOffset    = PrependOperation(Instruction.ShiftRightU32, byteOffset,    Const(2));

        Operand[] sources = new Operand[operation.SourcesCount];

        sources[0] = sbSlot;
        sources[1] = wordOffset;

        for (int index = 2; index < operation.SourcesCount; index++)
        {
            sources[index] = operation.GetSource(index);
        }

        if (operation.Inst.IsAtomic())
        {
            // Keep the atomic opcode, only switch its memory region bits to storage.
            Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;

            storageOp = new Operation(inst, operation.Dest, sources);
        }
        else if (operation.Inst == Instruction.LoadGlobal)
        {
            storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
        }
        else
        {
            storageOp = new Operation(Instruction.StoreStorage, null, sources);
        }

        // Clear the sources of the operation being removed.
        // NOTE(review): presumably this detaches it from operand use tracking - confirm.
        for (int index = 0; index < operation.SourcesCount; index++)
        {
            operation.SetSource(index, null);
        }

        LinkedListNode<INode> oldNode = node;

        node = node.List.AddBefore(node, storageOp);

        node.List.Remove(oldNode);

        return node;
    }
}
}

View File

@ -1,20 +1,16 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class GlobalToStorage
{
private const int StorageDescsBaseOffset = 0x44; // In words.
private const int StorageDescSize = 4; // In words.
private const int StorageMaxCount = 16;
private const int StorageDescsSize = StorageDescSize * StorageMaxCount;
public static void RunPass(BasicBlock block, ShaderStage stage)
public static void RunPass(BasicBlock block, ShaderConfig config)
{
int sbStart = GetStorageBaseCbOffset(stage);
int sbStart = GetStorageBaseCbOffset(config.Stage);
int sbEnd = sbStart + StorageDescsSize;
@ -25,9 +21,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue;
}
if (operation.Inst.IsAtomic() ||
operation.Inst == Instruction.LoadGlobal ||
operation.Inst == Instruction.StoreGlobal)
if (UsesGlobalMemory(operation.Inst))
{
Operand source = operation.GetSource(0);
@ -37,44 +31,68 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (storageIndex >= 0)
{
node = ReplaceGlobalWithStorage(node, storageIndex);
node = ReplaceGlobalWithStorage(node, config, storageIndex);
}
}
}
}
}
private static LinkedListNode<INode> ReplaceGlobalWithStorage(LinkedListNode<INode> node, int storageIndex)
private static LinkedListNode<INode> ReplaceGlobalWithStorage(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{
Operation operation = (Operation)node.Value;
Operation storageOp;
Operand GetStorageOffset()
{
Operand addrLow = operation.GetSource(0);
Operand baseAddrLow = Cbuf(0, GetStorageCbOffset(config.Stage, storageIndex));
Operand baseAddrTrunc = Local();
Operand alignMask = Const(-config.Capabilities.StorageBufferOffsetAlignment);
Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
node.List.AddBefore(node, andOp);
Operand byteOffset = Local();
Operand wordOffset = Local();
Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, subOp);
node.List.AddBefore(node, shrOp);
return wordOffset;
}
Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = Const(storageIndex);
sources[1] = GetStorageOffset();
for (int index = 2; index < operation.SourcesCount; index++)
{
sources[index] = operation.GetSource(index);
}
if (operation.Inst.IsAtomic())
{
Operand[] sources = new Operand[operation.SourcesCount];
for (int index = 0; index < operation.SourcesCount; index++)
{
sources[index] = operation.GetSource(index);
}
Instruction inst = (operation.Inst & ~Instruction.MrMask) | Instruction.MrStorage;
storageOp = new Operation(inst, storageIndex, operation.Dest, sources);
storageOp = new Operation(inst, operation.Dest, sources);
}
else if (operation.Inst == Instruction.LoadGlobal)
{
Operand source = operation.GetSource(0);
storageOp = new Operation(Instruction.LoadStorage, storageIndex, operation.Dest, source);
storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
}
else
{
Operand src1 = operation.GetSource(0);
Operand src2 = operation.GetSource(1);
storageOp = new Operation(Instruction.StoreStorage, storageIndex, null, src1, src2);
storageOp = new Operation(Instruction.StoreStorage, null, sources);
}
for (int index = 0; index < operation.SourcesCount; index++)
@ -84,7 +102,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
LinkedListNode<INode> oldNode = node;
node = node.List.AddAfter(node, storageOp);
node = node.List.AddBefore(node, storageOp);
node.List.Remove(oldNode);
@ -125,25 +143,5 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return -1;
}
public static int GetStorageCbOffset(ShaderStage stage, int slot)
{
return GetStorageBaseCbOffset(stage) + slot * StorageDescSize;
}
private static int GetStorageBaseCbOffset(ShaderStage stage)
{
switch (stage)
{
case ShaderStage.Compute: return StorageDescsBaseOffset + 2 * StorageDescsSize;
case ShaderStage.Vertex: return StorageDescsBaseOffset;
case ShaderStage.TessellationControl: return StorageDescsBaseOffset + 1 * StorageDescsSize;
case ShaderStage.TessellationEvaluation: return StorageDescsBaseOffset + 2 * StorageDescsSize;
case ShaderStage.Geometry: return StorageDescsBaseOffset + 3 * StorageDescsSize;
case ShaderStage.Fragment: return StorageDescsBaseOffset + 4 * StorageDescsSize;
}
return 0;
}
}
}

View File

@ -7,11 +7,11 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class Optimizer
{
public static void Optimize(BasicBlock[] blocks, ShaderStage stage)
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
GlobalToStorage.RunPass(blocks[blkIndex], stage);
GlobalToStorage.RunPass(blocks[blkIndex], config);
}
bool modified;

View File

@ -47,7 +47,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return code.Slice(0, headerSize + (int)endAddress);
}
public static ShaderProgram Translate(Span<byte> code, TranslationFlags flags)
public static ShaderProgram Translate(Span<byte> code, ShaderCapabilities capabilities, TranslationFlags flags)
{
bool compute = (flags & TranslationFlags.Compute) != 0;
bool debugMode = (flags & TranslationFlags.DebugMode) != 0;
@ -82,6 +82,7 @@ namespace Ryujinx.Graphics.Shader.Translation
ShaderConfig config = new ShaderConfig(
stage,
capabilities,
flags,
maxOutputVertexCount,
outputTopology);
@ -89,7 +90,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return Translate(ops, config, size);
}
public static ShaderProgram Translate(Span<byte> vpACode, Span<byte> vpBCode, TranslationFlags flags)
public static ShaderProgram Translate(Span<byte> vpACode, Span<byte> vpBCode, ShaderCapabilities capabilities, TranslationFlags flags)
{
bool debugMode = (flags & TranslationFlags.DebugMode) != 0;
@ -98,6 +99,7 @@ namespace Ryujinx.Graphics.Shader.Translation
ShaderConfig config = new ShaderConfig(
header.Stage,
capabilities,
flags,
header.MaxOutputVertexCount,
header.OutputTopology);
@ -107,20 +109,22 @@ namespace Ryujinx.Graphics.Shader.Translation
private static ShaderProgram Translate(Operation[] ops, ShaderConfig config, int size)
{
BasicBlock[] irBlocks = ControlFlowGraph.MakeCfg(ops);
BasicBlock[] blocks = ControlFlowGraph.MakeCfg(ops);
if (irBlocks.Length > 0)
if (blocks.Length > 0)
{
Dominance.FindDominators(irBlocks[0], irBlocks.Length);
Dominance.FindDominators(blocks[0], blocks.Length);
Dominance.FindDominanceFrontiers(irBlocks);
Dominance.FindDominanceFrontiers(blocks);
Ssa.Rename(irBlocks);
Ssa.Rename(blocks);
Optimizer.Optimize(irBlocks, config.Stage);
Optimizer.RunPass(blocks, config);
Lowering.RunPass(blocks, config);
}
StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(irBlocks, config);
StructuredProgramInfo sInfo = StructuredProgram.MakeStructuredProgram(blocks, config);
GlslProgram program = GlslGenerator.Generate(sInfo, config);