Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
cb250162cb | ||
|
7528f94536 | ||
|
43081c16c4 | ||
|
780627e7b0 |
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
||||
private const ushort FileFormatVersionMajor = 1;
|
||||
private const ushort FileFormatVersionMinor = 2;
|
||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||
private const uint CodeGenVersion = 4336;
|
||||
private const uint CodeGenVersion = 4369;
|
||||
|
||||
private const string SharedTocFileName = "shared.toc";
|
||||
private const string SharedDataFileName = "shared.data";
|
||||
|
@@ -485,6 +485,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||
|
||||
AttributeType type = context.Config.GpuAccessor.QueryAttributeType(location);
|
||||
|
||||
return type.ToAggregateType();
|
||||
}
|
||||
else if (context.Config.Stage == ShaderStage.Fragment && isAsgDest &&
|
||||
operand.Value >= AttributeConsts.FragmentOutputColorBase &&
|
||||
operand.Value < AttributeConsts.FragmentOutputColorEnd)
|
||||
{
|
||||
int location = (operand.Value - AttributeConsts.FragmentOutputColorBase) / 16;
|
||||
|
||||
AttributeType type = context.Config.GpuAccessor.QueryFragmentOutputType(location);
|
||||
|
||||
return type.ToAggregateType();
|
||||
}
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@ using System;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Ryujinx.Graphics.Vic
|
||||
@@ -17,10 +18,18 @@ namespace Ryujinx.Graphics.Vic
|
||||
int x2 = Math.Min(src.Width, x1 + targetRect.Width);
|
||||
int y2 = Math.Min(src.Height, y1 + targetRect.Height);
|
||||
|
||||
if (Sse41.IsSupported && ((x1 | x2) & 3) == 0)
|
||||
if (((x1 | x2) & 3) == 0)
|
||||
{
|
||||
BlendOneSse41(dst, src, ref slot, x1, y1, x2, y2);
|
||||
return;
|
||||
if (Sse41.IsSupported)
|
||||
{
|
||||
BlendOneSse41(dst, src, ref slot, x1, y1, x2, y2);
|
||||
return;
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
{
|
||||
BlendOneAdvSimd(dst, src, ref slot, x1, y1, x2, y2);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
for (int y = y1; y < y2; y++)
|
||||
@@ -105,6 +114,84 @@ namespace Ryujinx.Graphics.Vic
|
||||
}
|
||||
}
|
||||
|
||||
private unsafe static void BlendOneAdvSimd(Surface dst, Surface src, ref SlotStruct slot, int x1, int y1, int x2, int y2)
|
||||
{
|
||||
Debug.Assert(((x1 | x2) & 3) == 0);
|
||||
|
||||
ref MatrixStruct mtx = ref slot.ColorMatrixStruct;
|
||||
|
||||
Vector128<int> col1 = Vector128.Create(mtx.MatrixCoeff00, mtx.MatrixCoeff10, mtx.MatrixCoeff20, 0);
|
||||
Vector128<int> col2 = Vector128.Create(mtx.MatrixCoeff01, mtx.MatrixCoeff11, mtx.MatrixCoeff21, 0);
|
||||
Vector128<int> col3 = Vector128.Create(mtx.MatrixCoeff02, mtx.MatrixCoeff12, mtx.MatrixCoeff22, 0);
|
||||
Vector128<int> col4 = Vector128.Create(mtx.MatrixCoeff03, mtx.MatrixCoeff13, mtx.MatrixCoeff23, 0);
|
||||
|
||||
Vector128<int> rShift = Vector128.Create(-mtx.MatrixRShift);
|
||||
Vector128<int> selMask = Vector128.Create(0, 0, 0, -1);
|
||||
Vector128<ushort> clMin = Vector128.Create((ushort)slot.SlotConfig.SoftClampLow);
|
||||
Vector128<ushort> clMax = Vector128.Create((ushort)slot.SlotConfig.SoftClampHigh);
|
||||
|
||||
fixed (Pixel* srcPtr = src.Data, dstPtr = dst.Data)
|
||||
{
|
||||
Pixel* ip = srcPtr;
|
||||
Pixel* op = dstPtr;
|
||||
|
||||
if (mtx.MatrixEnable)
|
||||
{
|
||||
for (int y = y1; y < y2; y++, ip += src.Width, op += dst.Width)
|
||||
{
|
||||
for (int x = x1; x < x2; x += 4)
|
||||
{
|
||||
Vector128<ushort> pixel12 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x));
|
||||
Vector128<ushort> pixel34 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 2));
|
||||
|
||||
Vector128<uint> pixel1 = AdvSimd.ZeroExtendWideningLower(pixel12.GetLower());
|
||||
Vector128<uint> pixel2 = AdvSimd.ZeroExtendWideningUpper(pixel12);
|
||||
Vector128<uint> pixel3 = AdvSimd.ZeroExtendWideningLower(pixel34.GetLower());
|
||||
Vector128<uint> pixel4 = AdvSimd.ZeroExtendWideningUpper(pixel34);
|
||||
|
||||
Vector128<int> t1 = MatrixMultiplyAdvSimd(pixel1.AsInt32(), col1, col2, col3, col4, rShift, selMask);
|
||||
Vector128<int> t2 = MatrixMultiplyAdvSimd(pixel2.AsInt32(), col1, col2, col3, col4, rShift, selMask);
|
||||
Vector128<int> t3 = MatrixMultiplyAdvSimd(pixel3.AsInt32(), col1, col2, col3, col4, rShift, selMask);
|
||||
Vector128<int> t4 = MatrixMultiplyAdvSimd(pixel4.AsInt32(), col1, col2, col3, col4, rShift, selMask);
|
||||
|
||||
Vector64<ushort> lower1 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(t1);
|
||||
Vector64<ushort> lower3 = AdvSimd.ExtractNarrowingSaturateUnsignedLower(t3);
|
||||
|
||||
pixel12 = AdvSimd.ExtractNarrowingSaturateUnsignedUpper(lower1, t2);
|
||||
pixel34 = AdvSimd.ExtractNarrowingSaturateUnsignedUpper(lower3, t4);
|
||||
|
||||
pixel12 = AdvSimd.Min(pixel12, clMax);
|
||||
pixel34 = AdvSimd.Min(pixel34, clMax);
|
||||
pixel12 = AdvSimd.Max(pixel12, clMin);
|
||||
pixel34 = AdvSimd.Max(pixel34, clMin);
|
||||
|
||||
AdvSimd.Store((ushort*)(op + (uint)x + 0), pixel12);
|
||||
AdvSimd.Store((ushort*)(op + (uint)x + 2), pixel34);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = y1; y < y2; y++, ip += src.Width, op += dst.Width)
|
||||
{
|
||||
for (int x = x1; x < x2; x += 4)
|
||||
{
|
||||
Vector128<ushort> pixel12 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x));
|
||||
Vector128<ushort> pixel34 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 2));
|
||||
|
||||
pixel12 = AdvSimd.Min(pixel12, clMax);
|
||||
pixel34 = AdvSimd.Min(pixel34, clMax);
|
||||
pixel12 = AdvSimd.Max(pixel12, clMin);
|
||||
pixel34 = AdvSimd.Max(pixel34, clMin);
|
||||
|
||||
AdvSimd.Store((ushort*)(op + (uint)x + 0), pixel12);
|
||||
AdvSimd.Store((ushort*)(op + (uint)x + 2), pixel34);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void MatrixMultiply(ref MatrixStruct mtx, int x, int y, int z, out int r, out int g, out int b)
|
||||
{
|
||||
@@ -159,5 +246,33 @@ namespace Ryujinx.Graphics.Vic
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static Vector128<int> MatrixMultiplyAdvSimd(
|
||||
Vector128<int> pixel,
|
||||
Vector128<int> col1,
|
||||
Vector128<int> col2,
|
||||
Vector128<int> col3,
|
||||
Vector128<int> col4,
|
||||
Vector128<int> rShift,
|
||||
Vector128<int> selectMask)
|
||||
{
|
||||
Vector128<int> x = AdvSimd.DuplicateSelectedScalarToVector128(pixel, 0);
|
||||
Vector128<int> y = AdvSimd.DuplicateSelectedScalarToVector128(pixel, 1);
|
||||
Vector128<int> z = AdvSimd.DuplicateSelectedScalarToVector128(pixel, 2);
|
||||
|
||||
col1 = AdvSimd.Multiply(col1, x);
|
||||
col2 = AdvSimd.Multiply(col2, y);
|
||||
col3 = AdvSimd.Multiply(col3, z);
|
||||
|
||||
Vector128<int> res = AdvSimd.Add(col3, AdvSimd.Add(col1, col2));
|
||||
|
||||
res = AdvSimd.ShiftArithmetic(res, rShift);
|
||||
res = AdvSimd.Add(res, col4);
|
||||
res = AdvSimd.ShiftRightArithmetic(res, 8);
|
||||
res = AdvSimd.BitwiseSelect(selectMask, pixel, res);
|
||||
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@ using Ryujinx.Graphics.Vic.Types;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
|
||||
|
||||
@@ -54,7 +55,7 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
(byte)4, (byte)6, (byte)7, (byte)5,
|
||||
(byte)8, (byte)10, (byte)11, (byte)9,
|
||||
(byte)12, (byte)14, (byte)15, (byte)13);
|
||||
Vector128<short> alphaMask = Vector128.Create(0xffUL << 48).AsInt16();
|
||||
Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16();
|
||||
|
||||
int yStrideGap = yStride - width;
|
||||
int uvStrideGap = uvStride - input.UvWidth;
|
||||
@@ -95,6 +96,11 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16();
|
||||
rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16();
|
||||
|
||||
rgba0 = Sse2.Or(rgba0, alphaMask);
|
||||
rgba1 = Sse2.Or(rgba1, alphaMask);
|
||||
rgba2 = Sse2.Or(rgba2, alphaMask);
|
||||
rgba3 = Sse2.Or(rgba3, alphaMask);
|
||||
|
||||
Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte());
|
||||
Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte()));
|
||||
Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte());
|
||||
@@ -104,15 +110,6 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte());
|
||||
Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte()));
|
||||
|
||||
rgba16_0 = Sse2.Or(rgba16_0, alphaMask);
|
||||
rgba16_1 = Sse2.Or(rgba16_1, alphaMask);
|
||||
rgba16_2 = Sse2.Or(rgba16_2, alphaMask);
|
||||
rgba16_3 = Sse2.Or(rgba16_3, alphaMask);
|
||||
rgba16_4 = Sse2.Or(rgba16_4, alphaMask);
|
||||
rgba16_5 = Sse2.Or(rgba16_5, alphaMask);
|
||||
rgba16_6 = Sse2.Or(rgba16_6, alphaMask);
|
||||
rgba16_7 = Sse2.Or(rgba16_7, alphaMask);
|
||||
|
||||
rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2);
|
||||
rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2);
|
||||
rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2);
|
||||
@@ -149,6 +146,98 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (AdvSimd.Arm64.IsSupported)
|
||||
{
|
||||
Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32();
|
||||
|
||||
int yStrideGap = yStride - width;
|
||||
int uvStrideGap = uvStride - input.UvWidth;
|
||||
|
||||
int widthTrunc = width & ~0xf;
|
||||
|
||||
fixed (Pixel* dstPtr = output.Data)
|
||||
{
|
||||
Pixel* op = dstPtr;
|
||||
|
||||
fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1)
|
||||
{
|
||||
byte* i0p = src0Ptr;
|
||||
|
||||
for (int y = 0; y < height; y++)
|
||||
{
|
||||
byte* i1p = src1Ptr + (y >> 1) * uvStride;
|
||||
|
||||
int x = 0;
|
||||
|
||||
for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16)
|
||||
{
|
||||
Vector128<byte> ya = AdvSimd.LoadVector128(i0p);
|
||||
Vector128<byte> uv = AdvSimd.LoadVector128(i1p);
|
||||
|
||||
Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16();
|
||||
Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16();
|
||||
|
||||
Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16());
|
||||
Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16());
|
||||
|
||||
ya0 = AdvSimd.ShiftLeftLogical(ya0, 8);
|
||||
ya1 = AdvSimd.ShiftLeftLogical(ya1, 8);
|
||||
|
||||
Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0);
|
||||
Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0);
|
||||
Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1);
|
||||
Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1);
|
||||
|
||||
rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16();
|
||||
rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16();
|
||||
rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16();
|
||||
rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16();
|
||||
|
||||
Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16();
|
||||
Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16();
|
||||
Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16();
|
||||
Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16();
|
||||
Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16();
|
||||
Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16();
|
||||
Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16();
|
||||
Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16();
|
||||
|
||||
rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2);
|
||||
rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2);
|
||||
rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2);
|
||||
rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2);
|
||||
rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2);
|
||||
rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2);
|
||||
rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2);
|
||||
rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2);
|
||||
|
||||
AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 12), rgba16_6);
|
||||
AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7);
|
||||
}
|
||||
|
||||
for (; x < width; x++, i1p += (x & 1) * 2)
|
||||
{
|
||||
Pixel* px = op + (uint)x;
|
||||
|
||||
px->R = Upsample(*i0p++);
|
||||
px->G = Upsample(*i1p);
|
||||
px->B = Upsample(*(i1p + 1));
|
||||
px->A = 0x3ff;
|
||||
}
|
||||
|
||||
op += width;
|
||||
i0p += yStrideGap;
|
||||
i1p += uvStrideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
|
@@ -3,6 +3,7 @@ using Ryujinx.Graphics.Texture;
|
||||
using Ryujinx.Graphics.Vic.Types;
|
||||
using System;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using static Ryujinx.Graphics.Vic.Image.SurfaceCommon;
|
||||
|
||||
@@ -93,6 +94,64 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
{
|
||||
int widthTrunc = width & ~7;
|
||||
int strideGap = stride - width * 4;
|
||||
|
||||
fixed (Pixel* srcPtr = input.Data)
|
||||
{
|
||||
Pixel* ip = srcPtr;
|
||||
|
||||
fixed (byte* dstPtr = dst)
|
||||
{
|
||||
byte* op = dstPtr;
|
||||
|
||||
for (int y = 0; y < height; y++, ip += input.Width)
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for (; x < widthTrunc; x += 8)
|
||||
{
|
||||
Vector128<ushort> pixel12 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x));
|
||||
Vector128<ushort> pixel34 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 2));
|
||||
Vector128<ushort> pixel56 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 4));
|
||||
Vector128<ushort> pixel78 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 6));
|
||||
|
||||
pixel12 = AdvSimd.ShiftRightLogical(pixel12, 2);
|
||||
pixel34 = AdvSimd.ShiftRightLogical(pixel34, 2);
|
||||
pixel56 = AdvSimd.ShiftRightLogical(pixel56, 2);
|
||||
pixel78 = AdvSimd.ShiftRightLogical(pixel78, 2);
|
||||
|
||||
Vector64<byte> lower12 = AdvSimd.ExtractNarrowingLower(pixel12.AsUInt16());
|
||||
Vector64<byte> lower56 = AdvSimd.ExtractNarrowingLower(pixel56.AsUInt16());
|
||||
|
||||
Vector128<byte> pixel1234 = AdvSimd.ExtractNarrowingUpper(lower12, pixel34.AsUInt16());
|
||||
Vector128<byte> pixel5678 = AdvSimd.ExtractNarrowingUpper(lower56, pixel78.AsUInt16());
|
||||
|
||||
AdvSimd.Store(op + 0x00, pixel1234);
|
||||
AdvSimd.Store(op + 0x10, pixel5678);
|
||||
|
||||
op += 0x20;
|
||||
}
|
||||
|
||||
for (; x < width; x++)
|
||||
{
|
||||
Pixel* px = ip + (uint)x;
|
||||
|
||||
*(op + 0) = Downsample(px->R);
|
||||
*(op + 1) = Downsample(px->G);
|
||||
*(op + 2) = Downsample(px->B);
|
||||
*(op + 3) = Downsample(px->A);
|
||||
|
||||
op += 4;
|
||||
}
|
||||
|
||||
op += strideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
@@ -302,6 +361,87 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (AdvSimd.IsSupported)
|
||||
{
|
||||
Vector128<ushort> mask = Vector128.Create(0xffffUL).AsUInt16();
|
||||
|
||||
int widthTrunc = width & ~0xf;
|
||||
int strideGap = yStride - width;
|
||||
|
||||
fixed (Pixel* srcPtr = input.Data)
|
||||
{
|
||||
Pixel* ip = srcPtr;
|
||||
|
||||
fixed (byte* dstPtr = dstY)
|
||||
{
|
||||
byte* op = dstPtr;
|
||||
|
||||
for (int y = 0; y < height; y++, ip += input.Width)
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for (; x < widthTrunc; x += 16)
|
||||
{
|
||||
byte* baseOffset = (byte*)(ip + (ulong)(uint)x);
|
||||
|
||||
Vector128<ushort> pixelp1 = AdvSimd.LoadVector128((ushort*)baseOffset);
|
||||
Vector128<ushort> pixelp2 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x10));
|
||||
Vector128<ushort> pixelp3 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x20));
|
||||
Vector128<ushort> pixelp4 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x30));
|
||||
Vector128<ushort> pixelp5 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x40));
|
||||
Vector128<ushort> pixelp6 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x50));
|
||||
Vector128<ushort> pixelp7 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x60));
|
||||
Vector128<ushort> pixelp8 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x70));
|
||||
|
||||
pixelp1 = AdvSimd.And(pixelp1, mask);
|
||||
pixelp2 = AdvSimd.And(pixelp2, mask);
|
||||
pixelp3 = AdvSimd.And(pixelp3, mask);
|
||||
pixelp4 = AdvSimd.And(pixelp4, mask);
|
||||
pixelp5 = AdvSimd.And(pixelp5, mask);
|
||||
pixelp6 = AdvSimd.And(pixelp6, mask);
|
||||
pixelp7 = AdvSimd.And(pixelp7, mask);
|
||||
pixelp8 = AdvSimd.And(pixelp8, mask);
|
||||
|
||||
Vector64<ushort> lowerp1 = AdvSimd.ExtractNarrowingLower(pixelp1.AsUInt32());
|
||||
Vector64<ushort> lowerp3 = AdvSimd.ExtractNarrowingLower(pixelp3.AsUInt32());
|
||||
Vector64<ushort> lowerp5 = AdvSimd.ExtractNarrowingLower(pixelp5.AsUInt32());
|
||||
Vector64<ushort> lowerp7 = AdvSimd.ExtractNarrowingLower(pixelp7.AsUInt32());
|
||||
|
||||
Vector128<ushort> pixelq1 = AdvSimd.ExtractNarrowingUpper(lowerp1, pixelp2.AsUInt32());
|
||||
Vector128<ushort> pixelq2 = AdvSimd.ExtractNarrowingUpper(lowerp3, pixelp4.AsUInt32());
|
||||
Vector128<ushort> pixelq3 = AdvSimd.ExtractNarrowingUpper(lowerp5, pixelp6.AsUInt32());
|
||||
Vector128<ushort> pixelq4 = AdvSimd.ExtractNarrowingUpper(lowerp7, pixelp8.AsUInt32());
|
||||
|
||||
Vector64<ushort> lowerq1 = AdvSimd.ExtractNarrowingLower(pixelq1.AsUInt32());
|
||||
Vector64<ushort> lowerq3 = AdvSimd.ExtractNarrowingLower(pixelq3.AsUInt32());
|
||||
|
||||
pixelq1 = AdvSimd.ExtractNarrowingUpper(lowerq1, pixelq2.AsUInt32());
|
||||
pixelq2 = AdvSimd.ExtractNarrowingUpper(lowerq3, pixelq4.AsUInt32());
|
||||
|
||||
pixelq1 = AdvSimd.ShiftRightLogical(pixelq1, 2);
|
||||
pixelq2 = AdvSimd.ShiftRightLogical(pixelq2, 2);
|
||||
|
||||
Vector64<byte> pixelLower = AdvSimd.ExtractNarrowingLower(pixelq1.AsUInt16());
|
||||
|
||||
Vector128<byte> pixel = AdvSimd.ExtractNarrowingUpper(pixelLower, pixelq2.AsUInt16());
|
||||
|
||||
AdvSimd.Store(op, pixel);
|
||||
|
||||
op += 0x10;
|
||||
}
|
||||
|
||||
for (; x < width; x++)
|
||||
{
|
||||
Pixel* px = ip + (uint)x;
|
||||
|
||||
*op++ = Downsample(px->R);
|
||||
}
|
||||
|
||||
op += strideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < height; y++)
|
||||
@@ -392,6 +532,69 @@ namespace Ryujinx.Graphics.Vic.Image
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (AdvSimd.Arm64.IsSupported)
|
||||
{
|
||||
int widthTrunc = uvWidth & ~7;
|
||||
int strideGap = uvStride - uvWidth * 2;
|
||||
|
||||
fixed (Pixel* srcPtr = input.Data)
|
||||
{
|
||||
Pixel* ip = srcPtr;
|
||||
|
||||
fixed (byte* dstPtr = dstUv)
|
||||
{
|
||||
byte* op = dstPtr;
|
||||
|
||||
for (int y = 0; y < uvHeight; y++, ip += input.Width * 2)
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for (; x < widthTrunc; x += 8)
|
||||
{
|
||||
byte* baseOffset = (byte*)ip + (ulong)(uint)x * 16;
|
||||
|
||||
Vector128<uint> pixel1 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x02));
|
||||
Vector128<uint> pixel2 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x12));
|
||||
Vector128<uint> pixel3 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x22));
|
||||
Vector128<uint> pixel4 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x32));
|
||||
Vector128<uint> pixel5 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x42));
|
||||
Vector128<uint> pixel6 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x52));
|
||||
Vector128<uint> pixel7 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x62));
|
||||
Vector128<uint> pixel8 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x72));
|
||||
|
||||
Vector128<uint> pixel12 = AdvSimd.Arm64.ZipLow(pixel1, pixel2);
|
||||
Vector128<uint> pixel34 = AdvSimd.Arm64.ZipLow(pixel3, pixel4);
|
||||
Vector128<uint> pixel56 = AdvSimd.Arm64.ZipLow(pixel5, pixel6);
|
||||
Vector128<uint> pixel78 = AdvSimd.Arm64.ZipLow(pixel7, pixel8);
|
||||
|
||||
Vector128<ulong> pixel1234 = AdvSimd.Arm64.ZipLow(pixel12.AsUInt64(), pixel34.AsUInt64());
|
||||
Vector128<ulong> pixel5678 = AdvSimd.Arm64.ZipLow(pixel56.AsUInt64(), pixel78.AsUInt64());
|
||||
|
||||
pixel1234 = AdvSimd.ShiftRightLogical(pixel1234, 2);
|
||||
pixel5678 = AdvSimd.ShiftRightLogical(pixel5678, 2);
|
||||
|
||||
Vector64<byte> pixelLower = AdvSimd.ExtractNarrowingLower(pixel1234.AsUInt16());
|
||||
|
||||
Vector128<byte> pixel = AdvSimd.ExtractNarrowingUpper(pixelLower, pixel5678.AsUInt16());
|
||||
|
||||
AdvSimd.Store(op, pixel);
|
||||
|
||||
op += 0x10;
|
||||
}
|
||||
|
||||
for (; x < uvWidth; x++)
|
||||
{
|
||||
Pixel* px = ip + (uint)(x << 1);
|
||||
|
||||
*op++ = Downsample(px->G);
|
||||
*op++ = Downsample(px->B);
|
||||
}
|
||||
|
||||
op += strideGap;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int y = 0; y < uvHeight; y++)
|
||||
|
@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
_device = device;
|
||||
_attachments = new[] { view };
|
||||
_validColorAttachments = 1u;
|
||||
_validColorAttachments = isDepthStencil ? 0u : 1u;
|
||||
|
||||
Width = width;
|
||||
Height = height;
|
||||
@@ -46,7 +46,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
AttachmentSamples = new[] { samples };
|
||||
AttachmentFormats = new[] { format };
|
||||
AttachmentIndices = new[] { 0 };
|
||||
AttachmentIndices = isDepthStencil ? Array.Empty<int>() : new[] { 0 };
|
||||
|
||||
AttachmentsCount = 1;
|
||||
|
||||
|
@@ -24,6 +24,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
public readonly bool SupportsGeometryShaderPassthrough;
|
||||
public readonly bool SupportsSubgroupSizeControl;
|
||||
public readonly bool SupportsShaderInt8;
|
||||
public readonly bool SupportsShaderStencilExport;
|
||||
public readonly bool SupportsConditionalRendering;
|
||||
public readonly bool SupportsExtendedDynamicState;
|
||||
public readonly bool SupportsMultiView;
|
||||
@@ -48,6 +49,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
bool supportsGeometryShaderPassthrough,
|
||||
bool supportsSubgroupSizeControl,
|
||||
bool supportsShaderInt8,
|
||||
bool supportsShaderStencilExport,
|
||||
bool supportsConditionalRendering,
|
||||
bool supportsExtendedDynamicState,
|
||||
bool supportsMultiView,
|
||||
@@ -71,6 +73,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
|
||||
SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
|
||||
SupportsShaderInt8 = supportsShaderInt8;
|
||||
SupportsShaderStencilExport = supportsShaderStencilExport;
|
||||
SupportsConditionalRendering = supportsConditionalRendering;
|
||||
SupportsExtendedDynamicState = supportsExtendedDynamicState;
|
||||
SupportsMultiView = supportsMultiView;
|
||||
|
@@ -33,6 +33,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
private readonly IProgram _programConvertIndirectData;
|
||||
private readonly IProgram _programColorCopyToNonMs;
|
||||
private readonly IProgram _programColorDrawToMs;
|
||||
private readonly IProgram _programDepthBlit;
|
||||
private readonly IProgram _programStencilBlit;
|
||||
|
||||
public HelperShader(VulkanRenderer gd, Device device)
|
||||
{
|
||||
@@ -42,13 +44,13 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
_samplerLinear = gd.CreateSampler(GAL.SamplerCreateInfo.Create(MinFilter.Linear, MagFilter.Linear));
|
||||
_samplerNearest = gd.CreateSampler(GAL.SamplerCreateInfo.Create(MinFilter.Nearest, MagFilter.Nearest));
|
||||
|
||||
var colorBlitVertexBindings = new ShaderBindings(
|
||||
var blitVertexBindings = new ShaderBindings(
|
||||
new[] { 1 },
|
||||
Array.Empty<int>(),
|
||||
Array.Empty<int>(),
|
||||
Array.Empty<int>());
|
||||
|
||||
var colorBlitFragmentBindings = new ShaderBindings(
|
||||
var blitFragmentBindings = new ShaderBindings(
|
||||
Array.Empty<int>(),
|
||||
Array.Empty<int>(),
|
||||
new[] { 0 },
|
||||
@@ -56,14 +58,14 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
_programColorBlit = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, colorBlitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitFragmentShaderSource, colorBlitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitFragmentShaderSource, blitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
_programColorBlitClearAlpha = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, colorBlitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitClearAlphaFragmentShaderSource, colorBlitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorBlitClearAlphaFragmentShaderSource, blitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
var colorClearFragmentBindings = new ShaderBindings(
|
||||
@@ -74,19 +76,19 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
_programColorClearF = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, colorBlitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearFFragmentShaderSource, colorClearFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
_programColorClearSI = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, colorBlitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearSIFragmentShaderSource, colorClearFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
_programColorClearUI = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, colorBlitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.ColorClearUIFragmentShaderSource, colorClearFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
@@ -151,6 +153,21 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ConvertIndirectDataShaderSource, convertIndirectDataBindings, ShaderStage.Compute, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
_programDepthBlit = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.DepthBlitFragmentShaderSource, blitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
|
||||
if (gd.Capabilities.SupportsShaderStencilExport)
|
||||
{
|
||||
_programStencilBlit = gd.CreateProgramWithMinimalLayout(new[]
|
||||
{
|
||||
new ShaderSource(ShaderBinaries.ColorBlitVertexShaderSource, blitVertexBindings, ShaderStage.Vertex, TargetLanguage.Spirv),
|
||||
new ShaderSource(ShaderBinaries.StencilBlitFragmentShaderSource, blitFragmentBindings, ShaderStage.Fragment, TargetLanguage.Spirv),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public void Blit(
|
||||
@@ -162,6 +179,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
VkFormat dstFormat,
|
||||
Extents2D srcRegion,
|
||||
Extents2D dstRegion,
|
||||
bool isDepthOrStencil,
|
||||
bool linearFilter,
|
||||
bool clearAlpha = false)
|
||||
{
|
||||
@@ -169,10 +187,17 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
using var cbs = gd.CommandBufferPool.Rent();
|
||||
|
||||
Blit(gd, cbs, src, dst, dstWidth, dstHeight, dstFormat, srcRegion, dstRegion, linearFilter, clearAlpha);
|
||||
if (isDepthOrStencil)
|
||||
{
|
||||
BlitDepthStencil(gd, cbs, src, dst, dstWidth, dstHeight, dstFormat, srcRegion, dstRegion);
|
||||
}
|
||||
else
|
||||
{
|
||||
BlitColor(gd, cbs, src, dst, dstWidth, dstHeight, dstFormat, srcRegion, dstRegion, linearFilter, clearAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
public void Blit(
|
||||
public void BlitColor(
|
||||
VulkanRenderer gd,
|
||||
CommandBufferScoped cbs,
|
||||
TextureView src,
|
||||
@@ -255,6 +280,173 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
gd.BufferManager.Delete(bufferHandle);
|
||||
}
|
||||
|
||||
private void BlitDepthStencil(
|
||||
VulkanRenderer gd,
|
||||
CommandBufferScoped cbs,
|
||||
TextureView src,
|
||||
Auto<DisposableImageView> dst,
|
||||
int dstWidth,
|
||||
int dstHeight,
|
||||
VkFormat dstFormat,
|
||||
Extents2D srcRegion,
|
||||
Extents2D dstRegion)
|
||||
{
|
||||
_pipeline.SetCommandBuffer(cbs);
|
||||
|
||||
const int RegionBufferSize = 16;
|
||||
|
||||
Span<float> region = stackalloc float[RegionBufferSize / sizeof(float)];
|
||||
|
||||
region[0] = (float)srcRegion.X1 / src.Width;
|
||||
region[1] = (float)srcRegion.X2 / src.Width;
|
||||
region[2] = (float)srcRegion.Y1 / src.Height;
|
||||
region[3] = (float)srcRegion.Y2 / src.Height;
|
||||
|
||||
if (dstRegion.X1 > dstRegion.X2)
|
||||
{
|
||||
(region[0], region[1]) = (region[1], region[0]);
|
||||
}
|
||||
|
||||
if (dstRegion.Y1 > dstRegion.Y2)
|
||||
{
|
||||
(region[2], region[3]) = (region[3], region[2]);
|
||||
}
|
||||
|
||||
var bufferHandle = gd.BufferManager.CreateWithHandle(gd, RegionBufferSize, false);
|
||||
|
||||
gd.BufferManager.SetData<float>(bufferHandle, 0, region);
|
||||
|
||||
_pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(1, new BufferRange(bufferHandle, 0, RegionBufferSize)) });
|
||||
|
||||
Span<GAL.Viewport> viewports = stackalloc GAL.Viewport[1];
|
||||
|
||||
var rect = new Rectangle<float>(
|
||||
MathF.Min(dstRegion.X1, dstRegion.X2),
|
||||
MathF.Min(dstRegion.Y1, dstRegion.Y2),
|
||||
MathF.Abs(dstRegion.X2 - dstRegion.X1),
|
||||
MathF.Abs(dstRegion.Y2 - dstRegion.Y1));
|
||||
|
||||
viewports[0] = new GAL.Viewport(
|
||||
rect,
|
||||
ViewportSwizzle.PositiveX,
|
||||
ViewportSwizzle.PositiveY,
|
||||
ViewportSwizzle.PositiveZ,
|
||||
ViewportSwizzle.PositiveW,
|
||||
0f,
|
||||
1f);
|
||||
|
||||
Span<Rectangle<int>> scissors = stackalloc Rectangle<int>[1];
|
||||
|
||||
scissors[0] = new Rectangle<int>(0, 0, dstWidth, dstHeight);
|
||||
|
||||
_pipeline.SetRenderTarget(dst, (uint)dstWidth, (uint)dstHeight, true, dstFormat);
|
||||
_pipeline.SetScissors(scissors);
|
||||
_pipeline.SetViewports(viewports, false);
|
||||
_pipeline.SetPrimitiveTopology(GAL.PrimitiveTopology.TriangleStrip);
|
||||
|
||||
var aspectFlags = src.Info.Format.ConvertAspectFlags();
|
||||
|
||||
if (aspectFlags.HasFlag(ImageAspectFlags.DepthBit))
|
||||
{
|
||||
var depthTexture = CreateDepthOrStencilView(src, DepthStencilMode.Depth);
|
||||
|
||||
BlitDepthStencilDraw(depthTexture, isDepth: true);
|
||||
|
||||
if (depthTexture != src)
|
||||
{
|
||||
depthTexture.Release();
|
||||
}
|
||||
}
|
||||
|
||||
if (aspectFlags.HasFlag(ImageAspectFlags.StencilBit) && _programStencilBlit != null)
|
||||
{
|
||||
var stencilTexture = CreateDepthOrStencilView(src, DepthStencilMode.Stencil);
|
||||
|
||||
BlitDepthStencilDraw(stencilTexture, isDepth: false);
|
||||
|
||||
if (stencilTexture != src)
|
||||
{
|
||||
stencilTexture.Release();
|
||||
}
|
||||
}
|
||||
|
||||
_pipeline.Finish(gd, cbs);
|
||||
|
||||
gd.BufferManager.Delete(bufferHandle);
|
||||
}
|
||||
|
||||
private static TextureView CreateDepthOrStencilView(TextureView depthStencilTexture, DepthStencilMode depthStencilMode)
|
||||
{
|
||||
if (depthStencilTexture.Info.DepthStencilMode == depthStencilMode)
|
||||
{
|
||||
return depthStencilTexture;
|
||||
}
|
||||
|
||||
return (TextureView)depthStencilTexture.CreateView(new TextureCreateInfo(
|
||||
depthStencilTexture.Info.Width,
|
||||
depthStencilTexture.Info.Height,
|
||||
depthStencilTexture.Info.Depth,
|
||||
depthStencilTexture.Info.Levels,
|
||||
depthStencilTexture.Info.Samples,
|
||||
depthStencilTexture.Info.BlockWidth,
|
||||
depthStencilTexture.Info.BlockHeight,
|
||||
depthStencilTexture.Info.BytesPerPixel,
|
||||
depthStencilTexture.Info.Format,
|
||||
depthStencilMode,
|
||||
depthStencilTexture.Info.Target,
|
||||
SwizzleComponent.Red,
|
||||
SwizzleComponent.Green,
|
||||
SwizzleComponent.Blue,
|
||||
SwizzleComponent.Alpha), 0, 0);
|
||||
}
|
||||
|
||||
private void BlitDepthStencilDraw(TextureView src, bool isDepth)
|
||||
{
|
||||
_pipeline.SetTextureAndSampler(ShaderStage.Fragment, 0, src, _samplerNearest);
|
||||
|
||||
if (isDepth)
|
||||
{
|
||||
_pipeline.SetProgram(_programDepthBlit);
|
||||
_pipeline.SetDepthTest(new DepthTestDescriptor(true, true, GAL.CompareOp.Always));
|
||||
}
|
||||
else
|
||||
{
|
||||
_pipeline.SetProgram(_programStencilBlit);
|
||||
_pipeline.SetStencilTest(CreateStencilTestDescriptor(true));
|
||||
}
|
||||
|
||||
_pipeline.Draw(4, 1, 0, 0);
|
||||
|
||||
if (isDepth)
|
||||
{
|
||||
_pipeline.SetDepthTest(new DepthTestDescriptor(false, false, GAL.CompareOp.Always));
|
||||
}
|
||||
else
|
||||
{
|
||||
_pipeline.SetStencilTest(CreateStencilTestDescriptor(false));
|
||||
}
|
||||
}
|
||||
|
||||
private static StencilTestDescriptor CreateStencilTestDescriptor(bool enabled)
|
||||
{
|
||||
return new StencilTestDescriptor(
|
||||
enabled,
|
||||
GAL.CompareOp.Always,
|
||||
GAL.StencilOp.Replace,
|
||||
GAL.StencilOp.Replace,
|
||||
GAL.StencilOp.Replace,
|
||||
0,
|
||||
0xff,
|
||||
0xff,
|
||||
GAL.CompareOp.Always,
|
||||
GAL.StencilOp.Replace,
|
||||
GAL.StencilOp.Replace,
|
||||
GAL.StencilOp.Replace,
|
||||
0,
|
||||
0xff,
|
||||
0xff);
|
||||
}
|
||||
|
||||
public void Clear(
|
||||
VulkanRenderer gd,
|
||||
Auto<DisposableImageView> dst,
|
||||
@@ -993,6 +1185,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
_programConvertIndirectData.Dispose();
|
||||
_programColorCopyToNonMs.Dispose();
|
||||
_programColorDrawToMs.Dispose();
|
||||
_programDepthBlit.Dispose();
|
||||
_programStencilBlit?.Dispose();
|
||||
_samplerNearest.Dispose();
|
||||
_samplerLinear.Dispose();
|
||||
_pipeline.Dispose();
|
||||
|
@@ -0,0 +1,10 @@
|
||||
#version 450 core
|
||||
|
||||
layout (binding = 0, set = 2) uniform sampler2D texDepth;
|
||||
|
||||
layout (location = 0) in vec2 tex_coord;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_FragDepth = texture(texDepth, tex_coord).r;
|
||||
}
|
@@ -1459,5 +1459,95 @@ namespace Ryujinx.Graphics.Vulkan.Shaders
|
||||
0x3B, 0x01, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xE3, 0x00, 0x00, 0x00,
|
||||
0xF8, 0x00, 0x02, 0x00, 0xE5, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
|
||||
public static readonly byte[] DepthBlitFragmentShaderSource = new byte[]
|
||||
{
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0B, 0x00, 0x08, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0F, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x06, 0x00, 0x08, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x46, 0x72, 0x61, 0x67, 0x44,
|
||||
0x65, 0x70, 0x74, 0x68, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x0C, 0x00, 0x00, 0x00,
|
||||
0x74, 0x65, 0x78, 0x44, 0x65, 0x70, 0x74, 0x68, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F, 0x63, 0x6F, 0x6F, 0x72, 0x64, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x09, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x1B, 0x00, 0x03, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00,
|
||||
0x3B, 0x00, 0x04, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x04, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00,
|
||||
0x3B, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
|
||||
0x0A, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x08, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
|
||||
public static readonly byte[] StencilBlitFragmentShaderSource = new byte[]
|
||||
{
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0B, 0x00, 0x08, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x95, 0x13, 0x00, 0x00, 0x0A, 0x00, 0x09, 0x00, 0x53, 0x50, 0x56, 0x5F, 0x45, 0x58, 0x54, 0x5F,
|
||||
0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x73, 0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x65,
|
||||
0x78, 0x70, 0x6F, 0x72, 0x74, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x4C, 0x53, 0x4C, 0x2E, 0x73, 0x74, 0x64, 0x2E, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x07, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0xA3, 0x13, 0x00, 0x00,
|
||||
0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0xC2, 0x01, 0x00, 0x00, 0x04, 0x00, 0x09, 0x00,
|
||||
0x47, 0x4C, 0x5F, 0x41, 0x52, 0x42, 0x5F, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x5F, 0x73, 0x74,
|
||||
0x65, 0x6E, 0x63, 0x69, 0x6C, 0x5F, 0x65, 0x78, 0x70, 0x6F, 0x72, 0x74, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x46, 0x72, 0x61, 0x67, 0x53,
|
||||
0x74, 0x65, 0x6E, 0x63, 0x69, 0x6C, 0x52, 0x65, 0x66, 0x41, 0x52, 0x42, 0x00, 0x00, 0x00, 0x00,
|
||||
0x05, 0x00, 0x05, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x53, 0x74, 0x65, 0x6E, 0x63,
|
||||
0x69, 0x6C, 0x00, 0x00, 0x05, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x5F,
|
||||
0x63, 0x6F, 0x6F, 0x72, 0x64, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x96, 0x13, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0C, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x1B, 0x00, 0x03, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00,
|
||||
0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00,
|
||||
0x0E, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x00, 0x00, 0x00,
|
||||
0x0C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x0D, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
}
|
||||
}
|
@@ -0,0 +1,12 @@
|
||||
#version 450 core
|
||||
|
||||
#extension GL_ARB_shader_stencil_export : require
|
||||
|
||||
layout (binding = 0, set = 2) uniform isampler2D texStencil;
|
||||
|
||||
layout (location = 0) in vec2 tex_coord;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_FragStencilRefARB = texture(texStencil, tex_coord).r;
|
||||
}
|
@@ -362,21 +362,16 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
levels,
|
||||
linearFilter);
|
||||
|
||||
return;
|
||||
}
|
||||
else if (srcFormat == GAL.Format.D32FloatS8Uint && srcFormat == dstFormat && SupportsBlitFromD32FS8ToD32FAndS8())
|
||||
{
|
||||
BlitDepthStencilWithBuffer(_gd, cbs, src, dst, srcRegion, dstRegion);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool isDepthOrStencil = dst.Info.Format.IsDepthOrStencil();
|
||||
|
||||
if (VulkanConfiguration.UseSlowSafeBlitOnAmd &&
|
||||
(_gd.Vendor == Vendor.Amd || _gd.IsMoltenVk) &&
|
||||
src.Info.Target == Target.Texture2D &&
|
||||
dst.Info.Target == Target.Texture2D &&
|
||||
!dst.Info.Format.IsDepthOrStencil())
|
||||
dst.Info.Target == Target.Texture2D)
|
||||
{
|
||||
_gd.HelperShader.Blit(
|
||||
_gd,
|
||||
@@ -387,6 +382,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
dst.VkFormat,
|
||||
srcRegion,
|
||||
dstRegion,
|
||||
isDepthOrStencil,
|
||||
linearFilter);
|
||||
|
||||
return;
|
||||
@@ -395,7 +391,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
Auto<DisposableImage> srcImage;
|
||||
Auto<DisposableImage> dstImage;
|
||||
|
||||
if (dst.Info.Format.IsDepthOrStencil())
|
||||
if (isDepthOrStencil)
|
||||
{
|
||||
srcImage = src.Storage.CreateAliasedColorForDepthStorageUnsafe(srcFormat).GetImage();
|
||||
dstImage = dst.Storage.CreateAliasedColorForDepthStorageUnsafe(dstFormat).GetImage();
|
||||
@@ -426,189 +422,6 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
ImageAspectFlags.ColorBit);
|
||||
}
|
||||
|
||||
private static void BlitDepthStencilWithBuffer(
|
||||
VulkanRenderer gd,
|
||||
CommandBufferScoped cbs,
|
||||
TextureView src,
|
||||
TextureView dst,
|
||||
Extents2D srcRegion,
|
||||
Extents2D dstRegion)
|
||||
{
|
||||
int drBaseX = Math.Min(dstRegion.X1, dstRegion.X2);
|
||||
int drBaseY = Math.Min(dstRegion.Y1, dstRegion.Y2);
|
||||
int drWidth = Math.Abs(dstRegion.X2 - dstRegion.X1);
|
||||
int drHeight = Math.Abs(dstRegion.Y2 - dstRegion.Y1);
|
||||
|
||||
var drOriginZero = new Extents2D(
|
||||
dstRegion.X1 - drBaseX,
|
||||
dstRegion.Y1 - drBaseY,
|
||||
dstRegion.X2 - drBaseX,
|
||||
dstRegion.Y2 - drBaseY);
|
||||
|
||||
var d32SrcStorageInfo = TextureStorage.NewCreateInfoWith(ref src._info, GAL.Format.D32Float, 4);
|
||||
var d32DstStorageInfo = TextureStorage.NewCreateInfoWith(ref dst._info, GAL.Format.D32Float, 4, drWidth, drHeight);
|
||||
var s8SrcStorageInfo = TextureStorage.NewCreateInfoWith(ref src._info, GAL.Format.S8Uint, 1);
|
||||
var s8DstStorageInfo = TextureStorage.NewCreateInfoWith(ref dst._info, GAL.Format.S8Uint, 1, drWidth, drHeight);
|
||||
|
||||
using var d32SrcStorage = gd.CreateTextureStorage(d32SrcStorageInfo, src.Storage.ScaleFactor);
|
||||
using var d32DstStorage = gd.CreateTextureStorage(d32DstStorageInfo, dst.Storage.ScaleFactor);
|
||||
using var s8SrcStorage = gd.CreateTextureStorage(s8SrcStorageInfo, src.Storage.ScaleFactor);
|
||||
using var s8DstStorage = gd.CreateTextureStorage(s8DstStorageInfo, dst.Storage.ScaleFactor);
|
||||
|
||||
void SlowBlit(TextureStorage srcTemp, TextureStorage dstTemp, ImageAspectFlags aspectFlags)
|
||||
{
|
||||
int levels = Math.Min(src.Info.Levels, dst.Info.Levels);
|
||||
|
||||
int srcSize = 0;
|
||||
int dstSize = 0;
|
||||
|
||||
for (int l = 0; l < levels; l++)
|
||||
{
|
||||
srcSize += srcTemp.Info.GetMipSize2D(l);
|
||||
dstSize += dstTemp.Info.GetMipSize2D(l);
|
||||
}
|
||||
|
||||
using var srcTempBuffer = gd.BufferManager.Create(gd, srcSize, deviceLocal: true);
|
||||
using var dstTempBuffer = gd.BufferManager.Create(gd, dstSize, deviceLocal: true);
|
||||
|
||||
src.Storage.CopyFromOrToBuffer(
|
||||
cbs.CommandBuffer,
|
||||
srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value,
|
||||
src.GetImage().Get(cbs).Value,
|
||||
srcSize,
|
||||
to: true,
|
||||
0,
|
||||
0,
|
||||
src.FirstLayer,
|
||||
src.FirstLevel,
|
||||
1,
|
||||
levels,
|
||||
true,
|
||||
aspectFlags,
|
||||
false);
|
||||
|
||||
BufferHolder.InsertBufferBarrier(
|
||||
gd,
|
||||
cbs.CommandBuffer,
|
||||
srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value,
|
||||
AccessFlags.TransferWriteBit,
|
||||
AccessFlags.TransferReadBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
0,
|
||||
srcSize);
|
||||
|
||||
srcTemp.CopyFromOrToBuffer(
|
||||
cbs.CommandBuffer,
|
||||
srcTempBuffer.GetBuffer().Get(cbs, 0, srcSize).Value,
|
||||
srcTemp.GetImage().Get(cbs).Value,
|
||||
srcSize,
|
||||
to: false,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
levels,
|
||||
true,
|
||||
aspectFlags,
|
||||
false);
|
||||
|
||||
InsertImageBarrier(
|
||||
gd.Api,
|
||||
cbs.CommandBuffer,
|
||||
srcTemp.GetImage().Get(cbs).Value,
|
||||
AccessFlags.TransferWriteBit,
|
||||
AccessFlags.TransferReadBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
aspectFlags,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
levels);
|
||||
|
||||
TextureCopy.Blit(
|
||||
gd.Api,
|
||||
cbs.CommandBuffer,
|
||||
srcTemp.GetImage().Get(cbs).Value,
|
||||
dstTemp.GetImage().Get(cbs).Value,
|
||||
srcTemp.Info,
|
||||
dstTemp.Info,
|
||||
srcRegion,
|
||||
drOriginZero,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
levels,
|
||||
false,
|
||||
aspectFlags,
|
||||
aspectFlags);
|
||||
|
||||
InsertImageBarrier(
|
||||
gd.Api,
|
||||
cbs.CommandBuffer,
|
||||
dstTemp.GetImage().Get(cbs).Value,
|
||||
AccessFlags.TransferWriteBit,
|
||||
AccessFlags.TransferReadBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
aspectFlags,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
levels);
|
||||
|
||||
dstTemp.CopyFromOrToBuffer(
|
||||
cbs.CommandBuffer,
|
||||
dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value,
|
||||
dstTemp.GetImage().Get(cbs).Value,
|
||||
dstSize,
|
||||
to: true,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
levels,
|
||||
true,
|
||||
aspectFlags,
|
||||
false);
|
||||
|
||||
BufferHolder.InsertBufferBarrier(
|
||||
gd,
|
||||
cbs.CommandBuffer,
|
||||
dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value,
|
||||
AccessFlags.TransferWriteBit,
|
||||
AccessFlags.TransferReadBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
PipelineStageFlags.TransferBit,
|
||||
0,
|
||||
dstSize);
|
||||
|
||||
dst.Storage.CopyFromOrToBuffer(
|
||||
cbs.CommandBuffer,
|
||||
dstTempBuffer.GetBuffer().Get(cbs, 0, dstSize).Value,
|
||||
dst.GetImage().Get(cbs).Value,
|
||||
dstSize,
|
||||
to: false,
|
||||
drBaseX,
|
||||
drBaseY,
|
||||
dst.FirstLayer,
|
||||
dst.FirstLevel,
|
||||
1,
|
||||
levels,
|
||||
true,
|
||||
aspectFlags,
|
||||
false);
|
||||
}
|
||||
|
||||
SlowBlit(d32SrcStorage, d32DstStorage, ImageAspectFlags.DepthBit);
|
||||
SlowBlit(s8SrcStorage, s8DstStorage, ImageAspectFlags.StencilBit);
|
||||
}
|
||||
|
||||
public static unsafe void InsertImageBarrier(
|
||||
Vk api,
|
||||
CommandBuffer commandBuffer,
|
||||
@@ -649,13 +462,6 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
memoryBarrier);
|
||||
}
|
||||
|
||||
private bool SupportsBlitFromD32FS8ToD32FAndS8()
|
||||
{
|
||||
var formatFeatureFlags = FormatFeatureFlags.BlitSrcBit | FormatFeatureFlags.BlitDstBit;
|
||||
return _gd.FormatCapabilities.OptimalFormatSupports(formatFeatureFlags, GAL.Format.D32Float) &&
|
||||
_gd.FormatCapabilities.OptimalFormatSupports(formatFeatureFlags, GAL.Format.S8Uint);
|
||||
}
|
||||
|
||||
public TextureView GetView(GAL.Format format)
|
||||
{
|
||||
if (format == Info.Format)
|
||||
|
@@ -29,6 +29,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
"VK_EXT_fragment_shader_interlock",
|
||||
"VK_EXT_index_type_uint8",
|
||||
"VK_EXT_robustness2",
|
||||
"VK_EXT_shader_stencil_export",
|
||||
"VK_KHR_shader_float16_int8",
|
||||
"VK_EXT_shader_subgroup_ballot",
|
||||
"VK_EXT_subgroup_size_control",
|
||||
|
@@ -263,6 +263,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
supportedExtensions.Contains("VK_NV_geometry_shader_passthrough"),
|
||||
supportedExtensions.Contains("VK_EXT_subgroup_size_control"),
|
||||
featuresShaderInt8.ShaderInt8,
|
||||
supportedExtensions.Contains("VK_EXT_shader_stencil_export"),
|
||||
supportedExtensions.Contains(ExtConditionalRendering.ExtensionName),
|
||||
supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName),
|
||||
features2.Features.MultiViewport,
|
||||
|
@@ -335,7 +335,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
int dstY0 = crop.FlipY ? dstPaddingY : _height - dstPaddingY;
|
||||
int dstY1 = crop.FlipY ? _height - dstPaddingY : dstPaddingY;
|
||||
|
||||
_gd.HelperShader.Blit(
|
||||
_gd.HelperShader.BlitColor(
|
||||
_gd,
|
||||
cbs,
|
||||
view,
|
||||
|
@@ -137,7 +137,7 @@ namespace Ryujinx.HLE.HOS.Services.Account.Acc
|
||||
|
||||
return resultCode;
|
||||
}
|
||||
|
||||
|
||||
[CommandHipc(110)]
|
||||
// StoreSaveDataThumbnail(nn::account::Uid, buffer<bytes, 5>)
|
||||
public ResultCode StoreSaveDataThumbnail(ServiceCtx context)
|
||||
@@ -153,10 +153,17 @@ namespace Ryujinx.HLE.HOS.Services.Account.Acc
|
||||
}
|
||||
|
||||
[CommandHipc(130)] // 5.0.0+
|
||||
// LoadOpenContext(nn::account::Uid)
|
||||
// LoadOpenContext(nn::account::Uid) -> object<nn::account::baas::IManagerForApplication>
|
||||
public ResultCode LoadOpenContext(ServiceCtx context)
|
||||
{
|
||||
Logger.Stub?.PrintStub(LogClass.ServiceAcc);
|
||||
ResultCode resultCode = _applicationServiceServer.CheckUserId(context, out UserId userId);
|
||||
|
||||
if (resultCode != ResultCode.Success)
|
||||
{
|
||||
return resultCode;
|
||||
}
|
||||
|
||||
MakeObject(context, new IManagerForApplication(userId));
|
||||
|
||||
return ResultCode.Success;
|
||||
}
|
||||
|
Reference in New Issue
Block a user