Compare commits

...

8 Commits

Author SHA1 Message Date
gdkchan
9ecbee8032 Batch inline index buffer update (#4587) 2023-03-24 14:19:54 +01:00
gdkchan
80519af67d Update short cache textures if modified (#4586) 2023-03-24 12:54:58 +01:00
gdkchan
26e30faff3 Fix handle leak on IShopServiceAccessServerInterface.CreateServerInterface (#4591) 2023-03-24 11:56:54 +01:00
Wunk
0992310b76 ARMeilleure: Check for XSAVE cpuid flag for AVX{2,512} (#4584)
Protects the `xgetbv` instruction on systems that do not support
`xcr0`, such as Nehalem processors.

The `XSAVE` CPUID flag indicates support for `XSAVE`, `XRSTOR`, `XSETBV` and
`XGETBV`, while `OSXSAVE` indicates whether the operating system itself has
`XSAVE` turned on. Both must be checked at the same time.
2023-03-22 14:51:21 -03:00
Andrew Glaze
009c1101d2 CI: add a version tag to correlate release versions with commits (#4572)
* add step to tag commit with release version

* add step to tag commit with release version

* Rename step to “Create Tag”

* Fix name
2023-03-22 13:17:28 +01:00
gdkchan
ba95ee54ab Revert "Use source generated json serializers in order to improve code trimming (#4094)" (#4576)
This reverts commit 4ce4299ca2.
2023-03-21 20:14:46 -03:00
Andrey Sukharev
4ce4299ca2 Use source generated json serializers in order to improve code trimming (#4094)
* Use source generated json serializers in order to improve code trimming

* Use strongly typed github releases model to fetch updates instead of raw Newtonsoft.Json parsing

* Use separate model for LogEventArgs serialization

* Make dynamic object formatter static. Fix string builder pooling.

* Do not inherit json version of LogEventArgs from EventArgs

* Fix extra space in object formatting

* Write log json directly to stream instead of using buffer writer

* Rebase fixes

* Rebase fixes

* Rebase fixes

* Enforce block-scoped namespaces in the solution. Convert style for existing code

* Apply suggestions from code review

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Rebase indent fix

* Fix indent

* Delete unnecessary json properties

* Rebase fix

* Remove overridden json property names as they are handled in the options

* Apply suggestions from code review

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Use default json options in github api calls

* Indentation and spacing fixes

---------

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2023-03-21 19:41:19 -03:00
Wunk
17620d18db ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection

Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.

* ARMeilleure: Add initial support for EVEX instruction encoding

Does not implement rounding, or exception controls.

* ARMeilleure: Add `X86Vpternlogd`

Accelerates the vector-`Not` instruction.

* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}

* ARMeilleure: Add check for `XCR0` flags

Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.

* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting

* ARMeilleure: Move XCR0 procedure to GetXcr0Eax

* ARMeilleure: Add `XCR0` to `FeatureInfo` structure

* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly

Avoids an additional allocation

* ARMeilleure: Formatting fixes

* ARMeilleure: Fix EVEX encoding src2 register index

> Just like in VEX prefix, vvvv is provided in inverted form.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`

Passes unit tests, verified instruction utilization

* ARMeilleure: Fix EVEX register operand designations

Operand 2 was being sourced improperly.

EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvvv
Operand 3: ModRM:r/m
Operand 4: Imm

This fixes the improper register designations when emitting vpternlog.
Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`

* ARMeilleure: PTC version bump

* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail

* ARMeilleure: Update EVEX encoding comment capitalization
2023-03-20 16:09:24 -03:00
17 changed files with 324 additions and 37 deletions

View File

@@ -112,6 +112,17 @@ jobs:
repo: ${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_REPO }}
token: ${{ secrets.RELEASE_TOKEN }}
- name: Create tag
uses: actions/github-script@v5
with:
script: |
github.rest.git.createRef({
owner: context.repo.owner,
repo: context.repo.repo,
ref: 'refs/tags/${{ steps.version_info.outputs.build_version }}',
sha: context.sha
})
flatpak_release:
uses: ./.github/workflows/flatpak.yml
needs: release

View File

@@ -7,6 +7,7 @@
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
<ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" />
</ItemGroup>
<ItemGroup>

View File

@@ -1034,7 +1034,13 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
{
WriteEvexInst(dest, src1, src2, type, flags, opCode);
opCode &= 0xff;
}
else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{
// In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
@@ -1153,6 +1159,103 @@ namespace ARMeilleure.CodeGen.X86
}
}
/// <summary>
/// Writes the four-byte EVEX prefix of an instruction: the 0x62 escape byte followed by the P0, P1 and P2 payload bytes.
/// This method writes only those four bytes; rounding and exception controls are not encoded.
/// </summary>
/// <param name="dest">Operand 1; its register index supplies the R and R' extension bits</param>
/// <param name="src1">Operand 2; its register index is stored inverted in vvvv and V'</param>
/// <param name="src2">Operand 3; its register index supplies the X and B extension bits</param>
/// <param name="type">Operand type, used to select the W (64-bit lane) bit</param>
/// <param name="flags">Instruction flags, used to select the mandatory prefix (pp field)</param>
/// <param name="opCode">Full opcode value; everything above the low byte selects the opcode map (mm field)</param>
/// <param name="broadcast">Embedded broadcast in the case of a memory operand</param>
/// <param name="registerWidth">Vector register width in bits (128, 256 or 512)</param>
/// <param name="maskRegisterIdx">Mask register index, from 0 to 7</param>
/// <param name="zeroElements">True if the mask register selects elements to zero, false if it selects elements to merge</param>
private void WriteEvexInst(
    Operand dest,
    Operand src1,
    Operand src2,
    OperandType type,
    InstructionFlags flags,
    int opCode,
    bool broadcast = false,
    int registerWidth = 128,
    int maskRegisterIdx = 0,
    bool zeroElements = false)
{
    int op1Idx = dest.GetRegister().Index;
    int op2Idx = src1.GetRegister().Index;
    int op3Idx = src2.GetRegister().Index;

    WriteByte(0x62);

    // P0
    // All register extension bits are stored inverted: the bit is set when the register index bit is clear.
    // Extend operand 1 register
    bool r = (op1Idx & 8) == 0;
    // Extend operand 3 register
    bool x = (op3Idx & 16) == 0;
    // Extend operand 3 register
    bool b = (op3Idx & 8) == 0;
    // Extend operand 1 register
    bool rp = (op1Idx & 16) == 0;
    // Escape code index
    byte mm = 0b00;

    // The opcode table stores the escape bytes above the low opcode byte (0x0fXX, 0x0f38XX, 0x0f3aXX),
    // so after dropping the low byte the 0F map is identified by 0xf, not 0xf00.
    switch ((ushort)(opCode >> 8))
    {
        case 0xf: mm = 0b01; break;
        case 0xf38: mm = 0b10; break;
        case 0xf3a: mm = 0b11; break;

        default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
    }

    WriteByte(
        (byte)(
            (r ? 0x80 : 0) |
            (x ? 0x40 : 0) |
            (b ? 0x20 : 0) |
            (rp ? 0x10 : 0) |
            mm));

    // P1
    // Specify 64-bit lane mode
    bool w = Is64Bits(type);
    // Operand 2 register index (low 4 bits, inverted)
    byte vvvv = (byte)(~op2Idx & 0b1111);
    // Opcode prefix
    byte pp = (flags & InstructionFlags.PrefixMask) switch
    {
        InstructionFlags.Prefix66 => 0b01,
        InstructionFlags.PrefixF3 => 0b10,
        InstructionFlags.PrefixF2 => 0b11,
        _ => 0
    };

    // Bit 2 of P1 is always written as 1.
    WriteByte(
        (byte)(
            (w ? 0x80 : 0) |
            (vvvv << 3) |
            0b100 |
            pp));

    // P2
    // Mask register determines what elements to zero, rather than what elements to merge
    bool z = zeroElements;
    // Specifies register-width
    byte ll = 0b00;

    switch (registerWidth)
    {
        case 128: ll = 0b00; break;
        case 256: ll = 0b01; break;
        case 512: ll = 0b10; break;

        default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
    }

    // Embedded broadcast in the case of a memory operand
    bool bcast = broadcast;
    // Extend operand 2 register
    bool vp = (op2Idx & 16) == 0;
    // Mask register index; the unsigned comparison also rejects negative indices.
    Debug.Assert((uint)maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
    byte aaa = (byte)(maskRegisterIdx & 0b111);

    WriteByte(
        (byte)(
            (z ? 0x80 : 0) |
            (ll << 5) |
            (bcast ? 0x10 : 0) |
            (vp ? 8 : 0) |
            aaa));
}
private void WriteCompactInst(Operand operand, int opCode)
{
int regIndex = operand.GetRegister().Index;

View File

@@ -20,6 +20,7 @@ namespace ARMeilleure.CodeGen.X86
Reg8Dest = 1 << 2,
RexW = 1 << 3,
Vex = 1 << 4,
Evex = 1 << 5,
PrefixBit = 16,
PrefixMask = 7 << PrefixBit,
@@ -278,6 +279,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));

View File

@@ -1,10 +1,14 @@
using Ryujinx.Memory;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
namespace ARMeilleure.CodeGen.X86
{
static class HardwareCapabilities
{
private delegate uint GetXcr0();
static HardwareCapabilities()
{
if (!X86Base.IsSupported)
@@ -24,6 +28,34 @@ namespace ARMeilleure.CodeGen.X86
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
}
Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
}
/// <summary>
/// Reads the low 32 bits of the XCR0 register by running a small <c>xgetbv</c> stub from executable memory.
/// </summary>
/// <returns>The EAX value returned by <c>xgetbv</c> with ECX = 0, or 0 if the instruction cannot be used</returns>
private static uint GetXcr0Eax()
{
    // xgetbv faults unless the CPU implements XSAVE *and* the operating system has enabled it,
    // so both CPUID flags must be set before the stub may be executed.
    if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave))
    {
        // XSAVE feature (enabled by the OS) required for xgetbv
        return 0;
    }

    ReadOnlySpan<byte> asmGetXcr0 = new byte[]
    {
        0x31, 0xc9, // xor ecx, ecx
        0xf, 0x01, 0xd0, // xgetbv
        0xc3, // ret
    };

    // The block is disposed when the method returns, after the stub has been called.
    using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);

    memGetXcr0.Write(0, asmGetXcr0);

    // Make the stub executable before calling into it.
    memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);

    var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);

    return fGetXcr0();
}
[Flags]
@@ -44,6 +76,8 @@ namespace ARMeilleure.CodeGen.X86
Sse42 = 1 << 20,
Popcnt = 1 << 23,
Aes = 1 << 25,
Xsave = 1 << 26,
Osxsave = 1 << 27,
Avx = 1 << 28,
F16c = 1 << 29
}
@@ -52,7 +86,11 @@ namespace ARMeilleure.CodeGen.X86
public enum FeatureFlags7Ebx
{
Avx2 = 1 << 5,
Sha = 1 << 29
Avx512f = 1 << 16,
Avx512dq = 1 << 17,
Sha = 1 << 29,
Avx512bw = 1 << 30,
Avx512vl = 1 << 31
}
[Flags]
@@ -61,10 +99,21 @@ namespace ARMeilleure.CodeGen.X86
Gfni = 1 << 8,
}
/// <summary>
/// State-component bits of the low 32 bits of XCR0 that are checked when detecting AVX and AVX-512 support.
/// </summary>
[Flags]
public enum Xcr0FlagsEax
{
    // Bit 1
    Sse = 0x02,
    // Bit 2
    YmmHi128 = 0x04,
    // Bit 5
    Opmask = 0x20,
    // Bit 6
    ZmmHi256 = 0x40,
    // Bit 7
    Hi16Zmm = 0x80
}
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
@@ -76,8 +125,13 @@ namespace ARMeilleure.CodeGen.X86
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave)
&& Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
@@ -85,5 +139,6 @@ namespace ARMeilleure.CodeGen.X86
public static bool ForceLegacySse { get; set; }
public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
}
}

View File

@@ -180,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
}

View File

@@ -219,6 +219,7 @@ namespace ARMeilleure.CodeGen.X86
Vfnmsub231sd,
Vfnmsub231ss,
Vpblendvb,
Vpternlogd,
Xor,
Xorpd,
Xorps,

View File

@@ -254,7 +254,22 @@ namespace ARMeilleure.Instructions
public static void Not_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAvx512Ortho)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else if (Optimizations.UseSse2)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
@@ -283,6 +298,22 @@ namespace ARMeilleure.Instructions
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
}
else if (Optimizations.UseAvx512Ortho)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

View File

@@ -151,6 +151,13 @@ namespace ARMeilleure.Instructions
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseAvx512Ortho)
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
});
}
else if (Optimizations.UseSse2)
{
Operand mask = context.VectorOne();

View File

@@ -34,7 +34,14 @@ namespace ARMeilleure.Instructions
public static void Vmvn_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAvx512Ortho)
{
EmitVectorUnaryOpSimd32(context, (op1) =>
{
return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
});
}
else if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (op1) =>
{

View File

@@ -173,6 +173,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Vfnmadd231ss,
X86Vfnmsub231sd,
X86Vfnmsub231ss,
X86Vpternlogd,
X86Xorpd,
X86Xorps,

View File

@@ -23,6 +23,10 @@ namespace ARMeilleure
public static bool UseSse42IfAvailable { get; set; } = true;
public static bool UsePopCntIfAvailable { get; set; } = true;
public static bool UseAvxIfAvailable { get; set; } = true;
public static bool UseAvx512FIfAvailable { get; set; } = true;
public static bool UseAvx512VlIfAvailable { get; set; } = true;
public static bool UseAvx512BwIfAvailable { get; set; } = true;
public static bool UseAvx512DqIfAvailable { get; set; } = true;
public static bool UseF16cIfAvailable { get; set; } = true;
public static bool UseFmaIfAvailable { get; set; } = true;
public static bool UseAesniIfAvailable { get; set; } = true;
@@ -47,11 +51,18 @@ namespace ARMeilleure
internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
internal static bool UseAvx512F => UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse;
internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse;
internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse;
internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse;
internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl;
internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq;
}
}

View File

@@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 4484; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@@ -969,6 +969,7 @@ namespace ARMeilleure.Translation.PTC
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
(ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
0,
0);
}
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
@@ -977,11 +978,12 @@ namespace ARMeilleure.Translation.PTC
(ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
(ulong)X86HardwareCapabilities.FeatureInfo1Edx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx);
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx,
(ulong)X86HardwareCapabilities.Xcr0InfoEax);
}
else
{
return new FeatureInfo(0, 0, 0, 0);
return new FeatureInfo(0, 0, 0, 0, 0);
}
}
@@ -1002,7 +1004,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform;
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 78*/)]
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)]
private struct OuterHeader
{
public ulong Magic;
@@ -1034,8 +1036,8 @@ namespace ARMeilleure.Translation.PTC
}
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)]
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)]
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader

View File

@@ -180,7 +180,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
int firstInstance = (int)_state.State.FirstInstance;
int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount();
int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer);
if (inlineIndexCount != 0)
{
@@ -670,7 +670,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
if (indexedInline)
{
int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount();
int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer);
BufferRange br = new BufferRange(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4);
_channel.BufferManager.SetIndexBuffer(br, IndexType.UInt);

View File

@@ -11,9 +11,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
struct IbStreamer
{
private const int BufferCapacity = 256; // Must be a power of 2.
private BufferHandle _inlineIndexBuffer;
private int _inlineIndexBufferSize;
private int _inlineIndexCount;
private uint[] _buffer;
private int _bufferOffset;
/// <summary>
/// Indicates if any index buffer data has been pushed.
@@ -38,9 +42,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// Gets the number of elements on the current inline index buffer,
/// while also resetting it to zero for the next draw.
/// </summary>
/// <param name="renderer">Host renderer</param>
/// <returns>Inline index buffer count</returns>
public int GetAndResetInlineIndexCount()
public int GetAndResetInlineIndexCount(IRenderer renderer)
{
UpdateRemaining(renderer);
int temp = _inlineIndexCount;
_inlineIndexCount = 0;
return temp;
@@ -58,16 +64,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
byte i2 = (byte)(argument >> 16);
byte i3 = (byte)(argument >> 24);
Span<uint> data = stackalloc uint[4];
int offset = _inlineIndexCount;
data[0] = i0;
data[1] = i1;
data[2] = i2;
data[3] = i3;
int offset = _inlineIndexCount * 4;
renderer.SetBufferData(GetInlineIndexBuffer(renderer, offset), offset, MemoryMarshal.Cast<uint, byte>(data));
PushData(renderer, offset, i0);
PushData(renderer, offset + 1, i1);
PushData(renderer, offset + 2, i2);
PushData(renderer, offset + 3, i3);
_inlineIndexCount += 4;
}
@@ -82,14 +84,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
ushort i0 = (ushort)argument;
ushort i1 = (ushort)(argument >> 16);
Span<uint> data = stackalloc uint[2];
int offset = _inlineIndexCount;
data[0] = i0;
data[1] = i1;
int offset = _inlineIndexCount * 4;
renderer.SetBufferData(GetInlineIndexBuffer(renderer, offset), offset, MemoryMarshal.Cast<uint, byte>(data));
PushData(renderer, offset, i0);
PushData(renderer, offset + 1, i1);
_inlineIndexCount += 2;
}
@@ -103,13 +101,61 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
uint i0 = (uint)argument;
Span<uint> data = stackalloc uint[1];
int offset = _inlineIndexCount++;
data[0] = i0;
PushData(renderer, offset, i0);
}
int offset = _inlineIndexCount++ * 4;
/// <summary>
/// Pushes a 32-bit value to the index buffer.
/// The value is staged in a CPU-side array; a full chunk of <c>BufferCapacity</c> words is uploaded
/// to the host buffer when the first value of the following chunk is pushed.
/// </summary>
/// <param name="renderer">Host renderer</param>
/// <param name="offset">Offset where the data should be written, in 32-bit words</param>
/// <param name="value">Index value to be written</param>
private void PushData(IRenderer renderer, int offset, uint value)
{
// Lazily allocate the staging array on first use.
if (_buffer == null)
{
_buffer = new uint[BufferCapacity];
}
// NOTE(review): the next statement references `data`, which is not declared in this method, and calls
// GetInlineIndexBuffer without a length argument. It looks like a removed line carried over from the
// diff view rather than part of this method - confirm against the actual file.
renderer.SetBufferData(GetInlineIndexBuffer(renderer, offset), offset, MemoryMarshal.Cast<uint, byte>(data));
// We upload data in chunks.
// If we are at the start of a chunk, then the buffer might be full,
// in that case we need to submit any existing data before overwriting the buffer.
// BufferCapacity is a power of 2, so the mask yields the position inside the current chunk.
int subOffset = offset & (BufferCapacity - 1);
if (subOffset == 0 && offset != 0)
{
// Byte offset of the previous (now complete) chunk inside the host buffer.
int baseOffset = (offset - BufferCapacity) * sizeof(uint);
BufferHandle buffer = GetInlineIndexBuffer(renderer, baseOffset, BufferCapacity * sizeof(uint));
renderer.SetBufferData(buffer, baseOffset, MemoryMarshal.Cast<uint, byte>(_buffer));
}
// Stage the new value; it is uploaded once its chunk is submitted.
_buffer[subOffset] = value;
}
/// <summary>
/// Uploads the index data that is still staged on the CPU side,
/// so that nothing is pending when the index buffer is used.
/// </summary>
/// <param name="renderer">Host renderer</param>
private void UpdateRemaining(IRenderer renderer)
{
    int totalWords = _inlineIndexCount;

    if (totalWords != 0)
    {
        // Words held by the last chunk; a zero remainder means that chunk is completely full.
        int pendingWords = totalWords & (BufferCapacity - 1);
        pendingWords = pendingWords != 0 ? pendingWords : BufferCapacity;

        int byteLength = pendingWords * sizeof(uint);
        int byteOffset = (totalWords - pendingWords) * sizeof(uint);

        BufferHandle target = GetInlineIndexBuffer(renderer, byteOffset, byteLength);

        renderer.SetBufferData(target, byteOffset, MemoryMarshal.Cast<uint, byte>(_buffer).Slice(0, byteLength));
    }
}
/// <summary>
@@ -117,12 +163,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
/// <param name="renderer">Host renderer</param>
/// <param name="offset">Offset where the data will be written</param>
/// <param name="length">Number of bytes that will be written</param>
/// <returns>Buffer handle</returns>
private BufferHandle GetInlineIndexBuffer(IRenderer renderer, int offset)
private BufferHandle GetInlineIndexBuffer(IRenderer renderer, int offset, int length)
{
// Calculate a reasonable size for the buffer that can fit all the data,
// and that also won't require frequent resizes if we need to push more data.
int size = BitUtils.AlignUp(offset + 0x10, 0x200);
int size = BitUtils.AlignUp(offset + length + 0x10, 0x200);
if (_inlineIndexBuffer == BufferHandle.Null)
{

View File

@@ -130,6 +130,10 @@ namespace Ryujinx.Graphics.Gpu.Image
return ref descriptor;
}
}
else
{
texture.SynchronizeMemory();
}
Items[id] = texture;
@@ -233,7 +237,7 @@ namespace Ryujinx.Graphics.Gpu.Image
}
/// <summary>
/// Queues a request to update a texture's mapping.
/// Queues a request to update a texture's mapping.
/// Mapping is updated later to avoid deleting the texture if it is still sparsely mapped.
/// </summary>
/// <param name="texture">Texture with potential mapping change</param>

View File

@@ -14,6 +14,9 @@ namespace Ryujinx.HLE.HOS.Services.Nim
// CreateServerInterface(pid, handle<unknown>, u64) -> object<nn::ec::IShopServiceAccessServer>
public ResultCode CreateServerInterface(ServiceCtx context)
{
// Close transfer memory immediately as we don't use it.
context.Device.System.KernelContext.Syscall.CloseHandle(context.Request.HandleDesc.ToCopy[0]);
MakeObject(context, new IShopServiceAccessServer());
Logger.Stub?.PrintStub(LogClass.ServiceNim);