Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
915d6d044c | ||
|
a4780ab33b | ||
|
a947a45d81 | ||
|
9db73f74cf | ||
|
a1efd87c45 | ||
|
49be977588 | ||
|
c95be55091 |
@@ -226,6 +226,8 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||
Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
|
||||
Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
|
||||
Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
|
||||
Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister));
|
||||
Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister));
|
||||
Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
|
||||
Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
|
||||
Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
|
||||
|
@@ -268,11 +268,13 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
|
@@ -249,10 +249,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||
case IntrinsicType.Mxcsr:
|
||||
{
|
||||
Operand offset = operation.GetSource(0);
|
||||
Operand bits = operation.GetSource(1);
|
||||
|
||||
Debug.Assert(offset.Kind == OperandKind.Constant && bits.Kind == OperandKind.Constant);
|
||||
Debug.Assert(offset.Type == OperandType.I32 && bits.Type == OperandType.I32);
|
||||
Debug.Assert(offset.Kind == OperandKind.Constant);
|
||||
Debug.Assert(offset.Type == OperandType.I32);
|
||||
|
||||
int offs = offset.AsInt32() + context.CallArgsRegionSize;
|
||||
|
||||
@@ -261,21 +260,23 @@ namespace ARMeilleure.CodeGen.X86
|
||||
|
||||
Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
|
||||
|
||||
context.Assembler.Stmxcsr(memOp);
|
||||
|
||||
if (operation.Intrinsic == Intrinsic.X86Mxcsrmb)
|
||||
if (operation.Intrinsic == Intrinsic.X86Ldmxcsr)
|
||||
{
|
||||
context.Assembler.Or(memOp, bits, OperandType.I32);
|
||||
Operand bits = operation.GetSource(1);
|
||||
Debug.Assert(bits.Type == OperandType.I32);
|
||||
|
||||
context.Assembler.Mov(memOp, bits, OperandType.I32);
|
||||
context.Assembler.Ldmxcsr(memOp);
|
||||
}
|
||||
else /* if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) */
|
||||
else if (operation.Intrinsic == Intrinsic.X86Stmxcsr)
|
||||
{
|
||||
Operand notBits = Const(~bits.AsInt32());
|
||||
Operand dest = operation.Destination;
|
||||
Debug.Assert(dest.Type == OperandType.I32);
|
||||
|
||||
context.Assembler.And(memOp, notBits, OperandType.I32);
|
||||
context.Assembler.Stmxcsr(memOp);
|
||||
context.Assembler.Mov(dest, memOp, OperandType.I32);
|
||||
}
|
||||
|
||||
context.Assembler.Ldmxcsr(memOp);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -60,6 +60,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
|
||||
Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
|
||||
Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
|
||||
@@ -75,8 +76,6 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Mxcsrmb, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Mask bits.
|
||||
Add(Intrinsic.X86Mxcsrub, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Unmask bits.
|
||||
Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
||||
@@ -160,6 +159,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
|
||||
Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
|
||||
@@ -170,11 +170,13 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
|
||||
Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
|
||||
|
15
ARMeilleure/CodeGen/X86/Mxcsr.cs
Normal file
15
ARMeilleure/CodeGen/X86/Mxcsr.cs
Normal file
@@ -0,0 +1,15 @@
|
||||
using System;
|
||||
|
||||
namespace ARMeilleure.CodeGen.X86
|
||||
{
|
||||
[Flags]
|
||||
enum Mxcsr
|
||||
{
|
||||
Ftz = 1 << 15, // Flush To Zero.
|
||||
Rhi = 1 << 14, // Round Mode high bit.
|
||||
Rlo = 1 << 13, // Round Mode low bit.
|
||||
Um = 1 << 11, // Underflow Mask.
|
||||
Dm = 1 << 8, // Denormal Mask.
|
||||
Daz = 1 << 6 // Denormals Are Zero.
|
||||
}
|
||||
}
|
@@ -120,12 +120,18 @@ namespace ARMeilleure.CodeGen.X86
|
||||
break;
|
||||
|
||||
case Instruction.Extended:
|
||||
if (node.Intrinsic == Intrinsic.X86Mxcsrmb || node.Intrinsic == Intrinsic.X86Mxcsrub)
|
||||
if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
|
||||
{
|
||||
int stackOffset = stackAlloc.Allocate(OperandType.I32);
|
||||
|
||||
node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) });
|
||||
}
|
||||
else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
|
||||
{
|
||||
int stackOffset = stackAlloc.Allocate(OperandType.I32);
|
||||
|
||||
node.SetSources(new Operand[] { Const(stackOffset) });
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@@ -208,11 +208,13 @@ namespace ARMeilleure.CodeGen.X86
|
||||
Vblendvps,
|
||||
Vcvtph2ps,
|
||||
Vcvtps2ph,
|
||||
Vfmadd231pd,
|
||||
Vfmadd231ps,
|
||||
Vfmadd231sd,
|
||||
Vfmadd231ss,
|
||||
Vfmsub231sd,
|
||||
Vfmsub231ss,
|
||||
Vfnmadd231pd,
|
||||
Vfnmadd231ps,
|
||||
Vfnmadd231sd,
|
||||
Vfnmadd231ss,
|
||||
|
@@ -108,6 +108,13 @@ namespace ARMeilleure.Decoders
|
||||
SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create);
|
||||
SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
|
||||
SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create);
|
||||
SetA64("11010101000000110010000011011111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("11010101000000110010000011111111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("110101010000001100100001xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("1101010100000011001000100xx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("1101010100000011001000101>>11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("110101010000001100100011xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("11010101000000110010>>xxxxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint
|
||||
SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, OpCodeSystem.Create);
|
||||
SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, OpCodeMemEx.Create);
|
||||
SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, OpCodeMemEx.Create);
|
||||
|
@@ -615,14 +615,11 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
||||
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
||||
|
||||
Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
|
||||
Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
|
||||
|
||||
return context.AddIntrinsic(addInst, op1, op2);
|
||||
}, scalar: false, op1, op2);
|
||||
return context.AddIntrinsic(addInst, op1, op2);
|
||||
}, scalar: false, op1, op2);
|
||||
});
|
||||
}
|
||||
@@ -696,17 +693,33 @@ namespace ARMeilleure.Instructions
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
Operand res;
|
||||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addss, a, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
@@ -730,10 +743,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: true, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: true);
|
||||
}
|
||||
else
|
||||
@@ -755,10 +765,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false);
|
||||
}
|
||||
else
|
||||
@@ -886,10 +893,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false, op1, op2);
|
||||
});
|
||||
}
|
||||
@@ -914,10 +918,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
|
||||
}, scalar: false, op1, op2);
|
||||
});
|
||||
}
|
||||
@@ -940,10 +941,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: true, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: true);
|
||||
}
|
||||
else
|
||||
@@ -965,10 +963,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false);
|
||||
}
|
||||
else
|
||||
@@ -1096,10 +1091,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false, op1, op2);
|
||||
});
|
||||
}
|
||||
@@ -1124,10 +1116,7 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
|
||||
}, scalar: false, op1, op2);
|
||||
});
|
||||
}
|
||||
@@ -1146,6 +1135,37 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe);
|
||||
}
|
||||
else if (Optimizations.UseFma)
|
||||
{
|
||||
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
||||
|
||||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res);
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
int shuffleMask = op.Index | op.Index << 1;
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res);
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
|
||||
@@ -1171,11 +1191,19 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
@@ -1186,9 +1214,15 @@ namespace ARMeilleure.Instructions
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
@@ -1224,8 +1258,15 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addps, d, res);
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
@@ -1240,8 +1281,15 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
@@ -1261,6 +1309,37 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe);
|
||||
}
|
||||
else if (Optimizations.UseFma)
|
||||
{
|
||||
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
|
||||
|
||||
Operand d = GetVec(op.Rd);
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6;
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res);
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
int shuffleMask = op.Index | op.Index << 1;
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res);
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
|
||||
@@ -1286,11 +1365,19 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
@@ -1301,9 +1388,15 @@ namespace ARMeilleure.Instructions
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
@@ -1339,8 +1432,15 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, d, res);
|
||||
}
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
@@ -1355,8 +1455,15 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask));
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res);
|
||||
}
|
||||
|
||||
context.Copy(d, res);
|
||||
}
|
||||
@@ -1385,17 +1492,33 @@ namespace ARMeilleure.Instructions
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
Operand res;
|
||||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, a, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
@@ -1669,25 +1792,39 @@ namespace ARMeilleure.Instructions
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
Operand res;
|
||||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0f);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0f);
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
|
||||
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0d);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0d);
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
|
||||
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
@@ -1716,25 +1853,39 @@ namespace ARMeilleure.Instructions
|
||||
Operand n = GetVec(op.Rn);
|
||||
Operand m = GetVec(op.Rm);
|
||||
|
||||
Operand res;
|
||||
|
||||
if (op.Size == 0)
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0f);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0f);
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
|
||||
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (op.Size == 1) */
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0d);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand mask = X86GetScalar(context, -0d);
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
|
||||
|
||||
Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res);
|
||||
}
|
||||
|
||||
context.Copy(d, context.VectorZeroUpper64(res));
|
||||
}
|
||||
@@ -1830,13 +1981,22 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand mask = X86GetScalar(context, 2f);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
@@ -1845,9 +2005,16 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
Operand mask = X86GetScalar(context, 2d);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
|
||||
@@ -1877,14 +2044,23 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand mask = X86GetAllElements(context, 2f);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
@@ -1897,10 +2073,17 @@ namespace ARMeilleure.Instructions
|
||||
{
|
||||
Operand mask = X86GetAllElements(context, 2d);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF);
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
@@ -2113,20 +2296,32 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
public static void Frintx_S(ArmEmitterContext context)
|
||||
{
|
||||
// TODO Arm64: Fast path. Should we set host FPCR?
|
||||
EmitScalarUnaryOpF(context, (op1) =>
|
||||
if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarUnaryOpF(context, (op1) =>
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Frintx_V(ArmEmitterContext context)
|
||||
{
|
||||
// TODO Arm64: Fast path. Should we set host FPCR?
|
||||
EmitVectorUnaryOpF(context, (op1) =>
|
||||
if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorUnaryOpF(context, (op1) =>
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Frintz_S(ArmEmitterContext context)
|
||||
@@ -2237,16 +2432,25 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand maskHalf = X86GetScalar(context, 0.5f);
|
||||
Operand maskThree = X86GetScalar(context, 3f);
|
||||
Operand maskOneHalf = X86GetScalar(context, 1.5f);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
@@ -2257,10 +2461,17 @@ namespace ARMeilleure.Instructions
|
||||
Operand maskThree = X86GetScalar(context, 3d);
|
||||
Operand maskOneHalf = X86GetScalar(context, 1.5d);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res));
|
||||
@@ -2290,15 +2501,24 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
Operand res;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
Operand maskHalf = X86GetAllElements(context, 0.5f);
|
||||
Operand maskThree = X86GetAllElements(context, 3f);
|
||||
Operand maskOneHalf = X86GetAllElements(context, 1.5f);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
|
||||
|
||||
@@ -2315,9 +2535,16 @@ namespace ARMeilleure.Instructions
|
||||
Operand maskThree = X86GetAllElements(context, 3d);
|
||||
Operand maskOneHalf = X86GetAllElements(context, 1.5d);
|
||||
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
if (Optimizations.UseFma)
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
|
||||
}
|
||||
|
||||
res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res);
|
||||
res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF);
|
||||
|
||||
@@ -4728,53 +4955,6 @@ namespace ARMeilleure.Instructions
|
||||
}
|
||||
}
|
||||
|
||||
public static Operand EmitSseOrAvxHandleFzModeOpF(
|
||||
ArmEmitterContext context,
|
||||
Func2I emit,
|
||||
bool scalar,
|
||||
Operand n = default,
|
||||
Operand m = default)
|
||||
{
|
||||
Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n;
|
||||
Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m;
|
||||
|
||||
EmitSseOrAvxEnterFtzAndDazModesOpF(context, out Operand isTrue);
|
||||
|
||||
Operand res = emit(nCopy, mCopy);
|
||||
|
||||
EmitSseOrAvxExitFtzAndDazModesOpF(context, isTrue);
|
||||
|
||||
if (n != default || m != default)
|
||||
{
|
||||
return res;
|
||||
}
|
||||
|
||||
int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
|
||||
|
||||
if (sizeF == 0)
|
||||
{
|
||||
if (scalar)
|
||||
{
|
||||
res = context.VectorZeroUpper96(res);
|
||||
}
|
||||
else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
{
|
||||
if (scalar)
|
||||
{
|
||||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
}
|
||||
|
||||
context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
|
||||
|
||||
return default;
|
||||
}
|
||||
|
||||
private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
|
||||
{
|
||||
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
||||
@@ -4834,10 +5014,7 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
|
||||
}, scalar: scalar, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
|
||||
}, scalar: scalar, nCopy, mCopy);
|
||||
|
||||
if (n != default || m != default)
|
||||
@@ -4872,10 +5049,7 @@ namespace ARMeilleure.Instructions
|
||||
|
||||
Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSseOrAvxHandleFzModeOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
|
||||
}, scalar: scalar, op1, op2);
|
||||
return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
|
||||
}, scalar: scalar, nCopy, mCopy);
|
||||
|
||||
if (n != default || m != default)
|
||||
|
@@ -356,9 +356,11 @@ namespace ARMeilleure.Instructions
|
||||
? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))
|
||||
: typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert));
|
||||
|
||||
context.ExitArmFpMode();
|
||||
context.StoreToContext();
|
||||
Operand res = context.Call(method, src);
|
||||
context.LoadFromContext();
|
||||
context.EnterArmFpMode();
|
||||
|
||||
InsertScalar16(context, op.Vd, op.T, res);
|
||||
}
|
||||
@@ -372,9 +374,11 @@ namespace ARMeilleure.Instructions
|
||||
? typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))
|
||||
: typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert));
|
||||
|
||||
context.ExitArmFpMode();
|
||||
context.StoreToContext();
|
||||
Operand res = context.Call(method, src);
|
||||
context.LoadFromContext();
|
||||
context.EnterArmFpMode();
|
||||
|
||||
InsertScalar(context, op.Vd, res);
|
||||
}
|
||||
@@ -542,10 +546,17 @@ namespace ARMeilleure.Instructions
|
||||
// VRINTX (floating-point).
|
||||
public static void Vrintx_S(ArmEmitterContext context)
|
||||
{
|
||||
EmitScalarUnaryOpF32(context, (op1) =>
|
||||
if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarUnaryOpF32(context, (op1) =>
|
||||
{
|
||||
return EmitRoundByRMode(context, op1);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
using ARMeilleure.CodeGen.X86;
|
||||
using ARMeilleure.Decoders;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
@@ -158,6 +159,75 @@ namespace ARMeilleure.Instructions
|
||||
};
|
||||
#endregion
|
||||
|
||||
public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag)
|
||||
{
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
|
||||
|
||||
Operand fzTrue = getFpFlag(FPState.FzFlag);
|
||||
Operand r0True = getFpFlag(FPState.RMode0Flag);
|
||||
Operand r1True = getFpFlag(FPState.RMode1Flag);
|
||||
|
||||
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
|
||||
|
||||
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0)));
|
||||
|
||||
// X86 round modes in order: nearest, negative, positive, zero
|
||||
// ARM round modes in order: nearest, positive, negative, zero
|
||||
// Read the bits backwards to correct this.
|
||||
|
||||
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0)));
|
||||
mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0)));
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
|
||||
}
|
||||
else if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
|
||||
|
||||
Operand fzTrue = getFpFlag(FPState.FzFlag);
|
||||
Operand r0True = getFpFlag(FPState.RMode0Flag);
|
||||
Operand r1True = getFpFlag(FPState.RMode1Flag);
|
||||
|
||||
fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
|
||||
|
||||
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0)));
|
||||
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0)));
|
||||
fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0)));
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
|
||||
|
||||
// TODO: Restore FPSR
|
||||
}
|
||||
}
|
||||
|
||||
public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag)
|
||||
{
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
|
||||
|
||||
// Unset round mode (to nearest) and ftz.
|
||||
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo)));
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
|
||||
|
||||
// Status flags would be stored here if they were used.
|
||||
}
|
||||
else if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
|
||||
|
||||
// Unset round mode (to nearest) and fz.
|
||||
fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1)));
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
|
||||
|
||||
// TODO: Store FPSR
|
||||
}
|
||||
}
|
||||
|
||||
public static int GetImmShl(OpCodeSimdShImm op)
|
||||
{
|
||||
return op.Imm - (8 << op.Size);
|
||||
@@ -465,9 +535,11 @@ namespace ARMeilleure.Instructions
|
||||
? typeof(SoftFloat32).GetMethod(name)
|
||||
: typeof(SoftFloat64).GetMethod(name);
|
||||
|
||||
context.ExitArmFpMode();
|
||||
context.StoreToContext();
|
||||
Operand res = context.Call(info, callArgs);
|
||||
context.LoadFromContext();
|
||||
context.EnterArmFpMode();
|
||||
|
||||
return res;
|
||||
}
|
||||
@@ -1358,39 +1430,6 @@ namespace ARMeilleure.Instructions
|
||||
}
|
||||
}
|
||||
|
||||
[Flags]
|
||||
public enum Mxcsr
|
||||
{
|
||||
Ftz = 1 << 15, // Flush To Zero.
|
||||
Um = 1 << 11, // Underflow Mask.
|
||||
Dm = 1 << 8, // Denormal Mask.
|
||||
Daz = 1 << 6 // Denormals Are Zero.
|
||||
}
|
||||
|
||||
public static void EmitSseOrAvxEnterFtzAndDazModesOpF(ArmEmitterContext context, out Operand isTrue)
|
||||
{
|
||||
isTrue = GetFpFlag(FPState.FzFlag);
|
||||
|
||||
Operand lblTrue = Label();
|
||||
context.BranchIfFalse(lblTrue, isTrue);
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrmb, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm | Mxcsr.Daz)));
|
||||
|
||||
context.MarkLabel(lblTrue);
|
||||
}
|
||||
|
||||
public static void EmitSseOrAvxExitFtzAndDazModesOpF(ArmEmitterContext context, Operand isTrue = default)
|
||||
{
|
||||
isTrue = isTrue == default ? GetFpFlag(FPState.FzFlag) : isTrue;
|
||||
|
||||
Operand lblTrue = Label();
|
||||
context.BranchIfFalse(lblTrue, isTrue);
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Mxcsrub, Const((int)(Mxcsr.Ftz | Mxcsr.Daz)));
|
||||
|
||||
context.MarkLabel(lblTrue);
|
||||
}
|
||||
|
||||
public enum CmpCondition
|
||||
{
|
||||
// Legacy Sse.
|
||||
|
@@ -1197,9 +1197,11 @@ namespace ARMeilleure.Instructions
|
||||
Array.Resize(ref callArgs, callArgs.Length + 1);
|
||||
callArgs[callArgs.Length - 1] = Const(1);
|
||||
|
||||
context.ExitArmFpMode();
|
||||
context.StoreToContext();
|
||||
Operand res = context.Call(info, callArgs);
|
||||
context.LoadFromContext();
|
||||
context.EnterArmFpMode();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@@ -192,6 +192,8 @@ namespace ARMeilleure.Instructions
|
||||
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
|
||||
}
|
||||
}
|
||||
|
||||
context.UpdateArmFpMode();
|
||||
}
|
||||
|
||||
private static void EmitSetFpsr(ArmEmitterContext context)
|
||||
@@ -210,6 +212,8 @@ namespace ARMeilleure.Instructions
|
||||
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
|
||||
}
|
||||
}
|
||||
|
||||
context.UpdateArmFpMode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -321,6 +321,8 @@ namespace ARMeilleure.Instructions
|
||||
SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1)));
|
||||
}
|
||||
}
|
||||
|
||||
context.UpdateArmFpMode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -53,6 +53,7 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||
X86Haddpd,
|
||||
X86Haddps,
|
||||
X86Insertps,
|
||||
X86Ldmxcsr,
|
||||
X86Maxpd,
|
||||
X86Maxps,
|
||||
X86Maxsd,
|
||||
@@ -68,8 +69,6 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||
X86Mulps,
|
||||
X86Mulsd,
|
||||
X86Mulss,
|
||||
X86Mxcsrmb,
|
||||
X86Mxcsrub,
|
||||
X86Paddb,
|
||||
X86Paddd,
|
||||
X86Paddq,
|
||||
@@ -153,6 +152,7 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||
X86Sqrtps,
|
||||
X86Sqrtsd,
|
||||
X86Sqrtss,
|
||||
X86Stmxcsr,
|
||||
X86Subpd,
|
||||
X86Subps,
|
||||
X86Subsd,
|
||||
@@ -163,11 +163,13 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||
X86Unpcklps,
|
||||
X86Vcvtph2ps,
|
||||
X86Vcvtps2ph,
|
||||
X86Vfmadd231pd,
|
||||
X86Vfmadd231ps,
|
||||
X86Vfmadd231sd,
|
||||
X86Vfmadd231ss,
|
||||
X86Vfmsub231sd,
|
||||
X86Vfmsub231ss,
|
||||
X86Vfnmadd231pd,
|
||||
X86Vfnmadd231ps,
|
||||
X86Vfnmadd231sd,
|
||||
X86Vfnmadd231ss,
|
||||
@@ -394,6 +396,8 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||
Arm64MlsVe,
|
||||
Arm64MlsV,
|
||||
Arm64MoviV,
|
||||
Arm64MrsFpcr,
|
||||
Arm64MsrFpcr,
|
||||
Arm64MrsFpsr,
|
||||
Arm64MsrFpsr,
|
||||
Arm64MulVe,
|
||||
|
@@ -188,6 +188,21 @@ namespace ARMeilleure.Translation
|
||||
}
|
||||
}
|
||||
|
||||
public void EnterArmFpMode()
|
||||
{
|
||||
InstEmitSimdHelper.EnterArmFpMode(this, InstEmitHelper.GetFpFlag);
|
||||
}
|
||||
|
||||
public void UpdateArmFpMode()
|
||||
{
|
||||
EnterArmFpMode();
|
||||
}
|
||||
|
||||
public void ExitArmFpMode()
|
||||
{
|
||||
InstEmitSimdHelper.ExitArmFpMode(this, (flag, value) => InstEmitHelper.SetFpFlag(this, flag, value));
|
||||
}
|
||||
|
||||
public Operand TryGetComparisonResult(Condition condition)
|
||||
{
|
||||
if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
|
||||
|
@@ -3,4 +3,5 @@
|
||||
namespace ARMeilleure.Translation
|
||||
{
|
||||
delegate void DispatcherFunction(IntPtr nativeContext, ulong startAddress);
|
||||
delegate ulong WrapperFunction(IntPtr nativeContext, ulong startAddress);
|
||||
}
|
||||
|
@@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
|
||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 4626; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
@@ -25,5 +25,10 @@ namespace ARMeilleure.Translation
|
||||
{
|
||||
return _func(context.NativeContextPtr);
|
||||
}
|
||||
|
||||
public ulong Execute(WrapperFunction dispatcher, State.ExecutionContext context)
|
||||
{
|
||||
return dispatcher(context.NativeContextPtr, (ulong)FuncPointer);
|
||||
}
|
||||
}
|
||||
}
|
@@ -183,7 +183,7 @@ namespace ARMeilleure.Translation
|
||||
|
||||
Statistics.StartTimer();
|
||||
|
||||
ulong nextAddr = func.Execute(context);
|
||||
ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
|
||||
|
||||
Statistics.StopTimer(address);
|
||||
|
||||
@@ -194,7 +194,7 @@ namespace ARMeilleure.Translation
|
||||
{
|
||||
TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
|
||||
|
||||
address = func.Execute(context);
|
||||
address = func.Execute(Stubs.ContextWrapper, context);
|
||||
|
||||
EnqueueForDeletion(address, func);
|
||||
|
||||
|
@@ -21,6 +21,7 @@ namespace ARMeilleure.Translation
|
||||
private readonly Translator _translator;
|
||||
private readonly Lazy<IntPtr> _dispatchStub;
|
||||
private readonly Lazy<DispatcherFunction> _dispatchLoop;
|
||||
private readonly Lazy<WrapperFunction> _contextWrapper;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the dispatch stub.
|
||||
@@ -64,6 +65,20 @@ namespace ARMeilleure.Translation
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the context wrapper function.
|
||||
/// </summary>
|
||||
/// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
|
||||
public WrapperFunction ContextWrapper
|
||||
{
|
||||
get
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
|
||||
return _contextWrapper.Value;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
|
||||
/// <see cref="Translator"/> instance.
|
||||
@@ -77,6 +92,7 @@ namespace ARMeilleure.Translation
|
||||
_translator = translator;
|
||||
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
|
||||
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
|
||||
_contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -202,6 +218,32 @@ namespace ARMeilleure.Translation
|
||||
return Marshal.GetFunctionPointerForDelegate(func);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Emits code that syncs FP state before executing guest code, or returns it to normal.
|
||||
/// </summary>
|
||||
/// <param name="context">Emitter context for the method</param>
|
||||
/// <param name="nativeContext">Pointer to the native context</param>
|
||||
/// <param name="enter">True if entering guest code, false otherwise</param>
|
||||
private void EmitSyncFpContext(EmitterContext context, Operand nativeContext, bool enter)
|
||||
{
|
||||
if (enter)
|
||||
{
|
||||
InstEmitSimdHelper.EnterArmFpMode(context, (flag) =>
|
||||
{
|
||||
Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
|
||||
return context.Load(OperandType.I32, flagAddress);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitSimdHelper.ExitArmFpMode(context, (flag, value) =>
|
||||
{
|
||||
Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
|
||||
context.Store(flagAddress, value);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates a <see cref="DispatchLoop"/> function.
|
||||
/// </summary>
|
||||
@@ -221,6 +263,8 @@ namespace ARMeilleure.Translation
|
||||
Operand runningAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRunningOffset()));
|
||||
Operand dispatchAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, true);
|
||||
|
||||
context.MarkLabel(beginLbl);
|
||||
context.Store(dispatchAddress, guestAddress);
|
||||
context.Copy(guestAddress, context.Call(Const((ulong)DispatchStub), OperandType.I64, nativeContext));
|
||||
@@ -229,6 +273,9 @@ namespace ARMeilleure.Translation
|
||||
context.Branch(beginLbl);
|
||||
|
||||
context.MarkLabel(endLbl);
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, false);
|
||||
|
||||
context.Return();
|
||||
|
||||
var cfg = context.GetControlFlowGraph();
|
||||
@@ -237,5 +284,29 @@ namespace ARMeilleure.Translation
|
||||
|
||||
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates a <see cref="ContextWrapper"/> function.
|
||||
/// </summary>
|
||||
/// <returns><see cref="ContextWrapper"/> function</returns>
|
||||
private WrapperFunction GenerateContextWrapper()
|
||||
{
|
||||
var context = new EmitterContext();
|
||||
|
||||
Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
|
||||
Operand guestMethod = context.LoadArgument(OperandType.I64, 1);
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, true);
|
||||
Operand returnValue = context.Call(guestMethod, OperandType.I64, nativeContext);
|
||||
EmitSyncFpContext(context, nativeContext, false);
|
||||
|
||||
context.Return(returnValue);
|
||||
|
||||
var cfg = context.GetControlFlowGraph();
|
||||
var retType = OperandType.I64;
|
||||
var argTypes = new[] { OperandType.I64, OperandType.I64 };
|
||||
|
||||
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<WrapperFunction>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
148
ARMeilleure/Translation/TranslatorTestMethods.cs
Normal file
148
ARMeilleure/Translation/TranslatorTestMethods.cs
Normal file
@@ -0,0 +1,148 @@
|
||||
using ARMeilleure.CodeGen.X86;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
||||
|
||||
namespace ARMeilleure.Translation
|
||||
{
|
||||
public static class TranslatorTestMethods
|
||||
{
|
||||
public delegate int FpFlagsPInvokeTest(IntPtr managedMethod);
|
||||
|
||||
private static bool SetPlatformFtz(EmitterContext context, bool ftz)
|
||||
{
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
|
||||
|
||||
if (ftz)
|
||||
{
|
||||
mxcsr = context.BitwiseOr(mxcsr, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm)));
|
||||
}
|
||||
else
|
||||
{
|
||||
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)Mxcsr.Ftz));
|
||||
}
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
|
||||
|
||||
return true;
|
||||
}
|
||||
else if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
|
||||
|
||||
if (ftz)
|
||||
{
|
||||
fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz));
|
||||
}
|
||||
else
|
||||
{
|
||||
fpcr = context.BitwiseAnd(fpcr, Const(~(int)FPCR.Fz));
|
||||
}
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static Operand FpBitsToInt(EmitterContext context, Operand fp)
|
||||
{
|
||||
Operand vec = context.VectorInsert(context.VectorZero(), fp, 0);
|
||||
return context.VectorExtract(OperandType.I32, vec, 0);
|
||||
}
|
||||
|
||||
public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest()
|
||||
{
|
||||
EmitterContext context = new EmitterContext();
|
||||
|
||||
Operand methodAddress = context.Copy(context.LoadArgument(OperandType.I64, 0));
|
||||
|
||||
// Verify that default dotnet fp state does not flush to zero.
|
||||
// This is required for SoftFloat to function.
|
||||
|
||||
// Denormal + zero != 0
|
||||
|
||||
Operand denormal = ConstF(BitConverter.Int32BitsToSingle(1)); // 1.40129846432e-45
|
||||
Operand zeroF = ConstF(0f);
|
||||
Operand zero = Const(0);
|
||||
|
||||
Operand result = context.Add(zeroF, denormal);
|
||||
|
||||
// Must not be zero.
|
||||
|
||||
Operand correct1Label = Label();
|
||||
|
||||
context.BranchIfFalse(correct1Label, context.ICompareEqual(FpBitsToInt(context, result), zero));
|
||||
|
||||
context.Return(Const(1));
|
||||
|
||||
context.MarkLabel(correct1Label);
|
||||
|
||||
// Set flush to zero flag. If unsupported by the backend, just return true.
|
||||
|
||||
if (!SetPlatformFtz(context, true))
|
||||
{
|
||||
context.Return(Const(0));
|
||||
}
|
||||
|
||||
// Denormal + zero == 0
|
||||
|
||||
Operand resultFz = context.Add(zeroF, denormal);
|
||||
|
||||
// Must equal zero.
|
||||
|
||||
Operand correct2Label = Label();
|
||||
|
||||
context.BranchIfTrue(correct2Label, context.ICompareEqual(FpBitsToInt(context, resultFz), zero));
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(2));
|
||||
|
||||
context.MarkLabel(correct2Label);
|
||||
|
||||
// Call a managed method. This method should not change Fz state.
|
||||
|
||||
context.Call(methodAddress, OperandType.None);
|
||||
|
||||
// Denormal + zero == 0
|
||||
|
||||
Operand resultFz2 = context.Add(zeroF, denormal);
|
||||
|
||||
// Must equal zero.
|
||||
|
||||
Operand correct3Label = Label();
|
||||
|
||||
context.BranchIfTrue(correct3Label, context.ICompareEqual(FpBitsToInt(context, resultFz2), zero));
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(3));
|
||||
|
||||
context.MarkLabel(correct3Label);
|
||||
|
||||
// Success.
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(0));
|
||||
|
||||
// Compile and return the function.
|
||||
|
||||
ControlFlowGraph cfg = context.GetControlFlowGraph();
|
||||
|
||||
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
|
||||
|
||||
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<FpFlagsPInvokeTest>();
|
||||
}
|
||||
}
|
||||
}
|
@@ -53,6 +53,8 @@ namespace Ryujinx.Ava.UI.Applet
|
||||
|
||||
bool opened = false;
|
||||
|
||||
_parent.Activate();
|
||||
|
||||
UserResult response = await ContentDialogHelper.ShowDeferredContentDialog(_parent,
|
||||
title,
|
||||
message,
|
||||
|
@@ -226,6 +226,7 @@ namespace Ryujinx.Graphics.OpenGL
|
||||
// Set clip control, viewport and the framebuffer to the output to placate overlays and OBS capture.
|
||||
GL.ClipControl(ClipOrigin.LowerLeft, ClipDepthMode.NegativeOneToOne);
|
||||
GL.Viewport(0, 0, _width, _height);
|
||||
GL.BindFramebuffer(FramebufferTarget.Framebuffer, drawFramebuffer);
|
||||
|
||||
swapBuffersCallback();
|
||||
|
||||
|
@@ -9,21 +9,18 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
private ulong MaxDeviceMemoryUsageEstimate = 16UL * 1024 * 1024 * 1024;
|
||||
|
||||
private readonly Vk _api;
|
||||
private readonly PhysicalDevice _physicalDevice;
|
||||
private readonly VulkanPhysicalDevice _physicalDevice;
|
||||
private readonly Device _device;
|
||||
private readonly List<MemoryAllocatorBlockList> _blockLists;
|
||||
private readonly int _blockAlignment;
|
||||
private readonly PhysicalDeviceMemoryProperties _physicalDeviceMemoryProperties;
|
||||
|
||||
public MemoryAllocator(Vk api, PhysicalDevice physicalDevice, Device device, uint maxMemoryAllocationCount)
|
||||
public MemoryAllocator(Vk api, VulkanPhysicalDevice physicalDevice, Device device)
|
||||
{
|
||||
_api = api;
|
||||
_physicalDevice = physicalDevice;
|
||||
_device = device;
|
||||
_blockLists = new List<MemoryAllocatorBlockList>();
|
||||
_blockAlignment = (int)Math.Min(int.MaxValue, MaxDeviceMemoryUsageEstimate / (ulong)maxMemoryAllocationCount);
|
||||
|
||||
_api.GetPhysicalDeviceMemoryProperties(_physicalDevice, out _physicalDeviceMemoryProperties);
|
||||
_blockAlignment = (int)Math.Min(int.MaxValue, MaxDeviceMemoryUsageEstimate / (ulong)_physicalDevice.PhysicalDeviceProperties.Limits.MaxMemoryAllocationCount);
|
||||
}
|
||||
|
||||
public MemoryAllocation AllocateDeviceMemory(
|
||||
@@ -64,9 +61,9 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
uint memoryTypeBits,
|
||||
MemoryPropertyFlags flags)
|
||||
{
|
||||
for (int i = 0; i < _physicalDeviceMemoryProperties.MemoryTypeCount; i++)
|
||||
for (int i = 0; i < _physicalDevice.PhysicalDeviceMemoryProperties.MemoryTypeCount; i++)
|
||||
{
|
||||
var type = _physicalDeviceMemoryProperties.MemoryTypes[i];
|
||||
var type = _physicalDevice.PhysicalDeviceMemoryProperties.MemoryTypes[i];
|
||||
|
||||
if ((memoryTypeBits & (1 << i)) != 0)
|
||||
{
|
||||
@@ -80,15 +77,11 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
return -1;
|
||||
}
|
||||
|
||||
public static bool IsDeviceMemoryShared(Vk api, PhysicalDevice physicalDevice)
|
||||
public static bool IsDeviceMemoryShared(VulkanPhysicalDevice physicalDevice)
|
||||
{
|
||||
// The device is regarded as having shared memory if all heaps have the device local bit.
|
||||
|
||||
api.GetPhysicalDeviceMemoryProperties(physicalDevice, out var properties);
|
||||
|
||||
for (int i = 0; i < properties.MemoryHeapCount; i++)
|
||||
for (int i = 0; i < physicalDevice.PhysicalDeviceMemoryProperties.MemoryHeapCount; i++)
|
||||
{
|
||||
if (!properties.MemoryHeaps[i].Flags.HasFlag(MemoryHeapFlags.DeviceLocalBit))
|
||||
if (!physicalDevice.PhysicalDeviceMemoryProperties.MemoryHeaps[i].Flags.HasFlag(MemoryHeapFlags.DeviceLocalBit))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@@ -47,35 +47,23 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
KhrSwapchain.ExtensionName
|
||||
};
|
||||
|
||||
internal static Instance CreateInstance(Vk api, GraphicsDebugLevel logLevel, string[] requiredExtensions)
|
||||
internal static VulkanInstance CreateInstance(Vk api, GraphicsDebugLevel logLevel, string[] requiredExtensions)
|
||||
{
|
||||
var enabledLayers = new List<string>();
|
||||
|
||||
var instanceExtensions = VulkanInstance.GetInstanceExtensions(api);
|
||||
var instanceLayers = VulkanInstance.GetInstanceLayers(api);
|
||||
|
||||
void AddAvailableLayer(string layerName)
|
||||
{
|
||||
uint layerPropertiesCount;
|
||||
|
||||
api.EnumerateInstanceLayerProperties(&layerPropertiesCount, null).ThrowOnError();
|
||||
|
||||
LayerProperties[] layerProperties = new LayerProperties[layerPropertiesCount];
|
||||
|
||||
fixed (LayerProperties* pLayerProperties = layerProperties)
|
||||
if (instanceLayers.Contains(layerName))
|
||||
{
|
||||
api.EnumerateInstanceLayerProperties(&layerPropertiesCount, layerProperties).ThrowOnError();
|
||||
|
||||
for (int i = 0; i < layerPropertiesCount; i++)
|
||||
{
|
||||
string currentLayerName = Marshal.PtrToStringAnsi((IntPtr)pLayerProperties[i].LayerName);
|
||||
|
||||
if (currentLayerName == layerName)
|
||||
{
|
||||
enabledLayers.Add(layerName);
|
||||
return;
|
||||
}
|
||||
}
|
||||
enabledLayers.Add(layerName);
|
||||
}
|
||||
else
|
||||
{
|
||||
Logger.Warning?.Print(LogClass.Gpu, $"Missing layer {layerName}");
|
||||
}
|
||||
|
||||
Logger.Warning?.Print(LogClass.Gpu, $"Missing layer {layerName}");
|
||||
}
|
||||
|
||||
if (logLevel != GraphicsDebugLevel.None)
|
||||
@@ -85,7 +73,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
var enabledExtensions = requiredExtensions;
|
||||
|
||||
if (api.IsInstanceExtensionPresent("VK_EXT_debug_utils"))
|
||||
if (instanceExtensions.Contains("VK_EXT_debug_utils"))
|
||||
{
|
||||
enabledExtensions = enabledExtensions.Append(ExtDebugUtils.ExtensionName).ToArray();
|
||||
}
|
||||
@@ -124,7 +112,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
EnabledLayerCount = (uint)enabledLayers.Count
|
||||
};
|
||||
|
||||
api.CreateInstance(in instanceCreateInfo, null, out var instance).ThrowOnError();
|
||||
Result result = VulkanInstance.Create(api, ref instanceCreateInfo, out var instance);
|
||||
|
||||
Marshal.FreeHGlobal(appName);
|
||||
|
||||
@@ -138,21 +126,14 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
Marshal.FreeHGlobal(ppEnabledLayers[i]);
|
||||
}
|
||||
|
||||
result.ThrowOnError();
|
||||
|
||||
return instance;
|
||||
}
|
||||
|
||||
internal static PhysicalDevice FindSuitablePhysicalDevice(Vk api, Instance instance, SurfaceKHR surface, string preferredGpuId)
|
||||
internal static VulkanPhysicalDevice FindSuitablePhysicalDevice(Vk api, VulkanInstance instance, SurfaceKHR surface, string preferredGpuId)
|
||||
{
|
||||
uint physicalDeviceCount;
|
||||
|
||||
api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, null).ThrowOnError();
|
||||
|
||||
PhysicalDevice[] physicalDevices = new PhysicalDevice[physicalDeviceCount];
|
||||
|
||||
fixed (PhysicalDevice* pPhysicalDevices = physicalDevices)
|
||||
{
|
||||
api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, pPhysicalDevices).ThrowOnError();
|
||||
}
|
||||
instance.EnumeratePhysicalDevices(out var physicalDevices).ThrowOnError();
|
||||
|
||||
// First we try to pick the user preferred GPU.
|
||||
for (int i = 0; i < physicalDevices.Length; i++)
|
||||
@@ -198,76 +179,41 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
EnabledLayerCount = 0
|
||||
};
|
||||
|
||||
api.CreateInstance(in instanceCreateInfo, null, out var instance).ThrowOnError();
|
||||
|
||||
// We ensure that vkEnumerateInstanceVersion is present (added in 1.1).
|
||||
// If the instance doesn't support it, no device is going to be 1.1 compatible.
|
||||
if (api.GetInstanceProcAddr(instance, "vkEnumerateInstanceVersion") == IntPtr.Zero)
|
||||
{
|
||||
api.DestroyInstance(instance, null);
|
||||
|
||||
return Array.Empty<DeviceInfo>();
|
||||
}
|
||||
|
||||
// We currently assume that the instance is compatible with Vulkan 1.2
|
||||
// TODO: Remove this once we relax our initialization codepaths.
|
||||
uint instanceApiVerison = 0;
|
||||
api.EnumerateInstanceVersion(ref instanceApiVerison).ThrowOnError();
|
||||
|
||||
if (instanceApiVerison < MinimalInstanceVulkanVersion)
|
||||
{
|
||||
api.DestroyInstance(instance, null);
|
||||
|
||||
return Array.Empty<DeviceInfo>();
|
||||
}
|
||||
Result result = VulkanInstance.Create(api, ref instanceCreateInfo, out var rawInstance);
|
||||
|
||||
Marshal.FreeHGlobal(appName);
|
||||
|
||||
uint physicalDeviceCount;
|
||||
result.ThrowOnError();
|
||||
|
||||
api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, null).ThrowOnError();
|
||||
using VulkanInstance instance = rawInstance;
|
||||
|
||||
PhysicalDevice[] physicalDevices = new PhysicalDevice[physicalDeviceCount];
|
||||
|
||||
fixed (PhysicalDevice* pPhysicalDevices = physicalDevices)
|
||||
// We currently assume that the instance is compatible with Vulkan 1.2
|
||||
// TODO: Remove this once we relax our initialization codepaths.
|
||||
if (instance.InstanceVersion < MinimalInstanceVulkanVersion)
|
||||
{
|
||||
api.EnumeratePhysicalDevices(instance, &physicalDeviceCount, pPhysicalDevices).ThrowOnError();
|
||||
return Array.Empty<DeviceInfo>();
|
||||
}
|
||||
|
||||
DeviceInfo[] devices = new DeviceInfo[physicalDevices.Length];
|
||||
instance.EnumeratePhysicalDevices(out VulkanPhysicalDevice[] physicalDevices).ThrowOnError();
|
||||
|
||||
for (int i = 0; i < physicalDevices.Length; i++)
|
||||
List<DeviceInfo> deviceInfos = new List<DeviceInfo>();
|
||||
|
||||
foreach (VulkanPhysicalDevice physicalDevice in physicalDevices)
|
||||
{
|
||||
var physicalDevice = physicalDevices[i];
|
||||
api.GetPhysicalDeviceProperties(physicalDevice, out var properties);
|
||||
|
||||
if (properties.ApiVersion < MinimalVulkanVersion)
|
||||
if (physicalDevice.PhysicalDeviceProperties.ApiVersion < MinimalVulkanVersion)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
devices[i] = new DeviceInfo(
|
||||
StringFromIdPair(properties.VendorID, properties.DeviceID),
|
||||
VendorUtils.GetNameFromId(properties.VendorID),
|
||||
Marshal.PtrToStringAnsi((IntPtr)properties.DeviceName),
|
||||
properties.DeviceType == PhysicalDeviceType.DiscreteGpu);
|
||||
deviceInfos.Add(physicalDevice.ToDeviceInfo());
|
||||
}
|
||||
|
||||
api.DestroyInstance(instance, null);
|
||||
|
||||
return devices;
|
||||
return deviceInfos.ToArray();
|
||||
}
|
||||
|
||||
public static string StringFromIdPair(uint vendorId, uint deviceId)
|
||||
private static bool IsPreferredAndSuitableDevice(Vk api, VulkanPhysicalDevice physicalDevice, SurfaceKHR surface, string preferredGpuId)
|
||||
{
|
||||
return $"0x{vendorId:X}_0x{deviceId:X}";
|
||||
}
|
||||
|
||||
private static bool IsPreferredAndSuitableDevice(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface, string preferredGpuId)
|
||||
{
|
||||
api.GetPhysicalDeviceProperties(physicalDevice, out var properties);
|
||||
|
||||
if (StringFromIdPair(properties.VendorID, properties.DeviceID) != preferredGpuId)
|
||||
if (physicalDevice.Id != preferredGpuId)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -275,68 +221,47 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
return IsSuitableDevice(api, physicalDevice, surface);
|
||||
}
|
||||
|
||||
private static bool IsSuitableDevice(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface)
|
||||
private static bool IsSuitableDevice(Vk api, VulkanPhysicalDevice physicalDevice, SurfaceKHR surface)
|
||||
{
|
||||
int extensionMatches = 0;
|
||||
uint propertiesCount;
|
||||
|
||||
api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, null).ThrowOnError();
|
||||
|
||||
ExtensionProperties[] extensionProperties = new ExtensionProperties[propertiesCount];
|
||||
|
||||
fixed (ExtensionProperties* pExtensionProperties = extensionProperties)
|
||||
foreach (string requiredExtension in _requiredExtensions)
|
||||
{
|
||||
api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, pExtensionProperties).ThrowOnError();
|
||||
|
||||
for (int i = 0; i < propertiesCount; i++)
|
||||
if (physicalDevice.IsDeviceExtensionPresent(requiredExtension))
|
||||
{
|
||||
string extensionName = Marshal.PtrToStringAnsi((IntPtr)pExtensionProperties[i].ExtensionName);
|
||||
|
||||
if (_requiredExtensions.Contains(extensionName))
|
||||
{
|
||||
extensionMatches++;
|
||||
}
|
||||
extensionMatches++;
|
||||
}
|
||||
}
|
||||
|
||||
return extensionMatches == _requiredExtensions.Length && FindSuitableQueueFamily(api, physicalDevice, surface, out _) != InvalidIndex;
|
||||
}
|
||||
|
||||
internal static uint FindSuitableQueueFamily(Vk api, PhysicalDevice physicalDevice, SurfaceKHR surface, out uint queueCount)
|
||||
internal static uint FindSuitableQueueFamily(Vk api, VulkanPhysicalDevice physicalDevice, SurfaceKHR surface, out uint queueCount)
|
||||
{
|
||||
const QueueFlags RequiredFlags = QueueFlags.GraphicsBit | QueueFlags.ComputeBit;
|
||||
|
||||
var khrSurface = new KhrSurface(api.Context);
|
||||
|
||||
uint propertiesCount;
|
||||
|
||||
api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, null);
|
||||
|
||||
QueueFamilyProperties[] properties = new QueueFamilyProperties[propertiesCount];
|
||||
|
||||
fixed (QueueFamilyProperties* pProperties = properties)
|
||||
for (uint index = 0; index < physicalDevice.QueueFamilyProperties.Length; index++)
|
||||
{
|
||||
api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, &propertiesCount, pProperties);
|
||||
}
|
||||
ref QueueFamilyProperties property = ref physicalDevice.QueueFamilyProperties[index];
|
||||
|
||||
for (uint index = 0; index < propertiesCount; index++)
|
||||
{
|
||||
var queueFlags = properties[index].QueueFlags;
|
||||
khrSurface.GetPhysicalDeviceSurfaceSupport(physicalDevice.PhysicalDevice, index, surface, out var surfaceSupported).ThrowOnError();
|
||||
|
||||
khrSurface.GetPhysicalDeviceSurfaceSupport(physicalDevice, index, surface, out var surfaceSupported).ThrowOnError();
|
||||
|
||||
if (queueFlags.HasFlag(RequiredFlags) && surfaceSupported)
|
||||
if (property.QueueFlags.HasFlag(RequiredFlags) && surfaceSupported)
|
||||
{
|
||||
queueCount = properties[index].QueueCount;
|
||||
queueCount = property.QueueCount;
|
||||
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
queueCount = 0;
|
||||
|
||||
return InvalidIndex;
|
||||
}
|
||||
|
||||
public static Device CreateDevice(Vk api, PhysicalDevice physicalDevice, uint queueFamilyIndex, string[] supportedExtensions, uint queueCount)
|
||||
internal static Device CreateDevice(Vk api, VulkanPhysicalDevice physicalDevice, uint queueFamilyIndex, uint queueCount)
|
||||
{
|
||||
if (queueCount > QueuesCount)
|
||||
{
|
||||
@@ -358,8 +283,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PQueuePriorities = queuePriorities
|
||||
};
|
||||
|
||||
api.GetPhysicalDeviceProperties(physicalDevice, out var properties);
|
||||
bool useRobustBufferAccess = VendorUtils.FromId(properties.VendorID) == Vendor.Nvidia;
|
||||
bool useRobustBufferAccess = VendorUtils.FromId(physicalDevice.PhysicalDeviceProperties.VendorID) == Vendor.Nvidia;
|
||||
|
||||
PhysicalDeviceFeatures2 features2 = new PhysicalDeviceFeatures2()
|
||||
{
|
||||
@@ -380,7 +304,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PNext = features2.PNext
|
||||
};
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_custom_border_color"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_custom_border_color"))
|
||||
{
|
||||
features2.PNext = &supportedFeaturesCustomBorderColor;
|
||||
}
|
||||
@@ -391,7 +315,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PNext = features2.PNext
|
||||
};
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_primitive_topology_list_restart"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_primitive_topology_list_restart"))
|
||||
{
|
||||
features2.PNext = &supportedFeaturesPrimitiveTopologyListRestart;
|
||||
}
|
||||
@@ -402,7 +326,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PNext = features2.PNext
|
||||
};
|
||||
|
||||
if (supportedExtensions.Contains(ExtTransformFeedback.ExtensionName))
|
||||
if (physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName))
|
||||
{
|
||||
features2.PNext = &supportedFeaturesTransformFeedback;
|
||||
}
|
||||
@@ -412,14 +336,14 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
SType = StructureType.PhysicalDeviceRobustness2FeaturesExt
|
||||
};
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_robustness2"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_robustness2"))
|
||||
{
|
||||
supportedFeaturesRobustness2.PNext = features2.PNext;
|
||||
|
||||
features2.PNext = &supportedFeaturesRobustness2;
|
||||
}
|
||||
|
||||
api.GetPhysicalDeviceFeatures2(physicalDevice, &features2);
|
||||
api.GetPhysicalDeviceFeatures2(physicalDevice.PhysicalDevice, &features2);
|
||||
|
||||
var supportedFeatures = features2.Features;
|
||||
|
||||
@@ -452,7 +376,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDeviceTransformFeedbackFeaturesEXT featuresTransformFeedback;
|
||||
|
||||
if (supportedExtensions.Contains(ExtTransformFeedback.ExtensionName))
|
||||
if (physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName))
|
||||
{
|
||||
featuresTransformFeedback = new PhysicalDeviceTransformFeedbackFeaturesEXT()
|
||||
{
|
||||
@@ -466,7 +390,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT featuresPrimitiveTopologyListRestart;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_primitive_topology_list_restart"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_primitive_topology_list_restart"))
|
||||
{
|
||||
featuresPrimitiveTopologyListRestart = new PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT()
|
||||
{
|
||||
@@ -481,7 +405,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDeviceRobustness2FeaturesEXT featuresRobustness2;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_robustness2"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_robustness2"))
|
||||
{
|
||||
featuresRobustness2 = new PhysicalDeviceRobustness2FeaturesEXT()
|
||||
{
|
||||
@@ -497,7 +421,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
SType = StructureType.PhysicalDeviceExtendedDynamicStateFeaturesExt,
|
||||
PNext = pExtendedFeatures,
|
||||
ExtendedDynamicState = supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName)
|
||||
ExtendedDynamicState = physicalDevice.IsDeviceExtensionPresent(ExtExtendedDynamicState.ExtensionName)
|
||||
};
|
||||
|
||||
pExtendedFeatures = &featuresExtendedDynamicState;
|
||||
@@ -515,16 +439,16 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
SType = StructureType.PhysicalDeviceVulkan12Features,
|
||||
PNext = pExtendedFeatures,
|
||||
DescriptorIndexing = supportedExtensions.Contains("VK_EXT_descriptor_indexing"),
|
||||
DrawIndirectCount = supportedExtensions.Contains(KhrDrawIndirectCount.ExtensionName),
|
||||
UniformBufferStandardLayout = supportedExtensions.Contains("VK_KHR_uniform_buffer_standard_layout")
|
||||
DescriptorIndexing = physicalDevice.IsDeviceExtensionPresent("VK_EXT_descriptor_indexing"),
|
||||
DrawIndirectCount = physicalDevice.IsDeviceExtensionPresent(KhrDrawIndirectCount.ExtensionName),
|
||||
UniformBufferStandardLayout = physicalDevice.IsDeviceExtensionPresent("VK_KHR_uniform_buffer_standard_layout")
|
||||
};
|
||||
|
||||
pExtendedFeatures = &featuresVk12;
|
||||
|
||||
PhysicalDeviceIndexTypeUint8FeaturesEXT featuresIndexU8;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_index_type_uint8"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_index_type_uint8"))
|
||||
{
|
||||
featuresIndexU8 = new PhysicalDeviceIndexTypeUint8FeaturesEXT()
|
||||
{
|
||||
@@ -538,7 +462,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDeviceFragmentShaderInterlockFeaturesEXT featuresFragmentShaderInterlock;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_fragment_shader_interlock"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"))
|
||||
{
|
||||
featuresFragmentShaderInterlock = new PhysicalDeviceFragmentShaderInterlockFeaturesEXT()
|
||||
{
|
||||
@@ -552,7 +476,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDeviceSubgroupSizeControlFeaturesEXT featuresSubgroupSizeControl;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_subgroup_size_control"))
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_subgroup_size_control"))
|
||||
{
|
||||
featuresSubgroupSizeControl = new PhysicalDeviceSubgroupSizeControlFeaturesEXT()
|
||||
{
|
||||
@@ -566,7 +490,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
PhysicalDeviceCustomBorderColorFeaturesEXT featuresCustomBorderColor;
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_custom_border_color") &&
|
||||
if (physicalDevice.IsDeviceExtensionPresent("VK_EXT_custom_border_color") &&
|
||||
supportedFeaturesCustomBorderColor.CustomBorderColors &&
|
||||
supportedFeaturesCustomBorderColor.CustomBorderColorWithoutFormat)
|
||||
{
|
||||
@@ -581,7 +505,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
pExtendedFeatures = &featuresCustomBorderColor;
|
||||
}
|
||||
|
||||
var enabledExtensions = _requiredExtensions.Union(_desirableExtensions.Intersect(supportedExtensions)).ToArray();
|
||||
var enabledExtensions = _requiredExtensions.Union(_desirableExtensions.Intersect(physicalDevice.DeviceExtensions)).ToArray();
|
||||
|
||||
IntPtr* ppEnabledExtensions = stackalloc IntPtr[enabledExtensions.Length];
|
||||
|
||||
@@ -601,7 +525,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PEnabledFeatures = &features
|
||||
};
|
||||
|
||||
api.CreateDevice(physicalDevice, in deviceCreateInfo, null, out var device).ThrowOnError();
|
||||
api.CreateDevice(physicalDevice.PhysicalDevice, in deviceCreateInfo, null, out var device).ThrowOnError();
|
||||
|
||||
for (int i = 0; i < enabledExtensions.Length; i++)
|
||||
{
|
||||
@@ -610,21 +534,5 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
public static string[] GetSupportedExtensions(Vk api, PhysicalDevice physicalDevice)
|
||||
{
|
||||
uint propertiesCount;
|
||||
|
||||
api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, null).ThrowOnError();
|
||||
|
||||
ExtensionProperties[] extensionProperties = new ExtensionProperties[propertiesCount];
|
||||
|
||||
fixed (ExtensionProperties* pExtensionProperties = extensionProperties)
|
||||
{
|
||||
api.EnumerateDeviceExtensionProperties(physicalDevice, (byte*)null, &propertiesCount, pExtensionProperties).ThrowOnError();
|
||||
}
|
||||
|
||||
return extensionProperties.Select(x => Marshal.PtrToStringAnsi((IntPtr)x.ExtensionName)).ToArray();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
127
Ryujinx.Graphics.Vulkan/VulkanInstance.cs
Normal file
127
Ryujinx.Graphics.Vulkan/VulkanInstance.cs
Normal file
@@ -0,0 +1,127 @@
|
||||
using Ryujinx.Common.Utilities;
|
||||
using Silk.NET.Core;
|
||||
using Silk.NET.Vulkan;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Linq;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
/// <summary>
/// Wraps a raw Vulkan instance handle together with the instance-level API version,
/// and destroys the handle when disposed.
/// </summary>
class VulkanInstance : IDisposable
{
    private readonly Vk _api;

    /// <summary>Raw Vulkan instance handle owned by this wrapper.</summary>
    public readonly Instance Instance;

    /// <summary>
    /// Instance-level API version: 1.0 when vkEnumerateInstanceVersion is not exposed,
    /// 1.1 when the entry point exists but the query fails, otherwise the reported value.
    /// </summary>
    public readonly Version32 InstanceVersion;

    private bool _disposed;

    private VulkanInstance(Vk api, Instance instance)
    {
        _api = api;
        Instance = instance;

        // vkEnumerateInstanceVersion was added in Vulkan 1.1; without it the instance can only be 1.0.
        if (api.GetInstanceProcAddr(instance, "vkEnumerateInstanceVersion") == IntPtr.Zero)
        {
            InstanceVersion = Vk.Version10;
        }
        else
        {
            uint rawInstanceVersion = 0;

            // The entry point exists, so fall back to 1.1 if the query itself fails.
            if (api.EnumerateInstanceVersion(ref rawInstanceVersion) != Result.Success)
            {
                rawInstanceVersion = Vk.Version11.Value;
            }

            InstanceVersion = (Version32)rawInstanceVersion;
        }
    }

    /// <summary>
    /// Creates a Vulkan instance from <paramref name="createInfo"/>.
    /// </summary>
    /// <param name="api">Vulkan API entry points.</param>
    /// <param name="createInfo">Instance creation parameters.</param>
    /// <param name="instance">The wrapped instance on success, null otherwise.</param>
    /// <returns>The result code returned by the instance creation call.</returns>
    public static Result Create(Vk api, ref InstanceCreateInfo createInfo, out VulkanInstance instance)
    {
        instance = null;

        Instance rawInstance = default;

        Result result = api.CreateInstance(SpanHelpers.AsReadOnlySpan(ref createInfo), ReadOnlySpan<AllocationCallbacks>.Empty, SpanHelpers.AsSpan(ref rawInstance));

        if (result == Result.Success)
        {
            instance = new VulkanInstance(api, rawInstance);
        }

        return result;
    }

    /// <summary>
    /// Enumerates the physical devices visible to this instance.
    /// </summary>
    /// <param name="physicalDevices">The wrapped devices on success, null on failure.</param>
    /// <returns><see cref="Result.Success"/>, or the failing result code.</returns>
    public Result EnumeratePhysicalDevices(out VulkanPhysicalDevice[] physicalDevices)
    {
        physicalDevices = null;

        uint physicalDeviceCount;
        PhysicalDevice[] rawPhysicalDevices;
        Result result;

        // The device list may change between the count query and the fill call.
        // VK_INCOMPLETE means the array was too small, so query the count again and retry.
        do
        {
            physicalDeviceCount = 0;

            result = _api.EnumeratePhysicalDevices(Instance, SpanHelpers.AsSpan(ref physicalDeviceCount), Span<PhysicalDevice>.Empty);

            if (result != Result.Success)
            {
                return result;
            }

            rawPhysicalDevices = new PhysicalDevice[physicalDeviceCount];

            result = _api.EnumeratePhysicalDevices(Instance, SpanHelpers.AsSpan(ref physicalDeviceCount), rawPhysicalDevices);
        }
        while (result == Result.Incomplete);

        if (result != Result.Success)
        {
            return result;
        }

        // Only wrap the handles that were actually written; the count may have shrunk since the first query.
        physicalDevices = rawPhysicalDevices
            .Take((int)physicalDeviceCount)
            .Select(x => new VulkanPhysicalDevice(_api, x))
            .ToArray();

        return Result.Success;
    }

    /// <summary>
    /// Queries the names of all instance extensions reported by the Vulkan implementation.
    /// </summary>
    /// <param name="api">Vulkan API entry points.</param>
    /// <returns>The set of available instance extension names.</returns>
    public static IReadOnlySet<string> GetInstanceExtensions(Vk api)
    {
        uint propertiesCount = 0;

        api.EnumerateInstanceExtensionProperties(ReadOnlySpan<byte>.Empty, SpanHelpers.AsSpan(ref propertiesCount), Span<ExtensionProperties>.Empty).ThrowOnError();

        ExtensionProperties[] extensionProperties = new ExtensionProperties[propertiesCount];

        api.EnumerateInstanceExtensionProperties(ReadOnlySpan<byte>.Empty, SpanHelpers.AsSpan(ref propertiesCount), extensionProperties).ThrowOnError();

        unsafe
        {
            // Ignore trailing entries that were not written by the second call.
            return extensionProperties
                .Take((int)propertiesCount)
                .Select(x => Marshal.PtrToStringAnsi((IntPtr)x.ExtensionName))
                .ToImmutableHashSet();
        }
    }

    /// <summary>
    /// Queries the names of all instance layers reported by the Vulkan implementation.
    /// </summary>
    /// <param name="api">Vulkan API entry points.</param>
    /// <returns>The set of available instance layer names.</returns>
    public static IReadOnlySet<string> GetInstanceLayers(Vk api)
    {
        uint propertiesCount = 0;

        api.EnumerateInstanceLayerProperties(SpanHelpers.AsSpan(ref propertiesCount), Span<LayerProperties>.Empty).ThrowOnError();

        LayerProperties[] layerProperties = new LayerProperties[propertiesCount];

        api.EnumerateInstanceLayerProperties(SpanHelpers.AsSpan(ref propertiesCount), layerProperties).ThrowOnError();

        unsafe
        {
            // Ignore trailing entries that were not written by the second call.
            return layerProperties
                .Take((int)propertiesCount)
                .Select(x => Marshal.PtrToStringAnsi((IntPtr)x.LayerName))
                .ToImmutableHashSet();
        }
    }

    /// <summary>
    /// Destroys the underlying Vulkan instance. Calls after the first one do nothing.
    /// </summary>
    public void Dispose()
    {
        if (!_disposed)
        {
            _api.DestroyInstance(Instance, ReadOnlySpan<AllocationCallbacks>.Empty);

            _disposed = true;
        }
    }
}
|
||||
}
|
70
Ryujinx.Graphics.Vulkan/VulkanPhysicalDevice.cs
Normal file
70
Ryujinx.Graphics.Vulkan/VulkanPhysicalDevice.cs
Normal file
@@ -0,0 +1,70 @@
|
||||
using Ryujinx.Common.Utilities;
|
||||
using Ryujinx.Graphics.GAL;
|
||||
using Silk.NET.Vulkan;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Collections.Immutable;
|
||||
using System.Linq;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
/// <summary>
/// Immutable snapshot of a Vulkan physical device: the raw handle plus the features,
/// properties, memory properties, queue families, name and extension names queried
/// once at construction time.
/// </summary>
readonly struct VulkanPhysicalDevice
{
    public readonly PhysicalDevice PhysicalDevice;
    public readonly PhysicalDeviceFeatures PhysicalDeviceFeatures;
    public readonly PhysicalDeviceProperties PhysicalDeviceProperties;
    public readonly PhysicalDeviceMemoryProperties PhysicalDeviceMemoryProperties;
    public readonly QueueFamilyProperties[] QueueFamilyProperties;
    public readonly string DeviceName;
    public readonly IReadOnlySet<string> DeviceExtensions;

    /// <summary>
    /// Queries and caches everything this wrapper exposes about <paramref name="physicalDevice"/>.
    /// </summary>
    /// <param name="api">Vulkan API entry points.</param>
    /// <param name="physicalDevice">Raw physical device handle to describe.</param>
    public VulkanPhysicalDevice(Vk api, PhysicalDevice physicalDevice)
    {
        PhysicalDevice = physicalDevice;
        PhysicalDeviceFeatures = api.GetPhysicalDeviceFeature(physicalDevice);

        api.GetPhysicalDeviceProperties(physicalDevice, out var deviceProperties);
        PhysicalDeviceProperties = deviceProperties;

        api.GetPhysicalDeviceMemoryProperties(physicalDevice, out PhysicalDeviceMemoryProperties);

        unsafe
        {
            DeviceName = Marshal.PtrToStringAnsi((IntPtr)deviceProperties.DeviceName);
        }

        // Queue families: ask for the count first, then fill an array of that size.
        uint queueFamilyCount = 0;

        api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, SpanHelpers.AsSpan(ref queueFamilyCount), Span<QueueFamilyProperties>.Empty);

        QueueFamilyProperties = new QueueFamilyProperties[queueFamilyCount];

        api.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, SpanHelpers.AsSpan(ref queueFamilyCount), QueueFamilyProperties);

        // Device extensions: same two-step query, then collect the names into a set.
        uint extensionCount = queueFamilyCount;

        api.EnumerateDeviceExtensionProperties(physicalDevice, Span<byte>.Empty, SpanHelpers.AsSpan(ref extensionCount), Span<ExtensionProperties>.Empty).ThrowOnError();

        ExtensionProperties[] rawExtensions = new ExtensionProperties[extensionCount];

        api.EnumerateDeviceExtensionProperties(physicalDevice, Span<byte>.Empty, SpanHelpers.AsSpan(ref extensionCount), rawExtensions).ThrowOnError();

        ImmutableHashSet<string>.Builder extensionNames = ImmutableHashSet.CreateBuilder<string>();

        unsafe
        {
            for (int i = 0; i < rawExtensions.Length; i++)
            {
                ExtensionProperties current = rawExtensions[i];

                extensionNames.Add(Marshal.PtrToStringAnsi((IntPtr)current.ExtensionName));
            }
        }

        DeviceExtensions = extensionNames.ToImmutable();
    }

    /// <summary>
    /// Identifier built from the vendor and device IDs, formatted as "0x{VendorID:X}_0x{DeviceID:X}".
    /// </summary>
    public string Id
    {
        get
        {
            return $"0x{PhysicalDeviceProperties.VendorID:X}_0x{PhysicalDeviceProperties.DeviceID:X}";
        }
    }

    /// <summary>
    /// Checks whether the device reported the given extension name.
    /// </summary>
    /// <param name="extension">Extension name to look up.</param>
    /// <returns>True if the name is in <see cref="DeviceExtensions"/>.</returns>
    public bool IsDeviceExtensionPresent(string extension)
    {
        return DeviceExtensions.Contains(extension);
    }

    /// <summary>
    /// Builds a <see cref="DeviceInfo"/> from the ID, vendor name, device name and
    /// whether the device type is a discrete GPU.
    /// </summary>
    public DeviceInfo ToDeviceInfo() => new DeviceInfo(
        Id,
        VendorUtils.GetNameFromId(PhysicalDeviceProperties.VendorID),
        DeviceName,
        PhysicalDeviceProperties.DeviceType == PhysicalDeviceType.DiscreteGpu);
}
|
||||
}
|
@@ -17,9 +17,9 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
{
|
||||
public sealed class VulkanRenderer : IRenderer
|
||||
{
|
||||
private Instance _instance;
|
||||
private VulkanInstance _instance;
|
||||
private SurfaceKHR _surface;
|
||||
private PhysicalDevice _physicalDevice;
|
||||
private VulkanPhysicalDevice _physicalDevice;
|
||||
private Device _device;
|
||||
private WindowBase _window;
|
||||
|
||||
@@ -106,33 +106,31 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
}
|
||||
}
|
||||
|
||||
private unsafe void LoadFeatures(string[] supportedExtensions, uint maxQueueCount, uint queueFamilyIndex)
|
||||
private unsafe void LoadFeatures(uint maxQueueCount, uint queueFamilyIndex)
|
||||
{
|
||||
FormatCapabilities = new FormatCapabilities(Api, _physicalDevice);
|
||||
FormatCapabilities = new FormatCapabilities(Api, _physicalDevice.PhysicalDevice);
|
||||
|
||||
var supportedFeatures = Api.GetPhysicalDeviceFeature(_physicalDevice);
|
||||
|
||||
if (Api.TryGetDeviceExtension(_instance, _device, out ExtConditionalRendering conditionalRenderingApi))
|
||||
if (Api.TryGetDeviceExtension(_instance.Instance, _device, out ExtConditionalRendering conditionalRenderingApi))
|
||||
{
|
||||
ConditionalRenderingApi = conditionalRenderingApi;
|
||||
}
|
||||
|
||||
if (Api.TryGetDeviceExtension(_instance, _device, out ExtExtendedDynamicState extendedDynamicStateApi))
|
||||
if (Api.TryGetDeviceExtension(_instance.Instance, _device, out ExtExtendedDynamicState extendedDynamicStateApi))
|
||||
{
|
||||
ExtendedDynamicStateApi = extendedDynamicStateApi;
|
||||
}
|
||||
|
||||
if (Api.TryGetDeviceExtension(_instance, _device, out KhrPushDescriptor pushDescriptorApi))
|
||||
if (Api.TryGetDeviceExtension(_instance.Instance, _device, out KhrPushDescriptor pushDescriptorApi))
|
||||
{
|
||||
PushDescriptorApi = pushDescriptorApi;
|
||||
}
|
||||
|
||||
if (Api.TryGetDeviceExtension(_instance, _device, out ExtTransformFeedback transformFeedbackApi))
|
||||
if (Api.TryGetDeviceExtension(_instance.Instance, _device, out ExtTransformFeedback transformFeedbackApi))
|
||||
{
|
||||
TransformFeedbackApi = transformFeedbackApi;
|
||||
}
|
||||
|
||||
if (Api.TryGetDeviceExtension(_instance, _device, out KhrDrawIndirectCount drawIndirectCountApi))
|
||||
if (Api.TryGetDeviceExtension(_instance.Instance, _device, out KhrDrawIndirectCount drawIndirectCountApi))
|
||||
{
|
||||
DrawIndirectCountApi = drawIndirectCountApi;
|
||||
}
|
||||
@@ -154,7 +152,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
SType = StructureType.PhysicalDeviceBlendOperationAdvancedPropertiesExt
|
||||
};
|
||||
|
||||
bool supportsBlendOperationAdvanced = supportedExtensions.Contains("VK_EXT_blend_operation_advanced");
|
||||
bool supportsBlendOperationAdvanced = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_blend_operation_advanced");
|
||||
|
||||
if (supportsBlendOperationAdvanced)
|
||||
{
|
||||
@@ -167,14 +165,14 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
SType = StructureType.PhysicalDeviceSubgroupSizeControlPropertiesExt
|
||||
};
|
||||
|
||||
bool supportsSubgroupSizeControl = supportedExtensions.Contains("VK_EXT_subgroup_size_control");
|
||||
bool supportsSubgroupSizeControl = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_subgroup_size_control");
|
||||
|
||||
if (supportsSubgroupSizeControl)
|
||||
{
|
||||
properties2.PNext = &propertiesSubgroupSizeControl;
|
||||
}
|
||||
|
||||
bool supportsTransformFeedback = supportedExtensions.Contains(ExtTransformFeedback.ExtensionName);
|
||||
bool supportsTransformFeedback = _physicalDevice.IsDeviceExtensionPresent(ExtTransformFeedback.ExtensionName);
|
||||
|
||||
PhysicalDeviceTransformFeedbackPropertiesEXT propertiesTransformFeedback = new PhysicalDeviceTransformFeedbackPropertiesEXT()
|
||||
{
|
||||
@@ -222,30 +220,30 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
SType = StructureType.PhysicalDevicePortabilitySubsetFeaturesKhr
|
||||
};
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_primitive_topology_list_restart"))
|
||||
if (_physicalDevice.IsDeviceExtensionPresent("VK_EXT_primitive_topology_list_restart"))
|
||||
{
|
||||
features2.PNext = &featuresPrimitiveTopologyListRestart;
|
||||
}
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_robustness2"))
|
||||
if (_physicalDevice.IsDeviceExtensionPresent("VK_EXT_robustness2"))
|
||||
{
|
||||
featuresRobustness2.PNext = features2.PNext;
|
||||
features2.PNext = &featuresRobustness2;
|
||||
}
|
||||
|
||||
if (supportedExtensions.Contains("VK_KHR_shader_float16_int8"))
|
||||
if (_physicalDevice.IsDeviceExtensionPresent("VK_KHR_shader_float16_int8"))
|
||||
{
|
||||
featuresShaderInt8.PNext = features2.PNext;
|
||||
features2.PNext = &featuresShaderInt8;
|
||||
}
|
||||
|
||||
if (supportedExtensions.Contains("VK_EXT_custom_border_color"))
|
||||
if (_physicalDevice.IsDeviceExtensionPresent("VK_EXT_custom_border_color"))
|
||||
{
|
||||
featuresCustomBorderColor.PNext = features2.PNext;
|
||||
features2.PNext = &featuresCustomBorderColor;
|
||||
}
|
||||
|
||||
bool usePortability = supportedExtensions.Contains("VK_KHR_portability_subset");
|
||||
bool usePortability = _physicalDevice.IsDeviceExtensionPresent("VK_KHR_portability_subset");
|
||||
|
||||
if (usePortability)
|
||||
{
|
||||
@@ -256,8 +254,8 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
features2.PNext = &featuresPortabilitySubset;
|
||||
}
|
||||
|
||||
Api.GetPhysicalDeviceProperties2(_physicalDevice, &properties2);
|
||||
Api.GetPhysicalDeviceFeatures2(_physicalDevice, &features2);
|
||||
Api.GetPhysicalDeviceProperties2(_physicalDevice.PhysicalDevice, &properties2);
|
||||
Api.GetPhysicalDeviceFeatures2(_physicalDevice.PhysicalDevice, &features2);
|
||||
|
||||
var portabilityFlags = PortabilitySubsetFlags.None;
|
||||
uint vertexBufferAlignment = 1;
|
||||
@@ -272,7 +270,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
portabilityFlags |= featuresPortabilitySubset.SamplerMipLodBias ? 0 : PortabilitySubsetFlags.NoLodBias;
|
||||
}
|
||||
|
||||
bool supportsCustomBorderColor = supportedExtensions.Contains("VK_EXT_custom_border_color") &&
|
||||
bool supportsCustomBorderColor = _physicalDevice.IsDeviceExtensionPresent("VK_EXT_custom_border_color") &&
|
||||
featuresCustomBorderColor.CustomBorderColors &&
|
||||
featuresCustomBorderColor.CustomBorderColorWithoutFormat;
|
||||
|
||||
@@ -284,30 +282,30 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
properties.Limits.FramebufferStencilSampleCounts;
|
||||
|
||||
Capabilities = new HardwareCapabilities(
|
||||
supportedExtensions.Contains("VK_EXT_index_type_uint8"),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_index_type_uint8"),
|
||||
supportsCustomBorderColor,
|
||||
supportsBlendOperationAdvanced,
|
||||
propertiesBlendOperationAdvanced.AdvancedBlendCorrelatedOverlap,
|
||||
propertiesBlendOperationAdvanced.AdvancedBlendNonPremultipliedSrcColor,
|
||||
propertiesBlendOperationAdvanced.AdvancedBlendNonPremultipliedDstColor,
|
||||
supportedExtensions.Contains(KhrDrawIndirectCount.ExtensionName),
|
||||
supportedExtensions.Contains("VK_EXT_fragment_shader_interlock"),
|
||||
supportedExtensions.Contains("VK_NV_geometry_shader_passthrough"),
|
||||
_physicalDevice.IsDeviceExtensionPresent(KhrDrawIndirectCount.ExtensionName),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
|
||||
supportsSubgroupSizeControl,
|
||||
featuresShaderInt8.ShaderInt8,
|
||||
supportedExtensions.Contains("VK_EXT_shader_stencil_export"),
|
||||
supportedExtensions.Contains(ExtConditionalRendering.ExtensionName),
|
||||
supportedExtensions.Contains(ExtExtendedDynamicState.ExtensionName),
|
||||
_physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
|
||||
_physicalDevice.IsDeviceExtensionPresent(ExtConditionalRendering.ExtensionName),
|
||||
_physicalDevice.IsDeviceExtensionPresent(ExtExtendedDynamicState.ExtensionName),
|
||||
features2.Features.MultiViewport,
|
||||
featuresRobustness2.NullDescriptor || IsMoltenVk,
|
||||
supportedExtensions.Contains(KhrPushDescriptor.ExtensionName),
|
||||
_physicalDevice.IsDeviceExtensionPresent(KhrPushDescriptor.ExtensionName),
|
||||
featuresPrimitiveTopologyListRestart.PrimitiveTopologyListRestart,
|
||||
featuresPrimitiveTopologyListRestart.PrimitiveTopologyPatchListRestart,
|
||||
supportsTransformFeedback,
|
||||
propertiesTransformFeedback.TransformFeedbackQueries,
|
||||
features2.Features.OcclusionQueryPrecise,
|
||||
supportedFeatures.PipelineStatisticsQuery,
|
||||
supportedFeatures.GeometryShader,
|
||||
_physicalDevice.PhysicalDeviceFeatures.PipelineStatisticsQuery,
|
||||
_physicalDevice.PhysicalDeviceFeatures.GeometryShader,
|
||||
propertiesSubgroupSizeControl.MinSubgroupSize,
|
||||
propertiesSubgroupSizeControl.MaxSubgroupSize,
|
||||
propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
|
||||
@@ -315,9 +313,9 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
portabilityFlags,
|
||||
vertexBufferAlignment);
|
||||
|
||||
IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(Api, _physicalDevice);
|
||||
IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice);
|
||||
|
||||
MemoryAllocator = new MemoryAllocator(Api, _physicalDevice, _device, properties.Limits.MaxMemoryAllocationCount);
|
||||
MemoryAllocator = new MemoryAllocator(Api, _physicalDevice, _device);
|
||||
|
||||
CommandBufferPool = new CommandBufferPool(Api, _device, Queue, QueueLock, queueFamilyIndex);
|
||||
|
||||
@@ -345,22 +343,21 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
Api = api;
|
||||
|
||||
_instance = VulkanInitialization.CreateInstance(api, logLevel, _getRequiredExtensions());
|
||||
_debugMessenger = new VulkanDebugMessenger(api, _instance, logLevel);
|
||||
_debugMessenger = new VulkanDebugMessenger(api, _instance.Instance, logLevel);
|
||||
|
||||
if (api.TryGetInstanceExtension(_instance, out KhrSurface surfaceApi))
|
||||
if (api.TryGetInstanceExtension(_instance.Instance, out KhrSurface surfaceApi))
|
||||
{
|
||||
SurfaceApi = surfaceApi;
|
||||
}
|
||||
|
||||
_surface = _getSurface(_instance, api);
|
||||
_surface = _getSurface(_instance.Instance, api);
|
||||
_physicalDevice = VulkanInitialization.FindSuitablePhysicalDevice(api, _instance, _surface, _preferredGpuId);
|
||||
|
||||
var queueFamilyIndex = VulkanInitialization.FindSuitableQueueFamily(api, _physicalDevice, _surface, out uint maxQueueCount);
|
||||
var supportedExtensions = VulkanInitialization.GetSupportedExtensions(api, _physicalDevice);
|
||||
|
||||
_device = VulkanInitialization.CreateDevice(api, _physicalDevice, queueFamilyIndex, supportedExtensions, maxQueueCount);
|
||||
_device = VulkanInitialization.CreateDevice(api, _physicalDevice, queueFamilyIndex, maxQueueCount);
|
||||
|
||||
if (api.TryGetDeviceExtension(_instance, _device, out KhrSwapchain swapchainApi))
|
||||
if (api.TryGetDeviceExtension(_instance.Instance, _device, out KhrSwapchain swapchainApi))
|
||||
{
|
||||
SwapchainApi = swapchainApi;
|
||||
}
|
||||
@@ -369,9 +366,9 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
Queue = queue;
|
||||
QueueLock = new object();
|
||||
|
||||
LoadFeatures(supportedExtensions, maxQueueCount, queueFamilyIndex);
|
||||
LoadFeatures(maxQueueCount, queueFamilyIndex);
|
||||
|
||||
_window = new Window(this, _surface, _physicalDevice, _device);
|
||||
_window = new Window(this, _surface, _physicalDevice.PhysicalDevice, _device);
|
||||
|
||||
_initialized = true;
|
||||
}
|
||||
@@ -536,10 +533,9 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
PNext = &featuresVk12
|
||||
};
|
||||
|
||||
Api.GetPhysicalDeviceFeatures2(_physicalDevice, &features2);
|
||||
Api.GetPhysicalDeviceProperties(_physicalDevice, out var properties);
|
||||
Api.GetPhysicalDeviceFeatures2(_physicalDevice.PhysicalDevice, &features2);
|
||||
|
||||
var limits = properties.Limits;
|
||||
var limits = _physicalDevice.PhysicalDeviceProperties.Limits;
|
||||
|
||||
return new Capabilities(
|
||||
api: TargetApi.Vulkan,
|
||||
@@ -623,7 +619,7 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
|
||||
private unsafe void PrintGpuInformation()
|
||||
{
|
||||
Api.GetPhysicalDeviceProperties(_physicalDevice, out var properties);
|
||||
var properties = _physicalDevice.PhysicalDeviceProperties;
|
||||
|
||||
string vendorName = VendorUtils.GetNameFromId(properties.VendorID);
|
||||
|
||||
@@ -807,14 +803,14 @@ namespace Ryujinx.Graphics.Vulkan
|
||||
sampler.Dispose();
|
||||
}
|
||||
|
||||
SurfaceApi.DestroySurface(_instance, _surface, null);
|
||||
SurfaceApi.DestroySurface(_instance.Instance, _surface, null);
|
||||
|
||||
Api.DestroyDevice(_device, null);
|
||||
|
||||
_debugMessenger.Dispose();
|
||||
|
||||
// Last step destroy the instance
|
||||
Api.DestroyInstance(_instance, null);
|
||||
_instance.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common
|
||||
{
|
||||
struct AtomicStorage<T> where T: unmanaged
|
||||
struct AtomicStorage<T> where T: unmanaged, ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public T Object;
|
||||
@@ -14,9 +14,9 @@ namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common
|
||||
|
||||
public void SetObject(ref T obj)
|
||||
{
|
||||
ISampledData samplingProvider = obj as ISampledData;
|
||||
ulong samplingNumber = ISampledDataStruct.GetSamplingNumber(ref obj);
|
||||
|
||||
Interlocked.Exchange(ref SamplingNumber, samplingProvider.SamplingNumber);
|
||||
Interlocked.Exchange(ref SamplingNumber, samplingNumber);
|
||||
|
||||
Thread.MemoryBarrier();
|
||||
|
||||
|
@@ -1,7 +0,0 @@
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common
|
||||
{
|
||||
interface ISampledData
|
||||
{
|
||||
ulong SamplingNumber { get; }
|
||||
}
|
||||
}
|
@@ -0,0 +1,65 @@
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common
|
||||
{
|
||||
/// <summary>
/// Marker interface that adds compile-time safety to a convention-based optimization:
/// the sampling number of a state struct is read directly from the struct's raw bytes.
///
/// Any struct implementing this interface should:
/// - use <c>StructLayoutAttribute</c> (and related attributes) to explicitly control how the struct is laid out in memory.
/// - make sure <c>ISampledDataStruct.GetSamplingNumberFieldOffset()</c> returns the offset, in bytes, of the ulong
///   "Sampling Number" field within the struct. Most types declare it as the first field, so the default offset is 0.
///
/// Example:
///
/// <code>
/// [StructLayout(LayoutKind.Sequential, Pack = 1)]
/// struct DebugPadState : ISampledDataStruct
/// {
///     public ulong SamplingNumber; // First field: no special handling needed in GetSamplingNumberFieldOffset()
///     // other members...
/// }
///
/// [StructLayout(LayoutKind.Sequential, Pack = 1)]
/// struct SixAxisSensorState : ISampledDataStruct
/// {
///     public ulong DeltaTime;
///     public ulong SamplingNumber; // Not the first field: needs special handling in GetSamplingNumberFieldOffset()
///     // other members...
/// }
/// </code>
/// </summary>
internal interface ISampledDataStruct
{
    // Marker interface only: it deliberately declares no instance members.

    /// <summary>
    /// Reads the sampling number from the in-memory bytes of <paramref name="sampledDataStruct"/>.
    /// </summary>
    /// <typeparam name="T">Unmanaged struct type that follows the sampling number convention</typeparam>
    /// <param name="sampledDataStruct">Struct to read the sampling number from</param>
    /// <returns>The 8 bytes found at the sampling number offset, decoded as a little-endian ulong</returns>
    public static ulong GetSamplingNumber<T>(ref T sampledDataStruct) where T : unmanaged, ISampledDataStruct
    {
        int samplingNumberOffset = GetSamplingNumberFieldOffset(ref sampledDataStruct);

        // View the single struct as a run of bytes so the field can be read without knowing its name.
        ReadOnlySpan<byte> rawBytes = MemoryMarshal.AsBytes(MemoryMarshal.CreateReadOnlySpan(ref sampledDataStruct, 1));

        return BinaryPrimitives.ReadUInt64LittleEndian(rawBytes[samplingNumberOffset..]);
    }

    /// <summary>
    /// Gets the offset, in bytes, of the sampling number field inside <paramref name="sampledDataStruct"/>.
    /// </summary>
    /// <typeparam name="T">Unmanaged struct type that follows the sampling number convention</typeparam>
    /// <param name="sampledDataStruct">Struct whose type selects the offset</param>
    /// <returns><c>sizeof(ulong)</c> for <c>Npad.SixAxisSensorState</c>, 0 for every other type</returns>
    private static int GetSamplingNumberFieldOffset<T>(ref T sampledDataStruct) where T : unmanaged, ISampledDataStruct
    {
        // SixAxisSensorState declares a ulong DeltaTime field ahead of its sampling number.
        if (sampledDataStruct is Npad.SixAxisSensorState)
        {
            return sizeof(ulong);
        }

        return 0;
    }
}
|
||||
}
|
@@ -5,7 +5,7 @@ using System.Threading;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common
|
||||
{
|
||||
struct RingLifo<T> where T: unmanaged
|
||||
struct RingLifo<T> where T: unmanaged, ISampledDataStruct
|
||||
{
|
||||
private const ulong MaxEntries = 17;
|
||||
|
||||
|
@@ -1,15 +1,15 @@
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.DebugPad
|
||||
{
|
||||
struct DebugPadState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct DebugPadState : ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public DebugPadAttribute Attributes;
|
||||
public DebugPadButton Buttons;
|
||||
public AnalogStickState AnalogStickR;
|
||||
public AnalogStickState AnalogStickL;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
||||
|
@@ -2,9 +2,8 @@
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Keyboard
|
||||
{
|
||||
// TODO: This seems entirely wrong
|
||||
[Flags]
|
||||
enum KeyboardModifier : uint
|
||||
enum KeyboardModifier : ulong
|
||||
{
|
||||
None = 0,
|
||||
Control = 1 << 0,
|
||||
|
@@ -1,13 +1,13 @@
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Keyboard
|
||||
{
|
||||
struct KeyboardState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct KeyboardState : ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public KeyboardModifier Modifiers;
|
||||
public KeyboardKey Keys;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
||||
|
@@ -1,8 +1,10 @@
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Mouse
|
||||
{
|
||||
struct MouseState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct MouseState : ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public int X;
|
||||
@@ -13,7 +15,5 @@ namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Mouse
|
||||
public int WheelDeltaY;
|
||||
public MouseButton Buttons;
|
||||
public MouseAttribute Attributes;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
||||
|
@@ -1,8 +1,10 @@
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Npad
|
||||
{
|
||||
struct NpadCommonState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct NpadCommonState : ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public NpadButton Buttons;
|
||||
@@ -10,7 +12,5 @@ namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Npad
|
||||
public AnalogStickState AnalogStickR;
|
||||
public NpadAttribute Attributes;
|
||||
private uint _reserved;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
||||
|
@@ -1,15 +1,15 @@
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Npad
|
||||
{
|
||||
struct NpadGcTriggerState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct NpadGcTriggerState : ISampledDataStruct
|
||||
{
|
||||
#pragma warning disable CS0649
|
||||
public ulong SamplingNumber;
|
||||
public uint TriggerL;
|
||||
public uint TriggerR;
|
||||
#pragma warning restore CS0649
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
@@ -1,9 +1,11 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Npad
|
||||
{
|
||||
struct SixAxisSensorState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct SixAxisSensorState : ISampledDataStruct
|
||||
{
|
||||
public ulong DeltaTime;
|
||||
public ulong SamplingNumber;
|
||||
@@ -13,7 +15,5 @@ namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Npad
|
||||
public Array9<float> Direction;
|
||||
public SixAxisSensorAttribute Attributes;
|
||||
private uint _reserved;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
@@ -1,15 +1,15 @@
|
||||
using Ryujinx.Common.Memory;
|
||||
using Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.Common;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.HLE.HOS.Services.Hid.Types.SharedMemory.TouchScreen
|
||||
{
|
||||
struct TouchScreenState : ISampledData
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1)]
|
||||
struct TouchScreenState : ISampledDataStruct
|
||||
{
|
||||
public ulong SamplingNumber;
|
||||
public int TouchesCount;
|
||||
private int _reserved;
|
||||
public Array16<TouchState> Touches;
|
||||
|
||||
ulong ISampledData.SamplingNumber => SamplingNumber;
|
||||
}
|
||||
}
|
||||
|
91
Ryujinx.Tests/Cpu/EnvironmentTests.cs
Normal file
91
Ryujinx.Tests/Cpu/EnvironmentTests.cs
Normal file
@@ -0,0 +1,91 @@
|
||||
using ARMeilleure.Translation;
|
||||
using NUnit.Framework;
|
||||
using Ryujinx.Cpu.Jit;
|
||||
using Ryujinx.Tests.Memory;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Tests.Cpu
|
||||
{
|
||||
/// <summary>
/// Tests that check how generated code and managed callbacks interact with the floating point control flags.
/// </summary>
internal class EnvironmentTests
{
    private static Translator _translator;

    /// <summary>
    /// Lazily creates the shared translator used by the tests in this class.
    /// </summary>
    private void EnsureTranslator()
    {
        // Create a translator, as one is needed to register the signal handler or emit methods.
        _translator ??= new Translator(new JitMemoryAllocator(), new MockMemoryManager(), true);
    }

    /// <summary>
    /// Returns the float whose bit pattern is 1, a subnormal value.
    /// Kept out of line and unoptimized so callers cannot constant-fold the result.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
    private float GetDenormal()
    {
        return BitConverter.Int32BitsToSingle(1);
    }

    /// <summary>
    /// Returns the float whose bit pattern is 0.
    /// Kept out of line and unoptimized so callers cannot constant-fold the result.
    /// </summary>
    [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
    private float GetZero()
    {
        return BitConverter.Int32BitsToSingle(0);
    }

    /// <summary>
    /// This test ensures that managed methods do not reset floating point control flags.
    /// This is used to avoid changing control flags when running methods that don't require it, such as SVC calls, software memory...
    /// </summary>
    [Test]
    public void FpFlagsPInvoke()
    {
        EnsureTranslator();

        // Subnormal results are not flushed to zero by default.
        // This operation should not be allowed to do constant propagation, hence the methods that explicitly disallow inlining.
        Assert.AreNotEqual(GetDenormal() + GetZero(), 0f);

        bool methodCalled = false;
        bool isFz = false;

        var managedMethod = () =>
        {
            // Floating point math should not modify fp flags.
            float test = 2f * 3.5f;

            if (test < 4f)
            {
                throw new System.Exception("Sanity check.");
            }

            isFz = GetDenormal() + GetZero() == 0f;

            try
            {
                if (test >= 4f)
                {
                    throw new System.Exception("Always throws.");
                }
            }
            catch
            {
                // Exception handling should not modify fp flags.

                methodCalled = true;
            }
        };

        var method = TranslatorTestMethods.GenerateFpFlagsPInvokeTest();

        // This method sets flush-to-zero and then calls the managed method.
        // Before and after setting the flags, it ensures subnormal addition works as expected.
        // It returns a positive result if any tests fail, and 0 on success (or if the platform cannot change FP flags)
        int result = method(Marshal.GetFunctionPointerForDelegate(managedMethod));

        // The GC does not track the native function pointer, so the delegate must be kept
        // reachable until the generated code has finished calling through it.
        GC.KeepAlive(managedMethod);

        // Subnormal results are not flushed to zero by default; that default should have been restored when the method exited.
        Assert.AreNotEqual(GetDenormal() + GetZero(), 0f);

        Assert.True(result == 0);
        Assert.True(methodCalled);
        Assert.True(isFz);
    }
}
|
||||
}
|
@@ -119,7 +119,7 @@ namespace Ryujinx.Ui
|
||||
}
|
||||
catch (Exception) { }
|
||||
|
||||
Device.DisposeGpu();
|
||||
Device?.DisposeGpu();
|
||||
NpadManager.Dispose();
|
||||
|
||||
// Unbind context and destroy everything
|
||||
@@ -129,7 +129,7 @@ namespace Ryujinx.Ui
|
||||
}
|
||||
catch (Exception) { }
|
||||
|
||||
_openGLContext.Dispose();
|
||||
_openGLContext?.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user