Compare commits

..

8 Commits

Author SHA1 Message Date
4df22eb867 Fix missing data for new copy dependency textures with mismatching size (#6161) 2024-01-22 17:42:26 -03:00
f241f88558 Add a separate device memory manager (#6153)
* Add a separate device memory manager

* Still need this

* Device writes are always tracked

* Device writes are always tracked (2)

* Rename more instances of gmm to mm
2024-01-22 17:14:46 -03:00
90455a05e6 Input: Improve controller identification (#6029)
* Input: Improve controller identification

Controllers were identified before by a combination of their _global_ index in the list of controllers and their GUID. The problem is, disconnecting and reconnecting a controller can change its global index; the controller can appear at the end. This would give it another ID, and the controller would need to be reconfigured.

This happened to me a lot with a Switch Pro Controller and a USB game controller; it was essentially random which one appeared first. Now, they are detected consistently.

This PR changes the controller identification to be a combination of an index of controllers with the same GUID (generally 0), and its GUID. It also reworks managing the list of controllers to properly consider instance IDs.

This also changes the NpadManager to attempt to reuse old controllers when refreshing the input configuration, which can prevent input from going dead for seconds whenever a controller connects or disconnects (and can prevent the Switch Pro Controller from dying entirely).

Testing with different controller types, operating systems, and the Avalonia UI is welcome. Keep in mind that the target scenario is connecting many controllers, then unplugging and reconnecting them.

* Remove double empty line
2024-01-22 17:02:44 -03:00
edc76883db Fix integer overflow on downsample surround to stereo (#6160) 2024-01-21 21:11:46 +01:00
427b7d06b5 Implement a new JIT for Arm devices (#6057)
* Implement a new JIT for Arm devices

* Auto-format

* Make a lot of Assembler members read-only

* More read-only

* Fix more warnings

* ObjectDisposedException.ThrowIf

* New JIT cache for platforms that enforce W^X, currently unused

* Remove unused using

* Fix assert

* Pass memory manager type around

* Safe memory manager mode support + other improvements

* Actual safe memory manager mode masking support

* PR feedback
2024-01-20 11:11:28 -03:00
331c07807f Vulkan: Use templates for descriptor updates (#6014)
* WIP: Descriptor template update

* Make configurable

* Wording

* Simplify template creation

* Whitespace

* UTF-8 whatever

* Leave only templated path, better template updater
2024-01-20 11:07:33 -03:00
a772b073ec Support portable mode using the macOS app bundle. (#6147)
* Support portable mode using the macOS app bundle.

* Apply suggestion

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

---------

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2024-01-20 03:09:51 +01:00
870d9599cc Change shader cache init wait method (#6131)
* Change shader cache init wait method

* Make field readonly
2024-01-18 14:17:38 -03:00
186 changed files with 44272 additions and 255 deletions

View File

@ -9,7 +9,7 @@ namespace ARMeilleure.Common
/// Represents a table of guest address to a value.
/// </summary>
/// <typeparam name="TEntry">Type of the value</typeparam>
unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
public unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged
{
/// <summary>
/// Represents a level in an <see cref="AddressTable{TEntry}"/>.

View File

@ -8,6 +8,7 @@ namespace ARMeilleure.Memory
void Commit(ulong offset, ulong size);
void MapAsRw(ulong offset, ulong size);
void MapAsRx(ulong offset, ulong size);
void MapAsRwx(ulong offset, ulong size);
}

View File

@ -2,7 +2,7 @@ using System;
namespace ARMeilleure.Memory
{
class ReservedRegion
public class ReservedRegion
{
public const int DefaultGranularity = 65536; // Mapping granularity in Windows.

View File

@ -5,7 +5,7 @@ using System.Runtime.Versioning;
namespace ARMeilleure.Native
{
[SupportedOSPlatform("macos")]
internal static partial class JitSupportDarwin
static partial class JitSupportDarwin
{
[LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
public static partial void Copy(IntPtr dst, IntPtr src, ulong n);

View File

@ -8,7 +8,7 @@ namespace ARMeilleure.Translation
/// </summary>
/// <typeparam name="TK">Key</typeparam>
/// <typeparam name="TV">Value</typeparam>
class IntervalTree<TK, TV> where TK : IComparable<TK>
public class IntervalTree<TK, TV> where TK : IComparable<TK>
{
private const int ArrayGrowthSize = 32;

View File

@ -57,9 +57,6 @@ namespace ARMeilleure.Translation
private Thread[] _backgroundTranslationThreads;
private volatile int _threadCount;
// FIXME: Remove this once the init logic of the emulator will be redone.
public static readonly ManualResetEvent IsReadyForTranslation = new(false);
public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
{
_allocator = allocator;
@ -76,7 +73,7 @@ namespace ARMeilleure.Translation
CountTable = new EntryTable<uint>();
Functions = new TranslatorCache<TranslatedFunction>();
FunctionTable = new AddressTable<ulong>(for64Bits ? _levels64Bit : _levels32Bit);
Stubs = new TranslatorStubs(this);
Stubs = new TranslatorStubs(FunctionTable);
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
}
@ -100,8 +97,6 @@ namespace ARMeilleure.Translation
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
IsReadyForTranslation.WaitOne();
if (_ptc.State == PtcState.Enabled)
{
Debug.Assert(Functions.Count == 0);

View File

@ -1,3 +1,4 @@
using ARMeilleure.Common;
using ARMeilleure.Instructions;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
@ -14,11 +15,11 @@ namespace ARMeilleure.Translation
/// </summary>
class TranslatorStubs : IDisposable
{
private static readonly Lazy<IntPtr> _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true);
private readonly Lazy<IntPtr> _slowDispatchStub;
private bool _disposed;
private readonly Translator _translator;
private readonly AddressTable<ulong> _functionTable;
private readonly Lazy<IntPtr> _dispatchStub;
private readonly Lazy<DispatcherFunction> _dispatchLoop;
private readonly Lazy<WrapperFunction> _contextWrapper;
@ -83,13 +84,14 @@ namespace ARMeilleure.Translation
/// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
/// <see cref="Translator"/> instance.
/// </summary>
/// <param name="translator"><see cref="Translator"/> instance to use</param>
/// <param name="functionTable">Function table used to store pointers to the functions that the guest code will call</param>
/// <exception cref="ArgumentNullException"><paramref name="functionTable"/> is null</exception>
public TranslatorStubs(Translator translator)
public TranslatorStubs(AddressTable<ulong> functionTable)
{
ArgumentNullException.ThrowIfNull(translator);
ArgumentNullException.ThrowIfNull(functionTable);
_translator = translator;
_functionTable = functionTable;
_slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true);
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
_contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
@ -151,15 +153,15 @@ namespace ARMeilleure.Translation
context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
// Check if guest address is within range of the AddressTable.
Operand masked = context.BitwiseAnd(guestAddress, Const(~_translator.FunctionTable.Mask));
Operand masked = context.BitwiseAnd(guestAddress, Const(~_functionTable.Mask));
context.BranchIfTrue(lblFallback, masked);
Operand index = default;
Operand page = Const((long)_translator.FunctionTable.Base);
Operand page = Const((long)_functionTable.Base);
for (int i = 0; i < _translator.FunctionTable.Levels.Length; i++)
for (int i = 0; i < _functionTable.Levels.Length; i++)
{
ref var level = ref _translator.FunctionTable.Levels[i];
ref var level = ref _functionTable.Levels[i];
// level.Mask is not used directly because it is more often bigger than 32-bits, so it will not
// be encoded as an immediate on x86's bitwise and operation.
@ -167,7 +169,7 @@ namespace ARMeilleure.Translation
index = context.BitwiseAnd(context.ShiftRightUI(guestAddress, Const(level.Index)), mask);
if (i < _translator.FunctionTable.Levels.Length - 1)
if (i < _functionTable.Levels.Length - 1)
{
page = context.Load(OperandType.I64, context.Add(page, context.ShiftLeft(index, Const(3))));
context.BranchIfFalse(lblFallback, page);
@ -196,7 +198,7 @@ namespace ARMeilleure.Translation
/// Generates a <see cref="SlowDispatchStub"/>.
/// </summary>
/// <returns>Generated <see cref="SlowDispatchStub"/></returns>
private static IntPtr GenerateSlowDispatchStub()
private IntPtr GenerateSlowDispatchStub()
{
var context = new EmitterContext();
@ -205,8 +207,7 @@ namespace ARMeilleure.Translation
Operand guestAddress = context.Load(OperandType.I64,
context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
MethodInfo getFuncAddress = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress));
Operand hostAddress = context.Call(getFuncAddress, guestAddress);
Operand hostAddress = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), guestAddress);
context.Tailcall(hostAddress, nativeContext);
var cfg = context.GetControlFlowGraph();

View File

@ -31,7 +31,7 @@ namespace Ryujinx.Audio.Backends.CompatLayer
private const int Minus6dBInQ15 = (int)(0.501f * RawQ15One);
private const int Minus12dBInQ15 = (int)(0.251f * RawQ15One);
private static readonly int[] _defaultSurroundToStereoCoefficients = new int[4]
private static readonly long[] _defaultSurroundToStereoCoefficients = new long[4]
{
RawQ15One,
Minus3dBInQ15,
@ -39,7 +39,7 @@ namespace Ryujinx.Audio.Backends.CompatLayer
Minus3dBInQ15,
};
private static readonly int[] _defaultStereoToMonoCoefficients = new int[2]
private static readonly long[] _defaultStereoToMonoCoefficients = new long[2]
{
Minus6dBInQ15,
Minus6dBInQ15,
@ -62,19 +62,23 @@ namespace Ryujinx.Audio.Backends.CompatLayer
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static short DownMixStereoToMono(ReadOnlySpan<int> coefficients, short left, short right)
private static short DownMixStereoToMono(ReadOnlySpan<long> coefficients, short left, short right)
{
return (short)((left * coefficients[0] + right * coefficients[1]) >> Q15Bits);
return (short)Math.Clamp((left * coefficients[0] + right * coefficients[1]) >> Q15Bits, short.MinValue, short.MaxValue);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static short DownMixSurroundToStereo(ReadOnlySpan<int> coefficients, short back, short lfe, short center, short front)
private static short DownMixSurroundToStereo(ReadOnlySpan<long> coefficients, short back, short lfe, short center, short front)
{
return (short)((coefficients[3] * back + coefficients[2] * lfe + coefficients[1] * center + coefficients[0] * front + RawQ15HalfOne) >> Q15Bits);
return (short)Math.Clamp(
(coefficients[3] * back +
coefficients[2] * lfe +
coefficients[1] * center +
coefficients[0] * front + RawQ15HalfOne) >> Q15Bits, short.MinValue, short.MaxValue);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static short[] DownMixSurroundToStereo(ReadOnlySpan<int> coefficients, ReadOnlySpan<short> data)
private static short[] DownMixSurroundToStereo(ReadOnlySpan<long> coefficients, ReadOnlySpan<short> data)
{
int samplePerChannelCount = data.Length / SurroundChannelCount;
@ -94,7 +98,7 @@ namespace Ryujinx.Audio.Backends.CompatLayer
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static short[] DownMixStereoToMono(ReadOnlySpan<int> coefficients, ReadOnlySpan<short> data)
private static short[] DownMixStereoToMono(ReadOnlySpan<long> coefficients, ReadOnlySpan<short> data)
{
int samplePerChannelCount = data.Length / StereoChannelCount;

View File

@ -1,4 +1,3 @@
using ARMeilleure.Translation;
using Avalonia;
using Avalonia.Controls;
using Avalonia.Controls.ApplicationLifetimes;
@ -916,7 +915,6 @@ namespace Ryujinx.Ava
{
Device.Gpu.SetGpuThread();
Device.Gpu.InitializeShaderCache(_gpuCancellationTokenSource.Token);
Translator.IsReadyForTranslation.Set();
_renderer.Window.ChangeVSyncMode(Device.EnableDeviceVsync);

View File

@ -63,6 +63,17 @@ namespace Ryujinx.Common.Configuration
string userProfilePath = Path.Combine(appDataPath, DefaultBaseDir);
string portablePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, DefaultPortableDir);
// On macOS, check for a portable directory next to the app bundle as well.
if (OperatingSystem.IsMacOS() && !Directory.Exists(portablePath))
{
string bundlePath = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", ".."));
// Make sure we're actually running within an app bundle.
if (bundlePath.EndsWith(".app"))
{
portablePath = Path.GetFullPath(Path.Combine(bundlePath, "..", DefaultPortableDir));
}
}
if (Directory.Exists(portablePath))
{
BaseDirPath = portablePath;

View File

@ -40,5 +40,9 @@ namespace Ryujinx.Cpu.AppleHv
public void PrepareCodeRange(ulong address, ulong size)
{
}
/// <summary>
/// Dispose implementation with an empty body; calling it has no effect.
/// </summary>
/// <remarks>
/// NOTE(review): presumably added because <c>ICpuContext</c> now extends <see cref="IDisposable"/> - confirm
/// against the enclosing class declaration, which is not visible in this hunk.
/// </remarks>
public void Dispose()
{
}
}
}

View File

@ -0,0 +1,17 @@
using System;
namespace Ryujinx.Cpu
{
/// <summary>
/// <see cref="IDiskCacheLoadState"/> implementation that does nothing: the event is declared but never raised
/// by this class, and <see cref="Cancel"/> has an empty body.
/// </summary>
public class DummyDiskCacheLoadState : IDiskCacheLoadState
{
// The event is never raised here, so the "unused event" warning is suppressed around its declaration.
#pragma warning disable CS0067 // The event is never used
/// <inheritdoc/>
public event Action<LoadState, int, int> StateChanged;
#pragma warning restore CS0067
/// <inheritdoc/>
public void Cancel()
{
// No-op.
}
}
}

View File

@ -1,9 +1,11 @@
using System;
namespace Ryujinx.Cpu
{
/// <summary>
/// CPU context interface.
/// </summary>
public interface ICpuContext
public interface ICpuContext : IDisposable
{
/// <summary>
/// Creates a new execution context that will store thread CPU register state when executing guest code.

View File

@ -13,7 +13,7 @@ namespace Ryujinx.Cpu.Jit
public JitCpuContext(ITickSource tickSource, IMemoryManager memory, bool for64Bit)
{
_tickSource = tickSource;
_translator = new Translator(new JitMemoryAllocator(), memory, for64Bit);
_translator = new Translator(new JitMemoryAllocator(forJit: true), memory, for64Bit);
if (memory.Type.IsHostMapped())
{
@ -57,5 +57,9 @@ namespace Ryujinx.Cpu.Jit
{
_translator.PrepareCodeRange(address, size);
}
/// <summary>
/// Dispose implementation with an empty body; calling it has no effect.
/// </summary>
/// <remarks>
/// NOTE(review): the translator created in the constructor is not released here - confirm whether it owns
/// resources that should be disposed together with this context.
/// </remarks>
public void Dispose()
{
}
}
}

View File

@ -5,7 +5,14 @@ namespace Ryujinx.Cpu.Jit
{
public class JitMemoryAllocator : IJitMemoryAllocator
{
private readonly MemoryAllocationFlags _jitFlag;
public JitMemoryAllocator(bool forJit = false)
{
_jitFlag = forJit ? MemoryAllocationFlags.Jit : MemoryAllocationFlags.None;
}
public IJitMemoryBlock Allocate(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.None);
public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | MemoryAllocationFlags.Jit);
public IJitMemoryBlock Reserve(ulong size) => new JitMemoryBlock(size, MemoryAllocationFlags.Reserve | _jitFlag);
}
}

View File

@ -16,6 +16,7 @@ namespace Ryujinx.Cpu.Jit
}
public void Commit(ulong offset, ulong size) => _impl.Commit(offset, size);
public void MapAsRw(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndWrite);
public void MapAsRx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadAndExecute);
public void MapAsRwx(ulong offset, ulong size) => _impl.Reprotect(offset, size, MemoryPermission.ReadWriteExecute);

View File

@ -0,0 +1,32 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Arm32;
using Ryujinx.Cpu.LightningJit.Arm64;
using Ryujinx.Cpu.LightningJit.State;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit
{
/// <summary>
/// Entry point that routes a compilation request to the A64 or A32 compiler depending on the guest execution mode.
/// </summary>
class AarchCompiler
{
    /// <summary>
    /// Compiles the guest function at <paramref name="address"/>.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the selected compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the selected compiler</param>
    /// <param name="address">Guest address of the function to compile</param>
    /// <param name="funcTable">Function table forwarded to the selected compiler</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the selected compiler</param>
    /// <param name="executionMode">Guest execution mode; selects A64 versus A32 and, for A32, whether Thumb is used</param>
    /// <param name="targetArch">Host architecture forwarded to the selected compiler</param>
    /// <returns>The compiled function produced by the selected compiler</returns>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        ExecutionMode executionMode,
        Architecture targetArch)
    {
        if (executionMode != ExecutionMode.Aarch64)
        {
            // Every non-Aarch64 mode is handled by the A32 compiler; only the Thumb mode sets the Thumb flag.
            bool isThumb = executionMode == ExecutionMode.Aarch32Thumb;

            return A32Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, isThumb, targetArch);
        }

        return A64Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, targetArch);
    }
}
}

View File

@ -0,0 +1,18 @@
namespace Ryujinx.Cpu.LightningJit
{
/// <summary>
/// Byte-sized enumeration of address forms. Numeric values are spelled out and match the declaration order.
/// </summary>
enum AddressForm : byte
{
    None = 0,
    OffsetReg = 1,
    PostIndexed = 2,
    PreIndexed = 3,
    SignedScaled = 4,
    UnsignedScaled = 5,
    BaseRegister = 6,
    BasePlusOffset = 7,
    Literal = 8,
    StructNoOffset = 9,
    StructPostIndexedReg = 10,
}
}

View File

@ -0,0 +1,30 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Selects the 32-bit Arm guest compiler for the requested host architecture.
/// </summary>
static class A32Compiler
{
    /// <summary>
    /// Compiles the 32-bit Arm guest function at <paramref name="address"/> for the host architecture.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the compiler</param>
    /// <param name="memoryManager">Memory manager forwarded to the compiler</param>
    /// <param name="address">Guest address of the function to compile</param>
    /// <param name="funcTable">Function table forwarded to the compiler</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the compiler</param>
    /// <param name="isThumb">True if the guest code is Thumb code</param>
    /// <param name="targetArch">Host architecture to generate code for</param>
    /// <returns>The compiled function</returns>
    /// <exception cref="PlatformNotSupportedException"><paramref name="targetArch"/> is not Arm64</exception>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        bool isThumb,
        Architecture targetArch)
    {
        // Arm64 is the only host this compiler can target.
        if (targetArch != Architecture.Arm64)
        {
            throw new PlatformNotSupportedException();
        }

        return Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr, isThumb);
    }
}
}

View File

@ -0,0 +1,101 @@
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// A run of decoded guest instructions covering the address range [<see cref="Address"/>, <see cref="EndAddress"/>).
/// </summary>
class Block
{
    public readonly ulong Address;
    public readonly ulong EndAddress;
    public readonly List<InstInfo> Instructions;
    public readonly bool EndsWithBranch;
    public readonly bool HasHostCall;
    public readonly bool IsTruncated;
    public readonly bool IsLoopEnd;
    public readonly bool IsThumb;

    /// <summary>
    /// Creates a new block from already decoded instructions.
    /// </summary>
    public Block(
        ulong address,
        ulong endAddress,
        List<InstInfo> instructions,
        bool endsWithBranch,
        bool hasHostCall,
        bool isTruncated,
        bool isLoopEnd,
        bool isThumb)
    {
        // Outside Thumb mode every instruction is 4 bytes, so the byte range must match the instruction count.
        Debug.Assert(isThumb || (int)((endAddress - address) / 4) == instructions.Count);

        Address = address;
        EndAddress = endAddress;
        Instructions = instructions;
        EndsWithBranch = endsWithBranch;
        HasHostCall = hasHostCall;
        IsTruncated = isTruncated;
        IsLoopEnd = isLoopEnd;
        IsThumb = isThumb;
    }

    /// <summary>
    /// Splits this block in two at <paramref name="address"/>.
    /// </summary>
    /// <param name="address">Address of the first instruction of the right-hand block</param>
    /// <returns>
    /// The left and right blocks, or a pair of nulls if no instruction of this block starts at <paramref name="address"/>.
    /// </returns>
    public (Block, Block) SplitAtAddress(ulong address)
    {
        int leftCount = FindSplitIndex(address);

        if (leftCount < 0)
        {
            return (null, null);
        }

        int rightCount = Instructions.Count - leftCount;

        // Technically those are valid, but we don't want to create empty blocks.
        Debug.Assert(leftCount != 0);
        Debug.Assert(rightCount != 0);

        // The left half falls through into the right half, so it carries none of the block-ending properties.
        var left = new Block(
            Address,
            address,
            Instructions.GetRange(0, leftCount),
            endsWithBranch: false,
            hasHostCall: HasHostCall,
            isTruncated: false,
            isLoopEnd: false,
            isThumb: IsThumb);

        // The right half keeps the original end of the block, and with it the original ending properties.
        var right = new Block(
            address,
            EndAddress,
            Instructions.GetRange(leftCount, rightCount),
            endsWithBranch: EndsWithBranch,
            hasHostCall: HasHostCall,
            isTruncated: IsTruncated,
            isLoopEnd: IsLoopEnd,
            isThumb: IsThumb);

        return (left, right);
    }

    /// <summary>
    /// Finds the index of the instruction that starts at <paramref name="address"/>.
    /// </summary>
    /// <returns>The instruction index, or -1 in Thumb mode when no instruction starts at that address</returns>
    private int FindSplitIndex(ulong address)
    {
        if (!IsThumb)
        {
            // Fixed 4-byte instructions: the index follows directly from the byte offset.
            return (int)((address - Address) / 4);
        }

        // Thumb mixes 2 and 4 byte instructions, so walk the list while tracking each instruction's address.
        ulong cursor = Address;
        int position = 0;

        foreach (InstInfo inst in Instructions)
        {
            if (cursor == address)
            {
                return position;
            }

            cursor += inst.Flags.HasFlag(InstFlags.Thumb16) ? 2UL : 4UL;
            position++;
        }

        return -1;
    }
}
}

View File

@ -0,0 +1,15 @@
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Kind tag stored with each pending branch recorded by the Arm32 code generation context.
/// Numeric values are spelled out and match the declaration order.
/// </summary>
enum BranchType
{
    Branch = 0,
    Call = 1,
    IndirectBranch = 2,
    TableBranchByte = 3,
    TableBranchHalfword = 4,
    IndirectCall = 5,
    SyncPoint = 6,
    SoftwareInterrupt = 7,
    ReadCntpct = 8,
}
}

View File

@ -0,0 +1,198 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Collections.Generic;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Mutable state used while generating code for a 32-bit Arm guest function: the current PC, the IT block
/// conditions, the list of pending branches and a few one-shot flags.
/// </summary>
class CodeGenContext
{
    public CodeWriter CodeWriter { get; }
    public Assembler Arm64Assembler { get; }
    public RegisterAllocator RegisterAllocator { get; }
    public MemoryManagerType MemoryManagerType { get; }

    private uint _instructionAddress;

    public bool IsThumb { get; }
    public uint Pc { get; private set; }
    public bool InITBlock { get; private set; }

    private InstInfo _nextInstruction;
    private bool _skipNextInstruction;

    private readonly ArmCondition[] _itConditions;
    private int _itCount;

    private readonly List<PendingBranch> _pendingBranches;

    private bool _nzcvModified;

    /// <summary>
    /// Creates a new code generation context.
    /// </summary>
    public CodeGenContext(CodeWriter codeWriter, Assembler arm64Assembler, RegisterAllocator registerAllocator, MemoryManagerType mmType, bool isThumb)
    {
        CodeWriter = codeWriter;
        Arm64Assembler = arm64Assembler;
        RegisterAllocator = registerAllocator;
        MemoryManagerType = mmType;
        IsThumb = isThumb;

        _itConditions = new ArmCondition[4];
        _pendingBranches = new List<PendingBranch>();
    }

    /// <summary>
    /// Records the address of the instruction being emitted and derives the PC value visible to it.
    /// </summary>
    public void SetPc(uint address)
    {
        // Due to historical reasons, the PC value is always 2 instructions ahead on 32-bit Arm CPUs.
        uint pcBias = IsThumb ? 4u : 8u;

        Pc = address + pcBias;
        _instructionAddress = address;
    }

    public void SetNextInstruction(InstInfo info) => _nextInstruction = info;

    public InstInfo PeekNextInstruction() => _nextInstruction;

    public void SetSkipNextInstruction() => _skipNextInstruction = true;

    /// <summary>
    /// Returns the "skip next instruction" flag and clears it.
    /// </summary>
    public bool ConsumeSkipNextInstruction()
    {
        bool pending = _skipNextInstruction;
        _skipNextInstruction = false;

        return pending;
    }

    public void AddPendingBranch(InstName name, int offset)
    {
        // The target is relative to the PC value set by SetPc.
        Enqueue(BranchType.Branch, Pc + (uint)offset, 0u, name);
    }

    public void AddPendingCall(uint targetAddress, uint nextAddress)
    {
        Enqueue(BranchType.Call, targetAddress, nextAddress, InstName.BlI);

        RegisterAllocator.EnsureTempGprRegisters(1);
        RegisterAllocator.MarkGprAsUsed(RegisterUtils.LrRegister);
    }

    public void AddPendingIndirectBranch(InstName name, uint targetRegister)
    {
        Enqueue(BranchType.IndirectBranch, targetRegister, 0u, name);

        RegisterAllocator.MarkGprAsUsed((int)targetRegister);
    }

    public void AddPendingTableBranch(uint rn, uint rm, bool halfword)
    {
        BranchType tableType = halfword ? BranchType.TableBranchHalfword : BranchType.TableBranchByte;

        Enqueue(tableType, rn, rm, InstName.Tbb);

        RegisterAllocator.EnsureTempGprRegisters(2);
        RegisterAllocator.MarkGprAsUsed((int)rn);
        RegisterAllocator.MarkGprAsUsed((int)rm);
    }

    public void AddPendingIndirectCall(uint targetRegister, uint nextAddress)
    {
        Enqueue(BranchType.IndirectCall, targetRegister, nextAddress, InstName.BlxR);

        // A temporary is only requested when the call target is the link register itself.
        RegisterAllocator.EnsureTempGprRegisters(targetRegister == RegisterUtils.LrRegister ? 1 : 0);
        RegisterAllocator.MarkGprAsUsed((int)targetRegister);
        RegisterAllocator.MarkGprAsUsed(RegisterUtils.LrRegister);
    }

    public void AddPendingSyncPoint()
    {
        Enqueue(BranchType.SyncPoint, 0, 0, default);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }

    public void AddPendingBkpt(uint imm) => AddPendingSoftwareInterrupt(InstName.Bkpt, imm);

    public void AddPendingSvc(uint imm) => AddPendingSoftwareInterrupt(InstName.Svc, imm);

    public void AddPendingUdf(uint imm) => AddPendingSoftwareInterrupt(InstName.Udf, imm);

    public void AddPendingReadCntpct(uint rt, uint rt2)
    {
        Enqueue(BranchType.ReadCntpct, rt, rt2, InstName.Mrrc);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }

    public IEnumerable<PendingBranch> GetPendingBranches() => _pendingBranches;

    /// <summary>
    /// Starts an IT block with the given conditions.
    /// </summary>
    /// <param name="conditions">Conditions to store; <see cref="ConsumeItCondition"/> hands them out from last to first</param>
    public void SetItBlockStart(ReadOnlySpan<ArmCondition> conditions)
    {
        _itCount = conditions.Length;

        for (int slot = 0; slot < conditions.Length; slot++)
        {
            _itConditions[slot] = conditions[slot];
        }

        InITBlock = true;
    }

    /// <summary>
    /// Takes the next stored IT condition, if any remain.
    /// </summary>
    /// <param name="condition">The consumed condition, or <see cref="ArmCondition.Al"/> if none remain</param>
    /// <returns>True if a stored condition was consumed, false otherwise</returns>
    public bool ConsumeItCondition(out ArmCondition condition)
    {
        bool hasCondition = _itCount != 0;

        condition = hasCondition ? _itConditions[--_itCount] : ArmCondition.Al;

        return hasCondition;
    }

    /// <summary>
    /// Leaves the IT block once all of its conditions have been consumed.
    /// </summary>
    public void UpdateItState()
    {
        if (_itCount == 0)
        {
            InITBlock = false;
        }
    }

    public void SetNzcvModified() => _nzcvModified = true;

    /// <summary>
    /// Returns the "NZCV modified" flag and clears it.
    /// </summary>
    public bool ConsumeNzcvModified()
    {
        bool wasModified = _nzcvModified;
        _nzcvModified = false;

        return wasModified;
    }

    /// <summary>
    /// Records a software interrupt raised by <paramref name="name"/> at the current instruction address.
    /// </summary>
    private void AddPendingSoftwareInterrupt(InstName name, uint imm)
    {
        Enqueue(BranchType.SoftwareInterrupt, imm, _instructionAddress, name);

        RegisterAllocator.EnsureTempGprRegisters(1);
    }

    /// <summary>
    /// Appends a pending branch tagged with the code writer's current instruction pointer.
    /// </summary>
    private void Enqueue(BranchType type, uint first, uint second, InstName name)
    {
        _pendingBranches.Add(new(type, first, second, name, CodeWriter.InstructionPointer));
    }
}
}

View File

@ -0,0 +1,546 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
static class Decoder<T> where T : IInstEmit
{
/// <summary>
/// Decodes consecutive blocks starting at <paramref name="address"/>, then splits them at the collected branch targets.
/// </summary>
/// <param name="cpuPreset">CPU preset used to look up instruction encodings</param>
/// <param name="memoryManager">Memory manager the guest code is read from</param>
/// <param name="address">Guest address of the first instruction</param>
/// <param name="isThumb">True to decode Thumb code, false to decode A32 code</param>
/// <returns>The decoded blocks</returns>
public static MultiBlock DecodeMulti(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, bool isThumb)
{
    var decodedBlocks = new List<Block>();
    var knownTargets = new List<ulong>();
    bool keepDecoding;

    do
    {
        Block current = Decode(cpuPreset, memoryManager, address, isThumb);
        bool truncated = current.IsTruncated;

        if (!truncated && TryGetBranchTarget(current, out ulong target))
        {
            knownTargets.Add(target);
        }

        decodedBlocks.Add(current);

        // Decoding continues at the end of this block unless it was truncated or nothing can follow it.
        keepDecoding = !truncated && HasNextBlock(current, current.EndAddress - 4UL, knownTargets);
        address = current.EndAddress;
    }
    while (keepDecoding);

    // SplitBlocks walks the targets in ascending order.
    knownTargets.Sort();
    SplitBlocks(decodedBlocks, knownTargets);

    return new MultiBlock(decodedBlocks);
}
/// <summary>
/// Tries to get the direct branch target of the last instruction of <paramref name="block"/>.
/// </summary>
/// <param name="block">Block whose last instruction is inspected</param>
/// <param name="targetAddress">Branch target address, or 0 if the last instruction has no direct target</param>
/// <returns>True if the last instruction has a direct branch target, false otherwise</returns>
private static bool TryGetBranchTarget(Block block, out ulong targetAddress)
{
    // PC is 2 instructions ahead. The end address is already one instruction past the last one,
    // so only one more instruction needs to be added.
    ulong oneInstruction = block.IsThumb ? 2UL : 4UL;
    ulong pc = block.EndAddress + oneInstruction;

    InstInfo lastInst = block.Instructions[^1];

    return TryGetBranchTarget(lastInst.Name, lastInst.Flags, pc, lastInst.Encoding, block.IsThumb, out targetAddress);
}
/// <summary>
/// Tries to compute the target of a direct branch (B or CBNZ) from its encoding.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags; used to tell 16-bit from 32-bit Thumb encodings</param>
/// <param name="pc">PC value the branch offset is relative to</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <param name="targetAddress">Branch target address, or 0 if the instruction is not a supported direct branch</param>
/// <returns>True if a target was computed, false otherwise</returns>
private static bool TryGetBranchTarget(InstName name, InstFlags flags, ulong pc, uint encoding, bool isThumb, out ulong targetAddress)
{
    int branchOffset;

    if (name == InstName.B)
    {
        if (!isThumb)
        {
            branchOffset = ImmUtils.ExtractSImm24Times4(encoding);
        }
        else if (flags.HasFlag(InstFlags.Thumb16))
        {
            // 16-bit Thumb: bit 29 selects between the imm11 form and the conditional imm8 form.
            if ((encoding & (1u << 29)) != 0)
            {
                var t16 = new InstImm11b16w11(encoding);
                branchOffset = ImmUtils.ExtractT16SImm11Times2(t16.Imm11);
            }
            else
            {
                var t16Cond = new InstCondb24w4Imm8b16w8(encoding);
                branchOffset = ImmUtils.ExtractT16SImm8Times2(t16Cond.Imm8);
            }
        }
        else if ((encoding & (1u << 12)) != 0)
        {
            // 32-bit Thumb with bit 12 set: 24-bit immediate form.
            var t32 = new InstSb26w1Imm10b16w10J1b13w1J2b11w1Imm11b0w11(encoding);
            branchOffset = ImmUtils.CombineSImm24Times2(t32.Imm11, t32.Imm10, t32.J1, t32.J2, t32.S);
        }
        else
        {
            // 32-bit Thumb with bit 12 clear: conditional 20-bit immediate form.
            var t32Cond = new InstSb26w1Condb22w4Imm6b16w6J1b13w1J2b11w1Imm11b0w11(encoding);
            branchOffset = ImmUtils.CombineSImm20Times2(t32Cond.Imm11, t32Cond.Imm6, t32Cond.J1, t32Cond.J2, t32Cond.S);
        }
    }
    else if (name == InstName.Cbnz)
    {
        branchOffset = ImmUtils.ExtractT16UImm5Times2(encoding);
    }
    else
    {
        targetAddress = 0;

        return false;
    }

    targetAddress = pc + (ulong)branchOffset;
    Debug.Assert((targetAddress & 1) == 0);

    return true;
}
/// <summary>
/// Splits the blocks of <paramref name="blocks"/> so that every branch target that falls inside a block
/// (and is not already its start address) becomes the start address of a block of its own.
/// </summary>
/// <param name="blocks">Decoded blocks; modified in place when a block is split</param>
/// <param name="branchTargets">Branch target addresses; the caller sorts this list before calling</param>
private static void SplitBlocks(List<Block> blocks, List<ulong> branchTargets)
{
int btIndex = 0;
while (btIndex < branchTargets.Count)
{
// Scan the blocks from the start, consuming targets as they are matched to a block.
for (int blockIndex = 0; blockIndex < blocks.Count && btIndex < branchTargets.Count; blockIndex++)
{
Block block = blocks[blockIndex];
ulong currentBranchTarget = branchTargets[btIndex];
// Handle every pending target that lies inside the current block.
while (currentBranchTarget >= block.Address && currentBranchTarget < block.EndAddress)
{
// A target equal to the block start needs no split.
if (block.Address != currentBranchTarget)
{
(Block leftBlock, Block rightBlock) = block.SplitAtAddress(currentBranchTarget);
if (leftBlock != null && rightBlock != null)
{
// Replace the original block by the two halves, keeping the list in order.
// The left half becomes the current block; the right half is visited on the next for iteration.
blocks.Insert(blockIndex, leftBlock);
blocks[blockIndex + 1] = rightBlock;
block = leftBlock;
}
else
{
// Split can only fail in thumb mode, where the instruction size is not fixed.
Debug.Assert(block.IsThumb);
}
}
btIndex++;
// Skip duplicates of the target that was just processed.
while (btIndex < branchTargets.Count && branchTargets[btIndex] == currentBranchTarget)
{
btIndex++;
}
if (btIndex >= branchTargets.Count)
{
break;
}
currentBranchTarget = branchTargets[btIndex];
}
}
// Advance unconditionally after a full scan; this moves past a target that no block consumed,
// so the outer loop always makes progress.
Debug.Assert(btIndex < int.MaxValue);
btIndex++;
}
}
/// <summary>
/// Decides whether decoding should continue with the block that follows <paramref name="block"/>.
/// </summary>
/// <param name="block">Block that was just decoded</param>
/// <param name="pc">Reference address; the caller passes the block end address minus 4</param>
/// <param name="branchTargets">Branch targets collected so far</param>
/// <returns>True if the following block should be decoded too, false otherwise</returns>
private static bool HasNextBlock(in Block block, ulong pc, List<ulong> branchTargets)
{
    InstInfo last = block.Instructions[^1];
    InstFlags lastFlags = last.Flags;

    if (lastFlags.HasFlag(InstFlags.Cond))
    {
        // Thumb has separate encodings for conditional and unconditional branch instructions.
        bool executesConditionally = block.IsThumb || (ArmCondition)(last.Encoding >> 28) < ArmCondition.Al;

        if (executesConditionally)
        {
            return true;
        }
    }

    switch (last.Name)
    {
        case InstName.Cbnz:
        case InstName.BlI:
        case InstName.BlxR:
            return true;

        case InstName.Bx:
        case InstName.Bxj:
            return branchTargets.Contains(pc + 4UL);

        case InstName.B:
            if (branchTargets.Contains(pc + 4UL))
            {
                return true;
            }

            // Also continue when the branch jumps forward by less than 0x1000 bytes from pc.
            return TryGetBranchTarget(block, out ulong target) && target >= pc && target < pc + 0x1000;
    }

    return WritesToPC(last.Encoding, last.Name, lastFlags, block.IsThumb)
        ? branchTargets.Contains(pc + 4UL)
        : !block.EndsWithBranch;
}
/// <summary>
/// Decodes instructions starting at <paramref name="address"/> until a control flow instruction
/// is decoded or unmapped memory is reached.
/// </summary>
/// <param name="cpuPreset">CPU preset; its version and features are used for the instruction table lookups</param>
/// <param name="memoryManager">Memory manager the instruction encodings are read from</param>
/// <param name="address">Guest address of the first instruction of the block</param>
/// <param name="isThumb">True to decode Thumb code, false to decode A32 code</param>
/// <returns>The decoded block; it is marked as truncated if unmapped memory was reached</returns>
private static Block Decode(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, bool isThumb)
{
ulong startAddress = address;
List<InstInfo> insts = new();
uint encoding;
InstMeta meta;
InstFlags extraFlags = InstFlags.None;
bool hasHostCall = false;
bool isTruncated = false;
do
{
// Stop at unmapped memory; the block ends before this address and is flagged as truncated.
if (!memoryManager.IsMapped(address))
{
encoding = 0;
meta = default;
isTruncated = true;
break;
}
if (isThumb)
{
// Read the first halfword into the upper 16 bits and assume a 16-bit instruction.
encoding = (uint)memoryManager.Read<ushort>(address) << 16;
address += 2UL;
extraFlags = InstFlags.Thumb16;
if (!InstTableT16<T>.TryGetMeta(encoding, cpuPreset.Version, cpuPreset.Features, out meta))
{
// No 16-bit match: add the following halfword as the lower 16 bits and try the 32-bit table.
// NOTE(review): this second read is not preceded by an IsMapped check - confirm that is intended.
encoding |= memoryManager.Read<ushort>(address);
if (InstTableT32<T>.TryGetMeta(encoding, cpuPreset.Version, cpuPreset.Features, out meta))
{
// 32-bit match: consume the second halfword and drop the Thumb16 flag.
address += 2UL;
extraFlags = InstFlags.None;
}
}
}
else
{
encoding = memoryManager.Read<uint>(address);
address += 4UL;
meta = InstTableA32<T>.GetMeta(encoding, cpuPreset.Version, cpuPreset.Features);
}
// Only evaluated until the first instruction that needs a host call is found.
if (meta.Name.IsSystemOrCall() && !hasHostCall)
{
hasHostCall = meta.Name.IsCall() || InstEmitSystem.NeedsCall(meta.Name);
}
insts.Add(new(encoding, meta.Name, meta.EmitFunc, meta.Flags | extraFlags));
}
while (!IsControlFlow(encoding, meta.Name, meta.Flags | extraFlags, isThumb));
bool isLoopEnd = false;
// A block ending in a backwards branch is flagged as a loop end and as having a host call.
if (!isTruncated && IsBackwardsBranch(meta.Name, encoding))
{
hasHostCall = true;
isLoopEnd = true;
}
// The block ends with a branch exactly when decoding was not cut short by unmapped memory.
return new(
startAddress,
address,
insts,
!isTruncated,
hasHostCall,
isTruncated,
isLoopEnd,
isThumb);
}
/// <summary>
/// Checks if the instruction ends a block: it is either one of the branch instructions or it writes to PC.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>True if the instruction changes control flow, false otherwise</returns>
private static bool IsControlFlow(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
    bool isBranchInstruction = name is
        InstName.B or
        InstName.BlI or
        InstName.BlxR or
        InstName.Bx or
        InstName.Bxj or
        InstName.Cbnz or
        InstName.Tbb;

    return isBranchInstruction || WritesToPC(encoding, name, flags, isThumb);
}
/// <summary>
/// Checks if the instruction writes to the PC register, according to its register write mask.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>True if the PC bit is set in the instruction's register write mask, false otherwise</returns>
public static bool WritesToPC(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
    uint writtenRegisters = GetRegisterWriteMask(encoding, name, flags, isThumb);
    uint pcBit = 1u << RegisterUtils.PcRegister;

    return (writtenRegisters & pcBit) != 0;
}
/// <summary>
/// Builds a bit mask of the general purpose registers written by an instruction, where bit N is set
/// if register N is written. The instruction flags select which register fields of the encoding are considered.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="isThumb">True if the instruction is a Thumb instruction</param>
/// <returns>Mask of the registers written by the instruction</returns>
private static uint GetRegisterWriteMask(uint encoding, InstName name, InstFlags flags, bool isThumb)
{
uint mask = 0;
if (isThumb)
{
if (flags.HasFlag(InstFlags.Thumb16))
{
// 16-bit Thumb encodings.
if (flags.HasFlag(InstFlags.Rdn))
{
mask |= 1u << RegisterUtils.ExtractRdn(flags, encoding);
}
if (flags.HasFlag(InstFlags.Rd))
{
mask |= 1u << RegisterUtils.ExtractRdT16(flags, encoding);
}
Debug.Assert(!flags.HasFlag(InstFlags.RdHi));
if (IsRegisterWrite(flags, InstFlags.Rt))
{
mask |= 1u << RegisterUtils.ExtractRtT16(flags, encoding);
}
Debug.Assert(!flags.HasFlag(InstFlags.Rt2));
if (IsRegisterWrite(flags, InstFlags.Rlist))
{
// The register list is taken from bits 16-23 of the encoding and maps to registers 0-7.
mask |= (byte)(encoding >> 16);
// One extra encoding bit adds bit 14 for PUSH or bit 15 for POP.
if (name == InstName.Push)
{
mask |= (encoding >> 10) & 0x4000; // LR
}
else if (name == InstName.Pop)
{
mask |= (encoding >> 9) & 0x8000; // PC
}
}
Debug.Assert(!flags.HasFlag(InstFlags.WBack));
}
else
{
// 32-bit Thumb encodings.
if (flags.HasFlag(InstFlags.Rd))
{
mask |= 1u << RegisterUtils.ExtractRdT32(flags, encoding);
}
if (flags.HasFlag(InstFlags.RdLo))
{
mask |= 1u << RegisterUtils.ExtractRdLoT32(encoding);
}
if (flags.HasFlag(InstFlags.RdHi))
{
mask |= 1u << RegisterUtils.ExtractRdHiT32(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rt) && IsRtWrite(name, encoding) && !IsR15RtEncodingSpecial(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRtT32(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rt2) && IsRtWrite(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRt2T32(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rlist))
{
// The register list is the lower 16 bits of the encoding.
mask |= (ushort)encoding;
}
if (flags.HasFlag(InstFlags.WBack) && HasWriteBackT32(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRn(encoding); // This is at the same bit position as A32.
}
}
}
else
{
// A32 encodings.
if (flags.HasFlag(InstFlags.Rd))
{
mask |= 1u << RegisterUtils.ExtractRd(flags, encoding);
}
if (flags.HasFlag(InstFlags.RdHi))
{
mask |= 1u << RegisterUtils.ExtractRdHi(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rt) && IsRtWrite(name, encoding) && !IsR15RtEncodingSpecial(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRt(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rt2) && IsRtWrite(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRt2(encoding);
}
if (IsRegisterWrite(flags, InstFlags.Rlist))
{
// The register list is the lower 16 bits of the encoding.
mask |= (ushort)encoding;
}
if (flags.HasFlag(InstFlags.WBack) && HasWriteBack(name, encoding))
{
mask |= 1u << RegisterUtils.ExtractRn(encoding);
}
}
return mask;
}
/// <summary>
/// Tells whether an instruction that carries an Rt/Rt2 operand actually writes that general purpose register.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>True if Rt (and Rt2) is a destination, false if it is only a source</returns>
private static bool IsRtWrite(InstName name, uint encoding)
{
    // These VMOV forms can move GPR to FP/SIMD or FP/SIMD to GPR depending on the encoding,
    // for them bit 20 of the encoding decides whether the GPR side is written.
    bool directionDependsOnEncoding = name is InstName.VmovD or InstName.VmovH or InstName.VmovS or InstName.VmovSs;

    if (!directionDependsOnEncoding)
    {
        return true;
    }

    return ((encoding >> 20) & 1u) != 0;
}
/// <summary>
/// Checks the encoding bits to decide whether the base register (Rn) is written back.
/// Used on the non-Thumb path for instructions flagged with <see cref="InstFlags.WBack"/>.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>True if the base register is updated by the instruction</returns>
private static bool HasWriteBack(InstName name, uint encoding)
{
    const uint WBit = 1u << 21;
    const uint PBit = 1u << 24;

    bool wSet = (encoding & WBit) != 0;

    if (IsLoadStoreMultiple(name))
    {
        // Load/store multiple: only bit 21 is consulted.
        return wSet;
    }
    else if (IsVLDnVSTn(name))
    {
        // VLDn/VSTn: the low 4 bits being 15 means no write back.
        return (encoding & 0xf) != RegisterUtils.PcRegister;
    }

    // Everything else: write back happens unless bit 24 is set while bit 21 is clear.
    bool pSet = (encoding & PBit) != 0;

    return wSet || !pSet;
}
/// <summary>
/// Thumb 32-bit counterpart of the write back check: decides from the encoding bits
/// whether the base register (Rn) is written back.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>True if the base register is updated by the instruction</returns>
private static bool HasWriteBackT32(InstName name, uint encoding)
{
    if (IsLoadStoreMultiple(name))
    {
        // Load/store multiple: bit 21 selects write back.
        return ((encoding >> 21) & 1u) != 0;
    }
    else if (IsVLDnVSTn(name))
    {
        // VLDn/VSTn: the low 4 bits being 15 means no write back.
        return (encoding & 0xf) != RegisterUtils.PcRegister;
    }

    // NOTE(review): every other instruction is decided by bit 8 alone. T32 LDRD/STRD (immediate)
    // encode W at bit 21 instead - confirm those never reach this point with the WBack flag set.
    return ((encoding >> 8) & 1u) != 0;
}
/// <summary>
/// Checks if the instruction is one of the load/store multiple forms (LDM*/STM*, FLDMX/FSTMX, VLDM/VSTM).
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <returns>True for load/store multiple instructions, false otherwise</returns>
private static bool IsLoadStoreMultiple(InstName name)
{
    return name switch
    {
        InstName.Ldm or
        InstName.Ldmda or
        InstName.Ldmdb or
        InstName.LdmE or
        InstName.Ldmib or
        InstName.LdmU or
        InstName.Stm or
        InstName.Stmda or
        InstName.Stmdb or
        InstName.Stmib or
        InstName.StmU or
        InstName.Fldmx or
        InstName.Fstmx or
        InstName.Vldm or
        InstName.Vstm => true,
        _ => false,
    };
}
/// <summary>
/// Checks if the instruction is one of the VLD1-VLD4 / VST1-VST4 element or structure load/store forms.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <returns>True for VLDn/VSTn instructions, false otherwise</returns>
private static bool IsVLDnVSTn(InstName name)
{
    bool isVldN = name is
        InstName.Vld11 or InstName.Vld1A or InstName.Vld1M or
        InstName.Vld21 or InstName.Vld2A or InstName.Vld2M or
        InstName.Vld31 or InstName.Vld3A or InstName.Vld3M or
        InstName.Vld41 or InstName.Vld4A or InstName.Vld4M;

    bool isVstN = name is
        InstName.Vst11 or InstName.Vst1M or
        InstName.Vst21 or InstName.Vst2M or
        InstName.Vst31 or InstName.Vst3M or
        InstName.Vst41 or InstName.Vst4M;

    return isVldN || isVstN;
}
/// <summary>
/// Checks if the instruction uses the Rt == R15 encoding with a special meaning,
/// in which case R15 must not be counted as a general purpose register write.
/// </summary>
/// <remarks>
/// Only VMRS is handled: with the system register field (bits 19:16) equal to 1 and Rt == 15.
/// Previously the Rt field was not checked, so a VMRS with the same system register field and any
/// other Rt was also reported as special and its write to Rt was left out of the write mask.
/// </remarks>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>True if Rt is R15 and that encoding does not denote a write to R15</returns>
private static bool IsR15RtEncodingSpecial(InstName name, uint encoding)
{
    if (name != InstName.Vmrs)
    {
        return false;
    }

    // Rt is read from bits 15:12, the same position ExtractRt and ExtractRtT32 use.
    bool rtIsR15 = RegisterUtils.ExtractRt(encoding) == RegisterUtils.PcRegister;
    bool sysRegIsOne = ((encoding >> 16) & 0xf) == 1;

    return rtIsR15 && sysRegIsOne;
}
/// <summary>
/// Checks if the register operand selected by <paramref name="testFlag"/> is present and written,
/// that is, the instruction has the flag and is not marked with <see cref="InstFlags.ReadRd"/>.
/// </summary>
/// <param name="flags">Flags of the decoded instruction</param>
/// <param name="testFlag">Register operand flag to test</param>
/// <returns>True if the operand is a destination</returns>
private static bool IsRegisterWrite(InstFlags flags, InstFlags testFlag)
{
    if ((flags & InstFlags.ReadRd) == InstFlags.ReadRd)
    {
        return false;
    }

    return (flags & testFlag) == testFlag;
}
/// <summary>
/// Checks if the instruction is a B whose 24-bit signed immediate (as decoded by
/// <see cref="ImmUtils.ExtractSImm24Times4"/>) is negative.
/// </summary>
/// <param name="name">Decoded instruction name</param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>True for a B with a negative offset, false for everything else</returns>
private static bool IsBackwardsBranch(InstName name, uint encoding)
{
    return name == InstName.B && ImmUtils.ExtractSImm24Times4(encoding) < 0;
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,137 @@
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Helpers that expand, combine and extract immediate fields of Arm32 instruction encodings.
/// </summary>
static class ImmUtils
{
    /// <summary>
    /// Expands a 12-bit modified immediate: the low 8 bits rotated right by twice the value of the bits above them.
    /// </summary>
    /// <param name="imm">Rotation in bits 8 and above, 8-bit value in bits 7:0</param>
    /// <returns>Expanded 32-bit immediate</returns>
    public static uint ExpandImm(uint imm)
    {
        return BitOperations.RotateRight((byte)imm, (int)(imm >> 8) * 2);
    }

    /// <summary>
    /// Checks if <see cref="ExpandImm(uint)"/> applies a non-zero rotation for the given immediate.
    /// </summary>
    /// <param name="imm">Rotation in bits 8 and above, 8-bit value in bits 7:0</param>
    /// <returns>True if the rotation field is non-zero</returns>
    public static bool ExpandedImmRotated(uint imm)
    {
        return (imm >> 8) != 0;
    }

    /// <summary>
    /// Expands a Thumb modified immediate made of the i:imm3:imm8 fields.
    /// </summary>
    /// <param name="imm8">Low 8 bits of the immediate</param>
    /// <param name="imm3">Bits 10:8 of the immediate</param>
    /// <param name="i">Bit 11 of the immediate</param>
    /// <returns>Expanded 32-bit immediate</returns>
    public static uint ExpandImm(uint imm8, uint imm3, uint i)
    {
        uint imm = CombineImmU12(imm8, imm3, i);

        if (imm >> 10 == 0)
        {
            // Bits 11:10 are zero: bits 9:8 select one of four byte replication patterns.
            return ((imm >> 8) & 3) switch
            {
                0 => (byte)imm,
                1 => (byte)imm * 0x00010001u,
                2 => (byte)imm * 0x01000100u,
                3 => (byte)imm * 0x01010101u,
                _ => 0,
            };
        }
        else
        {
            // Otherwise bits 11:7 are the rotation, applied to the low byte with bit 7 forced to 1.
            return BitOperations.RotateRight(0x80u | (byte)imm, (int)(imm >> 7));
        }
    }

    /// <summary>
    /// Checks if <see cref="ExpandImm(uint, uint, uint)"/> takes the rotated path for the given fields.
    /// </summary>
    /// <remarks>
    /// The rotated path is taken only when bits 11:10 of i:imm3:imm8 are not both zero.
    /// This previously tested bits 11:7, so replicated-byte immediates with a non-zero pattern selector
    /// (or with bit 7 of imm8 set) were wrongly reported as rotated.
    /// </remarks>
    /// <param name="imm8">Low 8 bits of the immediate</param>
    /// <param name="imm3">Bits 10:8 of the immediate</param>
    /// <param name="i">Bit 11 of the immediate</param>
    /// <returns>True if the immediate is expanded with a rotation</returns>
    public static bool ExpandedImmRotated(uint imm8, uint imm3, uint i)
    {
        uint imm = CombineImmU12(imm8, imm3, i);

        return (imm >> 10) != 0;
    }

    /// <summary>Combines imm3:imm2 into a 5-bit value.</summary>
    public static uint CombineImmU5(uint imm2, uint imm3)
    {
        return imm2 | (imm3 << 2);
    }

    /// <summary>Combines imm4:i into a 5-bit value (i is the least significant bit).</summary>
    public static uint CombineImmU5IImm4(uint i, uint imm4)
    {
        return i | (imm4 << 1);
    }

    /// <summary>Combines imm4h:imm4l into an 8-bit value.</summary>
    public static uint CombineImmU8(uint imm4l, uint imm4h)
    {
        return imm4l | (imm4h << 4);
    }

    /// <summary>Combines i:imm3:imm4 into an 8-bit value.</summary>
    public static uint CombineImmU8(uint imm4, uint imm3, uint i)
    {
        return imm4 | (imm3 << 4) | (i << 7);
    }

    /// <summary>Combines i:imm3:imm8 into a 12-bit value.</summary>
    public static uint CombineImmU12(uint imm8, uint imm3, uint i)
    {
        return imm8 | (imm3 << 8) | (i << 11);
    }

    /// <summary>Combines imm4:imm12 into a 16-bit value.</summary>
    public static uint CombineImmU16(uint imm12, uint imm4)
    {
        return imm12 | (imm4 << 12);
    }

    /// <summary>Combines imm4:i:imm3:imm8 into a 16-bit value.</summary>
    public static uint CombineImmU16(uint imm8, uint imm3, uint i, uint imm4)
    {
        return imm8 | (imm3 << 8) | (i << 11) | (imm4 << 12);
    }

    /// <summary>
    /// Combines S:J2:J1:imm6:imm11 into a signed 20-bit value and multiplies it by 2.
    /// </summary>
    /// <remarks>
    /// The combined value is 20 bits wide with S at bit 19, so it has to be shifted left by 12 to place
    /// S at the sign position. It was previously shifted by 13, which discarded S and sign extended from J2.
    /// </remarks>
    /// <returns>Sign extended offset in bytes</returns>
    public static int CombineSImm20Times2(uint imm11, uint imm6, uint j1, uint j2, uint s)
    {
        int imm32 = (int)(imm11 | (imm6 << 11) | (j1 << 17) | (j2 << 18) | (s << 19));

        return (imm32 << 12) >> 11;
    }

    /// <summary>
    /// Combines S:I1:I2:imm10:imm11 into a signed 24-bit value and multiplies it by 2,
    /// where I1 = J1 ^ S ^ 1 and I2 = J2 ^ S ^ 1.
    /// </summary>
    /// <returns>Sign extended offset in bytes</returns>
    public static int CombineSImm24Times2(uint imm11, uint imm10, uint j1, uint j2, uint s)
    {
        uint i1 = j1 ^ s ^ 1;
        uint i2 = j2 ^ s ^ 1;

        int imm32 = (int)(imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23));

        return (imm32 << 8) >> 7;
    }

    /// <summary>
    /// Combines S:I1:I2:imm10H:imm10L into a signed 23-bit value and multiplies it by 4,
    /// where I1 = J1 ^ S ^ 1 and I2 = J2 ^ S ^ 1.
    /// </summary>
    /// <returns>Sign extended offset in bytes</returns>
    public static int CombineSImm24Times4(uint imm10L, uint imm10H, uint j1, uint j2, uint s)
    {
        uint i1 = j1 ^ s ^ 1;
        uint i2 = j2 ^ s ^ 1;

        int imm32 = (int)(imm10L | (imm10H << 10) | (i2 << 20) | (i1 << 21) | (s << 22));

        return (imm32 << 9) >> 7;
    }

    /// <summary>Adds the M bit to a register list at bit 14.</summary>
    public static uint CombineRegisterList(uint registerList, uint m)
    {
        return registerList | (m << 14);
    }

    /// <summary>Adds the M bit at bit 14 and the P bit at bit 15 to a register list.</summary>
    public static uint CombineRegisterList(uint registerList, uint m, uint p)
    {
        return registerList | (m << 14) | (p << 15);
    }

    /// <summary>
    /// Extracts the signed 24-bit immediate in bits 23:0 of the encoding, multiplied by 4.
    /// </summary>
    public static int ExtractSImm24Times4(uint encoding)
    {
        return (int)(encoding << 8) >> 6;
    }

    /// <summary>
    /// Extracts the unsigned 5-bit immediate in bits 23:19 of the encoding, multiplied by 2.
    /// </summary>
    public static int ExtractT16UImm5Times2(uint encoding)
    {
        return (int)(encoding >> 18) & 0x3e;
    }

    /// <summary>
    /// Extracts the signed 8-bit immediate in bits 7:0 of the encoding, multiplied by 2.
    /// </summary>
    public static int ExtractT16SImm8Times2(uint encoding)
    {
        // NOTE(review): this reads the low bits of the word, while ExtractT16UImm5Times2 reads the field
        // from the upper 16 bits - confirm which halfword position callers pass here.
        return (int)(encoding << 24) >> 23;
    }

    /// <summary>
    /// Extracts the signed 11-bit immediate in bits 10:0 of the encoding, multiplied by 2.
    /// </summary>
    public static int ExtractT16SImm11Times2(uint encoding)
    {
        // NOTE(review): same halfword position question as ExtractT16SImm8Times2 - confirm against callers.
        return (int)(encoding << 21) >> 20;
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,63 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Per-instruction flags describing which register operands an encoding has and how they are used.
/// The first group holds the single-bit flags, the remaining members are named combinations of them.
/// </summary>
[Flags]
enum InstFlags
{
    None = 0x0000,

    // Single-bit flags.
    Cond = 0x0001,
    Rd = 0x0002,
    RdLo = 0x0004,
    RdHi = 0x0008,
    Rdn = 0x0010,
    Dn = 0x0020,
    Rt = 0x0040,
    Rt2 = 0x0080,
    Rlist = 0x0100,
    Rd16 = 0x0200,
    ReadRd = 0x0400,
    WBack = 0x0800,
    Thumb16 = 0x1000,

    // Combinations without a condition.
    RdnDn = Rdn | Dn,
    RdRd16 = Rd | Rd16,
    RtRt2 = Rt | Rt2,
    RdLoRdHi = RdLo | RdHi,
    RdLoHi = Rd | RdHi,
    RdRtRead = Rd | Rt | ReadRd,
    RdRtReadRd16 = RdRtRead | Rd16,
    RdRt2Read = RdRtRead | Rt2,
    RdRt2ReadRd16 = RdRt2Read | Rd16,
    RtRd16 = Rt | Rd16,
    RtWBack = Rt | WBack,
    Rt2WBack = RtRt2 | WBack,
    RtRead = Rt | ReadRd,
    RtReadRd16 = RtRead | Rd16,
    Rt2Read = RtRt2 | ReadRd,
    RtReadWBack = RtWBack | ReadRd,
    Rt2ReadWBack = Rt2Read | WBack,
    RlistWBack = Rlist | WBack,
    RlistRead = Rlist | ReadRd,
    RlistReadWBack = RlistRead | WBack,

    // The same combinations with the Cond flag added.
    CondRd = Cond | Rd,
    CondRdLoHi = Cond | RdLoHi,
    CondRt = Cond | Rt,
    CondRt2 = Cond | RtRt2,
    CondRd16 = Cond | RdRd16,
    CondWBack = Cond | WBack,
    CondRdRtRead = Cond | RdRtRead,
    CondRdRt2Read = Cond | RdRt2Read,
    CondRtWBack = Cond | RtWBack,
    CondRt2WBack = Cond | Rt2WBack,
    CondRtRead = Cond | RtRead,
    CondRt2Read = Cond | Rt2Read,
    CondRtReadWBack = Cond | RtReadWBack,
    CondRt2ReadWBack = Cond | Rt2ReadWBack,
    CondRlist = Cond | Rlist,
    CondRlistWBack = Cond | RlistWBack,
    CondRlistRead = Cond | RlistRead,
    CondRlistReadWBack = Cond | RlistReadWBack,
}
}

View File

@ -0,0 +1,20 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Immutable description of a decoded instruction: its raw encoding, name, emit function and flags.
/// </summary>
readonly struct InstInfo
{
    public readonly uint Encoding;
    public readonly InstName Name;
    public readonly Action<CodeGenContext, uint> EmitFunc;
    public readonly InstFlags Flags;

    /// <summary>
    /// Creates a new instruction description, storing every argument in the field of the same name.
    /// </summary>
    public InstInfo(uint encoding, InstName name, Action<CodeGenContext, uint> emitFunc, InstFlags flags)
        => (Encoding, Name, EmitFunc, Flags) = (encoding, name, emitFunc, flags);
}
}

View File

@ -0,0 +1,79 @@
using Ryujinx.Cpu.LightningJit.Table;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Instruction table entry: an encoding/mask pair, optional constraint encodings and the
/// <see cref="InstMeta"/> associated with the instruction.
/// </summary>
readonly struct InstInfoForTable : IInstInfo
{
    public uint Encoding { get; }
    public uint EncodingMask { get; }
    public InstEncoding[] Constraints { get; }
    public InstMeta Meta { get; }

    public IsaVersion Version
    {
        get { return Meta.Version; }
    }

    public IsaFeature Feature
    {
        get { return Meta.Feature; }
    }

    /// <summary>
    /// Creates an entry with every value given explicitly. The other constructors forward to this one.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstEncoding[] constraints,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        IsaFeature isaFeature,
        InstFlags flags)
    {
        Meta = new InstMeta(name, emitFunc, isaVersion, isaFeature, flags);
        Constraints = constraints;
        EncodingMask = encodingMask;
        Encoding = encoding;
    }

    /// <summary>
    /// Creates an entry with constraints and <see cref="IsaFeature.None"/>.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstEncoding[] constraints,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        InstFlags flags)
        : this(encoding, encodingMask, constraints, name, emitFunc, isaVersion, IsaFeature.None, flags)
    {
    }

    /// <summary>
    /// Creates an entry without constraints (null) for a specific feature.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        IsaFeature isaFeature,
        InstFlags flags)
        : this(encoding, encodingMask, null, name, emitFunc, isaVersion, isaFeature, flags)
    {
    }

    /// <summary>
    /// Creates an entry without constraints (null) and with <see cref="IsaFeature.None"/>.
    /// </summary>
    public InstInfoForTable(
        uint encoding,
        uint encodingMask,
        InstName name,
        Action<CodeGenContext, uint> emitFunc,
        IsaVersion isaVersion,
        InstFlags flags)
        : this(encoding, encodingMask, null, name, emitFunc, isaVersion, IsaFeature.None, flags)
    {
    }

    /// <summary>
    /// Checks if the encoding matches any of the constraint encodings of this entry.
    /// </summary>
    /// <param name="encoding">Instruction encoding to test</param>
    /// <returns>True if at least one constraint matches, false if none match or there are no constraints</returns>
    public bool IsConstrained(uint encoding)
    {
        InstEncoding[] constraints = Constraints;

        if (constraints == null)
        {
            return false;
        }

        for (int index = 0; index < constraints.Length; index++)
        {
            InstEncoding candidate = constraints[index];

            if ((encoding & candidate.EncodingMask) == candidate.Encoding)
            {
                return true;
            }
        }

        return false;
    }
}
}

View File

@ -0,0 +1,22 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Immutable metadata of an instruction: name, emit function, ISA version, ISA feature and flags.
/// </summary>
readonly struct InstMeta
{
    public readonly InstName Name;
    public readonly Action<CodeGenContext, uint> EmitFunc;
    public readonly IsaVersion Version;
    public readonly IsaFeature Feature;
    public readonly InstFlags Flags;

    /// <summary>
    /// Creates the metadata, storing each argument in the matching field.
    /// </summary>
    public InstMeta(InstName name, Action<CodeGenContext, uint> emitFunc, IsaVersion isaVersion, IsaFeature isaFeature, InstFlags flags)
        => (Name, EmitFunc, Version, Feature, Flags) = (name, emitFunc, isaVersion, isaFeature, flags);
}
}

View File

@ -0,0 +1,562 @@
namespace Ryujinx.Cpu.LightningJit.Arm32
{
// Names of the Arm32 instructions known to the decoder tables and emitters.
// Members have no explicit values, so each one takes the next sequential value in declaration order;
// inserting or reordering members changes the numeric value of every member that follows.
// NOTE(review): the I / R / Rr suffixes presumably distinguish immediate, register and
// register-shifted register forms of the same mnemonic - confirm against the instruction tables.
enum InstName
{
AdcI,
AdcR,
AdcRr,
AddI,
AddR,
AddRr,
AddSpI,
AddSpR,
Adr,
Aesd,
Aese,
Aesimc,
Aesmc,
AndI,
AndR,
AndRr,
B,
Bfc,
Bfi,
BicI,
BicR,
BicRr,
Bkpt,
BlxR,
BlI,
Bx,
Bxj,
Cbnz,
Clrbhb,
Clrex,
Clz,
CmnI,
CmnR,
CmnRr,
CmpI,
CmpR,
CmpRr,
Cps,
Crc32,
Crc32c,
Csdb,
Dbg,
Dcps1,
Dcps2,
Dcps3,
Dmb,
Dsb,
EorI,
EorR,
EorRr,
Eret,
Esb,
Fldmx,
Fstmx,
Hlt,
Hvc,
Isb,
It,
Lda,
Ldab,
Ldaex,
Ldaexb,
Ldaexd,
Ldaexh,
Ldah,
LdcI,
LdcL,
Ldm,
Ldmda,
Ldmdb,
Ldmib,
LdmE,
LdmU,
Ldrbt,
LdrbI,
LdrbL,
LdrbR,
LdrdI,
LdrdL,
LdrdR,
Ldrex,
Ldrexb,
Ldrexd,
Ldrexh,
Ldrht,
LdrhI,
LdrhL,
LdrhR,
Ldrsbt,
LdrsbI,
LdrsbL,
LdrsbR,
Ldrsht,
LdrshI,
LdrshL,
LdrshR,
Ldrt,
LdrI,
LdrL,
LdrR,
Mcr,
Mcrr,
Mla,
Mls,
Movt,
MovI,
MovR,
MovRr,
Mrc,
Mrrc,
Mrs,
MrsBr,
MsrBr,
MsrI,
MsrR,
Mul,
MvnI,
MvnR,
MvnRr,
Nop,
OrnI,
OrnR,
OrrI,
OrrR,
OrrRr,
Pkh,
PldI,
PldL,
PldR,
PliI,
PliR,
Pop,
Pssbb,
Push,
Qadd,
Qadd16,
Qadd8,
Qasx,
Qdadd,
Qdsub,
Qsax,
Qsub,
Qsub16,
Qsub8,
Rbit,
Rev,
Rev16,
Revsh,
Rfe,
RsbI,
RsbR,
RsbRr,
RscI,
RscR,
RscRr,
Sadd16,
Sadd8,
Sasx,
Sb,
SbcI,
SbcR,
SbcRr,
Sbfx,
Sdiv,
Sel,
Setend,
Setpan,
Sev,
Sevl,
Sha1c,
Sha1h,
Sha1m,
Sha1p,
Sha1su0,
Sha1su1,
Sha256h,
Sha256h2,
Sha256su0,
Sha256su1,
Shadd16,
Shadd8,
Shasx,
Shsax,
Shsub16,
Shsub8,
Smc,
Smlabb,
Smlad,
Smlal,
Smlalbb,
Smlald,
Smlawb,
Smlsd,
Smlsld,
Smmla,
Smmls,
Smmul,
Smuad,
Smulbb,
Smull,
Smulwb,
Smusd,
Srs,
Ssat,
Ssat16,
Ssax,
Ssbb,
Ssub16,
Ssub8,
Stc,
Stl,
Stlb,
Stlex,
Stlexb,
Stlexd,
Stlexh,
Stlh,
Stm,
Stmda,
Stmdb,
Stmib,
StmU,
Strbt,
StrbI,
StrbR,
StrdI,
StrdR,
Strex,
Strexb,
Strexd,
Strexh,
Strht,
StrhI,
StrhR,
Strt,
StrI,
StrR,
SubI,
SubR,
SubRr,
SubSpI,
SubSpR,
Svc,
Sxtab,
Sxtab16,
Sxtah,
Sxtb,
Sxtb16,
Sxth,
Tbb,
TeqI,
TeqR,
TeqRr,
Tsb,
TstI,
TstR,
TstRr,
Uadd16,
Uadd8,
Uasx,
Ubfx,
Udf,
Udiv,
Uhadd16,
Uhadd8,
Uhasx,
Uhsax,
Uhsub16,
Uhsub8,
Umaal,
Umlal,
Umull,
Uqadd16,
Uqadd8,
Uqasx,
Uqsax,
Uqsub16,
Uqsub8,
Usad8,
Usada8,
Usat,
Usat16,
Usax,
Usub16,
Usub8,
Uxtab,
Uxtab16,
Uxtah,
Uxtb,
Uxtb16,
Uxth,
Vaba,
Vabal,
VabdlI,
VabdF,
VabdI,
Vabs,
Vacge,
Vacgt,
Vaddhn,
Vaddl,
Vaddw,
VaddF,
VaddI,
VandR,
VbicI,
VbicR,
Vbif,
Vbit,
Vbsl,
Vcadd,
VceqI,
VceqR,
VcgeI,
VcgeR,
VcgtI,
VcgtR,
VcleI,
Vcls,
VcltI,
Vclz,
Vcmla,
VcmlaS,
Vcmp,
Vcmpe,
Vcnt,
VcvtaAsimd,
VcvtaVfp,
Vcvtb,
VcvtbBfs,
VcvtmAsimd,
VcvtmVfp,
VcvtnAsimd,
VcvtnVfp,
VcvtpAsimd,
VcvtpVfp,
VcvtrIv,
Vcvtt,
VcvttBfs,
VcvtBfs,
VcvtDs,
VcvtHs,
VcvtIs,
VcvtIv,
VcvtVi,
VcvtXs,
VcvtXv,
Vdiv,
Vdot,
VdotS,
VdupR,
VdupS,
Veor,
Vext,
Vfma,
Vfmal,
VfmalS,
VfmaBf,
VfmaBfs,
Vfms,
Vfmsl,
VfmslS,
Vfnma,
Vfnms,
Vhadd,
Vhsub,
Vins,
Vjcvt,
Vld11,
Vld1A,
Vld1M,
Vld21,
Vld2A,
Vld2M,
Vld31,
Vld3A,
Vld3M,
Vld41,
Vld4A,
Vld4M,
Vldm,
VldrI,
VldrL,
Vmaxnm,
VmaxF,
VmaxI,
Vminnm,
VminF,
VminI,
VmlalI,
VmlalS,
VmlaF,
VmlaI,
VmlaS,
VmlslI,
VmlslS,
VmlsF,
VmlsI,
VmlsS,
Vmmla,
Vmovl,
Vmovn,
Vmovx,
VmovD,
VmovH,
VmovI,
VmovR,
VmovRs,
VmovS,
VmovSr,
VmovSs,
Vmrs,
Vmsr,
VmullI,
VmullS,
VmulF,
VmulI,
VmulS,
VmvnI,
VmvnR,
Vneg,
Vnmla,
Vnmls,
Vnmul,
VornR,
VorrI,
VorrR,
Vpadal,
Vpaddl,
VpaddF,
VpaddI,
VpmaxF,
VpmaxI,
VpminF,
VpminI,
Vqabs,
Vqadd,
Vqdmlal,
Vqdmlsl,
Vqdmulh,
Vqdmull,
Vqmovn,
Vqneg,
Vqrdmlah,
Vqrdmlsh,
Vqrdmulh,
Vqrshl,
Vqrshrn,
VqshlI,
VqshlR,
Vqshrn,
Vqsub,
Vraddhn,
Vrecpe,
Vrecps,
Vrev16,
Vrev32,
Vrev64,
Vrhadd,
VrintaAsimd,
VrintaVfp,
VrintmAsimd,
VrintmVfp,
VrintnAsimd,
VrintnVfp,
VrintpAsimd,
VrintpVfp,
VrintrVfp,
VrintxAsimd,
VrintxVfp,
VrintzAsimd,
VrintzVfp,
Vrshl,
Vrshr,
Vrshrn,
Vrsqrte,
Vrsqrts,
Vrsra,
Vrsubhn,
Vsdot,
VsdotS,
Vsel,
Vshll,
VshlI,
VshlR,
Vshr,
Vshrn,
Vsli,
Vsmmla,
Vsqrt,
Vsra,
Vsri,
Vst11,
Vst1M,
Vst21,
Vst2M,
Vst31,
Vst3M,
Vst41,
Vst4M,
Vstm,
Vstr,
Vsubhn,
Vsubl,
Vsubw,
VsubF,
VsubI,
VsudotS,
Vswp,
Vtbl,
Vtrn,
Vtst,
Vudot,
VudotS,
Vummla,
Vusdot,
VusdotS,
Vusmmla,
Vuzp,
Vzip,
Wfe,
Wfi,
Yield,
}
/// <summary>
/// Classification helpers for <see cref="InstName"/>.
/// </summary>
static class InstNameExtensions
{
    /// <summary>
    /// Checks if the instruction is BL/BLX (immediate) or BLX (register).
    /// </summary>
    public static bool IsCall(this InstName name)
    {
        return name is InstName.BlI or InstName.BlxR;
    }

    /// <summary>
    /// Checks if the instruction is one of the coprocessor/status register transfers
    /// (MCR, MCRR, MRC, MRRC, MRS, MSR forms) or SVC.
    /// </summary>
    public static bool IsSystem(this InstName name)
    {
        return name switch
        {
            InstName.Mcr or
            InstName.Mcrr or
            InstName.Mrc or
            InstName.Mrs or
            InstName.MrsBr or
            InstName.MsrBr or
            InstName.MsrI or
            InstName.MsrR or
            InstName.Mrrc or
            InstName.Svc => true,
            _ => false,
        };
    }

    /// <summary>
    /// Checks if the instruction satisfies <see cref="IsSystem"/> or <see cref="IsCall"/>.
    /// </summary>
    public static bool IsSystemOrCall(this InstName name)
    {
        if (name.IsSystem())
        {
            return true;
        }

        return name.IsCall();
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,146 @@
using Ryujinx.Cpu.LightningJit.Table;
using System.Collections.Generic;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
// Lookup table for the 16-bit Thumb (T16) instruction encodings.
// T supplies the emit functions (T.AdcRT1, T.AddIT1, ...) stored in each entry.
// Every encoding/mask pair below has its significant bits in the upper 16 bits of the 32-bit word.
static class InstTableT16<T> where T : IInstEmit
{
private static readonly InstTableLevel<InstInfoForTable> _table;
// Builds the instruction list once per closed generic type and hands it to the table.
static InstTableT16()
{
// Constraint lists, each element is constructed from an (encoding, mask) pair.
// NOTE(review): InstInfoForTable.IsConstrained reports a match against any element; presumably the
// table rejects an entry for encodings that match one of its constraints - confirm in InstTableLevel.
InstEncoding[] rmRdndnConstraints = new InstEncoding[]
{
new(0x00680000, 0x00780000),
new(0x00850000, 0x00870000),
};
InstEncoding[] rmConstraints = new InstEncoding[]
{
new(0x00680000, 0x00780000),
};
InstEncoding[] condCondConstraints = new InstEncoding[]
{
new(0x0E000000, 0x0F000000),
new(0x0F000000, 0x0F000000),
};
InstEncoding[] maskConstraints = new InstEncoding[]
{
new(0x00000000, 0x000F0000),
};
InstEncoding[] opConstraints = new InstEncoding[]
{
new(0x18000000, 0x18000000),
};
InstEncoding[] opOpOpOpConstraints = new InstEncoding[]
{
new(0x00000000, 0x03C00000),
new(0x00400000, 0x03C00000),
new(0x01400000, 0x03C00000),
new(0x01800000, 0x03C00000),
};
// Entry layout: encoding, encoding mask, [constraints], name, emit function, ISA version, [ISA feature], flags.
// NOTE(review): MovRr (T1) and Mul (T1) use InstFlags.None although both instructions write Rdm
// architecturally - confirm that leaving them out of the register write tracking is intended.
List<InstInfoForTable> insts = new()
{
new(0x41400000, 0xFFC00000, InstName.AdcR, T.AdcRT1, IsaVersion.v80, InstFlags.Rdn),
new(0x1C000000, 0xFE000000, InstName.AddI, T.AddIT1, IsaVersion.v80, InstFlags.Rd),
new(0x30000000, 0xF8000000, InstName.AddI, T.AddIT2, IsaVersion.v80, InstFlags.Rdn),
new(0x18000000, 0xFE000000, InstName.AddR, T.AddRT1, IsaVersion.v80, InstFlags.Rd),
new(0x44000000, 0xFF000000, rmRdndnConstraints, InstName.AddR, T.AddRT2, IsaVersion.v80, InstFlags.RdnDn),
new(0xA8000000, 0xF8000000, InstName.AddSpI, T.AddSpIT1, IsaVersion.v80, InstFlags.RdRd16),
new(0xB0000000, 0xFF800000, InstName.AddSpI, T.AddSpIT2, IsaVersion.v80, InstFlags.None),
new(0x44680000, 0xFF780000, InstName.AddSpR, T.AddSpRT1, IsaVersion.v80, InstFlags.None),
new(0x44850000, 0xFF870000, rmConstraints, InstName.AddSpR, T.AddSpRT2, IsaVersion.v80, InstFlags.None),
new(0xA0000000, 0xF8000000, InstName.Adr, T.AdrT1, IsaVersion.v80, InstFlags.RdRd16),
new(0x40000000, 0xFFC00000, InstName.AndR, T.AndRT1, IsaVersion.v80, InstFlags.Rdn),
new(0xD0000000, 0xF0000000, condCondConstraints, InstName.B, T.BT1, IsaVersion.v80, InstFlags.Cond),
new(0xE0000000, 0xF8000000, InstName.B, T.BT2, IsaVersion.v80, InstFlags.None),
new(0x43800000, 0xFFC00000, InstName.BicR, T.BicRT1, IsaVersion.v80, InstFlags.Rdn),
new(0xBE000000, 0xFF000000, InstName.Bkpt, T.BkptT1, IsaVersion.v80, InstFlags.None),
new(0x47800000, 0xFF870000, InstName.BlxR, T.BlxRT1, IsaVersion.v80, InstFlags.None),
new(0x47000000, 0xFF870000, InstName.Bx, T.BxT1, IsaVersion.v80, InstFlags.None),
new(0xB1000000, 0xF5000000, InstName.Cbnz, T.CbnzT1, IsaVersion.v80, InstFlags.None),
new(0x42C00000, 0xFFC00000, InstName.CmnR, T.CmnRT1, IsaVersion.v80, InstFlags.None),
new(0x28000000, 0xF8000000, InstName.CmpI, T.CmpIT1, IsaVersion.v80, InstFlags.None),
new(0x42800000, 0xFFC00000, InstName.CmpR, T.CmpRT1, IsaVersion.v80, InstFlags.None),
new(0x45000000, 0xFF000000, InstName.CmpR, T.CmpRT2, IsaVersion.v80, InstFlags.None),
new(0xB6600000, 0xFFE80000, InstName.Cps, T.CpsT1, IsaVersion.v80, InstFlags.None),
new(0x40400000, 0xFFC00000, InstName.EorR, T.EorRT1, IsaVersion.v80, InstFlags.Rdn),
new(0xBA800000, 0xFFC00000, InstName.Hlt, T.HltT1, IsaVersion.v80, InstFlags.None),
new(0xBF000000, 0xFF000000, maskConstraints, InstName.It, T.ItT1, IsaVersion.v80, InstFlags.None),
new(0xC8000000, 0xF8000000, InstName.Ldm, T.LdmT1, IsaVersion.v80, InstFlags.Rlist),
new(0x78000000, 0xF8000000, InstName.LdrbI, T.LdrbIT1, IsaVersion.v80, InstFlags.Rt),
new(0x5C000000, 0xFE000000, InstName.LdrbR, T.LdrbRT1, IsaVersion.v80, InstFlags.Rt),
new(0x88000000, 0xF8000000, InstName.LdrhI, T.LdrhIT1, IsaVersion.v80, InstFlags.Rt),
new(0x5A000000, 0xFE000000, InstName.LdrhR, T.LdrhRT1, IsaVersion.v80, InstFlags.Rt),
new(0x56000000, 0xFE000000, InstName.LdrsbR, T.LdrsbRT1, IsaVersion.v80, InstFlags.Rt),
new(0x5E000000, 0xFE000000, InstName.LdrshR, T.LdrshRT1, IsaVersion.v80, InstFlags.Rt),
new(0x68000000, 0xF8000000, InstName.LdrI, T.LdrIT1, IsaVersion.v80, InstFlags.Rt),
new(0x98000000, 0xF8000000, InstName.LdrI, T.LdrIT2, IsaVersion.v80, InstFlags.RtRd16),
new(0x48000000, 0xF8000000, InstName.LdrL, T.LdrLT1, IsaVersion.v80, InstFlags.RtRd16),
new(0x58000000, 0xFE000000, InstName.LdrR, T.LdrRT1, IsaVersion.v80, InstFlags.Rt),
new(0x20000000, 0xF8000000, InstName.MovI, T.MovIT1, IsaVersion.v80, InstFlags.RdRd16),
new(0x46000000, 0xFF000000, InstName.MovR, T.MovRT1, IsaVersion.v80, InstFlags.Rd),
new(0x00000000, 0xE0000000, opConstraints, InstName.MovR, T.MovRT2, IsaVersion.v80, InstFlags.Rd),
new(0x40000000, 0xFE000000, opOpOpOpConstraints, InstName.MovRr, T.MovRrT1, IsaVersion.v80, InstFlags.None),
new(0x43400000, 0xFFC00000, InstName.Mul, T.MulT1, IsaVersion.v80, InstFlags.None),
new(0x43C00000, 0xFFC00000, InstName.MvnR, T.MvnRT1, IsaVersion.v80, InstFlags.Rd),
new(0xBF000000, 0xFFFF0000, InstName.Nop, T.NopT1, IsaVersion.v80, InstFlags.None),
new(0x43000000, 0xFFC00000, InstName.OrrR, T.OrrRT1, IsaVersion.v80, InstFlags.Rdn),
new(0xBC000000, 0xFE000000, InstName.Pop, T.PopT1, IsaVersion.v80, InstFlags.Rlist),
new(0xB4000000, 0xFE000000, InstName.Push, T.PushT1, IsaVersion.v80, InstFlags.RlistRead),
new(0xBA000000, 0xFFC00000, InstName.Rev, T.RevT1, IsaVersion.v80, InstFlags.Rd),
new(0xBA400000, 0xFFC00000, InstName.Rev16, T.Rev16T1, IsaVersion.v80, InstFlags.Rd),
new(0xBAC00000, 0xFFC00000, InstName.Revsh, T.RevshT1, IsaVersion.v80, InstFlags.Rd),
new(0x42400000, 0xFFC00000, InstName.RsbI, T.RsbIT1, IsaVersion.v80, InstFlags.Rd),
new(0x41800000, 0xFFC00000, InstName.SbcR, T.SbcRT1, IsaVersion.v80, InstFlags.Rdn),
new(0xB6500000, 0xFFF70000, InstName.Setend, T.SetendT1, IsaVersion.v80, InstFlags.None),
new(0xB6100000, 0xFFF70000, InstName.Setpan, T.SetpanT1, IsaVersion.v81, IsaFeature.FeatPan, InstFlags.None),
new(0xBF400000, 0xFFFF0000, InstName.Sev, T.SevT1, IsaVersion.v80, InstFlags.None),
new(0xBF500000, 0xFFFF0000, InstName.Sevl, T.SevlT1, IsaVersion.v80, InstFlags.None),
new(0xC0000000, 0xF8000000, InstName.Stm, T.StmT1, IsaVersion.v80, InstFlags.RlistRead),
new(0x70000000, 0xF8000000, InstName.StrbI, T.StrbIT1, IsaVersion.v80, InstFlags.RtRead),
new(0x54000000, 0xFE000000, InstName.StrbR, T.StrbRT1, IsaVersion.v80, InstFlags.RtRead),
new(0x80000000, 0xF8000000, InstName.StrhI, T.StrhIT1, IsaVersion.v80, InstFlags.RtRead),
new(0x52000000, 0xFE000000, InstName.StrhR, T.StrhRT1, IsaVersion.v80, InstFlags.RtRead),
new(0x60000000, 0xF8000000, InstName.StrI, T.StrIT1, IsaVersion.v80, InstFlags.RtRead),
new(0x90000000, 0xF8000000, InstName.StrI, T.StrIT2, IsaVersion.v80, InstFlags.RtReadRd16),
new(0x50000000, 0xFE000000, InstName.StrR, T.StrRT1, IsaVersion.v80, InstFlags.RtRead),
new(0x1E000000, 0xFE000000, InstName.SubI, T.SubIT1, IsaVersion.v80, InstFlags.Rd),
new(0x38000000, 0xF8000000, InstName.SubI, T.SubIT2, IsaVersion.v80, InstFlags.Rdn),
new(0x1A000000, 0xFE000000, InstName.SubR, T.SubRT1, IsaVersion.v80, InstFlags.Rd),
new(0xB0800000, 0xFF800000, InstName.SubSpI, T.SubSpIT1, IsaVersion.v80, InstFlags.None),
new(0xDF000000, 0xFF000000, InstName.Svc, T.SvcT1, IsaVersion.v80, InstFlags.None),
new(0xB2400000, 0xFFC00000, InstName.Sxtb, T.SxtbT1, IsaVersion.v80, InstFlags.Rd),
new(0xB2000000, 0xFFC00000, InstName.Sxth, T.SxthT1, IsaVersion.v80, InstFlags.Rd),
new(0x42000000, 0xFFC00000, InstName.TstR, T.TstRT1, IsaVersion.v80, InstFlags.None),
new(0xDE000000, 0xFF000000, InstName.Udf, T.UdfT1, IsaVersion.v80, InstFlags.None),
new(0xB2C00000, 0xFFC00000, InstName.Uxtb, T.UxtbT1, IsaVersion.v80, InstFlags.Rd),
new(0xB2800000, 0xFFC00000, InstName.Uxth, T.UxthT1, IsaVersion.v80, InstFlags.Rd),
new(0xBF200000, 0xFFFF0000, InstName.Wfe, T.WfeT1, IsaVersion.v80, InstFlags.None),
new(0xBF300000, 0xFFFF0000, InstName.Wfi, T.WfiT1, IsaVersion.v80, InstFlags.None),
new(0xBF100000, 0xFFFF0000, InstName.Yield, T.YieldT1, IsaVersion.v80, InstFlags.None),
};
_table = new(insts);
}
// Looks up the metadata for a T16 encoding, restricted to the given ISA version and features.
// Returns true and the entry metadata on a match; otherwise returns false and sets meta to a Udf entry.
// NOTE(review): the fallback uses T.UdfA1 although this table also registers T.UdfT1 - confirm that is intended.
public static bool TryGetMeta(uint encoding, IsaVersion version, IsaFeature features, out InstMeta meta)
{
if (_table.TryFind(encoding, version, features, out InstInfoForTable info))
{
meta = info.Meta;
return true;
}
meta = new(InstName.Udf, T.UdfA1, IsaVersion.v80, IsaFeature.None, InstFlags.None);
return false;
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
using System.Collections.Generic;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// A list of blocks together with summary information computed from them at construction time.
/// </summary>
class MultiBlock
{
    public readonly List<Block> Blocks;
    public readonly bool HasHostCall;
    public readonly bool IsTruncated;

    /// <summary>
    /// Creates a new multi block. The list must contain at least one block.
    /// </summary>
    /// <param name="blocks">Blocks that make up the multi block</param>
    public MultiBlock(List<Block> blocks)
    {
        Blocks = blocks;

        // HasHostCall is set when any of the blocks has a host call.
        bool anyHostCall = blocks[0].HasHostCall;

        for (int i = 1; i < blocks.Count; i++)
        {
            anyHostCall |= blocks[i].HasHostCall;
        }

        HasHostCall = anyHostCall;

        // IsTruncated is taken from the last block only.
        IsTruncated = blocks[^1].IsTruncated;
    }
}
}

View File

@ -0,0 +1,20 @@
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Immutable record of a branch: its type, target and next addresses, the instruction name
/// and a writer pointer value.
/// </summary>
readonly struct PendingBranch
{
    public readonly BranchType BranchType;
    public readonly uint TargetAddress;
    public readonly uint NextAddress;
    public readonly InstName Name;
    public readonly int WriterPointer;

    /// <summary>
    /// Creates a new pending branch, storing each argument in the matching field.
    /// </summary>
    public PendingBranch(BranchType branchType, uint targetAddress, uint nextAddress, InstName name, int writerPointer)
    {
        this.BranchType = branchType;
        this.Name = name;
        this.TargetAddress = targetAddress;
        this.NextAddress = nextAddress;
        this.WriterPointer = writerPointer;
    }
}
}

View File

@ -0,0 +1,169 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Tracks which host general purpose and FP/SIMD registers are used, and hands out temporary registers.
/// </summary>
class RegisterAllocator
{
    public const int MaxTemps = 1;

    // Bit N set means register N is currently taken and can't be handed out as a temporary.
    private uint _gprMask;
    private uint _fpSimdMask;

    public int FixedContextRegister { get; }
    public int FixedPageTableRegister { get; }

    // Bit N set means register N was used at some point (these masks only ever grow).
    public uint UsedGprsMask { get; private set; }
    public uint UsedFpSimdMask { get; private set; }

    /// <summary>
    /// Creates the allocator with the low 16 registers of each bank marked as taken,
    /// then picks the two fixed registers.
    /// </summary>
    public RegisterAllocator()
    {
        _gprMask = ushort.MaxValue;
        _fpSimdMask = ushort.MaxValue;

        FixedContextRegister = AllocateTempRegisterWithPreferencing();
        FixedPageTableRegister = AllocateTempRegisterWithPreferencing();
    }

    /// <summary>Records that the general purpose register at <paramref name="index"/> is used.</summary>
    public void MarkGprAsUsed(int index)
    {
        UsedGprsMask |= 1u << index;
    }

    /// <summary>Records that the FP/SIMD register at <paramref name="index"/> is used.</summary>
    public void MarkFpSimdAsUsed(int index)
    {
        UsedFpSimdMask |= 1u << index;
    }

    /// <summary>
    /// Records that <paramref name="count"/> consecutive FP/SIMD registers starting at <paramref name="index"/> are used.
    /// </summary>
    /// <remarks>
    /// A count of zero (or less) marks nothing. The mask was previously built with a shift by (32 - count),
    /// and since C# only uses the low 5 bits of a 32-bit shift count, a count of 0 marked every register.
    /// </remarks>
    public void MarkFpSimdRangeAsUsed(int index, int count)
    {
        if (count <= 0)
        {
            return;
        }

        uint rangeMask = count >= 32 ? uint.MaxValue : uint.MaxValue >> (32 - count);

        UsedFpSimdMask |= rangeMask << index;
    }

    /// <summary>Marks the general purpose register as used and returns it as a 32-bit integer operand.</summary>
    public Operand RemapGprRegister(int index)
    {
        MarkGprAsUsed(index);

        return new Operand(OperandKind.Register, OperandType.I32, (ulong)index);
    }

    /// <summary>Marks the FP/SIMD register as used and returns it as a FP32 or FP64 operand.</summary>
    public Operand RemapFpRegister(int index, bool isFP32)
    {
        MarkFpSimdAsUsed(index);

        return new Operand(OperandKind.Register, isFP32 ? OperandType.FP32 : OperandType.FP64, (ulong)index);
    }

    /// <summary>Marks the FP/SIMD register as used and returns it as a 128-bit vector operand.</summary>
    public Operand RemapSimdRegister(int index)
    {
        MarkFpSimdAsUsed(index);

        return new Operand(OperandKind.Register, OperandType.V128, (ulong)index);
    }

    /// <summary>
    /// Marks a range of FP/SIMD registers as used and returns the first one as a 128-bit vector operand.
    /// </summary>
    public Operand RemapSimdRegister(int index, int count)
    {
        MarkFpSimdRangeAsUsed(index, count);

        return new Operand(OperandKind.Register, OperandType.V128, (ulong)index);
    }

    /// <summary>
    /// Allocates and immediately frees <paramref name="count"/> temporary general purpose registers,
    /// which leaves them recorded in <see cref="UsedGprsMask"/>.
    /// </summary>
    public void EnsureTempGprRegisters(int count)
    {
        if (count != 0)
        {
            Span<int> registers = stackalloc int[count];

            for (int index = 0; index < count; index++)
            {
                registers[index] = AllocateTempGprRegister();
            }

            for (int index = 0; index < count; index++)
            {
                FreeTempGprRegister(registers[index]);
            }
        }
    }

    /// <summary>Allocates a temporary general purpose register and records it as used.</summary>
    /// <exception cref="InvalidOperationException">No register is free</exception>
    public int AllocateTempGprRegister()
    {
        int index = AllocateTempRegister(ref _gprMask, AbiConstants.ReservedRegsMask);

        MarkGprAsUsed(index);

        return index;
    }

    /// <summary>
    /// Allocates a general purpose register, trying the lowest free callee saved register first
    /// and falling back to the regular allocation if there is none or it is reserved.
    /// Unlike <see cref="AllocateTempGprRegister"/>, this does not update <see cref="UsedGprsMask"/>.
    /// </summary>
    private int AllocateTempRegisterWithPreferencing()
    {
        int firstCalleeSaved = BitOperations.TrailingZeroCount(~_gprMask & AbiConstants.GprCalleeSavedRegsMask);

        if (firstCalleeSaved < 32)
        {
            uint regMask = 1u << firstCalleeSaved;

            if ((regMask & AbiConstants.ReservedRegsMask) == 0)
            {
                _gprMask |= regMask;

                return firstCalleeSaved;
            }
        }

        return AllocateTempRegister(ref _gprMask, AbiConstants.ReservedRegsMask);
    }

    /// <summary>Allocates a temporary FP/SIMD register and records it as used.</summary>
    /// <exception cref="InvalidOperationException">No register is free</exception>
    public int AllocateTempFpSimdRegister()
    {
        int index = AllocateTempRegister(ref _fpSimdMask, 0);

        MarkFpSimdAsUsed(index);

        return index;
    }

    /// <summary>Allocates a temporary general purpose register wrapped in a <see cref="ScopedRegister"/>.</summary>
    public ScopedRegister AllocateTempGprRegisterScoped()
    {
        return new(this, new(OperandKind.Register, OperandType.I32, (ulong)AllocateTempGprRegister()));
    }

    /// <summary>Allocates a temporary FP32 or FP64 register wrapped in a <see cref="ScopedRegister"/>.</summary>
    public ScopedRegister AllocateTempFpRegisterScoped(bool isFP32)
    {
        return new(this, new(OperandKind.Register, isFP32 ? OperandType.FP32 : OperandType.FP64, (ulong)AllocateTempFpSimdRegister()));
    }

    /// <summary>Allocates a temporary 128-bit vector register wrapped in a <see cref="ScopedRegister"/>.</summary>
    public ScopedRegister AllocateTempSimdRegisterScoped()
    {
        return new(this, new(OperandKind.Register, OperandType.V128, (ulong)AllocateTempFpSimdRegister()));
    }

    /// <summary>Makes the general purpose register available for allocation again.</summary>
    public void FreeTempGprRegister(int index)
    {
        FreeTempRegister(ref _gprMask, index);
    }

    /// <summary>Makes the FP/SIMD register available for allocation again.</summary>
    public void FreeTempFpSimdRegister(int index)
    {
        FreeTempRegister(ref _fpSimdMask, index);
    }

    /// <summary>
    /// Takes the lowest register that is neither set on <paramref name="mask"/> nor on <paramref name="reservedMask"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">All 32 registers are taken or reserved</exception>
    private static int AllocateTempRegister(ref uint mask, uint reservedMask)
    {
        int index = BitOperations.TrailingZeroCount(~(mask | reservedMask));

        if (index == sizeof(uint) * 8)
        {
            throw new InvalidOperationException("No free registers.");
        }

        mask |= 1u << index;

        return index;
    }

    /// <summary>Clears the bit of the register on the allocation mask.</summary>
    private static void FreeTempRegister(ref uint mask, int index)
    {
        mask &= ~(1u << index);
    }
}
}

View File

@ -0,0 +1,109 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Helpers that extract general purpose register indices from Arm32 (A32, T16 and T32) instruction encodings.
/// The T16 helpers read their fields from the upper 16 bits of the encoding.
/// </summary>
static class RegisterUtils
{
    public const int SpRegister = 13;
    public const int LrRegister = 14;
    public const int PcRegister = 15;

    // Bit positions of the register fields.
    private const int RmBit = 0;
    private const int RdRtBit = 12;
    private const int RdHiRnBit = 16;
    private const int RdRtT16Bit = 16;
    private const int RdRtT16AltBit = 24;
    private const int DnT16Bit = RdRtT16Bit + 7;
    private const int RdRt2RdHiT32Bit = 8;
    private const int RdT32AltBit = 0;
    private const int RtRdLoT32Bit = 12;

    /// <summary>Extracts Rt from bits 15:12.</summary>
    public static int ExtractRt(uint encoding)
    {
        return (int)(encoding >> RdRtBit) & 0xf;
    }

    /// <summary>Gets Rt2 for an encoding where it is implied by Rt, see <see cref="GetRt2"/>.</summary>
    public static int ExtractRt2(uint encoding)
    {
        return (int)GetRt2((uint)ExtractRt(encoding));
    }

    /// <summary>
    /// Extracts Rd from bits 19:16 if <see cref="InstFlags.Rd16"/> is set, otherwise from bits 15:12.
    /// </summary>
    public static int ExtractRd(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? ExtractRn(encoding) : ExtractRd(encoding);
    }

    /// <summary>Extracts Rd from bits 15:12.</summary>
    public static int ExtractRd(uint encoding)
    {
        return (int)(encoding >> RdRtBit) & 0xf;
    }

    /// <summary>Extracts RdHi from bits 19:16.</summary>
    public static int ExtractRdHi(uint encoding)
    {
        return (int)(encoding >> RdHiRnBit) & 0xf;
    }

    /// <summary>Extracts Rn from bits 19:16.</summary>
    public static int ExtractRn(uint encoding)
    {
        return (int)(encoding >> RdHiRnBit) & 0xf;
    }

    /// <summary>Extracts Rm from bits 3:0.</summary>
    public static int ExtractRm(uint encoding)
    {
        return (int)(encoding >> RmBit) & 0xf;
    }

    /// <summary>Gets the register following <paramref name="rt"/>, limited to <see cref="PcRegister"/>.</summary>
    public static uint GetRt2(uint rt)
    {
        return Math.Min(rt + 1, PcRegister);
    }

    /// <summary>
    /// Extracts Rdn from a T16 encoding. With <see cref="InstFlags.Dn"/> the register is DN:Rdn (4 bits),
    /// otherwise it is the 3-bit register returned by <see cref="ExtractRdT16"/>.
    /// </summary>
    /// <remarks>
    /// Rdn is read from bits 18:16, so the halfword sits on the upper 16 bits and DN (bit 7 of the halfword)
    /// is at bit 23. DN was previously read from bit 7 of the 32-bit word, which is outside the halfword.
    /// </remarks>
    public static int ExtractRdn(InstFlags flags, uint encoding)
    {
        if (flags.HasFlag(InstFlags.Dn))
        {
            int rdnLow = (int)(encoding >> RdRtT16Bit) & 7;
            int dn = (int)(encoding >> DnT16Bit) & 1;

            return rdnLow | (dn << 3);
        }
        else
        {
            return ExtractRdT16(flags, encoding);
        }
    }

    /// <summary>
    /// Extracts the 3-bit T16 Rd from bits 26:24 if <see cref="InstFlags.Rd16"/> is set, otherwise from bits 18:16.
    /// </summary>
    public static int ExtractRdT16(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdRtT16AltBit) & 7 : (int)(encoding >> RdRtT16Bit) & 7;
    }

    /// <summary>
    /// Extracts the 3-bit T16 Rt from bits 26:24 if <see cref="InstFlags.Rd16"/> is set, otherwise from bits 18:16.
    /// </summary>
    public static int ExtractRtT16(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdRtT16AltBit) & 7 : (int)(encoding >> RdRtT16Bit) & 7;
    }

    /// <summary>
    /// Extracts the T32 Rd from bits 3:0 if <see cref="InstFlags.Rd16"/> is set, otherwise from bits 11:8.
    /// </summary>
    public static int ExtractRdT32(InstFlags flags, uint encoding)
    {
        return flags.HasFlag(InstFlags.Rd16) ? (int)(encoding >> RdT32AltBit) & 0xf : (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }

    /// <summary>Extracts the T32 RdLo from bits 15:12.</summary>
    public static int ExtractRdLoT32(uint encoding)
    {
        return (int)(encoding >> RtRdLoT32Bit) & 0xf;
    }

    /// <summary>Extracts the T32 RdHi from bits 11:8.</summary>
    public static int ExtractRdHiT32(uint encoding)
    {
        return (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }

    /// <summary>Extracts the T32 Rt from bits 15:12.</summary>
    public static int ExtractRtT32(uint encoding)
    {
        return (int)(encoding >> RtRdLoT32Bit) & 0xf;
    }

    /// <summary>Extracts the T32 Rt2 from bits 11:8.</summary>
    public static int ExtractRt2T32(uint encoding)
    {
        return (int)(encoding >> RdRt2RdHiT32Bit) & 0xf;
    }
}
}

View File

@ -0,0 +1,39 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32
{
/// <summary>
/// Disposable wrapper around a register operand that returns the register to its
/// <see cref="RegisterAllocator"/> on dispose, if it was allocated.
/// </summary>
readonly struct ScopedRegister : IDisposable
{
    private readonly RegisterAllocator _registerAllocator;
    private readonly Operand _operand;
    private readonly bool _isAllocated;

    public readonly Operand Operand
    {
        get { return _operand; }
    }

    public readonly bool IsAllocated
    {
        get { return _isAllocated; }
    }

    /// <summary>
    /// Creates a new scoped register.
    /// </summary>
    /// <param name="registerAllocator">Allocator that receives the register back on dispose</param>
    /// <param name="operand">Register operand being wrapped</param>
    /// <param name="isAllocated">False if the register must not be freed on dispose</param>
    public ScopedRegister(RegisterAllocator registerAllocator, Operand operand, bool isAllocated = true)
        => (_registerAllocator, _operand, _isAllocated) = (registerAllocator, operand, isAllocated);

    /// <summary>
    /// Frees the register on the general purpose bank if the operand type is integer,
    /// or on the FP/SIMD bank otherwise. Does nothing for registers that were not allocated.
    /// </summary>
    public readonly void Dispose()
    {
        if (_isAllocated)
        {
            int registerIndex;

            if (_operand.Type.IsInteger())
            {
                registerIndex = _operand.AsInt32();
                _registerAllocator.FreeTempGprRegister(registerIndex);
            }
            else
            {
                registerIndex = _operand.AsInt32();
                _registerAllocator.FreeTempFpSimdRegister(registerIndex);
            }
        }
    }
}
}

View File

@ -0,0 +1,789 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class Compiler
{
public const uint UsableGprsMask = 0x7fff;
public const uint UsableFpSimdMask = 0xffff;
public const uint UsablePStateMask = 0xf0000000;
private const int Encodable26BitsOffsetLimit = 0x2000000;
private readonly struct Context
{
public readonly CodeWriter Writer;
public readonly RegisterAllocator RegisterAllocator;
public readonly MemoryManagerType MemoryManagerType;
public readonly TailMerger TailMerger;
public readonly AddressTable<ulong> FuncTable;
public readonly IntPtr DispatchStubPointer;
private readonly RegisterSaveRestore _registerSaveRestore;
private readonly IntPtr _pageTablePointer;
/// <summary>
/// Creates the compilation context, storing each argument in the matching field.
/// </summary>
public Context(
    CodeWriter writer,
    RegisterAllocator registerAllocator,
    MemoryManagerType mmType,
    TailMerger tailMerger,
    AddressTable<ulong> funcTable,
    RegisterSaveRestore registerSaveRestore,
    IntPtr dispatchStubPointer,
    IntPtr pageTablePointer)
{
    // Public state.
    this.Writer = writer;
    this.RegisterAllocator = registerAllocator;
    this.MemoryManagerType = mmType;
    this.TailMerger = tailMerger;
    this.FuncTable = funcTable;
    this.DispatchStubPointer = dispatchStubPointer;

    // Private state.
    this._registerSaveRestore = registerSaveRestore;
    this._pageTablePointer = pageTablePointer;
}
/// <summary>
/// Gets the reserved stack offset, as reported by the register save/restore helper.
/// </summary>
public readonly int GetReservedStackOffset() => _registerSaveRestore.GetReservedStackOffset();
/// <summary>
/// Generates the function prologue on a separate writer and then writes it into the
/// main writer at the given instruction position.
/// </summary>
/// <param name="instructionPointer">Position on the main writer where the prologue is written</param>
public readonly void WritePrologueAt(int instructionPointer)
{
    CodeWriter prologueWriter = new CodeWriter();
    Assembler prologueAsm = new Assembler(prologueWriter);

    _registerSaveRestore.WritePrologue(ref prologueAsm);

    // Set up the fixed registers with the pointers we will use.
    // The context pointer arrives on register 0, so it only needs to be moved when the
    // fixed context register is a different one.
    int contextRegister = RegisterAllocator.FixedContextRegister;

    if (contextRegister != 0)
    {
        prologueAsm.Mov(Register(contextRegister), Register(0));
    }

    // The page table (or address space base) pointer is loaded as a constant.
    prologueAsm.Mov(Register(RegisterAllocator.FixedPageTableRegister), (ulong)_pageTablePointer);

    LoadFromContext(ref prologueAsm);

    // Copy the generated prologue to the requested position of our writer.
    Writer.WriteInstructionsAt(instructionPointer, prologueWriter);
}
public readonly void WriteEpilogueWithoutContext()
{
Assembler asm = new(Writer);
_registerSaveRestore.WriteEpilogue(ref asm);
}
public void LoadFromContext()
{
Assembler asm = new(Writer);
LoadFromContext(ref asm);
}
private void LoadFromContext(ref Assembler asm)
{
LoadGprFromContext(ref asm, RegisterAllocator.UsedGprsMask & UsableGprsMask, NativeContextOffsets.GprBaseOffset);
LoadFpSimdFromContext(ref asm, RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
LoadPStateFromContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
}
public void StoreToContext()
{
Assembler asm = new(Writer);
StoreToContext(ref asm);
}
private void StoreToContext(ref Assembler asm)
{
StoreGprToContext(ref asm, RegisterAllocator.UsedGprsMask & UsableGprsMask, NativeContextOffsets.GprBaseOffset);
StoreFpSimdToContext(ref asm, RegisterAllocator.UsedFpSimdMask & UsableFpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
StorePStateToContext(ref asm, UsablePStateMask, NativeContextOffsets.FlagsBaseOffset);
}
private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
{
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
while (mask != 0)
{
int reg = BitOperations.TrailingZeroCount(mask);
int offset = baseOffset + reg * 8;
if (reg < 31 && (mask & (2u << reg)) != 0 && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
mask &= ~(3u << reg);
asm.LdpRiUn(Register(reg), Register(reg + 1), contextPtr, offset);
}
else
{
mask &= ~(1u << reg);
asm.LdrRiUn(Register(reg), contextPtr, offset);
}
}
}
private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
{
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
while (mask != 0)
{
int reg = BitOperations.TrailingZeroCount(mask);
int offset = baseOffset + reg * 16;
mask &= ~(1u << reg);
asm.LdrRiUn(Register(reg, OperandType.V128), contextPtr, offset);
}
}
private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
{
if (mask == 0)
{
return;
}
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
using ScopedRegister tempRegister = RegisterAllocator.AllocateTempGprRegisterScoped();
asm.LdrRiUn(tempRegister.Operand, contextPtr, baseOffset);
asm.MsrNzcv(tempRegister.Operand);
}
private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset)
{
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
while (mask != 0)
{
int reg = BitOperations.TrailingZeroCount(mask);
int offset = baseOffset + reg * 8;
if (reg < 31 && (mask & (2u << reg)) != 0 && offset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
mask &= ~(3u << reg);
asm.StpRiUn(Register(reg), Register(reg + 1), contextPtr, offset);
}
else
{
mask &= ~(1u << reg);
asm.StrRiUn(Register(reg), contextPtr, offset);
}
}
}
private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
{
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
while (mask != 0)
{
int reg = BitOperations.TrailingZeroCount(mask);
int offset = baseOffset + reg * 16;
mask &= ~(1u << reg);
asm.StrRiUn(Register(reg, OperandType.V128), contextPtr, offset);
}
}
private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
{
if (mask == 0)
{
return;
}
Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);
using ScopedRegister tempRegister = RegisterAllocator.AllocateTempGprRegisterScoped();
using ScopedRegister tempRegister2 = RegisterAllocator.AllocateTempGprRegisterScoped();
asm.LdrRiUn(tempRegister.Operand, contextPtr, baseOffset);
asm.MrsNzcv(tempRegister2.Operand);
asm.And(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(0xfffffff));
asm.Orr(tempRegister.Operand, tempRegister.Operand, tempRegister2.Operand);
asm.StrRiUn(tempRegister.Operand, contextPtr, baseOffset);
}
}
/// <summary>
/// Compiles the guest Arm32/Thumb code starting at <paramref name="address"/> into host Arm64 code.
/// The code is decoded into a <c>MultiBlock</c>, every instruction is emitted through its <c>EmitFunc</c>,
/// then the pending branches recorded during emission are rewritten, the shared return is written
/// and finally the prologue is written at instruction index 0.
/// </summary>
/// <param name="cpuPreset">CPU preset forwarded to the decoder.</param>
/// <param name="memoryManager">Memory manager used for decoding; also supplies the manager type and page table pointer.</param>
/// <param name="address">Guest address where compilation starts.</param>
/// <param name="funcTable">Function table forwarded to the code that emits guest calls.</param>
/// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the code that emits guest calls.</param>
/// <param name="isThumb">Whether decoding starts in Thumb mode.</param>
/// <returns>The generated code bytes together with the guest size covered (final pc minus <paramref name="address"/>).</returns>
public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr, bool isThumb)
{
MultiBlock multiBlock = Decoder<InstEmit>.DecodeMulti(cpuPreset, memoryManager, address, isThumb);
// Maps the guest address of each block to the writer instruction index where its code starts.
Dictionary<ulong, int> targets = new();
CodeWriter writer = new();
RegisterAllocator regAlloc = new();
Assembler asm = new(writer);
CodeGenContext cgContext = new(writer, asm, regAlloc, memoryManager.Type, isThumb);
// Condition of the conditional region currently open (Al when none), and the writer position
// of the conditional branch that skips it (see SetConditionalStart/WriteConditionalEnd).
ArmCondition lastCondition = ArmCondition.Al;
int lastConditionIp = 0;
// Required for load/store to context.
regAlloc.EnsureTempGprRegisters(2);
ulong pc = address;
for (int blockIndex = 0; blockIndex < multiBlock.Blocks.Count; blockIndex++)
{
Block block = multiBlock.Blocks[blockIndex];
// Blocks are expected to be contiguous: each one starts where the previous one ended.
Debug.Assert(block.Address == pc);
targets.Add(pc, writer.InstructionPointer);
for (int index = 0; index < block.Instructions.Count; index++)
{
InstInfo instInfo = block.Instructions[index];
// Tell the context which instruction follows inside this block (default for the last one).
if (index < block.Instructions.Count - 1)
{
cgContext.SetNextInstruction(block.Instructions[index + 1]);
}
else
{
cgContext.SetNextInstruction(default);
}
SetConditionalStart(cgContext, ref lastCondition, ref lastConditionIp, instInfo.Name, instInfo.Flags, instInfo.Encoding);
if (block.IsLoopEnd && index == block.Instructions.Count - 1)
{
// If this is a loop, the code might run for a long time uninterrupted.
// We insert a "sync point" here to ensure the loop can be interrupted if needed.
cgContext.AddPendingSyncPoint();
// Branch with a zero offset; presumably the placeholder that RewriteHostCall patches later.
asm.B(0);
}
cgContext.SetPc((uint)pc);
instInfo.EmitFunc(cgContext, instInfo.Encoding);
// When the context reports that NZCV was modified, the open conditional region is closed.
if (cgContext.ConsumeNzcvModified())
{
ForceConditionalEnd(cgContext, ref lastCondition, lastConditionIp);
}
cgContext.UpdateItState();
// Instructions flagged Thumb16 take 2 bytes, everything else takes 4.
pc += instInfo.Flags.HasFlag(InstFlags.Thumb16) ? 2UL : 4UL;
}
if (Decoder<InstEmit>.WritesToPC(block.Instructions[^1].Encoding, block.Instructions[^1].Name, block.Instructions[^1].Flags, block.IsThumb))
{
// If the block ends with a PC register write, then we have a branch from register.
InstEmitCommon.SetThumbFlag(cgContext, regAlloc.RemapGprRegister(RegisterUtils.PcRegister));
cgContext.AddPendingIndirectBranch(block.Instructions[^1].Name, RegisterUtils.PcRegister);
// Branch with a zero offset; presumably the placeholder patched when pending branches are rewritten.
asm.B(0);
}
// A conditional region never extends past the end of a block.
ForceConditionalEnd(cgContext, ref lastCondition, lastConditionIp);
}
// The save/restore set is built from the registers actually used, limited to the callee saved ones.
// Stack space for call spills is only reserved when the function contains a host call.
RegisterSaveRestore rsr = new(
regAlloc.UsedGprsMask & AbiConstants.GprCalleeSavedRegsMask,
regAlloc.UsedFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
OperandType.FP64,
multiBlock.HasHostCall,
multiBlock.HasHostCall ? CalculateStackSizeForCallSpill(regAlloc.UsedGprsMask, regAlloc.UsedFpSimdMask, UsablePStateMask) : 0);
TailMerger tailMerger = new();
Context context = new(writer, regAlloc, memoryManager.Type, tailMerger, funcTable, rsr, dispatchStubPtr, memoryManager.PageTablePointer);
InstInfo lastInstruction = multiBlock.Blocks[^1].Instructions[^1];
bool lastInstIsConditional = GetCondition(lastInstruction, isThumb) != ArmCondition.Al;
// In these cases a tail call to the guest address right after the compiled code is appended,
// presumably because execution can continue past the last decoded instruction.
if (multiBlock.IsTruncated || lastInstIsConditional || lastInstruction.Name.IsCall() || IsConditionalBranch(lastInstruction))
{
WriteTailCallConstant(context, ref asm, (uint)pc);
}
// Rewrite every branch recorded during emission now that all block start positions are known.
IEnumerable<PendingBranch> pendingBranches = cgContext.GetPendingBranches();
foreach (PendingBranch pendingBranch in pendingBranches)
{
RewriteBranchInstructionWithTarget(context, pendingBranch, targets);
}
tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);
// The prologue is written last, at index 0; it loads the registers reported as used by the allocator.
context.WritePrologueAt(0);
return new(writer.AsByteSpan(), (int)(pc - address));
}
/// <summary>
/// Computes the number of stack bytes needed to spill registers around a host call:
/// 8 bytes per used general purpose register that is not callee saved, 16 bytes per used
/// FP/SIMD register, and 8 more bytes when any flag is in use.
/// </summary>
/// <param name="gprUseMask">Mask of used general purpose registers.</param>
/// <param name="fpSimdUseMask">Mask of used FP/SIMD registers.</param>
/// <param name="pStateUseMask">Mask of used flags; only tested against zero.</param>
/// <returns>The spill area size in bytes.</returns>
private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
{
    int gprBytes = BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8;

    // Callee saved FP/SIMD registers are deliberately not discarded: only their lower 64 bits are
    // callee saved, so that would not be enough if the function uses the full register.
    // We could do better, but it's likely not worth it since this case happens very rarely in practice.
    int fpSimdBytes = BitOperations.PopCount(fpSimdUseMask) * 16;

    int pStateBytes = pStateUseMask == 0 ? 0 : 8;

    return gprBytes + fpSimdBytes + pStateBytes;
}
/// <summary>
/// Opens a conditional region for the instruction about to be emitted, when its condition
/// differs from the one of the currently open region. The previous region (if any) is closed
/// first, then a conditional branch on the inverted condition with a zero offset is emitted;
/// its position is returned through <paramref name="instructionPointer"/> so it can be patched
/// when the region ends.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="condition">Condition of the open region; updated to the new condition.</param>
/// <param name="instructionPointer">Position of the branch guarding the open region; updated when a new one is emitted.</param>
/// <param name="name">Name of the instruction about to be emitted.</param>
/// <param name="flags">Flags of the instruction about to be emitted.</param>
/// <param name="encoding">Encoding of the instruction about to be emitted.</param>
private static void SetConditionalStart(
    CodeGenContext context,
    ref ArmCondition condition,
    ref int instructionPointer,
    InstName name,
    InstFlags flags,
    uint encoding)
{
    // A condition supplied by the context (IT state) takes precedence over the encoded one.
    bool hasItCondition = context.ConsumeItCondition(out ArmCondition newCondition);

    if (!hasItCondition)
    {
        newCondition = GetCondition(name, flags, encoding, context.IsThumb);
    }

    if (newCondition == condition)
    {
        // Same condition as the open region (or still unconditional), nothing to do.
        return;
    }

    WriteConditionalEnd(context, condition, instructionPointer);
    condition = newCondition;

    if (newCondition == ArmCondition.Al)
    {
        return;
    }

    instructionPointer = context.CodeWriter.InstructionPointer;
    context.Arm64Assembler.B(newCondition.Invert(), 0);
}
/// <summary>
/// Checks whether the instruction is a B whose top 4 encoding bits hold a condition other than Al.
/// </summary>
private static bool IsConditionalBranch(in InstInfo instInfo)
{
    if (instInfo.Name != InstName.B)
    {
        return false;
    }

    ArmCondition encodedCondition = (ArmCondition)(instInfo.Encoding >> 28);

    return encodedCondition != ArmCondition.Al;
}
/// <summary>
/// Convenience overload that unpacks <paramref name="instInfo"/> and forwards to the
/// name/flags/encoding overload.
/// </summary>
private static ArmCondition GetCondition(in InstInfo instInfo, bool isThumb)
    => GetCondition(instInfo.Name, instInfo.Flags, instInfo.Encoding, isThumb);
/// <summary>
/// Gets the condition that must guard the instruction from the outside.
/// Returns Al when the instruction has no Cond flag, or when the instruction handles
/// its own condition (see <c>CanHandleConditionalInstruction</c>); otherwise returns the
/// condition held in the top 4 bits of the encoding.
/// </summary>
private static ArmCondition GetCondition(InstName name, InstFlags flags, uint encoding, bool isThumb)
{
    // For branch, we handle conditional execution on the instruction itself.
    if (!flags.HasFlag(InstFlags.Cond) || CanHandleConditionalInstruction(name, encoding, isThumb))
    {
        return ArmCondition.Al;
    }

    return (ArmCondition)(encoding >> 28);
}
/// <summary>
/// Tells whether the instruction implements its own conditional execution, in which case
/// no guarding conditional branch is emitted around it.
/// </summary>
private static bool CanHandleConditionalInstruction(InstName name, uint encoding, bool isThumb)
{
    bool isBranch = name == InstName.B;

    // We can use CSEL for conditional MOV from registers, as long as the instruction is not setting flags (bit 20 clear).
    // We don't handle thumb right now because the condition comes from the IT block which would be more complicated to handle.
    bool isArmMovRWithoutFlags = name == InstName.MovR && !isThumb && (encoding & (1u << 20)) == 0;

    return isBranch || isArmMovRWithoutFlags;
}
/// <summary>
/// Closes the currently open conditional region (if any) and resets
/// <paramref name="condition"/> to Al.
/// </summary>
private static void ForceConditionalEnd(CodeGenContext context, ref ArmCondition condition, int instructionPointer)
{
    ArmCondition openCondition = condition;

    condition = ArmCondition.Al;

    WriteConditionalEnd(context, openCondition, instructionPointer);
}
/// <summary>
/// Patches the conditional branch at <paramref name="instructionPointer"/> so that it jumps to the
/// current writer position. Does nothing when <paramref name="condition"/> is Al (no region open).
/// </summary>
private static void WriteConditionalEnd(CodeGenContext context, ArmCondition condition, int instructionPointer)
{
    if (condition == ArmCondition.Al)
    {
        return;
    }

    // Distance, in instructions, from the guarding branch to the current position.
    int instructionDelta = context.CodeWriter.InstructionPointer - instructionPointer;

    // The offset goes into the 19-bit field that starts at bit 5 of the branch encoding.
    uint offsetField = ((uint)instructionDelta & 0x7ffff) << 5;
    uint patchedBranch = context.CodeWriter.ReadInstructionAt(instructionPointer) | offsetField;

    Debug.Assert((int)((patchedBranch & ~0x1fu) << 8) >> 11 == instructionDelta * 4);

    context.CodeWriter.WriteInstructionAt(instructionPointer, patchedBranch);
}
/// <summary>
/// Dispatches a pending branch to the rewrite routine that matches its branch type.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="pendingBranch">Branch recorded during code emission.</param>
/// <param name="targets">Map from guest block address to writer instruction index.</param>
private static void RewriteBranchInstructionWithTarget(in Context context, in PendingBranch pendingBranch, Dictionary<ulong, int> targets)
{
    BranchType branchType = pendingBranch.BranchType;

    switch (branchType)
    {
        // Host calls all share one routine that tells them apart by type.
        case BranchType.SyncPoint:
        case BranchType.SoftwareInterrupt:
        case BranchType.ReadCntpct:
            RewriteHostCall(
                context,
                pendingBranch.Name,
                branchType,
                pendingBranch.TargetAddress,
                pendingBranch.NextAddress,
                pendingBranch.WriterPointer);
            break;

        // For table branches the two address fields are passed as the rn and rm parameters.
        case BranchType.TableBranchByte:
        case BranchType.TableBranchHalfword:
            RewriteTableBranchInstructionWithTarget(
                context,
                branchType == BranchType.TableBranchHalfword,
                pendingBranch.TargetAddress,
                pendingBranch.NextAddress,
                pendingBranch.WriterPointer);
            break;

        case BranchType.Branch:
            RewriteBranchInstructionWithTarget(
                context,
                pendingBranch.Name,
                pendingBranch.TargetAddress,
                pendingBranch.WriterPointer,
                targets);
            break;

        case BranchType.IndirectBranch:
            RewriteIndirectBranchInstructionWithTarget(
                context,
                pendingBranch.Name,
                pendingBranch.TargetAddress,
                pendingBranch.WriterPointer);
            break;

        case BranchType.Call:
            RewriteCallInstructionWithTarget(
                context,
                pendingBranch.TargetAddress,
                pendingBranch.NextAddress,
                pendingBranch.WriterPointer);
            break;

        case BranchType.IndirectCall:
            RewriteIndirectCallInstructionWithTarget(
                context,
                pendingBranch.TargetAddress,
                pendingBranch.NextAddress,
                pendingBranch.WriterPointer);
            break;

        default:
            Debug.Fail($"Invalid branch type '{pendingBranch.BranchType}'");
            break;
    }
}
/// <summary>
/// Patches the branch instruction at <paramref name="branchIndex"/> so that it reaches the code for
/// <paramref name="targetAddress"/>. When the target was compiled as part of this function and is in range,
/// the branch goes straight to it; otherwise it is redirected to a tail call appended at the end of the code.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="name">Name of the guest instruction; only used by the final assertion.</param>
/// <param name="targetAddress">Guest address of the branch target.</param>
/// <param name="branchIndex">Writer instruction index of the branch to patch.</param>
/// <param name="targets">Map from guest block address to writer instruction index.</param>
private static void RewriteBranchInstructionWithTarget(in Context context, InstName name, uint targetAddress, int branchIndex, Dictionary<ulong, int> targets)
{
CodeWriter writer = context.Writer;
Assembler asm = new(writer);
int delta;
int targetIndex;
uint encoding = writer.ReadInstructionAt(branchIndex);
// 0x14000000 is the encoding tested for an unconditional branch whose offset is still zero.
if (encoding == 0x14000000)
{
// Unconditional branch.
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -Encodable26BitsOffsetLimit && delta < Encodable26BitsOffsetLimit)
{
// The offset (in instructions) goes into the low 26 bits.
writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
return;
}
}
// Target unknown or out of range: branch to the end of the code and emit a tail call there.
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
writer.WriteInstructionAt(branchIndex, encoding | (uint)(delta & 0x3ffffff));
WriteTailCallConstant(context, ref asm, targetAddress);
}
else
{
// Conditional branch.
// The offset is a 19-bit signed field placed at bit 5 of the encoding.
uint branchMask = 0x7ffff;
int branchMax = (int)(branchMask + 1) / 2;
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
writer.WriteInstructionAt(branchIndex, encoding | (uint)((delta & branchMask) << 5));
return;
}
}
// Target unknown or out of range: try to reach a tail call appended at the end of the code.
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
writer.WriteInstructionAt(branchIndex, encoding | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, targetAddress);
}
else
{
// If the branch target is too far away, we use a regular unconditional branch
// instruction instead which has a much higher range.
// We branch directly to the end of the function, where we put the conditional branch,
// and then branch back to the next instruction or return the branch target depending
// on the branch being taken or not.
uint branchInst = 0x14000000u | ((uint)delta & 0x3ffffff);
Debug.Assert((int)(branchInst << 6) >> 4 == delta * 4);
writer.WriteInstructionAt(branchIndex, branchInst);
int movedBranchIndex = writer.InstructionPointer;
writer.WriteInstruction(0u); // Placeholder
// Not taken: return to the instruction right after the original branch.
asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
// Taken: the moved conditional branch skips the branch back and lands on the tail call below.
delta = writer.InstructionPointer - movedBranchIndex;
writer.WriteInstructionAt(movedBranchIndex, encoding | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, targetAddress);
}
}
// NOTE(review): this assertion is only reached on the paths that do not return early above.
Debug.Assert(name == InstName.B || name == InstName.Cbnz, $"Unknown branch instruction \"{name}\".");
}
/// <summary>
/// Emits the out-of-line code for a direct guest call and connects the placeholder branch at
/// <paramref name="branchIndex"/> to it. The emitted code sets the guest LR to
/// <paramref name="nextAddress"/>, stores the guest state, performs the call, reloads the
/// guest state and branches back to the instruction following the placeholder.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="targetAddress">Guest address being called.</param>
/// <param name="nextAddress">Guest address of the instruction after the call.</param>
/// <param name="branchIndex">Writer instruction index of the placeholder branch.</param>
private static void RewriteCallInstructionWithTarget(in Context context, uint targetAddress, uint nextAddress, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler callAsm = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    Operand guestLr = context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister);

    callAsm.Mov(guestLr, nextAddress);

    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter,
        ref callAsm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        nextAddress,
        InstEmitCommon.Const((int)targetAddress));

    context.LoadFromContext();

    // Branch back to the next instruction (after the call).
    int returnOffset = (branchIndex + 1 - codeWriter.InstructionPointer) * 4;

    callAsm.B(returnOffset);
}
/// <summary>
/// Emits the out-of-line code for a branch to a guest register and connects the placeholder branch at
/// <paramref name="branchIndex"/> to it. The target is the register value with bit 0 cleared.
/// Instructions recognized as function returns leave through the merged return path with the
/// target in register 0; everything else becomes a tail call to the target.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="name">Name of the guest instruction that performs the branch.</param>
/// <param name="targetRegister">Guest register holding the branch target.</param>
/// <param name="branchIndex">Writer instruction index of the placeholder branch.</param>
private static void RewriteIndirectBranchInstructionWithTarget(in Context context, InstName name, uint targetRegister, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler branchAsm = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand guestTarget = context.RegisterAllocator.RemapGprRegister((int)targetRegister);

    branchAsm.And(target.Operand, guestTarget, InstEmitCommon.Const(~1));

    context.StoreToContext();

    // Arm32 does not have a return instruction, instead returns are implemented
    // either using BX LR (for leaf functions), or POP { ... PC }.
    bool isBxLr = name == InstName.Bx && targetRegister == RegisterUtils.LrRegister;
    bool isLoadMultiple = name is InstName.Ldm or InstName.Ldmda or InstName.Ldmdb or InstName.Ldmib;

    if (!isBxLr && !isLoadMultiple)
    {
        InstEmitFlow.WriteCallWithGuestAddress(
            codeWriter,
            ref branchAsm,
            context.RegisterAllocator,
            context.TailMerger,
            context.WriteEpilogueWithoutContext,
            context.FuncTable,
            context.DispatchStubPointer,
            context.GetReservedStackOffset(),
            0u,
            target.Operand,
            isTail: true);

        return;
    }

    branchAsm.Mov(Register(0), target.Operand);
    context.TailMerger.AddUnconditionalReturn(codeWriter, branchAsm);
}
/// <summary>
/// Emits the out-of-line code for a table branch and connects the placeholder branch at
/// <paramref name="branchIndex"/> to it. The entry address is rn + rm (rm shifted left by 1 for
/// halfword tables); the entry is loaded from guest memory, doubled, added to the guest PC
/// register and used as the target of a tail call.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="halfword">True for a table of halfwords, false for a table of bytes.</param>
/// <param name="rn">Guest register holding the table base.</param>
/// <param name="rm">Guest register holding the table index.</param>
/// <param name="branchIndex">Writer instruction index of the placeholder branch.</param>
private static void RewriteTableBranchInstructionWithTarget(in Context context, bool halfword, uint rn, uint rm, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler tableAsm = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // Guest address of the table entry.
    Operand tableBase = context.RegisterAllocator.RemapGprRegister((int)rn);
    Operand tableIndex = context.RegisterAllocator.RemapGprRegister((int)rm);
    int indexShift = halfword ? 1 : 0;

    tableAsm.Add(target.Operand, tableBase, tableIndex, ArmShiftType.Lsl, indexShift);

    InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, tableAsm, target.Operand, target.Operand);

    if (!halfword)
    {
        tableAsm.LdrbRiUn(target.Operand, target.Operand, 0);
    }
    else
    {
        tableAsm.LdrhRiUn(target.Operand, target.Operand, 0);
    }

    // target = PC + entry * 2.
    Operand guestPc = context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister);

    tableAsm.Add(target.Operand, guestPc, target.Operand, ArmShiftType.Lsl, 1);

    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter,
        ref tableAsm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        0u,
        target.Operand,
        isTail: true);
}
/// <summary>
/// Emits the out-of-line code for a guest call through a register and connects the placeholder
/// branch at <paramref name="branchIndex"/> to it. The target is the register value with bit 0
/// cleared; the guest LR is set to <paramref name="nextAddress"/> before the call, and the guest
/// state is reloaded afterwards before branching back.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="targetRegister">Guest register holding the call target.</param>
/// <param name="nextAddress">Guest address of the instruction after the call.</param>
/// <param name="branchIndex">Writer instruction index of the placeholder branch.</param>
private static void RewriteIndirectCallInstructionWithTarget(in Context context, uint targetRegister, uint nextAddress, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler callAsm = new(codeWriter);

    WriteBranchToCurrentPosition(context, branchIndex);

    using ScopedRegister target = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // The target is captured before LR is overwritten below.
    Operand guestTarget = context.RegisterAllocator.RemapGprRegister((int)targetRegister);

    callAsm.And(target.Operand, guestTarget, InstEmitCommon.Const(~1));

    Operand guestLr = context.RegisterAllocator.RemapGprRegister(RegisterUtils.LrRegister);

    callAsm.Mov(guestLr, nextAddress);

    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        codeWriter,
        ref callAsm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        nextAddress & ~1u,
        target.Operand);

    context.LoadFromContext();

    // Branch back to the next instruction (after the call).
    int returnOffset = (branchIndex + 1 - codeWriter.InstructionPointer) * 4;

    callAsm.B(returnOffset);
}
/// <summary>
/// Emits the out-of-line code for a host call (sync point, software interrupt or counter read)
/// at the end of the code, points the placeholder branch at <paramref name="branchIndex"/> to it
/// and finishes with a branch back to the instruction following the placeholder.
/// </summary>
/// <param name="context">Finalization context.</param>
/// <param name="name">Name of the guest instruction; selects the handler for software interrupts.</param>
/// <param name="type">Kind of host call to emit.</param>
/// <param name="imm">Immediate value forwarded to the host call writer.</param>
/// <param name="pc">Guest PC value forwarded to the host call writer.</param>
/// <param name="branchIndex">Writer instruction index of the placeholder branch.</param>
private static void RewriteHostCall(in Context context, InstName name, BranchType type, uint imm, uint pc, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    Assembler returnAsm = new(codeWriter);

    // Point the placeholder branch at the code that is about to be appended.
    uint placeholder = codeWriter.ReadInstructionAt(branchIndex);
    int branchDelta = codeWriter.InstructionPointer - branchIndex;

    codeWriter.WriteInstructionAt(branchIndex, placeholder | (uint)(branchDelta & 0x3ffffff));

    switch (type)
    {
        case BranchType.SoftwareInterrupt:
            // Software interrupts get the guest state stored before and reloaded after the handler.
            context.StoreToContext();

            if (name == InstName.Bkpt)
            {
                InstEmitSystem.WriteBkpt(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
            }
            else if (name == InstName.Svc)
            {
                InstEmitSystem.WriteSvc(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
            }
            else if (name == InstName.Udf)
            {
                InstEmitSystem.WriteUdf(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset(), pc, imm);
            }

            context.LoadFromContext();
            break;

        case BranchType.SyncPoint:
            InstEmitSystem.WriteSyncPoint(context.Writer, context.RegisterAllocator, context.TailMerger, context.GetReservedStackOffset());
            break;

        case BranchType.ReadCntpct:
            InstEmitSystem.WriteReadCntpct(context.Writer, context.RegisterAllocator, context.GetReservedStackOffset(), (int)imm, (int)pc);
            break;

        default:
            Debug.Fail($"Invalid branch type '{type}'");
            break;
    }

    // Branch back to the next instruction.
    int returnOffset = (branchIndex + 1 - codeWriter.InstructionPointer) * 4;

    returnAsm.B(returnOffset);
}
/// <summary>
/// Makes the placeholder branch at <paramref name="branchIndex"/> reach the current writer position.
/// When the placeholder is the last instruction written, it is simply removed so that execution
/// falls through; otherwise its low 26 bits are patched with the distance to the current position.
/// </summary>
private static void WriteBranchToCurrentPosition(in Context context, int branchIndex)
{
    CodeWriter codeWriter = context.Writer;
    int currentIndex = codeWriter.InstructionPointer;

    if (currentIndex == branchIndex + 1)
    {
        codeWriter.RemoveLastInstruction();

        return;
    }

    uint placeholder = codeWriter.ReadInstructionAt(branchIndex);
    uint offsetField = (uint)((currentIndex - branchIndex) & 0x3ffffff);

    codeWriter.WriteInstructionAt(branchIndex, placeholder | offsetField);
}
/// <summary>
/// Emits a store of the guest state followed by a tail call to the constant guest
/// address <paramref name="address"/>.
/// </summary>
private static void WriteTailCallConstant(in Context context, ref Assembler asm, uint address)
{
    Operand guestTarget = InstEmitCommon.Const((int)address);

    context.StoreToContext();

    InstEmitFlow.WriteCallWithGuestAddress(
        context.Writer,
        ref asm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        0u,
        guestTarget,
        isTail: true);
}
/// <summary>
/// Builds a register operand of kind <c>RegisterType.Integer</c> for the given register index and
/// operand type (I64 unless specified).
/// </summary>
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new Operand(register, RegisterType.Integer, type);
/// <summary>
/// Currently does nothing; the body is empty.
/// </summary>
public static void PrintStats()
{
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,87 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the sum of absolute differences instructions (USAD8 and USADA8).
    /// </summary>
    static class InstEmitAbsDiff
    {
        /// <summary>
        /// Emits rd = sum over the 4 bytes of |rn.byte - rm.byte|.
        /// </summary>
        public static void Usad8(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister term = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister byteN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister byteM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
            Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);

            for (int byteIndex = 0; byteIndex < 4; byteIndex++)
            {
                int bitOffset = byteIndex * 8;

                context.Arm64Assembler.Ubfx(byteN.Operand, sourceN, bitOffset, 8);
                context.Arm64Assembler.Ubfx(byteM.Operand, sourceM, bitOffset, 8);

                // The first difference seeds the running sum, later ones go through a second register.
                Operand difference = byteIndex == 0 ? sum.Operand : term.Operand;

                context.Arm64Assembler.Sub(difference, byteN.Operand, byteM.Operand);

                EmitAbs(context, difference);

                if (byteIndex == 0)
                {
                    continue;
                }

                // The last addition writes the destination directly, once all inputs have been read.
                Operand accumulated = byteIndex < 3 ? sum.Operand : destination;

                context.Arm64Assembler.Add(accumulated, sum.Operand, difference);
            }
        }

        /// <summary>
        /// Emits rd = ra + sum over the 4 bytes of |rn.byte - rm.byte|.
        /// </summary>
        public static void Usada8(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
        {
            using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister term = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister byteN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister byteM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand sourceN = InstEmitCommon.GetInputGpr(context, rn);
            Operand sourceM = InstEmitCommon.GetInputGpr(context, rm);
            Operand addend = InstEmitCommon.GetInputGpr(context, ra);

            for (int byteIndex = 0; byteIndex < 4; byteIndex++)
            {
                int bitOffset = byteIndex * 8;

                context.Arm64Assembler.Ubfx(byteN.Operand, sourceN, bitOffset, 8);
                context.Arm64Assembler.Ubfx(byteM.Operand, sourceM, bitOffset, 8);

                // The first difference seeds the running sum, later ones go through a second register.
                Operand difference = byteIndex == 0 ? sum.Operand : term.Operand;

                context.Arm64Assembler.Sub(difference, byteN.Operand, byteM.Operand);

                EmitAbs(context, difference);

                if (byteIndex != 0)
                {
                    context.Arm64Assembler.Add(sum.Operand, sum.Operand, difference);
                }
            }

            context.Arm64Assembler.Add(destination, sum.Operand, addend);
        }

        // Replaces "value" with its absolute value, in place.
        private static void EmitAbs(CodeGenContext context, Operand value)
        {
            using ScopedRegister adjusted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            // r = (value + ((int)value >> 31)) ^ ((int)value >> 31).
            // (int)value >> 31 is all ones for a negative value and zero otherwise, so a negative value
            // has 1 subtracted and is then inverted (a conditional negation); other values are unchanged.
            context.Arm64Assembler.Add(adjusted.Operand, value, value, ArmShiftType.Asr, 31);
            context.Arm64Assembler.Eor(value, adjusted.Operand, value, ArmShiftType.Asr, 31);
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,103 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the bit field, bit count and byte/bit reversal instructions.
    /// Each one maps the guest registers to host operands and emits the matching Arm64 instruction.
    /// </summary>
    static class InstEmitBit
    {
        /// <summary>
        /// Emits a bit field clear of bits <paramref name="lsb"/> to <paramref name="msb"/> (inclusive) on rd.
        /// </summary>
        public static void Bfc(CodeGenContext context, uint rd, uint lsb, uint msb)
        {
            // A range with msb below lsb is documented as "unpredictable"; nothing is emitted for it.
            if (lsb > msb)
            {
                return;
            }

            int width = (int)(msb - lsb + 1);
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);

            context.Arm64Assembler.Bfc(destination, (int)lsb, width);
        }

        /// <summary>
        /// Emits a bit field insert from rn into bits <paramref name="lsb"/> to <paramref name="msb"/> (inclusive) of rd.
        /// </summary>
        public static void Bfi(CodeGenContext context, uint rd, uint rn, uint lsb, uint msb)
        {
            // A range with msb below lsb is documented as "unpredictable"; nothing is emitted for it.
            if (lsb > msb)
            {
                return;
            }

            int width = (int)(msb - lsb + 1);
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);

            context.Arm64Assembler.Bfi(destination, source, (int)lsb, width);
        }

        /// <summary>
        /// Emits Clz with rd as destination and rm as source.
        /// </summary>
        public static void Clz(CodeGenContext context, uint rd, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Clz(destination, source);
        }

        /// <summary>
        /// Emits Rbit with rd as destination and rm as source.
        /// </summary>
        public static void Rbit(CodeGenContext context, uint rd, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Rbit(destination, source);
        }

        /// <summary>
        /// Emits Rev with rd as destination and rm as source.
        /// </summary>
        public static void Rev(CodeGenContext context, uint rd, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Rev(destination, source);
        }

        /// <summary>
        /// Emits Rev16 with rd as destination and rm as source.
        /// </summary>
        public static void Rev16(CodeGenContext context, uint rd, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Rev16(destination, source);
        }

        /// <summary>
        /// Emits Rev16 from rm into rd, followed by a Sxth of rd onto itself.
        /// </summary>
        public static void Revsh(CodeGenContext context, uint rd, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Rev16(destination, source);
            context.Arm64Assembler.Sxth(destination, destination);
        }

        /// <summary>
        /// Emits a signed bit field extract of <paramref name="widthMinus1"/> + 1 bits of rn,
        /// starting at bit <paramref name="lsb"/>, into rd.
        /// </summary>
        public static void Sbfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
        {
            // A field that ends past bit 31 is documented as "unpredictable"; nothing is emitted for it.
            uint highestBit = lsb + widthMinus1;

            if (highestBit > 31)
            {
                return;
            }

            int width = (int)widthMinus1 + 1;
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);

            context.Arm64Assembler.Sbfx(destination, source, (int)lsb, width);
        }

        /// <summary>
        /// Emits an unsigned bit field extract of <paramref name="widthMinus1"/> + 1 bits of rn,
        /// starting at bit <paramref name="lsb"/>, into rd.
        /// </summary>
        public static void Ubfx(CodeGenContext context, uint rd, uint rn, uint lsb, uint widthMinus1)
        {
            // A field that ends past bit 31 is documented as "unpredictable"; nothing is emitted for it.
            uint highestBit = lsb + widthMinus1;

            if (highestBit > 31)
            {
                return;
            }

            int width = (int)widthMinus1 + 1;
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rn);

            context.Arm64Assembler.Ubfx(destination, source, (int)lsb, width);
        }
    }
}

View File

@ -0,0 +1,263 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitCommon
{
/// <summary>
/// Creates a constant operand of type I32 holding the bits of <paramref name="value"/>.
/// </summary>
public static Operand Const(int value)
{
    uint bits = (uint)value;

    return new Operand(OperandKind.Constant, OperandType.I32, bits);
}
/// <summary>
/// Gets the host operand for a guest register that is about to be read.
/// When the register is the PC, a move of <c>context.Pc</c> into the operand is emitted first.
/// </summary>
public static Operand GetInputGpr(CodeGenContext context, uint register)
{
    Operand mapped = context.RegisterAllocator.RemapGprRegister((int)register);

    if (register != RegisterUtils.PcRegister)
    {
        return mapped;
    }

    context.Arm64Assembler.Mov(mapped, context.Pc);

    return mapped;
}
/// <summary>
/// Gets the host operand for a guest register that is about to be written.
/// </summary>
public static Operand GetOutputGpr(CodeGenContext context, uint register)
    => context.RegisterAllocator.RemapGprRegister((int)register);
/// <summary>
/// Emits a read of the host NZCV register into <paramref name="flagsOut"/>, followed by a
/// logical shift right by 28 that moves the flags to the low bits.
/// </summary>
public static void GetCurrentFlags(CodeGenContext context, Operand flagsOut)
{
    Operand flagsShift = Const(28);

    context.Arm64Assembler.MrsNzcv(flagsOut);
    context.Arm64Assembler.Lsr(flagsOut, flagsOut, flagsShift);
}
/// <summary>
/// Emits a write of the host NZCV register from <paramref name="nzcvFlags"/> shifted left by 28
/// (the inverse of <c>GetCurrentFlags</c>). The input operand is left untouched.
/// </summary>
public static void RestoreNzcvFlags(CodeGenContext context, Operand nzcvFlags)
{
    using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand flagsShift = Const(28);

    context.Arm64Assembler.Lsl(shifted.Operand, nzcvFlags, flagsShift);
    context.Arm64Assembler.MsrNzcv(shifted.Operand);
}
/// <summary>
/// Emits code that overwrites 2 bits of the host NZCV register, starting at bit 28, with the
/// low 2 bits of <paramref name="cvFlags"/>, leaving the remaining bits as they are.
/// </summary>
public static void RestoreCvFlags(CodeGenContext context, Operand cvFlags)
{
    // Arm64 zeros the carry and overflow flags for logical operations, but Arm32 keeps them unchanged.
    // This restores carry and overflow after an operation has zeroed them.
    const int FirstFlagBit = 28;
    const int FlagBitCount = 2;

    using ScopedRegister hostFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.MrsNzcv(hostFlags.Operand);
    context.Arm64Assembler.Bfi(hostFlags.Operand, cvFlags, FirstFlagBit, FlagBitCount);
    context.Arm64Assembler.MsrNzcv(hostFlags.Operand);
}
/// <summary>
/// Emits a read-modify-write of the flags word in the native context that sets bit 5
/// (the Thumb flag).
/// </summary>
public static void SetThumbFlag(CodeGenContext context)
{
    const int ThumbFlagBit = 5;

    Operand contextPtr = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flagsWord = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Orr(flagsWord.Operand, flagsWord.Operand, Const(1 << ThumbFlagBit));
    context.Arm64Assembler.StrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits a read-modify-write of the flags word in the native context that copies bit 0 of
/// <paramref name="value"/> into bit 5 (the Thumb flag).
/// </summary>
public static void SetThumbFlag(CodeGenContext context, Operand value)
{
    const int ThumbFlagBit = 5;

    Operand contextPtr = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flagsWord = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Bfi(flagsWord.Operand, value, ThumbFlagBit, 1);
    context.Arm64Assembler.StrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits a read-modify-write of the flags word in the native context that clears bit 5
/// (the Thumb flag).
/// </summary>
public static void ClearThumbFlag(CodeGenContext context)
{
    const int ThumbFlagBit = 5;

    Operand contextPtr = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flagsWord = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Bfc(flagsWord.Operand, ThumbFlagBit, 1);
    context.Arm64Assembler.StrRiUn(flagsWord.Operand, contextPtr, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Applies <paramref name="elementAction"/> to each sign-extended 16-bit half of rn (low half
/// first) and writes rd as the low 16 bits of the first result OR'ed with the second result
/// shifted left by 16.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rn">Guest source register.</param>
/// <param name="elementAction">Emits the per-element operation; receives (destination, source).</param>
public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, Action<Operand, Operand> elementAction)
{
    using ScopedRegister elementResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand source = GetInputGpr(context, rn);

    // Low half: the result is parked in a separate register, truncated to 16 bits.
    context.Arm64Assembler.Sxth(elementN.Operand, source);
    elementAction(elementResult.Operand, elementN.Operand);
    context.Arm64Assembler.Uxth(lowResult.Operand, elementResult.Operand);

    // High half.
    context.Arm64Assembler.Asr(elementN.Operand, source, Const(16));
    elementAction(elementResult.Operand, elementN.Operand);

    // Pack both results; the destination is only written here, after the source was fully read.
    context.Arm64Assembler.Orr(destination, lowResult.Operand, elementResult.Operand, ArmShiftType.Lsl, 16);
}
/// <summary>
/// Applies <paramref name="elementAction"/> to the matching sign-extended 16-bit halves of rn
/// and rm (low halves first) and writes rd as the low 16 bits of the first result OR'ed with the
/// second result shifted left by 16.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="rd">Guest destination register.</param>
/// <param name="rn">First guest source register.</param>
/// <param name="rm">Second guest source register.</param>
/// <param name="elementAction">Emits the per-element operation; receives (destination, n element, m element).</param>
public static void EmitSigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
{
    using ScopedRegister elementResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand sourceN = GetInputGpr(context, rn);
    Operand sourceM = GetInputGpr(context, rm);

    // Low halves: the result is parked in a separate register, truncated to 16 bits.
    context.Arm64Assembler.Sxth(elementN.Operand, sourceN);
    context.Arm64Assembler.Sxth(elementM.Operand, sourceM);
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand);
    context.Arm64Assembler.Uxth(lowResult.Operand, elementResult.Operand);

    // High halves.
    context.Arm64Assembler.Asr(elementN.Operand, sourceN, Const(16));
    context.Arm64Assembler.Asr(elementM.Operand, sourceM, Const(16));
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand);

    // Pack both results; the destination is only written here, after the sources were fully read.
    context.Arm64Assembler.Orr(destination, lowResult.Operand, elementResult.Operand, ArmShiftType.Lsl, 16);
}
// "Exchanged" variant of the signed 16-bit pair helper: element 0 combines the low
// half of Rn with the high half of Rm, element 1 combines the high half of Rn with
// the low half of Rm. elementAction receives (destination, n, m, elementIndex) and
// the low 16 bits of the two results are packed into Rd.
public static void EmitSigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
{
    using ScopedRegister elementResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand sourceN = GetInputGpr(context, rn);
    Operand sourceM = GetInputGpr(context, rm);

    // Element 0: Rn[15:0] with Rm[31:16].
    context.Arm64Assembler.Sxth(elementN.Operand, sourceN);
    context.Arm64Assembler.Asr(elementM.Operand, sourceM, Const(16));
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand, 0);
    context.Arm64Assembler.Uxth(lowResult.Operand, elementResult.Operand);

    // Element 1: Rn[31:16] with Rm[15:0].
    context.Arm64Assembler.Asr(elementN.Operand, sourceN, Const(16));
    context.Arm64Assembler.Sxth(elementM.Operand, sourceM);
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand, 1);

    // Rd = element 0 result | (element 1 result << 16).
    context.Arm64Assembler.Orr(destination, lowResult.Operand, elementResult.Operand, ArmShiftType.Lsl, 16);
}
// Per-byte helper for signed operations: forwards to Emit8BitPair with unsigned set to false,
// so each byte lane of Rn/Rm is sign-extended before elementAction runs.
public static void EmitSigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    => Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: false);
// Runs elementAction once per pair of matching 16-bit halves of Rn and Rm (each
// half zero-extended into a temporary) and packs the low 16 bits of both results
// into Rd. elementAction receives (destination, n, m) temporaries.
public static void EmitUnsigned16BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
{
    using ScopedRegister elementResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand sourceN = GetInputGpr(context, rn);
    Operand sourceM = GetInputGpr(context, rm);

    // Low halves: bits [15:0] of both inputs.
    context.Arm64Assembler.Uxth(elementN.Operand, sourceN);
    context.Arm64Assembler.Uxth(elementM.Operand, sourceM);
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand);
    context.Arm64Assembler.Uxth(lowResult.Operand, elementResult.Operand);

    // High halves: bits [31:16] of both inputs, brought down with logical shifts.
    context.Arm64Assembler.Lsr(elementN.Operand, sourceN, Const(16));
    context.Arm64Assembler.Lsr(elementM.Operand, sourceM, Const(16));
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand);

    // Rd = low result | (high result << 16). Rd is only written here, after all input reads.
    context.Arm64Assembler.Orr(destination, lowResult.Operand, elementResult.Operand, ArmShiftType.Lsl, 16);
}
// "Exchanged" variant of the unsigned 16-bit pair helper: element 0 combines the low
// half of Rn with the high half of Rm, element 1 combines the high half of Rn with
// the low half of Rm. elementAction receives (destination, n, m, elementIndex) and
// the low 16 bits of the two results are packed into Rd.
public static void EmitUnsigned16BitXPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand, int> elementAction)
{
    using ScopedRegister elementResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister lowResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister elementM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand sourceN = GetInputGpr(context, rn);
    Operand sourceM = GetInputGpr(context, rm);

    // Element 0: Rn[15:0] with Rm[31:16].
    context.Arm64Assembler.Uxth(elementN.Operand, sourceN);
    context.Arm64Assembler.Lsr(elementM.Operand, sourceM, Const(16));
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand, 0);
    context.Arm64Assembler.Uxth(lowResult.Operand, elementResult.Operand);

    // Element 1: Rn[31:16] with Rm[15:0].
    context.Arm64Assembler.Lsr(elementN.Operand, sourceN, Const(16));
    context.Arm64Assembler.Uxth(elementM.Operand, sourceM);
    elementAction(elementResult.Operand, elementN.Operand, elementM.Operand, 1);

    // Rd = element 0 result | (element 1 result << 16).
    context.Arm64Assembler.Orr(destination, lowResult.Operand, elementResult.Operand, ArmShiftType.Lsl, 16);
}
// Per-byte helper for unsigned operations: forwards to Emit8BitPair with unsigned set to true,
// so each byte lane of Rn/Rm is zero-extended before elementAction runs.
public static void EmitUnsigned8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction)
    => Emit8BitPair(context, rd, rn, rm, elementAction, unsigned: true);
// Runs elementAction on each of the four byte lanes of Rn and Rm and packs the low
// byte of every result into Rd. Lanes are extracted with a signed or unsigned
// bitfield extract depending on the unsigned argument. elementAction receives
// (destination, n, m) temporaries.
private static void Emit8BitPair(CodeGenContext context, uint rd, uint rn, uint rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
{
    using ScopedRegister laneResult = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister packed = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister laneN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister laneM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = GetOutputGpr(context, rd);
    Operand sourceN = GetInputGpr(context, rn);
    Operand sourceM = GetInputGpr(context, rm);

    for (int lane = 0; lane < 4; lane++)
    {
        int bitOffset = lane * 8;

        if (!unsigned)
        {
            context.Arm64Assembler.Sbfx(laneN.Operand, sourceN, bitOffset, 8);
            context.Arm64Assembler.Sbfx(laneM.Operand, sourceM, bitOffset, 8);
        }
        else
        {
            context.Arm64Assembler.Ubfx(laneN.Operand, sourceN, bitOffset, 8);
            context.Arm64Assembler.Ubfx(laneM.Operand, sourceM, bitOffset, 8);
        }

        elementAction(laneResult.Operand, laneN.Operand, laneM.Operand);

        switch (lane)
        {
            case 0:
                // The first lane seeds the accumulator with its low byte.
                context.Arm64Assembler.Uxtb(packed.Operand, laneResult.Operand);
                break;

            case 3:
                // The last lane needs no masking (the shift by 24 drops the upper bits)
                // and is the only point where Rd is written.
                context.Arm64Assembler.Orr(destination, packed.Operand, laneResult.Operand, ArmShiftType.Lsl, 24);
                break;

            default:
                // Middle lanes are masked to a byte, then merged at their bit position.
                context.Arm64Assembler.Uxtb(laneResult.Operand, laneResult.Operand);
                context.Arm64Assembler.Orr(packed.Operand, packed.Operand, laneResult.Operand, ArmShiftType.Lsl, bitOffset);
                break;
        }
    }
}
// Combines the two encoded register fields into a register index.
// For size == 3 the single bit is the high bit (low4 | high1 << 4); for any other
// size the single bit is the low bit (high1 | low4 << 1).
public static uint CombineV(uint low4, uint high1, uint size)
{
    if (size == 3)
    {
        return CombineV(low4, high1);
    }

    return CombineVF(high1, low4);
}
// Places high1 above the four low bits: result = (high1 << 4) | low4.
public static uint CombineV(uint low4, uint high1) => (high1 << 4) | low4;
// Places high4 above the single low bit: result = (high4 << 1) | low1.
public static uint CombineVF(uint low1, uint high4) => (high4 << 1) | low1;
}
}

View File

@ -0,0 +1,26 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    // Emitters for the CRC32 and CRC32C instructions. Both forward directly to the
    // Arm64 assembler instruction of the same name, clamping the size field to 2.
    static class InstEmitCrc32
    {
        // Emits CRC32 Rd, Rn, Rm with size min(sz, 2).
        public static void Crc32(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand accumulator = InstEmitCommon.GetInputGpr(context, rn);
            Operand data = InstEmitCommon.GetInputGpr(context, rm);

            uint clampedSize = Math.Min(2, sz);

            context.Arm64Assembler.Crc32(destination, accumulator, data, clampedSize);
        }

        // Emits CRC32C Rd, Rn, Rm with size min(sz, 2).
        public static void Crc32c(CodeGenContext context, uint rd, uint rn, uint rm, uint sz)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand accumulator = InstEmitCommon.GetInputGpr(context, rn);
            Operand data = InstEmitCommon.GetInputGpr(context, rm);

            uint clampedSize = Math.Min(2, sz);

            context.Arm64Assembler.Crc32c(destination, accumulator, data, clampedSize);
        }
    }
}

View File

@ -0,0 +1,25 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    // Emitters for the integer divide instructions. Each one maps directly onto the
    // Arm64 assembler instruction of the same name.
    static class InstEmitDivide
    {
        // Emits SDIV Rd, Rn, Rm.
        public static void Sdiv(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            Operand quotient = InstEmitCommon.GetOutputGpr(context, rd);
            Operand dividend = InstEmitCommon.GetInputGpr(context, rn);
            Operand divisor = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Sdiv(quotient, dividend, divisor);
        }

        // Emits UDIV Rd, Rn, Rm.
        public static void Udiv(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            Operand quotient = InstEmitCommon.GetOutputGpr(context, rd);
            Operand dividend = InstEmitCommon.GetInputGpr(context, rn);
            Operand divisor = InstEmitCommon.GetInputGpr(context, rm);

            context.Arm64Assembler.Udiv(quotient, dividend, divisor);
        }
    }
}

View File

@ -0,0 +1,191 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    // Emitters for the sign/zero extension instructions, with and without accumulate.
    // In every emitter the rotate argument is a byte count: when non-zero, Rm is first
    // rotated right by rotate * 8 bits into a temporary register.
    static class InstEmitExtension
    {
        public static void Sxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitRotated(context, ArmExtensionType.Sxtb, rd, rn, rm, rotate);

        public static void Sxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: false);

        public static void Sxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitRotated(context, ArmExtensionType.Sxth, rd, rn, rm, rotate);

        public static void Sxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
            => EmitRotated(context, context.Arm64Assembler.Sxtb, rd, rm, rotate);

        // Sign-extends bytes 0 and 2 of the (optionally rotated) Rm into the two halfwords of Rd.
        public static void Sxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            if (rotate != 0)
            {
                context.Arm64Assembler.Ror(scratch.Operand, source, InstEmitCommon.Const((int)rotate * 8));
                source = scratch.Operand;
            }

            // Keep only bytes 0 and 2.
            context.Arm64Assembler.And(destination, source, InstEmitCommon.Const(0xff00ff));

            // Broadcast each byte's sign bit over the upper byte of its halfword:
            // (sign << 9) - sign has ones in bits 7..15 of the halfword when the sign bit is set.
            context.Arm64Assembler.And(scratch.Operand, destination, InstEmitCommon.Const(0x800080));
            context.Arm64Assembler.Lsl(shifted.Operand, scratch.Operand, InstEmitCommon.Const(9));
            context.Arm64Assembler.Sub(scratch.Operand, shifted.Operand, scratch.Operand);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);
        }

        public static void Sxth(CodeGenContext context, uint rd, uint rm, uint rotate)
            => EmitRotated(context, context.Arm64Assembler.Sxth, rd, rm, rotate);

        public static void Uxtab(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitRotated(context, ArmExtensionType.Uxtb, rd, rn, rm, rotate);

        public static void Uxtab16(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitExtendAccumulate8(context, rd, rn, rm, rotate, unsigned: true);

        public static void Uxtah(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate)
            => EmitRotated(context, ArmExtensionType.Uxth, rd, rn, rm, rotate);

        public static void Uxtb(CodeGenContext context, uint rd, uint rm, uint rotate)
            => EmitRotated(context, context.Arm64Assembler.Uxtb, rd, rm, rotate);

        // Zero-extends bytes 0 and 2 of the (optionally rotated) Rm into the two halfwords of Rd.
        public static void Uxtb16(CodeGenContext context, uint rd, uint rm, uint rotate)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            if (rotate == 0)
            {
                context.Arm64Assembler.And(destination, source, InstEmitCommon.Const(0xff00ff));
            }
            else
            {
                using ScopedRegister rotated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Ror(rotated.Operand, source, InstEmitCommon.Const((int)rotate * 8));
                context.Arm64Assembler.And(destination, rotated.Operand, InstEmitCommon.Const(0xff00ff));
            }
        }

        public static void Uxth(CodeGenContext context, uint rd, uint rm, uint rotate)
            => EmitRotated(context, context.Arm64Assembler.Uxth, rd, rm, rotate);

        // Applies a two-operand extension instruction (action) to Rm, rotating Rm first when requested.
        private static void EmitRotated(CodeGenContext context, Action<Operand, Operand> action, uint rd, uint rm, uint rotate)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            if (rotate == 0)
            {
                action(destination, source);
            }
            else
            {
                using ScopedRegister rotated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Ror(rotated.Operand, source, InstEmitCommon.Const((int)rotate * 8));
                action(destination, rotated.Operand);
            }
        }

        // Emits Rd = Rn + extend(Rm) using an extended-register ADD, rotating Rm first when requested.
        private static void EmitRotated(CodeGenContext context, ArmExtensionType extensionType, uint rd, uint rn, uint rm, uint rotate)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand addend = InstEmitCommon.GetInputGpr(context, rn);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            if (rotate == 0)
            {
                context.Arm64Assembler.Add(destination, addend, source, extensionType);
            }
            else
            {
                using ScopedRegister rotated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Ror(rotated.Operand, source, InstEmitCommon.Const((int)rotate * 8));
                context.Arm64Assembler.Add(destination, addend, rotated.Operand, extensionType);
            }
        }

        // Shared front end of Sxtab16/Uxtab16: rotates Rm when requested, then defers to the core emitter.
        private static void EmitExtendAccumulate8(CodeGenContext context, uint rd, uint rn, uint rm, uint rotate, bool unsigned)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand addend = InstEmitCommon.GetInputGpr(context, rn);
            Operand source = InstEmitCommon.GetInputGpr(context, rm);

            if (rotate == 0)
            {
                EmitExtendAccumulate8Core(context, destination, addend, source, unsigned);
            }
            else
            {
                using ScopedRegister rotated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                context.Arm64Assembler.Ror(rotated.Operand, source, InstEmitCommon.Const((int)rotate * 8));
                EmitExtendAccumulate8Core(context, destination, addend, rotated.Operand, unsigned);
            }
        }

        // Adds byte 0 of rm to the low halfword of rn and byte 2 of rm to the high halfword of rn
        // (bytes sign- or zero-extended per the unsigned argument), then packs the low 16 bits of
        // both sums into rd. rd is only written by the final instruction.
        private static void EmitExtendAccumulate8Core(CodeGenContext context, Operand rd, Operand rn, Operand rm, bool unsigned)
        {
            using ScopedRegister sum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister lowSum = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister half = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            ArmExtensionType byteExtension = unsigned ? ArmExtensionType.Uxtb : ArmExtensionType.Sxtb;

            // Low halfword of rn plus byte 0 of rm.
            if (!unsigned)
            {
                context.Arm64Assembler.Sxth(half.Operand, rn);
            }
            else
            {
                context.Arm64Assembler.Uxth(half.Operand, rn);
            }

            context.Arm64Assembler.Add(sum.Operand, half.Operand, rm, byteExtension);
            context.Arm64Assembler.Uxth(lowSum.Operand, sum.Operand);

            // High halfword of rn plus byte 2 of rm (brought down to byte 0 by the shift).
            if (!unsigned)
            {
                context.Arm64Assembler.Asr(half.Operand, rn, InstEmitCommon.Const(16));
            }
            else
            {
                context.Arm64Assembler.Lsr(half.Operand, rn, InstEmitCommon.Const(16));
            }

            context.Arm64Assembler.Lsr(sum.Operand, rm, InstEmitCommon.Const(16));
            context.Arm64Assembler.Add(sum.Operand, half.Operand, sum.Operand, byteExtension);

            context.Arm64Assembler.Orr(rd, lowSum.Operand, sum.Operand, ArmShiftType.Lsl, 16);
        }
    }
}

View File

@ -0,0 +1,256 @@
using ARMeilleure.Common;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    // Emitters for branch, call and IT instructions, plus the helper that writes a call
    // to another function given a guest address. Branch emitters record a pending
    // branch/call on the context and emit a branch with a zero offset.
    static class InstEmitFlow
    {
        // Register index used as the base register for spill slots.
        private const int SpIndex = 31;

        // Records a pending branch and emits a conditional or unconditional B with a zero offset.
        public static void B(CodeGenContext context, int imm, ArmCondition condition)
        {
            context.AddPendingBranch(InstName.B, imm);

            if (condition != ArmCondition.Al)
            {
                context.Arm64Assembler.B(condition, 0);

                return;
            }

            context.Arm64Assembler.B(0);
        }

        // Direct call. Updates the Thumb flag when the call switches instruction set,
        // records the pending call and emits a B with a zero offset.
        public static void Bl(CodeGenContext context, int imm, bool sourceIsThumb, bool targetIsThumb)
        {
            uint pc = context.Pc;

            uint nextAddress = sourceIsThumb ? pc | 1u : pc - 4;
            uint targetBase = targetIsThumb ? pc : pc & ~3u;
            uint targetAddress = targetBase + (uint)imm;

            if (targetIsThumb && !sourceIsThumb)
            {
                InstEmitCommon.SetThumbFlag(context);
            }
            else if (sourceIsThumb && !targetIsThumb)
            {
                InstEmitCommon.ClearThumbFlag(context);
            }

            context.AddPendingCall(targetAddress, nextAddress);

            context.Arm64Assembler.B(0);
        }

        // Indirect call through Rm. The Thumb flag is updated from Rm before the call is recorded.
        public static void Blx(CodeGenContext context, uint rm, bool sourceIsThumb)
        {
            Operand target = InstEmitCommon.GetInputGpr(context, rm);

            InstEmitCommon.SetThumbFlag(context, target);

            uint pc = context.Pc;
            uint nextAddress = sourceIsThumb ? (pc - 2) | 1u : pc - 4;

            context.AddPendingIndirectCall(rm, nextAddress);

            context.Arm64Assembler.B(0);
        }

        // Indirect branch through Rm. The Thumb flag is updated from Rm before the branch is recorded.
        public static void Bx(CodeGenContext context, uint rm)
        {
            Operand target = InstEmitCommon.GetInputGpr(context, rm);

            InstEmitCommon.SetThumbFlag(context, target);

            context.AddPendingIndirectBranch(InstName.Bx, rm);

            context.Arm64Assembler.B(0);
        }

        // Compare-and-branch: emits CBNZ when op is true, CBZ otherwise, with a zero offset.
        public static void Cbnz(CodeGenContext context, uint rn, int imm, bool op)
        {
            Operand tested = InstEmitCommon.GetInputGpr(context, rn);

            context.AddPendingBranch(InstName.Cbnz, imm);

            if (!op)
            {
                context.Arm64Assembler.Cbz(tested, 0);
            }
            else
            {
                context.Arm64Assembler.Cbnz(tested, 0);
            }
        }

        // Builds the condition list for an IT block from firstCond and mask and hands it to
        // the context. The block covers 4 - tzcnt(mask) instructions; a set mask bit selects
        // the inverted condition for its slot, and the final slot always uses firstCond.
        public static void It(CodeGenContext context, uint firstCond, uint mask)
        {
            Debug.Assert(mask != 0);

            int trailingZeros = BitOperations.TrailingZeroCount(mask);
            int instCount = 4 - trailingZeros;

            Span<ArmCondition> conditions = stackalloc ArmCondition[instCount];

            ArmCondition baseCondition = (ArmCondition)firstCond;

            // The mask bits above the lowest set bit (up to bit 3) fill the first instCount - 1 slots.
            int firstMaskBit = trailingZeros + 1;

            for (int slot = 0; slot < instCount - 1; slot++)
            {
                uint bit = 1u << (firstMaskBit + slot);

                conditions[slot] = (mask & bit) == 0 ? baseCondition : baseCondition.Invert();
            }

            conditions[instCount - 1] = baseCondition;

            context.SetItBlockStart(conditions);
        }

        // Table branch: loads the current PC value into the register mapped to the guest PC,
        // records the pending table branch and emits a B with a zero offset.
        public static void Tbb(CodeGenContext context, uint rn, uint rm, bool h)
        {
            var pcRegister = context.RegisterAllocator.RemapGprRegister(RegisterUtils.PcRegister);

            context.Arm64Assembler.Mov(pcRegister, context.Pc);

            context.AddPendingTableBranch(rn, rm, h);

            context.Arm64Assembler.B(0);
        }

        // Writes a call (or tail jump, when isTail is true) for the given guest address.
        //
        // Sequence written:
        //  1. Store the guest address at DispatchAddressOffset in the native context.
        //  2. For non-tail calls, spill registers around the call.
        //  3. Move the context register into register 0 if it is not already there.
        //  4. Load the target pointer: read from the function table entry when the guest
        //     address is constant and a table is available, otherwise use funcPtr.
        //  5. Tail: run writeEpilogue and BR. Non-tail: BLR, then compare register 0 with
        //     nextAddress, add a conditional return on mismatch, and fill the spilled registers.
        public static unsafe void WriteCallWithGuestAddress(
            CodeWriter writer,
            ref Assembler asm,
            RegisterAllocator regAlloc,
            TailMerger tailMerger,
            Action writeEpilogue,
            AddressTable<ulong> funcTable,
            IntPtr funcPtr,
            int spillBaseOffset,
            uint nextAddress,
            Operand guestAddress,
            bool isTail = false)
        {
            bool isConstantAddress = guestAddress.Kind == OperandKind.Constant;

            if (!isConstantAddress)
            {
                asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);
            }
            else
            {
                // A constant has to be materialized in a register before it can be stored.
                int addressRegister = regAlloc.AllocateTempGprRegister();

                asm.Mov(Register(addressRegister), guestAddress.Value);
                asm.StrRiUn(Register(addressRegister), Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);

                regAlloc.FreeTempGprRegister(addressRegister);
            }

            // Register 1 holds the call target, unless register 1 is the context register.
            int targetRegister = regAlloc.FixedContextRegister == 1 ? 2 : 1;

            if (!isTail)
            {
                WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
            }

            Operand target = Register(targetRegister);

            if (regAlloc.FixedContextRegister != 0)
            {
                asm.Mov(Register(0), Register(regAlloc.FixedContextRegister));
            }

            if (isConstantAddress && funcTable != null)
            {
                // Split the table entry address into a 4 KiB-aligned base and a 12-bit offset.
                ulong entryAddress = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

                asm.Mov(target, entryAddress & ~0xfffUL);
                asm.LdrRiUn(target, target, (int)(entryAddress & 0xfffUL));
            }
            else
            {
                asm.Mov(target, (ulong)funcPtr);
            }

            if (!isTail)
            {
                asm.Blr(target);

                asm.Mov(target, nextAddress);
                asm.Cmp(Register(0), target);

                tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

                WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);
            }
            else
            {
                writeEpilogue();

                asm.Br(target);
            }
        }

        private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
            => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);

        private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
            => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);

        // Writes stores (spill) or loads (fill) for the selected registers at increasing
        // offsets from the stack pointer, using pair instructions for adjacent registers
        // while the offset is below RegisterSaveRestore.Encodable9BitsOffsetLimit.
        //
        // NOTE(review): the mask below keeps only the context and page-table register bits
        // of UsedGprsMask; the "SkipContext" name might suggest the opposite selection.
        // Behaviour is left as found - confirm the intent against the callee's contract.
        private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
        {
            uint pending = regAlloc.UsedGprsMask & ((1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister));

            while (pending != 0)
            {
                int first = BitOperations.TrailingZeroCount(pending);

                bool canPair =
                    first < 31 &&
                    (pending & (2u << first)) != 0 &&
                    spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit;

                if (canPair)
                {
                    if (spill)
                    {
                        asm.StpRiUn(Register(first), Register(first + 1), Register(SpIndex), spillOffset);
                    }
                    else
                    {
                        asm.LdpRiUn(Register(first), Register(first + 1), Register(SpIndex), spillOffset);
                    }
                }
                else if (spill)
                {
                    asm.StrRiUn(Register(first), Register(SpIndex), spillOffset);
                }
                else
                {
                    asm.LdrRiUn(Register(first), Register(SpIndex), spillOffset);
                }

                // Retire the register(s) just handled and advance by 8 bytes per register.
                pending &= ~((canPair ? 3u : 1u) << first);
                spillOffset += canPair ? 16 : 8;
            }
        }

        // Builds an integer register operand; 64-bit unless another type is given.
        private static Operand Register(int register, OperandType type = OperandType.I64)
            => new Operand(register, RegisterType.Integer, type);
    }
}

View File

@ -0,0 +1,265 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    // Emitters for the parallel add/subtract instructions that set the GE flags, and for
    // SEL which consumes them. The four GE flags are kept in bits 16..19 of the flags
    // word at NativeContextOffsets.FlagsBaseOffset in the native context.
    static class InstEmitGE
    {
        public static void Sadd16(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: false);

        public static void Sadd8(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: false);

        public static void Sasx(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: false);

        // Selects each byte of the result from Rn when the matching GE flag is set, from Rm otherwise.
        public static void Sel(CodeGenContext context, uint rd, uint rn, uint rm)
        {
            Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
            Operand whenSet = InstEmitCommon.GetInputGpr(context, rn);
            Operand whenClear = InstEmitCommon.GetInputGpr(context, rm);

            using ScopedRegister byteMask = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            ExtractGEFlags(context, byteMask.Operand);

            // Spread the four flag bits so that flag i lands on bit 8 * i
            // (0b1111 becomes 0x01010101); the AND removes the stray product bits.
            context.Arm64Assembler.Mov(scratch.Operand, 0x204081u);
            context.Arm64Assembler.Mul(byteMask.Operand, byteMask.Operand, scratch.Operand);
            context.Arm64Assembler.And(byteMask.Operand, byteMask.Operand, InstEmitCommon.Const(0x1010101));

            // Widen every set bit to a full byte: (x << 8) - x turns 0x01 into 0xFF per lane.
            context.Arm64Assembler.Lsl(scratch.Operand, byteMask.Operand, InstEmitCommon.Const(8));
            context.Arm64Assembler.Sub(byteMask.Operand, scratch.Operand, byteMask.Operand);

            // Rd = (Rn & mask) | (Rm & ~mask). The Rn part is computed before Rd is written.
            context.Arm64Assembler.And(scratch.Operand, byteMask.Operand, whenSet);
            context.Arm64Assembler.Bic(destination, whenClear, byteMask.Operand);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);
        }

        public static void Ssax(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: false);

        public static void Ssub16(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: false);

        public static void Ssub8(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: false);

        public static void Uadd16(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: true, unsigned: true);

        public static void Uadd8(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: true, unsigned: true);

        public static void Uasx(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAsxSax(context, rd, rn, rm, isAsx: true, unsigned: true);

        public static void Usax(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAsxSax(context, rd, rn, rm, isAsx: false, unsigned: true);

        public static void Usub16(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: true, add: false, unsigned: true);

        public static void Usub8(CodeGenContext context, uint rd, uint rn, uint rm)
            => EmitAddSub(context, rd, rn, rm, is16Bit: false, add: false, unsigned: true);

        // Emits a parallel add or subtract on 8-bit or 16-bit elements and updates the GE flags.
        // For unsigned additions a flag is the carry out of the element; in every other
        // case it is the inverted sign bit of the 32-bit element result.
        private static void EmitAddSub(CodeGenContext context, uint rd, uint rn, uint rm, bool is16Bit, bool add, bool unsigned)
        {
            // The operands are not used directly here (the pair helpers fetch them again);
            // the calls are kept in their original order.
            _ = InstEmitCommon.GetOutputGpr(context, rd);
            _ = InstEmitCommon.GetInputGpr(context, rn);
            _ = InstEmitCommon.GetInputGpr(context, rm);

            using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            bool flagFromCarry = unsigned && add;
            int carryBit = is16Bit ? 16 : 8;
            int flagStride = is16Bit ? 2 : 1;

            // Bit position inside geFlags that the next element writes.
            int flagIndex = 0;

            void EmitElement(Operand result, Operand left, Operand right)
            {
                if (!add)
                {
                    context.Arm64Assembler.Sub(result, left, right);
                }
                else
                {
                    context.Arm64Assembler.Add(result, left, right);
                }

                if (flagFromCarry)
                {
                    if (flagIndex == 0)
                    {
                        context.Arm64Assembler.Lsr(geFlags.Operand, result, InstEmitCommon.Const(carryBit));
                    }
                    else
                    {
                        using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                        context.Arm64Assembler.Lsr(carry.Operand, result, InstEmitCommon.Const(carryBit));
                        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, carry.Operand, ArmShiftType.Lsl, flagIndex);
                    }
                }
                else
                {
                    using ScopedRegister inverted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Mvn(inverted.Operand, result);

                    if (flagIndex == 0)
                    {
                        context.Arm64Assembler.Lsr(geFlags.Operand, inverted.Operand, InstEmitCommon.Const(31));
                    }
                    else
                    {
                        context.Arm64Assembler.Lsr(inverted.Operand, inverted.Operand, InstEmitCommon.Const(31));
                        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, inverted.Operand, ArmShiftType.Lsl, flagIndex);
                    }
                }

                flagIndex += flagStride;
            }

            if (!is16Bit)
            {
                if (unsigned)
                {
                    InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, EmitElement);
                }
                else
                {
                    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, EmitElement);
                }
            }
            else
            {
                if (unsigned)
                {
                    InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, EmitElement);
                }
                else
                {
                    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, EmitElement);
                }

                // 16-bit elements wrote bits 0 and 2; copy each into the bit above it.
                context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);
            }

            UpdateGEFlags(context, geFlags.Operand);
        }

        // Emits the exchanged add/subtract forms and updates the GE flags. For ASX, element 0
        // is a subtraction and element 1 an addition; SAX is the other way around.
        private static void EmitAsxSax(CodeGenContext context, uint rd, uint rn, uint rm, bool isAsx, bool unsigned)
        {
            // The operands are not used directly here (the pair helpers fetch them again);
            // the calls are kept in their original order.
            _ = InstEmitCommon.GetOutputGpr(context, rd);
            _ = InstEmitCommon.GetInputGpr(context, rn);
            _ = InstEmitCommon.GetInputGpr(context, rm);

            using ScopedRegister geFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            int addElement = isAsx ? 1 : 0;

            void EmitElement(Operand result, Operand left, Operand right, int element)
            {
                bool isAdd = element == addElement;
                int flagShift = element * 2;

                if (!isAdd)
                {
                    context.Arm64Assembler.Sub(result, left, right);
                }
                else
                {
                    context.Arm64Assembler.Add(result, left, right);
                }

                if (unsigned && isAdd)
                {
                    if (element == 0)
                    {
                        context.Arm64Assembler.Lsr(geFlags.Operand, result, InstEmitCommon.Const(16));
                    }
                    else
                    {
                        using ScopedRegister carry = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                        context.Arm64Assembler.Lsr(carry.Operand, result, InstEmitCommon.Const(16));
                        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, carry.Operand, ArmShiftType.Lsl, flagShift);
                    }
                }
                else
                {
                    using ScopedRegister inverted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

                    context.Arm64Assembler.Mvn(inverted.Operand, result);

                    if (element == 0)
                    {
                        context.Arm64Assembler.Lsr(geFlags.Operand, inverted.Operand, InstEmitCommon.Const(31));
                    }
                    else
                    {
                        context.Arm64Assembler.Lsr(inverted.Operand, inverted.Operand, InstEmitCommon.Const(31));
                        context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, inverted.Operand, ArmShiftType.Lsl, flagShift);
                    }
                }
            }

            if (unsigned)
            {
                InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, EmitElement);
            }
            else
            {
                InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, EmitElement);
            }

            // The elements wrote bits 0 and 2; copy each into the bit above it.
            context.Arm64Assembler.Orr(geFlags.Operand, geFlags.Operand, geFlags.Operand, ArmShiftType.Lsl, 1);

            UpdateGEFlags(context, geFlags.Operand);
        }

        // Inserts the low four bits of flags into bits 16..19 of the flags word in the native context.
        public static void UpdateGEFlags(CodeGenContext context, Operand flags)
        {
            Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

            using ScopedRegister flagsWord = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            context.Arm64Assembler.LdrRiUn(flagsWord.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
            context.Arm64Assembler.Bfi(flagsWord.Operand, flags, 16, 4);
            context.Arm64Assembler.StrRiUn(flagsWord.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
        }

        // Loads the flags word from the native context and extracts bits 16..19 into flags.
        public static void ExtractGEFlags(CodeGenContext context, Operand flags)
        {
            Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

            context.Arm64Assembler.LdrRiUn(flags, nativeContext, NativeContextOffsets.FlagsBaseOffset);
            context.Arm64Assembler.Ubfx(flags, flags, 16, 4);
        }
    }
}

View File

@ -0,0 +1,178 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitHalve
{
public static void Shadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHadd(context, rd, rn, rm, 0x7fff7fff, unsigned: false);
}
public static void Shadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHadd(context, rd, rn, rm, 0x7f7f7f7f, unsigned: false);
}
public static void Shsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7fff7fff, unsigned: false);
}
public static void Shsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7f7f7f7f, unsigned: false);
}
public static void Shasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Sub(d, n, m);
}
else
{
context.Arm64Assembler.Add(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Shsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Add(d, n, m);
}
else
{
context.Arm64Assembler.Sub(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Uhadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHadd(context, rd, rn, rm, 0x7fff7fff, unsigned: true);
}
public static void Uhadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHadd(context, rd, rn, rm, 0x7f7f7f7f, unsigned: true);
}
public static void Uhasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Sub(d, n, m);
}
else
{
context.Arm64Assembler.Add(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Uhsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (d, n, m, e) =>
{
if (e == 0)
{
context.Arm64Assembler.Add(d, n, m);
}
else
{
context.Arm64Assembler.Sub(d, n, m);
}
context.Arm64Assembler.Lsr(d, d, InstEmitCommon.Const(1));
});
}
public static void Uhsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7fff7fff, unsigned: true);
}
public static void Uhsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
EmitHsub(context, rd, rn, rm, 0x7f7f7f7f, unsigned: true);
}
// Emits a packed halving add of rn and rm into rd.
// `mask` has the top bit of every lane cleared (0x7F per byte / 0x7FFF per halfword);
// `unsigned` skips the final sign fix-up.
private static void EmitHadd(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister commonBits = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister differingBits = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // x + y == ((x & y) << 1) + (x ^ y), and x ^ y holds the LSB of the sum,
    // therefore (x + y) / 2 == (x & y) + ((x ^ y) >> 1).
    context.Arm64Assembler.And(commonBits.Operand, rmOperand, rnOperand);
    context.Arm64Assembler.Eor(differingBits.Operand, rmOperand, rnOperand);

    // After the shift, the mask drops the bit that crossed over from the lane above.
    context.Arm64Assembler.Lsr(rdOperand, differingBits.Operand, InstEmitCommon.Const(1));
    context.Arm64Assembler.And(rdOperand, rdOperand, InstEmitCommon.Const(mask));
    context.Arm64Assembler.Add(rdOperand, rdOperand, commonBits.Operand);

    if (unsigned)
    {
        return;
    }

    // Signed variant: propagate the sign bit of (x ^ y) >> 1 upwards by one position.
    context.Arm64Assembler.And(differingBits.Operand, differingBits.Operand, InstEmitCommon.Const(~mask));
    context.Arm64Assembler.Eor(rdOperand, rdOperand, differingBits.Operand);
}
// Emits a packed halving subtract (rn - rm) / 2 into rd.
// `mask` has the top bit of every lane cleared (0x7F per byte / 0x7FFF per halfword);
// `unsigned` skips the final sign fix-up.
private static void EmitHsub(CodeGenContext context, uint rd, uint rn, uint rm, int mask, bool unsigned)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister differingBits = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister minuend = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister subtrahend = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // x - y == (x ^ y) - (((x ^ y) & y) << 1), and x ^ y holds the LSB of the difference,
    // therefore (x - y) / 2 == ((x ^ y) >> 1) - ((x ^ y) & y).
    context.Arm64Assembler.Eor(differingBits.Operand, rmOperand, rnOperand);
    context.Arm64Assembler.Lsr(minuend.Operand, differingBits.Operand, InstEmitCommon.Const(1));
    context.Arm64Assembler.And(subtrahend.Operand, differingBits.Operand, rmOperand);

    int laneTopBits = ~mask;

    // Partitioned subtraction: the minuend only uses 7/15 bits per lane, so the spare top bit
    // of each lane is set up front as a bit to borrow from. Flipping it afterwards reveals
    // whether the lane actually borrowed.
    context.Arm64Assembler.Orr(rdOperand, minuend.Operand, InstEmitCommon.Const(laneTopBits));
    context.Arm64Assembler.Sub(rdOperand, rdOperand, subtrahend.Operand);
    context.Arm64Assembler.Eor(rdOperand, rdOperand, InstEmitCommon.Const(laneTopBits));

    if (unsigned)
    {
        return;
    }

    // Signed variant: sign extend the result into the top bit of each lane.
    context.Arm64Assembler.And(differingBits.Operand, differingBits.Operand, InstEmitCommon.Const(laneTopBits));
    context.Arm64Assembler.Eor(rdOperand, rdOperand, differingBits.Operand);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,350 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitMove
{
// MVN (immediate): writes the bitwise complement of `imm` to Rd.
// With `s` set, the current flags are captured first, the captured copy is optionally edited
// from bit 31 of `imm` (only when `immRotated` is set), N/Z are produced by testing the result,
// and the captured copy is handed to RestoreCvFlags.
public static void MvnI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
{
Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
if (s)
{
using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);
if (immRotated)
{
// NOTE(review): this edits bit 29 of the captured flags, whereas MovI in this same class
// edits bit 1 (Const(2) / Bfc(..., 1, 1)) for the equivalent update. Only one of the two can
// match the layout produced by GetCurrentFlags - confirm which and align them.
if ((imm & (1u << 31)) != 0)
{
context.Arm64Assembler.Orr(flagsRegister.Operand, flagsRegister.Operand, InstEmitCommon.Const(1 << 29));
}
else
{
context.Arm64Assembler.Bfc(flagsRegister.Operand, 29, 1);
}
}
context.Arm64Assembler.Mov(rdOperand, ~imm);
// Test the result against itself so that the flags reflect the value just written.
context.Arm64Assembler.Tst(rdOperand, rdOperand);
InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);
context.SetNzcvModified();
}
else
{
// No flag update requested: a plain move of the complemented immediate is enough.
context.Arm64Assembler.Mov(rdOperand, ~imm);
}
}
// MVN (register, shifted by immediate): writes the bitwise complement of the shifted Rm to Rd.
// With `s` set, the flags are captured before the shift, N/Z are produced from the result,
// and the captured copy is handed to RestoreCvFlags afterwards.
public static void MvnR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    ScopedRegister flagsRegister = default;

    if (s)
    {
        flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType, flagsRegister.Operand);
    }
    else
    {
        rmOperand = InstEmitAlu.GetMShiftedByImmediate(context, tempRegister.Operand, rmOperand, imm5, sType);
    }

    context.Arm64Assembler.Mvn(rdOperand, rmOperand);

    if (s)
    {
        // The Arm64 MVN does not update NZCV, so N and Z have to be produced explicitly from
        // the result (as MvnI, MovR and MovRr do) before the saved flags are merged back in.
        context.Arm64Assembler.Tst(rdOperand, rdOperand);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        flagsRegister.Dispose();

        context.SetNzcvModified();
    }
}
// MVN (register, shifted by register): writes the bitwise complement of Rm shifted by Rs to Rd.
// With `s` set, the flags are captured before the shift, N/Z are produced from the result,
// and the captured copy is handed to RestoreCvFlags afterwards.
public static void MvnRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand rsOperand = InstEmitCommon.GetInputGpr(context, rs);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    ScopedRegister flagsRegister = default;

    if (s)
    {
        flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType, flagsRegister.Operand);
    }
    else
    {
        rmOperand = InstEmitAlu.GetMShiftedByReg(context, tempRegister.Operand, rmOperand, rsOperand, sType);
    }

    context.Arm64Assembler.Mvn(rdOperand, rmOperand);

    if (s)
    {
        // The Arm64 MVN does not update NZCV, so N and Z have to be produced explicitly from
        // the result (as MvnI, MovR and MovRr do) before the saved flags are merged back in.
        context.Arm64Assembler.Tst(rdOperand, rdOperand);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        flagsRegister.Dispose();

        context.SetNzcvModified();
    }
}
// MOV (immediate): writes `imm` to Rd.
// With `s` set, the current flags are captured, bit 1 of the captured copy is set or cleared
// from bit 31 of `imm` (only when `immRotated` is set), N/Z are produced by testing the result,
// and the captured copy is handed to RestoreCvFlags.
public static void MovI(CodeGenContext context, uint rd, uint imm, bool immRotated, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);

    if (!s)
    {
        // No flag update requested: a plain move is enough.
        context.Arm64Assembler.Mov(destination, imm);

        return;
    }

    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    if (immRotated)
    {
        bool topBitSet = (imm & 0x80000000u) != 0;

        if (topBitSet)
        {
            context.Arm64Assembler.Orr(savedFlags.Operand, savedFlags.Operand, InstEmitCommon.Const(2));
        }
        else
        {
            context.Arm64Assembler.Bfc(savedFlags.Operand, 1, 1);
        }
    }

    context.Arm64Assembler.Mov(destination, imm);

    // Test the result against itself so that the flags reflect the value just written.
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    context.SetNzcvModified();
}
// MOV (register, shifted by immediate), unconditional form.
// When the shift can be encoded directly (InstEmitAlu.CanShift) and no flag update is requested,
// a single shift or move is emitted. Otherwise the shifted operand is materialized through
// InstEmitAlu.GetMShiftedByImmediate, and with `s` set N/Z are produced from the result.
public static void MovR(CodeGenContext context, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);

    if (InstEmitAlu.CanShift(sType, imm5) && !s)
    {
        if (imm5 == 0)
        {
            context.Arm64Assembler.Mov(destination, source);

            return;
        }

        switch ((ArmShiftType)sType)
        {
            case ArmShiftType.Lsl:
                context.Arm64Assembler.Lsl(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Lsr:
                context.Arm64Assembler.Lsr(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Asr:
                context.Arm64Assembler.Asr(destination, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Ror:
                context.Arm64Assembler.Ror(destination, source, InstEmitCommon.Const((int)imm5));
                break;
        }

        return;
    }

    using ScopedRegister shiftScratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    if (!s)
    {
        source = InstEmitAlu.GetMShiftedByImmediate(context, shiftScratch.Operand, source, imm5, sType, null);

        context.Arm64Assembler.Mov(destination, source);

        return;
    }

    // Flag-setting path: capture the flags before the shift so they can be merged back afterwards.
    ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    source = InstEmitAlu.GetMShiftedByImmediate(context, shiftScratch.Operand, source, imm5, sType, savedFlags.Operand);

    context.Arm64Assembler.Mov(destination, source);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    savedFlags.Dispose();

    context.SetNzcvModified();
}
// MOV (register, shifted by immediate), conditional form.
// Does nothing if a previous instruction requested that this one be skipped. Conditions at or
// above Al, and every flag-setting move, are forwarded to the unconditional overload.
// Everything else becomes a CSEL that keeps the old Rd value when the condition fails.
public static void MovR(CodeGenContext context, uint cond, uint rd, uint rm, uint sType, uint imm5, bool s)
{
    if (context.ConsumeSkipNextInstruction())
    {
        return;
    }

    ArmCondition condition = (ArmCondition)cond;

    if (condition >= ArmCondition.Al || s)
    {
        MovR(context, rd, rm, sType, imm5, s);

        return;
    }

    using ScopedRegister shiftScratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);

    if (!InstEmitAlu.CanShift(sType, imm5))
    {
        source = InstEmitAlu.GetMShiftedByImmediate(context, shiftScratch.Operand, source, imm5, sType, null);

        context.Arm64Assembler.Csel(destination, source, destination, condition);

        return;
    }

    if (imm5 != 0)
    {
        // Shift into the scratch register first, then select between it and the old value.
        switch ((ArmShiftType)sType)
        {
            case ArmShiftType.Lsl:
                context.Arm64Assembler.Lsl(shiftScratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Lsr:
                context.Arm64Assembler.Lsr(shiftScratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Asr:
                context.Arm64Assembler.Asr(shiftScratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
            case ArmShiftType.Ror:
                context.Arm64Assembler.Ror(shiftScratch.Operand, source, InstEmitCommon.Const((int)imm5));
                break;
        }

        context.Arm64Assembler.Csel(destination, shiftScratch.Operand, destination, condition);

        return;
    }

    Operand fallback = destination;
    InstInfo upcoming = context.PeekNextInstruction();

    if (upcoming.Name == InstName.MovR)
    {
        // If the next instruction is a plain, unshifted, non flag-setting move to the same Rd
        // under the inverse condition, fold its source into the second CSEL operand and skip it.
        InstCondb28w4Sb20w1Rdb12w4Imm5b7w5Stypeb5w2Rmb0w4 decoded = new(upcoming.Encoding);

        bool isInverseMove =
            decoded.Rd == rd &&
            decoded.S == 0 &&
            decoded.Stype == 0 &&
            decoded.Imm5 == 0 &&
            decoded.Cond == (cond ^ 1u) &&
            decoded.Rm != RegisterUtils.PcRegister;

        if (isInverseMove)
        {
            fallback = InstEmitCommon.GetInputGpr(context, decoded.Rm);

            context.SetSkipNextInstruction();
        }
    }

    context.Arm64Assembler.Csel(destination, source, fallback, condition);
}
// MOV (register, shifted by register).
// Without `s`, the shift helper writes straight into Rd. With `s`, the shifted value goes
// through a scratch register, N/Z are produced from the result, and the captured flags are
// handed to RestoreCvFlags.
public static void MovRr(CodeGenContext context, uint rd, uint rm, uint sType, uint rs, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rm);
    Operand amount = InstEmitCommon.GetInputGpr(context, rs);

    if (s)
    {
        using ScopedRegister shiftScratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();
        using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

        source = InstEmitAlu.GetMShiftedByReg(context, shiftScratch.Operand, source, amount, sType, savedFlags.Operand);

        context.Arm64Assembler.Mov(destination, source);
        context.Arm64Assembler.Tst(destination, destination);

        InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

        context.SetNzcvModified();
    }
    else
    {
        InstEmitAlu.GetMShiftedByReg(context, destination, source, amount, sType);
    }
}
// MOVT: emits a MOVK of `imm` into halfword index 1 of Rd.
// Rd is fetched as an input because MOVK keeps the remaining bits of the register.
public static void Movt(CodeGenContext context, uint rd, uint imm)
{
    Operand target = InstEmitCommon.GetInputGpr(context, rd);

    context.Arm64Assembler.Movk(target, (int)imm, 1);
}
// PKHBT / PKHTB: packs one halfword from Rn with one halfword from the shifted Rm.
// `tb` selects the PKHTB form (arithmetic shift right; imm5 == 0 is emitted as a shift by 31);
// otherwise the PKHBT form (shift left by imm5) is used. The halves are combined with EXTR #16
// and then rotated by 16 into their final positions.
public static void Pkh(CodeGenContext context, uint rd, uint rn, uint rm, bool tb, uint imm5)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    if (tb)
    {
        using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        int amount = imm5 == 0 ? 31 : (int)imm5;

        context.Arm64Assembler.Asr(shifted.Operand, rmOperand, InstEmitCommon.Const(amount));
        context.Arm64Assembler.Extr(rdOperand, shifted.Operand, rnOperand, 16);
    }
    else if (imm5 != 0)
    {
        using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Lsl(shifted.Operand, rmOperand, InstEmitCommon.Const((int)imm5));
        context.Arm64Assembler.Extr(rdOperand, rnOperand, shifted.Operand, 16);
    }
    else
    {
        // PKHBT without a shift: Rm can be used as is.
        context.Arm64Assembler.Extr(rdOperand, rnOperand, rmOperand, 16);
    }

    context.Arm64Assembler.Ror(rdOperand, rdOperand, InstEmitCommon.Const(16));
}
}
}

View File

@ -0,0 +1,603 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitMultiply
{
// MLA: emits MADD with operands (Rd, Rn, Rm, Ra).
public static void Mla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand multiplicand = InstEmitCommon.GetInputGpr(context, rn);
    Operand multiplier = InstEmitCommon.GetInputGpr(context, rm);
    Operand addend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Madd(destination, multiplicand, multiplier, addend);
}
// MLS: emits MSUB with operands (Rd, Rn, Rm, Ra).
public static void Mls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand multiplicand = InstEmitCommon.GetInputGpr(context, rn);
    Operand multiplier = InstEmitCommon.GetInputGpr(context, rm);
    Operand minuend = InstEmitCommon.GetInputGpr(context, ra);

    context.Arm64Assembler.Msub(destination, multiplicand, multiplier, minuend);
}
// MUL: emits a multiply of Rn by Rm into Rd.
// With `s` set, the flags are captured first, N/Z are produced by testing the result,
// and the captured copy is handed to RestoreCvFlags.
public static void Mul(CodeGenContext context, uint rd, uint rn, uint rm, bool s)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    if (!s)
    {
        context.Arm64Assembler.Mul(destination, left, right);

        return;
    }

    using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

    context.Arm64Assembler.Mul(destination, left, right);
    context.Arm64Assembler.Tst(destination, destination);

    InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

    context.SetNzcvModified();
}
// SMLA<x><y>: multiplies the selected signed halfwords of Rn and Rm (`nHigh` / `mHigh` pick the
// top half), adds the sign-extended Ra, runs the overflow check (which sets Q), then writes Rd.
public static void Smlabb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool nHigh, bool mHigh)
{
    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);
    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, halfN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, halfM.Operand, rmOperand, mHigh);

    context.Arm64Assembler.Sxtw(accumulator64, raOperand);
    context.Arm64Assembler.Smaddl(halfN.Operand, halfN.Operand, halfM.Operand, accumulator64);

    // halfM is free again at this point and doubles as scratch for the overflow check.
    CheckResultOverflow(context, halfM64, halfN.Operand);

    context.Arm64Assembler.Mov(rdOperand, halfN.Operand);
}
// SMLAD: additive form of the shared SMLAD/SMLSD emitter.
public static void Smlad(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
    => EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: true);
// SMLAL: long multiply-accumulate built on the assembler's SMADDL.
public static void Smlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyAddLong(context, context.Arm64Assembler.Smaddl, rdLo, rdHi, rn, rm, s);
// SMLAL<x><y>: multiplies the selected signed halfwords of Rn and Rm and adds the product to the
// 64-bit accumulator formed from RdHi:RdLo, then splits the result back into the two registers.
public static void Smlalbb(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand loOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand hiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand lo64 = new(OperandKind.Register, OperandType.I64, loOperand.Value);
    Operand hi64 = new(OperandKind.Register, OperandType.I64, hiOperand.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    SelectSignedHalfword(context, halfN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, halfM.Operand, rmOperand, mHigh);

    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    // accumulator = (RdHi << 32) | RdLo
    context.Arm64Assembler.Lsl(accumulator64, hi64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(accumulator64, accumulator64, loOperand);

    context.Arm64Assembler.Smaddl(lo64, halfN.Operand, halfM.Operand, accumulator64);

    if (rdLo != rdHi)
    {
        // Upper word of the 64-bit result goes to RdHi.
        context.Arm64Assembler.Lsr(hi64, lo64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(loOperand, loOperand); // Zero-extend.
}
// SMLALD: additive form of the shared SMLALD/SMLSLD emitter.
public static void Smlald(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
    => EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: true);
// SMLAW<y>: multiplies Rn by the selected signed halfword of Rm, adds Ra pre-shifted left by 16,
// shifts the 64-bit sum right by 16, runs the overflow check (which sets Q), then writes Rd.
public static void Smlawb(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool mHigh)
{
    using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand result64 = new(OperandKind.Register, OperandType.I64, result.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);
    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    SelectSignedHalfword(context, halfM.Operand, rmOperand, mHigh);

    // Ra is scaled up by 16 bits so that it lines up with the product before the final shift.
    context.Arm64Assembler.Sxtw(accumulator64, raOperand);
    context.Arm64Assembler.Lsl(accumulator64, accumulator64, InstEmitCommon.Const(16));

    context.Arm64Assembler.Smaddl(result.Operand, rnOperand, halfM.Operand, accumulator64);
    context.Arm64Assembler.Asr(result64, result64, InstEmitCommon.Const(16));

    // halfM is free again at this point and doubles as scratch for the overflow check.
    CheckResultOverflow(context, halfM64, result.Operand);

    context.Arm64Assembler.Mov(rdOperand, result.Operand);
}
// SMLSD: subtractive form of the shared SMLAD/SMLSD emitter.
public static void Smlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x)
    => EmitSmladSmlsd(context, rd, rn, rm, ra, x, add: false);
// SMLSLD: subtractive form of the shared SMLALD/SMLSLD emitter.
public static void Smlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x)
    => EmitSmlaldSmlsld(context, rdLo, rdHi, rn, rm, x, add: false);
// SMMLA: additive form of the shared SMMLA/SMMLS emitter.
public static void Smmla(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
    => EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: true);
// SMMLS: subtractive form of the shared SMMLA/SMMLS emitter.
public static void Smmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r)
    => EmitSmmlaSmmls(context, rd, rn, rm, ra, r, add: false);
// SMMUL: computes the 64-bit signed product of Rn and Rm and keeps its upper 32 bits in Rd.
// With `r` set, 0x80000000 is added before the shift so that the result is rounded.
public static void Smmul(CodeGenContext context, uint rd, uint rn, uint rm, bool r)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand product64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    context.Arm64Assembler.Smull(product64, rnOperand, rmOperand);

    if (r)
    {
        using ScopedRegister roundingBias = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.Mov(roundingBias.Operand, 0x80000000u);
        context.Arm64Assembler.Add(product64, product64, roundingBias.Operand);
    }

    // Keep only the most significant word.
    context.Arm64Assembler.Lsr(product64, product64, InstEmitCommon.Const(32));
}
// SMUAD: additive form of the shared SMUAD/SMUSD emitter.
public static void Smuad(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
    => EmitSmuadSmusd(context, rd, rn, rm, x, add: true);
// SMUL<x><y>: multiplies the selected signed halfwords of Rn and Rm (`nHigh` / `mHigh` pick the
// top half) and writes the low 32 bits of the product to Rd.
public static void Smulbb(CodeGenContext context, uint rd, uint rn, uint rm, bool nHigh, bool mHigh)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand product64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister halfN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    SelectSignedHalfword(context, halfN.Operand, rnOperand, nHigh);
    SelectSignedHalfword(context, halfM.Operand, rmOperand, mHigh);

    context.Arm64Assembler.Smull(product64, halfN.Operand, halfM.Operand);
    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.
}
// SMULL: signed long multiply built on the assembler's SMULL.
public static void Smull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyLong(context, context.Arm64Assembler.Smull, rdLo, rdHi, rn, rm, s);
// SMULW<y>: multiplies Rn by the selected signed halfword of Rm, shifts the 64-bit product right
// by 16, runs the overflow check, then writes the result to Rd.
public static void Smulwb(CodeGenContext context, uint rd, uint rn, uint rm, bool mHigh)
{
    using ScopedRegister result = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister halfM = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand result64 = new(OperandKind.Register, OperandType.I64, result.Operand.Value);
    Operand halfM64 = new(OperandKind.Register, OperandType.I64, halfM.Operand.Value);

    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    SelectSignedHalfword(context, halfM.Operand, rmOperand, mHigh);

    context.Arm64Assembler.Smull(result.Operand, rnOperand, halfM.Operand);
    context.Arm64Assembler.Asr(result64, result64, InstEmitCommon.Const(16));

    // halfM is free again at this point and doubles as scratch for the overflow check.
    CheckResultOverflow(context, halfM64, result.Operand);

    context.Arm64Assembler.Mov(rdOperand, result.Operand);
}
// SMUSD: subtractive form of the shared SMUAD/SMUSD emitter.
public static void Smusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x)
    => EmitSmuadSmusd(context, rd, rn, rm, x, add: false);
// UMAAL: computes Rn * Rm + RdLo + RdHi as a 64-bit value and splits it back into RdHi:RdLo.
// When both destination indices are the same, the intermediate goes through a scratch
// register and no upper word is extracted.
public static void Umaal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm)
{
    Operand loOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand hiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand lo64 = new(OperandKind.Register, OperandType.I64, loOperand.Value);
    Operand hi64 = new(OperandKind.Register, OperandType.I64, hiOperand.Value);

    if (rdLo != rdHi)
    {
        context.Arm64Assembler.Umaddl(lo64, rnOperand, rmOperand, lo64);
        context.Arm64Assembler.Add(lo64, lo64, hi64);

        // Upper word of the 64-bit result goes to RdHi.
        context.Arm64Assembler.Lsr(hi64, lo64, InstEmitCommon.Const(32));
    }
    else
    {
        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand scratch64 = new(OperandKind.Register, OperandType.I64, scratch.Operand.Value);

        context.Arm64Assembler.Umaddl(scratch64, rnOperand, rmOperand, lo64);
        context.Arm64Assembler.Add(lo64, scratch64, hi64);
    }

    context.Arm64Assembler.Mov(loOperand, loOperand); // Zero-extend.
}
// UMLAL: long multiply-accumulate built on the assembler's UMADDL.
public static void Umlal(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyAddLong(context, context.Arm64Assembler.Umaddl, rdLo, rdHi, rn, rm, s);
// UMULL: unsigned long multiply built on the assembler's UMULL.
public static void Umull(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
    => EmitMultiplyLong(context, context.Arm64Assembler.Umull, rdLo, rdHi, rn, rm, s);
// Shared emitter for the 32x32 -> 64 bit multiplies (SMULL / UMULL).
// `action` emits the widening multiply into the 64-bit view of RdLo; the upper word is then
// moved to RdHi and RdLo is zero-extended. With `s` set, N/Z are produced from the full
// 64-bit result and the captured flags are handed to RestoreCvFlags.
private static void EmitMultiplyLong(CodeGenContext context, Action<Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    if (s)
    {
        using ScopedRegister flagsRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, flagsRegister.Operand);

        action(rdLoOperand64, rnOperand, rmOperand);

        context.Arm64Assembler.Tst(rdLoOperand64, rdLoOperand64);

        InstEmitCommon.RestoreCvFlags(context, flagsRegister.Operand);

        // The flags were rewritten above; record that, as Mul and EmitMultiplyAddLong
        // do on their flag-setting paths.
        context.SetNzcvModified();
    }
    else
    {
        action(rdLoOperand64, rnOperand, rmOperand);
    }

    if (rdLo != rdHi)
    {
        // Upper word of the 64-bit result goes to RdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.
}
// Shared emitter for the 32x32 + 64 -> 64 bit multiply-accumulates (SMLAL / UMLAL).
// The accumulator is assembled from RdHi:RdLo in a scratch register, `action` emits the
// widening multiply-add into the 64-bit view of RdLo, and the result is split back into
// RdHi:RdLo. With `s` set, N/Z are produced from the full 64-bit result.
private static void EmitMultiplyAddLong(CodeGenContext context, Action<Operand, Operand, Operand, Operand> action, uint rdLo, uint rdHi, uint rn, uint rm, bool s)
{
    Operand loOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand hiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand lo64 = new(OperandKind.Register, OperandType.I64, loOperand.Value);
    Operand hi64 = new(OperandKind.Register, OperandType.I64, hiOperand.Value);

    using ScopedRegister accumulator = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand accumulator64 = new(OperandKind.Register, OperandType.I64, accumulator.Operand.Value);

    // accumulator = (RdHi << 32) | RdLo
    context.Arm64Assembler.Lsl(accumulator64, hi64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(accumulator64, accumulator64, loOperand);

    if (!s)
    {
        action(lo64, rnOperand, rmOperand, accumulator64);
    }
    else
    {
        using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);

        action(lo64, rnOperand, rmOperand, accumulator64);

        context.Arm64Assembler.Tst(lo64, lo64);

        InstEmitCommon.RestoreCvFlags(context, savedFlags.Operand);

        context.SetNzcvModified();
    }

    if (rdLo != rdHi)
    {
        // Upper word of the 64-bit result goes to RdHi.
        context.Arm64Assembler.Lsr(hi64, lo64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(loOperand, loOperand); // Zero-extend.
}
// Shared emitter for SMLAD / SMLSD (and their X forms).
// Computes Ra + Rn.lo * Rm.lo +/- Rn.hi * Rm.hi as a 64-bit value, runs the overflow check
// (which sets Q) and leaves the low 32 bits in Rd. `x` swaps the halfwords of Rm first;
// `add` selects SMLAD (true) or SMLSD (false).
//
// Rd is only written by the very last multiply-accumulate. Rd may name the same register as
// Rn or Rm, so writing a partial product into it before the high halfwords have been
// extracted would corrupt the second product.
private static void EmitSmladSmlsd(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool x, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Swap the halfwords of Rm.
        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // accumulator = SInt(Ra) + Rn.lo * Rm.lo
    context.Arm64Assembler.Sxtw(tempA64, raOperand);

    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: false);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: false);

    context.Arm64Assembler.Smaddl(tempA64, tempN.Operand, tempM.Operand, tempA64);

    // Rd = accumulator +/- Rn.hi * Rm.hi
    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: true);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: true);

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, tempN.Operand, tempM.Operand, tempA64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, tempN.Operand, tempM.Operand, tempA64);
    }

    // tempM is free again at this point and doubles as scratch for the overflow check.
    CheckResultOverflow(context, tempM64, rdOperand64);

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
// Shared emitter for SMLALD / SMLSLD (and their X forms).
// Computes RdHi:RdLo + Rn.lo * Rm.lo +/- Rn.hi * Rm.hi as a 64-bit value and splits it back
// into RdHi:RdLo. `x` swaps the halfwords of Rm first; `add` selects SMLALD (true) or
// SMLSLD (false).
//
// The 64-bit accumulator is captured from RdHi:RdLo before anything is written to RdLo, and
// RdLo is only written by the very last multiply-accumulate. Writing a partial product into
// RdLo first would destroy the low accumulator word (and Rn/Rm, should they share a register
// with RdLo) before it is read.
private static void EmitSmlaldSmlsld(CodeGenContext context, uint rdLo, uint rdHi, uint rn, uint rm, bool x, bool add)
{
    Operand rdLoOperand = InstEmitCommon.GetOutputGpr(context, rdLo);
    Operand rdHiOperand = InstEmitCommon.GetOutputGpr(context, rdHi);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdLoOperand64 = new(OperandKind.Register, OperandType.I64, rdLoOperand.Value);
    Operand rdHiOperand64 = new(OperandKind.Register, OperandType.I64, rdHiOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempA = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand tempA64 = new(OperandKind.Register, OperandType.I64, tempA.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Swap the halfwords of Rm.
        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // accumulator = (RdHi << 32) | RdLo
    context.Arm64Assembler.Lsl(tempA64, rdHiOperand64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Orr(tempA64, tempA64, rdLoOperand);

    // accumulator += Rn.lo * Rm.lo
    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: false);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: false);

    context.Arm64Assembler.Smaddl(tempA64, tempN.Operand, tempM.Operand, tempA64);

    // RdLo (64-bit view) = accumulator +/- Rn.hi * Rm.hi
    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: true);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: true);

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdLoOperand64, tempN.Operand, tempM.Operand, tempA64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdLoOperand64, tempN.Operand, tempM.Operand, tempA64);
    }

    if (rdLo != rdHi)
    {
        // Upper word of the 64-bit result goes to RdHi.
        context.Arm64Assembler.Lsr(rdHiOperand64, rdLoOperand64, InstEmitCommon.Const(32));
    }

    context.Arm64Assembler.Mov(rdLoOperand, rdLoOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
// Shared emitter for SMMLA / SMMLS.
// Computes (Ra << 32) +/- Rn * Rm as a 64-bit value and keeps its upper 32 bits in Rd.
// With `r` set, 0x80000000 is added before the shift so that the result is rounded;
// `add` selects SMMLA (true) or SMMLS (false).
private static void EmitSmmlaSmmls(CodeGenContext context, uint rd, uint rn, uint rm, uint ra, bool r, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);
    Operand raOperand = InstEmitCommon.GetInputGpr(context, ra);

    Operand result64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);
    Operand ra64 = new(OperandKind.Register, OperandType.I64, raOperand.Value);

    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand scratch64 = new(OperandKind.Register, OperandType.I64, scratch.Operand.Value);

    // Ra occupies the upper word of the 64-bit accumulator.
    context.Arm64Assembler.Lsl(scratch64, ra64, InstEmitCommon.Const(32));

    if (!add)
    {
        context.Arm64Assembler.Smsubl(result64, rnOperand, rmOperand, scratch64);
    }
    else
    {
        context.Arm64Assembler.Smaddl(result64, rnOperand, rmOperand, scratch64);
    }

    if (r)
    {
        // The scratch register is no longer needed as accumulator and is reused for the rounding bias.
        context.Arm64Assembler.Mov(scratch.Operand, 0x80000000u);
        context.Arm64Assembler.Add(result64, result64, scratch64);
    }

    // Keep only the most significant word.
    context.Arm64Assembler.Lsr(result64, result64, InstEmitCommon.Const(32));
}
// Shared emitter for SMUAD / SMUSD (and their X forms).
// Computes Rn.lo * Rm.lo +/- Rn.hi * Rm.hi and leaves the low 32 bits in Rd. `x` swaps the
// halfwords of Rm first; `add` selects SMUAD (true) or SMUSD (false).
//
// Rd is only written by the very last multiply-accumulate. Rd may name the same register as
// Rn or Rm, so writing the first product into it before the high halfwords have been
// extracted would corrupt the second product.
//
// For SMUAD the sum of the two products can exceed the signed 32-bit range, in which case
// the architecture sets the Q flag; the overflow check is emitted for that form only.
private static void EmitSmuadSmusd(CodeGenContext context, uint rd, uint rn, uint rm, bool x, bool add)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);
    Operand rmOperand = InstEmitCommon.GetInputGpr(context, rm);

    Operand rdOperand64 = new(OperandKind.Register, OperandType.I64, rdOperand.Value);

    using ScopedRegister tempN = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempM = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister product = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    Operand tempM64 = new(OperandKind.Register, OperandType.I64, tempM.Operand.Value);
    Operand product64 = new(OperandKind.Register, OperandType.I64, product.Operand.Value);

    ScopedRegister swapTemp = default;

    if (x)
    {
        swapTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        // Swap the halfwords of Rm.
        context.Arm64Assembler.Ror(swapTemp.Operand, rmOperand, InstEmitCommon.Const(16));

        rmOperand = swapTemp.Operand;
    }

    // product = Rn.lo * Rm.lo
    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: false);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: false);

    context.Arm64Assembler.Smull(product64, tempN.Operand, tempM.Operand);

    // Rd = product +/- Rn.hi * Rm.hi
    SelectSignedHalfword(context, tempN.Operand, rnOperand, high: true);
    SelectSignedHalfword(context, tempM.Operand, rmOperand, high: true);

    if (add)
    {
        context.Arm64Assembler.Smaddl(rdOperand64, tempN.Operand, tempM.Operand, product64);

        // tempM is free again at this point and doubles as scratch for the overflow check.
        CheckResultOverflow(context, tempM64, rdOperand64);
    }
    else
    {
        context.Arm64Assembler.Smsubl(rdOperand64, tempN.Operand, tempM.Operand, product64);
    }

    context.Arm64Assembler.Mov(rdOperand, rdOperand); // Zero-extend.

    if (x)
    {
        swapTemp.Dispose();
    }
}
// Writes one sign-extended 16-bit half of source into dest: the bottom half through SXTH,
// or the top half through an arithmetic shift right by 16 when high is set.
private static void SelectSignedHalfword(CodeGenContext context, Operand dest, Operand source, bool high)
{
    if (!high)
    {
        context.Arm64Assembler.Sxth(dest, source);

        return;
    }

    context.Arm64Assembler.Asr(dest, source, InstEmitCommon.Const(16));
}
// Emits a check that sets the Q flag when result differs from the sign extension of its own
// low 32 bits. temp64 is used as scratch and is overwritten.
private static void CheckResultOverflow(CodeGenContext context, Operand temp64, Operand result)
{
    // temp64 = sign_extend(result[31:0]) - result, which is zero when nothing was lost.
    context.Arm64Assembler.Sxtw(temp64, result);
    context.Arm64Assembler.Sub(temp64, temp64, result);

    // Emit the CBZ with a zero offset for now; it is patched once the skipped code is emitted.
    int cbzPosition = context.CodeWriter.InstructionPointer;

    context.Arm64Assembler.Cbz(temp64, 0);

    // Only reached when the difference is non-zero, i.e. on overflow.
    InstEmitSaturate.SetQFlag(context);

    // OR the forward distance into the branch offset field (19 bits starting at bit 5).
    int skipDistance = context.CodeWriter.InstructionPointer - cbzPosition;
    uint offsetField = (uint)((skipDistance & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzPosition, context.CodeWriter.ReadInstructionAt(cbzPosition) | offsetField);
}
}
}

View File

@ -0,0 +1,344 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 NEON arithmetic instructions. Every method forwards to an
// InstEmitNeonCommon helper and hands it the Arm64 assembler method(s) that encode the operation.
static class InstEmitNeonArithmetic
{
    public static void Vaba(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uaba : context.Arm64Assembler.Saba, null);

    public static void Vabal(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabal : context.Arm64Assembler.Sabal);

    public static void VabdF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FabdV, context.Arm64Assembler.FabdVH);

    public static void VabdI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uabd : context.Arm64Assembler.Sabd, null);

    public static void Vabdl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uabdl : context.Arm64Assembler.Sabdl);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void Vabs(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.AbsV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FabsSingleAndDouble, context.Arm64Assembler.FabsHalf);
    }

    public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddSingleAndDouble, context.Arm64Assembler.FaddHalf);

    public static void VaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddV, context.Arm64Assembler.AddS);

    public static void Vaddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Addhn);

    public static void Vaddl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddl : context.Arm64Assembler.Saddl);

    public static void Vaddw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Uaddw : context.Arm64Assembler.Saddw);

    public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlaVecSingleAndDouble, context.Arm64Assembler.FmlaVecHalf);

    public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRdF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmlsVecSingleAndDouble, context.Arm64Assembler.FmlsVecHalf);

    public static void Vhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhadd : context.Arm64Assembler.Shadd, null);

    public static void Vhsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uhsub : context.Arm64Assembler.Shsub, null);

    public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxnmSingleAndDouble, context.Arm64Assembler.FmaxnmHalf);

    public static void VmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxSingleAndDouble, context.Arm64Assembler.FmaxHalf);

    public static void VmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umax : context.Arm64Assembler.Smax, null);

    public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminnmSingleAndDouble, context.Arm64Assembler.FminnmHalf);

    public static void VminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminSingleAndDouble, context.Arm64Assembler.FminHalf);

    public static void VminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umin : context.Arm64Assembler.Smin, null);

    public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: false);

    public static void VmlaI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaVec);

    // f selects the floating-point by-scalar form; otherwise the integer by-scalar form is emitted.
    public static void VmlaS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlaElt);

            return;
        }

        InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: false);
    }

    public static void VmlalI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalVec : context.Arm64Assembler.SmlalVec);

    public static void VmlalS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlalElt : context.Arm64Assembler.SmlalElt);

    public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryMulNegRdF(context, rd, rn, rm, sz, q, negProduct: true);

    public static void VmlsI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsVec);

    // f selects the floating-point by-scalar form; otherwise the integer by-scalar form is emitted.
    public static void VmlsS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MlsElt);

            return;
        }

        InstEmitNeonCommon.EmitVectorTernaryMulNegRdByScalarAnyF(context, rd, rn, rm, size, q, negProduct: true);
    }

    public static void VmlslI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslVec : context.Arm64Assembler.SmlslVec);

    public static void VmlslS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorTernaryRdLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmlslElt : context.Arm64Assembler.SmlslElt);

    public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmulVecSingleAndDouble, context.Arm64Assembler.FmulVecHalf);

    // op selects the polynomial multiply (PMUL); otherwise the regular integer multiply is emitted.
    public static void VmulI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.MulVec, null);

            return;
        }

        // TODO: Check for host feature support and emulate when it is missing.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.Pmul, null);
    }

    // f selects the floating-point by-scalar form; otherwise the integer by-scalar form is emitted.
    public static void VmulS(CodeGenContext context, uint rd, uint rn, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.MulElt);

            return;
        }

        InstEmitNeonCommon.EmitVectorBinaryByScalarAnyF(context, rd, rn, rm, size, q, context.Arm64Assembler.FmulElt2regElementSingleAndDouble, context.Arm64Assembler.FmulElt2regElementHalf);
    }

    // op selects the polynomial long multiply (PMULL); otherwise u picks the unsigned or signed form.
    public static void VmullI(CodeGenContext context, uint rd, uint rn, uint rm, bool op, bool u, uint size)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullVec : context.Arm64Assembler.SmullVec);

            return;
        }

        // A size of 2 is passed on as 3 for the polynomial form; other sizes are forwarded unchanged.
        uint polynomialSize = size == 2 ? 3 : size;

        // TODO: Check for host feature support and emulate when it is missing.
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, polynomialSize, context.Arm64Assembler.Pmull);
    }

    public static void VmullS(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, u ? context.Arm64Assembler.UmullElt : context.Arm64Assembler.SmullElt);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void Vneg(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.NegV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FnegSingleAndDouble, context.Arm64Assembler.FnegHalf);
    }

    public static void Vpadal(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryRd(context, rd, rm, size, q, op ? context.Arm64Assembler.Uadalp : context.Arm64Assembler.Sadalp);

    public static void VpaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FaddpVecSingleAndDouble, context.Arm64Assembler.FaddpVecHalf);

    public static void VpaddI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.AddpVec, null);

    public static void Vpaddl(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, op ? context.Arm64Assembler.Uaddlp : context.Arm64Assembler.Saddlp);

    public static void VpmaxF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FmaxpVecSingleAndDouble, context.Arm64Assembler.FmaxpVecHalf);

    public static void VpmaxI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Umaxp : context.Arm64Assembler.Smaxp, null);

    public static void VpminF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FminpVecSingleAndDouble, context.Arm64Assembler.FminpVecHalf);

    public static void VpminI(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Uminp : context.Arm64Assembler.Sminp, null);

    // Only the floating-point form (f set) is implemented; the other form throws.
    public static void Vrecpe(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            throw new NotImplementedException();
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrecpeV, context.Arm64Assembler.FrecpeVH);
    }

    public static void Vrecps(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrecpsV, context.Arm64Assembler.FrecpsVH);

    // Only the floating-point form (f set) is implemented; the other form throws.
    public static void Vrsqrte(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            throw new NotImplementedException();
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrsqrteV, context.Arm64Assembler.FrsqrteVH);
    }

    public static void Vrsqrts(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FrsqrtsV, context.Arm64Assembler.FrsqrtsVH);

    public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FsubSingleAndDouble, context.Arm64Assembler.FsubHalf);

    public static void VsubI(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SubV, context.Arm64Assembler.SubS);

    public static void Vsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Subhn);

    public static void Vsubl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubl : context.Arm64Assembler.Ssubl);

    public static void Vsubw(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size) =>
        InstEmitNeonCommon.EmitVectorBinaryWide(context, rd, rn, rm, size, u ? context.Arm64Assembler.Usubw : context.Arm64Assembler.Ssubw);
}
}

View File

@ -0,0 +1,35 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 NEON bit-counting and element-reversal instructions. Each one is a
// unary vector operation forwarded to InstEmitNeonCommon.EmitVectorUnary.
static class InstEmitNeonBit
{
    public static void Vcls(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cls);

    public static void Vclz(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Clz);

    public static void Vcnt(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Cnt);

    public static void Vrev16(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev16);

    public static void Vrev32(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev32);

    public static void Vrev64(CodeGenContext context, uint rd, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.Rev64);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,126 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 NEON comparison instructions. The "I" variants take a single source
// register and use the assembler's compare-with-zero methods; the "R" and "FR" variants compare
// two registers.
static class InstEmitNeonCompare
{
    public static void Vacge(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgeV, context.Arm64Assembler.FacgeVH);

    public static void Vacgt(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FacgtV, context.Arm64Assembler.FacgtVH);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void VceqI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmeqZeroV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmeqZeroV, context.Arm64Assembler.FcmeqZeroVH);
    }

    public static void VceqR(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmeqRegV, context.Arm64Assembler.CmeqRegS);

    public static void VceqFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmeqRegV, context.Arm64Assembler.FcmeqRegVH);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void VcgeI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgeZeroV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgeZeroV, context.Arm64Assembler.FcmgeZeroVH);
    }

    // u selects the unsigned comparison (CMHS) over the signed one (CMGE).
    public static void VcgeR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.CmhsV : context.Arm64Assembler.CmgeRegV, u ? context.Arm64Assembler.CmhsS : context.Arm64Assembler.CmgeRegS);

    public static void VcgeFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgeRegV, context.Arm64Assembler.FcmgeRegVH);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void VcgtI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmgtZeroV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmgtZeroV, context.Arm64Assembler.FcmgtZeroVH);
    }

    // u selects the unsigned comparison (CMHI) over the signed one (CMGT).
    public static void VcgtR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.CmhiV : context.Arm64Assembler.CmgtRegV, u ? context.Arm64Assembler.CmhiS : context.Arm64Assembler.CmgtRegS);

    public static void VcgtFR(CodeGenContext context, uint rd, uint rn, uint rm, uint sz, uint q) =>
        InstEmitNeonCommon.EmitVectorBinaryF(context, rd, rn, rm, sz, q, context.Arm64Assembler.FcmgtRegV, context.Arm64Assembler.FcmgtRegVH);

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void VcleI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmleV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmleV, context.Arm64Assembler.FcmleVH);
    }

    // f selects the floating-point form; otherwise the integer form is emitted.
    public static void VcltI(CodeGenContext context, uint rd, uint rm, bool f, uint size, uint q)
    {
        if (!f)
        {
            InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.CmltV);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcmltV, context.Arm64Assembler.FcmltVH);
    }

    public static void Vtst(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.CmtstV, context.Arm64Assembler.CmtstS);
}
}

View File

@ -0,0 +1,137 @@
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 NEON conversion instructions (float to integer with the various
// rounding variants, half/single precision, and fixed-point).
static class InstEmitNeonConvert
{
    // op selects the unsigned conversion (FCVTAU) over the signed one (FCVTAS).
    public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtasV, context.Arm64Assembler.FcvtasVH);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtauV, context.Arm64Assembler.FcvtauVH);
    }

    // op selects the unsigned conversion (FCVTMU) over the signed one (FCVTMS).
    public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtmsV, context.Arm64Assembler.FcvtmsVH);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtmuV, context.Arm64Assembler.FcvtmuVH);
    }

    // op selects the unsigned conversion (FCVTNU) over the signed one (FCVTNS).
    public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtnsV, context.Arm64Assembler.FcvtnsVH);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtnuV, context.Arm64Assembler.FcvtnuVH);
    }

    // op selects the unsigned conversion (FCVTPU) over the signed one (FCVTPS).
    public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size, uint q)
    {
        if (!op)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtpsV, context.Arm64Assembler.FcvtpsVH);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtpuV, context.Arm64Assembler.FcvtpuVH);
    }

    // op set converts half to single precision (FCVTL); op clear converts single to half (FCVTN).
    public static void VcvtHs(CodeGenContext context, uint rd, uint rm, bool op)
    {
        bool singleToHalf = !op;

        if (singleToHalf)
        {
            InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, 0, context.Arm64Assembler.Fcvtn);

            return;
        }

        InstEmitNeonCommon.EmitVectorUnaryLong(context, rd, rm, 0, context.Arm64Assembler.Fcvtl);
    }

    // Conversion between floating-point and integer. Bit 0 of op selects unsigned, the
    // remaining bits select the float-to-integer direction.
    public static void VcvtIs(CodeGenContext context, uint rd, uint rm, uint op, uint size, uint q)
    {
        Debug.Assert(op >> 2 == 0);

        bool isUnsigned = (op & 1) != 0;
        bool floatToInteger = (op >> 1) != 0;

        if (floatToInteger && isUnsigned)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtzuIntV, context.Arm64Assembler.FcvtzuIntVH);
        }
        else if (floatToInteger)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FcvtzsIntV, context.Arm64Assembler.FcvtzsIntVH);
        }
        else if (isUnsigned)
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.UcvtfIntV, context.Arm64Assembler.UcvtfIntVH);
        }
        else
        {
            InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.ScvtfIntV, context.Arm64Assembler.ScvtfIntVH);
        }
    }

    // Conversion between floating-point and fixed-point. Bit 0 of op selects the
    // float-to-fixed direction, the remaining bits select the element size, and u selects unsigned.
    public static void VcvtXs(CodeGenContext context, uint rd, uint rm, uint imm6, uint op, bool u, uint q)
    {
        Debug.Assert(op >> 2 == 0);

        bool floatToFixed = (op & 1) != 0;
        uint size = 1 + (op >> 1);

        // The fraction bit count is 64 - imm6, clamped to [1, 8 << size].
        uint maxFractionBits = 8u << (int)size;
        uint fbits = Math.Clamp(64u - imm6, 1, maxFractionBits);

        if (floatToFixed && u)
        {
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.FcvtzuFixV);
        }
        else if (floatToFixed)
        {
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.FcvtzsFixV);
        }
        else if (u)
        {
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.UcvtfFixV);
        }
        else
        {
            InstEmitNeonCommon.EmitVectorUnaryFixedAnyF(context, rd, rm, fbits, size, q, context.Arm64Assembler.ScvtfFixV);
        }
    }
}
}

View File

@ -0,0 +1,43 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 AES instructions. Each one asserts (debug builds only) that the size
// field is 0 and then forwards to InstEmitNeonCommon.EmitVectorUnary with the matching Arm64
// assembler method.
// TODO: None of these check for host feature support or provide an emulated fallback yet.
static class InstEmitNeonCrypto
{
    public static void Aesd(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 0);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesd);
    }

    public static void Aese(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 0);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aese);
    }

    public static void Aesimc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 0);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesimc);
    }

    public static void Aesmc(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 0);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Aesmc);
    }
}
}

View File

@ -0,0 +1,97 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 SHA-1 and SHA-256 instructions. The three-register forms assert
// (debug builds only) that q is 1, the two-register forms assert that size is 2, and each then
// forwards to the matching InstEmitNeonCommon helper.
// TODO: None of these check for host feature support or provide an emulated fallback yet.
static class InstEmitNeonHash
{
    public static void Sha1c(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1c);
    }

    public static void Sha1h(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha1h);
    }

    public static void Sha1m(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1m);
    }

    public static void Sha1p(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1p);
    }

    public static void Sha1su0(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha1su0);
    }

    public static void Sha1su1(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha1su1);
    }

    public static void Sha256h(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256h);
    }

    public static void Sha256h2(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256h2);
    }

    public static void Sha256su0(CodeGenContext context, uint rd, uint rm, uint size)
    {
        Debug.Assert(size == 2);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, context.Arm64Assembler.Sha256su0);
    }

    public static void Sha256su1(CodeGenContext context, uint rd, uint rn, uint rm, uint q)
    {
        Debug.Assert(q == 1);

        // TODO: Host feature check, with emulation when unsupported.
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, context.Arm64Assembler.Sha256su1);
    }
}
}

View File

@ -0,0 +1,79 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
// Emitters for the Arm32 NEON bitwise logical instructions. Register forms forward to
// InstEmitNeonCommon; the immediate forms (VBIC/VORR immediate) share EmitMovi below.
static class InstEmitNeonLogical
{
    public static void VandR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.And);

    // Immediate form: emitted through EmitMovi with op = 1.
    public static void VbicI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q) =>
        EmitMovi(context, rd, cmode, imm8, 1, q);

    public static void VbicR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.BicReg);

    public static void VbifR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bif);

    public static void VbitR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bit);

    public static void VbslR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, q, context.Arm64Assembler.Bsl);

    public static void VeorR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Eor);

    public static void VornR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.Orn);

    // Immediate form: emitted through EmitMovi with op = 0.
    public static void VorrI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q) =>
        EmitMovi(context, rd, cmode, imm8, 0, q);

    public static void VorrR(CodeGenContext context, uint rd, uint rn, uint rm, uint q) =>
        InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, q, context.Arm64Assembler.OrrReg);

    // Emits the assembler's Movi with the given cmode/op and the eight bits of imm8.
    // When q is non-zero the instruction targets the remapped register for rd >> 1 directly.
    // When q is zero it goes through a temporary SIMD register: the current value of rd is moved
    // in, the instruction is applied, and the result is inserted back into rd.
    private static void EmitMovi(CodeGenContext context, uint rd, uint cmode, uint imm8, uint op, uint q)
    {
        (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = InstEmitNeonMove.Split(imm8);

        if (q != 0)
        {
            Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

            context.Arm64Assembler.Movi(destination, h, g, f, e, d, cmode, c, b, a, op, q);

            return;
        }

        using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        InstEmitNeonCommon.MoveScalarToSide(context, scratch.Operand, rd, false);

        context.Arm64Assembler.Movi(scratch.Operand, h, g, f, e, d, cmode, c, b, a, op, q);

        InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
    }
}
}

View File

@ -0,0 +1,797 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitNeonMemory
{
// VLD1 (single element to one lane): loads one element into lane "index" of rd.
// The lane index is taken from the bits of indexAlign above bit "size".
// NOTE(review): the 1 << size argument is presumably the transfer size in bytes used by
// EmitMemory1234InstructionCore — confirm against that helper.
public static void Vld11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int elementSize = 1 << (int)size;
    uint lane = indexAlign >> ((int)size + 1);

    EmitMemory1234InstructionCore(context, rn, rm, elementSize, address =>
    {
        EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 1, 1, context.Arm64Assembler.Ld1SnglAsNoPostIndex);
    });
}
// VLD1 (single element to all lanes): emitted through the load-and-replicate helper with
// t + 1 as the register count. The "a" parameter is not used here.
public static void Vld1A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int elementSize = 1 << (int)size;
    uint registerCount = t + 1;

    EmitMemory1234InstructionCore(context, rn, rm, elementSize, address =>
    {
        EmitMemoryLoad1SingleReplicateInstruction(context, address, rd, size, registerCount, 1, context.Arm64Assembler.Ld1rAsNoPostIndex);
    });
}
// VLD1 (multiple single elements): loads registersCount registers starting at rd, passing
// 8 * registersCount as the size argument. The "align" parameter is not used here.
public static void Vld1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
{
    int totalSize = 8 * (int)registersCount;

    EmitMemory1234InstructionCore(context, rn, rm, totalSize, address =>
    {
        EmitMemoryLoad1234MultipleInstruction(context, address, rd, size, registersCount, 1, context.Arm64Assembler.Ld1MultAsNoPostIndex);
    });
}
// VLD2 (single 2-element structure to one lane). The lane index comes from the bits of
// indexAlign above bit "size"; the register step is 2 when size is non-zero and bit "size"
// of indexAlign is set, and 1 otherwise.
public static void Vld21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    uint lane = indexAlign >> ((int)size + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << (int)size)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int structureSize = 2 * (1 << (int)size);

    EmitMemory1234InstructionCore(context, rn, rm, structureSize, address =>
    {
        EmitMemoryLoad1234SingleInstruction(context, address, rd, lane, size, 2, step, context.Arm64Assembler.Ld2SnglAsNoPostIndex);
    });
}
// VLD2 (single 2-element structure to all lanes): emitted through the load-and-replicate
// helper for 2 registers with a register step of t + 1. The "a" parameter is not used here.
public static void Vld2A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int structureSize = 2 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(context, rn, rm, structureSize, address =>
    {
        EmitMemoryLoad234SingleReplicateInstruction(context, address, rd, size, 2, step, context.Arm64Assembler.Ld2rAsNoPostIndex);
    });
}
// Two-register multiple load (16 bytes) through Ld2MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 16;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryLoad1234MultipleInstruction(context, addr, rd, size, 2, step, context.Arm64Assembler.Ld2MultAsNoPostIndex));
}
// 32-byte variant of the two-element multiple load; emission goes through the 2x2 helper
// with Ld2MultAsNoPostIndex. The "align" parameter is not read here.
public static void Vld2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
{
    const int Bytes = 32;

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryLoad1234Multiple2x2Instruction(context, addr, rd, size, context.Arm64Assembler.Ld2MultAsNoPostIndex));
}
// Three-register single-lane load through Ld3SnglAsNoPostIndex.
// The register step is 2 when size is non-zero and bit "size" of indexAlign is set, otherwise 1.
public static void Vld31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << sizeLog2)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int bytes = 3 * (1 << sizeLog2);

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryLoad1234SingleInstruction(context, addr, rd, lane, size, 3, step, context.Arm64Assembler.Ld3SnglAsNoPostIndex));
}
// Three-register replicating load through Ld3rAsNoPostIndex with a register step of t + 1.
// The parameter "a" is not read here.
public static void Vld3A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int bytes = 3 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryLoad234SingleReplicateInstruction(context, addr, rd, size, 3, step, context.Arm64Assembler.Ld3rAsNoPostIndex));
}
// Three-register multiple load (24 bytes) through Ld3MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vld3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 24;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryLoad1234MultipleInstruction(context, addr, rd, size, 3, step, context.Arm64Assembler.Ld3MultAsNoPostIndex));
}
// Four-register single-lane load through Ld4SnglAsNoPostIndex.
// The register step is 2 when size is non-zero and bit "size" of indexAlign is set, otherwise 1.
public static void Vld41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << sizeLog2)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int bytes = 4 * (1 << sizeLog2);

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryLoad1234SingleInstruction(context, addr, rd, lane, size, 4, step, context.Arm64Assembler.Ld4SnglAsNoPostIndex));
}
// Four-register replicating load through Ld4rAsNoPostIndex with a register step of t + 1.
// The parameter "a" is not read here.
public static void Vld4A(CodeGenContext context, uint rd, uint rn, uint rm, uint a, uint t, uint size)
{
    int bytes = 4 * (1 << (int)size);
    uint step = t + 1;

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryLoad234SingleReplicateInstruction(context, addr, rd, size, 4, step, context.Arm64Assembler.Ld4rAsNoPostIndex));
}
// Four-register multiple load (32 bytes) through Ld4MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vld4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 32;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryLoad1234MultipleInstruction(context, addr, rd, size, 4, step, context.Arm64Assembler.Ld4MultAsNoPostIndex));
}
// Load form of the multiple-register transfer: forwards every argument unchanged,
// mapping u to "add" and w to "wBack", with isStore set to false.
public static void Vldm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    => EmitMemoryMultipleInstruction(context, rd, rn, registerCount, add: u, wBack: w, singleRegs, isStore: false);
// Load form of the single-register transfer: forwards every argument unchanged,
// mapping u to "add", with isStore set to false.
public static void Vldr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    => EmitMemoryInstruction(context, rd, rn, imm8, add: u, size, isStore: false);
// Takes the lane index from indexAlign (shifted right by size + 1) and hands the actual
// emission to the single-lane store helper, using St1SnglAsNoPostIndex as the emitter.
// The core helper is told the access covers 1 << size bytes.
public static void Vst11(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    int bytes = 1 << sizeLog2;

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryStore1234SingleInstruction(context, addr, rd, lane, size, 1, 1, context.Arm64Assembler.St1SnglAsNoPostIndex));
}
// Multiple-register store through St1MultAsNoPostIndex, 8 bytes per register and a
// register step of 1. The "align" parameter is not read here.
public static void Vst1M(CodeGenContext context, uint rd, uint rn, uint rm, uint registersCount, uint align, uint size)
{
    int bytes = 8 * (int)registersCount;

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryStore1234MultipleInstruction(context, addr, rd, size, registersCount, 1, context.Arm64Assembler.St1MultAsNoPostIndex));
}
// Two-register single-lane store through St2SnglAsNoPostIndex.
// The register step is 2 when size is non-zero and bit "size" of indexAlign is set, otherwise 1.
public static void Vst21(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << sizeLog2)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int bytes = 2 * (1 << sizeLog2);

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryStore1234SingleInstruction(context, addr, rd, lane, size, 2, step, context.Arm64Assembler.St2SnglAsNoPostIndex));
}
// Two-register multiple store (16 bytes) through St2MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 16;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryStore1234MultipleInstruction(context, addr, rd, size, 2, step, context.Arm64Assembler.St2MultAsNoPostIndex));
}
// 32-byte variant of the two-element multiple store; emission goes through the 2x2 helper
// with St2MultAsNoPostIndex. The "align" parameter is not read here.
public static void Vst2M(CodeGenContext context, uint rd, uint rn, uint rm, uint align, uint size)
{
    const int Bytes = 32;

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryStore1234Multiple2x2Instruction(context, addr, rd, size, context.Arm64Assembler.St2MultAsNoPostIndex));
}
// Three-register single-lane store through St3SnglAsNoPostIndex.
// The register step is 2 when size is non-zero and bit "size" of indexAlign is set, otherwise 1.
public static void Vst31(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << sizeLog2)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int bytes = 3 * (1 << sizeLog2);

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryStore1234SingleInstruction(context, addr, rd, lane, size, 3, step, context.Arm64Assembler.St3SnglAsNoPostIndex));
}
// Three-register multiple store (24 bytes) through St3MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vst3M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 24;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryStore1234MultipleInstruction(context, addr, rd, size, 3, step, context.Arm64Assembler.St3MultAsNoPostIndex));
}
// Four-register single-lane store through St4SnglAsNoPostIndex.
// The register step is 2 when size is non-zero and bit "size" of indexAlign is set, otherwise 1.
public static void Vst41(CodeGenContext context, uint rd, uint rn, uint rm, uint indexAlign, uint size)
{
    int sizeLog2 = (int)size;
    uint lane = indexAlign >> (sizeLog2 + 1);
    bool doubleSpaced = size > 0 && (indexAlign & (1u << sizeLog2)) != 0;
    uint step = doubleSpaced ? 2u : 1u;
    int bytes = 4 * (1 << sizeLog2);

    EmitMemory1234InstructionCore(context, rn, rm, bytes, addr =>
        EmitMemoryStore1234SingleInstruction(context, addr, rd, lane, size, 4, step, context.Arm64Assembler.St4SnglAsNoPostIndex));
}
// Four-register multiple store (32 bytes) through St4MultAsNoPostIndex.
// The low bit of "type" selects a register step of 1 or 2; "align" is not read here.
public static void Vst4M(CodeGenContext context, uint rd, uint rn, uint rm, uint type, uint align, uint size)
{
    const int Bytes = 32;
    uint step = 1 + (type & 1);

    EmitMemory1234InstructionCore(context, rn, rm, Bytes, addr =>
        EmitMemoryStore1234MultipleInstruction(context, addr, rd, size, 4, step, context.Arm64Assembler.St4MultAsNoPostIndex));
}
// Store form of the multiple-register transfer: forwards every argument unchanged,
// mapping u to "add" and w to "wBack", with isStore set to true.
public static void Vstm(CodeGenContext context, uint rd, uint rn, uint registerCount, bool u, bool w, bool singleRegs)
    => EmitMemoryMultipleInstruction(context, rd, rn, registerCount, add: u, wBack: w, singleRegs, isStore: true);
// Store form of the single-register transfer: forwards every argument unchanged,
// mapping u to "add", with isStore set to true.
public static void Vstr(CodeGenContext context, uint rd, uint rn, uint imm8, bool u, uint size)
    => EmitMemoryInstruction(context, rd, rn, imm8, add: u, size, isStore: true);
// Shared body of Vldm/Vstm. Works out the translated start address for the transfer,
// emits the register accesses, and applies base-register write-back when requested.
//
// The total transfer size is registerCount * 4 bytes (singleRegs) or * 8 bytes.
//  - add:            the transfer starts at the base register value; with wBack the base is
//                    advanced by the total size after the accesses have been emitted.
//  - !add && wBack:  the base register itself is adjusted by the total size first
//                    (WriteAddShiftOffset is called with its "add" argument false) and the
//                    transfer starts at the adjusted value.
//  - !add && !wBack: the adjusted address is built in the temporary register only.
private static void EmitMemoryMultipleInstruction(
    CodeGenContext context,
    uint rd,
    uint rn,
    uint registerCount,
    bool add,
    bool wBack,
    bool singleRegs,
    bool isStore)
{
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

    using ScopedRegister translated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    int bytesPerRegister = singleRegs ? 4 : 8;
    Operand totalBytes = InstEmitCommon.Const((int)registerCount * bytesPerRegister);

    if (add)
    {
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, translated.Operand, rnOperand);
    }
    else if (wBack)
    {
        InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, rnOperand, rnOperand, totalBytes, false, ArmShiftType.Lsl, 0);
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, translated.Operand, rnOperand);
    }
    else
    {
        InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, translated.Operand, rnOperand, totalBytes, false, ArmShiftType.Lsl, 0);
        InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, translated.Operand, translated.Operand);
    }

    EmitMemoryMultipleInstructionCore(context, translated.Operand, rd, registerCount, singleRegs, isStore);

    bool postIncrement = add && wBack;

    if (postIncrement)
    {
        context.Arm64Assembler.Add(rnOperand, rnOperand, totalBytes);
    }
}
// Emits the individual loads or stores for registers rd .. rd + registerCount - 1 (clamped so
// the register number never reaches 32), relative to baseAddress with a running byte offset.
//
// The work is split in three phases:
//  1. leading registers whose number has non-zero low bits (regMask) are transferred one by one;
//  2. then whole vectors are transferred, covering 4 single or 2 double registers each
//     (or twice that when a register pair access can be used);
//  3. whatever is left at the end is again transferred one register at a time.
private static void EmitMemoryMultipleInstructionCore(CodeGenContext context, Operand baseAddress, uint rd, uint registerCount, bool singleRegs, bool isStore)
{
// Running byte offset from baseAddress for the next access.
int offs = 0;
uint r = rd;
uint upperBound = Math.Min(rd + registerCount, 32u);
// Low bits of the register number that select a slot inside one vector register:
// two bits for single registers, one bit otherwise.
uint regMask = singleRegs ? 3u : 1u;
// Read/write misaligned elements first.
for (; (r & regMask) != 0 && r < upperBound; r++)
{
EmitMemoryInstruction(context, baseAddress, r, offs, singleRegs, isStore);
offs += singleRegs ? 4 : 8;
}
// Read/write aligned, full vectors.
while (upperBound - r >= (singleRegs ? 4 : 2))
{
// Index of the vector register that holds register r.
int qIndex = (int)(r >> (singleRegs ? 2 : 1));
Operand rtOperand = context.RegisterAllocator.RemapSimdRegister(qIndex);
// Two consecutive vectors are transferred with one pair instruction, but only when
// enough registers remain and the current offset is a multiple of 16.
if (upperBound - r >= (singleRegs ? 8 : 4) && (offs & 0xf) == 0)
{
Operand rt2Operand = context.RegisterAllocator.RemapSimdRegister(qIndex + 1);
if (isStore)
{
context.Arm64Assembler.StpRiUn(rtOperand, rt2Operand, baseAddress, offs);
}
else
{
context.Arm64Assembler.LdpRiUn(rtOperand, rt2Operand, baseAddress, offs);
}
r += singleRegs ? 8u : 4u;
offs += 32;
}
else
{
// Single vector: StrRiUn/LdrRiUn when the offset is a multiple of 16, Stur/Ldur otherwise.
// NOTE(review): presumably the RiUn forms require an offset scaled by the access size,
// while Stur/Ldur accept unscaled offsets - confirm against the assembler.
if ((offs & 0xf) == 0)
{
if (isStore)
{
context.Arm64Assembler.StrRiUn(rtOperand, baseAddress, offs);
}
else
{
context.Arm64Assembler.LdrRiUn(rtOperand, baseAddress, offs);
}
}
else
{
if (isStore)
{
context.Arm64Assembler.Stur(rtOperand, baseAddress, offs);
}
else
{
context.Arm64Assembler.Ldur(rtOperand, baseAddress, offs);
}
}
r += singleRegs ? 4u : 2u;
offs += 16;
}
}
// Read/write last misaligned elements.
for (; r < upperBound; r++)
{
EmitMemoryInstruction(context, baseAddress, r, offs, singleRegs, isStore);
offs += singleRegs ? 4 : 8;
}
}
// Transfers one scalar register r to or from baseAddress + offs.
// A load goes through a temporary FP register and is then inserted into r;
// a store first moves r into a temporary register and stores that.
private static void EmitMemoryInstruction(CodeGenContext context, Operand baseAddress, uint r, int offs, bool singleRegs, bool isStore)
{
    if (!isStore)
    {
        using ScopedRegister loaded = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);

        context.Arm64Assembler.LdrRiUn(loaded.Operand, baseAddress, offs);
        InstEmitNeonCommon.InsertResult(context, loaded.Operand, r, singleRegs);

        return;
    }

    using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, r, singleRegs);

    context.Arm64Assembler.StrRiUn(source.Operand, baseAddress, offs);
}
// Shared body of Vldr/Vstr: transfers the single scalar register rd to or from the address
// formed by register rn and the scaled immediate imm8 (added when "add" is true, subtracted
// otherwise).
private static void EmitMemoryInstruction(CodeGenContext context, uint rd, uint rn, uint imm8, bool add, uint size, bool isStore)
{
// Every size other than 3 is handled through the single-register helpers.
bool singleRegs = size != 3;
int offs = (int)imm8;
// The immediate is scaled by 2 for size 1 and by 4 for every other size.
if (size == 1)
{
offs <<= 1;
}
else
{
offs <<= 2;
}
using ScopedRegister address = context.RegisterAllocator.AllocateTempGprRegisterScoped();
if (rn == RegisterUtils.PcRegister)
{
// PC-relative: the address is a constant, computed here from context.Pc with its two
// low bits cleared, so no offset is left for the memory instruction itself.
if (!add)
{
offs = -offs;
}
context.Arm64Assembler.Mov(address.Operand, (context.Pc & ~3u) + (uint)offs);
InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);
offs = 0;
}
else
{
Operand rnOperand = context.RegisterAllocator.RemapGprRegister((int)rn);
if (InstEmitMemory.CanFoldOffset(context.MemoryManagerType, add ? offs : -offs, (int)size, true, out _))
{
// The signed offset is kept in offs and passed to the load/store instruction below;
// only the base register goes through address translation.
InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, rnOperand);
if (!add)
{
offs = -offs;
}
}
else
{
// Otherwise the offset is applied to the base before translation and none remains.
InstEmitMemory.WriteAddShiftOffset(context.Arm64Assembler, address.Operand, rnOperand, InstEmitCommon.Const(offs), add, ArmShiftType.Lsl, 0);
InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, address.Operand, address.Operand);
offs = 0;
}
}
// Stur/Ldur are used when the remaining offset is negative, or when size is 3 and the
// offset is not a multiple of 8; every other case uses StrRiUn/LdrRiUn.
// NOTE(review): presumably because the RiUn forms only take non-negative offsets scaled
// by the access size - confirm against the assembler.
if ((size == 3 && (offs & 7) != 0) || offs < 0)
{
if (isStore)
{
using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);
context.Arm64Assembler.Stur(tempRegister.Operand, address.Operand, offs, size);
}
else
{
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);
context.Arm64Assembler.Ldur(tempRegister.Operand, address.Operand, offs, size);
InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, singleRegs);
}
}
else
{
if (isStore)
{
using ScopedRegister tempRegister = InstEmitNeonCommon.MoveScalarToSide(context, rd, singleRegs);
context.Arm64Assembler.StrRiUn(tempRegister.Operand, address.Operand, offs, size);
}
else
{
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempFpRegisterScoped(singleRegs);
context.Arm64Assembler.LdrRiUn(tempRegister.Operand, address.Operand, offs, size);
InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, singleRegs);
}
}
}
// Common frame for the element/structure loads and stores: translates the address held in rn,
// lets the callback emit the access against the translated address, then updates rn.
//
// The value of rm selects the write-back behaviour:
//  - PC register: rn is left untouched;
//  - SP register: rn is advanced by "bytes";
//  - any other register: rn is advanced by the value of that register.
private static void EmitMemory1234InstructionCore(CodeGenContext context, uint rn, uint rm, int bytes, Action<Operand> callback)
{
    bool writeBack = rm != RegisterUtils.PcRegister;
    bool incrementByRegister = writeBack && rm != RegisterUtils.SpRegister;

    Operand baseOperand = InstEmitCommon.GetInputGpr(context, rn);

    using ScopedRegister translated = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    InstEmitMemory.WriteAddressTranslation(context.MemoryManagerType, context.RegisterAllocator, context.Arm64Assembler, translated.Operand, baseOperand);

    callback(translated.Operand);

    if (!writeBack)
    {
        return;
    }

    Operand increment = incrementByRegister
        ? InstEmitCommon.GetInputGpr(context, rm)
        : InstEmitCommon.Const(bytes);

    context.Arm64Assembler.Add(baseOperand, baseOperand, increment);
}
// Single-lane load into registerCount registers starting at rd, "step" apart.
// The current register contents are first copied into sequential temporaries, the action is
// invoked on the first temporary (with the lane index and size), and the temporaries are then
// copied back into the destination registers.
private static void EmitMemoryLoad1234SingleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint index,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, temps);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, index, size);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, temps);

    FreeSequentialRegisters(temps);
}
// Replicating load of one element into registerCount registers.
// With an even rd and exactly two registers the action writes straight into the remapped
// vector register, with 1 as its last argument. Otherwise it targets temporaries (last
// argument 1 when more than one register is requested, else 0) and the result is then
// copied into the destination registers.
private static void EmitMemoryLoad1SingleReplicateInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    bool coversWholeVector = (rd & 1) == 0 && registerCount == 2;

    if (coversWholeVector)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        action(destination, baseAddress, size, 1);

        return;
    }

    uint vectorCount = (registerCount + 1) >> 1;
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)vectorCount);

    uint q = registerCount > 1 ? 1u : 0u;
    action(temps[0].Operand, baseAddress, size, q);

    MoveQuadwordsToDoublewords(context, rd, registerCount, step, temps);

    FreeSequentialRegisters(temps);
}
// Replicating load into registerCount registers starting at rd, "step" apart.
// The action always targets sequential temporaries (with 0 as its last argument); each
// temporary is then copied into its destination register.
private static void EmitMemoryLoad234SingleReplicateInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, size, 0u);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, temps);

    FreeSequentialRegisters(temps);
}
// Multiple-structure load for emitters that take (rt, rn, size, q).
// Loads into sequential temporaries with 0 as the last argument, then copies each temporary
// into its destination register (rd + i * step).
private static void EmitMemoryLoad1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, size, 0);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, temps);

    FreeSequentialRegisters(temps);
}
// Multiple-structure load for emitters that additionally take the register count,
// i.e. (rt, rn, registerCount, size, q).
// Loads into sequential temporaries with 0 as the last argument, then copies each temporary
// into its destination register (rd + i * step).
private static void EmitMemoryLoad1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, registerCount, size, 0);

    MoveQuadwordsLowerToDoublewords(context, rd, registerCount, step, temps);

    FreeSequentialRegisters(temps);
}
// Load covering four registers starting at rd, as two full vectors.
// An even rd lets the action write directly into the remapped registers; an odd rd goes
// through two temporaries whose halves are then distributed to the destination registers.
private static void EmitMemoryLoad1234Multiple2x2Instruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    Action<Operand, Operand, uint, uint> action)
{
    bool startsOnVectorBoundary = (rd & 1) == 0;

    if (startsOnVectorBoundary)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2);
        action(destination, baseAddress, size, 1);

        return;
    }

    ScopedRegister[] temps = AllocateSequentialRegisters(context, 2);

    action(temps[0].Operand, baseAddress, size, 1);

    MoveQuadwordsToDoublewords2x2(context, rd, temps);

    FreeSequentialRegisters(temps);
}
// Single-lane store from registerCount registers starting at rd, "step" apart.
// The source registers are gathered into sequential temporaries, and the action is then
// invoked on the first temporary with the lane index and size.
private static void EmitMemoryStore1234SingleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint index,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, temps);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, index, size);

    FreeSequentialRegisters(temps);
}
// Multiple-structure store for emitters that take (rt, rn, size, q).
// The source registers (rd + i * step) are gathered into sequential temporaries, then the
// action is invoked on the first temporary with 0 as the last argument.
private static void EmitMemoryStore1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, temps);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, size, 0);

    FreeSequentialRegisters(temps);
}
// Multiple-structure store for emitters that additionally take the register count,
// i.e. (rt, rn, registerCount, size, q).
// The source registers (rd + i * step) are gathered into sequential temporaries, then the
// action is invoked on the first temporary with 0 as the last argument.
private static void EmitMemoryStore1234MultipleInstruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    uint registerCount,
    uint step,
    Action<Operand, Operand, uint, uint, uint> action)
{
    ScopedRegister[] temps = AllocateSequentialRegisters(context, (int)registerCount);

    MoveDoublewordsToQuadwordsLower(context, rd, registerCount, step, temps);

    Operand firstTemp = temps[0].Operand;
    action(firstTemp, baseAddress, registerCount, size, 0);

    FreeSequentialRegisters(temps);
}
// Store covering four registers starting at rd, as two full vectors.
// An even rd lets the action read directly from the remapped registers; an odd rd first
// assembles the two vectors in temporaries.
private static void EmitMemoryStore1234Multiple2x2Instruction(
    CodeGenContext context,
    Operand baseAddress,
    uint rd,
    uint size,
    Action<Operand, Operand, uint, uint> action)
{
    bool startsOnVectorBoundary = (rd & 1) == 0;

    if (startsOnVectorBoundary)
    {
        Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1), 2);
        action(source, baseAddress, size, 1);

        return;
    }

    ScopedRegister[] temps = AllocateSequentialRegisters(context, 2);

    MoveDoublewordsToQuadwords2x2(context, rd, temps);

    action(temps[0].Operand, baseAddress, size, 1);

    FreeSequentialRegisters(temps);
}
// Allocates "count" temporary SIMD registers one after another and returns them in
// allocation order. Debug builds additionally verify that the register indices are consecutive.
private static ScopedRegister[] AllocateSequentialRegisters(CodeGenContext context, int count)
{
    var allocated = new ScopedRegister[count];
    int slot = 0;

    while (slot < count)
    {
        allocated[slot] = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
        slot++;
    }

    AssertSequentialRegisters(allocated);

    return allocated;
}
// Disposes every register in the span, first to last.
private static void FreeSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
{
    foreach (ref readonly ScopedRegister register in registers)
    {
        register.Dispose();
    }
}
// Debug-only check that register i has index (index of register 0) + i for every i >= 1.
[Conditional("DEBUG")]
private static void AssertSequentialRegisters(ReadOnlySpan<ScopedRegister> registers)
{
    int position = 1;

    while (position < registers.Length)
    {
        Debug.Assert(registers[position].Operand.GetRegister().Index == registers[0].Operand.GetRegister().Index + position);
        position++;
    }
}
// For each i in [0, registerCount): inserts element 0 of registers[i] into the slot of
// register rd + i * step. The destination vector is remapped from (r >> 1) and the slot is
// selected by (r & 1).
private static void MoveQuadwordsLowerToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint target = rd + i * step;
        uint slot = target & 1u;

        Operand targetVector = context.RegisterAllocator.RemapSimdRegister((int)(target >> 1));
        uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(slot, false);

        context.Arm64Assembler.InsElt(targetVector, registers[(int)i].Operand, 0, imm5);
    }
}
// For each i in [0, registerCount): moves register rd + i * step into registers[i]
// via InstEmitNeonCommon.MoveScalarToSide.
private static void MoveDoublewordsToQuadwordsLower(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint source = rd + i * step;
        Operand destination = registers[(int)i].Operand;

        InstEmitNeonCommon.MoveScalarToSide(context, destination, source, false);
    }
}
// Assembles two vectors from four consecutive registers starting at rd:
// registers[0] receives rd and rd + 1, registers[1] receives rd + 2 and rd + 3.
// The first register of each pair goes to element 0 and the second to element 1.
private static void MoveDoublewordsToQuadwords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
{
    for (int pair = 0; pair < 2; pair++)
    {
        uint first = rd + (uint)pair * 2;
        uint second = first + 1;
        Operand destination = registers[pair].Operand;

        Operand firstVector = context.RegisterAllocator.RemapSimdRegister((int)(first >> 1));
        uint lowImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(0, false);
        context.Arm64Assembler.InsElt(destination, firstVector, (first & 1u) << 3, lowImm5);

        Operand secondVector = context.RegisterAllocator.RemapSimdRegister((int)(second >> 1));
        uint highImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(1, false);
        context.Arm64Assembler.InsElt(destination, secondVector, (second & 1u) << 3, highImm5);
    }
}
// For each i in [0, registerCount): copies half (i & 1) of registers[i >> 1] into the slot
// of register rd + i * step, so each source vector feeds two destination registers.
private static void MoveQuadwordsToDoublewords(CodeGenContext context, uint rd, uint registerCount, uint step, ReadOnlySpan<ScopedRegister> registers)
{
    for (uint i = 0; i < registerCount; i++)
    {
        uint target = rd + i * step;
        uint slot = target & 1u;

        Operand targetVector = context.RegisterAllocator.RemapSimdRegister((int)(target >> 1));
        uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(slot, false);

        Operand sourceVector = registers[(int)(i >> 1)].Operand;
        uint sourceOffset = (i & 1u) << 3;

        context.Arm64Assembler.InsElt(targetVector, sourceVector, sourceOffset, imm5);
    }
}
// Splits two vectors back into four consecutive registers starting at rd:
// registers[0] supplies rd and rd + 1, registers[1] supplies rd + 2 and rd + 3.
// The first register of each pair comes from offset 0 and the second from offset 1 << 3.
private static void MoveQuadwordsToDoublewords2x2(CodeGenContext context, uint rd, ReadOnlySpan<ScopedRegister> registers)
{
    for (int pair = 0; pair < 2; pair++)
    {
        uint first = rd + (uint)pair * 2;
        uint second = first + 1;
        Operand source = registers[pair].Operand;

        Operand firstVector = context.RegisterAllocator.RemapSimdRegister((int)(first >> 1));
        uint firstImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(first & 1u, false);
        context.Arm64Assembler.InsElt(firstVector, source, 0, firstImm5);

        Operand secondVector = context.RegisterAllocator.RemapSimdRegister((int)(second >> 1));
        uint secondImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(second & 1u, false);
        context.Arm64Assembler.InsElt(secondVector, source, 1u << 3, secondImm5);
    }
}
}
}

View File

@ -0,0 +1,665 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitNeonMove
{
// Duplicates general-purpose register rt across a vector using DupGen.
// The element size is 2 - (e | (b << 1)) and must be below 3.
// With q == 0 the result is built in a temporary and inserted into rd; otherwise rd must be
// even and DupGen writes to the remapped destination directly.
public static void VdupR(CodeGenContext context, uint rd, uint rt, uint b, uint e, uint q)
{
    uint esize = 2 - (e | (b << 1));

    Debug.Assert(esize < 3);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);

    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(0, esize);

    if (q != 0)
    {
        Debug.Assert((rd & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupGen(destination, source, imm5, q);

        return;
    }

    using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.DupGen(staging.Operand, source, imm5, q);

    InstEmitNeonCommon.InsertResult(context, staging.Operand, rd, false);
}
// Duplicates one element of rm across a vector using DupEltVectorFromElement.
// The element size is the number of trailing zero bits in imm4 (must be below 3), and the
// element index is the remaining upper bits of imm4. The low bit of rm is folded into the
// index as its top bit.
public static void VdupS(CodeGenContext context, uint rd, uint rm, uint imm4, uint q)
{
    uint esize = (uint)BitOperations.TrailingZeroCount(imm4);

    Debug.Assert(esize < 3);

    uint lane = imm4 >> (int)(esize + 1);

    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    uint laneInVector = lane | ((rm & 1) << (int)(3 - esize));
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(laneInVector, esize);

    if (q != 0)
    {
        Debug.Assert((rd & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.DupEltVectorFromElement(destination, source, imm5, q);

        return;
    }

    using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.DupEltVectorFromElement(staging.Operand, source, imm5, q);

    InstEmitNeonCommon.InsertResult(context, staging.Operand, rd, false);
}
// Emits Ext with rn and rm as sources and imm4 as the immediate.
// With q == 0 both sources are first moved into temporaries and the result is inserted
// into rd; otherwise all three register numbers must be even and the remapped vector
// registers are used directly.
public static void Vext(CodeGenContext context, uint rd, uint rn, uint rm, uint imm4, uint q)
{
    if (q != 0)
    {
        Debug.Assert(((rd | rn | rm) & 1) == 0);

        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        context.Arm64Assembler.Ext(destination, first, imm4, second, q);

        return;
    }

    using ScopedRegister firstSide = InstEmitNeonCommon.MoveScalarToSide(context, rn, false);
    using ScopedRegister secondSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);
    using ScopedRegister result = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, firstSide, secondSide);

    context.Arm64Assembler.Ext(result.Operand, firstSide.Operand, imm4, secondSide.Operand, q);

    InstEmitNeonCommon.InsertResult(context, result.Operand, rd, false);
}
// Long move implemented as a long shift by zero: Ushll when u is set, Sshll otherwise.
// The element size is the number of trailing zero bits in imm3h and must be below 3.
public static void Vmovl(CodeGenContext context, uint rd, uint rm, bool u, uint imm3h)
{
    uint esize = (uint)BitOperations.TrailingZeroCount(imm3h);

    Debug.Assert(esize < 3);

    InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, 0, esize, isShl: true, u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
}
// Narrowing move: delegates to the unary narrow helper with Xtn as the emitter.
// The size must be below 3.
public static void Vmovn(CodeGenContext context, uint rd, uint rm, uint size)
{
    Debug.Assert(size < 3);

    InstEmitNeonCommon.EmitVectorUnaryNarrow(
        context,
        rd,
        rm,
        size,
        context.Arm64Assembler.Xtn);
}
// Implemented as a scalar right shift (UshrS) by 16 with a size argument of 2.
public static void Vmovx(CodeGenContext context, uint rd, uint rm)
    => InstEmitNeonCommon.EmitScalarBinaryShift(context, rd, rm, 16, 2, isShl: false, context.Arm64Assembler.UshrS);
// Moves 64 bits between the pair of general-purpose registers (rt, rt2) and register rm.
//  - op == true:  rm -> (rt, rt2). The 64-bit value is moved into rt, its upper 32 bits are
//                 shifted down into rt2, and rt is then zero-extended.
//  - op == false: (rt, rt2) -> rm. rt2 is shifted left by 32 and ORed with rt, then moved
//                 into the vector register.
// "top" (the low bit of rm) selects which half of the remapped vector register is accessed.
public static void VmovD(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    Operand vector = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

    uint top = rm & 1;
    uint ftype = top + 1;

    if (!op)
    {
        Operand low = InstEmitCommon.GetInputGpr(context, rt);
        Operand high = InstEmitCommon.GetInputGpr(context, rt2);

        using ScopedRegister packed = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        Operand packed64 = new Operand(OperandKind.Register, OperandType.I64, packed.Operand.Value);

        context.Arm64Assembler.Lsl(packed64, high, InstEmitCommon.Const(32));
        context.Arm64Assembler.Orr(packed64, packed64, low);

        if (top != 0)
        {
            context.Arm64Assembler.FmovFloatGen(vector, packed64, ftype, 1, 1, top);
        }
        else
        {
            // Doing FMOV on Rm directly would clear the high bits if we are moving to the bottom.
            using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

            context.Arm64Assembler.FmovFloatGen(staging.Operand, packed64, ftype, 1, 1, top);

            InstEmitNeonCommon.InsertResult(context, staging.Operand, rm, false);
        }

        return;
    }

    Operand lowOut = InstEmitCommon.GetOutputGpr(context, rt);
    Operand highOut = InstEmitCommon.GetOutputGpr(context, rt2);

    Operand lowOut64 = new Operand(OperandKind.Register, OperandType.I64, lowOut.Value);
    Operand highOut64 = new Operand(OperandKind.Register, OperandType.I64, highOut.Value);

    context.Arm64Assembler.FmovFloatGen(lowOut64, vector, ftype, 1, 0, top);

    context.Arm64Assembler.Lsr(highOut64, lowOut64, InstEmitCommon.Const(32));
    context.Arm64Assembler.Mov(lowOut, lowOut); // Zero-extend.
}
// Moves between general-purpose register rt and scalar register rn using FmovFloatGen
// with an ftype argument of 3.
//  - op == true:  rn -> rt.
//  - op == false: rt -> rn, through a temporary SIMD register.
public static void VmovH(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (!op)
    {
        Operand source = InstEmitCommon.GetInputGpr(context, rt);

        using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(staging.Operand, source, 3, 0, 1, 0);

        InstEmitNeonCommon.InsertResult(context, staging.Operand, rn, true);

        return;
    }

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

    using ScopedRegister scalar = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

    context.Arm64Assembler.FmovFloatGen(destination, scalar.Operand, 3, 0, 0, 0);
}
// Immediate move through Movi. imm8 is broken into eight parts by Split, which are passed
// to Movi individually together with cmode and op.
// With q == 0 the value is built in a temporary and inserted into rd; otherwise Movi
// targets the remapped destination directly.
public static void VmovI(CodeGenContext context, uint rd, uint op, uint cmode, uint imm8, uint q)
{
    (uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(imm8);

    if (q != 0)
    {
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Movi(destination, h, g, f, e, d, cmode, c, b, a, op, q);

        return;
    }

    using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.Movi(staging.Operand, h, g, f, e, d, cmode, c, b, a, op, q);

    InstEmitNeonCommon.InsertResult(context, staging.Operand, rd, false);
}
// Floating-point immediate move: FmovFloatImm into a temporary (ftype is size ^ 2),
// then inserted into rd. The insert treats rd as a single register unless size is 3.
public static void VmovFI(CodeGenContext context, uint rd, uint imm8, uint size)
{
    using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    uint ftype = size ^ 2u;
    context.Arm64Assembler.FmovFloatImm(staging.Operand, imm8, ftype);

    bool singleRegister = size != 3;
    InstEmitNeonCommon.InsertResult(context, staging.Operand, rd, singleRegister);
}
// Register-to-register scalar move done as an element insert (InsElt).
// With size == 2 the registers are treated as singles, four per vector; otherwise two per vector.
public static void VmovR(CodeGenContext context, uint rd, uint rm, uint size)
{
    bool singleRegister = size == 2;

    int vectorShift;
    uint slotMask;
    int sourceShift;

    if (singleRegister)
    {
        vectorShift = 2;
        slotMask = 3u;
        sourceShift = 2;
    }
    else
    {
        vectorShift = 1;
        slotMask = 1u;
        sourceShift = 3;
    }

    uint destinationSlot = rd & slotMask;
    uint sourceSlot = rm & slotMask;

    uint imm4 = sourceSlot << sourceShift;
    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(destinationSlot, singleRegister);

    Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> vectorShift));
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rm >> vectorShift));

    context.Arm64Assembler.InsElt(destination, source, imm4, imm5);
}
// Writes general-purpose register rt into one element of rd using InsGen.
// opc1/opc2 select the element size and index:
//  - opc1 bit 1 set: size 0, index = opc2 | (opc1 bit 0 << 2)
//  - opc2 bit 0 set: size 1, index = (opc2 >> 1) | (opc1 bit 0 << 1)
//  - otherwise:      size 2, index = opc1 bit 0
// The low bit of rd is then folded into the index as its top bit.
public static void VmovRs(CodeGenContext context, uint rd, uint rt, uint opc1, uint opc2)
{
    uint lane;
    uint esize;

    uint opc1Low = opc1 & 1u;

    if ((opc1 & 2u) != 0)
    {
        esize = 0;
        lane = opc2 | (opc1Low << 2);
    }
    else if ((opc2 & 1u) != 0)
    {
        esize = 1;
        lane = (opc2 >> 1) | (opc1Low << 1);
    }
    else
    {
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);

        esize = 2;
        lane = opc1Low;
    }

    lane |= (rd & 1u) << (int)(3 - esize);

    Operand source = InstEmitCommon.GetInputGpr(context, rt);
    Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

    uint imm5 = InstEmitNeonCommon.GetImm5ForElementIndex(lane, esize);

    context.Arm64Assembler.InsGen(destination, source, imm5);
}
// Moves between general-purpose register rt and scalar register rn using FmovFloatGen
// with an ftype argument of 0.
//  - op == true:  rn -> rt.
//  - op == false: rt -> rn, through a temporary SIMD register.
public static void VmovS(CodeGenContext context, uint rt, uint rn, bool op)
{
    if (!op)
    {
        Operand source = InstEmitCommon.GetInputGpr(context, rt);

        using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(staging.Operand, source, 0, 0, 1, 0);

        InstEmitNeonCommon.InsertResult(context, staging.Operand, rn, true);

        return;
    }

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

    using ScopedRegister scalar = InstEmitNeonCommon.MoveScalarToSide(context, rn, true);

    context.Arm64Assembler.FmovFloatGen(destination, scalar.Operand, 0, 0, 0, 0);
}
// Reads one element of rn into general-purpose register rt.
// opc1/opc2 select the element size and index:
//  - opc1 bit 1 set: size 0, index = opc2 | (opc1 bit 0 << 2)
//  - opc2 bit 0 set: size 1, index = (opc2 >> 1) | (opc1 bit 0 << 1)
//  - otherwise:      size 2, index = opc1 bit 0 (u must be false)
// The low bit of rn is then folded into the index as its top bit.
// Smov is emitted only when u is false and the size is at most 1; otherwise Umov.
public static void VmovSr(CodeGenContext context, uint rt, uint rn, bool u, uint opc1, uint opc2)
{
    uint lane;
    uint esize;

    uint opc1Low = opc1 & 1u;

    if ((opc1 & 2u) != 0)
    {
        esize = 0;
        lane = opc2 | (opc1Low << 2);
    }
    else if ((opc2 & 1u) != 0)
    {
        esize = 1;
        lane = (opc2 >> 1) | (opc1Low << 1);
    }
    else
    {
        Debug.Assert(opc1 == 0 || opc1 == 1);
        Debug.Assert(opc2 == 0);
        Debug.Assert(!u);

        esize = 2;
        lane = opc1Low;
    }

    lane |= (rn & 1u) << (int)(3 - esize);

    Operand destination = InstEmitCommon.GetOutputGpr(context, rt);
    Operand source = context.RegisterAllocator.RemapSimdRegister((int)(rn >> 1));

    bool signExtend = !u && esize <= 1;

    if (signExtend)
    {
        context.Arm64Assembler.Smov(destination, source, (int)lane, (int)esize);
    }
    else
    {
        context.Arm64Assembler.Umov(destination, source, (int)lane, (int)esize);
    }
}
// Moves between the pair of general-purpose registers (rt, rt2) and the two consecutive
// single registers rm and rm + 1.
//  - op == true:  (rm, rm + 1) -> (rt, rt2).
//  - op == false: (rt, rt2) -> (rm, rm + 1).
public static void VmovSs(CodeGenContext context, uint rt, uint rt2, uint rm, bool op)
{
    bool alignedPair = (rm & 1) == 0;

    if (alignedPair)
    {
        // If we are moving an aligned pair of single-precision registers,
        // we can just move a single double-precision register.
        VmovD(context, rt, rt2, rm >> 1, op);

        return;
    }

    if (!op)
    {
        Operand firstSource = InstEmitCommon.GetInputGpr(context, rt);
        Operand secondSource = InstEmitCommon.GetInputGpr(context, rt2);

        // One temporary is reused for both halves.
        using ScopedRegister staging = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        context.Arm64Assembler.FmovFloatGen(staging.Operand, firstSource, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, staging.Operand, rm, true);

        context.Arm64Assembler.FmovFloatGen(staging.Operand, secondSource, 0, 0, 1, 0);
        InstEmitNeonCommon.InsertResult(context, staging.Operand, rm + 1, true);

        return;
    }

    Operand firstDestination = InstEmitCommon.GetOutputGpr(context, rt);
    Operand secondDestination = InstEmitCommon.GetOutputGpr(context, rt2);

    using ScopedRegister firstScalar = InstEmitNeonCommon.MoveScalarToSide(context, rm, true);
    using ScopedRegister secondScalar = InstEmitNeonCommon.MoveScalarToSide(context, rm + 1, true);

    context.Arm64Assembler.FmovFloatGen(firstDestination, firstScalar.Operand, 0, 0, 0, 0);
    context.Arm64Assembler.FmovFloatGen(secondDestination, secondScalar.Operand, 0, 0, 0, 0);
}
/// <summary>
/// Emits a VMVN (immediate) using the AArch64 MVNI instruction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination 64-bit SIMD register</param>
/// <param name="cmode">Immediate expansion mode, forwarded unchanged to MVNI</param>
/// <param name="imm8">8-bit immediate, split into individual bits for the assembler</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void VmvnI(CodeGenContext context, uint rd, uint cmode, uint imm8, uint q)
{
    var (a, b, c, d, e, f, g, h) = Split(imm8);

    if (q != 0)
    {
        // Full vector: write the remapped destination register directly.
        Operand destination = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));

        context.Arm64Assembler.Mvni(destination, h, g, f, e, d, cmode, c, b, a, q);

        return;
    }

    // Half vector: build the value on a scratch register and insert it into the destination.
    using ScopedRegister scratch = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    context.Arm64Assembler.Mvni(scratch.Operand, h, g, f, e, d, cmode, c, b, a, q);
    InstEmitNeonCommon.InsertResult(context, scratch.Operand, rd, false);
}
/// <summary>
/// Emits a VMVN (register) using the AArch64 NOT instruction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination SIMD register</param>
/// <param name="rm">Index of the source SIMD register</param>
/// <param name="size">Element size field of the instruction, not used by this emitter</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void VmvnR(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, q, context.Arm64Assembler.Not);
/// <summary>
/// Emits a VSWP, exchanging the contents of two SIMD registers through a scratch register.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register</param>
/// <param name="rm">Index of the second SIMD register</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void Vswp(CodeGenContext context, uint rd, uint rm, uint q)
{
    using ScopedRegister saved = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

    if (q != 0)
    {
        Operand first = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand second = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        // ORR of a register with itself is a plain register copy.
        context.Arm64Assembler.Orr(saved.Operand, first, first); // Saved = Rd
        context.Arm64Assembler.Orr(first, second, second); // Rd = Rm
        context.Arm64Assembler.Orr(second, saved.Operand, saved.Operand); // Rm = Saved

        return;
    }

    // 64-bit swap: save Rd on the scratch register, then insert each half into the other register.
    InstEmitNeonCommon.MoveScalarToSide(context, saved.Operand, rd, false);

    using ScopedRegister rmSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    InstEmitNeonCommon.InsertResult(context, rmSide.Operand, rd, false);
    InstEmitNeonCommon.InsertResult(context, saved.Operand, rm, false);
}
/// <summary>
/// Emits a VTBL or VTBX (table lookup) using the AArch64 TBL/TBX instructions.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination 64-bit SIMD register</param>
/// <param name="rn">Index of the first 64-bit table register</param>
/// <param name="rm">Index of the 64-bit register holding the lookup indices</param>
/// <param name="op">False to emit TBL, true to emit TBX</param>
/// <param name="len">Number of 64-bit table registers minus one (0 to 3)</param>
public static void Vtbl(CodeGenContext context, uint rd, uint rn, uint rm, bool op, uint len)
{
// On AArch64, TBL/TBX works with 128-bit vectors, while on AArch32 it works with 64-bit vectors.
// We must combine the 64-bit vectors into a larger 128-bit one in some cases.
// TODO: Peephole optimization to combine adjacent TBL instructions?
Debug.Assert(len <= 3);
bool isTbl = !op;
// Clamp the table length so that rn + len never goes past register index 31.
len = Math.Min(len, 31 - rn);
// For TBX with an odd number of 64-bit table registers (even len), the indices must be
// patched below, so a private, writable copy of the index register is requested here.
bool rangeMismatch = !isTbl && (len & 1) == 0;
using ScopedRegister indicesReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false, rangeMismatch);
if (rangeMismatch)
{
// Force any index >= 8 * regs to be the maximum value, since on AArch64 we are working with a full vector,
// and the out of range value is 16 * regs, not 8 * regs.
Debug.Assert(indicesReg.IsAllocated);
using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
if (len == 0)
{
// Single table register: an index is in range only if (index >> 3) == 0.
// The compare produces the in-range mask, and ORN sets every other index to all ones.
(uint immb, uint immh) = InstEmitNeonCommon.GetImmbImmhForShift(3, 0, isShl: false);
context.Arm64Assembler.UshrV(tempRegister2.Operand, indicesReg.Operand, immb, immh, 0);
context.Arm64Assembler.CmeqZeroV(tempRegister2.Operand, tempRegister2.Operand, 0, 0);
context.Arm64Assembler.Orn(indicesReg.Operand, indicesReg.Operand, tempRegister2.Operand, 0);
}
else
{
// Materialize the limit 8 * (len + 1), compare the indices against it,
// and OR the resulting mask into the indices.
(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) = Split(8u * (len + 1));
context.Arm64Assembler.Movi(tempRegister2.Operand, h, g, f, e, d, 0xe, c, b, a, 0, 0);
context.Arm64Assembler.CmgeRegV(tempRegister2.Operand, indicesReg.Operand, tempRegister2.Operand, 0, 0);
context.Arm64Assembler.OrrReg(indicesReg.Operand, indicesReg.Operand, tempRegister2.Operand, 0);
}
}
// Build one or two table registers from the 1 to 4 source 64-bit registers.
// A trailing unpaired register is placed in the lower half with the upper half zeroed.
ScopedRegister tableReg1 = default;
ScopedRegister tableReg2 = default;
switch (len)
{
case 0:
tableReg1 = MoveHalfToSideZeroUpper(context, rn);
break;
case 1:
tableReg1 = MoveDoublewords(context, rn, rn + 1);
break;
case 2:
// isOdd forces a temporary to be allocated for the first table register,
// so that both table registers come from the register allocator.
tableReg1 = MoveDoublewords(context, rn, rn + 1, isOdd: true);
tableReg2 = MoveHalfToSideZeroUpper(context, rn + 2);
break;
case 3:
tableReg1 = MoveDoublewords(context, rn, rn + 1);
tableReg2 = MoveDoublewords(context, rn + 2, rn + 3);
break;
}
// TBL works with consecutive registers, it is assumed that two consecutive calls to the register allocator
// will return consecutive registers.
Debug.Assert(len < 2 || tableReg1.Operand.GetRegister().Index + 1 == tableReg2.Operand.GetRegister().Index);
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
if (isTbl)
{
// len >> 1 is the table length passed to the assembler, now counted in 128-bit registers.
context.Arm64Assembler.Tbl(tempRegister.Operand, tableReg1.Operand, len >> 1, indicesReg.Operand, 0);
}
else
{
// The current value of Rd is copied to the temporary before TBX, which also takes it as its destination.
InstEmitNeonCommon.MoveScalarToSide(context, tempRegister.Operand, rd, false);
context.Arm64Assembler.Tbx(tempRegister.Operand, tableReg1.Operand, len >> 1, indicesReg.Operand, 0);
}
InstEmitNeonCommon.InsertResult(context, tempRegister.Operand, rd, false);
// The table registers are not declared with "using", so they are released manually here.
tableReg1.Dispose();
if (len > 1)
{
tableReg2.Dispose();
}
}
/// <summary>
/// Emits a VTRN using the AArch64 TRN1 and TRN2 instructions.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register, both an input and an output</param>
/// <param name="rm">Index of the second SIMD register, both an input and an output</param>
/// <param name="size">Element size, forwarded to the assembler</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void Vtrn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleavedTrn(context, rd, rm, size, q, context.Arm64Assembler.Trn1, context.Arm64Assembler.Trn2);
/// <summary>
/// Emits a VUZP using the AArch64 UZP1 and UZP2 instructions.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register, both an input and an output</param>
/// <param name="rm">Index of the second SIMD register, both an input and an output</param>
/// <param name="size">Element size, forwarded to the assembler</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void Vuzp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Uzp1, context.Arm64Assembler.Uzp2);
/// <summary>
/// Emits a VZIP using the AArch64 ZIP1 and ZIP2 instructions.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register, both an input and an output</param>
/// <param name="rm">Index of the second SIMD register, both an input and an output</param>
/// <param name="size">Element size, forwarded to the assembler</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
public static void Vzip(CodeGenContext context, uint rd, uint rm, uint size, uint q)
    => EmitVectorBinaryInterleaved(context, rd, rm, size, q, context.Arm64Assembler.Zip1, context.Arm64Assembler.Zip2);
/// <summary>
/// Splits the low 8 bits of a value into individual bits.
/// </summary>
/// <param name="imm8">Value to split, bits above bit 7 are ignored</param>
/// <returns>The eight bits as a tuple, ordered from bit 7 (first item) down to bit 0 (last item)</returns>
public static (uint, uint, uint, uint, uint, uint, uint, uint) Split(uint imm8)
{
    static uint BitAt(uint value, int position) => (value >> position) & 1;

    return (
        BitAt(imm8, 7),
        BitAt(imm8, 6),
        BitAt(imm8, 5),
        BitAt(imm8, 4),
        BitAt(imm8, 3),
        BitAt(imm8, 2),
        BitAt(imm8, 1),
        BitAt(imm8, 0));
}
/// <summary>
/// Copies a 64-bit SIMD register into a newly allocated temporary register using a scalar DUP (element).
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="srcReg">Index of the source 64-bit SIMD register</param>
/// <returns>The temporary register holding the copy, the caller is responsible for disposing it</returns>
private static ScopedRegister MoveHalfToSideZeroUpper(CodeGenContext context, uint srcReg)
{
    // The low bit of the index picks the half, the remaining bits pick the remapped register.
    uint half = srcReg & 1u;
    Operand remapped = context.RegisterAllocator.RemapSimdRegister((int)(srcReg >> 1));

    ScopedRegister result = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);

    context.Arm64Assembler.DupEltScalarFromElement(
        result.Operand,
        remapped,
        InstEmitNeonCommon.GetImm5ForElementIndex(half, false));

    return result;
}
/// <summary>
/// Produces a register whose lower half is one 64-bit SIMD register and whose upper half is another.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="lowerReg">Index of the 64-bit register placed in the lower half</param>
/// <param name="upperReg">Index of the 64-bit register placed in the upper half</param>
/// <param name="isOdd">True to always build the result on a temporary register, even if the pair is already aligned</param>
/// <returns>
/// The existing remapped register (not owned by the result) when the pair is aligned and
/// <paramref name="isOdd"/> is false, otherwise a temporary register that the caller must dispose
/// </returns>
private static ScopedRegister MoveDoublewords(CodeGenContext context, uint lowerReg, uint upperReg, bool isOdd = false)
{
    bool alreadyPaired = (lowerReg & 1) == 0 && upperReg == lowerReg + 1;

    if (alreadyPaired && !isOdd)
    {
        // Both halves already live in the same remapped register, use it as is.
        Operand existing = context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1));

        return new ScopedRegister(context.RegisterAllocator, existing, false);
    }

    Operand lowerSource = context.RegisterAllocator.RemapSimdRegister((int)(lowerReg >> 1));
    Operand upperSource = context.RegisterAllocator.RemapSimdRegister((int)(upperReg >> 1));

    ScopedRegister combined = context.RegisterAllocator.AllocateTempFpRegisterScoped(false);

    // Lower half: scalar copy of the selected source half.
    uint lowerImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(lowerReg & 1u, false);
    context.Arm64Assembler.DupEltScalarFromElement(combined.Operand, lowerSource, lowerImm5);

    // Upper half: insert the selected source half at element 1.
    uint upperImm5 = InstEmitNeonCommon.GetImm5ForElementIndex(1, false);
    context.Arm64Assembler.InsElt(combined.Operand, upperSource, (upperReg & 1u) << 3, upperImm5);

    return combined;
}
/// <summary>
/// Emits a two-output interleave operation for VTRN, with a dedicated sequence for the case
/// where both registers are the same.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register</param>
/// <param name="rm">Index of the second SIMD register</param>
/// <param name="size">Element size, forwarded to the actions</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
/// <param name="action1">Emitter for the first interleave instruction</param>
/// <param name="action2">Emitter for the second interleave instruction</param>
private static void EmitVectorBinaryInterleavedTrn(
    CodeGenContext context,
    uint rd,
    uint rm,
    uint size,
    uint q,
    Action<Operand, Operand, Operand, uint, uint> action1,
    Action<Operand, Operand, Operand, uint, uint> action2)
{
    if (rd != rm)
    {
        // Distinct registers take the regular interleave path.
        EmitVectorBinaryInterleaved(context, rd, rm, size, q, action1, action2);

        return;
    }

    // The behaviour when the registers are the same is "unpredictable" according to the manual.
    // Here, the second action consumes the result of the first one instead of the original Rm.

    if (q != 0)
    {
        Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
        Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));

        using ScopedRegister firstResult = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        action1(firstResult.Operand, rdOperand, rmOperand, size, q);
        action2(rmOperand, rdOperand, firstResult.Operand, size, q);

        return;
    }

    using ScopedRegister rdSide = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
    using ScopedRegister rmSide = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

    using ScopedRegister first = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
    using ScopedRegister second = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, rdSide, rmSide);

    action1(first.Operand, rdSide.Operand, rmSide.Operand, size, q);
    action2(second.Operand, rdSide.Operand, first.Operand, size, q);

    InstEmitNeonCommon.InsertResult(context, second.Operand, rd, false);
}
/// <summary>
/// Emits a two-output interleave operation: the first action's result is written to Rd
/// and the second action's result is written to Rm.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the first SIMD register, input of both actions and destination of the first</param>
/// <param name="rm">Index of the second SIMD register, input of both actions and destination of the second</param>
/// <param name="size">Element size, forwarded to the actions</param>
/// <param name="q">Zero for a 64-bit operation, non-zero for a 128-bit operation</param>
/// <param name="action1">Emitter for the instruction producing the new value of Rd</param>
/// <param name="action2">Emitter for the instruction producing the new value of Rm</param>
private static void EmitVectorBinaryInterleaved(
CodeGenContext context,
uint rd,
uint rm,
uint size,
uint q,
Action<Operand, Operand, Operand, uint, uint> action1,
Action<Operand, Operand, Operand, uint, uint> action2)
{
if (q == 0)
{
// 64-bit case: both results are computed into temporaries from the original inputs,
// and only then inserted into the destination registers.
using ScopedRegister rdReg = InstEmitNeonCommon.MoveScalarToSide(context, rd, false);
using ScopedRegister rmReg = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);
using ScopedRegister tempRegister1 = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
using ScopedRegister tempRegister2 = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, rdReg, rmReg);
action1(tempRegister1.Operand, rdReg.Operand, rmReg.Operand, size, q);
action2(tempRegister2.Operand, rdReg.Operand, rmReg.Operand, size, q);
// When both registers are the same, only the second result is kept.
if (rd != rm)
{
InstEmitNeonCommon.InsertResult(context, tempRegister1.Operand, rd, false);
}
InstEmitNeonCommon.InsertResult(context, tempRegister2.Operand, rm, false);
}
else
{
Operand rdOperand = context.RegisterAllocator.RemapSimdRegister((int)(rd >> 1));
Operand rmOperand = context.RegisterAllocator.RemapSimdRegister((int)(rm >> 1));
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempSimdRegisterScoped();
// The first result goes to a temporary because Rd is still an input of the second action.
action1(tempRegister.Operand, rdOperand, rmOperand, size, q);
// The second result can overwrite Rm directly, as Rm is not read again after this.
action2(rmOperand, rdOperand, rmOperand, size, q);
if (rd != rm)
{
// Copy the first result into Rd (ORR of a register with itself is a register copy).
context.Arm64Assembler.OrrReg(rdOperand, tempRegister.Operand, tempRegister.Operand, 1);
}
}
}
}
}

View File

@ -0,0 +1,105 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the AArch32 Advanced SIMD rounding instructions.
/// Each method forwards to a common vector emitter with the matching AArch64 assembler instruction.
/// </summary>
static class InstEmitNeonRound
{
    /// <summary>Emits VRADDHN using RADDHN.</summary>
    public static void Vraddhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Raddhn);

    /// <summary>Emits VRHADD using URHADD when <paramref name="u"/> is true, SRHADD otherwise.</summary>
    public static void Vrhadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, u ? context.Arm64Assembler.Urhadd : context.Arm64Assembler.Srhadd, null);

    /// <summary>
    /// Emits VRSHL using URSHL when <paramref name="u"/> is true, SRSHL otherwise.
    /// Rm is passed as the first source and Rn as the second one.
    /// </summary>
    public static void Vrshl(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context,
            rd,
            rm,
            rn,
            size,
            q,
            u ? context.Arm64Assembler.UrshlV : context.Arm64Assembler.SrshlV,
            u ? context.Arm64Assembler.UrshlS : context.Arm64Assembler.SrshlS);

    /// <summary>Emits VRSHR using URSHR when <paramref name="u"/> is true, SRSHR otherwise.</summary>
    public static void Vrshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        // The element size is encoded by the L bit together with imm6.
        uint imm7 = imm6 | (l << 6);
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm7);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UrshrV : context.Arm64Assembler.SrshrV,
            u ? context.Arm64Assembler.UrshrS : context.Arm64Assembler.SrshrS);
    }

    /// <summary>Emits VRSHRN using RSHRN.</summary>
    public static void Vrshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Rshrn);
    }

    /// <summary>Emits VRSRA using URSRA when <paramref name="u"/> is true, SRSRA otherwise.</summary>
    public static void Vrsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        // The element size is encoded by the L bit together with imm6.
        uint imm7 = imm6 | (l << 6);
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm7);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorTernaryRdShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UrsraV : context.Arm64Assembler.SrsraV,
            u ? context.Arm64Assembler.UrsraS : context.Arm64Assembler.SrsraS);
    }

    /// <summary>Emits VRSUBHN using RSUBHN.</summary>
    public static void Vrsubhn(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryNarrow(context, rd, rn, rm, size, context.Arm64Assembler.Rsubhn);

    /// <summary>Emits VRINTA using FRINTA.</summary>
    public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintaSingleAndDouble, context.Arm64Assembler.FrintaHalf);

    /// <summary>Emits VRINTM using FRINTM.</summary>
    public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintmSingleAndDouble, context.Arm64Assembler.FrintmHalf);

    /// <summary>Emits VRINTN using FRINTN.</summary>
    public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintnSingleAndDouble, context.Arm64Assembler.FrintnHalf);

    /// <summary>Emits VRINTP using FRINTP.</summary>
    public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintpSingleAndDouble, context.Arm64Assembler.FrintpHalf);

    /// <summary>Emits VRINTX using FRINTX.</summary>
    public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintxSingleAndDouble, context.Arm64Assembler.FrintxHalf);

    /// <summary>Emits VRINTZ using FRINTZ.</summary>
    public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnaryAnyF(context, rd, rm, size, q, context.Arm64Assembler.FrintzSingleAndDouble, context.Arm64Assembler.FrintzHalf);
}
}

View File

@ -0,0 +1,205 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the AArch32 Advanced SIMD saturating instructions.
/// Each method forwards to a common vector emitter with the matching AArch64 assembler instruction.
/// </summary>
static class InstEmitNeonSaturate
{
    /// <summary>Emits VQABS using SQABS.</summary>
    public static void Vqabs(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqabsV);

    /// <summary>Emits VQADD using UQADD when <paramref name="u"/> is true, SQADD otherwise.</summary>
    public static void Vqadd(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context,
            rd,
            rn,
            rm,
            size,
            q,
            u ? context.Arm64Assembler.UqaddV : context.Arm64Assembler.SqaddV,
            u ? context.Arm64Assembler.UqaddS : context.Arm64Assembler.SqaddS);

    /// <summary>Emits VQDMLAL (vector) using SQDMLAL.</summary>
    public static void Vqdmlal(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalVecV);

    /// <summary>Emits VQDMLAL (by scalar) using SQDMLAL (by element).</summary>
    public static void VqdmlalS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlalElt2regElement);

    /// <summary>Emits VQDMLSL (vector) using SQDMLSL.</summary>
    public static void Vqdmlsl(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslVecV);

    /// <summary>Emits VQDMLSL (by scalar) using SQDMLSL (by element).</summary>
    public static void VqdmlslS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmlslElt2regElement);

    /// <summary>Emits VQDMULH (vector) using SQDMULH.</summary>
    public static void Vqdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhVecV, context.Arm64Assembler.SqdmulhVecS);

    /// <summary>Emits VQDMULH (by scalar) using SQDMULH (by element).</summary>
    public static void VqdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqdmulhElt2regElement);

    /// <summary>Emits VQDMULL (vector) using SQDMULL.</summary>
    public static void Vqdmull(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLong(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullVecV);

    /// <summary>Emits VQDMULL (by scalar) using SQDMULL (by element).</summary>
    public static void VqdmullS(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
        => InstEmitNeonCommon.EmitVectorBinaryLongByScalar(context, rd, rn, rm, size, context.Arm64Assembler.SqdmullElt2regElement);

    /// <summary>
    /// Emits VQMOVN/VQMOVUN. The <paramref name="op"/> field selects UQXTN (3), SQXTUN (1) or SQXTN (any other value).
    /// </summary>
    public static void Vqmovn(CodeGenContext context, uint rd, uint rm, uint op, uint size)
    {
        switch (op)
        {
            case 3:
                InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.UqxtnV);
                break;
            case 1:
                InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.SqxtunV);
                break;
            default:
                InstEmitNeonCommon.EmitVectorUnaryNarrow(context, rd, rm, size, context.Arm64Assembler.SqxtnV);
                break;
        }
    }

    /// <summary>Emits VQNEG using SQNEG.</summary>
    public static void Vqneg(CodeGenContext context, uint rd, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorUnary(context, rd, rm, size, q, context.Arm64Assembler.SqnegV);

    /// <summary>Emits VQRDMLAH (vector) using SQRDMLAH.</summary>
    public static void Vqrdmlah(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahVecV);

    /// <summary>Emits VQRDMLAH (by scalar) using SQRDMLAH (by element).</summary>
    public static void VqrdmlahS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlahElt2regElement);

    /// <summary>Emits VQRDMLSH (vector) using SQRDMLSH.</summary>
    public static void Vqrdmlsh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRd(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshVecV);

    /// <summary>Emits VQRDMLSH (by scalar) using SQRDMLSH (by element).</summary>
    public static void VqrdmlshS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorTernaryRdByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmlshElt2regElement);

    /// <summary>Emits VQRDMULH (vector) using SQRDMULH.</summary>
    public static void Vqrdmulh(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhVecV, context.Arm64Assembler.SqrdmulhVecS);

    /// <summary>Emits VQRDMULH (by scalar) using SQRDMULH (by element).</summary>
    public static void VqrdmulhS(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinaryByScalar(context, rd, rn, rm, size, q, context.Arm64Assembler.SqrdmulhElt2regElement);

    /// <summary>Emits VQRSHL using SQRSHL. Rm is passed as the first source and Rn as the second one.</summary>
    public static void Vqrshl(CodeGenContext context, uint rd, uint rn, uint rm, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SqrshlV, context.Arm64Assembler.SqrshlS);

    /// <summary>
    /// Emits VQRSHRN/VQRSHRUN. The <paramref name="u"/> and <paramref name="op"/> fields select
    /// SQRSHRUN (u, op == 0), SQRSHRN (!u, op == 1) or UQRSHRN (u, op == 1).
    /// </summary>
    public static void Vqrshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        if (u && op == 0)
        {
            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrunV);

            return;
        }

        if (!u && op == 1)
        {
            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqrshrnV);

            return;
        }

        Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqrshrnV);
    }

    /// <summary>
    /// Emits VQSHL/VQSHLU (immediate). The <paramref name="u"/> and <paramref name="op"/> fields select
    /// SQSHLU (u, op == 0), SQSHL (!u, op == 1) or UQSHL (u, op == 1).
    /// </summary>
    public static void VqshlI(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint l, uint imm6, uint q)
    {
        // The element size is encoded by the L bit together with imm6.
        uint imm7 = imm6 | (l << 6);
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm7);
        uint shift = InstEmitNeonShift.GetShiftLeft(imm6, size);

        if (u && op == 0)
        {
            InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshluV, context.Arm64Assembler.SqshluS);

            return;
        }

        if (!u && op == 1)
        {
            InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.SqshlImmV, context.Arm64Assembler.SqshlImmS);

            return;
        }

        Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

        InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.UqshlImmV, context.Arm64Assembler.UqshlImmS);
    }

    /// <summary>
    /// Emits VQSHL (register) using UQSHL when <paramref name="u"/> is true, SQSHL otherwise.
    /// Rm is passed as the first source and Rn as the second one.
    /// </summary>
    public static void VqshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
    {
        if (!u)
        {
            InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.SqshlRegV, context.Arm64Assembler.SqshlRegS);

            return;
        }

        InstEmitNeonCommon.EmitVectorBinary(context, rd, rm, rn, size, q, context.Arm64Assembler.UqshlRegV, context.Arm64Assembler.UqshlRegS);
    }

    /// <summary>
    /// Emits VQSHRN/VQSHRUN. The <paramref name="u"/> and <paramref name="op"/> fields select
    /// SQSHRUN (u, op == 0), SQSHRN (!u, op == 1) or UQSHRN (u, op == 1).
    /// </summary>
    public static void Vqshrn(CodeGenContext context, uint rd, uint rm, bool u, uint op, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = InstEmitNeonShift.GetShiftRight(imm6, size);

        if (u && op == 0)
        {
            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrunV);

            return;
        }

        if (!u && op == 1)
        {
            InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.SqshrnV);

            return;
        }

        Debug.Assert(u && op == 1); // !u && op == 0 is the encoding for another instruction.

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.UqshrnV);
    }

    /// <summary>Emits VQSUB using UQSUB when <paramref name="u"/> is true, SQSUB otherwise.</summary>
    public static void Vqsub(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
        => InstEmitNeonCommon.EmitVectorBinary(
            context,
            rd,
            rn,
            rm,
            size,
            q,
            u ? context.Arm64Assembler.UqsubV : context.Arm64Assembler.SqsubV,
            u ? context.Arm64Assembler.UqsubS : context.Arm64Assembler.SqsubS);
}
}

View File

@ -0,0 +1,123 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the AArch32 Advanced SIMD shift instructions, and helpers to decode the shift amount
/// from the immediate field.
/// </summary>
static class InstEmitNeonShift
{
    /// <summary>Emits VSHLL (with the shift amount encoded on imm6) using USHLL when <paramref name="u"/> is true, SSHLL otherwise.</summary>
    public static void Vshll(CodeGenContext context, uint rd, uint rm, uint imm6, bool u)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6);
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift, size, isShl: true, u ? context.Arm64Assembler.Ushll : context.Arm64Assembler.Sshll);
    }

    /// <summary>Emits the VSHLL variant where the shift amount is equal to the element size in bits.</summary>
    public static void Vshll2(CodeGenContext context, uint rd, uint rm, uint size)
    {
        // Shift can't be encoded, so shift by value - 1 first, then shift again by 1.
        // Doesn't matter if we do a signed or unsigned shift in this case since all sign bits will be shifted out.

        uint shift = 8u << (int)size;

        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rm, shift - 1, size, isShl: true, context.Arm64Assembler.Sshll);
        InstEmitNeonCommon.EmitVectorBinaryLongShift(context, rd, rd, 1, size, isShl: true, context.Arm64Assembler.Sshll);
    }

    /// <summary>Emits VSHL (immediate) using SHL.</summary>
    public static void VshlI(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        // The element size is encoded by the L bit together with imm6.
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: true, context.Arm64Assembler.ShlV, context.Arm64Assembler.ShlS);
    }

    /// <summary>
    /// Emits VSHL (register) using USHL when <paramref name="u"/> is true, SSHL otherwise.
    /// Rm is passed as the first source and Rn as the second one.
    /// </summary>
    public static void VshlR(CodeGenContext context, uint rd, uint rn, uint rm, bool u, uint size, uint q)
    {
        InstEmitNeonCommon.EmitVectorBinary(
            context,
            rd,
            rm,
            rn,
            size,
            q,
            u ? context.Arm64Assembler.UshlV : context.Arm64Assembler.SshlV,
            u ? context.Arm64Assembler.UshlS : context.Arm64Assembler.SshlS);
    }

    /// <summary>Emits VSHR using USHR when <paramref name="u"/> is true, SSHR otherwise.</summary>
    public static void Vshr(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UshrV : context.Arm64Assembler.SshrV,
            u ? context.Arm64Assembler.UshrS : context.Arm64Assembler.SshrS);
    }

    /// <summary>Emits VSHRN using SHRN.</summary>
    public static void Vshrn(CodeGenContext context, uint rd, uint rm, uint imm6)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm6(imm6);
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryNarrowShift(context, rd, rm, shift, size, isShl: false, context.Arm64Assembler.Shrn);
    }

    /// <summary>Emits VSLI using SLI.</summary>
    public static void Vsli(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftLeft(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: true,
            context.Arm64Assembler.SliV,
            context.Arm64Assembler.SliS);
    }

    /// <summary>Emits VSRA using USRA when <paramref name="u"/> is true, SSRA otherwise.</summary>
    public static void Vsra(CodeGenContext context, uint rd, uint rm, bool u, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorTernaryRdShift(
            context,
            rd,
            rm,
            shift,
            size,
            q,
            isShl: false,
            u ? context.Arm64Assembler.UsraV : context.Arm64Assembler.SsraV,
            u ? context.Arm64Assembler.UsraS : context.Arm64Assembler.SsraS);
    }

    /// <summary>Emits VSRI using SRI.</summary>
    public static void Vsri(CodeGenContext context, uint rd, uint rm, uint l, uint imm6, uint q)
    {
        uint size = InstEmitNeonCommon.GetSizeFromImm7(imm6 | (l << 6));
        uint shift = GetShiftRight(imm6, size);

        InstEmitNeonCommon.EmitVectorBinaryShift(context, rd, rm, shift, size, q, isShl: false, context.Arm64Assembler.SriV, context.Arm64Assembler.SriS);
    }

    /// <summary>
    /// Decodes a left shift amount from the imm6 field.
    /// </summary>
    /// <param name="imm6">Value of the imm6 field</param>
    /// <param name="size">Element size as log2 of the size in bytes (0 to 3)</param>
    /// <returns>imm6 minus the element size in bits for sizes below 3, imm6 itself for size 3</returns>
    public static uint GetShiftLeft(uint imm6, uint size)
    {
        return size < 3 ? imm6 - (8u << (int)size) : imm6;
    }

    /// <summary>
    /// Decodes a right shift amount from the imm6 field.
    /// </summary>
    /// <param name="imm6">Value of the imm6 field</param>
    /// <param name="size">Element size as log2 of the size in bytes (0 to 3)</param>
    /// <returns>Twice the element size in bits minus imm6 for sizes below 3, 64 minus imm6 for size 3</returns>
    public static uint GetShiftRight(uint imm6, uint size)
    {
        return (size == 3 ? 64u : (16u << (int)size)) - imm6;
    }
}
}

View File

@ -0,0 +1,77 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Emitters for the AArch32 floating-point system register access instructions (VMRS and VMSR).
/// </summary>
static class InstEmitNeonSystem
{
    /// <summary>
    /// Emits a VMRS, reading a floating-point system register into a general-purpose register or into the flags.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rt">Index of the destination register, the PC index means the NZCV flags are the destination</param>
    /// <param name="reg">System register selector, 1 is FPSCR; any other register reads as zero</param>
    public static void Vmrs(CodeGenContext context, uint rt, uint reg)
    {
        if (context.ConsumeSkipNextInstruction())
        {
            // This case means that we managed to combine a VCMP and VMRS instruction,
            // so we have nothing to do here as FCMP/FCMPE already set PSTATE.NZCV.
            context.SetNzcvModified();

            return;
        }

        if (reg != 1)
        {
            // Registers other than FPSCR always read as zero.
            Operand zeroDestination = InstEmitCommon.GetOutputGpr(context, rt);

            context.Arm64Assembler.Mov(zeroDestination, 0u);

            return;
        }

        // FPSCR
        Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

        if (rt != RegisterUtils.PcRegister)
        {
            // FPSCR is a combination of the FPCR and FPSR registers.
            // We also need to set the FPSR NZCV bits that no longer exist on AArch64.
            using ScopedRegister scratch = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand destination = InstEmitCommon.GetOutputGpr(context, rt);

            context.Arm64Assembler.MrsFpsr(destination);
            context.Arm64Assembler.MrsFpcr(scratch.Operand);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);

            // Merge the saved flags, keeping only the top 4 bits of the stored value.
            context.Arm64Assembler.LdrRiUn(scratch.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
            context.Arm64Assembler.Bfc(scratch.Operand, 0, 28);
            context.Arm64Assembler.Orr(destination, destination, scratch.Operand);

            return;
        }

        // Destination is the flags: load the saved FP flags, move the top 4 bits down and restore NZCV from them.
        using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

        context.Arm64Assembler.LdrRiUn(flags.Operand, ctx, NativeContextOffsets.FpFlagsBaseOffset);
        context.Arm64Assembler.Lsr(flags.Operand, flags.Operand, InstEmitCommon.Const(28));

        InstEmitCommon.RestoreNzcvFlags(context, flags.Operand);

        context.SetNzcvModified();
    }

    /// <summary>
    /// Emits a VMSR, writing a general-purpose register into a floating-point system register.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rt">Index of the source general-purpose register</param>
    /// <param name="reg">System register selector, 1 is FPSCR; writes to any other register emit nothing</param>
    public static void Vmsr(CodeGenContext context, uint rt, uint reg)
    {
        if (reg != 1)
        {
            return;
        }

        // FPSCR
        // TODO: Do not set bits related to features that are not supported (like FP16)?
        Operand ctx = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);
        Operand source = InstEmitCommon.GetInputGpr(context, rt);

        // The same value is written to FPCR and FPSR, and also stored on the context FP flags slot.
        context.Arm64Assembler.MsrFpcr(source);
        context.Arm64Assembler.MsrFpsr(source);
        context.Arm64Assembler.StrRiUn(source, ctx, NativeContextOffsets.FpFlagsBaseOffset);
    }
}
}

View File

@ -0,0 +1,452 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitSaturate
{
/// <summary>
/// Emits a QADD, a 32-bit signed saturating addition.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the register from the Rn field</param>
/// <param name="rm">Index of the register from the Rm field</param>
public static void Qadd(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: true);
/// <summary>
/// Emits a QADD16: each pair of signed 16-bit elements is added and the sum is saturated to the signed 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 16;

    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateRange(context, dest, dest, ElementBits, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a QADD8: each pair of signed 8-bit elements is added and the sum is saturated to the signed 8-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 8;

    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateRange(context, dest, dest, ElementBits, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a QASX: signed 16-bit exchange operation that subtracts for element 0 and adds for the other element,
/// saturating each result to the signed 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }

        EmitSaturateRange(context, dest, dest, 16, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a QDADD, a 32-bit signed saturating addition where the Rn operand is doubled (with saturation) first.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the register that is doubled</param>
/// <param name="rm">Index of the other source register</param>
public static void Qdadd(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: true);
/// <summary>
/// Emits a QDSUB, a 32-bit signed saturating subtraction where the Rn operand is doubled (with saturation) first.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the register that is doubled</param>
/// <param name="rm">Index of the other source register</param>
public static void Qdsub(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: true, add: false);
/// <summary>
/// Emits a QSAX: signed 16-bit exchange operation that adds for element 0 and subtracts for the other element,
/// saturating each result to the signed 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitSigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }

        EmitSaturateRange(context, dest, dest, 16, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a QSUB, a 32-bit signed saturating subtraction.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the register from the Rn field</param>
/// <param name="rm">Index of the register from the Rm field</param>
public static void Qsub(CodeGenContext context, uint rd, uint rn, uint rm)
    => EmitAddSubSaturate(context, rd, rn, rm, doubling: false, add: false);
/// <summary>
/// Emits a QSUB16: each pair of signed 16-bit elements is subtracted and the difference is saturated to the signed 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 16;

    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        EmitSaturateRange(context, dest, dest, ElementBits, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a QSUB8: each pair of signed 8-bit elements is subtracted and the difference is saturated to the signed 8-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Qsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 8;

    InstEmitCommon.EmitSigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Sub(dest, lhs, rhs);

        EmitSaturateRange(context, dest, dest, ElementBits, unsigned: false, setQ: false);
    });
}
/// <summary>
/// Emits a SSAT, a signed saturation of an optionally shifted register.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="imm">Encoded saturation position, the saturation width passed down is <paramref name="imm"/> + 1</param>
/// <param name="rn">Index of the source register</param>
/// <param name="sh">Shift type selector, forwarded unchanged</param>
/// <param name="shift">Shift amount, forwarded unchanged</param>
public static void Ssat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
    => EmitSaturate(context, rd, imm + 1, rn, sh, shift, unsigned: false);
/// <summary>
/// Emits a SSAT16: each signed 16-bit element is saturated to a signed range of <paramref name="imm"/> + 1 bits.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="imm">Encoded saturation position, the saturation width is <paramref name="imm"/> + 1</param>
/// <param name="rn">Index of the source register</param>
public static void Ssat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
    InstEmitCommon.EmitSigned16BitPair(
        context,
        rd,
        rn,
        (dest, source) => EmitSaturateRange(context, dest, source, imm + 1, unsigned: false));
}
/// <summary>
/// Emits a UQADD16: each pair of unsigned 16-bit elements is added and the sum is saturated to the unsigned 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Uqadd16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 16;

    InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, dest, ElementBits);
    });
}
/// <summary>
/// Emits a UQADD8: each pair of unsigned 8-bit elements is added and the sum is saturated to the unsigned 8-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Uqadd8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    const int ElementBits = 8;

    InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (dest, lhs, rhs) =>
    {
        context.Arm64Assembler.Add(dest, lhs, rhs);

        EmitSaturateUnsignedRange(context, dest, ElementBits);
    });
}
/// <summary>
/// Emits a UQASX: unsigned 16-bit exchange operation that subtracts for element 0 and adds for the other element,
/// saturating each result to the unsigned 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Uqasx(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }

        EmitSaturateUnsignedRange(context, dest, 16);
    });
}
/// <summary>
/// Emits a UQSAX: unsigned 16-bit exchange operation that adds for element 0 and subtracts for the other element,
/// saturating each result to the unsigned 16-bit range.
/// </summary>
/// <param name="context">Code generation context</param>
/// <param name="rd">Index of the destination register</param>
/// <param name="rn">Index of the first source register</param>
/// <param name="rm">Index of the second source register</param>
public static void Uqsax(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitXPair(context, rd, rn, rm, (dest, lhs, rhs, element) =>
    {
        if (element != 0)
        {
            context.Arm64Assembler.Sub(dest, lhs, rhs);
        }
        else
        {
            context.Arm64Assembler.Add(dest, lhs, rhs);
        }

        EmitSaturateUnsignedRange(context, dest, 16);
    });
}
/// <summary>
/// Emits UQSUB16: unsigned 16-bit subtraction where a negative difference saturates to zero.
/// </summary>
/// <remarks>
/// Two fixes compared with the previous implementation:
/// the operands are unpacked with the unsigned pair helper (as UQADD16 does), and the difference is
/// clamped with <c>EmitSaturateRange</c> in unsigned mode. <c>EmitSaturateUnsignedRange</c> writes the
/// maximum value whenever any upper bit is set, which turned every underflow into 0xFFFF instead of 0.
/// The Q flag is not updated.
/// </remarks>
public static void Uqsub16(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned16BitPair(context, rd, rn, rm, (d, n, m) =>
    {
        context.Arm64Assembler.Sub(d, n, m);

        // Treat the 32-bit difference as signed: negative -> 0, anything else already fits in 16 bits.
        EmitSaturateRange(context, d, d, 16, unsigned: true, setQ: false);
    });
}
/// <summary>
/// Emits UQSUB8: unsigned 8-bit subtraction where a negative difference saturates to zero.
/// </summary>
/// <remarks>
/// Two fixes compared with the previous implementation:
/// the operands are unpacked with the unsigned pair helper (as UQADD8 does), and the difference is
/// clamped with <c>EmitSaturateRange</c> in unsigned mode. <c>EmitSaturateUnsignedRange</c> writes the
/// maximum value whenever any upper bit is set, which turned every underflow into 0xFF instead of 0.
/// The Q flag is not updated.
/// </remarks>
public static void Uqsub8(CodeGenContext context, uint rd, uint rn, uint rm)
{
    InstEmitCommon.EmitUnsigned8BitPair(context, rd, rn, rm, (d, n, m) =>
    {
        context.Arm64Assembler.Sub(d, n, m);

        // Treat the 32-bit difference as signed: negative -> 0, anything else already fits in 8 bits.
        EmitSaturateRange(context, d, d, 8, unsigned: true, setQ: false);
    });
}
/// <summary>
/// Emits USAT: optionally shifts <paramref name="rn"/> and saturates it to an unsigned range
/// of <paramref name="imm"/> bits.
/// </summary>
/// <param name="sh">True selects an arithmetic right shift, false a left shift.</param>
/// <param name="shift">Shift amount forwarded to <c>EmitSaturate</c>.</param>
public static void Usat(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift)
{
    uint saturateBits = imm;

    EmitSaturate(context, rd, saturateBits, rn, sh, shift, unsigned: true);
}
/// <summary>
/// Emits USAT16: each value produced by <c>InstEmitCommon.EmitSigned16BitPair</c> is saturated
/// to an unsigned range of <paramref name="imm"/> bits. Saturation sets the Q flag (default <c>setQ</c>).
/// </summary>
public static void Usat16(CodeGenContext context, uint rd, uint imm, uint rn)
{
    // The inputs are signed, the destination range is unsigned.
    InstEmitCommon.EmitSigned16BitPair(context, rd, rn, (dest, source) =>
    {
        EmitSaturateRange(context, dest, source, saturateTo: imm, unsigned: true);
    });
}
/// <summary>
/// Emits a saturating 32-bit add or subtract of <paramref name="rn"/> and <paramref name="rm"/>.
/// Both inputs are sign-extended to 64 bits, combined there, and the result is narrowed back to
/// 32 bits with saturation by <c>EmitSaturateLongToInt</c>.
/// </summary>
/// <param name="doubling">When true, the <paramref name="rn"/> operand is doubled and saturated before being combined.</param>
/// <param name="add">True to add, false to compute <c>rn - rm</c>.</param>
private static void EmitAddSubSaturate(CodeGenContext context, uint rd, uint rn, uint rm, bool doubling, bool add)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand left = InstEmitCommon.GetInputGpr(context, rn);
    Operand right = InstEmitCommon.GetInputGpr(context, rm);

    using ScopedRegister leftTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister rightTemp = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    // 64-bit views of the two temporaries.
    Operand left64 = new(OperandKind.Register, OperandType.I64, leftTemp.Operand.Value);
    Operand right64 = new(OperandKind.Register, OperandType.I64, rightTemp.Operand.Value);

    context.Arm64Assembler.Sxtw(left64, left);
    context.Arm64Assembler.Sxtw(right64, right);

    if (doubling)
    {
        // Double the first operand and saturate the intermediate value in place.
        context.Arm64Assembler.Lsl(left64, left64, InstEmitCommon.Const(1));
        EmitSaturateLongToInt(context, left64, left64);
    }

    if (!add)
    {
        context.Arm64Assembler.Sub(left64, left64, right64);
    }
    else
    {
        context.Arm64Assembler.Add(left64, left64, right64);
    }

    EmitSaturateLongToInt(context, destination, left64);
}
/// <summary>
/// Shared body of SSAT/USAT: applies the optional shift to <paramref name="rn"/> and saturates
/// the shifted value to <paramref name="imm"/> bits into <paramref name="rd"/>.
/// </summary>
/// <param name="sh">True for an arithmetic right shift, false for a left shift.</param>
/// <param name="shift">Shift amount; a right shift of 0 is emitted as a shift by 31.</param>
/// <param name="unsigned">Selects the unsigned or the signed target range.</param>
private static void EmitSaturate(CodeGenContext context, uint rd, uint imm, uint rn, bool sh, uint shift, bool unsigned)
{
    Operand destination = InstEmitCommon.GetOutputGpr(context, rd);
    Operand source = InstEmitCommon.GetInputGpr(context, rn);

    if (sh && shift == 0)
    {
        shift = 31;
    }

    if (shift == 0)
    {
        // Nothing to shift, saturate the input register directly.
        EmitSaturateRange(context, destination, source, imm, unsigned);

        return;
    }

    using ScopedRegister shifted = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    Operand amount = InstEmitCommon.Const((int)shift);

    if (sh)
    {
        context.Arm64Assembler.Asr(shifted.Operand, source, amount);
    }
    else
    {
        context.Arm64Assembler.Lsl(shifted.Operand, source, amount);
    }

    EmitSaturateRange(context, destination, shifted.Operand, imm, unsigned);
}
/// <summary>
/// Emits code that saturates the 32-bit signed <paramref name="value"/> to a range of
/// <paramref name="saturateTo"/> bits (signed or unsigned) and writes it to <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination operand; may be the same register as <paramref name="value"/>.</param>
/// <param name="value">Operand to saturate.</param>
/// <param name="saturateTo">Width of the target range in bits (at most 32, below 32 when unsigned).</param>
/// <param name="unsigned">True to clamp to [0, 2^saturateTo - 1], false to clamp to the signed range.</param>
/// <param name="setQ">When true, the Q flag is set on the path where saturation happened.</param>
private static void EmitSaturateRange(CodeGenContext context, Operand result, Operand value, uint saturateTo, bool unsigned, bool setQ = true)
{
Debug.Assert(saturateTo <= 32);
Debug.Assert(!unsigned || saturateTo < 32);
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
ScopedRegister tempValue = default;
bool resultValueOverlap = result.Value == value.Value;
if (!unsigned && saturateTo == 32)
{
// No saturation possible for this case.
if (!resultValueOverlap)
{
context.Arm64Assembler.Mov(result, value);
}
return;
}
else if (saturateTo == 0)
{
// Result is always zero if we saturate 0 bits.
context.Arm64Assembler.Mov(result, 0u);
return;
}
// The original value is still needed after result is written, so copy it when both share a register.
if (resultValueOverlap)
{
tempValue = context.RegisterAllocator.AllocateTempGprRegisterScoped();
context.Arm64Assembler.Mov(tempValue.Operand, value);
value = tempValue.Operand;
}
// Truncate to the target width; if the truncated value differs from the original, it did not fit.
if (unsigned)
{
// Negative values always saturate (to zero).
// So we must always ignore the sign bit when masking, so that the truncated value will differ from the original one.
context.Arm64Assembler.And(result, value, InstEmitCommon.Const((int)(uint.MaxValue >> (32 - (int)saturateTo))));
}
else
{
context.Arm64Assembler.Sbfx(result, value, 0, (int)saturateTo);
}
context.Arm64Assembler.Sub(tempRegister.Operand, value, result);
// Remember where the CBZ is emitted; its branch offset is patched once the saturation code length is known.
int branchIndex = context.CodeWriter.InstructionPointer;
// If the result is 0, the values are equal and we don't need saturation.
context.Arm64Assembler.Cbz(tempRegister.Operand, 0);
// Saturate and set Q flag.
if (unsigned)
{
if (saturateTo == 31)
{
// Only saturation case possible when going from 32 bits signed to 32 or 31 bits unsigned
// is when the signed input is negative, as all positive values are representable on a 31 bits range.
context.Arm64Assembler.Mov(result, 0u);
}
else
{
// Negative input: sign mask is all ones, inverted to 0. Positive input: mask is 0, inverted to all ones,
// then shifted right to leave the maximum value of the target range.
context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
context.Arm64Assembler.Mvn(result, result);
context.Arm64Assembler.Lsr(result, result, InstEmitCommon.Const(32 - (int)saturateTo));
}
}
else
{
if (saturateTo == 1)
{
// The 1-bit signed range is [-1, 0], which is exactly the sign mask of the input.
context.Arm64Assembler.Asr(result, value, InstEmitCommon.Const(31));
}
else
{
// Load the positive maximum, then XOR with the sign mask to turn it into the minimum for negative inputs.
context.Arm64Assembler.Mov(result, uint.MaxValue >> (33 - (int)saturateTo));
context.Arm64Assembler.Eor(result, result, value, ArmShiftType.Asr, 31);
}
}
if (setQ)
{
SetQFlag(context);
}
// Patch the CBZ so that it skips everything emitted since branchIndex (offset is placed at bit 5, 19 bits wide).
int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
// tempValue is only allocated on the overlap path, so it is released manually instead of with "using".
if (resultValueOverlap)
{
tempValue.Dispose();
}
}
/// <summary>
/// Clamps <paramref name="value"/> in place to the largest unsigned value of
/// <paramref name="saturateTo"/> bits. Only the upper bound is enforced: if any bit above the
/// target width is set, the maximum is written. The Q flag is never touched.
/// </summary>
private static void EmitSaturateUnsignedRange(CodeGenContext context, Operand value, uint saturateTo)
{
    Debug.Assert(saturateTo <= 32);

    using ScopedRegister overflowBits = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    switch (saturateTo)
    {
        case 32:
            // The full register width never saturates.
            return;
        case 0:
            // A zero-width range can only hold zero.
            context.Arm64Assembler.Mov(value, 0u);
            return;
    }

    int unusedBits = 32 - (int)saturateTo;

    // Isolate the bits that do not fit in the target width.
    context.Arm64Assembler.Lsr(overflowBits.Operand, value, InstEmitCommon.Const(unusedBits));

    int cbzIndex = context.CodeWriter.InstructionPointer;

    // No overflow bits set: the value already fits, skip the clamp.
    context.Arm64Assembler.Cbz(overflowBits.Operand, 0);

    context.Arm64Assembler.Mov(value, uint.MaxValue >> unusedBits);

    // Back-patch the CBZ offset now that the length of the skipped code is known.
    int skipDistance = context.CodeWriter.InstructionPointer - cbzIndex;
    uint patchedBranch = context.CodeWriter.ReadInstructionAt(cbzIndex) | (uint)((skipDistance & 0x7ffff) << 5);

    context.CodeWriter.WriteInstructionAt(cbzIndex, patchedBranch);
}
/// <summary>
/// Emits code that saturates the signed 64-bit <paramref name="value"/> to the signed 32-bit range,
/// writes it to <paramref name="result"/> and sets the Q flag when saturation happened.
/// </summary>
/// <param name="result">Destination operand; may be the same register as <paramref name="value"/>.</param>
/// <param name="value">64-bit operand to narrow.</param>
private static void EmitSaturateLongToInt(CodeGenContext context, Operand result, Operand value)
{
using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
ScopedRegister tempValue = default;
bool resultValueOverlap = result.Value == value.Value;
// The original value is still needed after result is written, so copy it when both share a register.
if (resultValueOverlap)
{
tempValue = context.RegisterAllocator.AllocateTempGprRegisterScoped();
Operand tempValue64 = new(OperandKind.Register, OperandType.I64, tempValue.Operand.Value);
context.Arm64Assembler.Mov(tempValue64, value);
value = tempValue64;
}
Operand temp64 = new(OperandKind.Register, OperandType.I64, tempRegister.Operand.Value);
Operand result64 = new(OperandKind.Register, OperandType.I64, result.Value);
// Sign-extend the low 32 bits; if that differs from the full 64-bit value, the value does not fit.
context.Arm64Assembler.Sxtw(result64, value);
context.Arm64Assembler.Sub(temp64, value, result64);
// Remember where the CBZ is emitted; its branch offset is patched once the saturation code length is known.
int branchIndex = context.CodeWriter.InstructionPointer;
// If the result is 0, the values are equal and we don't need saturation.
context.Arm64Assembler.Cbz(temp64, 0);
// Saturate and set Q flag.
// Load int.MaxValue, then XOR with the 64-bit sign mask to obtain the minimum for negative inputs.
context.Arm64Assembler.Mov(result, uint.MaxValue >> 1);
context.Arm64Assembler.Eor(result64, result64, value, ArmShiftType.Asr, 63);
SetQFlag(context);
// Patch the CBZ so that it skips everything emitted since branchIndex (offset is placed at bit 5, 19 bits wide).
int delta = context.CodeWriter.InstructionPointer - branchIndex;
context.CodeWriter.WriteInstructionAt(branchIndex, context.CodeWriter.ReadInstructionAt(branchIndex) | (uint)((delta & 0x7ffff) << 5));
// NOTE(review): this uses the caller's operand type; presumably it only clears the upper half when result is a 32-bit operand — confirm.
context.Arm64Assembler.Mov(result, result); // Zero-extend.
// tempValue is only allocated on the overlap path, so it is released manually instead of with "using".
if (resultValueOverlap)
{
tempValue.Dispose();
}
}
/// <summary>
/// Emits a read-modify-write of the flags word stored at
/// <c>NativeContextOffsets.FlagsBaseOffset</c> that sets bit 27 (the Q flag).
/// </summary>
public static void SetQFlag(CodeGenContext context)
{
    const int QFlagMask = 1 << 27;

    Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
    context.Arm64Assembler.Orr(flags.Operand, flags.Operand, InstEmitCommon.Const(QFlagMask));
    context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
}
}
}

View File

@ -0,0 +1,648 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
static class InstEmitSystem
{
// Signature shared by the host handlers for breakpoints, supervisor calls and undefined instructions.
private delegate void SoftwareInterruptHandler(ulong address, int imm);
// Signature of host callbacks that return a 64-bit value (used for the CNTPCT_EL0 read).
private delegate ulong Get64();
// Signature of host callbacks that return a boolean (used for the synchronization check).
private delegate bool GetBool();
// Register index used as the base for all spill/fill loads and stores.
private const int SpIndex = 31;
/// <summary>
/// Emits BKPT: registers a pending breakpoint for <paramref name="imm"/> on the context and
/// emits a branch with a zero offset as a placeholder.
/// </summary>
public static void Bkpt(CodeGenContext context, uint imm)
{
    // The pending entry must be recorded before the placeholder branch is written.
    context.AddPendingBkpt(imm);

    // NOTE(review): presumably patched later to jump to the out-of-line handler call — confirm.
    context.Arm64Assembler.B(0);
}
/// <summary>
/// CPS (change processor state): emits no code, all parameters are ignored.
/// </summary>
public static void Cps(CodeGenContext context, uint imod, uint m, uint a, uint i, uint f, uint mode)
{
// NOP in user mode.
}
/// <summary>
/// DBG (debug hint): emits no code, <paramref name="option"/> is ignored.
/// </summary>
public static void Dbg(CodeGenContext context, uint option)
{
// NOP in ARMv8.
}
/// <summary>
/// HLT: emits no code, <paramref name="imm"/> is ignored.
/// </summary>
// NOTE(review): unlike Bkpt, no pending handler is registered here — confirm that ignoring HLT is intended.
public static void Hlt(CodeGenContext context, uint imm)
{
}
/// <summary>
/// Emits MCR (write to coprocessor register). Anything that is not coprocessor 15 with opc1 == 0
/// is treated as undefined. The only register actually written is c13/c0/2, which is stored at
/// <c>NativeContextOffsets.TpidrEl0Offset</c>; every other write is silently dropped.
/// </summary>
public static void Mcr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
{
    bool isSupportedCoprocessor = coproc == 15 && opc1 == 0;

    if (!isSupportedCoprocessor)
    {
        Udf(context, encoding, 0);

        return;
    }

    Operand nativeContext = Register(context.RegisterAllocator.FixedContextRegister);
    Operand source = InstEmitCommon.GetInputGpr(context, rt);

    // Process and Thread Info group (c13, c0), opc2 == 2.
    if (crn == 13 && crm == 0 && opc2 == 2)
    {
        context.Arm64Assembler.StrRiUn(source, nativeContext, NativeContextOffsets.TpidrEl0Offset);
    }
}
/// <summary>
/// Emits MCRR (64-bit write to coprocessor register). Coprocessor 15 with opc1 == 0 is accepted
/// and ignored; everything else is treated as an undefined instruction.
/// </summary>
public static void Mcrr(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crm)
{
    if (coproc == 15 && opc1 == 0)
    {
        // No system register here needs to be modified with a 64-bit value, so nothing is emitted.
        return;
    }

    Udf(context, encoding, 0);
}
/// <summary>
/// Emits MRC (read from coprocessor register). Anything that is not coprocessor 15 with opc1 == 0
/// is treated as undefined. Registers c13/c0/2 and c13/c0/3 are loaded from the native context;
/// every other register reads as zero.
/// </summary>
/// <remarks>
/// When <paramref name="rt"/> is the PC register the value is written to the NZCV flags instead of a GPR.
/// Previously the "reads as zero" fallback was skipped on that path, so the flags were loaded from a
/// temporary register that had never been written. The fallback now runs first for both destinations.
/// </remarks>
public static void Mrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint crn, uint crm, uint opc2)
{
    if (coproc != 15 || opc1 != 0)
    {
        Udf(context, encoding, 0);

        return;
    }

    Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

    // NOTE(review): rt is a destination here but is fetched with GetInputGpr — confirm this is intended.
    Operand rtOperand = InstEmitCommon.GetInputGpr(context, rt);
    bool hasValue = false;

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    bool toFlags = rt == RegisterUtils.PcRegister;
    Operand dest = toFlags ? tempRegister.Operand : rtOperand;

    switch (crn)
    {
        case 13: // Process and Thread Info.
            if (crm == 0)
            {
                switch (opc2)
                {
                    case 2:
                        context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrEl0Offset);
                        hasValue = true;
                        break;
                    case 3:
                        context.Arm64Assembler.LdrRiUn(dest, ctx, NativeContextOffsets.TpidrroEl0Offset);
                        hasValue = true;
                        break;
                }
            }
            break;
    }

    if (!hasValue)
    {
        // Unsupported system register: give the destination a defined value.
        context.Arm64Assembler.Mov(dest, 0u);
    }

    if (toFlags)
    {
        context.Arm64Assembler.MsrNzcv(dest);
        context.SetNzcvModified();
    }
}
/// <summary>
/// Emits MRRC (64-bit read from coprocessor register). Only coprocessor 15 is accepted.
/// crm == 14 with opc1 == 0 registers a pending counter read (see <c>AddPendingReadCntpct</c>);
/// any other register reads as zero in both destination registers.
/// </summary>
public static void Mrrc(CodeGenContext context, uint encoding, uint coproc, uint opc1, uint rt, uint rt2, uint crm)
{
    if (coproc != 15)
    {
        Udf(context, encoding, 0);

        return;
    }

    bool isCounterRead = crm == 14 && opc1 == 0;

    if (isCounterRead)
    {
        context.AddPendingReadCntpct(rt, rt2);
        context.Arm64Assembler.B(0);

        return;
    }

    // Unsupported system register.
    Operand low = InstEmitCommon.GetOutputGpr(context, rt);
    context.Arm64Assembler.Mov(low, 0u);

    Operand high = InstEmitCommon.GetOutputGpr(context, rt2);
    context.Arm64Assembler.Mov(high, 0u);
}
/// <summary>
/// Emits MRS. With <paramref name="r"/> set (SPSR read) the destination is zeroed. Otherwise the
/// destination receives the GE flags (bits 19:16) and the Q flag (bit 27) from the stored flags word,
/// combined with the host NZCV flags.
/// </summary>
/// <remarks>
/// The GE flags were previously extracted with UBFX, which moved them down to bits 3:0 of the result.
/// MsrI/MsrR store them at bits 19:16 of the flags word, so they are now masked in place instead.
/// </remarks>
public static void Mrs(CodeGenContext context, uint rd, bool r)
{
    Operand rdOperand = InstEmitCommon.GetOutputGpr(context, rd);

    if (r)
    {
        // Reads SPSR, unpredictable in user mode.
        context.Arm64Assembler.Mov(rdOperand, 0u);

        return;
    }

    Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

    // Copy GE flags to destination register, keeping them at bits 19:16.
    context.Arm64Assembler.And(rdOperand, tempRegister.Operand, InstEmitCommon.Const(0xf << 16));

    // Insert Q flag.
    context.Arm64Assembler.And(tempRegister.Operand, tempRegister.Operand, InstEmitCommon.Const(1 << 27));
    context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

    // Insert NZCV flags.
    context.Arm64Assembler.MrsNzcv(tempRegister.Operand);
    context.Arm64Assembler.Orr(rdOperand, rdOperand, tempRegister.Operand);

    // All other flags can't be accessed in user mode or have "unknown" values.
}
/// <summary>
/// Emits MRS (banked register): the destination register is set to zero;
/// <paramref name="m1"/> and <paramref name="r"/> are ignored.
/// </summary>
public static void MrsBr(CodeGenContext context, uint rd, uint m1, bool r)
{
    // Reads banked register, unpredictable in user mode.
    context.Arm64Assembler.Mov(InstEmitCommon.GetOutputGpr(context, rd), 0u);
}
/// <summary>
/// MSR (banked register): emits no code, all parameters are ignored.
/// </summary>
public static void MsrBr(CodeGenContext context, uint rn, uint m1, bool r)
{
// Writes banked register, unpredictable in user mode.
}
/// <summary>
/// Emits MSR (immediate). SPSR writes (<paramref name="r"/> set) are ignored. Otherwise the fields
/// of the stored flags word selected by <paramref name="mask"/> are replaced with the matching bits
/// of <paramref name="imm"/>, and the host NZCV flags are updated when the NZCVQ field is selected.
/// </summary>
/// <param name="mask">Bit 1 selects the endian flag (bit 9), bit 2 the GE flags (bits 19:16), bit 3 NZCVQ (bits 31:27).</param>
/// <remarks>
/// The updated flags word is now stored back to the native context. Previously it was loaded and
/// modified in a temporary register only, so writes to the endian, GE and Q flags were lost.
/// </remarks>
public static void MsrI(CodeGenContext context, uint imm, uint mask, bool r)
{
    if (r)
    {
        // Writes SPSR, unpredictable in user mode.
        return;
    }

    Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

    if ((mask & 2) != 0)
    {
        // Endian flag.
        context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 9) & 1);
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
    }

    if ((mask & 4) != 0)
    {
        // GE flags.
        context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 16) & 0xf);
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
    }

    if ((mask & 8) != 0)
    {
        // NZCVQ flags.
        context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 27) & 0x1f);
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);
        context.Arm64Assembler.Mov(tempRegister2.Operand, (imm >> 28) & 0xf);
        InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
        context.SetNzcvModified();
    }

    // Commit the modified flags word, mirroring what Setend does for the endian flag.
    context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits MSR (register). SPSR writes (<paramref name="r"/> set) are ignored. Otherwise the fields
/// of the stored flags word selected by <paramref name="mask"/> are replaced with the matching bits
/// of register <paramref name="rn"/>, and the host NZCV flags are updated when the NZCVQ field is selected.
/// </summary>
/// <param name="mask">Bit 1 selects the endian flag (bit 9), bit 2 the GE flags (bits 19:16), bit 3 NZCVQ (bits 31:27).</param>
/// <remarks>
/// The updated flags word is now stored back to the native context. Previously it was loaded and
/// modified in a temporary register only, so writes to the endian, GE and Q flags were lost.
/// </remarks>
public static void MsrR(CodeGenContext context, uint rn, uint mask, bool r)
{
    Operand rnOperand = InstEmitCommon.GetInputGpr(context, rn);

    if (r)
    {
        // Writes SPSR, unpredictable in user mode.
        return;
    }

    Operand ctx = Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister tempRegister = context.RegisterAllocator.AllocateTempGprRegisterScoped();
    using ScopedRegister tempRegister2 = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);

    if ((mask & 2) != 0)
    {
        // Endian flag.
        context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(9));
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 9, 1);
    }

    if ((mask & 4) != 0)
    {
        // GE flags.
        context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(16));
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 16, 4);
    }

    if ((mask & 8) != 0)
    {
        // NZCVQ flags.
        context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(27));
        context.Arm64Assembler.Bfi(tempRegister.Operand, tempRegister2.Operand, 27, 5);
        context.Arm64Assembler.Lsr(tempRegister2.Operand, rnOperand, InstEmitCommon.Const(28));
        InstEmitCommon.RestoreNzcvFlags(context, tempRegister2.Operand);
        context.SetNzcvModified();
    }

    // Commit the modified flags word, mirroring what Setend does for the endian flag.
    context.Arm64Assembler.StrRiUn(tempRegister.Operand, ctx, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits SETEND: a read-modify-write of the stored flags word that sets bit 9 when
/// <paramref name="e"/> is true and clears it otherwise.
/// </summary>
public static void Setend(CodeGenContext context, bool e)
{
    const int EndianBit = 9;

    Operand nativeContext = Register(context.RegisterAllocator.FixedContextRegister);

    using ScopedRegister flags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

    context.Arm64Assembler.LdrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);

    if (!e)
    {
        context.Arm64Assembler.Bfc(flags.Operand, EndianBit, 1);
    }
    else
    {
        context.Arm64Assembler.Orr(flags.Operand, flags.Operand, InstEmitCommon.Const(1 << EndianBit));
    }

    context.Arm64Assembler.StrRiUn(flags.Operand, nativeContext, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits SVC: registers a pending supervisor call for <paramref name="imm"/> on the context and
/// emits a branch with a zero offset as a placeholder.
/// </summary>
public static void Svc(CodeGenContext context, uint imm)
{
    // The pending entry must be recorded before the placeholder branch is written.
    context.AddPendingSvc(imm);

    // NOTE(review): presumably patched later to jump to the out-of-line handler call — confirm.
    context.Arm64Assembler.B(0);
}
/// <summary>
/// Emits UDF: registers a pending undefined-instruction event for <paramref name="encoding"/>
/// and emits a branch with a zero offset as a placeholder. <paramref name="imm"/> is not used.
/// </summary>
public static void Udf(CodeGenContext context, uint encoding, uint imm)
{
    // The pending entry must be recorded before the placeholder branch is written.
    context.AddPendingUdf(encoding);

    // NOTE(review): presumably patched later to jump to the out-of-line handler call — confirm.
    context.Arm64Assembler.B(0);
}
/// <summary>
/// Handles a privileged instruction by treating it exactly like an undefined instruction.
/// </summary>
public static void PrivilegedInstruction(CodeGenContext context, uint encoding)
    => Udf(context, encoding, 0);
/// <summary>Returns a native function pointer for <c>NativeInterface.Break</c>.</summary>
private static IntPtr GetBkptHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Break);
/// <summary>Returns a native function pointer for <c>NativeInterface.SupervisorCall</c>.</summary>
private static IntPtr GetSvcHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.SupervisorCall);
/// <summary>Returns a native function pointer for <c>NativeInterface.Undefined</c>.</summary>
private static IntPtr GetUdfHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Undefined);
/// <summary>Returns a native function pointer for <c>NativeInterface.GetCntpctEl0</c>.</summary>
private static IntPtr GetCntpctEl0Ptr()
    => Marshal.GetFunctionPointerForDelegate<Get64>(NativeInterface.GetCntpctEl0);
/// <summary>Returns a native function pointer for <c>NativeInterface.CheckSynchronization</c>.</summary>
private static IntPtr CheckSynchronizationPtr()
    => Marshal.GetFunctionPointerForDelegate<GetBool>(NativeInterface.CheckSynchronization);
/// <summary>
/// Tells whether the given instruction might perform a host call. All such instructions must be
/// listed here so that stack space is reserved for the caller saved registers.
/// </summary>
/// <returns>True for Mcr, Mrc, Mrrc, Svc and Udf; false for every other instruction.</returns>
// NOTE(review): Bkpt is not listed although WriteBkpt also performs a host call — confirm whether it should be.
public static bool NeedsCall(InstName name)
{
    return name is InstName.Mcr
        or InstName.Mrc
        or InstName.Mrrc
        or InstName.Svc
        or InstName.Udf;
}
/// <summary>
/// Writes the out-of-line code for a breakpoint: a call to the break handler with
/// <paramref name="pc"/> and <paramref name="imm"/> as arguments, followed by a sync point.
/// </summary>
public static void WriteBkpt(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
{
    Assembler assembler = new(writer);
    IntPtr handler = GetBkptHandlerPtr();

    // The handler returns nothing, so no result register is requested.
    WriteCall(ref assembler, regAlloc, handler, skipContext: true, spillBaseOffset, null, pc, imm);
    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
/// <summary>
/// Writes the out-of-line code for a supervisor call: a call to the SVC handler with
/// <paramref name="pc"/> and <paramref name="svcId"/> as arguments, followed by a sync point.
/// </summary>
public static void WriteSvc(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint svcId)
{
    Assembler assembler = new(writer);
    IntPtr handler = GetSvcHandlerPtr();

    // The handler returns nothing, so no result register is requested.
    WriteCall(ref assembler, regAlloc, handler, skipContext: true, spillBaseOffset, null, pc, svcId);
    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
/// <summary>
/// Writes the out-of-line code for an undefined instruction: a call to the undefined handler with
/// <paramref name="pc"/> and <paramref name="imm"/> as arguments, followed by a sync point.
/// </summary>
public static void WriteUdf(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset, uint pc, uint imm)
{
    Assembler assembler = new(writer);
    IntPtr handler = GetUdfHandlerPtr();

    // The handler returns nothing, so no result register is requested.
    WriteCall(ref assembler, regAlloc, handler, skipContext: true, spillBaseOffset, null, pc, imm);
    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, skipContext: true, spillBaseOffset);
}
/// <summary>
/// Writes the out-of-line code that calls the host counter getter and splits the 64-bit result:
/// the low 32 bits go to register <paramref name="rt"/>, the high 32 bits to <paramref name="rt2"/>.
/// </summary>
/// <remarks>
/// The call result arrives in register 0. When <paramref name="rt2"/> is register 0 itself, the low
/// half is copied out before the shift overwrites it; the previous fixed order (shift first) would
/// have written the high half to both registers in that case. The scratch-register search now also
/// checks its bound before shifting.
/// </remarks>
public static void WriteReadCntpct(CodeWriter writer, RegisterAllocator regAlloc, int spillBaseOffset, int rt, int rt2)
{
    Assembler asm = new(writer);

    uint resultMask = (1u << rt) | (1u << rt2);

    // Pick the lowest register that is not one of the two result registers to hold the call target.
    int tempRegister = 0;

    while (tempRegister < 32 && (resultMask & (1u << tempRegister)) != 0)
    {
        tempRegister++;
    }

    Debug.Assert(tempRegister < 32);

    WriteSpill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);

    Operand rn = Register(tempRegister);

    asm.Mov(rn, (ulong)GetCntpctEl0Ptr());
    asm.Blr(rn);

    Operand result64 = Register(0);
    Operand result32 = Register(0, OperandType.I32);
    Operand low32 = Register(rt, OperandType.I32);

    if (rt == rt2)
    {
        asm.Mov(low32, result32); // Zero-extend.
    }
    else if (rt2 == 0)
    {
        // The high half lands in the result register, so the low half has to be saved first.
        asm.Mov(low32, result32); // Zero-extend.
        asm.Lsr(result64, result64, InstEmitCommon.Const(32));
    }
    else
    {
        asm.Lsr(Register(rt2), result64, InstEmitCommon.Const(32));
        asm.Mov(low32, result32); // Zero-extend.
    }

    WriteFill(ref asm, regAlloc, resultMask, skipContext: false, spillBaseOffset, tempRegister);
}
/// <summary>
/// Writes a synchronization point using a fresh assembler over <paramref name="writer"/>,
/// with the context registers included in the spill (<c>skipContext</c> is false).
/// </summary>
public static void WriteSyncPoint(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    Assembler assembler = new(writer);

    WriteSyncPoint(writer, ref assembler, regAlloc, tailMerger, skipContext: false, spillBaseOffset);
}
/// <summary>
/// Writes a synchronization point: the counter stored at <c>NativeContextOffsets.CounterOffset</c>
/// is loaded; when it is zero the host synchronization check is called first, and in both cases the
/// counter is decremented and stored back.
/// </summary>
/// <param name="skipContext">Forwarded to the spill/fill helpers around the host call.</param>
/// <param name="spillBaseOffset">Stack offset where spilled registers are stored.</param>
private static void WriteSyncPoint(CodeWriter writer, ref Assembler asm, RegisterAllocator regAlloc, TailMerger tailMerger, bool skipContext, int spillBaseOffset)
{
int tempRegister = regAlloc.AllocateTempGprRegister();
Operand rt = Register(tempRegister, OperandType.I32);
asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);
// Counter not zero: branch over the host call. The offset is patched below once the target is known.
int branchIndex = writer.InstructionPointer;
asm.Cbnz(rt, 0);
// The temp register is excluded from the spill set, it is reloaded from the context after the call.
WriteSpill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);
// Use a register other than the temp one to hold the call target.
Operand rn = Register(tempRegister == 0 ? 1 : 0);
asm.Mov(rn, (ulong)CheckSynchronizationPtr());
asm.Blr(rn);
// Hand the call result (register 0) to the tail merger, which adds a return taken when it is zero.
tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));
WriteFill(ref asm, regAlloc, 1u << tempRegister, skipContext, spillBaseOffset, tempRegister);
asm.LdrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);
// Patch the CBNZ so that it lands here (offset is placed at bit 5, 19 bits wide).
uint branchInst = writer.ReadInstructionAt(branchIndex);
writer.WriteInstructionAt(branchIndex, branchInst | (((uint)(writer.InstructionPointer - branchIndex) & 0x7ffff) << 5));
asm.Sub(rt, rt, new Operand(OperandKind.Constant, OperandType.I32, 1));
asm.StrRiUn(rt, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);
regAlloc.FreeTempGprRegister(tempRegister);
}
/// <summary>
/// Writes a host function call: spills registers, loads <paramref name="callArgs"/> into registers
/// 0..N-1, calls <paramref name="funcPtr"/> through a scratch register, optionally moves the result
/// out of register 0, then fills the registers back.
/// </summary>
/// <param name="resultRegister">Register that receives the return value, or null when it is discarded.</param>
/// <param name="callArgs">Immediate arguments; fewer than 8 are supported.</param>
private static void WriteCall(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    IntPtr funcPtr,
    bool skipContext,
    int spillBaseOffset,
    int? resultRegister,
    params ulong[] callArgs)
{
    bool hasResult = resultRegister.HasValue;

    // The result register must survive the fill, so it is excluded from spill/fill.
    uint resultMask = hasResult ? 1u << resultRegister.Value : 0u;

    // First register after the arguments holds the call target, unless that is the result register.
    int scratchRegister = callArgs.Length;

    if (hasResult && scratchRegister == resultRegister.Value)
    {
        scratchRegister++;
    }

    WriteSpill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, scratchRegister);

    // We only support up to 7 arguments right now.
    // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
    // We need at least one register to put the function address on, so that reduces the number of
    // registers we can use for that by one.
    Debug.Assert(callArgs.Length < 8);

    int argumentRegister = 0;

    foreach (ulong argument in callArgs)
    {
        asm.Mov(Register(argumentRegister), argument);
        argumentRegister++;
    }

    Operand target = Register(scratchRegister);

    asm.Mov(target, (ulong)funcPtr);
    asm.Blr(target);

    if (hasResult && resultRegister.Value != 0)
    {
        asm.Mov(Register(resultRegister.Value), Register(0));
    }

    WriteFill(ref asm, regAlloc, resultMask, skipContext, spillBaseOffset, scratchRegister);
}
/// <summary>
/// Writes the stores that save registers to the stack before a host call
/// (<c>WriteSpillOrFill</c> with <c>spill: true</c>).
/// </summary>
private static void WriteSpill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: true);
/// <summary>
/// Writes the loads that restore registers from the stack after a host call
/// (<c>WriteSpillOrFill</c> with <c>spill: false</c>).
/// </summary>
private static void WriteFill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, bool skipContext, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, skipContext, exceptMask, spillOffset, tempRegister, spill: false);
/// <summary>
/// Writes the stores (spill) or loads (fill) that save or restore the used general purpose registers,
/// the NZCV flags and the used FP/SIMD registers, relative to the stack pointer.
/// </summary>
/// <param name="skipContext">When true, the registers in Compiler.UsableGprsMask / UsableFpSimdMask are left out.</param>
/// <param name="exceptMask">General purpose registers that must not be saved or restored.</param>
/// <param name="spillOffset">Stack offset of the first slot; both modes walk the same layout from here.</param>
/// <param name="tempRegister">Scratch register used to move the NZCV flags.</param>
/// <param name="spill">True to store registers, false to load them back.</param>
private static void WriteSpillOrFill(
ref Assembler asm,
RegisterAllocator regAlloc,
bool skipContext,
uint exceptMask,
int spillOffset,
int tempRegister,
bool spill)
{
// Callee saved registers and the excluded ones never need saving around the call.
uint gprMask = regAlloc.UsedGprsMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);
if (skipContext)
{
gprMask &= ~Compiler.UsableGprsMask;
}
if (!spill)
{
// We must reload the status register before reloading the GPRs,
// since we might otherwise trash one of them by using it as temp register.
// The flags slot sits right after the GPR slots (8 bytes per register).
Operand rt = Register(tempRegister, OperandType.I32);
asm.LdrRiUn(rt, Register(SpIndex), spillOffset + BitOperations.PopCount(gprMask) * 8);
asm.MsrNzcv(rt);
}
while (gprMask != 0)
{
int reg = BitOperations.TrailingZeroCount(gprMask);
// Use a pair instruction when the next register is also in the mask and the offset is below the pair limit.
if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
if (spill)
{
asm.StpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
}
else
{
asm.LdpRiUn(Register(reg), Register(reg + 1), Register(SpIndex), spillOffset);
}
gprMask &= ~(3u << reg);
spillOffset += 16;
}
else
{
if (spill)
{
asm.StrRiUn(Register(reg), Register(SpIndex), spillOffset);
}
else
{
asm.LdrRiUn(Register(reg), Register(SpIndex), spillOffset);
}
gprMask &= ~(1u << reg);
spillOffset += 8;
}
}
if (spill)
{
// The flags are saved after the GPRs, so the temp register is already stored if it needed to be.
Operand rt = Register(tempRegister, OperandType.I32);
asm.MrsNzcv(rt);
asm.StrRiUn(rt, Register(SpIndex), spillOffset);
}
// Skip the flags slot, then round the offset up to a multiple of 16 for the vector registers.
spillOffset += 8;
if ((spillOffset & 8) != 0)
{
spillOffset += 8;
}
uint fpSimdMask = regAlloc.UsedFpSimdMask;
if (skipContext)
{
fpSimdMask &= ~Compiler.UsableFpSimdMask;
}
while (fpSimdMask != 0)
{
int reg = BitOperations.TrailingZeroCount(fpSimdMask);
// Same pairing rule as for the GPRs, with 16 bytes per register.
if (reg < 31 && (fpSimdMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
if (spill)
{
asm.StpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
}
else
{
asm.LdpRiUn(Register(reg, OperandType.V128), Register(reg + 1, OperandType.V128), Register(SpIndex), spillOffset);
}
fpSimdMask &= ~(3u << reg);
spillOffset += 32;
}
else
{
if (spill)
{
asm.StrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
}
else
{
asm.LdrRiUn(Register(reg, OperandType.V128), Register(SpIndex), spillOffset);
}
fpSimdMask &= ~(1u << reg);
spillOffset += 16;
}
}
}
/// <summary>
/// Creates an operand for the register with the given index, using <c>RegisterType.Integer</c>.
/// </summary>
/// <param name="register">Register index.</param>
/// <param name="type">Operand type; defaults to a 64-bit integer.</param>
public static Operand Register(int register, OperandType type = OperandType.I64)
    => new Operand(register, RegisterType.Integer, type);
}
}

View File

@ -0,0 +1,95 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the scalar VFP arithmetic instructions. Every method forwards to one of the
    /// <c>InstEmitNeonCommon</c> scalar helpers, passing either the Arm64 assembler method that
    /// implements the operation or the negation flags of the multiply-accumulate variants.
    /// </summary>
    static class InstEmitVfpArithmetic
    {
        // Unary operations.

        public static void VabsF(CodeGenContext context, uint rd, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FabsFloat);

        // Binary operations.

        public static void VaddF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FaddFloat);

        public static void VdivF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FdivFloat);

        // Fused multiply-accumulate family, the destination is also an input.

        public static void VfmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmaddFloat);

        public static void VfmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FmsubFloat);

        public static void VfnmaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmaddFloat);

        public static void VfnmsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryRdF(context, rd, rn, rm, size, context.Arm64Assembler.FnmsubFloat);

        public static void Vmaxnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmaxnmFloat);

        public static void Vminnm(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FminnmFloat);

        // Multiply-accumulate family, negation of the accumulator and of the product is selected by flags.

        public static void VmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: false);

        public static void VmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: false, negProduct: true);

        public static void VmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FmulFloat);

        public static void VnegF(CodeGenContext context, uint rd, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FnegFloat);

        public static void VnmlaF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: true);

        public static void VnmlsF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarTernaryMulNegRdF(context, rd, rn, rm, size, negD: true, negProduct: false);

        public static void VnmulF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FnmulFloat);

        public static void VsqrtF(CodeGenContext context, uint rd, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FsqrtFloat);

        public static void VsubF(CodeGenContext context, uint rd, uint rn, uint rm, uint size)
            => InstEmitNeonCommon.EmitScalarBinaryF(context, rd, rn, rm, size, context.Arm64Assembler.FsubFloat);
    }
}

View File

@ -0,0 +1,133 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
    /// <summary>
    /// Emitters for the VFP compare instructions (VCMP/VCMPE, against a register or against zero).
    /// </summary>
    static class InstEmitVfpCompare
    {
        /// <summary>VCMP against zero.</summary>
        public static void VcmpI(CodeGenContext context, uint cond, uint rd, uint size)
            => EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: false);

        /// <summary>VCMP against a register.</summary>
        public static void VcmpR(CodeGenContext context, uint cond, uint rd, uint rm, uint size)
            => EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: false);

        /// <summary>VCMPE against zero.</summary>
        public static void VcmpeI(CodeGenContext context, uint cond, uint rd, uint size)
            => EmitVcmpVcmpe(context, cond, rd, 0, size, zero: true, e: true);

        /// <summary>VCMPE against a register.</summary>
        public static void VcmpeR(CodeGenContext context, uint cond, uint rd, uint rm, uint size)
            => EmitVcmpVcmpe(context, cond, rd, rm, size, zero: false, e: true);

        /// <summary>
        /// Shared body of all compare variants: emits FCMP/FCMPE, copies the resulting flags into the
        /// stored FP flags word, and restores the previous host flags unless the compare could be fused
        /// with a following VMRS.
        /// </summary>
        /// <param name="zero">True to compare against zero instead of <paramref name="rm"/>.</param>
        /// <param name="e">True to emit FCMPE, false for FCMP.</param>
        private static void EmitVcmpVcmpe(CodeGenContext context, uint cond, uint rd, uint rm, uint size, bool zero, bool e)
        {
            Debug.Assert(size >= 1 && size <= 3);

            bool useSingleRegs = size != 3;
            uint ftype = size ^ 2u;
            uint opc = zero ? 1u : 0u;

            using ScopedRegister lhs = InstEmitNeonCommon.MoveScalarToSide(context, rd, useSingleRegs);

            // The right hand side is only materialized when comparing against a register.
            ScopedRegister rhs = default;
            Operand rhsOperand;

            if (!zero)
            {
                rhs = InstEmitNeonCommon.MoveScalarToSide(context, rm, useSingleRegs);
                rhsOperand = rhs.Operand;
            }
            else
            {
                rhsOperand = new Operand(0, RegisterType.Vector, OperandType.V128);
            }

            using ScopedRegister savedFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            bool fusedWithVmrs = CanFuseVcmpVmrs(context, cond);

            if (!fusedWithVmrs)
            {
                InstEmitCommon.GetCurrentFlags(context, savedFlags.Operand);
            }

            if (e)
            {
                context.Arm64Assembler.FcmpeFloat(lhs.Operand, rhsOperand, opc, ftype);
            }
            else
            {
                context.Arm64Assembler.FcmpFloat(lhs.Operand, rhsOperand, opc, ftype);
            }

            // Save result flags from the FCMP operation on FPSCR register, then restore the old flags if needed.
            WriteUpdateFpsrNzcv(context);

            if (!fusedWithVmrs)
            {
                InstEmitCommon.RestoreNzcvFlags(context, savedFlags.Operand);
            }

            // Not covered by "using" because it is only allocated on the register path.
            if (!zero)
            {
                rhs.Dispose();
            }
        }

        /// <summary>
        /// Emits a read-modify-write of the word at <c>NativeContextOffsets.FpFlagsBaseOffset</c> that
        /// replaces bits 31:28 with the current flags.
        /// </summary>
        private static void WriteUpdateFpsrNzcv(CodeGenContext context)
        {
            using ScopedRegister fpFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();
            using ScopedRegister currentFlags = context.RegisterAllocator.AllocateTempGprRegisterScoped();

            Operand nativeContext = InstEmitSystem.Register(context.RegisterAllocator.FixedContextRegister);

            context.Arm64Assembler.LdrRiUn(fpFlags.Operand, nativeContext, NativeContextOffsets.FpFlagsBaseOffset);
            InstEmitCommon.GetCurrentFlags(context, currentFlags.Operand);
            context.Arm64Assembler.Bfi(fpFlags.Operand, currentFlags.Operand, 28, 4);
            context.Arm64Assembler.StrRiUn(fpFlags.Operand, nativeContext, NativeContextOffsets.FpFlagsBaseOffset);
        }

        /// <summary>
        /// Checks whether the next instruction is a VMRS to the NZCV flags with the same condition as
        /// this compare. When it is, the next instruction is marked to be skipped.
        /// </summary>
        /// <returns>True when the following VMRS was fused (and will be skipped), false otherwise.</returns>
        private static bool CanFuseVcmpVmrs(CodeGenContext context, uint vcmpCond)
        {
            // Conditions might be different for the VCMP and VMRS instructions if they are inside a IT block,
            // we don't bother to check right now, so just always skip if inside an IT block.
            if (context.InITBlock)
            {
                return false;
            }

            InstInfo next = context.PeekNextInstruction();

            // We're looking for a VMRS instruction.
            bool isVmrs = next.Name == InstName.Vmrs;

            if (!isVmrs)
            {
                return false;
            }

            // Conditions must match.
            bool sameCondition = vcmpCond == (next.Encoding >> 28);

            if (!sameCondition)
            {
                return false;
            }

            // Reg must be 1, Rt must be PC indicating VMRS to PSTATE.NZCV.
            bool regIsOne = ((next.Encoding >> 16) & 0xf) == 1;
            bool targetsFlags = regIsOne && ((next.Encoding >> 12) & 0xf) == RegisterUtils.PcRegister;

            if (!targetsFlags)
            {
                return false;
            }

            context.SetSkipNextInstruction();

            return true;
        }
    }
}

View File

@ -0,0 +1,305 @@
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Code emitters for the VFP conversion instructions (VCVT family).
/// </summary>
/// <remarks>
/// Several emitters treat <c>size == 3</c> as the F64 case. The SIMD forms cannot convert between
/// F64 and S32/U32, so that case goes through a temporary GPR instead.
/// </remarks>
static class InstEmitVfpConvert
{
    /// <summary>
    /// Emits VCVTA using FCVTAS when <paramref name="op"/> is set and FCVTAU otherwise.
    /// </summary>
    public static void Vcvta(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtasS, context.Arm64Assembler.FcvtasSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtauS, context.Arm64Assembler.FcvtauSH);
            }

            return;
        }

        // F64 source: convert into a GPR first, then insert the result back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtasFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtauFloat);
        }
    }

    /// <summary>
    /// Emits VCVTB, the variant of the half-precision conversion that uses the bottom half of the register.
    /// </summary>
    public static void Vcvtb(CodeGenContext context, uint rd, uint rm, uint sz, uint op)
    {
        EmitVcvtbVcvtt(context, rd, rm, sz, op, top: false);
    }

    /// <summary>
    /// Emits VCVTM using FCVTMS when <paramref name="op"/> is set and FCVTMU otherwise.
    /// </summary>
    public static void Vcvtm(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmsS, context.Arm64Assembler.FcvtmsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtmuS, context.Arm64Assembler.FcvtmuSH);
            }

            return;
        }

        // F64 source: convert into a GPR first, then insert the result back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtmuFloat);
        }
    }

    /// <summary>
    /// Emits VCVTN using FCVTNS when <paramref name="op"/> is set and FCVTNU otherwise.
    /// </summary>
    public static void Vcvtn(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnsS, context.Arm64Assembler.FcvtnsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtnuS, context.Arm64Assembler.FcvtnuSH);
            }

            return;
        }

        // F64 source: convert into a GPR first, then insert the result back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtnuFloat);
        }
    }

    /// <summary>
    /// Emits VCVTP using FCVTPS when <paramref name="op"/> is set and FCVTPU otherwise.
    /// </summary>
    public static void Vcvtp(CodeGenContext context, uint rd, uint rm, bool op, uint size)
    {
        if (size != 3)
        {
            if (op)
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpsS, context.Arm64Assembler.FcvtpsSH);
            }
            else
            {
                InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtpuS, context.Arm64Assembler.FcvtpuSH);
            }

            return;
        }

        // F64 source: convert into a GPR first, then insert the result back into the SIMD register.
        if (op)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpsFloat);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtpuFloat);
        }
    }

    /// <summary>
    /// Emits the conversion between single and double precision.
    /// <paramref name="size"/> equal to 3 converts double to single, any other value converts single to double.
    /// </summary>
    public static void VcvtDs(CodeGenContext context, uint rd, uint rm, uint size)
    {
        using ScopedRegister resultTemp = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        if (size != 3)
        {
            // Single to double.
            using ScopedRegister singleSource = InstEmitNeonCommon.MoveScalarToSide(context, rm, true);

            context.Arm64Assembler.FcvtFloat(resultTemp.Operand, singleSource.Operand, 1, 0);
            InstEmitNeonCommon.InsertResult(context, resultTemp.Operand, rd, false);
        }
        else
        {
            // Double to single.
            using ScopedRegister doubleSource = InstEmitNeonCommon.MoveScalarToSide(context, rm, false);

            context.Arm64Assembler.FcvtFloat(resultTemp.Operand, doubleSource.Operand, 0, 1);
            InstEmitNeonCommon.InsertResult(context, resultTemp.Operand, rd, true);
        }
    }

    /// <summary>
    /// Emits the floating-point to integer conversion that uses the FCVTZU/FCVTZS host instructions.
    /// </summary>
    public static void VcvtIv(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        bool isDouble = size == 3;

        if (isDouble && unsigned)
        {
            // F64 source: convert into a GPR first, then insert the result back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzuFloatInt);
        }
        else if (isDouble)
        {
            InstEmitNeonCommon.EmitScalarUnaryToGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.FcvtzsFloatInt);
        }
        else if (unsigned)
        {
            InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzuIntS, context.Arm64Assembler.FcvtzuIntSH);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FcvtzsIntS, context.Arm64Assembler.FcvtzsIntSH);
        }
    }

    /// <summary>
    /// Emits the integer to floating-point conversion that uses the UCVTF/SCVTF host instructions.
    /// </summary>
    public static void VcvtVi(CodeGenContext context, uint rd, uint rm, bool unsigned, uint size)
    {
        bool isDouble = size == 3;

        if (isDouble && unsigned)
        {
            // F64 result: the S32/U32 source goes through a GPR, then the result is inserted back into the SIMD register.
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.UcvtfFloatInt);
        }
        else if (isDouble)
        {
            InstEmitNeonCommon.EmitScalarUnaryFromGprTempF(context, rd, rm, size, 0, context.Arm64Assembler.ScvtfFloatInt);
        }
        else if (unsigned)
        {
            InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.UcvtfIntS, context.Arm64Assembler.UcvtfIntSH);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.ScvtfIntS, context.Arm64Assembler.ScvtfIntSH);
        }
    }

    /// <summary>
    /// Emits the conversion between floating-point and fixed-point, done in place on <paramref name="rd"/>.
    /// <paramref name="op"/> equal to 1 converts to fixed-point, 0 converts from fixed-point.
    /// </summary>
    public static void VcvtXv(CodeGenContext context, uint rd, uint imm5, bool sx, uint sf, uint op, bool u)
    {
        Debug.Assert(op >> 1 == 0);

        bool toFixed = op == 1;

        // Number of fraction bits: fixed-point width minus imm5, clamped to [1, 8 << sf].
        uint fixedPointWidth = sx ? 32u : 16u;
        uint maxFractionBits = 8u << (int)sf;
        uint fractionBits = Math.Clamp(fixedPointWidth - imm5, 1, maxFractionBits);

        if (toFixed && u)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fractionBits, sf, is16Bit: false, context.Arm64Assembler.FcvtzuFixS);
        }
        else if (toFixed)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fractionBits, sf, is16Bit: false, context.Arm64Assembler.FcvtzsFixS);
        }
        else if (u)
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fractionBits, sf, is16Bit: !sx, context.Arm64Assembler.UcvtfFixS);
        }
        else
        {
            InstEmitNeonCommon.EmitScalarUnaryFixedF(context, rd, rd, fractionBits, sf, is16Bit: !sx, context.Arm64Assembler.ScvtfFixS);
        }
    }

    /// <summary>
    /// Emits the floating-point to integer conversion that rounds using the FPCR rounding mode.
    /// Bit 0 of <paramref name="op"/> clear selects the unsigned conversion.
    /// </summary>
    public static void VcvtrIv(CodeGenContext context, uint rd, uint rm, uint op, uint size)
    {
        Debug.Assert(size == 1 || size == 2 || size == 3);

        bool isUnsigned = (op & 1) == 0;
        bool isHalf = size == 1;
        bool singleRegs = size != 3;

        using ScopedRegister source = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
        using ScopedRegister result = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, source);

        // FCVTZ* always rounds towards zero, so the value is rounded with the FPCR rounding mode beforehand.
        context.Arm64Assembler.FrintiFloat(result.Operand, source.Operand, size ^ 2u);

        if (isUnsigned && isHalf)
        {
            context.Arm64Assembler.FcvtzuIntSH(result.Operand, result.Operand);
        }
        else if (isUnsigned)
        {
            context.Arm64Assembler.FcvtzuIntS(result.Operand, result.Operand, size & 1);
        }
        else if (isHalf)
        {
            context.Arm64Assembler.FcvtzsIntSH(result.Operand, result.Operand);
        }
        else
        {
            context.Arm64Assembler.FcvtzsIntS(result.Operand, result.Operand, size & 1);
        }

        InstEmitNeonCommon.InsertResult(context, result.Operand, rd, singleRegs);
    }

    /// <summary>
    /// Emits VCVTT, the variant of the half-precision conversion that uses the top half of the register.
    /// </summary>
    public static void Vcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op)
    {
        EmitVcvtbVcvtt(context, rd, rm, sz, op, top: true);
    }

    /// <summary>
    /// Shared implementation of VCVTB and VCVTT.
    /// <paramref name="op"/> equal to 0 converts from half precision, any other value converts to half precision.
    /// <paramref name="sz"/> equal to 1 selects double precision for the other operand, otherwise single precision.
    /// </summary>
    public static void EmitVcvtbVcvtt(CodeGenContext context, uint rd, uint rm, uint sz, uint op, bool top)
    {
        bool isDouble = sz == 1;
        uint wideType = isDouble ? 1u : 0u;

        using ScopedRegister resultTemp = context.RegisterAllocator.AllocateTempSimdRegisterScoped();

        if (op != 0)
        {
            // Single/double to half.
            using ScopedRegister wideSource = InstEmitNeonCommon.MoveScalarToSide(context, rm, !isDouble);

            context.Arm64Assembler.FcvtFloat(resultTemp.Operand, wideSource.Operand, 3u, wideType);
            InstEmitNeonCommon.Insert16BitResult(context, resultTemp.Operand, rd, top);
        }
        else
        {
            // Half to single/double.
            using ScopedRegister halfSource = InstEmitNeonCommon.Move16BitScalarToSide(context, rm, top);

            context.Arm64Assembler.FcvtFloat(resultTemp.Operand, halfSource.Operand, wideType, 3u);
            InstEmitNeonCommon.InsertResult(context, resultTemp.Operand, rd, !isDouble);
        }
    }
}
}

View File

@ -0,0 +1,22 @@
using Ryujinx.Cpu.LightningJit.CodeGen;
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Code emitters for the VFP conditional select instruction.
/// </summary>
static class InstEmitVfpMove
{
    /// <summary>
    /// Emits VSEL as an FCSEL between <paramref name="rn"/> and <paramref name="rm"/>, writing the result to <paramref name="rd"/>.
    /// </summary>
    /// <param name="context">Code generation context</param>
    /// <param name="rd">Destination register index</param>
    /// <param name="rn">First source register index</param>
    /// <param name="rm">Second source register index</param>
    /// <param name="cc">Condition field of the instruction, expanded below into the condition passed to FCSEL</param>
    /// <param name="size">Size field; 3 selects double registers, anything else single registers</param>
    public static void Vsel(CodeGenContext context, uint rd, uint rn, uint rm, uint cc, uint size)
    {
        bool singleRegs = size != 3;

        // cc is placed at bit 2 and up. Bit 1 of the condition is (bit 1 of cc) XOR (bit 0 of cc), bit 0 stays clear.
        uint upperBits = cc << 2;
        uint bit1 = (cc & 2) ^ ((cc << 1) & 2);
        uint cond = upperBits | bit1;

        using ScopedRegister first = InstEmitNeonCommon.MoveScalarToSide(context, rn, singleRegs);
        using ScopedRegister second = InstEmitNeonCommon.MoveScalarToSide(context, rm, singleRegs);
        using ScopedRegister selected = InstEmitNeonCommon.PickSimdRegister(context.RegisterAllocator, first, second);

        context.Arm64Assembler.FcselFloat(selected.Operand, first.Operand, cond, second.Operand, size ^ 2u);

        InstEmitNeonCommon.InsertResult(context, selected.Operand, rd, singleRegs);
    }
}
}

View File

@ -0,0 +1,40 @@
namespace Ryujinx.Cpu.LightningJit.Arm32.Target.Arm64
{
/// <summary>
/// Code emitters for the VFP round-to-integral instructions (VRINT family).
/// Each emitter forwards to the scalar unary helper with the matching FRINT* host instruction.
/// </summary>
static class InstEmitVfpRound
{
    /// <summary>Emits VRINTA using FRINTA.</summary>
    public static void Vrinta(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintaFloat);

    /// <summary>Emits VRINTM using FRINTM.</summary>
    public static void Vrintm(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintmFloat);

    /// <summary>Emits VRINTN using FRINTN.</summary>
    public static void Vrintn(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintnFloat);

    /// <summary>Emits VRINTP using FRINTP.</summary>
    public static void Vrintp(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintpFloat);

    /// <summary>Emits VRINTR using FRINTI.</summary>
    public static void Vrintr(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintiFloat);

    /// <summary>Emits VRINTX using FRINTX.</summary>
    public static void Vrintx(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintxFloat);

    /// <summary>Emits VRINTZ using FRINTZ.</summary>
    public static void Vrintz(CodeGenContext context, uint rd, uint rm, uint size)
        => InstEmitNeonCommon.EmitScalarUnaryF(context, rd, rm, size, context.Arm64Assembler.FrintzFloat);
}
}

View File

@ -0,0 +1,29 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Entry point that selects the compiler backend for the requested target architecture.
/// </summary>
static class A64Compiler
{
    /// <summary>
    /// Compiles the function at <paramref name="address"/> for the given target architecture.
    /// </summary>
    /// <param name="cpuPreset">CPU preset forwarded to the backend</param>
    /// <param name="memoryManager">Memory manager forwarded to the backend</param>
    /// <param name="address">Address of the function to compile</param>
    /// <param name="funcTable">Function table forwarded to the backend</param>
    /// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the backend</param>
    /// <param name="targetArch">Architecture to generate code for; only Arm64 is supported</param>
    /// <returns>The compiled function</returns>
    /// <exception cref="PlatformNotSupportedException"><paramref name="targetArch"/> is not Arm64</exception>
    public static CompiledFunction Compile(
        CpuPreset cpuPreset,
        IMemoryManager memoryManager,
        ulong address,
        AddressTable<ulong> funcTable,
        IntPtr dispatchStubPtr,
        Architecture targetArch)
    {
        if (targetArch != Architecture.Arm64)
        {
            throw new PlatformNotSupportedException();
        }

        return Compiler.Compile(cpuPreset, memoryManager, address, funcTable, dispatchStubPtr);
    }
}
}

View File

@ -0,0 +1,138 @@
using Ryujinx.Cpu.LightningJit.Graph;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// A run of consecutive instructions, together with its links to predecessor and successor blocks.
/// </summary>
class Block : IBlock
{
    // Every instruction occupies 4 bytes, which ties the address range to the instruction count.
    private const ulong InstructionSize = 4;

    private readonly List<Block> _predecessors = new();
    private readonly List<Block> _successors = new();

    public int Index { get; private set; }

    public int PredecessorsCount => _predecessors.Count;
    public int SuccessorsCount => _successors.Count;

    public readonly ulong Address;
    public readonly ulong EndAddress;
    public readonly List<InstInfo> Instructions;
    public readonly bool EndsWithBranch;
    public readonly bool IsTruncated;
    public readonly bool IsLoopEnd;

    /// <summary>
    /// Creates a block covering the address range [<paramref name="address"/>, <paramref name="endAddress"/>).
    /// </summary>
    public Block(ulong address, ulong endAddress, List<InstInfo> instructions, bool endsWithBranch, bool isTruncated, bool isLoopEnd)
    {
        Debug.Assert((int)((endAddress - address) / InstructionSize) == instructions.Count);

        Address = address;
        EndAddress = endAddress;
        Instructions = instructions;
        EndsWithBranch = endsWithBranch;
        IsTruncated = isTruncated;
        IsLoopEnd = isLoopEnd;
    }

    /// <summary>
    /// Splits the block in two at <paramref name="address"/>.
    /// The left block loses the end-of-block properties, the right block inherits them from this block.
    /// </summary>
    /// <param name="address">Address of the first instruction of the right block</param>
    /// <returns>The left and right blocks, in that order; neither carries predecessor or successor links</returns>
    public (Block, Block) SplitAtAddress(ulong address)
    {
        int leftCount = (int)((address - Address) / InstructionSize);
        int rightCount = Instructions.Count - leftCount;

        // Empty blocks would technically be valid, but creating them is not wanted.
        Debug.Assert(leftCount != 0);
        Debug.Assert(rightCount != 0);

        List<InstInfo> leftInstructions = Instructions.GetRange(0, leftCount);
        Block left = new(Address, address, leftInstructions, false, false, false);

        List<InstInfo> rightInstructions = Instructions.GetRange(leftCount, rightCount);
        Block right = new(address, EndAddress, rightInstructions, EndsWithBranch, IsTruncated, IsLoopEnd);

        return (left, right);
    }

    /// <summary>
    /// Assigns the index of this block.
    /// </summary>
    public void Number(int index) => Index = index;

    /// <summary>
    /// Adds a successor, unless it is already registered.
    /// </summary>
    public void AddSuccessor(Block block)
    {
        if (_successors.Contains(block))
        {
            return;
        }

        _successors.Add(block);
    }

    /// <summary>
    /// Adds a predecessor, unless it is already registered.
    /// </summary>
    public void AddPredecessor(Block block)
    {
        if (_predecessors.Contains(block))
        {
            return;
        }

        _predecessors.Add(block);
    }

    public IBlock GetSuccessor(int index) => _successors[index];

    public IBlock GetPredecessor(int index) => _predecessors[index];

    /// <summary>
    /// Combines the register use of every instruction of the block, in program order.
    /// A register read only counts as a read of the block if no earlier instruction of the block wrote it.
    /// </summary>
    public RegisterUse ComputeUseMasks()
    {
        int count = Instructions.Count;

        if (count == 0)
        {
            return new(0u, 0u, 0u, 0u, 0u, 0u);
        }

        RegisterUse combined = Instructions[0].RegisterUse;
        int position = 1;

        while (position < count)
        {
            RegisterUse next = Instructions[position].RegisterUse;

            combined = new(combined.Read | (next.Read & ~combined.Write), combined.Write | next.Write);
            position++;
        }

        return combined;
    }

    /// <summary>
    /// Indicates whether the block ends with a call or exception instruction and is not truncated.
    /// </summary>
    public bool EndsWithContextLoad()
    {
        if (IsTruncated)
        {
            return false;
        }

        return EndsWithContextStoreAndLoad();
    }

    /// <summary>
    /// Indicates whether the block ends with a call or exception instruction.
    /// </summary>
    public bool EndsWithContextStore() => EndsWithContextStoreAndLoad();

    private bool EndsWithContextStoreAndLoad()
    {
        if (Instructions.Count > 0)
        {
            InstName last = Instructions[^1].Name;

            return last.IsCall() || last.IsException();
        }

        return false;
    }
}
}

View File

@ -0,0 +1,20 @@
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Helpers that extract signed, word-scaled immediates from an instruction encoding.
/// </summary>
static class ImmUtils
{
    /// <summary>
    /// Extracts the signed 14-bit immediate at bits 5 to 18, multiplied by 4.
    /// </summary>
    public static int ExtractSImm14Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 14);
    }

    /// <summary>
    /// Extracts the signed 19-bit immediate at bits 5 to 23, multiplied by 4.
    /// </summary>
    public static int ExtractSImm19Times4(uint encoding)
    {
        return SignExtendTimes4(encoding >> 5, 19);
    }

    /// <summary>
    /// Extracts the signed 26-bit immediate at bits 0 to 25, multiplied by 4.
    /// </summary>
    public static int ExtractSImm26Times4(uint encoding)
    {
        return SignExtendTimes4(encoding, 26);
    }

    /// <summary>
    /// Sign-extends the low <paramref name="width"/> bits of <paramref name="field"/> and multiplies the result by 4.
    /// Bits above <paramref name="width"/> are ignored.
    /// </summary>
    private static int SignExtendTimes4(uint field, int width)
    {
        int unusedBits = 32 - width;

        // Move the field's sign bit to bit 31, then shift back arithmetically to replicate it.
        int signExtended = (int)(field << unusedBits) >> unusedBits;

        return signExtended << 2;
    }
}
}

View File

@ -0,0 +1,108 @@
using System;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
// Describes which register fields an instruction encoding has and how they are to be interpreted.
// The members with a single shifted bit (optionally OR'ed with their base flag) are the building blocks,
// the members after MemWBack are named combinations of them.
// Bit positions quoted below are the ones RegisterUtils uses to read each field.
[Flags]
enum InstFlags
{
None = 0,
// Rd: register field at bits 0-4, counted as written.
Rd = 1 << 0,
// RdSP: Rd where index 31 is kept as index 31 instead of being mapped to the special zero register index.
RdSP = Rd | (1 << 1),
// ReadRd: Rd is additionally counted as read.
ReadRd = 1 << 2,
// Rt: register field at bits 0-4 (same field as Rd).
Rt = 1 << 3,
// RtSeq: Rt is the first of a run of consecutive FP/SIMD registers.
RtSeq = Rt | (1 << 4),
// ReadRt: Rt (and Rt2, when present) are counted as read instead of written.
ReadRt = 1 << 5,
// Rt2: register field at bits 10-14.
Rt2 = 1 << 6,
// Rn: register field at bits 5-9, counted as read.
Rn = 1 << 7,
// RnSeq: Rn is the first of a run of consecutive FP/SIMD registers.
RnSeq = Rn | (1 << 8),
// RnSP: Rn where index 31 is kept as index 31 instead of being mapped to the special zero register index.
RnSP = Rn | (1 << 9),
// Rm: register field at bits 16-20, counted as read.
Rm = 1 << 10,
// Rs: register field at bits 16-20 (same field as Rm).
Rs = 1 << 11,
// Ra: register field at bits 10-14 (same field as Rt2), counted as read.
Ra = 1 << 12,
// NOTE(review): Nzcv, C, S and Qc are not interpreted by RegisterUtils; by name they presumably
// describe condition flag and saturation flag usage - confirm against the code that consumes them.
Nzcv = 1 << 13,
C = 1 << 14,
S = 1 << 15,
Qc = 1 << 16,
// FpSimd: register fields name FP/SIMD registers, except where the flags below say otherwise.
FpSimd = 1 << 17,
// FpSimdFromGpr: Rn is a general purpose register.
FpSimdFromGpr = FpSimd | (1 << 18),
// FpSimdToGpr: Rd is a general purpose register.
FpSimdToGpr = FpSimd | (1 << 19),
// FpSimdFromToGpr: direction is decided per encoding; RegisterUtils checks bit 16 of the encoding (FMOV general).
FpSimdFromToGpr = FpSimdFromGpr | FpSimdToGpr,
// Memory: on FP/SIMD instructions, Rn and Rm are general purpose registers.
Memory = 1 << 20,
// MemWBack: Rn is additionally counted as written.
MemWBack = 1 << 21,
// Named combinations of the flags above.
RdFpSimd = Rd | FpSimd,
RdReadRd = Rd | ReadRd,
RdReadRdRn = Rd | ReadRd | Rn,
RdReadRdRnFpSimd = Rd | ReadRd | Rn | FpSimd,
RdReadRdRnFpSimdFromGpr = Rd | ReadRd | Rn | FpSimdFromGpr,
RdReadRdRnQcFpSimd = Rd | ReadRd | Rn | Qc | FpSimd,
RdReadRdRnRmFpSimd = Rd | ReadRd | Rn | Rm | FpSimd,
RdReadRdRnRmQcFpSimd = Rd | ReadRd | Rn | Rm | Qc | FpSimd,
RdRn = Rd | Rn,
RdRnFpSimd = Rd | Rn | FpSimd,
RdRnFpSimdFromGpr = Rd | Rn | FpSimdFromGpr,
RdRnFpSimdToGpr = Rd | Rn | FpSimdToGpr,
RdRnQcFpSimd = Rd | Rn | Qc | FpSimd,
RdRnRm = Rd | Rn | Rm,
RdRnRmC = Rd | Rn | Rm | C,
RdRnRmCS = Rd | Rn | Rm | C | S,
RdRnRmFpSimd = Rd | Rn | Rm | FpSimd,
RdRnRmNzcv = Rd | Rn | Rm | Nzcv,
RdRnRmNzcvFpSimd = Rd | Rn | Rm | Nzcv | FpSimd,
RdRnRmQcFpSimd = Rd | Rn | Rm | Qc | FpSimd,
RdRnRmRa = Rd | Rn | Rm | Ra,
RdRnRmRaFpSimd = Rd | Rn | Rm | Ra | FpSimd,
RdRnRmS = Rd | Rn | Rm | S,
RdRnRsS = Rd | Rn | Rs | S,
RdRnS = Rd | Rn | S,
RdRnSeqRmFpSimd = Rd | RnSeq | Rm | FpSimd,
RdRnSFpSimd = Rd | Rn | S | FpSimd,
RdRnSFpSimdFromToGpr = Rd | Rn | S | FpSimdFromToGpr,
RdRnSP = Rd | RnSP,
RdRnSPRmS = Rd | RnSP | Rm | S,
RdRnSPS = Rd | RnSP | S,
RdSPRn = RdSP | Rn,
RdSPRnSP = RdSP | RnSP,
RdSPRnSPRm = RdSP | RnSP | Rm,
RnC = Rn | C,
RnNzcvS = Rn | Nzcv | S,
RnRm = Rn | Rm,
RnRmNzcvS = Rn | Rm | Nzcv | S,
RnRmNzcvSFpSimd = Rn | Rm | Nzcv | S | FpSimd,
RnRmSFpSimd = Rn | Rm | S | FpSimd,
RnSPRm = RnSP | Rm,
RtFpSimd = Rt | FpSimd,
RtReadRt = Rt | ReadRt,
RtReadRtRnSP = Rt | ReadRt | RnSP,
RtReadRtRnSPFpSimd = Rt | ReadRt | RnSP | FpSimd,
RtReadRtRnSPFpSimdMemWBack = Rt | ReadRt | RnSP | FpSimd | MemWBack,
RtReadRtRnSPMemWBack = Rt | ReadRt | RnSP | MemWBack,
RtReadRtRnSPRm = Rt | ReadRt | RnSP | Rm,
RtReadRtRnSPRmFpSimd = Rt | ReadRt | RnSP | Rm | FpSimd,
RtReadRtRnSPRmFpSimdMemWBack = Rt | ReadRt | RnSP | Rm | FpSimd | MemWBack,
RtReadRtRnSPRs = Rt | ReadRt | RnSP | Rs,
RtReadRtRnSPRsS = Rt | ReadRt | RnSP | Rs | S,
RtReadRtRt2RnSP = Rt | ReadRt | Rt2 | RnSP,
RtReadRtRt2RnSPFpSimd = Rt | ReadRt | Rt2 | RnSP | FpSimd,
RtReadRtRt2RnSPFpSimdMemWBack = Rt | ReadRt | Rt2 | RnSP | FpSimd | MemWBack,
RtReadRtRt2RnSPMemWBack = Rt | ReadRt | Rt2 | RnSP | MemWBack,
RtReadRtRt2RnSPRs = Rt | ReadRt | Rt2 | RnSP | Rs,
RtReadRtRt2RnSPS = Rt | ReadRt | Rt2 | RnSP | S,
RtRnSP = Rt | RnSP,
RtRnSPFpSimd = Rt | RnSP | FpSimd,
RtRnSPFpSimdMemWBack = Rt | RnSP | FpSimd | MemWBack,
RtRnSPMemWBack = Rt | RnSP | MemWBack,
RtRnSPRm = Rt | RnSP | Rm,
RtRnSPRmFpSimd = Rt | RnSP | Rm | FpSimd,
RtRnSPRmFpSimdMemWBack = Rt | RnSP | Rm | FpSimd | MemWBack,
RtRnSPRs = Rt | RnSP | Rs,
RtRt2RnSP = Rt | Rt2 | RnSP,
RtRt2RnSPFpSimd = Rt | Rt2 | RnSP | FpSimd,
RtRt2RnSPFpSimdMemWBack = Rt | Rt2 | RnSP | FpSimd | MemWBack,
RtRt2RnSPMemWBack = Rt | Rt2 | RnSP | MemWBack,
RtSeqReadRtRnSPFpSimd = RtSeq | ReadRt | RnSP | FpSimd,
RtSeqReadRtRnSPRmFpSimdMemWBack = RtSeq | ReadRt | RnSP | Rm | FpSimd | MemWBack,
RtSeqRnSPFpSimd = RtSeq | RnSP | FpSimd,
RtSeqRnSPRmFpSimdMemWBack = RtSeq | RnSP | Rm | FpSimd | MemWBack,
}
}

View File

@ -0,0 +1,22 @@
using Ryujinx.Cpu.LightningJit.Graph;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Immutable description of a single decoded instruction.
/// </summary>
readonly struct InstInfo
{
    /// <summary>Raw 32-bit encoding of the instruction.</summary>
    public readonly uint Encoding;

    /// <summary>Name identifying the instruction.</summary>
    public readonly InstName Name;

    /// <summary>Flags describing the register fields of the instruction.</summary>
    public readonly InstFlags Flags;

    /// <summary>Address form of the instruction.</summary>
    public readonly AddressForm AddressForm;

    /// <summary>Registers read and written by the instruction.</summary>
    public readonly RegisterUse RegisterUse;

    /// <summary>
    /// Creates a new instruction description from its already decoded parts.
    /// </summary>
    public InstInfo(uint encoding, InstName name, InstFlags flags, AddressForm addressForm, in RegisterUse registerUse)
    {
        (Encoding, Name, Flags, AddressForm, RegisterUse) = (encoding, name, flags, addressForm, registerUse);
    }
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,64 @@
using Ryujinx.Cpu.LightningJit.Graph;
using System;
using System.Collections.Generic;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// A list of blocks, together with the per-block register read/write masks computed for it.
/// </summary>
class MultiBlock : IBlockList
{
    public readonly List<Block> Blocks;
    public readonly RegisterMask[] ReadMasks;
    public readonly RegisterMask[] WriteMasks;
    public readonly RegisterMask GlobalUseMask;
    public readonly bool HasHostCall;
    public readonly bool HasMemoryInstruction;
    public readonly bool IsTruncated;

    public int Count => Blocks.Count;

    public IBlock this[int index] => Blocks[index];

    /// <summary>
    /// Creates the block list and computes the read and write masks of its blocks.
    /// </summary>
    /// <param name="blocks">Blocks of the list; the last one decides <see cref="IsTruncated"/></param>
    /// <param name="globalUseMask">Value stored in <see cref="GlobalUseMask"/></param>
    /// <param name="hasHostCall">Value stored in <see cref="HasHostCall"/></param>
    /// <param name="hasMemoryInstruction">Value stored in <see cref="HasMemoryInstruction"/></param>
    public MultiBlock(List<Block> blocks, RegisterMask globalUseMask, bool hasHostCall, bool hasMemoryInstruction)
    {
        // The data flow pass receives this instance, so the blocks have to be in place before it runs.
        Blocks = blocks;
        (ReadMasks, WriteMasks) = DataFlow.GetGlobalUses(this);

        GlobalUseMask = globalUseMask;
        HasHostCall = hasHostCall;
        HasMemoryInstruction = hasMemoryInstruction;
        IsTruncated = blocks[^1].IsTruncated;
    }

    /// <summary>
    /// Writes the links, GPR masks and instructions of every block to the console.
    /// </summary>
    public void PrintDebugInfo()
    {
        foreach (Block current in Blocks)
        {
            Console.WriteLine($"bb {current.Index}");

            string predecessors = JoinPredecessorIndices(current);
            string successors = JoinSuccessorIndices(current);

            Console.WriteLine($" predecessors: {predecessors}");
            Console.WriteLine($" successors: {successors}");
            Console.WriteLine($" gpr read mask: 0x{ReadMasks[current.Index].GprMask:X} 0x{current.ComputeUseMasks().Read.GprMask:X}");
            Console.WriteLine($" gpr write mask: 0x{WriteMasks[current.Index].GprMask:X}");

            int position = 0;

            while (position < current.Instructions.Count)
            {
                Console.WriteLine($" {position} 0x{current.Instructions[position].Encoding:X8} {current.Instructions[position].Name}");
                position++;
            }
        }
    }

    // Space-separated indices of the predecessors of a block.
    private static string JoinPredecessorIndices(Block block)
    {
        List<int> indices = new();

        for (int i = 0; i < block.PredecessorsCount; i++)
        {
            indices.Add(block.GetPredecessor(i).Index);
        }

        return string.Join(' ', indices);
    }

    // Space-separated indices of the successors of a block.
    private static string JoinSuccessorIndices(Block block)
    {
        List<int> indices = new();

        for (int i = 0; i < block.SuccessorsCount; i++)
        {
            indices.Add(block.GetSuccessor(i).Index);
        }

        return string.Join(' ', indices);
    }
}
}

View File

@ -0,0 +1,154 @@
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
/// <summary>
/// Tracks which general purpose registers are in use, hands out temporary registers, and remaps
/// the reserved registers to free ones.
/// </summary>
class RegisterAllocator
{
    public const int MaxTemps = 1;
    public const int MaxTempsInclFixed = MaxTemps + 2;

    // Number of bits in the register masks, also the "nothing found" result of the bit scans.
    private const int MaskBits = sizeof(uint) * 8;

    private uint _gprMask;
    private readonly uint _fpSimdMask;
    private readonly uint _pStateMask;
    private uint _tempGprsMask;
    private readonly int[] _registerMap;

    public int FixedContextRegister { get; }
    public int FixedPageTableRegister { get; }

    public uint AllGprMask => (_gprMask & ~RegisterUtils.ReservedRegsMask) | _tempGprsMask;
    public uint AllFpSimdMask => _fpSimdMask;
    public uint AllPStateMask => _pStateMask;

    /// <summary>
    /// Creates the allocator, picks the two fixed registers and builds the reserved register map.
    /// </summary>
    /// <param name="gprMask">Mask of the general purpose registers already in use</param>
    /// <param name="fpSimdMask">Mask of the FP/SIMD registers in use</param>
    /// <param name="pStateMask">Mask of the PSTATE flags in use</param>
    /// <param name="hasHostCall">True if the function performs calls</param>
    public RegisterAllocator(uint gprMask, uint fpSimdMask, uint pStateMask, bool hasHostCall)
    {
        _gprMask = gprMask;
        _fpSimdMask = fpSimdMask;
        _pStateMask = pStateMask;

        // If the function has calls, placing the fixed registers on callee saved registers
        // avoids the need to spill them across those calls.
        FixedContextRegister = hasHostCall
            ? AllocateAndMarkTempGprRegisterWithPreferencing()
            : AllocateAndMarkTempGprRegister();
        FixedPageTableRegister = hasHostCall
            ? AllocateAndMarkTempGprRegisterWithPreferencing()
            : AllocateAndMarkTempGprRegister();

        _tempGprsMask = (1u << FixedContextRegister) | (1u << FixedPageTableRegister);

        // Start from the identity mapping, then redirect the reserved registers that are in use.
        int[] identity = new int[32];

        for (int register = 0; register < identity.Length; register++)
        {
            identity[register] = register;
        }

        _registerMap = identity;

        BuildRegisterMap(_registerMap);

        // Allocate and immediately release the temporaries: they stay recorded in the temporary
        // registers mask, but become available again for later allocations.
        Span<int> temps = stackalloc int[MaxTemps];

        for (int slot = 0; slot < temps.Length; slot++)
        {
            temps[slot] = AllocateAndMarkTempGprRegister();
        }

        foreach (int temp in temps)
        {
            FreeTempGprRegister(temp);
        }
    }

    // Maps every reserved register that is in use to a newly allocated register.
    private void BuildRegisterMap(Span<int> map)
    {
        // "pending &= pending - 1" clears the lowest set bit, which is the register just handled.
        for (uint pending = _gprMask & RegisterUtils.ReservedRegsMask; pending != 0; pending &= pending - 1)
        {
            int reserved = BitOperations.TrailingZeroCount(pending);
            int replacement = AllocateAndMarkTempGprRegister();

            map[reserved] = replacement;
            _tempGprsMask |= 1u << replacement;
        }
    }

    /// <summary>
    /// Gets the register that replaces the register with the given index.
    /// Registers without a replacement map to themselves.
    /// </summary>
    public int RemapReservedGprRegister(int index) => _registerMap[index];

    private int AllocateAndMarkTempGprRegister()
    {
        int allocated = AllocateTempGprRegister();

        _tempGprsMask |= 1u << allocated;

        return allocated;
    }

    private int AllocateAndMarkTempGprRegisterWithPreferencing()
    {
        int allocated = AllocateTempRegisterWithPreferencing();

        _tempGprsMask |= 1u << allocated;

        return allocated;
    }

    /// <summary>
    /// Allocates the lowest free general purpose register that is not reserved.
    /// </summary>
    /// <exception cref="InvalidOperationException">There are no free registers</exception>
    public int AllocateTempGprRegister() => AllocateTempRegister(ref _gprMask);

    /// <summary>
    /// Marks the general purpose register with the given index as free.
    /// </summary>
    public void FreeTempGprRegister(int index) => FreeTempRegister(ref _gprMask, index);

    // Takes the lowest free callee saved register when it is not reserved,
    // otherwise falls back to the regular allocation.
    private int AllocateTempRegisterWithPreferencing()
    {
        int candidate = BitOperations.TrailingZeroCount(~_gprMask & AbiConstants.GprCalleeSavedRegsMask);
        bool usable = candidate < 32 && ((1u << candidate) & RegisterUtils.ReservedRegsMask) == 0;

        if (!usable)
        {
            return AllocateTempRegister(ref _gprMask);
        }

        _gprMask |= 1u << candidate;

        return candidate;
    }

    private static int AllocateTempRegister(ref uint mask)
    {
        uint unavailable = mask | RegisterUtils.ReservedRegsMask;
        int free = BitOperations.TrailingZeroCount(~unavailable);

        if (free == MaskBits)
        {
            throw new InvalidOperationException("No free registers.");
        }

        mask |= 1u << free;

        return free;
    }

    private static void FreeTempRegister(ref uint mask, int index)
    {
        uint bit = 1u << index;

        mask &= ~bit;
    }
}
}

View File

@ -0,0 +1,495 @@
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64
{
static class RegisterUtils
{
private const int RdRtBit = 0;
private const int RnBit = 5;
private const int RmRsBit = 16;
private const int RaRt2Bit = 10;
// Some of those register have specific roles and can't be used as general purpose registers.
// X18 - Reserved for platform specific usage.
// X29 - Frame pointer.
// X30 - Return address.
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
public const uint ReservedRegsMask = (1u << 18) | (1u << 29) | (1u << 30) | (1u << 31);
public const int LrIndex = 30;
public const int SpIndex = 31;
public const int ZrIndex = 31;
public const int SpecialZrIndex = 32;
public static uint RemapRegisters(RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
if (flags.HasFlag(InstFlags.Rd) && (!flags.HasFlag(InstFlags.FpSimd) || IsFpToGpr(flags, encoding)))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit, flags.HasFlag(InstFlags.RdSP));
}
if (flags.HasFlag(InstFlags.Rn) && (!flags.HasFlag(InstFlags.FpSimd) || IsFpFromGpr(flags, encoding) || flags.HasFlag(InstFlags.Memory)))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RnBit, flags.HasFlag(InstFlags.RnSP));
}
if (!flags.HasFlag(InstFlags.FpSimd))
{
if (flags.HasFlag(InstFlags.Rm) || flags.HasFlag(InstFlags.Rs))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
}
if (flags.HasFlag(InstFlags.Ra) || flags.HasFlag(InstFlags.Rt2))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RaRt2Bit);
}
if (flags.HasFlag(InstFlags.Rt))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RdRtBit);
}
}
else if (flags.HasFlag(InstFlags.Rm) && flags.HasFlag(InstFlags.Memory))
{
encoding = ReplaceGprRegister(regAlloc, encoding, RmRsBit);
}
return encoding;
}
public static uint ReplaceRt(uint encoding, int newIndex)
{
return ReplaceRegister(encoding, newIndex, RdRtBit);
}
public static uint ReplaceRn(uint encoding, int newIndex)
{
return ReplaceRegister(encoding, newIndex, RnBit);
}
private static uint ReplaceRegister(uint encoding, int newIndex, int bit)
{
encoding &= ~(0x1fu << bit);
encoding |= (uint)newIndex << bit;
return encoding;
}
private static uint ReplaceGprRegister(RegisterAllocator regAlloc, uint encoding, int bit, bool hasSP = false)
{
int oldIndex = (int)(encoding >> bit) & 0x1f;
if (oldIndex == ZrIndex && !hasSP)
{
return encoding;
}
int newIndex = regAlloc.RemapReservedGprRegister(oldIndex);
encoding &= ~(0x1fu << bit);
encoding |= (uint)newIndex << bit;
return encoding;
}
public static (uint, uint) PopulateReadMasks(InstName name, InstFlags flags, uint encoding)
{
uint gprMask = 0;
uint fpSimdMask = 0;
if (flags.HasFlag(InstFlags.FpSimd))
{
if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
{
uint mask = MaskFromIndex(ExtractRd(flags, encoding));
if (IsFpToGpr(flags, encoding))
{
gprMask |= mask;
}
else
{
fpSimdMask |= mask;
}
}
if (flags.HasFlag(InstFlags.Rn))
{
uint mask = MaskFromIndex(ExtractRn(flags, encoding));
if (flags.HasFlag(InstFlags.RnSeq))
{
int count = GetRnSequenceCount(encoding);
for (int index = 0; index < count; index++, mask <<= 1)
{
fpSimdMask |= mask;
}
}
else if (IsFpFromGpr(flags, encoding) || flags.HasFlag(InstFlags.Memory))
{
gprMask |= mask;
}
else
{
fpSimdMask |= mask;
}
}
if (flags.HasFlag(InstFlags.Rm))
{
uint mask = MaskFromIndex(ExtractRm(flags, encoding));
if (flags.HasFlag(InstFlags.Memory))
{
gprMask |= mask;
}
else
{
fpSimdMask |= mask;
}
}
if (flags.HasFlag(InstFlags.Ra))
{
fpSimdMask |= MaskFromIndex(ExtractRa(flags, encoding));
}
if (flags.HasFlag(InstFlags.ReadRt))
{
if (flags.HasFlag(InstFlags.Rt))
{
uint mask = MaskFromIndex(ExtractRt(flags, encoding));
if (flags.HasFlag(InstFlags.RtSeq))
{
int count = GetRtSequenceCount(name, encoding);
for (int index = 0; index < count; index++, mask <<= 1)
{
fpSimdMask |= mask;
}
}
else
{
fpSimdMask |= mask;
}
}
if (flags.HasFlag(InstFlags.Rt2))
{
fpSimdMask |= MaskFromIndex(ExtractRt2(flags, encoding));
}
}
}
else
{
if (flags.HasFlag(InstFlags.Rd) && flags.HasFlag(InstFlags.ReadRd))
{
gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
}
if (flags.HasFlag(InstFlags.Rn))
{
gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
}
if (flags.HasFlag(InstFlags.Rm))
{
gprMask |= MaskFromIndex(ExtractRm(flags, encoding));
}
if (flags.HasFlag(InstFlags.Ra))
{
gprMask |= MaskFromIndex(ExtractRa(flags, encoding));
}
if (flags.HasFlag(InstFlags.ReadRt))
{
if (flags.HasFlag(InstFlags.Rt))
{
gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
}
if (flags.HasFlag(InstFlags.Rt2))
{
gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
}
}
}
return (gprMask, fpSimdMask);
}
public static (uint, uint) PopulateWriteMasks(InstName name, InstFlags flags, uint encoding)
{
uint gprMask = 0;
uint fpSimdMask = 0;
if (flags.HasFlag(InstFlags.MemWBack))
{
gprMask |= MaskFromIndex(ExtractRn(flags, encoding));
}
if (flags.HasFlag(InstFlags.FpSimd))
{
if (flags.HasFlag(InstFlags.Rd))
{
uint mask = MaskFromIndex(ExtractRd(flags, encoding));
if (IsFpToGpr(flags, encoding))
{
gprMask |= mask;
}
else
{
fpSimdMask |= mask;
}
}
if (!flags.HasFlag(InstFlags.ReadRt))
{
if (flags.HasFlag(InstFlags.Rt))
{
uint mask = MaskFromIndex(ExtractRt(flags, encoding));
if (flags.HasFlag(InstFlags.RtSeq))
{
int count = GetRtSequenceCount(name, encoding);
for (int index = 0; index < count; index++, mask <<= 1)
{
fpSimdMask |= mask;
}
}
else
{
fpSimdMask |= mask;
}
}
if (flags.HasFlag(InstFlags.Rt2))
{
fpSimdMask |= MaskFromIndex(ExtractRt2(flags, encoding));
}
}
}
else
{
if (flags.HasFlag(InstFlags.Rd))
{
gprMask |= MaskFromIndex(ExtractRd(flags, encoding));
}
if (!flags.HasFlag(InstFlags.ReadRt))
{
if (flags.HasFlag(InstFlags.Rt))
{
gprMask |= MaskFromIndex(ExtractRt(flags, encoding));
}
if (flags.HasFlag(InstFlags.Rt2))
{
gprMask |= MaskFromIndex(ExtractRt2(flags, encoding));
}
}
if (flags.HasFlag(InstFlags.Rs))
{
gprMask |= MaskFromIndex(ExtractRs(flags, encoding));
}
}
return (gprMask, fpSimdMask);
}
private static uint MaskFromIndex(int index)
{
if (index < SpecialZrIndex)
{
return 1u << index;
}
return 0u;
}
private static bool IsFpFromGpr(InstFlags flags, uint encoding)
{
InstFlags bothFlags = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;
if ((flags & bothFlags) == bothFlags) // FMOV (general)
{
return (encoding & (1u << 16)) != 0;
}
return flags.HasFlag(InstFlags.FpSimdFromGpr);
}
private static bool IsFpToGpr(InstFlags flags, uint encoding)
{
InstFlags bothFlags = InstFlags.FpSimdFromGpr | InstFlags.FpSimdToGpr;
if ((flags & bothFlags) == bothFlags) // FMOV (general)
{
return (encoding & (1u << 16)) == 0;
}
return flags.HasFlag(InstFlags.FpSimdToGpr);
}
private static int GetRtSequenceCount(InstName name, uint encoding)
{
switch (name)
{
case InstName.Ld1AdvsimdMultAsNoPostIndex:
case InstName.Ld1AdvsimdMultAsPostIndex:
case InstName.St1AdvsimdMultAsNoPostIndex:
case InstName.St1AdvsimdMultAsPostIndex:
return ((encoding >> 12) & 0xf) switch
{
0b0000 => 4,
0b0010 => 4,
0b0100 => 3,
0b0110 => 3,
0b0111 => 1,
0b1000 => 2,
0b1010 => 2,
_ => 1,
};
case InstName.Ld1rAdvsimdAsNoPostIndex:
case InstName.Ld1rAdvsimdAsPostIndex:
case InstName.Ld1AdvsimdSnglAsNoPostIndex:
case InstName.Ld1AdvsimdSnglAsPostIndex:
case InstName.St1AdvsimdSnglAsNoPostIndex:
case InstName.St1AdvsimdSnglAsPostIndex:
return 1;
case InstName.Ld2rAdvsimdAsNoPostIndex:
case InstName.Ld2rAdvsimdAsPostIndex:
case InstName.Ld2AdvsimdMultAsNoPostIndex:
case InstName.Ld2AdvsimdMultAsPostIndex:
case InstName.Ld2AdvsimdSnglAsNoPostIndex:
case InstName.Ld2AdvsimdSnglAsPostIndex:
case InstName.St2AdvsimdMultAsNoPostIndex:
case InstName.St2AdvsimdMultAsPostIndex:
case InstName.St2AdvsimdSnglAsNoPostIndex:
case InstName.St2AdvsimdSnglAsPostIndex:
return 2;
case InstName.Ld3rAdvsimdAsNoPostIndex:
case InstName.Ld3rAdvsimdAsPostIndex:
case InstName.Ld3AdvsimdMultAsNoPostIndex:
case InstName.Ld3AdvsimdMultAsPostIndex:
case InstName.Ld3AdvsimdSnglAsNoPostIndex:
case InstName.Ld3AdvsimdSnglAsPostIndex:
case InstName.St3AdvsimdMultAsNoPostIndex:
case InstName.St3AdvsimdMultAsPostIndex:
case InstName.St3AdvsimdSnglAsNoPostIndex:
case InstName.St3AdvsimdSnglAsPostIndex:
return 3;
case InstName.Ld4rAdvsimdAsNoPostIndex:
case InstName.Ld4rAdvsimdAsPostIndex:
case InstName.Ld4AdvsimdMultAsNoPostIndex:
case InstName.Ld4AdvsimdMultAsPostIndex:
case InstName.Ld4AdvsimdSnglAsNoPostIndex:
case InstName.Ld4AdvsimdSnglAsPostIndex:
case InstName.St4AdvsimdMultAsNoPostIndex:
case InstName.St4AdvsimdMultAsPostIndex:
case InstName.St4AdvsimdSnglAsNoPostIndex:
case InstName.St4AdvsimdSnglAsPostIndex:
return 4;
}
return 1;
}
/// <summary>
/// Returns the length of the Rn register sequence, encoded as (count - 1) in bits [14:13].
/// </summary>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>Sequence length, from 1 to 4</returns>
private static int GetRnSequenceCount(uint encoding) => (int)((encoding >> 13) & 3u) + 1;
/// <summary>
/// Extracts the Rd register index from an encoding.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rd"/></param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>
/// <c>SpecialZrIndex</c> when the field equals <c>ZrIndex</c> and the <see cref="InstFlags.RdSP"/> flag is not set,
/// otherwise the raw 5-bit register index
/// </returns>
public static int ExtractRd(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rd));

    int rd = (int)(encoding >> RdRtBit) & 0x1f;
    bool isZeroRegister = rd == ZrIndex && !flags.HasFlag(InstFlags.RdSP);

    return isZeroRegister ? SpecialZrIndex : rd;
}
/// <summary>
/// Extracts the raw 5-bit Rn field from an encoding, without any zero register remapping.
/// </summary>
public static int ExtractRn(uint encoding) => (int)(encoding >> RnBit) & 0x1f;
/// <summary>
/// Extracts the Rn register index from an encoding.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rn"/></param>
/// <param name="encoding">Raw instruction encoding</param>
/// <returns>
/// <c>SpecialZrIndex</c> when the field equals <c>ZrIndex</c> and the <see cref="InstFlags.RnSP"/> flag is not set,
/// otherwise the raw 5-bit register index
/// </returns>
public static int ExtractRn(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rn));

    int rn = ExtractRn(encoding);
    bool isZeroRegister = rn == ZrIndex && !flags.HasFlag(InstFlags.RnSP);

    return isZeroRegister ? SpecialZrIndex : rn;
}
/// <summary>
/// Extracts the raw 5-bit Rm field from an encoding, without any zero register remapping.
/// </summary>
public static int ExtractRm(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;
/// <summary>
/// Extracts the Rm register index from an encoding, mapping <c>ZrIndex</c> to <c>SpecialZrIndex</c>.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rm"/></param>
/// <param name="encoding">Raw instruction encoding</param>
public static int ExtractRm(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rm));

    int rm = ExtractRm(encoding);
    if (rm == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rm;
}
/// <summary>
/// Extracts the raw 5-bit Rs field from an encoding (same bit position as Rm), without any zero register remapping.
/// </summary>
public static int ExtractRs(uint encoding) => (int)(encoding >> RmRsBit) & 0x1f;
/// <summary>
/// Extracts the Rs register index from an encoding, mapping <c>ZrIndex</c> to <c>SpecialZrIndex</c>.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rs"/></param>
/// <param name="encoding">Raw instruction encoding</param>
public static int ExtractRs(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rs));

    int rs = ExtractRs(encoding);
    if (rs == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rs;
}
/// <summary>
/// Extracts the Ra register index from an encoding, mapping <c>ZrIndex</c> to <c>SpecialZrIndex</c>.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Ra"/></param>
/// <param name="encoding">Raw instruction encoding</param>
public static int ExtractRa(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Ra));

    int ra = (int)(encoding >> RaRt2Bit) & 0x1f;
    if (ra == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return ra;
}
/// <summary>
/// Extracts the raw 5-bit Rt field from an encoding (same bit position as Rd), without any zero register remapping.
/// </summary>
public static int ExtractRt(uint encoding) => (int)(encoding >> RdRtBit) & 0x1f;
/// <summary>
/// Extracts the Rt register index from an encoding, mapping <c>ZrIndex</c> to <c>SpecialZrIndex</c>.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rt"/></param>
/// <param name="encoding">Raw instruction encoding</param>
public static int ExtractRt(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt));

    int rt = ExtractRt(encoding);
    if (rt == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt;
}
/// <summary>
/// Extracts the Rt2 register index from an encoding (same bit position as Ra),
/// mapping <c>ZrIndex</c> to <c>SpecialZrIndex</c>.
/// </summary>
/// <param name="flags">Instruction flags; must include <see cref="InstFlags.Rt2"/></param>
/// <param name="encoding">Raw instruction encoding</param>
public static int ExtractRt2(InstFlags flags, uint encoding)
{
    Debug.Assert(flags.HasFlag(InstFlags.Rt2));

    int rt2 = (int)(encoding >> RaRt2Bit) & 0x1f;
    if (rt2 == ZrIndex)
    {
        return SpecialZrIndex;
    }

    return rt2;
}
}
}

View File

@ -0,0 +1,743 @@
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using Ryujinx.Cpu.LightningJit.Graph;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class Compiler
{
private const int Encodable26BitsOffsetLimit = 0x2000000;
private readonly struct Context
{
public readonly CodeWriter Writer;
public readonly RegisterAllocator RegisterAllocator;
public readonly TailMerger TailMerger;
public readonly AddressTable<ulong> FuncTable;
public readonly IntPtr DispatchStubPointer;
private readonly MultiBlock _multiBlock;
private readonly RegisterSaveRestore _registerSaveRestore;
private readonly IntPtr _pageTablePointer;
/// <summary>
/// Creates the compilation context, capturing every object needed while emitting code for one function.
/// </summary>
public Context(
    CodeWriter writer,
    RegisterAllocator registerAllocator,
    TailMerger tailMerger,
    RegisterSaveRestore registerSaveRestore,
    MultiBlock multiBlock,
    AddressTable<ulong> funcTable,
    IntPtr dispatchStubPointer,
    IntPtr pageTablePointer)
{
    // State exposed to the rest of the compiler.
    Writer = writer;
    RegisterAllocator = registerAllocator;
    TailMerger = tailMerger;
    FuncTable = funcTable;
    DispatchStubPointer = dispatchStubPointer;

    // State only used by the context load/store and prologue/epilogue helpers.
    _multiBlock = multiBlock;
    _registerSaveRestore = registerSaveRestore;
    _pageTablePointer = pageTablePointer;
}
/// <summary>
/// Gets the host register index that the guest link register (<c>RegisterUtils.LrIndex</c>) is remapped to.
/// </summary>
public readonly int GetLrRegisterIndex() => RegisterAllocator.RemapReservedGprRegister(RegisterUtils.LrIndex);
/// <summary>
/// Remaps a general purpose register index through the register allocator's reserved register mapping.
/// </summary>
/// <param name="index">Register index to remap</param>
public readonly int RemapGprRegister(int index) => RegisterAllocator.RemapReservedGprRegister(index);
/// <summary>
/// Gets the reserved stack offset reported by the register save/restore helper.
/// </summary>
public readonly int GetReservedStackOffset() => _registerSaveRestore.GetReservedStackOffset();
/// <summary>
/// Emits the function prologue: register saves, fixed register setup,
/// and the context loads needed by the entry block.
/// </summary>
public readonly void WritePrologue()
{
    Assembler prologueAsm = new(Writer);

    _registerSaveRestore.WritePrologue(ref prologueAsm);

    // The context pointer arrives as the first argument (register 0).
    // It only needs to be copied if the fixed context register is a different register.
    bool contextNeedsMove = RegisterAllocator.FixedContextRegister != 0;

    if (contextNeedsMove)
    {
        prologueAsm.Mov(Register(RegisterAllocator.FixedContextRegister), Register(0));
    }

    // The page table or address space base lives at a fixed memory location and is treated as a constant.
    // It is only materialized when the function has memory instructions.
    if (_multiBlock.HasMemoryInstruction)
    {
        ulong pageTableAddress = (ulong)_pageTablePointer;

        prologueAsm.Mov(Register(RegisterAllocator.FixedPageTableRegister), pageTableAddress);
    }

    // Block 0 is assumed to always be the entry block.
    LoadFromContext(ref prologueAsm, _multiBlock.ReadMasks[0]);
}
/// <summary>
/// Emits the function epilogue through the register save/restore helper.
/// No guest state is stored to the context here.
/// </summary>
public readonly void WriteEpilogueWithoutContext()
{
    Assembler epilogueAsm = new(Writer);

    _registerSaveRestore.WriteEpilogue(ref epilogueAsm);
}
/// <summary>
/// Emits loads from the context for every register read by any successor of the given block.
/// Nothing is emitted if the block has no successors.
/// </summary>
/// <param name="blockIndex">Index of the block that performed the call</param>
public void LoadFromContextAfterCall(int blockIndex)
{
    Block caller = _multiBlock.Blocks[blockIndex];

    if (caller.SuccessorsCount == 0)
    {
        return;
    }

    Assembler asm = new(Writer);

    // Start with the first successor, then merge in the read masks of the remaining ones.
    RegisterMask combinedReads = _multiBlock.ReadMasks[caller.GetSuccessor(0).Index];

    for (int i = 1; i < caller.SuccessorsCount; i++)
    {
        combinedReads |= _multiBlock.ReadMasks[caller.GetSuccessor(i).Index];
    }

    LoadFromContext(ref asm, combinedReads);
}
/// <summary>
/// Emits loads from the context for all general purpose, FP/SIMD and flag state selected by the mask.
/// </summary>
/// <param name="asm">Assembler used to emit the loads</param>
/// <param name="readMask">State that should be loaded</param>
private void LoadFromContext(ref Assembler asm, RegisterMask readMask)
{
    // Emission order: general purpose registers, then FP/SIMD registers, then flags.
    uint gprMask = readMask.GprMask;
    uint fpSimdMask = readMask.FpSimdMask;
    uint pStateMask = readMask.PStateMask;

    LoadGprFromContext(ref asm, gprMask, NativeContextOffsets.GprBaseOffset);
    LoadFpSimdFromContext(ref asm, fpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
    LoadPStateFromContext(ref asm, pStateMask, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits stores to the context for all state written by the given block.
/// </summary>
/// <param name="blockIndex">Index of the block whose write mask selects the state to store</param>
/// <param name="newLrValue">Optional value to store as the link register instead of its current value</param>
public void StoreToContextBeforeCall(int blockIndex, ulong? newLrValue = null)
{
    Assembler storeAsm = new(Writer);
    RegisterMask blockWrites = _multiBlock.WriteMasks[blockIndex];

    StoreToContext(ref storeAsm, blockWrites, newLrValue);
}
/// <summary>
/// Emits stores to the context for all general purpose, FP/SIMD and flag state selected by the mask.
/// </summary>
/// <param name="asm">Assembler used to emit the stores</param>
/// <param name="writeMask">State that should be stored</param>
/// <param name="newLrValue">Optional value to store as the link register instead of its current value</param>
private void StoreToContext(ref Assembler asm, RegisterMask writeMask, ulong? newLrValue)
{
    // Emission order: general purpose registers, then FP/SIMD registers, then flags.
    uint gprMask = writeMask.GprMask;
    uint fpSimdMask = writeMask.FpSimdMask;
    uint pStateMask = writeMask.PStateMask;

    StoreGprToContext(ref asm, gprMask, NativeContextOffsets.GprBaseOffset, newLrValue);
    StoreFpSimdToContext(ref asm, fpSimdMask, NativeContextOffsets.FpSimdBaseOffset);
    StorePStateToContext(ref asm, pStateMask, NativeContextOffsets.FlagsBaseOffset);
}
/// <summary>
/// Emits loads from the context for every general purpose register whose bit is set in the mask.
/// Two adjacent registers are loaded with a single pair load when the offset allows it.
/// </summary>
/// <param name="asm">Assembler used to emit the loads</param>
/// <param name="mask">One bit per register; bit N selects register N</param>
/// <param name="baseOffset">Context offset of register 0; each register takes 8 bytes</param>
private void LoadGprFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    for (uint pending = mask; pending != 0;)
    {
        int first = BitOperations.TrailingZeroCount(pending);
        int slotOffset = baseOffset + first * 8;

        Operand firstReg = Register(RegisterAllocator.RemapReservedGprRegister(first));

        // A pair load needs the next register to be pending too, and the offset to be below the pair limit.
        bool nextIsPending = first < 31 && (pending & (2u << first)) != 0;

        if (nextIsPending && slotOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
        {
            pending &= ~(3u << first);

            Operand secondReg = Register(RegisterAllocator.RemapReservedGprRegister(first + 1));

            asm.LdpRiUn(firstReg, secondReg, contextPtr, slotOffset);
        }
        else
        {
            pending &= ~(1u << first);

            asm.LdrRiUn(firstReg, contextPtr, slotOffset);
        }
    }
}
/// <summary>
/// Emits a 128-bit load from the context for every FP/SIMD register whose bit is set in the mask.
/// </summary>
/// <param name="asm">Assembler used to emit the loads</param>
/// <param name="mask">One bit per register; bit N selects register N</param>
/// <param name="baseOffset">Context offset of register 0; each register takes 16 bytes</param>
private void LoadFpSimdFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    // Walk the set bits from lowest to highest, clearing each one once handled.
    for (uint pending = mask; pending != 0;)
    {
        int vectorIndex = BitOperations.TrailingZeroCount(pending);

        pending &= ~(1u << vectorIndex);

        asm.LdrRiUn(Register(vectorIndex, OperandType.V128), contextPtr, baseOffset + vectorIndex * 16);
    }
}
/// <summary>
/// Emits code that loads the flags word from the context and writes it to NZCV.
/// Nothing is emitted when the mask is zero; otherwise the whole word is loaded regardless of which bits are set.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="mask">Flags that need to be loaded</param>
/// <param name="baseOffset">Context offset of the flags word</param>
private void LoadPStateFromContext(ref Assembler asm, uint mask, int baseOffset)
{
    if (mask != 0)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a 32-bit temporary register on their way to NZCV.
        int scratch = RegisterAllocator.AllocateTempGprRegister();
        Operand flags = Register(scratch, OperandType.I32);

        asm.LdrRiUn(flags, contextPtr, baseOffset);
        asm.MsrNzcv(flags);

        RegisterAllocator.FreeTempGprRegister(scratch);
    }
}
/// <summary>
/// Emits stores to the context for every general purpose register whose bit is set in the mask.
/// Two adjacent registers are stored with a single pair store when the offset allows it.
/// </summary>
/// <param name="asm">Assembler used to emit the stores</param>
/// <param name="mask">One bit per register; bit N selects register N</param>
/// <param name="baseOffset">Context offset of register 0; each register takes 8 bytes</param>
/// <param name="newLrValue">When set, this value is stored in place of the link register</param>
private void StoreGprToContext(ref Assembler asm, uint mask, int baseOffset, ulong? newLrValue)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    // A negative value means that the link register is stored as is.
    int lrOverrideRegister = -1;

    if (newLrValue is ulong lrValue)
    {
        // The new link register value is built on a temporary register rather than on the register itself.
        // BLR X30 needs this, as the branch target must be read before the return address replaces it.
        lrOverrideRegister = RegisterAllocator.AllocateTempGprRegister();

        asm.Mov(Register(lrOverrideRegister), lrValue);
    }

    for (uint pending = mask; pending != 0;)
    {
        int first = BitOperations.TrailingZeroCount(pending);
        int slotOffset = baseOffset + first * 8;

        Operand firstReg = Register(RemapReservedGprRegister(first, lrOverrideRegister));

        // A pair store needs the next register to be pending too, and the offset to be below the pair limit.
        bool nextIsPending = first < 31 && (pending & (2u << first)) != 0;

        if (nextIsPending && slotOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
        {
            pending &= ~(3u << first);

            Operand secondReg = Register(RemapReservedGprRegister(first + 1, lrOverrideRegister));

            asm.StpRiUn(firstReg, secondReg, contextPtr, slotOffset);
        }
        else
        {
            pending &= ~(1u << first);

            asm.StrRiUn(firstReg, contextPtr, slotOffset);
        }
    }

    if (lrOverrideRegister >= 0)
    {
        RegisterAllocator.FreeTempGprRegister(lrOverrideRegister);
    }
}
/// <summary>
/// Remaps a general purpose register index, substituting the link register with a temporary register if one is given.
/// </summary>
/// <param name="index">Register index to remap</param>
/// <param name="tempRegister">Register that stands in for the link register, or a negative value for none</param>
/// <returns>The temporary register for the link register when one is given, the allocator remapping otherwise</returns>
private int RemapReservedGprRegister(int index, int tempRegister)
{
    bool useOverride = tempRegister >= 0 && index == RegisterUtils.LrIndex;

    return useOverride ? tempRegister : RegisterAllocator.RemapReservedGprRegister(index);
}
/// <summary>
/// Emits a 128-bit store to the context for every FP/SIMD register whose bit is set in the mask.
/// </summary>
/// <param name="asm">Assembler used to emit the stores</param>
/// <param name="mask">One bit per register; bit N selects register N</param>
/// <param name="baseOffset">Context offset of register 0; each register takes 16 bytes</param>
private void StoreFpSimdToContext(ref Assembler asm, uint mask, int baseOffset)
{
    Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

    // Walk the set bits from lowest to highest, clearing each one once handled.
    for (uint pending = mask; pending != 0;)
    {
        int vectorIndex = BitOperations.TrailingZeroCount(pending);

        pending &= ~(1u << vectorIndex);

        asm.StrRiUn(Register(vectorIndex, OperandType.V128), contextPtr, baseOffset + vectorIndex * 16);
    }
}
/// <summary>
/// Emits code that reads NZCV and stores it to the flags word on the context.
/// Nothing is emitted when the mask is zero; otherwise the whole word is stored regardless of which bits are set.
/// </summary>
/// <param name="asm">Assembler used to emit the code</param>
/// <param name="mask">Flags that need to be stored</param>
/// <param name="baseOffset">Context offset of the flags word</param>
private void StorePStateToContext(ref Assembler asm, uint mask, int baseOffset)
{
    if (mask != 0)
    {
        Operand contextPtr = Register(RegisterAllocator.FixedContextRegister);

        // The flags go through a 32-bit temporary register on their way to memory.
        int scratch = RegisterAllocator.AllocateTempGprRegister();
        Operand flags = Register(scratch, OperandType.I32);

        asm.MrsNzcv(flags);
        asm.StrRiUn(flags, contextPtr, baseOffset);

        RegisterAllocator.FreeTempGprRegister(scratch);
    }
}
}
/// <summary>
/// Branch instruction whose emission was deferred: a placeholder was written at <see cref="WriterPointer"/>,
/// and it is patched after all blocks have been emitted and their code offsets are known.
/// </summary>
private readonly struct PendingBranch
{
// Index of the block that ends with the branch.
public readonly int BlockIndex;
// Guest address of the branch instruction.
public readonly ulong Pc;
// Decoded name of the branch instruction.
public readonly InstName Name;
// Encoding of the branch instruction, after register remapping.
public readonly uint Encoding;
// Instruction index of the placeholder on the code writer.
public readonly int WriterPointer;
public PendingBranch(int blockIndex, ulong pc, InstName name, uint encoding, int writerPointer)
{
BlockIndex = blockIndex;
Pc = pc;
Name = name;
Encoding = encoding;
WriterPointer = writerPointer;
}
}
/// <summary>
/// Compiles the guest function starting at the specified address.
/// The function is decoded into blocks, a prologue is written, each block is rewritten in order,
/// deferred branches are patched once all block offsets are known, and the shared return code is appended.
/// </summary>
/// <param name="cpuPreset">CPU preset forwarded to the decoder</param>
/// <param name="memoryManager">Memory manager used to read the guest code and to query the address space configuration</param>
/// <param name="address">Guest address of the function entry point</param>
/// <param name="funcTable">Function table forwarded to the call emission helpers</param>
/// <param name="dispatchStubPtr">Dispatch stub pointer forwarded to the call emission helpers</param>
/// <returns>The compiled function, made of the emitted code and the size in bytes of the guest code that was compiled</returns>
public static CompiledFunction Compile(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address, AddressTable<ulong> funcTable, IntPtr dispatchStubPtr)
{
MultiBlock multiBlock = Decoder.DecodeMulti(cpuPreset, memoryManager, address);
// Maps the guest address of each block to the instruction index where its code starts.
Dictionary<ulong, int> targets = new();
List<PendingBranch> pendingBranches = new();
uint gprUseMask = multiBlock.GlobalUseMask.GprMask;
uint fpSimdUseMask = multiBlock.GlobalUseMask.FpSimdMask;
uint pStateUseMask = multiBlock.GlobalUseMask.PStateMask;
CodeWriter writer = new();
RegisterAllocator regAlloc = new(gprUseMask, fpSimdUseMask, pStateUseMask, multiBlock.HasHostCall);
// Stack space for call spills is only requested if the function has host calls.
RegisterSaveRestore rsr = new(
regAlloc.AllGprMask & AbiConstants.GprCalleeSavedRegsMask,
regAlloc.AllFpSimdMask & AbiConstants.FpSimdCalleeSavedRegsMask,
OperandType.FP64,
multiBlock.HasHostCall,
multiBlock.HasHostCall ? CalculateStackSizeForCallSpill(regAlloc.AllGprMask, regAlloc.AllFpSimdMask, regAlloc.AllPStateMask) : 0);
TailMerger tailMerger = new();
Context context = new(writer, regAlloc, tailMerger, rsr, multiBlock, funcTable, dispatchStubPtr, memoryManager.PageTablePointer);
context.WritePrologue();
// Guest address of the instruction being processed, advanced by 4 for each one.
ulong pc = address;
for (int blockIndex = 0; blockIndex < multiBlock.Blocks.Count; blockIndex++)
{
Block block = multiBlock.Blocks[blockIndex];
Debug.Assert(block.Address == pc);
targets.Add(pc, writer.InstructionPointer);
// If the block ends with a branch, the branch is left out of this loop and handled separately below.
int instCount = block.EndsWithBranch ? block.Instructions.Count - 1 : block.Instructions.Count;
for (int index = 0; index < instCount; index++)
{
InstInfo instInfo = block.Instructions[index];
uint encoding = RegisterUtils.RemapRegisters(regAlloc, instInfo.Flags, instInfo.Encoding);
if (instInfo.AddressForm != AddressForm.None)
{
InstEmitMemory.RewriteInstruction(
memoryManager.AddressSpaceBits,
memoryManager.Type,
writer,
regAlloc,
instInfo.Name,
instInfo.Flags,
instInfo.AddressForm,
pc,
encoding);
}
else if (instInfo.Name == InstName.Sys)
{
InstEmitMemory.RewriteSysInstruction(memoryManager.AddressSpaceBits, memoryManager.Type, writer, regAlloc, encoding);
}
else if (instInfo.Name.IsSystem())
{
// Some system instructions need the guest state stored before, and reloaded after, the emitted code.
bool needsContextStoreLoad = InstEmitSystem.NeedsContextStoreLoad(instInfo.Name);
if (needsContextStoreLoad)
{
context.StoreToContextBeforeCall(blockIndex);
}
InstEmitSystem.RewriteInstruction(writer, regAlloc, tailMerger, instInfo.Name, pc, encoding, rsr.GetReservedStackOffset());
if (needsContextStoreLoad)
{
context.LoadFromContextAfterCall(blockIndex);
}
}
else
{
// All other instructions are emitted as is, only with the registers remapped.
writer.WriteInstruction(encoding);
}
pc += 4UL;
}
if (block.IsLoopEnd)
{
// If this is a loop, the code might run for a long time uninterrupted.
// We insert a "sync point" here to ensure the loop can be interrupted if needed.
InstEmitSystem.WriteSyncPoint(writer, context.RegisterAllocator, tailMerger, context.GetReservedStackOffset());
}
// The last instruction of the last block is handled after the loop.
if (blockIndex < multiBlock.Blocks.Count - 1)
{
InstInfo lastInstructionInfo = block.Instructions[^1];
InstName lastInstructionName = lastInstructionInfo.Name;
InstFlags lastInstructionFlags = lastInstructionInfo.Flags;
uint lastInstructionEncoding = lastInstructionInfo.Encoding;
lastInstructionEncoding = RegisterUtils.RemapRegisters(regAlloc, lastInstructionFlags, lastInstructionEncoding);
if (lastInstructionName.IsCall())
{
// The address of the instruction following the call is stored as the new link register value.
context.StoreToContextBeforeCall(blockIndex, pc + 4UL);
InstEmitSystem.RewriteCallInstruction(
writer,
regAlloc,
tailMerger,
context.WriteEpilogueWithoutContext,
funcTable,
dispatchStubPtr,
lastInstructionName,
pc,
lastInstructionEncoding,
context.GetReservedStackOffset());
context.LoadFromContextAfterCall(blockIndex);
pc += 4UL;
}
else if (lastInstructionName == InstName.Ret)
{
RewriteBranchInstruction(context, blockIndex, lastInstructionName, pc, lastInstructionEncoding);
pc += 4UL;
}
else if (block.EndsWithBranch)
{
// The target block might not have been emitted yet, so the branch is patched after all blocks are written.
pendingBranches.Add(new(blockIndex, pc, lastInstructionName, lastInstructionEncoding, writer.InstructionPointer));
writer.WriteInstruction(0u); // Placeholder.
pc += 4UL;
}
}
}
int lastBlockIndex = multiBlock.Blocks[^1].Index;
if (multiBlock.IsTruncated)
{
// Decoding stopped early, continue execution at the first instruction that was not compiled.
Assembler asm = new(writer);
WriteTailCallConstant(context, ref asm, lastBlockIndex, pc);
}
else
{
InstInfo lastInstructionInfo = multiBlock.Blocks[^1].Instructions[^1];
InstName lastInstructionName = lastInstructionInfo.Name;
InstFlags lastInstructionFlags = lastInstructionInfo.Flags;
uint lastInstructionEncoding = lastInstructionInfo.Encoding;
lastInstructionEncoding = RegisterUtils.RemapRegisters(regAlloc, lastInstructionFlags, lastInstructionEncoding);
RewriteBranchInstruction(context, lastBlockIndex, lastInstructionName, pc, lastInstructionEncoding);
pc += 4;
}
// All blocks have been written, so the deferred branches can now be resolved.
foreach (PendingBranch pendingBranch in pendingBranches)
{
RewriteBranchInstructionWithTarget(
context,
pendingBranch.BlockIndex,
pendingBranch.Name,
pendingBranch.Pc,
pendingBranch.Encoding,
pendingBranch.WriterPointer,
targets);
}
tailMerger.WriteReturn(writer, context.WriteEpilogueWithoutContext);
// The second argument is the size in bytes of the guest code that was compiled.
return new(writer.AsByteSpan(), (int)(pc - address));
}
/// <summary>
/// Calculates the stack size, in bytes, that is reserved for spilling registers around calls.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
/// <param name="fpSimdUseMask">Mask of FP/SIMD registers in use</param>
/// <param name="pStateUseMask">Mask of flags in use</param>
/// <returns>8 bytes per non callee saved general purpose register, 16 per FP/SIMD register, plus 8 if any flag is used</returns>
private static int CalculateStackSizeForCallSpill(uint gprUseMask, uint fpSimdUseMask, uint pStateUseMask)
{
    int gprBytes = BitOperations.PopCount(gprUseMask & ~AbiConstants.GprCalleeSavedRegsMask) * 8;

    // Callee saved FP/SIMD registers are not discarded, because only their lower 64 bits are callee saved,
    // which is not enough if the function uses the full register.
    // This could be improved, but it is likely not worth it as this case is very rare in practice.
    int fpSimdBytes = BitOperations.PopCount(fpSimdUseMask) * 16;

    int pStateBytes = pStateUseMask != 0 ? 8 : 0;

    return gprBytes + fpSimdBytes + pStateBytes;
}
/// <summary>
/// Emits code for a branch instruction without looking up any target inside the function being compiled.
/// Every supported case ends with a tail call (<see cref="WriteTailCallConstant"/> or
/// <c>RewriteCallInstruction</c> with <c>isTail</c> set), or with a return registered on the tail merger.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction, with registers already remapped</param>
private static void RewriteBranchInstruction(in Context context, int blockIndex, InstName name, ulong pc, uint encoding)
{
CodeWriter writer = context.Writer;
Assembler asm = new(writer);
int originalOffset;
ulong nextAddress = pc + 4UL;
ulong targetAddress;
switch (name)
{
case InstName.BUncond:
originalOffset = ImmUtils.ExtractSImm26Times4(encoding);
targetAddress = pc + (ulong)originalOffset;
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
case InstName.Bl:
case InstName.Blr:
case InstName.Br:
if (name == InstName.Bl)
{
// Write the return address to the link register, then tail call the constant target.
asm.Mov(Register(context.GetLrRegisterIndex()), nextAddress);
int imm = ImmUtils.ExtractSImm26Times4(encoding);
WriteTailCallConstant(context, ref asm, blockIndex, pc + (ulong)imm);
}
else
{
// Only BLR stores a new link register value, BR leaves it unmodified.
bool isCall = name == InstName.Blr;
if (isCall)
{
context.StoreToContextBeforeCall(blockIndex, nextAddress);
}
else
{
context.StoreToContextBeforeCall(blockIndex);
}
InstEmitSystem.RewriteCallInstruction(
context.Writer,
context.RegisterAllocator,
context.TailMerger,
context.WriteEpilogueWithoutContext,
context.FuncTable,
context.DispatchStubPointer,
name,
pc,
encoding,
context.GetReservedStackOffset(),
isTail: true);
}
break;
case InstName.Ret:
int rnIndex = RegisterUtils.ExtractRn(encoding);
if (rnIndex == RegisterUtils.ZrIndex)
{
// The return address register is the zero register, so the target address is the constant 0.
WriteTailCallConstant(context, ref asm, blockIndex, 0UL);
}
else
{
rnIndex = context.RemapGprRegister(rnIndex);
context.StoreToContextBeforeCall(blockIndex);
// The return address is moved to register 0 before returning, unless it is already there.
if (rnIndex != 0)
{
asm.Mov(Register(0), Register(rnIndex));
}
context.TailMerger.AddUnconditionalReturn(writer, asm);
}
break;
case InstName.BCond:
case InstName.Cbnz:
case InstName.Cbz:
case InstName.Tbnz:
case InstName.Tbz:
// TBZ/TBNZ have a 14 bits immediate, the other conditional branches have a 19 bits immediate.
// For all of them, the immediate starts at bit 5 of the encoding.
uint branchMask;
if (name == InstName.Tbnz || name == InstName.Tbz)
{
originalOffset = ImmUtils.ExtractSImm14Times4(encoding);
branchMask = 0x3fff;
}
else
{
originalOffset = ImmUtils.ExtractSImm19Times4(encoding);
branchMask = 0x7ffff;
}
targetAddress = pc + (ulong)originalOffset;
int branchIndex = writer.InstructionPointer;
writer.WriteInstruction(0u); // Reserved for branch.
// Not taken path: tail call to the next guest instruction.
WriteTailCallConstant(context, ref asm, blockIndex, nextAddress);
int targetIndex = writer.InstructionPointer;
// Patch the reserved slot with the original branch, now targeting the taken path written right below.
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)(((targetIndex - branchIndex) & branchMask) << 5));
// Taken path: tail call to the original branch target.
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
default:
Debug.Fail($"Unknown branch instruction \"{name}\".");
break;
}
}
/// <summary>
/// Patches a branch placeholder that was written earlier.
/// If the target is part of the function being compiled and in range, the branch goes directly to it.
/// Otherwise, the branch goes to a tail call to the target address, written at the current end of the code.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="blockIndex">Index of the block that ends with the branch</param>
/// <param name="name">Name of the branch instruction</param>
/// <param name="pc">Guest address of the branch instruction</param>
/// <param name="encoding">Encoding of the branch instruction, with registers already remapped</param>
/// <param name="branchIndex">Instruction index of the placeholder on the code writer</param>
/// <param name="targets">Map from the guest address of each block to the instruction index where its code starts</param>
private static void RewriteBranchInstructionWithTarget(
in Context context,
int blockIndex,
InstName name,
ulong pc,
uint encoding,
int branchIndex,
Dictionary<ulong, int> targets)
{
CodeWriter writer = context.Writer;
Assembler asm = new(writer);
// Branch distance, in instructions rather than bytes.
int delta;
int targetIndex;
int originalOffset;
ulong targetAddress;
switch (name)
{
case InstName.BUncond:
originalOffset = ImmUtils.ExtractSImm26Times4(encoding);
targetAddress = pc + (ulong)originalOffset;
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -Encodable26BitsOffsetLimit && delta < Encodable26BitsOffsetLimit)
{
writer.WriteInstructionAt(branchIndex, (encoding & ~0x3ffffffu) | (uint)(delta & 0x3ffffff));
break;
}
}
// The target is outside the function or out of range, branch to a tail call written at the end of the code.
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
writer.WriteInstructionAt(branchIndex, (encoding & ~0x3ffffffu) | (uint)(delta & 0x3ffffff));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
break;
case InstName.BCond:
case InstName.Cbnz:
case InstName.Cbz:
case InstName.Tbnz:
case InstName.Tbz:
// TBZ/TBNZ have a 14 bits immediate, the other conditional branches have a 19 bits immediate.
// For all of them, the immediate starts at bit 5 of the encoding.
uint branchMask;
if (name == InstName.Tbnz || name == InstName.Tbz)
{
originalOffset = ImmUtils.ExtractSImm14Times4(encoding);
branchMask = 0x3fff;
}
else
{
originalOffset = ImmUtils.ExtractSImm19Times4(encoding);
branchMask = 0x7ffff;
}
// The immediate is signed, so the encodable range is [-branchMax, branchMax).
int branchMax = (int)(branchMask + 1) / 2;
targetAddress = pc + (ulong)originalOffset;
if (targets.TryGetValue(targetAddress, out targetIndex))
{
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
break;
}
}
targetIndex = writer.InstructionPointer;
delta = targetIndex - branchIndex;
if (delta >= -branchMax && delta < branchMax)
{
// The end of the code is in range, branch to a tail call written there.
writer.WriteInstructionAt(branchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
}
else
{
// If the branch target is too far away, we use a regular unconditional branch
// instruction instead which has a much higher range.
// We branch directly to the end of the function, where we put the conditional branch,
// and then branch back to the next instruction or return the branch target depending
// on the branch being taken or not.
uint branchInst = 0x14000000u | ((uint)delta & 0x3ffffff);
Debug.Assert(ImmUtils.ExtractSImm26Times4(branchInst) == delta * 4);
writer.WriteInstructionAt(branchIndex, branchInst);
int movedBranchIndex = writer.InstructionPointer;
writer.WriteInstruction(0u); // Placeholder
// Not taken path: branch back to the instruction following the original branch.
asm.B((branchIndex + 1 - writer.InstructionPointer) * 4);
// Taken path: the moved conditional branch skips the branch back, and lands on the tail call below.
delta = writer.InstructionPointer - movedBranchIndex;
writer.WriteInstructionAt(movedBranchIndex, (encoding & ~(branchMask << 5)) | (uint)((delta & branchMask) << 5));
WriteTailCallConstant(context, ref asm, blockIndex, targetAddress);
}
break;
default:
Debug.Fail($"Unknown branch instruction \"{name}\".");
break;
}
}
/// <summary>
/// Stores the state written by the block to the context, then emits a tail call to a constant guest address.
/// </summary>
/// <param name="context">Compilation context</param>
/// <param name="asm">Assembler used to emit the call</param>
/// <param name="blockIndex">Index of the block whose written state is stored before the call</param>
/// <param name="address">Guest address to be called</param>
private static void WriteTailCallConstant(in Context context, ref Assembler asm, int blockIndex, ulong address)
{
    context.StoreToContextBeforeCall(blockIndex);

    Operand guestTarget = new Operand(OperandKind.Constant, OperandType.I64, address);

    InstEmitSystem.WriteCallWithGuestAddress(
        context.Writer,
        ref asm,
        context.RegisterAllocator,
        context.TailMerger,
        context.WriteEpilogueWithoutContext,
        context.FuncTable,
        context.DispatchStubPointer,
        context.GetReservedStackOffset(),
        0UL,
        guestTarget,
        isTail: true);
}
/// <summary>
/// Creates a register operand with the <see cref="RegisterType.Integer"/> register type.
/// </summary>
/// <param name="register">Register index</param>
/// <param name="type">Operand type, 64-bit integer by default</param>
private static Operand Register(int register, OperandType type = OperandType.I64) => new(register, RegisterType.Integer, type);
}
}

View File

@ -0,0 +1,384 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.Graph;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class Decoder
{
private const int MaxInstructionsPerBlock = 1000;
private const uint NzcvFlags = 0xfu << 28;
private const uint CFlag = 0x1u << 29;
/// <summary>
/// Decodes consecutive blocks starting at the specified address, then splits them at the
/// branch targets that were found, and numbers and links the resulting blocks.
/// </summary>
/// <param name="cpuPreset">CPU preset used to decode the instructions</param>
/// <param name="memoryManager">Memory manager used to read the guest code</param>
/// <param name="address">Guest address where decoding starts</param>
/// <returns>The decoded blocks, along with the register use mask and flags accumulated while decoding</returns>
public static MultiBlock DecodeMulti(CpuPreset cpuPreset, IMemoryManager memoryManager, ulong address)
{
    List<Block> blocks = new();
    List<ulong> branchTargets = new();

    RegisterMask useMask = RegisterMask.Zero;

    bool hasHostCall = false;
    bool hasMemoryInstruction = false;

    while (true)
    {
        Block decoded = Decode(cpuPreset, memoryManager, address, ref useMask, ref hasHostCall, ref hasMemoryInstruction);

        blocks.Add(decoded);

        // A truncated block is always the last one, and its branch target (if any) is not recorded.
        if (decoded.IsTruncated)
        {
            break;
        }

        if (TryGetBranchTarget(decoded, out ulong targetAddress))
        {
            branchTargets.Add(targetAddress);
        }

        if (!HasNextBlock(decoded, decoded.EndAddress - 4UL, branchTargets))
        {
            break;
        }

        address = decoded.EndAddress;
    }

    branchTargets.Sort();

    SplitBlocks(blocks, branchTargets);
    NumberAndLinkBlocks(blocks);

    return new(blocks, useMask, hasHostCall, hasMemoryInstruction);
}
/// <summary>
/// Tries to get the target address of the branch at the end of a block.
/// </summary>
/// <param name="block">Block to get the branch target from; its last instruction is the one inspected</param>
/// <param name="targetAddress">Branch target address, or 0 if there is none</param>
/// <returns>True if the last instruction is a branch with an immediate target, false otherwise</returns>
private static bool TryGetBranchTarget(Block block, out ulong targetAddress)
{
    InstInfo lastInstruction = block.Instructions[^1];
    ulong lastInstructionAddress = block.EndAddress - 4UL;

    return TryGetBranchTarget(lastInstruction.Name, lastInstructionAddress, lastInstruction.Encoding, out targetAddress);
}
/// <summary>
/// Tries to get the target address of a branch instruction with an immediate offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
/// <param name="targetAddress">Branch target address, or 0 if the instruction is not one of the handled branches</param>
/// <returns>True for B, B.cond, CBZ, CBNZ, TBZ and TBNZ, false otherwise</returns>
private static bool TryGetBranchTarget(InstName name, ulong pc, uint encoding, out ulong targetAddress)
{
    int branchOffset;

    switch (name)
    {
        case InstName.BUncond:
            branchOffset = ImmUtils.ExtractSImm26Times4(encoding);
            break;

        case InstName.Tbnz:
        case InstName.Tbz:
            branchOffset = ImmUtils.ExtractSImm14Times4(encoding);
            break;

        case InstName.BCond:
        case InstName.Cbnz:
        case InstName.Cbz:
            branchOffset = ImmUtils.ExtractSImm19Times4(encoding);
            break;

        default:
            targetAddress = 0;

            return false;
    }

    targetAddress = pc + (ulong)branchOffset;

    return true;
}
/// <summary>
/// Splits blocks so that every branch target that falls inside a block becomes the start of a block.
/// </summary>
/// <param name="blocks">Blocks to split; left and right halves replace the block they came from, in place</param>
/// <param name="branchTargets">Branch target addresses; the caller in this file sorts them before the call</param>
private static void SplitBlocks(List<Block> blocks, List<ulong> branchTargets)
{
int btIndex = 0;
while (btIndex < branchTargets.Count)
{
for (int blockIndex = 0; blockIndex < blocks.Count && btIndex < branchTargets.Count; blockIndex++)
{
Block block = blocks[blockIndex];
ulong currentBranchTarget = branchTargets[btIndex];
// Consume all targets that fall inside the current block.
while (currentBranchTarget >= block.Address && currentBranchTarget < block.EndAddress)
{
// No split is needed if the target is already the start of the block.
if (block.Address != currentBranchTarget)
{
(Block leftBlock, Block rightBlock) = block.SplitAtAddress(currentBranchTarget);
blocks.Insert(blockIndex, leftBlock);
blocks[blockIndex + 1] = rightBlock;
// The right half is visited on the next iteration of the outer for loop.
block = leftBlock;
}
btIndex++;
// Skip duplicates of the target that was just handled.
while (btIndex < branchTargets.Count && branchTargets[btIndex] == currentBranchTarget)
{
btIndex++;
}
if (btIndex >= branchTargets.Count)
{
break;
}
currentBranchTarget = branchTargets[btIndex];
}
}
// If there are targets left, the current one is not inside any block, skip it and scan the blocks again.
Debug.Assert(btIndex < int.MaxValue);
btIndex++;
}
}
/// <summary>
/// Assigns an index to each block, matching its position on the list, and links each block
/// with its successors (the block that follows it and/or its branch target, when they are on the list).
/// </summary>
/// <param name="blocks">Blocks to number and link</param>
private static void NumberAndLinkBlocks(List<Block> blocks)
{
    Dictionary<ulong, Block> blocksByAddress = new();

    foreach (Block block in blocks)
    {
        blocksByAddress.Add(block.Address, block);
    }

    for (int blockIndex = 0; blockIndex < blocks.Count; blockIndex++)
    {
        Block block = blocks[blockIndex];

        block.Number(blockIndex);

        // Truncated blocks are numbered, but get no successors.
        if (block.IsTruncated)
        {
            continue;
        }

        // hasNext: Execution can continue on the block right after this one.
        // hasBranch: The last instruction can branch to an immediate target.
        (bool hasNext, bool hasBranch) = block.Instructions[^1].Name switch
        {
            InstName.BUncond => (!block.EndsWithBranch, true),
            InstName.BCond or InstName.Cbnz or InstName.Cbz or InstName.Tbnz or InstName.Tbz => (true, true),
            InstName.Bl or InstName.Blr => (true, false),
            InstName.Ret => (false, false),
            _ => (!block.EndsWithBranch, false),
        };

        if (hasNext && blocksByAddress.TryGetValue(block.EndAddress, out Block nextBlock))
        {
            block.AddSuccessor(nextBlock);
            nextBlock.AddPredecessor(block);
        }

        if (hasBranch &&
            TryGetBranchTarget(block, out ulong targetAddress) &&
            blocksByAddress.TryGetValue(targetAddress, out Block branchBlock))
        {
            block.AddSuccessor(branchBlock);
            branchBlock.AddPredecessor(block);
        }
    }
}
/// <summary>
/// Checks if decoding should continue with the block that immediately follows the specified block.
/// </summary>
/// <param name="block">Block that was just decoded</param>
/// <param name="pc">Guest address of the last instruction of the block</param>
/// <param name="branchTargets">Branch targets found so far</param>
/// <returns>True if the next block should be decoded, false otherwise</returns>
private static bool HasNextBlock(in Block block, ulong pc, List<ulong> branchTargets)
{
    return block.Instructions[^1].Name switch
    {
        // After an unconditional branch, continue only if the next instruction is a known branch target,
        // or if the branch target is at or after the branch, less than 0x1000 bytes away.
        InstName.BUncond =>
            branchTargets.Contains(pc + 4UL) ||
            (TryGetBranchTarget(block, out ulong targetAddress) && targetAddress >= pc && targetAddress < pc + 0x1000),

        // Calls and conditional branches always continue.
        InstName.BCond or
        InstName.Bl or
        InstName.Blr or
        InstName.Cbnz or
        InstName.Cbz or
        InstName.Tbnz or
        InstName.Tbz => true,

        InstName.Br => false,

        // After a return, continue only if the next instruction is a known branch target.
        InstName.Ret => branchTargets.Contains(pc + 4UL),

        _ => !block.EndsWithBranch,
    };
}
/// <summary>
/// Decodes a single block starting at the specified address.
/// Decoding stops after the first control flow or exception instruction, or earlier (truncating the block)
/// if there would not be enough temporary registers, or if the block has reached the instruction limit.
/// </summary>
/// <param name="cpuPreset">CPU preset used to decode the instructions</param>
/// <param name="memoryManager">Memory manager used to read the guest code</param>
/// <param name="address">Guest address of the first instruction of the block</param>
/// <param name="useMask">Register use mask, updated with the registers used by the block</param>
/// <param name="hasHostCall">Set to true if the block needs a host call; never reset to false here</param>
/// <param name="hasMemoryInstruction">Set to true if the block has a memory or SYS instruction; never reset to false here</param>
/// <returns>The decoded block</returns>
private static Block Decode(
CpuPreset cpuPreset,
IMemoryManager memoryManager,
ulong address,
ref RegisterMask useMask,
ref bool hasHostCall,
ref bool hasMemoryInstruction)
{
ulong startAddress = address;
List<InstInfo> insts = new();
uint gprUseMask = useMask.GprMask;
uint fpSimdUseMask = useMask.FpSimdMask;
uint pStateUseMask = useMask.PStateMask;
uint encoding;
InstName name;
InstFlags flags;
bool isControlFlow;
bool isTruncated = false;
do
{
encoding = memoryManager.Read<uint>(address);
address += 4UL;
(name, flags, AddressForm addressForm) = InstTable.GetInstNameAndFlags(encoding, cpuPreset.Version, cpuPreset.Features);
// Privileged instructions are decoded as a permanently undefined instruction instead.
if (name.IsPrivileged())
{
name = InstName.UdfPermUndef;
flags = InstFlags.None;
addressForm = AddressForm.None;
}
(uint instGprReadMask, uint instFpSimdReadMask) = RegisterUtils.PopulateReadMasks(name, flags, encoding);
(uint instGprWriteMask, uint instFpSimdWriteMask) = RegisterUtils.PopulateWriteMasks(name, flags, encoding);
// Calls also write to the link register.
if (name.IsCall())
{
instGprWriteMask |= 1u << RegisterUtils.LrIndex;
}
uint tempGprUseMask = gprUseMask | instGprReadMask | instGprWriteMask;
// End the block before this instruction if including it would leave too few temporary registers,
// or if the block already has the maximum amount of instructions.
if (CalculateAvailableTemps(tempGprUseMask) < CalculateRequiredGprTemps(tempGprUseMask) || insts.Count >= MaxInstructionsPerBlock)
{
isTruncated = true;
// The instruction is not part of the block, undo the address increment.
address -= 4UL;
break;
}
gprUseMask = tempGprUseMask;
uint instPStateReadMask = 0;
uint instPStateWriteMask = 0;
if (flags.HasFlag(InstFlags.Nzcv) || IsMrsNzcv(encoding))
{
instPStateReadMask = NzcvFlags;
}
else if (flags.HasFlag(InstFlags.C))
{
instPStateReadMask = CFlag;
}
if (flags.HasFlag(InstFlags.S) || IsMsrNzcv(encoding))
{
instPStateWriteMask = NzcvFlags;
}
if (flags.HasFlag(InstFlags.Memory) || name == InstName.Sys)
{
hasMemoryInstruction = true;
}
fpSimdUseMask |= instFpSimdReadMask | instFpSimdWriteMask;
pStateUseMask |= instPStateReadMask | instPStateWriteMask;
if (name.IsSystemOrCall() && !hasHostCall)
{
hasHostCall = name.IsCall() || InstEmitSystem.NeedsCall(encoding);
}
isControlFlow = name.IsControlFlowOrException();
RegisterUse registerUse = new(
instGprReadMask,
instGprWriteMask,
instFpSimdReadMask,
instFpSimdWriteMask,
instPStateReadMask,
instPStateWriteMask);
insts.Add(new(encoding, name, flags, addressForm, registerUse));
}
while (!isControlFlow);
bool isLoopEnd = false;
// A block that ends with a backwards branch is marked as a loop end, and makes the function have a host call.
if (!isTruncated && IsBackwardsBranch(name, encoding))
{
hasHostCall = true;
isLoopEnd = true;
}
useMask = new(gprUseMask, fpSimdUseMask, pStateUseMask);
// The fourth argument is true when the block is not truncated and its last instruction is not an exception
// (the compiler in this file reads it back as Block.EndsWithBranch).
return new(startAddress, address, insts, !isTruncated && !name.IsException(), isTruncated, isLoopEnd);
}
/// <summary>
/// Checks if the encoding matches 0xd53b4200 when its low 5 bits are ignored.
/// The method name identifies this pattern as MRS from NZCV.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the encoding matches, false otherwise</returns>
private static bool IsMrsNzcv(uint encoding)
{
    const uint LowFiveBitsMask = 0x1fu;
    const uint MrsNzcvPattern = 0xd53b4200u;

    return (encoding & ~LowFiveBitsMask) == MrsNzcvPattern;
}
/// <summary>
/// Checks if the encoding matches 0xd51b4200 when its low 5 bits are ignored.
/// The method name identifies this pattern as MSR to NZCV.
/// </summary>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True if the encoding matches, false otherwise</returns>
private static bool IsMsrNzcv(uint encoding)
{
    const uint LowFiveBitsMask = 0x1fu;
    const uint MsrNzcvPattern = 0xd51b4200u;

    return (encoding & ~LowFiveBitsMask) == MsrNzcvPattern;
}
/// <summary>
/// Checks if the instruction is a branch with a negative immediate offset.
/// </summary>
/// <param name="name">Instruction name</param>
/// <param name="encoding">Instruction encoding</param>
/// <returns>True for B, B.cond, CBZ, CBNZ, TBZ and TBNZ with a negative offset, false otherwise</returns>
private static bool IsBackwardsBranch(InstName name, uint encoding)
{
    // Instructions that are not branches with an immediate are given an offset of 0, which is never backwards.
    int branchOffset = name switch
    {
        InstName.BUncond => ImmUtils.ExtractSImm26Times4(encoding),
        InstName.Tbnz or InstName.Tbz => ImmUtils.ExtractSImm14Times4(encoding),
        InstName.BCond or InstName.Cbnz or InstName.Cbz => ImmUtils.ExtractSImm19Times4(encoding),
        _ => 0,
    };

    return branchOffset < 0;
}
/// <summary>
/// Calculates how many temporary registers are required: one for each used register that is part of
/// <c>RegisterUtils.ReservedRegsMask</c>, plus <c>RegisterAllocator.MaxTempsInclFixed</c>.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
private static int CalculateRequiredGprTemps(uint gprUseMask) =>
    RegisterAllocator.MaxTempsInclFixed + BitOperations.PopCount(RegisterUtils.ReservedRegsMask & gprUseMask);
/// <summary>
/// Calculates how many registers are available as temporaries, which are all the registers
/// that are neither in use nor part of <c>RegisterUtils.ReservedRegsMask</c>.
/// </summary>
/// <param name="gprUseMask">Mask of general purpose registers in use</param>
private static int CalculateAvailableTemps(uint gprUseMask) =>
    BitOperations.PopCount(~(RegisterUtils.ReservedRegsMask | gprUseMask));
}
}

View File

@ -0,0 +1,593 @@
using ARMeilleure.Memory;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class InstEmitMemory
{
private const uint XMask = 0x3f808000u;
private const uint XValue = 0x8000000u;
/// <summary>
/// Writes a SYS instruction, replacing its Rt register with a temporary register
/// holding the translated address, unless Rt is the zero register.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteSysInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    int guestRt = RegisterUtils.ExtractRt(encoding);
    bool needsTranslation = guestRt != RegisterUtils.ZrIndex;
    int scratch = 0;

    if (needsTranslation)
    {
        scratch = regAlloc.AllocateTempGprRegister();

        Operand hostAddress = new(scratch, RegisterType.Integer, OperandType.I64);
        Operand guestAddress = new(guestRt, RegisterType.Integer, OperandType.I64);

        Assembler asm = new(writer);

        WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostAddress, guestAddress);

        encoding = RegisterUtils.ReplaceRt(encoding, scratch);
    }

    // With the zero register, the instruction is written unmodified.
    writer.WriteInstruction(encoding);

    if (needsTranslation)
    {
        regAlloc.FreeTempGprRegister(scratch);
    }
}
/// <summary>
/// Rewrites a memory instruction by dispatching to the rewriter that matches its address form.
/// Instructions with an address form not handled here are written unmodified.
/// </summary>
/// <param name="asBits">Address space size in bits</param>
/// <param name="mmType">Memory manager type</param>
/// <param name="writer">Code writer</param>
/// <param name="regAlloc">Register allocator</param>
/// <param name="name">Instruction name</param>
/// <param name="flags">Instruction flags</param>
/// <param name="addressForm">Address form of the instruction</param>
/// <param name="pc">Guest address of the instruction</param>
/// <param name="encoding">Instruction encoding</param>
public static void RewriteInstruction(
    int asBits,
    MemoryManagerType mmType,
    CodeWriter writer,
    RegisterAllocator regAlloc,
    InstName name,
    InstFlags flags,
    AddressForm addressForm,
    ulong pc,
    uint encoding)
{
    switch (addressForm)
    {
        case AddressForm.OffsetReg:
            RewriteOffsetRegMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.PostIndexed:
            RewritePostIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.PreIndexed:
            RewritePreIndexedMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.SignedScaled:
            RewriteSignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.UnsignedScaled:
            RewriteUnsignedScaledMemoryInstruction(asBits, mmType, writer, regAlloc, flags, encoding);
            break;

        case AddressForm.BaseRegister:
        case AddressForm.StructNoOffset:
            // Some applications use unordered memory instructions in places where
            // proper ordering is needed, and only work on some CPUs.
            // To work around this, make all exclusive access operations ordered.
            // This does not apply to the structure (no offset) form.
            bool isExclusive = addressForm == AddressForm.BaseRegister && (encoding & XMask) == XValue;

            if (isExclusive)
            {
                // Set ordered flag.
                encoding |= 1u << 15;
            }

            RewriteBaseRegisterMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        case AddressForm.BasePlusOffset:
            RewriteBasePlusOffsetMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        case AddressForm.Literal:
            RewriteLiteralMemoryInstruction(asBits, mmType, writer, regAlloc, name, pc, encoding);
            break;

        case AddressForm.StructPostIndexedReg:
            RewriteStructPostIndexedRegMemoryInstruction(asBits, mmType, writer, regAlloc, encoding);
            break;

        default:
            writer.WriteInstruction(encoding);
            break;
    }
}
// Rewrites a "base + (extended/shifted) offset register" access.
// The effective guest address is computed into a temporary register, translated,
// and the instruction is re-encoded as an unsigned offset access with offset zero
// relative to that temporary.
private static void RewriteOffsetRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    // TODO: Some unallocated encoding cases.

    // Bits 15:13 select how the offset register is extended.
    ArmExtensionType extension = (ArmExtensionType)((encoding >> 13) & 7);

    // Log2 of the access size; FP/SIMD accesses contribute an extra size bit taken from bit 23.
    uint sizeLog2 = encoding >> 30;
    if (flags.HasFlag(InstFlags.FpSimd))
    {
        sizeLog2 |= (encoding >> 21) & 4u;
    }

    // Bit 12 selects whether the offset is shifted left by the access size.
    bool offsetIsScaled = (encoding & (1u << 12)) != 0;
    int shiftAmount = offsetIsScaled ? (int)sizeLog2 : 0;

    int scratchIndex = regAlloc.AllocateTempGprRegister();

    Operand hostAddress = new(scratchIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);
    Operand guestIndex = new(RegisterUtils.ExtractRm(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    asm.Add(hostAddress, guestBase, guestIndex, extension, shiftAmount);
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostAddress, hostAddress);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratchIndex);
    rewritten = (rewritten & ~(0xfffu << 10)) | (1u << 24); // Register -> Unsigned offset

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratchIndex);
}
// Rewrites a post-indexed access: the access is performed at the translated base address
// with no offset, and the guest base register is then advanced by the immediate.
private static void RewritePostIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    // Pair instructions (two transfer registers) use a scaled 7-bit immediate,
    // all others a 9-bit one.
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int writeBackAmount = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int scratchIndex = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratchIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratchIndex);

    rewritten = isPair
        ? (rewritten & ~(0x7fu << 15)) ^ (3u << 23)      // Post-index -> Signed offset
        : (rewritten & ~(0xfffu << 10)) | (1u << 24);    // Post-index -> Unsigned offset

    writer.WriteInstruction(rewritten);

    // Write back happens after the access for post-indexed forms.
    WriteAddConstant(ref asm, guestBase, guestBase, writeBackAmount);

    regAlloc.FreeTempGprRegister(scratchIndex);
}
// Rewrites a pre-indexed access: the guest base register is advanced by the immediate first,
// then the access is performed at the translated (already updated) address with no offset.
private static void RewritePreIndexedMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
{
    // Pair instructions (two transfer registers) use a scaled 7-bit immediate,
    // all others a 9-bit one.
    bool isPair = flags.HasFlag(InstFlags.Rt2);
    int writeBackAmount = isPair ? ExtractSImm7Scaled(flags, encoding) : ExtractSImm9(encoding);

    int scratchIndex = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratchIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    // Write back happens before the access for pre-indexed forms.
    WriteAddConstant(ref asm, guestBase, guestBase, writeBackAmount);
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratchIndex);

    rewritten = isPair
        ? rewritten & ~((0x7fu << 15) | (1u << 23))      // Pre-index -> Signed offset
        : (rewritten & ~(0xfffu << 10)) | (1u << 24);    // Pre-index -> Unsigned offset

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratchIndex);
}
// Rewrites an access whose offset is a scaled signed 7-bit immediate stored at bits 21:15.
private static void RewriteSignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
    => RewriteMemoryInstruction(
        asBits,
        mmType,
        writer,
        regAlloc,
        encoding,
        imm: ExtractSImm7Scaled(flags, encoding),
        immMask: 0x7fu << 15);
// Rewrites an access whose offset is a scaled unsigned 12-bit immediate stored at bits 21:10.
private static void RewriteUnsignedScaledMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstFlags flags, uint encoding)
    => RewriteMemoryInstruction(
        asBits,
        mmType,
        writer,
        regAlloc,
        encoding,
        imm: ExtractUImm12Scaled(flags, encoding),
        immMask: 0xfffu << 10);
// Rewrites an access that addresses memory through the base register alone
// (no immediate to extract and no immediate bits to clear).
private static void RewriteBaseRegisterMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
    => RewriteMemoryInstruction(asBits, mmType, writer, regAlloc, encoding, imm: 0, immMask: 0u);
// Rewrites an access whose offset is an unscaled signed 9-bit immediate stored at bits 20:12.
private static void RewriteBasePlusOffsetMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
    => RewriteMemoryInstruction(
        asBits,
        mmType,
        writer,
        regAlloc,
        encoding,
        imm: ExtractSImm9(encoding),
        immMask: 0x1ffu << 12);
// Shared implementation for the "base register + immediate" forms.
//
// imm:     decoded immediate offset of the instruction.
// immMask: bits of the encoding that hold the immediate.
//
// When the offset can be folded, only the base is translated and the instruction keeps its
// own immediate. Otherwise the immediate is added to the base before the translation and
// the immediate bits of the instruction are cleared.
private static void RewriteMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding, int imm, uint immMask)
{
    int scratchIndex = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratchIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);

    Assembler asm = new(writer);

    bool keepImmediateInInstruction = CanFoldOffset(mmType, imm);
    int translatedOffset = keepImmediateInInstruction ? 0 : imm;

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase, translatedOffset);

    uint rewritten = RegisterUtils.ReplaceRn(encoding, scratchIndex);

    if (!keepImmediateInInstruction)
    {
        // The offset is already part of the translated address, clear it.
        rewritten &= ~immMask;
    }

    writer.WriteInstruction(rewritten);

    regAlloc.FreeTempGprRegister(scratchIndex);
}
// Rewrites a PC relative instruction (ADR, ADRP and the literal loads/prefetch).
// The target address is computed at translation time from the guest PC, so the
// rewritten code does not depend on the host PC.
//
// name:     selects how the immediate is decoded and which operation is emitted.
// pc:       guest address of the instruction.
// encoding: raw instruction encoding; the low 5 bits hold the target register index
//           (or the prefetch operation for PrfmLit).
private static void RewriteLiteralMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, InstName name, ulong pc, uint encoding)
{
    Assembler asm = new(writer);

    ulong targetAddress;
    long imm;
    int rtIndex = (int)(encoding & 0x1f);

    // Index 31 is the zero register only for the forms that target a general purpose register,
    // in which case the result is discarded and nothing needs to be emitted.
    // For LdrLitFpsimd index 31 is the vector register V31, and for PrfmLit the field
    // holds the prefetch operation, so neither of those can be skipped.
    if (rtIndex == RegisterUtils.ZrIndex && name != InstName.PrfmLit && name != InstName.LdrLitFpsimd)
    {
        return;
    }

    Operand rt;

    if (name == InstName.LdrLitFpsimd)
    {
        uint opc = encoding >> 30;

        // TODO: Undefined if opc is invalid?
        rt = new(rtIndex, RegisterType.Vector, opc switch
        {
            0 => OperandType.FP32,
            1 => OperandType.FP64,
            _ => OperandType.V128,
        });
    }
    else if (name == InstName.LdrLitGen && (encoding & (1u << 30)) == 0)
    {
        // 32-bit variant of the general purpose literal load (bit 30 clear).
        // It must load 4 bytes and zero the upper half of the register,
        // which requires a 32-bit destination operand.
        rt = new(rtIndex, RegisterType.Integer, OperandType.I32);
    }
    else
    {
        rt = new(rtIndex, RegisterType.Integer, OperandType.I64);
    }

    switch (name)
    {
        case InstName.Adr:
        case InstName.Adrp:
            // 21-bit immediate: low 2 bits at bits 30:29, high 19 bits at bits 23:5.
            imm = ((long)(encoding >> 29) & 3) | ((long)(encoding >> 3) & 0x1ffffc);
            imm <<= 43;

            if (name == InstName.Adrp)
            {
                // Sign extend and scale by the 4KB page size, relative to the page of the PC.
                imm >>= 31;
                targetAddress = (pc & ~0xfffUL) + (ulong)imm;
            }
            else
            {
                // Sign extend, byte offset relative to the PC.
                imm >>= 43;
                targetAddress = pc + (ulong)imm;
            }

            asm.Mov(rt, targetAddress);
            break;

        case InstName.LdrLitGen:
        case InstName.LdrswLit:
        case InstName.LdrLitFpsimd:
        case InstName.PrfmLit:
            // 19-bit word offset at bits 23:5, sign extended and multiplied by 4.
            imm = encoding & ~0x1fu;
            imm <<= 40;
            imm >>= 43;

            targetAddress = pc + (ulong)imm;

            int tempRegister = regAlloc.AllocateTempGprRegister();
            Operand rn = new(tempRegister, RegisterType.Integer, OperandType.I64);

            WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, rn, targetAddress);

            switch (name)
            {
                case InstName.LdrLitGen:
                case InstName.LdrLitFpsimd:
                    asm.LdrRiUn(rt, rn, 0);
                    break;
                case InstName.LdrswLit:
                    asm.LdrswRiUn(rt, rn, 0);
                    break;
                case InstName.PrfmLit:
                    asm.PrfmR(rt, rn);
                    break;
            }

            regAlloc.FreeTempGprRegister(tempRegister);
            break;

        default:
            Debug.Fail($"Invalid literal memory instruction '{name}'.");
            break;
    }
}
// Rewrites a post-indexed SIMD structure load/store.
// The access is emitted in its "no offset" form using the translated base address,
// then the guest base register is advanced either by the offset register, or, when the
// offset register field is 31, by the total number of bytes transferred.
private static void RewriteStructPostIndexedRegMemoryInstruction(int asBits, MemoryManagerType mmType, CodeWriter writer, RegisterAllocator regAlloc, uint encoding)
{
    // TODO: Some unallocated encoding cases.
    // The unallocated combinations of the size/scale/S/L fields are not rejected here.

    int scratchIndex = regAlloc.AllocateTempGprRegister();

    Operand hostBase = new(scratchIndex, RegisterType.Integer, OperandType.I64);
    Operand guestBase = new(RegisterUtils.ExtractRn(encoding), RegisterType.Integer, OperandType.I64);
    int rmIndex = RegisterUtils.ExtractRm(encoding);

    Assembler asm = new(writer);

    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, hostBase, guestBase);

    encoding = RegisterUtils.ReplaceRn(encoding, scratchIndex);
    encoding &= ~((0x1fu << 16) | (1u << 23)); // Post-index -> No offset

    writer.WriteInstruction(encoding);

    if (rmIndex != RegisterUtils.ZrIndex)
    {
        // Register post-index.
        Operand guestIndex = new(rmIndex, RegisterType.Integer, OperandType.I64);

        asm.Add(guestBase, guestBase, guestIndex);
    }
    else
    {
        // Immediate post-index: the amount is implied by the transfer size.
        int transferSize;

        bool isSingleStruct = (encoding & (1u << 24)) != 0;

        if (isSingleStruct)
        {
            int structElems = (int)(((encoding >> 12) & 2u) | ((encoding >> 21) & 1u)) + 1;
            int size = (int)(encoding >> 10) & 3;
            int scale = (int)(encoding >> 14) & 3;

            int elemSizeLog2 = scale switch
            {
                2 => (size & 1) != 0 ? 3 : 2, // Bit 0 of size selects 64-bit over 32-bit elements.
                3 => size,                    // Element size comes from the size field.
                _ => scale,
            };

            transferSize = (1 << elemSizeLog2) * structElems;
        }
        else
        {
            (int repeats, int structElems) = ((encoding >> 12) & 0xf) switch
            {
                0b0000 => (1, 4),
                0b0010 => (4, 1),
                0b0100 => (1, 3),
                0b0110 => (3, 1),
                0b0111 => (1, 1),
                0b1000 => (1, 2),
                0b1010 => (2, 1),
                _ => (0, 0), // Undef.
            };

            bool isQuad = (encoding & (1u << 30)) != 0;

            transferSize = repeats * (isQuad ? 16 : 8) * structElems;
        }

        asm.Add(guestBase, guestBase, new Operand(OperandKind.Constant, OperandType.I32, (ulong)transferSize));
    }

    regAlloc.FreeTempGprRegister(scratchIndex);
}
// Emits code that computes "guestAddress + offset" and translates the result into a host
// address stored on the destination register.
// A zero offset translates the guest address directly.
private static void WriteAddressTranslation(
    int asBits,
    MemoryManagerType mmType,
    RegisterAllocator regAlloc,
    ref Assembler asm,
    Operand destination,
    Operand guestAddress,
    int offset)
{
    if (offset == 0)
    {
        WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, guestAddress);

        return;
    }

    // They are assumed to be on different registers, otherwise this operation will thrash the address.
    Debug.Assert(destination.Value != guestAddress.Value);

    if (Math.Abs(offset) < 0x1000)
    {
        // Encode as 12-bit immediate.
        WriteAddConstant(ref asm, destination, guestAddress, offset);
    }
    else
    {
        // Too high to encode as 12-bit immediate, do a separate move.
        asm.Mov(destination, (ulong)offset);
        asm.Add(destination, destination, guestAddress);
    }

    // The destination now holds the full guest address.
    WriteAddressTranslation(asBits, mmType, regAlloc, ref asm, destination, destination);
}
// Emits code that translates a guest address known at translation time:
// the constant is first moved into the destination register, then translated in place.
private static void WriteAddressTranslation(int asBits, MemoryManagerType mmType, RegisterAllocator regAlloc, ref Assembler asm, Operand destination, ulong guestAddress)
{
    asm.Mov(destination, guestAddress);

    WriteAddressTranslation(
        asBits,
        mmType,
        regAlloc,
        ref asm,
        destination: destination,
        guestAddress: destination);
}
// Emits code that translates the guest address on a register into a host address.
// Both supported modes add the value of the fixed page table register to the guest address;
// HostMapped additionally masks the guest address down to its low asBits bits first.
// Any other memory manager type throws NotImplementedException.
private static void WriteAddressTranslation(int asBits, MemoryManagerType mmType, RegisterAllocator regAlloc, ref Assembler asm, Operand destination, Operand guestAddress)
{
    Operand basePointer = new(regAlloc.FixedPageTableRegister, RegisterType.Integer, OperandType.I64);

    bool maskAddress = mmType == MemoryManagerType.HostMapped;

    if (!maskAddress && mmType != MemoryManagerType.HostMappedUnsafe)
    {
        throw new NotImplementedException(mmType.ToString());
    }

    if (maskAddress)
    {
        Operand addressMask = new Operand(OperandKind.Constant, OperandType.I64, ulong.MaxValue >> (64 - asBits));

        asm.And(destination, guestAddress, addressMask);

        guestAddress = destination;
    }

    asm.Add(destination, basePointer, guestAddress);
}
// Emits "rd = rn + value" with an immediate operand.
// Negative values are emitted as a subtraction of their magnitude.
private static void WriteAddConstant(ref Assembler asm, Operand rd, Operand rn, int value)
{
    bool isNegative = value < 0;

    Operand magnitude = new Operand(OperandKind.Constant, OperandType.I32, (ulong)(isNegative ? -value : value));

    if (isNegative)
    {
        asm.Sub(rd, rn, magnitude);
    }
    else
    {
        asm.Add(rd, rn, magnitude);
    }
}
// Tells whether the immediate offset of a memory instruction can be left on the instruction
// itself instead of being added before the address translation.
// Only the memory manager type matters here, the offset value is currently not inspected.
private static bool CanFoldOffset(MemoryManagerType mmType, int offset)
    => mmType == MemoryManagerType.HostMappedUnsafe;
// Extracts the signed 7-bit immediate of a pair instruction and scales it by the access size.
// The scale is 4 bytes shifted left by opc, where opc is the top 2 bits of the encoding
// for FP/SIMD instructions, and the top bit otherwise.
private static int ExtractSImm7Scaled(InstFlags flags, uint encoding)
{
    int opcShift = flags.HasFlag(InstFlags.FpSimd) ? 30 : 31;
    uint opc = encoding >> opcShift;
    int scaleLog2 = (int)(2 + opc);

    return ExtractSImm7(encoding) << scaleLog2;
}
// Extracts the signed 7-bit immediate stored at bits 21:15 of the encoding.
private static int ExtractSImm7(uint encoding)
{
    // Move bit 21 to the sign bit, then shift back arithmetically to sign extend.
    return (int)(encoding << 10) >> 25;
}
// Extracts the signed 9-bit immediate stored at bits 20:12 of the encoding.
private static int ExtractSImm9(uint encoding)
{
    // Move bit 20 to the sign bit, then shift back arithmetically to sign extend.
    return (int)(encoding << 11) >> 23;
}
// Extracts the unsigned 12-bit immediate and scales it by the access size.
// The size (as a shift amount) is the top 2 bits of the encoding; FP/SIMD instructions
// contribute an additional high size bit taken from bit 23.
private static int ExtractUImm12Scaled(InstFlags flags, uint encoding)
{
    uint sizeLog2 = encoding >> 30;

    sizeLog2 |= flags.HasFlag(InstFlags.FpSimd) ? (encoding >> 21) & 4u : 0u;

    return ExtractUImm12(encoding) << (int)sizeLog2;
}
// Extracts the unsigned 12-bit immediate stored at bits 21:10 of the encoding.
private static int ExtractUImm12(uint encoding)
    => (int)((encoding >> 10) & 0xfffu);
}
}

View File

@ -0,0 +1,610 @@
using ARMeilleure.Common;
using Ryujinx.Cpu.LightningJit.CodeGen;
using Ryujinx.Cpu.LightningJit.CodeGen.Arm64;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Arm64.Target.Arm64
{
static class InstEmitSystem
{
// Signature of the native handlers called for BRK, SVC and UDF
// (NativeInterface.Break, NativeInterface.SupervisorCall and NativeInterface.Undefined).
private delegate void SoftwareInterruptHandler(ulong address, int imm);
// Signature of NativeInterface.GetCntpctEl0, called to emulate reads of cntpct_el0.
private delegate ulong Get64();
// Signature of NativeInterface.CheckSynchronization, called from sync points.
private delegate bool GetBool();
// Rewrites a system instruction.
// BRK, SVC and UDF become calls to the matching native handler followed by a sync point.
// Accesses to the thread pointer registers are redirected to the native context,
// ctr_el0 reads return a fixed value on Apple operating systems, and cntpct_el0 reads
// call into the emulator. Everything else is written to the output unchanged.
//
// writer:          destination of the generated instructions.
// regAlloc:        register allocator, provides the fixed context register.
// tailMerger:      used by the sync point to emit its early return.
// name:            name of the instruction being rewritten.
// pc:              guest address of the instruction, passed to the native handlers.
// encoding:        raw 32-bit instruction encoding.
// spillBaseOffset: stack offset of the area used to save registers around calls.
public static void RewriteInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset)
{
    if (name == InstName.Brk)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetBrkHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.Svc)
    {
        // The 16-bit immediate at bits 20:5 is the supervisor call number.
        uint svcId = (ushort)(encoding >> 5);

        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetSvcHandlerPtr(), spillBaseOffset, null, pc, svcId);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if (name == InstName.UdfPermUndef)
    {
        Assembler asm = new(writer);

        WriteCall(ref asm, regAlloc, GetUdfHandlerPtr(), spillBaseOffset, null, pc, encoding);
        WriteSyncPoint(writer, ref asm, regAlloc, tailMerger, spillBaseOffset);
    }
    else if ((encoding & ~0x1f) == 0xd53bd060) // mrs xt, tpidrro_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no effect.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrroEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53bd040) // mrs xt, tpidr_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no effect.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            asm.LdrRiUn(Register((int)rd), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53b0020 && IsAppleOS()) // mrs xt, ctr_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no effect.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            // TODO: Use host value? But that register can't be accessed on macOS...
            asm.Mov(Register((int)rd, OperandType.I32), 0x8444c004);
        }
    }
    else if ((encoding & ~0x1f) == 0xd53be020) // mrs xt, cntpct_el0
    {
        uint rd = encoding & 0x1f;

        // A read into the zero register has no effect.
        if (rd != RegisterUtils.ZrIndex)
        {
            Assembler asm = new(writer);

            WriteCall(ref asm, regAlloc, GetCntpctEl0Ptr(), spillBaseOffset, (int)rd);
        }
    }
    else if ((encoding & ~0x1f) == 0xd51bd040) // msr tpidr_el0, xt
    {
        // Here the register is the source of the write. Index 31 is the zero register,
        // so "msr tpidr_el0, xzr" must still store zero rather than being dropped.
        // The store below encodes index 31 as the zero register as well.
        uint rt = encoding & 0x1f;

        Assembler asm = new(writer);

        asm.StrRiUn(Register((int)rt), Register(regAlloc.FixedContextRegister), NativeContextOffsets.TpidrEl0Offset);
    }
    else
    {
        writer.WriteInstruction(encoding);
    }
}
// Tells whether the instruction with the given encoding is reported as requiring a call:
// any SVC, reads of cntpct_el0, and (on Apple operating systems only) reads of ctr_el0.
public static bool NeedsCall(uint encoding)
{
    // svc #imm16, the immediate at bits 20:5 is ignored.
    if ((encoding & ~(0xffffu << 5)) == 0xd4000001u)
    {
        return true;
    }

    // System register reads, the target register at bits 4:0 is ignored.
    return (encoding & ~0x1fu) switch
    {
        0xd53b0020u => IsAppleOS(), // mrs xt, ctr_el0
        0xd53be020u => true,        // mrs xt, cntpct_el0
        _ => false,
    };
}
// Tells whether the current operating system is macOS or iOS.
private static bool IsAppleOS()
    => OperatingSystem.IsIOS() || OperatingSystem.IsMacOS();
// Tells whether the instruction requires the context store/load; only SVC does.
public static bool NeedsContextStoreLoad(InstName name)
    => name == InstName.Svc;
// Gets a native function pointer for NativeInterface.Break.
private static IntPtr GetBrkHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Break);
// Gets a native function pointer for NativeInterface.SupervisorCall.
private static IntPtr GetSvcHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.SupervisorCall);
// Gets a native function pointer for NativeInterface.Undefined.
private static IntPtr GetUdfHandlerPtr()
    => Marshal.GetFunctionPointerForDelegate<SoftwareInterruptHandler>(NativeInterface.Undefined);
// Gets a native function pointer for NativeInterface.GetCntpctEl0.
private static IntPtr GetCntpctEl0Ptr()
    => Marshal.GetFunctionPointerForDelegate<Get64>(NativeInterface.GetCntpctEl0);
// Gets a native function pointer for NativeInterface.CheckSynchronization.
private static IntPtr CheckSynchronizationPtr()
    => Marshal.GetFunctionPointerForDelegate<GetBool>(NativeInterface.CheckSynchronization);
// Emits a sync point using a new assembler created over the given writer.
public static void WriteSyncPoint(CodeWriter writer, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    Assembler syncAsm = new(writer);

    WriteSyncPoint(writer, ref syncAsm, regAlloc, tailMerger, spillBaseOffset);
}
// Emits a sync point.
// The generated code loads the counter stored on the native context. When the counter is zero,
// registers are spilled, NativeInterface.CheckSynchronization is called, registers are filled
// again and the counter is reloaded. In all cases the counter is then decremented by one
// and written back to the native context.
private static void WriteSyncPoint(CodeWriter writer, ref Assembler asm, RegisterAllocator regAlloc, TailMerger tailMerger, int spillBaseOffset)
{
    int counterIndex = regAlloc.AllocateTempGprRegister();
    Operand counter = Register(counterIndex, OperandType.I32);

    asm.LdrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // The branch is written with a zero offset, the real offset is patched in below
    // once the size of the slow path is known.
    int skipBranchIndex = writer.InstructionPointer;
    asm.Cbnz(counter, 0);

    // Slow path. The counter register is excluded from the spill/fill,
    // and also serves as temporary register for them.
    WriteSpill(ref asm, regAlloc, 1u << counterIndex, spillBaseOffset, counterIndex);

    // Register 0 holds the call target, unless it is the counter register.
    Operand callTarget = Register(counterIndex == 0 ? 1 : 0);

    asm.Mov(callTarget, (ulong)CheckSynchronizationPtr());
    asm.Blr(callTarget);

    // NOTE(review): presumably emits a return taken when W0 (the call result) is zero — confirm against TailMerger.
    tailMerger.AddConditionalZeroReturn(writer, asm, Register(0, OperandType.I32));

    WriteFill(ref asm, regAlloc, 1u << counterIndex, spillBaseOffset, counterIndex);

    asm.LdrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    // Patch the branch so that it skips the slow path and lands here.
    uint skipBranch = writer.ReadInstructionAt(skipBranchIndex);
    uint offsetField = ((uint)(writer.InstructionPointer - skipBranchIndex) & 0x7ffff) << 5;

    writer.WriteInstructionAt(skipBranchIndex, skipBranch | offsetField);

    asm.Sub(counter, counter, new Operand(OperandKind.Constant, OperandType.I32, 1));
    asm.StrRiUn(counter, Register(regAlloc.FixedContextRegister), NativeContextOffsets.CounterOffset);

    regAlloc.FreeTempGprRegister(counterIndex);
}
// Rewrites a branch that leaves the current function (B, BL, BR or BLR) into a call
// (or tail call, when isTail is set) to the target guest function.
// Immediate branches use "pc + offset" as target, register branches use the (remapped)
// register, or constant zero when the register field is 31.
// Any other instruction name triggers a debug failure and emits nothing.
public static void RewriteCallInstruction(
    CodeWriter writer,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr dispatchStubPtr,
    InstName name,
    ulong pc,
    uint encoding,
    int spillBaseOffset,
    bool isTail = false)
{
    Assembler asm = new(writer);

    Operand target;

    switch (name)
    {
        case InstName.BUncond:
        case InstName.Bl:
            int branchOffset = ImmUtils.ExtractSImm26Times4(encoding);

            target = new Operand(OperandKind.Constant, OperandType.I64, pc + (ulong)branchOffset);
            break;

        case InstName.Blr:
        case InstName.Br:
            int rnIndex = RegisterUtils.ExtractRn(encoding);

            if (rnIndex == RegisterUtils.ZrIndex)
            {
                // The zero register always reads as zero.
                target = new Operand(OperandKind.Constant, OperandType.I64, 0UL);
            }
            else
            {
                target = Register(regAlloc.RemapReservedGprRegister(rnIndex));
            }
            break;

        default:
            Debug.Fail($"Unknown branch instruction \"{name}\".");
            return;
    }

    WriteCallWithGuestAddress(
        writer,
        ref asm,
        regAlloc,
        tailMerger,
        writeEpilogue,
        funcTable,
        dispatchStubPtr,
        spillBaseOffset,
        pc,
        target,
        isTail);
}
// Emits a call (or a tail call) to the function at the given guest address.
//
// The guest address is first stored on the dispatch address field of the native context,
// and the context pointer is passed on register 0.
// For constant addresses with a function table, the host function pointer is loaded from
// the table entry of that address; otherwise funcPtr is called.
// For regular calls, registers are spilled before and filled after the call, and the value
// returned on register 0 is compared against the address of the next instruction; the tail
// merger is given a conditional return taken when they differ.
// For tail calls, the epilogue is written and a plain branch is used instead.
public unsafe static void WriteCallWithGuestAddress(
    CodeWriter writer,
    ref Assembler asm,
    RegisterAllocator regAlloc,
    TailMerger tailMerger,
    Action writeEpilogue,
    AddressTable<ulong> funcTable,
    IntPtr funcPtr,
    int spillBaseOffset,
    ulong pc,
    Operand guestAddress,
    bool isTail = false)
{
    bool isConstantTarget = guestAddress.Kind == OperandKind.Constant;

    if (!isConstantTarget)
    {
        asm.StrRiUn(guestAddress, Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);
    }
    else
    {
        // Constants need to go through a register to be stored.
        int addressIndex = regAlloc.AllocateTempGprRegister();

        asm.Mov(Register(addressIndex), guestAddress.Value);
        asm.StrRiUn(Register(addressIndex), Register(regAlloc.FixedContextRegister), NativeContextOffsets.DispatchAddressOffset);

        regAlloc.FreeTempGprRegister(addressIndex);
    }

    // Register 1 holds the call target, unless it is the context register.
    int callTargetIndex = regAlloc.FixedContextRegister == 1 ? 2 : 1;

    if (!isTail)
    {
        WriteSpillSkipContext(ref asm, regAlloc, spillBaseOffset);
    }

    Operand callTarget = Register(callTargetIndex);

    if (regAlloc.FixedContextRegister != 0)
    {
        asm.Mov(Register(0), Register(regAlloc.FixedContextRegister));
    }

    if (isConstantTarget && funcTable != null)
    {
        // Load the function pointer from its table entry, splitting the entry address
        // into a 4KB aligned base and the offset inside of the page.
        ulong entryAddress = (ulong)Unsafe.AsPointer(ref funcTable.GetValue(guestAddress.Value));

        asm.Mov(callTarget, entryAddress & ~0xfffUL);
        asm.LdrRiUn(callTarget, callTarget, (int)(entryAddress & 0xfffUL));
    }
    else
    {
        asm.Mov(callTarget, (ulong)funcPtr);
    }

    if (!isTail)
    {
        asm.Blr(callTarget);

        ulong returnAddress = pc + 4UL;

        asm.Mov(callTarget, returnAddress);
        asm.Cmp(Register(0), callTarget);

        tailMerger.AddConditionalReturn(writer, asm, ArmCondition.Ne);

        WriteFillSkipContext(ref asm, regAlloc, spillBaseOffset);

        return;
    }

    writeEpilogue();

    asm.Br(callTarget);
}
// Emits a call to a native function taking constant integer arguments.
//
// funcPtr:        address of the function to call.
// resultRegister: register that receives the value returned on register 0, or null to discard it.
//                 This register is excluded from the spill and fill around the call.
// callArgs:       argument values, passed on registers 0 to N-1 in order.
private static void WriteCall(
    ref Assembler asm,
    RegisterAllocator regAlloc,
    IntPtr funcPtr,
    int spillBaseOffset,
    int? resultRegister,
    params ulong[] callArgs)
{
    uint resultMask = resultRegister.HasValue ? 1u << resultRegister.Value : 0u;

    // The function address goes on the first register after the arguments,
    // or on the next one if that register will hold the result.
    int callTargetIndex = callArgs.Length;

    if (resultRegister == callTargetIndex)
    {
        callTargetIndex++;
    }

    WriteSpill(ref asm, regAlloc, resultMask, spillBaseOffset, callTargetIndex);

    // We only support up to 7 arguments right now.
    // ABI defines the first 8 integer arguments to be passed on registers X0-X7.
    // We need at least one register to put the function address on, so that reduces the number of
    // registers we can use for that by one.
    Debug.Assert(callArgs.Length < 8);

    for (int argIndex = 0; argIndex < callArgs.Length; argIndex++)
    {
        asm.Mov(Register(argIndex), callArgs[argIndex]);
    }

    Operand callTarget = Register(callTargetIndex);

    asm.Mov(callTarget, (ulong)funcPtr);
    asm.Blr(callTarget);

    // The result is returned on register 0, move it if it is wanted elsewhere.
    if (resultRegister is int resultIndex && resultIndex != 0)
    {
        asm.Mov(Register(resultIndex), Register(0));
    }

    WriteFill(ref asm, regAlloc, resultMask, spillBaseOffset, callTargetIndex);
}
// Emits the stores saving registers to the stack (spill direction of WriteSpillOrFill).
private static void WriteSpill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: true);
// Emits the loads restoring registers from the stack (fill direction of WriteSpillOrFill).
private static void WriteFill(ref Assembler asm, RegisterAllocator regAlloc, uint exceptMask, int spillOffset, int tempRegister)
    => WriteSpillOrFill(ref asm, regAlloc, exceptMask, spillOffset, tempRegister, spill: false);
// Emits the code that saves (spill) or restores (fill) registers to/from the stack.
//
// The stack area starting at spillOffset is laid out as follows:
// - 8 bytes for each general purpose register of AllGprMask that is neither callee saved
//   nor part of exceptMask, in ascending register order;
// - 8 bytes for the NZCV flags, only if AllPStateMask is not zero;
// - padding up to a multiple of 16 bytes;
// - 16 bytes for each FP/SIMD register of AllFpSimdMask, in ascending register order.
//
// exceptMask:   general purpose registers that must not be saved or restored.
// spillOffset:  stack pointer relative offset of the start of the area.
// tempRegister: register used to move the NZCV flags to and from memory.
// spill:        true to store the registers, false to load them.
private static void WriteSpillOrFill(
ref Assembler asm,
RegisterAllocator regAlloc,
uint exceptMask,
int spillOffset,
int tempRegister,
bool spill)
{
uint gprMask = regAlloc.AllGprMask & ~(AbiConstants.GprCalleeSavedRegsMask | exceptMask);
if (regAlloc.AllPStateMask != 0 && !spill)
{
// We must reload the status register before reloading the GPRs,
// since we might otherwise trash one of them by using it as temp register.
// The flags slot comes right after the GPR slots, which take 8 bytes per register.
Operand rt = Register(tempRegister, OperandType.I32);
asm.LdrRiUn(rt, Register(RegisterUtils.SpIndex), spillOffset + BitOperations.PopCount(gprMask) * 8);
asm.MsrNzcv(rt);
}
// Process the general purpose registers from the lowest to the highest set bit.
while (gprMask != 0)
{
int reg = BitOperations.TrailingZeroCount(gprMask);
// Use a pair instruction when the next register is also on the mask
// and the offset is below the limit, otherwise fall back to a single register access.
if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
if (spill)
{
asm.StpRiUn(
Register(regAlloc.RemapReservedGprRegister(reg)),
Register(regAlloc.RemapReservedGprRegister(reg + 1)),
Register(RegisterUtils.SpIndex),
spillOffset);
}
else
{
asm.LdpRiUn(
Register(regAlloc.RemapReservedGprRegister(reg)),
Register(regAlloc.RemapReservedGprRegister(reg + 1)),
Register(RegisterUtils.SpIndex),
spillOffset);
}
gprMask &= ~(3u << reg);
spillOffset += 16;
}
else
{
if (spill)
{
asm.StrRiUn(Register(regAlloc.RemapReservedGprRegister(reg)), Register(RegisterUtils.SpIndex), spillOffset);
}
else
{
asm.LdrRiUn(Register(regAlloc.RemapReservedGprRegister(reg)), Register(RegisterUtils.SpIndex), spillOffset);
}
gprMask &= ~(1u << reg);
spillOffset += 8;
}
}
if (regAlloc.AllPStateMask != 0)
{
if (spill)
{
// All GPRs were stored above, so the temp register can be overwritten here.
Operand rt = Register(tempRegister, OperandType.I32);
asm.MrsNzcv(rt);
asm.StrRiUn(rt, Register(RegisterUtils.SpIndex), spillOffset);
}
// The flags slot is skipped on fill too, as the flags were already reloaded above.
spillOffset += 8;
}
// Round the offset up to a multiple of 16 bytes before the vector registers.
if ((spillOffset & 8) != 0)
{
spillOffset += 8;
}
// Process the FP/SIMD registers the same way, as full 128-bit registers.
uint fpSimdMask = regAlloc.AllFpSimdMask;
while (fpSimdMask != 0)
{
int reg = BitOperations.TrailingZeroCount(fpSimdMask);
if (reg < 31 && (fpSimdMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
if (spill)
{
asm.StpRiUn(
Register(reg, OperandType.V128),
Register(reg + 1, OperandType.V128),
Register(RegisterUtils.SpIndex),
spillOffset);
}
else
{
asm.LdpRiUn(
Register(reg, OperandType.V128),
Register(reg + 1, OperandType.V128),
Register(RegisterUtils.SpIndex),
spillOffset);
}
fpSimdMask &= ~(3u << reg);
spillOffset += 32;
}
else
{
if (spill)
{
asm.StrRiUn(Register(reg, OperandType.V128), Register(RegisterUtils.SpIndex), spillOffset);
}
else
{
asm.LdrRiUn(Register(reg, OperandType.V128), Register(RegisterUtils.SpIndex), spillOffset);
}
fpSimdMask &= ~(1u << reg);
spillOffset += 16;
}
}
}
// Emits the stores done before a guest function call (spill direction of WriteSpillOrFillSkipContext).
private static void WriteSpillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: true);
// Emits the loads done after a guest function call (fill direction of WriteSpillOrFillSkipContext).
private static void WriteFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset)
    => WriteSpillOrFillSkipContext(ref asm, regAlloc, spillOffset, spill: false);
// Emits the code that saves (spill) or restores (fill) registers around a guest function call.
// Only the fixed context and page table registers are considered, and only if they are
// part of AllGprMask and are not callee saved. Each register takes 8 bytes of the stack
// area starting at spillOffset, in ascending register order.
private static void WriteSpillOrFillSkipContext(ref Assembler asm, RegisterAllocator regAlloc, int spillOffset, bool spill)
{
uint gprMask = regAlloc.AllGprMask & ((1u << regAlloc.FixedContextRegister) | (1u << regAlloc.FixedPageTableRegister));
gprMask &= ~AbiConstants.GprCalleeSavedRegsMask;
// Process the registers from the lowest to the highest set bit.
while (gprMask != 0)
{
int reg = BitOperations.TrailingZeroCount(gprMask);
// Use a pair instruction when the next register is also on the mask
// and the offset is below the limit, otherwise fall back to a single register access.
if (reg < 31 && (gprMask & (2u << reg)) != 0 && spillOffset < RegisterSaveRestore.Encodable9BitsOffsetLimit)
{
if (spill)
{
asm.StpRiUn(
Register(regAlloc.RemapReservedGprRegister(reg)),
Register(regAlloc.RemapReservedGprRegister(reg + 1)),
Register(RegisterUtils.SpIndex),
spillOffset);
}
else
{
asm.LdpRiUn(
Register(regAlloc.RemapReservedGprRegister(reg)),
Register(regAlloc.RemapReservedGprRegister(reg + 1)),
Register(RegisterUtils.SpIndex),
spillOffset);
}
gprMask &= ~(3u << reg);
spillOffset += 16;
}
else
{
if (spill)
{
asm.StrRiUn(Register(regAlloc.RemapReservedGprRegister(reg)), Register(RegisterUtils.SpIndex), spillOffset);
}
else
{
asm.LdrRiUn(Register(regAlloc.RemapReservedGprRegister(reg)), Register(RegisterUtils.SpIndex), spillOffset);
}
gprMask &= ~(1u << reg);
spillOffset += 8;
}
}
}
// Creates an integer register operand with the given index and type (64-bit by default).
private static Operand Register(int register, OperandType type = OperandType.I64)
    => new(register, RegisterType.Integer, type);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
using System;
using System.Diagnostics.CodeAnalysis;
namespace Ryujinx.Cpu.LightningJit.Cache
{
// Describes a region of the JIT cache by its offset and size.
// Entries are ordered by offset only, the size takes no part in comparisons.
readonly struct CacheEntry : IComparable<CacheEntry>
{
    // Offset of the region.
    public int Offset { get; }

    // Size of the region.
    public int Size { get; }

    public CacheEntry(int offset, int size) => (Offset, Size) = (offset, size);

    // Orders entries by ascending offset.
    public int CompareTo([AllowNull] CacheEntry other) => Offset.CompareTo(other.Offset);
}
}

View File

@ -0,0 +1,136 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
namespace Ryujinx.Cpu.LightningJit.Cache
{
// Offset based first fit allocator for a fixed capacity memory region.
// Free memory is tracked as a list of non overlapping blocks sorted by offset,
// adjacent free blocks are merged when memory is freed.
class CacheMemoryAllocator
{
    // A free memory range. Blocks are ordered by offset only.
    private readonly struct MemoryBlock : IComparable<MemoryBlock>
    {
        public int Offset { get; }
        public int Size { get; }

        public MemoryBlock(int offset, int size)
        {
            Offset = offset;
            Size = size;
        }

        public int CompareTo([AllowNull] MemoryBlock other)
        {
            return Offset.CompareTo(other.Offset);
        }
    }

    // Free blocks, sorted by offset.
    private readonly List<MemoryBlock> _blocks = new();

    // Creates an allocator where the whole range from 0 to capacity is free.
    public CacheMemoryAllocator(int capacity)
    {
        _blocks.Add(new MemoryBlock(0, capacity));
    }

    // Allocates size bytes from the first free block that is large enough.
    // Returns the offset of the allocation, or -1 if no free block is large enough.
    public int Allocate(int size)
    {
        for (int i = 0; i < _blocks.Count; i++)
        {
            MemoryBlock block = _blocks[i];

            if (block.Size > size)
            {
                // Take the start of the block, the remainder stays free.
                _blocks[i] = new(block.Offset + size, block.Size - size);
                return block.Offset;
            }
            else if (block.Size == size)
            {
                _blocks.RemoveAt(i);
                return block.Offset;
            }
        }

        // We don't have enough free memory to perform the allocation.
        return -1;
    }

    // Marks a specific range as allocated. The range must be fully inside a single free block.
    // Throws ArgumentOutOfRangeException if there is no free block starting at or before offset.
    public void ForceAllocation(int offset, int size)
    {
        int index = _blocks.BinarySearch(new(offset, size));

        if (index < 0)
        {
            // No free block starts exactly at the offset. The complement is the index of the
            // first block starting after it, so the block that can contain the offset
            // is the one right before.
            index = ~index - 1;
        }

        if (index < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset), "No free block contains the requested range.");
        }

        int endOffset = offset + size;

        MemoryBlock block = _blocks[index];

        Debug.Assert(block.Offset <= offset && block.Offset + block.Size >= endOffset);

        if (offset > block.Offset && endOffset < block.Offset + block.Size)
        {
            // The range is in the middle, free memory remains on both sides.
            _blocks[index] = new(block.Offset, offset - block.Offset);
            _blocks.Insert(index + 1, new(endOffset, (block.Offset + block.Size) - endOffset));
        }
        else if (offset > block.Offset)
        {
            // The range is at the end, free memory remains before it.
            _blocks[index] = new(block.Offset, offset - block.Offset);
        }
        else if (endOffset < block.Offset + block.Size)
        {
            // The range is at the start, free memory remains after it.
            _blocks[index] = new(endOffset, (block.Offset + block.Size) - endOffset);
        }
        else
        {
            // The range covers the whole block.
            _blocks.RemoveAt(index);
        }
    }

    // Returns a previously allocated range to the allocator.
    public void Free(int offset, int size)
    {
        Insert(new MemoryBlock(offset, size));
    }

    // Inserts a free block on the sorted list, merging it with the free blocks
    // that are directly after and directly before it.
    private void Insert(MemoryBlock block)
    {
        int index = _blocks.BinarySearch(block);

        if (index < 0)
        {
            index = ~index;
        }

        if (index < _blocks.Count)
        {
            MemoryBlock next = _blocks[index];

            int endOffs = block.Offset + block.Size;

            if (next.Offset == endOffs)
            {
                block = new MemoryBlock(block.Offset, block.Size + next.Size);
                _blocks.RemoveAt(index);
            }
        }

        if (index > 0)
        {
            MemoryBlock prev = _blocks[index - 1];

            if (prev.Offset + prev.Size == block.Offset)
            {
                block = new MemoryBlock(block.Offset - prev.Size, block.Size + prev.Size);
                _blocks.RemoveAt(--index);
            }
        }

        _blocks.Insert(index, block);
    }

    // Removes all free blocks. No allocation can succeed until memory is freed again.
    public void Clear()
    {
        _blocks.Clear();
    }
}
}

View File

@ -0,0 +1,197 @@
using ARMeilleure.Memory;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
namespace Ryujinx.Cpu.LightningJit.Cache
{
static partial class JitCache
{
private static readonly int _pageSize = (int)MemoryBlock.GetPageSize();
private static readonly int _pageMask = _pageSize - 1;
private const int CodeAlignment = 4; // Bytes.
private const int CacheSize = 2047 * 1024 * 1024;
private static ReservedRegion _jitRegion;
private static JitCacheInvalidation _jitCacheInvalidator;
private static CacheMemoryAllocator _cacheAllocator;
private static readonly List<CacheEntry> _cacheEntries = new();
private static readonly object _lock = new();
private static bool _initialized;
[SupportedOSPlatform("windows")]
[LibraryImport("kernel32.dll", SetLastError = true)]
public static partial IntPtr FlushInstructionCache(IntPtr hProcess, IntPtr lpAddress, UIntPtr dwSize);
public static void Initialize(IJitMemoryAllocator allocator)
{
if (_initialized)
{
return;
}
lock (_lock)
{
if (_initialized)
{
return;
}
_jitRegion = new ReservedRegion(allocator, CacheSize);
if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS())
{
_jitCacheInvalidator = new JitCacheInvalidation(allocator);
}
_cacheAllocator = new CacheMemoryAllocator(CacheSize);
_initialized = true;
}
}
public unsafe static IntPtr Map(ReadOnlySpan<byte> code)
{
lock (_lock)
{
Debug.Assert(_initialized);
int funcOffset = Allocate(code.Length);
IntPtr funcPtr = _jitRegion.Pointer + funcOffset;
if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
unsafe
{
fixed (byte* codePtr = code)
{
JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
}
}
}
else
{
ReprotectAsWritable(funcOffset, code.Length);
code.CopyTo(new Span<byte>((void*)funcPtr, code.Length));
ReprotectAsExecutable(funcOffset, code.Length);
if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
FlushInstructionCache(Process.GetCurrentProcess().Handle, funcPtr, (UIntPtr)code.Length);
}
else
{
_jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length);
}
}
Add(funcOffset, code.Length);
return funcPtr;
}
}
public static void Unmap(IntPtr pointer)
{
lock (_lock)
{
Debug.Assert(_initialized);
int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64());
if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset)
{
_cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size));
_cacheEntries.RemoveAt(entryIndex);
}
}
}
private static void ReprotectAsWritable(int offset, int size)
{
int endOffs = offset + size;
int regionStart = offset & ~_pageMask;
int regionEnd = (endOffs + _pageMask) & ~_pageMask;
_jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart));
}
/// <summary>
/// Remaps, through <c>MapAsRx</c>, every page touched by the given byte range of the JIT region.
/// </summary>
/// <param name="offset">Offset of the first byte of the range inside the JIT region.</param>
/// <param name="size">Length of the range in bytes.</param>
private static void ReprotectAsExecutable(int offset, int size)
{
    // Round the start down and the end up so that whole pages are covered.
    int firstPage = offset & ~_pageMask;
    int lastPageEnd = (offset + size + _pageMask) & ~_pageMask;
    ulong length = (ulong)(lastPageEnd - firstPage);

    _jitRegion.Block.MapAsRx((ulong)firstPage, length);
}
/// <summary>
/// Reserves room for a function in the cache and makes sure the region is expanded far enough to hold it.
/// </summary>
/// <param name="codeSize">Size of the code in bytes, it is rounded up to the code alignment before allocating.</param>
/// <returns>Offset of the allocation inside the JIT region.</returns>
/// <exception cref="OutOfMemoryException">The allocator returned a negative offset.</exception>
private static int Allocate(int codeSize)
{
    int alignedSize = AlignCodeSize(codeSize);
    int allocOffset = _cacheAllocator.Allocate(alignedSize);

    if (allocOffset >= 0)
    {
        _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)alignedSize);

        return allocOffset;
    }

    throw new OutOfMemoryException("JIT Cache exhausted.");
}
/// <summary>
/// Rounds a code size up to the next multiple of <c>CodeAlignment</c>.
/// </summary>
/// <param name="codeSize">Size in bytes.</param>
/// <returns>The rounded size.</returns>
/// <exception cref="OverflowException">Adding the alignment padding overflows <see cref="int"/>.</exception>
private static int AlignCodeSize(int codeSize)
{
    int mask = CodeAlignment - 1;
    int padded = checked(codeSize + mask);

    return padded & ~mask;
}
/// <summary>
/// Inserts a new entry into the cache entry list at the position reported by a binary search.
/// </summary>
/// <param name="offset">Offset of the function inside the JIT region.</param>
/// <param name="size">Size of the function in bytes.</param>
private static void Add(int offset, int size)
{
    CacheEntry newEntry = new(offset, size);

    // A negative result is the bitwise complement of the index where the entry belongs.
    int searchResult = _cacheEntries.BinarySearch(newEntry);
    int insertAt = searchResult >= 0 ? searchResult : ~searchResult;

    _cacheEntries.Insert(insertAt, newEntry);
}
/// <summary>
/// Looks up the cache entry that matches <paramref name="offset"/>, or failing that the entry just before
/// the position where such an entry would be inserted.
/// </summary>
/// <remarks>
/// The returned entry is not checked against its size, callers have to verify that the offset really belongs to it.
/// </remarks>
/// <param name="offset">Offset inside the JIT region to search for.</param>
/// <param name="entry">Entry that was found, or the default value when nothing was found.</param>
/// <param name="entryIndex">Index of the entry in the entry list, or 0 when nothing was found.</param>
/// <returns>True if an entry was found, false otherwise.</returns>
public static bool TryFind(int offset, out CacheEntry entry, out int entryIndex)
{
    lock (_lock)
    {
        int position = _cacheEntries.BinarySearch(new CacheEntry(offset, 0));

        if (position < 0)
        {
            // No exact match: step back to the entry preceding the insertion point.
            position = ~position - 1;
        }

        if (position < 0)
        {
            entry = default;
            entryIndex = 0;

            return false;
        }

        entry = _cacheEntries[position];
        entryIndex = position;

        return true;
    }
}
}
}

View File

@ -0,0 +1,79 @@
using ARMeilleure.Memory;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Cpu.LightningJit.Cache
{
/// <summary>
/// Wraps a small Arm64 routine that performs cache maintenance over an address range.
/// On other process architectures the instance is inert and <see cref="Invalidate"/> does nothing.
/// </summary>
class JitCacheInvalidation
{
    // Arm64 machine code of the maintenance routine, called with the range start in x0 and the range end in x1.
    // As annotated below, it issues "dc cvau" over the range, then "ic ivau" over the range,
    // each loop followed by a barrier.
    private static readonly int[] _invalidationCode =
    {
        unchecked((int)0xd53b0022), // mrs x2, ctr_el0
        unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
        unchecked((int)0x52800083), // mov w3, #0x4
        unchecked((int)0x12000c45), // and w5, w2, #0xf
        unchecked((int)0x1ac42064), // lsl w4, w3, w4
        unchecked((int)0x51000482), // sub w2, w4, #0x1
        unchecked((int)0x8a220002), // bic x2, x0, x2
        unchecked((int)0x1ac52063), // lsl w3, w3, w5
        unchecked((int)0xeb01005f), // cmp x2, x1
        unchecked((int)0x93407c84), // sxtw x4, w4
        unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
        unchecked((int)0xd50b7b22), // dc cvau, x2
        unchecked((int)0x8b040042), // add x2, x2, x4
        unchecked((int)0xeb02003f), // cmp x1, x2
        unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
        unchecked((int)0xd5033b9f), // dsb ish
        unchecked((int)0x51000462), // sub w2, w3, #0x1
        unchecked((int)0x93407c63), // sxtw x3, w3
        unchecked((int)0x8a220000), // bic x0, x0, x2
        unchecked((int)0xeb00003f), // cmp x1, x0
        unchecked((int)0x540000a9), // b.ls 64 <exit>
        unchecked((int)0xd50b7520), // ic ivau, x0
        unchecked((int)0x8b030000), // add x0, x0, x3
        unchecked((int)0xeb00003f), // cmp x1, x0
        unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
        unchecked((int)0xd5033b9f), // dsb ish
        unchecked((int)0xd5033fdf), // isb
        unchecked((int)0xd65f03c0), // ret
    };

    private delegate void InvalidateCache(ulong start, ulong end);

    private readonly InvalidateCache _invalidateCache;
    private readonly ReservedRegion _invalidateCacheCodeRegion;
    private readonly bool _needsInvalidation;

    /// <summary>
    /// On Arm64, copies the maintenance routine into a freshly reserved region, maps it as RX and binds a delegate to it.
    /// </summary>
    /// <param name="allocator">Allocator used to reserve the region that holds the routine.</param>
    public JitCacheInvalidation(IJitMemoryAllocator allocator)
    {
        // On macOS and Windows, a different path is used to write to the JIT cache, which does the invalidation.
        if (RuntimeInformation.ProcessArchitecture != Architecture.Arm64)
        {
            return;
        }

        // Round the size of the routine up to the granularity of the reserved region.
        ulong granularityMask = (ulong)ReservedRegion.DefaultGranularity - 1;
        ulong codeSize = (ulong)_invalidationCode.Length * sizeof(int);
        ulong regionSize = (codeSize + granularityMask) & ~granularityMask;

        ReservedRegion region = new(allocator, regionSize);

        region.ExpandIfNeeded(regionSize);
        Marshal.Copy(_invalidationCode, 0, region.Pointer, _invalidationCode.Length);
        region.Block.MapAsRx(0, regionSize);

        _invalidateCacheCodeRegion = region;
        _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(region.Pointer);
        _needsInvalidation = true;
    }

    /// <summary>
    /// Runs the maintenance routine over <paramref name="size"/> bytes starting at <paramref name="basePointer"/>.
    /// Does nothing when the routine was not set up by the constructor.
    /// </summary>
    /// <param name="basePointer">Start address of the range.</param>
    /// <param name="size">Length of the range in bytes.</param>
    public void Invalidate(IntPtr basePointer, ulong size)
    {
        if (!_needsInvalidation)
        {
            return;
        }

        ulong start = (ulong)basePointer;

        _invalidateCache(start, start + size);
    }
}
}

View File

@ -0,0 +1,16 @@
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
namespace Ryujinx.Cpu.LightningJit.Cache
{
/// <summary>
/// Native imports used by the JIT cache code, annotated as supported on macOS.
/// </summary>
[SupportedOSPlatform("macos")]
static partial class JitSupportDarwin
{
/// <summary>
/// Binds to <c>armeilleure_jit_memcpy</c> from <c>libarmeilleure-jitsupport</c>.
/// Callers pass a destination address, a source address and a byte count.
/// </summary>
[LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
/// <summary>
/// Binds to <c>sys_icache_invalidate</c> from <c>libc</c>.
/// Callers pass the start address and the length of the range.
/// </summary>
[LibraryImport("libc", EntryPoint = "sys_icache_invalidate", SetLastError = true)]
public static partial void SysIcacheInvalidate(IntPtr start, IntPtr len);
}
}

View File

@ -0,0 +1,340 @@
using ARMeilleure.Memory;
using Ryujinx.Common;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Cpu.LightningJit.Cache
{
class NoWxCache : IDisposable
{
private const int CodeAlignment = 4; // Bytes.
private const int SharedCacheSize = 2047 * 1024 * 1024;
private const int LocalCacheSize = 128 * 1024 * 1024;
// How many calls to the same function we allow until we pad the shared cache to force the function to become available there
// and allow the guest to take the fast path.
private const int MinCallsForPad = 8;
/// <summary>
/// Pairs a reserved memory region with the allocator that hands out offsets inside it.
/// </summary>
private class MemoryCache : IDisposable
{
    private readonly ReservedRegion _region;
    private readonly CacheMemoryAllocator _cacheAllocator;

    public CacheMemoryAllocator Allocator => _cacheAllocator;

    public IntPtr Pointer => _region.Block.Pointer;

    public MemoryCache(IJitMemoryAllocator allocator, ulong size)
    {
        _region = new ReservedRegion(allocator, size);
        _cacheAllocator = new CacheMemoryAllocator((int)size);
    }

    /// <summary>
    /// Allocates room for code and expands the region so that it covers the allocation.
    /// </summary>
    /// <param name="codeSize">Size in bytes, rounded up to the code alignment before allocating.</param>
    /// <returns>Offset of the allocation inside the region.</returns>
    /// <exception cref="OutOfMemoryException">The allocator returned a negative offset.</exception>
    public int Allocate(int codeSize)
    {
        int alignedSize = AlignCodeSize(codeSize);
        int allocOffset = _cacheAllocator.Allocate(alignedSize);

        if (allocOffset >= 0)
        {
            _region.ExpandIfNeeded((ulong)allocOffset + (ulong)alignedSize);

            return allocOffset;
        }

        throw new OutOfMemoryException("JIT Cache exhausted.");
    }

    /// <summary>
    /// Returns a range to the allocator. The size is forwarded as is, no alignment is applied here.
    /// </summary>
    public void Free(int offset, int size)
    {
        _cacheAllocator.Free(offset, size);
    }

    /// <summary>
    /// Maps a page aligned range of the region as RW.
    /// </summary>
    public void ReprotectAsRw(int offset, int size)
    {
        Debug.Assert(offset >= 0 && IsPageAligned(offset));
        Debug.Assert(size > 0 && IsPageAligned(size));

        _region.Block.MapAsRw((ulong)offset, (ulong)size);
    }

    /// <summary>
    /// Maps a page aligned range of the region as RX and then invalidates the instruction cache for it.
    /// </summary>
    /// <exception cref="PlatformNotSupportedException">
    /// The platform is neither macOS nor iOS. Note that the range has already been mapped as RX at that point.
    /// </exception>
    public void ReprotectAsRx(int offset, int size)
    {
        Debug.Assert(offset >= 0 && IsPageAligned(offset));
        Debug.Assert(size > 0 && IsPageAligned(size));

        _region.Block.MapAsRx((ulong)offset, (ulong)size);

        if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS())
        {
            IntPtr start = _region.Block.Pointer + offset;

            JitSupportDarwin.SysIcacheInvalidate(start, size);
        }
        else
        {
            throw new PlatformNotSupportedException();
        }
    }

    // Only referenced from the debug assertions above.
    private static bool IsPageAligned(int value)
    {
        return (value & (int)(MemoryBlock.GetPageSize() - 1)) == 0;
    }

    private static int AlignCodeSize(int codeSize)
    {
        int mask = CodeAlignment - 1;

        return checked(codeSize + mask) & ~mask;
    }

    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        _region.Dispose();
        _cacheAllocator.Clear();
    }

    public void Dispose()
    {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(true);
        GC.SuppressFinalize(this);
    }
}
private readonly IStackWalker _stackWalker;
private readonly Translator _translator;
private readonly MemoryCache _sharedCache;
private readonly MemoryCache _localCache;
private readonly PageAlignedRangeList _pendingMap;
private readonly object _lock;
/// <summary>
/// Describes one function stored in the local cache, together with a counter of how often it was looked up.
/// </summary>
class ThreadLocalCacheEntry
{
    public readonly int Offset;
    public readonly int Size;
    public readonly IntPtr FuncPtr;

    // Starts at zero (field default) and only ever grows through IncrementUseCount.
    private int _useCount;

    public ThreadLocalCacheEntry(int offset, int size, IntPtr funcPtr)
    {
        FuncPtr = funcPtr;
        Size = size;
        Offset = offset;
    }

    /// <summary>
    /// Adds one to the use counter.
    /// </summary>
    /// <returns>The counter value after the increment.</returns>
    public int IncrementUseCount()
    {
        _useCount += 1;

        return _useCount;
    }
}
[ThreadStatic]
private static Dictionary<ulong, ThreadLocalCacheEntry> _threadLocalCache;
/// <summary>
/// Creates the shared and local caches and the list that tracks shared cache ranges not yet mapped as executable.
/// </summary>
/// <param name="allocator">Allocator used to reserve both caches.</param>
/// <param name="stackWalker">Stack walker used to find out which local functions are still on a thread's call stack.</param>
/// <param name="translator">Translator that owns the function table the shared functions are registered on.</param>
public NoWxCache(IJitMemoryAllocator allocator, IStackWalker stackWalker, Translator translator)
{
    _lock = new object();
    _stackWalker = stackWalker;
    _translator = translator;

    _sharedCache = new MemoryCache(allocator, SharedCacheSize);
    _localCache = new MemoryCache(allocator, LocalCacheSize);

    // Ranges that become page aligned are reprotected as RX, functions inside them are then registered.
    _pendingMap = new PageAlignedRangeList(_sharedCache.ReprotectAsRx, RegisterFunction);
}
/// <summary>
/// Returns an executable copy of a translated function for the calling thread.
/// </summary>
/// <remarks>
/// If the thread already has a local copy, that one is returned. Otherwise the code is written to the shared
/// cache (unless the function is already pending or registered) and a new thread local copy is created.
/// </remarks>
/// <param name="framePointer">Frame pointer handed to the stack walker when clearing old local functions.</param>
/// <param name="code">Code of the function.</param>
/// <param name="guestAddress">Guest address of the function, used as key on the caches.</param>
/// <param name="guestSize">Guest size of the function, stored on the translated function.</param>
/// <returns>Pointer to the thread local copy of the function.</returns>
public unsafe IntPtr Map(IntPtr framePointer, ReadOnlySpan<byte> code, ulong guestAddress, ulong guestSize)
{
    if (TryGetThreadLocalFunction(guestAddress, out IntPtr localPtr))
    {
        return localPtr;
    }

    lock (_lock)
    {
        bool alreadyKnown = _pendingMap.Has(guestAddress) || _translator.Functions.ContainsKey(guestAddress);

        if (!alreadyKnown)
        {
            int sharedOffset = _sharedCache.Allocate(code.Length);
            IntPtr sharedPtr = _sharedCache.Pointer + sharedOffset;

            code.CopyTo(new Span<byte>((void*)sharedPtr, code.Length));

            TranslatedFunction function = new(sharedPtr, guestSize);

            _pendingMap.Add(sharedOffset, code.Length, guestAddress, function);
        }

        ClearThreadLocalCache(framePointer);

        return AddThreadLocalFunction(code, guestAddress);
    }
}
/// <summary>
/// Writes code to its own page aligned allocation on the shared cache and maps it as RX right away.
/// </summary>
/// <param name="code">Code to be written.</param>
/// <returns>Pointer to the code on the shared cache.</returns>
public unsafe IntPtr MapPageAligned(ReadOnlySpan<byte> code)
{
    lock (_lock)
    {
        // Ensure we will get an aligned offset from the allocator.
        _pendingMap.Pad(_sharedCache.Allocator);

        int pageSize = (int)MemoryBlock.GetPageSize();
        int sizeAligned = BitUtils.AlignUp(code.Length, pageSize);
        int funcOffset = _sharedCache.Allocate(sizeAligned);

        Debug.Assert((funcOffset & (pageSize - 1)) == 0);

        IntPtr funcPtr = _sharedCache.Pointer + funcOffset;
        Span<byte> destination = new((void*)funcPtr, code.Length);

        code.CopyTo(destination);
        _sharedCache.ReprotectAsRx(funcOffset, sizeAligned);

        return funcPtr;
    }
}
/// <summary>
/// Looks for a function on the calling thread's local cache and counts the lookup.
/// </summary>
/// <param name="guestAddress">Guest address of the function.</param>
/// <param name="funcPtr">Pointer to the local copy, or <see cref="IntPtr.Zero"/> when there is none.</param>
/// <returns>True if the thread has a local copy of the function, false otherwise.</returns>
private bool TryGetThreadLocalFunction(ulong guestAddress, out IntPtr funcPtr)
{
    _threadLocalCache ??= new();

    if (!_threadLocalCache.TryGetValue(guestAddress, out ThreadLocalCacheEntry entry))
    {
        funcPtr = IntPtr.Zero;

        return false;
    }

    if (entry.IncrementUseCount() >= MinCallsForPad)
    {
        // Function is being called often, let's make it available in the shared cache so that the guest code
        // can take the fast path and stop calling the emulator to get the function from the thread local cache.
        // To do that we pad all "pending" functions until they complete a page of memory, allowing us to reprotect them as RX.
        lock (_lock)
        {
            _pendingMap.Pad(_sharedCache.Allocator);
        }
    }

    funcPtr = entry.FuncPtr;

    return true;
}
/// <summary>
/// Removes from the calling thread's local cache the functions that are no longer pending on the shared cache
/// and that are not part of the thread's current call stack.
/// </summary>
/// <param name="framePointer">Frame pointer the stack walk starts from.</param>
private void ClearThreadLocalCache(IntPtr framePointer)
{
    // Try to delete functions that are already on the shared cache
    // and no longer being executed.
    Dictionary<ulong, ThreadLocalCacheEntry> cache = _threadLocalCache;

    if (cache == null)
    {
        return;
    }

    IEnumerable<ulong> callStack = _stackWalker.GetCallStack(
        framePointer,
        _localCache.Pointer,
        LocalCacheSize,
        _sharedCache.Pointer,
        SharedCacheSize);

    // We can only delete if the function is not part of the current thread call stack,
    // otherwise we will crash the program when the thread returns to it.
    bool IsOnCallStack(ThreadLocalCacheEntry candidate)
    {
        ulong start = (ulong)candidate.FuncPtr;
        ulong end = start + (ulong)candidate.Size;

        foreach (ulong funcAddress in callStack)
        {
            if (funcAddress >= start && funcAddress < end)
            {
                return true;
            }
        }

        return false;
    }

    List<(ulong, ThreadLocalCacheEntry)> toDelete = new();

    foreach (KeyValuePair<ulong, ThreadLocalCacheEntry> pair in cache)
    {
        // We only want to delete if the function is already on the shared cache,
        // otherwise we will keep translating the same function over and over again.
        if (_pendingMap.Has(pair.Key) || IsOnCallStack(pair.Value))
        {
            continue;
        }

        toDelete.Add((pair.Key, pair.Value));
    }

    int pageSize = (int)MemoryBlock.GetPageSize();

    foreach ((ulong address, ThreadLocalCacheEntry entry) in toDelete)
    {
        cache.Remove(address);

        int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

        _localCache.Free(entry.Offset, sizeAligned);
        _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
    }
}
/// <summary>
/// Releases every function on the calling thread's local cache and drops the thread's cache dictionary.
/// Meant to be called when the thread is exiting.
/// </summary>
/// <remarks>
/// The dictionary is per thread, but the local cache memory and its allocator are shared by all threads,
/// so the memory is released while holding the same lock that guards allocations on it.
/// </remarks>
public void ClearEntireThreadLocalCache()
{
    // Thread is exiting, delete everything.
    Dictionary<ulong, ThreadLocalCacheEntry> cache = _threadLocalCache;

    if (cache == null)
    {
        return;
    }

    int pageSize = (int)MemoryBlock.GetPageSize();

    // Without the lock, another thread could allocate the pages right after they are freed here,
    // and then have them reprotected as RW under it by the call below.
    lock (_lock)
    {
        foreach (ThreadLocalCacheEntry entry in cache.Values)
        {
            int sizeAligned = BitUtils.AlignUp(entry.Size, pageSize);

            _localCache.Free(entry.Offset, sizeAligned);
            _localCache.ReprotectAsRw(entry.Offset, sizeAligned);
        }
    }

    cache.Clear();
    _threadLocalCache = null;
}
/// <summary>
/// Writes code to a page aligned allocation on the local cache, records it on the calling thread's
/// dictionary and maps it as RX.
/// </summary>
/// <param name="code">Code of the function.</param>
/// <param name="guestAddress">Guest address of the function, used as dictionary key.</param>
/// <returns>Pointer to the function on the local cache.</returns>
private unsafe IntPtr AddThreadLocalFunction(ReadOnlySpan<byte> code, ulong guestAddress)
{
    int pageSize = (int)MemoryBlock.GetPageSize();
    int alignedSize = BitUtils.AlignUp(code.Length, pageSize);
    int funcOffset = _localCache.Allocate(alignedSize);

    Debug.Assert((funcOffset & (pageSize - 1)) == 0);

    IntPtr funcPtr = _localCache.Pointer + funcOffset;
    Span<byte> destination = new((void*)funcPtr, code.Length);

    code.CopyTo(destination);

    // The entry keeps the unaligned size, it is aligned again when the function is freed.
    _threadLocalCache ??= new();
    _threadLocalCache.Add(guestAddress, new ThreadLocalCacheEntry(funcOffset, code.Length, funcPtr));

    _localCache.ReprotectAsRx(funcOffset, alignedSize);

    return funcPtr;
}
/// <summary>
/// Adds a function to the translator's function table and then registers it on the translator.
/// </summary>
/// <param name="address">Guest address of the function.</param>
/// <param name="func">Translated function to be registered.</param>
private void RegisterFunction(ulong address, TranslatedFunction func)
{
    TranslatedFunction stored = _translator.Functions.GetOrAdd(address, func.GuestSize, func);

    // The table is expected not to have another function for this address already.
    Debug.Assert(stored == func);

    _translator.RegisterFunction(address, func);
}
/// <summary>
/// Disposes the local and the shared cache when called from <see cref="Dispose()"/>.
/// </summary>
/// <param name="disposing">True when called from <see cref="Dispose()"/>, nothing is done otherwise.</param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _localCache.Dispose();
    _sharedCache.Dispose();
}
/// <summary>
/// Disposes both caches and suppresses finalization of this instance.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}
}
}

View File

@ -0,0 +1,218 @@
using Ryujinx.Common;
using Ryujinx.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
namespace Ryujinx.Cpu.LightningJit.Cache
{
class PageAlignedRangeList
{
/// <summary>
/// Offset and size pair, ordered by offset only.
/// </summary>
private readonly struct Range : IComparable<Range>
{
    public int Offset { get; }
    public int Size { get; }

    public Range(int offset, int size)
    {
        Size = size;
        Offset = offset;
    }

    // The size takes no part on the comparison, ranges with the same offset compare as equal.
    public int CompareTo([AllowNull] Range other) => Offset.CompareTo(other.Offset);
}
private readonly Action<int, int> _alignedRangeAction;
private readonly Action<ulong, TranslatedFunction> _alignedFunctionAction;
private readonly List<(Range, ulong, TranslatedFunction)> _pendingFunctions;
private readonly List<Range> _ranges;
/// <summary>
/// Creates an empty list.
/// </summary>
/// <param name="alignedRangeAction">Called with offset and size of each page aligned span that is taken out of the list.</param>
/// <param name="alignedFunctionAction">Called with address and function of each pending function that is taken out of the list.</param>
public PageAlignedRangeList(Action<int, int> alignedRangeAction, Action<ulong, TranslatedFunction> alignedFunctionAction)
{
    _ranges = new List<Range>();
    _pendingFunctions = new List<(Range, ulong, TranslatedFunction)>();
    _alignedRangeAction = alignedRangeAction;
    _alignedFunctionAction = alignedFunctionAction;
}
/// <summary>
/// Checks if a function with the given address is still on the pending list.
/// </summary>
/// <param name="address">Address the function was added with.</param>
/// <returns>True if the function is pending, false otherwise.</returns>
public bool Has(ulong address)
{
    // Second tuple element is the address passed to Add.
    return _pendingFunctions.Exists(pending => pending.Item2 == address);
}
/// <summary>
/// Adds a function to the list, then processes whatever page aligned spans the addition completed.
/// </summary>
/// <param name="offset">Offset of the function code.</param>
/// <param name="size">Size of the function code in bytes.</param>
/// <param name="address">Address passed to the function action once the function is taken out of the list.</param>
/// <param name="function">Function passed to the function action once it is taken out of the list.</param>
public void Add(int offset, int size, ulong address, TranslatedFunction function)
{
    Range added = new Range(offset, size);

    Insert(added);
    _pendingFunctions.Add((added, address, function));

    ProcessAlignedRanges();
}
/// <summary>
/// Grows every range on the list to page boundaries, forcing the allocation of the padding, and flushes the list.
/// </summary>
/// <param name="allocator">Allocator the padding at both ends of each range is force-allocated on.</param>
public void Pad(CacheMemoryAllocator allocator)
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    // Each pass takes the first range out of the list, so this runs until the list is empty.
    while (_ranges.Count > 0)
    {
        Range first = _ranges[0];

        int rangeEnd = first.Offset + first.Size;
        int pageStart = BitUtils.AlignDown(first.Offset, pageSize);
        int pageEnd = BitUtils.AlignUp(rangeEnd, pageSize);

        if (pageStart < first.Offset)
        {
            allocator.ForceAllocation(pageStart, first.Offset - pageStart);
        }

        if (pageEnd > rangeEnd)
        {
            allocator.ForceAllocation(rangeEnd, pageEnd - rangeEnd);
        }

        _alignedRangeAction(pageStart, pageEnd - pageStart);
        _ranges.RemoveAt(0);

        // There is no range before the one that was just removed, which is signaled with an index of -1.
        ProcessPendingFunctions(-1, pageEnd);
    }
}
/// <summary>
/// Finds ranges that contain at least one whole page, reports the whole pages through the range action,
/// cuts them out of the range and then processes the pending functions.
/// </summary>
private void ProcessAlignedRanges()
{
    int pageSize = (int)MemoryBlock.GetPageSize();

    for (int index = 0; index < _ranges.Count; index++)
    {
        Range current = _ranges[index];

        // Shrink the range inwards to page boundaries, what is left (if anything) are the whole pages.
        int innerStart = BitUtils.AlignUp(current.Offset, pageSize);
        int innerEnd = BitUtils.AlignDown(current.Offset + current.Size, pageSize);
        int innerSize = innerEnd - innerStart;

        if (innerSize > 0)
        {
            _alignedRangeAction(innerStart, innerSize);

            // SplitAt may move the index, as it can insert or remove list elements.
            SplitAt(ref index, innerStart, innerEnd);
            ProcessPendingFunctions(index, innerEnd);
        }
    }
}
// Hands over to the function action every pending function that lies entirely between two neighbouring ranges
// of the list (or before the first/after the last one), and removes those functions from the pending list.
// rangeIndex: list index next to the span that was just taken out of the list, may be -1 or equal to the list count.
// alignedEnd: end offset of the span that was just taken out, only used to pick the range that precedes the span.
private void ProcessPendingFunctions(int rangeIndex, int alignedEnd)
{
// Step back if the index is past the end of the list, or if it refers to a range that starts
// at or after the end of the span, so that it refers to the range before the span (or becomes -1).
// NOTE(review): an index of 0 with an empty list is not adjusted and would make the lookup below throw,
// the callers in this class do not appear to pass that combination - confirm.
if ((rangeIndex > 0 && rangeIndex == _ranges.Count) ||
(rangeIndex >= 0 && rangeIndex < _ranges.Count && _ranges[rangeIndex].Offset >= alignedEnd))
{
rangeIndex--;
}
// Lower bound: end of the preceding range, or 0 if there is none.
int alignedStart;
if (rangeIndex >= 0)
{
alignedStart = _ranges[rangeIndex].Offset + _ranges[rangeIndex].Size;
}
else
{
alignedStart = 0;
}
// Upper bound: start of the following range, or int.MaxValue if there is none.
// From here on the parameter value is no longer used.
if (rangeIndex < _ranges.Count - 1)
{
alignedEnd = _ranges[rangeIndex + 1].Offset;
}
else
{
alignedEnd = int.MaxValue;
}
for (int index = 0; index < _pendingFunctions.Count; index++)
{
(Range range, ulong address, TranslatedFunction function) = _pendingFunctions[index];
if (range.Offset >= alignedStart && range.Offset + range.Size <= alignedEnd)
{
_alignedFunctionAction(address, function);
// Decrement so that the element that moved into this slot is visited on the next iteration.
_pendingFunctions.RemoveAt(index--);
}
}
}
/// <summary>
/// Inserts a range on the list at the position given by a binary search, merging it with the
/// following and/or preceding range when they touch it.
/// </summary>
/// <param name="range">Range to be inserted.</param>
private void Insert(Range range)
{
    int index = _ranges.BinarySearch(range);

    if (index < 0)
    {
        // Not found, the complement is the index of the first range with a greater offset.
        index = ~index;
    }

    // Absorb the following range if it starts right where the new one ends.
    if (index < _ranges.Count)
    {
        Range next = _ranges[index];

        if (next.Offset == range.Offset + range.Size)
        {
            range = new Range(range.Offset, range.Size + next.Size);
            _ranges.RemoveAt(index);
        }
    }

    // Absorb the preceding range if it ends right where the new one starts.
    if (index > 0)
    {
        Range prev = _ranges[index - 1];

        if (prev.Offset + prev.Size == range.Offset)
        {
            range = new Range(range.Offset - prev.Size, prev.Size + range.Size);
            index--;
            _ranges.RemoveAt(index);
        }
    }

    _ranges.Insert(index, range);
}
/// <summary>
/// Cuts the span between <paramref name="alignedStart"/> and <paramref name="alignedEnd"/> out of the range
/// at <paramref name="index"/>, keeping what is left before and/or after the span on the list.
/// </summary>
/// <param name="index">
/// Index of the range. It is advanced by one when a part before the span is kept,
/// and moved back by one when the whole range is removed.
/// </param>
/// <param name="alignedStart">Start offset of the span to cut.</param>
/// <param name="alignedEnd">End offset of the span to cut.</param>
private void SplitAt(ref int index, int alignedStart, int alignedEnd)
{
    Range range = _ranges[index];

    int rangeEnd = range.Offset + range.Size;
    bool hasHead = range.Offset < alignedStart;
    bool hasTail = rangeEnd > alignedEnd;

    if (hasHead)
    {
        // The head replaces the range, the tail (if any) goes right after it.
        _ranges[index] = new Range(range.Offset, alignedStart - range.Offset);
        index++;

        if (hasTail)
        {
            _ranges.Insert(index, new Range(alignedEnd, rangeEnd - alignedEnd));
        }
    }
    else if (hasTail)
    {
        _ranges[index] = new Range(alignedEnd, rangeEnd - alignedEnd);
    }
    else if (range.Offset == alignedStart && rangeEnd == alignedEnd)
    {
        Debug.Assert(range.Offset == alignedStart && rangeEnd == alignedEnd);

        // Nothing is left of the range.
        _ranges.RemoveAt(index);
        index--;
    }
}
}
}

View File

@ -0,0 +1,15 @@
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
{
/// <summary>
/// Register bit masks used by the Arm64 code generator. Bit N of a mask stands for register number N.
/// </summary>
static class AbiConstants
{
// Some of these registers have specific roles and can't be used as general purpose registers.
// X18 - Reserved for platform specific usage.
// X29 - Frame pointer.
// X30 - Return address.
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
public const uint ReservedRegsMask = (1u << 18) | (1u << 29) | (1u << 30) | (1u << 31);
// Bits 19 to 28 set.
public const uint GprCalleeSavedRegsMask = 0x1ff80000; // X19 to X28
// Bits 8 to 15 set.
public const uint FpSimdCalleeSavedRegsMask = 0xff00; // D8 to D15
}
}

View File

@ -0,0 +1,30 @@
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
{
/// <summary>
/// Condition codes. Members are laid out in pairs, an even value followed by the odd value
/// that <see cref="ArmConditionExtensions.Invert"/> maps it to.
/// </summary>
enum ArmCondition
{
    Eq = 0,
    Ne = 1,
    GeUn = 2,
    LtUn = 3,
    Mi = 4,
    Pl = 5,
    Vs = 6,
    Vc = 7,
    GtUn = 8,
    LeUn = 9,
    Ge = 10,
    Lt = 11,
    Gt = 12,
    Le = 13,
    Al = 14,
    Nv = 15,
}

static class ArmConditionExtensions
{
    /// <summary>
    /// Gets the other member of the pair a condition belongs to (Eq and Ne, GeUn and LtUn, and so on),
    /// by toggling the lowest bit of its value.
    /// </summary>
    /// <remarks>
    /// The last pair is no exception: <see cref="ArmCondition.Al"/> maps to <see cref="ArmCondition.Nv"/> and back.
    /// </remarks>
    /// <param name="condition">Condition to be inverted.</param>
    /// <returns>The condition with the lowest bit of its value toggled.</returns>
    public static ArmCondition Invert(this ArmCondition condition)
    {
        int toggled = (int)condition ^ 1;

        return (ArmCondition)toggled;
    }
}
}

View File

@ -0,0 +1,14 @@
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
{
/// <summary>
/// Register extension kinds. Values 0 to 3 are the "U" variants and values 4 to 7 the "S" variants,
/// each group ordered b, h, w, x.
/// </summary>
// NOTE(review): presumably unsigned/signed extension from byte, half-word, word and double-word, with the
// values matching the instruction encoding - confirm against the assembler that consumes this enum.
enum ArmExtensionType
{
Uxtb = 0,
Uxth = 1,
Uxtw = 2,
Uxtx = 3,
Sxtb = 4,
Sxth = 5,
Sxtw = 6,
Sxtx = 7,
}
}

View File

@ -0,0 +1,11 @@
namespace Ryujinx.Cpu.LightningJit.CodeGen.Arm64
{
/// <summary>
/// Shift kinds, numbered 0 to 3.
/// </summary>
// NOTE(review): presumably logical shift left, logical shift right, arithmetic shift right and rotate right,
// with the values matching the instruction encoding - confirm against the assembler that consumes this enum.
enum ArmShiftType
{
Lsl = 0,
Lsr = 1,
Asr = 2,
Ror = 3,
}
}

Some files were not shown because too many files have changed in this diff Show More