Compare commits

...

218 Commits

Author SHA1 Message Date
30862b5ffd ava: Reorder settings of Resolution Scaler (#4270) 2023-01-13 00:07:53 +01:00
9f57747c57 Ava UI: Various Fixes (#4268)
* Fix saves disappearing

* Better size formatter

* Move TextBox alignment fix to Styles

* Fix bug

* Left align

* Add border

* Update Ryujinx.Ava/UI/Models/SaveModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Models/SaveModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Models/SaveModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Whitespace

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2023-01-12 12:23:24 +00:00
fe29a2ff6e Ava UI: Settings Adjustments (#4273)
* Visual adjustments

* Match border to rest of app

* Fix overlapping controls

* Fix

* Fix
2023-01-12 12:09:32 +00:00
e9a173e00c Ptc: Check process architecture (#4272) 2023-01-12 07:50:45 +00:00
a11784fcbf Arm64: Cpu feature detection (#4264)
* Arm64: Cpu feature detection

* Ptc: Add Arm64 feature info

* nits

* simplify CheckSysctlName

* restore some macos flags

* feedback
2023-01-12 08:05:18 +01:00
fd36c8deca lm: Handle Tail flag in LogPacket (#4274)
* lm: Handle TailFlag in LogPacket

* Addresses feedback
2023-01-12 07:42:05 +01:00
70638340b3 Ava UI: Move Ava logging to Logger.Debug (#4255)
* Ava: Move Ava logging to Logger.Debug

Since #4231 we redirect Avalonia logs to our Logger, which is pretty nice. But since it uses our logging level too, it now leads to a massive flood in our log files.
To avoid that, I've included the `AvaLogLevel` in the log message and made all Ava logs use `Logger.Debug`.

* Logs errors to Error and other to Debug

* missing level

* keep var
2023-01-11 23:27:26 +01:00
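A minimal sketch of the routing described in this commit, assuming Ryujinx.Common.Logging's `Logger.X?.Print(LogClass, string)` pattern and a `LogClass.Ui` entry; `AvaLogLevel` here is a stand-in enum for Avalonia's level type, and the real adapter in Ryujinx.Ava may differ:

```csharp
using Ryujinx.Common.Logging;

// Illustrative sketch only: keep the original Avalonia level visible inside the
// message text, send real errors to the Error channel and everything else to
// Debug so the log files stay quiet by default.
internal enum AvaLogLevel { Verbose, Debug, Information, Warning, Error, Fatal } // assumed ordering

internal static class AvaLogSketch
{
    public static void Log(AvaLogLevel level, string area, string message)
    {
        string text = $"[{level}] [{area}] {message}";

        if (level >= AvaLogLevel.Error)
        {
            Logger.Error?.Print(LogClass.Ui, text);  // real problems stay loud
        }
        else
        {
            Logger.Debug?.Print(LogClass.Ui, text);  // the rest only shows in Debug logs
        }
    }
}
```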
4b495f3333 Ava UI: Fixes PerformanceCheck condition (#4271) 2023-01-11 20:47:15 +00:00
934b5a64e5 Ava GUI: User Profile Manager + Other Fixes (#4166)
* Fix redundancies

* Add back elses

* Loading Screen fixes

* Redesign User Profile Manager

- Backported long selection bar in Grid/List view not working
- Backported UserSelector is jank

* Fix SelectionIndicator

* Fix DataType

* Fix SaveManager bug

* Remove debug log

* Load saves on UIThread

* Reduce UI thread blocking

* Fix locale keys

* Use block namespaces

* Fix close button width

* Make UserProfile ordering consistent

* Alphabetical order

* Adjust layout, remove green circle for blue selector

* Fix some inconsistencies

* Fix no initial selected profile

* Adjust appearance of edit button

* Adjust SaveManager

* Remove redundant warning dialog

* Make firmware avatar selector clearer

* View redesign again :hero_depressed:

* Consistency adjustments

* Adjust margins

* Make `UserProfileImageSelector` consistent

* Make `UserFirmwareAvatarSelector` consistent

* Fix long grid view selector

* Switch case

* Remove long selection bar

Handled in #4178

* Consistency

* Started dialog titles

* Fixes

* Remaining titles

* Update Ryujinx.Ava/UI/Controls/NavigationDialogHost.axaml

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Fix build

* Hide UserRecoverer if no LostProfiles are found

* UserEditor Avatar Placeholder

* Watermark + locale adjustment

* Border radius

* Remove unnecessary styles

* Fix firmware avatar image order

* Cleanup `ColorPickerButton`

* Make `UserId` copy/paste able

* Make `FirmwareAvatarSelector` 6 images wide

* Make selection bar better

* Unsaved changes dialogue

* Fix indentation

* Remove extra check

* Address suggestions

* Reorganise

- Remove unused views
- Rename views to match convention
- Fix weird namespacing

* Update Ryujinx.Ava/UI/Views/User/UserFirmwareAvatarSelectorView.axaml

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserFirmwareAvatarSelectorView.axaml

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* UserRecovererView empty placeholder

* Update Ryujinx.Ava/UI/Views/User/UserSelectorView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserSaveManagerView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserSaveManagerView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserSaveManagerView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserRecovererView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/User/UserFirmwareAvatarSelectorView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/UserFirmwareAvatarSelectorViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/UserFirmwareAvatarSelectorViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/UserFirmwareAvatarSelectorViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Models/UserProfile.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Controls/NavigationDialogHost.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Controls/NavigationDialogHost.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Remove AddModel

* Update Ryujinx.Ava/Assets/Locales/en_US.json

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Fix bug

Co-authored-by: Mary-nyan <thog@protonmail.com>
Co-authored-by: Ac_K <Acoustik666@gmail.com>
2023-01-11 06:20:19 +01:00
cee667b491 Ava: Fixes Update count in heading (#4265) 2023-01-11 05:39:25 +01:00
94a64f2aea Remove textures from cache on unmap if not mapped and modified (#4211) 2023-01-11 01:53:56 +00:00
2355c2af62 ava: Generate Locale menu automatically (#4243)
Currently in `MainMenuBarView.axaml` we list all available languages and hardcode each language name alongside its language key.
That's a bit bad, because if we want to add a new language we have to edit both the `csproj` and the `axaml` with the translated language name and the language code.
I've put all translations in their respective locale files and added code to the `MainMenuBarView` constructor to generate the menu automatically. Now we only have to edit the `csproj` if we want to add a new language.
2023-01-11 01:29:22 +01:00
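A rough sketch of the approach this commit describes, using plain file I/O and System.Text.Json; the real implementation reads embedded locale resources and builds Avalonia menu items in the `MainMenuBarView` constructor, and the `"Language"` key name below is an assumption:

```csharp
using System.Collections.Generic;
using System.IO;
using System.Text.Json;

// Illustrative sketch: each locale file carries its own translated language name,
// so the menu can be generated from whatever locale files exist instead of
// hardcoding (code, name) pairs in the .axaml.
static IEnumerable<(string Code, string DisplayName)> EnumerateLanguages(string localeDir)
{
    foreach (string path in Directory.EnumerateFiles(localeDir, "*.json"))
    {
        using JsonDocument doc = JsonDocument.Parse(File.ReadAllText(path));

        // "Language" is an assumed key holding the translated language name.
        string name = doc.RootElement.GetProperty("Language").GetString();
        string code = Path.GetFileNameWithoutExtension(path); // e.g. "en_US"

        yield return (code, name);
    }
}
```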
5e0f8e8738 Implement JIT Arm64 backend (#4114)
* Implement JIT Arm64 backend

* PPTC version bump

* Address some feedback from Arm64 JIT PR

* Address even more PR feedback

* Remove unused IsPageAligned function

* Sync Qc flag before calls

* Fix comment and remove unused enum

* Address riperiperi PR feedback

* Delete Breakpoint IR instruction that was only implemented for Arm64
2023-01-10 19:16:59 -03:00
d16288a2a8 Set LSApplicationCategoryType to games (#4257)
https://developer.apple.com/documentation/bundleresources/information_property_list/lsapplicationcategorytype
Makes it auto-add to the macOS Launchpad games folder
2023-01-10 21:49:46 +00:00
600f86dc7b Fix context menu locales (#4242) 2023-01-10 19:15:15 +01:00
7210c17c5e misc: Enforce LF (#4253)
Because we are building everything on Windows for release at the moment,
git's default CRLF line endings cause issues when packing the
Ryujinx.sh script.

This addresses the issue by enforcing LF for all files via .gitattributes.
2023-01-10 19:00:14 +01:00
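For reference, enforcing LF repository-wide is typically a one-line .gitattributes rule along these lines; the actual file added by this commit may contain more specific rules:

```
# Normalize all text files to LF on checkout and in the repository
* text=auto eol=lf
```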
a16854e55a ava: Cleanup AppHost (#4240)
* ava: Cleanup AppHost

This PR cleans up the AppHost file a bit (adding the infamous extra spaces to improve readability), re-sorts private vars, removes useless vars, and improves the code here and there, like the AudioBackend check.

Co-Authored-By: gdkchan <5624669+gdkchan@users.noreply.github.com>

* Remove 'renderer'

* Revert currentTime

* revert if condition

Co-authored-by: gdkchan <5624669+gdkchan@users.noreply.github.com>
2023-01-10 18:45:55 +01:00
3e455a90a1 Ava: Add missing null check to ContentDialogHelper.ShowAsync() (#4248)
* ava: Add missing null check to ContentDialogHelper.ShowAsync()

* Replace "is not" with != operator

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2023-01-10 09:22:25 +01:00
e4413542b2 Add command line arguments to override docked mode (#4239)
* Add command line args for docked mode

* Apply suggestions from code review

Co-authored-by: Ac_K <Acoustik666@gmail.com>

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2023-01-10 00:59:23 +01:00
8c720783f5 linux: Fix packaging step for CI & Add Ryujinx.Headless.SDL2 to Ryujinx.sh (#4249) 2023-01-09 23:45:46 +00:00
8734ea9dd4 Linux: Add Avalonia detection to Ryujinx.sh (#4224)
* Revert "ava: Fix regression caused by #4013 (#4222)"

This reverts commit b9f2a96595.

* linux: Detect Ryujinx.Ava and don't rename the Ryujinx.Ava assembly
2023-01-09 22:58:51 +01:00
c586e6d2b7 Replace tabs with spaces across the project (#4244)
* Replace tabs with spaces across the project

* Include AXAML files too
2023-01-09 22:58:29 +01:00
3a4eeb77fe headless: Change window icon size to 48x48 (#4247) 2023-01-09 18:02:41 +00:00
51b3953cfc [Headless] Add missing arguments & Fix typos (#4193)
* headless: Fix typos in command line options

* Remove nullable from command line options

Add EnableMacroHLE option
Add HideCursorOnIdle option

* headless: Adjust enable-ptc help text

* headless: Use switch statement instead of if-else chain

* headless: Improve formatting for long constructors

* headless: Remove discards from SDL_ShowCursor()

* headless: Add window icon

* Fix hiding cursor on idle

At least on Wayland, SDL2 doesn't produce any mouse motion events.

* Add new command line args: BaseDataDir and UserProfile

* headless: Read icon from embedded resource

* headless: Skip SetWindowIcon() on Windows if dll isn't present

* headless: Fix division by zero

* headless: Fix command line options not working correctly

* headless: Fix crash when viewing command line options

* headless: Load window icon bmp from memory

* Add comment to the workaround for SDL_LoadBMP_RW

* headless: Enable logging to file by default

* headless: Add 3 options for --hide-cursor

Replaces --disable-hide-cursor-on-idle
2023-01-09 04:55:37 +01:00
610eecc1c1 ava: Fixes regressions from refactoring (#4237)
* ava: Fix regressions from #4178

* Remove duplicated code

* real fix for right click menu

Co-Authored-By: Isaac Marovitz <42140194+IsaacMarovitz@users.noreply.github.com>

* Remove ContentDialogOverlay

Co-authored-by: Isaac Marovitz <42140194+IsaacMarovitz@users.noreply.github.com>
2023-01-09 03:37:20 +00:00
492056abf6 Ava: Make Avalonia use our logging system (#4231)
* Ava: Make Avalonia use our logging system

* LoggerAdapter: Address review comments

* Update Ryujinx.Common/Logging/LogClass.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2023-01-09 04:32:20 +01:00
ee6e682ab4 Fix selection bar (#4236) 2023-01-09 03:36:11 +01:00
6f60e102a2 HIPC: Fix reply possibly also receiving one request (#4232) 2023-01-08 15:34:49 -03:00
eeb2af9953 Ava GUI: MainWindow Refactor (#4178)
* Fix redundancies

* Add back elses

* `MainWindow` Refactor

* Switch commands to `ReflectionBinding`

Not required in Ava 11

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Windows/MainWindow.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Windows/MainWindow.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Windows/MainWindow.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/Main/MainViewControls.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/AppHost.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/Main/MainMenuBarView.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/Views/Main/MainViewControls.axaml.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Resolve issues

* Remove Ava 11 Fix

* Update Ryujinx.Ava/UI/ViewModels/MainWindowViewModel.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Update Ryujinx.Ava/UI/Windows/MainWindow.axaml.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Fix whitespace + other suggestions

* Move Vsync colours to `Styles.xaml`

* Remove catch all

* Use `switch` instead of `if`

* Update locale keys

* Use block-scoped namespaces

* Fix improper Ava api usage then

* Static PTC

* Fix `GridItemSelectorSize` with `ShowNames`

* Update for new About Window

* Add back search fix

Co-authored-by: Ac_K <Acoustik666@gmail.com>
Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2023-01-08 18:46:25 +01:00
550747eac6 Horizon: Impl Prepo, Fixes bugs, Clean things (#4220)
* Horizon: Impl Prepo, Fixes bugs, Clean things

* remove ToArray()

* resultCode > status

* Remove old services

* Addresses gdkchan's comments and more cleanup

* Addresses Gdkchan's feedback 2

* Reorganize services, make sure services are loaded before the guest

Co-Authored-By: gdkchan <5624669+gdkchan@users.noreply.github.com>

* Create interfaces for lm and sm

Co-authored-by: gdkchan <5624669+gdkchan@users.noreply.github.com>
2023-01-08 12:13:39 +00:00
3ffceab1fb MainWindow: Vertically center SearchBox TextPresenter (#4223) 2023-01-07 16:01:53 +00:00
b9f2a96595 ava: Fix regression caused by #4013 (#4222)
Avalonia doesn't seem to like it when the artifact doesn't match the root namespace...

This addresses that by moving the binary to "Ryujinx", like we do for the macOS build.
2023-01-07 12:24:21 +01:00
cbaa845f5d Include a start.sh file with correct launch options (#4013)
* Include reference to start.sh to be bundled

* Add start.sh

* Fix silly mistake I made on windows-x64

* ... I cannot read properly

* Make same changes for avalonia csproj

* Remove notice from start.sh

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx/Ryujinx.csproj

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.Ava/Ryujinx.Ava.csproj

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update distribution/linux/start.sh

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update distribution/linux/start.sh

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/Ryujinx.Ava.csproj

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.csproj

* Update Ryujinx.Ava.csproj

* Rename start.sh to Ryujinx.sh

* Update Ryujinx.csproj

* Update Ryujinx.Ava.csproj

* Update Ryujinx.Ava.csproj

* Update Ryujinx.Ava/Ryujinx.Ava.csproj

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Add `GDK_BACKEND` variable

* Update Ryujinx.Ava.csproj

* Update Program.cs

* Update Program.cs

* Update Ryujinx.sh

* Update Program.cs

* linux: Register mime types on launch

* Add DOTNET_EnableAlternateStackCheck=1 to desktop file

* linux: Add exclusion for RegisterMimeTypes for flathub builds

* Update logo path

* Cleanup Ryujinx.sh

* Fix typo in ReleaseInformation

* gha: Fix permissions for linux release binaries

* ava: Rename output assembly to Ryujinx

* Update mime database after installing new types

Wait until logging is available before registering mime types

* Copy mime types to output directory

Co-authored-by: Mary-nyan <thog@protonmail.com>
Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2023-01-07 09:06:13 +01:00
9e2681f2d7 Ava GUI: AboutWindow Refactor (#4196)
* Start `AboutWindow` refactor

* Redesign

* Update logos + ViewModel

* Fix GTK

* Cleanup usings

* Fix mismatched font size

* Update LocaleKeys

* Block scoped namespace

* Fix appearance on German

* Update Ryujinx.Ava/UI/ViewModels/AboutWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Ava/UI/ViewModels/AboutWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Move version number up

* Move see all contributors button left

* Add a couple `\n`

* Tooltips

* Layout fix

* Update Ryujinx.Ava/UI/ViewModels/AboutWindowViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2023-01-07 01:29:18 +01:00
81fae0d1a6 [hipc] Fix 'Unexpected result code Success returned' in Reply() (#4215)
* horizon: Add AbortOnFailureUnless()

* hipc: Replace AbortUnless() with AbortOnFailureUnless() in Reply()
2023-01-07 00:57:21 +01:00
38519f3b9a Ava GUI: SettingsWindow Refactor (#4177)
* Fix redundancies

* Add back elses

* Settings Refactor

* Fix Disposal functions

* Use `ReflectionBinding` instead of redundant funcs

* Ac_K suggestions

* Update Ryujinx.Ava/UI/ViewModels/SettingsViewModel.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update locale keys

* Update Ryujinx.Ava/UI/Windows/SettingsWindow.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Views/Settings/SettingsSystemView.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Views/Settings/SettingsSystemView.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Windows/SettingsWindow.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Use block-scoped namespaces

* Fix typo

* Make `TimeZone` internal again

Co-authored-by: Ac_K <Acoustik666@gmail.com>
Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2023-01-07 00:35:21 +01:00
7f27aabbd0 chore: Update Ryujinx.SDL2-CS to 2.26.1 (#4199) 2023-01-06 17:52:25 +01:00
e876c43ce9 Misc; Remove duplicated entries and clean locale csproj (#4209) 2023-01-05 04:30:55 +01:00
8639245533 hle: Add safety measure around overflow in ScheduleFutureInvocation
Fix crash on Linux since 08831eecf7.
2023-01-05 01:55:27 +01:00
d6b86a6629 Readd Ryujinx.Ui.LocaleGenerator removed in #4188 (again) 2023-01-05 00:23:17 +01:00
8f2b7b5b8e Readd Ryujinx.Ui.LocaleGenerator removed in #4188 2023-01-05 00:20:47 +01:00
fc4b7cba2c Make PPTC state non-static (#4157)
* Make PPTC state non-static

* DiskCacheLoadState can be null
2023-01-05 00:01:44 +01:00
08831eecf7 IPC refactor part 3+4: New server HIPC message processor (#4188)
* IPC refactor part 3 + 4: New server HIPC message processor with source generator based serialization

* Make types match on calls to AlignUp/AlignDown

* Formatting

* Address some PR feedback

* Move BitfieldExtensions to Ryujinx.Common.Utilities and consolidate implementations

* Rename Reader/Writer to SpanReader/SpanWriter and move to Ryujinx.Common.Memory

* Implement EventType

* Address more PR feedback

* Log request processing errors since they are not normal

* Rename waitable to multiwait and add missing lock

* PR feedback

* Ac_K PR feedback
2023-01-04 23:15:45 +01:00
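As an illustration of the `SpanReader` idea mentioned in this refactor, a minimal reader over a raw byte span can be built on `MemoryMarshal` alone; this is a sketch, not the shape of the actual type in Ryujinx.Common.Memory:

```csharp
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// Illustrative sketch: sequentially read unmanaged values out of a byte span,
// the kind of helper an HIPC message processor uses to deserialize request data.
ref struct SpanReaderSketch
{
    private ReadOnlySpan<byte> _data;

    public SpanReaderSketch(ReadOnlySpan<byte> data) => _data = data;

    public T Read<T>() where T : unmanaged
    {
        T value = MemoryMarshal.Read<T>(_data);   // reinterpret the first sizeof(T) bytes
        _data = _data[Unsafe.SizeOf<T>()..];      // advance past what was consumed
        return value;
    }
}
```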
c6a139a6e7 New Crowdin updates (#3985)
* New translations en_US.json (French)

* New translations en_US.json (Portuguese, Brazilian)

* New translations en_US.json (French)

* New translations en_US.json (Portuguese, Brazilian)

* Delete fr_FR.json

* Delete pt_BR.json

* New translations en_US.json (French)

* New translations en_US.json (Spanish)

* New translations en_US.json (German)

* New translations en_US.json (Greek)

* New translations en_US.json (Italian)

* New translations en_US.json (Japanese)

* New translations en_US.json (Korean)

* New translations en_US.json (Polish)

* New translations en_US.json (Russian)

* New translations en_US.json (Turkish)

* New translations en_US.json (Ukrainian)

* New translations en_US.json (Chinese Simplified)

* New translations en_US.json (Chinese Traditional)

* New translations en_US.json (Portuguese, Brazilian)

* New translations en_US.json (German)
2023-01-04 22:44:51 +01:00
02714a1291 Avalonia - Add source generator for locale items (#3999)
* Add source generator for locale keys

* use locale keys in Ui subdir
2023-01-03 19:45:08 +01:00
09c9686498 misc: Use official names for NVDEC registers (#4192)
* misc: Uses official names for NVDEC registers

* Address gdkchan's comment

* Address comments
2023-01-02 14:48:46 +00:00
b6614c6ad5 chore: Update tests dependencies (#3978)
* chore: Update tests dependencies

* Apply TSR Berry suggestion to add a GC.SuppressFinalize in MemoryBlock.cs

* Ensure we wait for the test thread to be dead on PartialUnmap

* Use platform attribute for os specific tests

* Make P/Invoke methods private

* Downgrade NUnit3TestAdapter to 4.1.0

* test: Disable warning about platform compat for ThreadLocalMap()

Co-authored-by: TSR Berry <20988865+TSRBerry@users.noreply.github.com>
2023-01-01 17:35:29 +01:00
b1d4b174a6 fix typo in left joycon sl binding (#4195) 2023-01-01 15:46:02 +01:00
2b23463daa Filter hidden game files from the Game List (#4051)
* Filter “._” files from the game list

* Filter all hidden files from the game list

* Fix style

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* merge OR expression into a pattern

* migrate from GetFiles/Directories to Enumerate

* Remove GetFilesInDirectory()

* Update Ryujinx.Ui.Common/App/ApplicationLibrary.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* add error handling

* code cleanup

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-29 16:52:30 +01:00
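A simplified sketch of the filtering this commit describes, using `EnumerateFiles` and a hidden-attribute check; the helper name and parameters are illustrative, not the project's actual `ApplicationLibrary` code, which also wraps enumeration in error handling:

```csharp
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

// Illustrative sketch: lazily enumerate candidate files with EnumerateFiles instead
// of GetFiles, and drop hidden entries (including macOS "._" AppleDouble files).
static IEnumerable<string> EnumerateGameFiles(string directory, string[] extensions)
{
    return Directory.EnumerateFiles(directory, "*", SearchOption.AllDirectories)
        .Where(path =>
        {
            var info = new FileInfo(path);

            bool hidden = info.Attributes.HasFlag(FileAttributes.Hidden)
                          || info.Name.StartsWith("._", StringComparison.Ordinal);

            return !hidden && extensions.Contains(info.Extension, StringComparer.OrdinalIgnoreCase);
        });
}
```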
9dfe81770a Use vector outputs for texture operations (#3939)
* Change AggregateType to include vector type counts

* Replace VariableType uses with AggregateType and delete VariableType

* Support new local vector types on SPIR-V and GLSL

* Start using vector outputs for texture operations

* Use vectors on more texture operations

* Use vector output for ImageLoad operations

* Replace all uses of single destination texture constructors with multi destination ones

* Update textureGatherOffsets replacement to split vector operations

* Shader cache version bump

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-29 16:09:34 +01:00
52c115a1f8 HLE: Add basic stubs to get Labo VR booting to title screen. (#4007)
* HLE: Add basic stubs to get Labo VR booting to title screen.

* Address code review

* Apply suggestions from code review (pt. 2)

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Apply suggestions from code review (pt. 3)

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Formatting: final batch?

Co-authored-by: Ac_K <Acoustik666@gmail.com>
Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Address last? bit of formatting

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-29 15:57:35 +01:00
e20abbf9cc Vulkan: Don't flush commands when creating most sync (#4087)
* Vulkan: Don't flush commands when creating most sync

When the WaitForIdle method is called, we create sync as some internal GPU method may read back written buffer data. Some games randomly intersperse compute dispatches into their render passes, which results in this happening an unbounded number of times depending on how many times they run compute.

Creating sync in Vulkan is expensive, as we need to flush the current command buffer so that it can be waited on. We have a limited number of active command buffers due to how we track resource usage, so submitting too many command buffers will force us to wait for them to return to the pool.

This PR allows less "important" sync (things which are less likely to be waited on) to wait on a command buffer's result without submitting it, instead relying on AutoFlush or another, more important sync to flush it later on.

Because of the possibility of us waiting for a command buffer that hasn't submitted yet, any thread needs to be able to force the active command buffer to submit. The ability to do this has been added to the backend multithreading via an "Interrupt", though it is not supported without multithreading.

OpenGL drivers should already be doing something similar so they don't blow up when creating lots of sync, which is why this hasn't been a problem for these games over there.

Improves Vulkan performance on Xenoblade DE, Pokemon Scarlet/Violet, and Zelda BOTW (still another large issue here)

* Add strict argument

This is technically a separate concern from whether the sync is a host syncpoint.

* Remove _interrupted variable

* Actually wait for the invoke

This is required by AMD GPUs, and also may have caused some issues on other GPUs.

* Remove unused using.

* I don't know why it added these ones.

* Address Feedback

* Fix typo
2022-12-29 15:39:04 +01:00
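The deferred-flush idea above can be summarized with a small sketch; all types and method names here are hypothetical, not Ryujinx's actual Vulkan backend classes:

```csharp
// Illustrative sketch: "less important" sync remembers which command buffer it
// depends on instead of forcing a submit, and only forces submission when waited on.
interface ICommandBufferPool
{
    bool IsSubmitted(int cbIndex);
    void ForceSubmit(int cbIndex);    // the "interrupt" path: any thread may force a flush
    void WaitForFence(int cbIndex);
}

class DeferredSync
{
    private readonly ICommandBufferPool _pool;
    private readonly int _cbIndex;

    public DeferredSync(ICommandBufferPool pool, int cbIndex, bool strict)
    {
        _pool = pool;
        _cbIndex = cbIndex;

        if (strict)
        {
            // Important sync (likely to be waited on soon): flush eagerly, as before.
            _pool.ForceSubmit(cbIndex);
        }
    }

    public void Wait()
    {
        // Weak sync only pays the flush cost if someone actually waits on it and an
        // AutoFlush or a more important sync hasn't already submitted the buffer.
        if (!_pool.IsSubmitted(_cbIndex))
        {
            _pool.ForceSubmit(_cbIndex);
        }

        _pool.WaitForFence(_cbIndex);
    }
}
```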
76671d63d4 Ava GUI: Restructure Ryujinx.Ava (#4165)
* Restructure `Ryujinx.Ava`

* Stylistic consistency

* Update Ryujinx.Ava/UI/Controls/UserEditor.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Controls/UserEditor.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Controls/UserSelector.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Controls/SaveManager.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Controls/SaveManager.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Windows/SettingsWindow.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Helpers/EmbeddedWindow.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Helpers/EmbeddedWindow.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Helpers/EmbeddedWindow.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Helpers/EmbeddedWindow.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Windows/SettingsWindow.axaml.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/ViewModels/UserProfileViewModel.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/ViewModels/UserProfileViewModel.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Ava/UI/Helpers/EmbeddedWindow.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Fix redundancies

* Remove redundancies

* Add back elses

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2022-12-29 15:24:05 +01:00
3d1a0bf374 [Headless] Fix for not receiving any SDL events on Linux (#4182)
* headless: Fix SDL events not working

* Add nuget.config
2022-12-29 15:09:18 +01:00
c20f3fbebd haydn: Add support for PCMFloat, PCM32 and PCM8 conversions (#4100)
* haydn: Add support for PCMFloat, PCM32 and PCM8 conversions

This adds support in the compatibility layer for sample formats other
than PCM16.

This should help extend compatibility with soundio on devices that
don't expose PCM16.

I omitted PCM24 conversion for now as it's not the simplest of them all.

* Address TSRBerry's comment

* Address comments

* Fix conversion issue and clean up saturation usage

* Revert saturation changes

* Address gdkchan's comment
2022-12-28 19:34:30 -03:00
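As an illustration of the kind of conversion such a compatibility layer performs (a sketch, not the project's exact code), widening PCM16 to float and narrowing it to PCM8 are simple rescales:

```csharp
using System;

// Illustrative sketch: convert PCM16 samples for backends that only accept other
// sample formats. Real code also handles PCM32 and channel interleaving details.
static void Pcm16ToFloat(ReadOnlySpan<short> input, Span<float> output)
{
    for (int i = 0; i < input.Length; i++)
    {
        // Map [-32768, 32767] onto roughly [-1.0, 1.0).
        output[i] = input[i] * (1.0f / 32768.0f);
    }
}

static void Pcm16ToPcm8(ReadOnlySpan<short> input, Span<byte> output)
{
    for (int i = 0; i < input.Length; i++)
    {
        // Keep only the high byte and re-center to unsigned 8-bit.
        output[i] = (byte)((input[i] >> 8) + 128);
    }
}
```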
0d3b82477e Use new ArgumentNullException and ObjectDisposedException throw-helper API (#4163) 2022-12-27 20:27:11 +01:00
470be03c2f GPU: Add fallback when 16-bit formats are not supported (#4108)
* Add conversion for 16 bit RGBA formats (not supported in Rosetta)

* Rebase fix

Rebase fix

* Forgot to remove this

* Fix RGBA16 format conversion

* Add RGBA4 -> RGBA8 conversion

* Handle host stride alignment

* Address Feedback Part 1

* Can't count

* Don't zero out rgb when alpha is 0

* Separate RGBA4 and 5-bit component formats

Not sure of a better way to name them...

* Add A1B5G5R5 conversion

* Put this in the right place.

* Make format naming consistent for capabilities

* Change method names
2022-12-26 15:50:27 -03:00
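The 16-bit fallback boils down to widening each small component; for example, an RGBA4444 texel can be expanded to RGBA8888 by replicating each 4-bit component. This is a sketch of the widening only, and the component order shown is illustrative rather than the converter's exact packing:

```csharp
// Illustrative sketch: expand one RGBA4444 texel to RGBA8888. Replicating the
// nibble ((x << 4) | x) maps 0..15 onto 0..255 exactly (15 -> 255).
static uint Rgba4ToRgba8(ushort texel)
{
    uint r = (uint)(texel & 0xF);
    uint g = (uint)((texel >> 4) & 0xF);
    uint b = (uint)((texel >> 8) & 0xF);
    uint a = (uint)((texel >> 12) & 0xF);

    static uint Expand(uint c) => (c << 4) | c;

    return Expand(r) | (Expand(g) << 8) | (Expand(b) << 16) | (Expand(a) << 24);
}
```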
c963b3c804 Added Generic Math to BitUtils (#3929)
* Generic Math Update

Updated Several functions in Ryujinx.Common/Utilities/BitUtils to use generic math

* Updated BitUtil calls

* Removed Whitespace

* Switched decrement

* Fixed changed method calls.

The method calls were originally changed by accident, due to me relying too much on IntelliSense doing stuff for me

* Update Ryujinx.Common/Utilities/BitUtils.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2022-12-26 14:11:05 +00:00
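A small example of what the generic-math rewrite enables, using .NET 7's `IBinaryInteger<T>` so a single `AlignUp` works for int, uint, long, and so on; this is a sketch in the spirit of BitUtils, not its exact code:

```csharp
using System.Numerics;

static class BitUtilsSketch
{
    // Align 'value' up to the next multiple of 'size' (size must be a power of two).
    public static T AlignUp<T>(T value, T size) where T : IBinaryInteger<T>
    {
        T mask = size - T.One;
        return (value + mask) & ~mask;
    }
}

// Usage: both calls share the single generic implementation.
// int alignedInt    = BitUtilsSketch.AlignUp(13, 8);     // 16
// ulong alignedLong = BitUtilsSketch.AlignUp(13UL, 8UL); // 16
```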
a4fdfb5f94 bsd::RecvFrom: verify output buffer size before writing socket address (#4135)
* bsd::RecvFrom: Ryujinx does not verify output buffer size before writing socket address

* Calculate the size of BsdSockAddr

* use bsdSockAddr variable

* Update Ryujinx.HLE/HOS/Services/Sockets/Bsd/IClient.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.HLE/HOS/Services/Sockets/Bsd/Types/BsdSockAddr.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* set errno to ENOMEM in case we can't write the address to memory

* Update Ryujinx.HLE/HOS/Services/Sockets/Bsd/IClient.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.HLE/HOS/Services/Sockets/Bsd/IClient.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>
Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-26 13:49:26 +00:00
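The fix described above amounts to a size check before the write, roughly like the following; the helper name and signature are hypothetical, since the real code lives in IClient and writes through the HLE memory manager:

```csharp
using System;

// Illustrative sketch: only write the serialized socket address if the caller's
// output buffer can hold it; otherwise report ENOMEM instead of corrupting memory.
static int WriteSockAddr(Span<byte> outputBuffer, ReadOnlySpan<byte> sockAddr)
{
    const int Enomem = 12; // errno value the sketch returns on failure

    if (outputBuffer.Length < sockAddr.Length)
    {
        return Enomem;
    }

    sockAddr.CopyTo(outputBuffer);

    return 0; // success
}
```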
37d27c4c99 Some minor cleanups and optimizations (#4174)
* Replace Array.Clear(x, 0, x.Length) with Array.Clear(x)

* Use DateTime.UnixEpoch field

* Replace SHA256.ComputeHash calls with static SHA256.HashData call

More performant and avoids the need to initialize a SHA256 instance.
2022-12-24 14:30:39 -03:00
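For reference, the three cleanups listed above look like this in isolation (standard .NET APIs, not project-specific code):

```csharp
using System;
using System.Security.Cryptography;

byte[] buffer = new byte[256];

// Before: Array.Clear(buffer, 0, buffer.Length);
Array.Clear(buffer);                   // whole-array overload

// Before: new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc)
DateTime epoch = DateTime.UnixEpoch;   // built-in field

// Before: using (var sha = SHA256.Create()) { hash = sha.ComputeHash(buffer); }
byte[] hash = SHA256.HashData(buffer); // static, no instance to allocate
```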
f906eb06c2 Implement a software ETC2 texture decoder (#4121)
* Implement a software ETC2 texture decoder

* Fix output size calculation for non-2D textures

* Address PR feedback
2022-12-21 20:39:58 -03:00
219f63ff4e Fix CPU FCVTN instruction implementation (slow path) (#4159)
* Fix CPU FCVTN instruction implementation (slow path)

* PPTC version bump
2022-12-21 23:05:58 +00:00
1cca3e99ab GPU: Force rebind when pool changes (#4129) 2022-12-21 17:35:28 -03:00
Ccl 55a23e5ec8 make UI display correct content in Chinese (#4155) 2022-12-21 17:20:37 -03:00
479d1fd8b0 hle: Handle GPU profiler and debugger device path correctly (#4138)
This fixes a warning in "慟哭そして…" by correctly handling the debug mode
flag.

When debug mode isn't enabled, opening /dev/nvhost-dbg-gpu or /dev/nvhost-prof-gpu should fail with a not implemented error code.

This implements that behaviour and also defines stubbed interfaces for
completeness.
2022-12-21 18:23:11 +00:00
cb70e7bb30 Fix DrawArrays vertex buffer size (#4141) 2022-12-21 19:08:12 +01:00
c200a7b7c6 ARMeilleure: Hash _data pointer instead of value for Operand (#4156)
I noticed a weirdly high cost for dictionary accesses from MarkLabel etc. Turns out that the hash code was always the same for labels, so the whole point of having a dictionary was missed and it was putting everything in the same bucket. I made it always hash the _data pointer as that's a good source of identifiable and "random" data.
2022-12-21 02:28:18 +01:00
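The fix is essentially about feeding identifying bits into the hash; a simplified sketch of the idea (not ARMeilleure's actual Operand layout):

```csharp
using System;

// Illustrative sketch: if every label Operand returns the same hash code, a
// Dictionary<Operand, ...> degenerates into one bucket (a linear scan).
// Hashing the backing data pointer restores a good key distribution.
readonly struct OperandSketch
{
    private readonly IntPtr _data; // unique per operand in this sketch

    public OperandSketch(IntPtr data) => _data = data;

    public override int GetHashCode() => _data.GetHashCode();

    public override bool Equals(object obj) =>
        obj is OperandSketch other && other._data == _data;
}
```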
6268170a10 fix sw kbd row collision (#4144) 2022-12-19 15:09:36 -03:00
ee0f9b03a4 Eliminate zero-extension moves in more cases on 32-bit games (#4140)
* Eliminate zero-extension moves in more cases on 32-bit games

* PPTC version bump

* Revert X86Optimizer changes
2022-12-19 14:45:58 -03:00
f93c5f006a Revert "ARMeilleure: Add initial support for AVX512(EVEX encoding) (#3663)" (#4145)
This reverts commit 295fbd0542.
2022-12-18 20:21:10 -03:00
295fbd0542 ARMeilleure: Add initial support for AVX512(EVEX encoding) (#3663)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection

Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.

* ARMeilleure: Add initial support for EVEX instruction encoding

Does not implement rounding, or exception controls.

* ARMeilleure: Add `X86Vpternlogd`

Accelerates the vector-`Not` instruction.

* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}

* ARMeilleure: Add check for `XCR0` flags

Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.

* ARMeilleure: Increment InternalVersion

* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting

* ARMeilleure: Move XCR0 procedure to GetXcr0Eax

* ARMeilleure: Add `XCR0` to `FeatureInfo` structure

* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly

Avoids an additional allocation

* ARMeilleure: Formatting fixes
2022-12-18 16:46:13 -03:00
d7310d7a1c hle: Fix wrong conversion in UserPresence.ToString (#4142)
This fixes an error from #3805 that caused a wrong conversion of ``AppKeyValueStorage`` to string.
As that information isn't really relevant without appropriate parsing, it was removed from ``ToString``.

This should get rid of the "bell warning" in Mario Kart 8 when entering Time Trials.
2022-12-18 14:23:19 +00:00
8c50943a2e nuget: bump Microsoft.NET.Test.Sdk from 17.4.0 to 17.4.1 (#4137)
Bumps [Microsoft.NET.Test.Sdk](https://github.com/microsoft/vstest) from 17.4.0 to 17.4.1.
- [Release notes](https://github.com/microsoft/vstest/releases)
- [Changelog](https://github.com/microsoft/vstest/blob/main/docs/releases.md)
- [Commits](https://github.com/microsoft/vstest/compare/v17.4.0...v17.4.1)

---
updated-dependencies:
- dependency-name: Microsoft.NET.Test.Sdk
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-17 01:19:04 +01:00
ec4cd57ccf Implement another non-indexed draw method on GPU (#4123) 2022-12-16 12:06:38 -03:00
5a085cba0f GPU: Fix layered attachment write (#4131)
Fixes a regression caused by #4003 where the code that writes `_vtgWritesRtLayer` was removed, breaking the crowd in Mario Strikers.
2022-12-16 09:40:01 -03:00
1a1d33a018 ava: Fix invisible swkbd applet on Linux (#4130) 2022-12-16 02:40:55 +01:00
0fbcd630bc Replace DllImport usage with LibraryImport (#4084)
* Replace usage of `DllImport` with `LibraryImport`

* Mark methods as `partial`

* Marshalling

* More `partial` & marshalling

* More `partial` and marshalling

* More partial and marshalling

* Update GdiPlusHelper to LibraryImport

* Unicorn

* More Partial

* Marshal

* Specify EntryPoint

* Specify EntryPoint

* Change GlobalMemoryStatusEx to LibraryImport

* Change RegisterClassEx to LibraryImport

* Define EntryPoints

* Update Ryujinx.Ava/Ui/Controls/Win32NativeInterop.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx.Graphics.Nvdec.FFmpeg/Native/FFmpegApi.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Move return mashal

* Remove calling convention specification

* Remove calling conventions

* Update Ryujinx.Common/SystemInfo/WindowsSystemInfo.cs

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update Ryujinx/Modules/Updater/Updater.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.Ava/Modules/Updater/Updater.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
Co-authored-by: Mary-nyan <thog@protonmail.com>
2022-12-15 18:07:31 +01:00
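For context, this change is the .NET 7 move from runtime-marshalled `DllImport` to source-generated `LibraryImport`; in isolation it looks like this (a generic example, not one of the project's actual bindings):

```csharp
using System.Runtime.InteropServices;

internal static partial class NativeMethods
{
    // Before: runtime marshalling stub generated at execution time.
    // [DllImport("libc", EntryPoint = "getpid")]
    // public static extern int GetPid();

    // After: the marshalling code is generated at compile time, so the method is
    // declared partial and the attribute becomes LibraryImport.
    [LibraryImport("libc", EntryPoint = "getpid")]
    public static partial int GetPid();
}
```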
f4d731ae20 Fix NRE when loading Vulkan shader cache with Vertex A shaders (#4124) 2022-12-15 17:52:12 +01:00
8ac53c66b4 Remove Half Conversion (#4106)
* Remove HalfConversion

* Update `CodeGenVersion`
2022-12-14 21:13:23 -03:00
0f50de72be Vulkan: enable VK_EXT_custom_border_color features (#4116)
* Vulkan: enable VK_EXT_custom_border_color features

radv only creates the border color BO if this feature is enabled, so it crashed when creating samplers with custom border colors.
Fixes #4072
Fixes #3993

* Address gdkchan's comment

Co-authored-by: Mary <mary@mary.zone>
2022-12-14 20:53:33 -03:00
df758eddd1 Bsd: Add support for dns_mitm (#4102)
* bsd: Add dns_mitm from Atmosphère

related AMS files:
- https://github.com/Atmosphere-NX/Atmosphere/blob/master/stratosphere/ams_mitm/source/dns_mitm/dnsmitm_resolver_impl.cpp
- https://github.com/Atmosphere-NX/Atmosphere/blob/master/stratosphere/ams_mitm/source/dns_mitm/dnsmitm_host_redirection.cpp

* Remove debug logging and adjust redirect message

* Improve formatting

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Replace Initialize with instance property

* bsd: dns_mitm - Ignore empty lines

* bsd: Mark _mitmHostEntries as readonly

* bsd: Initialize Aliases when returning IpHostEntry

Fixes NullReferenceException

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-12 18:04:08 +01:00
5f32a8ed94 misc: Update to Ryujinx.Graphics.Nvdec.Dependencies 5.0.1-build13 (#4097)
Fix packaging issues on macOS related to an unsatisfied dependency on libX11
2022-12-12 16:19:46 +01:00
535fbec675 Use NuGet Central Package Management to manage package versions solution-wise (#4095) 2022-12-12 16:03:10 +01:00
6fe88115a3 misc: Some fixes to the updaters (#4092)
This was meant to be only an upgrade of how we set unix permissions in
the updater to use the new .NET 7 APIs, but I ended up finding bugs along
the way.

Changelog:
- Remove direct usage of chmod in favour of File.SetUnixFileMode.
- Fix the command line being broken when updating (#3744), this time in
  Ryujinx.Ava.
- Make the Ryujinx.Ava updater fall back to the Ryujinx executable if the
  current name isn't found.
- Make the permission setter function more generic.
2022-12-12 15:17:22 +01:00
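The permission-setter change refers to the .NET 7 API that replaces shelling out to chmod; a minimal example (not the updater's exact code):

```csharp
using System.IO;

// Illustrative sketch: mark an extracted executable as runnable (rwxr-xr-x)
// without spawning a chmod process.
static void MakeExecutable(string path)
{
    const UnixFileMode Mode = UnixFileMode.UserRead  | UnixFileMode.UserWrite | UnixFileMode.UserExecute |
                              UnixFileMode.GroupRead | UnixFileMode.GroupExecute |
                              UnixFileMode.OtherRead | UnixFileMode.OtherExecute;

    File.SetUnixFileMode(path, Mode);
}
```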
475fa4d390 Fix "UI" abbreviation being miscapitalized (#4093) 2022-12-12 15:11:55 +01:00
edf7e628ca Use method overloads that support trimming. Mark some types to be trimming friendly (#4083)
* Use method overloads that support trimming. Mark some types to be trimming friendly

* Use generic version of marshalling method
2022-12-12 15:10:05 +01:00
ba5c0cf5d8 Bsd: Implement Select (#4017)
* bsd: Add gdkchan's Select implementation

Co-authored-by: TSRBerry <20988865+tsrberry@users.noreply.github.com>

* bsd: Fix Select() causing a crash with an ArgumentException

.NET Sockets have to be used for the Select() call

* bsd: Make Select more generic

* bsd: Adjust namespaces and remove unused imports

* bsd: Fix NullReferenceException in Select

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2022-12-12 14:59:31 +01:00
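The note that ".NET Sockets have to be used for the Select() call" refers to `System.Net.Sockets.Socket.Select`; a minimal usage example (not the service's actual code):

```csharp
using System.Collections.Generic;
using System.Net.Sockets;

// Illustrative sketch: Socket.Select mutates the lists in place, leaving only the
// sockets that are ready; a negative microsecond timeout means "wait indefinitely".
static void PollSockets(List<Socket> readable, List<Socket> writable, List<Socket> errored, int timeoutMicroseconds)
{
    Socket.Select(readable, writable, errored, timeoutMicroseconds);

    // After the call, each list contains only the sockets that are ready for the
    // corresponding operation (or that have a pending error).
}
```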
403e67d983 audio: Rewrite SoundIo bindings (#4088)
* audio: Rewrite SoundIo bindings

This rewrites the SoundIo bindings to be safer and not a pedantic autogenerated mess.

* Address comments

* Switch DllImport to LibraryImport

* Address gdkchan's comment
2022-12-11 00:57:01 +01:00
c6f1908e0f Fix Lambda Explicit Type Specification Warning (#4090) 2022-12-10 22:12:51 +01:00
851d81d24a Fix Redundant Qualifier Warnings (#4091)
* Fix Redundant Qualifier Warnings

* Remove unnecessary using
2022-12-10 21:21:13 +01:00
459c4caeba Fix HasUnalignedStorageBuffers value when buffers are always unaligned (#4078) 2022-12-09 17:41:40 -03:00
539b22ef7b Add explicit dependency on System.Drawing.Common on Ryujinx.Ava to workaround triming bugs 2022-12-09 20:12:09 +01:00
872f036d64 misc: Remove dependency on System.Drawing.Common (#4082)
We only used it in one spot, for the DPI scaling factor.

This implements the same behaviour using gdiplus.

This removes 700KB of dependencies to download and around 170KB unpacked.
2022-12-09 18:00:53 +01:00
dca96122bf gha: Add concurrency restriction on release workflow (#4081) 2022-12-09 16:15:28 +00:00
e752959109 misc: Update Ryujinx.Graphics.Nvdec.Dependencies to 5.0.1-build12 (#4080)
This adds support for Linux x64, macOS x64 and macOS arm64.
2022-12-09 15:46:07 +01:00
cf01664698 ava: Restyle the Status Bar (#4048) 2022-12-09 15:21:54 +01:00
b283a4adcd nuget: bump CommandLineParser from 2.8.0 to 2.9.1 (#4058)
Bumps [CommandLineParser](https://github.com/commandlineparser/commandline) from 2.8.0 to 2.9.1.
- [Release notes](https://github.com/commandlineparser/commandline/releases)
- [Changelog](https://github.com/commandlineparser/commandline/blob/master/CHANGELOG.md)
- [Commits](https://github.com/commandlineparser/commandline/compare/2.8.0...v2.9.1)

---
updated-dependencies:
- dependency-name: CommandLineParser
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-09 14:16:22 +01:00
8428bb6541 Fix shader FSWZADD instruction (#4069)
* Fix shader FSWZADD instruction

* Shader cache version bump
2022-12-08 14:08:07 -03:00
9a0330f7f8 Shader: Implement PrimitiveID (#4067)
* Shader: Implement PrimitiveID

* Shader cache version bump
2022-12-08 10:55:03 +01:00
57fc996337 Fix inconsistent capitalization (#4070) 2022-12-08 09:32:24 +00:00
1f3b860f06 acc: Stub CheckNetworkServiceAvailabilityAsync (#4052) 2022-12-07 23:19:22 +01:00
abe3c02ab4 nuget: bump DynamicData from 7.12.8 to 7.12.11 (#4059)
Bumps [DynamicData](https://github.com/reactiveui/DynamicData) from 7.12.8 to 7.12.11.
- [Release notes](https://github.com/reactiveui/DynamicData/releases)
- [Changelog](https://github.com/reactivemarbles/DynamicData/blob/main/ReleaseNotes.md)
- [Commits](https://github.com/reactiveui/DynamicData/compare/7.12.8...7.12.11)

---
updated-dependencies:
- dependency-name: DynamicData
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-07 23:07:23 +01:00
45b417b2b4 nuget: bump NUnit from 3.12.0 to 3.13.3 (#4060)
Bumps [NUnit](https://github.com/nunit/nunit) from 3.12.0 to 3.13.3.
- [Release notes](https://github.com/nunit/nunit/releases)
- [Changelog](https://github.com/nunit/nunit/blob/v3.13.3/CHANGES.md)
- [Commits](https://github.com/nunit/nunit/compare/v3.12...v3.13.3)

---
updated-dependencies:
- dependency-name: NUnit
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-07 22:02:31 +01:00
d076339e3e Add Ryujinx license file to builds (#4057) 2022-12-07 18:20:18 +01:00
837836431d nuget: bump System.Drawing.Common from 6.0.0 to 7.0.0 (#4024)
Bumps [System.Drawing.Common](https://github.com/dotnet/runtime) from 6.0.0 to 7.0.0.
- [Release notes](https://github.com/dotnet/runtime/releases)
- [Commits](https://github.com/dotnet/runtime/compare/v6.0.0...v7.0.0)

---
updated-dependencies:
- dependency-name: System.Drawing.Common
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-07 16:24:34 +00:00
9f555db5cd hle: Do not add disabled AoC item to the list (#4044)
* hle: Do not add disabled AoC item to the list

We currently add all AoC items to a list in `ContentManager`, and the enable check is only done when the FS service asks for the data, which is wrong. It causes an issue in MK8D, which doesn't boot even if you have disabled an outdated DLC.

I've fixed it by not adding disabled AoC items to the list, and I've removed some duplicate code too.
There is still an edge case because we currently don't check the AoC item version, but that should be fixed later since MK8D now throws an error if the DLC isn't updated.

* remove useless "enabled"
2022-12-07 15:00:28 +01:00
bf7fa60dfc Fix struct layout packing (#4039) 2022-12-07 02:04:01 +00:00
752b93d3b7 gtk: Fixes warnings about obsolete components (#4049)
* gtk: Fixes warnings about obsolete components

* remove wrong using
2022-12-07 01:49:37 +01:00
f23b2878cc Shader: Add fallback for LDG from "ube" buffer ranges. (#4027)
We have a conversion from LDG on the compute shader to a special constant buffer binding that's used to exceed hardware limits on compute, but it was only running if the byte offset could be identified. The fallback that checks all of the bindings at runtime only checks the storage buffers.

This PR adds checking ube ranges to the LoadGlobal fallback. This extends the changes in #4011 to only check ube entries which are accessed by the shader.

Fixes particles affected by the wind in The Legend of Zelda: Breath of the Wild. May fix other weird issues with compute shaders in some games.

Try a bunch of games and drivers to make sure they don't blow up loading constants willy-nilly from searchable buffers.
2022-12-06 23:15:44 +00:00
e211c3f00a UI: Add Metal surface creation for MoltenVK (#3980)
* Initial implementation of metal surface across UIs

* Fix SDL2 on windows

* Update Ryujinx/Ryujinx.csproj

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Address Feedback

Co-authored-by: Mary-nyan <thog@protonmail.com>
2022-12-06 19:00:25 -03:00
d3709a753f nuget: bump XamlNameReferenceGenerator from 1.4.2 to 1.5.1 (#4026)
Bumps [XamlNameReferenceGenerator](https://github.com/avaloniaui/Avalonia.NameGenerator) from 1.4.2 to 1.5.1.
- [Release notes](https://github.com/avaloniaui/Avalonia.NameGenerator/releases)
- [Commits](https://github.com/avaloniaui/Avalonia.NameGenerator/compare/1.4.2...1.5.1)

---
updated-dependencies:
- dependency-name: XamlNameReferenceGenerator
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-06 19:00:08 +01:00
ab676d58ea nuget: bump System.IdentityModel.Tokens.Jwt from 6.25.0 to 6.25.1 (#4043)
Bumps [System.IdentityModel.Tokens.Jwt](https://github.com/AzureAD/azure-activedirectory-identitymodel-extensions-for-dotnet) from 6.25.0 to 6.25.1.
- [Release notes](https://github.com/AzureAD/azure-activedirectory-identitymodel-extensions-for-dotnet/releases)
- [Changelog](https://github.com/AzureAD/azure-activedirectory-identitymodel-extensions-for-dotnet/blob/dev/CHANGELOG.md)
- [Commits](https://github.com/AzureAD/azure-activedirectory-identitymodel-extensions-for-dotnet/compare/6.25.0...6.25.1)

---
updated-dependencies:
- dependency-name: System.IdentityModel.Tokens.Jwt
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-06 18:01:21 +01:00
2372c194f1 ava: Cleanup Input classes (#4042)
* ava: Cleanup Input classes

This PR just cleanup all Input classes for consistencies.

* Addresses TSRBerry's feedback
2022-12-06 15:32:14 +00:00
40311310d1 amadeus: Add missing compressor effect from REV11 (#4010)
* amadeus: Add missing compressor effect from REV11

This was in my reversing notes, but it seems I completely forgot to
implement it.

Also took the opportunity to simplify the Limiter effect a bit.

* Remove some outdated comment

* Address gdkchan's comments
2022-12-06 15:04:25 +01:00
dde9bb5c69 Fix storage buffer access when match fails (#4037)
* Fix storage buffer access when match fails

* Shader cache version bump
2022-12-06 03:36:54 +00:00
266338a7c9 Change default Vsync toggle hotkey to F1 instead of Tab (#3995) 2022-12-06 02:09:26 +00:00
90156eea4c nuget: bump Microsoft.CodeAnalysis.CSharp from 4.2.0 to 4.4.0 (#4025)
Bumps [Microsoft.CodeAnalysis.CSharp](https://github.com/dotnet/roslyn) from 4.2.0 to 4.4.0.
- [Release notes](https://github.com/dotnet/roslyn/releases)
- [Changelog](https://github.com/dotnet/roslyn/blob/main/docs/Breaking%20API%20Changes.md)
- [Commits](https://github.com/dotnet/roslyn/compare/v4.2.0...Visual-Studio-2019-Version-16.0-Preview-4.4)

---
updated-dependencies:
- dependency-name: Microsoft.CodeAnalysis.CSharp
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-06 01:51:33 +00:00
071c01c235 Fix Sorting Regression (#4032)
* Fix sorting regression + Remove unused sort

* Fix GTK

* Attempt 2 to fix GTK

* Whoopsie

* Fix whitespace
2022-12-06 01:40:06 +00:00
de06ffb0f7 Fix shaders with global memory access from unknown locations (#4029)
* Fix shaders with global memory access from unknown locations

* Shader cache version bump
2022-12-06 01:09:24 +00:00
8a7de35e3f Update 'OpenGL Log Level' to 'Graphics Backend Log Level' (#3996)
* Update 'OpenGL Log Level' to 'Graphics Backend Log Level'

* update other locals and change keys
2022-12-06 00:48:41 +00:00
121296834a Ava GUI: Several UI Fixes (#3991)
* Fix accessibility violations in ListView

* Use accent colour for favourite star

* Hide progress bar when its done

* App Data Formatting

- Added space before storage unit
- Changed so minutes have 0 decimals, and hours and days have 1

* Fix theming

* Fix mismatched corner radius

* Fix accessibility violations in GridView

* More consistency between Grid and List View

* Fix margin

* Let whitespace defocus controls
2022-12-05 22:04:18 +00:00
bbb24d8c7e Restrict shader storage buffer search when match fails (#4011)
* Restrict storage buffer search when match fails

* Shader cache version bump
2022-12-05 19:11:32 +00:00
4da44e09cb Make structs readonly when applicable (#4002)
* Make all structs readonly when applicable. It should reduce the amount of needless defensive copies

* Make structs with trivial boilerplate equality code record structs

* Remove unnecessary readonly modifiers from TextureCreateInfo

* Make BitMap structs readonly too
2022-12-05 14:47:39 +01:00
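In isolation, the two patterns mentioned above look like this (generic example types, not the project's):

```csharp
// Illustrative sketch: 'readonly struct' lets the compiler skip defensive copies
// when the struct is accessed through 'in' parameters or readonly fields, and
// 'readonly record struct' adds the value-equality boilerplate for free.
readonly struct Extent2D
{
    public int Width  { get; }
    public int Height { get; }

    public Extent2D(int width, int height)
    {
        Width  = width;
        Height = height;
    }
}

readonly record struct TextureKey(int Handle, int Level);
```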
ae13f0ab4d misc: Fix obsolete warnings in Ryujinx.Graphics.Vulkan (#4020)
Was caused by some merges after the Silk.NET update
2022-12-05 12:57:11 +00:00
a2a35f1be6 nuget: bump Microsoft.NET.Test.Sdk from 16.8.0 to 17.4.0 (#3900)
Bumps [Microsoft.NET.Test.Sdk](https://github.com/microsoft/vstest) from 16.8.0 to 17.4.0.
- [Release notes](https://github.com/microsoft/vstest/releases)
- [Commits](https://github.com/microsoft/vstest/compare/v16.8.0...v17.4.0)

---
updated-dependencies:
- dependency-name: Microsoft.NET.Test.Sdk
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-05 08:17:40 +01:00
aedfadaaf7 Add InfoType.MesosphereCurrentProcess (#3792)
* Add InfoType.MesosphereCurrentProcess

* Make outHandle inlined

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2022-12-04 19:46:02 +00:00
5c0fb0cec3 ui: Disallow checking for updates while emulation active (#3886)
* Disallow updating while game is running

* Reflected change on Avalonia

* Git has gone wonky

* Fix accidental indent
2022-12-04 20:17:11 +01:00
17a1cab5d2 Allow SNorm buffer texture formats on Vulkan (#3957)
* Allow SNorm buffer texture formats on Vulkan

* Shader cache version bump
2022-12-04 15:36:03 -03:00
73aed239c3 Implement non-MS to MS copies with draws (#3958)
* Implement non-MS to MS copies with draws, simplify MS to non-MS copies and supports any host sample count

* Remove unused program
2022-12-04 15:07:11 -03:00
9ac66336a2 GPU: Use lazy checks for specialization state (#4004)
* GPU: Use lazy checks for specialization state

This PR adds a new class, the SpecializationStateUpdater, that allows elements of specialization state to be updated individually and signals that the state needs checking when it changes between draws, instead of building and checking it on every draw. This also avoids building spec state when it isn't actually needed.

Most state updates have been moved behind the shader state update, so that their specialization state updates make it in before shaders are fetched.

Downside: Fields in GpuChannelGraphicsState are no longer readonly. To counteract copies this might cause, I pass it as `ref` when possible, though maybe `in` would be better? Not really sure about the quirks of `in`, and the difference probably won't show on a benchmark.

The result is around 2 extra FPS on SMO in the usual spot. Not much right now, but it will remove costs when we're doing more expensive specialization checks, such as fragment output type specialization for macOS. It may also help more on other games with more draws.

* Address Feedback

* Oops
2022-12-04 18:41:17 +01:00
4965681e06 GPU: Swap bindings array instead of copying (#4003)
* GPU: Swap bindings array instead of copying

Reduces work on UpdateShaderState. Now the cost is a few reference moves for arrays, rather than copying data.

Downside: bindings arrays are no longer readonly.

* Micro optimisation

* Add missing docs

* Address Feedback
2022-12-04 18:18:40 +01:00
3868a00206 Use source generated regular expressions (#4005) 2022-12-04 00:43:23 +00:00
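For reference, the .NET 7 source-generated regex pattern this commit refers to looks like this (a generic example, not one of the project's regexes):

```csharp
using System.Text.RegularExpressions;

internal static partial class VersionParser
{
    // The source generator emits the matcher at compile time instead of
    // building (or JIT-compiling) it at runtime via 'new Regex(...)'.
    [GeneratedRegex(@"^\d+\.\d+\.\d+$")]
    private static partial Regex VersionRegex();

    public static bool IsVersion(string text) => VersionRegex().IsMatch(text);
}
```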
933e5144a9 Query Available RAM on macOS (#4000) 2022-12-04 00:25:07 +00:00
73a42c85c4 Add crowdin badge and information in readme (#3990)
* Add crowdin badge and information to the readme.

* Update README.md

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Update README.md

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2022-12-02 23:57:38 +01:00
39ba11054b Update Crowdin configuration file 2022-12-02 15:22:21 +01:00
c250e3392c Fix using in Ava 2022-12-02 15:08:57 +01:00
e56b069081 Update Crowdin configuration file 2022-12-02 14:54:56 +01:00
204c031fef SDL2Driver: Invoke dispatcher on main thread (#3818) 2022-12-02 14:37:22 +01:00
d9053bbe37 Avalonia - Save Manager (#3476)
* Add save manager to account selector

* add fallback to app metadata for titlename if app is not in gamelist

* Allow recovering lost accounts
2022-12-02 13:16:43 +00:00
c25e8427aa amadeus: Fix wrong SendCommands logic (#3969)
* amadeus: Fix wrong SendCommands logic

Might help with audio desync, might cause audio stutters, will see!

* Address gdkchan's comment
2022-12-02 13:01:19 +00:00
21a081b185 Add back locales removed in #3955 (#3983)
Add back `SettingsButtonSave` & `SettingsButtonClose` removed in #3955

Fixes #3982
2022-12-02 12:46:18 +00:00
b540ea80d1 Ava GUI: Make Dialogue More Intuitive (#3955)
* Adjust button position and locales

* Shortcuts + Highlight default action

* Update Locales - Corrections Welcome

* Move `Apply` button back to right side

* OS Reactive Button layout

* Fix reversed boolean :)

* Fix accented button styling
2022-12-02 03:31:21 +01:00
d692a9b83e Revert "nuget: bump SixLabors.ImageSharp from 1.0.4 to 2.1.3 (#3976)"
This reverts commit 9677ddaa5d.

SixLabors.ImageSharp switched to a shady and vague license starting with 2.x
without mentioning it in their changelog.

As a result we are staying on 1.x (licensed under Apache-2.0) and will
seek an alternative package.
2022-12-01 23:06:55 +01:00
9677ddaa5d nuget: bump SixLabors.ImageSharp from 1.0.4 to 2.1.3 (#3976)
* nuget: bump SixLabors.ImageSharp from 1.0.4 to 2.1.3

Bumps [SixLabors.ImageSharp](https://github.com/SixLabors/ImageSharp) from 1.0.4 to 2.1.3.
- [Release notes](https://github.com/SixLabors/ImageSharp/releases)
- [Commits](https://github.com/SixLabors/ImageSharp/compare/v1.0.4...v2.1.3)

---
updated-dependencies:
- dependency-name: SixLabors.ImageSharp
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update for 2.x changes

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mary <mary@mary.zone>
2022-12-01 21:54:41 +00:00
ce92e8cd04 chore: Update Silk.NET to 2.16.0 (#3953) 2022-12-01 19:11:56 +01:00
456fc04007 Better SDL2 Audio Init Error Logging (#3967)
* Better SDL2 Audio Init Error Logging

* Update Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceDriver.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceDriver.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceDriver.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>

* Update Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceDriver.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceDriver.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>

* Update SDL2HardwareDeviceDriver.cs

* Update SDL2HardwareDeviceDriver.cs

Co-authored-by: Mary-nyan <thog@protonmail.com>
Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-12-01 17:53:13 +01:00
458452279c GPU: Track buffer migrations and flush source on incomplete copy (#3952)
* Track buffer migrations and flush source on incomplete copy

Makes sure that the modified range list is always from the latest iteration of the buffer, and flushes earlier iterations of a buffer if the data has not been migrated yet.

* Cleanup 1

* Reduce cost for redundant signal checks on Vulkan

* Only inherit the range list if there are pending ranges.

* Fix OpenGL

* Address Feedback

* Whoops
2022-12-01 16:30:13 +01:00
817b89767a infra: Add distribution files for macOS (#3934)
This upstreams the macOS packaging and distribution files.
2022-12-01 14:08:43 +01:00
3fb583c98c Avalonia: Clean up leftover RenderTimer & Fix minimum and initial window size (#3935)
* ava: Cleanup RenderTimer

* ava: Remove ContentControl from RendererHost

* ava: Remove unused actual scale factor

* ava: Enable UseGpu for Linux

* ava: Set better initial size & Scale the window properly

* ava: Realign properties

* ava: Use explicit type & specify where the note applies
2022-11-30 23:34:25 +01:00
d2686e0a5b nuget: bump DiscordRichPresence from 1.0.175 to 1.1.3.18 (#3965)
Bumps [DiscordRichPresence](https://github.com/Lachee/discord-rpc-csharp) from 1.0.175 to 1.1.3.18.
- [Release notes](https://github.com/Lachee/discord-rpc-csharp/releases)
- [Commits](https://github.com/Lachee/discord-rpc-csharp/commits)

---
updated-dependencies:
- dependency-name: DiscordRichPresence
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-30 21:58:45 +00:00
4905101df1 Remove shader dependency on SPV_KHR_shader_ballot and SPV_KHR_subgroup_vote extensions (#3943)
* Remove shader dependency on SPV_KHR_shader_ballot and SPV_KHR_subgroup_vote extensions

* Shader cache version bump
2022-11-30 18:24:15 -03:00
8750b90a7f Ensure that vertex attribute buffer index is valid on GPU (#3942)
* Ensure that vertex attribute buffer index is valid on GPU

* Remove vertex buffer validation code from OpenGL

* Remove some fields that are no longer necessary
2022-11-30 18:06:40 -03:00
af01100050 nuget: bump System.Management from 6.0.0 to 7.0.0 (#3949)
Bumps [System.Management](https://github.com/dotnet/runtime) from 6.0.0 to 7.0.0.
- [Release notes](https://github.com/dotnet/runtime/releases)
- [Commits](https://github.com/dotnet/runtime/compare/v6.0.0...v7.0.0)

---
updated-dependencies:
- dependency-name: System.Management
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-29 23:30:33 +01:00
c0821fee1f Update README.MD (#3946)
Updates the readme to include usage of Metal via MoltenVK, updated game compatibility statistics.
2022-11-29 17:20:26 +01:00
a5c2aead67 ConcurrentBitmap: Use Interlocked Or/And (#3937) 2022-11-29 13:47:57 +00:00
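Interlocked.Or and Interlocked.And (available since .NET 5) let a bitmap set or clear bits atomically without a manual CompareExchange loop; a small illustrative example, not the ConcurrentBitmap code itself:

```csharp
using System.Threading;

class ConcurrentBits
{
    private long _bits;

    // Atomically set or clear a single bit in the 64-bit field.
    public void Set(int bit)   => Interlocked.Or(ref _bits, 1L << bit);
    public void Clear(int bit) => Interlocked.And(ref _bits, ~(1L << bit));

    public bool IsSet(int bit) => (Interlocked.Read(ref _bits) & (1L << bit)) != 0;
}
```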
d41c95dcff chore: Update OpenTK to 4.7.5 (#3944) 2022-11-29 13:32:40 +00:00
fbf2b09706 ava: Make dialogs using an overlay window work on Linux (#3938) 2022-11-29 06:33:46 +01:00
1fc0f569de GPU: Always draw polygon topology as triangle fan (#3932)
Polygon topology wasn't really supported and would only work on OpenGL on drivers that haven't removed it. As an alternative, this PR makes all cases of polygon topology use triangle fan. The topology type and transform feedback type have not been changed, as I don't think geo shader/tfb should be used with polygons.

The OpenGL spec states:
Only convex polygons are guaranteed to be drawn correctly by the GL.

For convex polygons, triangle fan is equivalent to polygon. I imagine this is probably how it works on device, as this get-out-of-jail-free card is too enticing to pass up.

This fixes the stat display in Pokemon S/V.
2022-11-28 19:18:22 -03:00
dff138229c amadeus: Fixes and initial 15.0.0 support (#3908)
* amadeus: Allow OOB read of GC-ADPCM coefficients

Fixes "Ninja Gaiden Sigma 2" and possibly "NINJA GAIDEN 3: Razor's Edge"

* amadeus: Fix wrong variable usage in delay effect

We should transform the delay line values, not the input.

* amadeus: Update GroupedBiquadFilterCommand documentation

* amadeus: Simplify PoolMapper alignment checks

* amadeus: Update Surround delay effect matrix to REV11

* amadeus: Add drop parameter support and use 32-bit integers for estimated time

Also implement accurate ExecuteAudioRendererRendering stub.

* Address gdkchan's comments

* Address gdkchan's other comments

* Address gdkchan's comment
2022-11-28 08:28:45 +01:00
472119c8da sfdnsres: Fix deserializer of AddrInfoSerialized when addresses are empty (#3924) 2022-11-28 02:53:57 +01:00
1865ea87e5 bsd: Fix eventfd broken logic (#3647)
* bsd: Fix eventfd broken logic

This commit fixes the broken eventfd logic.

The following changes were made:
- EventFd IPC definition had argument inverted
- EventFd events weren't fired correctly
- Poll logic was wrong and unfinished for eventfd
- Reintroduce workaround from #3385 but in a safer way, and spawn 4
  threads.

* ipc: Rework a bit for multithreads

* Clean up debug logs

* Make server thread yield when managed lock isn't available

* Fix replyTargetHandle not being added in the proper locking scope

* Simplify some scopes

* Address gdkchan's comments

* Revert IPC workaround for now

* Reintroduce the EventFileDescriptor workaround
2022-11-27 20:18:05 +00:00
18b61aff59 Unbreak bug_report.md (#3915)
* Unbreak bug_report.md

* Update bug_report.md
2022-11-27 20:11:51 +00:00
cb22629ac1 HLE: fix small issue in IPsmSession (#3909) 2022-11-27 01:10:42 +00:00
6f0f99ee2b Avalonia: Fix OpenGL crashing on Linux (#3902)
* ava: Fix OpenGL crashing on Linux

Fixes a regression from #3901

* Fix formatting
2022-11-26 12:06:53 +01:00
70f2da8fdf ava: Fix invisible vulkan window on Linux (#3901)
Co-authored-by: emmauss <emmausssss@gmail.com>

Co-authored-by: emmauss <emmausssss@gmail.com>
2022-11-25 17:40:44 +00:00
5d3ef7761b ava: Refactor Title Update Manager window (#3898)
* ava: Refactor TitleUpdate Manager window

* Update locale
2022-11-25 17:55:08 +01:00
476b4683cf Fix CB0 alignment with addresses used for 8/16-bit LDG/STG (#3897)
This replacement is meant to be done with the original identified byteOffset, not the one assigned later on by the below conditionals (that already has the constant offset added, for instance).

This fixes videos being pixelated in Xenoblade 3, and other regressions that might have happened since #3847.
2022-11-25 14:39:03 +00:00
5fb5079730 chore: Update Avalonia related dependencies (#3891) 2022-11-25 13:27:41 +00:00
3fbacd0f49 ava: Rework DLC Manager, Add various fixes and cleanup (#3896)
* Fixes Everything Part.2

* Change sorting, fix remove and heading
2022-11-25 12:41:34 +01:00
7aa6abc120 nuget: bump SharpZipLib from 1.3.3 to 1.4.1 (#3893)
Bumps [SharpZipLib](https://github.com/icsharpcode/SharpZipLib) from 1.3.3 to 1.4.1.
- [Release notes](https://github.com/icsharpcode/SharpZipLib/releases)
- [Changelog](https://github.com/icsharpcode/SharpZipLib/blob/master/docs/Changes.txt)
- [Commits](https://github.com/icsharpcode/SharpZipLib/compare/v1.3.3...v1.4.1)

---
updated-dependencies:
- dependency-name: SharpZipLib
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-24 20:30:17 +01:00
548bfd60a2 chore: Update Ryujinx.SDL2-CS to 2.24.2 (#3892)
* chore: Update Ryujinx.SDL2-CS to 2.24.2

* Disable SDL_HINT_JOYSTICK_HIDAPI_COMBINE_JOY_CONS
2022-11-24 20:13:16 +01:00
65778a6b78 GPU: Don't trigger uploads for redundant buffer updates (#3828)
* Initial implementation

* Actually do The Thing

* Add remark about performance to IVirtualMemoryManager
2022-11-24 15:50:15 +01:00
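A rough sketch of the redundancy check described above, assuming a CPU-side shadow copy of the buffer contents is kept; the type and method names are hypothetical, not the actual buffer-cache code.

```csharp
using System;

class BufferShadow
{
    private byte[] _shadow = Array.Empty<byte>();

    // Returns true if an upload actually happened.
    public bool Upload(ReadOnlySpan<byte> data, Action<ReadOnlyMemory<byte>> uploadToGpu)
    {
        if (data.SequenceEqual(_shadow))
        {
            return false; // Identical data: skip the GPU upload entirely.
        }

        _shadow = data.ToArray();
        uploadToGpu(_shadow);
        return true;
    }
}
```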
f4e879a1e6 Reduce usage of Marshal.PtrToStructure and Marshal.StructureToPtr (#3805)
* common: Make BinaryReaderExtensions Read & Write take unmanaged types

This allows us to not rely on Marshal.PtrToStructure and Marshal.StructureToPtr for those.

* common: Make MemoryHelper Read & Write take unmanaged types

* Update Marshal.SizeOf => Unsafe.SizeOf when appropriate and start moving software applet to unmanaged types
2022-11-24 15:26:29 +01:00
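The pattern the commit above moves toward looks roughly like this: generic methods constrained to unmanaged types, using Unsafe.SizeOf and MemoryMarshal.Read instead of Marshal.SizeOf and Marshal.PtrToStructure. The Header struct is only an example.

```csharp
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

[StructLayout(LayoutKind.Sequential, Pack = 1)]
struct Header
{
    public uint Magic;
    public uint Size;
}

static class BinaryUtils
{
    public static T Read<T>(ReadOnlySpan<byte> data) where T : unmanaged
    {
        // Unsafe.SizeOf<T>() replaces Marshal.SizeOf, and MemoryMarshal.Read
        // replaces pinning followed by Marshal.PtrToStructure.
        return MemoryMarshal.Read<T>(data[..Unsafe.SizeOf<T>()]);
    }
}
```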
a1ddaa2736 ui: Fixes disposing on GTK/Avalonia and Firmware Messages on Avalonia (#3885)
* ui: Only wait on _exitEvent when MainLoop is active under GTK

This fixes a dispose issue under Horizon/GTK: we don't check if the ApplicationClient is null, so it throws an NCE. We also don't check if the main loop is active while waiting on an event which is set in the main loop... so that could lead to a freeze.

Everything works fine in GTK now.

Related issue: https://github.com/Ryujinx/Ryujinx/issues/3873

As a side note, the same kind of issue appears in Avalonia UI too: the firmware popup doesn't show anything and the emulator just freezes.

* TSRBerry's change

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>

* Fix Avalonia crashing/freezing

* Add Avalonia OpenGL fixes

* Fix firmware popup on windows

* Fixes everything

* Add _initialized bool to VulkanRenderer and OpenGL Window

Co-authored-by: TSRBerry <20988865+TSRBerry@users.noreply.github.com>
2022-11-24 15:08:27 +01:00
008286b79f Ryujinx.Ava: Add missing redefinition of app name (#3890)
Before this, Ryujinx would possibly report as "Avalonia Application".
2022-11-24 14:52:39 +01:00
a0c77f8d11 Fix NRE on Avalonia for error applets with unknown error message (#3888) 2022-11-24 09:31:00 +01:00
ece36b274d GAL: Send all buffer assignments at once rather than individually (#3881)
* GAL: Send all buffer assignments at once rather than individually

The `(int first, BufferRange[] ranges)` method call has very significant performance implications when the bindings are spread out, which they generally always are in Vulkan. This change makes it so that these methods are only called a maximum of one time per draw.

Significantly improves GPU thread performance in Pokemon Scarlet/Violet.

* Address Feedback

Removed SetUniformBuffers(int first, ReadOnlySpan<BufferRange> buffers)
2022-11-24 07:50:59 +00:00
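A hedged sketch of the batching idea: queue dirty assignments and hand them to the backend once per draw instead of once per binding. IGalBackend, BufferAssignment, and the method names are placeholders rather than the real GAL interface.

```csharp
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;

readonly record struct BufferAssignment(int Binding, int BufferHandle, int Offset, int Size);

interface IGalBackend
{
    void SetUniformBuffers(ReadOnlySpan<BufferAssignment> assignments);
}

class UniformBufferUpdater
{
    private readonly List<BufferAssignment> _dirty = new();

    public void Queue(BufferAssignment assignment) => _dirty.Add(assignment);

    public void Commit(IGalBackend backend)
    {
        if (_dirty.Count == 0) return;

        // One backend call per draw, no matter how spread out the bindings are.
        backend.SetUniformBuffers(CollectionsMarshal.AsSpan(_dirty));
        _dirty.Clear();
    }
}
```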
f3cc2e5703 GPU: Access non-prefetch command buffers directly (#3882)
* GPU: Access non-prefetch command buffers directly

Saves allocating new arrays for them constantly - they can be quite small so it can be very wasteful. About 0.4% of GPU thread in SMO, but was a bit higher in S/V when I checked.

Assumes that non-prefetch command buffers won't be randomly clobbered before they finish executing, though that's probably a safe bet.

* Small change while I'm here

* Address feedback
2022-11-24 01:56:55 +00:00
5a39d3c4a1 GPU: Relax locking on Buffer Cache (#3883)
I did this on ncbuffer2 when we were using it for LDN 3, but I noticed that it can apply to the current buffer manager too, and it's an easy performance win.

The only buffer access that can come from another thread is the overlap search for buffers that have been unmapped. Everything else, including modifications, comes from the main GPU thread. That means we only need to lock the range list when it's being modified, as that's the only time we'll cause a race with the unmapped handler.

This gives a significant performance improvement in situations where FIFO is high, like the other two PRs. Joined together they give a nice boost (73.6 master -> 79 -> 83 fps in SMO).
2022-11-24 01:41:16 +00:00
cc51a03af9 nuget: bump Avalonia from 0.10.15 to 0.10.18 (#3817)
Bumps [Avalonia](https://github.com/AvaloniaUI/Avalonia) from 0.10.15 to 0.10.18.
- [Release notes](https://github.com/AvaloniaUI/Avalonia/releases)
- [Commits](https://github.com/AvaloniaUI/Avalonia/compare/0.10.15...0.10.18)

---
updated-dependencies:
- dependency-name: Avalonia
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-24 01:26:53 +00:00
567c64e149 ava: Fix JsonSerializer warnings (#3884)
Since we moved to .NET 7, JsonSerializer now needs explicit options as arguments, which leads to some warnings in the Avalonia project. This is fixed by using our `JsonHelper` class.
2022-11-23 17:55:26 +00:00
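The general pattern a helper class like that wraps is a cached JsonSerializerOptions instance passed explicitly on every call; the options shown here are only an example, not Ryujinx's actual configuration.

```csharp
using System.Text.Json;

static class JsonExample
{
    private static readonly JsonSerializerOptions Options = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    };

    public static string Serialize<T>(T value) => JsonSerializer.Serialize(value, Options);

    public static T? Deserialize<T>(string json) => JsonSerializer.Deserialize<T>(json, Options);
}
```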
36f00985d3 Update to LibHac 0.17.0 (#3878)
* Update to LibHac 0.17.0

* Don't clear SD card saves when starting the emulator

This was an old workaround for errors that happened when a user's SD card encryption seed changed. SD card saves have been unencrypted for over a year, so we should be fine to remove the workaround.
2022-11-23 18:32:35 +01:00
748d87adcc Stub IFriendService: 1 (Cancel) (#3841)
* Add friend/Cancel. Closes #3824

* Update according to review comments.

* Add comment based on request

* Update Ryujinx.HLE/HOS/Services/Friend/ServiceCreator/IFriendService.cs

Co-authored-by: Ac_K <Acoustik666@gmail.com>
2022-11-23 16:25:49 +01:00
0fd47ff490 remove redundant logs (#3877) 2022-11-23 11:28:46 +01:00
f088c3d344 Do not update shader state for DrawTextures (#3876) 2022-11-21 18:16:00 +01:00
905a191e28 Use upstream unicorn for Ryujinx.Tests.Unicorn (#3771)
* unicorn: Add modified ver of unicorns const gen

* unicorn: Use upstream consts

These consts were generated from the dev branch of unicorn

* unicorn: Split common consts into multiple enums

* unicorn: Remove arch prefix from consts

* unicorn: Add new windows dll

Windows 10 - MSVC x64 shared build

* unicorn: Use absolute path for const generation

* unicorn: Remove fspcr patch

* unicorn: Fix using the wrong file extension

For some reason _NativeLibraryExtension evaluates to ".so" even on Windows.

* unicorn: Add linux shared object again

* unicorn: Add DllImportResolver

* unicorn: Try to import unicorn using an absolute path

* unicorn: Add clean target

* unicorn: Replace IsUnicornAvailable() methods

* unicorn: Skip tests instead of silently passing them if unicorn is missing

* unicorn: Write error message to stderr

* unicorn: Make Interface static

* unicorn: Include prefixed unicorn libs (libunicorn.so)

Co-authored-by: merry <git@mary.rs>

* unicorn: Add lib prefix to shared object for linux

Co-authored-by: merry <git@mary.rs>
2022-11-20 20:18:21 +01:00
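The DllImportResolver mechanism mentioned above is a standard .NET API; a sketch of how a test project might redirect a P/Invoke to an absolute or lib-prefixed path. The library name and paths are placeholders, not the actual Ryujinx.Tests.Unicorn layout.

```csharp
using System;
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;

static class UnicornLoader
{
    public static void Install()
    {
        NativeLibrary.SetDllImportResolver(typeof(UnicornLoader).Assembly, Resolve);
    }

    private static IntPtr Resolve(string name, Assembly assembly, DllImportSearchPath? path)
    {
        if (name == "unicorn")
        {
            string candidate = OperatingSystem.IsWindows()
                ? Path.Combine(AppContext.BaseDirectory, "unicorn.dll")
                : Path.Combine(AppContext.BaseDirectory, "libunicorn.so");

            if (NativeLibrary.TryLoad(candidate, out IntPtr handle))
            {
                return handle;
            }
        }

        return IntPtr.Zero; // Fall back to the default resolution behavior.
    }
}
```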
ab0491817e Reword the description of the 6GB expand DRAM hack to be less tantalizing (#3870)
* Reword the hack to something less tantalizing

* Address feedback
2022-11-20 17:15:57 +00:00
5de6ae426e Unsubscribe MemoryUnmappedHandler even when GPU channel is destroyed (#3872) 2022-11-19 23:54:33 -03:00
69ced3a6e8 Fix shader cache on Vulkan when geometry shaders are inserted (#3868) 2022-11-19 10:24:23 +01:00
2e43d01d36 Move gl_Layer from vertex to geometry if GPU does not support it on vertex (#3866)
* Move gl_Layer from vertex to geometry if GPU does not support it on vertex

* Shader cache version bump

* PR feedback
2022-11-18 23:27:54 -03:00
7373ec5792 Vulkan: Clear dummy texture to (0,0,0,0) on creation (#3867)
This might fix an issue with AMD GPUs on Linux where the data could contain random garbage. On the Switch, it always samples as 0.
2022-11-18 23:11:34 -03:00
de162a648b Gpu: Fix thread safety of ReregisterRanges (#3865)
A quick fix to prevent reading the wrong value of Count when reregistering ranges for a new target buffer. Buffer flushes from another thread can modify the range list when the lock isn't active, which can change the count.

This prevents some crashes in Pokemon Scarlet/Violet. It's probably likely that buffer migration during flush is causing some other issues in this game, but this at least prevents the crashing.
2022-11-18 21:47:29 +01:00
131baebe2a Vulkan: Don't create preload command buffer outside a render pass (#3864)
* Vulkan: Don't create preload buffer outside a render pass

The preload command buffer is used to avoid render pass splits and barriers when updating buffer data. However, when a render pass is not active (for example, at the start of a pass, or during compute invocations) buffer uploads can be performed at any time, so the optimization isn't as useful.

This PR makes it so that the preload command buffer is only used for buffer updates outside of a render pass. It's still used for textures as I don't want to shake things up right now regarding how the preload buffer is obtained before some other changes, and texture updates are a lot rarer anyways.

Improves performance slightly in Pokemon Scarlet/Violet (43 -> 48), as it was switching to compute, writing a bunch of buffers inline, then dispatching, then flushing commands... It uses 1 command buffer instead of 2 every time it does this now. Maybe it would be nice to find a faster way to sync without creating so many command buffers in a short period of time.

* Address feedback
2022-11-18 14:58:56 +00:00
187372cbde Prune ForceDirty and CheckModified caches on unmap (#3862)
* Prune ForceDirty and CheckModified caches on unmap

Since we're now using this for modified checks on the HLE indirect draw method, I'm worried that leaving these to forever gather cache entries isn't the best idea for performance in the long term, and it could keep old buffer objects alive for longer than they should be.

This PR adds the ability to prune invalid entries before checking these caches, and queues a prune whenever GPU memory is unmapped. It also aligns modified checks to the page size, as I figured a huge number of overlapping entries could accumulate over a game's runtime.

This prevents Super Mario Odyssey from having tens of thousands of entries in the modified cache in Metro Kingdom, and from duplicating them when entering and leaving a building (they should be cleared, as they were unmapped).

* Address Feedback
2022-11-18 14:58:24 +00:00
022d495335 am: Stub GetSaveDataSizeMax (#3857)
* am: Stub GetSaveDataSizeMax()

* am: Remove todo comment for GetSaveDataSizeMax()

* am: saveDataSize & journalDataSize should be of type long

* am: Add explanation for returning default values in GetSaveDataSizeMax()
2022-11-18 03:29:01 +00:00
c1372ed775 Use ReadOnlySpan<byte> compiler optimization in more places (#3853)
* Use ReadOnlySpan<byte> compiler optimization in more places

* Revert changes in ShaderBinaries.cs

* Remove unused using;

* Use ReadOnlySpan<byte> compiler optimization in more places
2022-11-18 03:10:44 +00:00
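The compiler optimization the commit relies on: a ReadOnlySpan<byte> property over a constant byte[] literal is emitted as a span over static read-only data, so no array is allocated per call. The byte values below are placeholders.

```csharp
using System;

static class BlobExample
{
    // Allocation-free: the C# compiler emits this as a reference to constant data.
    public static ReadOnlySpan<byte> Magic => new byte[] { 0x44, 0x58, 0x42, 0x43 };

    public static bool HasMagic(ReadOnlySpan<byte> file) =>
        file.Length >= Magic.Length && file[..Magic.Length].SequenceEqual(Magic);
}
```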
a16682cfd3 Allow _volatile to be set from MultiRegionHandle checks again (#3830)
* Allow _volatile to be set from MultiRegionHandle checks again

Tracking handles have a `_volatile` flag which indicates that the resource being tracked is modified every time it is used under a new sequence number. This is used to reduce the time spent reprotecting memory for tracking writes to commonly modified buffers, like constant buffers.

This optimisation works by detecting if a buffer is modified every time a check happens. If a buffer is checked but it is not dirty, then that data is likely not modified every sequence number, and should use memory protection for write tracking. If the opposite is the case all the time, it is faster to just assume it's dirty as we'd just be wasting time protecting the memory.

The new MultiRegionBitmap could not notify handles that they had been checked as part of the fast bitmap lookup, so bindings larger than 4096 bytes wouldn't trigger it at all. This meant that they would be subject to a ton of reprotection if they were modified often.

This does mean there are two separate sources for a _volatile set: VolatileOrDirty + _checkCount, and the bitmap check. These shouldn't interfere with each other, though.

This fixes performance regressions from #3775 in Pokemon Sword, and hopefully Yu-Gi-Oh! RUSH DUEL: Dawn of the Battle Royale. May affect other games.

* Fix stupid mistake
2022-11-18 02:54:20 +00:00
7c53b69c30 SPIR-V: Fix unscaling helper not being able to find Array textures (#3863)
The type in the `texOp` for the textureSize instruction doesn't have the exact type on SPIR-V (for example, it is missing the Array flag). This PR gives it the proper type before passing it to the unscaling helper.

This fixes the ground textures being broken on Pokemon Scarlet/Violet when scaling. It wasn't finding the texture, so the descriptor index it provided was -1...
2022-11-18 02:37:37 +00:00
33a4d7d1ba GPU: Eliminate CB0 accesses when storage buffer accesses are resolved (#3847)
* Eliminate CB0 accesses

Still some work to do, decouple from hle?

* Forgot the important part somehow

* Fix and improve alignment test

* Address Feedback

* Remove some complexity when checking storage buffer alignment

* Update Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
2022-11-17 18:47:41 +01:00
391e08dd27 ci: Clean up Actions leftovers (#3859)
As the title says.

Fix Avalonia build versions for PRs.

Also ensure that the --self-contained flag doesn't warn at build time.
2022-11-17 18:30:54 +01:00
b5cf8b8af9 Capitalization to be consistent (#3860)
Thread ID Register, Floating-point Control Register, and Floating-point Status Register all had Register capitalized, so the Register in Processor State register should be capitalized.
2022-11-17 18:13:37 +01:00
55043c8afc Allow to start Ryujinx in wayland environment (#3516)
PrimaryMonitor is only available on X11.

At some point it will be deprecated; this change adds support for Wayland environments.
2022-11-17 13:02:43 +01:00
5d73a9f5fc Fix Fedora support (#3815)
For some reason, my fresh installation of Fedora 36 (KDE) doesn't have a
symlink for libX11.so.

This commit fixes this by trying to import the library with its major
version first, falling back to the normal way otherwise.
2022-11-17 00:18:29 +01:00
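A sketch of the versioned-name fallback described above using NativeLibrary.TryLoad; the exact names Ryujinx probes may differ.

```csharp
using System;
using System.Runtime.InteropServices;

static class X11Loader
{
    public static IntPtr LoadLibX11()
    {
        // Try the versioned name first, then fall back to the plain name.
        foreach (string name in new[] { "libX11.so.6", "libX11.so" })
        {
            if (NativeLibrary.TryLoad(name, out IntPtr handle))
            {
                return handle;
            }
        }

        throw new DllNotFoundException("libX11 could not be loaded.");
    }
}
```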
2c9ab5e45f Prevent raw Unicode control codes from showing on software keyboard applet. (#3845)
* Revert "Add support for releasing a semaphore to DmaClass (#2926)"

This reverts commit 521a07e612.

* Revert "Revert "Add support for releasing a semaphore to DmaClass (#2926)""

This reverts commit ec8a5fd053.

* Strip non-visible control codes from strings before they are sent to the software keyboard, to prevent ugly Unicode blocks from being shown on the UI.

* remove debugging junk

* Initialize the StringBuilder capacity at the start to prevent resizing (a tiny micro-optimization)

* Update remarks documentation. Remove unneeded imports.

* Removing a test that's actually just redundant

Co-authored-by: Logan Stromberg <lostromb@microsoft.com>
2022-11-16 23:53:17 +01:00
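A minimal sketch of stripping control codes before text reaches the software keyboard applet, with a pre-sized StringBuilder as the commit mentions. Using char.IsControl as the predicate is an assumption here, and the method name is illustrative.

```csharp
using System.Text;

static class KeyboardText
{
    public static string StripControlCodes(string input)
    {
        // Pre-size the builder to the input length to avoid regrowing it.
        var builder = new StringBuilder(input.Length);

        foreach (char c in input)
        {
            if (!char.IsControl(c))
            {
                builder.Append(c);
            }
        }

        return builder.ToString();
    }
}
```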
d536cc8ae6 Update units of memory from decimal to binary prefixes (#3716)
`MB` and `GB` can be interpreted as either base-10 or base-2 units. `MiB` and
`GiB` remove this ambiguity so that units of memory are always interpreted
using base-2 units.
2022-11-16 23:27:42 +01:00
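For reference, the difference between the two conventions in code: binary prefixes are powers of 1024, decimal prefixes are powers of 1000.

```csharp
const long MiB = 1024L * 1024; // binary prefix
const long MB  = 1000L * 1000; // decimal prefix

System.Console.WriteLine(4096 * MiB); // 4294967296 bytes (4 GiB)
System.Console.WriteLine(4096 * MB);  // 4096000000 bytes (4.096 GB)
```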
d751da84f9 Use new C# 11 u8 string literals (#3854) 2022-11-16 16:30:12 -03:00
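A C# 11 u8 string literal produces a ReadOnlySpan<byte> of UTF-8 bytes at compile time, with no runtime encoding step; the literal below is just an example.

```csharp
using System;

ReadOnlySpan<byte> magic = "NRO0"u8;

Console.WriteLine(magic.Length); // 4
```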
11aae9cfbc Make use of Random.Shared (#3852) 2022-11-16 15:34:18 -03:00
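Random.Shared (added in .NET 6) is a thread-safe shared instance, so call sites no longer construct and seed their own Random:

```csharp
using System;

int roll = Random.Shared.Next(1, 7);       // 1..6
double noise = Random.Shared.NextDouble(); // [0, 1)

Console.WriteLine($"{roll} {noise:F3}");
```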
b96794e72b Use new LINQ Order() methods (#3851) 2022-11-16 15:17:03 -03:00
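Order() and OrderDescending() were added in .NET 7 as shorthands for OrderBy(x => x) when sorting by the element itself:

```csharp
using System;
using System.Linq;

int[] values = { 15, 9, 12, 1 };

foreach (int v in values.Order())
{
    Console.WriteLine(v); // 1, 9, 12, 15
}
```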
f1d1670b0b Implement HLE macro for DrawElementsIndirect (#3748)
* Implement HLE macro for DrawElementsIndirect

* Shader cache version bump

* Use GL_ARB_shader_draw_parameters extension on OpenGL

* Fix DrawIndexedIndirectCount on Vulkan when extension is not supported

* Implement DrawIndex

* Alignment

* Fix some validation errors

* Rename BaseIds to DrawParameters

* Fix incorrect index buffer and vertex buffer size in some cases

* Add HLE macros for DrawArraysInstanced and DrawElementsInstanced

* Perform a regular draw when indirect data is not modified

* Use non-indirect draw methods if indirect buffer was not GPU modified

* Only check if draw parameters match if the shader actually uses them

* Expose Macro HLE setting on GUI

* Reset FirstVertex and FirstInstance after draw

* Update shader cache version again since some people already tested this

* PR feedback

Co-authored-by: riperiperi <rhy3756547@hotmail.com>
2022-11-16 14:53:04 -03:00
b8de72de8f It's REE-YOU-JINX (#3839) 2022-11-15 22:19:25 +01:00
eebc39228d UI: Allow overriding graphics backend + Move command line parser into a new class (#3707)
* Ava: Keep command line args when restarting

* UI: Move common UI functions to ProgramHelper

Add command line option to override the configured graphics backend

* Ava: Add CleanupUpdate task back

* Remove unused usings

* Revert combining common UI functions

Rename ProgramHelper to CommandLineState
Move command line parsing to CommandLineState

* Rename CommandLineProfile to Profile

* Fix assigning the wrong array to Arguments
2022-11-12 20:36:36 -03:00
9daf029f35 Use vector transform feedback outputs if possible (#3832) 2022-11-12 20:20:40 -03:00
51a27032f0 Fix VertexId and InstanceId on Vulkan (#3833)
* Fix VertexId and InstanceId on Vulkan

* Shader cache version bump
2022-11-11 13:22:49 -03:00
a6a67a2b7a Minor improvement to Vulkan pipeline state and bindings management (#3829)
* Minor improvement to Vulkan pipeline state and bindings management

* Clean up buffer textures too

* Use glBindTextureUnit
2022-11-10 13:38:38 -03:00
c6d05301aa infra: Migrate to .NET 7 (#3795)
* Update readme to mention .NET 7

* infra: Migrate to .NET 7

.NET 7 is still in preview, but this prepares for the release coming up
next month.

* Use Random.Shared in CreateRandom

* Move UInt128Utils.cs to Ryujinx.Common project

* Fix inverted parameters in System.UInt128 constructor

* Fix Visual Studio complaints on Ryujinx.Graphics.Vic

* time: Fix missing alignment enforcement in SystemClockContext

Fixes at least Smash

* time: Fix missing alignment enforcement in SteadyClockContext

Fix games (like recent versions of Smash) using time shared memory

* Switch to .NET 7.0.100 release

* Enable Tiered PGO

* Ensure CreateId validity requirements are met when doing random generation

Also enforce correct packing layout for other Mii structures.

This fixes Mario Kart 8 crashes related to the default Miis.
2022-11-09 20:22:43 +01:00
647de4cd31 Ensure all pending draws are done before compute dispatch (#3822) 2022-11-03 19:54:30 -03:00
f82309fa2d Vulkan: Implement multisample <-> non-multisample copies and depth-stencil resolve (#3723)
* Vulkan: Implement multisample <-> non-multisample copies and depth-stencil resolve

* FramebufferParams is no longer required there

* Implement Specialization Constants and merge CopyMS Shaders (#15)

* Vulkan: Initial Specialization Constants

* Replace with specialized helper shader

* Reimplement everything

Fix nonexistent interaction with Ryu pipeline caching
Decouple specialization info from data and relocate them
Generalize mapping and add type enum to better match spv types
Use local fixed scopes instead of global unmanaged allocs

* Fix misses in initial implementation

Use correct info variable in Create2DLayerView
Add ShaderStorageImageMultisample to required feature set

* Use texture for source image

* No point in using ReadOnlyMemory

* Apply formatting feedback

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Apply formatting suggestions on shader source

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

* Support conversion with a sample count that does not match the requested count, plus other minor changes

Co-authored-by: mageven <62494521+mageven@users.noreply.github.com>
2022-11-02 18:17:19 -03:00
7d8e198c33 fix: Support FFmpeg 5.1.x for decoding (#3816)
For some reason FFmpeg 5.1.x reverted part of the changes made in 5.0.x
on AVCodec.

This fixes decoding issues with it.
2022-11-02 09:26:50 +01:00
1167 changed files with 52469 additions and 21809 deletions

View File

@ -89,6 +89,7 @@ csharp_style_conditional_delegate_call = true:suggestion
# Modifier preferences
csharp_prefer_static_local_function = true:suggestion
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:silent
csharp_style_prefer_readonly_struct = true
# Code-block preferences
csharp_prefer_braces = true:silent

61
.gitattributes vendored
View File

@ -1,63 +1,4 @@
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
###############################################################################
# Set default behavior for command prompt diff.
#
# This is need for earlier builds of msysgit that does not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp
###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
* text=auto eol=lf

View File

@ -1,6 +1,6 @@
---
name: Bug Report
about: Something doesn't work correctly in Ryujinx. Note that game-specific issues should be instead posted on the Game Compatibility List at https://github.com/Ryujinx/Ryujinx-Games-List, unless it is a provable regression.
about: Something doesn't work correctly in Ryujinx. Game-specific issues should be posted at https://github.com/Ryujinx/Ryujinx-Games-List instead, unless it is a provable regression.
#assignees:
---
@ -34,7 +34,7 @@ about: Something doesn't work correctly in Ryujinx. Note that game-specific issu
- OS: *(e.g. Windows 10)*
- CPU: *(e.g. i7-6700)*
- GPU: *(e.g. NVIDIA RTX 2070)*
- RAM: *(e.g. 16GB)*
- RAM: *(e.g. 16GiB)*
- Applied Mods : [ Yes (Which ones) / No ]
### Additional context?

View File

@ -51,27 +51,23 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-dotnet@v3
with:
dotnet-version: 6.0.x
- name: Ensure NuGet Source
uses: fabriciomurta/ensure-nuget-source@v1
dotnet-version: 7.0.x
- name: Get git short hash
id: git_short_hash
run: echo "result=$(git rev-parse --short "${{ github.sha }}")" >> $GITHUB_OUTPUT
shell: bash
- name: Clear
run: dotnet clean && dotnet nuget locals all --clear
- name: Build
run: dotnet build -c "${{ matrix.configuration }}" /p:Version="${{ env.RYUJINX_BASE_VERSION }}" /p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" /p:ExtraDefineConstants=DISABLE_UPDATER
run: dotnet build -c "${{ matrix.configuration }}" -p:Version="${{ env.RYUJINX_BASE_VERSION }}" -p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" -p:ExtraDefineConstants=DISABLE_UPDATER
- name: Test
run: dotnet test --no-build -c "${{ matrix.configuration }}"
- name: Publish Ryujinx
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish /p:Version="${{ env.RYUJINX_BASE_VERSION }}" /p:DebugType=embedded /p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" /p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx --self-contained
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish -p:Version="${{ env.RYUJINX_BASE_VERSION }}" -p:DebugType=embedded -p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" -p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx --self-contained true
if: github.event_name == 'pull_request'
- name: Publish Ryujinx.Headless.SDL2
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish_sdl2_headless /p:Version="${{ env.RYUJINX_BASE_VERSION }}" /p:DebugType=embedded /p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" /p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx.Headless.SDL2 --self-contained
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish_sdl2_headless -p:Version="${{ env.RYUJINX_BASE_VERSION }}" -p:DebugType=embedded -p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" -p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx.Headless.SDL2 --self-contained true
if: github.event_name == 'pull_request'
- name: Publish Ryujinx.Ava
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish_ava /p:Version="1.0.0" /p:DebugType=embedded /p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" /p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx.Ava
run: dotnet publish -c "${{ matrix.configuration }}" -r "${{ matrix.DOTNET_RUNTIME_IDENTIFIER }}" -o ./publish_ava -p:Version="${{ env.RYUJINX_BASE_VERSION }}" -p:DebugType=embedded -p:SourceRevisionId="${{ steps.git_short_hash.outputs.result }}" -p:ExtraDefineConstants=DISABLE_UPDATER Ryujinx.Ava --self-contained true
if: github.event_name == 'pull_request'
- name: Upload Ryujinx artifact
uses: actions/upload-artifact@v3

View File

@ -11,6 +11,7 @@ on:
- '*.yml'
- 'README.md'
concurrency: release
jobs:
release:
@ -28,11 +29,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-dotnet@v3
with:
dotnet-version: 6.0.x
- name: Ensure NuGet Source
uses: fabriciomurta/ensure-nuget-source@v1
- name: Clear
run: dotnet clean && dotnet nuget locals all --clear
dotnet-version: 7.0.x
- name: Get version info
id: version_info
run: |
@ -41,19 +38,19 @@ jobs:
shell: bash
- name: Configure for release
run: |
sed -r --in-place 's/\%\%RYUJINX_BUILD_VERSION\%\%/${{ steps.version_info.outputs.build_version }}/g;' Ryujinx.Common/ReleaseInformations.cs
sed -r --in-place 's/\%\%RYUJINX_BUILD_GIT_HASH\%\%/${{ steps.version_info.outputs.git_short_hash }}/g;' Ryujinx.Common/ReleaseInformations.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_NAME\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_NAME }}/g;' Ryujinx.Common/ReleaseInformations.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_OWNER\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_OWNER }}/g;' Ryujinx.Common/ReleaseInformations.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_REPO\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_REPO }}/g;' Ryujinx.Common/ReleaseInformations.cs
sed -r --in-place 's/\%\%RYUJINX_BUILD_VERSION\%\%/${{ steps.version_info.outputs.build_version }}/g;' Ryujinx.Common/ReleaseInformation.cs
sed -r --in-place 's/\%\%RYUJINX_BUILD_GIT_HASH\%\%/${{ steps.version_info.outputs.git_short_hash }}/g;' Ryujinx.Common/ReleaseInformation.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_NAME\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_NAME }}/g;' Ryujinx.Common/ReleaseInformation.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_OWNER\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_OWNER }}/g;' Ryujinx.Common/ReleaseInformation.cs
sed -r --in-place 's/\%\%RYUJINX_TARGET_RELEASE_CHANNEL_REPO\%\%/${{ env.RYUJINX_TARGET_RELEASE_CHANNEL_REPO }}/g;' Ryujinx.Common/ReleaseInformation.cs
shell: bash
- name: Create output dir
run: "mkdir release_output"
- name: Publish Windows
run: |
dotnet publish -c Release -r win10-x64 -o ./publish_windows/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx --self-contained
dotnet publish -c Release -r win10-x64 -o ./publish_windows_sdl2_headless/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx.Headless.SDL2 --self-contained
dotnet publish -c Release -r win10-x64 -o ./publish_windows_ava/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx.Ava --self-contained
dotnet publish -c Release -r win10-x64 -o ./publish_windows/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx --self-contained true
dotnet publish -c Release -r win10-x64 -o ./publish_windows_sdl2_headless/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx.Headless.SDL2 --self-contained true
dotnet publish -c Release -r win10-x64 -o ./publish_windows_ava/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx.Ava --self-contained true
- name: Packing Windows builds
run: |
pushd publish_windows
@ -71,22 +68,34 @@ jobs:
- name: Publish Linux
run: |
dotnet publish -c Release -r linux-x64 -o ./publish_linux/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx --self-contained
dotnet publish -c Release -r linux-x64 -o ./publish_linux_sdl2_headless/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx.Headless.SDL2 --self-contained
dotnet publish -c Release -r linux-x64 -o ./publish_linux_ava/publish /p:Version="${{ steps.version_info.outputs.build_version }}" /p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" /p:DebugType=embedded Ryujinx.Ava --self-contained
dotnet publish -c Release -r linux-x64 -o ./publish_linux/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx --self-contained true
dotnet publish -c Release -r linux-x64 -o ./publish_linux_sdl2_headless/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx.Headless.SDL2 --self-contained true
dotnet publish -c Release -r linux-x64 -o ./publish_linux_ava/publish -p:Version="${{ steps.version_info.outputs.build_version }}" -p:SourceRevisionId="${{ steps.version_info.outputs.git_short_hash }}" -p:DebugType=embedded Ryujinx.Ava --self-contained true
- name: Packing Linux builds
run: |
pushd publish_linux
tar -czvf ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz publish
tar --exclude "publish/Ryujinx" --exclude "publish/Ryujinx.sh" -cvf ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar publish
python3 ../distribution/misc/add_tar_exec.py ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx" "publish/Ryujinx"
python3 ../distribution/misc/add_tar_exec.py ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx.sh" "publish/Ryujinx.sh"
gzip -9 < ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar > ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz
rm ../release_output/ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar
popd
pushd publish_linux_sdl2_headless
tar -czvf ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz publish
tar --exclude "publish/Ryujinx.Headless.SDL2" --exclude "publish/Ryujinx.sh" -cvf ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar publish
python3 ../distribution/misc/add_tar_exec.py ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx.Headless.SDL2" "publish/Ryujinx.Headless.SDL2"
python3 ../distribution/misc/add_tar_exec.py ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx.sh" "publish/Ryujinx.sh"
gzip -9 < ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar > ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz
rm ../release_output/sdl2-ryujinx-headless-${{ steps.version_info.outputs.build_version }}-linux_x64.tar
popd
pushd publish_linux_ava
tar -czvf ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz publish
tar --exclude "publish/Ryujinx.Ava" --exclude "publish/Ryujinx.sh" -cvf ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar publish
python3 ../distribution/misc/add_tar_exec.py ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx.Ava" "publish/Ryujinx.Ava"
python3 ../distribution/misc/add_tar_exec.py ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar "publish/Ryujinx.sh" "publish/Ryujinx.sh"
gzip -9 < ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar > ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar.gz
rm ../release_output/test-ava-ryujinx-${{ steps.version_info.outputs.build_version }}-linux_x64.tar
popd
shell: bash

3
.gitignore vendored
View File

@ -125,6 +125,9 @@ ClientBin/
packages/*
*.config
# Include nuget.config
!nuget.config
# RIA/Silverlight projects
Generated_Code/

View File

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net7.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
@ -9,4 +9,11 @@
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
</ItemGroup>
<ItemGroup>
<ContentWithTargetPath Include="Native\libs\libarmeilleure-jitsupport.dylib" Condition="'$(RuntimeIdentifier)' == '' OR '$(RuntimeIdentifier)' == 'osx-arm64'">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<TargetPath>libarmeilleure-jitsupport.dylib</TargetPath>
</ContentWithTargetPath>
</ItemGroup>
</Project>

View File

@ -0,0 +1,270 @@
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.CodeGen.Arm64
{
static class Arm64Optimizer
{
private const int MaxConstantUses = 10000;
public static void RunPass(ControlFlowGraph cfg)
{
var constants = new Dictionary<ulong, Operand>();
Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
{
// If the constant has many uses, we also force a new constant mov to be added, in order
// to avoid overflow of the counts field (that is limited to 16 bits).
if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
{
constant = Local(source.Type);
Operation copyOp = Operation(Instruction.Copy, constant, source);
block.Operations.AddBefore(operation, copyOp);
constants[source.Value] = constant;
}
return constant;
}
for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
{
constants.Clear();
Operation nextNode;
for (Operation node = block.Operations.First; node != default; node = nextNode)
{
nextNode = node.ListNext;
// Insert copies for constants that can't fit on a 32-bit immediate.
// Doing this early unblocks a few optimizations.
if (node.Instruction == Instruction.Add)
{
Operand src1 = node.GetSource(0);
Operand src2 = node.GetSource(1);
if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
{
node.SetSource(0, GetConstantCopy(block, node, src1));
}
if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
{
node.SetSource(1, GetConstantCopy(block, node, src2));
}
}
// Try to fold something like:
// lsl x1, x1, #2
// add x0, x0, x1
// ldr x0, [x0]
// add x2, x2, #16
// ldr x2, [x2]
// Into:
// ldr x0, [x0, x1, lsl #2]
// ldr x2, [x2, #16]
if (IsMemoryLoadOrStore(node.Instruction))
{
OperandType type;
if (node.Destination != default)
{
type = node.Destination.Type;
}
else
{
type = node.GetSource(1).Type;
}
Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
if (memOp != default)
{
node.SetSource(0, memOp);
}
}
}
}
Optimizer.RemoveUnusedNodes(cfg);
}
private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
{
Operand baseOp = addr;
// First we check if the address is the result of a local X with immediate
// addition. If that is the case, then the baseOp is X, and the memory operand immediate
// becomes the addition immediate. Otherwise baseOp keeps being the address.
int imm = GetConstOp(ref baseOp, type);
if (imm != 0)
{
return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
}
// Now we check if the baseOp is the result of a local Y with a local Z addition.
// If that is the case, we now set baseOp to Y and indexOp to Z. We further check
// if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
// if that is the case, we set indexOp to W and adjust the scale value of the memory operand
// to match that of the left shift.
// There is one missed case, which is the address being a shift result, but this is
// probably not worth optimizing as it should never happen.
(Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
// If baseOp is still equal to address, then there's nothing that can be optimized.
if (baseOp == addr)
{
return default;
}
return MemoryOp(type, baseOp, indexOp, scale, 0);
}
private static int GetConstOp(ref Operand baseOp, OperandType accessType)
{
Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
if (operation == default)
{
return 0;
}
Operand src1 = operation.GetSource(0);
Operand src2 = operation.GetSource(1);
Operand constOp;
Operand otherOp;
if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
{
constOp = src1;
otherOp = src2;
}
else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
{
constOp = src2;
otherOp = src1;
}
else
{
return 0;
}
// If we have addition by a constant that we can't encode on the instruction,
// then we can't optimize it further.
if (ConstTooLong(constOp, accessType))
{
return 0;
}
baseOp = otherOp;
return constOp.AsInt32();
}
private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
{
Operand indexOp = default;
Multiplier scale = Multiplier.x1;
Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
if (addOp == default)
{
return (indexOp, scale);
}
Operand src1 = addOp.GetSource(0);
Operand src2 = addOp.GetSource(1);
if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
{
return (indexOp, scale);
}
baseOp = src1;
indexOp = src2;
Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
bool indexOnSrc2 = false;
if (shlOp == default)
{
shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
indexOnSrc2 = true;
}
if (shlOp != default)
{
Operand shSrc = shlOp.GetSource(0);
Operand shift = shlOp.GetSource(1);
int maxShift = Assembler.GetScaleForType(accessType);
if (shSrc.Kind == OperandKind.LocalVariable &&
shift.Kind == OperandKind.Constant &&
(shift.Value == 0 || shift.Value == (ulong)maxShift))
{
scale = shift.Value switch
{
1 => Multiplier.x2,
2 => Multiplier.x4,
3 => Multiplier.x8,
4 => Multiplier.x16,
_ => Multiplier.x1
};
baseOp = indexOnSrc2 ? src1 : src2;
indexOp = shSrc;
}
}
return (indexOp, scale);
}
private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
{
// If we have multiple assignments, folding is not safe
// as the value may be different depending on the
// control flow path.
if (op.AssignmentsCount != 1)
{
return default;
}
Operation asgOp = op.Assignments[0];
if (asgOp.Instruction != inst)
{
return default;
}
return asgOp;
}
private static bool IsMemoryLoadOrStore(Instruction inst)
{
return inst == Instruction.Load || inst == Instruction.Store;
}
private static bool ConstTooLong(Operand constOp, OperandType accessType)
{
if ((uint)constOp.Value != constOp.Value)
{
return true;
}
return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
}
}
}

View File

@ -0,0 +1,47 @@
using ARMeilleure.IntermediateRepresentation;
using System;
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmCondition
{
Eq = 0,
Ne = 1,
GeUn = 2,
LtUn = 3,
Mi = 4,
Pl = 5,
Vs = 6,
Vc = 7,
GtUn = 8,
LeUn = 9,
Ge = 10,
Lt = 11,
Gt = 12,
Le = 13,
Al = 14,
Nv = 15
}
static class ComparisonArm64Extensions
{
public static ArmCondition ToArmCondition(this Comparison comp)
{
return comp switch
{
Comparison.Equal => ArmCondition.Eq,
Comparison.NotEqual => ArmCondition.Ne,
Comparison.Greater => ArmCondition.Gt,
Comparison.LessOrEqual => ArmCondition.Le,
Comparison.GreaterUI => ArmCondition.GtUn,
Comparison.LessOrEqualUI => ArmCondition.LeUn,
Comparison.GreaterOrEqual => ArmCondition.Ge,
Comparison.Less => ArmCondition.Lt,
Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
Comparison.LessUI => ArmCondition.LtUn,
_ => throw new ArgumentException(null, nameof(comp))
};
}
}
}

View File

@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmExtensionType
{
Uxtb = 0,
Uxth = 1,
Uxtw = 2,
Uxtx = 3,
Sxtb = 4,
Sxth = 5,
Sxtw = 6,
Sxtx = 7
}
}

View File

@ -0,0 +1,11 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum ArmShiftType
{
Lsl = 0,
Lsr = 1,
Asr = 2,
Ror = 3
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,96 @@
using System;
namespace ARMeilleure.CodeGen.Arm64
{
static class CallingConvention
{
private const int RegistersMask = unchecked((int)0xffffffff);
// Some of those register have specific roles and can't be used as general purpose registers.
// X18 - Reserved for platform specific usage.
// X29 - Frame pointer.
// X30 - Return address.
// X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
public static int GetIntAvailableRegisters()
{
return RegistersMask & ~ReservedRegsMask;
}
public static int GetVecAvailableRegisters()
{
return RegistersMask;
}
public static int GetIntCallerSavedRegisters()
{
return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
}
public static int GetFpCallerSavedRegisters()
{
return GetFpCalleeSavedRegisters() ^ RegistersMask;
}
public static int GetVecCallerSavedRegisters()
{
return GetVecCalleeSavedRegisters() ^ RegistersMask;
}
public static int GetIntCalleeSavedRegisters()
{
return 0x1ff80000; // X19 to X28
}
public static int GetFpCalleeSavedRegisters()
{
return 0xff00; // D8 to D15
}
public static int GetVecCalleeSavedRegisters()
{
return 0;
}
public static int GetArgumentsOnRegsCount()
{
return 8;
}
public static int GetIntArgumentRegister(int index)
{
if ((uint)index < (uint)GetArgumentsOnRegsCount())
{
return index;
}
throw new ArgumentOutOfRangeException(nameof(index));
}
public static int GetVecArgumentRegister(int index)
{
if ((uint)index < (uint)GetArgumentsOnRegsCount())
{
return index;
}
throw new ArgumentOutOfRangeException(nameof(index));
}
public static int GetIntReturnRegister()
{
return 0;
}
public static int GetIntReturnRegisterHigh()
{
return 1;
}
public static int GetVecReturnRegister()
{
return 0;
}
}
}

View File

@ -0,0 +1,173 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Numerics;
namespace ARMeilleure.CodeGen.Arm64
{
static class CodeGenCommon
{
public const int TcAddressRegister = 8;
public const int ReservedRegister = 17;
public static bool ConstFitsOnSImm7(int value, int scale)
{
return (((value >> scale) << 25) >> (25 - scale)) == value;
}
public static bool ConstFitsOnSImm9(int value)
{
return ((value << 23) >> 23) == value;
}
public static bool ConstFitsOnUImm12(int value)
{
return (value & 0xfff) == value;
}
public static bool ConstFitsOnUImm12(int value, OperandType type)
{
int scale = Assembler.GetScaleForType(type);
return (((value >> scale) & 0xfff) << scale) == value;
}
public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
{
ulong value = operand.Value;
if (operand.Type == OperandType.I32)
{
value |= value << 32;
}
return TryEncodeBitMask(value, out immN, out immS, out immR);
}
public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
{
// Some special values also can't be encoded:
// 0 can't be encoded because we need to subtract 1 from onesCount (which would became negative if 0).
// A value with all bits set can't be encoded because it is reserved according to the spec, because:
// Any value AND all ones will be equal itself, so it's effectively a no-op.
// Any value OR all ones will be equal all ones, so one can just use MOV.
// Any value XOR all ones will be equal its inverse, so one can just use MVN.
if (value == ulong.MaxValue)
{
immN = 0;
immS = 0;
immR = 0;
return false;
}
int bitLength = CountSequence(value);
if ((value >> bitLength) != 0)
{
bitLength += CountSequence(value >> bitLength);
}
int bitLengthLog2 = BitOperations.Log2((uint)bitLength);
int bitLengthPow2 = 1 << bitLengthLog2;
if (bitLengthPow2 < bitLength)
{
bitLengthLog2++;
bitLengthPow2 <<= 1;
}
int selectedESize = 64;
int repetitions = 1;
int onesCount = BitOperations.PopCount(value);
if (bitLengthPow2 < 64 && (value >> bitLengthPow2) != 0)
{
for (int eSizeLog2 = bitLengthLog2; eSizeLog2 < 6; eSizeLog2++)
{
bool match = true;
int eSize = 1 << eSizeLog2;
ulong mask = (1UL << eSize) - 1;
ulong eValue = value & mask;
for (int e = 1; e < 64 / eSize; e++)
{
if (((value >> (e * eSize)) & mask) != eValue)
{
match = false;
break;
}
}
if (match)
{
selectedESize = eSize;
repetitions = 64 / eSize;
onesCount = BitOperations.PopCount(eValue);
break;
}
}
}
// Find the rotation. There are two cases: one where the highest bit of the element is 0,
// and one where it is 1.
// If it's 1, we just count the number of 1 bits at the MSB end to find the right rotation.
// If it's 0, we count the number of 0 bits at the LSB end to find the left rotation,
// then convert it to a right rotation by subtracting that value from the element size.
int rotation;
long vHigh = (long)(value << (64 - selectedESize));
if (vHigh < 0)
{
rotation = BitOperations.LeadingZeroCount(~(ulong)vHigh);
}
else
{
rotation = (selectedESize - BitOperations.TrailingZeroCount(value)) & (selectedESize - 1);
}
// Reconstruct value and see if it matches. If not, we can't encode.
ulong reconstructed = onesCount == 64 ? ulong.MaxValue : RotateRight((1UL << onesCount) - 1, rotation, selectedESize);
for (int bit = 32; bit >= selectedESize; bit >>= 1)
{
reconstructed |= reconstructed << bit;
}
if (reconstructed != value || onesCount == 0)
{
immN = 0;
immS = 0;
immR = 0;
return false;
}
immR = rotation;
// immN indicates that there are no repetitions.
// The most significant bits of immS encode the repetition count (via the element size), and the least significant bits the number of bits set minus one.
if (repetitions == 1)
{
immN = 1;
immS = 0;
}
else
{
immN = 0;
immS = (0xf80 >> BitOperations.Log2((uint)repetitions)) & 0x3f;
}
immS |= onesCount - 1;
return true;
}
private static int CountSequence(ulong value)
{
return BitOperations.TrailingZeroCount(value) + BitOperations.TrailingZeroCount(~value);
}
private static ulong RotateRight(ulong bits, int shift, int size)
{
return (bits >> shift) | ((bits << (size - shift)) & (size == 64 ? ulong.MaxValue : (1UL << size) - 1));
}
}
}
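
As a sanity check on the encoding above, the following standalone sketch (hypothetical demo code, not from the repository) decodes an (immN, immS, immR) triple back into its 64-bit value using the DecodeBitMasks rule from the ARM Architecture Reference Manual, so a round trip through TryEncodeBitMask can be verified by hand:

using System;
using System.Numerics;

static class BitmaskImmediateDemo
{
    // Decodes an AArch64 logical immediate (immN, immS, immR) back into the
    // 64-bit constant it represents, mirroring DecodeBitMasks from the ARM ARM.
    private static ulong Decode(int immN, int immS, int immR)
    {
        // The element size is derived from the highest set bit of N:NOT(imms).
        int len = 31 - BitOperations.LeadingZeroCount((uint)((immN << 6) | (~immS & 0x3f)));
        int eSize = 1 << len;
        int onesCount = (immS & (eSize - 1)) + 1;

        ulong element = onesCount == 64 ? ulong.MaxValue : (1UL << onesCount) - 1;
        ulong elementMask = eSize == 64 ? ulong.MaxValue : (1UL << eSize) - 1;

        // Rotate the run of ones right by immR within the element.
        element = ((element >> immR) | (element << (eSize - immR))) & elementMask;

        // Replicate the element across all 64 bits.
        ulong result = 0;
        for (int bit = 0; bit < 64; bit += eSize)
        {
            result |= element << bit;
        }

        return result;
    }

    static void Main()
    {
        // 0x0000FFFF0000FFFF is a 32-bit element with 16 ones and no rotation,
        // which TryEncodeBitMask reports as immN = 0, immS = 15, immR = 0.
        Console.WriteLine(Decode(0, 15, 0).ToString("x16")); // 0000ffff0000ffff
    }
}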

View File

@@ -0,0 +1,286 @@
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.IO;
namespace ARMeilleure.CodeGen.Arm64
{
class CodeGenContext
{
private const int BccInstLength = 4;
private const int CbnzInstLength = 4;
private const int LdrLitInstLength = 4;
private Stream _stream;
public int StreamOffset => (int)_stream.Length;
public AllocationResult AllocResult { get; }
public Assembler Assembler { get; }
public BasicBlock CurrBlock { get; private set; }
public bool HasCall { get; }
public int CallArgsRegionSize { get; }
public int FpLrSaveRegionSize { get; }
private readonly Dictionary<BasicBlock, long> _visitedBlocks;
private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
private struct ConstantPoolEntry
{
public readonly int Offset;
public readonly Symbol Symbol;
public readonly List<(Operand, int)> LdrOffsets;
public ConstantPoolEntry(int offset, Symbol symbol)
{
Offset = offset;
Symbol = symbol;
LdrOffsets = new List<(Operand, int)>();
}
}
private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
private bool _constantPoolWritten;
private long _constantPoolOffset;
private ArmCondition _jNearCondition;
private Operand _jNearValue;
private long _jNearPosition;
private readonly bool _relocatable;
public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
{
_stream = new MemoryStream();
AllocResult = allocResult;
Assembler = new Assembler(_stream);
bool hasCall = maxCallArgs >= 0;
HasCall = hasCall;
if (maxCallArgs < 0)
{
maxCallArgs = 0;
}
CallArgsRegionSize = maxCallArgs * 16;
FpLrSaveRegionSize = hasCall ? 16 : 0;
_visitedBlocks = new Dictionary<BasicBlock, long>();
_pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
_constantPool = new Dictionary<ulong, ConstantPoolEntry>();
_relocatable = relocatable;
}
public void EnterBlock(BasicBlock block)
{
CurrBlock = block;
long target = _stream.Position;
if (_pendingBranches.TryGetValue(block, out var list))
{
foreach (var tuple in list)
{
_stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
WriteBranch(tuple.Condition, target);
}
_stream.Seek(target, SeekOrigin.Begin);
_pendingBranches.Remove(block);
}
_visitedBlocks.Add(block, target);
}
public void JumpTo(BasicBlock target)
{
JumpTo(ArmCondition.Al, target);
}
public void JumpTo(ArmCondition condition, BasicBlock target)
{
if (_visitedBlocks.TryGetValue(target, out long offset))
{
WriteBranch(condition, offset);
}
else
{
if (!_pendingBranches.TryGetValue(target, out var list))
{
list = new List<(ArmCondition, long)>();
_pendingBranches.Add(target, list);
}
list.Add((condition, _stream.Position));
_stream.Seek(BccInstLength, SeekOrigin.Current);
}
}
private void WriteBranch(ArmCondition condition, long to)
{
int imm = checked((int)(to - _stream.Position));
if (condition != ArmCondition.Al)
{
Assembler.B(condition, imm);
}
else
{
Assembler.B(imm);
}
}
public void JumpToNear(ArmCondition condition)
{
_jNearCondition = condition;
_jNearPosition = _stream.Position;
_stream.Seek(BccInstLength, SeekOrigin.Current);
}
public void JumpToNearIfNotZero(Operand value)
{
_jNearValue = value;
_jNearPosition = _stream.Position;
_stream.Seek(CbnzInstLength, SeekOrigin.Current);
}
public void JumpHere()
{
long currentPosition = _stream.Position;
long offset = currentPosition - _jNearPosition;
_stream.Seek(_jNearPosition, SeekOrigin.Begin);
if (_jNearValue != default)
{
Assembler.Cbnz(_jNearValue, checked((int)offset));
_jNearValue = default;
}
else
{
Assembler.B(_jNearCondition, checked((int)offset));
}
_stream.Seek(currentPosition, SeekOrigin.Begin);
}
public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
{
if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
{
cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
_constantPool.Add(value, cpe);
}
cpe.LdrOffsets.Add((rt, (int)_stream.Position));
_stream.Seek(LdrLitInstLength, SeekOrigin.Current);
}
private long WriteConstantPool()
{
if (_constantPoolWritten)
{
return _constantPoolOffset;
}
long constantPoolBaseOffset = _stream.Position;
foreach (ulong value in _constantPool.Keys)
{
WriteUInt64(value);
}
foreach (ConstantPoolEntry cpe in _constantPool.Values)
{
foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
{
_stream.Seek(ldrOffset, SeekOrigin.Begin);
int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
int pcRelativeOffset = absoluteOffset - ldrOffset;
Assembler.LdrLit(rt, pcRelativeOffset);
}
}
_stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
_constantPoolOffset = constantPoolBaseOffset;
_constantPoolWritten = true;
return constantPoolBaseOffset;
}
public (byte[], RelocInfo) GetCode()
{
long constantPoolBaseOffset = WriteConstantPool();
byte[] code = new byte[_stream.Length];
long originalPosition = _stream.Position;
_stream.Seek(0, SeekOrigin.Begin);
_stream.Read(code, 0, code.Length);
_stream.Seek(originalPosition, SeekOrigin.Begin);
RelocInfo relocInfo;
if (_relocatable)
{
RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
int index = 0;
foreach (ConstantPoolEntry cpe in _constantPool.Values)
{
if (cpe.Symbol.Type != SymbolType.None)
{
int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
}
}
if (index != relocs.Length)
{
Array.Resize(ref relocs, index);
}
relocInfo = new RelocInfo(relocs);
}
else
{
relocInfo = new RelocInfo(new RelocEntry[0]);
}
return (code, relocInfo);
}
private void WriteUInt64(ulong value)
{
_stream.WriteByte((byte)(value >> 0));
_stream.WriteByte((byte)(value >> 8));
_stream.WriteByte((byte)(value >> 16));
_stream.WriteByte((byte)(value >> 24));
_stream.WriteByte((byte)(value >> 32));
_stream.WriteByte((byte)(value >> 40));
_stream.WriteByte((byte)(value >> 48));
_stream.WriteByte((byte)(value >> 56));
}
}
}
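
The branch handling above records forward branches in _pendingBranches and patches them once the target block is finally emitted, so no separate fixup pass is needed. A stripped-down sketch of the same pattern follows; everything here is hypothetical demo code, and the "instruction" is just the raw 4-byte displacement rather than a real B/B.cond encoding:

using System;
using System.Collections.Generic;
using System.IO;

class ForwardBranchDemo
{
    private readonly MemoryStream _stream = new MemoryStream();
    private readonly Dictionary<int, long> _visitedBlocks = new Dictionary<int, long>();
    private readonly Dictionary<int, List<long>> _pendingBranches = new Dictionary<int, List<long>>();

    public void EnterBlock(int blockId)
    {
        long target = _stream.Position;

        if (_pendingBranches.Remove(blockId, out List<long> positions))
        {
            // The target offset is now known, so patch every branch that was waiting for it.
            foreach (long branchPos in positions)
            {
                _stream.Seek(branchPos, SeekOrigin.Begin);
                WriteBranch(target);
            }

            _stream.Seek(target, SeekOrigin.Begin);
        }

        _visitedBlocks[blockId] = target;
    }

    public void JumpTo(int blockId)
    {
        if (_visitedBlocks.TryGetValue(blockId, out long offset))
        {
            WriteBranch(offset); // Backward branch: the target offset is already known.
        }
        else
        {
            // Forward branch: remember where it goes and leave room for one instruction.
            if (!_pendingBranches.TryGetValue(blockId, out List<long> positions))
            {
                _pendingBranches[blockId] = positions = new List<long>();
            }

            positions.Add(_stream.Position);
            _stream.Seek(sizeof(int), SeekOrigin.Current);
        }
    }

    private void WriteBranch(long to)
    {
        // Stand-in for Assembler.B: store the PC-relative displacement as a little-endian int.
        int imm = checked((int)(to - _stream.Position));
        _stream.Write(BitConverter.GetBytes(imm), 0, sizeof(int));
    }
}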

File diff suppressed because it is too large

View File

@@ -0,0 +1,662 @@
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Diagnostics;
namespace ARMeilleure.CodeGen.Arm64
{
static class CodeGeneratorIntrinsic
{
public static void GenerateOperation(CodeGenContext context, Operation operation)
{
Intrinsic intrin = operation.Intrinsic;
IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
switch (info.Type)
{
case IntrinsicType.ScalarUnary:
GenerateVectorUnary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarUnaryByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorUnaryByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarBinary:
GenerateVectorBinary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryFPByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryRd:
GenerateVectorUnary(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1));
break;
case IntrinsicType.ScalarBinaryShl:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarBinaryShr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPCompare:
GenerateScalarFPCompare(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.ScalarFPConvFixed:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPConvFixedGpr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateScalarFPConvGpr(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.ScalarFPConvGpr:
GenerateScalarFPConvGpr(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.ScalarTernary:
GenerateScalarTernary(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2),
operation.GetSource(0));
break;
case IntrinsicType.ScalarTernaryFPRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.ScalarTernaryShlRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.ScalarTernaryShrRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
0,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.VectorUnary:
GenerateVectorUnary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.VectorUnaryByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorUnaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.VectorBinary:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryBitwise:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryFPByElem:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(2).AsInt32(),
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryRd:
GenerateVectorUnary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1));
break;
case IntrinsicType.VectorBinaryShl:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorBinaryShr:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorFPConvFixed:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
info.Inst,
operation.Destination,
operation.GetSource(0),
(uint)operation.GetSource(1).AsInt32());
break;
case IntrinsicType.VectorInsertByElem:
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorInsertByElem(
context,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
(uint)operation.GetSource(1).AsInt32(),
operation.Destination,
operation.GetSource(2));
break;
case IntrinsicType.VectorLookupTable:
Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
for (int i = 1; i < operation.SourcesCount - 1; i++)
{
Register currReg = operation.GetSource(i).GetRegister();
Register prevReg = operation.GetSource(i - 1).GetRegister();
Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
}
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
operation.Destination,
operation.GetSource(0),
operation.GetSource(operation.SourcesCount - 1));
break;
case IntrinsicType.VectorTernaryFPRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryFPByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRd:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRdBitwise:
GenerateVectorBinary(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryRdByElem:
Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
GenerateVectorBinaryByElem(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
(uint)operation.GetSource(3).AsInt32(),
operation.Destination,
operation.GetSource(1),
operation.GetSource(2));
break;
case IntrinsicType.VectorTernaryShlRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShlImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.VectorTernaryShrRd:
Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
GenerateVectorBinaryShrImm(
context,
(uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
(uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
info.Inst,
operation.Destination,
operation.GetSource(1),
(uint)operation.GetSource(2).AsInt32());
break;
case IntrinsicType.GetRegister:
context.Assembler.WriteInstruction(info.Inst, operation.Destination);
break;
case IntrinsicType.SetRegister:
context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
break;
default:
throw new NotImplementedException(info.Type.ToString());
}
}
private static void GenerateScalarFPCompare(
CodeGenContext context,
uint sz,
uint instruction,
Operand dest,
Operand rn,
Operand rm)
{
instruction |= (sz << 22);
if (rm.Kind == OperandKind.Constant && rm.Value == 0)
{
instruction |= 0b1000;
rm = rn;
}
context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
}
private static void GenerateScalarFPConvGpr(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn)
{
instruction |= (sz << 22);
if (rd.Type.IsInteger())
{
context.Assembler.WriteInstructionAuto(instruction, rd, rn);
}
else
{
if (rn.Type == OperandType.I64)
{
instruction |= Assembler.SfFlag;
}
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
private static void GenerateScalarFPConvGpr(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint fBits)
{
Debug.Assert(fBits <= 64);
instruction |= (sz << 22);
instruction |= (64 - fBits) << 10;
if (rd.Type.IsInteger())
{
Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
context.Assembler.WriteInstructionAuto(instruction, rd, rn);
}
else
{
if (rn.Type == OperandType.I64)
{
instruction |= Assembler.SfFlag;
}
else
{
Debug.Assert(fBits <= 32);
}
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
private static void GenerateScalarTernary(
CodeGenContext context,
uint sz,
uint instruction,
Operand rd,
Operand rn,
Operand rm,
Operand ra)
{
instruction |= (sz << 22);
context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
}
private static void GenerateVectorUnary(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn)
{
instruction |= (q << 30) | (sz << 22);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorUnaryByElem(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn)
{
uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
instruction |= (q << 30) | (imm5 << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorBinary(
CodeGenContext context,
uint q,
uint instruction,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30);
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinary(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (sz << 22);
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryByElem(
CodeGenContext context,
uint q,
uint size,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (size << 22);
if (size == 2)
{
instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
}
else
{
instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
}
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryFPByElem(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
uint srcIndex,
Operand rd,
Operand rn,
Operand rm)
{
instruction |= (q << 30) | (sz << 22);
if (sz != 0)
{
instruction |= (srcIndex & 1) << 11;
}
else
{
instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
}
context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
}
private static void GenerateVectorBinaryShlImm(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint shift)
{
instruction |= (q << 30);
Debug.Assert(shift >= 0 && shift < (8u << (int)sz));
uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
instruction |= (imm << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorBinaryShrImm(
CodeGenContext context,
uint q,
uint sz,
uint instruction,
Operand rd,
Operand rn,
uint shift)
{
instruction |= (q << 30);
Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
instruction |= (imm << 16);
context.Assembler.WriteInstruction(instruction, rd, rn);
}
private static void GenerateVectorInsertByElem(
CodeGenContext context,
uint sz,
uint instruction,
uint srcIndex,
uint dstIndex,
Operand rd,
Operand rn)
{
uint imm4 = srcIndex << (int)sz;
uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
instruction |= imm4 << 11;
instruction |= imm5 << 16;
context.Assembler.WriteInstruction(instruction, rd, rn);
}
}
}
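
The helpers above all build the final instruction word by OR-ing the Q and size fields into a fixed base opcode, e.g. instruction |= (q << 30) | (sz << 22). A small worked example (standalone sketch; the base opcode is taken from the Arm64FaddV entry in the IntrinsicTable below, and the register fields are left as zero, to be filled in by the Assembler):

using System;

static class VectorEncodingDemo
{
    private static uint Encode(uint baseInstruction, uint q, uint sz)
    {
        // Same field packing as GenerateVectorBinary above.
        return baseInstruction | (q << 30) | (sz << 22);
    }

    static void Main()
    {
        const uint FaddVBase = 0x0e20d400u; // Arm64FaddV in IntrinsicTable.

        // q = 1 selects the 128-bit form, sz selects single or double precision.
        Console.WriteLine(Encode(FaddVBase, q: 1, sz: 0).ToString("x8")); // 4e20d400: FADD V0.4S, V0.4S, V0.4S
        Console.WriteLine(Encode(FaddVBase, q: 1, sz: 1).ToString("x8")); // 4e60d400: FADD V0.2D, V0.2D, V0.2D
    }
}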

View File

@@ -0,0 +1,185 @@
using System;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Versioning;
namespace ARMeilleure.CodeGen.Arm64
{
static partial class HardwareCapabilities
{
static HardwareCapabilities()
{
if (!ArmBase.Arm64.IsSupported)
{
return;
}
if (OperatingSystem.IsLinux())
{
LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
}
if (OperatingSystem.IsMacOS())
{
for (int i = 0; i < _sysctlNames.Length; i++)
{
if (CheckSysctlName(_sysctlNames[i]))
{
MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i);
}
}
}
}
#region Linux
private const ulong AT_HWCAP = 16;
private const ulong AT_HWCAP2 = 26;
[LibraryImport("libc", SetLastError = true)]
private static partial ulong getauxval(ulong type);
[Flags]
public enum LinuxFeatureFlagsHwCap : ulong
{
Fp = 1 << 0,
Asimd = 1 << 1,
Evtstrm = 1 << 2,
Aes = 1 << 3,
Pmull = 1 << 4,
Sha1 = 1 << 5,
Sha2 = 1 << 6,
Crc32 = 1 << 7,
Atomics = 1 << 8,
FpHp = 1 << 9,
AsimdHp = 1 << 10,
CpuId = 1 << 11,
AsimdRdm = 1 << 12,
Jscvt = 1 << 13,
Fcma = 1 << 14,
Lrcpc = 1 << 15,
DcpOp = 1 << 16,
Sha3 = 1 << 17,
Sm3 = 1 << 18,
Sm4 = 1 << 19,
AsimdDp = 1 << 20,
Sha512 = 1 << 21,
Sve = 1 << 22,
AsimdFhm = 1 << 23,
Dit = 1 << 24,
Uscat = 1 << 25,
Ilrcpc = 1 << 26,
FlagM = 1 << 27,
Ssbs = 1 << 28,
Sb = 1 << 29,
Paca = 1 << 30,
Pacg = 1UL << 31
}
[Flags]
public enum LinuxFeatureFlagsHwCap2 : ulong
{
Dcpodp = 1 << 0,
Sve2 = 1 << 1,
SveAes = 1 << 2,
SvePmull = 1 << 3,
SveBitperm = 1 << 4,
SveSha3 = 1 << 5,
SveSm4 = 1 << 6,
FlagM2 = 1 << 7,
Frint = 1 << 8,
SveI8mm = 1 << 9,
SveF32mm = 1 << 10,
SveF64mm = 1 << 11,
SveBf16 = 1 << 12,
I8mm = 1 << 13,
Bf16 = 1 << 14,
Dgh = 1 << 15,
Rng = 1 << 16,
Bti = 1 << 17,
Mte = 1 << 18,
Ecv = 1 << 19,
Afp = 1 << 20,
Rpres = 1 << 21,
Mte3 = 1 << 22,
Sme = 1 << 23,
Sme_i16i64 = 1 << 24,
Sme_f64f64 = 1 << 25,
Sme_i8i32 = 1 << 26,
Sme_f16f32 = 1 << 27,
Sme_b16f32 = 1 << 28,
Sme_f32f32 = 1 << 29,
Sme_fa64 = 1 << 30,
Wfxt = 1UL << 31,
Ebf16 = 1UL << 32,
Sve_Ebf16 = 1UL << 33,
Cssc = 1UL << 34,
Rprfm = 1UL << 35,
Sve2p1 = 1UL << 36
}
public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
#endregion
#region macOS
[LibraryImport("libSystem.dylib", SetLastError = true)]
private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
[SupportedOSPlatform("macos")]
private static bool CheckSysctlName(string name)
{
ulong size = sizeof(int);
if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int))
{
return val != 0;
}
return false;
}
private static string[] _sysctlNames = new string[]
{
"hw.optional.floatingpoint",
"hw.optional.AdvSIMD",
"hw.optional.arm.FEAT_FP16",
"hw.optional.arm.FEAT_AES",
"hw.optional.arm.FEAT_PMULL",
"hw.optional.arm.FEAT_LSE",
"hw.optional.armv8_crc32",
"hw.optional.arm.FEAT_SHA1",
"hw.optional.arm.FEAT_SHA256"
};
[Flags]
public enum MacOsFeatureFlags
{
Fp = 1 << 0,
AdvSimd = 1 << 1,
Fp16 = 1 << 2,
Aes = 1 << 3,
Pmull = 1 << 4,
Lse = 1 << 5,
Crc32 = 1 << 6,
Sha1 = 1 << 7,
Sha256 = 1 << 8
}
public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
#endregion
public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd);
public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes);
public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull);
public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse);
public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32);
public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1);
public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256);
}
}
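
A usage sketch (illustrative only, not something this diff adds, and assuming code that can see the internal ARMeilleure.CodeGen.Arm64.HardwareCapabilities class): the aggregate SupportsXxx properties above can simply be logged at startup to confirm which code paths the Arm64 backend may take on the current host.

using System;
using ARMeilleure.CodeGen.Arm64;

static class FeatureReportDemo
{
    static void Main()
    {
        // Each property already folds the Linux HWCAP and macOS sysctl results together.
        Console.WriteLine($"AdvSimd: {HardwareCapabilities.SupportsAdvSimd}");
        Console.WriteLine($"AES:     {HardwareCapabilities.SupportsAes}");
        Console.WriteLine($"PMULL:   {HardwareCapabilities.SupportsPmull}");
        Console.WriteLine($"LSE:     {HardwareCapabilities.SupportsLse}");
        Console.WriteLine($"CRC32:   {HardwareCapabilities.SupportsCrc32}");
        Console.WriteLine($"SHA1:    {HardwareCapabilities.SupportsSha1}");
        Console.WriteLine($"SHA256:  {HardwareCapabilities.SupportsSha256}");
    }
}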

View File

@@ -0,0 +1,14 @@
namespace ARMeilleure.CodeGen.Arm64
{
struct IntrinsicInfo
{
public uint Inst { get; }
public IntrinsicType Type { get; }
public IntrinsicInfo(uint inst, IntrinsicType type)
{
Inst = inst;
Type = type;
}
}
}

View File

@@ -0,0 +1,461 @@
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
namespace ARMeilleure.CodeGen.Arm64
{
static class IntrinsicTable
{
private static IntrinsicInfo[] _intrinTable;
static IntrinsicTable()
{
_intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
}
private static void Add(Intrinsic intrin, IntrinsicInfo info)
{
_intrinTable[(int)intrin] = info;
}
public static IntrinsicInfo GetInfo(Intrinsic intrin)
{
return _intrinTable[(int)intrin];
}
}
}
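Each entry above stores only the fixed base opcode of the instruction; register numbers and the vector arrangement bits are OR-ed in when the instruction is emitted. A minimal sketch of how a lookup might be consumed, assuming the Arm64 IntrinsicInfo exposes the opcode as Inst (mirroring the x86 IntrinsicInfo shown further down) and using the standard A64 Rd/Rn/Rm field positions; the real emitter is not part of this hunk:

// Illustrative only: fetch the base opcode for an intrinsic and patch in the register
// fields of an A64 "three registers, same type" encoding.
static uint EncodeVectorBinary(Intrinsic intrin, int rd, int rn, int rm)
{
    // Strip the vector type/size flags that callers OR into the intrinsic value.
    IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));

    uint inst = info.Inst;            // e.g. 0x0e200c00 for SQADD (vector), from the table above
    inst |= (uint)(rm & 0x1f) << 16;  // Rm
    inst |= (uint)(rn & 0x1f) << 5;   // Rn
    inst |= (uint)(rd & 0x1f);        // Rd

    return inst;
}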


@ -0,0 +1,59 @@
namespace ARMeilleure.CodeGen.Arm64
{
enum IntrinsicType
{
ScalarUnary,
ScalarUnaryByElem,
ScalarBinary,
ScalarBinaryByElem,
ScalarBinaryFPByElem,
ScalarBinaryRd,
ScalarBinaryShl,
ScalarBinaryShr,
ScalarFcsel,
ScalarFmovi,
ScalarFPCompare,
ScalarFPCompareCond,
ScalarFPConv,
ScalarFPConvFixed,
ScalarFPConvFixedGpr,
ScalarFPConvGpr,
ScalarTernary,
ScalarTernaryFPRdByElem,
ScalarTernaryShlRd,
ScalarTernaryShrRd,
VectorUnary,
VectorUnaryBitwise,
VectorUnaryByElem,
VectorBinary,
VectorBinaryBitwise,
VectorBinaryBitwiseImm,
VectorBinaryByElem,
VectorBinaryFPByElem,
VectorBinaryRd,
VectorBinaryShl,
VectorBinaryShr,
VectorExt,
VectorFmovi,
VectorFPConvFixed,
VectorInsertByElem,
VectorLdSt,
VectorLdStSs,
VectorLookupTable,
VectorMovi,
VectorMvni,
VectorTernaryFPRdByElem,
VectorTernaryRd,
VectorTernaryRdBitwise,
VectorTernaryRdByElem,
VectorTernaryShlRd,
VectorTernaryShrRd,
Vector128Unary,
Vector128Binary,
GetRegister,
SetRegister
}
}
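The pre-allocator below keys register constraints off these categories rather than off individual intrinsics; in particular, every type ending in Rd means the instruction also reads its destination register, which is what IsSameOperandDestSrc1 checks. A condensed, illustrative restatement of that idea (the authoritative list lives in the pre-allocator):

// Illustrative helper only; see IsSameOperandDestSrc1 below for the full list.
static bool ReadsDestination(IntrinsicType type)
{
    switch (type)
    {
        case IntrinsicType.ScalarBinaryRd:
        case IntrinsicType.ScalarTernaryShlRd:
        case IntrinsicType.ScalarTernaryShrRd:
        case IntrinsicType.VectorBinaryRd:
        case IntrinsicType.VectorTernaryRd:
        case IntrinsicType.VectorTernaryShlRd:
        case IntrinsicType.VectorTernaryShrRd:
            return true; // ...plus the ByElem and Bitwise Rd variants.
        default:
            return false;
    }
}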


@ -0,0 +1,940 @@
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
namespace ARMeilleure.CodeGen.Arm64
{
class PreAllocator
{
private class ConstantDict
{
private readonly Dictionary<(ulong, OperandType), Operand> _constants;
public ConstantDict()
{
_constants = new Dictionary<(ulong, OperandType), Operand>();
}
public void Add(ulong value, OperandType type, Operand local)
{
_constants.Add((value, type), local);
}
public bool TryGetValue(ulong value, OperandType type, out Operand local)
{
return _constants.TryGetValue((value, type), out local);
}
}
public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
{
maxCallArgs = -1;
Span<Operation> buffer = default;
Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
{
ConstantDict constants = new ConstantDict();
Operation nextNode;
for (Operation node = block.Operations.First; node != default; node = nextNode)
{
nextNode = node.ListNext;
if (node.Instruction == Instruction.Phi)
{
continue;
}
HandleConstantRegCopy(constants, block.Operations, node);
HandleDestructiveRegCopy(block.Operations, node);
switch (node.Instruction)
{
case Instruction.Call:
// Get the maximum number of arguments used on a call.
// On Windows, when a struct is returned from the call,
// we also need to pass the pointer where the struct
// should be written as the first argument.
int argsCount = node.SourcesCount - 1;
if (node.Destination != default && node.Destination.Type == OperandType.V128)
{
argsCount++;
}
if (maxCallArgs < argsCount)
{
maxCallArgs = argsCount;
}
// Copy values to registers expected by the function
// being called, as mandated by the ABI.
HandleCall(constants, block.Operations, node);
break;
case Instruction.CompareAndSwap:
case Instruction.CompareAndSwap16:
case Instruction.CompareAndSwap8:
nextNode = HandleCompareAndSwap(block.Operations, node);
break;
case Instruction.LoadArgument:
nextNode = HandleLoadArgument(cctx, ref buffer, block.Operations, preservedArgs, node);
break;
case Instruction.Return:
HandleReturn(block.Operations, node);
break;
case Instruction.Tailcall:
HandleTailcall(constants, block.Operations, stackAlloc, node, node);
break;
}
}
}
}
private static void HandleConstantRegCopy(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
{
if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
{
return;
}
Instruction inst = node.Instruction;
Operand src1 = node.GetSource(0);
Operand src2;
if (src1.Kind == OperandKind.Constant)
{
if (!src1.Type.IsInteger())
{
// Handle non-integer types (FP32, FP64 and V128).
// For instructions without an immediate operand, we do the following:
// - Insert a copy with the constant value (as integer) to a GPR.
// - Insert a copy from the GPR to an XMM register.
// - Replace the constant use with the XMM register.
src1 = AddFloatConstantCopy(constants, nodes, node, src1);
node.SetSource(0, src1);
}
else if (!HasConstSrc1(node, src1.Value))
{
// Handle integer types.
// Most ALU instructions accept a 32-bit immediate on the second operand.
// We need to ensure the following:
// - If the constant is on operand 1, we need to move it.
// -- But first, we try to swap operand 1 and 2 if the instruction is commutative.
// -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
// - If the constant is on operand 2, we check if the instruction supports it,
// if not, we also add a copy. 64-bit constants are usually not supported.
if (IsCommutative(node))
{
src2 = node.GetSource(1);
Operand temp = src1;
src1 = src2;
src2 = temp;
node.SetSource(0, src1);
node.SetSource(1, src2);
}
if (src1.Kind == OperandKind.Constant)
{
src1 = AddIntConstantCopy(constants, nodes, node, src1);
node.SetSource(0, src1);
}
}
}
if (node.SourcesCount < 2)
{
return;
}
src2 = node.GetSource(1);
if (src2.Kind == OperandKind.Constant)
{
if (!src2.Type.IsInteger())
{
src2 = AddFloatConstantCopy(constants, nodes, node, src2);
node.SetSource(1, src2);
}
else if (!HasConstSrc2(inst, src2))
{
src2 = AddIntConstantCopy(constants, nodes, node, src2);
node.SetSource(1, src2);
}
}
if (node.SourcesCount < 3 ||
node.Instruction == Instruction.BranchIf ||
node.Instruction == Instruction.Compare ||
node.Instruction == Instruction.VectorInsert ||
node.Instruction == Instruction.VectorInsert16 ||
node.Instruction == Instruction.VectorInsert8)
{
return;
}
for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
{
Operand src = node.GetSource(srcIndex);
if (src.Kind == OperandKind.Constant)
{
if (!src.Type.IsInteger())
{
src = AddFloatConstantCopy(constants, nodes, node, src);
node.SetSource(srcIndex, src);
}
else
{
src = AddIntConstantCopy(constants, nodes, node, src);
node.SetSource(srcIndex, src);
}
}
}
}
private static void HandleDestructiveRegCopy(IntrusiveList<Operation> nodes, Operation node)
{
if (node.Destination == default || node.SourcesCount == 0)
{
return;
}
Operand dest = node.Destination;
Operand src1 = node.GetSource(0);
if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
{
bool useNewLocal = false;
for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
{
if (node.GetSource(srcIndex) == dest)
{
useNewLocal = true;
break;
}
}
if (useNewLocal)
{
// Dest is already being used as a source, so we need a new
// local to store the temporary value, otherwise the value in the
// dest local would be overwritten.
Operand temp = Local(dest.Type);
nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
node.SetSource(0, temp);
nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
node.Destination = temp;
}
else
{
nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
node.SetSource(0, dest);
}
}
}
private static void HandleCall(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
{
Operation operation = node;
Operand dest = operation.Destination;
List<Operand> sources = new List<Operand>
{
operation.GetSource(0)
};
int argsCount = operation.SourcesCount - 1;
int intMax = CallingConvention.GetArgumentsOnRegsCount();
int vecMax = CallingConvention.GetArgumentsOnRegsCount();
int intCount = 0;
int vecCount = 0;
int stackOffset = 0;
for (int index = 0; index < argsCount; index++)
{
Operand source = operation.GetSource(index + 1);
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount < intMax;
}
else if (source.Type == OperandType.V128)
{
passOnReg = intCount + 1 < intMax;
}
else
{
passOnReg = vecCount < vecMax;
}
if (source.Type == OperandType.V128 && passOnReg)
{
// V128 is a struct; we pass each half in a GPR if possible.
Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
continue;
}
if (passOnReg)
{
Operand argReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
Operation copyOp = Operation(Instruction.Copy, argReg, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
sources.Add(argReg);
}
else
{
Operand offset = Const(stackOffset);
Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, spillOp));
stackOffset += source.Type.GetSizeInBytes();
}
}
if (dest != default)
{
if (dest.Type == OperandType.V128)
{
Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
operation.Destination = default;
}
else
{
Operand retReg = dest.Type.IsInteger()
? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
: Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
Operation copyOp = Operation(Instruction.Copy, dest, retReg);
nodes.AddAfter(node, copyOp);
operation.Destination = retReg;
}
}
operation.SetSources(sources.ToArray());
}
private static void HandleTailcall(
ConstantDict constants,
IntrusiveList<Operation> nodes,
StackAllocator stackAlloc,
Operation node,
Operation operation)
{
List<Operand> sources = new List<Operand>
{
operation.GetSource(0)
};
int argsCount = operation.SourcesCount - 1;
int intMax = CallingConvention.GetArgumentsOnRegsCount();
int vecMax = CallingConvention.GetArgumentsOnRegsCount();
int intCount = 0;
int vecCount = 0;
// Handle arguments passed in registers.
for (int index = 0; index < argsCount; index++)
{
Operand source = operation.GetSource(1 + index);
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount + 1 < intMax;
}
else
{
passOnReg = vecCount < vecMax;
}
if (source.Type == OperandType.V128 && passOnReg)
{
// V128 is a struct; we pass each half in a GPR if possible.
Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
continue;
}
if (passOnReg)
{
Operand argReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
Operation copyOp = Operation(Instruction.Copy, argReg, source);
HandleConstantRegCopy(constants, nodes, nodes.AddBefore(node, copyOp));
sources.Add(argReg);
}
else
{
throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
}
}
// The target address must be in the return registers, since we
// don't return anything and it is guaranteed not to be a
// callee-saved register (which would be trashed in the epilogue).
Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
nodes.AddBefore(node, addrCopyOp);
sources[0] = tcAddress;
operation.SetSources(sources.ToArray());
}
private static Operation HandleCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
{
Operand expected = node.GetSource(1);
if (expected.Type == OperandType.V128)
{
Operand dest = node.Destination;
Operand expectedLow = Local(OperandType.I64);
Operand expectedHigh = Local(OperandType.I64);
Operand desiredLow = Local(OperandType.I64);
Operand desiredHigh = Local(OperandType.I64);
Operand actualLow = Local(OperandType.I64);
Operand actualHigh = Local(OperandType.I64);
Operand address = node.GetSource(0);
Operand desired = node.GetSource(2);
void SplitOperand(Operand source, Operand low, Operand high)
{
nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
}
SplitOperand(expected, expectedLow, expectedHigh);
SplitOperand(desired, desiredLow, desiredHigh);
Operation operation = node;
// Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
// We also need additional registers that will be used to store temporary information.
operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
// Add some dummy uses of the input operands, as the CAS operation will be a loop,
// so they can't be used as destination operands.
for (int i = 0; i < operation.SourcesCount; i++)
{
Operand src = operation.GetSource(i);
node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
}
// Assemble the vector with the 64-bit values at the given memory location.
node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
}
else
{
// We need an additional register where the store result will be written.
node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
// Add some dummy uses of the input operands, as the CAS operation will be a loop,
// so they can't be used as destination operands.
Operation operation = node;
for (int i = 0; i < operation.SourcesCount; i++)
{
Operand src = operation.GetSource(i);
node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
}
}
return node.ListNext;
}
private static void HandleReturn(IntrusiveList<Operation> nodes, Operation node)
{
if (node.SourcesCount == 0)
{
return;
}
Operand source = node.GetSource(0);
if (source.Type == OperandType.V128)
{
Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
}
else
{
Operand retReg = source.Type.IsInteger()
? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
: Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
nodes.AddBefore(node, retCopyOp);
}
}
private static Operation HandleLoadArgument(
CompilerContext cctx,
ref Span<Operation> buffer,
IntrusiveList<Operation> nodes,
Operand[] preservedArgs,
Operation node)
{
Operand source = node.GetSource(0);
Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
int index = source.AsInt32();
int intCount = 0;
int vecCount = 0;
for (int cIndex = 0; cIndex < index; cIndex++)
{
OperandType argType = cctx.FuncArgTypes[cIndex];
if (argType.IsInteger())
{
intCount++;
}
else if (argType == OperandType.V128)
{
intCount += 2;
}
else
{
vecCount++;
}
}
bool passOnReg;
if (source.Type.IsInteger())
{
passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
}
else if (source.Type == OperandType.V128)
{
passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
}
else
{
passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
}
if (passOnReg)
{
Operand dest = node.Destination;
if (preservedArgs[index] == default)
{
if (dest.Type == OperandType.V128)
{
// V128 is a struct; we pass each half in a GPR if possible.
Operand pArg = Local(OperandType.V128);
Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
cctx.Cfg.Entry.Operations.AddFirst(copyH);
cctx.Cfg.Entry.Operations.AddFirst(copyL);
preservedArgs[index] = pArg;
}
else
{
Operand pArg = Local(dest.Type);
Operand argReg = dest.Type.IsInteger()
? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
: Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
cctx.Cfg.Entry.Operations.AddFirst(copyOp);
preservedArgs[index] = pArg;
}
}
Operation nextNode;
if (dest.AssignmentsCount == 1)
{
// Let's propagate the argument if we can to avoid copies.
Propagate(ref buffer, dest, preservedArgs[index]);
nextNode = node.ListNext;
}
else
{
Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
nextNode = nodes.AddBefore(node, argCopyOp);
}
Delete(nodes, node);
return nextNode;
}
else
{
// TODO: Pass on stack.
return node;
}
}
private static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
{
ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
{
for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
{
Operand useSrc = use.GetSource(srcIndex);
if (useSrc == dest)
{
use.SetSource(srcIndex, value);
}
else if (useSrc.Kind == OperandKind.Memory)
{
MemoryOperand memoryOp = useSrc.GetMemory();
Operand baseAddr = memoryOp.BaseAddress;
Operand index = memoryOp.Index;
bool changed = false;
if (baseAddr == dest)
{
baseAddr = value;
changed = true;
}
if (index == dest)
{
index = value;
changed = true;
}
if (changed)
{
use.SetSource(srcIndex, MemoryOp(
useSrc.Type,
baseAddr,
index,
memoryOp.Scale,
memoryOp.Displacement));
}
}
}
}
}
private static Operand AddFloatConstantCopy(
ConstantDict constants,
IntrusiveList<Operation> nodes,
Operation node,
Operand source)
{
Operand temp = Local(source.Type);
Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
nodes.AddBefore(node, copyOp);
return temp;
}
private static Operand AddIntConstantCopy(
ConstantDict constants,
IntrusiveList<Operation> nodes,
Operation node,
Operand source)
{
if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
{
return temp;
}
temp = Local(source.Type);
Operation copyOp = Operation(Instruction.Copy, temp, source);
nodes.AddBefore(node, copyOp);
constants.Add(source.Value, source.Type, temp);
return temp;
}
private static Operand GetIntConst(Operand value)
{
if (value.Type == OperandType.FP32)
{
return Const(value.AsInt32());
}
else if (value.Type == OperandType.FP64)
{
return Const(value.AsInt64());
}
return value;
}
private static void Delete(IntrusiveList<Operation> nodes, Operation node)
{
node.Destination = default;
for (int index = 0; index < node.SourcesCount; index++)
{
node.SetSource(index, default);
}
nodes.Remove(node);
}
private static Operand Gpr(int register, OperandType type)
{
return Register(register, RegisterType.Integer, type);
}
private static Operand Xmm(int register, OperandType type)
{
return Register(register, RegisterType.Vector, type);
}
private static bool IsSameOperandDestSrc1(Operation operation)
{
switch (operation.Instruction)
{
case Instruction.Extended:
return IsSameOperandDestSrc1(operation.Intrinsic);
case Instruction.VectorInsert:
case Instruction.VectorInsert16:
case Instruction.VectorInsert8:
return true;
}
return false;
}
private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
{
IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
return info.Type == IntrinsicType.ScalarBinaryRd ||
info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
info.Type == IntrinsicType.ScalarTernaryShlRd ||
info.Type == IntrinsicType.ScalarTernaryShrRd ||
info.Type == IntrinsicType.VectorBinaryRd ||
info.Type == IntrinsicType.VectorInsertByElem ||
info.Type == IntrinsicType.VectorTernaryRd ||
info.Type == IntrinsicType.VectorTernaryRdBitwise ||
info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
info.Type == IntrinsicType.VectorTernaryRdByElem ||
info.Type == IntrinsicType.VectorTernaryShlRd ||
info.Type == IntrinsicType.VectorTernaryShrRd;
}
private static bool HasConstSrc1(Operation node, ulong value)
{
switch (node.Instruction)
{
case Instruction.Add:
case Instruction.BranchIf:
case Instruction.Compare:
case Instruction.Subtract:
// The immediate encoding of those instructions does not allow Rn to be
// XZR (it will be SP instead), so we can't allow an Rn constant in this case.
return value == 0 && NotConstOrConst0(node.GetSource(1));
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseNot:
case Instruction.BitwiseOr:
case Instruction.ByteSwap:
case Instruction.CountLeadingZeros:
case Instruction.Multiply:
case Instruction.Negate:
case Instruction.RotateRight:
case Instruction.ShiftLeft:
case Instruction.ShiftRightSI:
case Instruction.ShiftRightUI:
return value == 0;
case Instruction.Copy:
case Instruction.LoadArgument:
case Instruction.Spill:
case Instruction.SpillArg:
return true;
case Instruction.Extended:
return value == 0;
}
return false;
}
private static bool NotConstOrConst0(Operand operand)
{
return operand.Kind != OperandKind.Constant || operand.Value == 0;
}
private static bool HasConstSrc2(Instruction inst, Operand operand)
{
ulong value = operand.Value;
switch (inst)
{
case Instruction.Add:
case Instruction.BranchIf:
case Instruction.Compare:
case Instruction.Subtract:
return ConstFitsOnUImm12Sh(value);
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseOr:
return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
case Instruction.Multiply:
case Instruction.Store:
case Instruction.Store16:
case Instruction.Store8:
return value == 0;
case Instruction.RotateRight:
case Instruction.ShiftLeft:
case Instruction.ShiftRightSI:
case Instruction.ShiftRightUI:
case Instruction.VectorExtract:
case Instruction.VectorExtract16:
case Instruction.VectorExtract8:
return true;
case Instruction.Extended:
// TODO: Check if actual intrinsic is supposed to have consts here?
// Right now we only hit this case for fixed-point int <-> FP conversion instructions.
return true;
}
return false;
}
private static bool IsCommutative(Operation operation)
{
switch (operation.Instruction)
{
case Instruction.Add:
case Instruction.BitwiseAnd:
case Instruction.BitwiseExclusiveOr:
case Instruction.BitwiseOr:
case Instruction.Multiply:
return true;
case Instruction.BranchIf:
case Instruction.Compare:
{
Operand comp = operation.GetSource(2);
Debug.Assert(comp.Kind == OperandKind.Constant);
var compType = (Comparison)comp.AsInt32();
return compType == Comparison.Equal || compType == Comparison.NotEqual;
}
}
return false;
}
private static bool ConstFitsOnUImm12Sh(ulong value)
{
return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
}
private static bool IsIntrinsicWithConst(Operation operation)
{
bool isIntrinsic = IsIntrinsic(operation.Instruction);
if (isIntrinsic)
{
Intrinsic intrinsic = operation.Intrinsic;
IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
// Those have integer inputs that don't support consts.
return info.Type != IntrinsicType.ScalarFPConvGpr &&
info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
info.Type != IntrinsicType.SetRegister;
}
return false;
}
private static bool IsIntrinsic(Instruction inst)
{
return inst == Instruction.Extended;
}
}
}
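One detail worth pulling out of the class above: ConstFitsOnUImm12Sh decides whether an ADD/SUB/CMP constant can stay as an immediate or must first be copied to a register by AddIntConstantCopy. A64 arithmetic immediates are 12 bits, optionally shifted left by 12, which the two masks express directly. A standalone restatement with worked values:

// Same predicate as ConstFitsOnUImm12Sh above, restated for illustration.
static bool FitsUImm12Sh(ulong value)
{
    return (value & ~0xfffUL) == 0        // plain 12-bit immediate
        || (value & ~0xfff000UL) == 0;    // 12-bit immediate with LSL #12
}

// FitsUImm12Sh(0x0fff) == true   -> stays an immediate
// FitsUImm12Sh(0x1000) == true   -> encodable with the shifted form
// FitsUImm12Sh(0x1001) == false  -> copied into a register before use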


@ -90,6 +90,47 @@ namespace ARMeilleure.CodeGen.Optimizations
}
break;
case Instruction.Compare:
if (type == OperandType.I32 &&
operation.GetSource(0).Type == type &&
operation.GetSource(1).Type == type)
{
switch ((Comparison)operation.GetSource(2).Value)
{
case Comparison.Equal:
EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
break;
case Comparison.NotEqual:
EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
break;
case Comparison.Greater:
EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
break;
case Comparison.LessOrEqual:
EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
break;
case Comparison.GreaterUI:
EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
break;
case Comparison.LessOrEqualUI:
EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
break;
case Comparison.GreaterOrEqual:
EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
break;
case Comparison.Less:
EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
break;
case Comparison.GreaterOrEqualUI:
EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
break;
case Comparison.LessUI:
EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
break;
}
}
break;
case Instruction.Copy:
if (type == OperandType.I32)
{

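The new Compare case above lets a compare between two 32-bit constants fold at translation time, with separate lambdas for the signed and unsigned orderings. The distinction only shows up for negative inputs; a hypothetical helper (not part of ConstantFolding) makes it concrete:

// Hypothetical illustration of the signed vs. unsigned folds above.
static int FoldCompare(int x, int y, Comparison type)
{
    return type switch
    {
        Comparison.Greater   => x > y ? 1 : 0,
        Comparison.GreaterUI => (uint)x > (uint)y ? 1 : 0,
        _ => throw new NotSupportedException(),
    };
}

// FoldCompare(-1, 1, Comparison.Greater)   == 0
// FoldCompare(-1, 1, Comparison.GreaterUI) == 1   // (uint)-1 is 0xFFFFFFFF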

@ -44,8 +44,8 @@ namespace ARMeilleure.CodeGen.Optimizations
ConstantFolding.RunPass(node);
Simplification.RunPass(node);
if (DestIsLocalVar(node))
{
if (DestIsSingleLocalVar(node))
{
if (IsPropagableCompare(node))
{
modified |= PropagateCompare(ref buffer, node);
@ -99,20 +99,6 @@ namespace ARMeilleure.CodeGen.Optimizations
while (modified);
}
private static Span<Operation> GetUses(ref Span<Operation> buffer, Operand operand)
{
ReadOnlySpan<Operation> uses = operand.Uses;
if (buffer.Length < uses.Length)
{
buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
}
uses.CopyTo(buffer);
return buffer.Slice(0, uses.Length);
}
private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
{
// Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
@ -160,7 +146,7 @@ namespace ARMeilleure.CodeGen.Optimizations
Comparison compType = (Comparison)comp.AsInt32();
Span<Operation> uses = GetUses(ref buffer, dest);
Span<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
{
@ -199,7 +185,7 @@ namespace ARMeilleure.CodeGen.Optimizations
Operand dest = copyOp.Destination;
Operand source = copyOp.GetSource(0);
Span<Operation> uses = GetUses(ref buffer, dest);
Span<Operation> uses = dest.GetUses(ref buffer);
foreach (Operation use in uses)
{
@ -231,12 +217,12 @@ namespace ARMeilleure.CodeGen.Optimizations
private static bool IsUnused(Operation node)
{
return DestIsLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
}
private static bool DestIsLocalVar(Operation node)
private static bool DestIsSingleLocalVar(Operation node)
{
return node.Destination != default && node.Destination.Kind == OperandKind.LocalVariable;
return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
}
private static bool HasSideEffects(Operation node)

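The DestIsLocalVar to DestIsSingleLocalVar rename above tightens the guard from "has a local-variable destination" to "has exactly one". A plausible motivation, inferred from this compare rather than stated in it, is the Arm64 pre-allocator earlier in the diff, which rewrites a 128-bit compare-and-swap into an operation with several destinations; copy propagation and dead-code elimination are only safe against a single local destination. The two excerpts side by side:

// From the Arm64 PreAllocator above: the lowered CAS carries four destinations.
operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });

// New guard in the optimizer: only operations with exactly one local destination qualify.
return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;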

@ -1,6 +1,6 @@
namespace ARMeilleure.CodeGen.RegisterAllocators
{
struct AllocationResult
readonly struct AllocationResult
{
public int IntUsedRegisters { get; }
public int VecUsedRegisters { get; }


@ -11,7 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
private class ParallelCopy
{
private struct Copy
private readonly struct Copy
{
public Register Dest { get; }
public Register Source { get; }


@ -11,7 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
class HybridAllocator : IRegisterAllocator
{
private struct BlockInfo
private readonly struct BlockInfo
{
public bool HasCall { get; }


@ -17,8 +17,6 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private const int InstructionGap = 2;
private const int InstructionGapMask = InstructionGap - 1;
private const int RegistersCount = 16;
private HashSet<int> _blockEdges;
private LiveRange[] _blockRanges;
private BitMap[] _blockLiveIn;
@ -59,7 +57,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
{
positions = new int[RegistersCount];
positions = new int[masks.RegistersCount];
count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
int mask = masks.GetAvailableRegisters(type);
@ -115,7 +113,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
StackAllocator stackAlloc,
RegisterMasks regMasks)
{
NumberLocals(cfg);
NumberLocals(cfg, regMasks.RegistersCount);
var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
@ -134,22 +132,25 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
context.Active.Set(index);
if (current.Register.Type == RegisterType.Integer)
if (current.IsFixedAndUsed)
{
context.IntUsedRegisters |= 1 << current.Register.Index;
}
else /* if (interval.Register.Type == RegisterType.Vector) */
{
context.VecUsedRegisters |= 1 << current.Register.Index;
if (current.Register.Type == RegisterType.Integer)
{
context.IntUsedRegisters |= 1 << current.Register.Index;
}
else /* if (interval.Register.Type == RegisterType.Vector) */
{
context.VecUsedRegisters |= 1 << current.Register.Index;
}
}
continue;
}
AllocateInterval(context, current, index);
AllocateInterval(context, current, index, regMasks.RegistersCount);
}
for (int index = RegistersCount * 2; index < _intervals.Count; index++)
for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
{
if (!_intervals[index].IsSpilled)
{
@ -163,7 +164,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
}
private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex)
private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{
// Check active intervals that already ended.
foreach (int iIndex in context.Active)
@ -199,17 +200,17 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
if (!TryAllocateRegWithoutSpill(context, current, cIndex))
if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
{
AllocateRegWithSpill(context, current, cIndex);
AllocateRegWithSpill(context, current, cIndex, registersCount);
}
}
private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex)
private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{
RegisterType regType = current.Local.Type.ToRegisterType();
Span<int> freePositions = stackalloc int[RegistersCount];
Span<int> freePositions = stackalloc int[registersCount];
context.GetFreePositions(regType, freePositions, out int freePositionsCount);
@ -278,7 +279,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
InsertInterval(splitChild);
InsertInterval(splitChild, registersCount);
}
else
{
@ -302,12 +303,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return true;
}
private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex)
private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
{
RegisterType regType = current.Local.Type.ToRegisterType();
Span<int> usePositions = stackalloc int[RegistersCount];
Span<int> blockedPositions = stackalloc int[RegistersCount];
Span<int> usePositions = stackalloc int[registersCount];
Span<int> blockedPositions = stackalloc int[registersCount];
context.GetFreePositions(regType, usePositions, out _);
context.GetFreePositions(regType, blockedPositions, out _);
@ -386,7 +387,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
InsertInterval(splitChild);
InsertInterval(splitChild, registersCount);
Spill(context, current);
}
@ -396,7 +397,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
// so we only need to split the intervals using the selected register.
current.Register = new Register(selectedReg, regType);
SplitAndSpillOverlappingIntervals(context, current);
SplitAndSpillOverlappingIntervals(context, current, registersCount);
context.Active.Set(cIndex);
}
@ -417,14 +418,14 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
InsertInterval(splitChild);
InsertInterval(splitChild, registersCount);
}
else
{
Spill(context, splitChild);
}
SplitAndSpillOverlappingIntervals(context, current);
SplitAndSpillOverlappingIntervals(context, current, registersCount);
context.Active.Set(cIndex);
}
@ -460,7 +461,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return selected;
}
private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current)
private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
{
foreach (int iIndex in context.Active)
{
@ -468,7 +469,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (!interval.IsFixed && interval.Register == current.Register)
{
SplitAndSpillOverlappingInterval(context, current, interval);
SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
context.Active.Clear(iIndex);
}
@ -480,7 +481,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
{
SplitAndSpillOverlappingInterval(context, current, interval);
SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
context.Inactive.Clear(iIndex);
}
@ -490,7 +491,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
private void SplitAndSpillOverlappingInterval(
AllocationContext context,
LiveInterval current,
LiveInterval interval)
LiveInterval interval,
int registersCount)
{
// If there's a next use after the start of the current interval,
// we need to split the spilled interval twice, and re-insert it
@ -522,7 +524,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
splitChild = right;
}
InsertInterval(splitChild);
InsertInterval(splitChild, registersCount);
}
else
{
@ -530,13 +532,13 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
}
private void InsertInterval(LiveInterval interval)
private void InsertInterval(LiveInterval interval, int registersCount)
{
Debug.Assert(interval.UsesCount != 0, "Trying to insert an interval without uses.");
Debug.Assert(!interval.IsEmpty, "Trying to insert an empty interval.");
Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
int startIndex = RegistersCount * 2;
int startIndex = registersCount * 2;
int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
@ -790,12 +792,12 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
return _operationNodes[position / InstructionGap];
}
private void NumberLocals(ControlFlowGraph cfg)
private void NumberLocals(ControlFlowGraph cfg, int registersCount)
{
_operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
_intervals = new List<LiveInterval>();
for (int index = 0; index < RegistersCount; index++)
for (int index = 0; index < registersCount; index++)
{
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
_intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
@ -1041,6 +1043,11 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
{
LiveInterval interval = _intervals[GetOperandId(dest)];
if (interval.IsFixed)
{
interval.IsFixedAndUsed = true;
}
interval.SetStart(operationPos + 1);
interval.AddUsePosition(operationPos + 1);
}

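The allocation loop above now checks IsFixedAndUsed (added to LiveInterval in the next hunk) before recording a fixed register in the used-register masks, and the flag is only set when a definition actually lands on that fixed interval. The likely effect, an inference from the code rather than a stated goal, is that fixed registers which are never written no longer count as used, so callee-saved registers do not get saved and restored needlessly. A sketch of where the flag is set and how the masks are typically consumed downstream (the prologue snippet is an assumption):

// From the hunk above: flag fixed intervals only when something is assigned to them.
LiveInterval interval = _intervals[GetOperandId(dest)];
if (interval.IsFixed)
{
    interval.IsFixedAndUsed = true;
}

// Hypothetical consumer: only callee-saved registers present in the used mask get pushed.
int intToSave = allocResult.IntUsedRegisters & CallingConvention.GetIntCalleeSavedRegisters();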

@ -27,6 +27,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public Register Register;
public bool IsFixed;
public bool IsFixedAndUsed;
}
private readonly Data* _data;
@ -44,6 +45,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public ref int SpillOffset => ref _data->SpillOffset;
public bool IsFixed => _data->IsFixed;
public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
public bool IsEmpty => FirstRange == default;
public bool IsSplit => Children.Count != 0;
public bool IsSpilled => SpillOffset != -1;
@ -114,7 +116,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
}
else
{
FirstRange = new LiveRange(position, position + 1);
End = position + 1;
}
}


@ -3,7 +3,7 @@ using System;
namespace ARMeilleure.CodeGen.RegisterAllocators
{
struct RegisterMasks
readonly struct RegisterMasks
{
public int IntAvailableRegisters { get; }
public int VecAvailableRegisters { get; }
@ -11,6 +11,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
public int VecCallerSavedRegisters { get; }
public int IntCalleeSavedRegisters { get; }
public int VecCalleeSavedRegisters { get; }
public int RegistersCount { get; }
public RegisterMasks(
int intAvailableRegisters,
@ -18,7 +19,8 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
int intCallerSavedRegisters,
int vecCallerSavedRegisters,
int intCalleeSavedRegisters,
int vecCalleeSavedRegisters)
int vecCalleeSavedRegisters,
int registersCount)
{
IntAvailableRegisters = intAvailableRegisters;
VecAvailableRegisters = vecAvailableRegisters;
@ -26,6 +28,7 @@ namespace ARMeilleure.CodeGen.RegisterAllocators
VecCallerSavedRegisters = vecCallerSavedRegisters;
IntCalleeSavedRegisters = intCalleeSavedRegisters;
VecCalleeSavedRegisters = vecCalleeSavedRegisters;
RegistersCount = registersCount;
}
public int GetAvailableRegisters(RegisterType type)

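RegisterMasks now carries the register count, so the shared allocators above no longer hard-code 16 registers per bank. The x86-64 back end in the next hunk keeps passing 16; an Arm64 back end would pass its own count. The sketch below is a hypothetical Arm64 call site: the value 32 (AArch64 exposes 32 GPRs and 32 SIMD registers) and the two *AvailableRegisters getter names are assumptions patterned after the getters visible in the x86 hunk, not code from this compare.

// Hypothetical Arm64 construction, mirroring the x86 call site shown below.
RegisterMasks regMasks = new RegisterMasks(
    CallingConvention.GetIntAvailableRegisters(),
    CallingConvention.GetVecAvailableRegisters(),
    CallingConvention.GetIntCallerSavedRegisters(),
    CallingConvention.GetVecCallerSavedRegisters(),
    CallingConvention.GetIntCalleeSavedRegisters(),
    CallingConvention.GetVecCalleeSavedRegisters(),
    registersCount: 32);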

@ -16,6 +16,7 @@ namespace ARMeilleure.CodeGen.X86
{
static class CodeGenerator
{
private const int RegistersCount = 16;
private const int PageSize = 0x1000;
private const int StackGuardSize = 0x2000;
@ -143,7 +144,8 @@ namespace ARMeilleure.CodeGen.X86
CallingConvention.GetIntCallerSavedRegisters(),
CallingConvention.GetVecCallerSavedRegisters(),
CallingConvention.GetIntCalleeSavedRegisters(),
CallingConvention.GetVecCalleeSavedRegisters());
CallingConvention.GetVecCalleeSavedRegisters(),
RegistersCount);
AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
@ -1587,6 +1589,12 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
// We can eliminate the move if source is already 32-bit and the registers are the same.
if (dest.Value == source.Value && source.Type == OperandType.I32)
{
return;
}
context.Assembler.Mov(dest, source, OperandType.I32);
}


@ -1,6 +1,6 @@
namespace ARMeilleure.CodeGen.X86
{
struct IntrinsicInfo
readonly struct IntrinsicInfo
{
public X86Instruction Inst { get; }
public IntrinsicType Type { get; }


@ -5,8 +5,6 @@ namespace ARMeilleure.CodeGen.X86
{
static class IntrinsicTable
{
private const int BadOp = 0;
private static IntrinsicInfo[] _intrinTable;
static IntrinsicTable()


@ -80,10 +80,7 @@ namespace ARMeilleure.Common
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
lock (_pages)
{
@ -100,10 +97,7 @@ namespace ARMeilleure.Common
/// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception>
public AddressTable(Level[] levels)
{
if (levels == null)
{
throw new ArgumentNullException(nameof(levels));
}
ArgumentNullException.ThrowIfNull(levels);
if (levels.Length < 2)
{
@ -141,10 +135,7 @@ namespace ARMeilleure.Common
/// <exception cref="ArgumentException"><paramref name="address"/> is not mapped</exception>
public ref TEntry GetValue(ulong address)
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
if (!IsValid(address))
{

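The guard-clause rewrites above (and the matching ones in the Counter and EntryTable hunks below) move to the throw-helper APIs: ArgumentNullException.ThrowIfNull from .NET 6 and ObjectDisposedException.ThrowIf from .NET 7. Besides being shorter, ThrowIf reports the disposed object's type name, whereas the previous new ObjectDisposedException(null) produced a message with no object name. Minimal before and after:

// Old guards:
if (_disposed) throw new ObjectDisposedException(null);
if (levels == null) throw new ArgumentNullException(nameof(levels));

// New guards:
ObjectDisposedException.ThrowIf(_disposed, this);   // message names this object's type
ArgumentNullException.ThrowIfNull(levels);          // parameter name captured automatically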

@ -1,15 +1,11 @@
using System;
using System.Numerics;
namespace ARMeilleure.Common
{
static class BitUtils
{
private static readonly sbyte[] HbsNibbleLut;
static BitUtils()
{
HbsNibbleLut = new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
}
private static ReadOnlySpan<sbyte> HbsNibbleLut => new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
public static long FillWithOnes(int bits)
{

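The BitUtils change above replaces a static sbyte[] built in a static constructor with a ReadOnlySpan<sbyte> property over an array literal. For single-byte element types the C# compiler emits such constant data into the assembly's data section and the span points straight at it, so there is no allocation and no static constructor to run. Callers index it exactly like the old array; a hypothetical consumer (the real call sites are not shown in this diff):

// Hypothetical consumer of the nibble lookup table.
public static int HighestBitSetNibble(int value) => HbsNibbleLut[value & 0xf];

// HighestBitSetNibble(0b0100) == 2
// HighestBitSetNibble(0)      == -1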

@ -49,10 +49,7 @@ namespace ARMeilleure.Common
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
return ref _countTable.GetValue(_index);
}


@ -53,10 +53,7 @@ namespace ARMeilleure.Common
/// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
public int Allocate()
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
lock (_allocated)
{
@ -83,10 +80,7 @@ namespace ARMeilleure.Common
/// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception>
public void Free(int index)
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
lock (_allocated)
{
@ -108,10 +102,7 @@ namespace ARMeilleure.Common
/// <exception cref="ArgumentException">Entry at <paramref name="index"/> is not allocated</exception>
public ref TEntry GetValue(int index)
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
lock (_allocated)
{


@ -2,7 +2,7 @@ using ARMeilleure.Instructions;
namespace ARMeilleure.Decoders
{
struct InstDescriptor
readonly struct InstDescriptor
{
public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, InstEmit.Und);


@ -11,7 +11,7 @@ namespace ARMeilleure.Decoders
private const int FastLookupSize = 0x1000;
private struct InstInfo
private readonly struct InstInfo
{
public int Mask { get; }
public int Value { get; }
@ -1341,7 +1341,7 @@ namespace ARMeilleure.Decoders
{
string reversedEncoding = encoding.Substring(16) + encoding.Substring(0, 16);
MakeOp reversedMakeOp =
(InstDescriptor inst, ulong address, int opCode)
(inst, address, opCode)
=> makeOp(inst, address, (int)BitOperations.RotateRight((uint)opCode, 16));
Set(reversedEncoding, AllInstT32, new InstDescriptor(name, emitter), reversedMakeOp);
}


@ -6,7 +6,7 @@ namespace ARMeilleure.Diagnostics
{
static class Symbols
{
private struct RangedSymbol
private readonly struct RangedSymbol
{
public readonly ulong Start;
public readonly ulong End;


@ -25,13 +25,13 @@ namespace ARMeilleure.Diagnostics
_funcTabSizeCounter = new PollingCounter("addr-tab-alloc", this, () => _funcTabSize / 1024d / 1024d)
{
DisplayName = "AddressTable Total Bytes Allocated",
DisplayUnits = "MB"
DisplayUnits = "MiB"
};
_funcTabLeafSizeCounter = new PollingCounter("addr-tab-leaf-alloc", this, () => _funcTabLeafSize / 1024d / 1024d)
{
DisplayName = "AddressTable Total Leaf Bytes Allocated",
DisplayUnits = "MB"
DisplayUnits = "MiB"
};
}

File diff suppressed because it is too large


@ -2,6 +2,7 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitFlowHelper;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
@ -30,7 +31,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
@ -49,7 +54,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
@ -76,7 +85,11 @@ namespace ARMeilleure.Instructions
public static void Vadd_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd);
}
@ -92,7 +105,11 @@ namespace ARMeilleure.Instructions
public static void Vadd_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
}
@ -280,7 +297,11 @@ namespace ARMeilleure.Instructions
public static void Vfma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd);
}
@ -299,7 +320,11 @@ namespace ARMeilleure.Instructions
public static void Vfma_V(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps);
}
@ -314,7 +339,11 @@ namespace ARMeilleure.Instructions
public static void Vfms_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd);
}
@ -333,7 +362,11 @@ namespace ARMeilleure.Instructions
public static void Vfms_V(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps);
}
@ -348,7 +381,11 @@ namespace ARMeilleure.Instructions
public static void Vfnma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
}
@ -367,7 +404,11 @@ namespace ARMeilleure.Instructions
public static void Vfnms_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
}
else if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd);
}
@ -419,7 +460,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS);
}
else if (Optimizations.UseSse2)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
@ -445,7 +490,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS);
}
else if (Optimizations.UseSse2)
{
EmitScalarBinaryOpSimd32(context, (n, m) =>
{
@ -473,7 +522,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true);
}
@ -498,7 +551,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true);
}
@ -525,7 +582,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
@ -554,7 +615,11 @@ namespace ARMeilleure.Instructions
public static void Vdiv_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd);
}
@ -573,7 +638,11 @@ namespace ARMeilleure.Instructions
public static void Vmaxnm_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS);
}
else if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, true);
}
@ -585,7 +654,11 @@ namespace ARMeilleure.Instructions
public static void Vmaxnm_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV);
}
else if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, true, false);
}
@ -597,7 +670,11 @@ namespace ARMeilleure.Instructions
public static void Vminnm_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS);
}
else if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, true);
}
@ -609,7 +686,11 @@ namespace ARMeilleure.Instructions
public static void Vminnm_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse41)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV);
}
else if (Optimizations.FastFP && Optimizations.UseSse41)
{
EmitSse41MaxMinNumOpF32(context, false, false);
}
@ -621,7 +702,11 @@ namespace ARMeilleure.Instructions
public static void Vmax_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
}
@ -664,7 +749,11 @@ namespace ARMeilleure.Instructions
public static void Vmin_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
}
@ -707,7 +796,11 @@ namespace ARMeilleure.Instructions
public static void Vmla_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
}
@ -730,7 +823,11 @@ namespace ARMeilleure.Instructions
public static void Vmla_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
}
@ -786,7 +883,11 @@ namespace ARMeilleure.Instructions
public static void Vmls_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
}
@ -809,7 +910,11 @@ namespace ARMeilleure.Instructions
public static void Vmls_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
}
@ -865,7 +970,11 @@ namespace ARMeilleure.Instructions
public static void Vmul_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
}
@ -884,7 +993,11 @@ namespace ARMeilleure.Instructions
public static void Vmul_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
}
@ -975,7 +1088,11 @@ namespace ARMeilleure.Instructions
public static void Vpadd_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps);
}
@ -1008,7 +1125,11 @@ namespace ARMeilleure.Instructions
public static void Vpmax_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps);
}
@ -1038,7 +1159,11 @@ namespace ARMeilleure.Instructions
public static void Vpmin_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps);
}
@ -1217,7 +1342,11 @@ namespace ARMeilleure.Instructions
{
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
{
EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0);
}
@ -1237,7 +1366,11 @@ namespace ARMeilleure.Instructions
public static void Vrecps(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
bool single = (op.Size & 1) == 0;
@ -1304,7 +1437,11 @@ namespace ARMeilleure.Instructions
{
int sizeF = op.Size & 1;
if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0)
{
EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0);
}
@ -1324,7 +1461,11 @@ namespace ARMeilleure.Instructions
public static void Vrsqrts(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
bool single = (op.Size & 1) == 0;
@ -1393,7 +1534,11 @@ namespace ARMeilleure.Instructions
public static void Vsqrt_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd);
}
@ -1408,7 +1553,11 @@ namespace ARMeilleure.Instructions
public static void Vsub_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd);
}
@ -1420,7 +1569,11 @@ namespace ARMeilleure.Instructions
public static void Vsub_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd);
}


@ -466,12 +466,26 @@ namespace ARMeilleure.Instructions
public static void Fcmp_S(ArmEmitterContext context)
{
EmitFcmpOrFcmpe(context, signalNaNs: false);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false);
}
else
{
EmitFcmpOrFcmpe(context, signalNaNs: false);
}
}
public static void Fcmpe_S(ArmEmitterContext context)
{
EmitFcmpOrFcmpe(context, signalNaNs: true);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true);
}
else
{
EmitFcmpOrFcmpe(context, signalNaNs: true);
}
}
private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)


@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
{
public static void Vceq_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false);
}
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
}
@ -55,7 +63,11 @@ namespace ARMeilleure.Instructions
public static void Vcge_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseAvx)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
}
else if (Optimizations.FastFP && Optimizations.UseAvx)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
}
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseAvx)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
}
else if (Optimizations.FastFP && Optimizations.UseAvx)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
}
@ -95,7 +111,11 @@ namespace ARMeilleure.Instructions
public static void Vcgt_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseAvx)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false);
}
else if (Optimizations.FastFP && Optimizations.UseAvx)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false);
}
@ -118,7 +138,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseAvx)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true);
}
else if (Optimizations.FastFP && Optimizations.UseAvx)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true);
}
@ -139,7 +163,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true);
}
@ -160,7 +188,11 @@ namespace ARMeilleure.Instructions
if (op.F)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
if (Optimizations.FastFP && Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true);
}
else if (Optimizations.FastFP && Optimizations.UseSse2)
{
EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true);
}
@ -247,12 +279,26 @@ namespace ARMeilleure.Instructions
public static void Vcmp(ArmEmitterContext context)
{
EmitVcmpOrVcmpe(context, false);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false);
}
else
{
EmitVcmpOrVcmpe(context, false);
}
}
public static void Vcmpe(ArmEmitterContext context)
{
EmitVcmpOrVcmpe(context, true);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true);
}
else
{
EmitVcmpOrVcmpe(context, true);
}
}
private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)


@ -164,7 +164,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtas_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
}
@ -176,7 +180,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtas_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
}
@ -188,7 +196,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtas_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
}
@ -200,7 +212,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtau_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false);
}
@ -212,7 +228,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtau_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true);
}
@ -224,7 +244,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtau_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false);
}
@ -240,7 +264,11 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
if (Optimizations.UseSse2 && sizeF == 1)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV);
}
else if (Optimizations.UseSse2 && sizeF == 1)
{
Operand n = GetVec(op.Rn);
@ -296,7 +324,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtms_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
}
@ -308,7 +340,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtms_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
}
@ -320,7 +356,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtmu_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
}
@ -336,7 +376,11 @@ namespace ARMeilleure.Instructions
int sizeF = op.Size & 1;
if (Optimizations.UseSse2 && sizeF == 1)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV);
}
else if (Optimizations.UseSse2 && sizeF == 1)
{
Operand d = GetVec(op.Rd);
@ -381,7 +425,7 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
if (sizeF == 0)
{
@ -389,8 +433,6 @@ namespace ARMeilleure.Instructions
Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
context.LoadFromContext();
e = context.ZeroExtend16(OperandType.I64, e);
res = EmitVectorInsert(context, res, e, part + index, 1);
}
else /* if (sizeF == 1) */
@ -407,7 +449,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtns_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
}
@ -419,7 +465,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtns_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
}
@ -431,7 +481,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtns_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
}
@ -443,7 +497,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtnu_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
}
@ -455,7 +513,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtnu_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
}
@ -467,7 +529,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtps_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
}
@ -479,7 +545,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtpu_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
}
@ -491,7 +561,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzs_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
}
@ -503,7 +577,13 @@ namespace ARMeilleure.Instructions
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
}
@ -515,7 +595,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzs_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
}
@ -527,7 +611,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzs_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
}
@ -539,7 +627,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context));
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
}
@ -551,7 +643,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzu_Gp(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
}
@ -563,7 +659,13 @@ namespace ARMeilleure.Instructions
public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits);
}
else if (Optimizations.UseSse41)
{
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
}
@ -575,7 +677,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzu_S(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
}
@ -587,7 +693,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzu_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV);
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
}
@ -599,7 +709,11 @@ namespace ARMeilleure.Instructions
public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context));
}
else if (Optimizations.UseSse41)
{
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
}
@ -611,41 +725,59 @@ namespace ARMeilleure.Instructions
public static void Scvtf_Gp(ArmEmitterContext context)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand res = GetIntOrZR(context, op.Rn);
if (op.RegisterSize == RegisterSize.Int32)
if (Optimizations.UseAdvSimd)
{
res = context.SignExtend32(OperandType.I64, res);
InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp);
}
else
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
res = EmitFPConvert(context, res, op.Size, signed: true);
Operand res = GetIntOrZR(context, op.Rn);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
if (op.RegisterSize == RegisterSize.Int32)
{
res = context.SignExtend32(OperandType.I64, res);
}
res = EmitFPConvert(context, res, op.Size, signed: true);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
}
public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand res = GetIntOrZR(context, op.Rn);
if (op.RegisterSize == RegisterSize.Int32)
if (Optimizations.UseAdvSimd)
{
res = context.SignExtend32(OperandType.I64, res);
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits);
}
else
{
Operand res = GetIntOrZR(context, op.Rn);
res = EmitFPConvert(context, res, op.Size, signed: true);
if (op.RegisterSize == RegisterSize.Int32)
{
res = context.SignExtend32(OperandType.I64, res);
}
res = EmitI2fFBitsMul(context, res, op.FBits);
res = EmitFPConvert(context, res, op.Size, signed: true);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
res = EmitI2fFBitsMul(context, res, op.FBits);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
}
public static void Scvtf_S(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS);
}
else if (Optimizations.UseSse2)
{
EmitSse2ScvtfOp(context, scalar: true);
}
@ -657,7 +789,11 @@ namespace ARMeilleure.Instructions
public static void Scvtf_S_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context));
}
else if (Optimizations.UseSse2)
{
EmitSse2ScvtfOp(context, scalar: true);
}
@ -669,7 +805,11 @@ namespace ARMeilleure.Instructions
public static void Scvtf_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV);
}
else if (Optimizations.UseSse2)
{
EmitSse2ScvtfOp(context, scalar: false);
}
@ -681,7 +821,11 @@ namespace ARMeilleure.Instructions
public static void Scvtf_V_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context));
}
else if (Optimizations.UseSse2)
{
EmitSse2ScvtfOp(context, scalar: false);
}
@ -693,31 +837,49 @@ namespace ARMeilleure.Instructions
public static void Ucvtf_Gp(ArmEmitterContext context)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp);
}
else
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand res = GetIntOrZR(context, op.Rn);
Operand res = GetIntOrZR(context, op.Rn);
res = EmitFPConvert(context, res, op.Size, signed: false);
res = EmitFPConvert(context, res, op.Size, signed: false);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
}
public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand res = GetIntOrZR(context, op.Rn);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits);
}
else
{
Operand res = GetIntOrZR(context, op.Rn);
res = EmitFPConvert(context, res, op.Size, signed: false);
res = EmitFPConvert(context, res, op.Size, signed: false);
res = EmitI2fFBitsMul(context, res, op.FBits);
res = EmitI2fFBitsMul(context, res, op.FBits);
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
}
}
public static void Ucvtf_S(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS);
}
else if (Optimizations.UseSse2)
{
EmitSse2UcvtfOp(context, scalar: true);
}
@ -729,7 +891,11 @@ namespace ARMeilleure.Instructions
public static void Ucvtf_S_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context));
}
else if (Optimizations.UseSse2)
{
EmitSse2UcvtfOp(context, scalar: true);
}
@ -741,7 +907,11 @@ namespace ARMeilleure.Instructions
public static void Ucvtf_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV);
}
else if (Optimizations.UseSse2)
{
EmitSse2UcvtfOp(context, scalar: false);
}
@ -753,7 +923,11 @@ namespace ARMeilleure.Instructions
public static void Ucvtf_V_Fixed(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context));
}
else if (Optimizations.UseSse2)
{
EmitSse2UcvtfOp(context, scalar: false);
}


@ -59,7 +59,11 @@ namespace ARMeilleure.Instructions
if (toInteger)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV);
}
else if (Optimizations.UseSse41)
{
EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
}
@ -153,7 +157,28 @@ namespace ARMeilleure.Instructions
bool unsigned = (op.Opc2 & 1) == 0;
bool roundWithFpscr = op.Opc != 1;
if (!roundWithFpscr && Optimizations.UseSse41)
if (!roundWithFpscr && Optimizations.UseAdvSimd)
{
bool doubleSize = floatSize == OperandType.FP64;
if (doubleSize)
{
Operand m = GetVecA32(op.Vm >> 1);
Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize);
Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble;
Operand asInteger = context.AddIntrinsicInt(inst, toConvert);
InsertScalar(context, op.Vd, asInteger);
}
else
{
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS);
}
}
else if (!roundWithFpscr && Optimizations.UseSse41)
{
EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
}
@ -231,7 +256,34 @@ namespace ARMeilleure.Instructions
bool unsigned = op.Opc == 0;
int rm = op.Opc2 & 3;
if (Optimizations.UseSse41)
Intrinsic inst;
if (Optimizations.UseAdvSimd)
{
if (unsigned)
{
inst = rm switch {
0b00 => Intrinsic.Arm64FcvtauS,
0b01 => Intrinsic.Arm64FcvtnuS,
0b10 => Intrinsic.Arm64FcvtpuS,
0b11 => Intrinsic.Arm64FcvtmuS,
_ => throw new ArgumentOutOfRangeException(nameof(rm))
};
}
else
{
inst = rm switch {
0b00 => Intrinsic.Arm64FcvtasS,
0b01 => Intrinsic.Arm64FcvtnsS,
0b10 => Intrinsic.Arm64FcvtpsS,
0b11 => Intrinsic.Arm64FcvtmsS,
_ => throw new ArgumentOutOfRangeException(nameof(rm))
};
}
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
}
else if (Optimizations.UseSse41)
{
EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
}
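The rm switch in the hunk above maps the AArch32 two-bit rounding-mode field onto the matching AArch64 convert intrinsics: A rounds to nearest with ties away from zero, N to nearest with ties to even, P towards plus infinity, and M towards minus infinity. A self-contained sketch of that decoding, using a hypothetical enum and method name purely for illustration:

using System;

enum RoundMode { TiesToAway, TiesToEven, TowardsPlusInfinity, TowardsMinusInfinity }

static class RmDecoder
{
    public static RoundMode Decode(int rm) => rm switch
    {
        0b00 => RoundMode.TiesToAway,           // FCVTA* / FRINTA
        0b01 => RoundMode.TiesToEven,           // FCVTN* / FRINTN
        0b10 => RoundMode.TowardsPlusInfinity,  // FCVTP* / FRINTP
        0b11 => RoundMode.TowardsMinusInfinity, // FCVTM* / FRINTM
        _ => throw new ArgumentOutOfRangeException(nameof(rm)),
    };
}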
@ -338,7 +390,19 @@ namespace ARMeilleure.Instructions
int rm = op.Opc2 & 3;
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
Intrinsic inst = rm switch {
0b00 => Intrinsic.Arm64FrintaS,
0b01 => Intrinsic.Arm64FrintnS,
0b10 => Intrinsic.Arm64FrintpS,
0b11 => Intrinsic.Arm64FrintmS,
_ => throw new ArgumentOutOfRangeException(nameof(rm))
};
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst);
}
else if (Optimizations.UseSse41)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{
@ -382,12 +446,9 @@ namespace ARMeilleure.Instructions
// VRINTA (vector).
public static void Vrinta_V(ArmEmitterContext context)
{
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: false);
});
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS);
}
else
{
@ -398,7 +459,11 @@ namespace ARMeilleure.Instructions
// VRINTM (vector).
public static void Vrintm_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS);
}
else if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
@ -414,7 +479,11 @@ namespace ARMeilleure.Instructions
// VRINTN (vector).
public static void Vrintn_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS);
}
else if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
@ -430,7 +499,11 @@ namespace ARMeilleure.Instructions
// VRINTP (vector).
public static void Vrintp_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS);
}
else if (Optimizations.UseSse2)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
@ -448,7 +521,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS);
}
else if (Optimizations.UseSse2)
{
EmitScalarUnaryOpSimd32(context, (m) =>
{


@ -0,0 +1,366 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
using Func1I = Func<Operand, Operand>;
using Func2I = Func<Operand, Operand, Operand>;
using Func3I = Func<Operand, Operand, Operand, Operand>;
static class InstEmitSimdHelper32Arm64
{
// Intrinsic Helpers
public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV)
{
Debug.Assert(input.Type == OperandType.V128);
int originalSide = originalV & 1;
int targetSide = targetV & 1;
if (originalSide == targetSide)
{
return input;
}
Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128;
if (targetSide == 1)
{
return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high.
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low.
}
}
public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV)
{
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
int targetSide = targetV & 1;
Operand idx = Const(targetSide);
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx);
}
public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
{
Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);
// Insert from index 0 in value to index in target.
int index = reg & (doubleWidth ? 1 : 3);
if (doubleWidth)
{
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0));
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0));
}
}
public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth)
{
int index = reg & (doubleWidth ? 1 : 3);
if (index == 0) return target; // Element is already at index 0, so just return the vector directly.
if (doubleWidth)
{
return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1).
}
else
{
return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index.
}
}
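These insert/extract helpers encode how the AArch32 floating-point register file overlays the 128-bit Q registers: Dn occupies doubleword (n & 1) of Q(n >> 1), while Sn occupies word (n & 3) of Q(n >> 2), which is why callers shift the register index by 1 or 2 and the helpers mask it with 1 or 3. A standalone sketch of that index math (hypothetical helper, illustration only):

static class A32RegMap
{
    // Returns the backing Q register index and the element slot inside it.
    public static (int Q, int Element) Map(int reg, bool doubleWidth)
    {
        return doubleWidth
            ? (reg >> 1, reg & 1)  // Dn -> Q(n >> 1), doubleword element n & 1
            : (reg >> 2, reg & 3); // Sn -> Q(n >> 2), word element n & 3
    }
}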
// Vector Operand Templates
public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(m);
if (!op.Q) // Register insert.
{
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m));
}
public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
if (side == -1)
{
side = op.Vd;
}
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
n = EmitMoveDoubleWordToSide(context, n, op.Vn, side);
m = EmitMoveDoubleWordToSide(context, m, op.Vm, side);
}
Operand res = vectorFunc(n, m);
if (!op.Q) // Register insert.
{
if (side != op.Vd)
{
res = EmitMoveDoubleWordToSide(context, res, side, op.Vd);
}
res = EmitDoubleWordInsert(context, d, res, op.Vd);
}
context.Copy(d, res);
}
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand d = GetVecA32(op.Qd);
Operand initialD = d;
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd);
m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd);
}
Operand res = vectorFunc(d, n, m);
if (!op.Q) // Register insert.
{
res = EmitDoubleWordInsert(context, initialD, res, op.Vd);
}
context.Copy(initialD, res);
}
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
}
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand res = scalarFunc(m);
// Insert scalar into vector.
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m));
}
public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vn >> shift);
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
n = EmitExtractScalar(context, n, op.Vn, doubleSize);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand res = scalarFunc(n, m);
// Insert scalar into vector.
res = EmitScalarInsert(context, d, res, op.Vd, doubleSize);
context.Copy(d, res);
}
public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vn >> shift);
Operand m = GetVecA32(op.Vm >> shift);
Operand d = GetVecA32(op.Vd >> shift);
Operand initialD = d;
n = EmitExtractScalar(context, n, op.Vn, doubleSize);
m = EmitExtractScalar(context, m, op.Vm, doubleSize);
d = EmitExtractScalar(context, d, op.Vd, doubleSize);
Operand res = scalarFunc(d, n, m);
// Insert scalar into vector.
res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize);
context.Copy(initialD, res);
}
public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst)
{
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m));
}
// Pairwise
public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat;
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0);
}
public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool cmpWithZero = (op.Opc & 2) != 0;
Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
Operand n = GetVecA32(op.Vd >> shift);
Operand m = GetVecA32(op.Vm >> shift);
n = EmitExtractScalar(context, n, op.Vd, doubleSize);
m = cmpWithZero ? Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize);
Operand nzcv = context.AddIntrinsicInt(inst, n, m);
Operand one = Const(1);
SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
}
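EmitVcmpOrVcmpe above reads the AArch64 comparison result back as a packed NZCV word, N in bit 31 down through V in bit 28, and copies each bit into the emulated FPSCR flags. A standalone sketch of that unpacking (hypothetical helper name, illustration only):

static class NzcvBits
{
    // The flags word packs N:Z:C:V into bits 31..28, matching the shifts used above.
    public static (bool N, bool Z, bool C, bool V) Unpack(uint nzcv)
    {
        return (((nzcv >> 31) & 1) != 0,
                ((nzcv >> 30) & 1) != 0,
                ((nzcv >> 29) & 1) != 0,
                ((nzcv >> 28) & 1) != 0);
    }
}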
public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
int sizeF = op.Size & 1;
Intrinsic inst;
if (zero)
{
inst = cond switch
{
CmpCondition.Equal => Intrinsic.Arm64FcmeqVz,
CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz,
CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz,
CmpCondition.LessThan => Intrinsic.Arm64FcmltVz,
CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz,
_ => throw new InvalidOperationException()
};
}
else
{
inst = cond switch
{
CmpCondition.Equal => Intrinsic.Arm64FcmeqV,
CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV,
CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV,
_ => throw new InvalidOperationException()
};
}
inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128;
if (zero)
{
EmitVectorUnaryOpSimd32(context, (m) =>
{
return context.AddIntrinsic(inst, m);
});
}
else
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(inst, n, m);
});
}
}
}
}


@ -0,0 +1,720 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.State;
using ARMeilleure.Translation;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
static class InstEmitSimdHelperArm64
{
public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand n = GetIntOrZR(context, op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (inst, n)
: context.AddIntrinsicLong(inst, n));
}
public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand a = GetVec(op.Ra);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m));
}
public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
}
public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
}
public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
context.SetPendingQcFlagSync();
}
public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, n, m);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand result = context.AddIntrinsic(inst, d, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetIntOrZR(context, op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32
? context.AddIntrinsicInt (inst, n, Const(fBits))
: context.AddIntrinsicLong(inst, n, Const(fBits)));
}
public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n));
}
public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m));
}
public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n));
}
public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index)));
}
public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m));
}
public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index)));
}
public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift)));
}
public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
}
public static void EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift)));
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n, m);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, d, n);
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index));
context.Copy(GetVec(op.Rd), result);
context.SetPendingQcFlagSync();
}
public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits)));
}
public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst)
{
OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp;
Operand[] operands = new Operand[op.Size + 1];
operands[op.Size] = GetVec(op.Rm);
for (int index = 0; index < op.Size; index++)
{
operands[index] = GetVec((op.Rn + index) & 0x1F);
}
if (op.RegisterSize == RegisterSize.Simd128)
{
inst |= Intrinsic.Arm64V128;
}
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands));
}
public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS;
if ((op.Size & 1) != 0)
{
inst |= Intrinsic.Arm64VDouble;
}
Operand n = GetVec(op.Rn);
Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm);
Operand nzcv = context.AddIntrinsicInt(inst, n, m);
Operand one = Const(1);
SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one));
SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one));
SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one));
SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one));
}
}
}
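
The hunks below wire these helpers into the individual instruction emitters. As a rough sketch of that dispatch pattern (the emitter name Xyz_V, the chosen intrinsic and the fallback comment are placeholders, not part of the change), each call site gains an AdvSIMD fast path ahead of the existing SSE/software code:

public static void Xyz_V(ArmEmitterContext context)
{
    if (Optimizations.UseAdvSimd)
    {
        // Native path: the helper applies the element-size and vector-width flags.
        InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV);
    }
    else
    {
        // ... existing SSE or software fallback, unchanged ...
    }
}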


@ -14,7 +14,11 @@ namespace ARMeilleure.Instructions
{
public static void And_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
public static void Bic_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -98,12 +106,26 @@ namespace ARMeilleure.Instructions
public static void Bif_V(ArmEmitterContext context)
{
EmitBifBit(context, notRm: true);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV);
}
else
{
EmitBifBit(context, notRm: true);
}
}
public static void Bit_V(ArmEmitterContext context)
{
EmitBifBit(context, notRm: false);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV);
}
else
{
EmitBifBit(context, notRm: false);
}
}
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
@ -167,7 +189,11 @@ namespace ARMeilleure.Instructions
public static void Bsl_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -200,7 +226,11 @@ namespace ARMeilleure.Instructions
public static void Eor_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -249,7 +279,11 @@ namespace ARMeilleure.Instructions
public static void Orn_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@ -280,7 +314,11 @@ namespace ARMeilleure.Instructions
public static void Orr_V(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV);
}
else if (Optimizations.UseSse2)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;


@ -13,7 +13,11 @@ namespace ARMeilleure.Instructions
{
public static void Vand_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseSse2)
{
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m));
}
@ -25,7 +29,11 @@ namespace ARMeilleure.Instructions
public static void Vbic_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseSse2)
{
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n));
}
@ -73,17 +81,35 @@ namespace ARMeilleure.Instructions
public static void Vbif(ArmEmitterContext context)
{
EmitBifBit(context, true);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m));
}
else
{
EmitBifBit(context, true);
}
}
public static void Vbit(ArmEmitterContext context)
{
EmitBifBit(context, false);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m));
}
else
{
EmitBifBit(context, false);
}
}
public static void Vbsl(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m));
}
else if (Optimizations.UseSse2)
{
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
{
@ -105,7 +131,11 @@ namespace ARMeilleure.Instructions
public static void Veor_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseSse2)
{
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m));
}
@ -117,7 +147,11 @@ namespace ARMeilleure.Instructions
public static void Vorn_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseSse2)
{
Operand mask = context.VectorOne();
@ -135,7 +169,11 @@ namespace ARMeilleure.Instructions
public static void Vorr_I(ArmEmitterContext context)
{
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | Intrinsic.Arm64V128, n, m));
}
else if (Optimizations.UseSse2)
{
EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m));
}


@ -392,7 +392,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
if (Optimizations.UseSse2)
if (Optimizations.UseAdvSimd)
{
EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V);
}
else if (Optimizations.UseSse2)
{
EmitVectorShuffleOpSimd32(context, (m, d) =>
{
@ -461,7 +465,11 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
if (Optimizations.UseSsse3)
if (Optimizations.UseAdvSimd)
{
EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V);
}
else if (Optimizations.UseSsse3)
{
EmitVectorShuffleOpSimd32(context, (m, d) =>
{
@ -559,6 +567,52 @@ namespace ARMeilleure.Instructions
}
}
private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2)
{
OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;
bool overlap = op.Qm == op.Qd;
Operand d = GetVecA32(op.Qd);
Operand m = GetVecA32(op.Qm);
Operand dPart = d;
Operand mPart = m;
if (!op.Q) // Register swap: move relevant doubleword to destination side.
{
dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0);
mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0);
}
Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64;
vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift);
Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart);
Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart);
if (!op.Q) // Register insert.
{
resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0));
if (overlap)
{
resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0));
}
else
{
resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0));
}
}
context.Copy(d, resD);
if (!overlap)
{
context.Copy(m, resM);
}
}
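// Illustration only, not part of the change set (this method is made up): the
// per-lane semantics of the Zip/Uzp pairs the helper above dispatches to, with
// plain arrays standing in for the four lanes of each source vector.
private static (int[] Zip1, int[] Zip2, int[] Uzp1, int[] Uzp2) ZipUzpExample(int[] n, int[] m)
{
    return (
        new[] { n[0], m[0], n[1], m[1] },  // Zip1: interleave the low halves
        new[] { n[2], m[2], n[3], m[3] },  // Zip2: interleave the high halves
        new[] { n[0], n[2], m[0], m[2] },  // Uzp1: even-indexed lanes of n, then m
        new[] { n[1], n[3], m[1], m[3] }); // Uzp2: odd-indexed lanes of n, then m
}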
private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;


@ -26,7 +26,15 @@ namespace ARMeilleure.Instructions
public static void Rshrn_V(ArmEmitterContext context)
{
if (Optimizations.UseSsse3)
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift);
}
else if (Optimizations.UseSsse3)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
@ -80,7 +88,14 @@ namespace ARMeilleure.Instructions
int shift = GetImmShl(op);
EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift);
}
else
{
EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift)));
}
}
public static void Shl_V(ArmEmitterContext context)
@ -90,7 +105,11 @@ namespace ARMeilleure.Instructions
int shift = GetImmShl(op);
int eSize = 8 << op.Size;
if (shift >= eSize)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift);
}
else if (shift >= eSize)
{
if ((op.RegisterSize == RegisterSize.Simd64))
{
@ -143,7 +162,11 @@ namespace ARMeilleure.Instructions
int shift = 8 << op.Size;
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ShllV);
}
else if (Optimizations.UseSse41)
{
Operand n = GetVec(op.Rn);
@ -170,7 +193,15 @@ namespace ARMeilleure.Instructions
public static void Shrn_V(ArmEmitterContext context)
{
if (Optimizations.UseSsse3)
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift);
}
else if (Optimizations.UseSsse3)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
@ -205,89 +236,259 @@ namespace ARMeilleure.Instructions
public static void Sli_S(ArmEmitterContext context)
{
EmitSli(context, scalar: true);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShl(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift);
}
else
{
EmitSli(context, scalar: true);
}
}
public static void Sli_V(ArmEmitterContext context)
{
EmitSli(context, scalar: false);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShl(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift);
}
else
{
EmitSli(context, scalar: false);
}
}
public static void Sqrshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating);
}
}
public static void Sqrshrn_S(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
}
}
public static void Sqrshrn_V(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
}
}
public static void Sqrshrun_S(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
}
}
public static void Sqrshrun_V(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
}
}
public static void Sqshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating);
}
}
public static void Sqshrn_S(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);
}
}
public static void Sqshrn_V(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);
}
}
public static void Sqshrun_S(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);
}
}
public static void Sqshrun_V(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
}
}
public static void Sri_S(ArmEmitterContext context)
{
EmitSri(context, scalar: true);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift);
}
else
{
EmitSri(context, scalar: true);
}
}
public static void Sri_V(ArmEmitterContext context)
{
EmitSri(context, scalar: false);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift);
}
else
{
EmitSri(context, scalar: false);
}
}
public static void Srshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round);
}
}
public static void Srshr_S(ArmEmitterContext context)
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift);
}
else
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
}
}
public static void Srshr_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
@ -325,14 +526,31 @@ namespace ARMeilleure.Instructions
public static void Srsra_S(ArmEmitterContext context)
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift);
}
else
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
}
}
public static void Srsra_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
@ -372,12 +590,26 @@ namespace ARMeilleure.Instructions
public static void Sshl_S(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed);
}
}
public static void Sshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Signed);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Signed);
}
}
public static void Sshll_V(ArmEmitterContext context)
@ -386,7 +618,11 @@ namespace ARMeilleure.Instructions
int shift = GetImmShl(op);
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift);
}
else if (Optimizations.UseSse41)
{
Operand n = GetVec(op.Rn);
@ -416,7 +652,18 @@ namespace ARMeilleure.Instructions
public static void Sshr_S(ArmEmitterContext context)
{
EmitShrImmOp(context, ShrImmFlags.ScalarSx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift);
}
else
{
EmitShrImmOp(context, ShrImmFlags.ScalarSx);
}
}
public static void Sshr_V(ArmEmitterContext context)
@ -425,7 +672,11 @@ namespace ARMeilleure.Instructions
int shift = GetImmShr(op);
if (Optimizations.UseGfni && op.Size == 0)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift);
}
else if (Optimizations.UseGfni && op.Size == 0)
{
Operand n = GetVec(op.Rn);
@ -478,14 +729,31 @@ namespace ARMeilleure.Instructions
public static void Ssra_S(ArmEmitterContext context)
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift);
}
else
{
EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
}
}
public static void Ssra_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
int shift = GetImmShr(op);
@ -515,49 +783,131 @@ namespace ARMeilleure.Instructions
public static void Uqrshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating);
}
}
public static void Uqrshrn_S(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
}
}
public static void Uqrshrn_V(ArmEmitterContext context)
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift);
}
else
{
EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
}
}
public static void Uqshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Saturating);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Saturating);
}
}
public static void Uqshrn_S(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);
}
}
public static void Uqshrn_V(ArmEmitterContext context)
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift);
}
else
{
EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);
}
}
public static void Urshl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Round);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Round);
}
}
public static void Urshr_S(ArmEmitterContext context)
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift);
}
else
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
}
}
public static void Urshr_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
@ -593,14 +943,31 @@ namespace ARMeilleure.Instructions
public static void Ursra_S(ArmEmitterContext context)
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift);
}
else
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
}
}
public static void Ursra_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{
int shift = GetImmShr(op);
int eSize = 8 << op.Size;
@ -638,12 +1005,26 @@ namespace ARMeilleure.Instructions
public static void Ushl_S(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.Scalar);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS);
}
else
{
EmitShlRegOp(context, ShlRegFlags.Scalar);
}
}
public static void Ushl_V(ArmEmitterContext context)
{
EmitShlRegOp(context, ShlRegFlags.None);
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV);
}
else
{
EmitShlRegOp(context, ShlRegFlags.None);
}
}
public static void Ushll_V(ArmEmitterContext context)
@ -652,7 +1033,11 @@ namespace ARMeilleure.Instructions
int shift = GetImmShl(op);
if (Optimizations.UseSse41)
if (Optimizations.UseAdvSimd)
{
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift);
}
else if (Optimizations.UseSse41)
{
Operand n = GetVec(op.Rn);
@ -682,14 +1067,31 @@ namespace ARMeilleure.Instructions
public static void Ushr_S(ArmEmitterContext context)
{
EmitShrImmOp(context, ShrImmFlags.ScalarZx);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift);
}
else
{
EmitShrImmOp(context, ShrImmFlags.ScalarZx);
}
}
public static void Ushr_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{
int shift = GetImmShr(op);
@ -714,14 +1116,31 @@ namespace ARMeilleure.Instructions
public static void Usra_S(ArmEmitterContext context)
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
if (Optimizations.UseAdvSimd)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift);
}
else
{
EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
}
}
public static void Usra_V(ArmEmitterContext context)
{
OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;
if (Optimizations.UseSse2 && op.Size > 0)
if (Optimizations.UseAdvSimd)
{
int shift = GetImmShr(op);
InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift);
}
else if (Optimizations.UseSse2 && op.Size > 0)
{
int shift = GetImmShr(op);


@ -150,6 +150,8 @@ namespace ARMeilleure.Instructions
{
OpCodeSystem op = (OpCodeSystem)context.CurrOp;
context.SyncQcFlag();
Operand fpsr = Const(0);
for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
@ -196,6 +198,8 @@ namespace ARMeilleure.Instructions
{
OpCodeSystem op = (OpCodeSystem)context.CurrOp;
context.ClearQcFlagIfModified();
Operand fpsr = GetIntOrZR(context, op.Rt);
fpsr = context.ConvertI64ToI32(fpsr);


@ -48,10 +48,7 @@ namespace ARMeilleure.IntermediateRepresentation
public void AddSuccessor(BasicBlock block)
{
if (block == null)
{
ThrowNull(nameof(block));
}
ArgumentNullException.ThrowIfNull(block);
if ((uint)_succCount + 1 > MaxSuccessors)
{
@ -100,10 +97,7 @@ namespace ARMeilleure.IntermediateRepresentation
public void SetSuccessor(int index, BasicBlock block)
{
if (block == null)
{
ThrowNull(nameof(block));
}
ArgumentNullException.ThrowIfNull(block);
if ((uint)index >= (uint)_succCount)
{
@ -144,7 +138,6 @@ namespace ARMeilleure.IntermediateRepresentation
}
}
private static void ThrowNull(string name) => throw new ArgumentNullException(name);
private static void ThrowOutOfRange(string name) => throw new ArgumentOutOfRangeException(name);
private static void ThrowSuccessorOverflow() => throw new OverflowException($"BasicBlock can only have {MaxSuccessors} successors.");


@ -2,6 +2,8 @@ namespace ARMeilleure.IntermediateRepresentation
{
enum Intrinsic : ushort
{
// X86 (SSE and AVX)
X86Addpd,
X86Addps,
X86Addsd,
@ -172,6 +174,458 @@ namespace ARMeilleure.IntermediateRepresentation
X86Vfnmsub231sd,
X86Vfnmsub231ss,
X86Xorpd,
X86Xorps
X86Xorps,
// Arm64 (FP and Advanced SIMD)
Arm64AbsS,
Arm64AbsV,
Arm64AddhnV,
Arm64AddpS,
Arm64AddpV,
Arm64AddvV,
Arm64AddS,
Arm64AddV,
Arm64AesdV,
Arm64AeseV,
Arm64AesimcV,
Arm64AesmcV,
Arm64AndV,
Arm64BicVi,
Arm64BicV,
Arm64BifV,
Arm64BitV,
Arm64BslV,
Arm64ClsV,
Arm64ClzV,
Arm64CmeqS,
Arm64CmeqV,
Arm64CmeqSz,
Arm64CmeqVz,
Arm64CmgeS,
Arm64CmgeV,
Arm64CmgeSz,
Arm64CmgeVz,
Arm64CmgtS,
Arm64CmgtV,
Arm64CmgtSz,
Arm64CmgtVz,
Arm64CmhiS,
Arm64CmhiV,
Arm64CmhsS,
Arm64CmhsV,
Arm64CmleSz,
Arm64CmleVz,
Arm64CmltSz,
Arm64CmltVz,
Arm64CmtstS,
Arm64CmtstV,
Arm64CntV,
Arm64DupSe,
Arm64DupVe,
Arm64DupGp,
Arm64EorV,
Arm64ExtV,
Arm64FabdS,
Arm64FabdV,
Arm64FabsV,
Arm64FabsS,
Arm64FacgeS,
Arm64FacgeV,
Arm64FacgtS,
Arm64FacgtV,
Arm64FaddpS,
Arm64FaddpV,
Arm64FaddV,
Arm64FaddS,
Arm64FccmpeS,
Arm64FccmpS,
Arm64FcmeqS,
Arm64FcmeqV,
Arm64FcmeqSz,
Arm64FcmeqVz,
Arm64FcmgeS,
Arm64FcmgeV,
Arm64FcmgeSz,
Arm64FcmgeVz,
Arm64FcmgtS,
Arm64FcmgtV,
Arm64FcmgtSz,
Arm64FcmgtVz,
Arm64FcmleSz,
Arm64FcmleVz,
Arm64FcmltSz,
Arm64FcmltVz,
Arm64FcmpeS,
Arm64FcmpS,
Arm64FcselS,
Arm64FcvtasS,
Arm64FcvtasV,
Arm64FcvtasGp,
Arm64FcvtauS,
Arm64FcvtauV,
Arm64FcvtauGp,
Arm64FcvtlV,
Arm64FcvtmsS,
Arm64FcvtmsV,
Arm64FcvtmsGp,
Arm64FcvtmuS,
Arm64FcvtmuV,
Arm64FcvtmuGp,
Arm64FcvtnsS,
Arm64FcvtnsV,
Arm64FcvtnsGp,
Arm64FcvtnuS,
Arm64FcvtnuV,
Arm64FcvtnuGp,
Arm64FcvtnV,
Arm64FcvtpsS,
Arm64FcvtpsV,
Arm64FcvtpsGp,
Arm64FcvtpuS,
Arm64FcvtpuV,
Arm64FcvtpuGp,
Arm64FcvtxnS,
Arm64FcvtxnV,
Arm64FcvtzsSFixed,
Arm64FcvtzsVFixed,
Arm64FcvtzsS,
Arm64FcvtzsV,
Arm64FcvtzsGpFixed,
Arm64FcvtzsGp,
Arm64FcvtzuSFixed,
Arm64FcvtzuVFixed,
Arm64FcvtzuS,
Arm64FcvtzuV,
Arm64FcvtzuGpFixed,
Arm64FcvtzuGp,
Arm64FcvtS,
Arm64FdivV,
Arm64FdivS,
Arm64FmaddS,
Arm64FmaxnmpS,
Arm64FmaxnmpV,
Arm64FmaxnmvV,
Arm64FmaxnmV,
Arm64FmaxnmS,
Arm64FmaxpS,
Arm64FmaxpV,
Arm64FmaxvV,
Arm64FmaxV,
Arm64FmaxS,
Arm64FminnmpS,
Arm64FminnmpV,
Arm64FminnmvV,
Arm64FminnmV,
Arm64FminnmS,
Arm64FminpS,
Arm64FminpV,
Arm64FminvV,
Arm64FminV,
Arm64FminS,
Arm64FmlaSe,
Arm64FmlaVe,
Arm64FmlaV,
Arm64FmlsSe,
Arm64FmlsVe,
Arm64FmlsV,
Arm64FmovVi,
Arm64FmovS,
Arm64FmovGp,
Arm64FmovSi,
Arm64FmsubS,
Arm64FmulxSe,
Arm64FmulxVe,
Arm64FmulxS,
Arm64FmulxV,
Arm64FmulSe,
Arm64FmulVe,
Arm64FmulV,
Arm64FmulS,
Arm64FnegV,
Arm64FnegS,
Arm64FnmaddS,
Arm64FnmsubS,
Arm64FnmulS,
Arm64FrecpeS,
Arm64FrecpeV,
Arm64FrecpsS,
Arm64FrecpsV,
Arm64FrecpxS,
Arm64FrintaV,
Arm64FrintaS,
Arm64FrintiV,
Arm64FrintiS,
Arm64FrintmV,
Arm64FrintmS,
Arm64FrintnV,
Arm64FrintnS,
Arm64FrintpV,
Arm64FrintpS,
Arm64FrintxV,
Arm64FrintxS,
Arm64FrintzV,
Arm64FrintzS,
Arm64FrsqrteS,
Arm64FrsqrteV,
Arm64FrsqrtsS,
Arm64FrsqrtsV,
Arm64FsqrtV,
Arm64FsqrtS,
Arm64FsubV,
Arm64FsubS,
Arm64InsVe,
Arm64InsGp,
Arm64Ld1rV,
Arm64Ld1Vms,
Arm64Ld1Vss,
Arm64Ld2rV,
Arm64Ld2Vms,
Arm64Ld2Vss,
Arm64Ld3rV,
Arm64Ld3Vms,
Arm64Ld3Vss,
Arm64Ld4rV,
Arm64Ld4Vms,
Arm64Ld4Vss,
Arm64MlaVe,
Arm64MlaV,
Arm64MlsVe,
Arm64MlsV,
Arm64MoviV,
Arm64MrsFpsr,
Arm64MsrFpsr,
Arm64MulVe,
Arm64MulV,
Arm64MvniV,
Arm64NegS,
Arm64NegV,
Arm64NotV,
Arm64OrnV,
Arm64OrrVi,
Arm64OrrV,
Arm64PmullV,
Arm64PmulV,
Arm64RaddhnV,
Arm64RbitV,
Arm64Rev16V,
Arm64Rev32V,
Arm64Rev64V,
Arm64RshrnV,
Arm64RsubhnV,
Arm64SabalV,
Arm64SabaV,
Arm64SabdlV,
Arm64SabdV,
Arm64SadalpV,
Arm64SaddlpV,
Arm64SaddlvV,
Arm64SaddlV,
Arm64SaddwV,
Arm64ScvtfSFixed,
Arm64ScvtfVFixed,
Arm64ScvtfS,
Arm64ScvtfV,
Arm64ScvtfGpFixed,
Arm64ScvtfGp,
Arm64Sha1cV,
Arm64Sha1hV,
Arm64Sha1mV,
Arm64Sha1pV,
Arm64Sha1su0V,
Arm64Sha1su1V,
Arm64Sha256h2V,
Arm64Sha256hV,
Arm64Sha256su0V,
Arm64Sha256su1V,
Arm64ShaddV,
Arm64ShllV,
Arm64ShlS,
Arm64ShlV,
Arm64ShrnV,
Arm64ShsubV,
Arm64SliS,
Arm64SliV,
Arm64SmaxpV,
Arm64SmaxvV,
Arm64SmaxV,
Arm64SminpV,
Arm64SminvV,
Arm64SminV,
Arm64SmlalVe,
Arm64SmlalV,
Arm64SmlslVe,
Arm64SmlslV,
Arm64SmovV,
Arm64SmullVe,
Arm64SmullV,
Arm64SqabsS,
Arm64SqabsV,
Arm64SqaddS,
Arm64SqaddV,
Arm64SqdmlalSe,
Arm64SqdmlalVe,
Arm64SqdmlalS,
Arm64SqdmlalV,
Arm64SqdmlslSe,
Arm64SqdmlslVe,
Arm64SqdmlslS,
Arm64SqdmlslV,
Arm64SqdmulhSe,
Arm64SqdmulhVe,
Arm64SqdmulhS,
Arm64SqdmulhV,
Arm64SqdmullSe,
Arm64SqdmullVe,
Arm64SqdmullS,
Arm64SqdmullV,
Arm64SqnegS,
Arm64SqnegV,
Arm64SqrdmulhSe,
Arm64SqrdmulhVe,
Arm64SqrdmulhS,
Arm64SqrdmulhV,
Arm64SqrshlS,
Arm64SqrshlV,
Arm64SqrshrnS,
Arm64SqrshrnV,
Arm64SqrshrunS,
Arm64SqrshrunV,
Arm64SqshluS,
Arm64SqshluV,
Arm64SqshlSi,
Arm64SqshlVi,
Arm64SqshlS,
Arm64SqshlV,
Arm64SqshrnS,
Arm64SqshrnV,
Arm64SqshrunS,
Arm64SqshrunV,
Arm64SqsubS,
Arm64SqsubV,
Arm64SqxtnS,
Arm64SqxtnV,
Arm64SqxtunS,
Arm64SqxtunV,
Arm64SrhaddV,
Arm64SriS,
Arm64SriV,
Arm64SrshlS,
Arm64SrshlV,
Arm64SrshrS,
Arm64SrshrV,
Arm64SrsraS,
Arm64SrsraV,
Arm64SshllV,
Arm64SshlS,
Arm64SshlV,
Arm64SshrS,
Arm64SshrV,
Arm64SsraS,
Arm64SsraV,
Arm64SsublV,
Arm64SsubwV,
Arm64St1Vms,
Arm64St1Vss,
Arm64St2Vms,
Arm64St2Vss,
Arm64St3Vms,
Arm64St3Vss,
Arm64St4Vms,
Arm64St4Vss,
Arm64SubhnV,
Arm64SubS,
Arm64SubV,
Arm64SuqaddS,
Arm64SuqaddV,
Arm64TblV,
Arm64TbxV,
Arm64Trn1V,
Arm64Trn2V,
Arm64UabalV,
Arm64UabaV,
Arm64UabdlV,
Arm64UabdV,
Arm64UadalpV,
Arm64UaddlpV,
Arm64UaddlvV,
Arm64UaddlV,
Arm64UaddwV,
Arm64UcvtfSFixed,
Arm64UcvtfVFixed,
Arm64UcvtfS,
Arm64UcvtfV,
Arm64UcvtfGpFixed,
Arm64UcvtfGp,
Arm64UhaddV,
Arm64UhsubV,
Arm64UmaxpV,
Arm64UmaxvV,
Arm64UmaxV,
Arm64UminpV,
Arm64UminvV,
Arm64UminV,
Arm64UmlalVe,
Arm64UmlalV,
Arm64UmlslVe,
Arm64UmlslV,
Arm64UmovV,
Arm64UmullVe,
Arm64UmullV,
Arm64UqaddS,
Arm64UqaddV,
Arm64UqrshlS,
Arm64UqrshlV,
Arm64UqrshrnS,
Arm64UqrshrnV,
Arm64UqshlSi,
Arm64UqshlVi,
Arm64UqshlS,
Arm64UqshlV,
Arm64UqshrnS,
Arm64UqshrnV,
Arm64UqsubS,
Arm64UqsubV,
Arm64UqxtnS,
Arm64UqxtnV,
Arm64UrecpeV,
Arm64UrhaddV,
Arm64UrshlS,
Arm64UrshlV,
Arm64UrshrS,
Arm64UrshrV,
Arm64UrsqrteV,
Arm64UrsraS,
Arm64UrsraV,
Arm64UshllV,
Arm64UshlS,
Arm64UshlV,
Arm64UshrS,
Arm64UshrV,
Arm64UsqaddS,
Arm64UsqaddV,
Arm64UsraS,
Arm64UsraV,
Arm64UsublV,
Arm64UsubwV,
Arm64Uzp1V,
Arm64Uzp2V,
Arm64XtnV,
Arm64Zip1V,
Arm64Zip2V,
Arm64VTypeShift = 13,
Arm64VTypeMask = 1 << Arm64VTypeShift,
Arm64V64 = 0 << Arm64VTypeShift,
Arm64V128 = 1 << Arm64VTypeShift,
Arm64VSizeShift = 14,
Arm64VSizeMask = 3 << Arm64VSizeShift,
Arm64VFloat = 0 << Arm64VSizeShift,
Arm64VDouble = 1 << Arm64VSizeShift,
Arm64VByte = 0 << Arm64VSizeShift,
Arm64VHWord = 1 << Arm64VSizeShift,
Arm64VWord = 2 << Arm64VSizeShift,
Arm64VDWord = 3 << Arm64VSizeShift
}
}
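
For orientation, a minimal sketch (not part of the diff; ComposeExample and its parameters are made up) of how the emit helpers shown earlier combine a base Arm64 intrinsic with the modifier bits defined at the end of the enum above:

static Intrinsic ComposeExample(int size, bool is128BitVector)
{
    Intrinsic inst = Intrinsic.Arm64AddV;                        // base opcode
    inst |= (Intrinsic)(size << (int)Intrinsic.Arm64VSizeShift); // element size, or float/double for FP ops
    if (is128BitVector)
    {
        inst |= Intrinsic.Arm64V128;                             // select the 128-bit (Q) register form
    }
    return inst;
}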


@ -5,6 +5,7 @@ namespace ARMeilleure.IntermediateRepresentation
x1 = 0,
x2 = 1,
x4 = 2,
x8 = 3
x8 = 3,
x16 = 4
}
}


@ -259,6 +259,20 @@ namespace ARMeilleure.IntermediateRepresentation
}
}
public Span<Operation> GetUses(ref Span<Operation> buffer)
{
ReadOnlySpan<Operation> uses = Uses;
if (buffer.Length < uses.Length)
{
buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length);
}
uses.CopyTo(buffer);
return buffer.Slice(0, uses.Length);
}
private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged
{
count = 0;
@ -378,14 +392,7 @@ namespace ARMeilleure.IntermediateRepresentation
public override int GetHashCode()
{
if (Kind == OperandKind.LocalVariable)
{
return base.GetHashCode();
}
else
{
return (int)Value ^ ((int)Kind << 16) ^ ((int)Type << 20);
}
return ((ulong)_data).GetHashCode();
}
public bool Equals(Operand operand)


@ -47,5 +47,19 @@ namespace ARMeilleure.IntermediateRepresentation
throw new InvalidOperationException($"Invalid operand type \"{type}\".");
}
public static int GetSizeInBytesLog2(this OperandType type)
{
switch (type)
{
case OperandType.FP32: return 2;
case OperandType.FP64: return 3;
case OperandType.I32: return 2;
case OperandType.I64: return 3;
case OperandType.V128: return 4;
}
throw new InvalidOperationException($"Invalid operand type \"{type}\".");
}
}
}


@ -3,7 +3,7 @@ using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.IntermediateRepresentation
{
struct PhiOperation
readonly struct PhiOperation
{
private readonly Operation _operation;


@ -2,7 +2,7 @@ using System;
namespace ARMeilleure.IntermediateRepresentation
{
struct Register : IEquatable<Register>
readonly struct Register : IEquatable<Register>
{
public int Index { get; }


@ -4,7 +4,7 @@ namespace ARMeilleure.Memory
{
class ReservedRegion
{
private const int DefaultGranularity = 65536; // Mapping granularity in Windows.
public const int DefaultGranularity = 65536; // Mapping granularity in Windows.
public IJitMemoryBlock Block { get; }


@ -0,0 +1,13 @@
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;
namespace ARMeilleure.Native
{
[SupportedOSPlatform("macos")]
public static partial class JitSupportDarwin
{
[LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")]
public static partial void Copy(IntPtr dst, IntPtr src, ulong n);
}
}


@ -0,0 +1,8 @@
NAME = libarmeilleure-jitsupport.dylib
all: ${NAME}
${NAME}:
clang -O3 -dynamiclib support.c -o ${NAME}
clean:
rm -f ${NAME}


@ -0,0 +1,14 @@
#include <stddef.h>
#include <string.h>
#include <pthread.h>
#include <libkern/OSCacheControl.h>
void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) {
pthread_jit_write_protect_np(0);
memcpy(dst, src, n);
pthread_jit_write_protect_np(1);
// Ensure that the instruction cache for this range is invalidated.
sys_icache_invalidate(dst, n);
}
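
A hedged sketch of how the managed binding above might be called (the wrapper name and locals are illustrative, not taken from the diff); routing the copy through the native helper keeps the pthread JIT write-protection toggle and the instruction-cache flush together on the native side:

static void CopyCodeToJitRegion(IntPtr destination, IntPtr source, int length)
{
    // Write protection is lifted only for the duration of the memcpy, then the
    // i-cache range is invalidated before the copied code can be executed.
    JitSupportDarwin.Copy(destination, source, (ulong)length);
}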


@ -1,7 +1,10 @@
using ARMeilleure.CodeGen.X86;
using System.Runtime.Intrinsics.Arm;
namespace ARMeilleure
{
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
public static class Optimizations
{
public static bool FastFP { get; set; } = true;
@ -9,6 +12,9 @@ namespace ARMeilleure
public static bool AllowLcqInFunctionTable { get; set; } = true;
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
public static bool UseAdvSimdIfAvailable { get; set; } = true;
public static bool UseArm64PmullIfAvailable { get; set; } = true;
public static bool UseSseIfAvailable { get; set; } = true;
public static bool UseSse2IfAvailable { get; set; } = true;
public static bool UseSse3IfAvailable { get; set; } = true;
@ -26,23 +32,26 @@ namespace ARMeilleure
public static bool ForceLegacySse
{
get => HardwareCapabilities.ForceLegacySse;
set => HardwareCapabilities.ForceLegacySse = value;
get => X86HardwareCapabilities.ForceLegacySse;
set => X86HardwareCapabilities.ForceLegacySse = value;
}
internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
internal static bool UseF16c => UseF16cIfAvailable && HardwareCapabilities.SupportsF16c;
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
internal static bool UseGfni => UseGfniIfAvailable && HardwareCapabilities.SupportsGfni;
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;
internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;
internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;
internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;
internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
}
}
}


@ -1,5 +1,7 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Memory;
using ARMeilleure.Translation;
using ARMeilleure.Translation.Cache;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@ -69,8 +71,8 @@ namespace ARMeilleure.Signal
private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005;
private const ulong PageSize = 0x1000;
private const ulong PageMask = PageSize - 1;
private static ulong _pageSize = GetPageSize();
private static ulong _pageMask = _pageSize - 1;
private static IntPtr _handlerConfig;
private static IntPtr _signalHandlerPtr;
@ -79,6 +81,19 @@ namespace ARMeilleure.Signal
private static readonly object _lock = new object();
private static bool _initialized;
private static ulong GetPageSize()
{
// TODO: This needs to be based on the current memory manager configuration.
if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
return 1UL << 14;
}
else
{
return 1UL << 12;
}
}
static NativeSignalHandler()
{
_handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>());
@ -87,7 +102,12 @@ namespace ARMeilleure.Signal
config = new SignalHandlerConfig();
}
public static void InitializeSignalHandler()
public static void InitializeJitCache(IJitMemoryAllocator allocator)
{
JitCache.Initialize(allocator);
}
public static void InitializeSignalHandler(Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null)
{
if (_initialized) return;
@ -95,10 +115,9 @@ namespace ARMeilleure.Signal
{
if (_initialized) return;
bool unix = OperatingSystem.IsLinux() || OperatingSystem.IsMacOS();
ref SignalHandlerConfig config = ref GetConfigRef();
if (unix)
if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS())
{
// Unix siginfo struct locations.
// NOTE: These are incredibly likely to be different between kernel version and architectures.
@ -108,7 +127,13 @@ namespace ARMeilleure.Signal
_signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig));
SigAction old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
if (customSignalHandlerFactory != null)
{
_signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr);
}
var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
config.UnixOldSigaction = (nuint)(ulong)old.sa_handler;
config.UnixOldSigaction3Arg = old.sa_flags & 4;
}
@ -119,6 +144,11 @@ namespace ARMeilleure.Signal
_signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig));
if (customSignalHandlerFactory != null)
{
_signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr);
}
_signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr);
}
@ -197,7 +227,7 @@ namespace ARMeilleure.Signal
// Only call tracking if in range.
context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold);
Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~PageMask));
Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask));
// Call the tracking action, with the pointer's relative offset to the base address.
Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20));
@ -208,7 +238,7 @@ namespace ARMeilleure.Signal
// Tracking action should be non-null to call it, otherwise assume false return.
context.BranchIfFalse(skipActionLabel, trackingActionPtr);
Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(PageSize), isWrite, Const(0));
Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite, Const(0));
context.Copy(inRegionLocal, result);
context.MarkLabel(skipActionLabel);
@ -278,7 +308,7 @@ namespace ARMeilleure.Signal
OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 };
return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<UnixExceptionHandler>();
return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>();
}
private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr)
@ -332,7 +362,7 @@ namespace ARMeilleure.Signal
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<VectoredExceptionHandler>();
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>();
}
}
}

View File

@ -1,7 +1,7 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Runtime.InteropServices;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Signal
@ -32,7 +32,7 @@ namespace ARMeilleure.Signal
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugPartialUnmap>();
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>();
}
public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr)
@ -49,7 +49,7 @@ namespace ARMeilleure.Signal
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq).Map<DebugThreadLocalMapGetOrReserve>();
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>();
}
public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop()
@ -78,7 +78,7 @@ namespace ARMeilleure.Signal
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq).Map<DebugNativeWriteLoop>();
return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>();
}
}
}

View File

@ -3,32 +3,47 @@ using System.Runtime.InteropServices;
namespace ARMeilleure.Signal
{
[StructLayout(LayoutKind.Sequential, Pack = 1)]
unsafe struct SigSet
static partial class UnixSignalHandlerRegistration
{
fixed long sa_mask[16];
}
[StructLayout(LayoutKind.Sequential, Pack = 1)]
public unsafe struct SigSet
{
fixed long sa_mask[16];
}
[StructLayout(LayoutKind.Sequential, Pack = 1)]
struct SigAction
{
public IntPtr sa_handler;
public SigSet sa_mask;
public int sa_flags;
public IntPtr sa_restorer;
}
[StructLayout(LayoutKind.Sequential, Pack = 1)]
public struct SigAction
{
public IntPtr sa_handler;
public SigSet sa_mask;
public int sa_flags;
public IntPtr sa_restorer;
}
static class UnixSignalHandlerRegistration
{
private const int SIGSEGV = 11;
private const int SIGBUS = 10;
private const int SA_SIGINFO = 0x00000004;
[DllImport("libc", SetLastError = true)]
private static extern int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);
[LibraryImport("libc", SetLastError = true)]
private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction);
[DllImport("libc", SetLastError = true)]
private static extern int sigemptyset(ref SigSet set);
[LibraryImport("libc", SetLastError = true)]
private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction);
[LibraryImport("libc", SetLastError = true)]
private static partial int sigemptyset(ref SigSet set);
public static SigAction GetSegfaultExceptionHandler()
{
int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old);
if (result != 0)
{
throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}");
}
return old;
}
public static SigAction RegisterExceptionHandler(IntPtr action)
{
@ -49,7 +64,7 @@ namespace ARMeilleure.Signal
if (OperatingSystem.IsMacOS())
{
result = sigaction(SIGBUS, ref sig, out SigAction oldb);
result = sigaction(SIGBUS, ref sig, out _);
if (result != 0)
{

View File

@ -3,19 +3,19 @@ using System.Runtime.InteropServices;
namespace ARMeilleure.Signal
{
unsafe class WindowsSignalHandlerRegistration
unsafe partial class WindowsSignalHandlerRegistration
{
[DllImport("kernel32.dll")]
private static extern IntPtr AddVectoredExceptionHandler(uint first, IntPtr handler);
[LibraryImport("kernel32.dll")]
private static partial IntPtr AddVectoredExceptionHandler(uint first, IntPtr handler);
[DllImport("kernel32.dll")]
private static extern ulong RemoveVectoredExceptionHandler(IntPtr handle);
[LibraryImport("kernel32.dll")]
private static partial ulong RemoveVectoredExceptionHandler(IntPtr handle);
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Ansi)]
static extern IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName);
[LibraryImport("kernel32.dll", SetLastError = true, EntryPoint = "LoadLibraryA")]
private static partial IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName);
[DllImport("kernel32.dll", CharSet = CharSet.Ansi, ExactSpelling = true, SetLastError = true)]
private static extern IntPtr GetProcAddress(IntPtr hModule, string procName);
[LibraryImport("kernel32.dll", SetLastError = true)]
private static partial IntPtr GetProcAddress(IntPtr hModule, [MarshalAs(UnmanagedType.LPStr)] string procName);
private static IntPtr _getCurrentThreadIdPtr;
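These declarations move from DllImport to the [LibraryImport] source generator that ships with .NET 7: the marshalling code is generated at compile time into the partial method, avoiding runtime IL stub generation and working better with trimming. A minimal sketch of the pattern outside this file (the class and API choice are illustrative):

// Sketch: source-generated P/Invoke needs a partial class and a partial method.
internal static partial class NativeMethods
{
    [LibraryImport("kernel32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    internal static partial bool CloseHandle(IntPtr handle);
}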

View File

@ -6,7 +6,6 @@ using ARMeilleure.Instructions;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Memory;
using ARMeilleure.State;
using ARMeilleure.Translation.PTC;
using System;
using System.Collections.Generic;
using System.Reflection;
@ -40,18 +39,19 @@ namespace ARMeilleure.Translation
}
}
private bool _pendingQcFlagSync;
public OpCode CurrOp { get; set; }
public IMemoryManager Memory { get; }
public bool HasPtc { get; }
public EntryTable<uint> CountTable { get; }
public AddressTable<ulong> FunctionTable { get; }
public TranslatorStubs Stubs { get; }
public ulong EntryAddress { get; }
public bool HighCq { get; }
public bool HasPtc { get; }
public Aarch32Mode Mode { get; }
private int _ifThenBlockStateIndex = 0;
@ -66,15 +66,16 @@ namespace ARMeilleure.Translation
TranslatorStubs stubs,
ulong entryAddress,
bool highCq,
bool hasPtc,
Aarch32Mode mode)
{
HasPtc = Ptc.State != PtcState.Disabled;
Memory = memory;
CountTable = countTable;
FunctionTable = funcTable;
Stubs = stubs;
EntryAddress = entryAddress;
HighCq = highCq;
HasPtc = hasPtc;
Mode = mode;
_labels = new Dictionary<ulong, Operand>();
@ -82,6 +83,8 @@ namespace ARMeilleure.Translation
public override Operand Call(MethodInfo info, params Operand[] callArgs)
{
SyncQcFlag();
if (!HasPtc)
{
return base.Call(info, callArgs);
@ -140,6 +143,51 @@ namespace ARMeilleure.Translation
_optOpLastFlagSet = null;
}
public void SetPendingQcFlagSync()
{
_pendingQcFlagSync = true;
}
public void SyncQcFlag()
{
if (_pendingQcFlagSync)
{
if (Optimizations.UseAdvSimd)
{
Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr);
uint qcFlagMask = (uint)FPSR.Qc;
Operand qcClearLabel = Label();
BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask)));
AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1));
MarkLabel(qcClearLabel);
}
_pendingQcFlagSync = false;
}
}
public void ClearQcFlag()
{
if (Optimizations.UseAdvSimd)
{
AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
}
}
public void ClearQcFlagIfModified()
{
if (_pendingQcFlagSync && Optimizations.UseAdvSimd)
{
AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0));
}
}
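These helpers implement a lazy sync of the host FPSR.QC bit into the guest QC flag when the AdvSimd fast path is used: emitters mark the flag dirty after saturating operations, and the pending state is flushed at the next managed call (see the Call override above) or at block boundaries. A hypothetical emitter-side helper, with the intrinsic and operands as placeholders rather than real enum members:

// Hypothetical helper showing the lazy QC sync flow; saturatingOp is a placeholder.
static Operand EmitSaturating(ArmEmitterContext context, Intrinsic saturatingOp, Operand n, Operand m)
{
    Operand res = context.AddIntrinsic(saturatingOp, n, m); // host op may set FPSR.QC
    context.SetPendingQcFlagSync();                         // flushed later by Call() or SyncQcFlag() at block end
    return res;
}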
public Operand TryGetComparisonResult(Condition condition)
{
if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)

View File

@ -4,7 +4,7 @@ using System.Diagnostics.CodeAnalysis;
namespace ARMeilleure.Translation.Cache
{
struct CacheEntry : IComparable<CacheEntry>
readonly struct CacheEntry : IComparable<CacheEntry>
{
public int Offset { get; }
public int Size { get; }

View File

@ -6,7 +6,7 @@ namespace ARMeilleure.Translation.Cache
{
class CacheMemoryAllocator
{
private struct MemoryBlock : IComparable<MemoryBlock>
private readonly struct MemoryBlock : IComparable<MemoryBlock>
{
public int Offset { get; }
public int Size { get; }

View File

@ -1,6 +1,7 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Unwinding;
using ARMeilleure.Memory;
using ARMeilleure.Native;
using System;
using System.Collections.Generic;
using System.Diagnostics;
@ -17,6 +18,7 @@ namespace ARMeilleure.Translation.Cache
private const int CacheSize = 2047 * 1024 * 1024;
private static ReservedRegion _jitRegion;
private static JitCacheInvalidation _jitCacheInvalidator;
private static CacheMemoryAllocator _cacheAllocator;
@ -25,8 +27,6 @@ namespace ARMeilleure.Translation.Cache
private static readonly object _lock = new object();
private static bool _initialized;
public static IntPtr Base => _jitRegion.Pointer;
public static void Initialize(IJitMemoryAllocator allocator)
{
if (_initialized) return;
@ -36,6 +36,7 @@ namespace ARMeilleure.Translation.Cache
if (_initialized) return;
_jitRegion = new ReservedRegion(allocator, CacheSize);
_jitCacheInvalidator = new JitCacheInvalidation(allocator);
_cacheAllocator = new CacheMemoryAllocator(CacheSize);
@ -60,11 +61,24 @@ namespace ARMeilleure.Translation.Cache
IntPtr funcPtr = _jitRegion.Pointer + funcOffset;
ReprotectAsWritable(funcOffset, code.Length);
if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
unsafe
{
fixed (byte *codePtr = code)
{
JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length);
}
}
}
else
{
ReprotectAsWritable(funcOffset, code.Length);
Marshal.Copy(code, 0, funcPtr, code.Length);
ReprotectAsExecutable(funcOffset, code.Length);
Marshal.Copy(code, 0, funcPtr, code.Length);
ReprotectAsExecutable(funcOffset, code.Length);
_jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length);
}
Add(funcOffset, code.Length, func.UnwindInfo);
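On Apple Silicon, MAP_JIT memory is not simply reprotected; writes usually go through the per-thread pthread_jit_write_protect_np toggle followed by an explicit instruction-cache flush. JitSupportDarwin.Copy is not shown in this hunk, so the following is only a hedged sketch of the usual sequence such a helper would wrap (the wrapper name and exact mechanism are assumptions):

// Sketch of the typical Apple Silicon JIT write sequence (assumed, not the actual helper).
[DllImport("libSystem.dylib")]
private static extern void pthread_jit_write_protect_np(int enabled);

[DllImport("libSystem.dylib")]
private static extern void sys_icache_invalidate(IntPtr start, nuint len);

private static void CopyToJitRegion(IntPtr dst, byte[] code)
{
    pthread_jit_write_protect_np(0);                 // make MAP_JIT pages writable on this thread
    Marshal.Copy(code, 0, dst, code.Length);         // write the generated machine code
    pthread_jit_write_protect_np(1);                 // flip the pages back to executable
    sys_icache_invalidate(dst, (nuint)code.Length);  // flush the instruction cache for the range
}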

View File

@ -0,0 +1,79 @@
using ARMeilleure.Memory;
using System;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
class JitCacheInvalidation
{
private static int[] _invalidationCode = new int[]
{
unchecked((int)0xd53b0022), // mrs x2, ctr_el0
unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4
unchecked((int)0x52800083), // mov w3, #0x4
unchecked((int)0x12000c45), // and w5, w2, #0xf
unchecked((int)0x1ac42064), // lsl w4, w3, w4
unchecked((int)0x51000482), // sub w2, w4, #0x1
unchecked((int)0x8a220002), // bic x2, x0, x2
unchecked((int)0x1ac52063), // lsl w3, w3, w5
unchecked((int)0xeb01005f), // cmp x2, x1
unchecked((int)0x93407c84), // sxtw x4, w4
unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear>
unchecked((int)0xd50b7b22), // dc cvau, x2
unchecked((int)0x8b040042), // add x2, x2, x4
unchecked((int)0xeb02003f), // cmp x1, x2
unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop>
unchecked((int)0xd5033b9f), // dsb ish
unchecked((int)0x51000462), // sub w2, w3, #0x1
unchecked((int)0x93407c63), // sxtw x3, w3
unchecked((int)0x8a220000), // bic x0, x0, x2
unchecked((int)0xeb00003f), // cmp x1, x0
unchecked((int)0x540000a9), // b.ls 64 <exit>
unchecked((int)0xd50b7520), // ic ivau, x0
unchecked((int)0x8b030000), // add x0, x0, x3
unchecked((int)0xeb00003f), // cmp x1, x0
unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop>
unchecked((int)0xd5033b9f), // dsb ish
unchecked((int)0xd5033fdf), // isb
unchecked((int)0xd65f03c0), // ret
};
private delegate void InvalidateCache(ulong start, ulong end);
private InvalidateCache _invalidateCache;
private ReservedRegion _invalidateCacheCodeRegion;
private readonly bool _needsInvalidation;
public JitCacheInvalidation(IJitMemoryAllocator allocator)
{
// On macOS, a different path is used to write to the JIT cache, which does the invalidation.
if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
ulong size = (ulong)_invalidationCode.Length * sizeof(int);
ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1;
size = (size + mask) & ~mask;
_invalidateCacheCodeRegion = new ReservedRegion(allocator, size);
_invalidateCacheCodeRegion.ExpandIfNeeded(size);
Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length);
_invalidateCacheCodeRegion.Block.MapAsRx(0, size);
_invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer);
_needsInvalidation = true;
}
}
public void Invalidate(IntPtr basePointer, ulong size)
{
if (_needsInvalidation)
{
_invalidateCache((ulong)basePointer, (ulong)basePointer + size);
}
}
}
}
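The embedded routine is the standard AArch64 self-modifying-code sequence: it reads CTR_EL0 for the data and instruction cache line sizes, cleans the range with dc cvau, invalidates it with ic ivau, and finishes with dsb ish/isb barriers. Usage mirrors the JitCache call site above:

// As used by JitCache on non-macOS Arm64 hosts after copying newly generated code:
var invalidator = new JitCacheInvalidation(allocator); // the same IJitMemoryAllocator handed to JitCache
invalidator.Invalidate(funcPtr, (ulong)code.Length);   // no-op on hosts that do not need explicit invalidation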

View File

@ -7,7 +7,7 @@ using System.Runtime.InteropServices;
namespace ARMeilleure.Translation.Cache
{
static class JitUnwindWindows
static partial class JitUnwindWindows
{
private const int MaxUnwindCodesArraySize = 32; // Must be an even value.
@ -42,14 +42,15 @@ namespace ARMeilleure.Translation.Cache
private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context);
[DllImport("kernel32.dll", CharSet = CharSet.Unicode)]
private static unsafe extern bool RtlInstallFunctionTableCallback(
[LibraryImport("kernel32.dll")]
[return: MarshalAs(UnmanagedType.Bool)]
private static unsafe partial bool RtlInstallFunctionTableCallback(
ulong tableIdentifier,
ulong baseAddress,
uint length,
GetRuntimeFunctionCallback callback,
IntPtr context,
string outOfProcessCallbackDll);
[MarshalAs(UnmanagedType.LPWStr)] string outOfProcessCallbackDll);
private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback;

View File

@ -1,8 +1,9 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.CodeGen.X86;
using ARMeilleure.Diagnostics;
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Runtime.InteropServices;
namespace ARMeilleure.Translation
{
@ -12,7 +13,8 @@ namespace ARMeilleure.Translation
ControlFlowGraph cfg,
OperandType[] argTypes,
OperandType retType,
CompilerOptions options)
CompilerOptions options,
Architecture target)
{
CompilerContext cctx = new(cfg, argTypes, retType, options);
@ -49,7 +51,18 @@ namespace ARMeilleure.Translation
Logger.EndPass(PassName.RegisterToLocal, cfg);
}
return CodeGenerator.Generate(cctx);
if (target == Architecture.X64)
{
return CodeGen.X86.CodeGenerator.Generate(cctx);
}
else if (target == Architecture.Arm64)
{
return CodeGen.Arm64.CodeGenerator.Generate(cctx);
}
else
{
throw new NotImplementedException(target.ToString());
}
}
}
}
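Every call site updated in this change forwards RuntimeInformation.ProcessArchitecture, so each host selects its own code generator:

// Call-site pattern used throughout this change:
CompiledFunction compiled = Compiler.Compile(
    cfg, argTypes, retType,
    CompilerOptions.HighCq,
    RuntimeInformation.ProcessArchitecture); // Architecture.X64 or Architecture.Arm64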

View File

@ -2,7 +2,7 @@ using ARMeilleure.IntermediateRepresentation;
namespace ARMeilleure.Translation
{
struct CompilerContext
readonly struct CompilerContext
{
public ControlFlowGraph Cfg { get; }

View File

@ -25,10 +25,7 @@ namespace ARMeilleure.Translation
public static Delegate GetDelegate(MethodInfo info)
{
if (info == null)
{
throw new ArgumentNullException(nameof(info));
}
ArgumentNullException.ThrowIfNull(info);
Type[] parameters = info.GetParameters().Select(pI => pI.ParameterType).ToArray();
Type returnType = info.ReturnType;

View File

@ -35,10 +35,7 @@ namespace ARMeilleure.Translation
public static IntPtr GetDelegateFuncPtr(MethodInfo info)
{
if (info == null)
{
throw new ArgumentNullException(nameof(info));
}
ArgumentNullException.ThrowIfNull(info);
string key = GetKey(info);
@ -52,10 +49,7 @@ namespace ARMeilleure.Translation
public static int GetDelegateIndex(MethodInfo info)
{
if (info == null)
{
throw new ArgumentNullException(nameof(info));
}
ArgumentNullException.ThrowIfNull(info);
string key = GetKey(info);

View File

@ -67,10 +67,7 @@ namespace ARMeilleure.Translation
/// <returns>True if the value was added, false if the start key was already in the dictionary</returns>
public bool AddOrUpdate(K start, K end, V value, Func<K, V, V> updateFactoryCallback)
{
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
ArgumentNullException.ThrowIfNull(value);
return BSTInsert(start, end, value, updateFactoryCallback, out IntervalTreeNode<K, V> node);
}
@ -85,10 +82,7 @@ namespace ARMeilleure.Translation
/// <returns><paramref name="value"/> if <paramref name="start"/> is not yet on the tree, or the existing value otherwise</returns>
public V GetOrAdd(K start, K end, V value)
{
if (value == null)
{
throw new ArgumentNullException(nameof(value));
}
ArgumentNullException.ThrowIfNull(value);
BSTInsert(start, end, value, null, out IntervalTreeNode<K, V> node);
return node.Value;
@ -152,10 +146,7 @@ namespace ARMeilleure.Translation
/// <returns>Node reference in the tree</returns>
private IntervalTreeNode<K, V> GetNode(K key)
{
if (key == null)
{
throw new ArgumentNullException(nameof(key));
}
ArgumentNullException.ThrowIfNull(key);
IntervalTreeNode<K, V> node = _root;
while (node != null)

View File

@ -0,0 +1,10 @@
using System;
namespace ARMeilleure.Translation.PTC
{
public interface IPtcLoadState
{
event Action<PtcLoadingState, int, int> PtcStateChanged;
void Continue();
}
}
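This interface is what Translator.LoadDiskCache (further down in this diff) hands back to the frontend, so UI code can track PTC loading progress without referencing the concrete Ptc class. A hedged sketch of the consumer side, with the handler body purely illustrative:

// Hypothetical frontend usage of the load state returned by the translator:
IPtcLoadState loadState = translator.LoadDiskCache(titleIdText, displayVersion, enabled: true);
loadState.PtcStateChanged += (state, current, total) =>
{
    // e.g. update a progress bar: `current` of `total` functions, phase given by `state`
};
loadState.Continue(); // let translation proceed once the UI is ready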

View File

@ -1,7 +1,6 @@
using ARMeilleure.CodeGen;
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.Unwinding;
using ARMeilleure.CodeGen.X86;
using ARMeilleure.Common;
using ARMeilleure.Memory;
using Ryujinx.Common;
@ -22,12 +21,15 @@ using static ARMeilleure.Translation.PTC.PtcFormatter;
namespace ARMeilleure.Translation.PTC
{
public static class Ptc
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
class Ptc : IPtcLoadState
{
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 3713; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 4272; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@ -35,45 +37,49 @@ namespace ARMeilleure.Translation.PTC
private const string TitleIdTextDefault = "0000000000000000";
private const string DisplayVersionDefault = "0";
internal static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1);
internal static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
internal static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1);
public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2);
public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3);
private const byte FillingByte = 0x00;
private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
public PtcProfiler Profiler { get; }
// Carriers.
private static MemoryStream _infosStream;
private static List<byte[]> _codesList;
private static MemoryStream _relocsStream;
private static MemoryStream _unwindInfosStream;
private MemoryStream _infosStream;
private List<byte[]> _codesList;
private MemoryStream _relocsStream;
private MemoryStream _unwindInfosStream;
private static readonly ulong _outerHeaderMagic;
private static readonly ulong _innerHeaderMagic;
private readonly ulong _outerHeaderMagic;
private readonly ulong _innerHeaderMagic;
private static readonly ManualResetEvent _waitEvent;
private readonly ManualResetEvent _waitEvent;
private static readonly object _lock;
private readonly object _lock;
private static bool _disposed;
private bool _disposed;
internal static string TitleIdText { get; private set; }
internal static string DisplayVersion { get; private set; }
public string TitleIdText { get; private set; }
public string DisplayVersion { get; private set; }
private static MemoryManagerMode _memoryMode;
private MemoryManagerType _memoryMode;
internal static string CachePathActual { get; private set; }
internal static string CachePathBackup { get; private set; }
public string CachePathActual { get; private set; }
public string CachePathBackup { get; private set; }
internal static PtcState State { get; private set; }
public PtcState State { get; private set; }
// Progress reporting helpers.
private static volatile int _translateCount;
private static volatile int _translateTotalCount;
public static event Action<PtcLoadingState, int, int> PtcStateChanged;
private volatile int _translateCount;
private volatile int _translateTotalCount;
public event Action<PtcLoadingState, int, int> PtcStateChanged;
static Ptc()
public Ptc()
{
Profiler = new PtcProfiler(this);
InitializeCarriers();
_outerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(OuterHeaderMagicString).AsSpan());
@ -94,12 +100,12 @@ namespace ARMeilleure.Translation.PTC
Disable();
}
public static void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerMode memoryMode)
public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode)
{
Wait();
PtcProfiler.Wait();
PtcProfiler.ClearEntries();
Profiler.Wait();
Profiler.ClearEntries();
Logger.Info?.Print(LogClass.Ptc, $"Initializing Profiled Persistent Translation Cache (enabled: {enabled}).");
@ -137,12 +143,12 @@ namespace ARMeilleure.Translation.PTC
CachePathBackup = Path.Combine(workPathBackup, DisplayVersion);
PreLoad();
PtcProfiler.PreLoad();
Profiler.PreLoad();
Enable();
}
private static void InitializeCarriers()
private void InitializeCarriers()
{
_infosStream = new MemoryStream();
_codesList = new List<byte[]>();
@ -150,7 +156,7 @@ namespace ARMeilleure.Translation.PTC
_unwindInfosStream = new MemoryStream();
}
private static void DisposeCarriers()
private void DisposeCarriers()
{
_infosStream.Dispose();
_codesList.Clear();
@ -158,12 +164,12 @@ namespace ARMeilleure.Translation.PTC
_unwindInfosStream.Dispose();
}
private static bool AreCarriersEmpty()
private bool AreCarriersEmpty()
{
return _infosStream.Length == 0L && _codesList.Count == 0 && _relocsStream.Length == 0L && _unwindInfosStream.Length == 0L;
}
private static void ResetCarriersIfNeeded()
private void ResetCarriersIfNeeded()
{
if (AreCarriersEmpty())
{
@ -175,7 +181,7 @@ namespace ARMeilleure.Translation.PTC
InitializeCarriers();
}
private static void PreLoad()
private void PreLoad()
{
string fileNameActual = string.Concat(CachePathActual, ".cache");
string fileNameBackup = string.Concat(CachePathBackup, ".cache");
@ -199,7 +205,7 @@ namespace ARMeilleure.Translation.PTC
}
}
private static unsafe bool Load(string fileName, bool isBackup)
private unsafe bool Load(string fileName, bool isBackup)
{
using (FileStream compressedStream = new(fileName, FileMode.Open))
using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true))
@ -255,6 +261,13 @@ namespace ARMeilleure.Translation.PTC
return false;
}
if (outerHeader.Architecture != (uint)RuntimeInformation.ProcessArchitecture)
{
InvalidateCompressedStream(compressedStream);
return false;
}
IntPtr intPtr = IntPtr.Zero;
try
@ -376,12 +389,12 @@ namespace ARMeilleure.Translation.PTC
return true;
}
private static void InvalidateCompressedStream(FileStream compressedStream)
private void InvalidateCompressedStream(FileStream compressedStream)
{
compressedStream.SetLength(0L);
}
private static void PreSave()
private void PreSave()
{
_waitEvent.Reset();
@ -409,7 +422,7 @@ namespace ARMeilleure.Translation.PTC
_waitEvent.Set();
}
private static unsafe void Save(string fileName)
private unsafe void Save(string fileName)
{
int translatedFuncsCount;
@ -431,6 +444,7 @@ namespace ARMeilleure.Translation.PTC
outerHeader.FeatureInfo = GetFeatureInfo();
outerHeader.MemoryManagerMode = GetMemoryManagerMode();
outerHeader.OSPlatform = GetOSPlatform();
outerHeader.Architecture = (uint)RuntimeInformation.ProcessArchitecture;
outerHeader.UncompressedStreamSize =
(long)Unsafe.SizeOf<InnerHeader>() +
@ -517,7 +531,7 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void LoadTranslations(Translator translator)
public void LoadTranslations(Translator translator)
{
if (AreCarriersEmpty())
{
@ -550,7 +564,7 @@ namespace ARMeilleure.Translation.PTC
bool isEntryChanged = infoEntry.Hash != ComputeHash(translator.Memory, infoEntry.Address, infoEntry.GuestSize);
if (isEntryChanged || (!infoEntry.HighCq && PtcProfiler.ProfiledFuncs.TryGetValue(infoEntry.Address, out var value) && value.HighCq))
if (isEntryChanged || (!infoEntry.HighCq && Profiler.ProfiledFuncs.TryGetValue(infoEntry.Address, out var value) && value.HighCq))
{
infoEntry.Stubbed = true;
infoEntry.CodeLength = 0;
@ -601,38 +615,38 @@ namespace ARMeilleure.Translation.PTC
Logger.Info?.Print(LogClass.Ptc, $"{translator.Functions.Count} translated functions loaded");
}
private static int GetEntriesCount()
private int GetEntriesCount()
{
return _codesList.Count;
}
[Conditional("DEBUG")]
private static void SkipCode(int index, int codeLength)
private void SkipCode(int index, int codeLength)
{
Debug.Assert(_codesList[index].Length == 0);
Debug.Assert(codeLength == 0);
}
private static void SkipReloc(int relocEntriesCount)
private void SkipReloc(int relocEntriesCount)
{
_relocsStream.Seek(relocEntriesCount * RelocEntry.Stride, SeekOrigin.Current);
}
private static void SkipUnwindInfo(BinaryReader unwindInfosReader)
private void SkipUnwindInfo(BinaryReader unwindInfosReader)
{
int pushEntriesLength = unwindInfosReader.ReadInt32();
_unwindInfosStream.Seek(pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride, SeekOrigin.Current);
}
private static byte[] ReadCode(int index, int codeLength)
private byte[] ReadCode(int index, int codeLength)
{
Debug.Assert(_codesList[index].Length == codeLength);
return _codesList[index];
}
private static RelocEntry[] GetRelocEntries(BinaryReader relocsReader, int relocEntriesCount)
private RelocEntry[] GetRelocEntries(BinaryReader relocsReader, int relocEntriesCount)
{
RelocEntry[] relocEntries = new RelocEntry[relocEntriesCount];
@ -648,7 +662,7 @@ namespace ARMeilleure.Translation.PTC
return relocEntries;
}
private static void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter)
private void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter)
{
callCounter = null;
@ -702,7 +716,7 @@ namespace ARMeilleure.Translation.PTC
}
}
private static UnwindInfo ReadUnwindInfo(BinaryReader unwindInfosReader)
private UnwindInfo ReadUnwindInfo(BinaryReader unwindInfosReader)
{
int pushEntriesLength = unwindInfosReader.ReadInt32();
@ -723,7 +737,7 @@ namespace ARMeilleure.Translation.PTC
return new UnwindInfo(pushEntries, prologueSize);
}
private static TranslatedFunction FastTranslate(
private TranslatedFunction FastTranslate(
byte[] code,
Counter<uint> callCounter,
ulong guestSize,
@ -736,19 +750,19 @@ namespace ARMeilleure.Translation.PTC
return new TranslatedFunction(gFunc, callCounter, guestSize, highCq);
}
private static void UpdateInfo(InfoEntry infoEntry)
private void UpdateInfo(InfoEntry infoEntry)
{
_infosStream.Seek(-Unsafe.SizeOf<InfoEntry>(), SeekOrigin.Current);
SerializeStructure(_infosStream, infoEntry);
}
private static void StubCode(int index)
private void StubCode(int index)
{
_codesList[index] = Array.Empty<byte>();
}
private static void StubReloc(int relocEntriesCount)
private void StubReloc(int relocEntriesCount)
{
for (int i = 0; i < relocEntriesCount * RelocEntry.Stride; i++)
{
@ -756,7 +770,7 @@ namespace ARMeilleure.Translation.PTC
}
}
private static void StubUnwindInfo(BinaryReader unwindInfosReader)
private void StubUnwindInfo(BinaryReader unwindInfosReader)
{
int pushEntriesLength = unwindInfosReader.ReadInt32();
@ -766,9 +780,9 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void MakeAndSaveTranslations(Translator translator)
public void MakeAndSaveTranslations(Translator translator)
{
var profiledFuncsToTranslate = PtcProfiler.GetProfiledFuncsToTranslate(translator.Functions);
var profiledFuncsToTranslate = Profiler.GetProfiledFuncsToTranslate(translator.Functions);
_translateCount = 0;
_translateTotalCount = profiledFuncsToTranslate.Count;
@ -811,7 +825,7 @@ namespace ARMeilleure.Translation.PTC
{
ulong address = item.address;
Debug.Assert(PtcProfiler.IsAddressInStaticCodeRange(address));
Debug.Assert(Profiler.IsAddressInStaticCodeRange(address));
TranslatedFunction func = translator.Translate(address, item.funcProfile.Mode, item.funcProfile.HighCq);
@ -861,7 +875,7 @@ namespace ARMeilleure.Translation.PTC
preSaveThread.Start();
}
private static void ReportProgress(object state)
private void ReportProgress(object state)
{
const int refreshRate = 50; // ms.
@ -882,12 +896,12 @@ namespace ARMeilleure.Translation.PTC
while (!endEvent.WaitOne(refreshRate));
}
internal static Hash128 ComputeHash(IMemoryManager memory, ulong address, ulong guestSize)
public static Hash128 ComputeHash(IMemoryManager memory, ulong address, ulong guestSize)
{
return XXHash128.ComputeHash(memory.GetSpan(address, checked((int)(guestSize))));
}
internal static void WriteCompiledFunction(ulong address, ulong guestSize, Hash128 hash, bool highCq, CompiledFunction compiledFunc)
public void WriteCompiledFunction(ulong address, ulong guestSize, Hash128 hash, bool highCq, CompiledFunction compiledFunc)
{
lock (_lock)
{
@ -936,26 +950,41 @@ namespace ARMeilleure.Translation.PTC
}
}
private static void WriteCode(ReadOnlySpan<byte> code)
private void WriteCode(ReadOnlySpan<byte> code)
{
_codesList.Add(code.ToArray());
}
internal static bool GetEndianness()
public static bool GetEndianness()
{
return BitConverter.IsLittleEndian;
}
private static FeatureInfo GetFeatureInfo()
{
return new FeatureInfo(
(uint)HardwareCapabilities.FeatureInfo1Ecx,
(uint)HardwareCapabilities.FeatureInfo1Edx,
(uint)HardwareCapabilities.FeatureInfo7Ebx,
(uint)HardwareCapabilities.FeatureInfo7Ecx);
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
return new FeatureInfo(
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
(ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
0);
}
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
{
return new FeatureInfo(
(ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
(ulong)X86HardwareCapabilities.FeatureInfo1Edx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx);
}
else
{
return new FeatureInfo(0, 0, 0, 0);
}
}
private static byte GetMemoryManagerMode()
private byte GetMemoryManagerMode()
{
return (byte)_memoryMode;
}
@ -972,7 +1001,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform;
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)]
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 78*/)]
private struct OuterHeader
{
public ulong Magic;
@ -983,6 +1012,7 @@ namespace ARMeilleure.Translation.PTC
public FeatureInfo FeatureInfo;
public byte MemoryManagerMode;
public uint OSPlatform;
public uint Architecture;
public long UncompressedStreamSize;
@ -1003,8 +1033,8 @@ namespace ARMeilleure.Translation.PTC
}
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)]
private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)]
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader
@ -1050,12 +1080,12 @@ namespace ARMeilleure.Translation.PTC
public int RelocEntriesCount;
}
private static void Enable()
private void Enable()
{
State = PtcState.Enabled;
}
public static void Continue()
public void Continue()
{
if (State == PtcState.Enabled)
{
@ -1063,7 +1093,7 @@ namespace ARMeilleure.Translation.PTC
}
}
public static void Close()
public void Close()
{
if (State == PtcState.Enabled ||
State == PtcState.Continuing)
@ -1072,17 +1102,17 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void Disable()
public void Disable()
{
State = PtcState.Disabled;
}
private static void Wait()
private void Wait()
{
_waitEvent.WaitOne();
}
public static void Dispose()
public void Dispose()
{
if (!_disposed)
{

View File

@ -16,7 +16,7 @@ using static ARMeilleure.Translation.PTC.PtcFormatter;
namespace ARMeilleure.Translation.PTC
{
public static class PtcProfiler
class PtcProfiler
{
private const string OuterHeaderMagicString = "Pohd\0\0\0\0";
@ -26,27 +26,31 @@ namespace ARMeilleure.Translation.PTC
private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;
private static readonly System.Timers.Timer _timer;
private readonly Ptc _ptc;
private static readonly ulong _outerHeaderMagic;
private readonly System.Timers.Timer _timer;
private static readonly ManualResetEvent _waitEvent;
private readonly ulong _outerHeaderMagic;
private static readonly object _lock;
private readonly ManualResetEvent _waitEvent;
private static bool _disposed;
private readonly object _lock;
private static Hash128 _lastHash;
private bool _disposed;
internal static Dictionary<ulong, FuncProfile> ProfiledFuncs { get; private set; }
private Hash128 _lastHash;
internal static bool Enabled { get; private set; }
public Dictionary<ulong, FuncProfile> ProfiledFuncs { get; private set; }
public static ulong StaticCodeStart { internal get; set; }
public static ulong StaticCodeSize { internal get; set; }
public bool Enabled { get; private set; }
static PtcProfiler()
public ulong StaticCodeStart { get; set; }
public ulong StaticCodeSize { get; set; }
public PtcProfiler(Ptc ptc)
{
_ptc = ptc;
_timer = new System.Timers.Timer((double)SaveInterval * 1000d);
_timer.Elapsed += PreSave;
@ -63,7 +67,7 @@ namespace ARMeilleure.Translation.PTC
Enabled = false;
}
internal static void AddEntry(ulong address, ExecutionMode mode, bool highCq)
public void AddEntry(ulong address, ExecutionMode mode, bool highCq)
{
if (IsAddressInStaticCodeRange(address))
{
@ -76,7 +80,7 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void UpdateEntry(ulong address, ExecutionMode mode, bool highCq)
public void UpdateEntry(ulong address, ExecutionMode mode, bool highCq)
{
if (IsAddressInStaticCodeRange(address))
{
@ -91,12 +95,12 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static bool IsAddressInStaticCodeRange(ulong address)
public bool IsAddressInStaticCodeRange(ulong address)
{
return address >= StaticCodeStart && address < StaticCodeStart + StaticCodeSize;
}
internal static ConcurrentQueue<(ulong address, FuncProfile funcProfile)> GetProfiledFuncsToTranslate(TranslatorCache<TranslatedFunction> funcs)
public ConcurrentQueue<(ulong address, FuncProfile funcProfile)> GetProfiledFuncsToTranslate(TranslatorCache<TranslatedFunction> funcs)
{
var profiledFuncsToTranslate = new ConcurrentQueue<(ulong address, FuncProfile funcProfile)>();
@ -111,18 +115,18 @@ namespace ARMeilleure.Translation.PTC
return profiledFuncsToTranslate;
}
internal static void ClearEntries()
public void ClearEntries()
{
ProfiledFuncs.Clear();
ProfiledFuncs.TrimExcess();
}
internal static void PreLoad()
public void PreLoad()
{
_lastHash = default;
string fileNameActual = string.Concat(Ptc.CachePathActual, ".info");
string fileNameBackup = string.Concat(Ptc.CachePathBackup, ".info");
string fileNameActual = string.Concat(_ptc.CachePathActual, ".info");
string fileNameBackup = string.Concat(_ptc.CachePathBackup, ".info");
FileInfo fileInfoActual = new FileInfo(fileNameActual);
FileInfo fileInfoBackup = new FileInfo(fileNameBackup);
@ -143,7 +147,7 @@ namespace ARMeilleure.Translation.PTC
}
}
private static bool Load(string fileName, bool isBackup)
private bool Load(string fileName, bool isBackup)
{
using (FileStream compressedStream = new(fileName, FileMode.Open))
using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true))
@ -228,22 +232,22 @@ namespace ARMeilleure.Translation.PTC
return DeserializeDictionary<ulong, FuncProfile>(stream, (stream) => DeserializeStructure<FuncProfile>(stream));
}
private static ReadOnlySpan<byte> GetReadOnlySpan(MemoryStream memoryStream)
private ReadOnlySpan<byte> GetReadOnlySpan(MemoryStream memoryStream)
{
return new(memoryStream.GetBuffer(), (int)memoryStream.Position, (int)memoryStream.Length - (int)memoryStream.Position);
}
private static void InvalidateCompressedStream(FileStream compressedStream)
private void InvalidateCompressedStream(FileStream compressedStream)
{
compressedStream.SetLength(0L);
}
private static void PreSave(object source, System.Timers.ElapsedEventArgs e)
private void PreSave(object source, System.Timers.ElapsedEventArgs e)
{
_waitEvent.Reset();
string fileNameActual = string.Concat(Ptc.CachePathActual, ".info");
string fileNameBackup = string.Concat(Ptc.CachePathBackup, ".info");
string fileNameActual = string.Concat(_ptc.CachePathActual, ".info");
string fileNameBackup = string.Concat(_ptc.CachePathBackup, ".info");
FileInfo fileInfoActual = new FileInfo(fileNameActual);
@ -257,7 +261,7 @@ namespace ARMeilleure.Translation.PTC
_waitEvent.Set();
}
private static void Save(string fileName)
private void Save(string fileName)
{
int profiledFuncsCount;
@ -329,7 +333,7 @@ namespace ARMeilleure.Translation.PTC
}
}
private static void Serialize(Stream stream, Dictionary<ulong, FuncProfile> profiledFuncs)
private void Serialize(Stream stream, Dictionary<ulong, FuncProfile> profiledFuncs)
{
SerializeDictionary(stream, profiledFuncs, (stream, structure) => SerializeStructure(stream, structure));
}
@ -361,7 +365,7 @@ namespace ARMeilleure.Translation.PTC
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 5*/)]
internal struct FuncProfile
public struct FuncProfile
{
public ExecutionMode Mode;
public bool HighCq;
@ -373,10 +377,10 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void Start()
public void Start()
{
if (Ptc.State == PtcState.Enabled ||
Ptc.State == PtcState.Continuing)
if (_ptc.State == PtcState.Enabled ||
_ptc.State == PtcState.Continuing)
{
Enabled = true;
@ -384,7 +388,7 @@ namespace ARMeilleure.Translation.PTC
}
}
public static void Stop()
public void Stop()
{
Enabled = false;
@ -394,12 +398,12 @@ namespace ARMeilleure.Translation.PTC
}
}
internal static void Wait()
public void Wait()
{
_waitEvent.WaitOne();
}
public static void Dispose()
public void Dispose()
{
if (!_disposed)
{

View File

@ -14,7 +14,7 @@ namespace ARMeilleure.Translation
private const int RegsCount = 32;
private const int RegsMask = RegsCount - 1;
private struct RegisterMask : IEquatable<RegisterMask>
private readonly struct RegisterMask : IEquatable<RegisterMask>
{
public long IntMask => Mask.GetElement(0);
public long VecMask => Mask.GetElement(1);

View File

@ -140,7 +140,7 @@ namespace ARMeilleure.Translation
}
}
Array.Clear(localDefs, 0, localDefs.Length);
Array.Clear(localDefs);
}
}

View File

@ -14,6 +14,7 @@ using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Threading;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
@ -44,6 +45,8 @@ namespace ARMeilleure.Translation
private readonly IJitMemoryAllocator _allocator;
private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
private readonly Ptc _ptc;
internal TranslatorCache<TranslatedFunction> Functions { get; }
internal AddressTable<ulong> FunctionTable { get; }
internal EntryTable<uint> CountTable { get; }
@ -63,6 +66,8 @@ namespace ARMeilleure.Translation
_oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
_ptc = new Ptc();
Queue = new TranslatorQueue();
JitCache.Initialize(allocator);
@ -80,22 +85,37 @@ namespace ARMeilleure.Translation
}
}
public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled)
{
_ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type);
return _ptc;
}
public void PrepareCodeRange(ulong address, ulong size)
{
if (_ptc.Profiler.StaticCodeSize == 0)
{
_ptc.Profiler.StaticCodeStart = address;
_ptc.Profiler.StaticCodeSize = size;
}
}
public void Execute(State.ExecutionContext context, ulong address)
{
if (Interlocked.Increment(ref _threadCount) == 1)
{
IsReadyForTranslation.WaitOne();
if (Ptc.State == PtcState.Enabled)
if (_ptc.State == PtcState.Enabled)
{
Debug.Assert(Functions.Count == 0);
Ptc.LoadTranslations(this);
Ptc.MakeAndSaveTranslations(this);
_ptc.LoadTranslations(this);
_ptc.MakeAndSaveTranslations(this);
}
PtcProfiler.Start();
_ptc.Profiler.Start();
Ptc.Disable();
_ptc.Disable();
// Simple heuristic, should be user configurable in future. (1 for 4 core/ht or less, 2 for 6 core + ht
// etc). All threads are normal priority except from the last, which just fills as much of the last core
@ -148,6 +168,12 @@ namespace ARMeilleure.Translation
Stubs.Dispose();
FunctionTable.Dispose();
CountTable.Dispose();
_ptc.Close();
_ptc.Profiler.Stop();
_ptc.Dispose();
_ptc.Profiler.Dispose();
}
}
@ -189,9 +215,9 @@ namespace ARMeilleure.Translation
func = oldFunc;
}
if (PtcProfiler.Enabled)
if (_ptc.Profiler.Enabled)
{
PtcProfiler.AddEntry(address, mode, highCq: false);
_ptc.Profiler.AddEntry(address, mode, highCq: false);
}
RegisterFunction(address, func);
@ -217,6 +243,7 @@ namespace ARMeilleure.Translation
Stubs,
address,
highCq,
_ptc.State != PtcState.Disabled,
mode: Aarch32Mode.User);
Logger.StartPass(PassName.Decoding);
@ -256,13 +283,13 @@ namespace ARMeilleure.Translation
options |= CompilerOptions.Relocatable;
}
CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options);
CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture);
if (context.HasPtc && !singleStep)
{
Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize);
Ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc);
_ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc);
}
GuestFunction func = compiledFunc.Map<GuestFunction>();
@ -284,16 +311,16 @@ namespace ARMeilleure.Translation
return func;
});
if (PtcProfiler.Enabled)
if (_ptc.Profiler.Enabled)
{
PtcProfiler.UpdateEntry(request.Address, request.Mode, highCq: true);
_ptc.Profiler.UpdateEntry(request.Address, request.Mode, highCq: true);
}
RegisterFunction(request.Address, func);
}
}
private struct Range
private readonly struct Range
{
public ulong Start { get; }
public ulong End { get; }
@ -333,9 +360,14 @@ namespace ARMeilleure.Translation
}
}
if (block.Address == context.EntryAddress && !context.HighCq)
if (block.Address == context.EntryAddress)
{
EmitRejitCheck(context, out counter);
if (!context.HighCq)
{
EmitRejitCheck(context, out counter);
}
context.ClearQcFlag();
}
context.CurrBlock = block;
@ -360,9 +392,14 @@ namespace ARMeilleure.Translation
bool isLastOp = opcIndex == block.OpCodes.Count - 1;
if (isLastOp && block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
if (isLastOp)
{
EmitSynchronization(context);
context.SyncQcFlag();
if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
{
EmitSynchronization(context);
}
}
Operand lblPredicateSkip = default;

View File

@ -30,10 +30,7 @@ namespace ARMeilleure.Translation
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
return _dispatchStub.Value;
}
@ -47,10 +44,7 @@ namespace ARMeilleure.Translation
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
return _slowDispatchStub.Value;
}
@ -64,10 +58,7 @@ namespace ARMeilleure.Translation
{
get
{
if (_disposed)
{
throw new ObjectDisposedException(null);
}
ObjectDisposedException.ThrowIf(_disposed, this);
return _dispatchLoop.Value;
}
@ -81,7 +72,9 @@ namespace ARMeilleure.Translation
/// <exception cref="ArgumentNullException"><paramref name="translator"/> is null</exception>
public TranslatorStubs(Translator translator)
{
_translator = translator ?? throw new ArgumentNullException(nameof(translator));
ArgumentNullException.ThrowIfNull(translator);
_translator = translator;
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
}
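Guard clauses across the project are collapsed into the throw helpers available on the newer runtime (ArgumentNullException.ThrowIfNull since .NET 6, ObjectDisposedException.ThrowIf since .NET 7, matching the net7.0 retarget later in this diff). The shape of the change:

// Before / after with the throw helpers:
if (translator == null) throw new ArgumentNullException(nameof(translator));
ArgumentNullException.ThrowIfNull(translator);

if (_disposed) throw new ObjectDisposedException(null);
ObjectDisposedException.ThrowIf(_disposed, this);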
@ -178,7 +171,7 @@ namespace ARMeilleure.Translation
var retType = OperandType.I64;
var argTypes = new[] { OperandType.I64 };
var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
return Marshal.GetFunctionPointerForDelegate(func);
}
@ -204,7 +197,7 @@ namespace ARMeilleure.Translation
var retType = OperandType.I64;
var argTypes = new[] { OperandType.I64 };
var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<GuestFunction>();
var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
return Marshal.GetFunctionPointerForDelegate(func);
}
@ -242,7 +235,7 @@ namespace ARMeilleure.Translation
var retType = OperandType.None;
var argTypes = new[] { OperandType.I64, OperandType.I64 };
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq).Map<DispatcherFunction>();
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
}
}
}

Directory.Packages.props Normal file
View File

@ -0,0 +1,54 @@
<Project>
<PropertyGroup>
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Avalonia" Version="0.10.18" />
<PackageVersion Include="Avalonia.Controls.DataGrid" Version="0.10.18" />
<PackageVersion Include="Avalonia.Desktop" Version="0.10.18" />
<PackageVersion Include="Avalonia.Diagnostics" Version="0.10.18" />
<PackageVersion Include="Avalonia.Markup.Xaml.Loader" Version="0.10.18" />
<PackageVersion Include="Avalonia.Svg" Version="0.10.18" />
<PackageVersion Include="Avalonia.Svg.Skia" Version="0.10.18" />
<PackageVersion Include="CommandLineParser" Version="2.9.1" />
<PackageVersion Include="Concentus" Version="1.1.7" />
<PackageVersion Include="Crc32.NET" Version="1.2.0" />
<PackageVersion Include="DiscordRichPresence" Version="1.1.3.18" />
<PackageVersion Include="DynamicData" Version="7.12.11" />
<PackageVersion Include="FluentAvaloniaUI" Version="1.4.5" />
<PackageVersion Include="GtkSharp.Dependencies" Version="1.1.1" />
<PackageVersion Include="GtkSharp.Dependencies.osx" Version="0.0.5" />
<PackageVersion Include="jp2masa.Avalonia.Flexbox" Version="0.2.0" />
<PackageVersion Include="LibHac" Version="0.17.0" />
<PackageVersion Include="Microsoft.CodeAnalysis.Analyzers" Version="3.3.3" />
<PackageVersion Include="Microsoft.CodeAnalysis.CSharp" Version="4.4.0" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.4.1" />
<PackageVersion Include="MsgPack.Cli" Version="1.0.1" />
<PackageVersion Include="NUnit" Version="3.13.3" />
<PackageVersion Include="NUnit3TestAdapter" Version="4.1.0" />
<PackageVersion Include="OpenTK.Core" Version="4.7.5" />
<PackageVersion Include="OpenTK.Graphics" Version="4.7.5" />
<PackageVersion Include="OpenTK.OpenAL" Version="4.7.5" />
<PackageVersion Include="OpenTK.Windowing.GraphicsLibraryFramework" Version="4.7.5" />
<PackageVersion Include="Ryujinx.Audio.OpenAL.Dependencies" Version="1.21.0.1" />
<PackageVersion Include="Ryujinx.Graphics.Nvdec.Dependencies" Version="5.0.1-build13" />
<PackageVersion Include="Ryujinx.Graphics.Vulkan.Dependencies.MoltenVK" Version="1.2.0" />
<PackageVersion Include="Ryujinx.GtkSharp" Version="3.24.24.59-ryujinx" />
<PackageVersion Include="Ryujinx.SDL2-CS" Version="2.26.1-build23" />
<PackageVersion Include="shaderc.net" Version="0.1.0" />
<PackageVersion Include="SharpZipLib" Version="1.4.1" />
<PackageVersion Include="Silk.NET.Vulkan" Version="2.16.0" />
<PackageVersion Include="Silk.NET.Vulkan.Extensions.EXT" Version="2.16.0" />
<PackageVersion Include="Silk.NET.Vulkan.Extensions.KHR" Version="2.16.0" />
<PackageVersion Include="SixLabors.ImageSharp" Version="1.0.4" />
<PackageVersion Include="SixLabors.ImageSharp.Drawing" Version="1.0.0-beta11" />
<PackageVersion Include="SPB" Version="0.0.4-build28" />
<PackageVersion Include="System.Drawing.Common" Version="7.0.0" />
<PackageVersion Include="System.IdentityModel.Tokens.Jwt" Version="6.25.1" />
<PackageVersion Include="System.IO.FileSystem.Primitives" Version="4.3.0" />
<PackageVersion Include="System.Management" Version="7.0.0" />
<PackageVersion Include="System.Net.NameResolution" Version="4.3.0" />
<PackageVersion Include="System.Threading.ThreadPool" Version="4.3.0" />
<PackageVersion Include="XamlNameReferenceGenerator" Version="1.5.1" />
</ItemGroup>
</Project>

View File

@ -21,6 +21,10 @@
<img src="https://github.com/Ryujinx/Ryujinx/actions/workflows/release.yml/badge.svg"
alt="">
</a>
<a href="https://crwd.in/ryujinx">
<img src="https://badges.crowdin.net/ryujinx/localized.svg"
alt="">
</a>
<a href="https://discord.com/invite/VkQYXAZ">
<img src="https://img.shields.io/discord/410208534861447168?color=5865F2&label=Ryujinx&logo=discord&logoColor=white"
alt="Discord">
@ -36,18 +40,20 @@
## Compatibility
As of October 2022, Ryujinx has been tested on approximately 3,700 titles; over 3,500 boot past menus and into gameplay, with roughly 3,000 of those being considered playable.
As of November 2022, Ryujinx has been tested on approximately 3,800 titles; over 3,600 boot past menus and into gameplay, with roughly 3,200 of those being considered playable.
You can check out the compatibility list [here](https://github.com/Ryujinx/Ryujinx-Games-List/issues). Anyone is free to submit a new game test or update an existing game test entry; simply follow the new issue template and testing guidelines, or post as a reply to the applicable game issue. Use the search function to see if a game has been tested already!
## Usage
To run this emulator, your PC must be equipped with at least 8GB of RAM; failing to meet this requirement may result in a poor gameplay experience or unexpected crashes.
To run this emulator, your PC must be equipped with at least 8GiB of RAM; failing to meet this requirement may result in a poor gameplay experience or unexpected crashes.
See our [Setup & Configuration Guide](https://github.com/Ryujinx/Ryujinx/wiki/Ryujinx-Setup-&-Configuration-Guide) on how to set up the emulator.
For our Local Wireless and LAN builds, see our [Multiplayer: Local Play/Local Wireless Guide
](https://github.com/Ryujinx/Ryujinx/wiki/Multiplayer-(LDN-Local-Wireless)-Guide).
Avalonia UI comes with translations for various languages. See [Crowdin](https://crwd.in/ryujinx) for more information.
## Latest build
These builds are compiled automatically for each commit on the master branch. While we strive to ensure optimal stability and performance prior to pushing an update, our automated builds **may be unstable or completely broken.**
@ -62,7 +68,7 @@ The latest automatic build for Windows, macOS, and Linux can be found on the [Of
If you wish to build the emulator yourself, follow these steps:
### Step 1
Install the X64 version of [.NET 6.0 (or higher) SDK](https://dotnet.microsoft.com/download/dotnet/6.0).
Install the X64 version of [.NET 7.0 (or higher) SDK](https://dotnet.microsoft.com/download/dotnet/7.0).
### Step 2
Either use `git clone https://github.com/Ryujinx/Ryujinx` on the command line to clone the repository or use Code --> Download zip button to get the files.
@ -90,7 +96,7 @@ Ryujinx system files are stored in the `Ryujinx` folder. This folder is located
- **GPU**
The GPU emulator emulates the Switch's Maxwell GPU using either the OpenGL (version 4.5 minimum) or Vulkan APIs through a custom build of OpenTK or Silk.NET respectively. There are currently four graphics enhancements available to the end user in Ryujinx: Disk Shader Caching, Resolution Scaling, Aspect Ratio Adjustment, and Anisotropic Filtering. These enhancements can be adjusted or toggled as desired in the GUI.
The GPU emulator emulates the Switch's Maxwell GPU using the OpenGL (version 4.5 minimum), Vulkan, or Metal (via MoltenVK) APIs through a custom build of OpenTK or Silk.NET, respectively. There are currently four graphics enhancements available to the end user in Ryujinx: Disk Shader Caching, Resolution Scaling, Aspect Ratio Adjustment, and Anisotropic Filtering. These enhancements can be adjusted or toggled as desired in the GUI.
- **Input**

View File

@ -1,11 +1,11 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net7.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="OpenTK.OpenAL" Version="4.7.2" />
<PackageReference Include="OpenTK.OpenAL" />
</ItemGroup>
<ItemGroup>

View File

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net7.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

View File

@ -1,5 +1,6 @@
using Ryujinx.Audio.Common;
using Ryujinx.Audio.Integration;
using Ryujinx.Common.Logging;
using Ryujinx.Memory;
using Ryujinx.SDL2.Common;
using System;
@ -112,6 +113,9 @@ namespace Ryujinx.Audio.Backends.SDL2
if (device == 0)
{
Logger.Error?.Print(LogClass.Application,
$"SDL2 open audio device initialization failed with error \"{SDL_GetError()}\"");
return 0;
}
@ -119,6 +123,7 @@ namespace Ryujinx.Audio.Backends.SDL2
if (!isValid)
{
Logger.Error?.Print(LogClass.Application, "SDL2 open audio device is not valid");
SDL_CloseAudioDevice(device);
return 0;

View File

@ -0,0 +1,178 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public static partial class SoundIo
{
private const string LibraryName = "libsoundio";
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public unsafe delegate void OnDeviceChangeNativeDelegate(IntPtr ctx);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public unsafe delegate void OnBackendDisconnectedDelegate(IntPtr ctx, SoundIoError err);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public unsafe delegate void OnEventsSignalDelegate(IntPtr ctx);
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public unsafe delegate void EmitRtPrioWarningDelegate();
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public unsafe delegate void JackCallbackDelegate(IntPtr msg);
[StructLayout(LayoutKind.Sequential)]
public struct SoundIoStruct
{
public IntPtr UserData;
public IntPtr OnDeviceChange;
public IntPtr OnBackendDisconnected;
public IntPtr OnEventsSignal;
public SoundIoBackend CurrentBackend;
public IntPtr ApplicationName;
public IntPtr EmitRtPrioWarning;
public IntPtr JackInfoCallback;
public IntPtr JackErrorCallback;
}
public struct SoundIoChannelLayout
{
public IntPtr Name;
public int ChannelCount;
public Array24<SoundIoChannelId> Channels;
public static IntPtr GetDefault(int channelCount)
{
return soundio_channel_layout_get_default(channelCount);
}
public static unsafe SoundIoChannelLayout GetDefaultValue(int channelCount)
{
return Unsafe.AsRef<SoundIoChannelLayout>((SoundIoChannelLayout*)GetDefault(channelCount));
}
}
public struct SoundIoSampleRateRange
{
public int Min;
public int Max;
}
public struct SoundIoDevice
{
public IntPtr SoundIo;
public IntPtr Id;
public IntPtr Name;
public SoundIoDeviceAim Aim;
public IntPtr Layouts;
public int LayoutCount;
public SoundIoChannelLayout CurrentLayout;
public IntPtr Formats;
public int FormatCount;
public SoundIoFormat CurrentFormat;
public IntPtr SampleRates;
public int SampleRateCount;
public int SampleRateCurrent;
public double SoftwareLatencyMin;
public double SoftwareLatencyMax;
public double SoftwareLatencyCurrent;
public bool IsRaw;
public int RefCount;
public SoundIoError ProbeError;
}
public struct SoundIoOutStream
{
public IntPtr Device;
public SoundIoFormat Format;
public int SampleRate;
public SoundIoChannelLayout Layout;
public double SoftwareLatency;
public float Volume;
public IntPtr UserData;
public IntPtr WriteCallback;
public IntPtr UnderflowCallback;
public IntPtr ErrorCallback;
public IntPtr Name;
public bool NonTerminalHint;
public int BytesPerFrame;
public int BytesPerSample;
public SoundIoError LayoutError;
}
public struct SoundIoChannelArea
{
public IntPtr Pointer;
public int Step;
}
[LibraryImport(LibraryName)]
public static partial IntPtr soundio_create();
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_connect(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial void soundio_disconnect(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial void soundio_flush_events(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial int soundio_output_device_count(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial int soundio_default_output_device_index(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial IntPtr soundio_get_output_device(IntPtr ctx, int index);
[LibraryImport(LibraryName)]
[return: MarshalAs(UnmanagedType.Bool)]
public static partial bool soundio_device_supports_format(IntPtr devCtx, SoundIoFormat format);
[LibraryImport(LibraryName)]
[return: MarshalAs(UnmanagedType.Bool)]
public static partial bool soundio_device_supports_layout(IntPtr devCtx, IntPtr layout);
[LibraryImport(LibraryName)]
[return: MarshalAs(UnmanagedType.Bool)]
public static partial bool soundio_device_supports_sample_rate(IntPtr devCtx, int sampleRate);
[LibraryImport(LibraryName)]
public static partial IntPtr soundio_outstream_create(IntPtr devCtx);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_open(IntPtr outStreamCtx);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_start(IntPtr outStreamCtx);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_begin_write(IntPtr outStreamCtx, IntPtr areas, IntPtr frameCount);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_end_write(IntPtr outStreamCtx);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_pause(IntPtr devCtx, [MarshalAs(UnmanagedType.Bool)] bool pause);
[LibraryImport(LibraryName)]
public static partial SoundIoError soundio_outstream_set_volume(IntPtr devCtx, double volume);
[LibraryImport(LibraryName)]
public static partial void soundio_outstream_destroy(IntPtr streamCtx);
[LibraryImport(LibraryName)]
public static partial void soundio_destroy(IntPtr ctx);
[LibraryImport(LibraryName)]
public static partial IntPtr soundio_channel_layout_get_default(int channelCount);
[LibraryImport(LibraryName)]
public static partial IntPtr soundio_strerror(SoundIoError err);
}
}
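
As a brief, hypothetical sketch (not part of the diff) of how the raw bindings above might be consumed from managed code, assuming libsoundio resolves under the declared library name and that SoundIoError lives in the same namespace as the other enums shown:

using System.Runtime.InteropServices;
using Ryujinx.Audio.Backends.SoundIo.Native;
using static Ryujinx.Audio.Backends.SoundIo.Native.SoundIo;

static class SoundIoRawExamples
{
    // Turn a SoundIoError into a readable message; soundio_strerror returns a
    // pointer to a static, NUL-terminated ANSI string owned by libsoundio.
    public static string DescribeError(SoundIoError error)
    {
        return Marshal.PtrToStringAnsi(soundio_strerror(error));
    }

    // Read the channel count of libsoundio's builtin default layout for the
    // requested number of channels, via the struct overlay helper above.
    public static int DefaultChannelCount(int requestedChannels)
    {
        return SoundIoChannelLayout.GetDefaultValue(requestedChannels).ChannelCount;
    }
}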

View File

@@ -0,0 +1,13 @@
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public enum SoundIoBackend : int
{
None = 0,
Jack = 1,
PulseAudio = 2,
Alsa = 3,
CoreAudio = 4,
Wasapi = 5,
Dummy = 6
}
}

View File

@@ -0,0 +1,75 @@
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public enum SoundIoChannelId
{
Invalid = 0,
FrontLeft = 1,
FrontRight = 2,
FrontCenter = 3,
Lfe = 4,
BackLeft = 5,
BackRight = 6,
FrontLeftCenter = 7,
FrontRightCenter = 8,
BackCenter = 9,
SideLeft = 10,
SideRight = 11,
TopCenter = 12,
TopFrontLeft = 13,
TopFrontCenter = 14,
TopFrontRight = 15,
TopBackLeft = 16,
TopBackCenter = 17,
TopBackRight = 18,
BackLeftCenter = 19,
BackRightCenter = 20,
FrontLeftWide = 21,
FrontRightWide = 22,
FrontLeftHigh = 23,
FrontCenterHigh = 24,
FrontRightHigh = 25,
TopFrontLeftCenter = 26,
TopFrontRightCenter = 27,
TopSideLeft = 28,
TopSideRight = 29,
LeftLfe = 30,
RightLfe = 31,
Lfe2 = 32,
BottomCenter = 33,
BottomLeftCenter = 34,
BottomRightCenter = 35,
MsMid = 36,
MsSide = 37,
AmbisonicW = 38,
AmbisonicX = 39,
AmbisonicY = 40,
AmbisonicZ = 41,
XyX = 42,
XyY = 43,
HeadphonesLeft = 44,
HeadphonesRight = 45,
ClickTrack = 46,
ForeignLanguage = 47,
HearingImpaired = 48,
Narration = 49,
Haptic = 50,
DialogCentricMix = 51,
Aux = 52,
Aux0 = 53,
Aux1 = 54,
Aux2 = 55,
Aux3 = 56,
Aux4 = 57,
Aux5 = 58,
Aux6 = 59,
Aux7 = 60,
Aux8 = 61,
Aux9 = 62,
Aux10 = 63,
Aux11 = 64,
Aux12 = 65,
Aux13 = 66,
Aux14 = 67,
Aux15 = 68,
}
}

View File

@@ -0,0 +1,107 @@
using System;
using System.Reflection.Metadata;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
using static Ryujinx.Audio.Backends.SoundIo.Native.SoundIo;
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public class SoundIoContext : IDisposable
{
private IntPtr _context;
private Action<SoundIoError> _onBackendDisconnect;
private OnBackendDisconnectedDelegate _onBackendDisconnectNative;
public IntPtr Context => _context;
internal SoundIoContext(IntPtr context)
{
_context = context;
_onBackendDisconnect = null;
_onBackendDisconnectNative = null;
}
public SoundIoError Connect() => soundio_connect(_context);
public void Disconnect() => soundio_disconnect(_context);
public void FlushEvents() => soundio_flush_events(_context);
public int OutputDeviceCount => soundio_output_device_count(_context);
public int DefaultOutputDeviceIndex => soundio_default_output_device_index(_context);
public Action<SoundIoError> OnBackendDisconnect
{
get { return _onBackendDisconnect; }
set
{
_onBackendDisconnect = value;
if (_onBackendDisconnect == null)
{
_onBackendDisconnectNative = null;
}
else
{
_onBackendDisconnectNative = (ctx, err) => _onBackendDisconnect(err);
}
GetContext().OnBackendDisconnected = Marshal.GetFunctionPointerForDelegate(_onBackendDisconnectNative);
}
}
private ref SoundIoStruct GetContext()
{
unsafe
{
return ref Unsafe.AsRef<SoundIoStruct>((SoundIoStruct*)_context);
}
}
public SoundIoDeviceContext GetOutputDevice(int index)
{
IntPtr deviceContext = soundio_get_output_device(_context, index);
if (deviceContext == IntPtr.Zero)
{
return null;
}
return new SoundIoDeviceContext(deviceContext);
}
public static SoundIoContext Create()
{
IntPtr context = soundio_create();
if (context == IntPtr.Zero)
{
return null;
}
return new SoundIoContext(context);
}
protected virtual void Dispose(bool disposing)
{
IntPtr currentContext = Interlocked.Exchange(ref _context, IntPtr.Zero);
if (currentContext != IntPtr.Zero)
{
soundio_destroy(currentContext);
}
}
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
~SoundIoContext()
{
Dispose(false);
}
}
}
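
A minimal usage sketch (hypothetical caller, not part of the diff) for the wrapper above; error handling is reduced to throwing on failure:

using System;
using Ryujinx.Audio.Backends.SoundIo.Native;

// Create a libsoundio context, connect to the default backend, then resolve
// the default output device through the wrapper types shown above.
using SoundIoContext audioContext = SoundIoContext.Create()
    ?? throw new InvalidOperationException("soundio_create returned null");

SoundIoError error = audioContext.Connect();
if (error != default)
{
    throw new InvalidOperationException($"soundio_connect failed: {error}");
}

// Flush pending events so device information is populated before querying it.
audioContext.FlushEvents();

// A negative index indicates that no output device is currently available.
int deviceIndex = audioContext.DefaultOutputDeviceIndex;
SoundIoDeviceContext device = deviceIndex < 0 ? null : audioContext.GetOutputDevice(deviceIndex);
if (device == null)
{
    Console.WriteLine("No usable default output device was found.");
}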

View File

@@ -0,0 +1,8 @@
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public enum SoundIoDeviceAim
{
SoundIoDeviceAimInput = 0,
SoundIoDeviceAimOutput = 1
}
}

View File

@@ -0,0 +1,49 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static Ryujinx.Audio.Backends.SoundIo.Native.SoundIo;
namespace Ryujinx.Audio.Backends.SoundIo.Native
{
public class SoundIoDeviceContext
{
private readonly IntPtr _context;
public IntPtr Context => _context;
internal SoundIoDeviceContext(IntPtr context)
{
_context = context;
}
private ref SoundIoDevice GetDeviceContext()
{
unsafe
{
return ref Unsafe.AsRef<SoundIoDevice>((SoundIoDevice*)_context);
}
}
public bool IsRaw => GetDeviceContext().IsRaw;
public string Id => Marshal.PtrToStringAnsi(GetDeviceContext().Id);
public bool SupportsSampleRate(int sampleRate) => soundio_device_supports_sample_rate(_context, sampleRate);
public bool SupportsFormat(SoundIoFormat format) => soundio_device_supports_format(_context, format);
public bool SupportsChannelCount(int channelCount) => soundio_device_supports_layout(_context, SoundIoChannelLayout.GetDefault(channelCount));
public SoundIoOutStreamContext CreateOutStream()
{
IntPtr context = soundio_outstream_create(_context);
if (context == IntPtr.Zero)
{
return null;
}
return new SoundIoOutStreamContext(context);
}
}
}
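
And a short, hypothetical follow-up sketch showing how a caller might probe a device through this wrapper before opening a stream; SoundIoFormat values are passed in rather than named, since that enum (like the SoundIoOutStreamContext file) is among the files not shown in this diff:

using Ryujinx.Audio.Backends.SoundIo.Native;

static class SoundIoDeviceExamples
{
    // Probe the capabilities exposed by SoundIoDeviceContext before committing
    // to a stream configuration; returns null if the device cannot satisfy it
    // or if soundio_outstream_create fails to allocate a stream.
    public static SoundIoOutStreamContext TryCreateStream(
        SoundIoDeviceContext device, SoundIoFormat format, int channelCount, int sampleRate)
    {
        if (!device.SupportsChannelCount(channelCount) ||
            !device.SupportsSampleRate(sampleRate) ||
            !device.SupportsFormat(format))
        {
            return null;
        }

        return device.CreateOutStream();
    }
}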

Some files were not shown because too many files have changed in this diff.