diff options
author | Jesse Talavera-Greenberg <jesse@jesse.tg> | 2023-11-18 10:40:54 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-18 16:40:54 +0100 |
commit | 544fefa27f698f3a0d799a782dc03d3eb47561db (patch) | |
tree | b4907fca30677cc4e1befb02301392f172eed543 | |
parent | f2d7a290156b5aa62edc00644c55b00de73b6229 (diff) |
Refactor the JIT to be object-oriented (#1879)
* Move TinyVector to a new file
- So it's less sensitive to #include ordering
* Forgot to include assert.h
* Refactor ARMJIT_Memory into an object
* Oops, forgot a declaration
* Refactor ARMJIT to be contained in an object
* Remove an unused function declaration
* Add a missing #include
* Remove a now-unused global
* Use ARMJIT_Memory's own memory access functions
* Fix some omissions in the ARM JIT
* Move libandroid to be a member of ARMJIT_Memory instead of a global
* Default-initialize most fields in ARMJIT_Compiler.h
* Define NOOP_IF_NO_JIT
* Finish refactoring the JIT to be object-oriented
-rw-r--r-- | src/ARM.cpp | 38 | ||||
-rw-r--r-- | src/ARM.h | 15 | ||||
-rw-r--r-- | src/ARMJIT.cpp | 246 | ||||
-rw-r--r-- | src/ARMJIT.h | 160 | ||||
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.cpp | 9 | ||||
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.h | 6 | ||||
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 26 | ||||
-rw-r--r-- | src/ARMJIT_Compiler.h | 11 | ||||
-rw-r--r-- | src/ARMJIT_Internal.h | 150 | ||||
-rw-r--r-- | src/ARMJIT_Memory.cpp | 270 | ||||
-rw-r--r-- | src/ARMJIT_Memory.h | 243 | ||||
-rw-r--r-- | src/ARMJIT_RegisterCache.h | 1 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_ALU.cpp | 1 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Branch.cpp | 1 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 7 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.h | 58 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 31 | ||||
-rw-r--r-- | src/ARM_InstrInfo.cpp | 4 | ||||
-rw-r--r-- | src/ARM_InstrInfo.h | 2 | ||||
-rw-r--r-- | src/CP15.cpp | 23 | ||||
-rw-r--r-- | src/DSi.cpp | 140 | ||||
-rw-r--r-- | src/GPU.cpp | 8 | ||||
-rw-r--r-- | src/GPU.h | 8 | ||||
-rw-r--r-- | src/JitBlock.h | 61 | ||||
-rw-r--r-- | src/NDS.cpp | 136 | ||||
-rw-r--r-- | src/NDS.h | 6 | ||||
-rw-r--r-- | src/NDSCart.h | 1 | ||||
-rw-r--r-- | src/TinyVector.h | 131 |
28 files changed, 899 insertions, 894 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 18d50fe..a361d77 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -26,11 +26,7 @@ #include "ARMJIT.h" #include "Platform.h" #include "GPU.h" - -#ifdef JIT_ENABLED -#include "ARMJIT.h" #include "ARMJIT_Memory.h" -#endif using Platform::Log; using Platform::LogLevel; @@ -88,7 +84,7 @@ void ARM::GdbCheckC() {} -u32 ARM::ConditionTable[16] = +const u32 ARM::ConditionTable[16] = { 0xF0F0, // EQ 0x0F0F, // NE @@ -108,16 +104,14 @@ u32 ARM::ConditionTable[16] = 0x0000 // NE }; - -ARM::ARM(u32 num, Melon::GPU& gpu) : +ARM::ARM(u32 num, ARMJIT::ARMJIT& jit, Melon::GPU& gpu) : #ifdef GDBSTUB_ENABLED GdbStub(this, Platform::GetConfigInt(num ? Platform::GdbPortARM7 : Platform::GdbPortARM9)), #endif + JIT(jit), + Num(num), // well uh GPU(gpu) { - // well uh - Num = num; - #ifdef GDBSTUB_ENABLED if (Platform::GetConfigBool(Platform::GdbEnabled) #ifdef JIT_ENABLED @@ -134,25 +128,21 @@ ARM::~ARM() // dorp } -ARMv5::ARMv5(Melon::GPU& gpu) : ARM(0, gpu) +ARMv5::ARMv5(ARMJIT::ARMJIT& jit, Melon::GPU& gpu) : ARM(0, jit, gpu) { -#ifndef JIT_ENABLED - DTCM = new u8[DTCMPhysicalSize]; -#endif + DTCM = JIT.Memory.GetARM9DTCM(); PU_Map = PU_PrivMap; } -ARMv4::ARMv4(Melon::GPU& gpu) : ARM(1, gpu) +ARMv4::ARMv4(ARMJIT::ARMJIT& jit, Melon::GPU& gpu) : ARM(1, jit, gpu) { // } ARMv5::~ARMv5() { -#ifndef JIT_ENABLED - delete[] DTCM; -#endif + // DTCM is owned by Memory, not going to delete it } void ARM::Reset() @@ -752,19 +742,19 @@ void ARMv5::ExecuteJIT() u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) - && !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) + && !JIT.SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { NDS::ARM9Timestamp = NDS::ARM9Target; Log(LogLevel::Error, "ARMv5 PC in non executable region %08X\n", R[15]); return; } - 
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup, + ARMJIT::JitBlockEntry block = JIT.LookUpBlock(0, FastBlockLookup, instrAddr - FastBlockLookupStart, instrAddr); if (block) ARM_Dispatch(this, block); else - ARMJIT::CompileBlock(this); + JIT.CompileBlock(this); if (StopExecution) { @@ -909,19 +899,19 @@ void ARMv4::ExecuteJIT() u32 instrAddr = R[15] - ((CPSR&0x20)?2:4); if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize)) - && !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) + && !JIT.SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize)) { NDS::ARM7Timestamp = NDS::ARM7Target; Log(LogLevel::Error, "ARMv4 PC in non executable region %08X\n", R[15]); return; } - ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup, + ARMJIT::JitBlockEntry block = JIT.LookUpBlock(1, FastBlockLookup, instrAddr - FastBlockLookupStart, instrAddr); if (block) ARM_Dispatch(this, block); else - ARMJIT::CompileBlock(this); + JIT.CompileBlock(this); if (StopExecution) { @@ -42,18 +42,24 @@ enum const u32 ITCMPhysicalSize = 0x8000; const u32 DTCMPhysicalSize = 0x4000; +namespace ARMJIT +{ +class ARMJIT; +} namespace Melon { class GPU; } +class ARMJIT_Memory; + class ARM #ifdef GDBSTUB_ENABLED : public Gdb::StubCallbacks #endif { public: - ARM(u32 num, Melon::GPU& gpu); + ARM(u32 num, ARMJIT::ARMJIT& jit, Melon::GPU& gpu); virtual ~ARM(); // destroy shit virtual void Reset(); @@ -179,11 +185,12 @@ public: u64* FastBlockLookup; #endif - static u32 ConditionTable[16]; + static const u32 ConditionTable[16]; #ifdef GDBSTUB_ENABLED Gdb::GdbStub GdbStub; #endif + ARMJIT::ARMJIT& JIT; protected: u8 (*BusRead8)(u32 addr); u16 (*BusRead16)(u32 addr); @@ -221,7 +228,7 @@ private: class ARMv5 : public ARM { public: - ARMv5(Melon::GPU& gpu); + ARMv5(ARMJIT::ARMJIT& jit, Melon::GPU& gpu); ~ARMv5(); void Reset() override; @@ 
-365,7 +372,7 @@ public: class ARMv4 : public ARM { public: - ARMv4(Melon::GPU& gpu); + ARMv4(ARMJIT::ARMJIT& jit, Melon::GPU& gpu); void Reset() override; diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 19b4438..c551213 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -17,7 +17,7 @@ */ #include "ARMJIT.h" - +#include "ARMJIT_Memory.h" #include <string.h> #include <assert.h> #include <unordered_map> @@ -58,49 +58,6 @@ namespace ARMJIT #define JIT_DEBUGPRINT(msg, ...) //#define JIT_DEBUGPRINT(msg, ...) Platform::Log(Platform::LogLevel::Debug, msg, ## __VA_ARGS__) -Compiler* JITCompiler; - -int MaxBlockSize; -bool LiteralOptimizations; -bool BranchOptimizations; -bool FastMemory; - - -std::unordered_map<u32, JitBlock*> JitBlocks9; -std::unordered_map<u32, JitBlock*> JitBlocks7; - -std::unordered_map<u32, JitBlock*> RestoreCandidates; - -TinyVector<u32> InvalidLiterals; - -AddressRange CodeIndexITCM[ITCMPhysicalSize / 512]; -AddressRange CodeIndexMainRAM[NDS::MainRAMMaxSize / 512]; -AddressRange CodeIndexSWRAM[NDS::SharedWRAMSize / 512]; -AddressRange CodeIndexVRAM[0x100000 / 512]; -AddressRange CodeIndexARM9BIOS[sizeof(NDS::ARM9BIOS) / 512]; -AddressRange CodeIndexARM7BIOS[sizeof(NDS::ARM7BIOS) / 512]; -AddressRange CodeIndexARM7WRAM[NDS::ARM7WRAMSize / 512]; -AddressRange CodeIndexARM7WVRAM[0x40000 / 512]; -AddressRange CodeIndexBIOS9DSi[0x10000 / 512]; -AddressRange CodeIndexBIOS7DSi[0x10000 / 512]; -AddressRange CodeIndexNWRAM_A[DSi::NWRAMSize / 512]; -AddressRange CodeIndexNWRAM_B[DSi::NWRAMSize / 512]; -AddressRange CodeIndexNWRAM_C[DSi::NWRAMSize / 512]; - -u64 FastBlockLookupITCM[ITCMPhysicalSize / 2]; -u64 FastBlockLookupMainRAM[NDS::MainRAMMaxSize / 2]; -u64 FastBlockLookupSWRAM[NDS::SharedWRAMSize / 2]; -u64 FastBlockLookupVRAM[0x100000 / 2]; -u64 FastBlockLookupARM9BIOS[sizeof(NDS::ARM9BIOS) / 2]; -u64 FastBlockLookupARM7BIOS[sizeof(NDS::ARM7BIOS) / 2]; -u64 FastBlockLookupARM7WRAM[NDS::ARM7WRAMSize / 2]; -u64 FastBlockLookupARM7WVRAM[0x40000 / 2]; 
-u64 FastBlockLookupBIOS9DSi[0x10000 / 2]; -u64 FastBlockLookupBIOS7DSi[0x10000 / 2]; -u64 FastBlockLookupNWRAM_A[DSi::NWRAMSize / 2]; -u64 FastBlockLookupNWRAM_B[DSi::NWRAMSize / 2]; -u64 FastBlockLookupNWRAM_C[DSi::NWRAMSize / 2]; - const u32 CodeRegionSizes[ARMJIT_Memory::memregions_Count] = { 0, @@ -123,58 +80,14 @@ const u32 CodeRegionSizes[ARMJIT_Memory::memregions_Count] = DSi::NWRAMSize, }; -AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] = -{ - NULL, - CodeIndexITCM, - NULL, - CodeIndexARM9BIOS, - CodeIndexMainRAM, - CodeIndexSWRAM, - NULL, - CodeIndexVRAM, - CodeIndexARM7BIOS, - CodeIndexARM7WRAM, - NULL, - NULL, - CodeIndexARM7WVRAM, - CodeIndexBIOS9DSi, - CodeIndexBIOS7DSi, - CodeIndexNWRAM_A, - CodeIndexNWRAM_B, - CodeIndexNWRAM_C -}; - -u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] = -{ - NULL, - FastBlockLookupITCM, - NULL, - FastBlockLookupARM9BIOS, - FastBlockLookupMainRAM, - FastBlockLookupSWRAM, - NULL, - FastBlockLookupVRAM, - FastBlockLookupARM7BIOS, - FastBlockLookupARM7WRAM, - NULL, - NULL, - FastBlockLookupARM7WVRAM, - FastBlockLookupBIOS9DSi, - FastBlockLookupBIOS7DSi, - FastBlockLookupNWRAM_A, - FastBlockLookupNWRAM_B, - FastBlockLookupNWRAM_C -}; - -u32 LocaliseCodeAddress(u32 num, u32 addr) +u32 ARMJIT::LocaliseCodeAddress(u32 num, u32 addr) const noexcept { int region = num == 0 - ? ARMJIT_Memory::ClassifyAddress9(addr) - : ARMJIT_Memory::ClassifyAddress7(addr); + ? Memory.ClassifyAddress9(addr) + : Memory.ClassifyAddress7(addr); if (CodeMemRegions[region]) - return ARMJIT_Memory::LocaliseAddress(region, num, addr); + return Memory.LocaliseAddress(region, num, addr); return 0; } @@ -203,13 +116,33 @@ T SlowRead9(u32 addr, ARMv5* cpu) } template <typename T, int ConsoleType> +T SlowRead7(u32 addr) +{ + u32 offset = addr & 0x3; + addr &= ~(sizeof(T) - 1); + + T val; + if (std::is_same<T, u32>::value) + val = (ConsoleType == 0 ? 
NDS::ARM7Read32 : DSi::ARM7Read32)(addr); + else if (std::is_same<T, u16>::value) + val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); + else + val = (ConsoleType == 0 ? NDS::ARM7Read8 : DSi::ARM7Read8)(addr); + + if (std::is_same<T, u32>::value) + return ROR(val, offset << 3); + else + return val; +} + +template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val) { addr &= ~(sizeof(T) - 1); if (addr < cpu->ITCMSize) { - CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); + cpu->JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); *(T*)&cpu->ITCM[addr & 0x7FFF] = val; } else if ((addr & cpu->DTCMMask) == cpu->DTCMBase) @@ -231,26 +164,6 @@ void SlowWrite9(u32 addr, ARMv5* cpu, u32 val) } template <typename T, int ConsoleType> -T SlowRead7(u32 addr) -{ - u32 offset = addr & 0x3; - addr &= ~(sizeof(T) - 1); - - T val; - if (std::is_same<T, u32>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read32 : DSi::ARM7Read32)(addr); - else if (std::is_same<T, u16>::value) - val = (ConsoleType == 0 ? NDS::ARM7Read16 : DSi::ARM7Read16)(addr); - else - val = (ConsoleType == 0 ? 
NDS::ARM7Read8 : DSi::ARM7Read8)(addr); - - if (std::is_same<T, u32>::value) - return ROR(val, offset << 3); - else - return val; -} - -template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val) { addr &= ~(sizeof(T) - 1); @@ -316,24 +229,13 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num) INSTANTIATE_SLOWMEM(0) INSTANTIATE_SLOWMEM(1) -void Init() -{ - JITCompiler = new Compiler(); - - ARMJIT_Memory::Init(); -} - -void DeInit() +ARMJIT::~ARMJIT() noexcept { JitEnableWrite(); ResetBlockCache(); - ARMJIT_Memory::DeInit(); - - delete JITCompiler; - JITCompiler = nullptr; } -void Reset() +void ARMJIT::Reset() noexcept { MaxBlockSize = Platform::GetConfigInt(Platform::JIT_MaxBlockSize); LiteralOptimizations = Platform::GetConfigBool(Platform::JIT_LiteralOptimizations); @@ -348,7 +250,7 @@ void Reset() JitEnableWrite(); ResetBlockCache(); - ARMJIT_Memory::Reset(); + Memory.Reset(); } void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) @@ -575,7 +477,7 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] = }; #undef F -void RetireJitBlock(JitBlock* block) +void ARMJIT::RetireJitBlock(JitBlock* block) noexcept { auto it = RestoreCandidates.find(block->InstrHash); if (it != RestoreCandidates.end()) @@ -589,7 +491,7 @@ void RetireJitBlock(JitBlock* block) } } -void CompileBlock(ARM* cpu) +void ARMJIT::CompileBlock(ARM* cpu) noexcept { bool thumb = cpu->CPSR & 0x20; @@ -616,7 +518,7 @@ void CompileBlock(ARM* cpu) u64* entry = &FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]; *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(existingBlockIt->second->EntryPoint); + *entry |= JITCompiler.SubEntryOffset(existingBlockIt->second->EntryPoint); return; } @@ -717,7 +619,7 @@ void CompileBlock(ARM* cpu) nextInstr[1] = cpuv4->CodeRead32(r15); instrs[i].CodeCycles = cpu->CodeCycles; } - instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr); + instrs[i].Info = 
ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr, LiteralOptimizations); hasMemoryInstr |= thumb ? (instrs[i].Info.Kind >= ARMInstrInfo::tk_LDR_PCREL && instrs[i].Info.Kind <= ARMInstrInfo::tk_STMIA) @@ -875,7 +777,7 @@ void CompileBlock(ARM* cpu) i++; - bool canCompile = JITCompiler->CanCompile(thumb, instrs[i - 1].Info.Kind); + bool canCompile = JITCompiler.CanCompile(thumb, instrs[i - 1].Info.Kind); bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken)); if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond) FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF); @@ -956,7 +858,7 @@ void CompileBlock(ARM* cpu) FloodFillSetFlags(instrs, i - 1, 0xF); JitEnableWrite(); - block->EntryPoint = JITCompiler->CompileBlock(cpu, thumb, instrs, i, hasMemoryInstr); + block->EntryPoint = JITCompiler.CompileBlock(cpu, thumb, instrs, i, hasMemoryInstr); JitEnableExecute(); JIT_DEBUGPRINT("block start %p\n", block->EntryPoint); @@ -977,7 +879,7 @@ void CompileBlock(ARM* cpu) AddressRange* region = CodeMemRegions[addressRanges[j] >> 27]; if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); + Memory.SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true); AddressRange* range = &region[(addressRanges[j] & 0x7FFFFFF) / 512]; range->Code |= addressMasks[j]; @@ -991,10 +893,10 @@ void CompileBlock(ARM* cpu) u64* entry = &FastBlockLookupRegions[(localAddr >> 27)][(localAddr & 0x7FFFFFF) / 2]; *entry = ((u64)blockAddr | cpu->Num) << 32; - *entry |= JITCompiler->SubEntryOffset(block->EntryPoint); + *entry |= JITCompiler.SubEntryOffset(block->EntryPoint); } -void InvalidateByAddr(u32 localAddr) +void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept { JIT_DEBUGPRINT("invalidating by addr %x\n", localAddr); @@ -1031,7 +933,7 @@ void 
InvalidateByAddr(u32 localAddr) if (range->Blocks.Length == 0 && !PageContainsCode(&region[(localAddr & 0x7FFF000) / 512])) { - ARMJIT_Memory::SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); + Memory.SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false); } bool literalInvalidation = false; @@ -1064,7 +966,7 @@ void InvalidateByAddr(u32 localAddr) if (otherRange->Blocks.Length == 0) { if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512])) - ARMJIT_Memory::SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); + Memory.SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false); otherRange->Code = 0; } @@ -1088,7 +990,7 @@ void InvalidateByAddr(u32 localAddr) } } -void CheckAndInvalidateITCM() +void ARMJIT::CheckAndInvalidateITCM() noexcept { for (u32 i = 0; i < ITCMPhysicalSize; i+=512) { @@ -1106,7 +1008,7 @@ void CheckAndInvalidateITCM() } } -void CheckAndInvalidateWVRAM(int bank) +void ARMJIT::CheckAndInvalidateWVRAM(int bank) noexcept { u32 start = bank == 1 ? 
0x20000 : 0; for (u32 i = start; i < start+0x20000; i+=512) @@ -1122,38 +1024,30 @@ void CheckAndInvalidateWVRAM(int bank) } } -template <u32 num, int region> -void CheckAndInvalidate(u32 addr) -{ - u32 localAddr = ARMJIT_Memory::LocaliseAddress(region, num, addr); - if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) - InvalidateByAddr(localAddr); -} - -JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) +JitBlockEntry ARMJIT::LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) noexcept { u64* entry = &entries[offset / 2]; if (*entry >> 32 == (addr | num)) - return JITCompiler->AddEntryOffset((u32)*entry); + return JITCompiler.AddEntryOffset((u32)*entry); return NULL; } -void blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) +void ARMJIT::blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) noexcept { u32 localAddr = LocaliseCodeAddress(num, blockAddr); - assert(JITCompiler->AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); + assert(JITCompiler.AddEntryOffset((u32)FastBlockLookupRegions[localAddr >> 27][(localAddr & 0x7FFFFFF) / 2]) == entry); } -bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) +bool ARMJIT::SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) noexcept { // amazingly ignoring the DTCM is the proper behaviour for code fetches int region = num == 0 - ? ARMJIT_Memory::ClassifyAddress9(blockAddr) - : ARMJIT_Memory::ClassifyAddress7(blockAddr); + ? 
Memory.ClassifyAddress9(blockAddr) + : Memory.ClassifyAddress7(blockAddr); u32 memoryOffset; if (FastBlockLookupRegions[region] - && ARMJIT_Memory::GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) + && Memory.GetMirrorLocation(region, num, blockAddr, memoryOffset, start, size)) { //printf("setup exec region %d %d %08x %08x %x %x\n", num, region, blockAddr, start, size, memoryOffset); entry = FastBlockLookupRegions[region] + memoryOffset / 2; @@ -1162,28 +1056,28 @@ bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& return false; } -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(u32); -template void CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u32); -template void CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u32); - -void ResetBlockCache() +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(u32); 
+template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(u32); +template void ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u32); +template void ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(u32); + +void ARMJIT::ResetBlockCache() noexcept { Log(LogLevel::Debug, "Resetting JIT block cache...\n"); // could be replace through a function which only resets // the permissions but we're too lazy - ARMJIT_Memory::Reset(); + Memory.Reset(); InvalidLiterals.Clear(); for (int i = 0; i < ARMJIT_Memory::memregions_Count; i++) @@ -1221,10 +1115,10 @@ void ResetBlockCache() JitBlocks9.clear(); JitBlocks7.clear(); - JITCompiler->Reset(); + JITCompiler.Reset(); } -void JitEnableWrite() +void ARMJIT::JitEnableWrite() noexcept { #if defined(__APPLE__) && defined(__aarch64__) if (__builtin_available(macOS 11.0, *)) @@ -1232,7 +1126,7 @@ void JitEnableWrite() #endif } -void JitEnableExecute() +void ARMJIT::JitEnableExecute() noexcept { #if defined(__APPLE__) && defined(__aarch64__) if (__builtin_available(macOS 11.0, *)) diff --git a/src/ARMJIT.h b/src/ARMJIT.h index cd97561..074e2a1 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -19,49 +19,147 @@ #ifndef ARMJIT_H #define ARMJIT_H +#include <memory> #include "types.h" 
-#include "ARM.h" -#include "ARM_InstrInfo.h" +#include "ARMJIT_Memory.h" +#include "JitBlock.h" #if defined(__APPLE__) && defined(__aarch64__) #include <pthread.h> #endif -namespace ARMJIT -{ - -typedef void (*JitBlockEntry)(); - -extern int MaxBlockSize; -extern bool LiteralOptimizations; -extern bool BranchOptimizations; -extern bool FastMemory; - -void Init(); -void DeInit(); - -void Reset(); - -void CheckAndInvalidateITCM(); -void CheckAndInvalidateWVRAM(int bank); +#include "ARMJIT_Compiler.h" -void InvalidateByAddr(u32 pseudoPhysical); +class ARM; -template <u32 num, int region> -void CheckAndInvalidate(u32 addr); - -void CompileBlock(ARM* cpu); - -void ResetBlockCache(); - -JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr); -bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size); +namespace ARMJIT +{ +class JitBlock; +class ARMJIT +{ +public: + ARMJIT() noexcept : JITCompiler(*this), Memory(*this) {} + ~ARMJIT() noexcept NOOP_IF_NO_JIT; + void InvalidateByAddr(u32) noexcept NOOP_IF_NO_JIT; + void CheckAndInvalidateWVRAM(int) noexcept NOOP_IF_NO_JIT; + void CheckAndInvalidateITCM() noexcept NOOP_IF_NO_JIT; + void Reset() noexcept NOOP_IF_NO_JIT; + void JitEnableWrite() noexcept NOOP_IF_NO_JIT; + void JitEnableExecute() noexcept NOOP_IF_NO_JIT; + void CompileBlock(ARM* cpu) noexcept NOOP_IF_NO_JIT; + void ResetBlockCache() noexcept NOOP_IF_NO_JIT; + +#ifdef JIT_ENABLED + template <u32 num, int region> + void CheckAndInvalidate(u32 addr) noexcept + { + u32 localAddr = Memory.LocaliseAddress(region, num, addr); + if (CodeMemRegions[region][(localAddr & 0x7FFFFFF) / 512].Code & (1 << ((localAddr & 0x1FF) / 16))) + InvalidateByAddr(localAddr); + } + JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr) noexcept; + bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size) noexcept; + u32 LocaliseCodeAddress(u32 num, u32 addr) const noexcept; +#else + template <u32, 
int> + void CheckAndInvalidate(u32) noexcept {} +#endif -void JitEnableWrite(); -void JitEnableExecute(); + ARMJIT_Memory Memory; + int MaxBlockSize {}; + bool LiteralOptimizations = false; + bool BranchOptimizations = false; + bool FastMemory = false; + + TinyVector<u32> InvalidLiterals {}; +private: + friend class ::ARMJIT_Memory; + void blockSanityCheck(u32 num, u32 blockAddr, JitBlockEntry entry) noexcept; + void RetireJitBlock(JitBlock* block) noexcept; + + Compiler JITCompiler; + std::unordered_map<u32, JitBlock*> JitBlocks9 {}; + std::unordered_map<u32, JitBlock*> JitBlocks7 {}; + + std::unordered_map<u32, JitBlock*> RestoreCandidates {}; + + + AddressRange CodeIndexITCM[ITCMPhysicalSize / 512] {}; + AddressRange CodeIndexMainRAM[NDS::MainRAMMaxSize / 512] {}; + AddressRange CodeIndexSWRAM[NDS::SharedWRAMSize / 512] {}; + AddressRange CodeIndexVRAM[0x100000 / 512] {}; + AddressRange CodeIndexARM9BIOS[sizeof(NDS::ARM9BIOS) / 512] {}; + AddressRange CodeIndexARM7BIOS[sizeof(NDS::ARM7BIOS) / 512] {}; + AddressRange CodeIndexARM7WRAM[NDS::ARM7WRAMSize / 512] {}; + AddressRange CodeIndexARM7WVRAM[0x40000 / 512] {}; + AddressRange CodeIndexBIOS9DSi[0x10000 / 512] {}; + AddressRange CodeIndexBIOS7DSi[0x10000 / 512] {}; + AddressRange CodeIndexNWRAM_A[DSi::NWRAMSize / 512] {}; + AddressRange CodeIndexNWRAM_B[DSi::NWRAMSize / 512] {}; + AddressRange CodeIndexNWRAM_C[DSi::NWRAMSize / 512] {}; + + u64 FastBlockLookupITCM[ITCMPhysicalSize / 2] {}; + u64 FastBlockLookupMainRAM[NDS::MainRAMMaxSize / 2] {}; + u64 FastBlockLookupSWRAM[NDS::SharedWRAMSize / 2] {}; + u64 FastBlockLookupVRAM[0x100000 / 2] {}; + u64 FastBlockLookupARM9BIOS[sizeof(NDS::ARM9BIOS) / 2] {}; + u64 FastBlockLookupARM7BIOS[sizeof(NDS::ARM7BIOS) / 2] {}; + u64 FastBlockLookupARM7WRAM[NDS::ARM7WRAMSize / 2] {}; + u64 FastBlockLookupARM7WVRAM[0x40000 / 2] {}; + u64 FastBlockLookupBIOS9DSi[0x10000 / 2] {}; + u64 FastBlockLookupBIOS7DSi[0x10000 / 2] {}; + u64 FastBlockLookupNWRAM_A[DSi::NWRAMSize / 2] {}; 
+ u64 FastBlockLookupNWRAM_B[DSi::NWRAMSize / 2] {}; + u64 FastBlockLookupNWRAM_C[DSi::NWRAMSize / 2] {}; + + AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count] = + { + NULL, + CodeIndexITCM, + NULL, + CodeIndexARM9BIOS, + CodeIndexMainRAM, + CodeIndexSWRAM, + NULL, + CodeIndexVRAM, + CodeIndexARM7BIOS, + CodeIndexARM7WRAM, + NULL, + NULL, + CodeIndexARM7WVRAM, + CodeIndexBIOS9DSi, + CodeIndexBIOS7DSi, + CodeIndexNWRAM_A, + CodeIndexNWRAM_B, + CodeIndexNWRAM_C + }; + + u64* const FastBlockLookupRegions[ARMJIT_Memory::memregions_Count] = + { + NULL, + FastBlockLookupITCM, + NULL, + FastBlockLookupARM9BIOS, + FastBlockLookupMainRAM, + FastBlockLookupSWRAM, + NULL, + FastBlockLookupVRAM, + FastBlockLookupARM7BIOS, + FastBlockLookupARM7WRAM, + NULL, + NULL, + FastBlockLookupARM7WVRAM, + FastBlockLookupBIOS9DSi, + FastBlockLookupBIOS7DSi, + FastBlockLookupNWRAM_A, + FastBlockLookupNWRAM_B, + FastBlockLookupNWRAM_C + }; +}; } +// Defined in assembly extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry); #endif diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 55bca84..90940b0 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -20,6 +20,7 @@ #include "../ARMJIT_Internal.h" #include "../ARMInterpreter.h" +#include "../ARMJIT.h" #if defined(__SWITCH__) #include <switch.h> @@ -219,7 +220,7 @@ void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged) } } -Compiler::Compiler() +Compiler::Compiler(ARMJIT& jit) : Arm64Gen::ARM64XEmitter(), JIT(jit) { #ifdef __SWITCH__ JitRWBase = aligned_alloc(0x1000, JitMemSize); @@ -704,12 +705,12 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] if (JitMemMainSize - GetCodeOffset() < 1024 * 16) { Log(LogLevel::Debug, "JIT near memory full, resetting...\n"); - ResetBlockCache(); + JIT.ResetBlockCache(); } if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8) { 
Log(LogLevel::Debug, "JIT far memory full, resetting...\n"); - ResetBlockCache(); + JIT.ResetBlockCache(); } JitBlockEntry res = (JitBlockEntry)GetRXPtr(); @@ -722,7 +723,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] CPSRDirty = false; if (hasMemInstr) - MOVP2R(RMemBase, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + MOVP2R(RMemBase, Num == 0 ? JIT.Memory.FastMem9Start : JIT.Memory.FastMem7Start); for (int i = 0; i < instrsCount; i++) { diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index 5045cb5..1f79f3d 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -20,7 +20,6 @@ #define ARMJIT_A64_COMPILER_H #include "../ARM.h" -#include "../ARMJIT.h" #include "../dolphin/Arm64Emitter.h" @@ -31,7 +30,7 @@ namespace ARMJIT { - +class ARMJIT; const Arm64Gen::ARM64Reg RMemBase = Arm64Gen::X26; const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27; const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28; @@ -97,7 +96,7 @@ class Compiler : public Arm64Gen::ARM64XEmitter public: typedef void (Compiler::*CompileFunc)(); - Compiler(); + Compiler(ARMJIT& jit); ~Compiler(); void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true); @@ -243,6 +242,7 @@ public: OtherCodeRegion = offset; } + ARMJIT& JIT; ptrdiff_t OtherCodeRegion; bool Exit; diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index ee8aabe..a779a72 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -62,9 +62,9 @@ u8* Compiler::RewriteMemAccess(u8* pc) bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) { - u32 localAddr = LocaliseCodeAddress(Num, addr); + u32 localAddr = JIT.LocaliseCodeAddress(Num, addr); - int invalidLiteralIdx = InvalidLiterals.Find(localAddr); + int invalidLiteralIdx = JIT.InvalidLiterals.Find(localAddr); if (invalidLiteralIdx != -1) { return false; 
@@ -111,7 +111,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (size == 16) addressMask = ~1; - if (ARMJIT::LiteralOptimizations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) + if (JIT.LiteralOptimizations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) { u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); @@ -146,7 +146,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) MOV(W0, rnMapped); } - bool addrIsStatic = ARMJIT::LiteralOptimizations + bool addrIsStatic = JIT.LiteralOptimizations && RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post)); u32 staticAddress; if (addrIsStatic) @@ -185,10 +185,10 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) MOV(rnMapped, W0); u32 expectedTarget = Num == 0 - ? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion) - : ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion); + ? JIT.Memory.ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion) + : JIT.Memory.ClassifyAddress7(addrIsStatic ? 
staticAddress : CurInstr.DataRegion); - if (ARMJIT::FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget))) + if (JIT.FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || JIT.Memory.IsFastmemCompatible(expectedTarget))) { ptrdiff_t memopStart = GetCodeOffset(); LoadStorePatch patch; @@ -225,7 +225,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) { void* func = NULL; if (addrIsStatic) - func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); + func = JIT.Memory.GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); PushRegs(false, false); @@ -452,7 +452,7 @@ void Compiler::T_Comp_LoadPCRel() u32 offset = ((CurInstr.Instr & 0xFF) << 2); u32 addr = (R15 & ~0x2) + offset; - if (!ARMJIT::LiteralOptimizations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) + if (!JIT.LiteralOptimizations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0); } @@ -494,11 +494,11 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc Comp_AddCycles_CDI(); int expectedTarget = Num == 0 - ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) - : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + ? 
JIT.Memory.ClassifyAddress9(CurInstr.DataRegion) + : JIT.Memory.ClassifyAddress7(CurInstr.DataRegion); - bool compileFastPath = ARMJIT::FastMemory - && store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)); + bool compileFastPath = JIT.FastMemory + && store && !usermode && (CurInstr.Cond() < 0xE || JIT.Memory.IsFastmemCompatible(expectedTarget)); { s32 offset = decrement diff --git a/src/ARMJIT_Compiler.h b/src/ARMJIT_Compiler.h index c5348f4..4ece834 100644 --- a/src/ARMJIT_Compiler.h +++ b/src/ARMJIT_Compiler.h @@ -19,6 +19,12 @@ #ifndef ARMJIT_COMPILER_H #define ARMJIT_COMPILER_H +#ifdef JIT_ENABLED +#define NOOP_IF_NO_JIT +#else +#define NOOP_IF_NO_JIT {} +#endif + #if defined(__x86_64__) #include "ARMJIT_x64/ARMJIT_Compiler.h" #elif defined(__aarch64__) @@ -27,9 +33,4 @@ #error "The current target platform doesn't have a JIT backend" #endif -namespace ARMJIT -{ -extern Compiler* JITCompiler; -} - #endif diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index fb80307..7a8eb6b 100644 --- a/src/ARMJIT_Internal.h +++ b/src/ARMJIT_Internal.h @@ -24,8 +24,12 @@ #include <string.h> #include <assert.h> -#include "ARMJIT.h" -#include "ARMJIT_Memory.h" +#include "ARM_InstrInfo.h" +#include "JitBlock.h" +#include "TinyVector.h" + +class ARM; +class ARMv5; // here lands everything which doesn't fit into ARMJIT.h // where it would be included by pretty much everything @@ -69,139 +73,6 @@ struct FetchedInstr ARMInstrInfo::Info Info; }; -/* - TinyVector - - because reinventing the wheel is the best! - - - meant to be used very often, with not so many elements - max 1 << 16 elements - - doesn't allocate while no elements are inserted - - not stl confirmant of course - - probably only works with POD types - - remove operations don't preserve order, but O(1)! 
-*/ -template <typename T> -struct __attribute__((packed)) TinyVector -{ - T* Data = NULL; - u16 Capacity = 0; - u16 Length = 0; - - ~TinyVector() - { - delete[] Data; - } - - void MakeCapacity(u32 capacity) - { - assert(capacity <= UINT16_MAX); - assert(capacity > Capacity); - T* newMem = new T[capacity]; - if (Data != NULL) - memcpy(newMem, Data, sizeof(T) * Length); - - T* oldData = Data; - Data = newMem; - if (oldData != NULL) - delete[] oldData; - - Capacity = capacity; - } - - void SetLength(u16 length) - { - if (Capacity < length) - MakeCapacity(length); - - Length = length; - } - - void Clear() - { - Length = 0; - } - - void Add(T element) - { - assert(Length + 1 <= UINT16_MAX); - if (Length + 1 > Capacity) - MakeCapacity(((Capacity + 4) * 3) / 2); - - Data[Length++] = element; - } - - void Remove(int index) - { - assert(Length > 0); - assert(index >= 0 && index < Length); - - Length--; - Data[index] = Data[Length]; - /*for (int i = index; i < Length; i++) - Data[i] = Data[i + 1];*/ - } - - int Find(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - return i; - } - return -1; - } - - bool RemoveByValue(T needle) - { - for (int i = 0; i < Length; i++) - { - if (Data[i] == needle) - { - Remove(i); - return true; - } - } - return false; - } - - T& operator[](int index) - { - assert(index >= 0 && index < Length); - return Data[index]; - } -}; - -class JitBlock -{ -public: - JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) - { - Num = num; - NumAddresses = numAddresses; - NumLiterals = numLiterals; - Data.SetLength(numAddresses * 2 + numLiterals); - } - - u32 StartAddr; - u32 StartAddrLocal; - u32 InstrHash, LiteralHash; - u8 Num; - u16 NumAddresses; - u16 NumLiterals; - - JitBlockEntry EntryPoint; - - u32* AddressRanges() - { return &Data[0]; } - u32* AddressMasks() - { return &Data[NumAddresses]; } - u32* Literals() - { return &Data[NumAddresses * 2]; } - -private: - TinyVector<u32> Data; -}; - // size should 
be 16 bytes because I'm to lazy to use mul and whatnot struct __attribute__((packed)) AddressRange { @@ -214,10 +85,6 @@ typedef void (*InterpreterFunc)(ARM* cpu); extern InterpreterFunc InterpretARM[]; extern InterpreterFunc InterpretTHUMB[]; -extern TinyVector<u32> InvalidLiterals; - -extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count]; - inline bool PageContainsCode(AddressRange* range) { for (int i = 0; i < 8; i++) @@ -228,11 +95,6 @@ inline bool PageContainsCode(AddressRange* range) return false; } -u32 LocaliseCodeAddress(u32 num, u32 addr); - -template <u32 Num> -void LinkBlock(ARM* cpu, u32 codeOffset); - template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu); template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val); template <typename T, int ConsoleType> T SlowRead7(u32 addr); diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 3591a25..361a1ed 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -34,6 +34,7 @@ #include <sys/ioctl.h> #endif +#include "ARMJIT.h" #include "ARMJIT_Memory.h" #include "ARMJIT_Internal.h" @@ -72,17 +73,6 @@ using Platform::LogLevel; */ -namespace ARMJIT_Memory -{ -struct FaultDescription -{ - u32 EmulatedFaultAddr; - u8* FaultPC; -}; - -bool FaultHandler(FaultDescription& faultDesc); -} - // Yes I know this looks messy, but better here than somewhere else in the code #if defined(__x86_64__) #if defined(_WIN32) @@ -112,7 +102,6 @@ bool FaultHandler(FaultDescription& faultDesc); #if defined(__ANDROID__) #define ASHMEM_DEVICE "/dev/ashmem" -Platform::DynamicLibrary* Libandroid = nullptr; #endif #if defined(__SWITCH__) @@ -146,7 +135,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx) integerRegisters[31] = ctx->sp.x; integerRegisters[32] = ctx->pc.x; - if (ARMJIT_Memory::FaultHandler(desc)) + if (Melon::FaultHandler(desc)) { integerRegisters[32] = (u64)desc.FaultPC; @@ -160,19 +149,19 @@ void 
__libnx_exception_handler(ThreadExceptionDump* ctx) #elif defined(_WIN32) -static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) +LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) { if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) { return EXCEPTION_CONTINUE_SEARCH; } - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + u8* curArea = (u8*)(NDS::CurCPU == 0 ? NDS::JIT->Memory.FastMem9Start : NDS::JIT->Memory.FastMem7Start); + FaultDescription desc {}; desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; desc.FaultPC = (u8*)exceptionInfo->ContextRecord->CONTEXT_PC; - if (ARMJIT_Memory::FaultHandler(desc)) + if (FaultHandler(desc, *NDS::JIT)) { exceptionInfo->ContextRecord->CONTEXT_PC = (u64)desc.FaultPC; return EXCEPTION_CONTINUE_EXECUTION; @@ -186,7 +175,7 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo) static struct sigaction OldSaSegv; static struct sigaction OldSaBus; -static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) +void ARMJIT_Memory::SigsegvHandler(int sig, siginfo_t* info, void* rawContext) { if (sig != SIGSEGV && sig != SIGBUS) { @@ -201,13 +190,13 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) ucontext_t* context = (ucontext_t*)rawContext; - ARMJIT_Memory::FaultDescription desc; - u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + FaultDescription desc {}; + u8* curArea = (u8*)(NDS::CurCPU == 0 ? 
NDS::JIT->Memory.FastMem9Start : NDS::JIT->Memory.FastMem7Start); desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; desc.FaultPC = (u8*)context->CONTEXT_PC; - if (ARMJIT_Memory::FaultHandler(desc)) + if (FaultHandler(desc, *NDS::JIT)) { context->CONTEXT_PC = (u64)desc.FaultPC; return; @@ -239,33 +228,7 @@ static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) #endif -namespace ARMJIT_Memory -{ - -void* FastMem9Start, *FastMem7Start; - -#ifdef _WIN32 -inline u32 RoundUp(u32 size) -{ - return (size + 0xFFFF) & ~0xFFFF; -} -#else -inline u32 RoundUp(u32 size) -{ - return size; -} -#endif - -const u32 MemBlockMainRAMOffset = 0; -const u32 MemBlockSWRAMOffset = RoundUp(NDS::MainRAMMaxSize); -const u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(NDS::SharedWRAMSize); -const u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(NDS::ARM7WRAMSize); -const u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize); -const u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(DSi::NWRAMSize); -const u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(DSi::NWRAMSize); -const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(DSi::NWRAMSize); - -const u32 OffsetsPerRegion[memregions_Count] = +const u32 OffsetsPerRegion[ARMJIT_Memory::memregions_Count] = { UINT32_MAX, UINT32_MAX, @@ -295,23 +258,9 @@ enum memstate_MappedProtected, }; -u8 MappingStatus9[1 << (32-12)]; -u8 MappingStatus7[1 << (32-12)]; -#if defined(__SWITCH__) -VirtmemReservation* FastMem9Reservation, *FastMem7Reservation; -u8* MemoryBase; -u8* MemoryBaseCodeMem; -#elif defined(_WIN32) -u8* MemoryBase; -HANDLE MemoryFile; -LPVOID ExceptionHandlerHandle; -#else -u8* MemoryBase; -int MemoryFile = -1; -#endif -bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) +bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept { u8* dst = (u8*)(num == 0 ? 
FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ @@ -326,7 +275,7 @@ bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) #endif } -bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) +bool ARMJIT_Memory::UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept { u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #ifdef __SWITCH__ @@ -341,7 +290,7 @@ bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) } #ifndef __SWITCH__ -void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) +void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept { u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr; #if defined(_WIN32) @@ -367,82 +316,74 @@ void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) } #endif -struct Mapping +void ARMJIT_Memory::Mapping::Unmap(int region, ARMJIT_Memory& memory) noexcept { - u32 Addr; - u32 Size, LocalOffset; - u32 Num; - - void Unmap(int region) + u32 dtcmStart = NDS::ARM9->DTCMBase; + u32 dtcmSize = ~NDS::ARM9->DTCMMask + 1; + bool skipDTCM = Num == 0 && region != memregion_DTCM; + u8* statuses = Num == 0 ? memory.MappingStatus9 : memory.MappingStatus7; + u32 offset = 0; + while (offset < Size) { - u32 dtcmStart = NDS::ARM9->DTCMBase; - u32 dtcmSize = ~NDS::ARM9->DTCMMask + 1; - bool skipDTCM = Num == 0 && region != memregion_DTCM; - u8* statuses = Num == 0 ? 
MappingStatus9 : MappingStatus7; - u32 offset = 0; - while (offset < Size) + if (skipDTCM && Addr + offset == dtcmStart) + { + offset += dtcmSize; + } + else { - if (skipDTCM && Addr + offset == dtcmStart) + u32 segmentOffset = offset; + u8 status = statuses[(Addr + offset) >> 12]; + while (statuses[(Addr + offset) >> 12] == status + && offset < Size + && (!skipDTCM || Addr + offset != dtcmStart)) { - offset += dtcmSize; + assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); + statuses[(Addr + offset) >> 12] = memstate_Unmapped; + offset += 0x1000; } - else - { - u32 segmentOffset = offset; - u8 status = statuses[(Addr + offset) >> 12]; - while (statuses[(Addr + offset) >> 12] == status - && offset < Size - && (!skipDTCM || Addr + offset != dtcmStart)) - { - assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped); - statuses[(Addr + offset) >> 12] = memstate_Unmapped; - offset += 0x1000; - } #ifdef __SWITCH__ - if (status == memstate_MappedRW) - { - u32 segmentSize = offset - segmentOffset; - Log(LogLevel::Debug, "unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); - assert(success); - } -#endif + if (status == memstate_MappedRW) + { + u32 segmentSize = offset - segmentOffset; + Log(LogLevel::Debug, "unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + bool success = memory.UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize); + assert(success); } +#endif } + } #ifndef __SWITCH__ #ifndef _WIN32 - u32 dtcmEnd = dtcmStart + dtcmSize; - if (Num == 0 - && dtcmEnd >= Addr - && dtcmStart < Addr + Size) + u32 dtcmEnd = dtcmStart + dtcmSize; + if (Num == 0 + && dtcmEnd >= Addr + && dtcmStart < Addr + Size) + { + bool success; + if 
(dtcmStart > Addr) { - bool success; - if (dtcmStart > Addr) - { - success = UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr); - assert(success); - } - if (dtcmEnd < Addr + Size) - { - u32 offset = dtcmStart - Addr + dtcmSize; - success = UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset); - assert(success); - } + success = memory.UnmapFromRange(Addr, 0, OffsetsPerRegion[region] + LocalOffset, dtcmStart - Addr); + assert(success); } - else -#endif + if (dtcmEnd < Addr + Size) { - bool succeded = UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); - assert(succeded); + u32 offset = dtcmStart - Addr + dtcmSize; + success = memory.UnmapFromRange(dtcmEnd, 0, OffsetsPerRegion[region] + LocalOffset + offset, Size - offset); + assert(success); } + } + else #endif + { + bool succeded = memory.UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size); + assert(succeded); } -}; -ARMJIT::TinyVector<Mapping> Mappings[memregions_Count]; +#endif +} -void SetCodeProtection(int region, u32 offset, bool protect) +void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noexcept { offset &= ~0xFFF; //printf("set code protection %d %x %d\n", region, offset, protect); @@ -479,7 +420,7 @@ void SetCodeProtection(int region, u32 offset, bool protect) } } -void RemapDTCM(u32 newBase, u32 newSize) +void ARMJIT_Memory::RemapDTCM(u32 newBase, u32 newSize) noexcept { // this first part could be made more efficient // by unmapping DTCM first and then map the holes @@ -510,7 +451,7 @@ void RemapDTCM(u32 newBase, u32 newSize) if (mapping.Num == 0 && overlap) { - mapping.Unmap(region); + mapping.Unmap(region, *this); Mappings[region].Remove(i); } else @@ -522,12 +463,12 @@ void RemapDTCM(u32 newBase, u32 newSize) for (int i = 0; i < Mappings[memregion_DTCM].Length; i++) { - Mappings[memregion_DTCM][i].Unmap(memregion_DTCM); + 
Mappings[memregion_DTCM][i].Unmap(memregion_DTCM, *this); } Mappings[memregion_DTCM].Clear(); } -void RemapNWRAM(int num) +void ARMJIT_Memory::RemapNWRAM(int num) noexcept { for (int i = 0; i < Mappings[memregion_SharedWRAM].Length;) { @@ -535,7 +476,7 @@ void RemapNWRAM(int num) if (DSi::NWRAMStart[mapping.Num][num] < mapping.Addr + mapping.Size && DSi::NWRAMEnd[mapping.Num][num] > mapping.Addr) { - mapping.Unmap(memregion_SharedWRAM); + mapping.Unmap(memregion_SharedWRAM, *this); Mappings[memregion_SharedWRAM].Remove(i); } else @@ -545,12 +486,12 @@ void RemapNWRAM(int num) } for (int i = 0; i < Mappings[memregion_NewSharedWRAM_A + num].Length; i++) { - Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num); + Mappings[memregion_NewSharedWRAM_A + num][i].Unmap(memregion_NewSharedWRAM_A + num, *this); } Mappings[memregion_NewSharedWRAM_A + num].Clear(); } -void RemapSWRAM() +void ARMJIT_Memory::RemapSWRAM() noexcept { Log(LogLevel::Debug, "remapping SWRAM\n"); for (int i = 0; i < Mappings[memregion_WRAM7].Length;) @@ -558,7 +499,7 @@ void RemapSWRAM() Mapping& mapping = Mappings[memregion_WRAM7][i]; if (mapping.Addr + mapping.Size <= 0x03800000) { - mapping.Unmap(memregion_WRAM7); + mapping.Unmap(memregion_WRAM7, *this); Mappings[memregion_WRAM7].Remove(i); } else @@ -566,12 +507,12 @@ void RemapSWRAM() } for (int i = 0; i < Mappings[memregion_SharedWRAM].Length; i++) { - Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM); + Mappings[memregion_SharedWRAM][i].Unmap(memregion_SharedWRAM, *this); } Mappings[memregion_SharedWRAM].Clear(); } -bool MapAtAddress(u32 addr) +bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept { u32 num = NDS::CurCPU; @@ -589,7 +530,7 @@ bool MapAtAddress(u32 addr) u8* states = num == 0 ? 
MappingStatus9 : MappingStatus7; //printf("mapping mirror %x, %x %x %d %d\n", mirrorStart, mirrorSize, memoryOffset, region, num); - bool isExecutable = ARMJIT::CodeMemRegions[region]; + bool isExecutable = JIT.CodeMemRegions[region]; u32 dtcmStart = NDS::ARM9->DTCMBase; u32 dtcmSize = ~NDS::ARM9->DTCMMask + 1; @@ -621,7 +562,7 @@ bool MapAtAddress(u32 addr) } #endif - ARMJIT::AddressRange* range = ARMJIT::CodeMemRegions[region] + memoryOffset / 512; + ARMJIT::AddressRange* range = JIT.CodeMemRegions[region] + memoryOffset / 512; // this overcomplicated piece of code basically just finds whole pieces of code memory // which can be mapped/protected @@ -676,19 +617,19 @@ bool MapAtAddress(u32 addr) return true; } -bool FaultHandler(FaultDescription& faultDesc) +bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, ARMJIT::ARMJIT& jit) { - if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC)) + if (jit.JITCompiler.IsJITFault(faultDesc.FaultPC)) { bool rewriteToSlowPath = true; - u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7; + u8* memStatus = NDS::CurCPU == 0 ? 
jit.Memory.MappingStatus9 : jit.Memory.MappingStatus7; if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped) - rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr); + rewriteToSlowPath = !jit.Memory.MapAtAddress(faultDesc.EmulatedFaultAddr); if (rewriteToSlowPath) - faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC); + faultDesc.FaultPC = jit.JITCompiler.RewriteMemAccess(faultDesc.FaultPC); return true; } @@ -697,7 +638,7 @@ bool FaultHandler(FaultDescription& faultDesc) const u64 AddrSpaceSize = 0x100000000; -void Init() +ARMJIT_Memory::ARMJIT_Memory(ARMJIT::ARMJIT& jit) noexcept : JIT(jit) { #if defined(__SWITCH__) MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize); @@ -740,8 +681,6 @@ void Init() MemoryBase = MemoryBase + AddrSpaceSize*3; MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase); - - u8* basePtr = MemoryBase; #else // this used to be allocated with three different mmaps // The idea was to give the OS more freedom where to position the buffers, @@ -798,16 +737,9 @@ void Init() u8* basePtr = MemoryBase; #endif - NDS::MainRAM = basePtr + MemBlockMainRAMOffset; - NDS::SharedWRAM = basePtr + MemBlockSWRAMOffset; - NDS::ARM7WRAM = basePtr + MemBlockARM7WRAMOffset; - NDS::ARM9->DTCM = basePtr + MemBlockDTCMOffset; - DSi::NWRAM_A = basePtr + MemBlockNWRAM_AOffset; - DSi::NWRAM_B = basePtr + MemBlockNWRAM_BOffset; - DSi::NWRAM_C = basePtr + MemBlockNWRAM_COffset; } -void DeInit() +ARMJIT_Memory::~ARMJIT_Memory() noexcept { #if defined(__SWITCH__) virtmemLock(); @@ -875,12 +807,12 @@ void DeInit() #endif } -void Reset() +void ARMJIT_Memory::Reset() noexcept { for (int region = 0; region < memregions_Count; region++) { for (int i = 0; i < Mappings[region].Length; i++) - Mappings[region][i].Unmap(region); + Mappings[region][i].Unmap(region, *this); Mappings[region].Clear(); } @@ -893,7 +825,7 @@ void Reset() Log(LogLevel::Debug, "done resetting jit mem\n"); } 
-bool IsFastmemCompatible(int region) +bool ARMJIT_Memory::IsFastmemCompatible(int region) const noexcept { #ifdef _WIN32 /* @@ -909,7 +841,7 @@ bool IsFastmemCompatible(int region) return OffsetsPerRegion[region] != UINT32_MAX; } -bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) +bool ARMJIT_Memory::GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) const noexcept { memoryOffset = 0; switch (region) @@ -955,14 +887,14 @@ bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mi { mirrorStart = addr & ~NDS::SWRAM_ARM9.Mask; mirrorSize = NDS::SWRAM_ARM9.Mask + 1; - memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM; + memoryOffset = NDS::SWRAM_ARM9.Mem - GetSharedWRAM(); return true; } else if (num == 1 && NDS::SWRAM_ARM7.Mem) { mirrorStart = addr & ~NDS::SWRAM_ARM7.Mask; mirrorSize = NDS::SWRAM_ARM7.Mask + 1; - memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM; + memoryOffset = NDS::SWRAM_ARM7.Mem - GetSharedWRAM(); return true; } return false; @@ -995,7 +927,7 @@ bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mi u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; if (ptr) { - memoryOffset = ptr - DSi::NWRAM_A; + memoryOffset = ptr - GetNWRAM_A(); mirrorStart = addr & ~0xFFFF; mirrorSize = 0x10000; return true; @@ -1007,7 +939,7 @@ bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mi u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; if (ptr) { - memoryOffset = ptr - DSi::NWRAM_B; + memoryOffset = ptr - GetNWRAM_B(); mirrorStart = addr & ~0x7FFF; mirrorSize = 0x8000; return true; @@ -1019,7 +951,7 @@ bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mi u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; if (ptr) { - memoryOffset = ptr - DSi::NWRAM_C; + memoryOffset = ptr - GetNWRAM_C(); 
mirrorStart = addr & ~0x7FFF; mirrorSize = 0x8000; return true; @@ -1048,7 +980,7 @@ bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mi } } -u32 LocaliseAddress(int region, u32 num, u32 addr) +u32 ARMJIT_Memory::LocaliseAddress(int region, u32 num, u32 addr) const noexcept { switch (region) { @@ -1062,9 +994,9 @@ u32 LocaliseAddress(int region, u32 num, u32 addr) return (addr & 0x3FFF) | (memregion_BIOS7 << 27); case memregion_SharedWRAM: if (num == 0) - return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + return ((addr & NDS::SWRAM_ARM9.Mask) + (NDS::SWRAM_ARM9.Mem - GetSharedWRAM())) | (memregion_SharedWRAM << 27); else - return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM)) | (memregion_SharedWRAM << 27); + return ((addr & NDS::SWRAM_ARM7.Mask) + (NDS::SWRAM_ARM7.Mem - GetSharedWRAM())) | (memregion_SharedWRAM << 27); case memregion_WRAM7: return (addr & (NDS::ARM7WRAMSize - 1)) | (memregion_WRAM7 << 27); case memregion_VRAM: @@ -1077,7 +1009,7 @@ u32 LocaliseAddress(int region, u32 num, u32 addr) { u8* ptr = DSi::NWRAMMap_A[num][(addr >> 16) & DSi::NWRAMMask[num][0]]; if (ptr) - return (ptr - DSi::NWRAM_A + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); + return (ptr - GetNWRAM_A() + (addr & 0xFFFF)) | (memregion_NewSharedWRAM_A << 27); else return memregion_Other << 27; // zero filled memory } @@ -1085,7 +1017,7 @@ u32 LocaliseAddress(int region, u32 num, u32 addr) { u8* ptr = DSi::NWRAMMap_B[num][(addr >> 15) & DSi::NWRAMMask[num][1]]; if (ptr) - return (ptr - DSi::NWRAM_B + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); + return (ptr - GetNWRAM_B() + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_B << 27); else return memregion_Other << 27; } @@ -1093,7 +1025,7 @@ u32 LocaliseAddress(int region, u32 num, u32 addr) { u8* ptr = DSi::NWRAMMap_C[num][(addr >> 15) & DSi::NWRAMMask[num][2]]; if (ptr) - return (ptr - DSi::NWRAM_C + (addr 
& 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); + return (ptr - GetNWRAM_C() + (addr & 0x7FFF)) | (memregion_NewSharedWRAM_C << 27); else return memregion_Other << 27; } @@ -1106,7 +1038,7 @@ u32 LocaliseAddress(int region, u32 num, u32 addr) } } -int ClassifyAddress9(u32 addr) +int ARMJIT_Memory::ClassifyAddress9(u32 addr) const noexcept { if (addr < NDS::ARM9->ITCMSize) { @@ -1160,7 +1092,7 @@ int ClassifyAddress9(u32 addr) } } -int ClassifyAddress7(u32 addr) +int ARMJIT_Memory::ClassifyAddress7(u32 addr) const noexcept { if (NDS::ConsoleType == 1 && addr < 0x00010000 && !(DSi::SCFG_BIOS & (1<<9))) { @@ -1295,7 +1227,7 @@ u32 NDSCartSlot_ReadROMData() return NDS::NDSCartSlot->ReadROMData(); } -void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) +void* ARMJIT_Memory::GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept { if (cpu->Num == 0) { @@ -1433,5 +1365,3 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) } return NULL; } - -} diff --git a/src/ARMJIT_Memory.h b/src/ARMJIT_Memory.h index 6a7e13d..1cf4f81 100644 --- a/src/ARMJIT_Memory.h +++ b/src/ARMJIT_Memory.h @@ -20,62 +20,209 @@ #define ARMJIT_MEMORY #include "types.h" +#include "TinyVector.h" #include "ARM.h" +#include "DSi.h" + +#if defined(__SWITCH__) +#include <switch.h> +#elif defined(_WIN32) +#include <windows.h> +#else +#include <sys/mman.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <signal.h> +#endif -namespace ARMJIT_Memory -{ +#ifndef JIT_ENABLED +#include <array> +#include "NDS.h" +#endif -extern void* FastMem9Start; -extern void* FastMem7Start; +namespace ARMJIT +{ +class Compiler; +class ARMJIT; +} -void Init(); -void DeInit(); +constexpr u32 RoundUp(u32 size) noexcept +{ +#ifdef _WIN32 + return (size + 0xFFFF) & ~0xFFFF; +#else + return size; +#endif +} -void Reset(); +const u32 MemBlockMainRAMOffset = 0; +const u32 MemBlockSWRAMOffset = RoundUp(NDS::MainRAMMaxSize); +const u32 MemBlockARM7WRAMOffset = 
MemBlockSWRAMOffset + RoundUp(NDS::SharedWRAMSize); +const u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(NDS::ARM7WRAMSize); +const u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize); +const u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(DSi::NWRAMSize); +const u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(DSi::NWRAMSize); +const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(DSi::NWRAMSize); -enum +class ARMJIT_Memory { - memregion_Other = 0, - memregion_ITCM, - memregion_DTCM, - memregion_BIOS9, - memregion_MainRAM, - memregion_SharedWRAM, - memregion_IO9, - memregion_VRAM, - memregion_BIOS7, - memregion_WRAM7, - memregion_IO7, - memregion_Wifi, - memregion_VWRAM, - - // DSi - memregion_BIOS9DSi, - memregion_BIOS7DSi, - memregion_NewSharedWRAM_A, - memregion_NewSharedWRAM_B, - memregion_NewSharedWRAM_C, - - memregions_Count +public: + enum + { + memregion_Other = 0, + memregion_ITCM, + memregion_DTCM, + memregion_BIOS9, + memregion_MainRAM, + memregion_SharedWRAM, + memregion_IO9, + memregion_VRAM, + memregion_BIOS7, + memregion_WRAM7, + memregion_IO7, + memregion_Wifi, + memregion_VWRAM, + + // DSi + memregion_BIOS9DSi, + memregion_BIOS7DSi, + memregion_NewSharedWRAM_A, + memregion_NewSharedWRAM_B, + memregion_NewSharedWRAM_C, + + memregions_Count + }; + +#ifdef JIT_ENABLED +public: + explicit ARMJIT_Memory(ARMJIT::ARMJIT& jit) noexcept; + ~ARMJIT_Memory() noexcept; + ARMJIT_Memory(const ARMJIT_Memory&) = delete; + ARMJIT_Memory(ARMJIT_Memory&&) = delete; + ARMJIT_Memory& operator=(const ARMJIT_Memory&) = delete; + ARMJIT_Memory& operator=(ARMJIT_Memory&&) = delete; + void Reset() noexcept; + void RemapDTCM(u32 newBase, u32 newSize) noexcept; + void RemapSWRAM() noexcept; + void RemapNWRAM(int num) noexcept; + void SetCodeProtection(int region, u32 offset, bool protect) noexcept; + + [[nodiscard]] u8* GetMainRAM() noexcept { return MemoryBase + MemBlockMainRAMOffset; } + [[nodiscard]] const 
u8* GetMainRAM() const noexcept { return MemoryBase + MemBlockMainRAMOffset; } + + [[nodiscard]] u8* GetSharedWRAM() noexcept { return MemoryBase + MemBlockSWRAMOffset; } + [[nodiscard]] const u8* GetSharedWRAM() const noexcept { return MemoryBase + MemBlockSWRAMOffset; } + + [[nodiscard]] u8* GetARM7WRAM() noexcept { return MemoryBase + MemBlockARM7WRAMOffset; } + [[nodiscard]] const u8* GetARM7WRAM() const noexcept { return MemoryBase + MemBlockARM7WRAMOffset; } + + [[nodiscard]] u8* GetARM9DTCM() noexcept { return MemoryBase + MemBlockDTCMOffset; } + [[nodiscard]] const u8* GetARM9DTCM() const noexcept { return MemoryBase + MemBlockDTCMOffset; } + + [[nodiscard]] u8* GetNWRAM_A() noexcept { return MemoryBase + MemBlockNWRAM_AOffset; } + [[nodiscard]] const u8* GetNWRAM_A() const noexcept { return MemoryBase + MemBlockNWRAM_AOffset; } + + [[nodiscard]] u8* GetNWRAM_B() noexcept { return MemoryBase + MemBlockNWRAM_BOffset; } + [[nodiscard]] const u8* GetNWRAM_B() const noexcept { return MemoryBase + MemBlockNWRAM_BOffset; } + + [[nodiscard]] u8* GetNWRAM_C() noexcept { return MemoryBase + MemBlockNWRAM_COffset; } + [[nodiscard]] const u8* GetNWRAM_C() const noexcept { return MemoryBase + MemBlockNWRAM_COffset; } + + int ClassifyAddress9(u32 addr) const noexcept; + int ClassifyAddress7(u32 addr) const noexcept; + bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) const noexcept; + u32 LocaliseAddress(int region, u32 num, u32 addr) const noexcept; + bool IsFastmemCompatible(int region) const noexcept; + void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept; + bool MapAtAddress(u32 addr) noexcept; +private: + friend class ARMJIT::Compiler; + struct Mapping + { + u32 Addr; + u32 Size, LocalOffset; + u32 Num; + + void Unmap(int region, ARMJIT_Memory& memory) noexcept; + }; + + struct FaultDescription + { + u32 EmulatedFaultAddr; + u8* FaultPC; + }; + static bool 
FaultHandler(FaultDescription& faultDesc, ARMJIT::ARMJIT& jit); + bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept; + bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept; + void SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept; + + ARMJIT::ARMJIT& JIT; + void* FastMem9Start; + void* FastMem7Start; + u8* MemoryBase = nullptr; +#if defined(__SWITCH__) + VirtmemReservation* FastMem9Reservation, *FastMem7Reservation; + u8* MemoryBaseCodeMem; +#elif defined(_WIN32) + static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo); + HANDLE MemoryFile = INVALID_HANDLE_VALUE; + LPVOID ExceptionHandlerHandle = nullptr; +#else + static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext); + int MemoryFile = -1; +#endif +#ifdef ANDROID + Platform::DynamicLibrary* Libandroid = nullptr; +#endif + u8 MappingStatus9[1 << (32-12)] {}; + u8 MappingStatus7[1 << (32-12)] {}; + ARMJIT::TinyVector<Mapping> Mappings[memregions_Count] {}; +#else +public: + explicit ARMJIT_Memory(ARMJIT::ARMJIT&) {}; + ~ARMJIT_Memory() = default; + ARMJIT_Memory(const ARMJIT_Memory&) = delete; + ARMJIT_Memory(ARMJIT_Memory&&) = delete; + ARMJIT_Memory& operator=(const ARMJIT_Memory&) = delete; + ARMJIT_Memory& operator=(ARMJIT_Memory&&) = delete; + + void Reset() noexcept {} + void RemapDTCM(u32 newBase, u32 newSize) noexcept {} + void RemapSWRAM() noexcept {} + void RemapNWRAM(int num) noexcept {} + void SetCodeProtection(int region, u32 offset, bool protect) noexcept {} + + [[nodiscard]] u8* GetMainRAM() noexcept { return MainRAM.data(); } + [[nodiscard]] const u8* GetMainRAM() const noexcept { return MainRAM.data(); } + + [[nodiscard]] u8* GetSharedWRAM() noexcept { return SharedWRAM.data(); } + [[nodiscard]] const u8* GetSharedWRAM() const noexcept { return SharedWRAM.data(); } + + [[nodiscard]] u8* GetARM7WRAM() noexcept { return ARM7WRAM.data(); } + [[nodiscard]] const u8* GetARM7WRAM() const noexcept { return 
ARM7WRAM.data(); } + + [[nodiscard]] u8* GetARM9DTCM() noexcept { return DTCM.data(); } + [[nodiscard]] const u8* GetARM9DTCM() const noexcept { return DTCM.data(); } + + [[nodiscard]] u8* GetNWRAM_A() noexcept { return NWRAM_A.data(); } + [[nodiscard]] const u8* GetNWRAM_A() const noexcept { return NWRAM_A.data(); } + + [[nodiscard]] u8* GetNWRAM_B() noexcept { return NWRAM_B.data(); } + [[nodiscard]] const u8* GetNWRAM_B() const noexcept { return NWRAM_B.data(); } + + [[nodiscard]] u8* GetNWRAM_C() noexcept { return NWRAM_C.data(); } + [[nodiscard]] const u8* GetNWRAM_C() const noexcept { return NWRAM_C.data(); } +private: + std::array<u8, NDS::MainRAMMaxSize> MainRAM {}; + std::array<u8, NDS::ARM7WRAMSize> ARM7WRAM {}; + std::array<u8, NDS::SharedWRAMSize> SharedWRAM {}; + std::array<u8, DTCMPhysicalSize> DTCM {}; + std::array<u8, DSi::NWRAMSize> NWRAM_A {}; + std::array<u8, DSi::NWRAMSize> NWRAM_B {}; + std::array<u8, DSi::NWRAMSize> NWRAM_C {}; +#endif }; -int ClassifyAddress9(u32 addr); -int ClassifyAddress7(u32 addr); - -bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize); -u32 LocaliseAddress(int region, u32 num, u32 addr); - -bool IsFastmemCompatible(int region); - -void RemapDTCM(u32 newBase, u32 newSize); -void RemapSWRAM(); -void RemapNWRAM(int num); - -void SetCodeProtection(int region, u32 offset, bool protect); - -void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size); - -} - #endif diff --git a/src/ARMJIT_RegisterCache.h b/src/ARMJIT_RegisterCache.h index 7ea44ed..1610530 100644 --- a/src/ARMJIT_RegisterCache.h +++ b/src/ARMJIT_RegisterCache.h @@ -19,7 +19,6 @@ #ifndef ARMJIT_REGCACHE_H #define ARMJIT_REGCACHE_H -#include "ARMJIT.h" #include "ARMJIT_Internal.h" #include "Platform.h" diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index 069dd53..bdc17e8 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -17,6 +17,7 @@ */ #include 
"ARMJIT_Compiler.h" +#include "../ARM.h" using namespace Gen; diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index b36f5b7..ae7d1ae 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -17,6 +17,7 @@ */ #include "ARMJIT_Compiler.h" +#include "../ARM.h" using namespace Gen; diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 45a2751..5506db7 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -18,6 +18,7 @@ #include "ARMJIT_Compiler.h" +#include "../ARMJIT.h" #include "../ARMInterpreter.h" #include <assert.h> @@ -232,7 +233,7 @@ void Compiler::A_Comp_MSR() */ u8 CodeMemory[1024 * 1024 * 32]; -Compiler::Compiler() +Compiler::Compiler(ARMJIT& jit) : XEmitter(), JIT(jit) { { #ifdef _WIN32 @@ -712,12 +713,12 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] if (NearSize - (GetCodePtr() - NearStart) < 1024 * 32) // guess... { Log(LogLevel::Debug, "near reset\n"); - ResetBlockCache(); + JIT.ResetBlockCache(); } if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess... 
{ Log(LogLevel::Debug, "far reset\n"); - ResetBlockCache(); + JIT.ResetBlockCache(); } ConstantCycles = 0; diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 680146f..84efb35 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -21,7 +21,6 @@ #include "../dolphin/x64Emitter.h" -#include "../ARMJIT.h" #include "../ARMJIT_Internal.h" #include "../ARMJIT_RegisterCache.h" @@ -31,9 +30,11 @@ #include <unordered_map> +class ARMJIT_Memory; + namespace ARMJIT { - +class ARMJIT; const Gen::X64Reg RCPU = Gen::RBP; const Gen::X64Reg RCPSR = Gen::R15; @@ -79,7 +80,11 @@ struct Op2 class Compiler : public Gen::XEmitter { public: - Compiler(); +#ifdef JIT_ENABLED + explicit Compiler(ARMJIT& jit); +#else + explicit Compiler(ARMJIT& jit) : XEmitter(), JIT(jit) {} +#endif void Reset(); @@ -238,42 +243,43 @@ public: void CreateMethod(const char* namefmt, void* start, ...); #endif - u8* FarCode; - u8* NearCode; - u32 FarSize; - u32 NearSize; + ARMJIT& JIT; + u8* FarCode {}; + u8* NearCode {}; + u32 FarSize {}; + u32 NearSize {}; - u8* NearStart; - u8* FarStart; + u8* NearStart {}; + u8* FarStart {}; - void* PatchedStoreFuncs[2][2][3][16]; - void* PatchedLoadFuncs[2][2][3][2][16]; + void* PatchedStoreFuncs[2][2][3][16] {}; + void* PatchedLoadFuncs[2][2][3][2][16] {}; - std::unordered_map<u8*, LoadStorePatch> LoadStorePatches; + std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {}; - u8* ResetStart; - u32 CodeMemSize; + u8* ResetStart {}; + u32 CodeMemSize {}; - bool Exit; - bool IrregularCycles; + bool Exit {}; + bool IrregularCycles {}; - void* ReadBanked; - void* WriteBanked; + void* ReadBanked {}; + void* WriteBanked {}; bool CPSRDirty = false; - FetchedInstr CurInstr; + FetchedInstr CurInstr {}; - RegisterCache<Compiler, Gen::X64Reg> RegCache; + RegisterCache<Compiler, Gen::X64Reg> RegCache {}; - bool Thumb; - u32 Num; - u32 R15; - u32 CodeRegion; + bool Thumb {}; + u32 Num {}; + u32 R15 {}; + u32 
CodeRegion {}; - u32 ConstantCycles; + u32 ConstantCycles {}; - ARM* CurCPU; + ARM* CurCPU {}; }; } diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 718f1bc..1433429 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -17,6 +17,7 @@ */ #include "ARMJIT_Compiler.h" +#include "../ARMJIT.h" using namespace Gen; @@ -67,9 +68,9 @@ u8* Compiler::RewriteMemAccess(u8* pc) bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) { - u32 localAddr = LocaliseCodeAddress(Num, addr); + u32 localAddr = JIT.LocaliseCodeAddress(Num, addr); - int invalidLiteralIdx = InvalidLiterals.Find(localAddr); + int invalidLiteralIdx = JIT.InvalidLiterals.Find(localAddr); if (invalidLiteralIdx != -1) { return false; @@ -117,7 +118,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag if (size == 16) addressMask = ~1; - if (LiteralOptimizations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) + if (JIT.LiteralOptimizations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) { u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); @@ -134,7 +135,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag Comp_AddCycles_CDI(); } - bool addrIsStatic = LiteralOptimizations + bool addrIsStatic = JIT.LiteralOptimizations && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post)); u32 staticAddress; if (addrIsStatic) @@ -195,10 +196,10 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag MOV(32, rnMapped, R(finalAddr)); u32 expectedTarget = Num == 0 - ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) - : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + ? 
JIT.Memory.ClassifyAddress9(CurInstr.DataRegion) + : JIT.Memory.ClassifyAddress7(CurInstr.DataRegion); - if (ARMJIT::FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget))) + if (JIT.FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || JIT.Memory.IsFastmemCompatible(expectedTarget))) { if (rdMapped.IsImm()) { @@ -216,7 +217,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag assert(patch.PatchFunc != NULL); - MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start)); + MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? JIT.Memory.FastMem9Start : JIT.Memory.FastMem7Start)); X64Reg maskedAddr = RSCRATCH3; if (size > 8) @@ -267,7 +268,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag void* func = NULL; if (addrIsStatic) - func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); + func = JIT.Memory.GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); if (func) { @@ -421,16 +422,16 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc s32 offset = (regsCount * 4) * (decrement ? -1 : 1); int expectedTarget = Num == 0 - ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) - : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + ? 
JIT.Memory.ClassifyAddress9(CurInstr.DataRegion) + : JIT.Memory.ClassifyAddress7(CurInstr.DataRegion); if (!store) Comp_AddCycles_CDI(); else Comp_AddCycles_CD(); - bool compileFastPath = FastMemory - && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)); + bool compileFastPath = JIT.FastMemory + && !usermode && (CurInstr.Cond() < 0xE || JIT.Memory.IsFastmemCompatible(expectedTarget)); // we need to make sure that the stack stays aligned to 16 bytes #ifdef _WIN32 @@ -453,7 +454,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc u8* fastPathStart = GetWritableCodePtr(); u8* loadStoreAddr[16]; - MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start)); + MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? JIT.Memory.FastMem9Start : JIT.Memory.FastMem7Start)); ADD(64, R(RSCRATCH2), R(RSCRATCH4)); u32 offset = 0; @@ -807,7 +808,7 @@ void Compiler::T_Comp_LoadPCRel() { u32 offset = (CurInstr.Instr & 0xFF) << 2; u32 addr = (R15 & ~0x2) + offset; - if (!LiteralOptimizations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) + if (!JIT.LiteralOptimizations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0); } diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index a546678..ea9f681 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -315,7 +315,7 @@ const u32 T_SVC = T_BranchAlways | T_WriteR14 | tk(tk_SVC); #include "ARM_InstrTable.h" #undef INSTRFUNC_PROTO -Info Decode(bool thumb, u32 num, u32 instr) +Info Decode(bool thumb, u32 num, u32 instr, bool literaloptimizations) { const u8 FlagsReadPerCond[7] = { flag_Z, @@ -386,7 +386,7 @@ Info Decode(bool thumb, u32 num, u32 instr) { if (res.Kind == tk_LDR_PCREL) { - if (!ARMJIT::LiteralOptimizations) + if (!literaloptimizations) res.SrcRegs |= 1 << 15; res.SpecialKind = special_LoadLiteral; } diff --git 
a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h index 56f6e62..3442c9a 100644 --- a/src/ARM_InstrInfo.h +++ b/src/ARM_InstrInfo.h @@ -274,7 +274,7 @@ struct Info } }; -Info Decode(bool thumb, u32 num, u32 instr); +Info Decode(bool thumb, u32 num, u32 instr, bool literaloptimizations); } diff --git a/src/CP15.cpp b/src/CP15.cpp index b8a77e2..e8d8c1a 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -22,11 +22,8 @@ #include "DSi.h" #include "ARM.h" #include "Platform.h" - -#ifdef JIT_ENABLED -#include "ARMJIT.h" #include "ARMJIT_Memory.h" -#endif +#include "ARMJIT.h" using Platform::Log; using Platform::LogLevel; @@ -125,9 +122,7 @@ void ARMv5::UpdateDTCMSetting() if (newDTCMBase != DTCMBase || newDTCMMask != DTCMMask) { -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapDTCM(newDTCMBase, newDTCMSize); -#endif + JIT.Memory.RemapDTCM(newDTCMBase, newDTCMSize); DTCMBase = newDTCMBase; DTCMMask = newDTCMMask; } @@ -926,9 +921,7 @@ void ARMv5::DataWrite8(u32 addr, u8 val) { DataCycles = 1; *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); -#endif + JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); return; } if ((addr & DTCMMask) == DTCMBase) @@ -958,9 +951,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val) { DataCycles = 1; *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); -#endif + JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); return; } if ((addr & DTCMMask) == DTCMBase) @@ -990,9 +981,7 @@ void ARMv5::DataWrite32(u32 addr, u32 val) { DataCycles = 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); -#endif + JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); return; } if ((addr & DTCMMask) == DTCMBase) @@ -1015,7 +1004,7 @@ void ARMv5::DataWrite32S(u32 addr, u32 val) 
DataCycles += 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; #ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); + JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif return; } diff --git a/src/DSi.cpp b/src/DSi.cpp index f2937b0..5c4b542 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -28,10 +28,8 @@ #include "DSi_SPI_TSC.h" #include "Platform.h" -#ifdef JIT_ENABLED #include "ARMJIT.h" #include "ARMJIT_Memory.h" -#endif #include "DSi_NDMA.h" #include "DSi_I2C.h" @@ -99,11 +97,10 @@ void Set_SCFG_MC(u32 val); bool Init() { -#ifndef JIT_ENABLED - NWRAM_A = new u8[NWRAMSize]; - NWRAM_B = new u8[NWRAMSize]; - NWRAM_C = new u8[NWRAMSize]; -#endif + // Memory is owned by ARMJIT_Memory, don't free it + NWRAM_A = NDS::JIT->Memory.GetNWRAM_A(); + NWRAM_B = NDS::JIT->Memory.GetNWRAM_B(); + NWRAM_C = NDS::JIT->Memory.GetNWRAM_C(); NDMAs[0] = new DSi_NDMA(0, 0, *NDS::GPU); NDMAs[1] = new DSi_NDMA(0, 1, *NDS::GPU); @@ -127,15 +124,10 @@ bool Init() void DeInit() { -#ifndef JIT_ENABLED - delete[] NWRAM_A; - delete[] NWRAM_B; - delete[] NWRAM_C; - + // Memory is owned externally NWRAM_A = nullptr; NWRAM_B = nullptr; NWRAM_C = nullptr; -#endif for (int i = 0; i < 8; i++) { @@ -684,10 +676,8 @@ void SoftReset() // also, BPTWL[0x70] could be abused to quickly boot specific titles -#ifdef JIT_ENABLED - ARMJIT_Memory::Reset(); - ARMJIT::CheckAndInvalidateITCM(); -#endif + NDS::JIT->Reset(); + NDS::JIT->CheckAndInvalidateITCM(); NDS::ARM9->Reset(); NDS::ARM7->Reset(); @@ -1043,9 +1033,7 @@ void MapNWRAM_A(u32 num, u8 val) u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(0); -#endif + NDS::JIT->Memory.RemapNWRAM(0); MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); @@ -1090,9 +1078,7 @@ void MapNWRAM_B(u32 num, u8 val) u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(1); 
-#endif + NDS::JIT->Memory.RemapNWRAM(1); MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); @@ -1139,9 +1125,7 @@ void MapNWRAM_C(u32 num, u8 val) u8 oldval = (MBK[0][mbkn] >> mbks) & 0xFF; if (oldval == val) return; -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(2); -#endif + NDS::JIT->Memory.RemapNWRAM(2); MBK[0][mbkn] &= ~(0xFF << mbks); MBK[0][mbkn] |= (val << mbks); @@ -1190,9 +1174,7 @@ void MapNWRAMRange(u32 cpu, u32 num, u32 val) u32 oldval = MBK[cpu][5+num]; if (oldval == val) return; -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapNWRAM(num); -#endif + NDS::JIT->Memory.RemapNWRAM(num); MBK[cpu][5+num] = val; @@ -1468,9 +1450,7 @@ void ARM9Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u8*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -1488,9 +1468,7 @@ void ARM9Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u8*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -1508,9 +1486,7 @@ void ARM9Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u8*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -1523,9 +1499,7 @@ void ARM9Write8(u32 addr, u8 val) case 0x06000000: if (!(SCFG_EXT[0] & (1<<13))) return; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { case 0x00000000: 
NDS::GPU->WriteVRAM_ABG<u8>(addr, val); return; @@ -1541,9 +1515,7 @@ void ARM9Write8(u32 addr, u8 val) return; case 0x0C000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u8*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } @@ -1574,9 +1546,7 @@ void ARM9Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u16*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -1594,9 +1564,7 @@ void ARM9Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u16*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -1614,9 +1582,7 @@ void ARM9Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u16*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -1633,9 +1599,7 @@ void ARM9Write16(u32 addr, u16 val) return; case 0x0C000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u16*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } @@ -1666,9 +1630,7 @@ void ARM9Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u32*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, 
ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -1686,9 +1648,7 @@ void ARM9Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u32*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -1706,9 +1666,7 @@ void ARM9Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u32*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -1725,9 +1683,7 @@ void ARM9Write32(u32 addr, u32 val) return; case 0x0C000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u32*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } @@ -1970,9 +1926,7 @@ void ARM7Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u8*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -1990,9 +1944,7 @@ void ARM7Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u8*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -2010,9 +1962,7 @@ void ARM7Write8(u32 addr, u8 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u8*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + 
NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -2033,9 +1983,7 @@ void ARM7Write8(u32 addr, u8 val) case 0x0C000000: case 0x0C800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u8*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } @@ -2067,9 +2015,7 @@ void ARM7Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u16*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -2087,9 +2033,7 @@ void ARM7Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u16*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -2107,9 +2051,7 @@ void ARM7Write16(u32 addr, u16 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u16*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -2130,9 +2072,7 @@ void ARM7Write16(u32 addr, u16 val) case 0x0C000000: case 0x0C800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u16*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } @@ -2164,9 +2104,7 @@ void ARM7Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_A[page * 0x10000]; *(u32*)&ptr[addr & 0xFFFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, 
ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_A>(addr); } return; } @@ -2184,9 +2122,7 @@ void ARM7Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_B[page * 0x8000]; *(u32*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_B>(addr); } return; } @@ -2204,9 +2140,7 @@ void ARM7Write32(u32 addr, u32 val) continue; u8* ptr = &NWRAM_C[page * 0x8000]; *(u32*)&ptr[addr & 0x7FFF] = val; -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_NewSharedWRAM_C>(addr); } return; } @@ -2227,9 +2161,7 @@ void ARM7Write32(u32 addr, u32 val) case 0x0C000000: case 0x0C800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u32*)&NDS::MainRAM[addr & NDS::MainRAMMask] = val; return; } diff --git a/src/GPU.cpp b/src/GPU.cpp index 987068d..5c67cfb 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -20,9 +20,7 @@ #include "NDS.h" #include "GPU.h" -#ifdef JIT_ENABLED #include "ARMJIT.h" -#endif #include "GPU2D_Soft.h" #include "GPU3D_Soft.h" @@ -66,7 +64,7 @@ enum VRAMDirty need to be reset for the respective VRAM bank. 
*/ -GPU::GPU() noexcept : GPU2D_A(0, *this), GPU2D_B(1, *this) +GPU::GPU(ARMJIT::ARMJIT& jit) noexcept : GPU2D_A(0, *this), GPU2D_B(1, *this), JIT(jit) { NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartHBlank, MemberEventFunc(GPU, StartHBlank)); NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartScanline, MemberEventFunc(GPU, StartScanline)); @@ -590,9 +588,7 @@ void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept VRAMMap_ARM7[ofs] |= bankmask; memset(VRAMDirty[bank].Data, 0xFF, sizeof(VRAMDirty[bank].Data)); VRAMSTAT |= (1 << (bank-2)); -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidateWVRAM(ofs); -#endif + JIT.CheckAndInvalidateWVRAM(ofs); break; case 3: // texture @@ -35,6 +35,11 @@ namespace GPU3D class GPU3D; } +namespace ARMJIT +{ +class ARMJIT; +} + namespace Melon { static constexpr u32 VRAMDirtyGranularity = 512; @@ -70,7 +75,7 @@ struct RenderSettings class GPU { public: - GPU() noexcept; + GPU(ARMJIT::ARMJIT& jit) noexcept; ~GPU() noexcept; void Reset() noexcept; void Stop() noexcept; @@ -539,6 +544,7 @@ public: void SyncDirtyFlags() noexcept; + ARMJIT::ARMJIT& JIT; u16 VCount = 0; u16 TotalScanlines = 0; u16 DispStat[2] {}; diff --git a/src/JitBlock.h b/src/JitBlock.h new file mode 100644 index 0000000..abd435b --- /dev/null +++ b/src/JitBlock.h @@ -0,0 +1,61 @@ +/* + Copyright 2016-2023 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#ifndef MELONDS_JITBLOCK_H +#define MELONDS_JITBLOCK_H + +#include "types.h" +#include "TinyVector.h" + +namespace ARMJIT +{ +typedef void (*JitBlockEntry)(); + +class JitBlock +{ +public: + JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals) + { + Num = num; + NumAddresses = numAddresses; + NumLiterals = numLiterals; + Data.SetLength(numAddresses * 2 + numLiterals); + } + + u32 StartAddr; + u32 StartAddrLocal; + u32 InstrHash, LiteralHash; + u8 Num; + u16 NumAddresses; + u16 NumLiterals; + + JitBlockEntry EntryPoint; + + u32* AddressRanges() + { return &Data[0]; } + u32* AddressMasks() + { return &Data[NumAddresses]; } + u32* Literals() + { return &Data[NumAddresses * 2]; } + +private: + TinyVector<u32> Data; +}; +} + +#endif //MELONDS_JITBLOCK_H diff --git a/src/NDS.cpp b/src/NDS.cpp index 5290423..d13fd91 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -35,16 +35,13 @@ #include "Platform.h" #include "FreeBIOS.h" -#ifdef JIT_ENABLED -#include "ARMJIT.h" -#include "ARMJIT_Memory.h" -#endif - #include "DSi.h" #include "DSi_SPI_TSC.h" #include "DSi_NWifi.h" #include "DSi_Camera.h" #include "DSi_DSP.h" +#include "ARMJIT.h" +#include "ARMJIT_Memory.h" using namespace Platform; @@ -186,6 +183,7 @@ class Wifi* Wifi; std::unique_ptr<NDSCart::NDSCartSlot> NDSCartSlot; std::unique_ptr<GBACart::GBACartSlot> GBACartSlot; std::unique_ptr<Melon::GPU> GPU; +std::unique_ptr<ARMJIT::ARMJIT> JIT; class AREngine* AREngine; bool Running; @@ -205,17 +203,15 @@ bool Init() RegisterEventFunc(Event_Div, 0, DivDone); RegisterEventFunc(Event_Sqrt, 0, SqrtDone); - GPU = std::make_unique<Melon::GPU>(); - ARM9 = new ARMv5(*GPU); - ARM7 = new ARMv4(*GPU); + JIT = std::make_unique<ARMJIT::ARMJIT>(); + GPU = std::make_unique<Melon::GPU>(*JIT); -#ifdef JIT_ENABLED - ARMJIT::Init(); -#else - MainRAM = new u8[0x1000000]; - ARM7WRAM = new u8[ARM7WRAMSize]; - SharedWRAM = new u8[SharedWRAMSize]; -#endif + MainRAM = JIT->Memory.GetMainRAM(); + SharedWRAM = 
JIT->Memory.GetSharedWRAM(); + ARM7WRAM = JIT->Memory.GetARM7WRAM(); + + ARM9 = new ARMv5(*JIT, *GPU); + ARM7 = new ARMv4(*JIT, *GPU); DMAs[0] = new DMA(0, 0, *GPU); DMAs[1] = new DMA(0, 1, *GPU); @@ -242,10 +238,6 @@ bool Init() void DeInit() { -#ifdef JIT_ENABLED - ARMJIT::DeInit(); -#endif - delete ARM9; ARM9 = nullptr; delete ARM7; ARM7 = nullptr; @@ -270,6 +262,8 @@ void DeInit() UnregisterEventFunc(Event_Div, 0); UnregisterEventFunc(Event_Sqrt, 0); + + JIT = nullptr; } @@ -548,9 +542,7 @@ void Reset() // BIOS files are now loaded by the frontend -#ifdef JIT_ENABLED - ARMJIT::Reset(); -#endif + JIT->Reset(); if (ConsoleType == 1) { @@ -869,8 +861,8 @@ bool DoSavestate(Savestate* file) #ifdef JIT_ENABLED if (!file->Saving) { - ARMJIT::ResetBlockCache(); - ARMJIT_Memory::Reset(); + JIT->ResetBlockCache(); + JIT->Memory.Reset(); } #endif @@ -1401,9 +1393,7 @@ void MapSharedWRAM(u8 val) if (val == WRAMCnt) return; -#ifdef JIT_ENABLED - ARMJIT_Memory::RemapSWRAM(); -#endif + NDS::JIT->Memory.RemapSWRAM(); WRAMCnt = val; @@ -2315,18 +2305,14 @@ void ARM9Write8(u32 addr, u8 val) switch (addr & 0xFF000000) { case 0x02000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u8*)&MainRAM[addr & MainRAMMask] = val; return; case 0x03000000: if (SWRAM_ARM9.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val; } return; @@ -2361,18 +2347,14 @@ void ARM9Write16(u32 addr, u16 val) switch (addr & 0xFF000000) { case 0x02000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u16*)&MainRAM[addr & MainRAMMask] = val; return; case 0x03000000: 
if (SWRAM_ARM9.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val; } return; @@ -2387,9 +2369,7 @@ void ARM9Write16(u32 addr, u16 val) return; case 0x06000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { case 0x00000000: GPU->WriteVRAM_ABG<u16>(addr, val); return; @@ -2429,18 +2409,14 @@ void ARM9Write32(u32 addr, u32 val) switch (addr & 0xFF000000) { case 0x02000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr); *(u32*)&MainRAM[addr & MainRAMMask] = val; return ; case 0x03000000: if (SWRAM_ARM9.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val; } return; @@ -2455,9 +2431,7 @@ void ARM9Write32(u32 addr, u32 val) return; case 0x06000000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { case 0x00000000: GPU->WriteVRAM_ABG<u32>(addr, val); return; @@ -2738,34 +2712,26 @@ void ARM7Write8(u32 addr, u8 val) { case 0x02000000: case 0x02800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u8*)&MainRAM[addr & MainRAMMask] = val; return; case 0x03000000: if (SWRAM_ARM7.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, 
ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val; return; } else { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; } case 0x03800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; @@ -2775,9 +2741,7 @@ void ARM7Write8(u32 addr, u8 val) case 0x06000000: case 0x06800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); GPU->WriteVRAM_ARM7<u8>(addr, val); return; @@ -2808,34 +2772,26 @@ void ARM7Write16(u32 addr, u16 val) { case 0x02000000: case 0x02800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u16*)&MainRAM[addr & MainRAMMask] = val; return; case 0x03000000: if (SWRAM_ARM7.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val; return; } else { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; } case 0x03800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, 
ARMJIT_Memory::memregion_WRAM7>(addr); *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; @@ -2854,9 +2810,7 @@ void ARM7Write16(u32 addr, u16 val) case 0x06000000: case 0x06800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); GPU->WriteVRAM_ARM7<u16>(addr, val); return; @@ -2889,34 +2843,26 @@ void ARM7Write32(u32 addr, u32 val) { case 0x02000000: case 0x02800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr); *(u32*)&MainRAM[addr & MainRAMMask] = val; return; case 0x03000000: if (SWRAM_ARM7.Mem) { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_SharedWRAM>(addr); *(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val; return; } else { -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; } case 0x03800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr); *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val; return; @@ -2936,9 +2882,7 @@ void ARM7Write32(u32 addr, u32 val) case 0x06000000: case 0x06800000: -#ifdef JIT_ENABLED - ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); -#endif + NDS::JIT->CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); GPU->WriteVRAM_ARM7<u32>(addr, val); return; @@ -46,6 +46,11 @@ namespace Melon class GPU; } +namespace ARMJIT +{ +class ARMJIT; +} + namespace NDS { @@ -269,6 +274,7 @@ extern class Wifi* Wifi; extern 
std::unique_ptr<NDSCart::NDSCartSlot> NDSCartSlot; extern std::unique_ptr<GBACart::GBACartSlot> GBACartSlot; extern std::unique_ptr<Melon::GPU> GPU; +extern std::unique_ptr<ARMJIT::ARMJIT> JIT; extern class AREngine* AREngine; const u32 ARM7WRAMSize = 0x10000; diff --git a/src/NDSCart.h b/src/NDSCart.h index 5696fd7..33a17bb 100644 --- a/src/NDSCart.h +++ b/src/NDSCart.h @@ -19,6 +19,7 @@ #ifndef NDSCART_H #define NDSCART_H +#include <array> #include <string> #include <memory> #include <array> diff --git a/src/TinyVector.h b/src/TinyVector.h new file mode 100644 index 0000000..63e7caf --- /dev/null +++ b/src/TinyVector.h @@ -0,0 +1,131 @@ +/* + Copyright 2016-2023 melonDS team, RSDuck + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef MELONDS_TINYVECTOR_H +#define MELONDS_TINYVECTOR_H + +#include <assert.h> +#include <string.h> +#include "types.h" + +namespace ARMJIT +{ +/* + TinyVector + - because reinventing the wheel is the best! + + - meant to be used very often, with not so many elements + max 1 << 16 elements + - doesn't allocate while no elements are inserted + - not stl confirmant of course + - probably only works with POD types + - remove operations don't preserve order, but O(1)! 
+*/ +template<typename T> +struct __attribute__((packed)) TinyVector +{ + T* Data = NULL; + u16 Capacity = 0; + u16 Length = 0; + + ~TinyVector() + { + delete[] Data; + } + + void MakeCapacity(u32 capacity) + { + assert(capacity <= UINT16_MAX); + assert(capacity > Capacity); + T* newMem = new T[capacity]; + if (Data != NULL) + memcpy(newMem, Data, sizeof(T) * Length); + + T* oldData = Data; + Data = newMem; + if (oldData != NULL) + delete[] oldData; + + Capacity = capacity; + } + + void SetLength(u16 length) + { + if (Capacity < length) + MakeCapacity(length); + + Length = length; + } + + void Clear() + { + Length = 0; + } + + void Add(T element) + { + assert(Length + 1 <= UINT16_MAX); + if (Length + 1 > Capacity) + MakeCapacity(((Capacity + 4) * 3) / 2); + + Data[Length++] = element; + } + + void Remove(int index) + { + assert(Length > 0); + assert(index >= 0 && index < Length); + + Length--; + Data[index] = Data[Length]; + /*for (int i = index; i < Length; i++) + Data[i] = Data[i + 1];*/ + } + + int Find(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + return i; + } + return -1; + } + + bool RemoveByValue(T needle) + { + for (int i = 0; i < Length; i++) + { + if (Data[i] == needle) + { + Remove(i); + return true; + } + } + return false; + } + + T& operator[](int index) + { + assert(index >= 0 && index < Length); + return Data[index]; + } +}; +} + +#endif //MELONDS_TINYVECTOR_H |