diff options
Diffstat (limited to 'src/ARMJIT_A64')
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Branch.cpp    | 10
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.cpp  | 74
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.h    |  7
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 21
4 files changed, 70 insertions(+), 42 deletions(-)
diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp index eeabfb0..2f640c8 100644 --- a/src/ARMJIT_A64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -27,7 +27,7 @@ namespace ARMJIT { template <typename T> -void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR) +void JumpToTrampoline(T* cpu, u32 addr, bool changeCPSR) { cpu->JumpTo(addr, changeCPSR); } @@ -301,7 +301,7 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto bool cpsrDirty = CPSRDirty; SaveCPSR(); SaveCycles(); - PushRegs(restoreCPSR); + PushRegs(restoreCPSR, true); if (switchThumb) MOV(W1, addr); @@ -315,11 +315,11 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto MOV(X0, RCPU); MOVI2R(W2, restoreCPSR); if (Num == 0) - QuickCallFunction(X3, jumpToTrampoline<ARMv5>); + QuickCallFunction(X3, JumpToTrampoline<ARMv5>); else - QuickCallFunction(X3, jumpToTrampoline<ARMv4>); + QuickCallFunction(X3, JumpToTrampoline<ARMv4>); - PopRegs(restoreCPSR); + PopRegs(restoreCPSR, true); LoadCycles(); LoadCPSR(); if (CurInstr.Cond() < 0xE) diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp index 4fbb804..7dc854a 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -58,9 +58,14 @@ namespace ARMJIT template <> const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] = - {W19, W20, W21, W22, W23, W24, W25, W26}; +{ + W19, W20, W21, W22, W23, W24, W25, + W8, W9, W10, W11, W12, W13, W14, W15 +}; template <> -const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8; +const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15; + +const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15}); const int JitMemSize = 16 * 1024 * 1024; #ifndef __SWITCH__ @@ -164,44 +169,55 @@ void Compiler::A_Comp_MSR() MOV(W2, RCPSR); MOV(X0, RCPU); - PushRegs(true); - - QuickCallFunction(X3, 
(void*)&UpdateModeTrampoline); - - PopRegs(true); + PushRegs(true, true); + QuickCallFunction(X3, UpdateModeTrampoline); + PopRegs(true, true); } } } -void Compiler::PushRegs(bool saveHiRegs) + +void Compiler::PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload) { + BitSet32 loadedRegs(RegCache.LoadedRegs); + if (saveHiRegs) { - if (Thumb || CurInstr.Cond() == 0xE) + BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); + for (int reg : hiRegsLoaded) { - BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); - for (int reg : hiRegsLoaded) + if (Thumb || CurInstr.Cond() == 0xE) RegCache.UnloadRegister(reg); + else + SaveReg(reg, RegCache.Mapping[reg]); + // prevent saving the register twice + loadedRegs[reg] = false; } - else + } + + for (int reg : loadedRegs) + { + if (CallerSavedPushRegs[RegCache.Mapping[reg]] + && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs)))) { - BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00); - for (int reg : hiRegsDirty) + if ((Thumb || CurInstr.Cond() == 0xE) && !((1 << reg) & (CurInstr.Info.DstRegs|CurInstr.Info.SrcRegs)) && allowUnload) + RegCache.UnloadRegister(reg); + else SaveReg(reg, RegCache.Mapping[reg]); } } } -void Compiler::PopRegs(bool saveHiRegs) +void Compiler::PopRegs(bool saveHiRegs, bool saveRegsToBeChanged) { - if (saveHiRegs) + BitSet32 loadedRegs(RegCache.LoadedRegs); + for (int reg : loadedRegs) { - if (!Thumb && CurInstr.Cond() != 0xE) + if ((saveHiRegs && reg >= 8 && reg < 15) + || (CallerSavedPushRegs[RegCache.Mapping[reg]] + && (saveRegsToBeChanged || !((1<<reg) & CurInstr.Info.DstRegs && !((1<<reg) & CurInstr.Info.SrcRegs))))) { - BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); - - for (int reg : hiRegsLoaded) - LoadReg(reg, RegCache.Mapping[reg]); + LoadReg(reg, RegCache.Mapping[reg]); } } } @@ -267,6 +283,7 @@ Compiler::Compiler() } /* + W4 - whether the register was written to W5 - mode W1 - reg num W3 - in/out value of reg @@ -358,7 
+375,7 @@ Compiler::Compiler() { for (int reg = 0; reg < 32; reg++) { - if (!(reg == W4 || (reg >= W19 && reg <= W26))) + if (!(reg == W4 || (reg >= W8 && reg <= W15) || (reg >= W19 && reg <= W25))) continue; ARM64Reg rdMapped = (ARM64Reg)reg; PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr(); @@ -371,7 +388,7 @@ Compiler::Compiler() { MOV(W1, rdMapped); } - ABI_PushRegisters({30}); + ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs); if (consoleType == 0) { switch ((8 << size) | num) @@ -397,7 +414,7 @@ Compiler::Compiler() } } - ABI_PopRegisters({30}); + ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs); RET(); for (int signextend = 0; signextend < 2; signextend++) @@ -405,7 +422,7 @@ Compiler::Compiler() PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr(); if (num == 0) MOV(X1, RCPU); - ABI_PushRegisters({30}); + ABI_PushRegisters(BitSet32({30}) | CallerSavedPushRegs); if (consoleType == 0) { switch ((8 << size) | num) @@ -430,7 +447,7 @@ Compiler::Compiler() case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break; } } - ABI_PopRegisters({30}); + ABI_PopRegisters(BitSet32({30}) | CallerSavedPushRegs); if (size == 32) MOV(rdMapped, W0); else if (signextend) @@ -673,7 +690,7 @@ void Compiler::Comp_BranchSpecialBehaviour(bool taken) } } -JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) +JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr) { if (JitMemMainSize - GetCodeOffset() < 1024 * 16) { @@ -695,6 +712,9 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true); CPSRDirty = false; + if (hasMemInstr) + MOVP2R(RMemBase, Num == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + for (int i = 0; i < instrsCount; i++) { CurInstr = instrs[i]; diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h index d18da93..24e730b 100644 --- a/src/ARMJIT_A64/ARMJIT_Compiler.h +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -32,6 +32,7 @@ namespace ARMJIT { +const Arm64Gen::ARM64Reg RMemBase = Arm64Gen::X26; const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27; const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28; const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29; @@ -99,8 +100,8 @@ public: Compiler(); ~Compiler(); - void PushRegs(bool saveHiRegs); - void PopRegs(bool saveHiRegs); + void PushRegs(bool saveHiRegs, bool saveRegsToBeChanged, bool allowUnload = true); + void PopRegs(bool saveHiRegs, bool saveRegsToBeChanged); Arm64Gen::ARM64Reg MapReg(int reg) { @@ -108,7 +109,7 @@ public: return RegCache.Mapping[reg]; } - JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount); + JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount, bool hasMemInstr); bool CanCompile(bool thumb, u16 kind); diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 4f05d4d..5ac629b 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -194,13 +194,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) ptrdiff_t memopStart = GetCodeOffset(); LoadStorePatch patch; - assert((rdMapped >= W19 && rdMapped <= W26) || rdMapped == W4); + assert((rdMapped >= W8 && rdMapped <= W15) || (rdMapped >= W19 && rdMapped <= W25) || rdMapped == W4); patch.PatchFunc = flags & memop_Store ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped] : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped]; - MOVP2R(X7, Num == 0 ? 
ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - // take a chance at fastmem if (size > 8) ANDI2R(W1, W0, addressMask); @@ -208,11 +206,11 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) ptrdiff_t loadStorePosition = GetCodeOffset(); if (flags & memop_Store) { - STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7); + STRGeneric(size, rdMapped, size > 8 ? X1 : X0, RMemBase); } else { - LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7); + LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, RMemBase); if (size == 32 && !addrIsStatic) { UBFIZ(W0, W0, 3, 2); @@ -230,12 +228,16 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) if (addrIsStatic) func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); + PushRegs(false, false); + if (func) { if (flags & memop_Store) MOV(W1, rdMapped); QuickCallFunction(X2, (void (*)())func); + PopRegs(false, false); + if (!(flags & memop_Store)) { if (size == 32) @@ -314,6 +316,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) } } + PopRegs(false, false); + if (!(flags & memop_Store)) { if (size == 32) @@ -515,8 +519,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc ptrdiff_t fastPathStart = GetCodeOffset(); ptrdiff_t loadStoreOffsets[8]; - MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); - ADD(X1, X1, X0); + ADD(X1, RMemBase, X0); u32 offset = 0; BitSet16::Iterator it = regs.begin(); @@ -655,6 +658,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc } } + PushRegs(false, false, !compileFastPath); + ADD(X1, SP, 0); MOVI2R(W2, regsCount); @@ -680,6 +685,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc } } + PopRegs(false, false); + if (!store) { if (usermode && !regs[15] && (regs & BitSet16(0x7f00))) |