diff options
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Branch.cpp | 23 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.cpp | 140 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Compiler.h | 19 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_GenOffsets.cpp | 15 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Linkage.s | 74 | ||||
-rw-r--r-- | src/ARMJIT_x64/ARMJIT_Offsets.h | 3 |
6 files changed, 220 insertions, 54 deletions
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index e02865d..cac590a 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) ConstantCycles += cycles; else - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); } void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) @@ -135,7 +135,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) IrregularCycles = true; BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); - bool previouslyDirty = CPSRDirty; + bool cpsrDirty = CPSRDirty; SaveCPSR(); if (restoreCPSR) @@ -168,9 +168,10 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) LoadReg(reg, RegCache.Mapping[reg]); } - if (previouslyDirty) - LoadCPSR(); - CPSRDirty = previouslyDirty; + LoadCPSR(); + // in case this instruction is skipped + if (CurInstr.Cond() < 0xE) + CPSRDirty = cpsrDirty; } void Compiler::A_Comp_BranchImm() @@ -209,20 +210,12 @@ void Compiler::T_Comp_BCOND() s32 offset = (s32)(CurInstr.Instr << 24) >> 23; Comp_JumpTo(R15 + offset + 1, true); - Comp_SpecialBranchBehaviour(); + Comp_SpecialBranchBehaviour(true); FixupBranch skipFailed = J(); SetJumpTarget(skipExecute); - if (CurInstr.BranchFlags & branch_FollowCondTaken) - { - RegCache.PrepareExit(); - SaveCPSR(false); - - MOV(32, R(RAX), Imm32(ConstantCycles)); - ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8); - RET(); - } + Comp_SpecialBranchBehaviour(false); Comp_AddCycles_C(true); SetJumpTarget(skipFailed); diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index d69bdff..be3709e 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -1,6 +1,7 @@ #include "ARMJIT_Compiler.h" #include "../ARMInterpreter.h" +#include "../Config.h" #include <assert.h> @@ -15,6 +16,8 @@ using namespace Gen; +extern "C" void ARM_Ret(); + namespace ARMJIT { template <> @@ -170,6 +173,24 @@ Compiler::Compiler() RET(); } + { + CPSRDirty = true; + BranchStub[0] = GetWritableCodePtr(); + SaveCPSR(); + MOV(64, R(ABI_PARAM1), R(RCPU)); + CALL((u8*)ARMJIT::LinkBlock<0>); + LoadCPSR(); + JMP((u8*)ARM_Ret, true); + + CPSRDirty = true; + BranchStub[1] = GetWritableCodePtr(); + SaveCPSR(); + MOV(64, R(ABI_PARAM1), R(RCPU)); + CALL((u8*)ARMJIT::LinkBlock<1>); + LoadCPSR(); + JMP((u8*)ARM_Ret, true); + } + // move the region forward to prevent overwriting the generated functions CodeMemSize -= GetWritableCodePtr() - ResetStart; ResetStart = GetWritableCodePtr(); @@ -362,23 +383,43 @@ void Compiler::Reset() SetCodePtr(ResetStart); } -void Compiler::Comp_SpecialBranchBehaviour() +void Compiler::Comp_SpecialBranchBehaviour(bool taken) { - if (CurInstr.BranchFlags & branch_IdleBranch) - OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1)); + if (taken && CurInstr.BranchFlags & branch_IdleBranch) + OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1)); - if (CurInstr.BranchFlags & branch_FollowCondNotTaken) + if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken) + || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken)) { RegCache.PrepareExit(); - SaveCPSR(false); - - MOV(32, R(RAX), Imm32(ConstantCycles)); - ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8); - RET(); + + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + + if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch) + && (!taken || (CurInstr.BranchFlags & branch_StaticTarget))) + { + FixupBranch ret = J_CC(CC_S); + CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0)); + FixupBranch ret2 = J_CC(CC_NZ); + + u8* rewritePart = GetWritableCodePtr(); + NOP(5); + + MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart)); + JMP((u8*)BranchStub[Num], true); + + SetJumpTarget(ret); + SetJumpTarget(ret2); + JMP((u8*)ARM_Ret, true); + } + else + { + JMP((u8*)&ARM_Ret, true); + } } } -JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) +JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) { if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess... ResetBlockCache(); @@ -388,15 +429,11 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] Num = cpu->Num; CodeRegion = instrs[0].Addr >> 24; CurCPU = cpu; + // CPSR might have been modified in a previous block + CPSRDirty = Config::JIT_BrancheOptimisations == 2; JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr(); - ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8); - - MOV(64, R(RCPU), ImmPtr(cpu)); - - LoadCPSR(); - RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount); for (int i = 0; i < instrsCount; i++) @@ -474,7 +511,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] else (this->*comp)(); - Comp_SpecialBranchBehaviour(); + Comp_SpecialBranchBehaviour(true); if (CurInstr.Cond() < 0xE) { @@ -485,15 +522,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] Comp_AddCycles_C(true); - if (CurInstr.BranchFlags & branch_FollowCondTaken) - { - RegCache.PrepareExit(); - SaveCPSR(false); - - MOV(32, R(RAX), Imm32(ConstantCycles)); - ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8); - RET(); - } + Comp_SpecialBranchBehaviour(false); SetJumpTarget(skipFailed); } @@ -504,17 +533,38 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] } } - if (comp == NULL && i != instrsCount - 1) + if (comp == NULL) LoadCPSR(); } RegCache.Flush(); - SaveCPSR(); - MOV(32, R(RAX), Imm32(ConstantCycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + + if (Config::JIT_BrancheOptimisations == 2 + && !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch) + && (!instrs[instrsCount - 1].Info.Branches() + || instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken + || (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget))) + { + FixupBranch ret = J_CC(CC_S); + CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0)); + FixupBranch ret2 = J_CC(CC_NZ); + + u8* rewritePart = GetWritableCodePtr(); + NOP(5); + + MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart)); + JMP((u8*)BranchStub[Num], true); - ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8); - RET(); + SetJumpTarget(ret); + SetJumpTarget(ret2); + JMP((u8*)ARM_Ret, true); + } + else + { + JMP((u8*)ARM_Ret, true); + } /*FILE* codeout = fopen("codeout", "a"); fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr); @@ -525,6 +575,22 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[] return res; } +void Compiler::LinkBlock(u32 offset, JitBlockEntry entry) +{ + u8* curPtr = GetWritableCodePtr(); + SetCodePtr(ResetStart + offset); + JMP((u8*)entry, true); + SetCodePtr(curPtr); +} + +void Compiler::UnlinkBlock(u32 offset) +{ + u8* curPtr = GetWritableCodePtr(); + SetCodePtr(ResetStart + offset); + NOP(5); + SetCodePtr(curPtr); +} + void Compiler::Comp_AddCycles_C(bool forceNonConstant) { s32 cycles = Num ? @@ -532,7 +598,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -544,7 +610,7 @@ void Compiler::Comp_AddCycles_CI(u32 i) : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; if (!Thumb && CurInstr.Cond() < 0xE) - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -558,12 +624,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) if (!Thumb && CurInstr.Cond() < 0xE) { LEA(32, RSCRATCH, MDisp(i, add + cycles)); - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); } else { ConstantCycles += i + cycles; - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); } } @@ -599,7 +665,7 @@ void Compiler::Comp_AddCycles_CDI() } if (!Thumb && CurInstr.Cond() < 0xE) - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } @@ -643,7 +709,7 @@ void Compiler::Comp_AddCycles_CD() } if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) - ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 2cb57dc..b428c33 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -51,7 +51,10 @@ public: void Reset(); - JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount); + void LinkBlock(u32 offset, JitBlockEntry entry); + void UnlinkBlock(u32 offset); + + JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount); void LoadReg(int reg, Gen::X64Reg nativeReg); void SaveReg(int reg, Gen::X64Reg nativeReg); @@ -145,7 +148,7 @@ public: void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed); - void Comp_SpecialBranchBehaviour(); + void Comp_SpecialBranchBehaviour(bool taken); void* Gen_MemoryRoutine9(bool store, int size); @@ -176,12 +179,24 @@ public: return Gen::R(RegCache.Mapping[reg]); } + JitBlockEntry AddEntryOffset(u32 offset) + { + return (JitBlockEntry)(ResetStart + offset); + } + + u32 SubEntryOffset(JitBlockEntry entry) + { + return (u8*)entry - ResetStart; + } + u8* ResetStart; u32 CodeMemSize; bool Exit; bool IrregularCycles; + void* BranchStub[2]; + void* MemoryFuncs9[3][2]; void* MemoryFuncs7[3][2]; diff --git a/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp new file mode 100644 index 0000000..9696d22 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp @@ -0,0 +1,15 @@ +#include "../ARM.h" + +int main(int argc, char* argv[]) +{ + FILE* f = fopen("ARMJIT_Offsets.h", "w"); +#define writeOffset(field) \ + fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field)) + + writeOffset(CPSR); + writeOffset(Cycles); + writeOffset(StopExecution); + + fclose(f); + return 0; +}
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s new file mode 100644 index 0000000..dbbb024 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Linkage.s @@ -0,0 +1,74 @@ +.intel_syntax noprefix + +#include "ARMJIT_Offsets.h" + +.text + +#define RCPU rbp +#define RCPSR r15d + +#ifdef WIN64 +#define ARG1_REG ecx +#define ARG2_REG edx +#define ARG3_REG r8d +#define ARG4_REG r9d +#define ARG1_REG64 rcx +#define ARG2_REG64 rdx +#define ARG3_REG64 r8 +#define ARG4_REG64 r9 +#else +#define ARG1_REG edi +#define ARG2_REG esi +#define ARG3_REG edx +#define ARG4_REG ecx +#define ARG1_REG64 rdi +#define ARG2_REG64 rsi +#define ARG3_REG64 rdx +#define ARG4_REG64 rcx +#endif + +.p2align 4,,15 + +.global ARM_Dispatch +ARM_Dispatch: +#ifdef WIN64 + push rdi + push rsi +#endif + push rbx + push r12 + push r13 + push r14 + push r15 + push rbp + +#ifdef WIN64 + sub rsp, 0x28 +#endif + mov RCPU, ARG1_REG64 + mov RCPSR, [RCPU + ARM_CPSR_offset] + + jmp ARG2_REG64 + +.p2align 4,,15 + +.global ARM_Ret +ARM_Ret: + mov [RCPU + ARM_CPSR_offset], RCPSR + +#ifdef WIN64 + add rsp, 0x28 +#endif + + pop rbp + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx +#ifdef WIN64 + pop rsi + pop rdi +#endif + + ret diff --git a/src/ARMJIT_x64/ARMJIT_Offsets.h b/src/ARMJIT_x64/ARMJIT_Offsets.h new file mode 100644 index 0000000..a73dd59 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Offsets.h @@ -0,0 +1,3 @@ +#define ARM_CPSR_offset 0x64 +#define ARM_Cycles_offset 0xc +#define ARM_StopExecution_offset 0x10 |