path: root/src/ARMJIT_x64
author    RSDuck <rsduck@users.noreply.github.com>    2020-04-25 13:40:51 +0200
committer RSDuck <rsduck@users.noreply.github.com>    2020-04-26 13:05:17 +0200
commit    3787bab1f69ae22d3e8106d70598ce923e5efe70 (patch)
tree      33f1f2133832cc75e4d82b0c75c4268441c6c0cf /src/ARMJIT_x64
parent    5d0f244f3c86c2b1c65566bffa3972ae1dbac27b (diff)
implement block linking + some refactoring
currently only supported for x64
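
Background for the diff below: block linking means that when a compiled block exits to a statically known successor, the exit is first emitted as a 5-byte NOP placeholder plus a fallback jump to a branch stub; once the successor has been compiled, the placeholder is overwritten with a direct jump to it, and the patch is reverted when the target block is invalidated. A minimal, self-contained sketch of that patching idea (not melonDS code; the helper names LinkExit/UnlinkExit are made up for illustration):

// Minimal illustration of exit patching (hypothetical helpers, not melonDS code).
#include <cstdint>
#include <cstring>

// Replace the 5-byte placeholder at patchSite with "jmp rel32" to target.
void LinkExit(uint8_t* patchSite, const uint8_t* target)
{
    int32_t rel = (int32_t)(target - (patchSite + 5)); // rel32 is measured from the end of the JMP
    patchSite[0] = 0xE9;                               // opcode for JMP rel32
    std::memcpy(patchSite + 1, &rel, sizeof(rel));
}

// Restore the placeholder so the exit falls back to the branch stub again.
void UnlinkExit(uint8_t* patchSite)
{
    const uint8_t nop5[5] = {0x0F, 0x1F, 0x44, 0x00, 0x00}; // canonical 5-byte NOP
    std::memcpy(patchSite, nop5, sizeof(nop5));
}

In the real code the patch site is identified by its offset from ResetStart (see Compiler::LinkBlock/UnlinkBlock in the ARMJIT_Compiler.cpp hunks), and the fallback path jumps to a per-CPU branch stub that calls ARMJIT::LinkBlock<Num> to look up or compile the target and perform exactly this rewrite.
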
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Branch.cpp     |  23
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.cpp   | 140
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.h     |  19
-rw-r--r--  src/ARMJIT_x64/ARMJIT_GenOffsets.cpp |  15
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Linkage.s      |  74
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Offsets.h      |   3
6 files changed, 220 insertions(+), 54 deletions(-)
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
index e02865d..cac590a 100644
--- a/src/ARMJIT_x64/ARMJIT_Branch.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -127,7 +127,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
}
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
@@ -135,7 +135,7 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
IrregularCycles = true;
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
- bool previouslyDirty = CPSRDirty;
+ bool cpsrDirty = CPSRDirty;
SaveCPSR();
if (restoreCPSR)
@@ -168,9 +168,10 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
LoadReg(reg, RegCache.Mapping[reg]);
}
- if (previouslyDirty)
- LoadCPSR();
- CPSRDirty = previouslyDirty;
+ LoadCPSR();
+ // in case this instruction is skipped
+ if (CurInstr.Cond() < 0xE)
+ CPSRDirty = cpsrDirty;
}
void Compiler::A_Comp_BranchImm()
@@ -209,20 +210,12 @@ void Compiler::T_Comp_BCOND()
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
- Comp_SpecialBranchBehaviour();
+ Comp_SpecialBranchBehaviour(true);
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
- if (CurInstr.BranchFlags & branch_FollowCondTaken)
- {
- RegCache.PrepareExit();
- SaveCPSR(false);
-
- MOV(32, R(RAX), Imm32(ConstantCycles));
- ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
- RET();
- }
+ Comp_SpecialBranchBehaviour(false);
Comp_AddCycles_C(true);
SetJumpTarget(skipFailed);
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index d69bdff..be3709e 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -1,6 +1,7 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
+#include "../Config.h"
#include <assert.h>
@@ -15,6 +16,8 @@
using namespace Gen;
+extern "C" void ARM_Ret();
+
namespace ARMJIT
{
template <>
@@ -170,6 +173,24 @@ Compiler::Compiler()
RET();
}
+ {
+ CPSRDirty = true;
+ BranchStub[0] = GetWritableCodePtr();
+ SaveCPSR();
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ CALL((u8*)ARMJIT::LinkBlock<0>);
+ LoadCPSR();
+ JMP((u8*)ARM_Ret, true);
+
+ CPSRDirty = true;
+ BranchStub[1] = GetWritableCodePtr();
+ SaveCPSR();
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ CALL((u8*)ARMJIT::LinkBlock<1>);
+ LoadCPSR();
+ JMP((u8*)ARM_Ret, true);
+ }
+
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
@@ -362,23 +383,43 @@ void Compiler::Reset()
SetCodePtr(ResetStart);
}
-void Compiler::Comp_SpecialBranchBehaviour()
+void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
- if (CurInstr.BranchFlags & branch_IdleBranch)
- OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
+ if (taken && CurInstr.BranchFlags & branch_IdleBranch)
+ OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
- if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+ if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
+ || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
RegCache.PrepareExit();
- SaveCPSR(false);
-
- MOV(32, R(RAX), Imm32(ConstantCycles));
- ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
- RET();
+
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
+
+ if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
+ && (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
+ {
+ FixupBranch ret = J_CC(CC_S);
+ CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
+ FixupBranch ret2 = J_CC(CC_NZ);
+
+ u8* rewritePart = GetWritableCodePtr();
+ NOP(5);
+
+ MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
+ JMP((u8*)BranchStub[Num], true);
+
+ SetJumpTarget(ret);
+ SetJumpTarget(ret2);
+ JMP((u8*)ARM_Ret, true);
+ }
+ else
+ {
+ JMP((u8*)&ARM_Ret, true);
+ }
}
}
-JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
ResetBlockCache();
@@ -388,15 +429,11 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
Num = cpu->Num;
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
+ // CPSR might have been modified in a previous block
+ CPSRDirty = Config::JIT_BrancheOptimisations == 2;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
- ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
-
- MOV(64, R(RCPU), ImmPtr(cpu));
-
- LoadCPSR();
-
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
for (int i = 0; i < instrsCount; i++)
@@ -474,7 +511,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
else
(this->*comp)();
- Comp_SpecialBranchBehaviour();
+ Comp_SpecialBranchBehaviour(true);
if (CurInstr.Cond() < 0xE)
{
@@ -485,15 +522,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
Comp_AddCycles_C(true);
- if (CurInstr.BranchFlags & branch_FollowCondTaken)
- {
- RegCache.PrepareExit();
- SaveCPSR(false);
-
- MOV(32, R(RAX), Imm32(ConstantCycles));
- ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
- RET();
- }
+ Comp_SpecialBranchBehaviour(false);
SetJumpTarget(skipFailed);
}
@@ -504,17 +533,38 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
}
}
- if (comp == NULL && i != instrsCount - 1)
+ if (comp == NULL)
LoadCPSR();
}
RegCache.Flush();
- SaveCPSR();
- MOV(32, R(RAX), Imm32(ConstantCycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
+
+ if (Config::JIT_BrancheOptimisations == 2
+ && !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
+ && (!instrs[instrsCount - 1].Info.Branches()
+ || instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
+ || (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
+ {
+ FixupBranch ret = J_CC(CC_S);
+ CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
+ FixupBranch ret2 = J_CC(CC_NZ);
+
+ u8* rewritePart = GetWritableCodePtr();
+ NOP(5);
+
+ MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
+ JMP((u8*)BranchStub[Num], true);
- ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
- RET();
+ SetJumpTarget(ret);
+ SetJumpTarget(ret2);
+ JMP((u8*)ARM_Ret, true);
+ }
+ else
+ {
+ JMP((u8*)ARM_Ret, true);
+ }
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
@@ -525,6 +575,22 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
return res;
}
+void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
+{
+ u8* curPtr = GetWritableCodePtr();
+ SetCodePtr(ResetStart + offset);
+ JMP((u8*)entry, true);
+ SetCodePtr(curPtr);
+}
+
+void Compiler::UnlinkBlock(u32 offset)
+{
+ u8* curPtr = GetWritableCodePtr();
+ SetCodePtr(ResetStart + offset);
+ NOP(5);
+ SetCodePtr(curPtr);
+}
+
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
@@ -532,7 +598,7 @@ void Compiler::Comp_AddCycles_C(bool forceNonConstant)
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@@ -544,7 +610,7 @@ void Compiler::Comp_AddCycles_CI(u32 i)
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
if (!Thumb && CurInstr.Cond() < 0xE)
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@@ -558,12 +624,12 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
if (!Thumb && CurInstr.Cond() < 0xE)
{
LEA(32, RSCRATCH, MDisp(i, add + cycles));
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
}
else
{
ConstantCycles += i + cycles;
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
}
}
@@ -599,7 +665,7 @@ void Compiler::Comp_AddCycles_CDI()
}
if (!Thumb && CurInstr.Cond() < 0xE)
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
@@ -643,7 +709,7 @@ void Compiler::Comp_AddCycles_CD()
}
if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
- ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 2cb57dc..b428c33 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -51,7 +51,10 @@ public:
void Reset();
- JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+ void LinkBlock(u32 offset, JitBlockEntry entry);
+ void UnlinkBlock(u32 offset);
+
+ JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
@@ -145,7 +148,7 @@ public:
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
- void Comp_SpecialBranchBehaviour();
+ void Comp_SpecialBranchBehaviour(bool taken);
void* Gen_MemoryRoutine9(bool store, int size);
@@ -176,12 +179,24 @@ public:
return Gen::R(RegCache.Mapping[reg]);
}
+ JitBlockEntry AddEntryOffset(u32 offset)
+ {
+ return (JitBlockEntry)(ResetStart + offset);
+ }
+
+ u32 SubEntryOffset(JitBlockEntry entry)
+ {
+ return (u8*)entry - ResetStart;
+ }
+
u8* ResetStart;
u32 CodeMemSize;
bool Exit;
bool IrregularCycles;
+ void* BranchStub[2];
+
void* MemoryFuncs9[3][2];
void* MemoryFuncs7[3][2];
diff --git a/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
new file mode 100644
index 0000000..9696d22
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
@@ -0,0 +1,15 @@
+#include "../ARM.h"
+
+int main(int argc, char* argv[])
+{
+ FILE* f = fopen("ARMJIT_Offsets.h", "w");
+#define writeOffset(field) \
+ fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
+
+ writeOffset(CPSR);
+ writeOffset(Cycles);
+ writeOffset(StopExecution);
+
+ fclose(f);
+ return 0;
+}
\ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s
new file mode 100644
index 0000000..dbbb024
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Linkage.s
@@ -0,0 +1,74 @@
+.intel_syntax noprefix
+
+#include "ARMJIT_Offsets.h"
+
+.text
+
+#define RCPU rbp
+#define RCPSR r15d
+
+#ifdef WIN64
+#define ARG1_REG ecx
+#define ARG2_REG edx
+#define ARG3_REG r8d
+#define ARG4_REG r9d
+#define ARG1_REG64 rcx
+#define ARG2_REG64 rdx
+#define ARG3_REG64 r8
+#define ARG4_REG64 r9
+#else
+#define ARG1_REG edi
+#define ARG2_REG esi
+#define ARG3_REG edx
+#define ARG4_REG ecx
+#define ARG1_REG64 rdi
+#define ARG2_REG64 rsi
+#define ARG3_REG64 rdx
+#define ARG4_REG64 rcx
+#endif
+
+.p2align 4,,15
+
+.global ARM_Dispatch
+ARM_Dispatch:
+#ifdef WIN64
+ push rdi
+ push rsi
+#endif
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbp
+
+#ifdef WIN64
+ sub rsp, 0x28
+#endif
+ mov RCPU, ARG1_REG64
+ mov RCPSR, [RCPU + ARM_CPSR_offset]
+
+ jmp ARG2_REG64
+
+.p2align 4,,15
+
+.global ARM_Ret
+ARM_Ret:
+ mov [RCPU + ARM_CPSR_offset], RCPSR
+
+#ifdef WIN64
+ add rsp, 0x28
+#endif
+
+ pop rbp
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+#ifdef WIN64
+ pop rsi
+ pop rdi
+#endif
+
+ ret
diff --git a/src/ARMJIT_x64/ARMJIT_Offsets.h b/src/ARMJIT_x64/ARMJIT_Offsets.h
new file mode 100644
index 0000000..a73dd59
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Offsets.h
@@ -0,0 +1,3 @@
+#define ARM_CPSR_offset 0x64
+#define ARM_Cycles_offset 0xc
+#define ARM_StopExecution_offset 0x10
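
A note on the new ARMJIT_Linkage.s above: ARM_Dispatch and ARM_Ret replace the per-block push/pop prologue and epilogue that CompileBlock previously emitted, so linked blocks can jump between each other without re-saving host registers. The dispatcher saves the callee-saved registers once, pins the ARM* in rbp (RCPU) and CPSR in r15d (RCPSR), then tail-jumps into the block; ARM_Ret writes CPSR back and restores the host state. Roughly how the generic JIT driver would invoke it (the actual call site is outside this diff, so the prototype below is an assumption):

// Assumed prototype; the real declaration and the ARM/JitBlockEntry types
// live outside src/ARMJIT_x64 (ARM.h / ARMJIT.h) and are not part of this diff.
extern "C" void ARM_Dispatch(ARM* cpu, JitBlockEntry entry);

// Run one compiled block (or a chain of linked blocks); control returns here
// once some block exits through ARM_Ret.
static void RunJitBlock(ARM* cpu, JitBlockEntry entry)
{
    ARM_Dispatch(cpu, entry);
}
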