aboutsummaryrefslogtreecommitdiff
path: root/src/ARMJIT_A64/ARMJIT_Compiler.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/ARMJIT_A64/ARMJIT_Compiler.cpp')
-rw-r--r--src/ARMJIT_A64/ARMJIT_Compiler.cpp383
1 files changed, 272 insertions, 111 deletions
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
index a67f357..42435ed 100644
--- a/src/ARMJIT_A64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -1,9 +1,3 @@
-#include "ARMJIT_Compiler.h"
-
-#include "../ARMInterpreter.h"
-
-#include "../ARMJIT_Internal.h"
-
#ifdef __SWITCH__
#include "../switch/compat_switch.h"
@@ -13,10 +7,17 @@ extern char __start__;
#include <unistd.h>
#endif
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMInterpreter.h"
+#include "../Config.h"
+
#include <malloc.h>
using namespace Arm64Gen;
+extern "C" void ARM_Ret();
namespace ARMJIT
{
@@ -28,7 +29,10 @@ namespace ARMJIT
like x64. At one hand you can translate a lot of instructions directly.
But at the same time, there are a ton of exceptions, like for
example ADD and SUB can't have a RORed second operand on ARMv8.
- */
+
+ While writing a JIT when an instruction is recompiled into multiple ones
+ not to write back until you've read all the other operands!
+*/
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
@@ -46,6 +50,132 @@ void Compiler::MovePC()
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
+void Compiler::A_Comp_MRS()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ ANDI2R(W5, RCPSR, 0x1F);
+ MOVI2R(W3, 0);
+ MOVI2R(W1, 15 - 8);
+ BL(ReadBanked);
+ MOV(rd, W3);
+ }
+ else
+ MOV(rd, RCPSR);
+}
+
+void Compiler::A_Comp_MSR()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg val;
+ if (CurInstr.Instr & (1 << 25))
+ {
+ val = W0;
+ MOVI2R(val, ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)));
+ }
+ else
+ {
+ val = MapReg(CurInstr.A_Reg(0));
+ }
+
+ u32 mask = 0;
+ if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
+ if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
+ if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
+ if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ ANDI2R(W5, RCPSR, 0x1F);
+ MOVI2R(W3, 0);
+ MOVI2R(W1, 15 - 8);
+ BL(ReadBanked);
+
+ MOVI2R(W1, mask);
+ MOVI2R(W2, mask & 0xFFFFFF00);
+ ANDI2R(W5, RCPSR, 0x1F);
+ CMP(W5, 0x10);
+ CSEL(W1, W2, W1, CC_EQ);
+
+ BIC(W3, W3, W1);
+ AND(W0, val, W1);
+ ORR(W3, W3, W0);
+
+ MOVI2R(W1, 15 - 8);
+
+ BL(WriteBanked);
+ }
+ else
+ {
+ mask &= 0xFFFFFFDF;
+ CPSRDirty = true;
+
+ if ((mask & 0xFF) == 0)
+ {
+ ANDI2R(RCPSR, RCPSR, ~mask);
+ ANDI2R(W0, val, mask);
+ ORR(RCPSR, RCPSR, W0);
+ }
+ else
+ {
+ MOVI2R(W2, mask);
+ MOVI2R(W3, mask & 0xFFFFFF00);
+ ANDI2R(W1, RCPSR, 0x1F);
+ // W1 = first argument
+ CMP(W1, 0x10);
+ CSEL(W2, W3, W2, CC_EQ);
+
+ BIC(RCPSR, RCPSR, W2);
+ AND(W0, val, W2);
+ ORR(RCPSR, RCPSR, W0);
+
+ MOV(W2, RCPSR);
+ MOV(X0, RCPU);
+
+ PushRegs(true);
+
+ QuickCallFunction(X3, (void*)&ARM::UpdateMode);
+
+ PopRegs(true);
+ }
+ }
+}
+
+void Compiler::PushRegs(bool saveHiRegs)
+{
+ if (saveHiRegs)
+ {
+ if (Thumb || CurInstr.Cond() == 0xE)
+ {
+ BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+ for (int reg : hiRegsLoaded)
+ RegCache.UnloadRegister(reg);
+ }
+ else
+ {
+ BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
+ for (int reg : hiRegsDirty)
+ SaveReg(reg, RegCache.Mapping[reg]);
+ }
+ }
+}
+
+void Compiler::PopRegs(bool saveHiRegs)
+{
+ if (saveHiRegs)
+ {
+ BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+
+ for (int reg : hiRegsLoaded)
+ LoadReg(reg, RegCache.Mapping[reg]);
+ }
+}
+
Compiler::Compiler()
{
#ifdef __SWITCH__
@@ -80,8 +210,7 @@ Compiler::Compiler()
assert(succeded);
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
- JitMemUseableSize = JitMemSize;
- Reset();
+ JitMemMainSize = JitMemSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
@@ -90,31 +219,8 @@ Compiler::Compiler()
SetCodeBase(pageAligned, pageAligned);
JitMemUseableSize = alignedSize;
- Reset();
#endif
-
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 2; j++)
- {
- MemFunc9[i][j] = Gen_MemoryRoutine9(8 << i, j);
- }
- }
- MemFunc7[0][0] = (void*)NDS::ARM7Read8;
- MemFunc7[1][0] = (void*)NDS::ARM7Read16;
- MemFunc7[2][0] = (void*)NDS::ARM7Read32;
- MemFunc7[0][1] = (void*)NDS::ARM7Write8;
- MemFunc7[1][1] = (void*)NDS::ARM7Write16;
- MemFunc7[2][1] = (void*)NDS::ARM7Write32;
-
- for (int i = 0; i < 2; i++)
- {
- for (int j = 0; j < 2; j++)
- {
- MemFuncsSeq9[i][j] = Gen_MemoryRoutine9Seq(i, j);
- MemFuncsSeq7[i][j] = Gen_MemoryRoutine7Seq(i, j);
- }
- }
+ SetCodePtr(0);
for (int i = 0; i < 3; i++)
{
@@ -123,26 +229,26 @@ Compiler::Compiler()
}
/*
- W0 - mode
+ W5 - mode
W1 - reg num
W3 - in/out value of reg
*/
{
ReadBanked = GetRXPtr();
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
- CMP(W0, 0x11);
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+ CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
- CMP(W0, 0x12);
+ CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
- CMP(W0, 0x13);
+ CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
- CMP(W0, 0x17);
+ CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
- CMP(W0, 0x1B);
+ CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
RET();
@@ -166,19 +272,19 @@ Compiler::Compiler()
{
WriteBanked = GetRXPtr();
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
- CMP(W0, 0x11);
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+ CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
- ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
- CMP(W0, 0x12);
+ CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
- CMP(W0, 0x13);
+ CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
- CMP(W0, 0x17);
+ CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
- CMP(W0, 0x1B);
+ CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
MOVI2R(W4, 0);
@@ -206,9 +312,71 @@ Compiler::Compiler()
RET();
}
- //FlushIcache();
+ for (int num = 0; num < 2; num++)
+ {
+ for (int size = 0; size < 3; size++)
+ {
+ for (int reg = 0; reg < 8; reg++)
+ {
+ ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
+ PatchedStoreFuncs[num][size][reg] = GetRXPtr();
+ if (num == 0)
+ {
+ MOV(X1, RCPU);
+ MOV(W2, rdMapped);
+ }
+ else
+ {
+ MOV(W1, rdMapped);
+ }
+ ABI_PushRegisters({30});
+ switch ((8 << size) | num)
+ {
+ case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
+ case 33: QuickCallFunction(X3, SlowWrite7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
+ case 17: QuickCallFunction(X3, SlowWrite7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
+ case 9: QuickCallFunction(X3, SlowWrite7<u8>); break;
+ }
+ ABI_PopRegisters({30});
+ RET();
+
+ for (int signextend = 0; signextend < 2; signextend++)
+ {
+ PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr();
+ if (num == 0)
+ MOV(X1, RCPU);
+ ABI_PushRegisters({30});
+ switch ((8 << size) | num)
+ {
+ case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
+ case 33: QuickCallFunction(X3, SlowRead7<u32>); break;
+ case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
+ case 17: QuickCallFunction(X3, SlowRead7<u16>); break;
+ case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
+ case 9: QuickCallFunction(X3, SlowRead7<u8>); break;
+ }
+ ABI_PopRegisters({30});
+ if (size == 32)
+ MOV(rdMapped, W0);
+ else if (signextend)
+ SBFX(rdMapped, W0, 0, 8 << size);
+ else
+ UBFX(rdMapped, W0, 0, 8 << size);
+ RET();
+ }
+ }
+ }
+ }
+
+ FlushIcache();
+
+ JitMemSecondarySize = 1024*1024*4;
+
+ JitMemMainSize -= GetCodeOffset();
+ JitMemMainSize -= JitMemSecondarySize;
- JitMemUseableSize -= GetCodeOffset();
SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
@@ -227,6 +395,16 @@ Compiler::~Compiler()
#endif
}
+void Compiler::LoadCycles()
+{
+ LDR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
+}
+
+void Compiler::SaveCycles()
+{
+ STR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles));
+}
+
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
if (reg == 15)
@@ -325,7 +503,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
- F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
+ F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
@@ -356,7 +534,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
- NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
&Compiler::Nop
};
#undef F
@@ -404,29 +582,34 @@ bool Compiler::CanCompile(bool thumb, u16 kind)
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
-void Compiler::Comp_BranchSpecialBehaviour()
+void Compiler::Comp_BranchSpecialBehaviour(bool taken)
{
- if (CurInstr.BranchFlags & branch_IdleBranch)
+ if (taken && CurInstr.BranchFlags & branch_IdleBranch)
{
MOVI2R(W0, 1);
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
}
- if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+ if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
+ || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
- SaveCPSR(false);
RegCache.PrepareExit();
- ADD(W0, RCycles, ConstantCycles);
- ABI_PopRegisters(SavedRegs);
- RET();
+
+ SUB(RCycles, RCycles, ConstantCycles);
+ QuickTailCall(X0, ARM_Ret);
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
- if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
+ if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
+ {
+ printf("JIT near memory full, resetting...\n");
+ ResetBlockCache();
+ }
+ if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
{
- printf("JIT memory full, resetting...\n");
+ printf("JIT far memory full, resetting...\n");
ResetBlockCache();
}
@@ -437,21 +620,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
-
- //printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));
- const u32 ALL_CALLEE_SAVED = 0x7FF80000;
-
- SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));
-
- //if (Num == 1)
- {
- ABI_PushRegisters(SavedRegs);
-
- MOVP2R(RCPU, CurCPU);
- MOVI2R(RCycles, 0);
-
- LoadCPSR();
- }
+ CPSRDirty = false;
for (int i = 0; i < instrsCount; i++)
{
@@ -486,6 +655,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
if (comp == NULL)
{
+ SaveCycles();
SaveCPSR();
RegCache.Flush();
}
@@ -535,25 +705,18 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
(this->*comp)();
}
- Comp_BranchSpecialBehaviour();
+ Comp_BranchSpecialBehaviour(true);
if (cond < 0xE)
{
- if (IrregularCycles)
+ if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C();
- if (CurInstr.BranchFlags & branch_FollowCondTaken)
- {
- SaveCPSR(false);
- RegCache.PrepareExit();
- ADD(W0, RCycles, ConstantCycles);
- ABI_PopRegisters(SavedRegs);
- RET();
- }
+ Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipNop);
}
@@ -565,76 +728,74 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
}
if (comp == NULL)
+ {
+ LoadCycles();
LoadCPSR();
+ }
}
RegCache.Flush();
- //if (Num == 1)
- {
- SaveCPSR();
-
- ADD(W0, RCycles, ConstantCycles);
-
- ABI_PopRegisters(SavedRegs);
- }
- //else
- // ADD(RCycles, RCycles, ConstantCycles);
-
- RET();
+ SUB(RCycles, RCycles, ConstantCycles);
+ QuickTailCall(X0, ARM_Ret);
FlushIcache();
- //printf("finished\n");
-
return res;
}
void Compiler::Reset()
{
+ LoadStorePatches.clear();
+
SetCodePtr(0);
+ OtherCodeRegion = JitMemMainSize;
const u32 brk_0 = 0xD4200000;
- for (int i = 0; i < JitMemUseableSize / 4; i++)
+ for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
-void Compiler::Comp_AddCycles_C(bool nonConst)
+void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
- if (!nonConst && !CurInstr.Info.Branches())
+ if (forceNonConstant)
ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 numI)
{
+ IrregularCycles = true;
+
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
- if (Thumb || CurInstr.Cond() >= 0xE)
+ if (Thumb || CurInstr.Cond() == 0xE)
ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
+ IrregularCycles = true;
+
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
- ADD(RCycles, RCycles, numI, shift);
+ SUB(RCycles, RCycles, cycles);
if (Thumb || CurInstr.Cond() >= 0xE)
- ConstantCycles += c;
+ ConstantCycles += cycles;
else
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
@@ -671,7 +832,7 @@ void Compiler::Comp_AddCycles_CDI()
}
if (!Thumb && CurInstr.Cond() < 0xE)
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
@@ -715,7 +876,7 @@ void Compiler::Comp_AddCycles_CD()
}
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
- ADD(RCycles, RCycles, cycles);
+ SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}