author     Arisotura <thetotalworm@gmail.com>    2020-07-01 00:01:11 +0200
committer  GitHub <noreply@github.com>           2020-07-01 00:01:11 +0200
commit     62c6e2f703d88660e0ca9bda78032c5bd6b63a78 (patch)
tree       1dbf9eb1bbe418d14f07dc3a0e30821fb5deb258 /src/ARMJIT_x64
parent     d97ce22b010e868437c649911bce89d679a4deaa (diff)
parent     c5381d2911d47fb1fcbd6ec27a83f5da3606c4bd (diff)
Merge pull request #667 from Arisotura/generic_jit
merge jit
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r--  src/ARMJIT_x64/ARMJIT_ALU.cpp         768
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Branch.cpp      272
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.cpp    899
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Compiler.h      255
-rw-r--r--  src/ARMJIT_x64/ARMJIT_GenOffsets.cpp   15
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Linkage.s        78
-rw-r--r--  src/ARMJIT_x64/ARMJIT_LoadStore.cpp   773
-rw-r--r--  src/ARMJIT_x64/ARMJIT_Offsets.h         3
8 files changed, 3063 insertions, 0 deletions
diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp
new file mode 100644
index 0000000..43b94b6
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp
@@ -0,0 +1,768 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Gen;
+
+namespace ARMJIT
+{
+
+// uses RSCRATCH3
+void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&),
+ OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags)
+{
+ if (opFlags & opSyncCarry)
+ {
+ BT(32, R(RCPSR), Imm8(29));
+ if (opFlags & opInvertCarry)
+ CMC();
+ }
+
+ if (rd == rn && !(opFlags & opInvertOp2))
+ (this->*op)(32, rd, op2);
+ else if (opFlags & opSymmetric && op2 == R(RSCRATCH))
+ {
+ if (opFlags & opInvertOp2)
+ NOT(32, op2);
+ (this->*op)(32, op2, rn);
+ MOV(32, rd, op2);
+ }
+ else
+ {
+ if (opFlags & opInvertOp2)
+ {
+ if (op2 != R(RSCRATCH))
+ {
+ MOV(32, R(RSCRATCH), op2);
+ op2 = R(RSCRATCH);
+ }
+ NOT(32, op2);
+ }
+ MOV(32, R(RSCRATCH3), rn);
+ (this->*op)(32, R(RSCRATCH3), op2);
+ MOV(32, rd, R(RSCRATCH3));
+ }
+
+ if (opFlags & opSetsFlags)
+ Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
+}
+
+void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
+ Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags)
+{
+ if (opFlags & opSyncCarry)
+ {
+ BT(32, R(RCPSR), Imm8(29));
+ if (opFlags & opInvertCarry)
+ CMC();
+ }
+
+ if (op2 != R(RSCRATCH))
+ {
+ MOV(32, R(RSCRATCH), op2);
+ op2 = R(RSCRATCH);
+ }
+ (this->*op)(32, op2, rn);
+ MOV(32, rd, op2);
+
+ if (opFlags & opSetsFlags)
+ Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed);
+}
+
+void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
+{
+ switch (op)
+ {
+ case 0: // TST
+ if (rn.IsImm())
+ {
+ MOV(32, R(RSCRATCH3), rn);
+ rn = R(RSCRATCH3);
+ }
+ TEST(32, rn, op2);
+ break;
+ case 1: // TEQ
+ MOV(32, R(RSCRATCH3), rn);
+ XOR(32, R(RSCRATCH3), op2);
+ break;
+ case 2: // CMP
+ if (rn.IsImm())
+ {
+ MOV(32, R(RSCRATCH3), rn);
+ rn = R(RSCRATCH3);
+ }
+ CMP(32, rn, op2);
+ break;
+ case 3: // CMN
+ MOV(32, R(RSCRATCH3), rn);
+ ADD(32, R(RSCRATCH3), op2);
+ break;
+ }
+
+ Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
+}
+
+// also calculates cycles
+OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
+{
+ if (CurInstr.Instr & (1 << 25))
+ {
+ Comp_AddCycles_C();
+ carryUsed = false;
+ return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
+ }
+ else
+ {
+ S = S && (CurInstr.SetFlags & 0x2);
+
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ if (CurInstr.Instr & (1 << 4))
+ {
+ Comp_AddCycles_CI(1);
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
+ if (rm.IsImm() && CurInstr.A_Reg(0) == 15)
+ rm = Imm32(rm.Imm32() + 4);
+ return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed);
+ }
+ else
+ {
+ Comp_AddCycles_C();
+ return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F,
+ MapReg(CurInstr.A_Reg(0)), S, carryUsed);
+ }
+ }
+}
+
+void Compiler::A_Comp_CmpOp()
+{
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+ bool carryUsed;
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+ OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
+
+ Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
+}
+
+void Compiler::A_Comp_Arith()
+{
+ bool S = CurInstr.Instr & (1 << 20);
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+ bool carryUsed;
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+ OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed);
+
+ u32 sFlag = S ? opSetsFlags : 0;
+ switch (op)
+ {
+ case 0x0: // AND
+ Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
+ break;
+ case 0x1: // EOR
+ Comp_ArithTriOp(&Compiler::XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
+ break;
+ case 0x2: // SUB
+ Comp_ArithTriOp(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
+ break;
+ case 0x3: // RSB
+ if (op2.IsZero())
+ {
+ if (rd != rn)
+ MOV(32, rd, rn);
+ NEG(32, rd);
+ if (S)
+ Comp_RetriveFlags(true, true, false);
+ }
+ else
+ Comp_ArithTriOpReverse(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
+ break;
+ case 0x4: // ADD
+ Comp_ArithTriOp(&Compiler::ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
+ break;
+ case 0x5: // ADC
+ Comp_ArithTriOp(&Compiler::ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
+ break;
+ case 0x6: // SBC
+ Comp_ArithTriOp(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
+ break;
+ case 0x7: // RSC
+ Comp_ArithTriOpReverse(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
+ break;
+ case 0xC: // ORR
+ Comp_ArithTriOp(&Compiler::OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
+ break;
+ case 0xE: // BIC
+ Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
+ break;
+ default:
+ assert("unimplemented");
+ }
+
+ if (CurInstr.A_Reg(12) == 15)
+ Comp_JumpTo(rd.GetSimpleReg(), S);
+}
+
+void Compiler::A_Comp_MovOp()
+{
+ bool carryUsed;
+ bool S = CurInstr.Instr & (1 << 20);
+ OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+
+ if (rd != op2)
+ MOV(32, rd, op2);
+
+ if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
+ {
+ NOT(32, rd);
+ if (op2.IsImm() && CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32());
+ }
+ else if (op2.IsImm() && CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32());
+
+ if (S)
+ {
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
+ Comp_RetriveFlags(false, false, carryUsed);
+ }
+
+ if (CurInstr.A_Reg(12) == 15)
+ Comp_JumpTo(rd.GetSimpleReg(), S);
+}
+
+void Compiler::A_Comp_CLZ()
+{
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
+
+ MOV(32, R(RSCRATCH), Imm32(32));
+ TEST(32, rm, rm);
+ FixupBranch skipZero = J_CC(CC_Z);
+ BSR(32, RSCRATCH, rm);
+ XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH
+ SetJumpTarget(skipZero);
+ MOV(32, rd, R(RSCRATCH));
+}
+
+void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn)
+{
+ if (Num == 0)
+ Comp_AddCycles_CI(S ? 3 : 1);
+ else
+ {
+ XOR(32, R(RSCRATCH), R(RSCRATCH));
+ MOV(32, R(RSCRATCH3), rs);
+ TEST(32, R(RSCRATCH3), R(RSCRATCH3));
+ FixupBranch zeroBSR = J_CC(CC_Z);
+ BSR(32, RSCRATCH2, R(RSCRATCH3));
+ NOT(32, R(RSCRATCH3));
+ BSR(32, RSCRATCH, R(RSCRATCH3));
+ CMP(32, R(RSCRATCH2), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
+ SHR(32, R(RSCRATCH), Imm8(3));
+ SetJumpTarget(zeroBSR); // fortunately that's even right
+ Comp_AddCycles_CI(RSCRATCH, add ? 2 : 1);
+ }
+
+ static_assert(EAX == RSCRATCH, "Someone changed RSCRATCH!");
+ MOV(32, R(RSCRATCH), rm);
+ if (add)
+ {
+ IMUL(32, RSCRATCH, rs);
+ LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg()));
+ if (S && FlagsNZRequired())
+ TEST(32, rd, rd);
+ }
+ else
+ {
+ IMUL(32, RSCRATCH, rs);
+ MOV(32, rd, R(RSCRATCH));
+ if (S && FlagsNZRequired())
+ TEST(32, R(RSCRATCH), R(RSCRATCH));
+ }
+
+ if (S)
+ Comp_RetriveFlags(false, false, false);
+}
+
+void Compiler::A_Comp_MUL_MLA()
+{
+ bool S = CurInstr.Instr & (1 << 20);
+ bool add = CurInstr.Instr & (1 << 21);
+ OpArg rd = MapReg(CurInstr.A_Reg(16));
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
+ OpArg rs = MapReg(CurInstr.A_Reg(8));
+ OpArg rn;
+ if (add)
+ rn = MapReg(CurInstr.A_Reg(12));
+
+ Comp_MulOp(S, add, rd, rm, rs, rn);
+}
+
+void Compiler::A_Comp_Mul_Long()
+{
+ bool S = CurInstr.Instr & (1 << 20);
+ bool add = CurInstr.Instr & (1 << 21);
+ bool sign = CurInstr.Instr & (1 << 22);
+ OpArg rd = MapReg(CurInstr.A_Reg(16));
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
+ OpArg rs = MapReg(CurInstr.A_Reg(8));
+ OpArg rn = MapReg(CurInstr.A_Reg(12));
+
+ if (Num == 0)
+ Comp_AddCycles_CI(S ? 3 : 1);
+ else
+ {
+ XOR(32, R(RSCRATCH), R(RSCRATCH));
+ MOV(32, R(RSCRATCH3), rs);
+ TEST(32, R(RSCRATCH3), R(RSCRATCH3));
+ FixupBranch zeroBSR = J_CC(CC_Z);
+ if (sign)
+ {
+ BSR(32, RSCRATCH2, R(RSCRATCH3));
+ NOT(32, R(RSCRATCH3));
+ BSR(32, RSCRATCH, R(RSCRATCH3));
+ CMP(32, R(RSCRATCH2), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L);
+ }
+ else
+ {
+ BSR(32, RSCRATCH, R(RSCRATCH3));
+ }
+
+ SHR(32, R(RSCRATCH), Imm8(3));
+ SetJumpTarget(zeroBSR); // fortunately that's even right
+ Comp_AddCycles_CI(RSCRATCH, 2);
+ }
+
+ if (sign)
+ {
+ MOVSX(64, 32, RSCRATCH2, rm);
+ MOVSX(64, 32, RSCRATCH3, rs);
+ }
+ else
+ {
+ MOV(32, R(RSCRATCH2), rm);
+ MOV(32, R(RSCRATCH3), rs);
+ }
+ if (add)
+ {
+ MOV(32, R(RSCRATCH), rd);
+ SHL(64, R(RSCRATCH), Imm8(32));
+ OR(64, R(RSCRATCH), rn);
+
+ IMUL(64, RSCRATCH2, R(RSCRATCH3));
+ ADD(64, R(RSCRATCH2), R(RSCRATCH));
+ }
+ else
+ {
+ IMUL(64, RSCRATCH2, R(RSCRATCH3));
+ if (S && FlagsNZRequired())
+ TEST(64, R(RSCRATCH2), R(RSCRATCH2));
+ }
+
+ if (S)
+ Comp_RetriveFlags(false, false, false);
+
+ MOV(32, rn, R(RSCRATCH2));
+ SHR(64, R(RSCRATCH2), Imm8(32));
+ MOV(32, rd, R(RSCRATCH2));
+}
+
+void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
+{
+ if (CurInstr.SetFlags == 0)
+ return;
+ if (retriveCV && !(CurInstr.SetFlags & 0x3))
+ retriveCV = false;
+
+ bool carryOnly = !retriveCV && carryUsed;
+ if (carryOnly && !(CurInstr.SetFlags & 0x2))
+ {
+ carryUsed = false;
+ carryOnly = false;
+ }
+
+ CPSRDirty = true;
+
+ if (retriveCV)
+ {
+ SETcc(CC_O, R(RSCRATCH));
+ SETcc(sign ? CC_NC : CC_C, R(RSCRATCH3));
+ LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0));
+ }
+
+ if (FlagsNZRequired())
+ {
+ SETcc(CC_S, R(RSCRATCH));
+ SETcc(CC_Z, R(RSCRATCH3));
+ LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0));
+ int shiftAmount = 30;
+ if (retriveCV || carryUsed)
+ {
+ LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0));
+ shiftAmount = carryOnly ? 29 : 28;
+ }
+ SHL(32, R(RSCRATCH), Imm8(shiftAmount));
+
+ AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
+ OR(32, R(RCPSR), R(RSCRATCH));
+ }
+ else if (carryUsed || retriveCV)
+ {
+ SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28));
+ AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28)));
+ OR(32, R(RCPSR), R(RSCRATCH2));
+ }
+}
+
+// always uses RSCRATCH, RSCRATCH2 only if S == true
+OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed)
+{
+ carryUsed = S;
+
+ if (S)
+ {
+ XOR(32, R(RSCRATCH2), R(RSCRATCH2));
+ TEST(32, R(RCPSR), Imm32(1 << 29));
+ SETcc(CC_NZ, R(RSCRATCH2));
+ }
+
+ MOV(32, R(RSCRATCH), rm);
+ static_assert(RSCRATCH3 == ECX, "Someone changed RSCRATCH3");
+ MOV(32, R(ECX), rs);
+ AND(32, R(ECX), Imm32(0xFF));
+
+ FixupBranch zero = J_CC(CC_Z);
+ if (op < 3)
+ {
+ void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL;
+ if (op == 0)
+ shiftOp = &Compiler::SHL;
+ else if (op == 1)
+ shiftOp = &Compiler::SHR;
+ else if (op == 2)
+ shiftOp = &Compiler::SAR;
+
+ CMP(32, R(ECX), Imm8(32));
+ FixupBranch lt32 = J_CC(CC_L);
+ FixupBranch done1;
+ if (op < 2)
+ {
+ FixupBranch eq32 = J_CC(CC_E);
+ XOR(32, R(RSCRATCH), R(RSCRATCH));
+ if (S)
+ XOR(32, R(RSCRATCH2), R(RSCRATCH2));
+ done1 = J();
+ SetJumpTarget(eq32);
+ }
+ (this->*shiftOp)(32, R(RSCRATCH), Imm8(31));
+ (this->*shiftOp)(32, R(RSCRATCH), Imm8(1));
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+
+ FixupBranch done2 = J();
+
+ SetJumpTarget(lt32);
+ (this->*shiftOp)(32, R(RSCRATCH), R(ECX));
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+
+ if (op < 2)
+ SetJumpTarget(done1);
+ SetJumpTarget(done2);
+
+ }
+ else if (op == 3)
+ {
+ if (S)
+ BT(32, R(RSCRATCH), Imm8(31));
+ ROR_(32, R(RSCRATCH), R(ECX));
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+ }
+ SetJumpTarget(zero);
+
+ return R(RSCRATCH);
+}
+
+// may use RSCRATCH for op2 and RSCRATCH2 for the carry value
+OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed)
+{
+ carryUsed = true;
+
+ switch (op)
+ {
+ case 0: // LSL
+ if (amount > 0)
+ {
+ MOV(32, R(RSCRATCH), rm);
+ SHL(32, R(RSCRATCH), Imm8(amount));
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+
+ return R(RSCRATCH);
+ }
+ else
+ {
+ carryUsed = false;
+ return rm;
+ }
+ case 1: // LSR
+ if (amount > 0)
+ {
+ MOV(32, R(RSCRATCH), rm);
+ SHR(32, R(RSCRATCH), Imm8(amount));
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+ return R(RSCRATCH);
+ }
+ else
+ {
+ if (S)
+ {
+ MOV(32, R(RSCRATCH2), rm);
+ SHR(32, R(RSCRATCH2), Imm8(31));
+ }
+ return Imm32(0);
+ }
+ case 2: // ASR
+ MOV(32, R(RSCRATCH), rm);
+ SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
+ if (S)
+ {
+ if (amount == 0)
+ BT(32, rm, Imm8(31));
+ SETcc(CC_C, R(RSCRATCH2));
+ }
+ return R(RSCRATCH);
+ case 3: // ROR
+ MOV(32, R(RSCRATCH), rm);
+ if (amount > 0)
+ ROR_(32, R(RSCRATCH), Imm8(amount));
+ else
+ {
+ BT(32, R(RCPSR), Imm8(29));
+ RCR(32, R(RSCRATCH), Imm8(1));
+ }
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+ return R(RSCRATCH);
+ }
+
+ assert(false);
+}
+
+void Compiler::T_Comp_ShiftImm()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
+
+ int op = (CurInstr.Instr >> 11) & 0x3;
+ int amount = (CurInstr.Instr >> 6) & 0x1F;
+
+ Comp_AddCycles_C();
+
+ bool carryUsed;
+ OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed);
+
+ if (shifted != rd)
+ MOV(32, rd, shifted);
+
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
+ Comp_RetriveFlags(false, false, carryUsed);
+}
+
+void Compiler::T_Comp_AddSub_()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
+
+ int op = (CurInstr.Instr >> 9) & 0x3;
+
+ OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6));
+
+ Comp_AddCycles_C();
+
+    // special case for thumb mov being an alias for add rd, rn, #0
+ if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0)
+ {
+ if (rd != rs)
+ MOV(32, rd, rs);
+ }
+ else if (op & 1)
+ Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
+ else
+ Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
+}
+
+void Compiler::T_Comp_ALU_Imm8()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(8));
+
+ u32 op = (CurInstr.Instr >> 11) & 0x3;
+ OpArg imm = Imm32(CurInstr.Instr & 0xFF);
+
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0x0:
+ MOV(32, rd, imm);
+ if (FlagsNZRequired())
+ TEST(32, rd, rd);
+ Comp_RetriveFlags(false, false, false);
+ return;
+ case 0x1:
+ Comp_CmpOp(2, rd, imm, false);
+ return;
+ case 0x2:
+ Comp_ArithTriOp(&Compiler::ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
+ return;
+ case 0x3:
+ Comp_ArithTriOp(&Compiler::SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
+ return;
+ }
+}
+
+void Compiler::T_Comp_MUL()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
+ Comp_MulOp(true, false, rd, rd, rs, Imm8(-1));
+}
+
+void Compiler::T_Comp_ALU()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
+
+ u32 op = (CurInstr.Instr >> 6) & 0xF;
+
+ if ((op >= 0x2 && op < 0x4) || op == 0x7)
+ Comp_AddCycles_CI(1); // shift by reg
+ else
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0x0: // AND
+ Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric);
+ return;
+ case 0x1: // EOR
+ Comp_ArithTriOp(&Compiler::XOR, rd, rd, rs, false, opSetsFlags|opSymmetric);
+ return;
+ case 0x2:
+ case 0x3:
+ case 0x4:
+ case 0x7:
+ {
+ int shiftOp = op == 0x7 ? 3 : op - 0x2;
+ bool carryUsed;
+ OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed);
+ if (FlagsNZRequired())
+ TEST(32, shifted, shifted);
+ MOV(32, rd, shifted);
+ Comp_RetriveFlags(false, false, true);
+ }
+ return;
+ case 0x5: // ADC
+ Comp_ArithTriOp(&Compiler::ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV);
+ return;
+ case 0x6: // SBC
+ Comp_ArithTriOp(&Compiler::SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV);
+ return;
+ case 0x8: // TST
+ Comp_CmpOp(0, rd, rs, false);
+ return;
+ case 0x9: // NEG
+ if (rd != rs)
+ MOV(32, rd, rs);
+ NEG(32, rd);
+ Comp_RetriveFlags(true, true, false);
+ return;
+ case 0xA: // CMP
+ Comp_CmpOp(2, rd, rs, false);
+ return;
+ case 0xB: // CMN
+ Comp_CmpOp(3, rd, rs, false);
+ return;
+ case 0xC: // ORR
+ Comp_ArithTriOp(&Compiler::OR, rd, rd, rs, false, opSetsFlags|opSymmetric);
+ return;
+ case 0xE: // BIC
+ Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2);
+ return;
+ case 0xF: // MVN
+ if (rd != rs)
+ MOV(32, rd, rs);
+ NOT(32, rd);
+ Comp_RetriveFlags(false, false, false);
+ return;
+ default:
+ break;
+ }
+}
+
+void Compiler::T_Comp_ALU_HiReg()
+{
+ u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
+ OpArg rdMapped = MapReg(rd);
+ OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
+
+ u32 op = (CurInstr.Instr >> 8) & 0x3;
+
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0x0: // ADD
+ Comp_ArithTriOp(&Compiler::ADD, rdMapped, rdMapped, rs, false, opSymmetric);
+ break;
+ case 0x1: // CMP
+ Comp_CmpOp(2, rdMapped, rs, false);
+ return; // this is on purpose
+ case 0x2: // MOV
+ if (rdMapped != rs)
+ MOV(32, rdMapped, rs);
+ break;
+ }
+
+ if (rd == 15)
+ {
+ OR(32, rdMapped, Imm8(1));
+ Comp_JumpTo(rdMapped.GetSimpleReg());
+ }
+}
+
+void Compiler::T_Comp_AddSP()
+{
+ Comp_AddCycles_C();
+
+ OpArg sp = MapReg(13);
+ OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2);
+ if (CurInstr.Instr & (1 << 7))
+ SUB(32, sp, offset);
+ else
+ ADD(32, sp, offset);
+}
+
+void Compiler::T_Comp_RelAddr()
+{
+ Comp_AddCycles_C();
+
+ OpArg rd = MapReg(CurInstr.T_Reg(8));
+ u32 offset = (CurInstr.Instr & 0xFF) << 2;
+ if (CurInstr.Instr & (1 << 11))
+ {
+ OpArg sp = MapReg(13);
+ LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset));
+ }
+ else
+ MOV(32, rd, Imm32((R15 & ~2) + offset));
+}
+
+}
\ No newline at end of file
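Note on the opInvertCarry/CMC handling in the file above: ARM and x86 disagree about the carry flag after a subtraction. ARM's C flag means no borrow occurred (a >= b), while the x86 CF means a borrow did occur, which is why SETcc(CC_NC) and CMC appear around SUB/SBB. A minimal standalone sketch of that relationship (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

// After "a - b": x86 CF = borrow (a < b), ARM C = no borrow (a >= b).
static bool ARMCarryAfterSub(uint32_t a, uint32_t b)
{
    bool x86Borrow = a < b;   // what SUB/CMP leave in CF
    return !x86Borrow;        // what Comp_RetriveFlags records via SETcc(CC_NC)
}

int main()
{
    assert(ARMCarryAfterSub(5, 3));    // no borrow -> ARM C = 1
    assert(!ARMCarryAfterSub(3, 5));   // borrow    -> ARM C = 0
    return 0;
}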
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
new file mode 100644
index 0000000..bda9e52
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -0,0 +1,272 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Gen;
+
+namespace ARMJIT
+{
+
+template <typename T>
+int squeezePointer(T* ptr)
+{
+ int truncated = (int)((u64)ptr);
+ assert((T*)((u64)truncated) == ptr);
+ return truncated;
+}
+
+void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
+{
+ // we can simplify constant branches by a lot
+ IrregularCycles = true;
+
+ u32 newPC;
+ u32 cycles = 0;
+
+ if (addr & 0x1 && !Thumb)
+ {
+ CPSRDirty = true;
+ OR(32, R(RCPSR), Imm8(0x20));
+ }
+ else if (!(addr & 0x1) && Thumb)
+ {
+ CPSRDirty = true;
+ AND(32, R(RCPSR), Imm32(~0x20));
+ }
+
+ if (Num == 0)
+ {
+ ARMv5* cpu9 = (ARMv5*)CurCPU;
+
+ u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
+ u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
+ cpu9->RegionCodeCycles = regionCodeCycles;
+
+ if (Exit)
+ MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+ // two-opcodes-at-once fetch
+ // doesn't matter if we put garbage in the MSbs there
+ if (addr & 0x2)
+ {
+ cpu9->CodeRead32(addr-2, true);
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+2, false);
+ cycles += CurCPU->CodeCycles;
+ }
+ else
+ {
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ }
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+4, false);
+ cycles += cpu9->CodeCycles;
+ }
+
+ cpu9->RegionCodeCycles = compileTimeCodeCycles;
+ }
+ else
+ {
+ ARMv4* cpu7 = (ARMv4*)CurCPU;
+
+ u32 codeRegion = addr >> 24;
+ u32 codeCycles = addr >> 15; // cheato
+
+ cpu7->CodeRegion = codeRegion;
+ cpu7->CodeCycles = codeCycles;
+
+ if (Exit)
+ {
+ MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
+ MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
+ }
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+            // this is necessary because of the ARM7 BIOS protection
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC;
+
+ cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
+
+ CurCPU->R[15] = compileTimePC;
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC;
+
+ cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
+
+ CurCPU->R[15] = compileTimePC;
+ }
+
+ cpu7->CodeRegion = R15 >> 24;
+ cpu7->CodeCycles = addr >> 15;
+ }
+
+ if (Exit)
+ MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
+ if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
+ ConstantCycles += cycles;
+ else
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+}
+
+void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
+{
+ IrregularCycles = true;
+
+ bool cpsrDirty = CPSRDirty;
+ SaveCPSR();
+
+ PushRegs(restoreCPSR);
+
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ MOV(32, R(ABI_PARAM2), R(addr));
+ if (!restoreCPSR)
+ XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
+ else
+ MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
+ if (Num == 0)
+ CALL((void*)&ARMv5::JumpTo);
+ else
+ CALL((void*)&ARMv4::JumpTo);
+
+ PopRegs(restoreCPSR);
+
+ LoadCPSR();
+ // in case this instruction is skipped
+ if (CurInstr.Cond() < 0xE)
+ CPSRDirty = cpsrDirty;
+}
+
+void Compiler::A_Comp_BranchImm()
+{
+ int op = (CurInstr.Instr >> 24) & 1;
+ s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
+ u32 target = R15 + offset;
+ bool link = op;
+
+ if (CurInstr.Cond() == 0xF) // BLX_imm
+ {
+ target += (op << 1) + 1;
+ link = true;
+ }
+
+ if (link)
+ MOV(32, MapReg(14), Imm32(R15 - 4));
+
+ Comp_JumpTo(target);
+}
+
+void Compiler::A_Comp_BranchXchangeReg()
+{
+ OpArg rn = MapReg(CurInstr.A_Reg(0));
+ MOV(32, R(RSCRATCH), rn);
+ if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
+ MOV(32, MapReg(14), Imm32(R15 - 4));
+ Comp_JumpTo(RSCRATCH);
+}
+
+void Compiler::T_Comp_BCOND()
+{
+ u32 cond = (CurInstr.Instr >> 8) & 0xF;
+ FixupBranch skipExecute = CheckCondition(cond);
+
+ s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
+ Comp_JumpTo(R15 + offset + 1, true);
+
+ Comp_SpecialBranchBehaviour(true);
+
+ FixupBranch skipFailed = J();
+ SetJumpTarget(skipExecute);
+
+ Comp_SpecialBranchBehaviour(false);
+
+ Comp_AddCycles_C(true);
+ SetJumpTarget(skipFailed);
+}
+
+void Compiler::T_Comp_B()
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
+ Comp_JumpTo(R15 + offset + 1);
+}
+
+void Compiler::T_Comp_BranchXchangeReg()
+{
+ bool link = CurInstr.Instr & (1 << 7);
+
+ if (link)
+ {
+ if (Num == 1)
+ {
+ printf("BLX unsupported on ARM7!!!\n");
+ return;
+ }
+ MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(3)));
+ MOV(32, MapReg(14), Imm32(R15 - 1));
+ Comp_JumpTo(RSCRATCH);
+ }
+ else
+ {
+ OpArg rn = MapReg(CurInstr.A_Reg(3));
+ Comp_JumpTo(rn.GetSimpleReg());
+ }
+}
+
+void Compiler::T_Comp_BL_LONG_1()
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
+ MOV(32, MapReg(14), Imm32(R15 + offset));
+ Comp_AddCycles_C();
+}
+
+void Compiler::T_Comp_BL_LONG_2()
+{
+ OpArg lr = MapReg(14);
+ s32 offset = (CurInstr.Instr & 0x7FF) << 1;
+ LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset));
+ MOV(32, lr, Imm32((R15 - 2) | 1));
+ if (Num == 1 || CurInstr.Instr & (1 << 12))
+ OR(32, R(RSCRATCH), Imm8(1));
+ Comp_JumpTo(RSCRATCH);
+}
+
+void Compiler::T_Comp_BL_Merged()
+{
+ Comp_AddCycles_C();
+
+ R15 += 2;
+
+ u32 upperPart = CurInstr.Instr >> 16;
+ u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
+ target += (upperPart & 0x7FF) << 1;
+
+ if (Num == 1 || upperPart & (1 << 12))
+ target |= 1;
+
+ MOV(32, MapReg(14), Imm32((R15 - 2) | 1));
+
+ Comp_JumpTo(target);
+}
+
+}
\ No newline at end of file
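T_Comp_BL_Merged above folds both halves of a Thumb BL pair into one jump: the prefix halfword carries a sign-extended upper offset (its low 11 bits, shifted left by 12) and the suffix adds its low 11 bits shifted left by 1. A self-contained sketch of the same offset arithmetic, with an illustrative encoding that is not taken from the patch:

#include <cassert>
#include <cstdint>

// pc is the address of the prefix halfword plus 4 (R15 at that point).
static uint32_t ThumbBLTarget(uint32_t pc, uint16_t prefix, uint16_t suffix)
{
    int32_t upper = (int32_t)((uint32_t)(prefix & 0x7FF) << 21) >> 9; // sign-extended << 12
    return pc + upper + ((suffix & 0x7FF) << 1);
}

int main()
{
    // Hypothetical pair branching forward by 0x1000 from pc = 0x02000004.
    assert(ThumbBLTarget(0x02000004, 0xF001, 0xF800) == 0x02001004);
    return 0;
}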
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
new file mode 100644
index 0000000..d8bdd56
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -0,0 +1,899 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMInterpreter.h"
+#include "../Config.h"
+
+#include <assert.h>
+
+#include "../dolphin/CommonFuncs.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+using namespace Gen;
+
+extern "C" void ARM_Ret();
+
+namespace ARMJIT
+{
+template <>
+const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
+{
+#ifdef _WIN32
+ RBX, RSI, RDI, R12, R13, R14, // callee saved
+ R10, R11, // caller saved
+#else
+ RBX, R12, R13, R14, // callee saved, this is sad
+ R9, R10, R11, // caller saved
+#endif
+};
+template <>
+const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
+#ifdef _WIN32
+ 8
+#else
+ 7
+#endif
+;
+
+#ifdef _WIN32
+const BitSet32 CallerSavedPushRegs({R10, R11});
+#else
+const BitSet32 CallerSavedPushRegs({R9, R10, R11});
+#endif
+
+void Compiler::PushRegs(bool saveHiRegs)
+{
+ BitSet32 loadedRegs(RegCache.LoadedRegs);
+
+ if (saveHiRegs)
+ {
+ BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
+ for (int reg : hiRegsLoaded)
+ {
+ if (Thumb || CurInstr.Cond() == 0xE)
+ RegCache.UnloadRegister(reg);
+ else
+ SaveReg(reg, RegCache.Mapping[reg]);
+ // prevent saving the register twice
+ loadedRegs[reg] = false;
+ }
+ }
+
+ for (int reg : loadedRegs)
+ if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
+ SaveReg(reg, RegCache.Mapping[reg]);
+}
+
+void Compiler::PopRegs(bool saveHiRegs)
+{
+ BitSet32 loadedRegs(RegCache.LoadedRegs);
+ for (int reg : loadedRegs)
+ {
+ if ((saveHiRegs && reg >= 8 && reg < 15)
+ || BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED)
+ {
+ LoadReg(reg, RegCache.Mapping[reg]);
+ }
+ }
+}
+
+void Compiler::A_Comp_MRS()
+{
+ Comp_AddCycles_C();
+
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ XOR(32, R(RSCRATCH3), R(RSCRATCH3));
+ MOV(32, R(RSCRATCH2), Imm32(15 - 8));
+ CALL(ReadBanked);
+ MOV(32, rd, R(RSCRATCH3));
+ }
+ else
+ MOV(32, rd, R(RCPSR));
+}
+
+void Compiler::A_Comp_MSR()
+{
+ Comp_AddCycles_C();
+
+ OpArg val = CurInstr.Instr & (1 << 25)
+ ? Imm32(ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)))
+ : MapReg(CurInstr.A_Reg(0));
+
+ u32 mask = 0;
+ if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
+ if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
+ if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
+ if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
+
+ if (CurInstr.Instr & (1 << 22))
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ XOR(32, R(RSCRATCH3), R(RSCRATCH3));
+ MOV(32, R(RSCRATCH2), Imm32(15 - 8));
+ CALL(ReadBanked);
+
+ MOV(32, R(RSCRATCH2), Imm32(mask));
+ MOV(32, R(RSCRATCH4), R(RSCRATCH2));
+ AND(32, R(RSCRATCH4), Imm32(0xFFFFFF00));
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ CMP(32, R(RSCRATCH), Imm8(0x10));
+ CMOVcc(32, RSCRATCH2, R(RSCRATCH4), CC_E);
+
+ MOV(32, R(RSCRATCH4), R(RSCRATCH2));
+ NOT(32, R(RSCRATCH4));
+ AND(32, R(RSCRATCH3), R(RSCRATCH4));
+
+ AND(32, R(RSCRATCH2), val);
+ OR(32, R(RSCRATCH3), R(RSCRATCH2));
+
+ MOV(32, R(RSCRATCH2), Imm32(15 - 8));
+ CALL(WriteBanked);
+ }
+ else
+ {
+ mask &= 0xFFFFFFDF;
+ CPSRDirty = true;
+
+ if ((mask & 0xFF) == 0)
+ {
+ AND(32, R(RCPSR), Imm32(~mask));
+ if (!val.IsImm())
+ {
+ MOV(32, R(RSCRATCH), val);
+ AND(32, R(RSCRATCH), Imm32(mask));
+ OR(32, R(RCPSR), R(RSCRATCH));
+ }
+ else
+ {
+ OR(32, R(RCPSR), Imm32(val.Imm32() & mask));
+ }
+ }
+ else
+ {
+ MOV(32, R(RSCRATCH2), Imm32(mask));
+ MOV(32, R(RSCRATCH3), R(RSCRATCH2));
+ AND(32, R(RSCRATCH3), Imm32(0xFFFFFF00));
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ CMP(32, R(RSCRATCH), Imm8(0x10));
+ CMOVcc(32, RSCRATCH2, R(RSCRATCH3), CC_E);
+
+ MOV(32, R(RSCRATCH3), R(RCPSR));
+
+ // I need you ANDN
+ MOV(32, R(RSCRATCH), R(RSCRATCH2));
+ NOT(32, R(RSCRATCH));
+ AND(32, R(RCPSR), R(RSCRATCH));
+
+ AND(32, R(RSCRATCH2), val);
+ OR(32, R(RCPSR), R(RSCRATCH2));
+
+ PushRegs(true);
+
+ MOV(32, R(ABI_PARAM3), R(RCPSR));
+ MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ CALL((void*)&ARM::UpdateMode);
+
+ PopRegs(true);
+ }
+ }
+}
+
+/*
+ We'll repurpose this .bss memory
+
+ */
+u8 CodeMemory[1024 * 1024 * 32];
+
+Compiler::Compiler()
+{
+ {
+ #ifdef _WIN32
+ SYSTEM_INFO sysInfo;
+ GetSystemInfo(&sysInfo);
+
+ u64 pageSize = (u64)sysInfo.dwPageSize;
+ #else
+ u64 pageSize = sysconf(_SC_PAGE_SIZE);
+ #endif
+
+ u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
+ u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
+
+ #ifdef _WIN32
+ DWORD dummy;
+ VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
+ #else
+ mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
+ #endif
+
+ ResetStart = pageAligned;
+ CodeMemSize = alignedSize;
+ }
+
+ Reset();
+
+ {
+ // RSCRATCH mode
+ // RSCRATCH2 reg number
+ // RSCRATCH3 value in current mode
+ // ret - RSCRATCH3
+ ReadBanked = (void*)GetWritableCodePtr();
+ CMP(32, R(RSCRATCH), Imm8(0x11));
+ FixupBranch fiq = J_CC(CC_E);
+ SUB(32, R(RSCRATCH2), Imm8(13 - 8));
+ FixupBranch notEverything = J_CC(CC_L);
+ CMP(32, R(RSCRATCH), Imm8(0x12));
+ FixupBranch irq = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x13));
+ FixupBranch svc = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x17));
+ FixupBranch abt = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x1B));
+ FixupBranch und = J_CC(CC_E);
+ SetJumpTarget(notEverything);
+ RET();
+
+ SetJumpTarget(fiq);
+ MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)));
+ RET();
+ SetJumpTarget(irq);
+ MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)));
+ RET();
+ SetJumpTarget(svc);
+ MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)));
+ RET();
+ SetJumpTarget(abt);
+ MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)));
+ RET();
+ SetJumpTarget(und);
+ MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)));
+ RET();
+ }
+ {
+ // RSCRATCH mode
+ // RSCRATCH2 reg n
+ // RSCRATCH3 value
+ // carry flag set if the register isn't banked
+ WriteBanked = (void*)GetWritableCodePtr();
+ CMP(32, R(RSCRATCH), Imm8(0x11));
+ FixupBranch fiq = J_CC(CC_E);
+ SUB(32, R(RSCRATCH2), Imm8(13 - 8));
+ FixupBranch notEverything = J_CC(CC_L);
+ CMP(32, R(RSCRATCH), Imm8(0x12));
+ FixupBranch irq = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x13));
+ FixupBranch svc = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x17));
+ FixupBranch abt = J_CC(CC_E);
+ CMP(32, R(RSCRATCH), Imm8(0x1B));
+ FixupBranch und = J_CC(CC_E);
+ SetJumpTarget(notEverything);
+ STC();
+ RET();
+
+ SetJumpTarget(fiq);
+ MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)), R(RSCRATCH3));
+ CLC();
+ RET();
+ SetJumpTarget(irq);
+ MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)), R(RSCRATCH3));
+ CLC();
+ RET();
+ SetJumpTarget(svc);
+ MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)), R(RSCRATCH3));
+ CLC();
+ RET();
+ SetJumpTarget(abt);
+ MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)), R(RSCRATCH3));
+ CLC();
+ RET();
+ SetJumpTarget(und);
+ MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)), R(RSCRATCH3));
+ CLC();
+ RET();
+ }
+
+ for (int consoleType = 0; consoleType < 2; consoleType++)
+ {
+ for (int num = 0; num < 2; num++)
+ {
+ for (int size = 0; size < 3; size++)
+ {
+ for (int reg = 0; reg < 16; reg++)
+ {
+ if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3)
+ {
+ PatchedStoreFuncs[consoleType][num][size][reg] = NULL;
+ PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL;
+ PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL;
+ continue;
+ }
+
+ X64Reg rdMapped = (X64Reg)reg;
+ PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr();
+ if (RSCRATCH3 != ABI_PARAM1)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (num == 0)
+ {
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ MOV(32, R(ABI_PARAM3), R(rdMapped));
+ }
+ else
+ {
+ MOV(32, R(ABI_PARAM2), R(rdMapped));
+ }
+ ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (consoleType == 0)
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
+ case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
+ case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
+ case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break;
+ case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break;
+ case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break;
+ }
+ }
+ else
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
+ case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
+ case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break;
+ case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break;
+ case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break;
+ case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break;
+ }
+ }
+ ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ RET();
+
+ for (int signextend = 0; signextend < 2; signextend++)
+ {
+ PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr();
+ if (RSCRATCH3 != ABI_PARAM1)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (num == 0)
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (consoleType == 0)
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
+ case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
+ case 16: ABI_CallFunction(SlowRead9<u16, 0>); break;
+ case 17: ABI_CallFunction(SlowRead7<u16, 0>); break;
+ case 8: ABI_CallFunction(SlowRead9<u8, 0>); break;
+ case 9: ABI_CallFunction(SlowRead7<u8, 0>); break;
+ }
+ }
+ else
+ {
+ switch ((8 << size) | num)
+ {
+ case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
+ case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
+ case 16: ABI_CallFunction(SlowRead9<u16, 1>); break;
+ case 17: ABI_CallFunction(SlowRead7<u16, 1>); break;
+ case 8: ABI_CallFunction(SlowRead9<u8, 1>); break;
+ case 9: ABI_CallFunction(SlowRead7<u8, 1>); break;
+ }
+ }
+ ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8);
+ if (signextend)
+ MOVSX(32, 8 << size, rdMapped, R(RSCRATCH));
+ else
+ MOVZX(32, 8 << size, rdMapped, R(RSCRATCH));
+ RET();
+ }
+ }
+ }
+ }
+ }
+
+ // move the region forward to prevent overwriting the generated functions
+ CodeMemSize -= GetWritableCodePtr() - ResetStart;
+ ResetStart = GetWritableCodePtr();
+
+ NearStart = ResetStart;
+ FarStart = ResetStart + 1024*1024*24;
+
+ NearSize = FarStart - ResetStart;
+ FarSize = (ResetStart + CodeMemSize) - FarStart;
+}
+
+void Compiler::LoadCPSR()
+{
+ assert(!CPSRDirty);
+
+ MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
+}
+
+void Compiler::SaveCPSR(bool flagClean)
+{
+ if (CPSRDirty)
+ {
+ MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
+ if (flagClean)
+ CPSRDirty = false;
+ }
+}
+
+void Compiler::LoadReg(int reg, X64Reg nativeReg)
+{
+ if (reg != 15)
+ MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg])));
+ else
+ MOV(32, R(nativeReg), Imm32(R15));
+}
+
+void Compiler::SaveReg(int reg, X64Reg nativeReg)
+{
+ MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
+}
+
+// invalidates RSCRATCH and RSCRATCH3
+Gen::FixupBranch Compiler::CheckCondition(u32 cond)
+{
+ // hack, ldm/stm can get really big TODO: make this better
+ bool ldmStm = !Thumb &&
+ (CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM);
+ if (cond >= 0x8)
+ {
+        static_assert(RSCRATCH3 == ECX, "RSCRATCH3 has to be equal to ECX!");
+ MOV(32, R(RSCRATCH3), R(RCPSR));
+ SHR(32, R(RSCRATCH3), Imm8(28));
+ MOV(32, R(RSCRATCH), Imm32(1));
+ SHL(32, R(RSCRATCH), R(RSCRATCH3));
+ TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
+
+ return J_CC(CC_Z, ldmStm);
+ }
+ else
+ {
+        // could have used a LUT, but then where would the fun be?
+ TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
+
+ return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm);
+ }
+}
+
+#define F(x) &Compiler::x
+const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
+{
+ // AND
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // EOR
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // SUB
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // RSB
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // ADD
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // ADC
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // SBC
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // RSC
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // ORR
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // MOV
+ F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
+ F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
+ // BIC
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith),
+ // MVN
+ F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
+ F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp),
+ // TST
+ F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
+ // TEQ
+ F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
+ // CMP
+ F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
+ // CMN
+ F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp),
+ // Mul
+ F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL,
+ // ARMv5 stuff
+ F(A_Comp_CLZ), NULL, NULL, NULL, NULL,
+ // STR
+ F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
+ // STRB
+ F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
+ // LDR
+ F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
+ // LDRB
+ F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB),
+ // STRH
+ F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
+ // LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked)
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ // LDRH
+ F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
+ // LDRSB
+ F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
+ // LDRSH
+ F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf),
+ // swap
+ NULL, NULL,
+ // LDM/STM
+ F(A_Comp_LDM_STM), F(A_Comp_LDM_STM),
+ // Branch
+ F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchXchangeReg), F(A_Comp_BranchXchangeReg),
+ // system stuff
+ NULL, F(A_Comp_MSR), F(A_Comp_MSR), F(A_Comp_MRS), NULL, NULL, NULL,
+ F(Nop)
+};
+
+const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
+ // Shift imm
+ F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), F(T_Comp_ShiftImm),
+ // Three operand ADD/SUB
+ F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_),
+ // 8 bit imm
+ F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8),
+ // general ALU
+ F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
+ F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
+ F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU),
+ F(T_Comp_ALU), F(T_Comp_MUL), F(T_Comp_ALU), F(T_Comp_ALU),
+ // hi reg
+ F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg),
+ // pc/sp relative
+ F(T_Comp_RelAddr), F(T_Comp_RelAddr), F(T_Comp_AddSP),
+ // LDR pcrel
+ F(T_Comp_LoadPCRel),
+ // LDR/STR reg offset
+ F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg),
+ // LDR/STR sign extended, half
+ F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf),
+ // LDR/STR imm offset
+ F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm),
+ // LDR/STR half imm offset
+ F(T_Comp_MemImmHalf), F(T_Comp_MemImmHalf),
+ // LDR/STR sp rel
+ F(T_Comp_MemSPRel), F(T_Comp_MemSPRel),
+ // PUSH/POP
+ F(T_Comp_PUSH_POP), F(T_Comp_PUSH_POP),
+ // LDMIA, STMIA
+ F(T_Comp_LDMIA_STMIA), F(T_Comp_LDMIA_STMIA),
+ // Branch
+ F(T_Comp_BCOND), F(T_Comp_BranchXchangeReg), F(T_Comp_BranchXchangeReg), F(T_Comp_B), F(T_Comp_BL_LONG_1), F(T_Comp_BL_LONG_2),
+ // Unk, SVC
+ NULL, NULL,
+ F(T_Comp_BL_Merged)
+};
+#undef F
+
+bool Compiler::CanCompile(bool thumb, u16 kind)
+{
+ return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
+}
+
+void Compiler::Reset()
+{
+ memset(ResetStart, 0xcc, CodeMemSize);
+ SetCodePtr(ResetStart);
+
+ NearCode = NearStart;
+ FarCode = FarStart;
+
+ LoadStorePatches.clear();
+}
+
+bool Compiler::IsJITFault(u64 addr)
+{
+ return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory);
+}
+
+void Compiler::Comp_SpecialBranchBehaviour(bool taken)
+{
+ if (taken && CurInstr.BranchFlags & branch_IdleBranch)
+ OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
+
+ if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
+ || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
+ {
+ RegCache.PrepareExit();
+
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
+ JMP((u8*)&ARM_Ret, true);
+ }
+}
+
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+{
+ if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
+ {
+ printf("near reset\n");
+ ResetBlockCache();
+ }
+ if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
+ {
+ printf("far reset\n");
+ ResetBlockCache();
+ }
+
+ ConstantCycles = 0;
+ Thumb = thumb;
+ Num = cpu->Num;
+ CodeRegion = instrs[0].Addr >> 24;
+ CurCPU = cpu;
+ // CPSR might have been modified in a previous block
+ CPSRDirty = false;
+
+ JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
+
+ RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
+
+ for (int i = 0; i < instrsCount; i++)
+ {
+ CurInstr = instrs[i];
+ R15 = CurInstr.Addr + (Thumb ? 4 : 8);
+ CodeRegion = R15 >> 24;
+
+ Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
+
+ CompileFunc comp = Thumb
+ ? T_Comp[CurInstr.Info.Kind]
+ : A_Comp[CurInstr.Info.Kind];
+
+ bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
+ if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
+ {
+ MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
+ if (comp == NULL)
+ {
+ MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
+ MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
+
+ SaveCPSR();
+ }
+ }
+
+ if (comp != NULL)
+ RegCache.Prepare(Thumb, i);
+ else
+ RegCache.Flush();
+
+ if (Thumb)
+ {
+ if (comp == NULL)
+ {
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+
+ ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]);
+ }
+ else
+ (this->*comp)();
+ }
+ else
+ {
+ u32 cond = CurInstr.Cond();
+ if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
+ {
+ if (comp)
+ (this->*comp)();
+ else
+ {
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
+ }
+ }
+ else if (cond == 0xF)
+ {
+ Comp_AddCycles_C();
+ }
+ else
+ {
+ IrregularCycles = false;
+
+ FixupBranch skipExecute;
+ if (cond < 0xE)
+ skipExecute = CheckCondition(cond);
+
+ if (comp == NULL)
+ {
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+
+ ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]);
+ }
+ else
+ (this->*comp)();
+
+ Comp_SpecialBranchBehaviour(true);
+
+ if (CurInstr.Cond() < 0xE)
+ {
+ if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
+ {
+ FixupBranch skipFailed = J();
+ SetJumpTarget(skipExecute);
+
+ Comp_AddCycles_C(true);
+
+ Comp_SpecialBranchBehaviour(false);
+
+ SetJumpTarget(skipFailed);
+ }
+ else
+ SetJumpTarget(skipExecute);
+ }
+
+ }
+ }
+
+ if (comp == NULL)
+ LoadCPSR();
+ }
+
+ RegCache.Flush();
+
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
+ JMP((u8*)ARM_Ret, true);
+
+ /*FILE* codeout = fopen("codeout", "a");
+ fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
+ fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout);
+
+ fclose(codeout);*/
+
+ return res;
+}
+
+void Compiler::Comp_AddCycles_C(bool forceNonConstant)
+{
+ s32 cycles = Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
+
+ if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ else
+ ConstantCycles += cycles;
+}
+
+void Compiler::Comp_AddCycles_CI(u32 i)
+{
+ s32 cycles = (Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
+
+ if (!Thumb && CurInstr.Cond() < 0xE)
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ else
+ ConstantCycles += cycles;
+}
+
+void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
+{
+ s32 cycles = Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
+
+ if (!Thumb && CurInstr.Cond() < 0xE)
+ {
+ LEA(32, RSCRATCH, MDisp(i, add + cycles));
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH));
+ }
+ else
+ {
+ ConstantCycles += cycles;
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i));
+ }
+}
+
+void Compiler::Comp_AddCycles_CDI()
+{
+ if (Num == 0)
+ Comp_AddCycles_CD();
+ else
+ {
+ IrregularCycles = true;
+
+ s32 cycles;
+
+ s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+ s32 numD = CurInstr.DataCycles;
+
+ if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
+ {
+ if (CodeRegion == 0x02)
+ cycles = numC + numD;
+ else
+ {
+ numC++;
+ cycles = std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ }
+ else if (CodeRegion == 0x02)
+ {
+ numD++;
+ cycles = std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else
+ {
+ cycles = numC + numD + 1;
+ }
+
+ if (!Thumb && CurInstr.Cond() < 0xE)
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ else
+ ConstantCycles += cycles;
+ }
+}
+
+void Compiler::Comp_AddCycles_CD()
+{
+ u32 cycles = 0;
+ if (Num == 0)
+ {
+ s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
+ s32 numD = CurInstr.DataCycles;
+
+ //if (DataRegion != CodeRegion)
+ cycles = std::max(numC + numD - 6, std::max(numC, numD));
+
+ IrregularCycles = cycles != numC;
+ }
+ else
+ {
+ s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+ s32 numD = CurInstr.DataCycles;
+
+ if ((CurInstr.DataRegion >> 4) == 0x02)
+ {
+ if (CodeRegion == 0x02)
+ cycles += numC + numD;
+ else
+ cycles += std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else if (CodeRegion == 0x02)
+ {
+ cycles += std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else
+ {
+ cycles += numC + numD;
+ }
+
+ IrregularCycles = true;
+ }
+
+ if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
+ SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+ else
+ ConstantCycles += cycles;
+}
+
+}
\ No newline at end of file
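The cycle helpers at the end of this file merge code (numC) and data (numD) access timings instead of simply adding them: whenever main RAM (region 0x02) is involved, the accesses overlap, which the code models as max(numC + numD - 3, max(numC, numD)) on the ARM7 and with a -6 constant on the ARM9. A tiny worked example of that merge; the inputs are illustrative, not real timing-table values:

#include <algorithm>
#include <cassert>

// Same merge as the main-RAM paths in Comp_AddCycles_CDI/_CD (ARM7 flavour).
static int MergeCycles(int numC, int numD)
{
    return std::max(numC + numD - 3, std::max(numC, numD));
}

int main()
{
    assert(MergeCycles(2, 4) == 4); // max(3, 4): short accesses overlap almost fully
    assert(MergeCycles(5, 6) == 8); // max(8, 6): longer accesses still save 3 cycles
    return 0;
}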
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
new file mode 100644
index 0000000..0fe0147
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -0,0 +1,255 @@
+#ifndef ARMJIT_COMPILER_H
+#define ARMJIT_COMPILER_H
+
+#include "../dolphin/x64Emitter.h"
+
+#include "../ARMJIT.h"
+#include "../ARMJIT_Internal.h"
+#include "../ARMJIT_RegisterCache.h"
+
+#include <unordered_map>
+
+namespace ARMJIT
+{
+
+const Gen::X64Reg RCPU = Gen::RBP;
+const Gen::X64Reg RCPSR = Gen::R15;
+
+const Gen::X64Reg RSCRATCH = Gen::EAX;
+const Gen::X64Reg RSCRATCH2 = Gen::EDX;
+const Gen::X64Reg RSCRATCH3 = Gen::ECX;
+const Gen::X64Reg RSCRATCH4 = Gen::R8;
+
+struct LoadStorePatch
+{
+ void* PatchFunc;
+ s16 Offset;
+ u16 Size;
+};
+
+struct Op2
+{
+ Op2()
+ {}
+
+ Op2(u32 imm)
+ : IsImm(true), Imm(imm)
+ {}
+ Op2(int reg, int op, int amount)
+ : IsImm(false)
+ {
+ Reg.Reg = reg;
+ Reg.Op = op;
+ Reg.Amount = amount;
+ }
+
+ bool IsImm;
+ union
+ {
+ struct
+ {
+ int Reg, Op, Amount;
+ } Reg;
+ u32 Imm;
+ };
+};
+
+class Compiler : public Gen::XEmitter
+{
+public:
+ Compiler();
+
+ void Reset();
+
+ JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+
+ void LoadReg(int reg, Gen::X64Reg nativeReg);
+ void SaveReg(int reg, Gen::X64Reg nativeReg);
+
+ bool CanCompile(bool thumb, u16 kind);
+
+ typedef void (Compiler::*CompileFunc)();
+
+ void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
+ void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
+
+ void Comp_AddCycles_C(bool forceNonConstant = false);
+ void Comp_AddCycles_CI(u32 i);
+ void Comp_AddCycles_CI(Gen::X64Reg i, int add);
+ void Comp_AddCycles_CDI();
+ void Comp_AddCycles_CD();
+
+ enum
+ {
+ opSetsFlags = 1 << 0,
+ opSymmetric = 1 << 1,
+ opRetriveCV = 1 << 2,
+ opInvertCarry = 1 << 3,
+ opSyncCarry = 1 << 4,
+ opInvertOp2 = 1 << 5,
+ };
+
+ void Nop() {}
+
+ void A_Comp_Arith();
+ void A_Comp_MovOp();
+ void A_Comp_CmpOp();
+
+ void A_Comp_MUL_MLA();
+ void A_Comp_Mul_Long();
+
+ void A_Comp_CLZ();
+
+ void A_Comp_MemWB();
+ void A_Comp_MemHalf();
+ void A_Comp_LDM_STM();
+
+ void A_Comp_BranchImm();
+ void A_Comp_BranchXchangeReg();
+
+ void A_Comp_MRS();
+ void A_Comp_MSR();
+
+ void T_Comp_ShiftImm();
+ void T_Comp_AddSub_();
+ void T_Comp_ALU_Imm8();
+ void T_Comp_ALU();
+ void T_Comp_ALU_HiReg();
+ void T_Comp_MUL();
+
+ void T_Comp_RelAddr();
+ void T_Comp_AddSP();
+
+ void T_Comp_MemReg();
+ void T_Comp_MemImm();
+ void T_Comp_MemRegHalf();
+ void T_Comp_MemImmHalf();
+ void T_Comp_LoadPCRel();
+ void T_Comp_MemSPRel();
+ void T_Comp_PUSH_POP();
+ void T_Comp_LDMIA_STMIA();
+
+ void T_Comp_BCOND();
+ void T_Comp_B();
+ void T_Comp_BranchXchangeReg();
+ void T_Comp_BL_LONG_1();
+ void T_Comp_BL_LONG_2();
+ void T_Comp_BL_Merged();
+
+ enum
+ {
+ memop_Writeback = 1 << 0,
+ memop_Post = 1 << 1,
+ memop_SignExtend = 1 << 2,
+ memop_Store = 1 << 3,
+ memop_SubtractOffset = 1 << 4
+ };
+ void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags);
+ s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+ bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+
+ void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
+ Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
+ void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
+ Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
+ void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed);
+
+ void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn);
+
+ void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
+
+ void Comp_SpecialBranchBehaviour(bool taken);
+
+
+ Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
+ Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
+
+ Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
+
+ void LoadCPSR();
+ void SaveCPSR(bool flagClean = true);
+
+ bool FlagsNZRequired()
+ { return CurInstr.SetFlags & 0xC; }
+
+ Gen::FixupBranch CheckCondition(u32 cond);
+
+ void PushRegs(bool saveHiRegs);
+ void PopRegs(bool saveHiRegs);
+
+ Gen::OpArg MapReg(int reg)
+ {
+ if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
+ return Gen::Imm32(R15);
+
+ assert(RegCache.Mapping[reg] != Gen::INVALID_REG);
+ return Gen::R(RegCache.Mapping[reg]);
+ }
+
+ JitBlockEntry AddEntryOffset(u32 offset)
+ {
+ return (JitBlockEntry)(ResetStart + offset);
+ }
+
+ u32 SubEntryOffset(JitBlockEntry entry)
+ {
+ return (u8*)entry - ResetStart;
+ }
+
+ void SwitchToNearCode()
+ {
+ FarCode = GetWritableCodePtr();
+ SetCodePtr(NearCode);
+ }
+
+ void SwitchToFarCode()
+ {
+ NearCode = GetWritableCodePtr();
+ SetCodePtr(FarCode);
+ }
+
+ bool IsJITFault(u64 addr);
+
+ s32 RewriteMemAccess(u64 pc);
+
+ u8* FarCode;
+ u8* NearCode;
+ u32 FarSize;
+ u32 NearSize;
+
+ u8* NearStart;
+ u8* FarStart;
+
+ void* PatchedStoreFuncs[2][2][3][16];
+ void* PatchedLoadFuncs[2][2][3][2][16];
+
+ std::unordered_map<u8*, LoadStorePatch> LoadStorePatches;
+
+ u8* ResetStart;
+ u32 CodeMemSize;
+
+ bool Exit;
+ bool IrregularCycles;
+
+ void* ReadBanked;
+ void* WriteBanked;
+
+ bool CPSRDirty = false;
+
+ FetchedInstr CurInstr;
+
+ RegisterCache<Compiler, Gen::X64Reg> RegCache;
+
+ bool Thumb;
+ u32 Num;
+ u32 R15;
+ u32 CodeRegion;
+
+ u32 ConstantCycles;
+
+ ARM* CurCPU;
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
new file mode 100644
index 0000000..9696d22
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <stddef.h>
+
+#include "../ARM.h"
+
+int main(int argc, char* argv[])
+{
+ FILE* f = fopen("ARMJIT_Offsets.h", "w");
+#define writeOffset(field) \
+ fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field))
+
+ writeOffset(CPSR);
+ writeOffset(Cycles);
+ writeOffset(StopExecution);
+
+ fclose(f);
+ return 0;
+}
\ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s
new file mode 100644
index 0000000..0a84df0
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Linkage.s
@@ -0,0 +1,78 @@
+.intel_syntax noprefix
+
+#include "ARMJIT_Offsets.h"
+
+.text
+
+#define RCPU rbp
+#define RCPSR r15d
+
+#ifdef WIN64
+#define ARG1_REG ecx
+#define ARG2_REG edx
+#define ARG3_REG r8d
+#define ARG4_REG r9d
+#define ARG1_REG64 rcx
+#define ARG2_REG64 rdx
+#define ARG3_REG64 r8
+#define ARG4_REG64 r9
+#else
+#define ARG1_REG edi
+#define ARG2_REG esi
+#define ARG3_REG edx
+#define ARG4_REG ecx
+#define ARG1_REG64 rdi
+#define ARG2_REG64 rsi
+#define ARG3_REG64 rdx
+#define ARG4_REG64 rcx
+#endif
+
+.p2align 4,,15
+
+.global ARM_Dispatch
+ARM_Dispatch:
+#ifdef WIN64
+ push rdi
+ push rsi
+#endif
+ push rbx
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbp
+
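+/* keep RSP 16-byte aligned for the generated code; on Win64 the 0x28 bytes
+   also provide the 32-byte shadow space for callees */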
+#ifdef WIN64
+ sub rsp, 0x28
+#else
+ sub rsp, 0x8
+#endif
+ mov RCPU, ARG1_REG64
+ mov RCPSR, [RCPU + ARM_CPSR_offset]
+
+ jmp ARG2_REG64
+
+.p2align 4,,15
+
+.global ARM_Ret
+ARM_Ret:
+ mov [RCPU + ARM_CPSR_offset], RCPSR
+
+#ifdef WIN64
+ add rsp, 0x28
+#else
+ add rsp, 0x8
+#endif
+
+ pop rbp
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ pop rbx
+#ifdef WIN64
+ pop rsi
+ pop rdi
+#endif
+
+ ret
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
new file mode 100644
index 0000000..2da113b
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -0,0 +1,773 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../Config.h"
+
+using namespace Gen;
+
+namespace ARMJIT
+{
+
+template <typename T>
+int squeezePointer(T* ptr)
+{
+ int truncated = (int)((u64)ptr);
+ assert((T*)((u64)truncated) == ptr);
+ return truncated;
+}
+
+s32 Compiler::RewriteMemAccess(u64 pc)
+{
+ auto it = LoadStorePatches.find((u8*)pc);
+ if (it != LoadStorePatches.end())
+ {
+ LoadStorePatch patch = it->second;
+ LoadStorePatches.erase(it);
+
+ u8* curCodePtr = GetWritableCodePtr();
+ u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
+ SetCodePtr(rewritePtr);
+
+ CALL(patch.PatchFunc);
+ u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
+ if (remainingSize > 0)
+ NOP(remainingSize);
+
+ //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);
+
+ SetCodePtr(curCodePtr);
+
+ return patch.Offset;
+ }
+
+ printf("this is a JIT bug %llx\n", (unsigned long long)pc);
+ abort();
+}
+
+/*
+ According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
+ of all memory load and store instructions always access addresses in the same region as
+ during their first execution.
+
+ I tried multiple optimisations, which would benefit from this behaviour
+ (having fast paths for the first region, …), though none of them yielded a measurable
+ improvement.
+*/
+
+bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
+{
+ u32 localAddr = LocaliseCodeAddress(Num, addr);
+
+ int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
+ if (invalidLiteralIdx != -1)
+ {
+ InvalidLiterals.Remove(invalidLiteralIdx);
+ return false;
+ }
+
+ Comp_AddCycles_CDI();
+
+ u32 val;
+ // make sure arm7 bios is accessible
+ u32 tmpR15 = CurCPU->R[15];
+ CurCPU->R[15] = R15;
+ if (size == 32)
+ {
+ CurCPU->DataRead32(addr & ~0x3, &val);
+ val = ROR(val, (addr & 0x3) << 3);
+ }
+ else if (size == 16)
+ {
+ CurCPU->DataRead16(addr & ~0x1, &val);
+ if (signExtend)
+ val = ((s32)val << 16) >> 16;
+ }
+ else
+ {
+ CurCPU->DataRead8(addr, &val);
+ if (signExtend)
+ val = ((s32)val << 24) >> 24;
+ }
+ CurCPU->R[15] = tmpR15;
+
+ MOV(32, MapReg(rd), Imm32(val));
+
+ if (Thumb || CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(rd, val);
+
+ return true;
+}
+
+
+void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags)
+{
+ u32 addressMask = ~0;
+ if (size == 32)
+ addressMask = ~3;
+ if (size == 16)
+ addressMask = ~1;
+
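+ // a PC-relative load with an immediate offset always reads the same address,
+ // so the value can be fetched at compile time and emitted as a constant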
+ if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+ {
+ u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+ if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
+ return;
+ }
+
+ if (flags & memop_Store)
+ {
+ Comp_AddCycles_CD();
+ }
+ else
+ {
+ Comp_AddCycles_CDI();
+ }
+
+ bool addrIsStatic = Config::JIT_LiteralOptimisations
+ && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
+ u32 staticAddress;
+ if (addrIsStatic)
+ staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+ OpArg rdMapped = MapReg(rd);
+
+ OpArg rnMapped = MapReg(rn);
+ if (Thumb && rn == 15)
+ rnMapped = Imm32(R15 & ~0x2);
+
+ X64Reg finalAddr = RSCRATCH3;
+ if (flags & memop_Post)
+ {
+ MOV(32, R(RSCRATCH3), rnMapped);
+
+ finalAddr = rnMapped.GetSimpleReg();
+ }
+
+ if (op2.IsImm)
+ {
+ MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+ }
+ else
+ {
+ OpArg rm = MapReg(op2.Reg.Reg);
+
+ if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
+ && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
+ {
+ LEA(32, finalAddr,
+ MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
+ }
+ else
+ {
+ bool throwAway;
+ OpArg offset =
+ Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
+
+ if (flags & memop_SubtractOffset)
+ {
+ if (R(finalAddr) != rnMapped)
+ MOV(32, R(finalAddr), rnMapped);
+ if (!offset.IsZero())
+ SUB(32, R(finalAddr), offset);
+ }
+ else
+ MOV_sum(32, finalAddr, rnMapped, offset);
+ }
+ }
+
+ if ((flags & memop_Writeback) && !(flags & memop_Post))
+ MOV(32, rnMapped, R(finalAddr));
+
+ u32 expectedTarget = Num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
+
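+ // fast path: access guest memory through the direct fastmem mapping; if the access
+ // faults, RewriteMemAccess patches it into a call to the slow handler recorded below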
+ if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
+ {
+ u8* memopStart = GetWritableCodePtr();
+ LoadStorePatch patch;
+
+ patch.PatchFunc = flags & memop_Store
+ ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
+ : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];
+
+ assert(patch.PatchFunc != NULL);
+
+ MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
+
+ X64Reg maskedAddr = RSCRATCH3;
+ if (size > 8)
+ {
+ maskedAddr = RSCRATCH2;
+ MOV(32, R(RSCRATCH2), R(RSCRATCH3));
+ AND(32, R(RSCRATCH2), Imm8(addressMask));
+ }
+
+ u8* memopLoadStoreLocation = GetWritableCodePtr();
+ if (flags & memop_Store)
+ {
+ MOV(size, MRegSum(RSCRATCH, maskedAddr), rdMapped);
+ }
+ else
+ {
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr));
+
+ if (size == 32)
+ {
+ AND(32, R(RSCRATCH3), Imm8(0x3));
+ SHL(32, R(RSCRATCH3), Imm8(3));
+ ROR_(32, rdMapped, R(RSCRATCH3));
+ }
+ }
+
+ patch.Offset = memopStart - memopLoadStoreLocation;
+ patch.Size = GetWritableCodePtr() - memopStart;
+
+ assert(patch.Size >= 5);
+
+ LoadStorePatches[memopLoadStoreLocation] = patch;
+ }
+ else
+ {
+ PushRegs(false);
+
+ if (Num == 0)
+ {
+ MOV(64, R(ABI_PARAM2), R(RCPU));
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM3), rdMapped);
+
+ switch (size | NDS::ConsoleType)
+ {
+ case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
+ case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
+ case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
+ case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
+ case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
+ case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
+ }
+ }
+ else
+ {
+ switch (size | NDS::ConsoleType)
+ {
+ case 32: CALL((void*)&SlowRead9<u32, 0>); break;
+ case 16: CALL((void*)&SlowRead9<u16, 0>); break;
+ case 8: CALL((void*)&SlowRead9<u8, 0>); break;
+ case 33: CALL((void*)&SlowRead9<u32, 1>); break;
+ case 17: CALL((void*)&SlowRead9<u16, 1>); break;
+ case 9: CALL((void*)&SlowRead9<u8, 1>); break;
+ }
+ }
+ }
+ else
+ {
+ if (ABI_PARAM1 != RSCRATCH3)
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
+ if (flags & memop_Store)
+ {
+ MOV(32, R(ABI_PARAM2), rdMapped);
+
+ switch (size | NDS::ConsoleType)
+ {
+ case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
+ case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
+ case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
+ case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
+ case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
+ case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
+ }
+ }
+ else
+ {
+ switch (size | NDS::ConsoleType)
+ {
+ case 32: CALL((void*)&SlowRead7<u32, 0>); break;
+ case 16: CALL((void*)&SlowRead7<u16, 0>); break;
+ case 8: CALL((void*)&SlowRead7<u8, 0>); break;
+ case 33: CALL((void*)&SlowRead7<u32, 1>); break;
+ case 17: CALL((void*)&SlowRead7<u16, 1>); break;
+ case 9: CALL((void*)&SlowRead7<u8, 1>); break;
+ }
+ }
+ }
+
+ PopRegs(false);
+
+ if (!(flags & memop_Store))
+ {
+ if (flags & memop_SignExtend)
+ MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+ }
+ }
+
+ if (!(flags & memop_Store) && rd == 15)
+ {
+ if (size < 32)
+ printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
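+ // note: the braces below open an unconditional scope, they are not the body of the if above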
+ {
+ if (Num == 1)
+ {
+ if (Thumb)
+ OR(32, rdMapped, Imm8(0x1));
+ else
+ AND(32, rdMapped, Imm8(0xFE));
+ }
+ Comp_JumpTo(rdMapped.GetSimpleReg());
+ }
+ }
+}
+
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+{
+ int regsCount = regs.Count();
+
+ if (regsCount == 0)
+ return 0; // actually not the right behaviour TODO: fix me
+
+ if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
+ {
+ int flags = 0;
+ if (store)
+ flags |= memop_Store;
+ if (decrement && preinc)
+ flags |= memop_SubtractOffset;
+ Op2 offset = preinc ? Op2(4) : Op2(0);
+
+ Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
+
+ return decrement ? -4 : 4;
+ }
+
+ s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
+
+ int expectedTarget = Num == 0
+ ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+ : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
+
+ if (!store)
+ Comp_AddCycles_CDI();
+ else
+ Comp_AddCycles_CD();
+
+ bool compileFastPath = Config::JIT_FastMemory
+ && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
+
+ // we need to make sure that the stack stays aligned to 16 bytes
+#ifdef _WIN32
+ // include shadow
+ u32 stackAlloc = (((regsCount + 4 + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
+#else
+ u32 stackAlloc = (((regsCount + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8;
+#endif
+ u32 allocOffset = stackAlloc - regsCount * 8;
+
+ if (decrement)
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4 + (preinc ? 0 : 4)));
+ else
+ MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(preinc ? 4 : 0));
+
+ if (compileFastPath)
+ {
+ AND(32, R(RSCRATCH4), Imm8(~3));
+
+ u8* fastPathStart = GetWritableCodePtr();
+ u8* firstLoadStoreAddr;
+
+ bool firstLoadStore = true;
+
+ MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start));
+ ADD(64, R(RSCRATCH2), R(RSCRATCH4));
+ MOV(32, R(RSCRATCH3), R(RSCRATCH4));
+
+ u32 offset = 0;
+ for (int reg : regs)
+ {
+ if (firstLoadStore)
+ firstLoadStoreAddr = GetWritableCodePtr();
+
+ OpArg mem = MDisp(RSCRATCH2, offset);
+ if (store)
+ {
+ if (RegCache.LoadedRegs & (1 << reg))
+ {
+ MOV(32, mem, MapReg(reg));
+ }
+ else
+ {
+ LoadReg(reg, RSCRATCH);
+ if (firstLoadStore)
+ firstLoadStoreAddr = GetWritableCodePtr();
+ MOV(32, mem, R(RSCRATCH));
+ }
+ }
+ else
+ {
+ if (RegCache.LoadedRegs & (1 << reg))
+ {
+ MOV(32, MapReg(reg), mem);
+ }
+ else
+ {
+ MOV(32, R(RSCRATCH), mem);
+ SaveReg(reg, RSCRATCH);
+ }
+ }
+ offset += 4;
+
+ firstLoadStore = false;
+ }
+
+ LoadStorePatch patch;
+ patch.Size = GetWritableCodePtr() - fastPathStart;
+ patch.Offset = fastPathStart - firstLoadStoreAddr;
+ SwitchToFarCode();
+ patch.PatchFunc = GetWritableCodePtr();
+
+ LoadStorePatches[firstLoadStoreAddr] = patch;
+ }
+
+ if (!store)
+ {
+ PushRegs(false);
+
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
+ if (allocOffset == 0)
+ MOV(64, R(ABI_PARAM2), R(RSP));
+ else
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
+
+ switch (Num * 2 | NDS::ConsoleType)
+ {
+ case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
+ }
+
+ PopRegs(false);
+
+ if (allocOffset)
+ ADD(64, R(RSP), Imm8(allocOffset));
+
+ bool firstUserMode = true;
+ for (int reg : regs)
+ {
+ if (usermode && !regs[15] && reg >= 8 && reg < 15)
+ {
+ if (firstUserMode)
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ firstUserMode = false;
+ }
+ MOV(32, R(RSCRATCH2), Imm32(reg - 8));
+ POP(RSCRATCH3);
+ CALL(WriteBanked);
+ FixupBranch successfulWritten = J_CC(CC_NC);
+ if (RegCache.LoadedRegs & (1 << reg))
+ MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
+ else
+ SaveReg(reg, RSCRATCH3);
+ SetJumpTarget(successfulWritten);
+ }
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
+ {
+ assert(reg != 15);
+
+ POP(RSCRATCH);
+ SaveReg(reg, RSCRATCH);
+ }
+ else
+ {
+ POP(MapReg(reg).GetSimpleReg());
+ }
+ }
+ }
+ else
+ {
+ bool firstUserMode = true;
+ for (int reg = 15; reg >= 0; reg--)
+ {
+ if (regs[reg])
+ {
+ if (usermode && reg >= 8 && reg < 15)
+ {
+ if (firstUserMode)
+ {
+ MOV(32, R(RSCRATCH), R(RCPSR));
+ AND(32, R(RSCRATCH), Imm8(0x1F));
+ firstUserMode = false;
+ }
+ if (RegCache.Mapping[reg] == INVALID_REG)
+ LoadReg(reg, RSCRATCH3);
+ else
+ MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg]));
+ MOV(32, R(RSCRATCH2), Imm32(reg - 8));
+ CALL(ReadBanked);
+ PUSH(RSCRATCH3);
+ }
+ else if (!(RegCache.LoadedRegs & (1 << reg)))
+ {
+ LoadReg(reg, RSCRATCH);
+ PUSH(RSCRATCH);
+ }
+ else
+ {
+ PUSH(MapReg(reg).GetSimpleReg());
+ }
+ }
+ }
+
+ if (allocOffset)
+ SUB(64, R(RSP), Imm8(allocOffset));
+
+ PushRegs(false);
+
+ MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
+ if (allocOffset)
+ LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
+ else
+ MOV(64, R(ABI_PARAM2), R(RSP));
+
+ MOV(32, R(ABI_PARAM3), Imm32(regsCount));
+ if (Num == 0)
+ MOV(64, R(ABI_PARAM4), R(RCPU));
+
+ switch (Num * 2 | NDS::ConsoleType)
+ {
+ case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
+ case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
+ case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
+ case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
+ }
+
+ ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
+
+ PopRegs(false);
+ }
+
+ if (compileFastPath)
+ {
+ RET();
+ SwitchToNearCode();
+ }
+
+ if (!store && regs[15])
+ {
+ if (Num == 1)
+ {
+ if (Thumb)
+ OR(32, MapReg(15), Imm8(1));
+ else
+ AND(32, MapReg(15), Imm8(0xFE));
+ }
+ Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
+ }
+
+ return offset;
+}
+
+
+void Compiler::A_Comp_MemWB()
+{
+ bool load = CurInstr.Instr & (1 << 20);
+ bool byte = CurInstr.Instr & (1 << 22);
+ int size = byte ? 8 : 32;
+
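+ // e.g. LDR r0, [r1, #-4]! decodes to flags = memop_Writeback | memop_SubtractOffset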
+ int flags = 0;
+ if (!load)
+ flags |= memop_Store;
+ if (!(CurInstr.Instr & (1 << 24)))
+ flags |= memop_Post;
+ if (CurInstr.Instr & (1 << 21))
+ flags |= memop_Writeback;
+ if (!(CurInstr.Instr & (1 << 23)))
+ flags |= memop_SubtractOffset;
+
+ Op2 offset;
+ if (!(CurInstr.Instr & (1 << 25)))
+ {
+ offset = Op2(CurInstr.Instr & 0xFFF);
+ }
+ else
+ {
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ int amount = (CurInstr.Instr >> 7) & 0x1F;
+ int rm = CurInstr.A_Reg(0);
+
+ offset = Op2(rm, op, amount);
+ }
+
+ Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
+}
+
+void Compiler::A_Comp_MemHalf()
+{
+ Op2 offset = CurInstr.Instr & (1 << 22)
+ ? Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0))
+ : Op2(CurInstr.A_Reg(0), 0, 0);
+
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ bool load = CurInstr.Instr & (1 << 20);
+
+ bool signExtend = false;
+ int size;
+ if (!load)
+ {
+ size = op == 1 ? 16 : 32;
+ load = op == 2;
+ }
+ else
+ {
+ size = op == 2 ? 8 : 16;
+ signExtend = op > 1;
+ }
+
+ if (size == 32 && Num == 1)
+ return; // NOP
+
+ int flags = 0;
+ if (signExtend)
+ flags |= memop_SignExtend;
+ if (!load)
+ flags |= memop_Store;
+ if (!(CurInstr.Instr & (1 << 24)))
+ flags |= memop_Post;
+ if (!(CurInstr.Instr & (1 << 23)))
+ flags |= memop_SubtractOffset;
+ if (CurInstr.Instr & (1 << 21))
+ flags |= memop_Writeback;
+
+ Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
+}
+
+void Compiler::T_Comp_MemReg()
+{
+ int op = (CurInstr.Instr >> 10) & 0x3;
+ bool load = op & 0x2;
+ bool byte = op & 0x1;
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
+ byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::A_Comp_LDM_STM()
+{
+ BitSet16 regs(CurInstr.Instr & 0xFFFF);
+
+ bool load = CurInstr.Instr & (1 << 20);
+ bool pre = CurInstr.Instr & (1 << 24);
+ bool add = CurInstr.Instr & (1 << 23);
+ bool writeback = CurInstr.Instr & (1 << 21);
+ bool usermode = CurInstr.Instr & (1 << 22);
+
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+
+ s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
+
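+ // if the base register is also loaded: the ARM9 only writes back when it is the only
+ // register in the list or not the last one, the ARM7 never writes back in that case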
+ if (load && writeback && regs[CurInstr.A_Reg(16)])
+ writeback = Num == 0
+ ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
+ : false;
+ if (writeback)
+ ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
+}
+
+void Compiler::T_Comp_MemImm()
+{
+ int op = (CurInstr.Instr >> 11) & 0x3;
+ bool load = op & 0x1;
+ bool byte = op & 0x2;
+ u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
+ byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_MemRegHalf()
+{
+ int op = (CurInstr.Instr >> 10) & 0x3;
+ bool load = op != 0;
+ int size = op != 1 ? 16 : 8;
+ bool signExtend = op & 1;
+
+ int flags = 0;
+ if (signExtend)
+ flags |= memop_SignExtend;
+ if (!load)
+ flags |= memop_Store;
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0),
+ size, flags);
+}
+
+void Compiler::T_Comp_MemImmHalf()
+{
+ u32 offset = (CurInstr.Instr >> 5) & 0x3E;
+ bool load = CurInstr.Instr & (1 << 11);
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
+ load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_LoadPCRel()
+{
+ u32 offset = (CurInstr.Instr & 0xFF) << 2;
+ u32 addr = (R15 & ~0x2) + offset;
+ if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
+ Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
+}
+
+void Compiler::T_Comp_MemSPRel()
+{
+ u32 offset = (CurInstr.Instr & 0xFF) * 4;
+ bool load = CurInstr.Instr & (1 << 11);
+
+ Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32,
+ load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_PUSH_POP()
+{
+ bool load = CurInstr.Instr & (1 << 11);
+ BitSet16 regs(CurInstr.Instr & 0xFF);
+ if (CurInstr.Instr & (1 << 8))
+ {
+ if (load)
+ regs[15] = true;
+ else
+ regs[14] = true;
+ }
+
+ OpArg sp = MapReg(13);
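+ // PUSH is a pre-decrementing store, POP a post-incrementing load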
+ s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
+
+ ADD(32, sp, Imm8(offset)); // offset will always be in range since PUSH accesses 9 regs max
+}
+
+void Compiler::T_Comp_LDMIA_STMIA()
+{
+ BitSet16 regs(CurInstr.Instr & 0xFF);
+ OpArg rb = MapReg(CurInstr.T_Reg(8));
+ bool load = CurInstr.Instr & (1 << 11);
+
+ s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
+
+ if (!load || !regs[CurInstr.T_Reg(8)])
+ ADD(32, rb, Imm8(offset));
+}
+
+}
\ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Offsets.h b/src/ARMJIT_x64/ARMJIT_Offsets.h
new file mode 100644
index 0000000..a73dd59
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Offsets.h
@@ -0,0 +1,3 @@
+#define ARM_CPSR_offset 0x64
+#define ARM_Cycles_offset 0xc
+#define ARM_StopExecution_offset 0x10