Diffstat (limited to 'src/ARMJIT_A64')
-rw-r--r--   src/ARMJIT_A64/ARMJIT_ALU.cpp         837
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Branch.cpp      452
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Compiler.cpp    707
-rw-r--r--   src/ARMJIT_A64/ARMJIT_Compiler.h      234
-rw-r--r--   src/ARMJIT_A64/ARMJIT_LoadStore.cpp   848
5 files changed, 3078 insertions, 0 deletions
diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp
new file mode 100644
index 0000000..0fe6a97
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp
@@ -0,0 +1,837 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Arm64Gen;
+
+namespace ARMJIT
+{
+
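+// Emits a register-specified shift (op: 0 = LSL, 1 = LSR, 2 = ASR, 3 = ROR).
+// Only the bottom 8 bits of rs count, and shift amounts of 32 or more have special
+// results on the ARM, which the ARMv8 variable shifts (amount taken modulo 32)
+// don't reproduce - hence the CMP/CSEL fixups below.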
+void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs)
+{
+ if (!(CurInstr.SetFlags & 0x2))
+ S = false;
+
+ CPSRDirty |= S;
+
+ UBFX(W1, rs, 0, 8);
+
+ if (!S)
+ {
+ if (op == 3)
+ RORV(W0, op2.Reg.Rm, W1);
+ else
+ {
+ CMP(W1, 32);
+ if (op == 2)
+ {
+ MOVI2R(W2, 31);
+ CSEL(W1, W2, W1, CC_GE);
+ ASRV(W0, op2.Reg.Rm, W1);
+ }
+ else
+ {
+ if (op == 0)
+ LSLV(W0, op2.Reg.Rm, W1);
+ else if (op == 1)
+ LSRV(W0, op2.Reg.Rm, W1);
+ CSEL(W0, WZR, W0, CC_GE);
+ }
+ }
+ }
+ else
+ {
+ MOV(W0, op2.Reg.Rm);
+ FixupBranch zero = CBZ(W1);
+
+ SUB(W1, W1, 1);
+ if (op == 3)
+ {
+ RORV(W0, op2.Reg.Rm, W1);
+ BFI(RCPSR, W0, 29, 1);
+ }
+ else
+ {
+ CMP(W1, 31);
+ if (op == 2)
+ {
+ MOVI2R(W2, 31);
+ CSEL(W1, W2, W1, CC_GT);
+ ASRV(W0, op2.Reg.Rm, W1);
+ BFI(RCPSR, W0, 29, 1);
+ }
+ else
+ {
+ if (op == 0)
+ {
+ LSLV(W0, op2.Reg.Rm, W1);
+ UBFX(W1, W0, 31, 1);
+ }
+ else if (op == 1)
+ LSRV(W0, op2.Reg.Rm, W1);
+ CSEL(W1, WZR, op ? W0 : W1, CC_GT);
+ BFI(RCPSR, W1, 29, 1);
+ CSEL(W0, WZR, W0, CC_GE);
+ }
+ }
+
+ MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1));
+ SetJumpTarget(zero);
+ }
+ op2 = Op2(W0, ST_LSL, 0);
+}
+
+void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp)
+{
+ if (!(CurInstr.SetFlags & 0x2))
+ S = false;
+
+ CPSRDirty |= S;
+
+ switch (op)
+ {
+ case 0: // LSL
+ if (S && amount)
+ {
+ UBFX(tmp, op2.Reg.Rm, 32 - amount, 1);
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
+ return;
+ case 1: // LSR
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ if (amount == 0)
+ {
+ op2 = Op2(0);
+ return;
+ }
+ op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
+ return;
+ case 2: // ASR
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31);
+ return;
+ case 3: // ROR
+ if (amount == 0)
+ {
+ UBFX(tmp, RCPSR, 29, 1);
+ LSL(tmp, tmp, 31);
+ if (S)
+ BFI(RCPSR, op2.Reg.Rm, 29, 1);
+ ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
+
+ op2 = Op2(tmp, ST_LSL, 0);
+ }
+ else
+ {
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, amount - 1, 1);
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
+ }
+ return;
+ }
+}
+
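+// Copies the flags the current instruction writes (per CurInstr.SetFlags) from the
+// host NZCV flags into the emulated CPSR. C and V are only transferred when
+// retriveCV is set; otherwise they are unaffected or were already written separately
+// (e.g. the shifter carry-out).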
+void Compiler::Comp_RetriveFlags(bool retriveCV)
+{
+ if (CurInstr.SetFlags)
+ CPSRDirty = true;
+
+ if (CurInstr.SetFlags & 0x4)
+ {
+ CSET(W0, CC_EQ);
+ BFI(RCPSR, W0, 30, 1);
+ }
+ if (CurInstr.SetFlags & 0x8)
+ {
+ CSET(W0, CC_MI);
+ BFI(RCPSR, W0, 31, 1);
+ }
+ if (retriveCV)
+ {
+ if (CurInstr.SetFlags & 0x2)
+ {
+ CSET(W0, CC_CS);
+ BFI(RCPSR, W0, 29, 1);
+ }
+ if (CurInstr.SetFlags & 0x1)
+ {
+ CSET(W0, CC_VS);
+ BFI(RCPSR, W0, 28, 1);
+ }
+ }
+}
+
+void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
+{
+ if (S && !CurInstr.SetFlags)
+ S = false;
+
+ switch (op)
+ {
+ case 0x0: // AND
+ if (S)
+ {
+ if (op2.IsImm)
+ ANDSI2R(rd, rn, op2.Imm, W0);
+ else
+ ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ ANDI2R(rd, rn, op2.Imm, W0);
+ else
+ AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ case 0x1: // EOR
+ if (op2.IsImm)
+ EORI2R(rd, rn, op2.Imm, W0);
+ else
+ EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ if (S && FlagsNZNeeded())
+ TST(rd, rd);
+ break;
+ case 0xC: // ORR
+ if (op2.IsImm)
+ ORRI2R(rd, rn, op2.Imm, W0);
+ else
+ ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ if (S && FlagsNZNeeded())
+ TST(rd, rd);
+ break;
+ case 0xE: // BIC
+ if (S)
+ {
+ if (op2.IsImm)
+ ANDSI2R(rd, rn, ~op2.Imm, W0);
+ else
+ BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ ANDI2R(rd, rn, ~op2.Imm, W0);
+ else
+ BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ }
+
+ if (S)
+ Comp_RetriveFlags(false);
+}
+
+void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
+{
+ if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
+ {
+ MOV(W0, op2.Reg.Rm, op2.ToArithOption());
+ op2 = Op2(W0, ST_LSL, 0);
+ }
+
+ if (S && !CurInstr.SetFlags)
+ S = false;
+
+ bool CVInGP = false;
+ switch (op)
+ {
+ case 0x2: // SUB
+ if (S)
+ {
+ if (op2.IsImm)
+ SUBSI2R(rd, rn, op2.Imm, W0);
+ else
+ SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ {
+ MOVI2R(W2, op2.Imm);
+ SUBI2R(rd, rn, op2.Imm, W0);
+ }
+ else
+ SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ case 0x3: // RSB
+ if (op2.IsZero())
+ {
+ op2 = Op2(WZR);
+ }
+ else if (op2.IsImm)
+ {
+ MOVI2R(W1, op2.Imm);
+ op2 = Op2(W1);
+ }
+ else if (op2.Reg.ShiftAmount != 0)
+ {
+ MOV(W1, op2.Reg.Rm, op2.ToArithOption());
+ op2 = Op2(W1);
+ }
+
+ if (S)
+ SUBS(rd, op2.Reg.Rm, rn);
+ else
+ SUB(rd, op2.Reg.Rm, rn);
+ break;
+ case 0x4: // ADD
+ if (S)
+ {
+ if (op2.IsImm)
+ ADDSI2R(rd, rn, op2.Imm, W0);
+ else
+ ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ ADDI2R(rd, rn, op2.Imm, W0);
+ else
+ ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ case 0x5: // ADC
+ UBFX(W2, RCPSR, 29, 1);
+ if (S)
+ {
+ CVInGP = true;
+ ADDS(W1, rn, W2);
+ CSET(W2, CC_CS);
+ CSET(W3, CC_VS);
+ if (op2.IsImm)
+ ADDSI2R(rd, W1, op2.Imm, W0);
+ else
+ ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+ CSINC(W2, W2, WZR, CC_CC);
+ CSINC(W3, W3, WZR, CC_VC);
+ }
+ else
+ {
+ ADD(W1, rn, W2);
+ if (op2.IsImm)
+ ADDI2R(rd, W1, op2.Imm, W0);
+ else
+ ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ case 0x6: // SBC
+ UBFX(W2, RCPSR, 29, 1);
+ // W1 = -op2 - 1
+ if (op2.IsImm)
+ MOVI2R(W1, ~op2.Imm);
+ else
+ ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
+ if (S)
+ {
+ CVInGP = true;
+ ADDS(W1, W2, W1);
+ CSET(W2, CC_CS);
+ CSET(W3, CC_VS);
+ ADDS(rd, rn, W1);
+ CSINC(W2, W2, WZR, CC_CC);
+ CSINC(W3, W3, WZR, CC_VC);
+ }
+ else
+ {
+ ADD(W1, W2, W1);
+ ADD(rd, rn, W1);
+ }
+ break;
+ case 0x7: // RSC
+ UBFX(W2, RCPSR, 29, 1);
+ // W1 = -rn - 1
+ MVN(W1, rn);
+ if (S)
+ {
+ CVInGP = true;
+ ADDS(W1, W2, W1);
+ CSET(W2, CC_CS);
+ CSET(W3, CC_VS);
+ if (op2.IsImm)
+ ADDSI2R(rd, W1, op2.Imm);
+ else
+ ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+ CSINC(W2, W2, WZR, CC_CC);
+ CSINC(W3, W3, WZR, CC_VC);
+ }
+ else
+ {
+ ADD(W1, W2, W1);
+ if (op2.IsImm)
+ ADDI2R(rd, W1, op2.Imm);
+ else
+ ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ }
+
+ if (S)
+ {
+ if (CVInGP)
+ {
+ BFI(RCPSR, W2, 29, 1);
+ BFI(RCPSR, W3, 28, 1);
+ }
+ Comp_RetriveFlags(!CVInGP);
+ }
+}
+
+void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2)
+{
+ if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
+ {
+ MOV(W0, op2.Reg.Rm, op2.ToArithOption());
+ op2 = Op2(W0, ST_LSL, 0);
+ }
+
+ switch (op)
+ {
+ case 0x8: // TST
+ if (op2.IsImm)
+ TSTI2R(rn, op2.Imm, W0);
+ else
+ ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption());
+ break;
+ case 0x9: // TEQ
+ if (op2.IsImm)
+ EORI2R(W0, rn, op2.Imm, W0);
+ else
+ EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
+ TST(W0, W0);
+ break;
+ case 0xA: // CMP
+ if (op2.IsImm)
+ CMPI2R(rn, op2.Imm, W0);
+ else
+ CMP(rn, op2.Reg.Rm, op2.ToArithOption());
+ break;
+ case 0xB: // CMN
+ if (op2.IsImm)
+ ADDSI2R(WZR, rn, op2.Imm, W0);
+ else
+ CMN(rn, op2.Reg.Rm, op2.ToArithOption());
+ break;
+ }
+
+ Comp_RetriveFlags(op >= 0xA);
+}
+
+// also counts cycles!
+void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
+{
+ if (CurInstr.Instr & (1 << 25))
+ {
+ Comp_AddCycles_C();
+ op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
+ }
+ else
+ {
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
+ if (CurInstr.Instr & (1 << 4))
+ {
+ Comp_AddCycles_CI(1);
+
+ ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+ if (CurInstr.A_Reg(0) == 15)
+ {
+ ADD(W0, op2.Reg.Rm, 4);
+ op2.Reg.Rm = W0;
+ }
+ Comp_RegShiftReg(op, S, op2, rs);
+ }
+ else
+ {
+ Comp_AddCycles_C();
+
+ int amount = (CurInstr.Instr >> 7) & 0x1F;
+ Comp_RegShiftImm(op, amount, S, op2);
+ }
+ }
+}
+
+void Compiler::A_Comp_ALUCmpOp()
+{
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
+ Op2 op2;
+ A_Comp_GetOp2(op <= 0x9, op2);
+
+ Comp_Compare(op, rn, op2);
+}
+
+void Compiler::A_Comp_ALUMovOp()
+{
+ bool S = CurInstr.Instr & (1 << 20);
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+ Op2 op2;
+ A_Comp_GetOp2(S, op2);
+
+ if (op == 0xF) // MVN
+ {
+ if (op2.IsImm)
+ {
+ if (CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
+ MOVI2R(rd, ~op2.Imm);
+ }
+ else
+ ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else // MOV
+ {
+ if (op2.IsImm)
+ {
+ if (CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
+ MOVI2R(rd, op2.Imm);
+ }
+ else
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ }
+
+ if (S)
+ {
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ }
+
+ if (CurInstr.Info.Branches())
+ Comp_JumpTo(rd, true, S);
+}
+
+void Compiler::A_Comp_ALUTriOp()
+{
+ bool S = CurInstr.Instr & (1 << 20);
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+ bool logical = (1 << op) & 0xF303;
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
+ Op2 op2;
+ A_Comp_GetOp2(S && logical, op2);
+
+ if (op2.IsImm && op2.Imm == 0)
+ op2 = Op2(WZR, ST_LSL, 0);
+
+ if (logical)
+ Comp_Logical(op, S, rd, rn, op2);
+ else
+ Comp_Arithmetic(op, S, rd, rn, op2);
+
+ if (CurInstr.Info.Branches())
+ Comp_JumpTo(rd, true, S);
+}
+
+void Compiler::A_Comp_Clz()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+ ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+
+ CLZ(rd, rm);
+
+ assert(Num == 0);
+}
+
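+// Shared helper for MUL/MLA. On the ARM7 the multiplier's internal cycle count
+// depends on the magnitude of rs, estimated here from its leading zero/sign bits;
+// on the ARM9 a fixed cycle count is charged instead.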
+void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn)
+{
+ if (Num == 0)
+ {
+ Comp_AddCycles_CI(S ? 3 : 1);
+ }
+ else
+ {
+ CLZ(W0, rs);
+ CLS(W1, rs);
+ CMP(W0, W1);
+ CSEL(W0, W0, W1, CC_GT);
+ Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
+ }
+
+ if (mla)
+ MADD(rd, rm, rs, rn);
+ else
+ MUL(rd, rm, rs);
+
+ if (S && FlagsNZNeeded())
+ {
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ }
+}
+
+void Compiler::A_Comp_Mul_Long()
+{
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
+ ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+ ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+
+ bool S = CurInstr.Instr & (1 << 20);
+ bool add = CurInstr.Instr & (1 << 21);
+ bool sign = CurInstr.Instr & (1 << 22);
+
+ if (Num == 0)
+ {
+ Comp_AddCycles_CI(S ? 3 : 1);
+ }
+ else
+ {
+ CLZ(W0, rs);
+ CLS(W1, rs);
+ CMP(W0, W1);
+ CSEL(W0, W0, W1, CC_GT);
+ Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
+ }
+
+ if (add)
+ {
+ MOV(W0, rn);
+ BFI(X0, EncodeRegTo64(rd), 32, 32);
+ if (sign)
+ SMADDL(EncodeRegTo64(rn), rm, rs, X0);
+ else
+ UMADDL(EncodeRegTo64(rn), rm, rs, X0);
+ if (S && FlagsNZNeeded())
+ TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
+ UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
+ }
+ else
+ {
+ if (sign)
+ SMULL(EncodeRegTo64(rn), rm, rs);
+ else
+ UMULL(EncodeRegTo64(rn), rm, rs);
+ if (S && FlagsNZNeeded())
+ TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
+ UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
+ }
+
+ if (S)
+ Comp_RetriveFlags(false);
+}
+
+void Compiler::A_Comp_Mul()
+{
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
+ ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+ ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+
+ bool S = CurInstr.Instr & (1 << 20);
+ bool mla = CurInstr.Instr & (1 << 21);
+ ARM64Reg rn = INVALID_REG;
+ if (mla)
+ rn = MapReg(CurInstr.A_Reg(12));
+
+ Comp_Mul_Mla(S, mla, rd, rm, rs, rn);
+}
+
+void Compiler::T_Comp_ShiftImm()
+{
+ Comp_AddCycles_C();
+
+ u32 op = (CurInstr.Instr >> 11) & 0x3;
+ int amount = (CurInstr.Instr >> 6) & 0x1F;
+
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
+ Op2 op2;
+ op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
+ Comp_RegShiftImm(op, amount, true, op2);
+ if (op2.IsImm)
+ MOVI2R(rd, op2.Imm);
+ else
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+
+ Comp_RetriveFlags(false);
+}
+
+void Compiler::T_Comp_AddSub_()
+{
+ Comp_AddCycles_C();
+
+ Op2 op2;
+ if (CurInstr.Instr & (1 << 10))
+ op2 = Op2((CurInstr.Instr >> 6) & 0x7);
+ else
+ op2 = Op2(MapReg(CurInstr.T_Reg(6)));
+
+ Comp_Arithmetic(
+ CurInstr.Instr & (1 << 9) ? 0x2 : 0x4,
+ true,
+ MapReg(CurInstr.T_Reg(0)),
+ MapReg(CurInstr.T_Reg(3)),
+ op2);
+}
+
+void Compiler::T_Comp_ALUImm8()
+{
+ Comp_AddCycles_C();
+
+ u32 imm = CurInstr.Instr & 0xFF;
+ int op = (CurInstr.Instr >> 11) & 0x3;
+
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
+
+ switch (op)
+ {
+ case 0:
+ MOVI2R(rd, imm);
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ break;
+ case 1:
+ Comp_Compare(0xA, rd, Op2(imm));
+ break;
+ case 2:
+ case 3:
+ Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
+ break;
+ }
+}
+
+void Compiler::T_Comp_ALU()
+{
+ int op = (CurInstr.Instr >> 6) & 0xF;
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
+ ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
+
+ if ((op >= 0x2 && op <= 0x4) || op == 0x7)
+ Comp_AddCycles_CI(1);
+ else
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0x0:
+ Comp_Logical(0x0, true, rd, rd, Op2(rs));
+ break;
+ case 0x1:
+ Comp_Logical(0x1, true, rd, rd, Op2(rs));
+ break;
+ case 0x2:
+ case 0x3:
+ case 0x4:
+ case 0x7:
+ {
+ Op2 op2;
+ op2.Reg.Rm = rd;
+ Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs);
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ }
+ break;
+ case 0x5:
+ Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
+ break;
+ case 0x6:
+ Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
+ break;
+ case 0x8:
+ Comp_Compare(0x8, rd, Op2(rs));
+ break;
+ case 0x9:
+ Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
+ break;
+ case 0xA:
+ Comp_Compare(0xA, rd, Op2(rs));
+ break;
+ case 0xB:
+ Comp_Compare(0xB, rd, Op2(rs));
+ break;
+ case 0xC:
+ Comp_Logical(0xC, true, rd, rd, Op2(rs));
+ break;
+ case 0xD:
+ Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
+ break;
+ case 0xE:
+ Comp_Logical(0xE, true, rd, rd, Op2(rs));
+ break;
+ case 0xF:
+ MVN(rd, rs);
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ break;
+ }
+}
+
+void Compiler::T_Comp_ALU_HiReg()
+{
+ u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
+ ARM64Reg rdMapped = MapReg(rd);
+ ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
+
+ u32 op = (CurInstr.Instr >> 8) & 0x3;
+
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0:
+ Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
+ break;
+ case 1:
+ Comp_Compare(0xA, rdMapped, rs);
+ return;
+ case 2:
+ MOV(rdMapped, rs);
+ break;
+ }
+
+ if (rd == 15)
+ {
+ Comp_JumpTo(rdMapped, false, false);
+ }
+}
+
+void Compiler::T_Comp_AddSP()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg sp = MapReg(13);
+ u32 offset = (CurInstr.Instr & 0x7F) << 2;
+ if (CurInstr.Instr & (1 << 7))
+ SUB(sp, sp, offset);
+ else
+ ADD(sp, sp, offset);
+}
+
+void Compiler::T_Comp_RelAddr()
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
+ u32 offset = (CurInstr.Instr & 0xFF) << 2;
+ if (CurInstr.Instr & (1 << 11))
+ {
+ ARM64Reg sp = MapReg(13);
+ ADD(rd, sp, offset);
+ }
+ else
+ MOVI2R(rd, (R15 & ~2) + offset);
+}
+
+}
\ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp
new file mode 100644
index 0000000..542f0b7
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp
@@ -0,0 +1,452 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Arm64Gen;
+
+// hack
+const int kCodeCacheTiming = 3;
+
+namespace ARMJIT
+{
+
+template <typename T>
+void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
+{
+ cpu->JumpTo(addr, changeCPSR);
+}
+
+void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
+{
+ // we can simplify constant branches by a lot
+ // it's not completely safe to make assumptions (e.g. about which instructions to preload),
+ // so we'll see how it works out
+
+ IrregularCycles = true;
+
+ u32 newPC;
+ u32 cycles = 0;
+ bool setupRegion = false;
+
+ if (addr & 0x1 && !Thumb)
+ {
+ CPSRDirty = true;
+ ORRI2R(RCPSR, RCPSR, 0x20);
+ }
+ else if (!(addr & 0x1) && Thumb)
+ {
+ CPSRDirty = true;
+ ANDI2R(RCPSR, RCPSR, ~0x20);
+ }
+
+ if (Num == 0)
+ {
+ ARMv5* cpu9 = (ARMv5*)CurCPU;
+
+ u32 oldregion = R15 >> 24;
+ u32 newregion = addr >> 24;
+
+ u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
+ u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
+ cpu9->RegionCodeCycles = regionCodeCycles;
+
+ MOVI2R(W0, regionCodeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));
+
+ setupRegion = newregion != oldregion;
+ if (setupRegion)
+ cpu9->SetupCodeMem(addr);
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+ // two-opcodes-at-once fetch
+ // doesn't matter if we put garbage in the MSbs there
+ if (addr & 0x2)
+ {
+ cpu9->CodeRead32(addr-2, true) >> 16;
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+2, false);
+ cycles += CurCPU->CodeCycles;
+ }
+ else
+ {
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ }
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+4, false);
+ cycles += cpu9->CodeCycles;
+ }
+
+ cpu9->RegionCodeCycles = compileTimeCodeCycles;
+ if (setupRegion)
+ cpu9->SetupCodeMem(R15);
+ }
+ else
+ {
+ ARMv4* cpu7 = (ARMv4*)CurCPU;
+
+ u32 codeRegion = addr >> 24;
+ u32 codeCycles = addr >> 15; // cheato
+
+ cpu7->CodeRegion = codeRegion;
+ cpu7->CodeCycles = codeCycles;
+
+ MOVI2R(W0, codeRegion);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
+ MOVI2R(W0, codeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+ // this is necessary because of the ARM7 BIOS protection
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC;
+
+ cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
+
+ CurCPU->R[15] = compileTimePC;
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC;
+
+ cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
+
+ CurCPU->R[15] = compileTimePC;
+ }
+
+ cpu7->CodeRegion = R15 >> 24;
+ cpu7->CodeCycles = addr >> 15;
+ }
+
+ if (Exit)
+ {
+ MOVI2R(W0, newPC);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
+ }
+ if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
+ ConstantCycles += cycles;
+ else
+ ADD(RCycles, RCycles, cycles);
+}
+
+
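+// Generated jump stub for the ARM9 (Gen_JumpTo7 below is the ARM7 counterpart):
+// W0 holds the branch target; the stub updates R[15], the cycle counter and (for
+// kind 0) the CPSR Thumb bit, then returns.
+// kind: 0 = may switch ARM/Thumb based on bit 0 of the target, 1 = stay ARM, 2 = stay Thumb.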
+void* Compiler::Gen_JumpTo9(int kind)
+{
+ AlignCode16();
+ void* res = GetRXPtr();
+
+ MOVI2R(W2, kCodeCacheTiming);
+ // W1 - code cycles non branch
+ // W2 - branch code cycles
+ LSR(W1, W0, 12);
+ LSL(W1, W1, 2);
+ ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
+ LDRB(W1, RCPU, W1);
+
+ LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
+
+ STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
+
+ CMP(W0, W3);
+ FixupBranch outsideITCM = B(CC_LO);
+ MOVI2R(W1, 1);
+ MOVI2R(W2, 1);
+ SetJumpTarget(outsideITCM);
+
+ FixupBranch switchToThumb;
+ if (kind == 0)
+ switchToThumb = TBNZ(W0, 0);
+
+ if (kind == 0 || kind == 1)
+ {
+ ANDI2R(W0, W0, ~3);
+
+ if (kind == 0)
+ ANDI2R(RCPSR, RCPSR, ~0x20);
+
+ ADD(W3, W0, 4);
+ STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+ ADD(W1, W1, W2);
+ ADD(RCycles, RCycles, W1);
+
+ RET();
+ }
+ if (kind == 0 || kind == 2)
+ {
+ if (kind == 0)
+ {
+ SetJumpTarget(switchToThumb);
+
+ ORRI2R(RCPSR, RCPSR, 0x20);
+ }
+
+ ANDI2R(W0, W0, ~1);
+
+ ADD(W3, W0, 2);
+ STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+ FixupBranch halfwordLoc = TBZ(W0, 1);
+ ADD(W1, W1, W2);
+ ADD(RCycles, RCycles, W1);
+ RET();
+
+ SetJumpTarget(halfwordLoc);
+ ADD(RCycles, RCycles, W2);
+ RET();
+ }
+
+ return res;
+}
+
+void* Compiler::Gen_JumpTo7(int kind)
+{
+ void* res = GetRXPtr();
+
+ LSR(W1, W0, 24);
+ STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
+ LSR(W1, W0, 15);
+ STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
+
+ MOVP2R(X2, NDS::ARM7MemTimings);
+ LDR(W3, X2, ArithOption(W1, true));
+
+ FixupBranch switchToThumb;
+ if (kind == 0)
+ switchToThumb = TBNZ(W0, 0);
+
+ if (kind == 0 || kind == 1)
+ {
+ UBFX(W2, W3, 0, 8);
+ UBFX(W3, W3, 8, 8);
+ ADD(W2, W3, W2);
+ ADD(RCycles, RCycles, W2);
+
+ ANDI2R(W0, W0, ~3);
+
+ if (kind == 0)
+ ANDI2R(RCPSR, RCPSR, ~0x20);
+
+ ADD(W3, W0, 4);
+ STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+ RET();
+ }
+ if (kind == 0 || kind == 2)
+ {
+ if (kind == 0)
+ {
+ SetJumpTarget(switchToThumb);
+
+ ORRI2R(RCPSR, RCPSR, 0x20);
+ }
+
+ UBFX(W2, W3, 16, 8);
+ UBFX(W3, W3, 24, 8);
+ ADD(W2, W3, W2);
+ ADD(RCycles, RCycles, W2);
+
+ ANDI2R(W0, W0, ~1);
+
+ ADD(W3, W0, 2);
+ STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+ RET();
+ }
+
+ return res;
+}
+
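+// Jump to an address held in a host register. Without restoreCPSR the precompiled
+// Gen_JumpTo9/Gen_JumpTo7 stubs are used; with restoreCPSR we fall back to calling
+// the interpreter's JumpTo through jumpToTrampoline, which may also switch CPU modes.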
+void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
+{
+ IrregularCycles = true;
+
+ if (!restoreCPSR)
+ {
+ if (switchThumb)
+ CPSRDirty = true;
+ MOV(W0, addr);
+ BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
+ }
+ else
+ {
+ BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
+ bool previouslyDirty = CPSRDirty;
+ SaveCPSR();
+
+ if (restoreCPSR)
+ {
+ if (Thumb || CurInstr.Cond() >= 0xE)
+ RegCache.Flush();
+ else
+ {
+ // the ugly way...
+ // we save the banked registers here only to reload them again after the call
+ for (int reg : hiRegsLoaded)
+ SaveReg(reg, RegCache.Mapping[reg]);
+ }
+ }
+
+ if (switchThumb)
+ MOV(W1, addr);
+ else
+ {
+ if (Thumb)
+ ORRI2R(W1, addr, 1);
+ else
+ ANDI2R(W1, addr, ~1);
+ }
+ MOV(X0, RCPU);
+ MOVI2R(W2, restoreCPSR);
+ if (Num == 0)
+ QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
+ else
+ QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
+
+ if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
+ {
+ for (int reg : hiRegsLoaded)
+ LoadReg(reg, RegCache.Mapping[reg]);
+ }
+
+ if (previouslyDirty)
+ LoadCPSR();
+ CPSRDirty = previouslyDirty;
+ }
+}
+
+void Compiler::A_Comp_BranchImm()
+{
+ int op = (CurInstr.Instr >> 24) & 1;
+ s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
+ u32 target = R15 + offset;
+ bool link = op;
+
+ if (CurInstr.Cond() == 0xF) // BLX_imm
+ {
+ target += (op << 1) + 1;
+ link = true;
+ }
+
+ if (link)
+ MOVI2R(MapReg(14), R15 - 4);
+
+ Comp_JumpTo(target);
+}
+
+void Compiler::A_Comp_BranchXchangeReg()
+{
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
+ MOV(W0, rn);
+ if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
+ MOVI2R(MapReg(14), R15 - 4);
+ Comp_JumpTo(W0, true);
+}
+
+void Compiler::T_Comp_BCOND()
+{
+ u32 cond = (CurInstr.Instr >> 8) & 0xF;
+ FixupBranch skipExecute = CheckCondition(cond);
+
+ s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
+ Comp_JumpTo(R15 + offset + 1, true);
+
+ Comp_BranchSpecialBehaviour();
+
+ FixupBranch skipFailed = B();
+ SetJumpTarget(skipExecute);
+ Comp_AddCycles_C(true);
+
+ if (CurInstr.BranchFlags & branch_FollowCondTaken)
+ {
+ SaveCPSR(false);
+ RegCache.PrepareExit();
+
+ ADD(W0, RCycles, ConstantCycles);
+ ABI_PopRegisters(SavedRegs);
+ RET();
+ }
+
+ SetJumpTarget(skipFailed);
+}
+
+void Compiler::T_Comp_B()
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
+ Comp_JumpTo(R15 + offset + 1);
+}
+
+void Compiler::T_Comp_BranchXchangeReg()
+{
+ bool link = CurInstr.Instr & (1 << 7);
+
+ if (link)
+ {
+ if (Num == 1)
+ {
+ printf("BLX unsupported on ARM7!!!\n");
+ return;
+ }
+ MOV(W0, MapReg(CurInstr.A_Reg(3)));
+ MOVI2R(MapReg(14), R15 - 1);
+ Comp_JumpTo(W0, true);
+ }
+ else
+ {
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
+ Comp_JumpTo(rn, true);
+ }
+}
+
+void Compiler::T_Comp_BL_LONG_1()
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
+ MOVI2R(MapReg(14), R15 + offset);
+ Comp_AddCycles_C();
+}
+
+void Compiler::T_Comp_BL_LONG_2()
+{
+ ARM64Reg lr = MapReg(14);
+ s32 offset = (CurInstr.Instr & 0x7FF) << 1;
+ ADD(W0, lr, offset);
+ MOVI2R(lr, (R15 - 2) | 1);
+ Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
+}
+
+void Compiler::T_Comp_BL_Merged()
+{
+ Comp_AddCycles_C();
+
+ R15 += 2;
+
+ u32 upperPart = CurInstr.Instr >> 16;
+ u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
+ target += (upperPart & 0x7FF) << 1;
+
+ if (Num == 1 || upperPart & (1 << 12))
+ target |= 1;
+
+ MOVI2R(MapReg(14), (R15 - 2) | 1);
+
+ Comp_JumpTo(target);
+}
+
+}
\ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
new file mode 100644
index 0000000..89d0029
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -0,0 +1,707 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMInterpreter.h"
+
+#include "../ARMJIT_Internal.h"
+
+#ifdef __SWITCH__
+#include "../switch/compat_switch.h"
+
+extern char __start__;
+#endif
+
+#include <malloc.h>
+
+using namespace Arm64Gen;
+
+
+namespace ARMJIT
+{
+
+/*
+
+ Recompiling classic ARM to ARMv8 code is at once easier and
+ trickier than compiling to a less closely related architecture
+ like x64. On the one hand a lot of instructions can be translated directly;
+ on the other hand there are a ton of exceptions - for example,
+ ADD and SUB can't take a ROR-shifted second operand on ARMv8.
+ */
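+// As a minimal sketch of how such a case is handled: Comp_Arithmetic first
+// materialises a ROR-shifted operand into a scratch register and then uses a plain
+// register operand, e.g. for an ARM "ADD rd, rn, rm, ROR #8":
+//     MOV(W0, rm, ArithOption(rm, ST_ROR, 8));  // W0 = rm rotated right by 8
+//     ADD(rd, rn, W0);                          // ordinary ARMv8 ADD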
+
+template <>
+const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
+ {W19, W20, W21, W22, W23, W24, W25, W26};
+template <>
+const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
+
+const int JitMemSize = 16 * 1024 * 1024;
+
+void Compiler::MovePC()
+{
+ ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
+}
+
+Compiler::Compiler()
+{
+#ifdef __SWITCH__
+ JitRWBase = memalign(0x1000, JitMemSize);
+
+ JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
+ JitRWStart = virtmemReserve(JitMemSize);
+ MemoryInfo info = {0};
+ u32 pageInfo = {0};
+ int i = 0;
+ while (JitRXStart != NULL)
+ {
+ svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
+ if (info.type != MemType_Unmapped)
+ JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);
+ else
+ break;
+ if (i++ > 8)
+ {
+ printf("couldn't find unmapped place for jit memory\n");
+ JitRXStart = NULL;
+ }
+ }
+
+ assert(JitRXStart != NULL);
+
+ bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
+ assert(succeded);
+ succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
+ assert(succeded);
+ succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
+ assert(succeded);
+
+ SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
+ JitMemUseableSize = JitMemSize;
+ Reset();
+#endif
+
+ for (int i = 0; i < 3; i++)
+ {
+ for (int j = 0; j < 2; j++)
+ {
+ MemFunc9[i][j] = Gen_MemoryRoutine9(8 << i, j);
+ }
+ }
+ MemFunc7[0][0] = (void*)NDS::ARM7Read8;
+ MemFunc7[1][0] = (void*)NDS::ARM7Read16;
+ MemFunc7[2][0] = (void*)NDS::ARM7Read32;
+ MemFunc7[0][1] = (void*)NDS::ARM7Write8;
+ MemFunc7[1][1] = (void*)NDS::ARM7Write16;
+ MemFunc7[2][1] = (void*)NDS::ARM7Write32;
+
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 2; j++)
+ {
+ MemFuncsSeq9[i][j] = Gen_MemoryRoutine9Seq(i, j);
+ MemFuncsSeq7[i][j] = Gen_MemoryRoutine7Seq(i, j);
+ }
+ }
+
+ for (int i = 0; i < 3; i++)
+ {
+ JumpToFuncs9[i] = Gen_JumpTo9(i);
+ JumpToFuncs7[i] = Gen_JumpTo7(i);
+ }
+
+ /*
+ W0 - mode
+ W1 - reg num
+ W3 - in/out value of reg
+ */
+ {
+ ReadBanked = GetRXPtr();
+
+ ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ CMP(W0, 0x11);
+ FixupBranch fiq = B(CC_EQ);
+ SUBS(W1, W1, 13 - 8);
+ ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ FixupBranch notEverything = B(CC_LT);
+ CMP(W0, 0x12);
+ FixupBranch irq = B(CC_EQ);
+ CMP(W0, 0x13);
+ FixupBranch svc = B(CC_EQ);
+ CMP(W0, 0x17);
+ FixupBranch abt = B(CC_EQ);
+ CMP(W0, 0x1B);
+ FixupBranch und = B(CC_EQ);
+ SetJumpTarget(notEverything);
+ RET();
+
+ SetJumpTarget(fiq);
+ LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
+ RET();
+ SetJumpTarget(irq);
+ LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
+ RET();
+ SetJumpTarget(svc);
+ LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
+ RET();
+ SetJumpTarget(abt);
+ LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
+ RET();
+ SetJumpTarget(und);
+ LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
+ RET();
+ }
+ {
+ WriteBanked = GetRXPtr();
+
+ ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ CMP(W0, 0x11);
+ FixupBranch fiq = B(CC_EQ);
+ SUBS(W1, W1, 13 - 8);
+ ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
+ FixupBranch notEverything = B(CC_LT);
+ CMP(W0, 0x12);
+ FixupBranch irq = B(CC_EQ);
+ CMP(W0, 0x13);
+ FixupBranch svc = B(CC_EQ);
+ CMP(W0, 0x17);
+ FixupBranch abt = B(CC_EQ);
+ CMP(W0, 0x1B);
+ FixupBranch und = B(CC_EQ);
+ SetJumpTarget(notEverything);
+ MOVI2R(W4, 0);
+ RET();
+
+ SetJumpTarget(fiq);
+ STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
+ MOVI2R(W4, 1);
+ RET();
+ SetJumpTarget(irq);
+ STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
+ MOVI2R(W4, 1);
+ RET();
+ SetJumpTarget(svc);
+ STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
+ MOVI2R(W4, 1);
+ RET();
+ SetJumpTarget(abt);
+ STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
+ MOVI2R(W4, 1);
+ RET();
+ SetJumpTarget(und);
+ STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
+ MOVI2R(W4, 1);
+ RET();
+ }
+
+ //FlushIcache();
+
+ JitMemUseableSize -= GetCodeOffset();
+ SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
+}
+
+Compiler::~Compiler()
+{
+#ifdef __SWITCH__
+ if (JitRWStart != NULL)
+ {
+ bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
+ assert(succeded);
+ virtmemFree(JitRWStart, JitMemSize);
+ succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
+ assert(succeded);
+ free(JitRWBase);
+ }
+#endif
+}
+
+void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
+{
+ if (reg == 15)
+ MOVI2R(nativeReg, R15);
+ else
+ LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
+}
+
+void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
+{
+ STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
+}
+
+void Compiler::LoadCPSR()
+{
+ assert(!CPSRDirty);
+ LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
+}
+
+void Compiler::SaveCPSR(bool markClean)
+{
+ if (CPSRDirty)
+ {
+ STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
+ CPSRDirty = CPSRDirty && !markClean;
+ }
+}
+
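+// Emits a branch that is taken when cond does NOT hold. Conditions 0x0-0x7 depend
+// on a single CPSR flag, so the bit is tested directly with TBZ/TBNZ; the rest are
+// resolved by looking up the flags nibble in ARM::ConditionTable.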
+FixupBranch Compiler::CheckCondition(u32 cond)
+{
+ if (cond >= 0x8)
+ {
+ LSR(W1, RCPSR, 28);
+ MOVI2R(W2, 1);
+ LSLV(W2, W2, W1);
+ ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);
+
+ return CBZ(W2);
+ }
+ else
+ {
+ u8 bit = (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)));
+
+ if (cond & 1)
+ return TBNZ(RCPSR, bit);
+ else
+ return TBZ(RCPSR, bit);
+ }
+}
+
+#define F(x) &Compiler::A_Comp_##x
+const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
+{
+ // AND
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // EOR
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // SUB
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // RSB
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ADD
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ADC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // SBC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // RSC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ORR
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // MOV
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ // BIC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // MVN
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ // TST
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // TEQ
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // CMP
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // CMN
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // Mul
+ F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
+ // ARMv5 exclusives
+ F(Clz), NULL, NULL, NULL, NULL,
+
+ // STR
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // STRB
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // LDR
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // LDRB
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // STRH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRD
+ NULL, NULL, NULL, NULL,
+ // STRD
+ NULL, NULL, NULL, NULL,
+ // LDRH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRSB
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRSH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // Swap
+ NULL, NULL,
+ // LDM, STM
+ F(LDM_STM), F(LDM_STM),
+ // Branch
+ F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
+ // Special
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+#undef F
+#define F(x) &Compiler::T_Comp_##x
+const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
+{
+ // Shift imm
+ F(ShiftImm), F(ShiftImm), F(ShiftImm),
+ // Add/sub tri operand
+ F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
+ // 8 bit imm
+ F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
+ // ALU
+ F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
+ F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
+ // ALU hi reg
+ F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
+ // PC/SP relative ops
+ F(RelAddr), F(RelAddr), F(AddSP),
+ // LDR PC rel
+ F(LoadPCRel),
+ // LDR/STR reg offset
+ F(MemReg), F(MemReg), F(MemReg), F(MemReg),
+ // LDR/STR sign extended, half
+ F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
+ // LDR/STR imm offset
+ F(MemImm), F(MemImm), F(MemImm), F(MemImm),
+ // LDR/STR half imm offset
+ F(MemImmHalf), F(MemImmHalf),
+ // LDR/STR sp rel
+ F(MemSPRel), F(MemSPRel),
+ // PUSH/POP
+ F(PUSH_POP), F(PUSH_POP),
+ // LDMIA, STMIA
+ F(LDMIA_STMIA), F(LDMIA_STMIA),
+ // Branch
+ F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
+ // Unk, SVC
+ NULL, NULL,
+ F(BL_Merged)
+};
+
+bool Compiler::CanCompile(bool thumb, u16 kind)
+{
+ return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
+}
+
+void Compiler::Comp_BranchSpecialBehaviour()
+{
+ if (CurInstr.BranchFlags & branch_IdleBranch)
+ {
+ MOVI2R(W0, 1);
+ STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
+ }
+
+ if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+ {
+ SaveCPSR(false);
+ RegCache.PrepareExit();
+ ADD(W0, RCycles, ConstantCycles);
+ ABI_PopRegisters(SavedRegs);
+ RET();
+ }
+}
+
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+{
+ if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
+ {
+ printf("JIT memory full, resetting...\n");
+ ResetBlockCache();
+ }
+
+ JitBlockEntry res = (JitBlockEntry)GetRXPtr();
+
+ Thumb = thumb;
+ Num = cpu->Num;
+ CurCPU = cpu;
+ ConstantCycles = 0;
+ RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
+
+ //printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));
+ const u32 ALL_CALLEE_SAVED = 0x7FF80000;
+
+ SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));
+
+ //if (Num == 1)
+ {
+ ABI_PushRegisters(SavedRegs);
+
+ MOVP2R(RCPU, CurCPU);
+ MOVI2R(RCycles, 0);
+
+ LoadCPSR();
+ }
+
+ for (int i = 0; i < instrsCount; i++)
+ {
+ CurInstr = instrs[i];
+ R15 = CurInstr.Addr + (Thumb ? 4 : 8);
+ CodeRegion = R15 >> 24;
+
+ CompileFunc comp = Thumb
+ ? T_Comp[CurInstr.Info.Kind]
+ : A_Comp[CurInstr.Info.Kind];
+
+ Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
+
+ //printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);
+
+ bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
+ if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
+ {
+ MOVI2R(W0, R15);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
+ if (comp == NULL)
+ {
+ MOVI2R(W0, CurInstr.Instr);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
+ }
+ if (Num == 0)
+ {
+ MOVI2R(W0, (s32)CurInstr.CodeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
+ }
+ }
+
+ if (comp == NULL)
+ {
+ SaveCPSR();
+ RegCache.Flush();
+ }
+ else
+ RegCache.Prepare(Thumb, i);
+
+ if (Thumb)
+ {
+ if (comp == NULL)
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
+ }
+ else
+ (this->*comp)();
+ }
+ else
+ {
+ u32 cond = CurInstr.Cond();
+ if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
+ {
+ if (comp)
+ (this->*comp)();
+ else
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
+ }
+ }
+ else if (cond == 0xF)
+ Comp_AddCycles_C();
+ else
+ {
+ IrregularCycles = false;
+
+ FixupBranch skipExecute;
+ if (cond < 0xE)
+ skipExecute = CheckCondition(cond);
+
+ if (comp == NULL)
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
+ }
+ else
+ {
+ (this->*comp)();
+ }
+
+ Comp_BranchSpecialBehaviour();
+
+ if (cond < 0xE)
+ {
+ if (IrregularCycles)
+ {
+ FixupBranch skipNop = B();
+ SetJumpTarget(skipExecute);
+
+ Comp_AddCycles_C();
+
+ if (CurInstr.BranchFlags & branch_FollowCondTaken)
+ {
+ SaveCPSR(false);
+ RegCache.PrepareExit();
+ ADD(W0, RCycles, ConstantCycles);
+ ABI_PopRegisters(SavedRegs);
+ RET();
+ }
+
+ SetJumpTarget(skipNop);
+ }
+ else
+ SetJumpTarget(skipExecute);
+ }
+
+ }
+ }
+
+ if (comp == NULL)
+ LoadCPSR();
+ }
+
+ RegCache.Flush();
+
+ //if (Num == 1)
+ {
+ SaveCPSR();
+
+ ADD(W0, RCycles, ConstantCycles);
+
+ ABI_PopRegisters(SavedRegs);
+ }
+ //else
+ // ADD(RCycles, RCycles, ConstantCycles);
+
+ RET();
+
+ FlushIcache();
+
+ //printf("finished\n");
+
+ return res;
+}
+
+void Compiler::Reset()
+{
+ SetCodePtr(0);
+
+ const u32 brk_0 = 0xD4200000;
+
+ for (int i = 0; i < JitMemUseableSize / 4; i++)
+ *(((u32*)GetRWPtr()) + i) = brk_0;
+}
+
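+// Cycle accounting: cycles of instructions that always execute are summed up at
+// compile time in ConstantCycles and added once on block exit, while conditionally
+// executed instructions add to RCycles at runtime.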
+void Compiler::Comp_AddCycles_C(bool nonConst)
+{
+ s32 cycles = Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
+
+ if (!nonConst && !CurInstr.Info.Branches())
+ ConstantCycles += cycles;
+ else
+ ADD(RCycles, RCycles, cycles);
+}
+
+void Compiler::Comp_AddCycles_CI(u32 numI)
+{
+ s32 cycles = (Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
+
+ if (Thumb || CurInstr.Cond() >= 0xE)
+ ConstantCycles += cycles;
+ else
+ ADD(RCycles, RCycles, cycles);
+}
+
+void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
+{
+ s32 cycles = (Num ?
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
+
+ ADD(RCycles, RCycles, numI, shift);
+ if (Thumb || CurInstr.Cond() >= 0xE)
+ ConstantCycles += c;
+ else
+ ADD(RCycles, RCycles, cycles);
+}
+
+void Compiler::Comp_AddCycles_CDI()
+{
+ if (Num == 0)
+ Comp_AddCycles_CD();
+ else
+ {
+ IrregularCycles = true;
+
+ s32 cycles;
+
+ s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+ s32 numD = CurInstr.DataCycles;
+
+ if (CurInstr.DataRegion == 0x02) // mainRAM
+ {
+ if (CodeRegion == 0x02)
+ cycles = numC + numD;
+ else
+ {
+ numC++;
+ cycles = std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ }
+ else if (CodeRegion == 0x02)
+ {
+ numD++;
+ cycles = std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else
+ {
+ cycles = numC + numD + 1;
+ }
+
+ if (!Thumb && CurInstr.Cond() < 0xE)
+ ADD(RCycles, RCycles, cycles);
+ else
+ ConstantCycles += cycles;
+ }
+}
+
+void Compiler::Comp_AddCycles_CD()
+{
+ u32 cycles = 0;
+ if (Num == 0)
+ {
+ s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
+ s32 numD = CurInstr.DataCycles;
+
+ //if (DataRegion != CodeRegion)
+ cycles = std::max(numC + numD - 6, std::max(numC, numD));
+
+ IrregularCycles = cycles != numC;
+ }
+ else
+ {
+ s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+ s32 numD = CurInstr.DataCycles;
+
+ if (CurInstr.DataRegion == 0x02)
+ {
+ if (CodeRegion == 0x02)
+ cycles += numC + numD;
+ else
+ cycles += std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else if (CodeRegion == 0x02)
+ {
+ cycles += std::max(numC + numD - 3, std::max(numC, numD));
+ }
+ else
+ {
+ cycles += numC + numD;
+ }
+
+ IrregularCycles = true;
+ }
+
+ if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
+ ADD(RCycles, RCycles, cycles);
+ else
+ ConstantCycles += cycles;
+}
+
+}
\ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
new file mode 100644
index 0000000..7e13507
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -0,0 +1,234 @@
+#ifndef ARMJIT_COMPILER_H
+#define ARMJIT_COMPILER_H
+
+#include "../ARM.h"
+#include "../ARMJIT.h"
+
+#include "../dolphin/Arm64Emitter.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMJIT_RegisterCache.h"
+
+namespace ARMJIT
+{
+
+const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
+const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
+const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
+
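+// An ARM data-processing second operand: either an immediate or a register with an
+// optional shift.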
+struct Op2
+{
+ Op2()
+ {}
+
+ Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
+ {
+ Reg.Rm = rm;
+ Reg.ShiftType = Arm64Gen::ST_LSL;
+ Reg.ShiftAmount = 0;
+ }
+
+ Op2(u32 imm) : IsImm(true), Imm(imm)
+ {}
+
+ Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
+ {
+ Reg.Rm = rm;
+ Reg.ShiftType = st;
+ Reg.ShiftAmount = amount;
+ }
+
+ Arm64Gen::ArithOption ToArithOption()
+ {
+ assert(!IsImm);
+ return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
+ }
+
+ bool IsSimpleReg()
+ { return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }
+ bool ImmFits12Bit()
+ { return IsImm && ((Imm & 0xFFF) == Imm); }
+ bool IsZero()
+ { return IsImm && !Imm; }
+
+ bool IsImm;
+ union
+ {
+ struct
+ {
+ Arm64Gen::ARM64Reg Rm;
+ Arm64Gen::ShiftType ShiftType;
+ int ShiftAmount;
+ } Reg;
+ u32 Imm;
+ };
+};
+
+class Compiler : Arm64Gen::ARM64XEmitter
+{
+public:
+ typedef void (Compiler::*CompileFunc)();
+
+ Compiler();
+ ~Compiler();
+
+ Arm64Gen::ARM64Reg MapReg(int reg)
+ {
+ assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
+ return RegCache.Mapping[reg];
+ }
+
+ JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+
+ bool CanCompile(bool thumb, u16 kind);
+
+ bool FlagsNZNeeded()
+ {
+ return CurInstr.SetFlags & 0xC;
+ }
+
+ void Reset();
+
+ void Comp_AddCycles_C(bool forceNonConst = false);
+ void Comp_AddCycles_CI(u32 numI);
+ void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
+ void Comp_AddCycles_CD();
+ void Comp_AddCycles_CDI();
+
+ void MovePC();
+
+ void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
+ void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
+
+ void LoadCPSR();
+ void SaveCPSR(bool markClean = true);
+
+ void A_Comp_ALUTriOp();
+ void A_Comp_ALUMovOp();
+ void A_Comp_ALUCmpOp();
+
+ void A_Comp_Mul();
+ void A_Comp_Mul_Long();
+
+ void A_Comp_Clz();
+
+ void A_Comp_MemWB();
+ void A_Comp_MemHD();
+
+ void A_Comp_LDM_STM();
+
+ void A_Comp_BranchImm();
+ void A_Comp_BranchXchangeReg();
+
+
+ void T_Comp_ShiftImm();
+ void T_Comp_AddSub_();
+ void T_Comp_ALUImm8();
+ void T_Comp_ALU();
+ void T_Comp_ALU_HiReg();
+ void T_Comp_AddSP();
+ void T_Comp_RelAddr();
+
+ void T_Comp_MemReg();
+ void T_Comp_MemImm();
+ void T_Comp_MemRegHalf();
+ void T_Comp_MemImmHalf();
+ void T_Comp_LoadPCRel();
+ void T_Comp_MemSPRel();
+
+ void T_Comp_LDMIA_STMIA();
+ void T_Comp_PUSH_POP();
+
+ void T_Comp_BCOND();
+ void T_Comp_B();
+ void T_Comp_BranchXchangeReg();
+ void T_Comp_BL_LONG_1();
+ void T_Comp_BL_LONG_2();
+ void T_Comp_BL_Merged();
+
+ s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+
+ void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
+
+ void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
+ void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
+ void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
+
+ void Comp_RetriveFlags(bool retriveCV);
+
+ Arm64Gen::FixupBranch CheckCondition(u32 cond);
+
+ void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
+ void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
+
+ void A_Comp_GetOp2(bool S, Op2& op2);
+
+ void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
+ void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
+
+ void Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+ enum
+ {
+ memop_Writeback = 1 << 0,
+ memop_Post = 1 << 1,
+ memop_SignExtend = 1 << 2,
+ memop_Store = 1 << 3,
+ memop_SubtractOffset = 1 << 4
+ };
+ void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
+
+ void* Gen_MemoryRoutine9(int size, bool store);
+
+ void* Gen_MemoryRoutine9Seq(bool store, bool preinc);
+ void* Gen_MemoryRoutine7Seq(bool store, bool preinc);
+
+ // 0 = switch mode, 1 = stay arm, 2 = stay thumb
+ void* Gen_JumpTo9(int kind);
+ void* Gen_JumpTo7(int kind);
+
+ void Comp_BranchSpecialBehaviour();
+
+ bool Exit;
+
+ FetchedInstr CurInstr;
+ bool Thumb;
+ u32 R15;
+ u32 Num;
+ ARM* CurCPU;
+ u32 ConstantCycles;
+ u32 CodeRegion;
+
+ BitSet32 SavedRegs;
+
+ u32 JitMemUseableSize;
+
+ void* ReadBanked, *WriteBanked;
+
+ // [size][store]
+ void* MemFunc9[3][2];
+ void* MemFunc7[3][2];
+
+ // [store][pre increment]
+ void* MemFuncsSeq9[2][2];
+ // "[code in main ram]
+ void* MemFuncsSeq7[2][2];
+
+ void* JumpToFuncs9[3];
+ void* JumpToFuncs7[3];
+
+ RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
+
+ bool CPSRDirty = false;
+
+ bool IrregularCycles = false;
+
+#ifdef __SWITCH__
+ void* JitRWBase;
+ void* JitRWStart;
+ void* JitRXStart;
+#endif
+};
+
+}
+
+#endif
\ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
new file mode 100644
index 0000000..a5d0e3f
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -0,0 +1,848 @@
+#include "ARMJIT_Compiler.h"
+
+#include "../Config.h"
+
+using namespace Arm64Gen;
+
+namespace ARMJIT
+{
+
+// W0 - address
+// (if store) W1 - value to store
+// W2 - code cycles
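+// (loads return their result in W0; Comp_MemAccess computes the effective address
+// into W0, moves the value to store into W1 and invokes the routine via
+// QuickCallFunction)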
+void* Compiler::Gen_MemoryRoutine9(int size, bool store)
+{
+ AlignCode16();
+ void* res = GetRXPtr();
+
+ u32 addressMask;
+ switch (size)
+ {
+ case 32: addressMask = ~3; break;
+ case 16: addressMask = ~1; break;
+ case 8: addressMask = ~0; break;
+ }
+
+ LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
+ LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
+ SUB(W3, W0, W3);
+ CMP(W3, W4);
+ FixupBranch insideDTCM = B(CC_LO);
+
+ UBFX(W4, W0, 24, 8);
+ CMP(W4, 0x02);
+ FixupBranch outsideMainRAM = B(CC_NEQ);
+ ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
+ MOVP2R(X4, NDS::MainRAM);
+ if (!store && size == 32)
+ {
+ LDR(W3, X3, X4);
+ ANDI2R(W0, W0, 3);
+ LSL(W0, W0, 3);
+ RORV(W0, W3, W0);
+ }
+ else if (store)
+ STRGeneric(size, W1, X3, X4);
+ else
+ LDRGeneric(size, false, W0, X3, X4);
+ RET();
+
+ SetJumpTarget(outsideMainRAM);
+
+ LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
+ CMP(W0, W3);
+ FixupBranch insideITCM = B(CC_LO);
+
+ if (store)
+ {
+ if (size > 8)
+ ANDI2R(W0, W0, addressMask);
+
+ switch (size)
+ {
+ case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
+ case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
+ case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
+ }
+ }
+ else
+ {
+ if (size == 32)
+ ABI_PushRegisters({0, 30});
+ if (size > 8)
+ ANDI2R(W0, W0, addressMask);
+
+ switch (size)
+ {
+ case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
+ case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
+ case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
+ }
+ if (size == 32)
+ {
+ ABI_PopRegisters({1, 30});
+ ANDI2R(W1, W1, 3);
+ LSL(W1, W1, 3);
+ RORV(W0, W0, W1);
+ RET();
+ }
+ }
+
+ SetJumpTarget(insideDTCM);
+ ANDI2R(W3, W3, 0x3FFF & addressMask);
+ ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
+ if (!store && size == 32)
+ {
+ ANDI2R(W4, W0, 3);
+ LDR(W0, RCPU, W3);
+ LSL(W4, W4, 3);
+ RORV(W0, W0, W4);
+ }
+ else if (store)
+ STRGeneric(size, W1, RCPU, W3);
+ else
+ LDRGeneric(size, false, W0, RCPU, W3);
+
+ RET();
+
+ SetJumpTarget(insideITCM);
+ ANDI2R(W3, W0, 0x7FFF & addressMask);
+ if (store)
+ {
+ LSR(W0, W3, 8);
+ ADDI2R(W0, W0, ExeMemRegionOffsets[exeMem_ITCM], W4);
+ MOVP2R(X4, CodeRanges);
+ ADD(X4, X4, X0, ArithOption(X0, ST_LSL, 4));
+ static_assert(sizeof(AddressRange) == 16);
+ LDR(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
+ FixupBranch null = CBZ(W4);
+ ABI_PushRegisters({1, 3, 30});
+ QuickCallFunction(X4, InvalidateByAddr);
+ ABI_PopRegisters({1, 3, 30});
+ SetJumpTarget(null);
+ }
+ ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
+ if (!store && size == 32)
+ {
+ ANDI2R(W4, W0, 3);
+ LDR(W0, RCPU, W3);
+ LSL(W4, W4, 3);
+ RORV(W0, W0, W4);
+ }
+ else if (store)
+ STRGeneric(size, W1, RCPU, W3);
+ else
+ LDRGeneric(size, false, W0, RCPU, W3);
+ RET();
+
+ return res;
+}
+
+/*
+ W0 - base address
+ X1 - stack space
+ W2 - values count
+*/
+void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
+{
+ AlignCode16();
+ void* res = GetRXPtr();
+
+ void* loopStart = GetRXPtr();
+ SUB(W2, W2, 1);
+
+ if (preinc)
+ ADD(W0, W0, 4);
+
+ LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
+ LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
+ SUB(W4, W0, W4);
+ CMP(W4, W5);
+ FixupBranch insideDTCM = B(CC_LO);
+
+ LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
+ CMP(W0, W4);
+ FixupBranch insideITCM = B(CC_LO);
+
+ ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
+ if (store)
+ {
+ LDR(X1, X1, ArithOption(X2, true));
+ QuickCallFunction(X4, NDS::ARM9Write32);
+
+ ABI_PopRegisters({0, 1, 2, 30});
+ }
+ else
+ {
+ QuickCallFunction(X4, NDS::ARM9Read32);
+ MOV(W4, W0);
+
+ ABI_PopRegisters({0, 1, 2, 30});
+
+ STR(X4, X1, ArithOption(X2, true));
+ }
+
+ if (!preinc)
+ ADD(W0, W0, 4);
+ CBNZ(W2, loopStart);
+ RET();
+
+ SetJumpTarget(insideDTCM);
+
+ ANDI2R(W4, W4, ~3 & 0x3FFF);
+ ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
+ if (store)
+ {
+ LDR(X5, X1, ArithOption(X2, true));
+ STR(W5, RCPU, X4);
+ }
+ else
+ {
+ LDR(W5, RCPU, X4);
+ STR(X5, X1, ArithOption(X2, true));
+ }
+
+ if (!preinc)
+ ADD(W0, W0, 4);
+ CBNZ(W2, loopStart);
+ RET();
+
+ SetJumpTarget(insideITCM);
+
+ ANDI2R(W4, W0, ~3 & 0x7FFF);
+
+ if (store)
+ {
+ LSR(W6, W4, 8);
+ ADDI2R(W6, W6, ExeMemRegionOffsets[exeMem_ITCM], W5);
+ MOVP2R(X5, CodeRanges);
+ ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
+ static_assert(sizeof(AddressRange) == 16);
+ LDR(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
+ FixupBranch null = CBZ(W5);
+ ABI_PushRegisters({0, 1, 2, 4, 30});
+ MOV(W0, W6);
+ QuickCallFunction(X5, InvalidateByAddr);
+ ABI_PopRegisters({0, 1, 2, 4, 30});
+ SetJumpTarget(null);
+ }
+
+ ADDI2R(W4, W4, offsetof(ARMv5, ITCM), W5);
+ if (store)
+ {
+ LDR(X5, X1, ArithOption(X2, true));
+ STR(W5, RCPU, X4);
+ }
+ else
+ {
+ LDR(W5, RCPU, X4);
+ STR(X5, X1, ArithOption(X2, true));
+ }
+
+ if (!preinc)
+ ADD(W0, W0, 4);
+ CBNZ(W2, loopStart);
+ RET();
+ return res;
+}
+
+void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
+{
+ AlignCode16();
+ void* res = GetRXPtr();
+
+ void* loopStart = GetRXPtr();
+ SUB(W2, W2, 1);
+
+ if (preinc)
+ ADD(W0, W0, 4);
+
+ ABI_PushRegisters({0, 1, 2, 30});
+ if (store)
+ {
+ LDR(X1, X1, ArithOption(X2, true));
+ QuickCallFunction(X4, NDS::ARM7Write32);
+ ABI_PopRegisters({0, 1, 2, 30});
+ }
+ else
+ {
+ QuickCallFunction(X4, NDS::ARM7Read32);
+ MOV(W4, W0);
+ ABI_PopRegisters({0, 1, 2, 30});
+ STR(X4, X1, ArithOption(X2, true));
+ }
+
+ if (!preinc)
+ ADD(W0, W0, 4);
+ CBNZ(W2, loopStart);
+ RET();
+
+ return res;
+}
+
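+// The address of a PC-relative load is known at compile time, so the value can be
+// read here, once, and baked into the generated code as a constant.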
+void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
+{
+ u32 val;
+ // make sure arm7 bios is accessible
+ u32 tmpR15 = CurCPU->R[15];
+ CurCPU->R[15] = R15;
+ if (size == 32)
+ {
+ CurCPU->DataRead32(addr & ~0x3, &val);
+ val = ROR(val, (addr & 0x3) << 3);
+ }
+ else if (size == 16)
+ {
+ CurCPU->DataRead16(addr & ~0x1, &val);
+ if (signExtend)
+ val = ((s32)val << 16) >> 16;
+ }
+ else
+ {
+ CurCPU->DataRead8(addr, &val);
+ if (signExtend)
+ val = ((s32)val << 24) >> 24;
+ }
+ CurCPU->R[15] = tmpR15;
+
+ MOVI2R(MapReg(rd), val);
+
+ if (Thumb || CurInstr.Cond() == 0xE)
+ RegCache.PutLiteral(rd, val);
+}
+
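+// Generic single load/store: rd/rn are guest registers, offset is an immediate or a
+// shifted register, size is 8/16/32 bits and flags is a combination of the memop_*
+// bits declared in the header.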
+void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
+{
+ u32 addressMask = ~0;
+ if (size == 32)
+ addressMask = ~3;
+ if (size == 16)
+ addressMask = ~1;
+
+ if (flags & memop_Store)
+ Comp_AddCycles_CD();
+ else
+ Comp_AddCycles_CDI();
+
+ if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+ {
+ u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+ u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
+
+ if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
+ {
+ Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
+ return;
+ }
+ }
+
+ {
+ ARM64Reg rdMapped = MapReg(rd);
+ ARM64Reg rnMapped = MapReg(rn);
+
+ bool inlinePreparation = Num == 1;
+ u32 constLocalROR32 = 4;
+
+ void* memFunc = Num == 0
+ ? MemFunc9[size >> 4][!!(flags & memop_Store)]
+ : MemFunc7[size >> 4][!!(flags & memop_Store)];
+
+ if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
+ {
+ u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
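+ // the base register holds a known constant, so the absolute address is known: try to resolve it to a directly addressable host pointer and skip the call entirely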
+ NDS::MemRegion region;
+ region.Mem = NULL;
+ if (Num == 0)
+ {
+ ARMv5* cpu5 = (ARMv5*)CurCPU;
+
+ // stupid dtcm...
+ if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
+ {
+ region.Mem = cpu5->DTCM;
+ region.Mask = 0x3FFF;
+ }
+ else
+ {
+ NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
+ }
+ }
+ else
+ NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
+
+ if (region.Mem != NULL)
+ {
+ void* ptr = &region.Mem[addr & addressMask & region.Mask];
+
+ MOVP2R(X0, ptr);
+ if (flags & memop_Store)
+ STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
+ else
+ {
+ LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
+ if (size == 32 && addr & 0x3)
+ ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
+ }
+ return;
+ }
+
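+ // no direct pointer available; see whether there is a handler specialised for this exact address instead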
+ void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
+ if (specialFunc)
+ {
+ memFunc = specialFunc;
+ inlinePreparation = true;
+ constLocalROR32 = addr & 0x3;
+ }
+ }
+
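+ // for post-indexed accesses the access address is the old base (copied into W0) while the offset is applied straight to rn; otherwise the effective address is built in W0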
+ ARM64Reg finalAddr = W0;
+ if (flags & memop_Post)
+ {
+ finalAddr = rnMapped;
+ MOV(W0, rnMapped);
+ }
+
+ if (flags & memop_Store)
+ MOV(W1, rdMapped);
+
+ if (!offset.IsImm)
+ Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
+ // offset might become an immediate
+ if (offset.IsImm)
+ {
+ if (flags & memop_SubtractOffset)
+ SUB(finalAddr, rnMapped, offset.Imm);
+ else
+ ADD(finalAddr, rnMapped, offset.Imm);
+ }
+ else
+ {
+ if (offset.Reg.ShiftType == ST_ROR)
+ {
+ ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
+ offset = Op2(W0);
+ }
+
+ if (flags & memop_SubtractOffset)
+ SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+ else
+ ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+ }
+
+ if (!(flags & memop_Post) && (flags & memop_Writeback))
+ MOV(rnMapped, W0);
+
+ if (inlinePreparation)
+ {
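+ // remember the low two address bits so the rotation for an unaligned 32-bit load can be applied after the call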
+ if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
+ ANDI2R(rdMapped, W0, 3);
+ if (size > 8)
+ ANDI2R(W0, W0, addressMask);
+ }
+ QuickCallFunction(X2, memFunc);
+ if (!(flags & memop_Store))
+ {
+ if (inlinePreparation && size == 32)
+ {
+ if (constLocalROR32 == 4)
+ {
+ LSL(rdMapped, rdMapped, 3);
+ RORV(rdMapped, W0, rdMapped);
+ }
+ else if (constLocalROR32 > 0)
+ ROR_(rdMapped, W0, constLocalROR32 << 3);
+ else
+ MOV(rdMapped, W0);
+ }
+ else if (flags & memop_SignExtend)
+ {
+ if (size == 16)
+ SXTH(rdMapped, W0);
+ else if (size == 8)
+ SXTB(rdMapped, W0);
+ else
+ assert(false && "What's wrong with you?");
+ }
+ else
+ MOV(rdMapped, W0);
+
+ if (CurInstr.Info.Branches())
+ {
+ if (size < 32)
+ printf("LDR size < 32 branching?\n");
+ Comp_JumpTo(rdMapped, Num == 0, false);
+ }
+ }
+ }
+}
+
+void Compiler::A_Comp_MemWB()
+{
+ Op2 offset;
+ if (CurInstr.Instr & (1 << 25))
+ offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
+ else
+ offset = Op2(CurInstr.Instr & 0xFFF);
+
+ bool load = CurInstr.Instr & (1 << 20);
+ bool byte = CurInstr.Instr & (1 << 22);
+
+ int flags = 0;
+ if (!load)
+ flags |= memop_Store;
+ if (!(CurInstr.Instr & (1 << 24)))
+ flags |= memop_Post;
+ if (CurInstr.Instr & (1 << 21))
+ flags |= memop_Writeback;
+ if (!(CurInstr.Instr & (1 << 23)))
+ flags |= memop_SubtractOffset;
+
+ Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
+}
+
+void Compiler::A_Comp_MemHD()
+{
+ bool load = CurInstr.Instr & (1 << 20);
+ bool signExtend;
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ int size;
+
+ if (load)
+ {
+ signExtend = op >= 2;
+ size = op == 2 ? 8 : 16;
+ }
+ else
+ {
+ size = 16;
+ signExtend = false;
+ }
+
+ Op2 offset;
+ if (CurInstr.Instr & (1 << 22))
+ offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
+ else
+ offset = Op2(MapReg(CurInstr.A_Reg(0)));
+
+ int flags = 0;
+ if (signExtend)
+ flags |= memop_SignExtend;
+ if (!load)
+ flags |= memop_Store;
+ if (!(CurInstr.Instr & (1 << 24)))
+ flags |= memop_Post;
+ if (!(CurInstr.Instr & (1 << 23)))
+ flags |= memop_SubtractOffset;
+ if (CurInstr.Instr & (1 << 21))
+ flags |= memop_Writeback;
+
+ Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
+}
+
+void Compiler::T_Comp_MemReg()
+{
+ int op = (CurInstr.Instr >> 10) & 0x3;
+ bool load = op & 0x2;
+ bool byte = op & 0x1;
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
+ Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_MemImm()
+{
+ int op = (CurInstr.Instr >> 11) & 0x3;
+ bool load = op & 0x1;
+ bool byte = op & 0x2;
+ u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
+ byte ? 8 : 32, load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_MemRegHalf()
+{
+ int op = (CurInstr.Instr >> 10) & 0x3;
+ bool load = op != 0;
+ int size = op != 1 ? 16 : 8;
+ bool signExtend = op & 1;
+
+ int flags = 0;
+ if (signExtend)
+ flags |= memop_SignExtend;
+ if (!load)
+ flags |= memop_Store;
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
+ size, flags);
+}
+
+void Compiler::T_Comp_MemImmHalf()
+{
+ u32 offset = (CurInstr.Instr >> 5) & 0x3E;
+ bool load = CurInstr.Instr & (1 << 11);
+
+ Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
+ load ? 0 : memop_Store);
+}
+
+void Compiler::T_Comp_LoadPCRel()
+{
+ u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
+
+ if (Config::JIT_LiteralOptimisations)
+ {
+ Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
+ Comp_AddCycles_CDI();
+ }
+ else
+ {
+ bool negative = addr < R15;
+ u32 abs = negative ? R15 - addr : addr - R15;
+ Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
+ }
+}
+
+void Compiler::T_Comp_MemSPRel()
+{
+ u32 offset = (CurInstr.Instr & 0xFF) * 4;
+ bool load = CurInstr.Instr & (1 << 11);
+
+ Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
+}
+
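+// LDM/STM: the values are staged in an 8-bytes-per-register buffer on the host stack and transferred through the sequential memory routines (W0 = address, X1 = buffer, W2 = count)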
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+{
+ IrregularCycles = true;
+
+ int regsCount = regs.Count();
+
+ if (regsCount == 0)
+ return 0; // actually not the right behaviour TODO: fix me
+
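+ // reserve one 8-byte slot per register, rounded up to an even count so SP stays 16-byte aligned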
+ SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
+ if (store)
+ {
+ Comp_AddCycles_CD();
+
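+ // user-mode transfer (the ^ form): W3 carries the current-mode value and ReadBanked swaps in the banked copy where one exists (W0 = mode bits, W1 = register index - 8)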
+ if (usermode && (regs & BitSet16(0x7f00)))
+ UBFX(W0, RCPSR, 0, 5);
+
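+ // the list is walked from the lowest register up while the buffer is filled from the top slot down, matching the order in which the Seq routines consume it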
+ int i = regsCount - 1;
+
+ BitSet16::Iterator it = regs.begin();
+ while (it != regs.end())
+ {
+ BitSet16::Iterator nextReg = it;
+ nextReg++;
+
+ int reg = *it;
+
+ if (usermode && reg >= 8 && reg < 15)
+ {
+ if (RegCache.Mapping[reg] != INVALID_REG)
+ MOV(W3, MapReg(reg));
+ else
+ LoadReg(reg, W3);
+ MOVI2R(W1, reg - 8);
+ BL(ReadBanked);
+ STR(INDEX_UNSIGNED, W3, SP, i * 8);
+ }
+ else if (!usermode && nextReg != regs.end())
+ {
+ ARM64Reg first = W3;
+ ARM64Reg second = W4;
+
+ if (RegCache.Mapping[reg] != INVALID_REG)
+ first = MapReg(reg);
+ else
+ LoadReg(reg, W3);
+
+ if (RegCache.Mapping[*nextReg] != INVALID_REG)
+ second = MapReg(*nextReg);
+ else
+ LoadReg(*nextReg, W4);
+
+ STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+
+ i--;
+ it++;
+ }
+ else if (RegCache.Mapping[reg] != INVALID_REG)
+ STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
+ else
+ {
+ LoadReg(reg, W3);
+ STR(INDEX_UNSIGNED, W3, SP, i * 8);
+ }
+ i--;
+ it++;
+ }
+ }
+ if (decrement)
+ {
+ SUB(W0, MapReg(rn), regsCount * 4);
+ preinc ^= true;
+ }
+ else
+ MOV(W0, MapReg(rn));
+ ADD(X1, SP, 0);
+ MOVI2R(W2, regsCount);
+
+ BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
+
+ if (!store)
+ {
+ Comp_AddCycles_CDI();
+
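+ // user-mode LDM: WriteBanked stores W3 into the banked copy for the mode in W0 and flags in W4 whether it did; only if the register is not banked does the value go into the current-mode register below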
+ if (usermode && (regs & BitSet16(0x7f00)))
+ UBFX(W0, RCPSR, 0, 5);
+
+ int i = regsCount - 1;
+ BitSet16::Iterator it = regs.begin();
+ while (it != regs.end())
+ {
+ BitSet16::Iterator nextReg = it;
+ nextReg++;
+
+ int reg = *it;
+
+ if (usermode && reg >= 8 && reg < 15)
+ {
+ LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+ MOVI2R(W1, reg - 8);
+ BL(WriteBanked);
+ FixupBranch alreadyWritten = CBNZ(W4);
+ if (RegCache.Mapping[reg] != INVALID_REG)
+ {
+ MOV(MapReg(reg), W3);
+ RegCache.DirtyRegs |= 1 << reg;
+ }
+ else
+ SaveReg(reg, W3);
+ SetJumpTarget(alreadyWritten);
+ }
+ else if (!usermode && nextReg != regs.end())
+ {
+ ARM64Reg first = W3, second = W4;
+
+ if (RegCache.Mapping[reg] != INVALID_REG)
+ {
+ first = MapReg(reg);
+ if (reg != 15)
+ RegCache.DirtyRegs |= 1 << reg;
+ }
+ if (RegCache.Mapping[*nextReg] != INVALID_REG)
+ {
+ second = MapReg(*nextReg);
+ if (*nextReg != 15)
+ RegCache.DirtyRegs |= 1 << *nextReg;
+ }
+
+ LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
+
+ if (first == W3)
+ SaveReg(reg, W3);
+ if (second == W4)
+ SaveReg(*nextReg, W4);
+
+ it++;
+ i--;
+ }
+ else if (RegCache.Mapping[reg] != INVALID_REG)
+ {
+ ARM64Reg mapped = MapReg(reg);
+ LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
+
+ if (reg != 15)
+ RegCache.DirtyRegs |= 1 << reg;
+ }
+ else
+ {
+ LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+ SaveReg(reg, W3);
+ }
+
+ it++;
+ i--;
+ }
+ }
+ ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
+
+ if (!store && regs[15])
+ {
+ ARM64Reg mapped = MapReg(15);
+ Comp_JumpTo(mapped, Num == 0, usermode);
+ }
+
+ return regsCount * 4 * (decrement ? -1 : 1);
+}
+
+void Compiler::A_Comp_LDM_STM()
+{
+ BitSet16 regs(CurInstr.Instr & 0xFFFF);
+
+ bool load = CurInstr.Instr & (1 << 20);
+ bool pre = CurInstr.Instr & (1 << 24);
+ bool add = CurInstr.Instr & (1 << 23);
+ bool writeback = CurInstr.Instr & (1 << 21);
+ bool usermode = CurInstr.Instr & (1 << 22);
+
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
+
+ s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
+
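+ // LDM with writeback and the base register in the list: on the ARM9 the base is still written back if it is either the only register or not the last one in the list, on the ARM7 it never is (the loaded value is kept)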
+ if (load && writeback && regs[CurInstr.A_Reg(16)])
+ writeback = Num == 0
+ ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
+ : false;
+ if (writeback)
+ {
+ if (offset > 0)
+ ADD(rn, rn, offset);
+ else
+ SUB(rn, rn, -offset);
+ }
+}
+
+void Compiler::T_Comp_PUSH_POP()
+{
+ bool load = CurInstr.Instr & (1 << 11);
+ BitSet16 regs(CurInstr.Instr & 0xFF);
+ if (CurInstr.Instr & (1 << 8))
+ {
+ if (load)
+ regs[15] = true;
+ else
+ regs[14] = true;
+ }
+
+ ARM64Reg sp = MapReg(13);
+ s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
+
+ if (offset > 0)
+ ADD(sp, sp, offset);
+ else
+ SUB(sp, sp, -offset);
+}
+
+void Compiler::T_Comp_LDMIA_STMIA()
+{
+ BitSet16 regs(CurInstr.Instr & 0xFF);
+ ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
+ bool load = CurInstr.Instr & (1 << 11);
+ u32 regsCount = regs.Count();
+
+ s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
+
+ if (!load || !regs[CurInstr.T_Reg(8)])
+ {
+ if (offset > 0)
+ ADD(rb, rb, offset);
+ else
+ SUB(rb, rb, -offset);
+ }
+}
+
+}
\ No newline at end of file