aboutsummaryrefslogtreecommitdiff
path: root/src/ARMJIT_A64
diff options
context:
space:
mode:
Diffstat (limited to 'src/ARMJIT_A64')
-rw-r--r--src/ARMJIT_A64/ARMJIT_ALU.cpp943
-rw-r--r--src/ARMJIT_A64/ARMJIT_Branch.cpp421
-rw-r--r--src/ARMJIT_A64/ARMJIT_Compiler.cpp918
-rw-r--r--src/ARMJIT_A64/ARMJIT_Compiler.h269
-rw-r--r--src/ARMJIT_A64/ARMJIT_Linkage.s68
-rw-r--r--src/ARMJIT_A64/ARMJIT_LoadStore.cpp810
6 files changed, 3429 insertions, 0 deletions
diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp
new file mode 100644
index 0000000..26a89cb
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp
@@ -0,0 +1,943 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Arm64Gen;
+
+namespace ARMJIT
+{
+
+void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs) // emits op2.Reg.Rm shifted by the low byte of rs; the result ends up in W0 and op2 is rewritten to name it
+{ // op: 0 emits LSLV, 1 LSRV, 2 ASRV, 3 RORV; W1 and W2 are clobbered as scratch
+ if (!(CurInstr.SetFlags & 0x2)) // 0x2 is the SetFlags bit that Comp_RetriveFlags stores at CPSR bit 29
+ S = false;
+
+ CPSRDirty |= S;
+
+ UBFX(W1, rs, 0, 8); // W1 = shift amount = bits 0-7 of rs
+
+ if (!S) // value-only path: RCPSR is not touched
+ {
+ if (op == 3)
+ RORV(W0, op2.Reg.Rm, W1);
+ else
+ {
+ CMP(W1, 32); // host flags are consumed by the CSELs below
+ if (op == 2)
+ {
+ MOVI2R(W2, 31);
+ CSEL(W1, W2, W1, CC_GE); // clamp the amount to 31 when it is >= 32
+ ASRV(W0, op2.Reg.Rm, W1);
+ }
+ else
+ {
+ if (op == 0)
+ LSLV(W0, op2.Reg.Rm, W1);
+ else if (op == 1)
+ LSRV(W0, op2.Reg.Rm, W1);
+ CSEL(W0, WZR, W0, CC_GE); // amounts >= 32 produce zero
+ }
+ }
+ }
+ else // flag path: shift by amount-1, copy the bit about to be shifted out to CPSR bit 29, then shift once more
+ {
+ MOV(W0, op2.Reg.Rm);
+ FixupBranch zero = CBZ(W1); // amount 0: skip everything, W0 = Rm and RCPSR stay as they are
+
+ SUB(W1, W1, 1); // W1 = amount - 1
+ if (op == 3)
+ {
+ RORV(W0, op2.Reg.Rm, W1);
+ BFI(RCPSR, W0, 29, 1); // bit 0 of the partially rotated value -> CPSR bit 29
+ }
+ else
+ {
+ CMP(W1, 31); // compares amount-1; flags are consumed by the CSELs below
+ if (op == 2)
+ {
+ MOVI2R(W2, 31);
+ CSEL(W1, W2, W1, CC_GT); // clamp amount-1 to 31
+ ASRV(W0, op2.Reg.Rm, W1);
+ BFI(RCPSR, W0, 29, 1);
+ }
+ else
+ {
+ if (op == 0)
+ {
+ LSLV(W0, op2.Reg.Rm, W1);
+ UBFX(W1, W0, 31, 1); // for LSL the outgoing bit is the top bit; UBFX leaves the host flags intact
+ }
+ else if (op == 1)
+ LSRV(W0, op2.Reg.Rm, W1);
+ CSEL(W1, WZR, op ? W0 : W1, CC_GT); // outgoing bit is 0 once amount-1 > 31; LSR reads it from bit 0 of W0
+ BFI(RCPSR, W1, 29, 1);
+ CSEL(W0, WZR, W0, CC_GE); // amount >= 32: the value itself is zero
+ }
+ }
+
+ MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1)); // apply the final single-bit shift of the same kind
+ SetJumpTarget(zero);
+ }
+ op2 = Op2(W0, ST_LSL, 0); // callers now see an unshifted register operand in W0
+}
+
+void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp) // folds an immediate shift of op2.Reg.Rm into op2; tmp is scratch for the CPSR bit 29 update
+{ // op: 0 LSL, 1 LSR, 2 ASR, 3 ROR; except for ROR #0 no shift instruction is emitted, op2 just carries the shift
+ if (!(CurInstr.SetFlags & 0x2)) // this instruction does not need CPSR bit 29 updated
+ S = false;
+
+ CPSRDirty |= S;
+
+ switch (op)
+ {
+ case 0: // LSL
+ if (S && amount) // LSL #0 leaves CPSR bit 29 alone
+ {
+ UBFX(tmp, op2.Reg.Rm, 32 - amount, 1); // last bit shifted out at the top
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
+ return;
+ case 1: // LSR
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1); // an encoded amount of 0 is treated as 32
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ if (amount == 0)
+ {
+ op2 = Op2(0); // nothing survives a shift by 32: the operand becomes the constant 0
+ return;
+ }
+ op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
+ return;
+ case 2: // ASR
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1); // an encoded amount of 0 is treated as 32
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31); // amount 0 is emitted as ASR #31
+ return;
+ case 3: // ROR
+ if (amount == 0) // amount 0: build (old CPSR bit 29 << 31) | (Rm >> 1) in tmp
+ {
+ UBFX(tmp, RCPSR, 29, 1);
+ LSL(tmp, tmp, 31);
+ if (S)
+ BFI(RCPSR, op2.Reg.Rm, 29, 1); // bit 0 of Rm becomes the new CPSR bit 29 (old value was read above)
+ ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
+
+ op2 = Op2(tmp, ST_LSL, 0);
+ }
+ else
+ {
+ if (S)
+ {
+ UBFX(tmp, op2.Reg.Rm, amount - 1, 1); // last bit rotated out
+ BFI(RCPSR, tmp, 29, 1);
+ }
+ op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
+ }
+ return;
+ }
+}
+
+void Compiler::Comp_RetriveFlags(bool retriveCV)
+{
+    // Copies the host condition flags that CurInstr.SetFlags asks for into the
+    // cached guest CPSR (RCPSR), using W0 as scratch.
+    //   SetFlags 0x4 -> host EQ -> CPSR bit 30
+    //   SetFlags 0x8 -> host MI -> CPSR bit 31
+    //   SetFlags 0x2 -> host CS -> CPSR bit 29   (only when retriveCV)
+    //   SetFlags 0x1 -> host VS -> CPSR bit 28   (only when retriveCV)
+    const auto wanted = CurInstr.SetFlags;
+
+    if (wanted)
+        CPSRDirty = true;
+
+    if (wanted & 0x4)
+    {
+        CSET(W0, CC_EQ);
+        BFI(RCPSR, W0, 30, 1);
+    }
+    if (wanted & 0x8)
+    {
+        CSET(W0, CC_MI);
+        BFI(RCPSR, W0, 31, 1);
+    }
+
+    // Callers that already placed bits 29/28 themselves pass retriveCV = false.
+    if (!retriveCV)
+        return;
+
+    if (wanted & 0x2)
+    {
+        CSET(W0, CC_CS);
+        BFI(RCPSR, W0, 29, 1);
+    }
+    if (wanted & 0x1)
+    {
+        CSET(W0, CC_VS);
+        BFI(RCPSR, W0, 28, 1);
+    }
+}
+
+void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2) // emits rd = rn <logical op> op2 and, when S survives, copies the resulting N/Z flags into RCPSR
+{ // op uses the ARM opcode numbers handled below (0x0, 0x1, 0xC, 0xE); W0 is the scratch register for immediates
+ if (S && !CurInstr.SetFlags) // no flag needs to be produced for this instruction: skip all flag work
+ S = false;
+
+ switch (op)
+ {
+ case 0x0: // AND
+ if (S)
+ {
+ if (op2.IsImm)
+ ANDSI2R(rd, rn, op2.Imm, W0);
+ else
+ ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ ANDI2R(rd, rn, op2.Imm, W0);
+ else
+ AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ case 0x1: // EOR
+ if (op2.IsImm)
+ EORI2R(rd, rn, op2.Imm, W0);
+ else
+ EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ if (S && FlagsNZNeeded())
+ TST(rd, rd); // no flag-setting form is used here, so the flags come from a separate test of the result
+ break;
+ case 0xC: // ORR
+ if (op2.IsImm)
+ ORRI2R(rd, rn, op2.Imm, W0);
+ else
+ ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ if (S && FlagsNZNeeded())
+ TST(rd, rd); // same as EOR: derive the flags from the result
+ break;
+ case 0xE: // BIC
+ if (S)
+ {
+ if (op2.IsImm)
+ ANDSI2R(rd, rn, ~op2.Imm, W0); // BIC with an immediate is an AND with the inverted immediate
+ else
+ BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else
+ {
+ if (op2.IsImm)
+ ANDI2R(rd, rn, ~op2.Imm, W0);
+ else
+ BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+ }
+ break;
+ }
+
+ if (S)
+ Comp_RetriveFlags(false); // false: CPSR bits 29/28 are not taken from the host flags here
+}
+
+// Emits an ARM arithmetic data-processing operation: rd = rn <op> op2.
+//   op: ARM opcode number - 0x2 SUB, 0x3 RSB, 0x4 ADD, 0x5 ADC, 0x6 SBC, 0x7 RSC.
+//   S:  request a flag update; dropped when CurInstr.SetFlags is empty.
+// W0-W3 are used as scratch. For the carry-using ops (ADC/SBC/RSC) with S set,
+// the C and V results of the two chained additions are accumulated in W2/W3 and
+// inserted into RCPSR bits 29/28 directly; all other flags are copied from the
+// host flags by Comp_RetriveFlags.
+void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
+{
+    // AArch64 ADD/SUB cannot take a rotated second operand: materialise it in W0 first.
+    if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
+    {
+        MOV(W0, op2.Reg.Rm, op2.ToArithOption());
+        op2 = Op2(W0, ST_LSL, 0);
+    }
+
+    if (S && !CurInstr.SetFlags)
+        S = false;
+
+    bool CVInGPR = false;
+    switch (op)
+    {
+    case 0x2: // SUB
+        if (S)
+        {
+            if (op2.IsImm)
+                SUBSI2R(rd, rn, op2.Imm, W0);
+            else
+                SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+        }
+        else
+        {
+            // The original also emitted MOVI2R(W2, op2.Imm) here; W2 is never read
+            // on this path, so that was a dead instruction in the generated code.
+            if (op2.IsImm)
+                SUBI2R(rd, rn, op2.Imm, W0);
+            else
+                SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+        }
+        break;
+    case 0x3: // RSB
+        // rd = op2 - rn: op2 becomes the first operand, so it has to be a plain register.
+        if (op2.IsZero())
+        {
+            op2 = Op2(WZR);
+        }
+        else if (op2.IsImm)
+        {
+            MOVI2R(W1, op2.Imm);
+            op2 = Op2(W1);
+        }
+        else if (op2.Reg.ShiftAmount != 0)
+        {
+            MOV(W1, op2.Reg.Rm, op2.ToArithOption());
+            op2 = Op2(W1);
+        }
+
+        if (S)
+            SUBS(rd, op2.Reg.Rm, rn);
+        else
+            SUB(rd, op2.Reg.Rm, rn);
+        break;
+    case 0x4: // ADD
+        if (S)
+        {
+            if (op2.IsImm)
+                ADDSI2R(rd, rn, op2.Imm, W0);
+            else
+                ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+        }
+        else
+        {
+            if (op2.IsImm)
+                ADDI2R(rd, rn, op2.Imm, W0);
+            else
+                ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
+        }
+        break;
+    case 0x5: // ADC
+        UBFX(W2, RCPSR, 29, 1); // W2 = incoming carry
+        if (S)
+        {
+            CVInGPR = true;
+            // first addition: rn + carry, remember its C/V
+            ADDS(W1, rn, W2);
+            CSET(W2, CC_CS);
+            CSET(W3, CC_VS);
+            // second addition: + op2
+            if (op2.IsImm)
+                ADDSI2R(rd, W1, op2.Imm, W0);
+            else
+                ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+            // keep W2/W3 if the second addition left C/V clear, otherwise force them to 1
+            CSINC(W2, W2, WZR, CC_CC);
+            CSINC(W3, W3, WZR, CC_VC);
+        }
+        else
+        {
+            ADD(W1, rn, W2);
+            if (op2.IsImm)
+                ADDI2R(rd, W1, op2.Imm, W0);
+            else
+                ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+        }
+        break;
+    case 0x6: // SBC
+        UBFX(W2, RCPSR, 29, 1); // W2 = incoming carry
+        // W1 = -op2 - 1
+        if (op2.IsImm)
+            MOVI2R(W1, ~op2.Imm);
+        else
+            ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
+        if (S)
+        {
+            CVInGPR = true;
+            ADDS(W1, W2, W1);
+            CSET(W2, CC_CS);
+            CSET(W3, CC_VS);
+            ADDS(rd, rn, W1);
+            CSINC(W2, W2, WZR, CC_CC);
+            CSINC(W3, W3, WZR, CC_VC);
+        }
+        else
+        {
+            ADD(W1, W2, W1);
+            ADD(rd, rn, W1);
+        }
+        break;
+    case 0x7: // RSC
+        UBFX(W2, RCPSR, 29, 1); // W2 = incoming carry
+        // W1 = -rn - 1
+        MVN(W1, rn);
+        // W0 is free whenever op2 is an immediate (it is only written above for
+        // register operands), so it is passed as the scratch register like in
+        // every other case. Without a scratch register an immediate that does
+        // not fit the add/sub immediate encoding cannot be materialised.
+        if (S)
+        {
+            CVInGPR = true;
+            ADDS(W1, W2, W1);
+            CSET(W2, CC_CS);
+            CSET(W3, CC_VS);
+            if (op2.IsImm)
+                ADDSI2R(rd, W1, op2.Imm, W0);
+            else
+                ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+            CSINC(W2, W2, WZR, CC_CC);
+            CSINC(W3, W3, WZR, CC_VC);
+        }
+        else
+        {
+            ADD(W1, W2, W1);
+            if (op2.IsImm)
+                ADDI2R(rd, W1, op2.Imm, W0);
+            else
+                ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
+        }
+        break;
+    }
+
+    if (S)
+    {
+        if (CVInGPR)
+        {
+            // BFI does not touch the host flags, so N/Z can still be read afterwards.
+            BFI(RCPSR, W2, 29, 1);
+            BFI(RCPSR, W3, 28, 1);
+        }
+        Comp_RetriveFlags(!CVInGPR);
+    }
+}
+
+void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2) // emits a flag-only comparison of rn with op2 and copies the resulting flags into RCPSR
+{ // op uses the ARM opcode numbers handled below (0x8-0xB); W0 is scratch
+ if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR) // a rotated operand is materialised in W0 first
+ {
+ MOV(W0, op2.Reg.Rm, op2.ToArithOption());
+ op2 = Op2(W0, ST_LSL, 0);
+ }
+
+ switch (op)
+ {
+ case 0x8: // TST
+ if (op2.IsImm)
+ TSTI2R(rn, op2.Imm, W0);
+ else
+ ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption()); // result discarded, only the flags are kept
+ break;
+ case 0x9: // TEQ
+ if (op2.IsImm)
+ EORI2R(W0, rn, op2.Imm, W0);
+ else
+ EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
+ TST(W0, W0); // flags are derived from the XOR result held in W0
+ break;
+ case 0xA: // CMP
+ if (op2.IsImm)
+ CMPI2R(rn, op2.Imm, W0);
+ else
+ CMP(rn, op2.Reg.Rm, op2.ToArithOption());
+ break;
+ case 0xB: // CMN
+ if (op2.IsImm)
+ ADDSI2R(WZR, rn, op2.Imm, W0); // flag-setting add whose result is discarded
+ else
+ CMN(rn, op2.Reg.Rm, op2.ToArithOption());
+ break;
+ }
+
+ Comp_RetriveFlags(op >= 0xA); // only CMP/CMN take CPSR bits 29/28 from the host flags
+}
+
+// Decodes the second operand of an ARM data-processing instruction into op2.
+// Also accounts for the instruction's cycles!
+void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
+{
+    const bool immediateForm = CurInstr.Instr & (1 << 25);
+
+    if (!immediateForm)
+    {
+        // Register operand, shifted either by an immediate or by another register.
+        const int shiftOp = (CurInstr.Instr >> 5) & 0x3;
+        op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
+
+        const bool shiftByRegister = CurInstr.Instr & (1 << 4);
+        if (!shiftByRegister)
+        {
+            Comp_AddCycles_C();
+
+            const int shiftAmount = (CurInstr.Instr >> 7) & 0x1F;
+            Comp_RegShiftImm(shiftOp, shiftAmount, S, op2);
+            return;
+        }
+
+        Comp_AddCycles_CI(1);
+
+        ARM64Reg shiftReg = MapReg(CurInstr.A_Reg(8));
+        if (CurInstr.A_Reg(0) == 15)
+        {
+            // With a register-specified shift, r15 is used as operand with 4 added.
+            ADD(W0, op2.Reg.Rm, 4);
+            op2.Reg.Rm = W0;
+        }
+        Comp_RegShiftReg(shiftOp, S, op2, shiftReg);
+        return;
+    }
+
+    // 8-bit immediate rotated right by an even amount.
+    Comp_AddCycles_C();
+
+    const u32 rotation = (CurInstr.Instr >> 7) & 0x1E;
+    const u32 value = ROR(CurInstr.Instr & 0xFF, rotation);
+
+    // A non-zero rotation puts bit 31 of the rotated value into CPSR bit 29.
+    if (S && rotation && (CurInstr.SetFlags & 0x2))
+    {
+        CPSRDirty = true;
+        if (value & 0x80000000)
+            ORRI2R(RCPSR, RCPSR, 1 << 29);
+        else
+            ANDI2R(RCPSR, RCPSR, ~(1 << 29));
+    }
+
+    op2 = Op2(value);
+}
+
+void Compiler::A_Comp_ALUCmpOp()
+{
+    // ARM compare-class instruction: decode the opcode and both operands,
+    // then hand over to Comp_Compare.
+    const u32 opcode = (CurInstr.Instr >> 21) & 0xF;
+    const ARM64Reg lhs = MapReg(CurInstr.A_Reg(16));
+
+    // Opcodes up to 0x9 (TST/TEQ) let the operand decoder update CPSR bit 29;
+    // for the others Comp_Compare takes it from the host flags.
+    const bool operandSetsCarry = opcode <= 0x9;
+
+    Op2 rhs;
+    A_Comp_GetOp2(operandSetsCarry, rhs);
+
+    Comp_Compare(opcode, lhs, rhs);
+}
+
+void Compiler::A_Comp_ALUMovOp() // emits an ARM MOV (or MVN when the opcode is 0xF) of the decoded second operand into rd
+{
+ bool S = CurInstr.Instr & (1 << 20); // bit 20: flag-setting form
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+ Op2 op2;
+ A_Comp_GetOp2(S, op2); // also accounts for cycles and may update CPSR bit 29
+
+ if (op == 0xF) // MVN
+ {
+ if (op2.IsImm)
+ {
+ if (CurInstr.Cond() == 0xE) // only for condition 0xE is the constant recorded in the register cache
+ RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
+ MOVI2R(rd, ~op2.Imm);
+ }
+ else
+ ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
+ }
+ else // MOV
+ {
+ if (op2.IsImm)
+ {
+ if (CurInstr.Cond() == 0xE) // only for condition 0xE is the constant recorded in the register cache
+ RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
+ MOVI2R(rd, op2.Imm);
+ }
+ else
+ {
+ // ORR with shifted operand has cycles latency
+ if (op2.Reg.ShiftAmount > 0) // so a dedicated shift instruction is emitted instead
+ {
+ switch (op2.Reg.ShiftType)
+ {
+ case ST_LSL: LSL(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_LSR: LSR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_ASR: ASR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ case ST_ROR: ROR_(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
+ }
+ }
+ else
+ {
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ }
+ }
+ }
+
+ if (S)
+ {
+ if (FlagsNZNeeded())
+ TST(rd, rd); // the moves above do not set host flags, so test the result
+ Comp_RetriveFlags(false);
+ }
+
+ if (CurInstr.Info.Branches()) // the instruction is marked as branching: jump to the value now in rd
+ Comp_JumpTo(rd, true, S);
+}
+
+void Compiler::A_Comp_ALUTriOp()
+{
+    // Three-operand ARM data-processing instruction: rd = rn <op> op2.
+    const u32 opcode = (CurInstr.Instr >> 21) & 0xF;
+    const bool setFlags = CurInstr.Instr & (1 << 20);
+
+    // 0xF303 has one bit per opcode routed to Comp_Logical (bits 0, 1, 8, 9, 12-15);
+    // every other opcode goes to Comp_Arithmetic.
+    const bool isLogical = (1 << opcode) & 0xF303;
+
+    ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+    ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
+
+    // Only logical operations let the operand decoder update CPSR bit 29.
+    Op2 operand;
+    A_Comp_GetOp2(setFlags && isLogical, operand);
+
+    // A zero immediate is replaced by the zero register.
+    if (operand.IsImm && operand.Imm == 0)
+        operand = Op2(WZR, ST_LSL, 0);
+
+    if (!isLogical)
+        Comp_Arithmetic(opcode, setFlags, rd, rn, operand);
+    else
+        Comp_Logical(opcode, setFlags, rd, rn, operand);
+
+    if (CurInstr.Info.Branches())
+        Comp_JumpTo(rd, true, setFlags);
+}
+
+void Compiler::A_Comp_Clz() // emits a count-leading-zeros of register A_Reg(0) into register A_Reg(12)
+{
+ Comp_AddCycles_C();
+
+ ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+ ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+
+ CLZ(rd, rm);
+
+ assert(Num == 0); // only expected on CPU 0 (elsewhere in this file Num == 0 selects the ARMv5 core)
+}
+
+void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn) // emits rd = rm * rs, plus rn when mla is set; rn is unused otherwise
+{
+ if (Num == 0)
+ {
+ Comp_AddCycles_CI(S ? 3 : 1); // CPU 0: fixed cost that depends only on S
+ }
+ else
+ {
+ CLS(W0, rs); // other CPU: cost is computed at runtime from the leading sign bits of rs
+ Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3)); // passes W0 >> 3 as the variable part
+ }
+
+ if (mla)
+ MADD(rd, rm, rs, rn);
+ else
+ MUL(rd, rm, rs);
+
+ if (S && FlagsNZNeeded()) // flags are only produced when some consumer needs N/Z
+ {
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ }
+}
+
+void Compiler::A_Comp_Mul_Long()
+{
+    // 64-bit multiply, optionally accumulating. The 64-bit value is split over
+    // two guest registers: the low word in A_Reg(12), the high word in A_Reg(16).
+    ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
+    ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+    ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+    ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+
+    const bool setFlags = CurInstr.Instr & (1 << 20);
+    const bool accumulate = CurInstr.Instr & (1 << 21);
+    const bool isSigned = CurInstr.Instr & (1 << 22);
+
+    if (Num != 0)
+    {
+        // Cost is computed at runtime from the leading sign/zero bits of rs.
+        if (isSigned)
+            CLS(W0, rs);
+        else
+            CLZ(W0, rs);
+        Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
+    }
+    else
+    {
+        Comp_AddCycles_CI(setFlags ? 3 : 1);
+    }
+
+    const ARM64Reg low64 = EncodeRegTo64(rn);
+    const ARM64Reg high64 = EncodeRegTo64(rd);
+
+    if (accumulate)
+    {
+        // X0 = high:low accumulator assembled from the two guest registers
+        MOV(W0, rn);
+        BFI(X0, high64, 32, 32);
+        if (isSigned)
+            SMADDL(low64, rm, rs, X0);
+        else
+            UMADDL(low64, rm, rs, X0);
+    }
+    else if (isSigned)
+    {
+        SMULL(low64, rm, rs);
+    }
+    else
+    {
+        UMULL(low64, rm, rs);
+    }
+
+    // The full 64-bit result is tested before the high word is split off.
+    if (setFlags && FlagsNZNeeded())
+        TST(low64, low64);
+    UBFX(high64, low64, 32, 32);
+
+    if (setFlags)
+        Comp_RetriveFlags(false);
+}
+
+// Emits the ARM signed halfword multiply family. op = instruction bits 21-24:
+//   0b1000 SMLAxy, 0b1001 SMLAWy/SMULWy, 0b1010 SMLALxy, 0b1011 SMULxy.
+// y (bit 6) selects the top (set) or bottom (clear) halfword of rs; x (bit 5)
+// does the same for rm, except for op 0b1001 where it selects SMULWy (set)
+// versus SMLAWy (clear).
+// W0-W2 are scratch. When the accumulate step of SMLAxy/SMLAWy overflows,
+// CPSR bit 27 (mask 0x08000000) is set.
+void Compiler::A_Comp_Mul_Short()
+{
+    ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
+    ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
+    ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
+    u32 op = (CurInstr.Instr >> 21) & 0xF;
+
+    bool x = CurInstr.Instr & (1 << 5);
+    bool y = CurInstr.Instr & (1 << 6);
+
+    // W1 = sign-extended halfword of rs
+    SBFX(W1, rs, y ? 16 : 0, 16);
+
+    if (op == 0b1000)
+    {
+        // SMLAxy
+
+        SBFX(W0, rm, x ? 16 : 0, 16);
+
+        MUL(W0, W0, W1);
+
+        // W1 = CPSR with bit 27 set, selected below only if the addition overflows
+        ORRI2R(W1, RCPSR, 0x08000000);
+
+        ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+        ADDS(rd, W0, rn);
+
+        CSEL(RCPSR, W1, RCPSR, CC_VS);
+
+        CPSRDirty = true;
+
+        Comp_AddCycles_C();
+    }
+    else if (op == 0b1011)
+    {
+        // SMULxy
+
+        SBFX(W0, rm, x ? 16 : 0, 16);
+
+        MUL(rd, W0, W1);
+
+        Comp_AddCycles_C();
+    }
+    else if (op == 0b1010)
+    {
+        // SMLALxy
+
+        ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+
+        // X2 = rd:rn, the 64-bit accumulator.
+        // Inserting at bit 32 is a 64-bit bitfield operation, so rd is passed as
+        // a 64-bit register, exactly like the same step in A_Comp_Mul_Long.
+        // (The original passed the 32-bit register here.)
+        MOV(W2, rn);
+        BFI(X2, EncodeRegTo64(rd), 32, 32);
+
+        SBFX(W0, rm, x ? 16 : 0, 16);
+
+        SMADDL(EncodeRegTo64(rn), W0, W1, X2);
+
+        // split the 64-bit result: low word stays in rn, high word goes to rd
+        UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
+
+        Comp_AddCycles_CI(1);
+    }
+    else if (op == 0b1001)
+    {
+        // SMLAWy/SMULWy
+        SMULL(X0, rm, W1);
+        // keep bits 16 and up of the product; SMULWy writes them straight to rd
+        ASR(x ? EncodeRegTo64(rd) : X0, X0, 16);
+
+        if (!x)
+        {
+            // SMLAWy: accumulate, setting CPSR bit 27 on overflow
+            ORRI2R(W1, RCPSR, 0x08000000);
+
+            ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
+            ADDS(rd, W0, rn);
+
+            CSEL(RCPSR, W1, RCPSR, CC_VS);
+
+            CPSRDirty = true;
+        }
+
+        Comp_AddCycles_C();
+    }
+}
+
+void Compiler::A_Comp_Mul()
+{
+    // ARM MUL/MLA: decode the registers and forward to Comp_Mul_Mla.
+    const ARM64Reg dest = MapReg(CurInstr.A_Reg(16));
+    const ARM64Reg factorA = MapReg(CurInstr.A_Reg(0));
+    const ARM64Reg factorB = MapReg(CurInstr.A_Reg(8));
+
+    const bool setFlags = CurInstr.Instr & (1 << 20);
+    const bool accumulate = CurInstr.Instr & (1 << 21);
+
+    // The addend register is only mapped for the accumulating form.
+    const ARM64Reg addend = accumulate ? MapReg(CurInstr.A_Reg(12)) : INVALID_REG;
+
+    Comp_Mul_Mla(setFlags, accumulate, dest, factorA, factorB, addend);
+}
+
+void Compiler::T_Comp_ShiftImm() // Thumb shift-by-immediate: rd = T_Reg(3) shifted by a 5-bit immediate, always requesting flags
+{
+ Comp_AddCycles_C();
+
+ u32 op = (CurInstr.Instr >> 11) & 0x3; // shift kind as understood by Comp_RegShiftImm
+ int amount = (CurInstr.Instr >> 6) & 0x1F;
+
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
+ Op2 op2;
+ op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
+ Comp_RegShiftImm(op, amount, true, op2);
+ if (op2.IsImm) // Comp_RegShiftImm can turn the operand into a constant (LSR with amount 0)
+ MOVI2R(rd, op2.Imm);
+ else
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+
+ Comp_RetriveFlags(false); // CPSR bit 29 was already handled by Comp_RegShiftImm
+}
+
+void Compiler::T_Comp_AddSub_() // Thumb three-operand add/subtract with a register or 3-bit immediate operand, always requesting flags
+{
+ Comp_AddCycles_C();
+
+ Op2 op2;
+ if (CurInstr.Instr & (1 << 10)) // bit 10: second operand is the 3-bit immediate in bits 6-8
+ op2 = Op2((CurInstr.Instr >> 6) & 0x7);
+ else
+ op2 = Op2(MapReg(CurInstr.T_Reg(6)));
+
+ Comp_Arithmetic(
+ CurInstr.Instr & (1 << 9) ? 0x2 : 0x4, // bit 9 selects SUB (0x2) over ADD (0x4)
+ true,
+ MapReg(CurInstr.T_Reg(0)),
+ MapReg(CurInstr.T_Reg(3)),
+ op2);
+}
+
+void Compiler::T_Comp_ALUImm8() // Thumb operation between register T_Reg(8) and an 8-bit immediate
+{
+ Comp_AddCycles_C();
+
+ u32 imm = CurInstr.Instr & 0xFF;
+ int op = (CurInstr.Instr >> 11) & 0x3;
+
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
+
+ switch (op)
+ {
+ case 0: // move immediate
+ MOVI2R(rd, imm);
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ break;
+ case 1: // compare (opcode 0xA of Comp_Compare)
+ Comp_Compare(0xA, rd, Op2(imm));
+ break;
+ case 2: // add (0x4)
+ case 3: // subtract (0x2)
+ Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
+ break;
+ }
+}
+
+void Compiler::T_Comp_ALU() // Thumb two-register ALU group: dispatches the 4-bit opcode in bits 6-9 to the shared helpers
+{
+ int op = (CurInstr.Instr >> 6) & 0xF;
+ ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
+ ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
+
+ if ((op >= 0x2 && op <= 0x4) || op == 0x7) // the register-specified shifts are charged one extra internal cycle
+ Comp_AddCycles_CI(1);
+ else
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0x0: // AND
+ Comp_Logical(0x0, true, rd, rd, Op2(rs));
+ break;
+ case 0x1: // EOR
+ Comp_Logical(0x1, true, rd, rd, Op2(rs));
+ break;
+ case 0x2: // shift kinds 0, 1, 2 of Comp_RegShiftReg
+ case 0x3:
+ case 0x4:
+ case 0x7: // shift kind 3 (rotate)
+ {
+ Op2 op2;
+ op2.Reg.Rm = rd;
+ Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs); // rd shifted by rs
+ MOV(rd, op2.Reg.Rm, op2.ToArithOption());
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false); // CPSR bit 29 was already handled by Comp_RegShiftReg
+ }
+ break;
+ case 0x5: // ADC
+ Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
+ break;
+ case 0x6: // SBC
+ Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
+ break;
+ case 0x8: // TST
+ Comp_Compare(0x8, rd, Op2(rs));
+ break;
+ case 0x9: // negate: emitted as RSB, rd = 0 - rs
+ Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
+ break;
+ case 0xA: // CMP
+ Comp_Compare(0xA, rd, Op2(rs));
+ break;
+ case 0xB: // CMN
+ Comp_Compare(0xB, rd, Op2(rs));
+ break;
+ case 0xC: // ORR
+ Comp_Logical(0xC, true, rd, rd, Op2(rs));
+ break;
+ case 0xD: // multiply without accumulate
+ Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
+ break;
+ case 0xE: // BIC
+ Comp_Logical(0xE, true, rd, rd, Op2(rs));
+ break;
+ case 0xF: // bitwise NOT
+ MVN(rd, rs);
+ if (FlagsNZNeeded())
+ TST(rd, rd);
+ Comp_RetriveFlags(false);
+ break;
+ }
+}
+
+void Compiler::T_Comp_ALU_HiReg() // Thumb add/compare/move with 4-bit register numbers
+{
+ u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8)); // bits 0-2 plus bit 7 as the top bit
+ ARM64Reg rdMapped = MapReg(rd);
+ ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
+
+ u32 op = (CurInstr.Instr >> 8) & 0x3;
+
+ Comp_AddCycles_C();
+
+ switch (op)
+ {
+ case 0: // add, without flag update
+ Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
+ break;
+ case 1: // compare; returns early because nothing is written to rd
+ Comp_Compare(0xA, rdMapped, rs);
+ return;
+ case 2: // move
+ MOV(rdMapped, rs);
+ break;
+ }
+
+ if (rd == 15) // the destination is r15: continue with a jump to the new value, staying in the current instruction set
+ {
+ Comp_JumpTo(rdMapped, false, false);
+ }
+}
+
+void Compiler::T_Comp_AddSP()
+{
+    // Adjusts r13 by a 7-bit immediate scaled by 4; bit 7 selects subtraction.
+    Comp_AddCycles_C();
+
+    const ARM64Reg stackPtr = MapReg(13);
+    const u32 magnitude = (CurInstr.Instr & 0x7F) << 2;
+    const bool subtract = CurInstr.Instr & (1 << 7);
+
+    if (!subtract)
+        ADD(stackPtr, stackPtr, magnitude);
+    else
+        SUB(stackPtr, stackPtr, magnitude);
+}
+
+void Compiler::T_Comp_RelAddr()
+{
+    // rd = base + (8-bit immediate * 4), where the base is either r13 (bit 11 set)
+    // or the compile-time R15 with bit 1 cleared.
+    Comp_AddCycles_C();
+
+    const ARM64Reg dest = MapReg(CurInstr.T_Reg(8));
+    const u32 displacement = (CurInstr.Instr & 0xFF) << 2;
+    const bool relativeToSP = CurInstr.Instr & (1 << 11);
+
+    if (!relativeToSP)
+    {
+        // R15 is known while compiling, so the address is a constant.
+        MOVI2R(dest, (R15 & ~2) + displacement);
+        return;
+    }
+
+    const ARM64Reg stackPtr = MapReg(13);
+    ADD(dest, stackPtr, displacement);
+}
+
+} \ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp
new file mode 100644
index 0000000..f130938
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp
@@ -0,0 +1,421 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Arm64Gen;
+
+// hack: fixed cycle count that Gen_JumpTo9 substitutes when the MemTimings byte it loaded equals 0xFF
+const int kCodeCacheTiming = 3;
+
+namespace ARMJIT
+{
+
+template <typename T> // T is the concrete CPU class; instantiated below for ARMv5 and ARMv4
+void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR) // free function wrapper so generated code can call the member function cpu->JumpTo
+{
+ cpu->JumpTo(addr, changeCPSR);
+}
+
+void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) // emits a jump to a target known at compile time; bit 0 of addr selects Thumb
+{
+ // we can simplify constant branches by a lot
+ // it's not completely safe to assume stuff like, which instructions to preload
+ // we'll see how it works out
+
+ IrregularCycles = true;
+
+ u32 newPC; // value to store into R[15]: target + 2 (Thumb) or target + 4 (ARM)
+ u32 cycles = 0; // fetch cost of the jump, computed here at compile time
+ bool setupRegion = false;
+
+ if (addr & 0x1 && !Thumb) // switching ARM -> Thumb: set CPSR bit 0x20
+ {
+ CPSRDirty = true;
+ ORRI2R(RCPSR, RCPSR, 0x20);
+ }
+ else if (!(addr & 0x1) && Thumb) // switching Thumb -> ARM: clear CPSR bit 0x20
+ {
+ CPSRDirty = true;
+ ANDI2R(RCPSR, RCPSR, ~0x20);
+ }
+
+ if (Num == 0)
+ {
+ ARMv5* cpu9 = (ARMv5*)CurCPU;
+
+ u32 oldregion = R15 >> 24;
+ u32 newregion = addr >> 24;
+
+ u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
+ u32 compileTimeCodeCycles = cpu9->RegionCodeCycles; // saved so the CPU object can be restored below
+ cpu9->RegionCodeCycles = regionCodeCycles; // temporarily changed so the CodeRead32 calls below use the target's timing
+
+ MOVI2R(W0, regionCodeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles)); // the generated code applies the same update at runtime
+
+ setupRegion = newregion != oldregion;
+ if (setupRegion)
+ cpu9->SetupCodeMem(addr);
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+ // two-opcodes-at-once fetch
+ // doesn't matter if we put garbage in the MSbs there
+ if (addr & 0x2) // target not word aligned: two fetches are charged
+ {
+ cpu9->CodeRead32(addr-2, true) >> 16; // value is discarded; the call is made for the CodeCycles it leaves behind
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+2, false);
+ cycles += CurCPU->CodeCycles;
+ }
+ else // word-aligned target: a single fetch is charged
+ {
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ }
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ cpu9->CodeRead32(addr, true);
+ cycles += cpu9->CodeCycles;
+ cpu9->CodeRead32(addr+4, false);
+ cycles += cpu9->CodeCycles;
+ }
+
+ cpu9->RegionCodeCycles = compileTimeCodeCycles; // undo the temporary compile-time changes
+ if (setupRegion)
+ cpu9->SetupCodeMem(R15);
+ }
+ else
+ {
+ ARMv4* cpu7 = (ARMv4*)CurCPU;
+
+ u32 codeRegion = addr >> 24;
+ u32 codeCycles = addr >> 15; // cheato
+
+ cpu7->CodeRegion = codeRegion;
+ cpu7->CodeCycles = codeCycles;
+
+ MOVI2R(W0, codeRegion);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion)); // the generated code applies the same updates at runtime
+ MOVI2R(W0, codeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
+
+ if (addr & 0x1)
+ {
+ addr &= ~0x1;
+ newPC = addr+2;
+
+ // this is necessary because ARM7 bios protection
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC; // NOTE(review): nothing visible between this write and the restore reads R[15] - confirm it is still needed
+
+ cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1]; // Thumb target: columns 0 and 1
+
+ CurCPU->R[15] = compileTimePC;
+ }
+ else
+ {
+ addr &= ~0x3;
+ newPC = addr+4;
+
+ u32 compileTimePC = CurCPU->R[15];
+ CurCPU->R[15] = newPC;
+
+ cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3]; // ARM target: columns 2 and 3
+
+ CurCPU->R[15] = compileTimePC;
+ }
+
+ cpu7->CodeRegion = R15 >> 24; // put the compile-time CPU state back to the current block
+ cpu7->CodeCycles = addr >> 15; // NOTE(review): uses the target addr while the line above uses R15 - looks like R15 >> 15 was intended, confirm
+ }
+
+ if (Exit) // R[15] is only written back when Exit is set
+ {
+ MOVI2R(W0, newPC);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
+ }
+ if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) // cost is folded into the compile-time total in this case
+ ConstantCycles += cycles;
+ else // otherwise it is subtracted from the cycle counter register at runtime
+ SUB(RCycles, RCycles, cycles);
+}
+
+
+// Generates the runtime "jump to the address in W0" routine for CPU 0 (ARMv5)
+// and returns its entry point.
+//   kind 0: bit 0 of W0 chooses between ARM and Thumb, and CPSR bit 0x20 is updated;
+//   kind 1: always ARM;  kind 2: always Thumb (RCPSR is left alone for 1 and 2).
+// The routine stores the new R[15] (target + 4 for ARM, + 2 for Thumb), records
+// RegionCodeCycles and subtracts the fetch cost from RCycles. It clobbers W0-W3.
+// The cycle rules mirror the compile-time Comp_JumpTo(u32, bool).
+void* Compiler::Gen_JumpTo9(int kind)
+{
+    AlignCode16();
+    void* res = GetRXPtr();
+
+    // W1 = MemTimings[W0 >> 12][0].
+    // MemTimings is indexed as a two-dimensional array by the compile-time path
+    // (MemTimings[addr >> 12][0]), so the row index has to be scaled by the size
+    // of one row before it is used as a byte offset. The original used the
+    // unscaled index, which only addresses the right byte if a row is one byte.
+    const u32 timingRowSize = sizeof(ARMv5::MemTimings[0]);
+    LSR(W1, W0, 12);
+    if (timingRowSize != 1)
+    {
+        MOVI2R(W2, timingRowSize);
+        MUL(W1, W1, W2);
+    }
+    ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
+    LDRB(W1, RCPU, W1);
+
+    LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, ITCMSize));
+
+    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
+
+    // a timing byte of 0xFF is replaced by kCodeCacheTiming
+    CMP(W1, 0xFF);
+    MOVI2R(W3, kCodeCacheTiming);
+    CSEL(W1, W3, W1, CC_EQ);
+    // targets below ITCMSize cost a single cycle per fetch
+    CMP(W0, W2);
+    CSINC(W1, W1, WZR, CC_HS);
+
+    FixupBranch switchToThumb;
+    if (kind == 0)
+        switchToThumb = TBNZ(W0, 0);
+
+    if (kind == 0 || kind == 1)
+    {
+        // ARM
+        if (kind == 0)
+            ANDI2R(RCPSR, RCPSR, ~0x20);
+
+        ANDI2R(W0, W0, ~3);
+        ADD(W0, W0, 4);
+        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
+
+        // two fetches
+        ADD(W1, W1, W1);
+        SUB(RCycles, RCycles, W1);
+        RET();
+    }
+
+    if (kind == 0 || kind == 2)
+    {
+        // Thumb
+        if (kind == 0)
+        {
+            SetJumpTarget(switchToThumb);
+            ORRI2R(RCPSR, RCPSR, 0x20);
+        }
+
+        ANDI2R(W0, W0, ~1);
+        ADD(W0, W0, 2);
+        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
+
+        // W0 now holds target + 2, so bit 1 of W0 is the inverse of bit 1 of the
+        // target. The compile-time path charges two fetches when the target has
+        // bit 1 set, i.e. when bit 1 of W0 is clear (EQ), and one otherwise.
+        // The original selected the two values the other way round.
+        ADD(W2, W1, W1);
+        TSTI2R(W0, 0x2);
+        CSEL(W1, W2, W1, CC_EQ);
+        SUB(RCycles, RCycles, W1);
+        RET();
+    }
+
+    return res;
+}
+
+// Generates the runtime "jump to the address in W0" routine for the other CPU
+// (ARMv4) and returns its entry point.
+//   kind 0: bit 0 of W0 chooses between ARM and Thumb, and CPSR bit 0x20 is updated;
+//   kind 1: always ARM;  kind 2: always Thumb (RCPSR is left alone for 1 and 2).
+// The routine stores CodeRegion/CodeCycles, the new R[15] (target + 4 for ARM,
+// + 2 for Thumb) and subtracts the fetch cost from RCycles. It clobbers W0-W3.
+// The cycle rules mirror the compile-time Comp_JumpTo(u32, bool).
+void* Compiler::Gen_JumpTo7(int kind)
+{
+    void* res = GetRXPtr();
+
+    LSR(W1, W0, 24);
+    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
+    LSR(W1, W0, 15);
+    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
+
+    // W3 = the four timing bytes of row W1, loaded as one 32-bit word
+    // (column 0 in bits 0-7 ... column 3 in bits 24-31)
+    MOVP2R(X2, NDS::ARM7MemTimings);
+    LDR(W3, X2, ArithOption(W1, true));
+
+    FixupBranch switchToThumb;
+    if (kind == 0)
+        switchToThumb = TBNZ(W0, 0);
+
+    if (kind == 0 || kind == 1)
+    {
+        // ARM: the compile-time path charges columns 2 and 3 for an ARM target.
+        // The original extracted columns 0 and 1 here (and 2 and 3 for Thumb),
+        // i.e. the two cases were swapped.
+        UBFX(W2, W3, 16, 8);
+        UBFX(W3, W3, 24, 8);
+        ADD(W2, W3, W2);
+        SUB(RCycles, RCycles, W2);
+
+        ANDI2R(W0, W0, ~3);
+
+        if (kind == 0)
+            ANDI2R(RCPSR, RCPSR, ~0x20);
+
+        ADD(W3, W0, 4);
+        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+        RET();
+    }
+    if (kind == 0 || kind == 2)
+    {
+        if (kind == 0)
+        {
+            SetJumpTarget(switchToThumb);
+
+            ORRI2R(RCPSR, RCPSR, 0x20);
+        }
+
+        // Thumb: columns 0 and 1, as in the compile-time path
+        UBFX(W2, W3, 0, 8);
+        UBFX(W3, W3, 8, 8);
+        ADD(W2, W3, W2);
+        SUB(RCycles, RCycles, W2);
+
+        ANDI2R(W0, W0, ~1);
+
+        ADD(W3, W0, 2);
+        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
+
+        RET();
+    }
+
+    return res;
+}
+
+void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR) // emits a jump to an address held in a register
+{ // switchThumb: bit 0 of the address picks the instruction set; restoreCPSR: forwarded as changeCPSR to the CPU's JumpTo
+ IrregularCycles = true;
+
+ if (!restoreCPSR) // fast path: call one of the pre-generated routines with the address in W0
+ {
+ if (switchThumb)
+ CPSRDirty = true;
+ MOV(W0, addr);
+ BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]); // index 0 decides by bit 0, 1 = ARM, 2 = Thumb
+ }
+ else // slow path: call the CPU object's JumpTo through the trampoline
+ {
+
+ bool cpsrDirty = CPSRDirty;
+ SaveCPSR();
+ SaveCycles();
+ PushRegs(restoreCPSR);
+
+ if (switchThumb)
+ MOV(W1, addr);
+ else
+ {
+ if (Thumb) // keep the current instruction set by forcing bit 0 of the address
+ ORRI2R(W1, addr, 1);
+ else
+ ANDI2R(W1, addr, ~1);
+ }
+ MOV(X0, RCPU); // arguments: X0 = cpu, W1 = addr, W2 = changeCPSR
+ MOVI2R(W2, restoreCPSR);
+ if (Num == 0)
+ QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
+ else
+ QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
+
+ PopRegs(restoreCPSR);
+ LoadCycles();
+ LoadCPSR();
+ if (CurInstr.Cond() < 0xE) // for conditions below 0xE the dirty state from before the call is restored
+ CPSRDirty = cpsrDirty;
+ }
+}
+
+void Compiler::A_Comp_BranchImm()
+{
+    // ARM branch with a 24-bit signed word offset, relative to R15.
+    const int bit24 = (CurInstr.Instr >> 24) & 1;
+    const s32 displacement = (s32)(CurInstr.Instr << 8) >> 6;
+
+    // Condition 0xF marks BLX_imm: bit 24 then contributes a halfword offset,
+    // bit 0 of the target is set (Thumb) and the branch always links.
+    const bool isBlx = CurInstr.Cond() == 0xF;
+
+    u32 destination = R15 + displacement;
+    if (isBlx)
+        destination += (bit24 << 1) + 1;
+
+    // Otherwise bit 24 is the link bit.
+    const bool writesLink = isBlx || bit24;
+    if (writesLink)
+        MOVI2R(MapReg(14), R15 - 4);
+
+    Comp_JumpTo(destination);
+}
+
+void Compiler::A_Comp_BranchXchangeReg() // ARM branch-and-exchange to the address in register A_Reg(0)
+{
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
+ MOV(W0, rn); // copy the target before r14 is written, in case rn is r14
+ if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
+ MOVI2R(MapReg(14), R15 - 4); // link value
+ Comp_JumpTo(W0, true); // true: bit 0 of the address selects the instruction set
+}
+
+void Compiler::T_Comp_BCOND() // Thumb conditional branch: emits both the taken and the not-taken path
+{
+ u32 cond = (CurInstr.Instr >> 8) & 0xF;
+ FixupBranch skipExecute = CheckCondition(cond); // emitted branch that is taken when the condition fails
+
+ s32 offset = (s32)(CurInstr.Instr << 24) >> 23; // sign-extended 8-bit offset times 2
+ Comp_JumpTo(R15 + offset + 1, true); // +1 marks a Thumb target; true forces the cycles to be subtracted at runtime
+
+ Comp_BranchSpecialBehaviour(true);
+
+ FixupBranch skipFailed = B(); // taken path jumps over the not-taken code
+ SetJumpTarget(skipExecute);
+ Comp_AddCycles_C(true);
+
+ Comp_BranchSpecialBehaviour(false);
+
+ SetJumpTarget(skipFailed);
+}
+
+void Compiler::T_Comp_B() // Thumb unconditional branch
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20; // sign-extended 11-bit offset times 2
+ Comp_JumpTo(R15 + offset + 1); // +1 marks a Thumb target
+}
+
+void Compiler::T_Comp_BranchXchangeReg() // Thumb branch-and-exchange to a register, with optional link (bit 7)
+{
+ bool link = CurInstr.Instr & (1 << 7);
+
+ if (link)
+ {
+ if (Num == 1) // no code is emitted for this case, only the message below is printed while compiling
+ {
+ printf("BLX unsupported on ARM7!!!\n");
+ return;
+ }
+ MOV(W0, MapReg(CurInstr.A_Reg(3))); // A_Reg(3) is used here to read the register number from bit 3 upwards
+ MOVI2R(MapReg(14), R15 - 1); // link value written to r14
+ Comp_JumpTo(W0, true);
+ }
+ else
+ {
+ ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
+ Comp_JumpTo(rn, true); // true: bit 0 of the address selects the instruction set
+ }
+}
+
+void Compiler::T_Comp_BL_LONG_1() // first half of the two-instruction Thumb long branch: leaves R15 + upper offset in r14
+{
+ s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9; // sign-extended 11-bit field shifted left by 12
+ MOVI2R(MapReg(14), R15 + offset); // constant, since R15 is known while compiling
+ Comp_AddCycles_C();
+}
+
+void Compiler::T_Comp_BL_LONG_2() // second half of the Thumb long branch: target = r14 + lower offset, then r14 receives the link value
+{
+ ARM64Reg lr = MapReg(14);
+ s32 offset = (CurInstr.Instr & 0x7FF) << 1; // 11-bit field times 2
+ ADD(W0, lr, offset); // the target is computed before r14 is overwritten
+ MOVI2R(lr, (R15 - 2) | 1); // link value with bit 0 set
+ Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12))); // only CPU 0 with bit 12 clear lets bit 0 of the target pick the instruction set
+}
+
+void Compiler::T_Comp_BL_Merged()
+{
+    // Both halves of a Thumb long branch handled as one instruction: the first
+    // half is in the low 16 bits of CurInstr.Instr, the second in the high 16.
+    Comp_AddCycles_C();
+
+    // From here on R15 is advanced past the second half.
+    R15 += 2;
+
+    const u32 secondHalf = CurInstr.Instr >> 16;
+    const s32 upperOffset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
+    const u32 lowerOffset = (secondHalf & 0x7FF) << 1;
+    const u32 base = R15 - 2;
+
+    u32 destination = base + upperOffset;
+    destination += lowerOffset;
+
+    // The target stays in Thumb on CPU 1, or when bit 12 of the second half is set.
+    const bool staysThumb = Num == 1 || (secondHalf & (1 << 12));
+    if (staysThumb)
+        destination |= 1;
+
+    // link value with bit 0 set
+    MOVI2R(MapReg(14), base | 1);
+
+    Comp_JumpTo(destination);
+}
+
+} \ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
new file mode 100644
index 0000000..413c673
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp
@@ -0,0 +1,918 @@
+#ifdef __SWITCH__
+#include "../switch/compat_switch.h"
+
+extern char __start__;
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
+
+#include "ARMJIT_Compiler.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMInterpreter.h"
+#include "../Config.h"
+
+#include <malloc.h>
+
+using namespace Arm64Gen;
+
+extern "C" void ARM_Ret();
+
+namespace ARMJIT
+{
+
+/*
+
+ Recompiling classic ARM to ARMv8 code is at the same time
+ easier and trickier than compiling to a less related architecture
+ like x64. On the one hand you can translate a lot of instructions directly.
+ But at the same time, there are a ton of exceptions, like for
+ example ADD and SUB can't have a RORed second operand on ARMv8.
+
+ While writing a JIT, when an instruction is recompiled into multiple ones,
+ remember not to write back until you've read all the other operands!
+*/
+
+// Host registers available to the register cache (W19..W26), presumably
+// handed out in this order -- TODO confirm against RegisterCache.
+template <>
+const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
+ {W19, W20, W21, W22, W23, W24, W25, W26};
+// Number of entries in NativeRegAllocOrder.
+template <>
+const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
+
+// Size in bytes of the memory that holds the generated code.
+const int JitMemSize = 16 * 1024 * 1024;
+#ifndef __SWITCH__
+// Statically allocated code buffer; the constructor makes the page aligned
+// part of it executable with mprotect.
+u8 JitMem[JitMemSize];
+#endif
+
+// Emits an add that advances the host register mapped to R15 by one
+// instruction (2 bytes in Thumb mode, 4 bytes otherwise).
+void Compiler::MovePC()
+{
+    const ARM64Reg pc = MapReg(15);
+    ADD(pc, pc, Thumb ? 2 : 4);
+}
+
+// MRS: copies a status register into rd. With bit 22 clear the cached CPSR
+// is copied directly; with bit 22 set the value comes from the ReadBanked
+// helper.
+void Compiler::A_Comp_MRS()
+{
+    Comp_AddCycles_C();
+
+    const ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
+    const bool viaReadBanked = CurInstr.Instr & (1 << 22);
+
+    if (!viaReadBanked)
+    {
+        MOV(rd, RCPSR);
+        return;
+    }
+
+    // ReadBanked arguments: W5 = mode, W1 = register number, W3 = value
+    // (preset to zero, overwritten only if the helper loads something).
+    ANDI2R(W5, RCPSR, 0x1F);
+    MOVI2R(W3, 0);
+    MOVI2R(W1, 15 - 8);
+    BL(ReadBanked);
+    MOV(rd, W3);
+}
+
+// MSR: writes selected byte fields of a status register from an immediate or
+// a register. Bit 22 selects the path that goes through the
+// ReadBanked/WriteBanked helpers; otherwise the cached CPSR is modified.
+void Compiler::A_Comp_MSR()
+{
+    Comp_AddCycles_C();
+
+    ARM64Reg val;
+    if (CurInstr.Instr & (1 << 25))
+    {
+        // Immediate form: 8 bit value rotated right by an even amount.
+        const auto imm = ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E));
+        val = W0;
+        MOVI2R(val, imm);
+    }
+    else
+        val = MapReg(CurInstr.A_Reg(0));
+
+    // Bits 16-19 each enable one byte of the destination.
+    u32 mask = 0;
+    for (int field = 0; field < 4; field++)
+    {
+        if (CurInstr.Instr & (1 << (16 + field)))
+            mask |= 0xFFu << (field * 8);
+    }
+
+    if (CurInstr.Instr & (1 << 22))
+    {
+        // Fetch the current value into W3 (W5 = mode, W1 = register number).
+        ANDI2R(W5, RCPSR, 0x1F);
+        MOVI2R(W3, 0);
+        MOVI2R(W1, 15 - 8);
+        BL(ReadBanked);
+
+        // In mode 0x10 the low byte is excluded from the write mask.
+        MOVI2R(W1, mask);
+        MOVI2R(W2, mask & 0xFFFFFF00);
+        ANDI2R(W5, RCPSR, 0x1F);
+        CMP(W5, 0x10);
+        CSEL(W1, W2, W1, CC_EQ);
+
+        // W3 = (W3 & ~mask) | (val & mask)
+        BIC(W3, W3, W1);
+        AND(W0, val, W1);
+        ORR(W3, W3, W0);
+
+        MOVI2R(W1, 15 - 8);
+
+        BL(WriteBanked);
+        return;
+    }
+
+    // Bit 5 of the CPSR is never written by this instruction.
+    mask &= 0xFFFFFFDF;
+    CPSRDirty = true;
+
+    if ((mask & 0xFF) == 0)
+    {
+        // The low byte (which holds the mode bits) is untouched, so the
+        // merge can be done with constants and no mode update is needed.
+        ANDI2R(RCPSR, RCPSR, ~mask);
+        ANDI2R(W0, val, mask);
+        ORR(RCPSR, RCPSR, W0);
+        return;
+    }
+
+    MOVI2R(W2, mask);
+    MOVI2R(W3, mask & 0xFFFFFF00);
+    ANDI2R(W1, RCPSR, 0x1F);
+    // W1 = first argument
+    CMP(W1, 0x10);
+    CSEL(W2, W3, W2, CC_EQ);
+
+    BIC(RCPSR, RCPSR, W2);
+    AND(W0, val, W2);
+    ORR(RCPSR, RCPSR, W0);
+
+    // Call ARM::UpdateMode with X0 = CPU, W1 = old mode bits, W2 = new CPSR.
+    MOV(W2, RCPSR);
+    MOV(X0, RCPU);
+
+    PushRegs(true);
+
+    QuickCallFunction(X3, (void*)&ARM::UpdateMode);
+
+    PopRegs(true);
+}
+
+// Writes the currently loaded guest registers r8-r14 back before a call.
+// For Thumb code or condition 0xE the registers are unloaded from the
+// register cache; otherwise they are only stored and stay mapped.
+// Does nothing when saveHiRegs is false.
+void Compiler::PushRegs(bool saveHiRegs)
+{
+    if (!saveHiRegs)
+        return;
+
+    // Snapshot of the loaded registers in the r8-r14 range.
+    BitSet16 hiRegs(RegCache.LoadedRegs & 0x7F00);
+    const bool unconditional = Thumb || CurInstr.Cond() == 0xE;
+
+    for (int reg : hiRegs)
+    {
+        if (unconditional)
+            RegCache.UnloadRegister(reg);
+        else
+            SaveReg(reg, RegCache.Mapping[reg]);
+    }
+}
+
+// Counterpart of PushRegs: reloads every guest register in the r8-r14 range
+// that the register cache still lists as loaded. Does nothing when
+// saveHiRegs is false.
+void Compiler::PopRegs(bool saveHiRegs)
+{
+    if (!saveHiRegs)
+        return;
+
+    BitSet16 stillLoaded(RegCache.LoadedRegs & 0x7F00);
+    for (int reg : stillLoaded)
+        LoadReg(reg, RegCache.Mapping[reg]);
+}
+
+// Sets up the code buffer and pre-generates the helper routines shared by
+// all compiled blocks: the JumpTo thunks, ReadBanked/WriteBanked and the
+// slow-path load/store thunks. Afterwards the code base is moved past those
+// helpers so that offset 0 refers to the first byte available for blocks.
+Compiler::Compiler()
+{
+#ifdef __SWITCH__
+    JitRWBase = memalign(0x1000, JitMemSize);
+
+    // Look for an unmapped range below the executable image, stepping
+    // further down each time the queried range turns out to be in use.
+    JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
+    JitRWStart = virtmemReserve(JitMemSize);
+    MemoryInfo info = {0};
+    u32 pageInfo = {0};
+    int i = 0;
+    while (JitRXStart != NULL)
+    {
+        svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
+        if (info.type != MemType_Unmapped)
+            JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);
+        else
+            break;
+        if (i++ > 8)
+        {
+            printf("couldn't find unmapped place for jit memory\n");
+            JitRXStart = NULL;
+        }
+    }
+
+    assert(JitRXStart != NULL);
+
+    bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
+    assert(succeded);
+    succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
+    assert(succeded);
+    succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
+    assert(succeded);
+
+    SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
+    JitMemMainSize = JitMemSize;
+#else
+    // Only whole pages inside JitMem can have their protection changed, so
+    // round the start up and the end down to page boundaries.
+    u64 pageSize = sysconf(_SC_PAGE_SIZE);
+    u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
+    u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned;
+    // NOTE(review): the result of mprotect is not checked.
+    mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
+
+    SetCodeBase(pageAligned, pageAligned);
+    JitMemMainSize = alignedSize;
+#endif
+    SetCodePtr(0);
+
+    for (int i = 0; i < 3; i++)
+    {
+        JumpToFuncs9[i] = Gen_JumpTo9(i);
+        JumpToFuncs7[i] = Gen_JumpTo7(i);
+    }
+
+    /*
+        W5 - mode
+        W1 - reg num
+        W3 - in/out value of reg
+    */
+    {
+        // ReadBanked: loads W3 from the bank that belongs to the mode in W5.
+        // W3 is left untouched if that mode has no bank for the register.
+        ReadBanked = GetRXPtr();
+
+        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+        CMP(W5, 0x11);
+        FixupBranch fiq = B(CC_EQ);
+        // Every other bank is indexed relative to 13 - 8; a negative index
+        // means the register is not banked in those modes.
+        SUBS(W1, W1, 13 - 8);
+        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+        FixupBranch notEverything = B(CC_LT);
+        CMP(W5, 0x12);
+        FixupBranch irq = B(CC_EQ);
+        CMP(W5, 0x13);
+        FixupBranch svc = B(CC_EQ);
+        CMP(W5, 0x17);
+        FixupBranch abt = B(CC_EQ);
+        CMP(W5, 0x1B);
+        FixupBranch und = B(CC_EQ);
+        SetJumpTarget(notEverything);
+        RET();
+
+        SetJumpTarget(fiq);
+        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
+        RET();
+        SetJumpTarget(irq);
+        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
+        RET();
+        SetJumpTarget(svc);
+        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
+        RET();
+        SetJumpTarget(abt);
+        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
+        RET();
+        SetJumpTarget(und);
+        LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
+        RET();
+    }
+    {
+        // WriteBanked: same dispatch as ReadBanked, but stores W3.
+        // W4 is set to 1 if a store happened and to 0 otherwise.
+        WriteBanked = GetRXPtr();
+
+        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+        CMP(W5, 0x11);
+        FixupBranch fiq = B(CC_EQ);
+        SUBS(W1, W1, 13 - 8);
+        ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
+        FixupBranch notEverything = B(CC_LT);
+        CMP(W5, 0x12);
+        FixupBranch irq = B(CC_EQ);
+        CMP(W5, 0x13);
+        FixupBranch svc = B(CC_EQ);
+        CMP(W5, 0x17);
+        FixupBranch abt = B(CC_EQ);
+        CMP(W5, 0x1B);
+        FixupBranch und = B(CC_EQ);
+        SetJumpTarget(notEverything);
+        MOVI2R(W4, 0);
+        RET();
+
+        SetJumpTarget(fiq);
+        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ));
+        MOVI2R(W4, 1);
+        RET();
+        SetJumpTarget(irq);
+        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ));
+        MOVI2R(W4, 1);
+        RET();
+        SetJumpTarget(svc);
+        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC));
+        MOVI2R(W4, 1);
+        RET();
+        SetJumpTarget(abt);
+        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT));
+        MOVI2R(W4, 1);
+        RET();
+        SetJumpTarget(und);
+        STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND));
+        MOVI2R(W4, 1);
+        RET();
+    }
+
+    // One store thunk and two load thunks (zero/sign extending) for every
+    // combination of console type, CPU, access size and mapped host register
+    // W19..W26. The access width in bits is 8 << size.
+    for (int consoleType = 0; consoleType < 2; consoleType++)
+    {
+        for (int num = 0; num < 2; num++)
+        {
+            for (int size = 0; size < 3; size++)
+            {
+                for (int reg = 0; reg < 8; reg++)
+                {
+                    ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
+                    PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr();
+                    // The CPU 0 variants receive the CPU pointer in X1 and
+                    // the value in W2; the CPU 1 variants receive the value
+                    // in W1.
+                    if (num == 0)
+                    {
+                        MOV(X1, RCPU);
+                        MOV(W2, rdMapped);
+                    }
+                    else
+                    {
+                        MOV(W1, rdMapped);
+                    }
+                    // Keep the return address (X30) across the call.
+                    ABI_PushRegisters({30});
+                    if (consoleType == 0)
+                    {
+                        switch ((8 << size) | num)
+                        {
+                        case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
+                        case 33: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
+                        case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
+                        case 17: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
+                        case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
+                        case 9: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
+                        }
+                    }
+                    else
+                    {
+                        switch ((8 << size) | num)
+                        {
+                        case 32: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
+                        case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
+                        case 16: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
+                        case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
+                        case 8: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
+                        case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
+                        }
+                    }
+
+                    ABI_PopRegisters({30});
+                    RET();
+
+                    for (int signextend = 0; signextend < 2; signextend++)
+                    {
+                        PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr();
+                        if (num == 0)
+                            MOV(X1, RCPU);
+                        ABI_PushRegisters({30});
+                        if (consoleType == 0)
+                        {
+                            switch ((8 << size) | num)
+                            {
+                            case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
+                            case 33: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
+                            case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
+                            case 17: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
+                            case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
+                            case 9: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
+                            }
+                        }
+                        else
+                        {
+                            switch ((8 << size) | num)
+                            {
+                            case 32: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
+                            case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
+                            case 16: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
+                            case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
+                            case 8: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
+                            case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
+                            }
+                        }
+                        ABI_PopRegisters({30});
+                        // size is the index 0..2 (8/16/32 bits), not the bit
+                        // width: index 2 is the full word case that needs no
+                        // extension.
+                        if (size == 2)
+                            MOV(rdMapped, W0);
+                        else if (signextend)
+                            SBFX(rdMapped, W0, 0, 8 << size);
+                        else
+                            UBFX(rdMapped, W0, 0, 8 << size);
+                        RET();
+                    }
+                }
+            }
+        }
+    }
+
+    FlushIcache();
+
+    JitMemSecondarySize = 1024*1024*4;
+
+    // The helpers generated above and the secondary region are carved out of
+    // the space available for regular block code.
+    JitMemMainSize -= GetCodeOffset();
+    JitMemMainSize -= JitMemSecondarySize;
+
+    // Rebase so that the helpers sit below code offset 0.
+    SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
+}
+
+// Releases the mappings created by the constructor. Only the __SWITCH__
+// build has anything to release.
+Compiler::~Compiler()
+{
+#ifdef __SWITCH__
+    if (JitRWStart == NULL)
+        return;
+
+    // Undo the mappings in the reverse order of their creation.
+    bool unmapped = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
+    assert(unmapped);
+    virtmemFree(JitRWStart, JitMemSize);
+    unmapped = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
+    assert(unmapped);
+    free(JitRWBase);
+#endif
+}
+
+// Emits a load of ARM::Cycles into the RCycles host register.
+void Compiler::LoadCycles()
+{
+    const auto cyclesOffset = offsetof(ARM, Cycles);
+    LDR(INDEX_UNSIGNED, RCycles, RCPU, cyclesOffset);
+}
+
+// Emits a store of the RCycles host register into ARM::Cycles.
+void Compiler::SaveCycles()
+{
+    const auto cyclesOffset = offsetof(ARM, Cycles);
+    STR(INDEX_UNSIGNED, RCycles, RCPU, cyclesOffset);
+}
+
+// Emits code that brings guest register `reg` into `nativeReg`.
+// R15 is materialised as the compile-time constant R15; every other
+// register is loaded from ARM::R.
+void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
+{
+    if (reg != 15)
+    {
+        LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
+        return;
+    }
+
+    MOVI2R(nativeReg, R15);
+}
+
+// Emits a store of `nativeReg` into the ARM::R slot of guest register `reg`.
+void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
+{
+    const auto slotOffset = offsetof(ARM, R[reg]);
+    STR(INDEX_UNSIGNED, nativeReg, RCPU, slotOffset);
+}
+
+// Emits a reload of RCPSR from ARM::CPSR. The cached copy must not be dirty
+// at this point, since the reload would discard it.
+void Compiler::LoadCPSR()
+{
+    assert(!CPSRDirty);
+
+    const auto cpsrOffset = offsetof(ARM, CPSR);
+    LDR(INDEX_UNSIGNED, RCPSR, RCPU, cpsrOffset);
+}
+
+// Emits a store of RCPSR into ARM::CPSR, but only if the cached copy is
+// dirty. With markClean set the dirty flag is cleared afterwards.
+void Compiler::SaveCPSR(bool markClean)
+{
+    if (!CPSRDirty)
+        return;
+
+    STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
+    if (markClean)
+        CPSRDirty = false;
+}
+
+// Emits a test of condition `cond` against the cached CPSR and returns the
+// branch that is taken when the condition FAILS; the caller has to resolve
+// it with SetJumpTarget.
+FixupBranch Compiler::CheckCondition(u32 cond)
+{
+    if (cond < 0x8)
+    {
+        // Conditions 0-7 come in pairs that each test a single CPSR bit:
+        // 0/1 -> bit 30, 2/3 -> bit 29, 4/5 -> bit 31, 6/7 -> bit 28.
+        // The even condition of a pair passes when the bit is set.
+        static const u8 flagBitOfPair[4] = {30, 29, 31, 28};
+        const u8 bit = flagBitOfPair[cond >> 1];
+
+        return (cond & 1) ? TBNZ(RCPSR, bit) : TBZ(RCPSR, bit);
+    }
+
+    // Conditions 8 and up: W2 = 1 << (top four CPSR bits), then look that
+    // bit up in the condition's entry of ARM::ConditionTable.
+    LSR(W1, RCPSR, 28);
+    MOVI2R(W2, 1);
+    LSLV(W2, W2, W1);
+    ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);
+
+    return CBZ(W2);
+}
+
+// Compile functions for ARM mode, indexed by instruction kind
+// (CompileBlock looks entries up with CurInstr.Info.Kind). A NULL entry
+// means there is no compiled implementation for that kind; CompileBlock
+// then emits a call to the interpreter instead.
+#define F(x) &Compiler::A_Comp_##x
+const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
+{
+ // AND
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // EOR
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // SUB
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // RSB
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ADD
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ADC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // SBC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // RSC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // ORR
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // MOV
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ // BIC
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
+ // MVN
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
+ // TST
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // TEQ
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // CMP
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // CMN
+ F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
+ // Mul
+ F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
+ // ARMv5 exclusives
+ F(Clz), NULL, NULL, NULL, NULL,
+
+ // STR
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // STRB
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // LDR
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // LDRB
+ F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
+ // STRH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRD
+ NULL, NULL, NULL, NULL,
+ // STRD
+ NULL, NULL, NULL, NULL,
+ // LDRH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRSB
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // LDRSH
+ F(MemHD), F(MemHD), F(MemHD), F(MemHD),
+ // Swap
+ NULL, NULL,
+ // LDM, STM
+ F(LDM_STM), F(LDM_STM),
+ // Branch
+ F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
+ // Special
+ NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
+ &Compiler::Nop
+};
+#undef F
+// Compile functions for Thumb mode, indexed by instruction kind
+// (CompileBlock looks entries up with CurInstr.Info.Kind). A NULL entry
+// means there is no compiled implementation for that kind; CompileBlock
+// then emits a call to the interpreter instead.
+#define F(x) &Compiler::T_Comp_##x
+const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
+{
+ // Shift imm
+ F(ShiftImm), F(ShiftImm), F(ShiftImm),
+ // Add/sub tri operand
+ F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
+ // 8 bit imm
+ F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
+ // ALU
+ F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
+ F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
+ // ALU hi reg
+ F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
+ // PC/SP relative ops
+ F(RelAddr), F(RelAddr), F(AddSP),
+ // LDR PC rel
+ F(LoadPCRel),
+ // LDR/STR reg offset
+ F(MemReg), F(MemReg), F(MemReg), F(MemReg),
+ // LDR/STR sign extended, half
+ F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
+ // LDR/STR imm offset
+ F(MemImm), F(MemImm), F(MemImm), F(MemImm),
+ // LDR/STR half imm offset
+ F(MemImmHalf), F(MemImmHalf),
+ // LDR/STR sp rel
+ F(MemSPRel), F(MemSPRel),
+ // PUSH/POP
+ F(PUSH_POP), F(PUSH_POP),
+ // LDMIA, STMIA
+ F(LDMIA_STMIA), F(LDMIA_STMIA),
+ // Branch
+ F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
+ // Unk, SVC
+ NULL, NULL,
+ F(BL_Merged)
+};
+
+// Returns whether a compile function exists for instruction kind `kind` in
+// the Thumb (thumb == true) or ARM dispatch table.
+bool Compiler::CanCompile(bool thumb, u16 kind)
+{
+    const CompileFunc* table = thumb ? T_Comp : A_Comp;
+    return table[kind] != NULL;
+}
+
+// Emits the extra work attached to a branch by its BranchFlags, for either
+// the taken or the not-taken path:
+//  - a taken idle branch sets ARM::IdleLoop to 1;
+//  - if this path is flagged by branch_FollowCondNotTaken (taken path) or
+//    branch_FollowCondTaken (not-taken path), the block is exited here.
+void Compiler::Comp_BranchSpecialBehaviour(bool taken)
+{
+    const bool idleBranch = taken && (CurInstr.BranchFlags & branch_IdleBranch);
+    if (idleBranch)
+    {
+        MOVI2R(W0, 1);
+        STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
+    }
+
+    const bool exitHere = taken
+        ? (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+        : (CurInstr.BranchFlags & branch_FollowCondTaken);
+    if (!exitHere)
+        return;
+
+    RegCache.PrepareExit();
+
+    // Charge the cycles accumulated at compile time and leave through ARM_Ret.
+    SUB(RCycles, RCycles, ConstantCycles);
+    QuickTailCall(X0, ARM_Ret);
+}
+
+// Compiles the given sequence of fetched instructions into one block of host
+// code and returns its entry point.
+//   cpu         - CPU the block belongs to (provides Num, kept in CurCPU)
+//   thumb       - true if the instructions are Thumb instructions
+//   instrs      - the instructions to compile, in execution order
+//   instrsCount - number of entries in instrs
+// Instructions without a compile function are executed by an emitted call
+// into the interpreter.
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
+{
+ // Reset the whole block cache when either code region is nearly full.
+ if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
+ {
+ printf("JIT near memory full, resetting...\n");
+ ResetBlockCache();
+ }
+ if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
+ {
+ printf("JIT far memory full, resetting...\n");
+ ResetBlockCache();
+ }
+
+ JitBlockEntry res = (JitBlockEntry)GetRXPtr();
+
+ Thumb = thumb;
+ Num = cpu->Num;
+ CurCPU = cpu;
+ ConstantCycles = 0;
+ RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
+ CPSRDirty = false;
+
+ for (int i = 0; i < instrsCount; i++)
+ {
+ CurInstr = instrs[i];
+ // R15 as seen by the instruction: its address plus 4 (Thumb) or 8 (ARM).
+ R15 = CurInstr.Addr + (Thumb ? 4 : 8);
+ CodeRegion = R15 >> 24;
+
+ CompileFunc comp = Thumb
+ ? T_Comp[CurInstr.Info.Kind]
+ : A_Comp[CurInstr.Info.Kind];
+
+ Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
+
+ //printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);
+
+ bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
+ // Write R15 (plus CurInstr for interpreted instructions and CodeCycles
+ // on CPU 0) into the ARM struct in the cases selected by this condition.
+ if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
+ {
+ MOVI2R(W0, R15);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
+ if (comp == NULL)
+ {
+ MOVI2R(W0, CurInstr.Instr);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
+ }
+ if (Num == 0)
+ {
+ MOVI2R(W0, (s32)CurInstr.CodeCycles);
+ STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
+ }
+ }
+
+ // Before calling the interpreter, write the cached cycles, CPSR and
+ // registers back; they are reloaded after the instruction (see below).
+ if (comp == NULL)
+ {
+ SaveCycles();
+ SaveCPSR();
+ RegCache.Flush();
+ }
+ else
+ RegCache.Prepare(Thumb, i);
+
+ if (Thumb)
+ {
+ if (comp == NULL)
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
+ }
+ else
+ (this->*comp)();
+ }
+ else
+ {
+ u32 cond = CurInstr.Cond();
+ // BLX (immediate) is handled without a condition check.
+ if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
+ {
+ if (comp)
+ (this->*comp)();
+ else
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
+ }
+ }
+ // Any other instruction with condition 0xF only costs its code cycles.
+ else if (cond == 0xF)
+ Comp_AddCycles_C();
+ else
+ {
+ IrregularCycles = false;
+
+ // skipExecute is only assigned (and only used) when cond < 0xE.
+ FixupBranch skipExecute;
+ if (cond < 0xE)
+ skipExecute = CheckCondition(cond);
+
+ if (comp == NULL)
+ {
+ MOV(X0, RCPU);
+ QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
+ }
+ else
+ {
+ (this->*comp)();
+ }
+
+ Comp_BranchSpecialBehaviour(true);
+
+ if (cond < 0xE)
+ {
+ // A separate condition-failed path is emitted only when it has
+ // to charge cycles itself or may have to exit the block.
+ if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
+ {
+ FixupBranch skipNop = B();
+ SetJumpTarget(skipExecute);
+
+ Comp_AddCycles_C();
+
+ Comp_BranchSpecialBehaviour(false);
+
+ SetJumpTarget(skipNop);
+ }
+ else
+ SetJumpTarget(skipExecute);
+ }
+
+ }
+ }
+
+ // Pick up the state the interpreter may have changed.
+ if (comp == NULL)
+ {
+ LoadCycles();
+ LoadCPSR();
+ }
+ }
+
+ RegCache.Flush();
+
+ // Charge the cycles accumulated at compile time and leave through ARM_Ret.
+ SUB(RCycles, RCycles, ConstantCycles);
+ QuickTailCall(X0, ARM_Ret);
+
+ FlushIcache();
+
+ return res;
+}
+
+// Discards all generated block code: forgets the recorded load/store
+// patches, rewinds the code pointer to offset 0 and fills both code regions
+// with the instruction word 0xD4200000.
+void Compiler::Reset()
+{
+    LoadStorePatches.clear();
+
+    SetCodePtr(0);
+    OtherCodeRegion = JitMemMainSize;
+
+    const u32 brk_0 = 0xD4200000;
+
+    u32* const words = (u32*)GetRWPtr();
+    const u32 wordCount = (JitMemMainSize + JitMemSecondarySize) / 4;
+    for (u32 idx = 0; idx < wordCount; idx++)
+        words[idx] = brk_0;
+}
+
+// Accounts for the code fetch cycles of the current instruction.
+// NOTE(review): despite its name, forceNonConstant == true adds the cycles
+// to ConstantCycles, while false emits a runtime subtraction from RCycles.
+void Compiler::Comp_AddCycles_C(bool forceNonConstant)
+{
+    s32 cycles;
+    if (Num)
+        cycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3];
+    else if (R15 & 0x2)
+        cycles = 0;
+    else
+        cycles = CurInstr.CodeCycles;
+
+    if (!forceNonConstant)
+    {
+        SUB(RCycles, RCycles, cycles);
+        return;
+    }
+
+    ConstantCycles += cycles;
+}
+
+// Accounts for the code fetch cycles plus `numI` additional cycles and marks
+// the instruction as having irregular timing. Thumb instructions and
+// instructions with condition 0xE are charged through ConstantCycles; all
+// others through an emitted subtraction from RCycles.
+void Compiler::Comp_AddCycles_CI(u32 numI)
+{
+    IrregularCycles = true;
+
+    s32 codeCycles;
+    if (Num)
+        codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+    else if (R15 & 0x2)
+        codeCycles = 0;
+    else
+        codeCycles = CurInstr.CodeCycles;
+
+    const s32 cycles = codeCycles + numI;
+
+    const bool unconditional = Thumb || CurInstr.Cond() == 0xE;
+    if (!unconditional)
+    {
+        SUB(RCycles, RCycles, cycles);
+        return;
+    }
+
+    ConstantCycles += cycles;
+}
+
+// Accounts for the code fetch cycles plus `c` constant cycles plus a number
+// of cycles that is only known at runtime.
+//   c     - additional cycles known at compile time
+//   numI  - host register holding the runtime part of the cycle count
+//   shift - shift/extend option applied to numI when it is subtracted
+// The runtime part is always subtracted from RCycles by emitted code. The
+// constant part goes to ConstantCycles for Thumb instructions and for
+// conditions >= 0xE, and is subtracted by emitted code otherwise.
+void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
+{
+    IrregularCycles = true;
+
+    s32 cycles = (Num ?
+        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
+
+    // Runtime part: RCycles -= numI (shifted). This used to subtract
+    // `cycles` instead, which ignored numI/shift entirely and charged the
+    // constant part a second time.
+    SUB(RCycles, RCycles, numI, shift);
+    if (Thumb || CurInstr.Cond() >= 0xE)
+        ConstantCycles += cycles;
+    else
+        SUB(RCycles, RCycles, cycles);
+}
+
+// Accounts for code fetch, data access and one internal cycle. CPU 0 is
+// delegated to Comp_AddCycles_CD. For CPU 1 the result depends on whether
+// the code and the data live in region 0x02 (main RAM).
+void Compiler::Comp_AddCycles_CDI()
+{
+    if (Num == 0)
+    {
+        Comp_AddCycles_CD();
+        return;
+    }
+
+    IrregularCycles = true;
+
+    s32 codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+    s32 dataCycles = CurInstr.DataCycles;
+
+    const bool dataInMainRAM = (CurInstr.DataRegion >> 24) == 0x02; // mainRAM
+    const bool codeInMainRAM = CodeRegion == 0x02;
+
+    s32 cycles;
+    if (dataInMainRAM && codeInMainRAM)
+    {
+        cycles = codeCycles + dataCycles;
+    }
+    else if (!dataInMainRAM && !codeInMainRAM)
+    {
+        cycles = codeCycles + dataCycles + 1;
+    }
+    else
+    {
+        // Exactly one side is in main RAM: the extra cycle is added to the
+        // side that is not in main RAM before the two are combined.
+        if (dataInMainRAM)
+            codeCycles++;
+        else
+            dataCycles++;
+        cycles = std::max(codeCycles + dataCycles - 3, std::max(codeCycles, dataCycles));
+    }
+
+    const bool conditional = !Thumb && CurInstr.Cond() < 0xE;
+    if (conditional)
+        SUB(RCycles, RCycles, cycles);
+    else
+        ConstantCycles += cycles;
+}
+
+// Accounts for the code fetch and data access cycles of the current
+// instruction. The cycles are charged by emitted code only for conditional
+// ARM instructions with irregular timing; otherwise they are added to
+// ConstantCycles.
+void Compiler::Comp_AddCycles_CD()
+{
+    u32 cycles = 0;
+
+    if (Num != 0)
+    {
+        const s32 codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+        const s32 dataCycles = CurInstr.DataCycles;
+
+        const bool dataInMainRAM = (CurInstr.DataRegion >> 24) == 0x02;
+        const bool codeInMainRAM = CodeRegion == 0x02;
+
+        // Both or neither in main RAM: the costs simply add up. Otherwise
+        // the two are combined with the max formula.
+        if (dataInMainRAM == codeInMainRAM)
+            cycles += codeCycles + dataCycles;
+        else
+            cycles += std::max(codeCycles + dataCycles - 3, std::max(codeCycles, dataCycles));
+
+        IrregularCycles = true;
+    }
+    else
+    {
+        const s32 codeCycles = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
+        const s32 dataCycles = CurInstr.DataCycles;
+
+        //if (DataRegion != CodeRegion)
+        cycles = std::max(codeCycles + dataCycles - 6, std::max(codeCycles, dataCycles));
+
+        IrregularCycles = cycles != codeCycles;
+    }
+
+    const bool conditional = !Thumb && CurInstr.Cond() < 0xE;
+    if (conditional && IrregularCycles)
+        SUB(RCycles, RCycles, cycles);
+    else
+        ConstantCycles += cycles;
+}
+
+} \ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h
new file mode 100644
index 0000000..0e7d54c
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Compiler.h
@@ -0,0 +1,269 @@
+#ifndef ARMJIT_COMPILER_H
+#define ARMJIT_COMPILER_H
+
+#include "../ARM.h"
+#include "../ARMJIT.h"
+
+#include "../dolphin/Arm64Emitter.h"
+
+#include "../ARMJIT_Internal.h"
+#include "../ARMJIT_RegisterCache.h"
+
+#include <unordered_map>
+
+namespace ARMJIT
+{
+
+// Host registers with a fixed role in generated code. The same assignment is
+// repeated as #defines in ARMJIT_Linkage.s.
+// RCPSR: cached copy of ARM::CPSR.
+const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
+// RCycles: cached copy of ARM::Cycles.
+const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
+// RCPU: base pointer for all accesses to the ARM struct.
+const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
+
+// Second operand of an instruction: either a 32 bit immediate (IsImm set,
+// value in Imm) or a host register with a shift type and amount (Reg).
+struct Op2
+{
+    // Leaves all members uninitialised.
+    Op2()
+    {}
+
+    // Plain register operand (LSL by 0).
+    Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
+    {
+        Reg.Rm = rm;
+        Reg.ShiftType = Arm64Gen::ST_LSL;
+        Reg.ShiftAmount = 0;
+    }
+
+    // Immediate operand.
+    Op2(u32 imm) : IsImm(true), Imm(imm)
+    {}
+
+    // Register operand shifted by a constant amount.
+    Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
+    {
+        Reg.Rm = rm;
+        Reg.ShiftType = st;
+        Reg.ShiftAmount = amount;
+    }
+
+    // Converts a register operand into the emitter's ArithOption.
+    // Must not be called on an immediate operand.
+    Arm64Gen::ArithOption ToArithOption()
+    {
+        assert(!IsImm);
+        return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
+    }
+
+    // True for a register operand without any shift.
+    bool IsSimpleReg()
+    { return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }
+    // True for an immediate in the range 0..0xFFF.
+    // The mask needs its own parentheses: == binds tighter than &, so the
+    // unparenthesised form compared 0xFFF with Imm first and was only true
+    // for Imm == 0xFFF.
+    bool ImmFits12Bit()
+    { return IsImm && ((Imm & 0xFFF) == Imm); }
+    // True for the immediate 0.
+    bool IsZero()
+    { return IsImm && !Imm; }
+
+    bool IsImm;
+    union
+    {
+        struct
+        {
+            Arm64Gen::ARM64Reg Rm;
+            Arm64Gen::ShiftType ShiftType;
+            int ShiftAmount;
+        } Reg;
+        u32 Imm;
+    };
+};
+
+// Describes how a memory access in generated code is rewritten by
+// Compiler::RewriteMemAccess.
+struct LoadStorePatch
+{
+ // Function the rewritten code calls (target of the emitted BL).
+ void* PatchFunc;
+ // Offset of the start of the patched range, relative to the pc passed to
+ // RewriteMemAccess.
+ s32 PatchOffset;
+ // Size of the patched range in bytes; the part after the BL is filled
+ // with NOPs.
+ u32 PatchSize;
+};
+
+// ARM64 backend of the JIT: translates blocks of fetched ARM/Thumb
+// instructions into host code through the inherited ARM64XEmitter.
+class Compiler : public Arm64Gen::ARM64XEmitter
+{
+public:
+ // Signature of the per-instruction compile functions (A_Comp_* / T_Comp_*).
+ typedef void (Compiler::*CompileFunc)();
+
+ Compiler();
+ ~Compiler();
+
+ // Write back / reload the loaded guest registers r8-r14 around a call;
+ // both do nothing when saveHiRegs is false.
+ void PushRegs(bool saveHiRegs);
+ void PopRegs(bool saveHiRegs);
+
+ // Returns the host register currently mapped to guest register `reg`;
+ // asserts that a mapping exists.
+ Arm64Gen::ARM64Reg MapReg(int reg)
+ {
+ assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
+ return RegCache.Mapping[reg];
+ }
+
+ // Compiles instrs[0..instrsCount) into one block and returns its entry.
+ JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
+
+ // True if a compile function exists for the given instruction kind.
+ bool CanCompile(bool thumb, u16 kind);
+
+ // True if bits 2 or 3 of CurInstr.SetFlags are set.
+ bool FlagsNZNeeded()
+ {
+ return CurInstr.SetFlags & 0xC;
+ }
+
+ // Discards all generated block code.
+ void Reset();
+
+ // Cycle accounting helpers (C = code, D = data, I = internal cycles).
+ void Comp_AddCycles_C(bool forceNonConstant = false);
+ void Comp_AddCycles_CI(u32 numI);
+ void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
+ void Comp_AddCycles_CD();
+ void Comp_AddCycles_CDI();
+
+ void MovePC();
+
+ // Transfer a guest register between the ARM struct and a host register.
+ void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
+ void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
+
+ void LoadCPSR();
+ void SaveCPSR(bool markClean = true);
+
+ void LoadCycles();
+ void SaveCycles();
+
+ // Compile function for instruction kinds that emit no code.
+ void Nop() {}
+
+ // Compile functions for ARM instructions.
+ void A_Comp_ALUTriOp();
+ void A_Comp_ALUMovOp();
+ void A_Comp_ALUCmpOp();
+
+ void A_Comp_Mul();
+ void A_Comp_Mul_Long();
+ void A_Comp_Mul_Short();
+
+ void A_Comp_Clz();
+
+ void A_Comp_MemWB();
+ void A_Comp_MemHD();
+
+ void A_Comp_LDM_STM();
+
+ void A_Comp_BranchImm();
+ void A_Comp_BranchXchangeReg();
+
+ void A_Comp_MRS();
+ void A_Comp_MSR();
+
+ // Compile functions for Thumb instructions.
+ void T_Comp_ShiftImm();
+ void T_Comp_AddSub_();
+ void T_Comp_ALUImm8();
+ void T_Comp_ALU();
+ void T_Comp_ALU_HiReg();
+ void T_Comp_AddSP();
+ void T_Comp_RelAddr();
+
+ void T_Comp_MemReg();
+ void T_Comp_MemImm();
+ void T_Comp_MemRegHalf();
+ void T_Comp_MemImmHalf();
+ void T_Comp_LoadPCRel();
+ void T_Comp_MemSPRel();
+
+ void T_Comp_LDMIA_STMIA();
+ void T_Comp_PUSH_POP();
+
+ void T_Comp_BCOND();
+ void T_Comp_B();
+ void T_Comp_BranchXchangeReg();
+ void T_Comp_BL_LONG_1();
+ void T_Comp_BL_LONG_2();
+ void T_Comp_BL_Merged();
+
+ s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+
+ void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
+
+ void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
+ void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
+ void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
+
+ void Comp_RetriveFlags(bool retriveCV);
+
+ // Emits a condition test; the returned branch is taken when the condition
+ // fails and has to be resolved by the caller.
+ Arm64Gen::FixupBranch CheckCondition(u32 cond);
+
+ // Emit a jump to an address held in a register / known at compile time.
+ void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
+ void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
+
+ void A_Comp_GetOp2(bool S, Op2& op2);
+
+ void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
+ void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
+
+ // Tries to compile a load from a code literal as a constant move; returns
+ // false if nothing was emitted.
+ bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
+ // Flags accepted by Comp_MemAccess.
+ enum
+ {
+ memop_Writeback = 1 << 0,
+ memop_Post = 1 << 1,
+ memop_SignExtend = 1 << 2,
+ memop_Store = 1 << 3,
+ memop_SubtractOffset = 1 << 4
+ };
+ void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
+
+ // 0 = switch mode, 1 = stay arm, 2 = stay thumb
+ void* Gen_JumpTo9(int kind);
+ void* Gen_JumpTo7(int kind);
+
+ void Comp_BranchSpecialBehaviour(bool taken);
+
+ // Convert between a block entry pointer and its offset from the RX base.
+ JitBlockEntry AddEntryOffset(u32 offset)
+ {
+ return (JitBlockEntry)(GetRXBase() + offset);
+ }
+
+ u32 SubEntryOffset(JitBlockEntry entry)
+ {
+ return (u8*)entry - GetRXBase();
+ }
+
+ // True if pc lies inside the JIT code regions.
+ bool IsJITFault(u64 pc);
+ // Applies the load/store patch recorded for pc and returns its PatchOffset.
+ s64 RewriteMemAccess(u64 pc);
+
+ // Swaps the current code offset with the one saved in OtherCodeRegion.
+ void SwapCodeRegion()
+ {
+ ptrdiff_t offset = GetCodeOffset();
+ SetCodePtrUnsafe(OtherCodeRegion);
+ OtherCodeRegion = offset;
+ }
+
+ // Code offset of the region that is currently not being written to.
+ ptrdiff_t OtherCodeRegion;
+
+ bool Exit;
+
+ // State of the instruction/block currently being compiled.
+ FetchedInstr CurInstr;
+ bool Thumb;
+ u32 R15;
+ u32 Num;
+ ARM* CurCPU;
+ // Cycles accumulated at compile time; subtracted from RCycles on block exit.
+ u32 ConstantCycles;
+ u32 CodeRegion;
+
+ BitSet32 SavedRegs;
+
+ // Sizes in bytes of the two code regions.
+ u32 JitMemSecondarySize;
+ u32 JitMemMainSize;
+
+ // Helper routines generated by the constructor.
+ void* ReadBanked, *WriteBanked;
+
+ void* JumpToFuncs9[3];
+ void* JumpToFuncs7[3];
+
+ // Patches keyed by the code offset passed to RewriteMemAccess.
+ std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
+
+ // [Console Type][Num][Size][Sign Extend][Output register]
+ void* PatchedLoadFuncs[2][2][3][2][8];
+ void* PatchedStoreFuncs[2][2][3][8];
+
+ RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
+
+ // True while RCPSR holds a value not yet stored to ARM::CPSR.
+ bool CPSRDirty = false;
+
+ bool IrregularCycles = false;
+
+#ifdef __SWITCH__
+ void* JitRWBase;
+ void* JitRWStart;
+ void* JitRXStart;
+#endif
+};
+
+}
+
+#endif \ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s
new file mode 100644
index 0000000..7886315
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_Linkage.s
@@ -0,0 +1,68 @@
+#include "../ARMJIT_x64/ARMJIT_Offsets.h"
+
+.text
+
+#define RCPSR w27
+#define RCycles w28
+#define RCPU x29
+
+.p2align 4,,15
+
+// Entry into generated code.
+//   x0 - value placed in RCPU (base for the Cycles/CPSR loads below)
+//   x1 - address of the code to branch to
+// Saves x19-x30 in a 96 byte stack frame; ARM_Ret undoes this.
+.global ARM_Dispatch
+ARM_Dispatch:
+ stp x19, x20, [sp, #-96]!
+ stp x21, x22, [sp, #16]
+ stp x23, x24, [sp, #32]
+ stp x25, x26, [sp, #48]
+ stp x27, x28, [sp, #64]
+ stp x29, x30, [sp, #80]
+
+ // load the cached state
+ mov RCPU, x0
+ ldr RCycles, [RCPU, ARM_Cycles_offset]
+ ldr RCPSR, [RCPU, ARM_CPSR_offset]
+
+ br x1
+
+.p2align 4,,15
+
+// Exit from generated code: stores the cached cycle count and CPSR back,
+// restores x19-x30 from the frame created by ARM_Dispatch and returns
+// through the restored x30.
+.global ARM_Ret
+ARM_Ret:
+ str RCycles, [RCPU, ARM_Cycles_offset]
+ str RCPSR, [RCPU, ARM_CPSR_offset]
+
+ ldp x29, x30, [sp, #80]
+ ldp x27, x28, [sp, #64]
+ ldp x25, x26, [sp, #48]
+ ldp x23, x24, [sp, #32]
+ ldp x21, x22, [sp, #16]
+ ldp x19, x20, [sp], #96
+
+ ret
+
+.p2align 4,,15
+
+// Restores a complete register context and jumps into it.
+//   x0 - pointer to the saved context: x0-x30 at offsets 0..240, followed
+//        by two more 8 byte values at offsets 248 and 256, which are used
+//        as the new stack pointer and the branch target respectively.
+// x17 and x18 are used as scratch for those two values, so they do not end
+// up with their saved contents.
+.global ARM_RestoreContext
+ARM_RestoreContext:
+ mov sp, x0
+
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ ldp x8, x9, [sp, #64]
+ ldp x10, x11, [sp, #80]
+ ldp x12, x13, [sp, #96]
+ ldp x14, x15, [sp, #112]
+ ldp x16, x17, [sp, #128]
+ ldp x18, x19, [sp, #144]
+ ldp x20, x21, [sp, #160]
+ ldp x22, x23, [sp, #176]
+ ldp x24, x25, [sp, #192]
+ ldp x26, x27, [sp, #208]
+ ldp x28, x29, [sp, #224]
+ ldr x30, [sp, #240]
+
+ // x17 = new stack pointer, x18 = branch target
+ ldp x17, x18, [sp, #248]
+ mov sp, x17
+
+ br x18 \ No newline at end of file
diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
new file mode 100644
index 0000000..6140ffc
--- /dev/null
+++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp
@@ -0,0 +1,810 @@
+#include "ARMJIT_Compiler.h"
+
+#include <cstdlib>
+
+#include "../Config.h"
+
+#include "../ARMJIT_Memory.h"
+
+using namespace Arm64Gen;
+
+namespace ARMJIT
+{
+
+// Reports whether pc lies inside the JIT's executable mapping, i.e. within
+// JitMemMainSize + JitMemSecondarySize bytes starting at GetRXBase().
+bool Compiler::IsJITFault(u64 pc)
+{
+    const u64 rxBase = (u64)GetRXBase();
+    if (pc < rxBase)
+        return false;
+
+    return pc - rxBase < (JitMemMainSize + JitMemSecondarySize);
+}
+
+/*
+    Replaces the inline memory access that contains the instruction at host
+    address pc with a call to its registered patch function.
+
+    pc is translated to an offset from GetRXBase() and looked up in
+    LoadStorePatches. On a hit, the PatchSize bytes starting at
+    pcOffset + PatchOffset are overwritten with a BL to PatchFunc followed by
+    NOPs, the instruction cache is flushed for that range, the emitter's write
+    position is restored and the entry is removed from the table.
+
+    Returns PatchOffset, the distance from pc to the start of the rewritten
+    sequence (presumably used by the caller to resume at the new BL).
+
+    A miss means an access faulted that was never registered; that is a JIT
+    bug, so the process is terminated. abort() is used rather than
+    assert(false) so that builds with NDEBUG do not fall off the end of a
+    non-void function.
+*/
+s64 Compiler::RewriteMemAccess(u64 pc)
+{
+    ptrdiff_t pcOffset = pc - (u64)GetRXBase();
+
+    auto it = LoadStorePatches.find(pcOffset);
+
+    if (it == LoadStorePatches.end())
+    {
+        printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
+        abort();
+    }
+
+    // copy the entry, it is erased below
+    LoadStorePatch patch = it->second;
+
+    // temporarily redirect the emitter to the sequence being replaced
+    ptrdiff_t curCodeOffset = GetCodeOffset();
+
+    SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
+
+    BL(patch.PatchFunc);
+
+    // pad the remainder of the old sequence (one instruction is the BL)
+    for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
+        HINT(HINT_NOP);
+
+    FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
+
+    SetCodePtrUnsafe(curCodeOffset);
+
+    LoadStorePatches.erase(it);
+
+    return patch.PatchOffset;
+}
+
+// Attempts to turn a load from the fixed address addr into a constant move.
+// Returns false (emitting nothing) when the localised address is listed in
+// InvalidLiterals; the entry is removed in that case. Otherwise the value is
+// read right now through CurCPU, moved into rd's host register and, for
+// unconditional instructions, recorded as a literal in the register cache.
+bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
+{
+    u32 localAddr = LocaliseCodeAddress(Num, addr);
+
+    int blacklistIdx = InvalidLiterals.Find(localAddr);
+    if (blacklistIdx != -1)
+    {
+        InvalidLiterals.Remove(blacklistIdx);
+        return false;
+    }
+
+    Comp_AddCycles_CDI();
+
+    // make sure arm7 bios is accessible
+    u32 savedR15 = CurCPU->R[15];
+    CurCPU->R[15] = R15;
+
+    u32 val;
+    switch (size)
+    {
+    case 32:
+        // aligned read, then rotate like an unaligned LDR would
+        CurCPU->DataRead32(addr & ~0x3, &val);
+        val = ROR(val, (addr & 0x3) << 3);
+        break;
+
+    case 16:
+        CurCPU->DataRead16(addr & ~0x1, &val);
+        if (signExtend)
+            val = ((s32)val << 16) >> 16;
+        break;
+
+    default:
+        CurCPU->DataRead8(addr, &val);
+        if (signExtend)
+            val = ((s32)val << 24) >> 24;
+        break;
+    }
+
+    CurCPU->R[15] = savedR15;
+
+    MOVI2R(MapReg(rd), val);
+
+    const bool unconditional = Thumb || CurInstr.Cond() == 0xE;
+    if (unconditional)
+        RegCache.PutLiteral(rd, val);
+
+    return true;
+}
+
+/*
+    Emits the code for one load or store of guest register rd at the address
+    formed from guest register rn and offset.
+
+    size  - access width in bits (8, 16 or 32)
+    flags - combination of memop_Store, memop_Post, memop_Writeback,
+            memop_SubtractOffset and memop_SignExtend
+
+    Outline:
+     1. With literal optimisations enabled, a plain PC-relative load with an
+        immediate offset is first offered to Comp_MemLoadLiteral; if that
+        succeeds nothing else is emitted.
+     2. The effective address is computed. W0 always ends up holding the
+        address that is accessed; for post-indexed accesses W0 keeps the old
+        base while the updated base is written directly to rn's host register.
+     3. Either an inline access relative to the fastmem base is emitted and
+        registered in LoadStorePatches, or a C++ handler is called.
+     4. If the instruction branches (per CurInstr.Info.Branches()), a jump to
+        the loaded value is emitted.
+*/
+void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
+{
+    u32 addressMask = ~0;
+    if (size == 32)
+        addressMask = ~3;
+    if (size == 16)
+        addressMask = ~1;
+
+    if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+    {
+        u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+        if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
+            return;
+    }
+
+    if (flags & memop_Store)
+        Comp_AddCycles_CD();
+    else
+        Comp_AddCycles_CDI();
+
+    ARM64Reg rdMapped = MapReg(rd);
+    ARM64Reg rnMapped = MapReg(rn);
+
+    if (Thumb && rn == 15)
+    {
+        // Thumb PC-relative accesses use the PC with bit 1 cleared
+        ANDI2R(W3, rnMapped, ~2);
+        rnMapped = W3;
+    }
+
+    ARM64Reg finalAddr = W0;
+    if (flags & memop_Post)
+    {
+        // post-indexed: access the old base (copied to W0), update rn in place
+        finalAddr = rnMapped;
+        MOV(W0, rnMapped);
+    }
+
+    bool addrIsStatic = Config::JIT_LiteralOptimisations
+        && RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
+    // only meaningful while addrIsStatic is true
+    u32 staticAddress = 0;
+    if (addrIsStatic)
+        staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+    if (!offset.IsImm)
+        Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
+    // offset might have become an immediate
+    if (offset.IsImm)
+    {
+        if (offset.Imm)
+        {
+            if (flags & memop_SubtractOffset)
+                SUB(finalAddr, rnMapped, offset.Imm);
+            else
+                ADD(finalAddr, rnMapped, offset.Imm);
+        }
+        else if (finalAddr != rnMapped)
+            MOV(finalAddr, rnMapped);
+    }
+    else
+    {
+        if (offset.Reg.ShiftType == ST_ROR)
+        {
+            // the rotate is materialised into W0 first, then added as a plain register
+            ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
+            offset = Op2(W0);
+        }
+
+        if (flags & memop_SubtractOffset)
+            SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+        else
+            ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
+    }
+
+    // pre-indexed writeback: the base becomes the computed address
+    if (!(flags & memop_Post) && (flags & memop_Writeback))
+        MOV(rnMapped, W0);
+
+    u32 expectedTarget = Num == 0
+        ? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
+        : ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
+
+    if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)))
+    {
+        ptrdiff_t memopStart = GetCodeOffset();
+        LoadStorePatch patch;
+
+        // the patch tables have 8 slots in their last dimension, indexed from
+        // W19; validate the index before it is used
+        assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
+
+        patch.PatchFunc = flags & memop_Store
+            ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped - W19]
+            : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
+
+        MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+
+        // take a chance at fastmem
+        if (size > 8)
+            ANDI2R(W1, W0, addressMask);
+
+        // offset of the instruction that may fault; key into LoadStorePatches
+        ptrdiff_t loadStorePosition = GetCodeOffset();
+        if (flags & memop_Store)
+        {
+            STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
+        }
+        else
+        {
+            LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
+            if (size == 32)
+            {
+                // rotate the loaded word right by (address & 3) * 8 bits
+                UBFIZ(W0, W0, 3, 2);
+                RORV(rdMapped, rdMapped, W0);
+            }
+        }
+
+        // PatchOffset: from the faulting instruction back to the start of the
+        // sequence; PatchSize: length of the whole sequence in bytes
+        patch.PatchOffset = memopStart - loadStorePosition;
+        patch.PatchSize = GetCodeOffset() - memopStart;
+        LoadStorePatches[loadStorePosition] = patch;
+    }
+    else
+    {
+        void* func = NULL;
+        if (addrIsStatic)
+            func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
+
+        if (func)
+        {
+            // address known at compile time and a direct handler exists
+            if (flags & memop_Store)
+                MOV(W1, rdMapped);
+            QuickCallFunction(X2, (void (*)())func);
+
+            if (!(flags & memop_Store))
+            {
+                if (size == 32)
+                {
+                    if (staticAddress & 0x3)
+                        ROR_(rdMapped, W0, (staticAddress & 0x3) << 3);
+                    else
+                        MOV(rdMapped, W0);
+                }
+                else
+                {
+                    if (flags & memop_SignExtend)
+                        SBFX(rdMapped, W0, 0, size);
+                    else
+                        UBFX(rdMapped, W0, 0, size);
+                }
+            }
+        }
+        else
+        {
+            // generic handlers; (size | ConsoleType) selects the instantiation:
+            // size is 8/16/32 and ConsoleType contributes bit 0
+            if (Num == 0)
+            {
+                // ARM9 handlers additionally receive RCPU in X1
+                MOV(X1, RCPU);
+                if (flags & memop_Store)
+                {
+                    MOV(W2, rdMapped);
+                    switch (size | NDS::ConsoleType)
+                    {
+                    case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowWrite9<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowWrite9<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowWrite9<u8, 1>); break;
+                    }
+                }
+                else
+                {
+                    switch (size | NDS::ConsoleType)
+                    {
+                    case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowRead9<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowRead9<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowRead9<u8, 1>); break;
+                    }
+                }
+            }
+            else
+            {
+                if (flags & memop_Store)
+                {
+                    MOV(W1, rdMapped);
+                    switch (size | NDS::ConsoleType)
+                    {
+                    case 32: QuickCallFunction(X3, SlowWrite7<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowWrite7<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowWrite7<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break;
+                    }
+                }
+                else
+                {
+                    switch (size | NDS::ConsoleType)
+                    {
+                    case 32: QuickCallFunction(X3, SlowRead7<u32, 0>); break;
+                    case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break;
+                    case 16: QuickCallFunction(X3, SlowRead7<u16, 0>); break;
+                    case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break;
+                    case 8: QuickCallFunction(X3, SlowRead7<u8, 0>); break;
+                    case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break;
+                    }
+                }
+            }
+
+            if (!(flags & memop_Store))
+            {
+                if (size == 32)
+                    MOV(rdMapped, W0);
+                else if (flags & memop_SignExtend)
+                    SBFX(rdMapped, W0, 0, size);
+                else
+                    UBFX(rdMapped, W0, 0, size);
+            }
+        }
+    }
+
+    if (CurInstr.Info.Branches())
+    {
+        if (size < 32)
+            printf("LDR size < 32 branching?\n");
+        Comp_JumpTo(rdMapped, Num == 0, false);
+    }
+}
+
+// ARM word/byte load or store: decodes the offset operand and the P/U/B/W/L
+// bits of the current instruction and forwards them to Comp_MemAccess.
+void Compiler::A_Comp_MemWB()
+{
+    const auto instr = CurInstr.Instr;
+
+    // bit 25 selects a shifted register offset over a 12-bit immediate
+    Op2 offset = (instr & (1 << 25))
+        ? Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((instr >> 5) & 0x3), (instr >> 7) & 0x1F)
+        : Op2(instr & 0xFFF);
+
+    const bool isLoad = instr & (1 << 20);
+    const bool isByte = instr & (1 << 22);
+    const bool preIndexed = instr & (1 << 24);
+    const bool writeback = instr & (1 << 21);
+    const bool addOffset = instr & (1 << 23);
+
+    int flags = 0;
+    flags |= isLoad ? 0 : memop_Store;
+    flags |= preIndexed ? 0 : memop_Post;
+    flags |= writeback ? memop_Writeback : 0;
+    flags |= addOffset ? 0 : memop_SubtractOffset;
+
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, isByte ? 8 : 32, flags);
+}
+
+// ARM halfword / signed byte transfer: decodes width, sign extension, offset
+// form and addressing bits, then forwards to Comp_MemAccess.
+void Compiler::A_Comp_MemHD()
+{
+    const auto instr = CurInstr.Instr;
+
+    const bool isLoad = instr & (1 << 20);
+    const int op = (instr >> 5) & 0x3;
+
+    // stores are always plain halfwords; loads pick width and signedness from op
+    const bool signExtend = isLoad && op >= 2;
+    const int size = (isLoad && op == 2) ? 8 : 16;
+
+    // bit 22 selects the split 8-bit immediate over a register offset
+    Op2 offset = (instr & (1 << 22))
+        ? Op2((instr & 0xF) | ((instr >> 4) & 0xF0))
+        : Op2(MapReg(CurInstr.A_Reg(0)));
+
+    int flags = 0;
+    flags |= signExtend ? memop_SignExtend : 0;
+    flags |= isLoad ? 0 : memop_Store;
+    flags |= (instr & (1 << 24)) ? 0 : memop_Post;
+    flags |= (instr & (1 << 23)) ? 0 : memop_SubtractOffset;
+    flags |= (instr & (1 << 21)) ? memop_Writeback : 0;
+
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
+}
+
+// Thumb load/store with register offset: bit 11 selects load, bit 10 selects
+// byte width.
+void Compiler::T_Comp_MemReg()
+{
+    const int op = (CurInstr.Instr >> 10) & 0x3;
+    const int size = (op & 0x1) ? 8 : 32;
+    const int flags = (op & 0x2) ? 0 : memop_Store;
+
+    Op2 offset = Op2(MapReg(CurInstr.T_Reg(6)));
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), offset, size, flags);
+}
+
+// Thumb load/store with 5-bit immediate offset: bit 11 selects load, bit 12
+// selects byte width. Word accesses scale the immediate by 4.
+void Compiler::T_Comp_MemImm()
+{
+    const int op = (CurInstr.Instr >> 11) & 0x3;
+    const bool isLoad = op & 0x1;
+    const bool isByte = op & 0x2;
+
+    u32 offset = (CurInstr.Instr >> 6) & 0x1F;
+    if (!isByte)
+        offset *= 4;
+
+    const int size = isByte ? 8 : 32;
+    const int flags = isLoad ? 0 : memop_Store;
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), size, flags);
+}
+
+// Thumb halfword / sign-extended transfer with register offset.
+// op 0 is the only store; op 1 is the only byte-sized access; odd ops
+// sign-extend.
+void Compiler::T_Comp_MemRegHalf()
+{
+    const int op = (CurInstr.Instr >> 10) & 0x3;
+
+    const int size = (op == 1) ? 8 : 16;
+
+    int flags = (op & 1) ? memop_SignExtend : 0;
+    if (op == 0)
+        flags |= memop_Store;
+
+    Op2 offset = Op2(MapReg(CurInstr.T_Reg(6)));
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), offset, size, flags);
+}
+
+// Thumb halfword load/store with immediate offset. The 5-bit immediate in
+// bits 6-10 is scaled by 2, which the shift-by-5 and mask 0x3E perform in
+// one step.
+void Compiler::T_Comp_MemImmHalf()
+{
+    const bool isLoad = CurInstr.Instr & (1 << 11);
+    u32 offset = (CurInstr.Instr >> 5) & 0x3E;
+
+    const int flags = isLoad ? 0 : memop_Store;
+
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16, flags);
+}
+
+// Thumb PC-relative word load. With literal optimisations enabled the load is
+// first offered to Comp_MemLoadLiteral; if that is disabled or declines, a
+// regular access relative to r15 is compiled.
+void Compiler::T_Comp_LoadPCRel()
+{
+    u32 offset = (CurInstr.Instr & 0xFF) << 2;
+    u32 addr = (R15 & ~0x2) + offset;
+
+    if (Config::JIT_LiteralOptimisations && Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
+        return;
+
+    Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
+}
+
+// Thumb SP-relative word load/store: 8-bit immediate scaled by 4, base r13.
+void Compiler::T_Comp_MemSPRel()
+{
+    const bool isLoad = CurInstr.Instr & (1 << 11);
+    u32 offset = (CurInstr.Instr & 0xFF) * 4;
+
+    const int flags = isLoad ? 0 : memop_Store;
+
+    Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, flags);
+}
+
+/*
+    Emits the code for a block transfer (LDM/STM style) of the registers in
+    regs relative to base register rn.
+
+    store     - true for a store, false for a load
+    preinc    - the address is adjusted before each transfer
+    decrement - the block lies below the base address
+    usermode  - registers r8-r14 are accessed through ReadBanked/WriteBanked
+
+    Returns the signed number of bytes by which the caller should adjust the
+    base register for writeback (0 for an empty register list). The base
+    register itself is not written here.
+
+    Structure:
+     - A single register that is already loaded in the register cache is
+       delegated to Comp_MemAccess.
+     - Otherwise the lowest address of the block is computed into W0.
+     - If the fast path is compiled, direct accesses relative to the fastmem
+       base are emitted inline and every one of them is registered in
+       LoadStorePatches; the generic sequence below is then emitted into the
+       other code region as the patch function for those accesses.
+     - The generic sequence passes the registers through a buffer on the host
+       stack (8 bytes per register, rounded up to an even count) and calls
+       SlowBlockTransfer9/7.
+*/
+s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
+{
+    IrregularCycles = true;
+
+    int regsCount = regs.Count();
+
+    if (regsCount == 0)
+        return 0; // actually not the right behaviour TODO: fix me
+
+    if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
+    {
+        int flags = 0;
+        if (store)
+            flags |= memop_Store;
+        if (decrement)
+            flags |= memop_SubtractOffset;
+        Op2 offset = preinc ? Op2(4) : Op2(0);
+
+        Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
+
+        return decrement ? -4 : 4;
+    }
+
+    if (store)
+        Comp_AddCycles_CD();
+    else
+        Comp_AddCycles_CDI();
+
+    int expectedTarget = Num == 0
+        ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
+        : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
+
+    bool compileFastPath = Config::JIT_FastMemory
+        && store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget));
+
+    // W0 = word-aligned address of the lowest word of the block
+    if (decrement)
+    {
+        s32 offset = -regsCount * 4 + (preinc ? 0 : 4);
+        if (offset)
+        {
+            ADDI2R(W0, MapReg(rn), offset);
+            ANDI2R(W0, W0, ~3);
+        }
+        else
+        {
+            ANDI2R(W0, MapReg(rn), ~3);
+        }
+    }
+    else
+    {
+        ANDI2R(W0, MapReg(rn), ~3);
+        if (preinc)
+            ADD(W0, W0, 4);
+    }
+
+    u8* patchFunc;
+    if (compileFastPath)
+    {
+        ptrdiff_t fastPathStart = GetCodeOffset();
+        // code offset of every emitted load/store instruction; a pair
+        // instruction covers two registers, so fewer than regsCount entries
+        // are filled in whenever more than one register is transferred
+        ptrdiff_t loadStoreOffsets[16];
+
+        MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
+        ADD(X1, X1, X0);
+
+        u32 offset = 0;
+        BitSet16::Iterator it = regs.begin();
+        u32 numLoadStores = 0;
+
+        // an odd register count starts with one single-register access so
+        // that the remainder can be handled in pairs
+        if (regsCount & 1)
+        {
+            int reg = *it;
+            it++;
+
+            ARM64Reg first = W3;
+            if (RegCache.LoadedRegs & (1 << reg))
+                first = MapReg(reg);
+            else if (store)
+                LoadReg(reg, first);
+
+            loadStoreOffsets[numLoadStores++] = GetCodeOffset();
+
+            if (store)
+                STR(INDEX_UNSIGNED, first, X1, offset);
+            else
+                LDR(INDEX_UNSIGNED, first, X1, offset);
+
+            if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
+                SaveReg(reg, first);
+
+            offset += 4;
+        }
+
+        while (it != regs.end())
+        {
+            int reg = *it;
+            it++;
+            int nextReg = *it;
+            it++;
+
+            ARM64Reg first = W3, second = W4;
+            if (RegCache.LoadedRegs & (1 << reg))
+                first = MapReg(reg);
+            else if (store)
+                LoadReg(reg, first);
+            if (RegCache.LoadedRegs & (1 << nextReg))
+                second = MapReg(nextReg);
+            else if (store)
+                LoadReg(nextReg, second);
+
+            loadStoreOffsets[numLoadStores++] = GetCodeOffset();
+
+            if (store)
+                STP(INDEX_SIGNED, first, second, X1, offset);
+            else
+                LDP(INDEX_SIGNED, first, second, X1, offset);
+
+            if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
+                SaveReg(reg, first);
+            if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store)
+                SaveReg(nextReg, second);
+
+            offset += 8;
+        }
+
+        LoadStorePatch patch;
+        patch.PatchSize = GetCodeOffset() - fastPathStart;
+        SwapCodeRegion();
+        patchFunc = (u8*)GetRXPtr();
+        patch.PatchFunc = patchFunc;
+        // Register one patch per emitted instruction. Only numLoadStores
+        // entries of loadStoreOffsets were written; iterating up to
+        // regsCount would read uninitialised slots.
+        for (u32 j = 0; j < numLoadStores; j++)
+        {
+            patch.PatchOffset = fastPathStart - loadStoreOffsets[j];
+            LoadStorePatches[loadStoreOffsets[j]] = patch;
+        }
+
+        // the generic sequence below is now the body of the patch function
+        // (it ends in RET further down), so its link register is preserved
+        ABI_PushRegisters({30});
+    }
+
+    int i = 0;
+
+    // buffer on the host stack: one 8-byte slot per register, count rounded
+    // up to even
+    SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
+    if (store)
+    {
+        // low five bits of the CPSR; presumably consumed by ReadBanked
+        if (usermode && (regs & BitSet16(0x7f00)))
+            UBFX(W5, RCPSR, 0, 5);
+
+        BitSet16::Iterator it = regs.begin();
+        while (it != regs.end())
+        {
+            BitSet16::Iterator nextReg = it;
+            nextReg++;
+
+            int reg = *it;
+
+            if (usermode && reg >= 8 && reg < 15)
+            {
+                if (RegCache.LoadedRegs & (1 << reg))
+                    MOV(W3, MapReg(reg));
+                else
+                    LoadReg(reg, W3);
+                MOVI2R(W1, reg - 8);
+                BL(ReadBanked);
+                STR(INDEX_UNSIGNED, W3, SP, i * 8);
+            }
+            else if (!usermode && nextReg != regs.end())
+            {
+                // two registers at once, each into its own 8-byte slot
+                ARM64Reg first = W3, second = W4;
+
+                if (RegCache.LoadedRegs & (1 << reg))
+                    first = MapReg(reg);
+                else
+                    LoadReg(reg, W3);
+
+                if (RegCache.LoadedRegs & (1 << *nextReg))
+                    second = MapReg(*nextReg);
+                else
+                    LoadReg(*nextReg, W4);
+
+                STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
+
+                i++;
+                it++;
+            }
+            else if (RegCache.LoadedRegs & (1 << reg))
+            {
+                STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
+            }
+            else
+            {
+                LoadReg(reg, W3);
+                STR(INDEX_UNSIGNED, W3, SP, i * 8);
+            }
+            i++;
+            it++;
+        }
+    }
+
+    // handler arguments: W0 = address (set above), X1 = buffer,
+    // W2 = register count, X3 = RCPU (ARM9 only)
+    ADD(X1, SP, 0);
+    MOVI2R(W2, regsCount);
+
+    if (Num == 0)
+    {
+        MOV(X3, RCPU);
+        switch ((u32)store * 2 | NDS::ConsoleType)
+        {
+        case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, 0>); break;
+        case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, 1>); break;
+        case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, 0>); break;
+        case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, 1>); break;
+        }
+    }
+    else
+    {
+        switch ((u32)store * 2 | NDS::ConsoleType)
+        {
+        case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, 0>); break;
+        case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, 1>); break;
+        case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, 0>); break;
+        case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, 1>); break;
+        }
+    }
+
+    if (!store)
+    {
+        // low five bits of the CPSR; presumably consumed by WriteBanked
+        if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
+            UBFX(W5, RCPSR, 0, 5);
+
+        BitSet16::Iterator it = regs.begin();
+        while (it != regs.end())
+        {
+            BitSet16::Iterator nextReg = it;
+            nextReg++;
+
+            int reg = *it;
+
+            if (usermode && !regs[15] && reg >= 8 && reg < 15)
+            {
+                LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+                MOVI2R(W1, reg - 8);
+                BL(WriteBanked);
+                // a non-zero W4 skips the write to the current register
+                // (presumably WriteBanked reports that it stored the value)
+                FixupBranch alreadyWritten = CBNZ(W4);
+                if (RegCache.LoadedRegs & (1 << reg))
+                    MOV(MapReg(reg), W3);
+                else
+                    SaveReg(reg, W3);
+                SetJumpTarget(alreadyWritten);
+            }
+            else if (!usermode && nextReg != regs.end())
+            {
+                ARM64Reg first = W3, second = W4;
+
+                if (RegCache.LoadedRegs & (1 << reg))
+                    first = MapReg(reg);
+                if (RegCache.LoadedRegs & (1 << *nextReg))
+                    second = MapReg(*nextReg);
+
+                LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
+
+                // registers not held in the cache go back to the CPU state
+                if (first == W3)
+                    SaveReg(reg, W3);
+                if (second == W4)
+                    SaveReg(*nextReg, W4);
+
+                it++;
+                i++;
+            }
+            else if (RegCache.LoadedRegs & (1 << reg))
+            {
+                ARM64Reg mapped = MapReg(reg);
+                LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
+            }
+            else
+            {
+                LDR(INDEX_UNSIGNED, W3, SP, i * 8);
+                SaveReg(reg, W3);
+            }
+
+            it++;
+            i++;
+        }
+    }
+    ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
+
+    if (compileFastPath)
+    {
+        // finish the patch function and return to the main code region
+        ABI_PopRegisters({30});
+        RET();
+
+        FlushIcacheSection(patchFunc, (u8*)GetRXPtr());
+        SwapCodeRegion();
+    }
+
+    if (!store && regs[15])
+    {
+        ARM64Reg mapped = MapReg(15);
+        Comp_JumpTo(mapped, Num == 0, usermode);
+    }
+
+    return regsCount * 4 * (decrement ? -1 : 1);
+}
+
+// ARM LDM/STM: compiles the block transfer and then applies base register
+// writeback if requested.
+void Compiler::A_Comp_LDM_STM()
+{
+    const auto instr = CurInstr.Instr;
+    const auto baseReg = CurInstr.A_Reg(16);
+
+    BitSet16 regs(instr & 0xFFFF);
+
+    const bool isLoad = instr & (1 << 20);
+    const bool preIndexed = instr & (1 << 24);
+    const bool increment = instr & (1 << 23);
+    const bool usermode = instr & (1 << 22);
+    bool writeback = instr & (1 << 21);
+
+    ARM64Reg rn = MapReg(baseReg);
+
+    s32 offset = Comp_MemAccessBlock(baseReg, regs, !isLoad, preIndexed, !increment, usermode);
+
+    // A load whose register list contains the base: on the ARM9 writeback
+    // still happens if the base is the only register in the list or if a
+    // higher-numbered register is also loaded; on the ARM7 it never does.
+    if (isLoad && writeback && regs[baseReg])
+    {
+        if (Num == 0)
+            writeback = (!(regs & ~BitSet16(1 << baseReg))) || (regs & ~BitSet16((2 << baseReg) - 1));
+        else
+            writeback = false;
+    }
+
+    if (!writeback)
+        return;
+
+    if (offset > 0)
+        ADD(rn, rn, offset);
+    else
+        SUB(rn, rn, -offset);
+}
+
+// Thumb PUSH/POP: bit 8 adds r15 to a POP list or r14 to a PUSH list. PUSH is
+// compiled as a pre-indexed decrementing store, POP as a post-indexed
+// incrementing load; r13 is always updated afterwards.
+void Compiler::T_Comp_PUSH_POP()
+{
+    const bool isPop = CurInstr.Instr & (1 << 11);
+
+    BitSet16 regs(CurInstr.Instr & 0xFF);
+    if (CurInstr.Instr & (1 << 8))
+    {
+        if (isPop)
+            regs[15] = true;
+        else
+            regs[14] = true;
+    }
+
+    ARM64Reg spMapped = MapReg(13);
+    s32 delta = Comp_MemAccessBlock(13, regs, !isPop, !isPop, !isPop, false);
+
+    if (delta > 0)
+        ADD(spMapped, spMapped, delta);
+    else
+        SUB(spMapped, spMapped, -delta);
+}
+
+// Thumb LDMIA/STMIA: post-indexed incrementing block transfer through the
+// base register in bits 8-10. The base is written back afterwards, except
+// for a load whose register list contains the base register.
+void Compiler::T_Comp_LDMIA_STMIA()
+{
+    BitSet16 regs(CurInstr.Instr & 0xFF);
+    ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
+    bool load = CurInstr.Instr & (1 << 11);
+
+    s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
+
+    if (!load || !regs[CurInstr.T_Reg(8)])
+    {
+        if (offset > 0)
+            ADD(rb, rb, offset);
+        else
+            SUB(rb, rb, -offset);
+    }
+}
+
+} \ No newline at end of file