Diffstat (limited to 'src/ARMJIT_A64')
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_ALU.cpp | 943
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Branch.cpp | 421
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.cpp | 918
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Compiler.h | 269
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_Linkage.s | 68
-rw-r--r-- | src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 810
6 files changed, 3429 insertions, 0 deletions
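Note: throughout the ALU code below, the emulated CPSR lives in a host register (RCPSR = W27), and host NZCV flags are moved into it with CSET + BFI at bits 31..28. The following standalone C++ sketch (my illustration, not part of the diff) shows the exact bit layout those BFI calls target:

    #include <cstdint>
    #include <cstdio>

    // Compute NZCV for "a + b" the way a host ADDS sets flags, then pack them
    // into an emulated CPSR value, mirroring the CSET/BFI positions used in
    // ARMJIT_ALU.cpp: N -> bit 31, Z -> bit 30, C -> bit 29, V -> bit 28.
    uint32_t updateCPSRAfterAdd(uint32_t cpsr, uint32_t a, uint32_t b)
    {
        uint32_t res = a + b;
        uint32_t n = res >> 31;                     // negative
        uint32_t z = res == 0;                      // zero
        uint32_t c = res < a;                       // unsigned carry-out
        uint32_t v = (~(a ^ b) & (a ^ res)) >> 31;  // signed overflow
        cpsr &= ~0xF0000000u;
        return cpsr | (n << 31) | (z << 30) | (c << 29) | (v << 28);
    }

    int main()
    {
        // 0x7FFFFFFF + 1 overflows to 0x80000000: prints 90000000 (N and V set)
        printf("%08X\n", updateCPSRAfterAdd(0, 0x7FFFFFFFu, 1));
    }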
diff --git a/src/ARMJIT_A64/ARMJIT_ALU.cpp b/src/ARMJIT_A64/ARMJIT_ALU.cpp new file mode 100644 index 0000000..26a89cb --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_ALU.cpp @@ -0,0 +1,943 @@ +#include "ARMJIT_Compiler.h" + +using namespace Arm64Gen; + +namespace ARMJIT +{ + +void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs) +{ + if (!(CurInstr.SetFlags & 0x2)) + S = false; + + CPSRDirty |= S; + + UBFX(W1, rs, 0, 8); + + if (!S) + { + if (op == 3) + RORV(W0, op2.Reg.Rm, W1); + else + { + CMP(W1, 32); + if (op == 2) + { + MOVI2R(W2, 31); + CSEL(W1, W2, W1, CC_GE); + ASRV(W0, op2.Reg.Rm, W1); + } + else + { + if (op == 0) + LSLV(W0, op2.Reg.Rm, W1); + else if (op == 1) + LSRV(W0, op2.Reg.Rm, W1); + CSEL(W0, WZR, W0, CC_GE); + } + } + } + else + { + MOV(W0, op2.Reg.Rm); + FixupBranch zero = CBZ(W1); + + SUB(W1, W1, 1); + if (op == 3) + { + RORV(W0, op2.Reg.Rm, W1); + BFI(RCPSR, W0, 29, 1); + } + else + { + CMP(W1, 31); + if (op == 2) + { + MOVI2R(W2, 31); + CSEL(W1, W2, W1, CC_GT); + ASRV(W0, op2.Reg.Rm, W1); + BFI(RCPSR, W0, 29, 1); + } + else + { + if (op == 0) + { + LSLV(W0, op2.Reg.Rm, W1); + UBFX(W1, W0, 31, 1); + } + else if (op == 1) + LSRV(W0, op2.Reg.Rm, W1); + CSEL(W1, WZR, op ? W0 : W1, CC_GT); + BFI(RCPSR, W1, 29, 1); + CSEL(W0, WZR, W0, CC_GE); + } + } + + MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1)); + SetJumpTarget(zero); + } + op2 = Op2(W0, ST_LSL, 0); +} + +void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp) +{ + if (!(CurInstr.SetFlags & 0x2)) + S = false; + + CPSRDirty |= S; + + switch (op) + { + case 0: // LSL + if (S && amount) + { + UBFX(tmp, op2.Reg.Rm, 32 - amount, 1); + BFI(RCPSR, tmp, 29, 1); + } + op2 = Op2(op2.Reg.Rm, ST_LSL, amount); + return; + case 1: // LSR + if (S) + { + UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1); + BFI(RCPSR, tmp, 29, 1); + } + if (amount == 0) + { + op2 = Op2(0); + return; + } + op2 = Op2(op2.Reg.Rm, ST_LSR, amount); + return; + case 2: // ASR + if (S) + { + UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1); + BFI(RCPSR, tmp, 29, 1); + } + op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? 
amount : 31); + return; + case 3: // ROR + if (amount == 0) + { + UBFX(tmp, RCPSR, 29, 1); + LSL(tmp, tmp, 31); + if (S) + BFI(RCPSR, op2.Reg.Rm, 29, 1); + ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1)); + + op2 = Op2(tmp, ST_LSL, 0); + } + else + { + if (S) + { + UBFX(tmp, op2.Reg.Rm, amount - 1, 1); + BFI(RCPSR, tmp, 29, 1); + } + op2 = Op2(op2.Reg.Rm, ST_ROR, amount); + } + return; + } +} + +void Compiler::Comp_RetriveFlags(bool retriveCV) +{ + if (CurInstr.SetFlags) + CPSRDirty = true; + + if (CurInstr.SetFlags & 0x4) + { + CSET(W0, CC_EQ); + BFI(RCPSR, W0, 30, 1); + } + if (CurInstr.SetFlags & 0x8) + { + CSET(W0, CC_MI); + BFI(RCPSR, W0, 31, 1); + } + if (retriveCV) + { + if (CurInstr.SetFlags & 0x2) + { + CSET(W0, CC_CS); + BFI(RCPSR, W0, 29, 1); + } + if (CurInstr.SetFlags & 0x1) + { + CSET(W0, CC_VS); + BFI(RCPSR, W0, 28, 1); + } + } +} + +void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2) +{ + if (S && !CurInstr.SetFlags) + S = false; + + switch (op) + { + case 0x0: // AND + if (S) + { + if (op2.IsImm) + ANDSI2R(rd, rn, op2.Imm, W0); + else + ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + else + { + if (op2.IsImm) + ANDI2R(rd, rn, op2.Imm, W0); + else + AND(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + break; + case 0x1: // EOR + if (op2.IsImm) + EORI2R(rd, rn, op2.Imm, W0); + else + EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + if (S && FlagsNZNeeded()) + TST(rd, rd); + break; + case 0xC: // ORR + if (op2.IsImm) + ORRI2R(rd, rn, op2.Imm, W0); + else + ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + if (S && FlagsNZNeeded()) + TST(rd, rd); + break; + case 0xE: // BIC + if (S) + { + if (op2.IsImm) + ANDSI2R(rd, rn, ~op2.Imm, W0); + else + BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + else + { + if (op2.IsImm) + ANDI2R(rd, rn, ~op2.Imm, W0); + else + BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + break; + } + + if (S) + Comp_RetriveFlags(false); +} + +void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2) +{ + if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR) + { + MOV(W0, op2.Reg.Rm, op2.ToArithOption()); + op2 = Op2(W0, ST_LSL, 0); + } + + if (S && !CurInstr.SetFlags) + S = false; + + bool CVInGPR = false; + switch (op) + { + case 0x2: // SUB + if (S) + { + if (op2.IsImm) + SUBSI2R(rd, rn, op2.Imm, W0); + else + SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + else + { + if (op2.IsImm) + { + MOVI2R(W2, op2.Imm); + SUBI2R(rd, rn, op2.Imm, W0); + } + else + SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + break; + case 0x3: // RSB + if (op2.IsZero()) + { + op2 = Op2(WZR); + } + else if (op2.IsImm) + { + MOVI2R(W1, op2.Imm); + op2 = Op2(W1); + } + else if (op2.Reg.ShiftAmount != 0) + { + MOV(W1, op2.Reg.Rm, op2.ToArithOption()); + op2 = Op2(W1); + } + + if (S) + SUBS(rd, op2.Reg.Rm, rn); + else + SUB(rd, op2.Reg.Rm, rn); + break; + case 0x4: // ADD + if (S) + { + if (op2.IsImm) + ADDSI2R(rd, rn, op2.Imm, W0); + else + ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + else + { + if (op2.IsImm) + ADDI2R(rd, rn, op2.Imm, W0); + else + ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption()); + } + break; + case 0x5: // ADC + UBFX(W2, RCPSR, 29, 1); + if (S) + { + CVInGPR = true; + ADDS(W1, rn, W2); + CSET(W2, CC_CS); + CSET(W3, CC_VS); + if (op2.IsImm) + ADDSI2R(rd, W1, op2.Imm, W0); + else + ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption()); + CSINC(W2, W2, WZR, CC_CC); + CSINC(W3, W3, WZR, CC_VC); + } + else + { + ADD(W1, rn, W2); + if (op2.IsImm) + ADDI2R(rd, W1, op2.Imm, W0); + else + ADD(rd, W1, op2.Reg.Rm, 
op2.ToArithOption()); + } + break; + case 0x6: // SBC + UBFX(W2, RCPSR, 29, 1); + // W1 = -op2 - 1 + if (op2.IsImm) + MOVI2R(W1, ~op2.Imm); + else + ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption()); + if (S) + { + CVInGPR = true; + ADDS(W1, W2, W1); + CSET(W2, CC_CS); + CSET(W3, CC_VS); + ADDS(rd, rn, W1); + CSINC(W2, W2, WZR, CC_CC); + CSINC(W3, W3, WZR, CC_VC); + } + else + { + ADD(W1, W2, W1); + ADD(rd, rn, W1); + } + break; + case 0x7: // RSC + UBFX(W2, RCPSR, 29, 1); + // W1 = -rn - 1 + MVN(W1, rn); + if (S) + { + CVInGPR = true; + ADDS(W1, W2, W1); + CSET(W2, CC_CS); + CSET(W3, CC_VS); + if (op2.IsImm) + ADDSI2R(rd, W1, op2.Imm); + else + ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption()); + CSINC(W2, W2, WZR, CC_CC); + CSINC(W3, W3, WZR, CC_VC); + } + else + { + ADD(W1, W2, W1); + if (op2.IsImm) + ADDI2R(rd, W1, op2.Imm); + else + ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption()); + } + break; + } + + if (S) + { + if (CVInGPR) + { + BFI(RCPSR, W2, 29, 1); + BFI(RCPSR, W3, 28, 1); + } + Comp_RetriveFlags(!CVInGPR); + } +} + +void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2) +{ + if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR) + { + MOV(W0, op2.Reg.Rm, op2.ToArithOption()); + op2 = Op2(W0, ST_LSL, 0); + } + + switch (op) + { + case 0x8: // TST + if (op2.IsImm) + TSTI2R(rn, op2.Imm, W0); + else + ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption()); + break; + case 0x9: // TEQ + if (op2.IsImm) + EORI2R(W0, rn, op2.Imm, W0); + else + EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption()); + TST(W0, W0); + break; + case 0xA: // CMP + if (op2.IsImm) + CMPI2R(rn, op2.Imm, W0); + else + CMP(rn, op2.Reg.Rm, op2.ToArithOption()); + break; + case 0xB: // CMN + if (op2.IsImm) + ADDSI2R(WZR, rn, op2.Imm, W0); + else + CMN(rn, op2.Reg.Rm, op2.ToArithOption()); + break; + } + + Comp_RetriveFlags(op >= 0xA); +} + +// also counts cycles! 
+void Compiler::A_Comp_GetOp2(bool S, Op2& op2) +{ + if (CurInstr.Instr & (1 << 25)) + { + Comp_AddCycles_C(); + + u32 shift = (CurInstr.Instr >> 7) & 0x1E; + u32 imm = ROR(CurInstr.Instr & 0xFF, shift); + + if (S && shift && (CurInstr.SetFlags & 0x2)) + { + CPSRDirty = true; + if (imm & 0x80000000) + ORRI2R(RCPSR, RCPSR, 1 << 29); + else + ANDI2R(RCPSR, RCPSR, ~(1 << 29)); + } + + op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); + } + else + { + int op = (CurInstr.Instr >> 5) & 0x3; + op2.Reg.Rm = MapReg(CurInstr.A_Reg(0)); + if (CurInstr.Instr & (1 << 4)) + { + Comp_AddCycles_CI(1); + + ARM64Reg rs = MapReg(CurInstr.A_Reg(8)); + if (CurInstr.A_Reg(0) == 15) + { + ADD(W0, op2.Reg.Rm, 4); + op2.Reg.Rm = W0; + } + Comp_RegShiftReg(op, S, op2, rs); + } + else + { + Comp_AddCycles_C(); + + int amount = (CurInstr.Instr >> 7) & 0x1F; + Comp_RegShiftImm(op, amount, S, op2); + } + } +} + +void Compiler::A_Comp_ALUCmpOp() +{ + u32 op = (CurInstr.Instr >> 21) & 0xF; + ARM64Reg rn = MapReg(CurInstr.A_Reg(16)); + Op2 op2; + A_Comp_GetOp2(op <= 0x9, op2); + + Comp_Compare(op, rn, op2); +} + +void Compiler::A_Comp_ALUMovOp() +{ + bool S = CurInstr.Instr & (1 << 20); + u32 op = (CurInstr.Instr >> 21) & 0xF; + + ARM64Reg rd = MapReg(CurInstr.A_Reg(12)); + Op2 op2; + A_Comp_GetOp2(S, op2); + + if (op == 0xF) // MVN + { + if (op2.IsImm) + { + if (CurInstr.Cond() == 0xE) + RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm); + MOVI2R(rd, ~op2.Imm); + } + else + ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption()); + } + else // MOV + { + if (op2.IsImm) + { + if (CurInstr.Cond() == 0xE) + RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm); + MOVI2R(rd, op2.Imm); + } + else + { + // ORR with shifted operand has cycles latency + if (op2.Reg.ShiftAmount > 0) + { + switch (op2.Reg.ShiftType) + { + case ST_LSL: LSL(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break; + case ST_LSR: LSR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break; + case ST_ASR: ASR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break; + case ST_ROR: ROR_(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break; + } + } + else + { + MOV(rd, op2.Reg.Rm, op2.ToArithOption()); + } + } + } + + if (S) + { + if (FlagsNZNeeded()) + TST(rd, rd); + Comp_RetriveFlags(false); + } + + if (CurInstr.Info.Branches()) + Comp_JumpTo(rd, true, S); +} + +void Compiler::A_Comp_ALUTriOp() +{ + bool S = CurInstr.Instr & (1 << 20); + u32 op = (CurInstr.Instr >> 21) & 0xF; + bool logical = (1 << op) & 0xF303; + + ARM64Reg rd = MapReg(CurInstr.A_Reg(12)); + ARM64Reg rn = MapReg(CurInstr.A_Reg(16)); + Op2 op2; + A_Comp_GetOp2(S && logical, op2); + + if (op2.IsImm && op2.Imm == 0) + op2 = Op2(WZR, ST_LSL, 0); + + if (logical) + Comp_Logical(op, S, rd, rn, op2); + else + Comp_Arithmetic(op, S, rd, rn, op2); + + if (CurInstr.Info.Branches()) + Comp_JumpTo(rd, true, S); +} + +void Compiler::A_Comp_Clz() +{ + Comp_AddCycles_C(); + + ARM64Reg rd = MapReg(CurInstr.A_Reg(12)); + ARM64Reg rm = MapReg(CurInstr.A_Reg(0)); + + CLZ(rd, rm); + + assert(Num == 0); +} + +void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn) +{ + if (Num == 0) + { + Comp_AddCycles_CI(S ? 3 : 1); + } + else + { + CLS(W0, rs); + Comp_AddCycles_CI(mla ? 
1 : 0, W0, ArithOption(W0, ST_LSR, 3)); + } + + if (mla) + MADD(rd, rm, rs, rn); + else + MUL(rd, rm, rs); + + if (S && FlagsNZNeeded()) + { + TST(rd, rd); + Comp_RetriveFlags(false); + } +} + +void Compiler::A_Comp_Mul_Long() +{ + ARM64Reg rd = MapReg(CurInstr.A_Reg(16)); + ARM64Reg rm = MapReg(CurInstr.A_Reg(0)); + ARM64Reg rs = MapReg(CurInstr.A_Reg(8)); + ARM64Reg rn = MapReg(CurInstr.A_Reg(12)); + + bool S = CurInstr.Instr & (1 << 20); + bool add = CurInstr.Instr & (1 << 21); + bool sign = CurInstr.Instr & (1 << 22); + + if (Num == 0) + { + Comp_AddCycles_CI(S ? 3 : 1); + } + else + { + if (sign) + CLS(W0, rs); + else + CLZ(W0, rs); + Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3)); + } + + if (add) + { + MOV(W0, rn); + BFI(X0, EncodeRegTo64(rd), 32, 32); + if (sign) + SMADDL(EncodeRegTo64(rn), rm, rs, X0); + else + UMADDL(EncodeRegTo64(rn), rm, rs, X0); + if (S && FlagsNZNeeded()) + TST(EncodeRegTo64(rn), EncodeRegTo64(rn)); + UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32); + } + else + { + if (sign) + SMULL(EncodeRegTo64(rn), rm, rs); + else + UMULL(EncodeRegTo64(rn), rm, rs); + if (S && FlagsNZNeeded()) + TST(EncodeRegTo64(rn), EncodeRegTo64(rn)); + UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32); + } + + if (S) + Comp_RetriveFlags(false); +} + +void Compiler::A_Comp_Mul_Short() +{ + ARM64Reg rd = MapReg(CurInstr.A_Reg(16)); + ARM64Reg rm = MapReg(CurInstr.A_Reg(0)); + ARM64Reg rs = MapReg(CurInstr.A_Reg(8)); + u32 op = (CurInstr.Instr >> 21) & 0xF; + + bool x = CurInstr.Instr & (1 << 5); + bool y = CurInstr.Instr & (1 << 6); + + SBFX(W1, rs, y ? 16 : 0, 16); + + if (op == 0b1000) + { + // SMLAxy + + SBFX(W0, rm, x ? 16 : 0, 16); + + MUL(W0, W0, W1); + + ORRI2R(W1, RCPSR, 0x08000000); + + ARM64Reg rn = MapReg(CurInstr.A_Reg(12)); + ADDS(rd, W0, rn); + + CSEL(RCPSR, W1, RCPSR, CC_VS); + + CPSRDirty = true; + + Comp_AddCycles_C(); + } + else if (op == 0b1011) + { + // SMULxy + + SBFX(W0, rm, x ? 16 : 0, 16); + + MUL(rd, W0, W1); + + Comp_AddCycles_C(); + } + else if (op == 0b1010) + { + // SMLALxy + + ARM64Reg rn = MapReg(CurInstr.A_Reg(12)); + + MOV(W2, rn); + BFI(X2, rd, 32, 32); + + SBFX(W0, rm, x ? 16 : 0, 16); + + SMADDL(EncodeRegTo64(rn), W0, W1, X2); + + UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32); + + Comp_AddCycles_CI(1); + } + else if (op == 0b1001) + { + // SMLAWy/SMULWy + SMULL(X0, rm, W1); + ASR(x ? 
EncodeRegTo64(rd) : X0, X0, 16); + + if (!x) + { + ORRI2R(W1, RCPSR, 0x08000000); + + ARM64Reg rn = MapReg(CurInstr.A_Reg(12)); + ADDS(rd, W0, rn); + + CSEL(RCPSR, W1, RCPSR, CC_VS); + + CPSRDirty = true; + } + + Comp_AddCycles_C(); + } +} + +void Compiler::A_Comp_Mul() +{ + ARM64Reg rd = MapReg(CurInstr.A_Reg(16)); + ARM64Reg rm = MapReg(CurInstr.A_Reg(0)); + ARM64Reg rs = MapReg(CurInstr.A_Reg(8)); + + bool S = CurInstr.Instr & (1 << 20); + bool mla = CurInstr.Instr & (1 << 21); + ARM64Reg rn = INVALID_REG; + if (mla) + rn = MapReg(CurInstr.A_Reg(12)); + + Comp_Mul_Mla(S, mla, rd, rm, rs, rn); +} + +void Compiler::T_Comp_ShiftImm() +{ + Comp_AddCycles_C(); + + u32 op = (CurInstr.Instr >> 11) & 0x3; + int amount = (CurInstr.Instr >> 6) & 0x1F; + + ARM64Reg rd = MapReg(CurInstr.T_Reg(0)); + Op2 op2; + op2.Reg.Rm = MapReg(CurInstr.T_Reg(3)); + Comp_RegShiftImm(op, amount, true, op2); + if (op2.IsImm) + MOVI2R(rd, op2.Imm); + else + MOV(rd, op2.Reg.Rm, op2.ToArithOption()); + if (FlagsNZNeeded()) + TST(rd, rd); + + Comp_RetriveFlags(false); +} + +void Compiler::T_Comp_AddSub_() +{ + Comp_AddCycles_C(); + + Op2 op2; + if (CurInstr.Instr & (1 << 10)) + op2 = Op2((CurInstr.Instr >> 6) & 0x7); + else + op2 = Op2(MapReg(CurInstr.T_Reg(6))); + + Comp_Arithmetic( + CurInstr.Instr & (1 << 9) ? 0x2 : 0x4, + true, + MapReg(CurInstr.T_Reg(0)), + MapReg(CurInstr.T_Reg(3)), + op2); +} + +void Compiler::T_Comp_ALUImm8() +{ + Comp_AddCycles_C(); + + u32 imm = CurInstr.Instr & 0xFF; + int op = (CurInstr.Instr >> 11) & 0x3; + + ARM64Reg rd = MapReg(CurInstr.T_Reg(8)); + + switch (op) + { + case 0: + MOVI2R(rd, imm); + if (FlagsNZNeeded()) + TST(rd, rd); + Comp_RetriveFlags(false); + break; + case 1: + Comp_Compare(0xA, rd, Op2(imm)); + break; + case 2: + case 3: + Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm)); + break; + } +} + +void Compiler::T_Comp_ALU() +{ + int op = (CurInstr.Instr >> 6) & 0xF; + ARM64Reg rd = MapReg(CurInstr.T_Reg(0)); + ARM64Reg rs = MapReg(CurInstr.T_Reg(3)); + + if ((op >= 0x2 && op <= 0x4) || op == 0x7) + Comp_AddCycles_CI(1); + else + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: + Comp_Logical(0x0, true, rd, rd, Op2(rs)); + break; + case 0x1: + Comp_Logical(0x1, true, rd, rd, Op2(rs)); + break; + case 0x2: + case 0x3: + case 0x4: + case 0x7: + { + Op2 op2; + op2.Reg.Rm = rd; + Comp_RegShiftReg(op == 0x7 ? 
3 : (op - 0x2), true, op2, rs); + MOV(rd, op2.Reg.Rm, op2.ToArithOption()); + if (FlagsNZNeeded()) + TST(rd, rd); + Comp_RetriveFlags(false); + } + break; + case 0x5: + Comp_Arithmetic(0x5, true, rd, rd, Op2(rs)); + break; + case 0x6: + Comp_Arithmetic(0x6, true, rd, rd, Op2(rs)); + break; + case 0x8: + Comp_Compare(0x8, rd, Op2(rs)); + break; + case 0x9: + Comp_Arithmetic(0x3, true, rd, rs, Op2(0)); + break; + case 0xA: + Comp_Compare(0xA, rd, Op2(rs)); + break; + case 0xB: + Comp_Compare(0xB, rd, Op2(rs)); + break; + case 0xC: + Comp_Logical(0xC, true, rd, rd, Op2(rs)); + break; + case 0xD: + Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG); + break; + case 0xE: + Comp_Logical(0xE, true, rd, rd, Op2(rs)); + break; + case 0xF: + MVN(rd, rs); + if (FlagsNZNeeded()) + TST(rd, rd); + Comp_RetriveFlags(false); + break; + } +} + +void Compiler::T_Comp_ALU_HiReg() +{ + u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8)); + ARM64Reg rdMapped = MapReg(rd); + ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF); + + u32 op = (CurInstr.Instr >> 8) & 0x3; + + Comp_AddCycles_C(); + + switch (op) + { + case 0: + Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs)); + break; + case 1: + Comp_Compare(0xA, rdMapped, rs); + return; + case 2: + MOV(rdMapped, rs); + break; + } + + if (rd == 15) + { + Comp_JumpTo(rdMapped, false, false); + } +} + +void Compiler::T_Comp_AddSP() +{ + Comp_AddCycles_C(); + + ARM64Reg sp = MapReg(13); + u32 offset = (CurInstr.Instr & 0x7F) << 2; + if (CurInstr.Instr & (1 << 7)) + SUB(sp, sp, offset); + else + ADD(sp, sp, offset); +} + +void Compiler::T_Comp_RelAddr() +{ + Comp_AddCycles_C(); + + ARM64Reg rd = MapReg(CurInstr.T_Reg(8)); + u32 offset = (CurInstr.Instr & 0xFF) << 2; + if (CurInstr.Instr & (1 << 11)) + { + ARM64Reg sp = MapReg(13); + ADD(rd, sp, offset); + } + else + MOVI2R(rd, (R15 & ~2) + offset); +} + +}
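The trickiest part of the file above is Comp_RegShiftImm: ARM encodes some immediate shifts specially (LSR/ASR by an encoded 0 mean a shift by 32, ROR by 0 means RRX), which is why ASR #0 is mapped to AArch64 ST_ASR with amount 31. A hedged, standalone sketch of the guest semantics being matched (names and signature are mine):

    #include <cstdint>
    #include <cassert>

    // Guest barrel-shifter result for a shift-by-immediate operand (carry-out
    // handling omitted). Encoded amount 0 is the special case described above.
    uint32_t barrelShiftImm(int op, uint32_t rm, int amount, bool carryIn)
    {
        switch (op)
        {
        case 0:  return rm << amount;                                   // LSL
        case 1:  return amount ? rm >> amount : 0;                      // LSR, #0 = #32
        case 2:  return amount ? (uint32_t)((int32_t)rm >> amount)      // ASR
                               : (uint32_t)((int32_t)rm >> 31);         // ASR #32
        default: return amount ? (rm >> amount) | (rm << (32 - amount)) // ROR
                               : ((uint32_t)carryIn << 31) | (rm >> 1); // RRX
        }
    }

    int main()
    {
        assert(barrelShiftImm(2, 0x80000000u, 0, false) == 0xFFFFFFFFu); // ASR #32
        assert(barrelShiftImm(3, 0x00000001u, 0, true)  == 0x80000000u); // RRX
    }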
\ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp new file mode 100644 index 0000000..f130938 --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -0,0 +1,421 @@ +#include "ARMJIT_Compiler.h" + +using namespace Arm64Gen; + +// hack +const int kCodeCacheTiming = 3; + +namespace ARMJIT +{ + +template <typename T> +void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR) +{ + cpu->JumpTo(addr, changeCPSR); +} + +void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) +{ + // we can simplify constant branches by a lot + // it's not completely safe to assume stuff like, which instructions to preload + // we'll see how it works out + + IrregularCycles = true; + + u32 newPC; + u32 cycles = 0; + bool setupRegion = false; + + if (addr & 0x1 && !Thumb) + { + CPSRDirty = true; + ORRI2R(RCPSR, RCPSR, 0x20); + } + else if (!(addr & 0x1) && Thumb) + { + CPSRDirty = true; + ANDI2R(RCPSR, RCPSR, ~0x20); + } + + if (Num == 0) + { + ARMv5* cpu9 = (ARMv5*)CurCPU; + + u32 oldregion = R15 >> 24; + u32 newregion = addr >> 24; + + u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0]; + u32 compileTimeCodeCycles = cpu9->RegionCodeCycles; + cpu9->RegionCodeCycles = regionCodeCycles; + + MOVI2R(W0, regionCodeCycles); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles)); + + setupRegion = newregion != oldregion; + if (setupRegion) + cpu9->SetupCodeMem(addr); + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + // two-opcodes-at-once fetch + // doesn't matter if we put garbage in the MSbs there + if (addr & 0x2) + { + cpu9->CodeRead32(addr-2, true) >> 16; + cycles += cpu9->CodeCycles; + cpu9->CodeRead32(addr+2, false); + cycles += CurCPU->CodeCycles; + } + else + { + cpu9->CodeRead32(addr, true); + cycles += cpu9->CodeCycles; + } + } + else + { + addr &= ~0x3; + newPC = addr+4; + + cpu9->CodeRead32(addr, true); + cycles += cpu9->CodeCycles; + cpu9->CodeRead32(addr+4, false); + cycles += cpu9->CodeCycles; + } + + cpu9->RegionCodeCycles = compileTimeCodeCycles; + if (setupRegion) + cpu9->SetupCodeMem(R15); + } + else + { + ARMv4* cpu7 = (ARMv4*)CurCPU; + + u32 codeRegion = addr >> 24; + u32 codeCycles = addr >> 15; // cheato + + cpu7->CodeRegion = codeRegion; + cpu7->CodeCycles = codeCycles; + + MOVI2R(W0, codeRegion); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion)); + MOVI2R(W0, codeCycles); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles)); + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + // this is necessary because ARM7 bios protection + u32 compileTimePC = CurCPU->R[15]; + CurCPU->R[15] = newPC; + + cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1]; + + CurCPU->R[15] = compileTimePC; + } + else + { + addr &= ~0x3; + newPC = addr+4; + + u32 compileTimePC = CurCPU->R[15]; + CurCPU->R[15] = newPC; + + cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3]; + + CurCPU->R[15] = compileTimePC; + } + + cpu7->CodeRegion = R15 >> 24; + cpu7->CodeCycles = addr >> 15; + } + + if (Exit) + { + MOVI2R(W0, newPC); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15])); + } + if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) + ConstantCycles += cycles; + else + SUB(RCycles, RCycles, cycles); +} + + +void* Compiler::Gen_JumpTo9(int kind) +{ + AlignCode16(); + void* res = GetRXPtr(); + + LSR(W1, W0, 12); + ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2); + LDRB(W1, RCPU, W1); + + LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, 
ITCMSize)); + + STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles)); + + CMP(W1, 0xFF); + MOVI2R(W3, kCodeCacheTiming); + CSEL(W1, W3, W1, CC_EQ); + CMP(W0, W2); + CSINC(W1, W1, WZR, CC_HS); + + FixupBranch switchToThumb; + if (kind == 0) + switchToThumb = TBNZ(W0, 0); + + if (kind == 0 || kind == 1) + { + // ARM + if (kind == 0) + ANDI2R(RCPSR, RCPSR, ~0x20); + + ANDI2R(W0, W0, ~3); + ADD(W0, W0, 4); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15])); + + ADD(W1, W1, W1); + SUB(RCycles, RCycles, W1); + RET(); + } + + if (kind == 0 || kind == 2) + { + // Thumb + if (kind == 0) + { + SetJumpTarget(switchToThumb); + ORRI2R(RCPSR, RCPSR, 0x20); + } + + ANDI2R(W0, W0, ~1); + ADD(W0, W0, 2); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15])); + + ADD(W2, W1, W1); + TSTI2R(W0, 0x2); + CSEL(W1, W1, W2, CC_EQ); + SUB(RCycles, RCycles, W1); + RET(); + } + + return res; +} + +void* Compiler::Gen_JumpTo7(int kind) +{ + void* res = GetRXPtr(); + + LSR(W1, W0, 24); + STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion)); + LSR(W1, W0, 15); + STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles)); + + MOVP2R(X2, NDS::ARM7MemTimings); + LDR(W3, X2, ArithOption(W1, true)); + + FixupBranch switchToThumb; + if (kind == 0) + switchToThumb = TBNZ(W0, 0); + + if (kind == 0 || kind == 1) + { + UBFX(W2, W3, 0, 8); + UBFX(W3, W3, 8, 8); + ADD(W2, W3, W2); + SUB(RCycles, RCycles, W2); + + ANDI2R(W0, W0, ~3); + + if (kind == 0) + ANDI2R(RCPSR, RCPSR, ~0x20); + + ADD(W3, W0, 4); + STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15])); + + RET(); + } + if (kind == 0 || kind == 2) + { + if (kind == 0) + { + SetJumpTarget(switchToThumb); + + ORRI2R(RCPSR, RCPSR, 0x20); + } + + UBFX(W2, W3, 16, 8); + UBFX(W3, W3, 24, 8); + ADD(W2, W3, W2); + SUB(RCycles, RCycles, W2); + + ANDI2R(W0, W0, ~1); + + ADD(W3, W0, 2); + STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15])); + + RET(); + } + + return res; +} + +void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR) +{ + IrregularCycles = true; + + if (!restoreCPSR) + { + if (switchThumb) + CPSRDirty = true; + MOV(W0, addr); + BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 
0 : (Thumb + 1)]); + } + else + { + + bool cpsrDirty = CPSRDirty; + SaveCPSR(); + SaveCycles(); + PushRegs(restoreCPSR); + + if (switchThumb) + MOV(W1, addr); + else + { + if (Thumb) + ORRI2R(W1, addr, 1); + else + ANDI2R(W1, addr, ~1); + } + MOV(X0, RCPU); + MOVI2R(W2, restoreCPSR); + if (Num == 0) + QuickCallFunction(X3, jumpToTrampoline<ARMv5>); + else + QuickCallFunction(X3, jumpToTrampoline<ARMv4>); + + PopRegs(restoreCPSR); + LoadCycles(); + LoadCPSR(); + if (CurInstr.Cond() < 0xE) + CPSRDirty = cpsrDirty; + } +} + +void Compiler::A_Comp_BranchImm() +{ + int op = (CurInstr.Instr >> 24) & 1; + s32 offset = (s32)(CurInstr.Instr << 8) >> 6; + u32 target = R15 + offset; + bool link = op; + + if (CurInstr.Cond() == 0xF) // BLX_imm + { + target += (op << 1) + 1; + link = true; + } + + if (link) + MOVI2R(MapReg(14), R15 - 4); + + Comp_JumpTo(target); +} + +void Compiler::A_Comp_BranchXchangeReg() +{ + ARM64Reg rn = MapReg(CurInstr.A_Reg(0)); + MOV(W0, rn); + if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg + MOVI2R(MapReg(14), R15 - 4); + Comp_JumpTo(W0, true); +} + +void Compiler::T_Comp_BCOND() +{ + u32 cond = (CurInstr.Instr >> 8) & 0xF; + FixupBranch skipExecute = CheckCondition(cond); + + s32 offset = (s32)(CurInstr.Instr << 24) >> 23; + Comp_JumpTo(R15 + offset + 1, true); + + Comp_BranchSpecialBehaviour(true); + + FixupBranch skipFailed = B(); + SetJumpTarget(skipExecute); + Comp_AddCycles_C(true); + + Comp_BranchSpecialBehaviour(false); + + SetJumpTarget(skipFailed); +} + +void Compiler::T_Comp_B() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20; + Comp_JumpTo(R15 + offset + 1); +} + +void Compiler::T_Comp_BranchXchangeReg() +{ + bool link = CurInstr.Instr & (1 << 7); + + if (link) + { + if (Num == 1) + { + printf("BLX unsupported on ARM7!!!\n"); + return; + } + MOV(W0, MapReg(CurInstr.A_Reg(3))); + MOVI2R(MapReg(14), R15 - 1); + Comp_JumpTo(W0, true); + } + else + { + ARM64Reg rn = MapReg(CurInstr.A_Reg(3)); + Comp_JumpTo(rn, true); + } +} + +void Compiler::T_Comp_BL_LONG_1() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9; + MOVI2R(MapReg(14), R15 + offset); + Comp_AddCycles_C(); +} + +void Compiler::T_Comp_BL_LONG_2() +{ + ARM64Reg lr = MapReg(14); + s32 offset = (CurInstr.Instr & 0x7FF) << 1; + ADD(W0, lr, offset); + MOVI2R(lr, (R15 - 2) | 1); + Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12))); +} + +void Compiler::T_Comp_BL_Merged() +{ + Comp_AddCycles_C(); + + R15 += 2; + + u32 upperPart = CurInstr.Instr >> 16; + u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9); + target += (upperPart & 0x7FF) << 1; + + if (Num == 1 || upperPart & (1 << 12)) + target |= 1; + + MOVI2R(MapReg(14), (R15 - 2) | 1); + + Comp_JumpTo(target); +} + +}
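T_Comp_BL_Merged above folds a Thumb BL/BLX halfword pair into a single compile-time target, with bit 0 of the result doubling as the "stay in Thumb" bit consumed by Comp_JumpTo. A standalone sketch of that address arithmetic (function name and signature are my own):

    #include <cstdint>

    // Target of a merged Thumb BL/BLX pair. addr is the address of the first
    // halfword; the first halfword carries the high offset bits, the second
    // the low bits plus the BL/BLX distinction in bit 12.
    uint32_t thumbBLTarget(uint32_t addr, uint16_t first, uint16_t second, bool arm7)
    {
        uint32_t target = (addr + 4)
            + ((int32_t)(((uint32_t)(first & 0x7FF)) << 21) >> 9); // sign-extended << 12
        target += (second & 0x7FF) << 1;
        if (arm7 || (second & (1 << 12))) // BLX (bit 12 clear) only exists on the ARM9
            target |= 1;                  // bit 0 = stay in Thumb for Comp_JumpTo
        return target;
    }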
\ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.cpp b/src/ARMJIT_A64/ARMJIT_Compiler.cpp new file mode 100644 index 0000000..413c673 --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_Compiler.cpp @@ -0,0 +1,918 @@ +#ifdef __SWITCH__ +#include "../switch/compat_switch.h" + +extern char __start__; +#else +#include <sys/mman.h> +#include <unistd.h> +#endif + +#include "ARMJIT_Compiler.h" + +#include "../ARMJIT_Internal.h" +#include "../ARMInterpreter.h" +#include "../Config.h" + +#include <malloc.h> + +using namespace Arm64Gen; + +extern "C" void ARM_Ret(); + +namespace ARMJIT +{ + +/* + + Recompiling classic ARM to ARMv8 code is at the same time + easier and trickier than compiling to a less related architecture + like x64. At one hand you can translate a lot of instructions directly. + But at the same time, there are a ton of exceptions, like for + example ADD and SUB can't have a RORed second operand on ARMv8. + + While writing a JIT when an instruction is recompiled into multiple ones + not to write back until you've read all the other operands! +*/ + +template <> +const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] = + {W19, W20, W21, W22, W23, W24, W25, W26}; +template <> +const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8; + +const int JitMemSize = 16 * 1024 * 1024; +#ifndef __SWITCH__ +u8 JitMem[JitMemSize]; +#endif + +void Compiler::MovePC() +{ + ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4); +} + +void Compiler::A_Comp_MRS() +{ + Comp_AddCycles_C(); + + ARM64Reg rd = MapReg(CurInstr.A_Reg(12)); + + if (CurInstr.Instr & (1 << 22)) + { + ANDI2R(W5, RCPSR, 0x1F); + MOVI2R(W3, 0); + MOVI2R(W1, 15 - 8); + BL(ReadBanked); + MOV(rd, W3); + } + else + MOV(rd, RCPSR); +} + +void Compiler::A_Comp_MSR() +{ + Comp_AddCycles_C(); + + ARM64Reg val; + if (CurInstr.Instr & (1 << 25)) + { + val = W0; + MOVI2R(val, ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E))); + } + else + { + val = MapReg(CurInstr.A_Reg(0)); + } + + u32 mask = 0; + if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF; + if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00; + if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000; + if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000; + + if (CurInstr.Instr & (1 << 22)) + { + ANDI2R(W5, RCPSR, 0x1F); + MOVI2R(W3, 0); + MOVI2R(W1, 15 - 8); + BL(ReadBanked); + + MOVI2R(W1, mask); + MOVI2R(W2, mask & 0xFFFFFF00); + ANDI2R(W5, RCPSR, 0x1F); + CMP(W5, 0x10); + CSEL(W1, W2, W1, CC_EQ); + + BIC(W3, W3, W1); + AND(W0, val, W1); + ORR(W3, W3, W0); + + MOVI2R(W1, 15 - 8); + + BL(WriteBanked); + } + else + { + mask &= 0xFFFFFFDF; + CPSRDirty = true; + + if ((mask & 0xFF) == 0) + { + ANDI2R(RCPSR, RCPSR, ~mask); + ANDI2R(W0, val, mask); + ORR(RCPSR, RCPSR, W0); + } + else + { + MOVI2R(W2, mask); + MOVI2R(W3, mask & 0xFFFFFF00); + ANDI2R(W1, RCPSR, 0x1F); + // W1 = first argument + CMP(W1, 0x10); + CSEL(W2, W3, W2, CC_EQ); + + BIC(RCPSR, RCPSR, W2); + AND(W0, val, W2); + ORR(RCPSR, RCPSR, W0); + + MOV(W2, RCPSR); + MOV(X0, RCPU); + + PushRegs(true); + + QuickCallFunction(X3, (void*)&ARM::UpdateMode); + + PopRegs(true); + } + } +} + +void Compiler::PushRegs(bool saveHiRegs) +{ + if (saveHiRegs) + { + if (Thumb || CurInstr.Cond() == 0xE) + { + BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); + for (int reg : hiRegsLoaded) + RegCache.UnloadRegister(reg); + } + else + { + BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00); + for (int reg : hiRegsDirty) + SaveReg(reg, RegCache.Mapping[reg]); + } + } +} + +void Compiler::PopRegs(bool saveHiRegs) +{ 
+ if (saveHiRegs) + { + BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); + + for (int reg : hiRegsLoaded) + LoadReg(reg, RegCache.Mapping[reg]); + } +} + +Compiler::Compiler() +{ +#ifdef __SWITCH__ + JitRWBase = memalign(0x1000, JitMemSize); + + JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000; + JitRWStart = virtmemReserve(JitMemSize); + MemoryInfo info = {0}; + u32 pageInfo = {0}; + int i = 0; + while (JitRXStart != NULL) + { + svcQueryMemory(&info, &pageInfo, (u64)JitRXStart); + if (info.type != MemType_Unmapped) + JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000); + else + break; + if (i++ > 8) + { + printf("couldn't find unmapped place for jit memory\n"); + JitRXStart = NULL; + } + } + + assert(JitRXStart != NULL); + + bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize)); + assert(succeded); + succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx)); + assert(succeded); + succeded = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize)); + assert(succeded); + + SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart); + JitMemMainSize = JitMemSize; +#else + u64 pageSize = sysconf(_SC_PAGE_SIZE); + u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize); + u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned; + mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); + + SetCodeBase(pageAligned, pageAligned); + JitMemMainSize = alignedSize; +#endif + SetCodePtr(0); + + for (int i = 0; i < 3; i++) + { + JumpToFuncs9[i] = Gen_JumpTo9(i); + JumpToFuncs7[i] = Gen_JumpTo7(i); + } + + /* + W5 - mode + W1 - reg num + W3 - in/out value of reg + */ + { + ReadBanked = GetRXPtr(); + + ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); + CMP(W5, 0x11); + FixupBranch fiq = B(CC_EQ); + SUBS(W1, W1, 13 - 8); + ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); + FixupBranch notEverything = B(CC_LT); + CMP(W5, 0x12); + FixupBranch irq = B(CC_EQ); + CMP(W5, 0x13); + FixupBranch svc = B(CC_EQ); + CMP(W5, 0x17); + FixupBranch abt = B(CC_EQ); + CMP(W5, 0x1B); + FixupBranch und = B(CC_EQ); + SetJumpTarget(notEverything); + RET(); + + SetJumpTarget(fiq); + LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ)); + RET(); + SetJumpTarget(irq); + LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ)); + RET(); + SetJumpTarget(svc); + LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC)); + RET(); + SetJumpTarget(abt); + LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_ABT)); + RET(); + SetJumpTarget(und); + LDR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND)); + RET(); + } + { + WriteBanked = GetRXPtr(); + + ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); + CMP(W5, 0x11); + FixupBranch fiq = B(CC_EQ); + SUBS(W1, W1, 13 - 8); + ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2)); + FixupBranch notEverything = B(CC_LT); + CMP(W5, 0x12); + FixupBranch irq = B(CC_EQ); + CMP(W5, 0x13); + FixupBranch svc = B(CC_EQ); + CMP(W5, 0x17); + FixupBranch abt = B(CC_EQ); + CMP(W5, 0x1B); + FixupBranch und = B(CC_EQ); + SetJumpTarget(notEverything); + MOVI2R(W4, 0); + RET(); + + SetJumpTarget(fiq); + STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_FIQ)); + MOVI2R(W4, 1); + RET(); + SetJumpTarget(irq); + STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_IRQ)); + MOVI2R(W4, 1); + RET(); + SetJumpTarget(svc); + STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_SVC)); + MOVI2R(W4, 1); + RET(); + SetJumpTarget(abt); + STR(INDEX_UNSIGNED, W3, 
X2, offsetof(ARM, R_ABT)); + MOVI2R(W4, 1); + RET(); + SetJumpTarget(und); + STR(INDEX_UNSIGNED, W3, X2, offsetof(ARM, R_UND)); + MOVI2R(W4, 1); + RET(); + } + + for (int consoleType = 0; consoleType < 2; consoleType++) + { + for (int num = 0; num < 2; num++) + { + for (int size = 0; size < 3; size++) + { + for (int reg = 0; reg < 8; reg++) + { + ARM64Reg rdMapped = (ARM64Reg)(W19 + reg); + PatchedStoreFuncs[consoleType][num][size][reg] = GetRXPtr(); + if (num == 0) + { + MOV(X1, RCPU); + MOV(W2, rdMapped); + } + else + { + MOV(W1, rdMapped); + } + ABI_PushRegisters({30}); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 0>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 0>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowWrite7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32, 1>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 1>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 1>); break; + case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break; + } + } + + ABI_PopRegisters({30}); + RET(); + + for (int signextend = 0; signextend < 2; signextend++) + { + PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetRXPtr(); + if (num == 0) + MOV(X1, RCPU); + ABI_PushRegisters({30}); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 0>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 0>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: QuickCallFunction(X3, SlowRead9<u32, 1>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 1>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 1>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break; + } + } + ABI_PopRegisters({30}); + if (size == 32) + MOV(rdMapped, W0); + else if (signextend) + SBFX(rdMapped, W0, 0, 8 << size); + else + UBFX(rdMapped, W0, 0, 8 << size); + RET(); + } + } + } + } + } + + FlushIcache(); + + JitMemSecondarySize = 1024*1024*4; + + JitMemMainSize -= GetCodeOffset(); + JitMemMainSize -= JitMemSecondarySize; + + SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr()); +} + +Compiler::~Compiler() +{ +#ifdef __SWITCH__ + if (JitRWStart != NULL) + { + bool succeded = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize)); + assert(succeded); + virtmemFree(JitRWStart, JitMemSize); + succeded = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize)); + assert(succeded); + free(JitRWBase); + } +#endif +} + +void Compiler::LoadCycles() +{ + LDR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles)); +} + +void Compiler::SaveCycles() +{ + STR(INDEX_UNSIGNED, RCycles, RCPU, offsetof(ARM, Cycles)); +} + +void Compiler::LoadReg(int reg, ARM64Reg 
nativeReg) +{ + if (reg == 15) + MOVI2R(nativeReg, R15); + else + LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg])); +} + +void Compiler::SaveReg(int reg, ARM64Reg nativeReg) +{ + STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg])); +} + +void Compiler::LoadCPSR() +{ + assert(!CPSRDirty); + LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR)); +} + +void Compiler::SaveCPSR(bool markClean) +{ + if (CPSRDirty) + { + STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR)); + CPSRDirty = CPSRDirty && !markClean; + } +} + +FixupBranch Compiler::CheckCondition(u32 cond) +{ + if (cond >= 0x8) + { + LSR(W1, RCPSR, 28); + MOVI2R(W2, 1); + LSLV(W2, W2, W1); + ANDI2R(W2, W2, ARM::ConditionTable[cond], W3); + + return CBZ(W2); + } + else + { + u8 bit = (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))); + + if (cond & 1) + return TBNZ(RCPSR, bit); + else + return TBZ(RCPSR, bit); + } +} + +#define F(x) &Compiler::A_Comp_##x +const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] = +{ + // AND + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // EOR + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // SUB + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // RSB + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // ADD + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // ADC + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // SBC + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // RSC + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // ORR + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // MOV + F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), + F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), + // BIC + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), 
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), + // MVN + F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), + F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), + // TST + F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), + // TEQ + F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), + // CMP + F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), + // CMN + F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), + // Mul + F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), + // ARMv5 exclusives + F(Clz), NULL, NULL, NULL, NULL, + + // STR + F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), + // STRB + F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), + // LDR + F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), + // LDRB + F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), + // STRH + F(MemHD), F(MemHD), F(MemHD), F(MemHD), + // LDRD + NULL, NULL, NULL, NULL, + // STRD + NULL, NULL, NULL, NULL, + // LDRH + F(MemHD), F(MemHD), F(MemHD), F(MemHD), + // LDRSB + F(MemHD), F(MemHD), F(MemHD), F(MemHD), + // LDRSH + F(MemHD), F(MemHD), F(MemHD), F(MemHD), + // Swap + NULL, NULL, + // LDM, STM + F(LDM_STM), F(LDM_STM), + // Branch + F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg), + // Special + NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL, + &Compiler::Nop +}; +#undef F +#define F(x) &Compiler::T_Comp_##x +const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = +{ + // Shift imm + F(ShiftImm), F(ShiftImm), F(ShiftImm), + // Add/sub tri operand + F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_), + // 8 bit imm + F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8), + // ALU + F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), + F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), + // ALU hi reg + F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg), + // PC/SP relative ops + F(RelAddr), F(RelAddr), F(AddSP), + // LDR PC rel + F(LoadPCRel), + // LDR/STR reg offset + F(MemReg), F(MemReg), F(MemReg), F(MemReg), + // LDR/STR sign extended, half + F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), + // LDR/STR imm offset + F(MemImm), F(MemImm), F(MemImm), F(MemImm), + // LDR/STR half imm offset + F(MemImmHalf), F(MemImmHalf), + // LDR/STR sp rel + F(MemSPRel), F(MemSPRel), + // PUSH/POP + F(PUSH_POP), F(PUSH_POP), + // LDMIA, STMIA + F(LDMIA_STMIA), F(LDMIA_STMIA), + // Branch + F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2), + // Unk, SVC + NULL, NULL, + F(BL_Merged) +}; + +bool Compiler::CanCompile(bool thumb, u16 kind) +{ + return (thumb ? 
T_Comp[kind] : A_Comp[kind]) != NULL; +} + +void Compiler::Comp_BranchSpecialBehaviour(bool taken) +{ + if (taken && CurInstr.BranchFlags & branch_IdleBranch) + { + MOVI2R(W0, 1); + STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop)); + } + + if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken) + || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken)) + { + RegCache.PrepareExit(); + + SUB(RCycles, RCycles, ConstantCycles); + QuickTailCall(X0, ARM_Ret); + } +} + +JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) +{ + if (JitMemMainSize - GetCodeOffset() < 1024 * 16) + { + printf("JIT near memory full, resetting...\n"); + ResetBlockCache(); + } + if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8) + { + printf("JIT far memory full, resetting...\n"); + ResetBlockCache(); + } + + JitBlockEntry res = (JitBlockEntry)GetRXPtr(); + + Thumb = thumb; + Num = cpu->Num; + CurCPU = cpu; + ConstantCycles = 0; + RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true); + CPSRDirty = false; + + for (int i = 0; i < instrsCount; i++) + { + CurInstr = instrs[i]; + R15 = CurInstr.Addr + (Thumb ? 4 : 8); + CodeRegion = R15 >> 24; + + CompileFunc comp = Thumb + ? T_Comp[CurInstr.Info.Kind] + : A_Comp[CurInstr.Info.Kind]; + + Exit = i == (instrsCount - 1) || (CurInstr.BranchFlags & branch_FollowCondNotTaken); + + //printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags); + + bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE; + if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional))) + { + MOVI2R(W0, R15); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15])); + if (comp == NULL) + { + MOVI2R(W0, CurInstr.Instr); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr)); + } + if (Num == 0) + { + MOVI2R(W0, (s32)CurInstr.CodeCycles); + STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles)); + } + } + + if (comp == NULL) + { + SaveCycles(); + SaveCPSR(); + RegCache.Flush(); + } + else + RegCache.Prepare(Thumb, i); + + if (Thumb) + { + if (comp == NULL) + { + MOV(X0, RCPU); + QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]); + } + else + (this->*comp)(); + } + else + { + u32 cond = CurInstr.Cond(); + if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + if (comp) + (this->*comp)(); + else + { + MOV(X0, RCPU); + QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM); + } + } + else if (cond == 0xF) + Comp_AddCycles_C(); + else + { + IrregularCycles = false; + + FixupBranch skipExecute; + if (cond < 0xE) + skipExecute = CheckCondition(cond); + + if (comp == NULL) + { + MOV(X0, RCPU); + QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]); + } + else + { + (this->*comp)(); + } + + Comp_BranchSpecialBehaviour(true); + + if (cond < 0xE) + { + if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken)) + { + FixupBranch skipNop = B(); + SetJumpTarget(skipExecute); + + Comp_AddCycles_C(); + + Comp_BranchSpecialBehaviour(false); + + SetJumpTarget(skipNop); + } + else + SetJumpTarget(skipExecute); + } + + } + } + + if (comp == NULL) + { + LoadCycles(); + LoadCPSR(); + } + } + + RegCache.Flush(); + + SUB(RCycles, RCycles, ConstantCycles); + QuickTailCall(X0, ARM_Ret); + + FlushIcache(); + + 
return res; +} + +void Compiler::Reset() +{ + LoadStorePatches.clear(); + + SetCodePtr(0); + OtherCodeRegion = JitMemMainSize; + + const u32 brk_0 = 0xD4200000; + + for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++) + *(((u32*)GetRWPtr()) + i) = brk_0; +} + +void Compiler::Comp_AddCycles_C(bool forceNonConstant) +{ + s32 cycles = Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); + + if (forceNonConstant) + ConstantCycles += cycles; + else + SUB(RCycles, RCycles, cycles); +} + +void Compiler::Comp_AddCycles_CI(u32 numI) +{ + IrregularCycles = true; + + s32 cycles = (Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI; + + if (Thumb || CurInstr.Cond() == 0xE) + ConstantCycles += cycles; + else + SUB(RCycles, RCycles, cycles); +} + +void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift) +{ + IrregularCycles = true; + + s32 cycles = (Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c; + + SUB(RCycles, RCycles, numI, shift); + if (Thumb || CurInstr.Cond() >= 0xE) + ConstantCycles += cycles; + else + SUB(RCycles, RCycles, cycles); +} + +void Compiler::Comp_AddCycles_CDI() +{ + if (Num == 0) + Comp_AddCycles_CD(); + else + { + IrregularCycles = true; + + s32 cycles; + + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM + { + if (CodeRegion == 0x02) + cycles = numC + numD; + else + { + numC++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + } + else if (CodeRegion == 0x02) + { + numD++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles = numC + numD + 1; + } + + if (!Thumb && CurInstr.Cond() < 0xE) + SUB(RCycles, RCycles, cycles); + else + ConstantCycles += cycles; + } +} + +void Compiler::Comp_AddCycles_CD() +{ + u32 cycles = 0; + if (Num == 0) + { + s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles; + s32 numD = CurInstr.DataCycles; + + //if (DataRegion != CodeRegion) + cycles = std::max(numC + numD - 6, std::max(numC, numD)); + + IrregularCycles = cycles != numC; + } + else + { + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 24) == 0x02) + { + if (CodeRegion == 0x02) + cycles += numC + numD; + else + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else if (CodeRegion == 0x02) + { + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles += numC + numD; + } + + IrregularCycles = true; + } + + if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles) + SUB(RCycles, RCycles, cycles); + else + ConstantCycles += cycles; +} + +}
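The Comp_AddCycles_* family above implements a two-tier cycle budget: timings known at compile time accumulate in ConstantCycles and are subtracted from RCycles once at block exit, while path-dependent timings are subtracted by SUBs emitted into the block body. A toy model of the idea (illustrative only, not the melonDS API):

    #include <cstdint>
    #include <cstdio>

    // Constant timings are folded and paid once at block exit; path-dependent
    // timings are paid where they occur.
    struct CycleModel
    {
        int64_t rcycles = 100;   // stands in for the RCycles register
        uint32_t constant = 0;   // stands in for Compiler::ConstantCycles

        void addConstant(uint32_t c) { constant += c; }       // compile-time fold
        void addRuntime(uint32_t c)  { rcycles -= c; }        // emitted SUB
        void exitBlock()             { rcycles -= constant; } // single SUB at exit
    };

    int main()
    {
        CycleModel m;
        m.addConstant(1); // unconditional Thumb instruction
        m.addRuntime(3);  // conditionally executed path that was taken
        m.exitBlock();
        printf("%lld\n", (long long)m.rcycles); // 96
    }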
\ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_Compiler.h b/src/ARMJIT_A64/ARMJIT_Compiler.h new file mode 100644 index 0000000..0e7d54c --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_Compiler.h @@ -0,0 +1,269 @@ +#ifndef ARMJIT_COMPILER_H +#define ARMJIT_COMPILER_H + +#include "../ARM.h" +#include "../ARMJIT.h" + +#include "../dolphin/Arm64Emitter.h" + +#include "../ARMJIT_Internal.h" +#include "../ARMJIT_RegisterCache.h" + +#include <unordered_map> + +namespace ARMJIT +{ + +const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27; +const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28; +const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29; + +struct Op2 +{ + Op2() + {} + + Op2(Arm64Gen::ARM64Reg rm) : IsImm(false) + { + Reg.Rm = rm; + Reg.ShiftType = Arm64Gen::ST_LSL; + Reg.ShiftAmount = 0; + } + + Op2(u32 imm) : IsImm(true), Imm(imm) + {} + + Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false) + { + Reg.Rm = rm; + Reg.ShiftType = st; + Reg.ShiftAmount = amount; + } + + Arm64Gen::ArithOption ToArithOption() + { + assert(!IsImm); + return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount); + } + + bool IsSimpleReg() + { return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; } + bool ImmFits12Bit() + { return IsImm && (Imm & 0xFFF == Imm); } + bool IsZero() + { return IsImm && !Imm; } + + bool IsImm; + union + { + struct + { + Arm64Gen::ARM64Reg Rm; + Arm64Gen::ShiftType ShiftType; + int ShiftAmount; + } Reg; + u32 Imm; + }; +}; + +struct LoadStorePatch +{ + void* PatchFunc; + s32 PatchOffset; + u32 PatchSize; +}; + +class Compiler : public Arm64Gen::ARM64XEmitter +{ +public: + typedef void (Compiler::*CompileFunc)(); + + Compiler(); + ~Compiler(); + + void PushRegs(bool saveHiRegs); + void PopRegs(bool saveHiRegs); + + Arm64Gen::ARM64Reg MapReg(int reg) + { + assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG); + return RegCache.Mapping[reg]; + } + + JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount); + + bool CanCompile(bool thumb, u16 kind); + + bool FlagsNZNeeded() + { + return CurInstr.SetFlags & 0xC; + } + + void Reset(); + + void Comp_AddCycles_C(bool forceNonConstant = false); + void Comp_AddCycles_CI(u32 numI); + void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift); + void Comp_AddCycles_CD(); + void Comp_AddCycles_CDI(); + + void MovePC(); + + void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg); + void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg); + + void LoadCPSR(); + void SaveCPSR(bool markClean = true); + + void LoadCycles(); + void SaveCycles(); + + void Nop() {} + + void A_Comp_ALUTriOp(); + void A_Comp_ALUMovOp(); + void A_Comp_ALUCmpOp(); + + void A_Comp_Mul(); + void A_Comp_Mul_Long(); + void A_Comp_Mul_Short(); + + void A_Comp_Clz(); + + void A_Comp_MemWB(); + void A_Comp_MemHD(); + + void A_Comp_LDM_STM(); + + void A_Comp_BranchImm(); + void A_Comp_BranchXchangeReg(); + + void A_Comp_MRS(); + void A_Comp_MSR(); + + void T_Comp_ShiftImm(); + void T_Comp_AddSub_(); + void T_Comp_ALUImm8(); + void T_Comp_ALU(); + void T_Comp_ALU_HiReg(); + void T_Comp_AddSP(); + void T_Comp_RelAddr(); + + void T_Comp_MemReg(); + void T_Comp_MemImm(); + void T_Comp_MemRegHalf(); + void T_Comp_MemImmHalf(); + void T_Comp_LoadPCRel(); + void T_Comp_MemSPRel(); + + void T_Comp_LDMIA_STMIA(); + void T_Comp_PUSH_POP(); + + void T_Comp_BCOND(); + void T_Comp_B(); + void T_Comp_BranchXchangeReg(); + void T_Comp_BL_LONG_1(); + void T_Comp_BL_LONG_2(); + void T_Comp_BL_Merged(); + + 
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); + + void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn); + + void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2); + void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2); + void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2); + + void Comp_RetriveFlags(bool retriveCV); + + Arm64Gen::FixupBranch CheckCondition(u32 cond); + + void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false); + void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false); + + void A_Comp_GetOp2(bool S, Op2& op2); + + void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0); + void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs); + + bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); + enum + { + memop_Writeback = 1 << 0, + memop_Post = 1 << 1, + memop_SignExtend = 1 << 2, + memop_Store = 1 << 3, + memop_SubtractOffset = 1 << 4 + }; + void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags); + + // 0 = switch mode, 1 = stay arm, 2 = stay thumb + void* Gen_JumpTo9(int kind); + void* Gen_JumpTo7(int kind); + + void Comp_BranchSpecialBehaviour(bool taken); + + JitBlockEntry AddEntryOffset(u32 offset) + { + return (JitBlockEntry)(GetRXBase() + offset); + } + + u32 SubEntryOffset(JitBlockEntry entry) + { + return (u8*)entry - GetRXBase(); + } + + bool IsJITFault(u64 pc); + s64 RewriteMemAccess(u64 pc); + + void SwapCodeRegion() + { + ptrdiff_t offset = GetCodeOffset(); + SetCodePtrUnsafe(OtherCodeRegion); + OtherCodeRegion = offset; + } + + ptrdiff_t OtherCodeRegion; + + bool Exit; + + FetchedInstr CurInstr; + bool Thumb; + u32 R15; + u32 Num; + ARM* CurCPU; + u32 ConstantCycles; + u32 CodeRegion; + + BitSet32 SavedRegs; + + u32 JitMemSecondarySize; + u32 JitMemMainSize; + + void* ReadBanked, *WriteBanked; + + void* JumpToFuncs9[3]; + void* JumpToFuncs7[3]; + + std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches; + + // [Console Type][Num][Size][Sign Extend][Output register] + void* PatchedLoadFuncs[2][2][3][2][8]; + void* PatchedStoreFuncs[2][2][3][8]; + + RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache; + + bool CPSRDirty = false; + + bool IrregularCycles = false; + +#ifdef __SWITCH__ + void* JitRWBase; + void* JitRWStart; + void* JitRXStart; +#endif +}; + +} + +#endif
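A condensed, standalone rendition of the Op2 helper declared above (simplified types; illustrative, not the emitter's definition). One subtlety worth flagging: written as Imm & 0xFFF == Imm, the 12-bit test parses as Imm & (0xFFF == Imm) because == binds tighter than &; the sketch parenthesizes it as presumably intended:

    #include <cassert>
    #include <cstdint>

    struct MiniOp2
    {
        bool IsImm;
        uint32_t Imm;                               // valid when IsImm
        int Rm = 0, ShiftType = 0, ShiftAmount = 0; // valid when !IsImm

        explicit MiniOp2(uint32_t imm) : IsImm(true), Imm(imm) {}
        MiniOp2(int rm, int st, int amount)
            : IsImm(false), Imm(0), Rm(rm), ShiftType(st), ShiftAmount(amount) {}

        bool ImmFits12Bit() const { return IsImm && (Imm & 0xFFF) == Imm; }
        bool IsZero() const       { return IsImm && !Imm; }
    };

    int main()
    {
        assert(MiniOp2(0xABCu).ImmFits12Bit());
        assert(!MiniOp2(0x1000u).ImmFits12Bit());
    }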
\ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_Linkage.s b/src/ARMJIT_A64/ARMJIT_Linkage.s new file mode 100644 index 0000000..7886315 --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_Linkage.s @@ -0,0 +1,68 @@ +#include "../ARMJIT_x64/ARMJIT_Offsets.h" + +.text + +#define RCPSR w27 +#define RCycles w28 +#define RCPU x29 + +.p2align 4,,15 + +.global ARM_Dispatch +ARM_Dispatch: + stp x19, x20, [sp, #-96]! + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp x25, x26, [sp, #48] + stp x27, x28, [sp, #64] + stp x29, x30, [sp, #80] + + mov RCPU, x0 + ldr RCycles, [RCPU, ARM_Cycles_offset] + ldr RCPSR, [RCPU, ARM_CPSR_offset] + + br x1 + +.p2align 4,,15 + +.global ARM_Ret +ARM_Ret: + str RCycles, [RCPU, ARM_Cycles_offset] + str RCPSR, [RCPU, ARM_CPSR_offset] + + ldp x29, x30, [sp, #80] + ldp x27, x28, [sp, #64] + ldp x25, x26, [sp, #48] + ldp x23, x24, [sp, #32] + ldp x21, x22, [sp, #16] + ldp x19, x20, [sp], #96 + + ret + +.p2align 4,,15 + +.global ARM_RestoreContext +ARM_RestoreContext: + mov sp, x0 + + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + ldp x8, x9, [sp, #64] + ldp x10, x11, [sp, #80] + ldp x12, x13, [sp, #96] + ldp x14, x15, [sp, #112] + ldp x16, x17, [sp, #128] + ldp x18, x19, [sp, #144] + ldp x20, x21, [sp, #160] + ldp x22, x23, [sp, #176] + ldp x24, x25, [sp, #192] + ldp x26, x27, [sp, #208] + ldp x28, x29, [sp, #224] + ldr x30, [sp, #240] + + ldp x17, x18, [sp, #248] + mov sp, x17 + + br x18
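ARM_Dispatch above establishes the JIT register convention (RCPU = x29, RCycles = w28, RCPSR = w27), saves the callee-saved pairs, and branches to the block; blocks end by tail-calling ARM_Ret. A sketch of how the C++ side would enter it; the signature is inferred from the register usage (x0 = cpu, x1 = entry), so treat it as an assumption rather than melonDS's authoritative declaration:

    struct ARM;                       // opaque here
    typedef void (*JitBlockEntry)();  // assumed shape of a compiled block

    extern "C" void ARM_Dispatch(ARM* cpu, JitBlockEntry entry);

    void runBlock(ARM* cpu, JitBlockEntry entry)
    {
        // ARM_Dispatch saves x19-x30, loads Cycles/CPSR into w28/w27 and
        // branches to the block; the block tail-calls ARM_Ret, which restores
        // the saved pairs and returns here.
        ARM_Dispatch(cpu, entry);
    }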
\ No newline at end of file diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp new file mode 100644 index 0000000..6140ffc --- /dev/null +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -0,0 +1,810 @@ +#include "ARMJIT_Compiler.h" + +#include "../Config.h" + +#include "../ARMJIT_Memory.h" + +using namespace Arm64Gen; + +namespace ARMJIT +{ + +bool Compiler::IsJITFault(u64 pc) +{ + return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); +} + +s64 Compiler::RewriteMemAccess(u64 pc) +{ + ptrdiff_t pcOffset = pc - (u64)GetRXBase(); + + auto it = LoadStorePatches.find(pcOffset); + + if (it != LoadStorePatches.end()) + { + LoadStorePatch patch = it->second; + + ptrdiff_t curCodeOffset = GetCodeOffset(); + + SetCodePtrUnsafe(pcOffset + patch.PatchOffset); + + BL(patch.PatchFunc); + + for (int i = 0; i < patch.PatchSize / 4 - 1; i++) + HINT(HINT_NOP); + + FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); + + SetCodePtrUnsafe(curCodeOffset); + + LoadStorePatches.erase(it); + + return patch.PatchOffset; + } + printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc)); + assert(false); +} + +bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) +{ + u32 localAddr = LocaliseCodeAddress(Num, addr); + + int invalidLiteralIdx = InvalidLiterals.Find(localAddr); + if (invalidLiteralIdx != -1) + { + InvalidLiterals.Remove(invalidLiteralIdx); + return false; + } + + Comp_AddCycles_CDI(); + + u32 val; + // make sure arm7 bios is accessible + u32 tmpR15 = CurCPU->R[15]; + CurCPU->R[15] = R15; + if (size == 32) + { + CurCPU->DataRead32(addr & ~0x3, &val); + val = ROR(val, (addr & 0x3) << 3); + } + else if (size == 16) + { + CurCPU->DataRead16(addr & ~0x1, &val); + if (signExtend) + val = ((s32)val << 16) >> 16; + } + else + { + CurCPU->DataRead8(addr, &val); + if (signExtend) + val = ((s32)val << 24) >> 24; + } + CurCPU->R[15] = tmpR15; + + MOVI2R(MapReg(rd), val); + + if (Thumb || CurInstr.Cond() == 0xE) + RegCache.PutLiteral(rd, val); + + return true; +} + +void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags) +{ + u32 addressMask = ~0; + if (size == 32) + addressMask = ~3; + if (size == 16) + addressMask = ~1; + + if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) + { + u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); + + if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr)) + return; + } + + if (flags & memop_Store) + Comp_AddCycles_CD(); + else + Comp_AddCycles_CDI(); + + ARM64Reg rdMapped = MapReg(rd); + ARM64Reg rnMapped = MapReg(rn); + + if (Thumb && rn == 15) + { + ANDI2R(W3, rnMapped, ~2); + rnMapped = W3; + } + + ARM64Reg finalAddr = W0; + if (flags & memop_Post) + { + finalAddr = rnMapped; + MOV(W0, rnMapped); + } + + bool addrIsStatic = Config::JIT_LiteralOptimisations + && RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post)); + u32 staticAddress; + if (addrIsStatic) + staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? 
-1 : 1); + + if (!offset.IsImm) + Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2); + // offset might have become an immediate + if (offset.IsImm) + { + if (offset.Imm) + { + if (flags & memop_SubtractOffset) + SUB(finalAddr, rnMapped, offset.Imm); + else + ADD(finalAddr, rnMapped, offset.Imm); + } + else if (finalAddr != rnMapped) + MOV(finalAddr, rnMapped); + } + else + { + if (offset.Reg.ShiftType == ST_ROR) + { + ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount); + offset = Op2(W0); + } + + if (flags & memop_SubtractOffset) + SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); + else + ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption()); + } + + if (!(flags & memop_Post) && (flags & memop_Writeback)) + MOV(rnMapped, W0); + + u32 expectedTarget = Num == 0 + ? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion); + + if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget))) + { + ptrdiff_t memopStart = GetCodeOffset(); + LoadStorePatch patch; + + patch.PatchFunc = flags & memop_Store + ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped - W19] + : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19]; + assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8); + + MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + + // take a chance at fastmem + if (size > 8) + ANDI2R(W1, W0, addressMask); + + ptrdiff_t loadStorePosition = GetCodeOffset(); + if (flags & memop_Store) + { + STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7); + } + else + { + LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ?
X1 : X0, X7); + if (size == 32) + { + UBFIZ(W0, W0, 3, 2); + RORV(rdMapped, rdMapped, W0); + } + } + + patch.PatchOffset = memopStart - loadStorePosition; + patch.PatchSize = GetCodeOffset() - memopStart; + LoadStorePatches[loadStorePosition] = patch; + } + else + { + void* func = NULL; + if (addrIsStatic) + func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size); + + if (func) + { + if (flags & memop_Store) + MOV(W1, rdMapped); + QuickCallFunction(X2, (void (*)())func); + + if (!(flags & memop_Store)) + { + if (size == 32) + { + if (staticAddress & 0x3) + ROR_(rdMapped, W0, (staticAddress & 0x3) << 3); + else + MOV(rdMapped, W0); + } + else + { + if (flags & memop_SignExtend) + SBFX(rdMapped, W0, 0, size); + else + UBFX(rdMapped, W0, 0, size); + } + } + } + else + { + if (Num == 0) + { + MOV(X1, RCPU); + if (flags & memop_Store) + { + MOV(W2, rdMapped); + switch (size | NDS::ConsoleType) + { + case 32: QuickCallFunction(X3, SlowWrite9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite9<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite9<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowWrite9<u8, 1>); break; + } + } + else + { + switch (size | NDS::ConsoleType) + { + case 32: QuickCallFunction(X3, SlowRead9<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead9<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead9<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead9<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead9<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead9<u8, 1>); break; + } + } + } + else + { + if (flags & memop_Store) + { + MOV(W1, rdMapped); + switch (size | NDS::ConsoleType) + { + case 32: QuickCallFunction(X3, SlowWrite7<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowWrite7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowWrite7<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowWrite7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowWrite7<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowWrite7<u8, 1>); break; + } + } + else + { + switch (size | NDS::ConsoleType) + { + case 32: QuickCallFunction(X3, SlowRead7<u32, 0>); break; + case 33: QuickCallFunction(X3, SlowRead7<u32, 1>); break; + case 16: QuickCallFunction(X3, SlowRead7<u16, 0>); break; + case 17: QuickCallFunction(X3, SlowRead7<u16, 1>); break; + case 8: QuickCallFunction(X3, SlowRead7<u8, 0>); break; + case 9: QuickCallFunction(X3, SlowRead7<u8, 1>); break; + } + } + } + + if (!(flags & memop_Store)) + { + if (size == 32) + MOV(rdMapped, W0); + else if (flags & memop_SignExtend) + SBFX(rdMapped, W0, 0, size); + else + UBFX(rdMapped, W0, 0, size); + } + } + } + + if (CurInstr.Info.Branches()) + { + if (size < 32) + printf("LDR size < 32 branching?\n"); + Comp_JumpTo(rdMapped, Num == 0, false); + } +} + +void Compiler::A_Comp_MemWB() +{ + Op2 offset; + if (CurInstr.Instr & (1 << 25)) + offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F); + else + offset = Op2(CurInstr.Instr & 0xFFF); + + bool load = CurInstr.Instr & (1 << 20); + bool byte = CurInstr.Instr & (1 << 22); + + int flags = 0; + if (!load) + flags |= memop_Store; + if (!(CurInstr.Instr & (1 << 24))) + flags |= memop_Post; + if (CurInstr.Instr & (1 << 21)) + flags |= memop_Writeback; + if (!(CurInstr.Instr & (1 << 23))) + flags |= memop_SubtractOffset; + + 
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags); +} + +void Compiler::A_Comp_MemHD() +{ + bool load = CurInstr.Instr & (1 << 20); + bool signExtend; + int op = (CurInstr.Instr >> 5) & 0x3; + int size; + + if (load) + { + signExtend = op >= 2; + size = op == 2 ? 8 : 16; + } + else + { + size = 16; + signExtend = false; + } + + Op2 offset; + if (CurInstr.Instr & (1 << 22)) + offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0)); + else + offset = Op2(MapReg(CurInstr.A_Reg(0))); + + int flags = 0; + if (signExtend) + flags |= memop_SignExtend; + if (!load) + flags |= memop_Store; + if (!(CurInstr.Instr & (1 << 24))) + flags |= memop_Post; + if (!(CurInstr.Instr & (1 << 23))) + flags |= memop_SubtractOffset; + if (CurInstr.Instr & (1 << 21)) + flags |= memop_Writeback; + + Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags); +} + +void Compiler::T_Comp_MemReg() +{ + int op = (CurInstr.Instr >> 10) & 0x3; + bool load = op & 0x2; + bool byte = op & 0x1; + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), + Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store); +} + +void Compiler::T_Comp_MemImm() +{ + int op = (CurInstr.Instr >> 11) & 0x3; + bool load = op & 0x1; + bool byte = op & 0x2; + u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4); + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), + byte ? 8 : 32, load ? 0 : memop_Store); +} + +void Compiler::T_Comp_MemRegHalf() +{ + int op = (CurInstr.Instr >> 10) & 0x3; + bool load = op != 0; + int size = op != 1 ? 16 : 8; + bool signExtend = op & 1; + + int flags = 0; + if (signExtend) + flags |= memop_SignExtend; + if (!load) + flags |= memop_Store; + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))), + size, flags); +} + +void Compiler::T_Comp_MemImmHalf() +{ + u32 offset = (CurInstr.Instr >> 5) & 0x3E; + bool load = CurInstr.Instr & (1 << 11); + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16, + load ? 0 : memop_Store); +} + +void Compiler::T_Comp_LoadPCRel() +{ + u32 offset = ((CurInstr.Instr & 0xFF) << 2); + u32 addr = (R15 & ~0x2) + offset; + + if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) + Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0); +} + +void Compiler::T_Comp_MemSPRel() +{ + u32 offset = (CurInstr.Instr & 0xFF) * 4; + bool load = CurInstr.Instr & (1 << 11); + + Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store); +} + +s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) +{ + IrregularCycles = true; + + int regsCount = regs.Count(); + + if (regsCount == 0) + return 0; // actually not the right behaviour TODO: fix me + + if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin())) + { + int flags = 0; + if (store) + flags |= memop_Store; + if (decrement) + flags |= memop_SubtractOffset; + Op2 offset = preinc ? Op2(4) : Op2(0); + + Comp_MemAccess(*regs.begin(), rn, offset, 32, flags); + + return decrement ? -4 : 4; + } + + if (store) + Comp_AddCycles_CD(); + else + Comp_AddCycles_CDI(); + + int expectedTarget = Num == 0 + ? 
ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + + bool compileFastPath = Config::JIT_FastMemory + && store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)); + + if (decrement) + { + s32 offset = -regsCount * 4 + (preinc ? 0 : 4); + if (offset) + { + ADDI2R(W0, MapReg(rn), offset); + ANDI2R(W0, W0, ~3); + } + else + { + ANDI2R(W0, MapReg(rn), ~3); + } + } + else + { + ANDI2R(W0, MapReg(rn), ~3); + if (preinc) + ADD(W0, W0, 4); + } + + u8* patchFunc; + if (compileFastPath) + { + ptrdiff_t fastPathStart = GetCodeOffset(); + ptrdiff_t loadStoreOffsets[16]; + + MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); + ADD(X1, X1, X0); + + u32 offset = 0; + BitSet16::Iterator it = regs.begin(); + u32 i = 0; + + if (regsCount & 1) + { + int reg = *it; + it++; + + ARM64Reg first = W3; + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + else if (store) + LoadReg(reg, first); + + loadStoreOffsets[i++] = GetCodeOffset(); + + if (store) + STR(INDEX_UNSIGNED, first, X1, offset); + else + LDR(INDEX_UNSIGNED, first, X1, offset); + + if (!(RegCache.LoadedRegs & (1 << reg)) && !store) + SaveReg(reg, first); + + offset += 4; + } + + while (it != regs.end()) + { + int reg = *it; + it++; + int nextReg = *it; + it++; + + ARM64Reg first = W3, second = W4; + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + else if (store) + LoadReg(reg, first); + if (RegCache.LoadedRegs & (1 << nextReg)) + second = MapReg(nextReg); + else if (store) + LoadReg(nextReg, second); + + loadStoreOffsets[i++] = GetCodeOffset(); + + if (store) + STP(INDEX_SIGNED, first, second, X1, offset); + else + LDP(INDEX_SIGNED, first, second, X1, offset); + + if (!(RegCache.LoadedRegs & (1 << reg)) && !store) + SaveReg(reg, first); + if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store) + SaveReg(nextReg, second); + + offset += 8; + } + + LoadStorePatch patch; + patch.PatchSize = GetCodeOffset() - fastPathStart; + SwapCodeRegion(); + patchFunc = (u8*)GetRXPtr(); + patch.PatchFunc = patchFunc; + for (i = 0; i < regsCount; i++) + { + patch.PatchOffset = fastPathStart - loadStoreOffsets[i]; + LoadStorePatches[loadStoreOffsets[i]] = patch; + } + + ABI_PushRegisters({30}); + } + + int i = 0; + + SUB(SP, SP, ((regsCount + 1) & ~1) * 8); + if (store) + { + if (usermode && (regs & BitSet16(0x7f00))) + UBFX(W5, RCPSR, 0, 5); + + BitSet16::Iterator it = regs.begin(); + while (it != regs.end()) + { + BitSet16::Iterator nextReg = it; + nextReg++; + + int reg = *it; + + if (usermode && reg >= 8 && reg < 15) + { + if (RegCache.LoadedRegs & (1 << reg)) + MOV(W3, MapReg(reg)); + else + LoadReg(reg, W3); + MOVI2R(W1, reg - 8); + BL(ReadBanked); + STR(INDEX_UNSIGNED, W3, SP, i * 8); + } + else if (!usermode && nextReg != regs.end()) + { + ARM64Reg first = W3, second = W4; + + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + else + LoadReg(reg, W3); + + if (RegCache.LoadedRegs & (1 << *nextReg)) + second = MapReg(*nextReg); + else + LoadReg(*nextReg, W4); + + STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8); + + i++; + it++; + } + else if (RegCache.LoadedRegs & (1 << reg)) + { + STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8); + } + else + { + LoadReg(reg, W3); + STR(INDEX_UNSIGNED, W3, SP, i * 8); + } + i++; + it++; + } + } + + ADD(X1, SP, 0); + MOVI2R(W2, regsCount); + + if (Num == 0) + { + MOV(X3, RCPU); + switch ((u32)store * 2 | NDS::ConsoleType) + { + case 0: 
QuickCallFunction(X4, SlowBlockTransfer9<false, 0>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, 1>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, 0>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, 1>); break; + } + } + else + { + switch ((u32)store * 2 | NDS::ConsoleType) + { + case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, 0>); break; + case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, 1>); break; + case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, 0>); break; + case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, 1>); break; + } + } + + if (!store) + { + if (usermode && !regs[15] && (regs & BitSet16(0x7f00))) + UBFX(W5, RCPSR, 0, 5); + + BitSet16::Iterator it = regs.begin(); + while (it != regs.end()) + { + BitSet16::Iterator nextReg = it; + nextReg++; + + int reg = *it; + + if (usermode && !regs[15] && reg >= 8 && reg < 15) + { + LDR(INDEX_UNSIGNED, W3, SP, i * 8); + MOVI2R(W1, reg - 8); + BL(WriteBanked); + FixupBranch alreadyWritten = CBNZ(W4); + if (RegCache.LoadedRegs & (1 << reg)) + MOV(MapReg(reg), W3); + else + SaveReg(reg, W3); + SetJumpTarget(alreadyWritten); + } + else if (!usermode && nextReg != regs.end()) + { + ARM64Reg first = W3, second = W4; + + if (RegCache.LoadedRegs & (1 << reg)) + first = MapReg(reg); + if (RegCache.LoadedRegs & (1 << *nextReg)) + second = MapReg(*nextReg); + + LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8); + + if (first == W3) + SaveReg(reg, W3); + if (second == W4) + SaveReg(*nextReg, W4); + + it++; + i++; + } + else if (RegCache.LoadedRegs & (1 << reg)) + { + ARM64Reg mapped = MapReg(reg); + LDR(INDEX_UNSIGNED, mapped, SP, i * 8); + } + else + { + LDR(INDEX_UNSIGNED, W3, SP, i * 8); + SaveReg(reg, W3); + } + + it++; + i++; + } + } + ADD(SP, SP, ((regsCount + 1) & ~1) * 8); + + if (compileFastPath) + { + ABI_PopRegisters({30}); + RET(); + + FlushIcacheSection(patchFunc, (u8*)GetRXPtr()); + SwapCodeRegion(); + } + + if (!store && regs[15]) + { + ARM64Reg mapped = MapReg(15); + Comp_JumpTo(mapped, Num == 0, usermode); + } + + return regsCount * 4 * (decrement ? -1 : 1); +} + +void Compiler::A_Comp_LDM_STM() +{ + BitSet16 regs(CurInstr.Instr & 0xFFFF); + + bool load = CurInstr.Instr & (1 << 20); + bool pre = CurInstr.Instr & (1 << 24); + bool add = CurInstr.Instr & (1 << 23); + bool writeback = CurInstr.Instr & (1 << 21); + bool usermode = CurInstr.Instr & (1 << 22); + + ARM64Reg rn = MapReg(CurInstr.A_Reg(16)); + + s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode); + + if (load && writeback && regs[CurInstr.A_Reg(16)]) + writeback = Num == 0 + ? 
(!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1)) + : false; + if (writeback) + { + if (offset > 0) + ADD(rn, rn, offset); + else + SUB(rn, rn, -offset); + } +} + +void Compiler::T_Comp_PUSH_POP() +{ + bool load = CurInstr.Instr & (1 << 11); + BitSet16 regs(CurInstr.Instr & 0xFF); + if (CurInstr.Instr & (1 << 8)) + { + if (load) + regs[15] = true; + else + regs[14] = true; + } + + ARM64Reg sp = MapReg(13); + s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false); + + if (offset > 0) + ADD(sp, sp, offset); + else + SUB(sp, sp, -offset); +} + +void Compiler::T_Comp_LDMIA_STMIA() +{ + BitSet16 regs(CurInstr.Instr & 0xFF); + ARM64Reg rb = MapReg(CurInstr.T_Reg(8)); + bool load = CurInstr.Instr & (1 << 11); + u32 regsCount = regs.Count(); + + s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false); + + if (!load || !regs[CurInstr.T_Reg(8)]) + { + if (offset > 0) + ADD(rb, rb, offset); + else + SUB(rb, rb, -offset); + } +} + +}
\ No newline at end of file
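For reference, two sketches follow (illustrative additions, not part of the patch). First, the writeback expression in A_Comp_LDM_STM encodes the ARM9 rule for an LDM that writes back a base register which is also in the transfer list: writeback survives only if the base is the sole listed register, or if it is not the last one. Restated over plain bitmasks:

#include <cstdint>

// regs: bit n set <=> register n is in the transfer list; rn: base register.
bool LDMWritebackARM9(uint16_t regs, int rn)
{
    bool baseIsOnlyReg = (regs & ~(1u << rn)) == 0;       // !(regs & ~(1 << rn))
    bool baseNotLast   = (regs & ~((2u << rn) - 1)) != 0; // any register above rn
    return baseIsOnlyReg || baseNotLast;                  // the ARM7 path is simply false
}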
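Second, the UBFIZ/RORV pair in the fastmem load path, like the ROR in Comp_MemLoadLiteral, reproduces ARM's rotate-right of an unaligned 32-bit load by 8 bits per byte of misalignment; a standalone restatement:

#include <cstdint>

uint32_t RotateUnalignedLoad(uint32_t wordAtAlignedAddr, uint32_t addr)
{
    unsigned rot = (addr & 3) << 3;  // byte offset -> bit count: 0/8/16/24
    if (rot == 0)
        return wordAtAlignedAddr;
    return (wordAtAlignedAddr >> rot) | (wordAtAlignedAddr << (32 - rot));
}

For example, a load from an address with addr & 3 == 2 returns the aligned word rotated right by 16, which is what UBFIZ(W0, W0, 3, 2) followed by RORV computes at run time.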