author     Arisotura <thetotalworm@gmail.com>   2020-07-01 00:01:11 +0200
committer  GitHub <noreply@github.com>          2020-07-01 00:01:11 +0200
commit     62c6e2f703d88660e0ca9bda78032c5bd6b63a78 (patch)
tree       1dbf9eb1bbe418d14f07dc3a0e30821fb5deb258 /src/ARMJIT_x64
parent     d97ce22b010e868437c649911bce89d679a4deaa (diff)
parent     c5381d2911d47fb1fcbd6ec27a83f5da3606c4bd (diff)
Merge pull request #667 from Arisotura/generic_jit
merge jit
Diffstat (limited to 'src/ARMJIT_x64')
-rw-r--r--   src/ARMJIT_x64/ARMJIT_ALU.cpp        768
-rw-r--r--   src/ARMJIT_x64/ARMJIT_Branch.cpp     272
-rw-r--r--   src/ARMJIT_x64/ARMJIT_Compiler.cpp   899
-rw-r--r--   src/ARMJIT_x64/ARMJIT_Compiler.h     255
-rw-r--r--   src/ARMJIT_x64/ARMJIT_GenOffsets.cpp  15
-rw-r--r--   src/ARMJIT_x64/ARMJIT_Linkage.s       78
-rw-r--r--   src/ARMJIT_x64/ARMJIT_LoadStore.cpp  773
-rw-r--r--   src/ARMJIT_x64/ARMJIT_Offsets.h        3
8 files changed, 3063 insertions, 0 deletions
diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp new file mode 100644 index 0000000..43b94b6 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -0,0 +1,768 @@ +#include "ARMJIT_Compiler.h" + +using namespace Gen; + +namespace ARMJIT +{ + +// uses RSCRATCH3 +void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&), + OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags) +{ + if (opFlags & opSyncCarry) + { + BT(32, R(RCPSR), Imm8(29)); + if (opFlags & opInvertCarry) + CMC(); + } + + if (rd == rn && !(opFlags & opInvertOp2)) + (this->*op)(32, rd, op2); + else if (opFlags & opSymmetric && op2 == R(RSCRATCH)) + { + if (opFlags & opInvertOp2) + NOT(32, op2); + (this->*op)(32, op2, rn); + MOV(32, rd, op2); + } + else + { + if (opFlags & opInvertOp2) + { + if (op2 != R(RSCRATCH)) + { + MOV(32, R(RSCRATCH), op2); + op2 = R(RSCRATCH); + } + NOT(32, op2); + } + MOV(32, R(RSCRATCH3), rn); + (this->*op)(32, R(RSCRATCH3), op2); + MOV(32, rd, R(RSCRATCH3)); + } + + if (opFlags & opSetsFlags) + Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed); +} + +void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags) +{ + if (opFlags & opSyncCarry) + { + BT(32, R(RCPSR), Imm8(29)); + if (opFlags & opInvertCarry) + CMC(); + } + + if (op2 != R(RSCRATCH)) + { + MOV(32, R(RSCRATCH), op2); + op2 = R(RSCRATCH); + } + (this->*op)(32, op2, rn); + MOV(32, rd, op2); + + if (opFlags & opSetsFlags) + Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed); +} + +void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) +{ + switch (op) + { + case 0: // TST + if (rn.IsImm()) + { + MOV(32, R(RSCRATCH3), rn); + rn = R(RSCRATCH3); + } + TEST(32, rn, op2); + break; + case 1: // TEQ + MOV(32, R(RSCRATCH3), rn); + XOR(32, R(RSCRATCH3), op2); + break; + case 2: // CMP + if (rn.IsImm()) + { + MOV(32, R(RSCRATCH3), rn); + rn = R(RSCRATCH3); + } + CMP(32, rn, op2); + break; + case 3: // CMN + MOV(32, R(RSCRATCH3), rn); + ADD(32, R(RSCRATCH3), op2); + break; + } + + Comp_RetriveFlags(op == 2, op >= 2, carryUsed); +} + +// also calculates cycles +OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) +{ + if (CurInstr.Instr & (1 << 25)) + { + Comp_AddCycles_C(); + carryUsed = false; + return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); + } + else + { + S = S && (CurInstr.SetFlags & 0x2); + + int op = (CurInstr.Instr >> 5) & 0x3; + if (CurInstr.Instr & (1 << 4)) + { + Comp_AddCycles_CI(1); + OpArg rm = MapReg(CurInstr.A_Reg(0)); + if (rm.IsImm() && CurInstr.A_Reg(0) == 15) + rm = Imm32(rm.Imm32() + 4); + return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed); + } + else + { + Comp_AddCycles_C(); + return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F, + MapReg(CurInstr.A_Reg(0)), S, carryUsed); + } + } +} + +void Compiler::A_Comp_CmpOp() +{ + u32 op = (CurInstr.Instr >> 21) & 0xF; + + bool carryUsed; + OpArg rn = MapReg(CurInstr.A_Reg(16)); + OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed); + + Comp_CmpOp(op - 0x8, rn, op2, carryUsed); +} + +void Compiler::A_Comp_Arith() +{ + bool S = CurInstr.Instr & (1 << 20); + u32 op = (CurInstr.Instr >> 21) & 0xF; + + bool carryUsed; + OpArg rn = MapReg(CurInstr.A_Reg(16)); + OpArg rd = MapReg(CurInstr.A_Reg(12)); + OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed); + + u32 sFlag = 
S ? opSetsFlags : 0; + switch (op) + { + case 0x0: // AND + Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, opSymmetric|sFlag); + break; + case 0x1: // EOR + Comp_ArithTriOp(&Compiler::XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag); + break; + case 0x2: // SUB + Comp_ArithTriOp(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); + break; + case 0x3: // RSB + if (op2.IsZero()) + { + if (rd != rn) + MOV(32, rd, rn); + NEG(32, rd); + if (S) + Comp_RetriveFlags(true, true, false); + } + else + Comp_ArithTriOpReverse(&Compiler::SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); + break; + case 0x4: // ADD + Comp_ArithTriOp(&Compiler::ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV); + break; + case 0x5: // ADC + Comp_ArithTriOp(&Compiler::ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry); + break; + case 0x6: // SBC + Comp_ArithTriOp(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opSyncCarry|opInvertCarry); + break; + case 0x7: // RSC + Comp_ArithTriOpReverse(&Compiler::SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry); + break; + case 0xC: // ORR + Comp_ArithTriOp(&Compiler::OR, rd, rn, op2, carryUsed, opSymmetric|sFlag); + break; + case 0xE: // BIC + Comp_ArithTriOp(&Compiler::AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2); + break; + default: + assert("unimplemented"); + } + + if (CurInstr.A_Reg(12) == 15) + Comp_JumpTo(rd.GetSimpleReg(), S); +} + +void Compiler::A_Comp_MovOp() +{ + bool carryUsed; + bool S = CurInstr.Instr & (1 << 20); + OpArg op2 = A_Comp_GetALUOp2(S, carryUsed); + OpArg rd = MapReg(CurInstr.A_Reg(12)); + + if (rd != op2) + MOV(32, rd, op2); + + if (((CurInstr.Instr >> 21) & 0xF) == 0xF) + { + NOT(32, rd); + if (op2.IsImm() && CurInstr.Cond() == 0xE) + RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32()); + } + else if (op2.IsImm() && CurInstr.Cond() == 0xE) + RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32()); + + if (S) + { + if (FlagsNZRequired()) + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, carryUsed); + } + + if (CurInstr.A_Reg(12) == 15) + Comp_JumpTo(rd.GetSimpleReg(), S); +} + +void Compiler::A_Comp_CLZ() +{ + OpArg rd = MapReg(CurInstr.A_Reg(12)); + OpArg rm = MapReg(CurInstr.A_Reg(0)); + + MOV(32, R(RSCRATCH), Imm32(32)); + TEST(32, rm, rm); + FixupBranch skipZero = J_CC(CC_Z); + BSR(32, RSCRATCH, rm); + XOR(32, R(RSCRATCH), Imm8(0x1F)); // 31 - RSCRATCH + SetJumpTarget(skipZero); + MOV(32, rd, R(RSCRATCH)); +} + +void Compiler::Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn) +{ + if (Num == 0) + Comp_AddCycles_CI(S ? 3 : 1); + else + { + XOR(32, R(RSCRATCH), R(RSCRATCH)); + MOV(32, R(RSCRATCH3), rs); + TEST(32, R(RSCRATCH3), R(RSCRATCH3)); + FixupBranch zeroBSR = J_CC(CC_Z); + BSR(32, RSCRATCH2, R(RSCRATCH3)); + NOT(32, R(RSCRATCH3)); + BSR(32, RSCRATCH, R(RSCRATCH3)); + CMP(32, R(RSCRATCH2), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L); + SHR(32, R(RSCRATCH), Imm8(3)); + SetJumpTarget(zeroBSR); // fortunately that's even right + Comp_AddCycles_CI(RSCRATCH, add ? 
2 : 1); + } + + static_assert(EAX == RSCRATCH, "Someone changed RSCRATCH!"); + MOV(32, R(RSCRATCH), rm); + if (add) + { + IMUL(32, RSCRATCH, rs); + LEA(32, rd.GetSimpleReg(), MRegSum(RSCRATCH, rn.GetSimpleReg())); + if (S && FlagsNZRequired()) + TEST(32, rd, rd); + } + else + { + IMUL(32, RSCRATCH, rs); + MOV(32, rd, R(RSCRATCH)); + if (S && FlagsNZRequired()) + TEST(32, R(RSCRATCH), R(RSCRATCH)); + } + + if (S) + Comp_RetriveFlags(false, false, false); +} + +void Compiler::A_Comp_MUL_MLA() +{ + bool S = CurInstr.Instr & (1 << 20); + bool add = CurInstr.Instr & (1 << 21); + OpArg rd = MapReg(CurInstr.A_Reg(16)); + OpArg rm = MapReg(CurInstr.A_Reg(0)); + OpArg rs = MapReg(CurInstr.A_Reg(8)); + OpArg rn; + if (add) + rn = MapReg(CurInstr.A_Reg(12)); + + Comp_MulOp(S, add, rd, rm, rs, rn); +} + +void Compiler::A_Comp_Mul_Long() +{ + bool S = CurInstr.Instr & (1 << 20); + bool add = CurInstr.Instr & (1 << 21); + bool sign = CurInstr.Instr & (1 << 22); + OpArg rd = MapReg(CurInstr.A_Reg(16)); + OpArg rm = MapReg(CurInstr.A_Reg(0)); + OpArg rs = MapReg(CurInstr.A_Reg(8)); + OpArg rn = MapReg(CurInstr.A_Reg(12)); + + if (Num == 0) + Comp_AddCycles_CI(S ? 3 : 1); + else + { + XOR(32, R(RSCRATCH), R(RSCRATCH)); + MOV(32, R(RSCRATCH3), rs); + TEST(32, R(RSCRATCH3), R(RSCRATCH3)); + FixupBranch zeroBSR = J_CC(CC_Z); + if (sign) + { + BSR(32, RSCRATCH2, R(RSCRATCH3)); + NOT(32, R(RSCRATCH3)); + BSR(32, RSCRATCH, R(RSCRATCH3)); + CMP(32, R(RSCRATCH2), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(RSCRATCH2), CC_L); + } + else + { + BSR(32, RSCRATCH, R(RSCRATCH3)); + } + + SHR(32, R(RSCRATCH), Imm8(3)); + SetJumpTarget(zeroBSR); // fortunately that's even right + Comp_AddCycles_CI(RSCRATCH, 2); + } + + if (sign) + { + MOVSX(64, 32, RSCRATCH2, rm); + MOVSX(64, 32, RSCRATCH3, rs); + } + else + { + MOV(32, R(RSCRATCH2), rm); + MOV(32, R(RSCRATCH3), rs); + } + if (add) + { + MOV(32, R(RSCRATCH), rd); + SHL(64, R(RSCRATCH), Imm8(32)); + OR(64, R(RSCRATCH), rn); + + IMUL(64, RSCRATCH2, R(RSCRATCH3)); + ADD(64, R(RSCRATCH2), R(RSCRATCH)); + } + else + { + IMUL(64, RSCRATCH2, R(RSCRATCH3)); + if (S && FlagsNZRequired()) + TEST(64, R(RSCRATCH2), R(RSCRATCH2)); + } + + if (S) + Comp_RetriveFlags(false, false, false); + + MOV(32, rn, R(RSCRATCH2)); + SHR(64, R(RSCRATCH2), Imm8(32)); + MOV(32, rd, R(RSCRATCH2)); +} + +void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) +{ + if (CurInstr.SetFlags == 0) + return; + if (retriveCV && !(CurInstr.SetFlags & 0x3)) + retriveCV = false; + + bool carryOnly = !retriveCV && carryUsed; + if (carryOnly && !(CurInstr.SetFlags & 0x2)) + { + carryUsed = false; + carryOnly = false; + } + + CPSRDirty = true; + + if (retriveCV) + { + SETcc(CC_O, R(RSCRATCH)); + SETcc(sign ? CC_NC : CC_C, R(RSCRATCH3)); + LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0)); + } + + if (FlagsNZRequired()) + { + SETcc(CC_S, R(RSCRATCH)); + SETcc(CC_Z, R(RSCRATCH3)); + LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0)); + int shiftAmount = 30; + if (retriveCV || carryUsed) + { + LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0)); + shiftAmount = carryOnly ? 29 : 28; + } + SHL(32, R(RSCRATCH), Imm8(shiftAmount)); + + AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28))); + OR(32, R(RCPSR), R(RSCRATCH)); + } + else if (carryUsed || retriveCV) + { + SHL(32, R(RSCRATCH2), Imm8(carryOnly ? 29 : 28)); + AND(32, R(RCPSR), Imm32(0xFFFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 
3 : 0) << 28))); + OR(32, R(RCPSR), R(RSCRATCH2)); + } +} + +// always uses RSCRATCH, RSCRATCH2 only if S == true +OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed) +{ + carryUsed = S; + + if (S) + { + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + TEST(32, R(RCPSR), Imm32(1 << 29)); + SETcc(CC_NZ, R(RSCRATCH2)); + } + + MOV(32, R(RSCRATCH), rm); + static_assert(RSCRATCH3 == ECX, "Someone changed RSCRATCH3"); + MOV(32, R(ECX), rs); + AND(32, R(ECX), Imm32(0xFF)); + + FixupBranch zero = J_CC(CC_Z); + if (op < 3) + { + void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL; + if (op == 0) + shiftOp = &Compiler::SHL; + else if (op == 1) + shiftOp = &Compiler::SHR; + else if (op == 2) + shiftOp = &Compiler::SAR; + + CMP(32, R(ECX), Imm8(32)); + FixupBranch lt32 = J_CC(CC_L); + FixupBranch done1; + if (op < 2) + { + FixupBranch eq32 = J_CC(CC_E); + XOR(32, R(RSCRATCH), R(RSCRATCH)); + if (S) + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + done1 = J(); + SetJumpTarget(eq32); + } + (this->*shiftOp)(32, R(RSCRATCH), Imm8(31)); + (this->*shiftOp)(32, R(RSCRATCH), Imm8(1)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + FixupBranch done2 = J(); + + SetJumpTarget(lt32); + (this->*shiftOp)(32, R(RSCRATCH), R(ECX)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + if (op < 2) + SetJumpTarget(done1); + SetJumpTarget(done2); + + } + else if (op == 3) + { + if (S) + BT(32, R(RSCRATCH), Imm8(31)); + ROR_(32, R(RSCRATCH), R(ECX)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + } + SetJumpTarget(zero); + + return R(RSCRATCH); +} + +// may uses RSCRATCH for op2 and RSCRATCH2 for the carryValue +OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed) +{ + carryUsed = true; + + switch (op) + { + case 0: // LSL + if (amount > 0) + { + MOV(32, R(RSCRATCH), rm); + SHL(32, R(RSCRATCH), Imm8(amount)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + return R(RSCRATCH); + } + else + { + carryUsed = false; + return rm; + } + case 1: // LSR + if (amount > 0) + { + MOV(32, R(RSCRATCH), rm); + SHR(32, R(RSCRATCH), Imm8(amount)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + return R(RSCRATCH); + } + else + { + if (S) + { + MOV(32, R(RSCRATCH2), rm); + SHR(32, R(RSCRATCH2), Imm8(31)); + } + return Imm32(0); + } + case 2: // ASR + MOV(32, R(RSCRATCH), rm); + SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31)); + if (S) + { + if (amount == 0) + BT(32, rm, Imm8(31)); + SETcc(CC_C, R(RSCRATCH2)); + } + return R(RSCRATCH); + case 3: // ROR + MOV(32, R(RSCRATCH), rm); + if (amount > 0) + ROR_(32, R(RSCRATCH), Imm8(amount)); + else + { + BT(32, R(RCPSR), Imm8(29)); + RCR(32, R(RSCRATCH), Imm8(1)); + } + if (S) + SETcc(CC_C, R(RSCRATCH2)); + return R(RSCRATCH); + } + + assert(false); +} + +void Compiler::T_Comp_ShiftImm() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); + + int op = (CurInstr.Instr >> 11) & 0x3; + int amount = (CurInstr.Instr >> 6) & 0x1F; + + Comp_AddCycles_C(); + + bool carryUsed; + OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed); + + if (shifted != rd) + MOV(32, rd, shifted); + + if (FlagsNZRequired()) + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, carryUsed); +} + +void Compiler::T_Comp_AddSub_() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); + + int op = (CurInstr.Instr >> 9) & 0x3; + + OpArg rn = op >= 2 ? 
Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6)); + + Comp_AddCycles_C(); + + // special case for thumb mov being alias to add rd, rn, #0 + if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0) + { + if (rd != rs) + MOV(32, rd, rs); + } + else if (op & 1) + Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV); + else + Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV); +} + +void Compiler::T_Comp_ALU_Imm8() +{ + OpArg rd = MapReg(CurInstr.T_Reg(8)); + + u32 op = (CurInstr.Instr >> 11) & 0x3; + OpArg imm = Imm32(CurInstr.Instr & 0xFF); + + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: + MOV(32, rd, imm); + if (FlagsNZRequired()) + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, false); + return; + case 0x1: + Comp_CmpOp(2, rd, imm, false); + return; + case 0x2: + Comp_ArithTriOp(&Compiler::ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV); + return; + case 0x3: + Comp_ArithTriOp(&Compiler::SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV); + return; + } +} + +void Compiler::T_Comp_MUL() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); + Comp_MulOp(true, false, rd, rd, rs, Imm8(-1)); +} + +void Compiler::T_Comp_ALU() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); + + u32 op = (CurInstr.Instr >> 6) & 0xF; + + if ((op >= 0x2 && op < 0x4) || op == 0x7) + Comp_AddCycles_CI(1); // shift by reg + else + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: // AND + Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0x1: // EOR + Comp_ArithTriOp(&Compiler::XOR, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0x2: + case 0x3: + case 0x4: + case 0x7: + { + int shiftOp = op == 0x7 ? 
3 : op - 0x2; + bool carryUsed; + OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed); + if (FlagsNZRequired()) + TEST(32, shifted, shifted); + MOV(32, rd, shifted); + Comp_RetriveFlags(false, false, true); + } + return; + case 0x5: // ADC + Comp_ArithTriOp(&Compiler::ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV); + return; + case 0x6: // SBC + Comp_ArithTriOp(&Compiler::SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV); + return; + case 0x8: // TST + Comp_CmpOp(0, rd, rs, false); + return; + case 0x9: // NEG + if (rd != rs) + MOV(32, rd, rs); + NEG(32, rd); + Comp_RetriveFlags(true, true, false); + return; + case 0xA: // CMP + Comp_CmpOp(2, rd, rs, false); + return; + case 0xB: // CMN + Comp_CmpOp(3, rd, rs, false); + return; + case 0xC: // ORR + Comp_ArithTriOp(&Compiler::OR, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0xE: // BIC + Comp_ArithTriOp(&Compiler::AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2); + return; + case 0xF: // MVN + if (rd != rs) + MOV(32, rd, rs); + NOT(32, rd); + Comp_RetriveFlags(false, false, false); + return; + default: + break; + } +} + +void Compiler::T_Comp_ALU_HiReg() +{ + u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8)); + OpArg rdMapped = MapReg(rd); + OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF); + + u32 op = (CurInstr.Instr >> 8) & 0x3; + + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: // ADD + Comp_ArithTriOp(&Compiler::ADD, rdMapped, rdMapped, rs, false, opSymmetric); + break; + case 0x1: // CMP + Comp_CmpOp(2, rdMapped, rs, false); + return; // this is on purpose + case 0x2: // MOV + if (rdMapped != rs) + MOV(32, rdMapped, rs); + break; + } + + if (rd == 15) + { + OR(32, rdMapped, Imm8(1)); + Comp_JumpTo(rdMapped.GetSimpleReg()); + } +} + +void Compiler::T_Comp_AddSP() +{ + Comp_AddCycles_C(); + + OpArg sp = MapReg(13); + OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2); + if (CurInstr.Instr & (1 << 7)) + SUB(32, sp, offset); + else + ADD(32, sp, offset); +} + +void Compiler::T_Comp_RelAddr() +{ + Comp_AddCycles_C(); + + OpArg rd = MapReg(CurInstr.T_Reg(8)); + u32 offset = (CurInstr.Instr & 0xFF) << 2; + if (CurInstr.Instr & (1 << 11)) + { + OpArg sp = MapReg(13); + LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset)); + } + else + MOV(32, rd, Imm32((R15 & ~2) + offset)); +} + +}
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp new file mode 100644 index 0000000..bda9e52 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -0,0 +1,272 @@ +#include "ARMJIT_Compiler.h" + +using namespace Gen; + +namespace ARMJIT +{ + +template <typename T> +int squeezePointer(T* ptr) +{ + int truncated = (int)((u64)ptr); + assert((T*)((u64)truncated) == ptr); + return truncated; +} + +void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) +{ + // we can simplify constant branches by a lot + IrregularCycles = true; + + u32 newPC; + u32 cycles = 0; + + if (addr & 0x1 && !Thumb) + { + CPSRDirty = true; + OR(32, R(RCPSR), Imm8(0x20)); + } + else if (!(addr & 0x1) && Thumb) + { + CPSRDirty = true; + AND(32, R(RCPSR), Imm32(~0x20)); + } + + if (Num == 0) + { + ARMv5* cpu9 = (ARMv5*)CurCPU; + + u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0]; + u32 compileTimeCodeCycles = cpu9->RegionCodeCycles; + cpu9->RegionCodeCycles = regionCodeCycles; + + if (Exit) + MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles)); + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + // two-opcodes-at-once fetch + // doesn't matter if we put garbage in the MSbs there + if (addr & 0x2) + { + cpu9->CodeRead32(addr-2, true); + cycles += cpu9->CodeCycles; + cpu9->CodeRead32(addr+2, false); + cycles += CurCPU->CodeCycles; + } + else + { + cpu9->CodeRead32(addr, true); + cycles += cpu9->CodeCycles; + } + } + else + { + addr &= ~0x3; + newPC = addr+4; + + cpu9->CodeRead32(addr, true); + cycles += cpu9->CodeCycles; + cpu9->CodeRead32(addr+4, false); + cycles += cpu9->CodeCycles; + } + + cpu9->RegionCodeCycles = compileTimeCodeCycles; + } + else + { + ARMv4* cpu7 = (ARMv4*)CurCPU; + + u32 codeRegion = addr >> 24; + u32 codeCycles = addr >> 15; // cheato + + cpu7->CodeRegion = codeRegion; + cpu7->CodeCycles = codeCycles; + + if (Exit) + { + MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion)); + MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles)); + } + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + // this is necessary because ARM7 bios protection + u32 compileTimePC = CurCPU->R[15]; + CurCPU->R[15] = newPC; + + cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1]; + + CurCPU->R[15] = compileTimePC; + } + else + { + addr &= ~0x3; + newPC = addr+4; + + u32 compileTimePC = CurCPU->R[15]; + CurCPU->R[15] = newPC; + + cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3]; + + CurCPU->R[15] = compileTimePC; + } + + cpu7->CodeRegion = R15 >> 24; + cpu7->CodeCycles = addr >> 15; + } + + if (Exit) + MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC)); + if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) + ConstantCycles += cycles; + else + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); +} + +void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) +{ + IrregularCycles = true; + + bool cpsrDirty = CPSRDirty; + SaveCPSR(); + + PushRegs(restoreCPSR); + + MOV(64, R(ABI_PARAM1), R(RCPU)); + MOV(32, R(ABI_PARAM2), R(addr)); + if (!restoreCPSR) + XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); + else + MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste + if (Num == 0) + CALL((void*)&ARMv5::JumpTo); + else + CALL((void*)&ARMv4::JumpTo); + + PopRegs(restoreCPSR); + + LoadCPSR(); + // in case this instruction is skipped + if (CurInstr.Cond() < 0xE) + CPSRDirty = cpsrDirty; +} + +void 
Compiler::A_Comp_BranchImm() +{ + int op = (CurInstr.Instr >> 24) & 1; + s32 offset = (s32)(CurInstr.Instr << 8) >> 6; + u32 target = R15 + offset; + bool link = op; + + if (CurInstr.Cond() == 0xF) // BLX_imm + { + target += (op << 1) + 1; + link = true; + } + + if (link) + MOV(32, MapReg(14), Imm32(R15 - 4)); + + Comp_JumpTo(target); +} + +void Compiler::A_Comp_BranchXchangeReg() +{ + OpArg rn = MapReg(CurInstr.A_Reg(0)); + MOV(32, R(RSCRATCH), rn); + if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg + MOV(32, MapReg(14), Imm32(R15 - 4)); + Comp_JumpTo(RSCRATCH); +} + +void Compiler::T_Comp_BCOND() +{ + u32 cond = (CurInstr.Instr >> 8) & 0xF; + FixupBranch skipExecute = CheckCondition(cond); + + s32 offset = (s32)(CurInstr.Instr << 24) >> 23; + Comp_JumpTo(R15 + offset + 1, true); + + Comp_SpecialBranchBehaviour(true); + + FixupBranch skipFailed = J(); + SetJumpTarget(skipExecute); + + Comp_SpecialBranchBehaviour(false); + + Comp_AddCycles_C(true); + SetJumpTarget(skipFailed); +} + +void Compiler::T_Comp_B() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20; + Comp_JumpTo(R15 + offset + 1); +} + +void Compiler::T_Comp_BranchXchangeReg() +{ + bool link = CurInstr.Instr & (1 << 7); + + if (link) + { + if (Num == 1) + { + printf("BLX unsupported on ARM7!!!\n"); + return; + } + MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(3))); + MOV(32, MapReg(14), Imm32(R15 - 1)); + Comp_JumpTo(RSCRATCH); + } + else + { + OpArg rn = MapReg(CurInstr.A_Reg(3)); + Comp_JumpTo(rn.GetSimpleReg()); + } +} + +void Compiler::T_Comp_BL_LONG_1() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9; + MOV(32, MapReg(14), Imm32(R15 + offset)); + Comp_AddCycles_C(); +} + +void Compiler::T_Comp_BL_LONG_2() +{ + OpArg lr = MapReg(14); + s32 offset = (CurInstr.Instr & 0x7FF) << 1; + LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset)); + MOV(32, lr, Imm32((R15 - 2) | 1)); + if (Num == 1 || CurInstr.Instr & (1 << 12)) + OR(32, R(RSCRATCH), Imm8(1)); + Comp_JumpTo(RSCRATCH); +} + +void Compiler::T_Comp_BL_Merged() +{ + Comp_AddCycles_C(); + + R15 += 2; + + u32 upperPart = CurInstr.Instr >> 16; + u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9); + target += (upperPart & 0x7FF) << 1; + + if (Num == 1 || upperPart & (1 << 12)) + target |= 1; + + MOV(32, MapReg(14), Imm32((R15 - 2) | 1)); + + Comp_JumpTo(target); +} + +}
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp new file mode 100644 index 0000000..d8bdd56 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -0,0 +1,899 @@ +#include "ARMJIT_Compiler.h" + +#include "../ARMInterpreter.h" +#include "../Config.h" + +#include <assert.h> + +#include "../dolphin/CommonFuncs.h" + +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/mman.h> +#include <unistd.h> +#endif + +using namespace Gen; + +extern "C" void ARM_Ret(); + +namespace ARMJIT +{ +template <> +const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] = +{ +#ifdef _WIN32 + RBX, RSI, RDI, R12, R13, R14, // callee saved + R10, R11, // caller saved +#else + RBX, R12, R13, R14, // callee saved, this is sad + R9, R10, R11, // caller saved +#endif +}; +template <> +const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable = +#ifdef _WIN32 + 8 +#else + 7 +#endif +; + +#ifdef _WIN32 +const BitSet32 CallerSavedPushRegs({R10, R11}); +#else +const BitSet32 CallerSavedPushRegs({R9, R10, R11}); +#endif + +void Compiler::PushRegs(bool saveHiRegs) +{ + BitSet32 loadedRegs(RegCache.LoadedRegs); + + if (saveHiRegs) + { + BitSet32 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00); + for (int reg : hiRegsLoaded) + { + if (Thumb || CurInstr.Cond() == 0xE) + RegCache.UnloadRegister(reg); + else + SaveReg(reg, RegCache.Mapping[reg]); + // prevent saving the register twice + loadedRegs[reg] = false; + } + } + + for (int reg : loadedRegs) + if (BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED) + SaveReg(reg, RegCache.Mapping[reg]); +} + +void Compiler::PopRegs(bool saveHiRegs) +{ + BitSet32 loadedRegs(RegCache.LoadedRegs); + for (int reg : loadedRegs) + { + if ((saveHiRegs && reg >= 8 && reg < 15) + || BitSet32(1 << RegCache.Mapping[reg]) & ABI_ALL_CALLER_SAVED) + { + LoadReg(reg, RegCache.Mapping[reg]); + } + } +} + +void Compiler::A_Comp_MRS() +{ + Comp_AddCycles_C(); + + OpArg rd = MapReg(CurInstr.A_Reg(12)); + + if (CurInstr.Instr & (1 << 22)) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + XOR(32, R(RSCRATCH3), R(RSCRATCH3)); + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(ReadBanked); + MOV(32, rd, R(RSCRATCH3)); + } + else + MOV(32, rd, R(RCPSR)); +} + +void Compiler::A_Comp_MSR() +{ + Comp_AddCycles_C(); + + OpArg val = CurInstr.Instr & (1 << 25) + ? 
Imm32(ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E))) + : MapReg(CurInstr.A_Reg(0)); + + u32 mask = 0; + if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF; + if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00; + if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000; + if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000; + + if (CurInstr.Instr & (1 << 22)) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + XOR(32, R(RSCRATCH3), R(RSCRATCH3)); + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(ReadBanked); + + MOV(32, R(RSCRATCH2), Imm32(mask)); + MOV(32, R(RSCRATCH4), R(RSCRATCH2)); + AND(32, R(RSCRATCH4), Imm32(0xFFFFFF00)); + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + CMP(32, R(RSCRATCH), Imm8(0x10)); + CMOVcc(32, RSCRATCH2, R(RSCRATCH4), CC_E); + + MOV(32, R(RSCRATCH4), R(RSCRATCH2)); + NOT(32, R(RSCRATCH4)); + AND(32, R(RSCRATCH3), R(RSCRATCH4)); + + AND(32, R(RSCRATCH2), val); + OR(32, R(RSCRATCH3), R(RSCRATCH2)); + + MOV(32, R(RSCRATCH2), Imm32(15 - 8)); + CALL(WriteBanked); + } + else + { + mask &= 0xFFFFFFDF; + CPSRDirty = true; + + if ((mask & 0xFF) == 0) + { + AND(32, R(RCPSR), Imm32(~mask)); + if (!val.IsImm()) + { + MOV(32, R(RSCRATCH), val); + AND(32, R(RSCRATCH), Imm32(mask)); + OR(32, R(RCPSR), R(RSCRATCH)); + } + else + { + OR(32, R(RCPSR), Imm32(val.Imm32() & mask)); + } + } + else + { + MOV(32, R(RSCRATCH2), Imm32(mask)); + MOV(32, R(RSCRATCH3), R(RSCRATCH2)); + AND(32, R(RSCRATCH3), Imm32(0xFFFFFF00)); + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + CMP(32, R(RSCRATCH), Imm8(0x10)); + CMOVcc(32, RSCRATCH2, R(RSCRATCH3), CC_E); + + MOV(32, R(RSCRATCH3), R(RCPSR)); + + // I need you ANDN + MOV(32, R(RSCRATCH), R(RSCRATCH2)); + NOT(32, R(RSCRATCH)); + AND(32, R(RCPSR), R(RSCRATCH)); + + AND(32, R(RSCRATCH2), val); + OR(32, R(RCPSR), R(RSCRATCH2)); + + PushRegs(true); + + MOV(32, R(ABI_PARAM3), R(RCPSR)); + MOV(32, R(ABI_PARAM2), R(RSCRATCH3)); + MOV(64, R(ABI_PARAM1), R(RCPU)); + CALL((void*)&ARM::UpdateMode); + + PopRegs(true); + } + } +} + +/* + We'll repurpose this .bss memory + + */ +u8 CodeMemory[1024 * 1024 * 32]; + +Compiler::Compiler() +{ + { + #ifdef _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + + u64 pageSize = (u64)sysInfo.dwPageSize; + #else + u64 pageSize = sysconf(_SC_PAGE_SIZE); + #endif + + u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize); + u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned; + + #ifdef _WIN32 + DWORD dummy; + VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy); + #else + mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE); + #endif + + ResetStart = pageAligned; + CodeMemSize = alignedSize; + } + + Reset(); + + { + // RSCRATCH mode + // RSCRATCH2 reg number + // RSCRATCH3 value in current mode + // ret - RSCRATCH3 + ReadBanked = (void*)GetWritableCodePtr(); + CMP(32, R(RSCRATCH), Imm8(0x11)); + FixupBranch fiq = J_CC(CC_E); + SUB(32, R(RSCRATCH2), Imm8(13 - 8)); + FixupBranch notEverything = J_CC(CC_L); + CMP(32, R(RSCRATCH), Imm8(0x12)); + FixupBranch irq = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x13)); + FixupBranch svc = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x17)); + FixupBranch abt = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x1B)); + FixupBranch und = J_CC(CC_E); + SetJumpTarget(notEverything); + RET(); + + SetJumpTarget(fiq); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ))); + RET(); + SetJumpTarget(irq); + 
MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ))); + RET(); + SetJumpTarget(svc); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC))); + RET(); + SetJumpTarget(abt); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT))); + RET(); + SetJumpTarget(und); + MOV(32, R(RSCRATCH3), MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND))); + RET(); + } + { + // RSCRATCH mode + // RSCRATCH2 reg n + // RSCRATCH3 value + // carry flag set if the register isn't banked + WriteBanked = (void*)GetWritableCodePtr(); + CMP(32, R(RSCRATCH), Imm8(0x11)); + FixupBranch fiq = J_CC(CC_E); + SUB(32, R(RSCRATCH2), Imm8(13 - 8)); + FixupBranch notEverything = J_CC(CC_L); + CMP(32, R(RSCRATCH), Imm8(0x12)); + FixupBranch irq = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x13)); + FixupBranch svc = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x17)); + FixupBranch abt = J_CC(CC_E); + CMP(32, R(RSCRATCH), Imm8(0x1B)); + FixupBranch und = J_CC(CC_E); + SetJumpTarget(notEverything); + STC(); + RET(); + + SetJumpTarget(fiq); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_FIQ)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(irq); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_IRQ)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(svc); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_SVC)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(abt); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_ABT)), R(RSCRATCH3)); + CLC(); + RET(); + SetJumpTarget(und); + MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_4, offsetof(ARM, R_UND)), R(RSCRATCH3)); + CLC(); + RET(); + } + + for (int consoleType = 0; consoleType < 2; consoleType++) + { + for (int num = 0; num < 2; num++) + { + for (int size = 0; size < 3; size++) + { + for (int reg = 0; reg < 16; reg++) + { + if (reg == RSCRATCH || reg == ABI_PARAM1 || reg == ABI_PARAM2 || reg == ABI_PARAM3) + { + PatchedStoreFuncs[consoleType][num][size][reg] = NULL; + PatchedLoadFuncs[consoleType][num][size][0][reg] = NULL; + PatchedLoadFuncs[consoleType][num][size][1][reg] = NULL; + continue; + } + + X64Reg rdMapped = (X64Reg)reg; + PatchedStoreFuncs[consoleType][num][size][reg] = GetWritableCodePtr(); + if (RSCRATCH3 != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (num == 0) + { + MOV(64, R(ABI_PARAM2), R(RCPU)); + MOV(32, R(ABI_PARAM3), R(rdMapped)); + } + else + { + MOV(32, R(ABI_PARAM2), R(rdMapped)); + } + ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break; + case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break; + case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break; + case 17: ABI_CallFunction(SlowWrite7<u16, 0>); break; + case 8: ABI_CallFunction(SlowWrite9<u8, 0>); break; + case 9: ABI_CallFunction(SlowWrite7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break; + case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break; + case 16: ABI_CallFunction(SlowWrite9<u16, 1>); break; + case 17: ABI_CallFunction(SlowWrite7<u16, 1>); break; + case 8: ABI_CallFunction(SlowWrite9<u8, 1>); break; + case 9: ABI_CallFunction(SlowWrite7<u8, 1>); break; + } + } + ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8); + RET(); + + for (int signextend = 0; signextend < 2; signextend++) + { + PatchedLoadFuncs[consoleType][num][size][signextend][reg] = GetWritableCodePtr(); + if 
(RSCRATCH3 != ABI_PARAM1) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (num == 0) + MOV(64, R(ABI_PARAM2), R(RCPU)); + ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (consoleType == 0) + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowRead9<u32, 0>); break; + case 33: ABI_CallFunction(SlowRead7<u32, 0>); break; + case 16: ABI_CallFunction(SlowRead9<u16, 0>); break; + case 17: ABI_CallFunction(SlowRead7<u16, 0>); break; + case 8: ABI_CallFunction(SlowRead9<u8, 0>); break; + case 9: ABI_CallFunction(SlowRead7<u8, 0>); break; + } + } + else + { + switch ((8 << size) | num) + { + case 32: ABI_CallFunction(SlowRead9<u32, 1>); break; + case 33: ABI_CallFunction(SlowRead7<u32, 1>); break; + case 16: ABI_CallFunction(SlowRead9<u16, 1>); break; + case 17: ABI_CallFunction(SlowRead7<u16, 1>); break; + case 8: ABI_CallFunction(SlowRead9<u8, 1>); break; + case 9: ABI_CallFunction(SlowRead7<u8, 1>); break; + } + } + ABI_PopRegistersAndAdjustStack(CallerSavedPushRegs, 8); + if (signextend) + MOVSX(32, 8 << size, rdMapped, R(RSCRATCH)); + else + MOVZX(32, 8 << size, rdMapped, R(RSCRATCH)); + RET(); + } + } + } + } + } + + // move the region forward to prevent overwriting the generated functions + CodeMemSize -= GetWritableCodePtr() - ResetStart; + ResetStart = GetWritableCodePtr(); + + NearStart = ResetStart; + FarStart = ResetStart + 1024*1024*24; + + NearSize = FarStart - ResetStart; + FarSize = (ResetStart + CodeMemSize) - FarStart; +} + +void Compiler::LoadCPSR() +{ + assert(!CPSRDirty); + + MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR))); +} + +void Compiler::SaveCPSR(bool flagClean) +{ + if (CPSRDirty) + { + MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR)); + if (flagClean) + CPSRDirty = false; + } +} + +void Compiler::LoadReg(int reg, X64Reg nativeReg) +{ + if (reg != 15) + MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg]))); + else + MOV(32, R(nativeReg), Imm32(R15)); +} + +void Compiler::SaveReg(int reg, X64Reg nativeReg) +{ + MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg)); +} + +// invalidates RSCRATCH and RSCRATCH3 +Gen::FixupBranch Compiler::CheckCondition(u32 cond) +{ + // hack, ldm/stm can get really big TODO: make this better + bool ldmStm = !Thumb && + (CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM); + if (cond >= 0x8) + { + static_assert(RSCRATCH3 == ECX, "RSCRATCH has to be equal to ECX!"); + MOV(32, R(RSCRATCH3), R(RCPSR)); + SHR(32, R(RSCRATCH3), Imm8(28)); + MOV(32, R(RSCRATCH), Imm32(1)); + SHL(32, R(RSCRATCH), R(RSCRATCH3)); + TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond])); + + return J_CC(CC_Z, ldmStm); + } + else + { + // could have used a LUT, but then where would be the fun? + TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))))); + + return J_CC(cond & 1 ? 
CC_NZ : CC_Z, ldmStm); + } +} + +#define F(x) &Compiler::x +const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] = +{ + // AND + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // EOR + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // SUB + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // RSB + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ADD + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ADC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // SBC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // RSC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // ORR + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // MOV + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + // BIC + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), 
F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), F(A_Comp_Arith), + // MVN + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), F(A_Comp_MovOp), + // TST + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // TEQ + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // CMP + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // CMN + F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), F(A_Comp_CmpOp), + // Mul + F(A_Comp_MUL_MLA), F(A_Comp_MUL_MLA), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), F(A_Comp_Mul_Long), NULL, NULL, NULL, NULL, NULL, + // ARMv5 stuff + F(A_Comp_CLZ), NULL, NULL, NULL, NULL, + // STR + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // STRB + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // LDR + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // LDRB + F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), F(A_Comp_MemWB), + // STRH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked) + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // LDRH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRSB + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // LDRSH + F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), F(A_Comp_MemHalf), + // swap + NULL, NULL, + // LDM/STM + F(A_Comp_LDM_STM), F(A_Comp_LDM_STM), + // Branch + F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchImm), F(A_Comp_BranchXchangeReg), F(A_Comp_BranchXchangeReg), + // system stuff + NULL, F(A_Comp_MSR), F(A_Comp_MSR), F(A_Comp_MRS), NULL, NULL, NULL, + F(Nop) +}; + +const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { + // Shift imm + F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), F(T_Comp_ShiftImm), + // Three operand ADD/SUB + F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), F(T_Comp_AddSub_), + // 8 bit imm + F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), F(T_Comp_ALU_Imm8), + // general ALU + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), F(T_Comp_ALU), + F(T_Comp_ALU), F(T_Comp_MUL), F(T_Comp_ALU), F(T_Comp_ALU), + // hi reg + F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), F(T_Comp_ALU_HiReg), + // pc/sp relative + F(T_Comp_RelAddr), 
F(T_Comp_RelAddr), F(T_Comp_AddSP), + // LDR pcrel + F(T_Comp_LoadPCRel), + // LDR/STR reg offset + F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), F(T_Comp_MemReg), + // LDR/STR sign extended, half + F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), F(T_Comp_MemRegHalf), + // LDR/STR imm offset + F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), F(T_Comp_MemImm), + // LDR/STR half imm offset + F(T_Comp_MemImmHalf), F(T_Comp_MemImmHalf), + // LDR/STR sp rel + F(T_Comp_MemSPRel), F(T_Comp_MemSPRel), + // PUSH/POP + F(T_Comp_PUSH_POP), F(T_Comp_PUSH_POP), + // LDMIA, STMIA + F(T_Comp_LDMIA_STMIA), F(T_Comp_LDMIA_STMIA), + // Branch + F(T_Comp_BCOND), F(T_Comp_BranchXchangeReg), F(T_Comp_BranchXchangeReg), F(T_Comp_B), F(T_Comp_BL_LONG_1), F(T_Comp_BL_LONG_2), + // Unk, SVC + NULL, NULL, + F(T_Comp_BL_Merged) +}; +#undef F + +bool Compiler::CanCompile(bool thumb, u16 kind) +{ + return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL; +} + +void Compiler::Reset() +{ + memset(ResetStart, 0xcc, CodeMemSize); + SetCodePtr(ResetStart); + + NearCode = NearStart; + FarCode = FarStart; + + LoadStorePatches.clear(); +} + +bool Compiler::IsJITFault(u64 addr) +{ + return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); +} + +void Compiler::Comp_SpecialBranchBehaviour(bool taken) +{ + if (taken && CurInstr.BranchFlags & branch_IdleBranch) + OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1)); + + if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken) + || (CurInstr.BranchFlags & branch_FollowCondTaken && !taken)) + { + RegCache.PrepareExit(); + + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + JMP((u8*)&ARM_Ret, true); + } +} + +JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount) +{ + if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess... + { + printf("near reset\n"); + ResetBlockCache(); + } + if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess... + { + printf("far reset\n"); + ResetBlockCache(); + } + + ConstantCycles = 0; + Thumb = thumb; + Num = cpu->Num; + CodeRegion = instrs[0].Addr >> 24; + CurCPU = cpu; + // CPSR might have been modified in a previous block + CPSRDirty = false; + + JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr(); + + RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount); + + for (int i = 0; i < instrsCount; i++) + { + CurInstr = instrs[i]; + R15 = CurInstr.Addr + (Thumb ? 4 : 8); + CodeRegion = R15 >> 24; + + Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken); + + CompileFunc comp = Thumb + ? T_Comp[CurInstr.Info.Kind] + : A_Comp[CurInstr.Info.Kind]; + + bool isConditional = Thumb ? 
CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE; + if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional))) + { + MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15)); + if (comp == NULL) + { + MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles)); + MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr)); + + SaveCPSR(); + } + } + + if (comp != NULL) + RegCache.Prepare(Thumb, i); + else + RegCache.Flush(); + + if (Thumb) + { + if (comp == NULL) + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + + ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]); + } + else + (this->*comp)(); + } + else + { + u32 cond = CurInstr.Cond(); + if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) + { + if (comp) + (this->*comp)(); + else + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + ABI_CallFunction(ARMInterpreter::A_BLX_IMM); + } + } + else if (cond == 0xF) + { + Comp_AddCycles_C(); + } + else + { + IrregularCycles = false; + + FixupBranch skipExecute; + if (cond < 0xE) + skipExecute = CheckCondition(cond); + + if (comp == NULL) + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + + ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]); + } + else + (this->*comp)(); + + Comp_SpecialBranchBehaviour(true); + + if (CurInstr.Cond() < 0xE) + { + if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken)) + { + FixupBranch skipFailed = J(); + SetJumpTarget(skipExecute); + + Comp_AddCycles_C(true); + + Comp_SpecialBranchBehaviour(false); + + SetJumpTarget(skipFailed); + } + else + SetJumpTarget(skipExecute); + } + + } + } + + if (comp == NULL) + LoadCPSR(); + } + + RegCache.Flush(); + + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles)); + JMP((u8*)ARM_Ret, true); + + /*FILE* codeout = fopen("codeout", "a"); + fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr); + fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout); + + fclose(codeout);*/ + + return res; +} + +void Compiler::Comp_AddCycles_C(bool forceNonConstant) +{ + s32 cycles = Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); + + if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +void Compiler::Comp_AddCycles_CI(u32 i) +{ + s32 cycles = (Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; + + if (!Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add) +{ + s32 cycles = Num ? + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); + + if (!Thumb && CurInstr.Cond() < 0xE) + { + LEA(32, RSCRATCH, MDisp(i, add + cycles)); + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(RSCRATCH)); + } + else + { + ConstantCycles += cycles; + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(i)); + } +} + +void Compiler::Comp_AddCycles_CDI() +{ + if (Num == 0) + Comp_AddCycles_CD(); + else + { + IrregularCycles = true; + + s32 cycles; + + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 
0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM + { + if (CodeRegion == 0x02) + cycles = numC + numD; + else + { + numC++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + } + else if (CodeRegion == 0x02) + { + numD++; + cycles = std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles = numC + numD + 1; + } + + if (!Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; + } +} + +void Compiler::Comp_AddCycles_CD() +{ + u32 cycles = 0; + if (Num == 0) + { + s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles; + s32 numD = CurInstr.DataCycles; + + //if (DataRegion != CodeRegion) + cycles = std::max(numC + numD - 6, std::max(numC, numD)); + + IrregularCycles = cycles != numC; + } + else + { + s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]; + s32 numD = CurInstr.DataCycles; + + if ((CurInstr.DataRegion >> 4) == 0x02) + { + if (CodeRegion == 0x02) + cycles += numC + numD; + else + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else if (CodeRegion == 0x02) + { + cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + cycles += numC + numD; + } + + IrregularCycles = true; + } + + if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE) + SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + else + ConstantCycles += cycles; +} + +}
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h new file mode 100644 index 0000000..0fe0147 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -0,0 +1,255 @@ +#ifndef ARMJIT_COMPILER_H +#define ARMJIT_COMPILER_H + +#include "../dolphin/x64Emitter.h" + +#include "../ARMJIT.h" +#include "../ARMJIT_Internal.h" +#include "../ARMJIT_RegisterCache.h" + +#include <unordered_map> + +namespace ARMJIT +{ + +const Gen::X64Reg RCPU = Gen::RBP; +const Gen::X64Reg RCPSR = Gen::R15; + +const Gen::X64Reg RSCRATCH = Gen::EAX; +const Gen::X64Reg RSCRATCH2 = Gen::EDX; +const Gen::X64Reg RSCRATCH3 = Gen::ECX; +const Gen::X64Reg RSCRATCH4 = Gen::R8; + +struct LoadStorePatch +{ + void* PatchFunc; + s16 Offset; + u16 Size; +}; + +struct Op2 +{ + Op2() + {} + + Op2(u32 imm) + : IsImm(true), Imm(imm) + {} + Op2(int reg, int op, int amount) + : IsImm(false) + { + Reg.Reg = reg; + Reg.Op = op; + Reg.Amount = amount; + } + + bool IsImm; + union + { + struct + { + int Reg, Op, Amount; + } Reg; + u32 Imm; + }; +}; + +class Compiler : public Gen::XEmitter +{ +public: + Compiler(); + + void Reset(); + + JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount); + + void LoadReg(int reg, Gen::X64Reg nativeReg); + void SaveReg(int reg, Gen::X64Reg nativeReg); + + bool CanCompile(bool thumb, u16 kind); + + typedef void (Compiler::*CompileFunc)(); + + void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false); + void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false); + + void Comp_AddCycles_C(bool forceNonConstant = false); + void Comp_AddCycles_CI(u32 i); + void Comp_AddCycles_CI(Gen::X64Reg i, int add); + void Comp_AddCycles_CDI(); + void Comp_AddCycles_CD(); + + enum + { + opSetsFlags = 1 << 0, + opSymmetric = 1 << 1, + opRetriveCV = 1 << 2, + opInvertCarry = 1 << 3, + opSyncCarry = 1 << 4, + opInvertOp2 = 1 << 5, + }; + + void Nop() {} + + void A_Comp_Arith(); + void A_Comp_MovOp(); + void A_Comp_CmpOp(); + + void A_Comp_MUL_MLA(); + void A_Comp_Mul_Long(); + + void A_Comp_CLZ(); + + void A_Comp_MemWB(); + void A_Comp_MemHalf(); + void A_Comp_LDM_STM(); + + void A_Comp_BranchImm(); + void A_Comp_BranchXchangeReg(); + + void A_Comp_MRS(); + void A_Comp_MSR(); + + void T_Comp_ShiftImm(); + void T_Comp_AddSub_(); + void T_Comp_ALU_Imm8(); + void T_Comp_ALU(); + void T_Comp_ALU_HiReg(); + void T_Comp_MUL(); + + void T_Comp_RelAddr(); + void T_Comp_AddSP(); + + void T_Comp_MemReg(); + void T_Comp_MemImm(); + void T_Comp_MemRegHalf(); + void T_Comp_MemImmHalf(); + void T_Comp_LoadPCRel(); + void T_Comp_MemSPRel(); + void T_Comp_PUSH_POP(); + void T_Comp_LDMIA_STMIA(); + + void T_Comp_BCOND(); + void T_Comp_B(); + void T_Comp_BranchXchangeReg(); + void T_Comp_BL_LONG_1(); + void T_Comp_BL_LONG_2(); + void T_Comp_BL_Merged(); + + enum + { + memop_Writeback = 1 << 0, + memop_Post = 1 << 1, + memop_SignExtend = 1 << 2, + memop_Store = 1 << 3, + memop_SubtractOffset = 1 << 4 + }; + void Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags); + s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); + bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr); + + void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags); + void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, 
Gen::OpArg op2, bool carryUsed, int opFlags); + void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed); + + void Comp_MulOp(bool S, bool add, Gen::OpArg rd, Gen::OpArg rm, Gen::OpArg rs, Gen::OpArg rn); + + void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed); + + void Comp_SpecialBranchBehaviour(bool taken); + + + Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed); + Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed); + + Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed); + + void LoadCPSR(); + void SaveCPSR(bool flagClean = true); + + bool FlagsNZRequired() + { return CurInstr.SetFlags & 0xC; } + + Gen::FixupBranch CheckCondition(u32 cond); + + void PushRegs(bool saveHiRegs); + void PopRegs(bool saveHiRegs); + + Gen::OpArg MapReg(int reg) + { + if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG) + return Gen::Imm32(R15); + + assert(RegCache.Mapping[reg] != Gen::INVALID_REG); + return Gen::R(RegCache.Mapping[reg]); + } + + JitBlockEntry AddEntryOffset(u32 offset) + { + return (JitBlockEntry)(ResetStart + offset); + } + + u32 SubEntryOffset(JitBlockEntry entry) + { + return (u8*)entry - ResetStart; + } + + void SwitchToNearCode() + { + FarCode = GetWritableCodePtr(); + SetCodePtr(NearCode); + } + + void SwitchToFarCode() + { + NearCode = GetWritableCodePtr(); + SetCodePtr(FarCode); + } + + bool IsJITFault(u64 addr); + + s32 RewriteMemAccess(u64 pc); + + u8* FarCode; + u8* NearCode; + u32 FarSize; + u32 NearSize; + + u8* NearStart; + u8* FarStart; + + void* PatchedStoreFuncs[2][2][3][16]; + void* PatchedLoadFuncs[2][2][3][2][16]; + + std::unordered_map<u8*, LoadStorePatch> LoadStorePatches; + + u8* ResetStart; + u32 CodeMemSize; + + bool Exit; + bool IrregularCycles; + + void* ReadBanked; + void* WriteBanked; + + bool CPSRDirty = false; + + FetchedInstr CurInstr; + + RegisterCache<Compiler, Gen::X64Reg> RegCache; + + bool Thumb; + u32 Num; + u32 R15; + u32 CodeRegion; + + u32 ConstantCycles; + + ARM* CurCPU; +}; + +} + +#endif
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp new file mode 100644 index 0000000..9696d22 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_GenOffsets.cpp @@ -0,0 +1,15 @@ +#include "../ARM.h" + +int main(int argc, char* argv[]) +{ + FILE* f = fopen("ARMJIT_Offsets.h", "w"); +#define writeOffset(field) \ + fprintf(f, "#define ARM_" #field "_offset 0x%x\n", offsetof(ARM, field)) + + writeOffset(CPSR); + writeOffset(Cycles); + writeOffset(StopExecution); + + fclose(f); + return 0; +}
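ARMJIT_GenOffsets.cpp is a small build-time tool: the hand-written linkage assembly that follows cannot evaluate offsetof itself, so the tool bakes the three ARM struct offsets it needs into ARMJIT_Offsets.h. If the struct layout ever changed without the header being regenerated, a compile-time guard along these lines would catch it — a sketch, not part of the commit (offsetof on the ARM class is only conditionally supported, but accepted by the GCC/Clang toolchains this backend targets):

#include <cstddef>
#include "../ARM.h"
#include "ARMJIT_Offsets.h"

// Fail the build if the generated constants drift from the real layout.
static_assert(offsetof(ARM, CPSR) == ARM_CPSR_offset, "regenerate ARMJIT_Offsets.h");
static_assert(offsetof(ARM, Cycles) == ARM_Cycles_offset, "regenerate ARMJIT_Offsets.h");
static_assert(offsetof(ARM, StopExecution) == ARM_StopExecution_offset, "regenerate ARMJIT_Offsets.h");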
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Linkage.s b/src/ARMJIT_x64/ARMJIT_Linkage.s new file mode 100644 index 0000000..0a84df0 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Linkage.s @@ -0,0 +1,78 @@ +.intel_syntax noprefix + +#include "ARMJIT_Offsets.h" + +.text + +#define RCPU rbp +#define RCPSR r15d + +#ifdef WIN64 +#define ARG1_REG ecx +#define ARG2_REG edx +#define ARG3_REG r8d +#define ARG4_REG r9d +#define ARG1_REG64 rcx +#define ARG2_REG64 rdx +#define ARG3_REG64 r8 +#define ARG4_REG64 r9 +#else +#define ARG1_REG edi +#define ARG2_REG esi +#define ARG3_REG edx +#define ARG4_REG ecx +#define ARG1_REG64 rdi +#define ARG2_REG64 rsi +#define ARG3_REG64 rdx +#define ARG4_REG64 rcx +#endif + +.p2align 4,,15 + +.global ARM_Dispatch +ARM_Dispatch: +#ifdef WIN64 + push rdi + push rsi +#endif + push rbx + push r12 + push r13 + push r14 + push r15 + push rbp + +#ifdef WIN64 + sub rsp, 0x28 +#else + sub rsp, 0x8 +#endif + mov RCPU, ARG1_REG64 + mov RCPSR, [RCPU + ARM_CPSR_offset] + + jmp ARG2_REG64 + +.p2align 4,,15 + +.global ARM_Ret +ARM_Ret: + mov [RCPU + ARM_CPSR_offset], RCPSR + +#ifdef WIN64 + add rsp, 0x28 +#else + add rsp, 0x8 +#endif + + pop rbp + pop r15 + pop r14 + pop r13 + pop r12 + pop rbx +#ifdef WIN64 + pop rsi + pop rdi +#endif + + ret diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp new file mode 100644 index 0000000..2da113b --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -0,0 +1,773 @@ +#include "ARMJIT_Compiler.h" + +#include "../Config.h" + +using namespace Gen; + +namespace ARMJIT +{ + +template <typename T> +int squeezePointer(T* ptr) +{ + int truncated = (int)((u64)ptr); + assert((T*)((u64)truncated) == ptr); + return truncated; +} + +s32 Compiler::RewriteMemAccess(u64 pc) +{ + auto it = LoadStorePatches.find((u8*)pc); + if (it != LoadStorePatches.end()) + { + LoadStorePatch patch = it->second; + LoadStorePatches.erase(it); + + u8* curCodePtr = GetWritableCodePtr(); + u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset; + SetCodePtr(rewritePtr); + + CALL(patch.PatchFunc); + u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr); + if (remainingSize > 0) + NOP(remainingSize); + + //printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size); + + SetCodePtr(curCodePtr); + + return patch.Offset; + } + + printf("this is a JIT bug %x\n", pc); + abort(); +} + +/* + According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number) + of all memory load and store instructions always access addresses in the same region as + during the their first execution. + + I tried multiple optimisations, which would benefit from this behaviour + (having fast paths for the first region, …), though none of them yielded a measureable + improvement. 
+*/ + +bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) +{ + u32 localAddr = LocaliseCodeAddress(Num, addr); + + int invalidLiteralIdx = InvalidLiterals.Find(localAddr); + if (invalidLiteralIdx != -1) + { + InvalidLiterals.Remove(invalidLiteralIdx); + return false; + } + + Comp_AddCycles_CDI(); + + u32 val; + // make sure arm7 bios is accessible + u32 tmpR15 = CurCPU->R[15]; + CurCPU->R[15] = R15; + if (size == 32) + { + CurCPU->DataRead32(addr & ~0x3, &val); + val = ROR(val, (addr & 0x3) << 3); + } + else if (size == 16) + { + CurCPU->DataRead16(addr & ~0x1, &val); + if (signExtend) + val = ((s32)val << 16) >> 16; + } + else + { + CurCPU->DataRead8(addr, &val); + if (signExtend) + val = ((s32)val << 24) >> 24; + } + CurCPU->R[15] = tmpR15; + + MOV(32, MapReg(rd), Imm32(val)); + + if (Thumb || CurInstr.Cond() == 0xE) + RegCache.PutLiteral(rd, val); + + return true; +} + + +void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flags) +{ + u32 addressMask = ~0; + if (size == 32) + addressMask = ~3; + if (size == 16) + addressMask = ~1; + + if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback))) + { + u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); + + if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr)) + return; + } + + if (flags & memop_Store) + { + Comp_AddCycles_CD(); + } + else + { + Comp_AddCycles_CDI(); + } + + bool addrIsStatic = Config::JIT_LiteralOptimisations + && RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post)); + u32 staticAddress; + if (addrIsStatic) + staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); + OpArg rdMapped = MapReg(rd); + + OpArg rnMapped = MapReg(rn); + if (Thumb && rn == 15) + rnMapped = Imm32(R15 & ~0x2); + + X64Reg finalAddr = RSCRATCH3; + if (flags & memop_Post) + { + MOV(32, R(RSCRATCH3), rnMapped); + + finalAddr = rnMapped.GetSimpleReg(); + } + + if (op2.IsImm) + { + MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1))); + } + else + { + OpArg rm = MapReg(op2.Reg.Reg); + + if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg() + && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3) + { + LEA(32, finalAddr, + MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0)); + } + else + { + bool throwAway; + OpArg offset = + Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway); + + if (flags & memop_SubtractOffset) + { + if (R(finalAddr) != rnMapped) + MOV(32, R(finalAddr), rnMapped); + if (!offset.IsZero()) + SUB(32, R(finalAddr), offset); + } + else + MOV_sum(32, finalAddr, rnMapped, offset); + } + } + + if ((flags & memop_Writeback) && !(flags & memop_Post)) + MOV(32, rnMapped, R(finalAddr)); + + u32 expectedTarget = Num == 0 + ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + + if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsFastmemCompatible(expectedTarget))) + { + u8* memopStart = GetWritableCodePtr(); + LoadStorePatch patch; + + patch.PatchFunc = flags & memop_Store + ? 
PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()] + : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()]; + + assert(patch.PatchFunc != NULL); + + MOV(64, R(RSCRATCH), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start)); + + X64Reg maskedAddr = RSCRATCH3; + if (size > 8) + { + maskedAddr = RSCRATCH2; + MOV(32, R(RSCRATCH2), R(RSCRATCH3)); + AND(32, R(RSCRATCH2), Imm8(addressMask)); + } + + u8* memopLoadStoreLocation = GetWritableCodePtr(); + if (flags & memop_Store) + { + MOV(size, MRegSum(RSCRATCH, maskedAddr), rdMapped); + } + else + { + if (flags & memop_SignExtend) + MOVSX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr)); + else + MOVZX(32, size, rdMapped.GetSimpleReg(), MRegSum(RSCRATCH, maskedAddr)); + + if (size == 32) + { + AND(32, R(RSCRATCH3), Imm8(0x3)); + SHL(32, R(RSCRATCH3), Imm8(3)); + ROR_(32, rdMapped, R(RSCRATCH3)); + } + } + + patch.Offset = memopStart - memopLoadStoreLocation; + patch.Size = GetWritableCodePtr() - memopStart; + + assert(patch.Size >= 5); + + LoadStorePatches[memopLoadStoreLocation] = patch; + } + else + { + PushRegs(false); + + if (Num == 0) + { + MOV(64, R(ABI_PARAM2), R(RCPU)); + if (ABI_PARAM1 != RSCRATCH3) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (flags & memop_Store) + { + MOV(32, R(ABI_PARAM3), rdMapped); + + switch (size | NDS::ConsoleType) + { + case 32: CALL((void*)&SlowWrite9<u32, 0>); break; + case 16: CALL((void*)&SlowWrite9<u16, 0>); break; + case 8: CALL((void*)&SlowWrite9<u8, 0>); break; + case 33: CALL((void*)&SlowWrite9<u32, 1>); break; + case 17: CALL((void*)&SlowWrite9<u16, 1>); break; + case 9: CALL((void*)&SlowWrite9<u8, 1>); break; + } + } + else + { + switch (size | NDS::ConsoleType) + { + case 32: CALL((void*)&SlowRead9<u32, 0>); break; + case 16: CALL((void*)&SlowRead9<u16, 0>); break; + case 8: CALL((void*)&SlowRead9<u8, 0>); break; + case 33: CALL((void*)&SlowRead9<u32, 1>); break; + case 17: CALL((void*)&SlowRead9<u16, 1>); break; + case 9: CALL((void*)&SlowRead9<u8, 1>); break; + } + } + } + else + { + if (ABI_PARAM1 != RSCRATCH3) + MOV(32, R(ABI_PARAM1), R(RSCRATCH3)); + if (flags & memop_Store) + { + MOV(32, R(ABI_PARAM2), rdMapped); + + switch (size | NDS::ConsoleType) + { + case 32: CALL((void*)&SlowWrite7<u32, 0>); break; + case 16: CALL((void*)&SlowWrite7<u16, 0>); break; + case 8: CALL((void*)&SlowWrite7<u8, 0>); break; + case 33: CALL((void*)&SlowWrite7<u32, 1>); break; + case 17: CALL((void*)&SlowWrite7<u16, 1>); break; + case 9: CALL((void*)&SlowWrite7<u8, 1>); break; + } + } + else + { + switch (size | NDS::ConsoleType) + { + case 32: CALL((void*)&SlowRead7<u32, 0>); break; + case 16: CALL((void*)&SlowRead7<u16, 0>); break; + case 8: CALL((void*)&SlowRead7<u8, 0>); break; + case 33: CALL((void*)&SlowRead7<u32, 1>); break; + case 17: CALL((void*)&SlowRead7<u16, 1>); break; + case 9: CALL((void*)&SlowRead7<u8, 1>); break; + } + } + } + + PopRegs(false); + + if (!(flags & memop_Store)) + { + if (flags & memop_SignExtend) + MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH)); + else + MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH)); + } + } + + if (!(flags & memop_Store) && rd == 15) + { + if (size < 32) + printf("!!! 
LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr); + { + if (Num == 1) + { + if (Thumb) + OR(32, rdMapped, Imm8(0x1)); + else + AND(32, rdMapped, Imm8(0xFE)); + } + Comp_JumpTo(rdMapped.GetSimpleReg()); + } + } +} + +s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode) +{ + int regsCount = regs.Count(); + + if (regsCount == 0) + return 0; // actually not the right behaviour TODO: fix me + + if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin())) + { + int flags = 0; + if (store) + flags |= memop_Store; + if (decrement && preinc) + flags |= memop_SubtractOffset; + Op2 offset = preinc ? Op2(4) : Op2(0); + + Comp_MemAccess(*regs.begin(), rn, offset, 32, flags); + + return decrement ? -4 : 4; + } + + s32 offset = (regsCount * 4) * (decrement ? -1 : 1); + + int expectedTarget = Num == 0 + ? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion) + : ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion); + + if (!store) + Comp_AddCycles_CDI(); + else + Comp_AddCycles_CD(); + + bool compileFastPath = Config::JIT_FastMemory + && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsFastmemCompatible(expectedTarget)); + + // we need to make sure that the stack stays aligned to 16 bytes +#ifdef _WIN32 + // include shadow + u32 stackAlloc = (((regsCount + 4 + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8; +#else + u32 stackAlloc = (((regsCount + 1) & ~1) + (compileFastPath ? 1 : 0)) * 8; +#endif + u32 allocOffset = stackAlloc - regsCount * 8; + + if (decrement) + MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4 + (preinc ? 0 : 4))); + else + MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(preinc ? 4 : 0)); + + if (compileFastPath) + { + AND(32, R(RSCRATCH4), Imm8(~3)); + + u8* fastPathStart = GetWritableCodePtr(); + u8* firstLoadStoreAddr; + + bool firstLoadStore = true; + + MOV(64, R(RSCRATCH2), ImmPtr(Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start)); + ADD(64, R(RSCRATCH2), R(RSCRATCH4)); + MOV(32, R(RSCRATCH3), R(RSCRATCH4)); + + u32 offset = 0; + for (int reg : regs) + { + if (firstLoadStore) + firstLoadStoreAddr = GetWritableCodePtr(); + + OpArg mem = MDisp(RSCRATCH2, offset); + if (store) + { + if (RegCache.LoadedRegs & (1 << reg)) + { + MOV(32, mem, MapReg(reg)); + } + else + { + LoadReg(reg, RSCRATCH); + if (firstLoadStore) + firstLoadStoreAddr = GetWritableCodePtr(); + MOV(32, mem, R(RSCRATCH)); + } + } + else + { + if (RegCache.LoadedRegs & (1 << reg)) + { + MOV(32, MapReg(reg), mem); + } + else + { + MOV(32, R(RSCRATCH), mem); + SaveReg(reg, RSCRATCH); + } + } + offset += 4; + + firstLoadStore = false; + } + + LoadStorePatch patch; + patch.Size = GetWritableCodePtr() - fastPathStart; + patch.Offset = fastPathStart - firstLoadStoreAddr; + SwitchToFarCode(); + patch.PatchFunc = GetWritableCodePtr(); + + LoadStorePatches[firstLoadStoreAddr] = patch; + } + + if (!store) + { + PushRegs(false); + + MOV(32, R(ABI_PARAM1), R(RSCRATCH4)); + MOV(32, R(ABI_PARAM3), Imm32(regsCount)); + SUB(64, R(RSP), stackAlloc <= INT8_MAX ? 
Imm8(stackAlloc) : Imm32(stackAlloc)); + if (allocOffset == 0) + MOV(64, R(ABI_PARAM2), R(RSP)); + else + LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset)); + + if (Num == 0) + MOV(64, R(ABI_PARAM4), R(RCPU)); + + switch (Num * 2 | NDS::ConsoleType) + { + case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break; + case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break; + case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break; + case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break; + } + + PopRegs(false); + + if (allocOffset) + ADD(64, R(RSP), Imm8(allocOffset)); + + bool firstUserMode = true; + for (int reg : regs) + { + if (usermode && !regs[15] && reg >= 8 && reg < 15) + { + if (firstUserMode) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + firstUserMode = false; + } + MOV(32, R(RSCRATCH2), Imm32(reg - 8)); + POP(RSCRATCH3); + CALL(WriteBanked); + FixupBranch sucessfulWritten = J_CC(CC_NC); + if (RegCache.LoadedRegs & (1 << reg)) + MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3)); + else + SaveReg(reg, RSCRATCH3); + SetJumpTarget(sucessfulWritten); + } + else if (!(RegCache.LoadedRegs & (1 << reg))) + { + assert(reg != 15); + + POP(RSCRATCH); + SaveReg(reg, RSCRATCH); + } + else + { + POP(MapReg(reg).GetSimpleReg()); + } + } + } + else + { + bool firstUserMode = true; + for (int reg = 15; reg >= 0; reg--) + { + if (regs[reg]) + { + if (usermode && reg >= 8 && reg < 15) + { + if (firstUserMode) + { + MOV(32, R(RSCRATCH), R(RCPSR)); + AND(32, R(RSCRATCH), Imm8(0x1F)); + firstUserMode = false; + } + if (RegCache.Mapping[reg] == INVALID_REG) + LoadReg(reg, RSCRATCH3); + else + MOV(32, R(RSCRATCH3), R(RegCache.Mapping[reg])); + MOV(32, R(RSCRATCH2), Imm32(reg - 8)); + CALL(ReadBanked); + PUSH(RSCRATCH3); + } + else if (!(RegCache.LoadedRegs & (1 << reg))) + { + LoadReg(reg, RSCRATCH); + PUSH(RSCRATCH); + } + else + { + PUSH(MapReg(reg).GetSimpleReg()); + } + } + } + + if (allocOffset) + SUB(64, R(RSP), Imm8(allocOffset)); + + PushRegs(false); + + MOV(32, R(ABI_PARAM1), R(RSCRATCH4)); + if (allocOffset) + LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset)); + else + MOV(64, R(ABI_PARAM2), R(RSP)); + + MOV(32, R(ABI_PARAM3), Imm32(regsCount)); + if (Num == 0) + MOV(64, R(ABI_PARAM4), R(RCPU)); + + switch (Num * 2 | NDS::ConsoleType) + { + case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break; + case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break; + case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break; + case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break; + } + + ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc)); + + PopRegs(false); + } + + if (compileFastPath) + { + RET(); + SwitchToNearCode(); + } + + if (!store && regs[15]) + { + if (Num == 1) + { + if (Thumb) + OR(32, MapReg(15), Imm8(1)); + else + AND(32, MapReg(15), Imm8(0xFE)); + } + Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode); + } + + return offset; +} + + +void Compiler::A_Comp_MemWB() +{ + bool load = CurInstr.Instr & (1 << 20); + bool byte = CurInstr.Instr & (1 << 22); + int size = byte ? 
8 : 32; + + int flags = 0; + if (!load) + flags |= memop_Store; + if (!(CurInstr.Instr & (1 << 24))) + flags |= memop_Post; + if (CurInstr.Instr & (1 << 21)) + flags |= memop_Writeback; + if (!(CurInstr.Instr & (1 << 23))) + flags |= memop_SubtractOffset; + + Op2 offset; + if (!(CurInstr.Instr & (1 << 25))) + { + offset = Op2(CurInstr.Instr & 0xFFF); + } + else + { + int op = (CurInstr.Instr >> 5) & 0x3; + int amount = (CurInstr.Instr >> 7) & 0x1F; + int rm = CurInstr.A_Reg(0); + + offset = Op2(rm, op, amount); + } + + Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags); +} + +void Compiler::A_Comp_MemHalf() +{ + Op2 offset = CurInstr.Instr & (1 << 22) + ? Op2(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0)) + : Op2(CurInstr.A_Reg(0), 0, 0); + + int op = (CurInstr.Instr >> 5) & 0x3; + bool load = CurInstr.Instr & (1 << 20); + + bool signExtend = false; + int size; + if (!load) + { + size = op == 1 ? 16 : 32; + load = op == 2; + } + else if (load) + { + size = op == 2 ? 8 : 16; + signExtend = op > 1; + } + + if (size == 32 && Num == 1) + return; // NOP + + int flags = 0; + if (signExtend) + flags |= memop_SignExtend; + if (!load) + flags |= memop_Store; + if (!(CurInstr.Instr & (1 << 24))) + flags |= memop_Post; + if (!(CurInstr.Instr & (1 << 23))) + flags |= memop_SubtractOffset; + if (CurInstr.Instr & (1 << 21)) + flags |= memop_Writeback; + + Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags); +} + +void Compiler::T_Comp_MemReg() +{ + int op = (CurInstr.Instr >> 10) & 0x3; + bool load = op & 0x2; + bool byte = op & 0x1; + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0), + byte ? 8 : 32, load ? 0 : memop_Store); +} + +void Compiler::A_Comp_LDM_STM() +{ + BitSet16 regs(CurInstr.Instr & 0xFFFF); + + bool load = CurInstr.Instr & (1 << 20); + bool pre = CurInstr.Instr & (1 << 24); + bool add = CurInstr.Instr & (1 << 23); + bool writeback = CurInstr.Instr & (1 << 21); + bool usermode = CurInstr.Instr & (1 << 22); + + OpArg rn = MapReg(CurInstr.A_Reg(16)); + + s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode); + + if (load && writeback && regs[CurInstr.A_Reg(16)]) + writeback = Num == 0 + ? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1)) + : false; + if (writeback) + ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset)); +} + +void Compiler::T_Comp_MemImm() +{ + int op = (CurInstr.Instr >> 11) & 0x3; + bool load = op & 0x1; + bool byte = op & 0x2; + u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4); + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), + byte ? 8 : 32, load ? 0 : memop_Store); +} + +void Compiler::T_Comp_MemRegHalf() +{ + int op = (CurInstr.Instr >> 10) & 0x3; + bool load = op != 0; + int size = op != 1 ? 16 : 8; + bool signExtend = op & 1; + + int flags = 0; + if (signExtend) + flags |= memop_SignExtend; + if (!load) + flags |= memop_Store; + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(CurInstr.T_Reg(6), 0, 0), + size, flags); +} + +void Compiler::T_Comp_MemImmHalf() +{ + u32 offset = (CurInstr.Instr >> 5) & 0x3E; + bool load = CurInstr.Instr & (1 << 11); + + Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16, + load ? 
0 : memop_Store); +} + +void Compiler::T_Comp_LoadPCRel() +{ + u32 offset = (CurInstr.Instr & 0xFF) << 2; + u32 addr = (R15 & ~0x2) + offset; + if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr)) + Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0); +} + +void Compiler::T_Comp_MemSPRel() +{ + u32 offset = (CurInstr.Instr & 0xFF) * 4; + bool load = CurInstr.Instr & (1 << 11); + + Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, + load ? 0 : memop_Store); +} + +void Compiler::T_Comp_PUSH_POP() +{ + bool load = CurInstr.Instr & (1 << 11); + BitSet16 regs(CurInstr.Instr & 0xFF); + if (CurInstr.Instr & (1 << 8)) + { + if (load) + regs[15] = true; + else + regs[14] = true; + } + + OpArg sp = MapReg(13); + s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false); + + ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max +} + +void Compiler::T_Comp_LDMIA_STMIA() +{ + BitSet16 regs(CurInstr.Instr & 0xFF); + OpArg rb = MapReg(CurInstr.T_Reg(8)); + bool load = CurInstr.Instr & (1 << 11); + + s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false); + + if (!load || !regs[CurInstr.T_Reg(8)]) + ADD(32, rb, Imm8(offset)); +} + +}
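One detail worth calling out in ARMJIT_LoadStore.cpp: both Comp_MemLoadLiteral and the inline fastmem path implement the ARM behaviour for unaligned 32-bit loads by reading the aligned word and rotating it right by 8 bits per byte of misalignment (the AND/SHL/ROR sequence on the scratch register). A standalone sketch of that rule, assuming a little-endian host the same way the fastmem mapping does:

#include <cstdint>
#include <cstring>

// LDR from a possibly unaligned address: fetch the aligned word, then
// rotate right by 8 * (addr & 3), matching ROR(val, (addr & 0x3) << 3).
static uint32_t RotatedLoad32(const uint8_t* mem, uint32_t addr)
{
    uint32_t val;
    std::memcpy(&val, mem + (addr & ~3u), 4);
    uint32_t rot = (addr & 3u) * 8;
    return rot ? (val >> rot) | (val << (32 - rot)) : val;
}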
\ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Offsets.h b/src/ARMJIT_x64/ARMJIT_Offsets.h new file mode 100644 index 0000000..a73dd59 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Offsets.h @@ -0,0 +1,3 @@ +#define ARM_CPSR_offset 0x64 +#define ARM_Cycles_offset 0xc +#define ARM_StopExecution_offset 0x10
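These constants describe the ARM struct layout at the time of the commit (CPSR at 0x64, Cycles at 0xc, StopExecution at 0x10) and are exactly what ARMJIT_Linkage.s adds to RCPU to reach those fields. For context, a hedged sketch of how the C++ side might declare and call the two assembly entry points — the exact declarations, and the header that provides JitBlockEntry, may differ in the tree:

#include "../ARM.h"
#include "../ARMJIT_Internal.h"   // assumed to provide ARMJIT::JitBlockEntry

// Defined in ARMJIT_Linkage.s: saves the callee-saved registers, loads
// cpu->CPSR into r15d via ARM_CPSR_offset, then jumps into the block.
extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);

// A compiled block ends by jumping to ARM_Ret, which writes r15d back to
// cpu->CPSR, restores the saved registers and returns to the call below.
static void RunOneBlock(ARM* cpu, ARMJIT::JitBlockEntry entry)
{
    ARM_Dispatch(cpu, entry);
}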