aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRSDuck <rsduck@users.noreply.github.com>2019-07-06 01:48:42 +0200
committerRSDuck <rsduck@users.noreply.github.com>2020-06-16 11:53:08 +0200
commit10e386fe50af1a11ada54a380f6802025fca8efd (patch)
treeb3b9f321f0d92d5082e92be4452bbcd6d46f0cd5
parent550e6b86d2dc09960c5a74270bc49d3f0e895699 (diff)
JIT: most mem instructions working
+ branching
-rw-r--r--src/ARM.cpp10
-rw-r--r--src/ARMJIT.cpp7
-rw-r--r--src/ARMJIT.h2
-rw-r--r--src/ARMJIT_RegCache.h2
-rw-r--r--src/ARMJIT_x64/ARMJIT_ALU.cpp322
-rw-r--r--src/ARMJIT_x64/ARMJIT_Compiler.cpp145
-rw-r--r--src/ARMJIT_x64/ARMJIT_Compiler.h42
-rw-r--r--src/ARMJIT_x64/ARMJIT_LoadStore.cpp805
-rw-r--r--src/ARM_InstrInfo.cpp2
-rw-r--r--src/NDS.cpp2
10 files changed, 653 insertions, 686 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index eadedc7..df58ce3 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -565,8 +565,9 @@ void ARMv5::Execute()
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
- block = ARMJIT::CompileBlock(this);
- Cycles += block();
+ ARMJIT::CompileBlock(this);
+ else
+ Cycles += block();
// TODO optimize this shit!!!
if (Halted)
@@ -650,8 +651,9 @@ void ARMv4::Execute()
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
- block = ARMJIT::CompileBlock(this);
- Cycles += block();
+ ARMJIT::CompileBlock(this);
+ else
+ Cycles += block();
// TODO optimize this shit!!!
if (Halted)
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 4da781c..6afa967 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -121,12 +121,13 @@ void DeInit()
delete compiler;
}
-CompiledBlock CompileBlock(ARM* cpu)
+void CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
FetchedInstr instrs[12];
int i = 0;
+ u32 r15Initial = cpu->R[15];
u32 r15 = cpu->R[15];
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
//printf("block %x %d\n", r15, thumb);
@@ -169,9 +170,7 @@ CompiledBlock CompileBlock(ARM* cpu)
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
- InsertBlock(cpu->Num, cpu->R[15] - (thumb ? 2 : 4), block);
-
- return block;
+ InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block);
}
void ResetBlocks()
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index 45bb4ed..71188f9 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
void Init();
void DeInit();
-CompiledBlock CompileBlock(ARM* cpu);
+void CompileBlock(ARM* cpu);
void ResetBlocks();
diff --git a/src/ARMJIT_RegCache.h b/src/ARMJIT_RegCache.h
index ea9fb30..556d27b 100644
--- a/src/ARMJIT_RegCache.h
+++ b/src/ARMJIT_RegCache.h
@@ -114,7 +114,7 @@ public:
for (int reg : needToBeLoaded)
LoadRegister(reg);
}
- DirtyRegs |= Instr.Info.DstRegs;
+ DirtyRegs |= Instr.Info.DstRegs & ~(1 << 15);
}
static const Reg NativeRegAllocOrder[];
diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp
index 6294e1d..c22751e 100644
--- a/src/ARMJIT_x64/ARMJIT_ALU.cpp
+++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp
@@ -71,30 +71,30 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
{
switch (op)
{
- case 0: // TST
- if (rn.IsImm())
- {
- MOV(32, R(RSCRATCH3), rn);
- rn = R(RSCRATCH3);
- }
- TEST(32, rn, op2);
- break;
- case 1: // TEQ
+ case 0: // TST
+ if (rn.IsImm())
+ {
MOV(32, R(RSCRATCH3), rn);
- XOR(32, R(RSCRATCH3), op2);
- break;
- case 2: // CMP
- if (rn.IsImm())
- {
- MOV(32, R(RSCRATCH3), rn);
- rn = R(RSCRATCH3);
- }
- CMP(32, rn, op2);
- break;
- case 3: // CMN
+ rn = R(RSCRATCH3);
+ }
+ TEST(32, rn, op2);
+ break;
+ case 1: // TEQ
+ MOV(32, R(RSCRATCH3), rn);
+ XOR(32, R(RSCRATCH3), op2);
+ break;
+ case 2: // CMP
+ if (rn.IsImm())
+ {
MOV(32, R(RSCRATCH3), rn);
- ADD(32, R(RSCRATCH3), op2);
- break;
+ rn = R(RSCRATCH3);
+ }
+ CMP(32, rn, op2);
+ break;
+ case 3: // CMN
+ MOV(32, R(RSCRATCH3), rn);
+ ADD(32, R(RSCRATCH3), op2);
+ break;
}
Comp_RetriveFlags(op == 2, op >= 2, carryUsed);
@@ -103,38 +103,38 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed)
// also calculates cycles
OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed)
{
- if (CurrentInstr.Instr & (1 << 25))
+ if (CurInstr.Instr & (1 << 25))
{
Comp_AddCycles_C();
carryUsed = false;
- return Imm32(ROR(CurrentInstr.Instr & 0xFF, (CurrentInstr.Instr >> 7) & 0x1E));
+ return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
}
else
{
- int op = (CurrentInstr.Instr >> 5) & 0x3;
- if (CurrentInstr.Instr & (1 << 4))
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ if (CurInstr.Instr & (1 << 4))
{
Comp_AddCycles_CI(1);
- OpArg rm = MapReg(CurrentInstr.A_Reg(0));
- if (rm.IsImm() && CurrentInstr.A_Reg(0) == 15)
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
+ if (rm.IsImm() && CurInstr.A_Reg(0) == 15)
rm = Imm32(rm.Imm32() + 4);
- return Comp_RegShiftReg(op, MapReg(CurrentInstr.A_Reg(8)), rm, S, carryUsed);
+ return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed);
}
else
{
Comp_AddCycles_C();
- return Comp_RegShiftImm(op, (CurrentInstr.Instr >> 7) & 0x1F,
- MapReg(CurrentInstr.A_Reg(0)), S, carryUsed);
+ return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F,
+ MapReg(CurInstr.A_Reg(0)), S, carryUsed);
}
}
}
void Compiler::A_Comp_CmpOp()
{
- u32 op = (CurrentInstr.Instr >> 21) & 0xF;
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed;
- OpArg rn = MapReg(CurrentInstr.A_Reg(16));
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed);
Comp_CmpOp(op - 0x8, rn, op2, carryUsed);
@@ -142,12 +142,12 @@ void Compiler::A_Comp_CmpOp()
void Compiler::A_Comp_Arith()
{
- bool S = CurrentInstr.Instr & (1 << 20);
- u32 op = (CurrentInstr.Instr >> 21) & 0xF;
+ bool S = CurInstr.Instr & (1 << 20);
+ u32 op = (CurInstr.Instr >> 21) & 0xF;
bool carryUsed;
- OpArg rn = MapReg(CurrentInstr.A_Reg(16));
- OpArg rd = MapReg(CurrentInstr.A_Reg(12));
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed);
u32 sFlag = S ? opSetsFlags : 0;
@@ -155,13 +155,13 @@ void Compiler::A_Comp_Arith()
{
case 0x0: // AND
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag);
- return;
+ break;
case 0x1: // EOR
Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
- return;
+ break;
case 0x2: // SUB
Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
- return;
+ break;
case 0x3: // RSB
if (op2.IsZero())
{
@@ -173,41 +173,44 @@ void Compiler::A_Comp_Arith()
}
else
Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry);
- return;
+ break;
case 0x4: // ADD
Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV);
- return;
+ break;
case 0x5: // ADC
Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry);
- return;
+ break;
case 0x6: // SBC
Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry|opInvertCarry);
- return;
+ break;
case 0x7: // RSC
Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry);
- return;
+ break;
case 0xC: // ORR
Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag);
- return;
+ break;
case 0xE: // BIC
Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2);
- return;
+ break;
default:
assert("unimplemented");
}
+
+ if (CurInstr.A_Reg(12) == 15)
+ Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::A_Comp_MovOp()
{
bool carryUsed;
- bool S = CurrentInstr.Instr & (1 << 20);
+ bool S = CurInstr.Instr & (1 << 20);
OpArg op2 = A_Comp_GetALUOp2(S, carryUsed);
- OpArg rd = MapReg(CurrentInstr.A_Reg(12));
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
if (rd != op2)
MOV(32, rd, op2);
- if (((CurrentInstr.Instr >> 21) & 0xF) == 0xF)
+ if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
NOT(32, rd);
if (S)
@@ -215,6 +218,9 @@ void Compiler::A_Comp_MovOp()
TEST(32, rd, rd);
Comp_RetriveFlags(false, false, carryUsed);
}
+
+ if (CurInstr.A_Reg(12) == 15)
+ Comp_JumpTo(rd.GetSimpleReg(), S);
}
void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
@@ -230,7 +236,7 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed)
}
if (carryUsed == 983298)
- printf("etwas ist faul im lande daenemark %x\n", CurrentInstr.Instr);
+ printf("etwas ist faul im lande daenemark %x\n", CurInstr.Instr);
SETcc(CC_S, R(RSCRATCH));
SETcc(CC_Z, R(RSCRATCH3));
@@ -324,61 +330,61 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car
switch (op)
{
- case 0: // LSL
- if (amount > 0)
- {
- MOV(32, R(RSCRATCH), rm);
- SHL(32, R(RSCRATCH), Imm8(amount));
- if (S)
- SETcc(CC_C, R(RSCRATCH2));
-
- return R(RSCRATCH);
- }
- else
- {
- carryUsed = false;
- return rm;
- }
- case 1: // LSR
- if (amount > 0)
- {
- MOV(32, R(RSCRATCH), rm);
- SHR(32, R(RSCRATCH), Imm8(amount));
- if (S)
- SETcc(CC_C, R(RSCRATCH2));
- return R(RSCRATCH);
- }
- else
- {
- if (S)
- {
- MOV(32, R(RSCRATCH2), rm);
- SHR(32, R(RSCRATCH2), Imm8(31));
- }
- return Imm32(0);
- }
- case 2: // ASR
+ case 0: // LSL
+ if (amount > 0)
+ {
MOV(32, R(RSCRATCH), rm);
- SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
+ SHL(32, R(RSCRATCH), Imm8(amount));
if (S)
- {
- if (amount == 0)
- BT(32, rm, Imm8(31));
SETcc(CC_C, R(RSCRATCH2));
- }
+
return R(RSCRATCH);
- case 3: // ROR
+ }
+ else
+ {
+ carryUsed = false;
+ return rm;
+ }
+ case 1: // LSR
+ if (amount > 0)
+ {
MOV(32, R(RSCRATCH), rm);
- if (amount > 0)
- ROR_(32, R(RSCRATCH), Imm8(amount));
- else
- {
- BT(32, R(RCPSR), Imm8(29));
- RCR(32, R(RSCRATCH), Imm8(1));
- }
+ SHR(32, R(RSCRATCH), Imm8(amount));
if (S)
SETcc(CC_C, R(RSCRATCH2));
return R(RSCRATCH);
+ }
+ else
+ {
+ if (S)
+ {
+ MOV(32, R(RSCRATCH2), rm);
+ SHR(32, R(RSCRATCH2), Imm8(31));
+ }
+ return Imm32(0);
+ }
+ case 2: // ASR
+ MOV(32, R(RSCRATCH), rm);
+ SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31));
+ if (S)
+ {
+ if (amount == 0)
+ BT(32, rm, Imm8(31));
+ SETcc(CC_C, R(RSCRATCH2));
+ }
+ return R(RSCRATCH);
+ case 3: // ROR
+ MOV(32, R(RSCRATCH), rm);
+ if (amount > 0)
+ ROR_(32, R(RSCRATCH), Imm8(amount));
+ else
+ {
+ BT(32, R(RCPSR), Imm8(29));
+ RCR(32, R(RSCRATCH), Imm8(1));
+ }
+ if (S)
+ SETcc(CC_C, R(RSCRATCH2));
+ return R(RSCRATCH);
}
assert(false);
@@ -386,11 +392,11 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car
void Compiler::T_Comp_ShiftImm()
{
- OpArg rd = MapReg(CurrentInstr.T_Reg(0));
- OpArg rs = MapReg(CurrentInstr.T_Reg(3));
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
- int op = (CurrentInstr.Instr >> 11) & 0x3;
- int amount = (CurrentInstr.Instr >> 6) & 0x1F;
+ int op = (CurInstr.Instr >> 11) & 0x3;
+ int amount = (CurInstr.Instr >> 6) & 0x1F;
Comp_AddCycles_C();
@@ -406,12 +412,12 @@ void Compiler::T_Comp_ShiftImm()
void Compiler::T_Comp_AddSub_()
{
- OpArg rd = MapReg(CurrentInstr.T_Reg(0));
- OpArg rs = MapReg(CurrentInstr.T_Reg(3));
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
- int op = (CurrentInstr.Instr >> 9) & 0x3;
+ int op = (CurInstr.Instr >> 9) & 0x3;
- OpArg rn = op >= 2 ? Imm32((CurrentInstr.Instr >> 6) & 0x7) : MapReg(CurrentInstr.T_Reg(6));
+ OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6));
Comp_AddCycles_C();
@@ -423,38 +429,38 @@ void Compiler::T_Comp_AddSub_()
void Compiler::T_Comp_ALU_Imm8()
{
- OpArg rd = MapReg(CurrentInstr.T_Reg(8));
+ OpArg rd = MapReg(CurInstr.T_Reg(8));
- u32 op = (CurrentInstr.Instr >> 11) & 0x3;
- OpArg imm = Imm32(CurrentInstr.Instr & 0xFF);
+ u32 op = (CurInstr.Instr >> 11) & 0x3;
+ OpArg imm = Imm32(CurInstr.Instr & 0xFF);
Comp_AddCycles_C();
switch (op)
{
- case 0x0:
- MOV(32, rd, imm);
- TEST(32, rd, rd);
- Comp_RetriveFlags(false, false, false);
- return;
- case 0x1:
- Comp_CmpOp(2, rd, imm, false);
- return;
- case 0x2:
- Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
- return;
- case 0x3:
- Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
- return;
+ case 0x0:
+ MOV(32, rd, imm);
+ TEST(32, rd, rd);
+ Comp_RetriveFlags(false, false, false);
+ return;
+ case 0x1:
+ Comp_CmpOp(2, rd, imm, false);
+ return;
+ case 0x2:
+ Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV);
+ return;
+ case 0x3:
+ Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV);
+ return;
}
}
void Compiler::T_Comp_ALU()
{
- OpArg rd = MapReg(CurrentInstr.T_Reg(0));
- OpArg rs = MapReg(CurrentInstr.T_Reg(3));
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rs = MapReg(CurInstr.T_Reg(3));
- u32 op = (CurrentInstr.Instr >> 6) & 0xF;
+ u32 op = (CurInstr.Instr >> 6) & 0xF;
if ((op >= 0x2 && op < 0x4) || op == 0x7)
Comp_AddCycles_CI(1);
@@ -522,28 +528,62 @@ void Compiler::T_Comp_ALU()
void Compiler::T_Comp_ALU_HiReg()
{
- OpArg rd = MapReg(((CurrentInstr.Instr & 0x7) | ((CurrentInstr.Instr >> 4) & 0x8)));
- OpArg rs = MapReg((CurrentInstr.Instr >> 3) & 0xF);
+ u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
+ OpArg rdMapped = MapReg(rd);
+ OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
- u32 op = (CurrentInstr.Instr >> 8) & 0x3;
+ u32 op = (CurInstr.Instr >> 8) & 0x3;
Comp_AddCycles_C();
switch (op)
{
- case 0x0: // ADD
- Comp_ArithTriOp(ADD, rd, rd, rs, false, opSymmetric|opRetriveCV);
- return;
- case 0x1: // CMP
- Comp_CmpOp(2, rd, rs, false);
- return;
- case 0x2: // MOV
- if (rd != rs)
- MOV(32, rd, rs);
- TEST(32, rd, rd);
- Comp_RetriveFlags(false, false, false);
- return;
+ case 0x0: // ADD
+ Comp_ArithTriOp(ADD, rdMapped, rdMapped, rs, false, opSymmetric|opRetriveCV);
+ break;
+ case 0x1: // CMP
+ Comp_CmpOp(2, rdMapped, rs, false);
+ return; // this is on purpose
+ case 0x2: // MOV
+ if (rdMapped != rs)
+ MOV(32, rdMapped, rs);
+ TEST(32, rdMapped, rdMapped);
+ Comp_RetriveFlags(false, false, false);
+ break;
+ }
+
+ if (rd == 15)
+ {
+ OR(32, rdMapped, Imm8(1));
+ Comp_JumpTo(rdMapped.GetSimpleReg());
}
}
+void Compiler::T_Comp_AddSP()
+{
+ Comp_AddCycles_C();
+
+ OpArg sp = MapReg(13);
+ OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2);
+ if (CurInstr.Instr & (1 << 7))
+ SUB(32, sp, offset);
+ else
+ ADD(32, sp, offset);
+}
+
+void Compiler::T_Comp_RelAddr()
+{
+ Comp_AddCycles_C();
+
+ OpArg rd = MapReg(CurInstr.T_Reg(8));
+ u32 offset = (CurInstr.Instr & 0xFF) << 2;
+ if (CurInstr.Instr & (1 << 11))
+ {
+ OpArg sp = MapReg(13);
+ LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset));
+ }
+ else
+ MOV(32, rd, Imm32((R15 & ~2) + offset));
+}
+
} \ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index 9096397..b7358a2 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -9,7 +9,7 @@ using namespace Gen;
namespace ARMJIT
{
template <>
-const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
+const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
{
#ifdef _WIN32
RBX, RSI, RDI, R12, R13
@@ -18,7 +18,7 @@ const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
#endif
};
template <>
-const int RegCache<Compiler, X64Reg>::NativeRegsAvailable =
+const int RegCache<Compiler, X64Reg>::NativeRegsAvailable =
#ifdef _WIN32
5
#else
@@ -30,24 +30,33 @@ Compiler::Compiler()
{
AllocCodeSpace(1024 * 1024 * 16);
- for (int i = 0; i < 15; i++)
+ for (int i = 0; i < 3; i++)
{
- ReadMemFuncs9[i] = Gen_MemoryRoutine9(false, 32, 0x1000000 * i);
- WriteMemFuncs9[i] = Gen_MemoryRoutine9(true, 32, 0x1000000 * i);
for (int j = 0; j < 2; j++)
{
- ReadMemFuncs7[j][i] = Gen_MemoryRoutine7(false, 32, j, 0x1000000 * i);
- WriteMemFuncs7[j][i] = Gen_MemoryRoutine7(true, 32, j, 0x1000000 * i);
+ MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
+ MemoryFuncs7[i][j][0] = Gen_MemoryRoutine7(j, false, 8 << i);
+ MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
}
}
- ReadMemFuncs9[15] = Gen_MemoryRoutine9(false, 32, 0xFF000000);
- WriteMemFuncs9[15] = Gen_MemoryRoutine9(true, 32, 0xFF000000);
- ReadMemFuncs7[15][0] = ReadMemFuncs7[15][1] = Gen_MemoryRoutine7(false, 32, false, 0xFF000000);
- WriteMemFuncs7[15][0] = WriteMemFuncs7[15][1] = Gen_MemoryRoutine7(true, 32, false, 0xFF000000);
ResetStart = GetWritableCodePtr();
}
+DataRegion Compiler::ClassifyAddress(u32 addr)
+{
+ if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase)
+ return dataRegionDTCM;
+ switch (addr & 0xFF000000)
+ {
+ case 0x02000000: return dataRegionMainRAM;
+ case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM;
+ case 0x04000000: return dataRegionIO;
+ case 0x06000000: return dataRegionVRAM;
+ }
+ return dataRegionGeneric;
+}
+
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@@ -92,6 +101,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
Num = cpu->Num;
R15 = cpu->R[15];
CodeRegion = cpu->CodeRegion;
+ CurCPU = cpu;
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
@@ -106,27 +116,32 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
for (int i = 0; i < instrsCount; i++)
{
R15 += Thumb ? 2 : 4;
- CurrentInstr = instrs[i];
-
- CompileFunc comp = GetCompFunc(CurrentInstr.Info.Kind);
+ CurInstr = instrs[i];
- if (CurrentInstr.Info.Branches())
- comp = NULL;
+ CompileFunc comp = GetCompFunc(CurInstr.Info.Kind);
if (comp == NULL || i == instrsCount - 1)
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
- MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurrentInstr.CodeCycles));
- MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurrentInstr.Instr));
+ MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
+ MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
if (i == instrsCount - 1)
{
- MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurrentInstr.NextInstr[0]));
- MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurrentInstr.NextInstr[1]));
+ MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurInstr.NextInstr[0]));
+ MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
}
- SaveCPSR();
+ if (comp == NULL || CurInstr.Info.Branches())
+ SaveCPSR();
}
+ // run interpreter
+ cpu->CodeCycles = CurInstr.CodeCycles;
+ cpu->R[15] = R15;
+ cpu->CurInstr = CurInstr.Instr;
+ cpu->NextInstr[0] = CurInstr.NextInstr[0];
+ cpu->NextInstr[1] = CurInstr.NextInstr[1];
+
if (comp != NULL)
RegCache.Prepare(i);
else
@@ -134,26 +149,33 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
if (Thumb)
{
+ u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
- u32 icode = (CurrentInstr.Instr >> 6) & 0x3FF;
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
}
else
(this->*comp)();
+
+ ARMInterpreter::THUMBInstrTable[icode](cpu);
}
else
{
- u32 cond = CurrentInstr.Cond();
- if (CurrentInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
+ u32 cond = CurInstr.Cond();
+ if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
+
+ ARMInterpreter::A_BLX_IMM(cpu);
}
else if (cond == 0xF)
+ {
Comp_AddCycles_C();
+ cpu->AddCycles_C();
+ }
else
{
FixupBranch skipExecute;
@@ -180,18 +202,18 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
}
+ u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
- u32 icode = ((CurrentInstr.Instr >> 4) & 0xF) | ((CurrentInstr.Instr >> 16) & 0xFF0);
ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
}
else
(this->*comp)();
FixupBranch skipFailed;
- if (CurrentInstr.Cond() < 0xE)
+ if (CurInstr.Cond() < 0xE)
{
skipFailed = J();
SetJumpTarget(skipExecute);
@@ -200,13 +222,17 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
SetJumpTarget(skipFailed);
}
+
+ if (cpu->CheckCondition(cond))
+ ARMInterpreter::ARMInstrTable[icode](cpu);
+ else
+ cpu->AddCycles_C();
}
}
/*
we don't need to collect the interpreted cycles,
- since all functions only add to it, the dispatcher
- takes care of it.
+ since cpu->Cycles is taken into account by the dispatcher.
*/
if (comp == NULL && i != instrsCount - 1)
@@ -277,29 +303,29 @@ CompileFunc Compiler::GetCompFunc(int kind)
// Mul
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// ARMv5 stuff
- NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL,
// STR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// STRB
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// LDR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// LDRB
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// STRH
- NULL, NULL, NULL, NULL,
+ A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRD
NULL, NULL, NULL, NULL,
// STRD
NULL, NULL, NULL, NULL,
// LDRH
- NULL, NULL, NULL, NULL,
+ A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRSB
- NULL, NULL, NULL, NULL,
+ A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// LDRSH
- NULL, NULL, NULL, NULL,
+ A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
// swap
- NULL, NULL,
+ NULL, NULL,
// LDM/STM
NULL, NULL,
// Branch
@@ -314,26 +340,26 @@ CompileFunc Compiler::GetCompFunc(int kind)
// Three operand ADD/SUB
T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_,
// 8 bit imm
- T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8,
+ T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8,
// general ALU
- T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
- T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
+ T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
+ T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU,
T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU,
// hi reg
T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,
// pc/sp relative
- NULL, NULL, NULL,
+ T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP,
// LDR pcrel
- NULL,
+ NULL,
// LDR/STR reg offset
- T_Comp_MemReg, NULL, T_Comp_MemReg, NULL,
- // LDR/STR sign extended, half
- NULL, NULL, NULL, NULL,
+ T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg,
+ // LDR/STR sign extended, half
+ T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf,
// LDR/STR imm offset
- T_Comp_MemImm, T_Comp_MemImm, NULL, NULL,
+ T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm,
// LDR/STR half imm offset
- NULL, NULL,
+ T_Comp_MemImmHalf, T_Comp_MemImmHalf,
// branch, etc.
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
@@ -346,10 +372,10 @@ CompileFunc Compiler::GetCompFunc(int kind)
void Compiler::Comp_AddCycles_C()
{
s32 cycles = Num ?
- NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 1 : 3]
- : ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles);
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
- if (CurrentInstr.Cond() < 0xE)
+ if (CurInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles));
else
ConstantCycles += cycles;
@@ -358,13 +384,26 @@ void Compiler::Comp_AddCycles_C()
void Compiler::Comp_AddCycles_CI(u32 i)
{
s32 cycles = (Num ?
- NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 0 : 2]
- : ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles)) + i;
-
- if (CurrentInstr.Cond() < 0xE)
+ NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
+
+ if (CurInstr.Cond() < 0xE)
ADD(32, R(RCycles), Imm8(cycles));
else
ConstantCycles += cycles;
}
+void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
+{
+ SaveCPSR();
+
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ MOV(32, R(ABI_PARAM2), R(addr));
+ MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
+ if (Num == 0)
+ CALL((void*)&ARMv5::JumpTo);
+ else
+ CALL((void*)&ARMv4::JumpTo);
+}
+
} \ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index 7ab9b25..9395a29 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -6,6 +6,8 @@
#include "../ARMJIT.h"
#include "../ARMJIT_RegCache.h"
+#include <tuple>
+
namespace ARMJIT
{
@@ -21,6 +23,19 @@ class Compiler;
typedef void (Compiler::*CompileFunc)();
+enum DataRegion
+{
+ dataRegionGeneric, // hey, that's me!
+ dataRegionMainRAM,
+ dataRegionSWRAM,
+ dataRegionVRAM,
+ dataRegionIO,
+ dataRegionExclusive,
+ dataRegionsCount,
+ dataRegionDTCM = dataRegionExclusive,
+ dataRegionWRAM7 = dataRegionExclusive,
+};
+
class Compiler : public Gen::X64CodeBlock
{
public:
@@ -34,6 +49,8 @@ public:
private:
CompileFunc GetCompFunc(int kind);
+ void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
+
void Comp_AddCycles_C();
void Comp_AddCycles_CI(u32 i);
@@ -47,11 +64,14 @@ private:
opInvertOp2 = 1 << 5,
};
+ DataRegion ClassifyAddress(u32 addr);
+
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
void A_Comp_MemWB();
+ void A_Comp_MemHalf();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
@@ -59,8 +79,15 @@ private:
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
+ void T_Comp_RelAddr();
+ void T_Comp_AddSP();
+
void T_Comp_MemReg();
void T_Comp_MemImm();
+ void T_Comp_MemRegHalf();
+ void T_Comp_MemImmHalf();
+
+ void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@@ -70,8 +97,8 @@ private:
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
- void* Gen_MemoryRoutine9(bool store, int size, u32 region);
- void* Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region);
+ void* Gen_MemoryRoutine9(bool store, int size);
+ void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
@@ -92,10 +119,12 @@ private:
}
void* ResetStart;
+ void* MemoryFuncs9[3][2];
+ void* MemoryFuncs7[3][2][2];
bool CPSRDirty = false;
- FetchedInstr CurrentInstr;
+ FetchedInstr CurInstr;
RegCache<Compiler, Gen::X64Reg> RegCache;
@@ -105,12 +134,9 @@ private:
u32 CodeRegion;
u32 ConstantCycles;
-};
-extern void* ReadMemFuncs9[16];
-extern void* ReadMemFuncs7[2][16];
-extern void* WriteMemFuncs9[16];
-extern void* WriteMemFuncs7[2][16];
+ ARM* CurCPU;
+};
}
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index d534269..69746e2 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -5,7 +5,6 @@
namespace NDS
{
-#define MAIN_RAM_SIZE 0x400000
extern u8* SWRAM_ARM9;
extern u32 SWRAM_ARM9Mask;
extern u8* SWRAM_ARM7;
@@ -19,11 +18,6 @@ using namespace Gen;
namespace ARMJIT
{
-void* ReadMemFuncs9[16];
-void* ReadMemFuncs7[2][16];
-void* WriteMemFuncs9[16];
-void* WriteMemFuncs7[2][16];
-
template <typename T>
int squeezePointer(T* ptr)
{
@@ -32,569 +26,434 @@ int squeezePointer(T* ptr)
return truncated;
}
-u32 ReadVRAM9(u32 addr)
-{
- switch (addr & 0x00E00000)
- {
- case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr);
- case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr);
- case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr);
- case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr);
- default: return GPU::ReadVRAM_LCDC<u32>(addr);
- }
-}
+/*
+ According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
+ of all memory load and store instructions always access addresses in the same region as
+   during their first execution.
-void WriteVRAM9(u32 addr, u32 val)
-{
- switch (addr & 0x00E00000)
- {
- case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return;
- case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
- case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
- case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
- default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
- }
-}
+ I tried multiple optimisations, which would benefit from this behaviour
+   (having fast paths for the first region, …), though none of them yielded a measurable
+ improvement.
+*/
/*
- R11 - data to write (store only)
- RSCRATCH2 - address
- RSCRATCH3 - code cycles
+ address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
+ store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
+ code cycles - ABI_PARAM3
*/
-void* Compiler::Gen_MemoryRoutine9(bool store, int size, u32 region)
+void* Compiler::Gen_MemoryRoutine9(bool store, int size)
{
+ u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
AlignCode4();
- void* res = (void*)GetWritableCodePtr();
+ void* res = GetWritableCodePtr();
- if (!store)
- {
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
- AND(32, R(RSCRATCH), Imm8(0x3));
- SHL(32, R(RSCRATCH), Imm8(3));
- // enter the shadow realm!
- MOV(32, MDisp(RSP, 8), R(RSCRATCH));
- }
+ MOV(32, R(RSCRATCH), R(ABI_PARAM1));
+ SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
+ CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
+ FixupBranch insideDTCM = J_CC(CC_B);
- // cycle counting!
- // this is AddCycles_CDI
- MOV(32, R(R10), R(RSCRATCH2));
- SHR(32, R(R10), Imm8(12));
- MOVZX(32, 8, R10, MComplex(RCPU, R10, SCALE_1, offsetof(ARMv5, MemTimings) + 2));
- LEA(32, RSCRATCH, MComplex(RSCRATCH3, R10, SCALE_1, -6));
- CMP(32, R(R10), R(RSCRATCH3));
- CMOVcc(32, RSCRATCH3, R(R10), CC_G);
- CMP(32, R(RSCRATCH), R(RSCRATCH3));
- CMOVcc(32, RSCRATCH3, R(RSCRATCH), CC_G);
- ADD(32, R(RCycles), R(RSCRATCH3));
-
- if (!store)
- XOR(32, R(RSCRATCH), R(RSCRATCH));
- AND(32, R(RSCRATCH2), Imm32(~3));
+ CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
+ FixupBranch insideITCM = J_CC(CC_B);
+ // cycle counting!
+ MOV(32, R(RSCRATCH), R(ABI_PARAM1));
+ SHR(32, R(RSCRATCH), Imm8(12));
+ MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 0)));
+ LEA(32, ABI_PARAM4, MComplex(RSCRATCH, ABI_PARAM3, SCALE_1, -6));
+ CMP(32, R(ABI_PARAM3), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
+ CMP(32, R(ABI_PARAM4), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(ABI_PARAM4), CC_G);
+ ADD(32, R(RCycles), R(RSCRATCH));
+
+ if (store)
{
- MOV(32, R(RSCRATCH3), R(RSCRATCH2));
- SUB(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
- CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
- FixupBranch outsideDTCM = J_CC(CC_AE);
- AND(32, R(RSCRATCH2), Imm32(0x3FFF));
- if (!store)
+ if (size > 8)
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
+ switch (size)
{
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)));
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
+ case 32: JMP((u8*)NDS::ARM9Write32, true); break;
+ case 16: JMP((u8*)NDS::ARM9Write16, true); break;
+ case 8: JMP((u8*)NDS::ARM9Write8, true); break;
}
- else
- MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)), R(R11));
- RET();
- SetJumpTarget(outsideDTCM);
- MOV(32, R(RSCRATCH2), R(RSCRATCH3));
}
-
- switch (region)
+ else
{
- case 0x00000000:
- case 0x01000000:
- {
- CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
- FixupBranch insideITCM = J_CC(CC_B);
- RET();
- SetJumpTarget(insideITCM);
- AND(32, R(RSCRATCH2), Imm32(0x7FFF));
- if (!store)
- MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)));
- else
- {
- MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), Imm32(0));
- }
- }
- break;
- case 0x02000000:
- AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
- if (!store)
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
- else
- {
- MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
- }
- break;
- case 0x03000000:
- {
- MOV(64, R(RSCRATCH3), M(&NDS::SWRAM_ARM9));
- TEST(64, R(RSCRATCH3), R(RSCRATCH3));
- FixupBranch notMapped = J_CC(CC_Z);
- AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM9Mask));
- if (!store)
- MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH3));
- else
- {
- MOV(32, MRegSum(RSCRATCH2, RSCRATCH3), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
- }
- SetJumpTarget(notMapped);
- }
- break;
- case 0x04000000:
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- if (!store)
- {
- ABI_PushRegistersAndAdjustStack({}, 8, 0);
- ABI_CallFunction(NDS::ARM9IORead32);
- ABI_PopRegistersAndAdjustStack({}, 8, 0);
- }
- else
- {
- MOV(32, R(ABI_PARAM2), R(R11));
- JMP((u8*)NDS::ARM9IOWrite32, true);
- }
- break;
- case 0x05000000:
- {
- MOV(32, R(RSCRATCH), Imm32(1<<1));
- MOV(32, R(RSCRATCH3), Imm32(1<<9));
- TEST(32, R(RSCRATCH2), Imm32(0x400));
- CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
- TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
- FixupBranch available = J_CC(CC_NZ);
- RET();
- SetJumpTarget(available);
- AND(32, R(RSCRATCH2), Imm32(0x7FF));
- if (!store)
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::Palette)));
- else
- MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::Palette)), R(R11));
- }
- break;
- case 0x06000000:
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- if (!store)
- {
- ABI_PushRegistersAndAdjustStack({}, 8);
- ABI_CallFunction(ReadVRAM9);
- ABI_PopRegistersAndAdjustStack({}, 8);
- }
- else
- {
- MOV(32, R(ABI_PARAM2), R(R11));
- JMP((u8*)WriteVRAM9, true);
- }
- break;
- case 0x07000000:
+ if (size == 32)
{
- MOV(32, R(RSCRATCH), Imm32(1<<1));
- MOV(32, R(RSCRATCH3), Imm32(1<<9));
- TEST(32, R(RSCRATCH2), Imm32(0x400));
- CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
- TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
- FixupBranch available = J_CC(CC_NZ);
+ ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
+ // everything's already in the appropriate register
+ ABI_CallFunction(NDS::ARM9Read32);
+ ABI_PopRegistersAndAdjustStack({ECX}, 8);
+ AND(32, R(ECX), Imm8(3));
+ SHL(32, R(ECX), Imm8(3));
+ ROR_(32, R(RSCRATCH), R(ECX));
RET();
- SetJumpTarget(available);
- AND(32, R(RSCRATCH2), Imm32(0x7FF));
- if (!store)
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::OAM)));
- else
- MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::OAM)), R(R11));
}
- break;
- case 0x08000000:
- case 0x09000000:
- case 0x0A000000:
- if (!store)
- MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
- break;
- case 0xFF000000:
- if (!store)
- {
- AND(32, R(RSCRATCH2), Imm32(0xFFF));
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM9BIOS)));
- }
- break;
- default:
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- if (!store)
+ else if (size == 16)
{
- ABI_PushRegistersAndAdjustStack({}, 8, 0);
- ABI_CallFunction(NDS::ARM9Read32);
- ABI_PopRegistersAndAdjustStack({}, 8, 0);
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
+ JMP((u8*)NDS::ARM9Read16, true);
}
else
+ JMP((u8*)NDS::ARM9Read8, true);
+ }
+
+ SetJumpTarget(insideDTCM);
+ ADD(32, R(RCycles), R(ABI_PARAM3));
+ AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
+ if (store)
+ MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
+ else
+ {
+ MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
+ if (size == 32)
{
- MOV(32, R(ABI_PARAM2), R(R11));
- JMP((u8*)NDS::ARM9Write32, true);
+ if (ABI_PARAM1 != ECX)
+ MOV(32, R(ECX), R(ABI_PARAM1));
+ AND(32, R(ECX), Imm8(3));
+ SHL(32, R(ECX), Imm8(3));
+ ROR_(32, R(RSCRATCH), R(ECX));
}
- break;
}
+ RET();
- if (!store)
+ SetJumpTarget(insideITCM);
+ ADD(32, R(RCycles), R(ABI_PARAM3));
+ MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
+ AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
+ if (store)
{
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
+ MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
+ XOR(32, R(RSCRATCH), R(RSCRATCH));
+ MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(RSCRATCH));
+ if (size == 32)
+ MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(RSCRATCH));
+ }
+ else
+ {
+ MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)));
+ if (size == 32)
+ {
+ if (ABI_PARAM1 != ECX)
+ MOV(32, R(ECX), R(ABI_PARAM1));
+ AND(32, R(ECX), Imm8(3));
+ SHL(32, R(ECX), Imm8(3));
+ ROR_(32, R(RSCRATCH), R(ECX));
+ }
}
-
RET();
+ static_assert(RSCRATCH == EAX);
+
return res;
}
-void* Compiler::Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region)
+void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
{
+ u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
AlignCode4();
void* res = GetWritableCodePtr();
- if (!store)
- {
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
- AND(32, R(RSCRATCH), Imm8(0x3));
- SHL(32, R(RSCRATCH), Imm8(3));
- // enter the shadow realm!
- MOV(32, MDisp(RSP, 8), R(RSCRATCH));
- }
-
- // AddCycles_CDI
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
+ MOV(32, R(RSCRATCH), R(ABI_PARAM1));
SHR(32, R(RSCRATCH), Imm8(15));
- MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(NDS::ARM7MemTimings + 2)));
- if ((region == 0x02000000 && mainRAMCode) || (region != 0x02000000 && !mainRAMCode))
+ MOVZX(32, 8, ABI_PARAM4, MDisp(RSCRATCH, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
+
+ MOV(32, R(RSCRATCH), R(ABI_PARAM1));
+ AND(32, R(RSCRATCH), Imm32(0xFF000000));
+ CMP(32, R(RSCRATCH), Imm32(0x02000000));
+ FixupBranch outsideMainRAM = J_CC(CC_NE);
+ if (codeMainRAM)
{
- if (!store && region != 0x02000000)
- LEA(32, RSCRATCH3, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, 1));
- ADD(32, R(RCycles), R(RSCRATCH3));
+ LEA(32, RSCRATCH, MRegSum(ABI_PARAM4, ABI_PARAM3));
+ ADD(32, R(RCycles), R(RSCRATCH));
}
else
{
if (!store)
- ADD(32, R(region == 0x02000000 ? RSCRATCH2 : RSCRATCH), Imm8(1));
- LEA(32, R10, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, -3));
- CMP(32, R(RSCRATCH3), R(RSCRATCH));
- CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_G);
- CMP(32, R(R10), R(RSCRATCH));
- CMOVcc(32, RSCRATCH, R(R10), CC_G);
+ ADD(32, R(ABI_PARAM3), Imm8(1));
+ LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
+ CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
+ CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
+ CMP(32, R(ABI_PARAM3), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
ADD(32, R(RCycles), R(RSCRATCH));
}
-
- if (!store)
+ MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
+ AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
+ if (store)
+ {
+ MOV(size, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)), R(ABI_PARAM2));
XOR(32, R(RSCRATCH), R(RSCRATCH));
- AND(32, R(RSCRATCH2), Imm32(~3));
+ MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM)), R(RSCRATCH));
+ if (size == 32)
+ MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM) + 8), R(RSCRATCH));
+ }
+ else
+ {
+ MOVZX(32, size, RSCRATCH, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)));
+ if (size == 32)
+ {
+ if (ABI_PARAM1 != ECX)
+ MOV(32, R(ECX), R(ABI_PARAM1));
+ AND(32, R(ECX), Imm8(3));
+ SHL(32, R(ECX), Imm8(3));
+ ROR_(32, R(RSCRATCH), R(ECX));
+ }
+ }
+ RET();
- switch (region)
+ SetJumpTarget(outsideMainRAM);
+ if (codeMainRAM)
+ {
+ if (!store)
+ ADD(32, R(ABI_PARAM4), Imm8(1));
+ LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
+ CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
+ CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
+ CMP(32, R(ABI_PARAM3), R(RSCRATCH));
+ CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
+ ADD(32, R(RCycles), R(RSCRATCH));
+ }
+ else
+ {
+ LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, store ? 0 : 1));
+ ADD(32, R(RCycles), R(RSCRATCH));
+ }
+ if (store)
+ {
+ if (size > 8)
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
+ switch (size)
+ {
+ case 32: JMP((u8*)NDS::ARM7Write32, true); break;
+ case 16: JMP((u8*)NDS::ARM7Write16, true); break;
+ case 8: JMP((u8*)NDS::ARM7Write8, true); break;
+ }
+ }
+ else
{
- case 0x00000000:
- if (!store) {
- CMP(32, R(RSCRATCH2), Imm32(0x4000));
- FixupBranch outsideBIOS1 = J_CC(CC_AE);
-
- MOV(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARM, R[15])));
- CMP(32, R(RSCRATCH), Imm32(0x4000));
- FixupBranch outsideBIOS2 = J_CC(CC_AE);
- MOV(32, R(RSCRATCH3), M(&NDS::ARM7BIOSProt));
- CMP(32, R(RSCRATCH2), R(RSCRATCH3));
- FixupBranch notDenied1 = J_CC(CC_AE);
- CMP(32, R(RSCRATCH), R(RSCRATCH3));
- FixupBranch notDenied2 = J_CC(CC_B);
- SetJumpTarget(outsideBIOS2);
- MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
- RET();
-
- SetJumpTarget(notDenied1);
- SetJumpTarget(notDenied2);
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7BIOS)));
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
- RET();
-
- SetJumpTarget(outsideBIOS1);
- }
- break;
- case 0x02000000:
- AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
- if (!store)
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
- else
- {
- MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
- }
- break;
- case 0x03000000:
- {
- TEST(32, R(RSCRATCH2), Imm32(0x800000));
- FixupBranch region = J_CC(CC_NZ);
- MOV(64, R(RSCRATCH), M(&NDS::SWRAM_ARM7));
- TEST(64, R(RSCRATCH), R(RSCRATCH));
- FixupBranch notMapped = J_CC(CC_Z);
- AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM7Mask));
- if (!store)
- {
- MOV(32, R(RSCRATCH), MRegSum(RSCRATCH, RSCRATCH2));
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
- }
- else
- {
- MOV(32, MRegSum(RSCRATCH, RSCRATCH2), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
- }
- RET();
- SetJumpTarget(region);
- SetJumpTarget(notMapped);
- AND(32, R(RSCRATCH2), Imm32(0xFFFF));
- if (!store)
- MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)));
- else
- {
- MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)), R(R11));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM)), Imm32(0));
- MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM) + 8), Imm32(0));
- }
- }
- break;
- case 0x04000000:
- {
- TEST(32, R(RSCRATCH2), Imm32(0x800000));
- FixupBranch region = J_CC(CC_NZ);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- if (!store)
- {
- ABI_PushRegistersAndAdjustStack({}, 8);
- ABI_CallFunction(NDS::ARM7IORead32);
- ABI_PopRegistersAndAdjustStack({}, 8);
-
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
- RET();
- }
- else
- {
- MOV(32, R(ABI_PARAM2), R(R11));
- JMP((u8*)NDS::ARM7IOWrite32, true);
- }
- SetJumpTarget(region);
-
- if (!store)
- {
- ABI_PushRegistersAndAdjustStack({RSCRATCH2}, 8);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- ABI_CallFunction(Wifi::Read);
- ABI_PopRegistersAndAdjustStack({RSCRATCH2}, 8);
-
- ADD(32, R(RSCRATCH2), Imm8(2));
- ABI_PushRegistersAndAdjustStack({EAX}, 8);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- ABI_CallFunction(Wifi::Read);
- MOV(32, R(RSCRATCH2), R(EAX));
- SHL(32, R(RSCRATCH2), Imm8(16));
- ABI_PopRegistersAndAdjustStack({EAX}, 8);
- OR(32, R(EAX), R(RSCRATCH2));
- }
- else
- {
- ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- MOVZX(32, 16, ABI_PARAM2, R(R11));
- ABI_CallFunction(Wifi::Write);
- ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
- SHR(32, R(R11), Imm8(16));
- ADD(32, R(RSCRATCH2), Imm8(2));
- ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- MOVZX(32, 16, ABI_PARAM2, R(R11));
- ABI_CallFunction(Wifi::Write);
- ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
- }
- }
- break;
- case 0x06000000:
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
- if (!store)
- {
- ABI_PushRegistersAndAdjustStack({}, 8);
- ABI_CallFunction(GPU::ReadVRAM_ARM7<u32>);
- ABI_PopRegistersAndAdjustStack({}, 8);
- }
- else
- {
- AND(32, R(ABI_PARAM1), Imm32(0x40000 - 1));
- MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM)), Imm32(0));
- MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM) + 8), Imm32(0));
- MOV(32, R(ABI_PARAM2), R(R11));
- JMP((u8*)GPU::WriteVRAM_ARM7<u32>, true);
- }
- break;
- case 0x08000000:
- case 0x09000000:
- case 0x0A000000:
- if (!store)
- MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
- break;
- /*default:
- ABI_PushRegistersAndAdjustStack({}, 8, 0);
- MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
+ if (size == 32)
+ {
+ ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
ABI_CallFunction(NDS::ARM7Read32);
- ABI_PopRegistersAndAdjustStack({}, 8, 0);
- break;*/
+ ABI_PopRegistersAndAdjustStack({ECX}, 8);
+ AND(32, R(ECX), Imm8(3));
+ SHL(32, R(ECX), Imm8(3));
+ ROR_(32, R(RSCRATCH), R(ECX));
+ RET();
+ }
+ else if (size == 16)
+ {
+ AND(32, R(ABI_PARAM1), Imm32(addressMask));
+ JMP((u8*)NDS::ARM7Read16, true);
+ }
+ else
+ JMP((u8*)NDS::ARM7Read8, true);
}
+ return res;
+}
+
+void Compiler::Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size)
+{
+ if (store)
+ MOV(32, R(ABI_PARAM2), rd);
+ u32 cycles = Num
+ ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
+ : (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
+ MOV(32, R(ABI_PARAM3), Imm32(cycles));
+ CALL(Num == 0
+ ? MemoryFuncs9[size >> 4][store]
+ : MemoryFuncs7[size >> 4][store][CodeRegion == 0x02]);
+
if (!store)
{
- MOV(32, R(ECX), MDisp(RSP, 8));
- ROR_(32, R(RSCRATCH), R(ECX));
+ if (signExtend)
+ MOVSX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
+ else
+ MOVZX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
}
-
- RET();
-
- return res;
}
OpArg Compiler::A_Comp_GetMemWBOffset()
{
- if (!(CurrentInstr.Instr & (1 << 25)))
- return Imm32(CurrentInstr.Instr & 0xFFF);
+ if (!(CurInstr.Instr & (1 << 25)))
+ {
+ u32 imm = CurInstr.Instr & 0xFFF;
+ return Imm32(imm);
+ }
else
{
- int op = (CurrentInstr.Instr >> 5) & 0x3;
- int amount = (CurrentInstr.Instr >> 7) & 0x1F;
- OpArg rm = MapReg(CurrentInstr.A_Reg(0));
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ int amount = (CurInstr.Instr >> 7) & 0x1F;
+ OpArg rm = MapReg(CurInstr.A_Reg(0));
bool carryUsed;
+
return Comp_RegShiftImm(op, amount, rm, false, carryUsed);
}
}
void Compiler::A_Comp_MemWB()
-{
- OpArg rn = MapReg(CurrentInstr.A_Reg(16));
- OpArg rd = MapReg(CurrentInstr.A_Reg(12));
- bool load = CurrentInstr.Instr & (1 << 20);
+{
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+ bool load = CurInstr.Instr & (1 << 20);
+ bool byte = CurInstr.Instr & (1 << 22);
+ int size = byte ? 8 : 32;
- MOV(32, R(RSCRATCH2), rn);
- if (CurrentInstr.Instr & (1 << 24))
+ if (CurInstr.Instr & (1 << 24))
{
OpArg offset = A_Comp_GetMemWBOffset();
- if (CurrentInstr.Instr & (1 << 23))
- ADD(32, R(RSCRATCH2), offset);
+ if (CurInstr.Instr & (1 << 23))
+ MOV_sum(32, ABI_PARAM1, rn, offset);
else
- SUB(32, R(RSCRATCH2), offset);
+ {
+ MOV(32, R(ABI_PARAM1), rn);
+ SUB(32, R(ABI_PARAM1), offset);
+ }
- if (CurrentInstr.Instr & (1 << 21))
- MOV(32, rn, R(RSCRATCH2));
+ if (CurInstr.Instr & (1 << 21))
+ MOV(32, rn, R(ABI_PARAM1));
}
-
- u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][2] : CurrentInstr.CodeCycles;
- MOV(32, R(RSCRATCH3), Imm32(cycles));
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
- SHR(32, R(RSCRATCH), Imm8(24));
- AND(32, R(RSCRATCH), Imm8(0xF));
- void** funcArray;
- if (load)
- funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
+ MOV(32, R(ABI_PARAM1), rn);
+
+ if (!(CurInstr.Instr & (1 << 24)))
+ {
+ OpArg offset = A_Comp_GetMemWBOffset();
+
+ if (CurInstr.Instr & (1 << 23))
+ ADD(32, rn, offset);
+ else
+ SUB(32, rn, offset);
+ }
+
+ Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
+ if (load && CurInstr.A_Reg(12) == 15)
+ {
+ if (byte)
+ printf("!!! LDRB PC %08X\n", R15);
+ else
+ {
+ if (Num == 1)
+ AND(32, rd, Imm8(0xFE)); // immediate is sign extended
+ Comp_JumpTo(rd.GetSimpleReg());
+ }
+ }
+}
+
+void Compiler::A_Comp_MemHalf()
+{
+ OpArg rn = MapReg(CurInstr.A_Reg(16));
+ OpArg rd = MapReg(CurInstr.A_Reg(12));
+
+ OpArg offset = CurInstr.Instr & (1 << 22)
+ ? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
+ : MapReg(CurInstr.A_Reg(0));
+
+ if (CurInstr.Instr & (1 << 24))
{
- funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
- MOV(32, R(R11), rd);
+ if (CurInstr.Instr & (1 << 23))
+ MOV_sum(32, ABI_PARAM1, rn, offset);
+ else
+ {
+ MOV(32, R(ABI_PARAM1), rn);
+ SUB(32, R(ABI_PARAM1), offset);
+ }
+
+ if (CurInstr.Instr & (1 << 21))
+ MOV(32, rn, R(ABI_PARAM1));
}
- CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
+ else
+ MOV(32, R(ABI_PARAM1), rn);
- if (load)
- MOV(32, R(RSCRATCH2), R(RSCRATCH));
+ int op = (CurInstr.Instr >> 5) & 0x3;
+ bool load = CurInstr.Instr & (1 << 20);
- if (!(CurrentInstr.Instr & (1 << 24)))
+ bool signExtend = false;
+ int size;
+ if (!load && op == 1)
+ size = 16;
+ else if (load)
{
- OpArg offset = A_Comp_GetMemWBOffset();
+ size = op == 2 ? 8 : 16;
+ signExtend = op > 1;
+ }
- if (CurrentInstr.Instr & (1 << 23))
+ if (!(CurInstr.Instr & (1 << 24)))
+ {
+ if (CurInstr.Instr & (1 << 23))
ADD(32, rn, offset);
else
SUB(32, rn, offset);
}
- if (load)
- MOV(32, rd, R(RSCRATCH2));
+ Comp_MemAccess(rd, signExtend, !load, size);
+
+ if (load && CurInstr.A_Reg(12) == 15)
+ printf("!!! MemHalf op PC %08X\n", R15);;
}
void Compiler::T_Comp_MemReg()
{
- OpArg rd = MapReg(CurrentInstr.T_Reg(0));
- OpArg rb = MapReg(CurrentInstr.T_Reg(3));
- OpArg ro = MapReg(CurrentInstr.T_Reg(6));
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rb = MapReg(CurInstr.T_Reg(3));
+ OpArg ro = MapReg(CurInstr.T_Reg(6));
- int op = (CurrentInstr.Instr >> 10) & 0x3;
+ int op = (CurInstr.Instr >> 10) & 0x3;
bool load = op & 0x2;
-
- MOV(32, R(RSCRATCH2), rb);
- ADD(32, R(RSCRATCH2), ro);
-
- u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles);
- MOV(32, R(RSCRATCH3), Imm32(cycles));
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
- SHR(32, R(RSCRATCH), Imm8(24));
- AND(32, R(RSCRATCH), Imm8(0xF));
- void** funcArray;
- if (load)
- funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
- else
- {
- funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
- MOV(32, R(R11), rd);
- }
- CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
+ bool byte = op & 0x1;
+
+ MOV_sum(32, ABI_PARAM1, rb, ro);
- if (load)
- MOV(32, rd, R(RSCRATCH));
+ Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
}
void Compiler::T_Comp_MemImm()
{
- // TODO: aufräumen!!!
- OpArg rd = MapReg(CurrentInstr.T_Reg(0));
- OpArg rb = MapReg(CurrentInstr.T_Reg(3));
-
- int op = (CurrentInstr.Instr >> 11) & 0x3;
- u32 offset = ((CurrentInstr.Instr >> 6) & 0x1F) * 4;
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rb = MapReg(CurInstr.T_Reg(3));
+
+ int op = (CurInstr.Instr >> 11) & 0x3;
bool load = op & 0x1;
+ bool byte = op & 0x2;
+ u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
- LEA(32, RSCRATCH2, MDisp(rb.GetSimpleReg(), offset));
- u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles);
- MOV(32, R(RSCRATCH3), Imm32(cycles));
- MOV(32, R(RSCRATCH), R(RSCRATCH2));
- SHR(32, R(RSCRATCH), Imm8(24));
- AND(32, R(RSCRATCH), Imm8(0xF));
- void** funcArray;
- if (load)
- funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
- else
- {
- funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
- MOV(32, R(R11), rd);
- }
- CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
+ LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
+
+ Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
+}
+
+void Compiler::T_Comp_MemRegHalf()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rb = MapReg(CurInstr.T_Reg(3));
+ OpArg ro = MapReg(CurInstr.T_Reg(6));
+
+ int op = (CurInstr.Instr >> 10) & 0x3;
+ bool load = op != 0;
+ int size = op != 1 ? 16 : 8;
+ bool signExtend = op & 1;
+
+ MOV_sum(32, ABI_PARAM1, rb, ro);
+
+ Comp_MemAccess(rd, signExtend, !load, size);
+}
+
+void Compiler::T_Comp_MemImmHalf()
+{
+ OpArg rd = MapReg(CurInstr.T_Reg(0));
+ OpArg rb = MapReg(CurInstr.T_Reg(3));
+
+ u32 offset = (CurInstr.Instr >> 5) & 0x3E;
+ bool load = CurInstr.Instr & (1 << 11);
+
+ LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
- if (load)
- MOV(32, rd, R(RSCRATCH));
+ Comp_MemAccess(rd, false, !load, 16);
}
} \ No newline at end of file
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index 41c46e1..32a9645 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -317,7 +317,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
else
{
u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)];
- if ((instr & 0xFE000000) == 0xFA000000)
+ if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
data = A_BLX_IMM;
if (data & A_ARM9Only && num != 0)
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 2a7edfd..4073536 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -566,6 +566,8 @@ void Reset()
KeyCnt = 0;
RCnt = 0;
+ ARMJIT::ResetBlocks();
+
NDSCart::Reset();
GBACart::Reset();
GPU::Reset();