aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRSDuck <rsduck@users.noreply.github.com>2019-07-11 16:22:47 +0200
committerRSDuck <rsduck@users.noreply.github.com>2020-06-16 11:53:09 +0200
commit83bd863361e19bc5456bbaaa3d0ec0df3c1731c0 (patch)
treed55df8ddd8619148bbe0ad9f1c5ac95b0d9416c2 /src
parent27cbc821b139b74142630c57f7da11478a052282 (diff)
jit: branch instructions
Diffstat (limited to 'src')
-rw-r--r--src/ARM.cpp12
-rw-r--r--src/ARMJIT.cpp4
-rw-r--r--src/ARMJIT.h2
-rw-r--r--src/ARMJIT_x64/ARMJIT_Branch.cpp267
-rw-r--r--src/ARMJIT_x64/ARMJIT_Compiler.cpp185
-rw-r--r--src/ARMJIT_x64/ARMJIT_Compiler.h30
-rw-r--r--src/ARMJIT_x64/ARMJIT_LoadStore.cpp42
-rw-r--r--src/ARM_InstrInfo.cpp6
-rw-r--r--src/ARM_InstrInfo.h1
-rw-r--r--src/CMakeLists.txt1
10 files changed, 363 insertions, 187 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index df58ce3..3c2253c 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -564,11 +564,8 @@ void ARMv5::Execute()
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
- if (block == NULL)
- ARMJIT::CompileBlock(this);
- else
- Cycles += block();
-
+ Cycles += (block ? block : ARMJIT::CompileBlock(this))();
+
// TODO optimize this shit!!!
if (Halted)
{
@@ -650,10 +647,7 @@ void ARMv4::Execute()
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
- if (block == NULL)
- ARMJIT::CompileBlock(this);
- else
- Cycles += block();
+ Cycles += (block ? block : ARMJIT::CompileBlock(this))();
// TODO optimize this shit!!!
if (Halted)
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 6afa967..47b425f 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -121,7 +121,7 @@ void DeInit()
delete compiler;
}
-void CompileBlock(ARM* cpu)
+CompiledBlock CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
@@ -171,6 +171,8 @@ void CompileBlock(ARM* cpu)
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block);
+
+ return block;
}
void ResetBlocks()
diff --git a/src/ARMJIT.h b/src/ARMJIT.h
index 71188f9..45bb4ed 100644
--- a/src/ARMJIT.h
+++ b/src/ARMJIT.h
@@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
void Init();
void DeInit();
-void CompileBlock(ARM* cpu);
+CompiledBlock CompileBlock(ARM* cpu);
void ResetBlocks();
diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp
new file mode 100644
index 0000000..fb2acba
--- /dev/null
+++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp
@@ -0,0 +1,267 @@
+#include "ARMJIT_Compiler.h"
+
+using namespace Gen;
+
+namespace ARMJIT
+{
+
+void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
+{
+    // Emits a branch to the compile-time constant target `addr` (bit 0 set selects Thumb).
+    // The T bit, the pipeline refill (R15, NextInstr[0..1]), the code timing state and the fetch
+    // cycles are all resolved here; this is not completely safe, the preloaded opcodes may go stale.
+    // forceNonConstantCycles: add the fetch cycles in emitted code instead of ConstantCycles.
+    u32 newPC;
+    u32 nextInstr[2];
+    u32 cycles = 0;
+    bool setupRegion = false;
+    // keep the T bit (0x20) of the cached CPSR in sync with the instruction set of the target
+    if (addr & 0x1 && !Thumb)
+    {
+        CPSRDirty = true;
+        OR(32, R(RCPSR), Imm8(0x20));
+    }
+    else if (!(addr & 0x1) && Thumb)
+    {
+        CPSRDirty = true;
+        AND(32, R(RCPSR), Imm32(~0x20));
+    }
+
+    if (Num == 0)
+    {
+        ARMv5* cpu9 = (ARMv5*)CurCPU;
+
+        u32 oldregion = R15 >> 24;
+        u32 newregion = addr >> 24;
+
+        u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
+        cpu9->RegionCodeCycles = regionCodeCycles;
+        // the same value is stored again by the generated code when the branch executes
+        MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
+
+        setupRegion = newregion != oldregion;
+        if (setupRegion)
+            cpu9->SetupCodeMem(addr);
+
+        if (addr & 0x1)
+        {
+            addr &= ~0x1;
+            newPC = addr+2;
+
+            // two-opcodes-at-once fetch
+            // doesn't matter if we put garbage in the MSbs there
+            if (addr & 0x2)
+            {
+                nextInstr[0] = cpu9->CodeRead32(addr-2, true) >> 16;
+                cycles += cpu9->CodeCycles;
+                nextInstr[1] = cpu9->CodeRead32(addr+2, false);
+                cycles += cpu9->CodeCycles;
+            }
+            else
+            {
+                nextInstr[0] = cpu9->CodeRead32(addr, true);
+                nextInstr[1] = nextInstr[0] >> 16;
+                cycles += cpu9->CodeCycles;
+            }
+        }
+        else
+        {
+            addr &= ~0x3;
+            newPC = addr+4;
+
+            nextInstr[0] = cpu9->CodeRead32(addr, true);
+            cycles += cpu9->CodeCycles;
+            nextInstr[1] = cpu9->CodeRead32(addr+4, false);
+            cycles += cpu9->CodeCycles;
+        }
+    }
+    else
+    {
+        ARMv4* cpu7 = (ARMv4*)CurCPU;
+
+        u32 codeRegion = addr >> 24;
+        u32 codeCycles = addr >> 15; // cheato
+
+        cpu7->CodeRegion = codeRegion;
+        cpu7->CodeCycles = codeCycles;
+        // mirror both values at run time; each store has to target its own field
+        MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
+        MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
+
+        if (addr & 0x1)
+        {
+            addr &= ~0x1;
+            newPC = addr+2;
+
+            nextInstr[0] = cpu7->CodeRead16(addr);
+            nextInstr[1] = cpu7->CodeRead16(addr+2);
+            cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
+        }
+        else
+        {
+            addr &= ~0x3;
+            newPC = addr+4;
+
+            nextInstr[0] = cpu7->CodeRead32(addr);
+            nextInstr[1] = cpu7->CodeRead32(addr+4);
+            cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
+        }
+    }
+    // refill the pipeline state with the values determined at compile time
+    MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
+    MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(nextInstr[0]));
+    MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(nextInstr[1]));
+    if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
+        ConstantCycles += cycles;
+    else
+        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+    // the target lies in another 16 MB region: repeat SetupCodeMem when the branch executes
+    if (setupRegion)
+    {
+        MOV(64, R(ABI_PARAM1), R(RCPU)); // RCPU holds the 64 bit `this` pointer, a 32 bit move would truncate it
+        MOV(32, R(ABI_PARAM2), Imm32(newPC));
+        CALL((void*)&ARMv5::SetupCodeMem);
+    }
+}
+
+void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) // emits a run-time call to ARMv5::JumpTo / ARMv4::JumpTo with the target held in host register addr
+{
+ BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFFFF0000); // NOTE(review): 0xFFFF0000 has no bit in the low 16, so this set looks always empty - was 0xFF00 intended? confirm against the type of DirtyRegs
+ bool previouslyDirty = CPSRDirty;
+ SaveCPSR();
+ // with restoreCPSR the registers in hiRegsLoaded are unloaded or saved before the call - presumably because JumpTo may change them; TODO confirm
+ if (restoreCPSR)
+ {
+ if (Thumb || CurInstr.Cond() >= 0xE)
+ {
+ for (int reg : hiRegsLoaded)
+ RegCache.UnloadRegister(reg);
+ }
+ else
+ {
+ // the ugly way...
+ // we only save them, to load and save them again
+ for (int reg : hiRegsLoaded)
+ SaveReg(reg, RegCache.Mapping[reg]);
+ }
+ }
+ // arguments of JumpTo: the cpu object (RCPU), the target address, the restoreCPSR flag
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ MOV(32, R(ABI_PARAM2), R(addr));
+ if (!restoreCPSR)
+ XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
+ else
+ MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
+ if (Num == 0)
+ CALL((void*)&ARMv5::JumpTo);
+ else
+ CALL((void*)&ARMv4::JumpTo);
+ // conditional ARM instruction: the registers were only saved above, so they are loaded back into their host registers
+ if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
+ {
+ for (int reg : hiRegsLoaded)
+ LoadReg(reg, RegCache.Mapping[reg]);
+ }
+ // LoadCPSR asserts that CPSRDirty is clear, hence the flag is only put back afterwards
+ if (previouslyDirty)
+ LoadCPSR();
+ CPSRDirty = previouslyDirty;
+}
+
+void Compiler::A_Comp_BranchImm()
+{
+    // ARM B, BL and BLX with an immediate offset
+    const u32 bit24 = (CurInstr.Instr >> 24) & 1; // link bit for B/BL, halfword bit for BLX
+    const bool isBLX = CurInstr.Cond() == 0xF;
+    // sign extended 24 bit immediate, scaled by four
+    const s32 disp = (s32)(CurInstr.Instr << 8) >> 6;
+
+    u32 dest = R15 + disp;
+    if (isBLX)
+        dest += (bit24 << 1) + 1; // optional halfword step, bit 0 requests Thumb
+
+    // BLX always links, BL links through bit 24
+    if (isBLX || bit24)
+        MOV(32, MapReg(14), Imm32(R15 - 4));
+
+    Comp_JumpTo(dest);
+}
+
+void Compiler::A_Comp_BranchXchangeReg() // ARM BX / BLX with the target taken from a register
+{
+    MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(0))); // copy the target first: BLX lr must jump to the old lr, and the operand need not be a plain host register
+    if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
+        MOV(32, MapReg(14), Imm32(R15 - 4));
+    Comp_JumpTo(RSCRATCH);
+}
+
+void Compiler::T_Comp_BCOND() // Thumb conditional branch: emits the condition test, the taken path and the not-taken path
+{
+ u32 cond = (CurInstr.Instr >> 8) & 0xF;
+ FixupBranch skipExecute = CheckCondition(cond); // forward branch that is taken when the condition fails
+ // sign extended 8 bit immediate scaled by two; the +1 below sets bit 0, which Comp_JumpTo(u32) reads as "target is Thumb"
+ s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
+ Comp_JumpTo(R15 + offset + 1, true); // true: the refill cycles are added by emitted code, so only the taken path pays them
+ // taken path jumps over the not-taken path, which only adds the cycles of this instruction
+ FixupBranch skipFailed = J();
+ SetJumpTarget(skipExecute);
+ Comp_AddCycles_C(true);
+ SetJumpTarget(skipFailed);
+}
+
+void Compiler::T_Comp_B() // Thumb unconditional branch
+{
+    const u32 imm11 = CurInstr.Instr & 0x7FF; // low 11 bits hold the signed halfword offset
+    Comp_JumpTo(R15 + ((s32)(imm11 << 21) >> 20) + 1); // +1 sets bit 0, i.e. the target is Thumb
+}
+
+void Compiler::T_Comp_BranchXchangeReg() // Thumb BX / BLX with the target taken from a register
+{
+    bool link = CurInstr.Instr & (1 << 7);
+    if (link && Num == 1)
+    {
+        printf("BLX unsupported on ARM7!!!\n");
+        return;
+    }
+    // copy the target before lr is written: BLX lr must jump to the old lr, and the operand need not be a plain host register
+    MOV(32, R(RSCRATCH), MapReg(CurInstr.A_Reg(3)));
+    if (link)
+        MOV(32, MapReg(14), Imm32(R15 - 1));
+    Comp_JumpTo(RSCRATCH);
+}
+
+void Compiler::T_Comp_BL_LONG_1() // first half of the Thumb BL/BLX pair
+{
+    const u32 imm11 = CurInstr.Instr & 0x7FF;
+    MOV(32, MapReg(14), Imm32(R15 + ((s32)(imm11 << 21) >> 9))); // lr = R15 + (sign extended imm11 << 12)
+    Comp_AddCycles_C();
+}
+
+void Compiler::T_Comp_BL_LONG_2() // second half of the Thumb BL/BLX pair: jumps to lr + offset and writes the link value into lr
+{
+ OpArg lr = MapReg(14);
+ s32 offset = (CurInstr.Instr & 0x7FF) << 1; // unsigned 11 bit immediate scaled by two
+ LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset)); // target = lr + offset, computed before lr is overwritten below
+ MOV(32, lr, Imm32((R15 - 2) | 1)); // link value with bit 0 set
+ if (Num == 1 || CurInstr.Instr & (1 << 12)) // bit 0 of the target is left clear only for Num == 0 with bit 12 clear; presumably that is the BLX form switching to ARM - TODO confirm
+ OR(32, R(RSCRATCH), Imm8(1));
+ Comp_JumpTo(RSCRATCH);
+}
+
+void Compiler::T_Comp_BL_Merged(FetchedInstr part1)
+{
+    // both halves of a Thumb BL/BLX are known here, so the target is computed at compile time
+    assert(part1.Info.Kind == ARMInstrInfo::tk_BL_LONG_1);
+    Comp_AddCycles_C();
+
+    const s32 upper = (s32)((part1.Instr & 0x7FF) << 21) >> 9;
+    const u32 lower = (CurInstr.Instr & 0x7FF) << 1;
+    const bool staysThumb = Num == 1 || CurInstr.Instr & (1 << 12);
+    // R15 - 2 serves both as the base of the target and, with bit 0 set, as the link value
+    const u32 dest = ((R15 - 2) + upper + lower) | (staysThumb ? 1 : 0);
+
+    MOV(32, MapReg(14), Imm32((R15 - 2) | 1));
+    Comp_JumpTo(dest);
+}
+
+} \ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index 4fe0c70..6799a90 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -50,50 +50,6 @@ Compiler::Compiler()
ResetStart = GetWritableCodePtr();
}
-void* Compiler::Gen_ChangeCPSRRoutine()
-{
- void* res = (void*)GetWritableCodePtr();
-
- MOV(32, R(RSCRATCH), R(RCPSR));
- AND(32, R(RSCRATCH), Imm8(0x1F));
- CMP(32, R(RSCRATCH), Imm8(0x11));
- FixupBranch fiq = J_CC(CC_E);
- CMP(32, R(RSCRATCH), Imm8(0x12));
- FixupBranch irq = J_CC(CC_E);
- CMP(32, R(RSCRATCH), Imm8(0x13));
- FixupBranch svc = J_CC(CC_E);
- CMP(32, R(RSCRATCH), Imm8(0x17));
- FixupBranch abt = J_CC(CC_E);
- CMP(32, R(RSCRATCH), Imm8(0x1B));
- FixupBranch und = J_CC(CC_E);
-
- SetJumpTarget(fiq);
-
- SetJumpTarget(irq);
-
- SetJumpTarget(svc);
-
- SetJumpTarget(abt);
-
- SetJumpTarget(und);
-
- return res;
-}
-
-DataRegion Compiler::ClassifyAddress(u32 addr)
-{
- if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase)
- return dataRegionDTCM;
- switch (addr & 0xFF000000)
- {
- case 0x02000000: return dataRegionMainRAM;
- case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM;
- case 0x04000000: return dataRegionIO;
- case 0x06000000: return dataRegionVRAM;
- }
- return dataRegionGeneric;
-}
-
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@@ -123,6 +79,29 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg)
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
}
+// emits a test of ARM condition code cond against the cached CPSR and returns the forward branch that is taken when the condition fails; invalidates RSCRATCH and RSCRATCH3 (only on the cond >= 0x8 path)
+Gen::FixupBranch Compiler::CheckCondition(u32 cond)
+{
+ if (cond >= 0x8)
+ {
+ static_assert(RSCRATCH3 == ECX); // the SHL below takes its count from RSCRATCH3, and x86 only accepts CL as a variable shift count
+ MOV(32, R(RSCRATCH3), R(RCPSR));
+ SHR(32, R(RSCRATCH3), Imm8(28)); // top four bits of CPSR
+ MOV(32, R(RSCRATCH), Imm32(1));
+ SHL(32, R(RSCRATCH), R(RSCRATCH3)); // one-hot mask selected by those four bits
+ TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
+ // zero result: the table entry for cond has no bit for the current flag combination, so the caller's code is skipped
+ return J_CC(CC_Z);
+ }
+ else
+ {
+ // could have used a LUT, but then where would be the fun?
+ TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))))); // picks a single CPSR bit: 30 for cond 0-1, 29 for cond 2-3, 31 for cond 4-5, 28 for cond 6-7
+ // even cond passes when that bit is set (skip on zero), odd cond passes when it is clear (skip on non-zero)
+ return J_CC(cond & 1 ? CC_NZ : CC_Z);
+ }
+}
+
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
{
if (IsAlmostFull())
@@ -140,6 +119,8 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
CodeRegion = cpu->CodeRegion;
CurCPU = cpu;
+ bool mergedThumbBL = false;
+
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
MOV(64, R(RCPU), ImmPtr(cpu));
@@ -167,17 +148,10 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
}
- if (comp == NULL || CurInstr.Info.Branches())
+ if (comp == NULL)
SaveCPSR();
}
-
- // run interpreter
- cpu->CodeCycles = CurInstr.CodeCycles;
- cpu->R[15] = R15;
- cpu->CurInstr = CurInstr.Instr;
- cpu->NextInstr[0] = CurInstr.NextInstr[0];
- cpu->NextInstr[1] = CurInstr.NextInstr[1];
-
+
if (comp != NULL)
RegCache.Prepare(i);
else
@@ -185,58 +159,44 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
if (Thumb)
{
- u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
- if (comp == NULL)
+ if (i < instrsCount - 1 && CurInstr.Info.Kind == ARMInstrInfo::tk_BL_LONG_1
+ && instrs[i + 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_2)
+ mergedThumbBL = true;
+ else
{
- MOV(64, R(ABI_PARAM1), R(RCPU));
+ u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
+ if (comp == NULL)
+ {
+ MOV(64, R(ABI_PARAM1), R(RCPU));
- ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
+ ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
+ }
+ else if (mergedThumbBL)
+ T_Comp_BL_Merged(instrs[i - 1]);
+ else
+ (this->*comp)();
}
- else
- (this->*comp)();
-
- ARMInterpreter::THUMBInstrTable[icode](cpu);
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
- MOV(64, R(ABI_PARAM1), R(RCPU));
- ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
-
- ARMInterpreter::A_BLX_IMM(cpu);
+ if (comp)
+ (this->*comp)();
+ else
+ {
+ MOV(64, R(ABI_PARAM1), R(RCPU));
+ ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
+ }
}
else if (cond == 0xF)
- {
Comp_AddCycles_C();
- cpu->AddCycles_C();
- }
else
{
FixupBranch skipExecute;
if (cond < 0xE)
- {
- if (cond >= 0x8)
- {
- static_assert(RSCRATCH3 == ECX);
- MOV(32, R(RSCRATCH3), R(RCPSR));
- SHR(32, R(RSCRATCH3), Imm8(28));
- MOV(32, R(RSCRATCH), Imm32(1));
- SHL(32, R(RSCRATCH), R(RSCRATCH3));
- TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
-
- skipExecute = J_CC(CC_Z);
- }
- else
- {
- // could have used a LUT, but then where would be the fun?
- TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
-
- skipExecute = J_CC(cond & 1 ? CC_NZ : CC_Z);
- }
-
- }
+ skipExecute = CheckCondition(cond);
u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
if (comp == NULL)
@@ -258,19 +218,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
SetJumpTarget(skipFailed);
}
-
- if (cpu->CheckCondition(cond))
- ARMInterpreter::ARMInstrTable[icode](cpu);
- else
- cpu->AddCycles_C();
}
}
- /*
- we don't need to collect the interpreted cycles,
- since cpu->Cycles is taken into account by the dispatcher.
- */
-
if (comp == NULL && i != instrsCount - 1)
LoadCPSR();
}
@@ -367,7 +317,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
// LDM/STM
NULL, NULL,
// Branch
- NULL, NULL, NULL, NULL, NULL,
+ A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg,
// system stuff
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
@@ -389,7 +339,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
// pc/sp relative
T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP,
// LDR pcrel
- NULL,
+ T_Comp_LoadPCRel,
// LDR/STR reg offset
T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg,
// LDR/STR sign extended, half
@@ -399,25 +349,27 @@ CompileFunc Compiler::GetCompFunc(int kind)
// LDR/STR half imm offset
T_Comp_MemImmHalf, T_Comp_MemImmHalf,
// LDR/STR sp rel
- NULL, NULL,
+ T_Comp_MemSPRel, T_Comp_MemSPRel,
// PUSH/POP
- NULL, NULL,
+ T_Comp_PUSH_POP, T_Comp_PUSH_POP,
// LDMIA, STMIA
- NULL, NULL,
- NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL
+ T_Comp_LDMIA_STMIA, T_Comp_LDMIA_STMIA,
+ // Branch
+ T_Comp_BCOND, T_Comp_BranchXchangeReg, T_Comp_BranchXchangeReg, T_Comp_B, T_Comp_BL_LONG_1, T_Comp_BL_LONG_2,
+ // Unk, SVC
+ NULL, NULL
};
return Thumb ? T_Comp[kind] : A_Comp[kind];
}
-void Compiler::Comp_AddCycles_C()
+void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
- if (CurInstr.Cond() < 0xE)
+ if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
@@ -429,25 +381,10 @@ void Compiler::Comp_AddCycles_CI(u32 i)
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
- if (CurInstr.Cond() < 0xE)
+ if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
-void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
-{
- // potentieller Bug: falls ein Register das noch gecacht ist, beim Modeswitch gespeichert
- // wird der alte Wert gespeichert
- SaveCPSR();
-
- MOV(64, R(ABI_PARAM1), R(RCPU));
- MOV(32, R(ABI_PARAM2), R(addr));
- MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
- if (Num == 0)
- CALL((void*)&ARMv5::JumpTo);
- else
- CALL((void*)&ARMv4::JumpTo);
-}
-
} \ No newline at end of file
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h
index a751737..45b488a 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.h
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.h
@@ -22,19 +22,6 @@ class Compiler;
typedef void (Compiler::*CompileFunc)();
-enum DataRegion
-{
- dataRegionGeneric, // hey, that's me!
- dataRegionMainRAM,
- dataRegionSWRAM,
- dataRegionVRAM,
- dataRegionIO,
- dataRegionExclusive,
- dataRegionsCount,
- dataRegionDTCM = dataRegionExclusive,
- dataRegionWRAM7 = dataRegionExclusive,
-};
-
class Compiler : public Gen::X64CodeBlock
{
public:
@@ -49,8 +36,9 @@ private:
CompileFunc GetCompFunc(int kind);
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
+ void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
- void Comp_AddCycles_C();
+ void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 i);
enum
@@ -63,8 +51,6 @@ private:
opInvertOp2 = 1 << 5,
};
- DataRegion ClassifyAddress(u32 addr);
-
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
@@ -73,6 +59,9 @@ private:
void A_Comp_MemHalf();
void A_Comp_LDM_STM();
+ void A_Comp_BranchImm();
+ void A_Comp_BranchXchangeReg();
+
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
@@ -91,6 +80,13 @@ private:
void T_Comp_PUSH_POP();
void T_Comp_LDMIA_STMIA();
+ void T_Comp_BCOND();
+ void T_Comp_B();
+ void T_Comp_BranchXchangeReg();
+ void T_Comp_BL_LONG_1();
+ void T_Comp_BL_LONG_2();
+ void T_Comp_BL_Merged(FetchedInstr prefix);
+
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
@@ -119,6 +115,8 @@ private:
void LoadCPSR();
void SaveCPSR();
+ Gen::FixupBranch CheckCondition(u32 cond);
+
Gen::OpArg MapReg(int reg)
{
if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index 20e1893..69b324c 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -462,38 +462,10 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
int regsCount = regs.Count();
- const u8 userModeOffsets[] =
- {
- offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
- offsetof(ARM, R[12]), offsetof(ARM, R[13]), offsetof(ARM, R[14]), 0,
-
- offsetof(ARM, R_FIQ[0]), offsetof(ARM, R_FIQ[1]), offsetof(ARM, R_FIQ[2]), offsetof(ARM, R_FIQ[3]),
- offsetof(ARM, R_FIQ[4]), offsetof(ARM, R_FIQ[5]), offsetof(ARM, R_FIQ[6]), 0,
-
- offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
- offsetof(ARM, R[12]), offsetof(ARM, R_IRQ[13]), offsetof(ARM, R_IRQ[14]), 0,
-
- offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
- offsetof(ARM, R[12]), offsetof(ARM, R_SVC[13]), offsetof(ARM, R_SVC[14]), 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-
- offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
- offsetof(ARM, R[12]), offsetof(ARM, R_ABT[13]), offsetof(ARM, R_ABT[14]), 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-
- offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
- offsetof(ARM, R[12]), offsetof(ARM, R_UND[13]), offsetof(ARM, R_UND[14]), 0,
- };
-
if (decrement)
{
MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4));
- preinc = !preinc;
+ preinc ^= true;
}
else
MOV(32, R(ABI_PARAM1), rb);
@@ -516,16 +488,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
if (regs[reg])
{
- if (usermode && reg >= 8 && reg < 15)
+ /*if (usermode && reg >= 8 && reg < 15)
{
MOV(32, R(RSCRATCH2), R(RCPSR));
AND(32, R(RSCRATCH2), Imm8(0x1F));
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
- MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
+ MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
POP(RSCRATCH);
MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH));
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else */if (RegCache.Mapping[reg] == INVALID_REG)
{
assert(reg != 15);
@@ -552,16 +524,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
for (int reg : regs)
{
- if (usermode && reg >= 8 && reg < 15)
+ /*if (usermode && reg >= 8 && reg < 15)
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
- MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
+ MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH));
PUSH(RSCRATCH);
}
- else if (RegCache.Mapping[reg] == INVALID_REG)
+ else */if (RegCache.Mapping[reg] == INVALID_REG)
{
LoadReg(reg, RSCRATCH);
PUSH(RSCRATCH);
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index c519229..b8dff00 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -255,7 +255,7 @@ const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA);
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
-const u32 T_BLX_REG = T_BranchAlways | T_ReadR15 | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
+const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_B = T_BranchAlways | tk(tk_B);
const u32 T_BL_LONG_1 = T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_1);
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_2);
@@ -301,6 +301,10 @@ Info Decode(bool thumb, u32 num, u32 instr)
res.DstRegs |= (1 << 13);
if (data & T_ReadR15)
res.SrcRegs |= (1 << 15);
+ if (data & T_WriteR14)
+ res.DstRegs |= (1 << 14);
+ if (data & T_ReadR14)
+ res.SrcRegs |= (1 << 14);
if (data & T_BranchAlways)
res.DstRegs |= (1 << 15);
diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h
index dcd938b..51dcfa2 100644
--- a/src/ARM_InstrInfo.h
+++ b/src/ARM_InstrInfo.h
@@ -202,6 +202,7 @@ enum
tk_POP,
tk_LDMIA,
tk_STMIA,
+
tk_BCOND,
tk_BX,
tk_BLX_REG,
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index ae04ffb..75fa42c 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,6 +54,7 @@ add_library(core STATIC
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
+ ARMJIT_x64/ARMJIT_Branch.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp