aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/ARM.cpp40
-rw-r--r--src/ARM.h13
-rw-r--r--src/ARMJIT.cpp50
-rw-r--r--src/ARMJIT_RegisterCache.h33
-rw-r--r--src/ARMJIT_x64/ARMJIT_Compiler.cpp7
-rw-r--r--src/ARMJIT_x64/ARMJIT_LoadStore.cpp16
-rw-r--r--src/ARM_InstrInfo.cpp28
-rw-r--r--src/ARM_InstrInfo.h2
-rw-r--r--src/Config.cpp2
-rw-r--r--src/Config.h1
-rw-r--r--src/NDS.cpp4
11 files changed, 153 insertions, 43 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index 1e75301..2f4aa90 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -159,7 +159,7 @@ void ARM::DoSavestate(Savestate* file)
file->Var32((u32*)&Cycles);
//file->Var32((u32*)&CyclesToRun);
- file->Var32(&Halted);
+ file->Var32(&StopExecution);
file->VarArray(R, 16*sizeof(u32));
file->Var32(&CPSR);
@@ -632,16 +632,21 @@ void ARMv5::ExecuteJIT()
NDS::ARM9Timestamp += Cycles;
Cycles = 0;
- if (IRQ) TriggerIRQ();
- if (Halted)
+ if (StopExecution)
{
- bool idleLoop = Halted & 0x20;
- Halted &= ~0x20;
- if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
+ if (IRQ)
+ TriggerIRQ();
+
+ if (Halted || IdleLoop)
{
- NDS::ARM9Timestamp = NDS::ARM9Target;
+ bool idleLoop = IdleLoop;
+ IdleLoop = 0;
+ if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
+ {
+ NDS::ARM9Timestamp = NDS::ARM9Target;
+ }
+ break;
}
- break;
}
}
@@ -769,16 +774,21 @@ void ARMv4::ExecuteJIT()
Cycles = 0;
// TODO optimize this shit!!!
- if (IRQ) TriggerIRQ();
- if (Halted)
+ if (StopExecution)
{
- bool idleLoop = Halted & 0x20;
- Halted &= ~0x20;
- if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
+ if (IRQ)
+ TriggerIRQ();
+
+ if (Halted || IdleLoop)
{
- NDS::ARM7Timestamp = NDS::ARM7Target;
+ bool idleLoop = IdleLoop;
+ IdleLoop = 0;
+ if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
+ {
+ NDS::ARM7Timestamp = NDS::ARM7Target;
+ }
+ break;
}
- break;
}
}
diff --git a/src/ARM.h b/src/ARM.h
index b36120a..96dd857 100644
--- a/src/ARM.h
+++ b/src/ARM.h
@@ -112,9 +112,16 @@ public:
u32 Num;
s32 Cycles;
- u32 Halted;
-
- u32 IRQ; // nonzero to trigger IRQ
+ union // overlays the three one-byte stop flags with a single 32-bit word
+ {
+ struct
+ {
+ u8 Halted; // tested for nonzero and compared against 1 by the ExecuteJIT loops
+ u8 IRQ; // nonzero to trigger IRQ
+ u8 IdleLoop; // set by JIT-compiled idle branches, read and cleared again in ExecuteJIT
+ };
+ u32 StopExecution; // aliases all flag bytes above so one test covers them; also the word handed to DoSavestate
+ }; // NOTE(review): the fourth byte of StopExecution has no named member - confirm it is zeroed, since StopExecution is tested as a whole
u32 CodeRegion;
s32 CodeCycles;
diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp
index 19a5e70..0695b85 100644
--- a/src/ARMJIT.cpp
+++ b/src/ARMJIT.cpp
@@ -16,11 +16,13 @@
#include "GPU3D.h"
#include "SPU.h"
#include "Wifi.h"
+#include "NDSCart.h"
namespace ARMJIT
{
#define JIT_DEBUGPRINT(msg, ...)
+//#define JIT_DEBUGPRINT(msg, ...) printf(msg, ## __VA_ARGS__)
Compiler* compiler;
@@ -159,13 +161,17 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
}
}
-bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetAddr)
+bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
+ u32& linkAddr, u32& targetAddr)
{
if (thumb)
{
u32 r15 = instr.Addr + 4;
cond = 0xE;
+ link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG;
+ linkAddr = instr.Addr + 4;
+
if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12)))
{
targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9);
@@ -185,9 +191,18 @@ bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetA
targetAddr = r15 + offset;
return true;
}
+ else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14)
+ {
+ JIT_DEBUGPRINT("returning!\n");
+ targetAddr = lr;
+ return true;
+ }
}
else
{
+ link = instr.Info.Kind == ARMInstrInfo::ak_BL;
+ linkAddr = instr.Addr + 4;
+
cond = instr.Cond();
if (instr.Info.Kind == ARMInstrInfo::ak_BL
|| instr.Info.Kind == ARMInstrInfo::ak_B)
@@ -197,6 +212,12 @@ bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetA
targetAddr = r15 + offset;
return true;
}
+ else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX && instr.A_Reg(0) == 14)
+ {
+ JIT_DEBUGPRINT("returning!\n");
+ targetAddr = lr;
+ return true;
+ }
}
return false;
}
@@ -351,6 +372,8 @@ void CompileBlock(ARM* cpu)
CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
u32 lastSegmentStart = blockAddr;
+ u32 lr = 0; // return address of the last followed BL; only meaningful while hasLink is true, but always passed to DecodeBranch
+ bool hasLink = false;
do
{
@@ -413,6 +436,9 @@ void CompileBlock(ARM* cpu)
cpu->CurInstr = instrs[i].Instr;
cpu->CodeCycles = instrs[i].CodeCycles;
+ if (instrs[i].Info.DstRegs & (1 << 14))
+ hasLink = false;
+
if (thumb)
{
InterpretTHUMB[instrs[i].Info.Kind](cpu);
@@ -452,8 +478,9 @@ void CompileBlock(ARM* cpu)
{
bool hasBranched = cpu->R[15] != r15;
- u32 cond, target;
- bool staticBranch = DecodeBranch(thumb, instrs[i], cond, target);
+ bool link;
+ u32 cond, target, linkAddr;
+ bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, lr, link, linkAddr, target);
JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched);
if (staticBranch)
@@ -474,18 +501,24 @@ void CompileBlock(ARM* cpu)
if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart)
{
// we might have an idle loop
- u32 offset = (target - blockAddr) / (thumb ? 2 : 4);
- if (IsIdleLoop(instrs + offset, i - offset + 1))
+ u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4);
+ if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1))
{
instrs[i].BranchFlags |= branch_IdleBranch;
JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr);
}
}
- else if (hasBranched && (!thumb || cond == 0xE) && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
+ else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
{
u32 targetPseudoPhysical = cpu->Num == 0
? TranslateAddr<0>(target)
: TranslateAddr<1>(target);
+
+ if (link)
+ {
+ lr = linkAddr;
+ hasLink = true;
+ }
r15 = target + (thumb ? 2 : 4);
assert(r15 == cpu->R[15]);
@@ -520,7 +553,7 @@ void CompileBlock(ARM* cpu)
bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken));
if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond)
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
- } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted);
+ } while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80)));
u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
JitBlock* prevBlock = RestoreCandidates[restoreSlot];
@@ -713,6 +746,9 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
{
if ((addr & 0xFF000000) == 0x04000000)
{
+ if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
+ return (void*)NDSCart::ReadROMData;
+
/*
unfortunately we can't map GPU2D this way
since it's hidden inside an object
diff --git a/src/ARMJIT_RegisterCache.h b/src/ARMJIT_RegisterCache.h
index ed6a2b7..2222bc2 100644
--- a/src/ARMJIT_RegisterCache.h
+++ b/src/ARMJIT_RegisterCache.h
@@ -93,10 +93,12 @@ public:
void Prepare(bool thumb, int i)
{
+ FetchedInstr instr = Instrs[i];
+
if (LoadedRegs & (1 << 15))
UnloadRegister(15);
- BitSet16 invalidedLiterals(LiteralsLoaded & Instrs[i].Info.DstRegs);
+ BitSet16 invalidedLiterals(LiteralsLoaded & instr.Info.DstRegs);
for (int reg : invalidedLiterals)
UnloadLiteral(reg);
@@ -108,6 +110,7 @@ public:
{
BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
futureNeeded |= regsNeeded.m_val;
+ regsNeeded &= BitSet16(~Instrs[j].Info.NotStrictlyNeeded);
for (int reg : regsNeeded)
ranking[reg]++;
}
@@ -117,8 +120,8 @@ public:
for (int reg : neverNeededAgain)
UnloadRegister(reg);
- FetchedInstr Instr = Instrs[i];
- u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs;
+ u16 necessaryRegs = ((instr.Info.SrcRegs & ~(1 << 15)) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
+ u16 writeRegs = instr.Info.DstRegs & ~instr.Info.NotStrictlyNeeded;
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
if (needToBeLoaded != BitSet16(0))
{
@@ -143,13 +146,31 @@ public:
loadedSet.m_val = LoadedRegs;
}
+ // we don't need to load a value which is always going to be overwritten
BitSet16 needValueLoaded(needToBeLoaded);
- if (thumb || Instr.Cond() >= 0xE)
- needValueLoaded = BitSet16(Instr.Info.SrcRegs);
+ if (thumb || instr.Cond() >= 0xE)
+ needValueLoaded = BitSet16(instr.Info.SrcRegs);
for (int reg : needToBeLoaded)
LoadRegister(reg, needValueLoaded[reg]);
+ }
+ {
+ BitSet16 loadedSet(LoadedRegs);
+ BitSet16 loadRegs(instr.Info.NotStrictlyNeeded & futureNeeded & ~LoadedRegs);
+ if (loadRegs && loadedSet.Count() < NativeRegsAvailable)
+ {
+ int left = NativeRegsAvailable - loadedSet.Count();
+ for (int reg : loadRegs)
+ {
+ if (left-- == 0)
+ break;
+
+ writeRegs |= (1 << reg) & instr.Info.DstRegs;
+ LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
+ }
+ }
}
- DirtyRegs |= Instr.Info.DstRegs & ~(1 << 15);
+
+ DirtyRegs |= writeRegs & ~(1 << 15);
}
static const Reg NativeRegAllocOrder[];
diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
index a994d34..fd38724 100644
--- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp
+++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp
@@ -364,7 +364,7 @@ void Compiler::Reset()
void Compiler::Comp_SpecialBranchBehaviour()
{
if (CurInstr.BranchFlags & branch_IdleBranch)
- OR(32, MDisp(RCPU, offsetof(ARM, Halted)), Imm8(0x20));
+ OR(8, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1)); // IdleLoop is a single byte, so use a byte-wide read-modify-write
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
{
@@ -402,6 +402,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
{
CurInstr = instrs[i];
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
+ CodeRegion = R15 >> 24;
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
@@ -571,8 +572,6 @@ void Compiler::Comp_AddCycles_CDI()
Comp_AddCycles_CD();
else
{
- IrregularCycles = true;
-
s32 cycles;
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
@@ -642,7 +641,7 @@ void Compiler::Comp_AddCycles_CD()
IrregularCycles = true;
}
- if (!Thumb && CurInstr.Cond() < 0xE)
+ if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
index eb01c87..3799774 100644
--- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
+++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp
@@ -1,5 +1,6 @@
#include "ARMJIT_Compiler.h"
+#include "../Config.h"
using namespace Gen;
@@ -290,7 +291,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (size == 16)
addressMask = ~1;
- if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
+ if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
Comp_MemLoadLiteral(size, rd, addr);
@@ -309,6 +310,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
OpArg rdMapped = MapReg(rd);
OpArg rnMapped = MapReg(rn);
+ if (Thumb && rn == 15)
+ rnMapped = Imm32(R15 & ~0x2);
bool inlinePreparation = Num == 1;
u32 constLocalROR32 = 4;
@@ -317,7 +320,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
: MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
- if ((rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
+ if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
{
u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
@@ -749,9 +752,12 @@ void Compiler::T_Comp_MemImmHalf()
void Compiler::T_Comp_LoadPCRel()
{
- u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
-
- Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
+ u32 offset = (CurInstr.Instr & 0xFF) << 2;
+ u32 addr = (R15 & ~0x2) + offset;
+ if (Config::JIT_LiteralOptimisations)
+ Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
+ else
+ Comp_MemAccess(CurInstr.T_Reg(8), 15, ComplexOperand(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp
index 1261bbe..8f8bd35 100644
--- a/src/ARM_InstrInfo.cpp
+++ b/src/ARM_InstrInfo.cpp
@@ -365,6 +365,21 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
res.SpecialKind = special_LoadLiteral;
+ if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
+ {
+ u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
+ res.NotStrictlyNeeded |= set;
+ res.DstRegs |= set;
+ }
+ if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
+ {
+ u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
+ if (res.Kind == tk_PUSH && instr & (1 << 8))
+ set |= (1 << 14);
+ res.NotStrictlyNeeded |= set;
+ res.SrcRegs |= set;
+ }
+
res.EndBlock |= res.Branches();
if (res.Kind == tk_BCOND)
@@ -466,6 +481,19 @@ Info Decode(bool thumb, u32 num, u32 instr)
if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
res.SpecialKind = special_LoadLiteral;
+
+ if (res.Kind == ak_LDM)
+ {
+ u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
+ res.DstRegs |= set;
+ res.NotStrictlyNeeded |= set;
+ }
+ if (res.Kind == ak_STM)
+ {
+ u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
+ res.SrcRegs |= set;
+ res.NotStrictlyNeeded |= set;
+ }
if ((instr >> 28) < 0xE)
{
diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h
index c032a4f..2732181 100644
--- a/src/ARM_InstrInfo.h
+++ b/src/ARM_InstrInfo.h
@@ -236,7 +236,7 @@ enum
struct Info
{
- u16 DstRegs, SrcRegs;
+ u16 DstRegs, SrcRegs, NotStrictlyNeeded;
u16 Kind;
u8 SpecialKind;
diff --git a/src/Config.cpp b/src/Config.cpp
index c117a41..a7d78cd 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -41,6 +41,7 @@ char DSiNANDPath[1024];
bool JIT_Enable = false;
int JIT_MaxBlockSize = 12;
bool JIT_BrancheOptimisations = true;
+bool JIT_LiteralOptimisations = true;
#endif
ConfigEntry ConfigFile[] =
@@ -58,6 +59,7 @@ ConfigEntry ConfigFile[] =
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
{"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
+ {"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0}, // gates the JIT literal-load fast paths
#endif
{"", -1, NULL, 0, NULL, 0}
diff --git a/src/Config.h b/src/Config.h
index c9013aa..1fcd9bb 100644
--- a/src/Config.h
+++ b/src/Config.h
@@ -55,6 +55,7 @@ extern char DSiNANDPath[1024];
extern bool JIT_Enable;
extern int JIT_MaxBlockSize;
extern bool JIT_BrancheOptimisations;
+extern bool JIT_LiteralOptimisations;
#endif
}
diff --git a/src/NDS.cpp b/src/NDS.cpp
index 0cfbd1a..7b6a450 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -1211,9 +1211,9 @@ void UpdateIRQ(u32 cpu)
if (IME[cpu] & 0x1)
{
- arm->IRQ = IE[cpu] & IF[cpu];
+ arm->IRQ = !!(IE[cpu] & IF[cpu]);
if ((ConsoleType == 1) && cpu)
- arm->IRQ |= (IE2 & IF2);
+ arm->IRQ |= !!(IE2 & IF2);
}
else
{