diff options
-rw-r--r-- | src/ARM.cpp | 312 | ||||
-rw-r--r-- | src/ARM.h | 369 | ||||
-rw-r--r-- | src/ARMInterpreter.cpp | 13 | ||||
-rw-r--r-- | src/ARMInterpreter_ALU.cpp | 199 | ||||
-rw-r--r-- | src/ARMInterpreter_Branch.cpp | 3 | ||||
-rw-r--r-- | src/ARMInterpreter_LoadStore.cpp | 236 | ||||
-rw-r--r-- | src/CP15.cpp | 174 | ||||
-rw-r--r-- | src/CP15.h | 5 | ||||
-rw-r--r-- | src/DMA.cpp | 16 | ||||
-rw-r--r-- | src/GPU3D.cpp | 10 | ||||
-rw-r--r-- | src/NDS.cpp | 604 | ||||
-rw-r--r-- | src/NDS.h | 89 | ||||
-rw-r--r-- | src/SPU.cpp | 2 | ||||
-rw-r--r-- | src/libui_sdl/main.cpp | 4 |
14 files changed, 1399 insertions, 637 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index d16e193..64196f6 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -22,6 +22,20 @@ #include "ARMInterpreter.h" +// instruction timing notes +// +// * simple instruction: 1S (code) +// * LDR: 1N+1N+1I (code/data/internal) +// * STR: 1N+1N (code/data) +// * LDM: 1N+1N+(n-1)S+1I +// * STM: 1N+1N+(n-1)S +// * MUL/etc: 1N+xI (code/internal) +// * branch: 1N+1S (code/code) (pipeline refill) +// +// MUL/MLA seems to take 1I on ARM9 + + + u32 ARM::ConditionTable[16] = { 0xF0F0, // EQ @@ -49,97 +63,6 @@ ARM::ARM(u32 num) Num = num; SetClockShift(0); // safe default - - for (int i = 0; i < 16; i++) - { - Waitstates[0][i] = 1; - Waitstates[1][i] = 1; - Waitstates[2][i] = 1; - Waitstates[3][i] = 1; - } - - if (!num) - { - // ARM9 - Waitstates[0][0x2] = 1; // main RAM timing, assuming cache hit - Waitstates[0][0x3] = 4; - Waitstates[0][0x4] = 4; - Waitstates[0][0x5] = 5; - Waitstates[0][0x6] = 5; - Waitstates[0][0x7] = 4; - Waitstates[0][0x8] = 19; - Waitstates[0][0x9] = 19; - Waitstates[0][0xF] = 4; - - Waitstates[1][0x2] = 1; - Waitstates[1][0x3] = 8; - Waitstates[1][0x4] = 8; - Waitstates[1][0x5] = 10; - Waitstates[1][0x6] = 10; - Waitstates[1][0x7] = 8; - Waitstates[1][0x8] = 38; - Waitstates[1][0x9] = 38; - Waitstates[1][0xF] = 8; - - Waitstates[2][0x2] = 1; - Waitstates[2][0x3] = 2; - Waitstates[2][0x4] = 2; - Waitstates[2][0x5] = 2; - Waitstates[2][0x6] = 2; - Waitstates[2][0x7] = 2; - Waitstates[2][0x8] = 12; - Waitstates[2][0x9] = 12; - Waitstates[2][0xA] = 20; - Waitstates[2][0xF] = 2; - - Waitstates[3][0x2] = 1; - Waitstates[3][0x3] = 2; - Waitstates[3][0x4] = 2; - Waitstates[3][0x5] = 4; - Waitstates[3][0x6] = 4; - Waitstates[3][0x7] = 2; - Waitstates[3][0x8] = 24; - Waitstates[3][0x9] = 24; - Waitstates[3][0xA] = 20; - Waitstates[3][0xF] = 2; - } - else - { - // ARM7 - Waitstates[0][0x0] = 1; - Waitstates[0][0x2] = 1; - Waitstates[0][0x3] = 1; - Waitstates[0][0x4] = 1; - Waitstates[0][0x6] = 1; - Waitstates[0][0x8] = 6; - 
Waitstates[0][0x9] = 6; - - Waitstates[1][0x0] = 1; - Waitstates[1][0x2] = 2; - Waitstates[1][0x3] = 1; - Waitstates[1][0x4] = 1; - Waitstates[1][0x6] = 2; - Waitstates[1][0x8] = 12; - Waitstates[1][0x9] = 12; - - Waitstates[2][0x0] = 1; - Waitstates[2][0x2] = 1; - Waitstates[2][0x3] = 1; - Waitstates[2][0x4] = 1; - Waitstates[2][0x6] = 1; - Waitstates[2][0x8] = 6; - Waitstates[2][0x9] = 6; - Waitstates[2][0xA] = 10; - - Waitstates[3][0x0] = 1; - Waitstates[3][0x2] = 2; - Waitstates[3][0x3] = 1; - Waitstates[3][0x4] = 1; - Waitstates[3][0x6] = 2; - Waitstates[3][0x8] = 12; - Waitstates[3][0x9] = 12; - Waitstates[3][0xA] = 10; - } } ARM::~ARM() @@ -147,6 +70,16 @@ ARM::~ARM() // dorp } +ARMv5::ARMv5() : ARM(0) +{ + // +} + +ARMv4::ARMv4() : ARM(1) +{ + // +} + void ARM::Reset() { Cycles = 0; @@ -165,6 +98,13 @@ void ARM::Reset() JumpTo(ExceptionBase); } +void ARMv5::Reset() +{ + ARM::Reset(); + CP15Reset(); +} + + void ARM::DoSavestate(Savestate* file) { file->Section((char*)(Num ? "ARM7" : "ARM9")); @@ -189,14 +129,29 @@ void ARM::DoSavestate(Savestate* file) SetupCodeMem(R[15]); // should fix it } +void ARMv5::DoSavestate(Savestate* file) +{ + ARM::DoSavestate(file); + CP15DoSavestate(file); +} + + +void ARMv5::CalculateTimings() +{ + // +} + +void ARMv4::CalculateTimings() +{ + // +} + + void ARM::SetupCodeMem(u32 addr) { if (!Num) { - if (CP15::GetCodeMemRegion(addr, &CodeMem)) - return; - - NDS::ARM9GetMemRegion(addr, false, &CodeMem); + ((ARMv5*)this)->GetCodeMemRegion(addr, &CodeMem); } else { @@ -204,7 +159,7 @@ void ARM::SetupCodeMem(u32 addr) } } -void ARM::JumpTo(u32 addr, bool restorecpsr) +void ARMv5::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) { @@ -221,16 +176,69 @@ void ARM::JumpTo(u32 addr, bool restorecpsr) u32 oldregion = R[15] >> 23; u32 newregion = addr >> 23; -//if(!Num)printf("ARM%c branch from %08X to %08X. 
%03X->%03X\n", Num?'7':'9', R[15], addr, oldregion, newregion); + + if (addr & 0x1) + { + addr &= ~0x1; + R[15] = addr+2; + + //if (newregion != oldregion) SetupCodeMem(addr); + + // two-opcodes-at-once fetch + // doesn't matter if we put garbage in the MSbs there + if (addr & 0x2) + { + NextInstr[0] = CodeRead32(addr-2) >> 16; + NextInstr[1] = CodeRead32(addr+2); + Cycles += NDS::ARM9MemTimings[CodeRegion][2] * 2; + } + else + { + NextInstr[0] = CodeRead32(addr); + NextInstr[1] = NextInstr[0] >> 16; + Cycles += NDS::ARM9MemTimings[CodeRegion][2]; + } + + CPSR |= 0x20; + } + else + { + addr &= ~0x3; + R[15] = addr+4; + + //if (newregion != oldregion) SetupCodeMem(addr); + + NextInstr[0] = CodeRead32(addr); + NextInstr[1] = CodeRead32(addr+4); + Cycles += NDS::ARM9MemTimings[CodeRegion][2] * 2; + + CPSR &= ~0x20; + } +} + +void ARMv4::JumpTo(u32 addr, bool restorecpsr) +{ + if (restorecpsr) + { + RestoreCPSR(); + + if (CPSR & 0x20) addr |= 0x1; + else addr &= ~0x1; + } + + u32 oldregion = R[15] >> 23; + u32 newregion = addr >> 23; + if (addr & 0x1) { addr &= ~0x1; R[15] = addr+2; - if (newregion != oldregion) SetupCodeMem(addr); + //if (newregion != oldregion) SetupCodeMem(addr); NextInstr[0] = CodeRead16(addr); NextInstr[1] = CodeRead16(addr+2); + Cycles += NDS::ARM7MemTimings[CodeRegion][0] + NDS::ARM7MemTimings[CodeRegion][1]; CPSR |= 0x20; } @@ -239,10 +247,11 @@ void ARM::JumpTo(u32 addr, bool restorecpsr) addr &= ~0x3; R[15] = addr+4; - if (newregion != oldregion) SetupCodeMem(addr); + //if (newregion != oldregion) SetupCodeMem(addr); NextInstr[0] = CodeRead32(addr); NextInstr[1] = CodeRead32(addr+4); + Cycles += NDS::ARM7MemTimings[CodeRegion][2] + NDS::ARM7MemTimings[CodeRegion][3]; CPSR &= ~0x20; } @@ -373,7 +382,7 @@ void ARM::TriggerIRQ() JumpTo(ExceptionBase + 0x18); } -s32 ARM::Execute() +s32 ARMv5::Execute() { if (Halted) { @@ -381,19 +390,16 @@ s32 ARM::Execute() { Halted = 0; } - else if (NDS::HaltInterrupted(Num)) + else if (NDS::HaltInterrupted(0)) 
{ Halted = 0; - if (NDS::IME[Num] & 0x1) + if (NDS::IME[0] & 0x1) TriggerIRQ(); } else { Cycles = CyclesToRun; - - if (Num == 0) NDS::RunTimingCriticalDevices(0, CyclesToRun >> 1); - else NDS::RunTimingCriticalDevices(1, CyclesToRun); - + NDS::RunTimingCriticalDevices(0, CyclesToRun >> ClockShift); return Cycles; } } @@ -409,10 +415,11 @@ s32 ARM::Execute() R[15] += 2; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15]); + if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeRegion = NDS::Region9_MAX; } + else NextInstr[1] = CodeRead32(R[15]); // actually execute - u32 icode = (CurInstr >> 6); + u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); } else @@ -433,10 +440,12 @@ s32 ARM::Execute() { ARMInterpreter::A_BLX_IMM(this); } + else + AddCycles_C(); } s32 diff = Cycles - lastcycles; - NDS::RunTimingCriticalDevices(Num, diff >> ClockShift); + NDS::RunTimingCriticalDevices(0, diff >> ClockShift); lastcycles = Cycles - (diff & ClockDiffMask); // TODO optimize this shit!!! 
@@ -446,9 +455,90 @@ s32 ARM::Execute() Cycles = CyclesToRun; break; } - if (NDS::IF[Num] & NDS::IE[Num]) + if (NDS::IF[0] & NDS::IE[0]) + { + if (NDS::IME[0] & 0x1) + TriggerIRQ(); + } + } + + if (Halted == 2) + Halted = 0; + + return Cycles; +} + +s32 ARMv4::Execute() +{ + if (Halted) + { + if (Halted == 2) + { + Halted = 0; + } + else if (NDS::HaltInterrupted(1)) + { + Halted = 0; + if (NDS::IME[1] & 0x1) + TriggerIRQ(); + } + else + { + Cycles = CyclesToRun; + NDS::RunTimingCriticalDevices(1, CyclesToRun); + return Cycles; + } + } + + Cycles = 0; + s32 lastcycles = 0; + + while (Cycles < CyclesToRun) + { + if (CPSR & 0x20) // THUMB + { + // prefetch + R[15] += 2; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead16(R[15]); + + // actually execute + u32 icode = (CurInstr >> 6); + ARMInterpreter::THUMBInstrTable[icode](this); + } + else + { + // prefetch + R[15] += 4; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead32(R[15]); + + // actually execute + if (CheckCondition(CurInstr >> 28)) + { + u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); + ARMInterpreter::ARMInstrTable[icode](this); + } + else + AddCycles_C(); + } + + s32 diff = Cycles - lastcycles; + NDS::RunTimingCriticalDevices(1, diff); + lastcycles = Cycles; + + // TODO optimize this shit!!! 
+ if (Halted) + { + if (Halted == 1) + Cycles = CyclesToRun; + break; + } + if (NDS::IF[1] & NDS::IE[1]) { - if (NDS::IME[Num] & 0x1) + if (NDS::IME[1] & 0x1) TriggerIRQ(); } } @@ -19,24 +19,29 @@ #ifndef ARM_H #define ARM_H +#include <algorithm> + #include "types.h" #include "NDS.h" #include "CP15.h" -// lame -#define C_S(x) x -#define C_N(x) x -#define C_I(x) x - #define ROR(x, n) (((x) >> (n)) | ((x) << (32-(n)))) +enum +{ + RWFlags_Nonseq = (1<<5), + RWFlags_ForceUser = (1<<21), +}; + class ARM { public: ARM(u32 num); ~ARM(); // destroy shit - void Reset(); + virtual void Reset(); + + virtual void DoSavestate(Savestate* file); void SetClockShift(u32 shift) { @@ -44,9 +49,9 @@ public: ClockDiffMask = (1<<shift) - 1; } - void DoSavestate(Savestate* file); + virtual void CalculateTimings() = 0; - void JumpTo(u32 addr, bool restorecpsr = false); + virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; void RestoreCPSR(); void Halt(u32 halt) @@ -55,6 +60,7 @@ public: Halted = halt; } + // TODO: is this actually used?? 
void CheckIRQ() { if (!(NDS::IME[Num] & 0x1)) return; @@ -64,7 +70,7 @@ public: } } - s32 Execute(); + virtual s32 Execute() = 0; bool CheckCondition(u32 code) { @@ -102,167 +108,296 @@ public: void SetupCodeMem(u32 addr); - u16 CodeRead16(u32 addr) - { - Cycles += Waitstates[0][(addr>>24)&0xF]; + virtual bool DataRead8(u32 addr, u32* val, u32 flags) = 0; + virtual bool DataRead16(u32 addr, u32* val, u32 flags) = 0; + virtual bool DataRead32(u32 addr, u32* val, u32 flags) = 0; + virtual bool DataWrite8(u32 addr, u8 val, u32 flags) = 0; + virtual bool DataWrite16(u32 addr, u16 val, u32 flags) = 0; + virtual bool DataWrite32(u32 addr, u32 val, u32 flags) = 0; - if (CodeMem.Mem) return *(u16*)&CodeMem.Mem[addr & CodeMem.Mask]; + virtual void AddCycles_C() = 0; + virtual void AddCycles_CI(s32 num) = 0; + virtual void AddCycles_CDI() = 0; + virtual void AddCycles_CD() = 0; - u16 val; - // TODO eventually: on ARM9, THUMB opcodes are prefetched with 32bit reads - // probably not worth going through the trouble. we can probably just simulate - // the timing quirks resulting from this. or not. 
- if (!Num) - { - if (!CP15::HandleCodeRead16(addr, &val)) - val = NDS::ARM9Read16(addr); - } - else - val = NDS::ARM7Read16(addr); - return val; + u32 Num; + + // shift relative to system clock + // 0=33MHz 1=66MHz 2=133MHz + u32 ClockShift; + u32 ClockDiffMask; + + s32 Cycles; + s32 CyclesToRun; + u32 Halted; + + int CodeRegion; + + int DataRegion; + s32 DataCycles; + + u32 R[16]; // heh + u32 CPSR; + u32 R_FIQ[8]; // holding SPSR too + u32 R_SVC[3]; + u32 R_ABT[3]; + u32 R_IRQ[3]; + u32 R_UND[3]; + u32 CurInstr; + u32 NextInstr[2]; + + u32 ExceptionBase; + + NDS::MemRegion CodeMem; + + static u32 ConditionTable[16]; +}; + +class ARMv5 : public ARM +{ +public: + ARMv5(); + + void Reset(); + + void DoSavestate(Savestate* file); + + void CalculateTimings(); + + void JumpTo(u32 addr, bool restorecpsr = false); + + s32 Execute(); + + // all code accesses are forced nonseq 32bit + u32 CodeRead32(u32 addr); + + bool DataRead8(u32 addr, u32* val, u32 flags); + bool DataRead16(u32 addr, u32* val, u32 flags); + bool DataRead32(u32 addr, u32* val, u32 flags); + bool DataWrite8(u32 addr, u8 val, u32 flags); + bool DataWrite16(u32 addr, u16 val, u32 flags); + bool DataWrite32(u32 addr, u32 val, u32 flags); + + void AddCycles_C() + { + // code only. always nonseq 32-bit for ARM9. + Cycles += NDS::ARM9MemTimings[CodeRegion][2]; } - u32 CodeRead32(u32 addr) + void AddCycles_CI(s32 num) { - Cycles += Waitstates[1][(addr>>24)&0xF]; + // code+internal + Cycles += NDS::ARM9MemTimings[CodeRegion][2] + num; + } - if (CodeMem.Mem) return *(u32*)&CodeMem.Mem[addr & CodeMem.Mask]; + void AddCycles_CDI() + { + // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. 
+ // TODO: ITCM data fetches shouldn't be parallelized, they say + s32 numC = NDS::ARM9MemTimings[CodeRegion][2]; + s32 numD = DataCycles; - u32 val; - if (!Num) - { - if (!CP15::HandleCodeRead32(addr, &val)) - val = NDS::ARM9Read32(addr); - } + if (DataRegion != CodeRegion) + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); else - val = NDS::ARM7Read32(addr); + Cycles += numC + numD; + } + + void AddCycles_CD() + { + // TODO: ITCM data fetches shouldn't be parallelized, they say + s32 numC = NDS::ARM9MemTimings[CodeRegion][2]; + s32 numD = DataCycles; + + if (DataRegion != CodeRegion) + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); + else + Cycles += numC + numD; + } + + void GetCodeMemRegion(u32 addr, NDS::MemRegion* region); + + void CP15Reset(); + void CP15DoSavestate(Savestate* file); + + void UpdateDTCMSetting(); + void UpdateITCMSetting(); + + void CP15Write(u32 id, u32 val); + u32 CP15Read(u32 id); + + u32 CP15Control; - return val; + u32 DTCMSetting, ITCMSetting; + + u8 ITCM[0x8000]; + u32 ITCMSize; + u8 DTCM[0x4000]; + u32 DTCMBase, DTCMSize; +}; + +class ARMv4 : public ARM +{ +public: + ARMv4(); + + void CalculateTimings(); + + void JumpTo(u32 addr, bool restorecpsr = false); + + s32 Execute(); + + u16 CodeRead16(u32 addr) + { + u32 ret; + CodeRegion = NDS::ARM7Read16(addr, &ret); + return ret; } + u32 CodeRead32(u32 addr) + { + u32 ret; + CodeRegion = NDS::ARM7Read32(addr, &ret); + return ret; + } - u8 DataRead8(u32 addr, u32 forceuser=0) + bool DataRead8(u32 addr, u32* val, u32 flags) { - u8 val; - if (!Num) - { - if (!CP15::HandleDataRead8(addr, &val, forceuser)) - val = NDS::ARM9Read8(addr); - } + DataRegion = NDS::ARM7Read8(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][0]; else - val = NDS::ARM7Read8(addr); + DataCycles += NDS::ARM7MemTimings[DataRegion][1]; - Cycles += Waitstates[2][(addr>>24)&0xF]; - return val; + return true; } - u16 DataRead16(u32 addr, u32 forceuser=0) + bool 
DataRead16(u32 addr, u32* val, u32 flags) { - u16 val; addr &= ~1; - if (!Num) - { - if (!CP15::HandleDataRead16(addr, &val, forceuser)) - val = NDS::ARM9Read16(addr); - } + + DataRegion = NDS::ARM7Read16(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][0]; else - val = NDS::ARM7Read16(addr); + DataCycles += NDS::ARM7MemTimings[DataRegion][1]; - Cycles += Waitstates[2][(addr>>24)&0xF]; - return val; + return true; } - u32 DataRead32(u32 addr, u32 forceuser=0) + bool DataRead32(u32 addr, u32* val, u32 flags) { - u32 val; addr &= ~3; - if (!Num) - { - if (!CP15::HandleDataRead32(addr, &val, forceuser)) - val = NDS::ARM9Read32(addr); - } + + DataRegion = NDS::ARM7Read32(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][2]; else - val = NDS::ARM7Read32(addr); + DataCycles += NDS::ARM7MemTimings[DataRegion][3]; - Cycles += Waitstates[3][(addr>>24)&0xF]; - return val; + return true; } - void DataWrite8(u32 addr, u8 val, u32 forceuser=0) + bool DataWrite8(u32 addr, u8 val, u32 flags) { - if (!Num) - { - if (!CP15::HandleDataWrite8(addr, val, forceuser)) - NDS::ARM9Write8(addr, val); - } + DataRegion = NDS::ARM7Write8(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][0]; else - NDS::ARM7Write8(addr, val); + DataCycles += NDS::ARM7MemTimings[DataRegion][1]; - Cycles += Waitstates[2][(addr>>24)&0xF]; + return true; } - void DataWrite16(u32 addr, u16 val, u32 forceuser=0) + bool DataWrite16(u32 addr, u16 val, u32 flags) { addr &= ~1; - if (!Num) - { - if (!CP15::HandleDataWrite16(addr, val, forceuser)) - NDS::ARM9Write16(addr, val); - } + + DataRegion = NDS::ARM7Write16(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][0]; else - NDS::ARM7Write16(addr, val); + DataCycles += NDS::ARM7MemTimings[DataRegion][1]; - Cycles += Waitstates[2][(addr>>24)&0xF]; + return true; } - void DataWrite32(u32 addr, u32 val, u32 forceuser=0) 
+ bool DataWrite32(u32 addr, u32 val, u32 flags) { addr &= ~3; - if (!Num) - { - if (!CP15::HandleDataWrite32(addr, val, forceuser)) - NDS::ARM9Write32(addr, val); - } + + DataRegion = NDS::ARM7Write32(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM7MemTimings[DataRegion][2]; else - NDS::ARM7Write32(addr, val); + DataCycles += NDS::ARM7MemTimings[DataRegion][3]; - Cycles += Waitstates[3][(addr>>24)&0xF]; + return true; } - u32 Num; - - // shift relative to system clock - // 0=33MHz 1=66MHz 2=133MHz - u32 ClockShift; - u32 ClockDiffMask; - - // waitstates: - // 0=code16 1=code32 2=data16 3=data32 - // TODO eventually: nonsequential waitstates - // TODO NOT MAKE THIS A FUCKING GROSS HACK!!!!!! - s32 Waitstates[4][16]; + void AddCycles_C() + { + // code only. this code fetch is sequential. + Cycles += NDS::ARM7MemTimings[CodeRegion][(CPSR&0x20)?1:3]; + } - s32 Cycles; - s32 CyclesToRun; - u32 Halted; + void AddCycles_CI(s32 num) + { + // code+internal. results in a nonseq code fetch. + Cycles += NDS::ARM7MemTimings[CodeRegion][(CPSR&0x20)?0:2] + num; + } - u32 R[16]; // heh - u32 CPSR; - u32 R_FIQ[8]; // holding SPSR too - u32 R_SVC[3]; - u32 R_ABT[3]; - u32 R_IRQ[3]; - u32 R_UND[3]; - u32 CurInstr; - u32 NextInstr[2]; + void AddCycles_CDI() + { + // LDR/LDM cycles. 
+ s32 numC = NDS::ARM7MemTimings[CodeRegion][(CPSR&0x20)?0:2]; + s32 numD = DataCycles; - u32 ExceptionBase; + if (DataRegion == NDS::Region7_MainRAM) + { + if (CodeRegion == NDS::Region7_MainRAM) + Cycles += numC + numD; + else + { + numC++; + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + } + else if (CodeRegion == NDS::Region7_MainRAM) + { + numD++; + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + Cycles += numC + numD + 1; + } + } - NDS::MemRegion CodeMem; + void AddCycles_CD() + { + // TODO: max gain should be 5c when writing to mainRAM + s32 numC = NDS::ARM7MemTimings[CodeRegion][(CPSR&0x20)?0:2]; + s32 numD = DataCycles; - static u32 ConditionTable[16]; + if (DataRegion == NDS::Region7_MainRAM) + { + if (CodeRegion == NDS::Region7_MainRAM) + Cycles += numC + numD; + else + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else if (CodeRegion == NDS::Region7_MainRAM) + { + Cycles += std::max(numC + numD - 3, std::max(numC, numD)); + } + else + { + Cycles += numC + numD; + } + } }; namespace ARMInterpreter diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 32b5658..2ec9bfc 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -98,6 +98,8 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); + + cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) @@ -138,6 +140,8 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); + + cpu->AddCycles_C(); } void A_MRS(ARM* cpu) @@ -159,6 +163,7 @@ void A_MRS(ARM* cpu) psr = cpu->CPSR; cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; + cpu->AddCycles_C(); } @@ -172,7 +177,7 @@ void A_MCR(ARM* cpu) if (cpu->Num==0 && cp==15) { - CP15::Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]); + ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]); } else if (cpu->Num==1 && cp==14) { @@ -184,7 +189,7 @@ void A_MCR(ARM* cpu) return 
A_UNK(cpu); // TODO: check what kind of exception it really is } - cpu->Cycles += 2; // TODO: checkme + cpu->AddCycles_CI(1 + 1); // TODO: checkme } void A_MRC(ARM* cpu) @@ -197,7 +202,7 @@ void A_MRC(ARM* cpu) if (cpu->Num==0 && cp==15) { - cpu->R[(cpu->CurInstr>>12)&0xF] = CP15::Read((cn<<8)|(cm<<4)|cpinfo); + cpu->R[(cpu->CurInstr>>12)&0xF] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); } else if (cpu->Num==1 && cp==14) { @@ -209,7 +214,7 @@ void A_MRC(ARM* cpu) return A_UNK(cpu); // TODO: check what kind of exception it really is } - cpu->Cycles += 3; // TODO: checkme + cpu->AddCycles_CI(2 + 1); // TODO: checkme } diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 58bf94d..9bfcbd1 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -282,7 +282,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ #define A_AND(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -297,7 +297,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -313,7 +313,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) #define A_EOR(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -328,7 +328,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) #define A_SUB(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = 
a - b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -361,7 +361,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) !res, \ CARRY_SUB(a, b), \ OVERFLOW_SUB(a, b, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -377,7 +377,7 @@ A_IMPLEMENT_ALU_OP(SUB,) #define A_RSB(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -394,7 +394,7 @@ A_IMPLEMENT_ALU_OP(SUB,) !res, \ CARRY_SUB(b, a), \ OVERFLOW_SUB(b, a, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -410,7 +410,7 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -427,7 +427,7 @@ A_IMPLEMENT_ALU_OP(RSB,) !res, \ CARRY_ADD(a, b), \ OVERFLOW_ADD(a, b, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -443,7 +443,7 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 
1:0); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -462,7 +462,7 @@ A_IMPLEMENT_ALU_OP(ADD,) !res, \ CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), \ OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -478,7 +478,7 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -497,7 +497,7 @@ A_IMPLEMENT_ALU_OP(ADC,) !res, \ CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), \ OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -513,7 +513,7 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 
0:1); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -532,7 +532,7 @@ A_IMPLEMENT_ALU_OP(SBC,) !res, \ CARRY_SUB(b, a) & CARRY_SUB(res_tmp, carry), \ OVERFLOW_SUB(b, a, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -550,7 +550,7 @@ A_IMPLEMENT_ALU_OP(RSC,) u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TST,_S) @@ -560,7 +560,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TEQ,_S) @@ -572,7 +572,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) !res, \ CARRY_SUB(a, b), \ OVERFLOW_SUB(a, b, res)); \ - cpu->Cycles += c; + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMP,) @@ -584,7 +584,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) !res, \ CARRY_ADD(a, b), \ OVERFLOW_ADD(a, b, res)); \ - cpu->Cycles += c; + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMN,) @@ -592,7 +592,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) #define A_ORR(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -607,7 +607,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) u32 res = a | b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -621,7 +621,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S) #define A_MOV(c) \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if 
(((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(b); \ @@ -634,7 +634,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S) #define A_MOV_S(c) \ cpu->SetNZ(b & 0x80000000, \ !b); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(b, true); \ @@ -650,7 +650,7 @@ A_IMPLEMENT_ALU_OP(MOV,_S) #define A_BIC(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res); \ @@ -665,7 +665,7 @@ A_IMPLEMENT_ALU_OP(MOV,_S) u32 res = a & ~b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(res, true); \ @@ -680,7 +680,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S) #define A_MVN(c) \ b = ~b; \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(b); \ @@ -694,7 +694,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S) b = ~b; \ cpu->SetNZ(b & 0x80000000, \ !b); \ - cpu->Cycles += c; \ + if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ cpu->JumpTo(b, true); \ @@ -724,12 +724,17 @@ void A_MUL(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; - else cycles = 4; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 
3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; + else cycles = 4; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_MLA(ARM* cpu) @@ -749,12 +754,17 @@ void A_MLA(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; - else cycles = 5; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_UMULL(ARM* cpu) @@ -774,12 +784,17 @@ void A_UMULL(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; - else cycles = 5; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 
3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; + else cycles = 5; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_UMLAL(ARM* cpu) @@ -802,12 +817,17 @@ void A_UMLAL(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; - else cycles = 5; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; + else cycles = 5; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_SMULL(ARM* cpu) @@ -827,12 +847,17 @@ void A_SMULL(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; - else cycles = 5; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 
3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_SMLAL(ARM* cpu) @@ -855,12 +880,17 @@ void A_SMLAL(ARM* cpu) } u32 cycles; - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; - else cycles = 5; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void A_SMLAxy(ARM* cpu) @@ -882,6 +912,8 @@ void A_SMLAxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OVERFLOW_ADD(res_mul, rn, res)) cpu->CPSR |= 0x08000000; + + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMLAWy(ARM* cpu) @@ -901,6 +933,8 @@ void A_SMLAWy(ARM* cpu) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OVERFLOW_ADD(res_mul, rn, res)) cpu->CPSR |= 0x08000000; + + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULxy(ARM* cpu) @@ -918,6 +952,7 @@ void A_SMULxy(ARM* cpu) u32 res = ((s16)rm * (s16)rs); cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULWy(ARM* cpu) @@ -933,6 +968,7 @@ void A_SMULWy(ARM* cpu) u32 res = ((s64)(s32)rm * (s16)rs) >> 16; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? 
} void A_SMLALxy(ARM* cpu) @@ -955,7 +991,7 @@ void A_SMLALxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->Cycles += 1; + cpu->AddCycles_CI(1); // TODO: interlock?? } @@ -981,6 +1017,7 @@ void A_CLZ(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); } void A_QADD(ARM* cpu) @@ -998,6 +1035,7 @@ void A_QADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } void A_QSUB(ARM* cpu) @@ -1015,6 +1053,7 @@ void A_QSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDADD(ARM* cpu) @@ -1040,6 +1079,7 @@ void A_QDADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDSUB(ARM* cpu) @@ -1065,6 +1105,7 @@ void A_QDSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } @@ -1081,6 +1122,7 @@ void T_LSL_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + cpu->AddCycles_C(); } void T_LSR_IMM(ARM* cpu) @@ -1091,6 +1133,7 @@ void T_LSR_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + cpu->AddCycles_C(); } void T_ASR_IMM(ARM* cpu) @@ -1101,6 +1144,7 @@ void T_ASR_IMM(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = op; cpu->SetNZ(op & 0x80000000, !op); + cpu->AddCycles_C(); } void T_ADD_REG_(ARM* cpu) @@ -1113,6 +1157,7 @@ void T_ADD_REG_(ARM* cpu) !res, CARRY_ADD(a, b), OVERFLOW_ADD(a, b, res)); + cpu->AddCycles_C(); } void T_SUB_REG_(ARM* cpu) @@ -1125,6 +1170,7 @@ void T_SUB_REG_(ARM* cpu) !res, CARRY_SUB(a, b), OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } void T_ADD_IMM_(ARM* cpu) @@ -1137,6 +1183,7 @@ void T_ADD_IMM_(ARM* cpu) !res, CARRY_ADD(a, b), OVERFLOW_ADD(a, b, res)); + cpu->AddCycles_C(); } void T_SUB_IMM_(ARM* cpu) @@ -1149,6 +1196,7 @@ void T_SUB_IMM_(ARM* cpu) !res, CARRY_SUB(a, b), 
OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } void T_MOV_IMM(ARM* cpu) @@ -1157,6 +1205,7 @@ void T_MOV_IMM(ARM* cpu) cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; cpu->SetNZ(0, !b); + cpu->AddCycles_C(); } void T_CMP_IMM(ARM* cpu) @@ -1168,6 +1217,7 @@ void T_CMP_IMM(ARM* cpu) !res, CARRY_SUB(a, b), OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } void T_ADD_IMM(ARM* cpu) @@ -1180,6 +1230,7 @@ void T_ADD_IMM(ARM* cpu) !res, CARRY_ADD(a, b), OVERFLOW_ADD(a, b, res)); + cpu->AddCycles_C(); } void T_SUB_IMM(ARM* cpu) @@ -1192,6 +1243,7 @@ void T_SUB_IMM(ARM* cpu) !res, CARRY_SUB(a, b), OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } @@ -1203,6 +1255,7 @@ void T_AND_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } void T_EOR_REG(ARM* cpu) @@ -1213,6 +1266,7 @@ void T_EOR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } void T_LSL_REG(ARM* cpu) @@ -1223,7 +1277,7 @@ void T_LSL_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - cpu->Cycles += 1; + cpu->AddCycles_CI(1); } void T_LSR_REG(ARM* cpu) @@ -1234,7 +1288,7 @@ void T_LSR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - cpu->Cycles += 1; + cpu->AddCycles_CI(1); } void T_ASR_REG(ARM* cpu) @@ -1245,7 +1299,7 @@ void T_ASR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - cpu->Cycles += 1; + cpu->AddCycles_CI(1); } void T_ADC_REG(ARM* cpu) @@ -1260,6 +1314,7 @@ void T_ADC_REG(ARM* cpu) !res, CARRY_ADD(a, b) | CARRY_ADD(res_tmp, carry), OVERFLOW_ADD(a, b, res_tmp) | OVERFLOW_ADD(res_tmp, carry, res)); + cpu->AddCycles_C(); } void T_SBC_REG(ARM* cpu) @@ -1274,6 +1329,7 @@ void T_SBC_REG(ARM* cpu) !res, CARRY_SUB(a, b) & CARRY_SUB(res_tmp, carry), OVERFLOW_SUB(a, b, res_tmp) | OVERFLOW_SUB(res_tmp, carry, res)); + cpu->AddCycles_C(); } void T_ROR_REG(ARM* cpu) @@ -1284,7 +1340,7 @@ void T_ROR_REG(ARM* cpu) 
cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, !a); - cpu->Cycles += 1; + cpu->AddCycles_CI(1); } void T_TST_REG(ARM* cpu) @@ -1294,6 +1350,7 @@ void T_TST_REG(ARM* cpu) u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } void T_NEG_REG(ARM* cpu) @@ -1305,6 +1362,7 @@ void T_NEG_REG(ARM* cpu) !res, CARRY_SUB(0, b), OVERFLOW_SUB(0, b, res)); + cpu->AddCycles_C(); } void T_CMP_REG(ARM* cpu) @@ -1316,6 +1374,7 @@ void T_CMP_REG(ARM* cpu) !res, CARRY_SUB(a, b), OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } void T_CMN_REG(ARM* cpu) @@ -1327,6 +1386,7 @@ void T_CMN_REG(ARM* cpu) !res, CARRY_ADD(a, b), OVERFLOW_ADD(a, b, res)); + cpu->AddCycles_C(); } void T_ORR_REG(ARM* cpu) @@ -1337,6 +1397,7 @@ void T_ORR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } void T_MUL_REG(ARM* cpu) @@ -1361,7 +1422,7 @@ void T_MUL_REG(ARM* cpu) else if (a & 0x0000FF00) cycles += 2; else cycles += 1; } - cpu->Cycles += cycles; + cpu->AddCycles_CI(cycles); } void T_BIC_REG(ARM* cpu) @@ -1372,6 +1433,7 @@ void T_BIC_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } void T_MVN_REG(ARM* cpu) @@ -1381,6 +1443,7 @@ void T_MVN_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, !res); + cpu->AddCycles_C(); } @@ -1395,6 +1458,8 @@ void T_ADD_HIREG(ARM* cpu) u32 a = cpu->R[rd]; u32 b = cpu->R[rs]; + cpu->AddCycles_C(); + if (rd == 15) { cpu->JumpTo((a + b) | 1); @@ -1418,6 +1483,7 @@ void T_CMP_HIREG(ARM* cpu) !res, CARRY_SUB(a, b), OVERFLOW_SUB(a, b, res)); + cpu->AddCycles_C(); } void T_MOV_HIREG(ARM* cpu) @@ -1425,6 +1491,8 @@ void T_MOV_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; + cpu->AddCycles_C(); + if (rd == 15) { cpu->JumpTo(cpu->R[rs] | 1); @@ -1441,6 +1509,7 @@ void T_ADD_PCREL(ARM* cpu) u32 val = cpu->R[15] & ~2; val += 
((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; + cpu->AddCycles_C(); } void T_ADD_SPREL(ARM* cpu) @@ -1448,6 +1517,7 @@ void T_ADD_SPREL(ARM* cpu) u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; + cpu->AddCycles_C(); } void T_ADD_SP(ARM* cpu) @@ -1458,6 +1528,7 @@ void T_ADD_SP(ARM* cpu) else val += ((cpu->CurInstr & 0x7F) << 2); cpu->R[13] = val; + cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 740375d..5e2ef2c 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -66,6 +66,8 @@ void T_BCOND(ARM* cpu) s32 offset = (s32)(cpu->CurInstr << 24) >> 23; cpu->JumpTo(cpu->R[15] + offset + 1); } + else + cpu->AddCycles_C(); } void T_BX(ARM* cpu) @@ -96,6 +98,7 @@ void T_BL_LONG_1(ARM* cpu) { s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 9; cpu->R[14] = cpu->R[15] + offset; + cpu->AddCycles_C(); } void T_BL_LONG_2(ARM* cpu) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index adb44a9..b34a341 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -62,29 +62,35 @@ namespace ARMInterpreter #define A_STR \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (!cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->AddCycles_CD(); #define A_STR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite32(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + if (!cpu->DataWrite32(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq | (cpu->CurInstr & (1<<21)))) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; 
\ + cpu->AddCycles_CD(); #define A_STRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (!cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->AddCycles_CD(); #define A_STRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], cpu->CurInstr & (1<<21)); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + if (!cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq | (cpu->CurInstr & (1<<21)))) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->AddCycles_CD(); #define A_LDR \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val = cpu->DataRead32(offset); val = ROR(val, ((offset&0x3)<<3)); \ + u32 val; \ + if (!cpu->DataRead32(offset, &val, RWFlags_Nonseq)) return; \ + val = ROR(val, ((offset&0x3)<<3)); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->Cycles += 1; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ if (cpu->Num==1) val &= ~0x1; \ @@ -97,9 +103,11 @@ namespace ARMInterpreter #define A_LDR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val = cpu->DataRead32(addr, cpu->CurInstr & (1<<21)); val = ROR(val, ((addr&0x3)<<3)); \ + u32 val; \ + if (!cpu->DataRead32(addr, &val, RWFlags_Nonseq | (cpu->CurInstr & (1<<21)))) return; \ + val = ROR(val, ((addr&0x3)<<3)); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->Cycles += 1; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ if (cpu->Num==1) val &= ~0x1; \ @@ -112,17 +120,19 @@ namespace ARMInterpreter #define A_LDRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val = cpu->DataRead8(offset); \ + u32 val; \ + if (!cpu->DataRead8(offset, &val, 
RWFlags_Nonseq)) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->Cycles += 1; \ + cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ #define A_LDRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val = cpu->DataRead8(addr, cpu->CurInstr & (1<<21)); \ + u32 val; \ + if (!cpu->DataRead8(addr, &val, RWFlags_Nonseq | (cpu->CurInstr & (1<<21)))) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->Cycles += 1; \ + cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ @@ -209,13 +219,15 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + if (!cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + cpu->AddCycles_CD(); #define A_STRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + if (!cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + cpu->AddCycles_CD(); // TODO: CHECK LDRD/STRD TIMINGS!! @@ -223,69 +235,85 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->Cycles += 1; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - cpu->R[r ] = cpu->DataRead32(offset ); \ - cpu->R[r+1] = cpu->DataRead32(offset+4); \ + if (r&1) printf("!! 
MISALIGNED LDRD %d\n", r); \ + if (!cpu->DataRead32(offset , &cpu->R[r ], RWFlags_Nonseq)) return; \ + if (!cpu->DataRead32(offset+4, &cpu->R[r+1], 0)) return; \ + cpu->AddCycles_CDI(); #define A_LDRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->Cycles += 1; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - cpu->R[r ] = cpu->DataRead32(addr ); \ - cpu->R[r+1] = cpu->DataRead32(addr+4); \ + if (r&1) printf("!! MISALIGNED LDRD_POST %d\n", r); \ + if (!cpu->DataRead32(addr , &cpu->R[r ], RWFlags_Nonseq)) return; \ + if (!cpu->DataRead32(addr+4, &cpu->R[r+1], 0)) return; \ + cpu->AddCycles_CDI(); #define A_STRD \ if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - cpu->DataWrite32(offset , cpu->R[r ]); \ - cpu->DataWrite32(offset+4, cpu->R[r+1]); \ + if (r&1) printf("!! MISALIGNED STRD %d\n", r); \ + if (!cpu->DataWrite32(offset , cpu->R[r ], RWFlags_Nonseq)) return; \ + if (!cpu->DataWrite32(offset+4, cpu->R[r+1], 0)) return; \ + cpu->AddCycles_CD(); #define A_STRD_POST \ if (cpu->Num != 0) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - cpu->DataWrite32(offset , cpu->R[r ]); \ - cpu->DataWrite32(offset+4, cpu->R[r+1]); \ + if (r&1) printf("!! MISALIGNED STRD_POST %d\n", r); \ + if (!cpu->DataWrite32(offset , cpu->R[r ], RWFlags_Nonseq)) return; \ + if (!cpu->DataWrite32(offset+4, cpu->R[r+1], 0)) return; \ + cpu->AddCycles_CD(); #define A_LDRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(offset); \ + if (!cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRH PC %08X\n", cpu->R[15]); \ #define A_LDRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = cpu->DataRead16(addr); \ + if (!cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ #define A_LDRSB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(offset); \ + if (!cpu->DataRead8(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ #define A_LDRSB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->DataRead8(addr); \ + if (!cpu->DataRead8(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ #define A_LDRSH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(offset); \ + if (!cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRSH PC %08X\n", cpu->R[15]); \ #define A_LDRSH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->DataRead16(addr); \ + if (!cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF], RWFlags_Nonseq)) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ @@ -328,12 +356,15 @@ void A_SWP(ARM* cpu) u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 val = cpu->DataRead32(base); + u32 val; + if (!cpu->DataRead32(base, &val, RWFlags_Nonseq)) return; cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); - cpu->DataWrite32(base, rm); + u32 numD = cpu->DataCycles; + if (!cpu->DataWrite32(base, rm, RWFlags_Nonseq)) return; + cpu->DataCycles += numD; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void A_SWPB(ARM* cpu) @@ -341,11 +372,13 @@ void A_SWPB(ARM* cpu) u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = cpu->DataRead8(base); + if (!cpu->DataRead8(base, &cpu->R[(cpu->CurInstr >> 12) & 0xF], RWFlags_Nonseq)) return; - cpu->DataWrite8(base, rm); + u32 numD = cpu->DataCycles; + if (!cpu->DataWrite8(base, rm, RWFlags_Nonseq)) return; + cpu->DataCycles += numD; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } @@ -356,6 +389,7 @@ void A_LDM(ARM* cpu) u32 base = cpu->R[baseid]; u32 wbbase; u32 preinc = (cpu->CurInstr & (1<<24)); + u32 flags = RWFlags_Nonseq; if (!(cpu->CurInstr & (1<<23))) { @@ -374,8 +408,6 @@ void A_LDM(ARM* cpu) preinc = !preinc; } - cpu->Cycles += 1; - if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10); @@ -384,15 +416,17 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<<i)) { if (preinc) base += 4; - cpu->R[i] = 
cpu->DataRead32(base); + if (!cpu->DataRead32(base, &cpu->R[i], flags)) return; + flags &= ~RWFlags_Nonseq; if (!preinc) base += 4; } } if (cpu->CurInstr & (1<<15)) { + u32 pc; if (preinc) base += 4; - u32 pc = cpu->DataRead32(base); + if (!cpu->DataRead32(base, &pc, flags)) return; if (!preinc) base += 4; if (cpu->Num == 1) @@ -422,6 +456,8 @@ void A_LDM(ARM* cpu) else cpu->R[baseid] = wbbase; } + + cpu->AddCycles_CDI(); } void A_STM(ARM* cpu) @@ -430,6 +466,7 @@ void A_STM(ARM* cpu) u32 base = cpu->R[baseid]; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); + u32 flags = RWFlags_Nonseq; if (!(cpu->CurInstr & (1<<23))) { @@ -463,15 +500,19 @@ void A_STM(ARM* cpu) { if (preinc) base += 4; + bool res; if (i == baseid && !isbanked) { if ((cpu->Num == 0) || (!(cpu->CurInstr & ((1<<i)-1)))) - cpu->DataWrite32(base, oldbase); + res = cpu->DataWrite32(base, oldbase, flags); else - cpu->DataWrite32(base, base); // checkme + res = cpu->DataWrite32(base, base, flags); // checkme } else - cpu->DataWrite32(base, cpu->R[i]); + res = cpu->DataWrite32(base, cpu->R[i], flags); + + if (!res) return; + flags &= ~RWFlags_Nonseq; if (!preinc) base += 4; } @@ -482,6 +523,8 @@ void A_STM(ARM* cpu) if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) cpu->R[baseid] = base; + + cpu->AddCycles_CD(); } @@ -494,71 +537,80 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); - cpu->R[(cpu->CurInstr >> 8) & 0x7] = cpu->DataRead32(addr); + if (!cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7], RWFlags_Nonseq)) return; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_STR_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + + cpu->AddCycles_CD(); } void T_STRB_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 
0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + + cpu->AddCycles_CD(); } void T_LDR_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - u32 val = cpu->DataRead32(addr); + u32 val; + if (!cpu->DataRead32(addr, &val, RWFlags_Nonseq)) return; cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_LDRB_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead8(addr); + if (!cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_STRH_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + + cpu->AddCycles_CD(); } void T_LDRSB_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->DataRead8(addr); + if (!cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_LDRH_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead16(addr); + if (!cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_LDRSH_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->DataRead16(addr); + if (!cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 
0x7], RWFlags_Nonseq)) return; + cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } @@ -567,7 +619,8 @@ void T_STR_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 4) & 0x7C; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CD(); } void T_LDR_IMM(ARM* cpu) @@ -575,9 +628,10 @@ void T_LDR_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 4) & 0x7C; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - u32 val = cpu->DataRead32(offset); + u32 val; + if (!cpu->DataRead32(offset, &val, RWFlags_Nonseq)) return; cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); - cpu->Cycles += 1; + cpu->AddCycles_CDI(); } void T_STRB_IMM(ARM* cpu) @@ -585,7 +639,8 @@ void T_STRB_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 6) & 0x1F; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CD(); } void T_LDRB_IMM(ARM* cpu) @@ -593,8 +648,8 @@ void T_LDRB_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 6) & 0x1F; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = cpu->DataRead8(offset); - cpu->Cycles += 1; + if (!cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CDI(); } @@ -603,7 +658,8 @@ void T_STRH_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 5) & 0x3E; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); + if (!cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CD(); } void T_LDRH_IMM(ARM* cpu) @@ -611,8 +667,8 @@ void T_LDRH_IMM(ARM* cpu) u32 offset = (cpu->CurInstr >> 5) & 0x3E; offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->R[cpu->CurInstr & 0x7] = 
cpu->DataRead16(offset); - cpu->Cycles += 1; + if (!cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CDI(); } @@ -621,7 +677,8 @@ void T_STR_SPREL(ARM* cpu) u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; - cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); + if (!cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CD(); } void T_LDR_SPREL(ARM* cpu) @@ -629,14 +686,15 @@ void T_LDR_SPREL(ARM* cpu) u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; - cpu->R[(cpu->CurInstr >> 8) & 0x7] = cpu->DataRead32(offset); - cpu->Cycles += 1; + if (!cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7], RWFlags_Nonseq)) return; + cpu->AddCycles_CDI(); } void T_PUSH(ARM* cpu) { int nregs = 0; + u32 flags = RWFlags_Nonseq; for (int i = 0; i < 8; i++) { @@ -655,77 +713,87 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<<i)) { - cpu->DataWrite32(base, cpu->R[i]); + if (!cpu->DataWrite32(base, cpu->R[i], flags)) return; + flags &= ~RWFlags_Nonseq; base += 4; } } if (cpu->CurInstr & (1<<8)) { - cpu->DataWrite32(base, cpu->R[14]); + if (!cpu->DataWrite32(base, cpu->R[14], flags)) return; } + + cpu->AddCycles_CD(); } void T_POP(ARM* cpu) { u32 base = cpu->R[13]; - - cpu->Cycles += 1; + u32 flags = RWFlags_Nonseq; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<<i)) { - cpu->R[i] = cpu->DataRead32(base); + if (!cpu->DataRead32(base, &cpu->R[i], flags)) return; + flags &= ~RWFlags_Nonseq; base += 4; } } if (cpu->CurInstr & (1<<8)) { - u32 pc = cpu->DataRead32(base); + u32 pc; + if (!cpu->DataRead32(base, &pc, flags)) return; if (cpu->Num==1) pc |= 0x1; cpu->JumpTo(pc); base += 4; } cpu->R[13] = base; + cpu->AddCycles_CDI(); } void T_STMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 flags = RWFlags_Nonseq; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<<i)) { - cpu->DataWrite32(base, cpu->R[i]); + if 
(!cpu->DataWrite32(base, cpu->R[i], flags)) return; + flags &= ~RWFlags_Nonseq; base += 4; } } // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; + cpu->AddCycles_CD(); } void T_LDMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; - - cpu->Cycles += 1; + u32 flags = RWFlags_Nonseq; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<<i)) { - cpu->R[i] = cpu->DataRead32(base); + if (!cpu->DataRead32(base, &cpu->R[i], flags)) return; + flags &= ~RWFlags_Nonseq; base += 4; } } if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; + + cpu->AddCycles_CDI(); } diff --git a/src/CP15.cpp b/src/CP15.cpp index 229c560..44f0233 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -20,31 +20,12 @@ #include <string.h> #include "NDS.h" #include "ARM.h" -#include "CP15.h" -// derp -namespace NDS -{ -extern ARM* ARM9; -} - -namespace CP15 -{ - -u32 Control; -u32 DTCMSetting, ITCMSetting; - -u8 ITCM[0x8000]; -u32 ITCMSize; -u8 DTCM[0x4000]; -u32 DTCMBase, DTCMSize; - - -void Reset() +void ARMv5::CP15Reset() { - Control = 0x78; // dunno + CP15Control = 0x78; // dunno DTCMSetting = 0; ITCMSetting = 0; @@ -57,11 +38,11 @@ void Reset() DTCMSize = 0; } -void DoSavestate(Savestate* file) +void ARMv5::CP15DoSavestate(Savestate* file) { file->Section("CP15"); - file->Var32(&Control); + file->Var32(&CP15Control); file->Var32(&DTCMSetting); file->Var32(&ITCMSetting); @@ -77,9 +58,9 @@ void DoSavestate(Savestate* file) } -void UpdateDTCMSetting() +void ARMv5::UpdateDTCMSetting() { - if (Control & (1<<16)) + if (CP15Control & (1<<16)) { DTCMBase = DTCMSetting & 0xFFFFF000; DTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F); @@ -93,9 +74,9 @@ void UpdateDTCMSetting() } } -void UpdateITCMSetting() +void ARMv5::UpdateITCMSetting() { - if (Control & (1<<18)) + if (CP15Control & (1<<18)) { ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F); //printf("ITCM [%08X] enabled at %08X, size %X\n", ITCMSetting, 0, 
ITCMSize); @@ -108,7 +89,7 @@ void UpdateITCMSetting() } -void Write(u32 id, u32 val) +void ARMv5::CP15Write(u32 id, u32 val) { //printf("CP15 write op %03X %08X %08X\n", id, val, NDS::ARM9->R[15]); @@ -116,8 +97,8 @@ void Write(u32 id, u32 val) { case 0x100: val &= 0x000FF085; - Control &= ~0x000FF085; - Control |= val; + CP15Control &= ~0x000FF085; + CP15Control |= val; UpdateDTCMSetting(); UpdateITCMSetting(); return; @@ -125,7 +106,7 @@ void Write(u32 id, u32 val) case 0x704: case 0x782: - NDS::ARM9->Halt(1); + Halt(1); return; @@ -158,7 +139,7 @@ void Write(u32 id, u32 val) printf("unknown CP15 write op %03X %08X\n", id, val); } -u32 Read(u32 id) +u32 ARMv5::CP15Read(u32 id) { //printf("CP15 read op %03X %08X\n", id, NDS::ARM9->R[15]); @@ -180,7 +161,7 @@ u32 Read(u32 id) case 0x100: // control reg - return Control; + return CP15Control; case 0x910: @@ -197,135 +178,202 @@ u32 Read(u32 id) // TCM are handled here. // TODO: later on, handle PU, and maybe caches -bool HandleCodeRead16(u32 addr, u16* val) +u32 ARMv5::CodeRead32(u32 addr) { - if (addr < ITCMSize) - { - *val = *(u16*)&ITCM[addr & 0x7FFF]; - return true; - } - - return false; -} + // PU/cache check here -bool HandleCodeRead32(u32 addr, u32* val) -{ if (addr < ITCMSize) { - *val = *(u32*)&ITCM[addr & 0x7FFF]; - return true; + CodeRegion = NDS::Region9_ITCM; + return *(u32*)&ITCM[addr & 0x7FFF]; } - return false; + u32 ret; + CodeRegion = NDS::ARM9Read32(addr, &ret); + return ret; } -bool HandleDataRead8(u32 addr, u8* val, u32 forceuser) +bool ARMv5::DataRead8(u32 addr, u32* val, u32 flags) { + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u8*)&ITCM[addr & 0x7FFF]; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF]; return true; } - return false; + DataRegion = NDS::ARM9Read8(addr, val); + if (flags & RWFlags_Nonseq) + 
DataCycles = NDS::ARM9MemTimings[DataRegion][0]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][1]; + return true; } -bool HandleDataRead16(u32 addr, u16* val, u32 forceuser) +bool ARMv5::DataRead16(u32 addr, u32* val, u32 flags) { + addr &= ~1; + + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u16*)&ITCM[addr & 0x7FFF]; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF]; return true; } - return false; + DataRegion = NDS::ARM9Read16(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM9MemTimings[DataRegion][0]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][1]; + return true; } -bool HandleDataRead32(u32 addr, u32* val, u32 forceuser) +bool ARMv5::DataRead32(u32 addr, u32* val, u32 flags) { + addr &= ~3; + + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u32*)&ITCM[addr & 0x7FFF]; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF]; return true; } - return false; + DataRegion = NDS::ARM9Read32(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM9MemTimings[DataRegion][2]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][3]; + return true; } -bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser) +bool ARMv5::DataWrite8(u32 addr, u8 val, u32 flags) { + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u8*)&ITCM[addr & 0x7FFF] = val; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; return true; } - return false; + DataRegion = NDS::ARM9Write8(addr, val); + if (flags & RWFlags_Nonseq) + 
DataCycles = NDS::ARM9MemTimings[DataRegion][0]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][1]; + return true; } -bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser) +bool ARMv5::DataWrite16(u32 addr, u16 val, u32 flags) { + addr &= ~1; + + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u16*)&ITCM[addr & 0x7FFF] = val; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; return true; } - return false; + DataRegion = NDS::ARM9Write16(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM9MemTimings[DataRegion][0]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][1]; + return true; } -bool HandleDataWrite32(u32 addr, u32 val, u32 forceuser) +bool ARMv5::DataWrite32(u32 addr, u32 val, u32 flags) { + addr &= ~3; + + // PU/cache check here + if (addr < ITCMSize) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u32*)&ITCM[addr & 0x7FFF] = val; return true; } if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize)) { + DataRegion = NDS::Region9_ITCM; + DataCycles += 1; *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val; return true; } - return false; + DataRegion = NDS::ARM9Write32(addr, val); + if (flags & RWFlags_Nonseq) + DataCycles = NDS::ARM9MemTimings[DataRegion][2]; + else + DataCycles += NDS::ARM9MemTimings[DataRegion][3]; + return true; } -bool GetCodeMemRegion(u32 addr, NDS::MemRegion* region) +void ARMv5::GetCodeMemRegion(u32 addr, NDS::MemRegion* region) { if (addr < ITCMSize) { + region->Region = NDS::Region9_ITCM; region->Mem = ITCM; region->Mask = 0x7FFF; - return true; + return; } - return false; + NDS::ARM9GetMemRegion(addr, false, &CodeMem); } -} @@ -32,10 +32,9 @@ void UpdateITCMSetting(); void Write(u32 id, u32 val); u32 Read(u32 id); -bool HandleCodeRead16(u32 addr, u16* val); bool HandleCodeRead32(u32 addr, u32* val); -bool HandleDataRead8(u32 
addr, u8* val, u32 forceuser=0); -bool HandleDataRead16(u32 addr, u16* val, u32 forceuser=0); +bool HandleDataRead8(u32 addr, u32* val, u32 forceuser=0); +bool HandleDataRead16(u32 addr, u32* val, u32 forceuser=0); bool HandleDataRead32(u32 addr, u32* val, u32 forceuser=0); bool HandleDataWrite8(u32 addr, u8 val, u32 forceuser=0); bool HandleDataWrite16(u32 addr, u16 val, u32 forceuser=0); diff --git a/src/DMA.cpp b/src/DMA.cpp index 432e0f2..95aa9e6 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -246,12 +246,14 @@ s32 DMA::Run(s32 cycles) if (!(Cnt & 0x04000000)) { - u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; - void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; + int (*readfn)(u32,u32*) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; + int (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; while (IterCount > 0 && cycles > 0 && !Stall) { - writefn(CurDstAddr, readfn(CurSrcAddr)); + u32 val; + readfn(CurSrcAddr, &val); + writefn(CurDstAddr, val); s32 c = (Waitstates[0][(CurSrcAddr >> 24) & 0xF] + Waitstates[0][(CurDstAddr >> 24) & 0xF]); cycles -= c; @@ -283,12 +285,14 @@ s32 DMA::Run(s32 cycles) } }*/ - u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; - void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; + int (*readfn)(u32,u32*) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; + int (*writefn)(u32,u32) = CPU ? 
NDS::ARM7Write32 : NDS::ARM9Write32; while (IterCount > 0 && cycles > 0 && !Stall) { - writefn(CurDstAddr, readfn(CurSrcAddr)); + u32 val; + readfn(CurSrcAddr, &val); + writefn(CurDstAddr, val); s32 c = (Waitstates[1][(CurSrcAddr >> 24) & 0xF] + Waitstates[1][(CurDstAddr >> 24) & 0xF]); cycles -= c; diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 36858f7..df27913 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -1984,12 +1984,10 @@ void ExecuteCommand() break; case 0x21: // normal - { - Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; - Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; - Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; - CalculateLighting(); - } + Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; + Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; + Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + CalculateLighting(); break; case 0x22: // texcoord diff --git a/src/NDS.cpp b/src/NDS.cpp index 846671c..38cc611 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -21,7 +21,6 @@ #include "Config.h" #include "NDS.h" #include "ARM.h" -#include "CP15.h" #include "NDSCart.h" #include "DMA.h" #include "FIFO.h" @@ -36,8 +35,69 @@ namespace NDS { -ARM* ARM9; -ARM* ARM7; +// timing notes +// +// * this implementation is technically wrong for VRAM +// each bank is considered a separate region +// but this would only matter in specific VRAM->VRAM DMA transfers or +// when running code in VRAM, which is way unlikely +// +// bus/basedelay/nspenalty +// +// bus types: +// * 0 / 32-bit: nothing special +// * 1 / 16-bit: 32-bit accesses split into two 16-bit accesses, second is always sequential +// * 2 / 8-bit/GBARAM: (presumably) split into multiple 8-bit accesses? 
+// * 3 / ARM9 internal: cache/TCM +// +// ARM9 always gets 3c nonseq penalty when using the bus (except for mainRAM where the penalty is 7c) +// +// ARM7 only gets nonseq penalty when accessing mainRAM (7c as for ARM9) +// +// timings for GBA slot and wifi are set up at runtime + +RegionTimings ARM9MemTimingInfo[Region9_MAX] = +{ + {0, 1, 4}, // void + {0, 1, 4}, // BIOS + {3, 0, 0}, // icache + {3, 0, 0}, // dcache + {3, 0, 0}, // ITCM + {3, 0, 0}, // DTCM + {1, 1, 8}, // main RAM + {0, 1, 4}, // shared WRAM + {0, 1, 4}, // IO + {1, 1, 4}, // palette + {1, 1, 4}, // VRAM ABG + {1, 1, 4}, // VRAM BBG + {1, 1, 4}, // VRAM AOBJ + {1, 1, 4}, // VRAM BOBJ + {1, 1, 4}, // VRAM LCDC + {0, 1, 4}, // OAM + {1, 1, 4}, // GBA ROM + {2, 1, 4}, // GBA RAM +}; + +RegionTimings ARM7MemTimingInfo[Region7_MAX] = +{ + {0, 1, 1}, // void + {0, 1, 1}, // BIOS + {1, 1, 8}, // main RAM + {0, 1, 1}, // shared WRAM + {0, 1, 1}, // ARM7 WRAM + {0, 1, 1}, // IO + {1, 1, 1}, // wifi WS0 + {1, 1, 1}, // wifi WS1 + {1, 1, 1}, // ARM7 VRAM + {1, 1, 1}, // GBA ROM + {2, 1, 1}, // GBA RAM +}; + +u8 ARM9MemTimings[Region9_MAX+1][4]; +u8 ARM7MemTimings[Region7_MAX+1][4]; + +ARMv5* ARM9; +ARMv4* ARM7; s32 CurIterationCycles; s32 ARM7Offset; @@ -113,8 +173,8 @@ void RunTimer(u32 tid, s32 cycles); bool Init() { - ARM9 = new ARM(0); - ARM7 = new ARM(1); + ARM9 = new ARMv5(); + ARM7 = new ARMv4(); DMAs[0] = new DMA(0, 0); DMAs[1] = new DMA(0, 1); @@ -158,6 +218,80 @@ void DeInit() } +void CalculateTimings(int arm9shift) +{ + int i; + + for (i = 0; i < Region9_MAX; i++) + { + RegionTimings t = ARM9MemTimingInfo[i]; + + if (t.BusType == 3) // ARM9 internal + { + ARM9MemTimings[i][0] = 1; // 16-bit N + ARM9MemTimings[i][1] = 1; // 16-bit S + ARM9MemTimings[i][2] = 1; // 32-bit N + ARM9MemTimings[i][3] = 1; // 32-bit S + continue; + } + + ARM9MemTimings[i][0] = t.DelayN << arm9shift; // 16-bit N + ARM9MemTimings[i][1] = t.DelayS << arm9shift; // 16-bit S + + if (t.BusType == 0) // 32-bit + { + 
ARM9MemTimings[i][2] = t.DelayN << arm9shift; // 32-bit N + ARM9MemTimings[i][3] = t.DelayS << arm9shift; // 32-bit S + } + else if (t.BusType == 1) // 16-bit + { + ARM9MemTimings[i][2] = (t.DelayN + t.DelayS) << arm9shift; // 32-bit N + ARM9MemTimings[i][3] = (t.DelayS + t.DelayS) << arm9shift; // 32-bit S + } + else if (t.BusType == 2) // 8-bit + { + // TODO!! + ARM9MemTimings[i][2] = t.DelayN << arm9shift; // 32-bit N + ARM9MemTimings[i][3] = t.DelayS << arm9shift; // 32-bit S + } + } + + ARM9MemTimings[i][0] = 0; + ARM9MemTimings[i][1] = 0; + ARM9MemTimings[i][2] = 0; + ARM9MemTimings[i][3] = 0; + + for (i = 0; i < Region7_MAX; i++) + { + RegionTimings t = ARM7MemTimingInfo[i]; + + ARM7MemTimings[i][0] = t.DelayN; // 16-bit N + ARM7MemTimings[i][1] = t.DelayS; // 16-bit S + + if (t.BusType == 0) // 32-bit + { + ARM7MemTimings[i][2] = t.DelayN; // 32-bit N + ARM7MemTimings[i][3] = t.DelayS; // 32-bit S + } + else if (t.BusType == 1) // 16-bit + { + ARM7MemTimings[i][2] = t.DelayN + t.DelayS; // 32-bit N + ARM7MemTimings[i][3] = t.DelayS + t.DelayS; // 32-bit S + } + else if (t.BusType == 2) // 8-bit + { + // TODO!! 
+ ARM7MemTimings[i][2] = t.DelayN; // 32-bit N + ARM7MemTimings[i][3] = t.DelayS; // 32-bit S + } + } + + ARM7MemTimings[i][0] = 0; + ARM7MemTimings[i][1] = 0; + ARM7MemTimings[i][2] = 0; + ARM7MemTimings[i][3] = 0; +} + void SetupDirectBoot() { u32 bootparams[8]; @@ -204,9 +338,9 @@ void SetupDirectBoot() ARM9Write16(0x027FFC30, 0xFFFF); ARM9Write16(0x027FFC40, 0x0001); - CP15::Write(0x910, 0x0300000A); - CP15::Write(0x911, 0x00000020); - CP15::Write(0x100, 0x00050000); + ARM9->CP15Write(0x910, 0x0300000A); + ARM9->CP15Write(0x911, 0x00000020); + ARM9->CP15Write(0x100, 0x00050000); ARM9->R[12] = bootparams[1]; ARM9->R[13] = 0x03002F7C; @@ -317,7 +451,6 @@ void Reset() ARM9->Reset(); ARM7->Reset(); - CP15::Reset(); CPUStop = 0; @@ -347,6 +480,8 @@ void Reset() ARM9->SetClockShift(1); ARM7->SetClockShift(0); + + CalculateTimings(1); } void Stop() @@ -523,7 +658,6 @@ bool DoSavestate(Savestate* file) ARM9->DoSavestate(file); ARM7->DoSavestate(file); - CP15::DoSavestate(file); NDSCart::DoSavestate(file); GPU::DoSavestate(file); @@ -1174,254 +1308,307 @@ void debug(u32 param) -u8 ARM9Read8(u32 addr) +int ARM9Read8(u32 addr, u32* val) { if ((addr & 0xFFFFF000) == 0xFFFF0000) { - return *(u8*)&ARM9BIOS[addr & 0xFFF]; + *val = *(u8*)&ARM9BIOS[addr & 0xFFF]; + return Region9_BIOS; } switch (addr & 0xFF000000) { case 0x02000000: - return *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; - else return 0; + if (SWRAM_ARM9) + { + *val = *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + return Region9_SharedWRAM; + } + else + { + *val = 0; + return Region9_Void; + } case 0x04000000: - return ARM9IORead8(addr); + *val = ARM9IORead8(addr); + return Region9_IO; case 0x05000000: - return *(u8*)&GPU::Palette[addr & 0x7FF]; + *val = *(u8*)&GPU::Palette[addr & 0x7FF]; + return Region9_Palette; case 0x06000000: + switch (addr & 
0x00E00000) { - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG<u8>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u8>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u8>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u8>(addr); - default: return GPU::ReadVRAM_LCDC<u8>(addr); - } + case 0x00000000: *val = GPU::ReadVRAM_ABG<u8>(addr); return Region9_VRAM_ABG; + case 0x00200000: *val = GPU::ReadVRAM_BBG<u8>(addr); return Region9_VRAM_BBG; + case 0x00400000: *val = GPU::ReadVRAM_AOBJ<u8>(addr); return Region9_VRAM_AOBJ; + case 0x00600000: *val = GPU::ReadVRAM_BOBJ<u8>(addr); return Region9_VRAM_BOBJ; + default: *val = GPU::ReadVRAM_LCDC<u8>(addr); return Region9_VRAM_LCDC; } - return 0; case 0x07000000: - return *(u8*)&GPU::OAM[addr & 0x7FF]; + *val = *(u8*)&GPU::OAM[addr & 0x7FF]; + return Region9_OAM; case 0x08000000: case 0x09000000: //return *(u8*)&NDSCart::CartROM[addr & (NDSCart::CartROMSize-1)]; //printf("GBA read8 %08X\n", addr); - return 0xFF; + // TODO!!! 
+ *val = 0xFF; + return Region9_Void; } printf("unknown arm9 read8 %08X\n", addr); - return 0; + *val = 0; + return Region9_Void; } -u16 ARM9Read16(u32 addr) +int ARM9Read16(u32 addr, u32* val) { if ((addr & 0xFFFFF000) == 0xFFFF0000) { - return *(u16*)&ARM9BIOS[addr & 0xFFF]; + *val = *(u16*)&ARM9BIOS[addr & 0xFFF]; + return Region9_BIOS; } switch (addr & 0xFF000000) { case 0x02000000: - return *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; - else return 0; + if (SWRAM_ARM9) + { + *val = *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + return Region9_SharedWRAM; + } + else + { + *val = 0; + return Region9_Void; + } case 0x04000000: - return ARM9IORead16(addr); + *val = ARM9IORead16(addr); + return Region9_IO; case 0x05000000: - return *(u16*)&GPU::Palette[addr & 0x7FF]; + *val = *(u16*)&GPU::Palette[addr & 0x7FF]; + return Region9_Palette; case 0x06000000: + switch (addr & 0x00E00000) { - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG<u16>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u16>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u16>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u16>(addr); - default: return GPU::ReadVRAM_LCDC<u16>(addr); - } + case 0x00000000: *val = GPU::ReadVRAM_ABG<u16>(addr); return Region9_VRAM_ABG; + case 0x00200000: *val = GPU::ReadVRAM_BBG<u16>(addr); return Region9_VRAM_BBG; + case 0x00400000: *val = GPU::ReadVRAM_AOBJ<u16>(addr); return Region9_VRAM_AOBJ; + case 0x00600000: *val = GPU::ReadVRAM_BOBJ<u16>(addr); return Region9_VRAM_BOBJ; + default: *val = GPU::ReadVRAM_LCDC<u16>(addr); return Region9_VRAM_LCDC; } - return 0; case 0x07000000: - return *(u16*)&GPU::OAM[addr & 0x7FF]; + *val = *(u16*)&GPU::OAM[addr & 0x7FF]; + return Region9_OAM; case 0x08000000: case 0x09000000: //return *(u16*)&NDSCart::CartROM[addr & 
(NDSCart::CartROMSize-1)]; //printf("GBA read16 %08X\n", addr); - return 0xFFFF; + // TODO!!! + *val = 0xFFFF; + return Region9_Void; } //printf("unknown arm9 read16 %08X %08X\n", addr, ARM9->R[15]); - return 0; + *val = 0; + return Region9_Void; } -u32 ARM9Read32(u32 addr) +int ARM9Read32(u32 addr, u32* val) { if ((addr & 0xFFFFF000) == 0xFFFF0000) { - return *(u32*)&ARM9BIOS[addr & 0xFFF]; + *val = *(u32*)&ARM9BIOS[addr & 0xFFF]; + return Region9_BIOS; } switch (addr & 0xFF000000) { case 0x02000000: - return *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; - else return 0; + if (SWRAM_ARM9) + { + *val = *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask]; + return Region9_SharedWRAM; + } + else + { + *val = 0; + return Region9_Void; + } case 0x04000000: - return ARM9IORead32(addr); + *val = ARM9IORead32(addr); + return Region9_IO; case 0x05000000: - return *(u32*)&GPU::Palette[addr & 0x7FF]; + *val = *(u32*)&GPU::Palette[addr & 0x7FF]; + return Region9_Palette; case 0x06000000: + switch (addr & 0x00E00000) { - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr); - default: return GPU::ReadVRAM_LCDC<u32>(addr); - } + case 0x00000000: *val = GPU::ReadVRAM_ABG<u32>(addr); return Region9_VRAM_ABG; + case 0x00200000: *val = GPU::ReadVRAM_BBG<u32>(addr); return Region9_VRAM_BBG; + case 0x00400000: *val = GPU::ReadVRAM_AOBJ<u32>(addr); return Region9_VRAM_AOBJ; + case 0x00600000: *val = GPU::ReadVRAM_BOBJ<u32>(addr); return Region9_VRAM_BOBJ; + default: *val = GPU::ReadVRAM_LCDC<u32>(addr); return Region9_VRAM_LCDC; } - return 0; case 0x07000000: - return *(u32*)&GPU::OAM[addr & 0x7FF]; + *val = *(u32*)&GPU::OAM[addr & 
0x7FF]; + return Region9_OAM; case 0x08000000: case 0x09000000: //return *(u32*)&NDSCart::CartROM[addr & (NDSCart::CartROMSize-1)]; //printf("GBA read32 %08X\n", addr); - return 0xFFFFFFFF; + // TODO!!! + *val = 0xFFFFFFFF; + return Region9_Void; } - printf("unknown arm9 read32 %08X | %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[12], ARM9Read32(0x027FF820)); - return 0; + printf("unknown arm9 read32 %08X | %08X %08X\n", addr, ARM9->R[15], ARM9->R[12]); + *val = 0; + return Region9_Void; } -void ARM9Write8(u32 addr, u8 val) +int ARM9Write8(u32 addr, u8 val) { switch (addr & 0xFF000000) { case 0x02000000: *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; - return; + if (SWRAM_ARM9) + { + *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return Region9_SharedWRAM; + } + return Region9_Void; case 0x04000000: ARM9IOWrite8(addr, val); - return; + return Region9_IO; case 0x05000000: case 0x06000000: case 0x07000000: - return; + // checkme + return Region9_Void; } printf("unknown arm9 write8 %08X %02X\n", addr, val); + return Region9_Void; } -void ARM9Write16(u32 addr, u16 val) +int ARM9Write16(u32 addr, u16 val) { switch (addr & 0xFF000000) { case 0x02000000: *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; - return; + if (SWRAM_ARM9) + { + *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return Region9_SharedWRAM; + } + return Region9_Void; case 0x04000000: ARM9IOWrite16(addr, val); - return; + return Region9_IO; case 0x05000000: *(u16*)&GPU::Palette[addr & 0x7FF] = val; - return; + return Region9_Palette; case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); break; - case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); break; - case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); 
break; - case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); break; - default: GPU::WriteVRAM_LCDC<u16>(addr, val); break; + case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); return Region9_VRAM_ABG; + case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return Region9_VRAM_BBG; + case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return Region9_VRAM_AOBJ; + case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return Region9_VRAM_BOBJ; + default: GPU::WriteVRAM_LCDC<u16>(addr, val); return Region9_VRAM_LCDC; } - return; case 0x07000000: *(u16*)&GPU::OAM[addr & 0x7FF] = val; - return; + return Region9_OAM; } //printf("unknown arm9 write16 %08X %04X\n", addr, val); + return Region9_Void; } -void ARM9Write32(u32 addr, u32 val) +int ARM9Write32(u32 addr, u32 val) { switch (addr & 0xFF000000) { case 0x02000000: *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region9_MainRAM; case 0x03000000: - if (SWRAM_ARM9) *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; - return; + if (SWRAM_ARM9) + { + *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val; + return Region9_SharedWRAM; + } + return Region9_Void; case 0x04000000: ARM9IOWrite32(addr, val); - return; + return Region9_IO; case 0x05000000: *(u32*)&GPU::Palette[addr & 0x7FF] = val; - return; + return Region9_Palette; case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); break; - case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); break; - case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); break; - case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); break; - default: GPU::WriteVRAM_LCDC<u32>(addr, val); break; + case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return Region9_VRAM_ABG; + case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return Region9_VRAM_BBG; + case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return Region9_VRAM_AOBJ; + case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return Region9_VRAM_BOBJ; + default: 
GPU::WriteVRAM_LCDC<u32>(addr, val); return Region9_VRAM_LCDC; } - return; case 0x07000000: *(u32*)&GPU::OAM[addr & 0x7FF] = val; - return; + return Region9_OAM; } printf("unknown arm9 write32 %08X %08X | %08X\n", addr, val, ARM9->R[15]); + return Region9_Void; } bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) @@ -1429,6 +1616,7 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) switch (addr & 0xFF000000) { case 0x02000000: + region->Region = Region9_MainRAM; region->Mem = MainRAM; region->Mask = MAIN_RAM_SIZE-1; return true; @@ -1436,6 +1624,7 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) case 0x03000000: if (SWRAM_ARM9) { + region->Region = Region9_SharedWRAM; region->Mem = SWRAM_ARM9; region->Mask = SWRAM_ARM9Mask; return true; @@ -1445,6 +1634,7 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) if ((addr & 0xFFFFF000) == 0xFFFF0000 && !write) { + region->Region = Region9_BIOS; region->Mem = ARM9BIOS; region->Mask = 0xFFF; return true; @@ -1456,223 +1646,306 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region) -u8 ARM7Read8(u32 addr) +int ARM7Read8(u32 addr, u32* val) { if (addr < 0x00004000) { if (ARM7->R[15] >= 0x4000) - return 0xFF; + { *val = 0xFF; return Region7_BIOS; } /* protected read: must not fall through to the BIOS fetch below */ if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) - return 0xFF; + { *val = 0xFF; return Region7_BIOS; } - return *(u8*)&ARM7BIOS[addr]; + *val = *(u8*)&ARM7BIOS[addr]; + return Region7_BIOS; } switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: - return *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; - else return *(u8*)&ARM7WRAM[addr & 0xFFFF]; + if (SWRAM_ARM7) + { + *val = *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + return Region7_SharedWRAM; + } + else + { + *val = *(u8*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; + } case 0x03800000: - return *(u8*)&ARM7WRAM[addr & 0xFFFF]; + 
*val = *(u8*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; case 0x04000000: - return ARM7IORead8(addr); + *val = ARM7IORead8(addr); + return Region7_IO; case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u8>(addr); + *val = GPU::ReadVRAM_ARM7<u8>(addr); + return Region7_VRAM; } printf("unknown arm7 read8 %08X %08X %08X/%08X\n", addr, ARM7->R[15], ARM7->R[0], ARM7->R[1]); - return 0; + *val = 0; + return Region7_Void; } -u16 ARM7Read16(u32 addr) +int ARM7Read16(u32 addr, u32* val) { if (addr < 0x00004000) { if (ARM7->R[15] >= 0x4000) - return 0xFFFF; + { *val = 0xFFFF; return Region7_BIOS; } /* protected read: must not fall through to the BIOS fetch below */ if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) - return 0xFFFF; + { *val = 0xFFFF; return Region7_BIOS; } - return *(u16*)&ARM7BIOS[addr]; + *val = *(u16*)&ARM7BIOS[addr]; + return Region7_BIOS; } switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: - return *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; - else return *(u16*)&ARM7WRAM[addr & 0xFFFF]; + if (SWRAM_ARM7) + { + *val = *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + return Region7_SharedWRAM; + } + else + { + *val = *(u16*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; + } case 0x03800000: - return *(u16*)&ARM7WRAM[addr & 0xFFFF]; + *val = *(u16*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; case 0x04000000: - return ARM7IORead16(addr); + *val = ARM7IORead16(addr); + return Region7_IO; case 0x04800000: - return Wifi::Read(addr); + if (addr < 0x04810000) + { + *val = Wifi::Read(addr); + return (addr & 0x8000) ? 
Region7_Wifi1 : Region7_Wifi0; + } + break; case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u16>(addr); + *val = GPU::ReadVRAM_ARM7<u16>(addr); + return Region7_VRAM; } printf("unknown arm7 read16 %08X %08X\n", addr, ARM7->R[15]); - return 0; + *val = 0; + return Region7_Void; } -u32 ARM7Read32(u32 addr) +int ARM7Read32(u32 addr, u32* val) { if (addr < 0x00004000) { if (ARM7->R[15] >= 0x4000) - return 0xFFFFFFFF; + { *val = 0xFFFFFFFF; return Region7_BIOS; } /* protected read: must not fall through to the BIOS fetch below */ if (addr < ARM7BIOSProt && ARM7->R[15] >= ARM7BIOSProt) - return 0xFFFFFFFF; + { *val = 0xFFFFFFFF; return Region7_BIOS; } - return *(u32*)&ARM7BIOS[addr]; + *val = *(u32*)&ARM7BIOS[addr]; + return Region7_BIOS; } switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: - return *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + *val = *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)]; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; - else return *(u32*)&ARM7WRAM[addr & 0xFFFF]; + if (SWRAM_ARM7) + { + *val = *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask]; + return Region7_SharedWRAM; + } + else + { + *val = *(u32*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; + } case 0x03800000: - return *(u32*)&ARM7WRAM[addr & 0xFFFF]; + *val = *(u32*)&ARM7WRAM[addr & 0xFFFF]; + return Region7_ARM7WRAM; case 0x04000000: - return ARM7IORead32(addr); + *val = ARM7IORead32(addr); + return Region7_IO; case 0x04800000: - return Wifi::Read(addr) | (Wifi::Read(addr+2) << 16); + if (addr < 0x04810000) + { + *val = Wifi::Read(addr) | (Wifi::Read(addr+2) << 16); + return (addr & 0x8000) ? 
Region7_Wifi1 : Region7_Wifi0; + } + break; case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u32>(addr); + *val = GPU::ReadVRAM_ARM7<u32>(addr); + return Region7_VRAM; } printf("unknown arm7 read32 %08X | %08X\n", addr, ARM7->R[15]); - return 0; + *val = 0; return Region7_Void; /* unmapped address: report the void region, as the 8/16-bit readers do */ } -void ARM7Write8(u32 addr, u8 val) +int ARM7Write8(u32 addr, u8 val) { switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: *(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; - else *(u8*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + if (SWRAM_ARM7) + { + *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + return Region7_SharedWRAM; + } + else + { + *(u8*)&ARM7WRAM[addr & 0xFFFF] = val; + return Region7_ARM7WRAM; + } case 0x03800000: *(u8*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + return Region7_ARM7WRAM; case 0x04000000: ARM7IOWrite8(addr, val); - return; + return Region7_IO; case 0x06000000: case 0x06800000: GPU::WriteVRAM_ARM7<u8>(addr, val); - return; + return Region7_VRAM; } printf("unknown arm7 write8 %08X %02X @ %08X\n", addr, val, ARM7->R[15]); + return Region7_Void; } -void ARM7Write16(u32 addr, u16 val) +int ARM7Write16(u32 addr, u16 val) { switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: *(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; - else *(u16*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + if (SWRAM_ARM7) + { + *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + return Region7_SharedWRAM; + } + else + { + *(u16*)&ARM7WRAM[addr & 0xFFFF] = val; + return Region7_ARM7WRAM; + } case 0x03800000: *(u16*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + return Region7_ARM7WRAM; case 0x04000000: ARM7IOWrite16(addr, val); - return; + return Region7_IO; case 0x04800000: - Wifi::Write(addr, val); - return; + if (addr < 0x04810000) + { + 
Wifi::Write(addr, val); + return (addr & 0x8000) ? Region7_Wifi1 : Region7_Wifi0; + } + break; case 0x06000000: case 0x06800000: GPU::WriteVRAM_ARM7<u16>(addr, val); - return; + return Region7_VRAM; } //printf("unknown arm7 write16 %08X %04X @ %08X\n", addr, val, ARM7->R[15]); + return Region7_Void; } -void ARM7Write32(u32 addr, u32 val) +int ARM7Write32(u32 addr, u32 val) { switch (addr & 0xFF800000) { case 0x02000000: case 0x02800000: *(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val; - return; + return Region7_MainRAM; case 0x03000000: - if (SWRAM_ARM7) *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; - else *(u32*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + if (SWRAM_ARM7) + { + *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val; + return Region7_SharedWRAM; + } + else + { + *(u32*)&ARM7WRAM[addr & 0xFFFF] = val; + return Region7_ARM7WRAM; + } case 0x03800000: *(u32*)&ARM7WRAM[addr & 0xFFFF] = val; - return; + return Region7_ARM7WRAM; case 0x04000000: ARM7IOWrite32(addr, val); - return; + return Region7_IO; case 0x04800000: - Wifi::Write(addr, val & 0xFFFF); - Wifi::Write(addr+2, val >> 16); - return; + if (addr < 0x04810000) + { + Wifi::Write(addr, val & 0xFFFF); + Wifi::Write(addr+2, val >> 16); + return (addr & 0x8000) ? 
Region7_Wifi1 : Region7_Wifi0; + } + return Region7_Void; case 0x06000000: case 0x06800000: GPU::WriteVRAM_ARM7<u32>(addr, val); - return; + return Region7_VRAM; } //printf("unknown arm7 write32 %08X %08X @ %08X\n", addr, val, ARM7->R[15]); + return Region7_Void; } bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region) @@ -1681,6 +1954,7 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region) { case 0x02000000: case 0x02800000: + region->Region = Region7_MainRAM; region->Mem = MainRAM; region->Mask = MAIN_RAM_SIZE-1; return true; @@ -1693,6 +1967,7 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region) // it's not really worth bothering anyway if (!SWRAM_ARM7) { + region->Region = Region7_ARM7WRAM; region->Mem = ARM7WRAM; region->Mask = 0xFFFF; return true; @@ -1700,6 +1975,7 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region) break; case 0x03800000: + region->Region = Region7_ARM7WRAM; region->Mem = ARM7WRAM; region->Mask = 0xFFFF; return true; @@ -87,13 +87,78 @@ typedef struct } Timer; +enum +{ + Region9_Void = 0, + + Region9_BIOS, + + Region9_ICache, + Region9_DCache, + Region9_ITCM, + Region9_DTCM, + + Region9_MainRAM, + Region9_SharedWRAM, + + Region9_IO, + + Region9_Palette, + Region9_VRAM_ABG, + Region9_VRAM_BBG, + Region9_VRAM_AOBJ, + Region9_VRAM_BOBJ, + Region9_VRAM_LCDC, + Region9_OAM, + + Region9_GBAROM, + Region9_GBARAM, + + Region9_MAX +}; + +enum +{ + Region7_Void = 0, + + Region7_BIOS, + + Region7_MainRAM, + Region7_SharedWRAM, + Region7_ARM7WRAM, + + Region7_IO, + Region7_Wifi0, + Region7_Wifi1, + + Region7_VRAM, + + Region7_GBAROM, + Region7_GBARAM, + + Region7_MAX +}; + +typedef struct +{ + u8 BusType; // 0=32bit 1=16bit 2=8bit/GBARAM 3=ARM9/internal + u8 DelayS; // baseline sequential access delay + u8 DelayN; // baseline nonsequential access delay + u8 _pad; + +} RegionTimings; + typedef struct { + int Region; u8* Mem; u32 Mask; } MemRegion; +extern u8 ARM9MemTimings[Region9_MAX+1][4]; +extern u8 
ARM7MemTimings[Region7_MAX+1][4]; + // hax extern u32 IME[2]; extern u32 IE[2]; @@ -159,21 +224,21 @@ void StopDMAs(u32 cpu, u32 mode); void RunTimingCriticalDevices(u32 cpu, s32 cycles); -u8 ARM9Read8(u32 addr); -u16 ARM9Read16(u32 addr); -u32 ARM9Read32(u32 addr); -void ARM9Write8(u32 addr, u8 val); -void ARM9Write16(u32 addr, u16 val); -void ARM9Write32(u32 addr, u32 val); +int ARM9Read8(u32 addr, u32* val); +int ARM9Read16(u32 addr, u32* val); +int ARM9Read32(u32 addr, u32* val); +int ARM9Write8(u32 addr, u8 val); +int ARM9Write16(u32 addr, u16 val); +int ARM9Write32(u32 addr, u32 val); bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region); -u8 ARM7Read8(u32 addr); -u16 ARM7Read16(u32 addr); -u32 ARM7Read32(u32 addr); -void ARM7Write8(u32 addr, u8 val); -void ARM7Write16(u32 addr, u16 val); -void ARM7Write32(u32 addr, u32 val); +int ARM7Read8(u32 addr, u32* val); +int ARM7Read16(u32 addr, u32* val); +int ARM7Read32(u32 addr, u32* val); +int ARM7Write8(u32 addr, u8 val); +int ARM7Write16(u32 addr, u16 val); +int ARM7Write32(u32 addr, u32 val); bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region); diff --git a/src/SPU.cpp b/src/SPU.cpp index 034e1aa..9000ecb 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -214,7 +214,7 @@ void Channel::FIFO_BufferData() for (u32 i = 0; i < burstlen; i += 4) { - FIFO[FIFOWritePos] = NDS::ARM7Read32(SrcAddr + FIFOReadOffset); + NDS::ARM7Read32(SrcAddr + FIFOReadOffset, &FIFO[FIFOWritePos]); FIFOReadOffset += 4; FIFOWritePos++; FIFOWritePos &= 0x7; diff --git a/src/libui_sdl/main.cpp b/src/libui_sdl/main.cpp index e9b230b..fff49df 100644 --- a/src/libui_sdl/main.cpp +++ b/src/libui_sdl/main.cpp @@ -518,8 +518,8 @@ int OnAreaKeyEvent(uiAreaHandler* handler, uiArea* area, uiAreaKeyEvent* evt) if (evt->Scancode == Config::KeyMapping[i]) KeyInputMask &= ~(1<<i); - //if (evt->Scancode == 0x57) // F11 - // NDS::debug(0); + if (evt->Scancode == 0x57) // F11 + NDS::debug(0); } return 1; |