aboutsummaryrefslogtreecommitdiff
path: root/src/NDS.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/NDS.cpp')
-rw-r--r--src/NDS.cpp304
1 files changed, 129 insertions, 175 deletions
diff --git a/src/NDS.cpp b/src/NDS.cpp
index aa8b1d3..48f4b38 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -36,14 +36,6 @@
namespace NDS
{
-#ifdef DEBUG_CHECK_DESYNC
-u64 dbg_CyclesSys;
-u64 dbg_CyclesARM9;
-u64 dbg_CyclesTimer9;
-u64 dbg_CyclesARM7;
-u64 dbg_CyclesTimer7;
-#endif
-
// timing notes
//
// * this implementation is technically wrong for VRAM
@@ -60,6 +52,7 @@ u64 dbg_CyclesTimer7;
// * 3 / ARM9 internal: cache/TCM
//
// ARM9 always gets 3c nonseq penalty when using the bus (except for mainRAM where the penalty is 7c)
+// /!\ 3c penalty doesn't apply to DMA!
//
// ARM7 only gets nonseq penalty when accessing mainRAM (7c as for ARM9)
//
@@ -72,14 +65,20 @@ ARMv5* ARM9;
ARMv4* ARM7;
u32 NumFrames;
-u64 SysClockCycles;
u64 LastSysClockCycles;
-u32 FrameSysClockCycles;
+u64 FrameStartTimestamp;
-s32 CurIterationCycles;
-s32 ARM7Offset;
int CurCPU;
+const s32 kMaxIterationCycles = 16;
+
+u32 ARM9ClockShift;
+
+// no need to worry about those overflowing, they can keep going for atleast 4350 years
+u64 ARM9Timestamp, ARM9Target;
+u64 ARM7Timestamp, ARM7Target;
+u64 SysTimestamp;
+
SchedEvent SchedList[Event_MAX];
u32 SchedListMask;
@@ -119,6 +118,7 @@ u16 ARM7BIOSProt;
Timer Timers[8];
u8 TimerCheckMask[2];
+u64 TimerTimestamp[2];
DMA* DMAs[8];
u32 DMA9Fill[4];
@@ -270,6 +270,8 @@ void InitTimings()
// (especially wrt VRAM mirroring and overlapping and whatnot).
// ARM9
+ // TODO: +3c nonseq waitstate doesn't apply to DMA!
+ // but of course mainRAM always gets 8c nonseq waitstate
SetARM9RegionTimings(0x00000000, 0xFFFFFFFF, 32, 1 + 3, 1); // void
@@ -384,15 +386,6 @@ void Reset()
FILE* f;
u32 i;
-#ifdef DEBUG_CHECK_DESYNC
- dbg_CyclesSys = 0;
- dbg_CyclesARM9 = 0;
- dbg_CyclesTimer9 = 0;
- dbg_CyclesARM7 = 0;
- dbg_CyclesTimer7 = 0;
-#endif // DEBUG_CHECK_DESYNC
-
- SysClockCycles = 0;
LastSysClockCycles = 0;
f = melon_fopen_local("bios9.bin", "rb");
@@ -429,8 +422,12 @@ void Reset()
fclose(f);
}
- ARM9->SetClockShift(1);
- ARM7->SetClockShift(0);
+ // TODO for later: configure this when emulating a DSi
+ ARM9ClockShift = 1;
+
+ ARM9Timestamp = 0; ARM9Target = 0;
+ ARM7Timestamp = 0; ARM7Target = 0;
+ SysTimestamp = 0;
InitTimings();
@@ -481,6 +478,8 @@ void Reset()
memset(Timers, 0, 8*sizeof(Timer));
TimerCheckMask[0] = 0;
TimerCheckMask[1] = 0;
+ TimerTimestamp[0] = 0;
+ TimerTimestamp[1] = 0;
for (i = 0; i < 8; i++) DMAs[i]->Reset();
memset(DMA9Fill, 0, 4*4);
@@ -488,9 +487,6 @@ void Reset()
memset(SchedList, 0, sizeof(SchedList));
SchedListMask = 0;
- CurIterationCycles = 0;
- ARM7Offset = 0;
-
KeyInput = 0x007F03FF;
KeyCnt = 0;
RCnt = 0;
@@ -566,7 +562,7 @@ bool DoSavestate_Scheduler(Savestate* file)
}
file->Var32(&funcid);
- file->Var32((u32*)&evt->WaitCycles);
+ file->Var64(&evt->Timestamp);
file->Var32(&evt->Param);
}
}
@@ -596,7 +592,7 @@ bool DoSavestate_Scheduler(Savestate* file)
else
evt->Func = NULL;
- file->Var32((u32*)&evt->WaitCycles);
+ file->Var64(&evt->Timestamp);
file->Var32(&evt->Param);
}
}
@@ -651,13 +647,20 @@ bool DoSavestate(Savestate* file)
file->Var32(&timer->CycleShift);
}
file->VarArray(TimerCheckMask, 2*sizeof(u8));
+ file->VarArray(TimerTimestamp, 2*sizeof(u64));
file->VarArray(DMA9Fill, 4*sizeof(u32));
if (!DoSavestate_Scheduler(file)) return false;
file->Var32(&SchedListMask);
- file->Var32((u32*)&CurIterationCycles);
- file->Var32((u32*)&ARM7Offset);
+ file->Var64(&ARM9Timestamp);
+ file->Var64(&ARM9Target);
+ file->Var64(&ARM7Timestamp);
+ file->Var64(&ARM7Target);
+ file->Var64(&SysTimestamp);
+ file->Var64(&LastSysClockCycles);
+ file->Var64(&FrameStartTimestamp);
+ file->Var32(&NumFrames);
// TODO: save KeyInput????
file->Var16(&KeyCnt);
@@ -731,40 +734,51 @@ void RelocateSave(const char* path, bool write)
}
-void CalcIterationCycles()
+
+u64 NextTarget()
{
- CurIterationCycles = 16;
+ u64 ret = SysTimestamp + kMaxIterationCycles;
+ u32 mask = SchedListMask;
for (int i = 0; i < Event_MAX; i++)
{
- if (!(SchedListMask & (1<<i)))
- continue;
+ if (!mask) break;
+ if (mask & 0x1)
+ {
+ if (SchedList[i].Timestamp < ret)
+ ret = SchedList[i].Timestamp;
+ }
- if (SchedList[i].WaitCycles < CurIterationCycles)
- CurIterationCycles = SchedList[i].WaitCycles;
+ mask >>= 1;
}
+
+ return ret;
}
-void RunSystem(s32 cycles)
+void RunSystem(u64 timestamp)
{
+ SysTimestamp = timestamp;
+
+ u32 mask = SchedListMask;
for (int i = 0; i < Event_MAX; i++)
{
- if (!(SchedListMask & (1<<i)))
- continue;
-
- SchedList[i].WaitCycles -= cycles;
-
- if (SchedList[i].WaitCycles < 1)
+ if (!mask) break;
+ if (mask & 0x1)
{
- SchedListMask &= ~(1<<i);
- SchedList[i].Func(SchedList[i].Param);
+ if (SchedList[i].Timestamp <= SysTimestamp)
+ {
+ SchedListMask &= ~(1<<i);
+ SchedList[i].Func(SchedList[i].Param);
+ }
}
+
+ mask >>= 1;
}
}
u32 RunFrame()
{
- FrameSysClockCycles = 0;
+ FrameStartTimestamp = SysTimestamp;
if (!Running) return 263; // dorp
if (CPUStop & 0x40000000) return 263;
@@ -774,88 +788,55 @@ u32 RunFrame()
while (Running && GPU::TotalScanlines==0)
{
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
- CalcIterationCycles();
- s32 arm9cycles;
+ u64 target = NextTarget();
+ ARM9Target = target << ARM9ClockShift;
+ CurCPU = 0;
if (CPUStop & 0x80000000)
{
// GXFIFO stall
- // we just run the GPU and the timers.
- // the rest of the hardware is driven by the event scheduler.
+ s32 cycles = GPU3D::CyclesToRunFor();
- arm9cycles = GPU3D::CyclesToRunFor();
- arm9cycles = std::min(CurIterationCycles, arm9cycles);
- RunTightTimers(0, arm9cycles);
-
-#ifdef DEBUG_CHECK_DESYNC
- dbg_CyclesARM9 += arm9cycles;
-#endif // DEBUG_CHECK_DESYNC
+ ARM9Timestamp = std::min(ARM9Target, ARM9Timestamp+(cycles<<ARM9ClockShift));
}
else if (CPUStop & 0x0FFF)
{
- s32 cycles = CurIterationCycles;
-
- cycles = DMAs[0]->Run(cycles);
- if (cycles > 0 && !(CPUStop & 0x80000000))
- cycles = DMAs[1]->Run(cycles);
- if (cycles > 0 && !(CPUStop & 0x80000000))
- cycles = DMAs[2]->Run(cycles);
- if (cycles > 0 && !(CPUStop & 0x80000000))
- cycles = DMAs[3]->Run(cycles);
-
- arm9cycles = CurIterationCycles - cycles;
+ DMAs[0]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[1]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[2]->Run();
+ if (!(CPUStop & 0x80000000)) DMAs[3]->Run();
}
else
{
- ARM9->CyclesToRun = CurIterationCycles << 1;
- CurCPU = 1; ARM9->Execute(); CurCPU = 0;
- arm9cycles = ARM9->Cycles >> 1;
- RunTightTimers(0, arm9cycles);
+ ARM9->Execute();
}
- RunLooseTimers(0, arm9cycles);
- GPU3D::Run(arm9cycles);
+ RunTimers(0);
+ GPU3D::Run();
- s32 ndscyclestorun = arm9cycles;
+ target = ARM9Timestamp >> ARM9ClockShift;
+ CurCPU = 1;
- // ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9
- if (ARM7Offset > ndscyclestorun)
+ while (ARM7Timestamp < target)
{
- ARM7Offset -= ndscyclestorun;
- }
- else
- if (CPUStop & 0x0FFF0000)
- {
- s32 cycles = ndscyclestorun - ARM7Offset;
+ ARM7Target = target; // might be changed by a reschedule
- cycles = DMAs[4]->Run(cycles);
- if (cycles > 0)
- cycles = DMAs[5]->Run(cycles);
- if (cycles > 0)
- cycles = DMAs[6]->Run(cycles);
- if (cycles > 0)
- cycles = DMAs[7]->Run(cycles);
+ if (CPUStop & 0x0FFF0000)
+ {
+ DMAs[4]->Run();
+ DMAs[5]->Run();
+ DMAs[6]->Run();
+ DMAs[7]->Run();
+ }
+ else
+ {
+ ARM7->Execute();
+ }
- ARM7Offset = -cycles;
+ RunTimers(1);
}
- else
- {
- ARM7->CyclesToRun = ndscyclestorun - ARM7Offset;
- CurCPU = 2; ARM7->Execute(); CurCPU = 0;
- ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun;
- RunTightTimers(1, ARM7->Cycles);
- }
-
-#ifdef DEBUG_CHECK_DESYNC
- dbg_CyclesSys += ndscyclestorun;
-#endif // DEBUG_CHECK_DESYNC
-
- RunLooseTimers(1, ndscyclestorun);
- RunSystem(ndscyclestorun);
- SysClockCycles += ndscyclestorun;
- LastSysClockCycles += ndscyclestorun;
- FrameSysClockCycles += ndscyclestorun;
+ RunSystem(target);
if (CPUStop & 0x40000000)
{
@@ -867,12 +848,11 @@ u32 RunFrame()
}
#ifdef DEBUG_CHECK_DESYNC
- printf("[%08X%08X] ARM9=%ld timer9=%ld, ARM7=%ld timer7=%ld\n",
- (u32)(dbg_CyclesSys>>32), (u32)dbg_CyclesSys,
- dbg_CyclesARM9-dbg_CyclesSys,
- dbg_CyclesTimer9-dbg_CyclesSys,
- dbg_CyclesARM7-dbg_CyclesSys,
- dbg_CyclesTimer7-dbg_CyclesSys);
+ printf("[%08X%08X] ARM9=%ld, ARM7=%ld, GPU=%ld\n",
+ (u32)(SysTimestamp>>32), (u32)SysTimestamp,
+ (ARM9Timestamp>>1)-SysTimestamp,
+ ARM7Timestamp-SysTimestamp,
+ GPU3D::Timestamp-SysTimestamp);
#endif
NumFrames++;
@@ -880,26 +860,18 @@ u32 RunFrame()
return GPU::TotalScanlines;
}
-void Reschedule()
+void Reschedule(u64 target)
{
- s32 oldcycles = CurIterationCycles;
- CalcIterationCycles();
-
- if (CurIterationCycles >= oldcycles)
+ if (CurCPU == 0)
{
- CurIterationCycles = oldcycles;
- return;
+ if (target < (ARM9Target >> ARM9ClockShift))
+ ARM9Target = (target << ARM9ClockShift);
}
-
- if (CurCPU == 0)
+ else
{
- CurIterationCycles = oldcycles;
- return;
+ if (target < ARM7Target)
+ ARM7Target = target;
}
-
- if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1;
- else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset;
- // this is all. a reschedule shouldn't happen during DMA or GXFIFO stall.
}
void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param)
@@ -913,12 +885,13 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para
SchedEvent* evt = &SchedList[id];
if (periodic)
- evt->WaitCycles += delay;
+ evt->Timestamp += delay;
else
{
- if (CurCPU == 1) evt->WaitCycles = delay + (ARM9->Cycles >> 1);
- else if (CurCPU == 2) evt->WaitCycles = delay + ARM7->Cycles;
- else evt->WaitCycles = delay;
+ if (CurCPU == 0)
+ evt->Timestamp = (ARM9Timestamp >> ARM9ClockShift) + delay;
+ else
+ evt->Timestamp = ARM7Timestamp + delay;
}
evt->Func = func;
@@ -926,7 +899,7 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para
SchedListMask |= (1<<id);
- Reschedule();
+ Reschedule(evt->Timestamp);
}
void CancelEvent(u32 id)
@@ -1156,27 +1129,22 @@ u64 GetSysClockCycles(int num)
if (num == 0 || num == 2)
{
- if (num == 0) ret = SysClockCycles;
- else if (num == 2) ret = FrameSysClockCycles;
+ if (CurCPU == 0)
+ ret = ARM9Timestamp >> ARM9ClockShift;
+ else
+ ret = ARM7Timestamp;
- if (CurCPU == 1) ret += (ARM9->Cycles >> 1);
- else if (CurCPU == 2) ret += ARM7->Cycles;
+ if (num == 2) ret -= FrameStartTimestamp;
}
else if (num == 1)
{
ret = LastSysClockCycles;
LastSysClockCycles = 0;
- if (CurCPU == 1)
- {
- ret += (ARM9->Cycles >> 1);
- LastSysClockCycles = -(ARM9->Cycles >> 1);
- }
- else if (CurCPU == 2)
- {
- ret += ARM7->Cycles;
- LastSysClockCycles = -ARM7->Cycles;
- }
+ if (CurCPU == 0)
+ LastSysClockCycles = ARM9Timestamp >> ARM9ClockShift;
+ else
+ LastSysClockCycles = ARM7Timestamp;
}
return ret;
@@ -1271,17 +1239,11 @@ void NocashPrint(u32 ncpu, u32 addr)
void HandleTimerOverflow(u32 tid)
{
Timer* timer = &Timers[tid];
- //if ((timer->Cnt & 0x84) != 0x80) return;
timer->Counter += timer->Reload << 16;
if (timer->Cnt & (1<<6))
SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3));
- //u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift);
- //delay -= (timer->Counter - timer->Reload) >> timer->CycleShift;
- //printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter);
- //ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid);
-
if ((tid & 0x3) == 3)
return;
@@ -1310,8 +1272,6 @@ void HandleTimerOverflow(u32 tid)
void RunTimer(u32 tid, s32 cycles)
{
Timer* timer = &Timers[tid];
- //if ((timer->Cnt & 0x84) != 0x80)
- // return;
u32 oldcount = timer->Counter;
timer->Counter += (cycles << timer->CycleShift);
@@ -1319,29 +1279,22 @@ void RunTimer(u32 tid, s32 cycles)
HandleTimerOverflow(tid);
}
-void RunTightTimers(u32 cpu, s32 cycles)
+void RunTimers(u32 cpu)
{
register u32 timermask = TimerCheckMask[cpu];
+ s32 cycles;
+
+ if (cpu == 0)
+ cycles = (ARM9Timestamp >> ARM9ClockShift) - TimerTimestamp[0];
+ else
+ cycles = ARM7Timestamp - TimerTimestamp[1];
if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles);
if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles);
if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles);
if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles);
-#ifdef DEBUG_CHECK_DESYNC
- if (cpu) dbg_CyclesTimer7 += cycles;
- else dbg_CyclesTimer9 += cycles;
-#endif // DEBUG_CHECK_DESYNC
-}
-
-void RunLooseTimers(u32 cpu, s32 cycles)
-{
- register u32 timermask = TimerCheckMask[cpu];
-
- if (timermask & 0x10) RunTimer((cpu<<2)+0, cycles);
- if (timermask & 0x20) RunTimer((cpu<<2)+1, cycles);
- if (timermask & 0x40) RunTimer((cpu<<2)+2, cycles);
- if (timermask & 0x80) RunTimer((cpu<<2)+3, cycles);
+ TimerTimestamp[cpu] += cycles;
}
@@ -1391,6 +1344,7 @@ const s32 TimerPrescaler[4] = {0, 6, 8, 10};
u16 TimerGetCounter(u32 timer)
{
+ RunTimers(timer>>2);
u32 ret = Timers[timer].Counter;
return ret >> 16;
@@ -1421,10 +1375,10 @@ void TimerStart(u32 id, u16 cnt)
if ((cnt & 0x84) == 0x80)
{
u32 tmask;
- if ((cnt & 0x03) == 0)
+ //if ((cnt & 0x03) == 0)
tmask = 0x01 << (id&0x3);
- else
- tmask = 0x10 << (id&0x3);
+ //else
+ // tmask = 0x10 << (id&0x3);
TimerCheckMask[id>>2] |= tmask;
}
@@ -1579,7 +1533,7 @@ void debug(u32 param)
// printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]);
/*FILE*
- shit = fopen("debug/justbeep.bin", "wb");
+ shit = fopen("debug/colourfuck.bin", "wb");
for (u32 i = 0x02000000; i < 0x02400000; i+=4)
{
u32 val = ARM7Read32(i);