diff options
-rw-r--r-- | src/ARM.cpp | 37 | ||||
-rw-r--r-- | src/DMA.cpp | 32 | ||||
-rw-r--r-- | src/GPU.cpp | 5 | ||||
-rw-r--r-- | src/NDS.cpp | 100 | ||||
-rw-r--r-- | src/NDS.h | 12 |
5 files changed, 133 insertions, 53 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 655daa8..8566663 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -147,7 +147,7 @@ void ARM::SetupCodeMem(u32 addr) NDS::ARM7GetMemRegion(addr, false, &CodeMem); } } -namespace GPU{extern u16 VCount;} + void ARMv5::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) @@ -162,8 +162,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]); //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); // R0=DMA# R1=src R2=size - if (addr==0x1FFD9E0) printf("[%03d] FMVdec\n", GPU::VCount); - if (R[15]==0x1FFDF40) printf("[%03d] FMVdec FINISHED\n", GPU::VCount); u32 oldregion = R[15] >> 24; u32 newregion = addr >> 24; @@ -438,7 +436,7 @@ void ARMv5::DataAbort() R[14] = R[15] + (oldcpsr & 0x20 ? 6 : 4); JumpTo(ExceptionBase + 0x10); } -extern u64 arm9total, arm7total, arm9timer, arm7timer; + s32 ARMv5::Execute() { if (Halted) @@ -451,12 +449,14 @@ s32 ARMv5::Execute() { Halted = 0; if (NDS::IME[0] & 0x1) - TriggerIRQ(); //!! potential drift + TriggerIRQ(); } else { Cycles = CyclesToRun; - arm9total+=(CyclesToRun>>1);//arm9timer+=(CyclesToRun>>1); +#ifdef DEBUG_CHECK_DESYNC + NDS::dbg_CyclesARM9 += (CyclesToRun >> ClockShift); +#endif // DEBUG_CHECK_DESYNC //NDS::RunTightTimers(0, CyclesToRun >> ClockShift); return Cycles; } @@ -511,7 +511,7 @@ s32 ARMv5::Execute() { if (Halted == 1 && Cycles < CyclesToRun) { - s32 diff = CyclesToRun - Cycles; + //s32 diff = CyclesToRun - Cycles; Cycles = CyclesToRun; //NDS::RunTightTimers(0, diff >> ClockShift); //arm9timer += (diff>>1); @@ -528,13 +528,15 @@ s32 ARMv5::Execute() if (Halted == 2) Halted = 0; - if (Cycles > lastcycles) + /*if (Cycles > lastcycles) { //s32 diff = Cycles - lastcycles;arm9timer+=(diff>>1); //NDS::RunTightTimers(0, diff >> ClockShift); - } + }*/ +#ifdef DEBUG_CHECK_DESYNC + NDS::dbg_CyclesARM9 += (Cycles >> ClockShift); +#endif // DEBUG_CHECK_DESYNC -arm9total+=(Cycles>>1); return Cycles; } @@ -555,8 +557,10 @@ s32 ARMv4::Execute() else { Cycles = CyclesToRun; +#ifdef DEBUG_CHECK_DESYNC + NDS::dbg_CyclesARM7 += CyclesToRun; +#endif // DEBUG_CHECK_DESYNC //NDS::RunTightTimers(1, CyclesToRun); - arm7total+=CyclesToRun; //arm7timer+=CyclesToRun; return Cycles; } } @@ -605,7 +609,7 @@ s32 ARMv4::Execute() { if (Halted == 1 && Cycles < CyclesToRun) { - s32 diff = CyclesToRun - Cycles; + //s32 diff = CyclesToRun - Cycles; Cycles = CyclesToRun; //NDS::RunTightTimers(1, diff); //arm7timer += diff; @@ -622,12 +626,15 @@ s32 ARMv4::Execute() if (Halted == 2) Halted = 0; - if (Cycles > lastcycles) + /*if (Cycles > lastcycles) { //s32 diff = Cycles - lastcycles;arm7timer+=(diff); //NDS::RunTightTimers(1, diff); - } + }*/ + +#ifdef DEBUG_CHECK_DESYNC + NDS::dbg_CyclesARM7 += Cycles; +#endif // DEBUG_CHECK_DESYNC -arm7total+=Cycles; return Cycles; } diff --git a/src/DMA.cpp b/src/DMA.cpp index 2f6674d..7bbf980 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -170,8 +170,8 @@ void DMA::Start() if ((Cnt & 0x00600000) == 0x00600000) CurDstAddr = DstAddr; -if(CPU==0&&StartMode!=7&&false) - printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); + + //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); IsGXFIFODMA = (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0); @@ -185,12 +185,16 @@ if(CPU==0&&StartMode!=7&&false) InProgress = true; NDS::StopCPU(CPU, 1<<Num); } -extern u64 arm9total, arm7total; + s32 DMA::Run(s32 cycles) { if (!Running) return cycles; -s32 startc = cycles; + +#ifdef DEBUG_CHECK_DESYNC + s32 startc = cycles; +#endif // DEBUG_CHECK_DESYNC + Executing = true; // add NS penalty for first accesses in burst @@ -257,7 +261,7 @@ s32 startc = cycles; cycles -= unitcycles; NDS::RunTightTimers(CPU, lastcycles-cycles); -//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);} + lastcycles = cycles; writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -325,7 +329,7 @@ s32 startc = cycles; cycles -= unitcycles; NDS::RunTightTimers(CPU, lastcycles-cycles); -//if(CPU){arm7timer+=(lastcycles-cycles);}else{arm9timer+=(lastcycles-cycles);} + lastcycles = cycles; writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -341,7 +345,7 @@ s32 startc = cycles; Executing = false; Stall = false; -//if (CPU) printf("ran DMA for %d cycles (asked %d)\n", startc-cycles, startc); + if (RemCount) { if (IterCount == 0) @@ -352,7 +356,12 @@ s32 startc = cycles; if (StartMode == 0x07) GPU3D::CheckFIFODMA(); } -if(CPU){arm7total+=(startc-cycles);}else{arm9total+=(startc-cycles);} + +#ifdef DEBUG_CHECK_DESYNC + if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); + else NDS::dbg_CyclesARM9 += (startc-cycles); +#endif // DEBUG_CHECK_DESYNC + return cycles; } @@ -365,6 +374,11 @@ if(CPU){arm7total+=(startc-cycles);}else{arm9total+=(startc-cycles);} Running = 0; InProgress = false; NDS::ResumeCPU(CPU, 1<<Num); -if(CPU){arm7total+=(startc-(cycles));}else{arm9total+=(startc-(cycles));} + +#ifdef DEBUG_CHECK_DESYNC + if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); + else NDS::dbg_CyclesARM9 += (startc-cycles); +#endif // DEBUG_CHECK_DESYNC + return cycles; } diff --git a/src/GPU.cpp b/src/GPU.cpp index 7da78a9..aba97a5 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -653,12 +653,12 @@ void MapVRAM_I(u32 bank, u8 cnt) void DisplaySwap(u32 val) { if (val) - {printf("main GPU on top screen\n"); + { GPU2D_A->SetFramebuffer(&Framebuffer[256*0]); GPU2D_B->SetFramebuffer(&Framebuffer[256*192]); } else - {printf("main GPU on bottom screen\n"); + { GPU2D_A->SetFramebuffer(&Framebuffer[256*192]); GPU2D_B->SetFramebuffer(&Framebuffer[256*0]); } @@ -813,7 +813,6 @@ void StartScanline(u32 line) GPU2D_A->VBlank(); GPU2D_B->VBlank(); GPU3D::VBlank(); - //printf("VBlank. PC=%08X\n", NDS::GetPC(0)); } else if (VCount == 144) { diff --git a/src/NDS.cpp b/src/NDS.cpp index 838d396..d267cb7 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -31,10 +31,18 @@ #include "Wifi.h" #include "Platform.h" -u64 arm9total=0, arm7total=0, arm9timer=0, arm7timer=0, systotal=0; + namespace NDS { +#ifdef DEBUG_CHECK_DESYNC +u64 dbg_CyclesSys; +u64 dbg_CyclesARM9; +u64 dbg_CyclesTimer9; +u64 dbg_CyclesARM7; +u64 dbg_CyclesTimer7; +#endif + // timing notes // // * this implementation is technically wrong for VRAM @@ -370,6 +378,14 @@ void Reset() FILE* f; u32 i; +#ifdef DEBUG_CHECK_DESYNC + dbg_CyclesSys = 0; + dbg_CyclesARM9 = 0; + dbg_CyclesTimer9 = 0; + dbg_CyclesARM7 = 0; + dbg_CyclesTimer7 = 0; +#endif // DEBUG_CHECK_DESYNC + f = Config::GetConfigFile("bios9.bin", "rb"); if (!f) { @@ -737,7 +753,7 @@ u32 RunFrame() // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1 CalcIterationCycles(); s32 arm9cycles; -u64 kiki = arm9total; + if (CPUStop & 0x80000000) { // GXFIFO stall @@ -746,16 +762,24 @@ u64 kiki = arm9total; arm9cycles = GPU3D::CyclesToRunFor(); arm9cycles = std::min(CurIterationCycles, arm9cycles); - RunTightTimers(0, arm9cycles); arm9total+=arm9cycles;//arm9timer += arm9cycles; + RunTightTimers(0, arm9cycles); + +#ifdef DEBUG_CHECK_DESYNC + dbg_CyclesARM9 += arm9cycles; +#endif // DEBUG_CHECK_DESYNC } else if (CPUStop & 0x0FFF) { s32 cycles = CurIterationCycles; + cycles = DMAs[0]->Run(cycles); - if (cycles > 0) cycles = DMAs[1]->Run(cycles); - if (cycles > 0) cycles = DMAs[2]->Run(cycles); - if (cycles > 0) cycles = DMAs[3]->Run(cycles); - //printf("DMAs been running for %d cycles, %d, asked for %d\n", CurIterationCycles-cycles, (u32)(arm9total-kiki), CurIterationCycles); + if (cycles > 0 && !(CPUStop & 0x80000000)) + cycles = DMAs[1]->Run(cycles); + if (cycles > 0 && !(CPUStop & 0x80000000)) + cycles = DMAs[2]->Run(cycles); + if (cycles > 0 && !(CPUStop & 0x80000000)) + cycles = DMAs[3]->Run(cycles); + arm9cycles = CurIterationCycles - cycles; } else @@ -763,14 +787,14 @@ u64 kiki = arm9total; ARM9->CyclesToRun = CurIterationCycles << 1; CurCPU = 1; ARM9->Execute(); CurCPU = 0; arm9cycles = ARM9->Cycles >> 1; - RunTightTimers(0, arm9cycles); //arm9timer += arm9cycles; + RunTightTimers(0, arm9cycles); } -//arm9total += arm9cycles; + RunLooseTimers(0, arm9cycles); GPU3D::Run(arm9cycles); s32 ndscyclestorun = arm9cycles; -s32 zarp; + // ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9 if (ARM7Offset > ndscyclestorun) { @@ -779,29 +803,42 @@ s32 zarp; else if (CPUStop & 0x0FFF0000) { - s32 cycles = ndscyclestorun - ARM7Offset; zarp=cycles; + s32 cycles = ndscyclestorun - ARM7Offset; + cycles = DMAs[4]->Run(cycles); - if (cycles > 0) cycles = DMAs[5]->Run(cycles); - if (cycles > 0) cycles = DMAs[6]->Run(cycles); - if (cycles > 0) cycles = DMAs[7]->Run(cycles); + if (cycles > 0) + cycles = DMAs[5]->Run(cycles); + if (cycles > 0) + cycles = DMAs[6]->Run(cycles); + if (cycles > 0) + cycles = DMAs[7]->Run(cycles); + ARM7Offset = -cycles; - printf("ARM7 DMA: cyclestorun=%d, req=%d, offset=%d\n", ndscyclestorun, zarp, ARM7Offset); } else { - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; zarp=ARM7->CyclesToRun; + ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; CurCPU = 2; ARM7->Execute(); CurCPU = 0; ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; - RunTightTimers(1, ARM7->Cycles); //arm7timer += ndscyclestorun; + RunTightTimers(1, ARM7->Cycles); } -//arm7total += zarp + ARM7Offset;//ARM7->Cycles;//ndscyclestorun+ARM7Offset; -systotal += ndscyclestorun; - RunLooseTimers(1, ndscyclestorun);// + ARM7Offset); + +#ifdef DEBUG_CHECK_DESYNC + dbg_CyclesSys += ndscyclestorun; +#endif // DEBUG_CHECK_DESYNC + + RunLooseTimers(1, ndscyclestorun); RunSystem(ndscyclestorun); } - //printf("cycles: %ld %ld, %ld %ld, %ld\n", arm9total, arm9timer, arm7total, arm7timer, systotal); - printf("drift: [%ld] %ld %ld, %ld %ld\n", systotal, arm9total-systotal, arm9timer-systotal, arm7total-systotal, arm7timer-systotal); +#ifdef DEBUG_CHECK_DESYNC + printf("[%08X%08X] ARM9=%ld timer9=%ld, ARM7=%ld timer7=%ld\n", + (u32)(dbg_CyclesSys>>32), (u32)dbg_CyclesSys, + dbg_CyclesARM9-dbg_CyclesSys, + dbg_CyclesTimer9-dbg_CyclesSys, + dbg_CyclesARM7-dbg_CyclesSys, + dbg_CyclesTimer7-dbg_CyclesSys); +#endif return GPU::TotalScanlines; } @@ -816,7 +853,13 @@ void Reschedule() CurIterationCycles = oldcycles; return; } -//printf("Reschedule %d->%d while in %d, %08X\n", oldcycles, CurIterationCycles, CurCPU, CPUStop); + + if (CurCPU == 0) + { + CurIterationCycles = oldcycles; + return; + } + if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1; else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset; // this is all. a reschedule shouldn't happen during DMA or GXFIFO stall. @@ -845,7 +888,7 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para evt->Param = param; SchedListMask |= (1<<id); -//printf("scheduling event %d for within %d cycles\n", id, delay); + Reschedule(); } @@ -1020,7 +1063,7 @@ void ResumeCPU(u32 cpu, u32 mask) void GXFIFOStall() { if (CPUStop & 0x80000000) return; -printf("GXFIFO STALL\n"); + CPUStop |= 0x80000000; if (CurCPU == 1) ARM9->Halt(2); @@ -1099,11 +1142,16 @@ void RunTimer(u32 tid, s32 cycles) void RunTightTimers(u32 cpu, s32 cycles) { register u32 timermask = TimerCheckMask[cpu]; -if(cpu)arm7timer+=cycles;else arm9timer+=cycles; + if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles); if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles); if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles); if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles); + +#ifdef DEBUG_CHECK_DESYNC + if (cpu) dbg_CyclesTimer7 += cycles; + else dbg_CyclesTimer9 += cycles; +#endif // DEBUG_CHECK_DESYNC } void RunLooseTimers(u32 cpu, s32 cycles) @@ -22,9 +22,21 @@ #include "Savestate.h" #include "types.h" +// when touching the main loop/timing code, pls test a lot of shit +// with this enabled, to make sure it doesn't desync +//#define DEBUG_CHECK_DESYNC + namespace NDS { +#ifdef DEBUG_CHECK_DESYNC +extern u64 dbg_CyclesSys; +extern u64 dbg_CyclesARM9; +extern u64 dbg_CyclesTimer9; +extern u64 dbg_CyclesARM7; +extern u64 dbg_CyclesTimer7; +#endif + enum { Event_LCD = 0, |