diff options
-rw-r--r-- | src/ARM.cpp | 85 | ||||
-rw-r--r-- | src/ARM.h | 18 | ||||
-rw-r--r-- | src/CP15.cpp | 10 | ||||
-rw-r--r-- | src/DMA.cpp | 279 | ||||
-rw-r--r-- | src/DMA.h | 5 | ||||
-rw-r--r-- | src/GPU.cpp | 2 | ||||
-rw-r--r-- | src/GPU3D.cpp | 30 | ||||
-rw-r--r-- | src/GPU3D.h | 4 | ||||
-rw-r--r-- | src/NDS.cpp | 304 | ||||
-rw-r--r-- | src/NDS.h | 17 | ||||
-rw-r--r-- | src/NDSCart.cpp | 4 | ||||
-rw-r--r-- | src/Savestate.h | 2 | ||||
-rw-r--r-- | src/Wifi.cpp | 2 | ||||
-rw-r--r-- | src/melon_fopen.cpp | 2 |
14 files changed, 356 insertions, 408 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index a8ac0cc..b71df5c 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -61,8 +61,6 @@ ARM::ARM(u32 num) { // well uh Num = num; - - SetClockShift(0); // safe default } ARM::~ARM() @@ -110,7 +108,7 @@ void ARM::DoSavestate(Savestate* file) file->Section((char*)(Num ? "ARM7" : "ARM9")); file->Var32((u32*)&Cycles); - file->Var32((u32*)&CyclesToRun); + //file->Var32((u32*)&CyclesToRun); file->Var32(&Halted); file->VarArray(R, 16*sizeof(u32)); @@ -450,7 +448,7 @@ void ARMv5::DataAbort() JumpTo(ExceptionBase + 0x10); } -s32 ARMv5::Execute() +void ARMv5::Execute() { if (Halted) { @@ -466,19 +464,12 @@ s32 ARMv5::Execute() } else { - Cycles = CyclesToRun; -#ifdef DEBUG_CHECK_DESYNC - NDS::dbg_CyclesARM9 += (CyclesToRun >> ClockShift); -#endif // DEBUG_CHECK_DESYNC - //NDS::RunTightTimers(0, CyclesToRun >> ClockShift); - return Cycles; + NDS::ARM9Timestamp = NDS::ARM9Target; + return; } } - Cycles = 0; - s32 lastcycles = 0; - - while (Cycles < CyclesToRun) + while (NDS::ARM9Timestamp < NDS::ARM9Target) { if (CPSR & 0x20) // THUMB { @@ -515,19 +506,12 @@ s32 ARMv5::Execute() AddCycles_C(); } - //s32 diff = Cycles - lastcycles; - //NDS::RunTightTimers(0, diff >> ClockShift); - //lastcycles = Cycles - (diff & ClockDiffMask); - // TODO optimize this shit!!! if (Halted) { - if (Halted == 1 && Cycles < CyclesToRun) + if (Halted == 1 && NDS::ARM9Timestamp < NDS::ARM9Target) { - //s32 diff = CyclesToRun - Cycles; - Cycles = CyclesToRun; - //NDS::RunTightTimers(0, diff >> ClockShift); - //arm9timer += (diff>>1); + NDS::ARM9Timestamp = NDS::ARM9Target; } break; } @@ -536,24 +520,16 @@ s32 ARMv5::Execute() if (NDS::IME[0] & 0x1) TriggerIRQ(); } + + NDS::ARM9Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) Halted = 0; - - /*if (Cycles > lastcycles) - { - s32 diff = Cycles - lastcycles; - //NDS::RunTightTimers(0, diff >> ClockShift); - }*/ -#ifdef DEBUG_CHECK_DESYNC - NDS::dbg_CyclesARM9 += (Cycles >> ClockShift); -#endif // DEBUG_CHECK_DESYNC - - return Cycles; } -s32 ARMv4::Execute() +void ARMv4::Execute() { if (Halted) { @@ -569,19 +545,12 @@ s32 ARMv4::Execute() } else { - Cycles = CyclesToRun; -#ifdef DEBUG_CHECK_DESYNC - NDS::dbg_CyclesARM7 += CyclesToRun; -#endif // DEBUG_CHECK_DESYNC - //NDS::RunTightTimers(1, CyclesToRun); - return Cycles; + NDS::ARM7Timestamp = NDS::ARM7Target; + return; } } - Cycles = 0; - s32 lastcycles = 0; - - while (Cycles < CyclesToRun) + while (NDS::ARM7Timestamp < NDS::ARM7Target) { if (CPSR & 0x20) // THUMB { @@ -613,19 +582,12 @@ s32 ARMv4::Execute() AddCycles_C(); } - //s32 diff = Cycles - lastcycles; - //NDS::RunTightTimers(1, diff); - //lastcycles = Cycles; - // TODO optimize this shit!!! if (Halted) { - if (Halted == 1 && Cycles < CyclesToRun) + if (Halted == 1 && NDS::ARM7Timestamp < NDS::ARM7Target) { - //s32 diff = CyclesToRun - Cycles; - Cycles = CyclesToRun; - //NDS::RunTightTimers(1, diff); - //arm7timer += diff; + NDS::ARM7Timestamp = NDS::ARM7Target; } break; } @@ -634,20 +596,11 @@ s32 ARMv4::Execute() if (NDS::IME[1] & 0x1) TriggerIRQ(); } + + NDS::ARM7Timestamp += Cycles; + Cycles = 0; } if (Halted == 2) Halted = 0; - - /*if (Cycles > lastcycles) - { - //s32 diff = Cycles - lastcycles; - //NDS::RunTightTimers(1, diff); - }*/ - -#ifdef DEBUG_CHECK_DESYNC - NDS::dbg_CyclesARM7 += Cycles; -#endif // DEBUG_CHECK_DESYNC - - return Cycles; } @@ -42,12 +42,6 @@ public: virtual void DoSavestate(Savestate* file); - void SetClockShift(u32 shift) - { - ClockShift = shift; - ClockDiffMask = (1<<shift) - 1; - } - virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; void RestoreCPSR(); @@ -67,7 +61,7 @@ public: } } - virtual s32 Execute() = 0; + virtual void Execute() = 0; bool CheckCondition(u32 code) { @@ -122,13 +116,7 @@ public: u32 Num; - // shift relative to system clock - // 0=33MHz 1=66MHz 2=133MHz - u32 ClockShift; - u32 ClockDiffMask; - s32 Cycles; - s32 CyclesToRun; u32 Halted; u32 CodeRegion; @@ -170,7 +158,7 @@ public: void PrefetchAbort(); void DataAbort(); - s32 Execute(); + void Execute(); // all code accesses are forced nonseq 32bit u32 CodeRead32(u32 addr, bool branch); @@ -287,7 +275,7 @@ public: void JumpTo(u32 addr, bool restorecpsr = false); - s32 Execute(); + void Execute(); u16 CodeRead16(u32 addr) { diff --git a/src/CP15.cpp b/src/CP15.cpp index 7da41e8..fe22c8a 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -265,7 +265,7 @@ void ARMv5::UpdateRegionTimings(u32 addrstart, u32 addrend) } else { - MemTimings[i][0] = bustimings[2] << ClockShift; + MemTimings[i][0] = bustimings[2] << NDS::ARM9ClockShift; } if (pu & 0x10) @@ -276,9 +276,9 @@ void ARMv5::UpdateRegionTimings(u32 addrstart, u32 addrend) } else { - MemTimings[i][1] = bustimings[0] << ClockShift; - MemTimings[i][2] = bustimings[2] << ClockShift; - MemTimings[i][3] = bustimings[3] << ClockShift; + MemTimings[i][1] = bustimings[0] << NDS::ARM9ClockShift; + MemTimings[i][2] = bustimings[2] << NDS::ARM9ClockShift; + MemTimings[i][3] = bustimings[3] << NDS::ARM9ClockShift; } } } @@ -358,7 +358,7 @@ void ARMv5::ICacheLookup(u32 addr) // ouch :/ //printf("cache miss %08X: %d/%d\n", addr, NDS::ARM9MemTimings[addr >> 14][2], NDS::ARM9MemTimings[addr >> 14][3]); - CodeCycles = (NDS::ARM9MemTimings[addr >> 14][2] + (NDS::ARM9MemTimings[addr >> 14][3] * 7)) << ClockShift; + CodeCycles = (NDS::ARM9MemTimings[addr >> 14][2] + (NDS::ARM9MemTimings[addr >> 14][3] * 7)) << NDS::ARM9ClockShift; CurICacheLine = ptr; } diff --git a/src/DMA.cpp b/src/DMA.cpp index 7bbf980..01ce04e 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -45,6 +45,8 @@ // * applied to all accesses for mainRAM->mainRAM, resulting in timings of 16-18 cycles per unit // // TODO: GBA slot +// TODO: re-add initial NS delay +// TODO: timings are nonseq when address is fixed/decrementing DMA::DMA(u32 cpu, u32 num) @@ -186,160 +188,94 @@ void DMA::Start() NDS::StopCPU(CPU, 1<<Num); } -s32 DMA::Run(s32 cycles) +void DMA::Run() { - if (!Running) - return cycles; + if (!Running) return; + if (CPU == 0) return Run9(); + else return Run7(); +} -#ifdef DEBUG_CHECK_DESYNC - s32 startc = cycles; -#endif // DEBUG_CHECK_DESYNC +void DMA::Run9() +{ + if (NDS::ARM9Timestamp >= NDS::ARM9Target) return; Executing = true; // add NS penalty for first accesses in burst - // note: this seems to only apply when starting DMA 'in the void' - // for example, the aging cart DMA PRIORITY test: - // starts a big DMA immediately, and a small DMA upon HBlank - // each pulling from a timer incrementing once per cycle - // it expects that the values be increasing linearly (2c/unit) - // even as the small DMA starts and ends bool burststart = (Running == 2); Running = 1; s32 unitcycles; - s32 lastcycles = cycles; + //s32 lastcycles = cycles; - if (!(Cnt & 0x04000000)) + if (!(Cnt & (1<<26))) { - if (CPU == 0) + if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) { - if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) - { - unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]; - } - else - { - unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][1] + NDS::ARM9MemTimings[CurDstAddr >> 14][1]; - if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) - unitcycles++; - - if (burststart) - { - cycles -= 2; - cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]); - cycles += unitcycles; - } - } + unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]; } else { - if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) - { - unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]; - } - else + unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][1] + NDS::ARM9MemTimings[CurDstAddr >> 14][1]; + if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) + unitcycles++; + + /*if (burststart) { - unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][1] + NDS::ARM7MemTimings[CurDstAddr >> 15][1]; - if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) - unitcycles++; - - if (burststart) - { - cycles -= 2; - cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]); - cycles += unitcycles; - } - } + cycles -= 2; + cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][0] + NDS::ARM9MemTimings[CurDstAddr >> 14][0]); + cycles += unitcycles; + }*/ } - u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; - void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; - while (IterCount > 0 && !Stall) { - cycles -= unitcycles; - - NDS::RunTightTimers(CPU, lastcycles-cycles); + NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - lastcycles = cycles; - - writefn(CurDstAddr, readfn(CurSrcAddr)); + NDS::ARM9Write16(CurDstAddr, NDS::ARM9Read16(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<1; CurDstAddr += DstAddrInc<<1; IterCount--; RemCount--; - if (cycles <= 0) break; + if (NDS::ARM9Timestamp >= NDS::ARM9Target) break; } } else { - if (CPU == 0) + if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) { - if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) - { - unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]; - } - else - { - unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][3] + NDS::ARM9MemTimings[CurDstAddr >> 14][3]; - if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) - unitcycles++; - else if ((CurSrcAddr >> 24) == 0x02) - unitcycles--; - - if (burststart) - { - cycles -= 2; - cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]); - cycles += unitcycles; - } - } + unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]; } else { - if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) - { - unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]; - } - else + unitcycles = NDS::ARM9MemTimings[CurSrcAddr >> 14][3] + NDS::ARM9MemTimings[CurDstAddr >> 14][3]; + if ((CurSrcAddr >> 24) == (CurDstAddr >> 24)) + unitcycles++; + else if ((CurSrcAddr >> 24) == 0x02) + unitcycles--; + + /*if (burststart) { - unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][3] + NDS::ARM7MemTimings[CurDstAddr >> 15][3]; - if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) - unitcycles++; - else if ((CurSrcAddr >> 24) == 0x02) - unitcycles--; - - if (burststart) - { - cycles -= 2; - cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]); - cycles += unitcycles; - } - } + cycles -= 2; + cycles -= (NDS::ARM9MemTimings[CurSrcAddr >> 14][2] + NDS::ARM9MemTimings[CurDstAddr >> 14][2]); + cycles += unitcycles; + }*/ } - u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; - void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; - while (IterCount > 0 && !Stall) { - cycles -= unitcycles; - - NDS::RunTightTimers(CPU, lastcycles-cycles); + NDS::ARM9Timestamp += (unitcycles << NDS::ARM9ClockShift); - lastcycles = cycles; - - writefn(CurDstAddr, readfn(CurSrcAddr)); + NDS::ARM9Write32(CurDstAddr, NDS::ARM9Read32(CurSrcAddr)); CurSrcAddr += SrcAddrInc<<2; CurDstAddr += DstAddrInc<<2; IterCount--; RemCount--; - if (cycles <= 0) break; + if (NDS::ARM9Timestamp >= NDS::ARM9Target) break; } } @@ -351,34 +287,131 @@ s32 DMA::Run(s32 cycles) if (IterCount == 0) { Running = 0; - NDS::ResumeCPU(CPU, 1<<Num); + NDS::ResumeCPU(0, 1<<Num); if (StartMode == 0x07) GPU3D::CheckFIFODMA(); } -#ifdef DEBUG_CHECK_DESYNC - if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); - else NDS::dbg_CyclesARM9 += (startc-cycles); -#endif // DEBUG_CHECK_DESYNC - - return cycles; + return; } - if (!(Cnt & 0x02000000)) - Cnt &= ~0x80000000; + if (!(Cnt & (1<<25))) + Cnt &= ~(1<<31); - if (Cnt & 0x40000000) - NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num); + if (Cnt & (1<<30)) + NDS::SetIRQ(0, NDS::IRQ_DMA0 + Num); Running = 0; InProgress = false; - NDS::ResumeCPU(CPU, 1<<Num); + NDS::ResumeCPU(0, 1<<Num); +} + +void DMA::Run7() +{ + if (NDS::ARM7Timestamp >= NDS::ARM7Target) return; -#ifdef DEBUG_CHECK_DESYNC - if (CPU) NDS::dbg_CyclesARM7 += (startc-cycles); - else NDS::dbg_CyclesARM9 += (startc-cycles); -#endif // DEBUG_CHECK_DESYNC + Executing = true; + + // add NS penalty for first accesses in burst + bool burststart = (Running == 2); + Running = 1; + + s32 unitcycles; + //s32 lastcycles = cycles; + + if (!(Cnt & (1<<26))) + { + if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) + { + unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]; + } + else + { + unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][1] + NDS::ARM7MemTimings[CurDstAddr >> 15][1]; + if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) + unitcycles++; + + /*if (burststart) + { + cycles -= 2; + cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][0] + NDS::ARM7MemTimings[CurDstAddr >> 15][0]); + cycles += unitcycles; + }*/ + } - return cycles; + while (IterCount > 0 && !Stall) + { + NDS::ARM7Timestamp += unitcycles; + + NDS::ARM7Write16(CurDstAddr, NDS::ARM7Read16(CurSrcAddr)); + + CurSrcAddr += SrcAddrInc<<1; + CurDstAddr += DstAddrInc<<1; + IterCount--; + RemCount--; + + if (NDS::ARM7Timestamp >= NDS::ARM7Target) break; + } + } + else + { + if ((CurSrcAddr >> 24) == 0x02 && (CurDstAddr >> 24) == 0x02) + { + unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]; + } + else + { + unitcycles = NDS::ARM7MemTimings[CurSrcAddr >> 15][3] + NDS::ARM7MemTimings[CurDstAddr >> 15][3]; + if ((CurSrcAddr >> 23) == (CurDstAddr >> 23)) + unitcycles++; + else if ((CurSrcAddr >> 24) == 0x02) + unitcycles--; + + /*if (burststart) + { + cycles -= 2; + cycles -= (NDS::ARM7MemTimings[CurSrcAddr >> 15][2] + NDS::ARM7MemTimings[CurDstAddr >> 15][2]); + cycles += unitcycles; + }*/ + } + + while (IterCount > 0 && !Stall) + { + NDS::ARM7Timestamp += unitcycles; + + NDS::ARM7Write32(CurDstAddr, NDS::ARM7Read32(CurSrcAddr)); + + CurSrcAddr += SrcAddrInc<<2; + CurDstAddr += DstAddrInc<<2; + IterCount--; + RemCount--; + + if (NDS::ARM7Timestamp >= NDS::ARM7Target) break; + } + } + + Executing = false; + Stall = false; + + if (RemCount) + { + if (IterCount == 0) + { + Running = 0; + NDS::ResumeCPU(1, 1<<Num); + } + + return; + } + + if (!(Cnt & (1<<25))) + Cnt &= ~(1<<31); + + if (Cnt & (1<<30)) + NDS::SetIRQ(1, NDS::IRQ_DMA0 + Num); + + Running = 0; + InProgress = false; + NDS::ResumeCPU(1, 1<<Num); } @@ -34,7 +34,10 @@ public: void WriteCnt(u32 val); void Start(); - s32 Run(s32 cycles); + void Run(); + + void Run9(); + void Run7(); bool IsInMode(u32 mode) { diff --git a/src/GPU.cpp b/src/GPU.cpp index ba04c84..91f47b4 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -798,7 +798,7 @@ void StartScanline(u32 line) } if (RunFIFO) - NDS::ScheduleEvent(NDS::Event_DisplayFIFO, true, 32, DisplayFIFO, 0); + NDS::ScheduleEvent(NDS::Event_DisplayFIFO, false, 32, DisplayFIFO, 0); } if (VCount == 262) diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index a4d5015..34f88da 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -91,6 +91,7 @@ // and imposes rules on when further vertex commands can run // (one every 9-cycle time slot during polygon setup) // polygon setup time is 27 cycles for a triangle and 36 for a quad +// except: only one time slot is taken if the polygon is rejected by culling/clipping // * additionally, some commands (BEGIN, LIGHT_VECTOR, BOXTEST) stall the polygon pipeline @@ -182,6 +183,7 @@ u32 GXStat; u32 ExecParams[32]; u32 ExecParamCount; +u64 Timestamp; s32 CycleCount; s32 VertexPipeline; s32 NormalPipeline; @@ -330,6 +332,7 @@ void Reset() memset(ExecParams, 0, 32*4); ExecParamCount = 0; + Timestamp = 0; CycleCount = 0; VertexPipeline = 0; NormalPipeline = 0; @@ -405,6 +408,7 @@ void DoSavestate(Savestate* file) file->VarArray(ExecParams, 32*4); file->Var32(&ExecParamCount); file->Var32((u32*)&CycleCount); + file->Var64(&Timestamp); file->Var32(&MatrixMode); @@ -2271,16 +2275,18 @@ void FinishWork(s32 cycles) GXStat &= ~(1<<27); } -void Run(s32 cycles) +void Run() { - if (!GeometryEnabled) - return; - if (FlushRequest) - return; - if (CmdPIPE->IsEmpty() && !(GXStat & (1<<27))) + if (!GeometryEnabled || FlushRequest || + (CmdPIPE->IsEmpty() && !(GXStat & (1<<27)))) + { + Timestamp = NDS::ARM9Timestamp >> NDS::ARM9ClockShift; return; + } + s32 cycles = (NDS::ARM9Timestamp >> NDS::ARM9ClockShift) - Timestamp; CycleCount -= cycles; + Timestamp = NDS::ARM9Timestamp >> NDS::ARM9ClockShift; if (CycleCount <= 0) { @@ -2465,21 +2471,27 @@ u8 Read8(u32 addr) switch (addr) { case 0x04000600: + Run(); return GXStat & 0xFF; case 0x04000601: { + Run(); return ((GXStat >> 8) & 0xFF) | (PosMatrixStackPointer & 0x1F) | ((ProjMatrixStackPointer & 0x1) << 5); } case 0x04000602: { + Run(); + u32 fifolevel = CmdFIFO->Level(); return fifolevel & 0xFF; } case 0x04000603: { + Run(); + u32 fifolevel = CmdFIFO->Level(); return ((GXStat >> 24) & 0xFF) | @@ -2505,12 +2517,16 @@ u16 Read16(u32 addr) case 0x04000600: { + Run(); + return (GXStat & 0xFFFF) | ((PosMatrixStackPointer & 0x1F) << 8) | ((ProjMatrixStackPointer & 0x1) << 13); } case 0x04000602: { + Run(); + u32 fifolevel = CmdFIFO->Level(); return (GXStat >> 16) | @@ -2545,6 +2561,8 @@ u32 Read32(u32 addr) case 0x04000600: { + Run(); + u32 fifolevel = CmdFIFO->Level(); return GXStat | diff --git a/src/GPU3D.h b/src/GPU3D.h index e1d19fc..ba3e19b 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -84,6 +84,8 @@ extern u32 RenderClearAttr1, RenderClearAttr2; extern std::array<Polygon*,2048> RenderPolygonRAM; extern u32 RenderNumPolygons; +extern u64 Timestamp; + bool Init(); void DeInit(); void Reset(); @@ -95,7 +97,7 @@ void SetEnabled(bool geometry, bool rendering); void ExecuteCommand(); s32 CyclesToRunFor(); -void Run(s32 cycles); +void Run(); void CheckFIFOIRQ(); void CheckFIFODMA(); diff --git a/src/NDS.cpp b/src/NDS.cpp index aa8b1d3..48f4b38 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -36,14 +36,6 @@ namespace NDS { -#ifdef DEBUG_CHECK_DESYNC -u64 dbg_CyclesSys; -u64 dbg_CyclesARM9; -u64 dbg_CyclesTimer9; -u64 dbg_CyclesARM7; -u64 dbg_CyclesTimer7; -#endif - // timing notes // // * this implementation is technically wrong for VRAM @@ -60,6 +52,7 @@ u64 dbg_CyclesTimer7; // * 3 / ARM9 internal: cache/TCM // // ARM9 always gets 3c nonseq penalty when using the bus (except for mainRAM where the penalty is 7c) +// /!\ 3c penalty doesn't apply to DMA! // // ARM7 only gets nonseq penalty when accessing mainRAM (7c as for ARM9) // @@ -72,14 +65,20 @@ ARMv5* ARM9; ARMv4* ARM7; u32 NumFrames; -u64 SysClockCycles; u64 LastSysClockCycles; -u32 FrameSysClockCycles; +u64 FrameStartTimestamp; -s32 CurIterationCycles; -s32 ARM7Offset; int CurCPU; +const s32 kMaxIterationCycles = 16; + +u32 ARM9ClockShift; + +// no need to worry about those overflowing, they can keep going for atleast 4350 years +u64 ARM9Timestamp, ARM9Target; +u64 ARM7Timestamp, ARM7Target; +u64 SysTimestamp; + SchedEvent SchedList[Event_MAX]; u32 SchedListMask; @@ -119,6 +118,7 @@ u16 ARM7BIOSProt; Timer Timers[8]; u8 TimerCheckMask[2]; +u64 TimerTimestamp[2]; DMA* DMAs[8]; u32 DMA9Fill[4]; @@ -270,6 +270,8 @@ void InitTimings() // (especially wrt VRAM mirroring and overlapping and whatnot). // ARM9 + // TODO: +3c nonseq waitstate doesn't apply to DMA! + // but of course mainRAM always gets 8c nonseq waitstate SetARM9RegionTimings(0x00000000, 0xFFFFFFFF, 32, 1 + 3, 1); // void @@ -384,15 +386,6 @@ void Reset() FILE* f; u32 i; -#ifdef DEBUG_CHECK_DESYNC - dbg_CyclesSys = 0; - dbg_CyclesARM9 = 0; - dbg_CyclesTimer9 = 0; - dbg_CyclesARM7 = 0; - dbg_CyclesTimer7 = 0; -#endif // DEBUG_CHECK_DESYNC - - SysClockCycles = 0; LastSysClockCycles = 0; f = melon_fopen_local("bios9.bin", "rb"); @@ -429,8 +422,12 @@ void Reset() fclose(f); } - ARM9->SetClockShift(1); - ARM7->SetClockShift(0); + // TODO for later: configure this when emulating a DSi + ARM9ClockShift = 1; + + ARM9Timestamp = 0; ARM9Target = 0; + ARM7Timestamp = 0; ARM7Target = 0; + SysTimestamp = 0; InitTimings(); @@ -481,6 +478,8 @@ void Reset() memset(Timers, 0, 8*sizeof(Timer)); TimerCheckMask[0] = 0; TimerCheckMask[1] = 0; + TimerTimestamp[0] = 0; + TimerTimestamp[1] = 0; for (i = 0; i < 8; i++) DMAs[i]->Reset(); memset(DMA9Fill, 0, 4*4); @@ -488,9 +487,6 @@ void Reset() memset(SchedList, 0, sizeof(SchedList)); SchedListMask = 0; - CurIterationCycles = 0; - ARM7Offset = 0; - KeyInput = 0x007F03FF; KeyCnt = 0; RCnt = 0; @@ -566,7 +562,7 @@ bool DoSavestate_Scheduler(Savestate* file) } file->Var32(&funcid); - file->Var32((u32*)&evt->WaitCycles); + file->Var64(&evt->Timestamp); file->Var32(&evt->Param); } } @@ -596,7 +592,7 @@ bool DoSavestate_Scheduler(Savestate* file) else evt->Func = NULL; - file->Var32((u32*)&evt->WaitCycles); + file->Var64(&evt->Timestamp); file->Var32(&evt->Param); } } @@ -651,13 +647,20 @@ bool DoSavestate(Savestate* file) file->Var32(&timer->CycleShift); } file->VarArray(TimerCheckMask, 2*sizeof(u8)); + file->VarArray(TimerTimestamp, 2*sizeof(u64)); file->VarArray(DMA9Fill, 4*sizeof(u32)); if (!DoSavestate_Scheduler(file)) return false; file->Var32(&SchedListMask); - file->Var32((u32*)&CurIterationCycles); - file->Var32((u32*)&ARM7Offset); + file->Var64(&ARM9Timestamp); + file->Var64(&ARM9Target); + file->Var64(&ARM7Timestamp); + file->Var64(&ARM7Target); + file->Var64(&SysTimestamp); + file->Var64(&LastSysClockCycles); + file->Var64(&FrameStartTimestamp); + file->Var32(&NumFrames); // TODO: save KeyInput???? file->Var16(&KeyCnt); @@ -731,40 +734,51 @@ void RelocateSave(const char* path, bool write) } -void CalcIterationCycles() + +u64 NextTarget() { - CurIterationCycles = 16; + u64 ret = SysTimestamp + kMaxIterationCycles; + u32 mask = SchedListMask; for (int i = 0; i < Event_MAX; i++) { - if (!(SchedListMask & (1<<i))) - continue; + if (!mask) break; + if (mask & 0x1) + { + if (SchedList[i].Timestamp < ret) + ret = SchedList[i].Timestamp; + } - if (SchedList[i].WaitCycles < CurIterationCycles) - CurIterationCycles = SchedList[i].WaitCycles; + mask >>= 1; } + + return ret; } -void RunSystem(s32 cycles) +void RunSystem(u64 timestamp) { + SysTimestamp = timestamp; + + u32 mask = SchedListMask; for (int i = 0; i < Event_MAX; i++) { - if (!(SchedListMask & (1<<i))) - continue; - - SchedList[i].WaitCycles -= cycles; - - if (SchedList[i].WaitCycles < 1) + if (!mask) break; + if (mask & 0x1) { - SchedListMask &= ~(1<<i); - SchedList[i].Func(SchedList[i].Param); + if (SchedList[i].Timestamp <= SysTimestamp) + { + SchedListMask &= ~(1<<i); + SchedList[i].Func(SchedList[i].Param); + } } + + mask >>= 1; } } u32 RunFrame() { - FrameSysClockCycles = 0; + FrameStartTimestamp = SysTimestamp; if (!Running) return 263; // dorp if (CPUStop & 0x40000000) return 263; @@ -774,88 +788,55 @@ u32 RunFrame() while (Running && GPU::TotalScanlines==0) { // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1 - CalcIterationCycles(); - s32 arm9cycles; + u64 target = NextTarget(); + ARM9Target = target << ARM9ClockShift; + CurCPU = 0; if (CPUStop & 0x80000000) { // GXFIFO stall - // we just run the GPU and the timers. - // the rest of the hardware is driven by the event scheduler. + s32 cycles = GPU3D::CyclesToRunFor(); - arm9cycles = GPU3D::CyclesToRunFor(); - arm9cycles = std::min(CurIterationCycles, arm9cycles); - RunTightTimers(0, arm9cycles); - -#ifdef DEBUG_CHECK_DESYNC - dbg_CyclesARM9 += arm9cycles; -#endif // DEBUG_CHECK_DESYNC + ARM9Timestamp = std::min(ARM9Target, ARM9Timestamp+(cycles<<ARM9ClockShift)); } else if (CPUStop & 0x0FFF) { - s32 cycles = CurIterationCycles; - - cycles = DMAs[0]->Run(cycles); - if (cycles > 0 && !(CPUStop & 0x80000000)) - cycles = DMAs[1]->Run(cycles); - if (cycles > 0 && !(CPUStop & 0x80000000)) - cycles = DMAs[2]->Run(cycles); - if (cycles > 0 && !(CPUStop & 0x80000000)) - cycles = DMAs[3]->Run(cycles); - - arm9cycles = CurIterationCycles - cycles; + DMAs[0]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[1]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[2]->Run(); + if (!(CPUStop & 0x80000000)) DMAs[3]->Run(); } else { - ARM9->CyclesToRun = CurIterationCycles << 1; - CurCPU = 1; ARM9->Execute(); CurCPU = 0; - arm9cycles = ARM9->Cycles >> 1; - RunTightTimers(0, arm9cycles); + ARM9->Execute(); } - RunLooseTimers(0, arm9cycles); - GPU3D::Run(arm9cycles); + RunTimers(0); + GPU3D::Run(); - s32 ndscyclestorun = arm9cycles; + target = ARM9Timestamp >> ARM9ClockShift; + CurCPU = 1; - // ARM7Offset > ndscyclestorun means we are too far ahead of the ARM9 - if (ARM7Offset > ndscyclestorun) + while (ARM7Timestamp < target) { - ARM7Offset -= ndscyclestorun; - } - else - if (CPUStop & 0x0FFF0000) - { - s32 cycles = ndscyclestorun - ARM7Offset; + ARM7Target = target; // might be changed by a reschedule - cycles = DMAs[4]->Run(cycles); - if (cycles > 0) - cycles = DMAs[5]->Run(cycles); - if (cycles > 0) - cycles = DMAs[6]->Run(cycles); - if (cycles > 0) - cycles = DMAs[7]->Run(cycles); + if (CPUStop & 0x0FFF0000) + { + DMAs[4]->Run(); + DMAs[5]->Run(); + DMAs[6]->Run(); + DMAs[7]->Run(); + } + else + { + ARM7->Execute(); + } - ARM7Offset = -cycles; + RunTimers(1); } - else - { - ARM7->CyclesToRun = ndscyclestorun - ARM7Offset; - CurCPU = 2; ARM7->Execute(); CurCPU = 0; - ARM7Offset = ARM7->Cycles - ARM7->CyclesToRun; - RunTightTimers(1, ARM7->Cycles); - } - -#ifdef DEBUG_CHECK_DESYNC - dbg_CyclesSys += ndscyclestorun; -#endif // DEBUG_CHECK_DESYNC - - RunLooseTimers(1, ndscyclestorun); - RunSystem(ndscyclestorun); - SysClockCycles += ndscyclestorun; - LastSysClockCycles += ndscyclestorun; - FrameSysClockCycles += ndscyclestorun; + RunSystem(target); if (CPUStop & 0x40000000) { @@ -867,12 +848,11 @@ u32 RunFrame() } #ifdef DEBUG_CHECK_DESYNC - printf("[%08X%08X] ARM9=%ld timer9=%ld, ARM7=%ld timer7=%ld\n", - (u32)(dbg_CyclesSys>>32), (u32)dbg_CyclesSys, - dbg_CyclesARM9-dbg_CyclesSys, - dbg_CyclesTimer9-dbg_CyclesSys, - dbg_CyclesARM7-dbg_CyclesSys, - dbg_CyclesTimer7-dbg_CyclesSys); + printf("[%08X%08X] ARM9=%ld, ARM7=%ld, GPU=%ld\n", + (u32)(SysTimestamp>>32), (u32)SysTimestamp, + (ARM9Timestamp>>1)-SysTimestamp, + ARM7Timestamp-SysTimestamp, + GPU3D::Timestamp-SysTimestamp); #endif NumFrames++; @@ -880,26 +860,18 @@ u32 RunFrame() return GPU::TotalScanlines; } -void Reschedule() +void Reschedule(u64 target) { - s32 oldcycles = CurIterationCycles; - CalcIterationCycles(); - - if (CurIterationCycles >= oldcycles) + if (CurCPU == 0) { - CurIterationCycles = oldcycles; - return; + if (target < (ARM9Target >> ARM9ClockShift)) + ARM9Target = (target << ARM9ClockShift); } - - if (CurCPU == 0) + else { - CurIterationCycles = oldcycles; - return; + if (target < ARM7Target) + ARM7Target = target; } - - if (CurCPU == 1) ARM9->CyclesToRun = CurIterationCycles << 1; - else if (CurCPU == 2) ARM7->CyclesToRun = CurIterationCycles - ARM7Offset; - // this is all. a reschedule shouldn't happen during DMA or GXFIFO stall. } void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param) @@ -913,12 +885,13 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para SchedEvent* evt = &SchedList[id]; if (periodic) - evt->WaitCycles += delay; + evt->Timestamp += delay; else { - if (CurCPU == 1) evt->WaitCycles = delay + (ARM9->Cycles >> 1); - else if (CurCPU == 2) evt->WaitCycles = delay + ARM7->Cycles; - else evt->WaitCycles = delay; + if (CurCPU == 0) + evt->Timestamp = (ARM9Timestamp >> ARM9ClockShift) + delay; + else + evt->Timestamp = ARM7Timestamp + delay; } evt->Func = func; @@ -926,7 +899,7 @@ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 para SchedListMask |= (1<<id); - Reschedule(); + Reschedule(evt->Timestamp); } void CancelEvent(u32 id) @@ -1156,27 +1129,22 @@ u64 GetSysClockCycles(int num) if (num == 0 || num == 2) { - if (num == 0) ret = SysClockCycles; - else if (num == 2) ret = FrameSysClockCycles; + if (CurCPU == 0) + ret = ARM9Timestamp >> ARM9ClockShift; + else + ret = ARM7Timestamp; - if (CurCPU == 1) ret += (ARM9->Cycles >> 1); - else if (CurCPU == 2) ret += ARM7->Cycles; + if (num == 2) ret -= FrameStartTimestamp; } else if (num == 1) { ret = LastSysClockCycles; LastSysClockCycles = 0; - if (CurCPU == 1) - { - ret += (ARM9->Cycles >> 1); - LastSysClockCycles = -(ARM9->Cycles >> 1); - } - else if (CurCPU == 2) - { - ret += ARM7->Cycles; - LastSysClockCycles = -ARM7->Cycles; - } + if (CurCPU == 0) + LastSysClockCycles = ARM9Timestamp >> ARM9ClockShift; + else + LastSysClockCycles = ARM7Timestamp; } return ret; @@ -1271,17 +1239,11 @@ void NocashPrint(u32 ncpu, u32 addr) void HandleTimerOverflow(u32 tid) { Timer* timer = &Timers[tid]; - //if ((timer->Cnt & 0x84) != 0x80) return; timer->Counter += timer->Reload << 16; if (timer->Cnt & (1<<6)) SetIRQ(tid >> 2, IRQ_Timer0 + (tid & 0x3)); - //u32 delay = (0x10000 - timer->Reload) << (16 - timer->CycleShift); - //delay -= (timer->Counter - timer->Reload) >> timer->CycleShift; - //printf("timer%d IRQ: resched %d, reload=%04X cnt=%08X\n", tid, delay, timer->Reload, timer->Counter); - //ScheduleEvent(Event_TimerIRQ_0 + tid, true, delay, HandleTimerOverflow, tid); - if ((tid & 0x3) == 3) return; @@ -1310,8 +1272,6 @@ void HandleTimerOverflow(u32 tid) void RunTimer(u32 tid, s32 cycles) { Timer* timer = &Timers[tid]; - //if ((timer->Cnt & 0x84) != 0x80) - // return; u32 oldcount = timer->Counter; timer->Counter += (cycles << timer->CycleShift); @@ -1319,29 +1279,22 @@ void RunTimer(u32 tid, s32 cycles) HandleTimerOverflow(tid); } -void RunTightTimers(u32 cpu, s32 cycles) +void RunTimers(u32 cpu) { register u32 timermask = TimerCheckMask[cpu]; + s32 cycles; + + if (cpu == 0) + cycles = (ARM9Timestamp >> ARM9ClockShift) - TimerTimestamp[0]; + else + cycles = ARM7Timestamp - TimerTimestamp[1]; if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles); if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles); if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles); if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles); -#ifdef DEBUG_CHECK_DESYNC - if (cpu) dbg_CyclesTimer7 += cycles; - else dbg_CyclesTimer9 += cycles; -#endif // DEBUG_CHECK_DESYNC -} - -void RunLooseTimers(u32 cpu, s32 cycles) -{ - register u32 timermask = TimerCheckMask[cpu]; - - if (timermask & 0x10) RunTimer((cpu<<2)+0, cycles); - if (timermask & 0x20) RunTimer((cpu<<2)+1, cycles); - if (timermask & 0x40) RunTimer((cpu<<2)+2, cycles); - if (timermask & 0x80) RunTimer((cpu<<2)+3, cycles); + TimerTimestamp[cpu] += cycles; } @@ -1391,6 +1344,7 @@ const s32 TimerPrescaler[4] = {0, 6, 8, 10}; u16 TimerGetCounter(u32 timer) { + RunTimers(timer>>2); u32 ret = Timers[timer].Counter; return ret >> 16; @@ -1421,10 +1375,10 @@ void TimerStart(u32 id, u16 cnt) if ((cnt & 0x84) == 0x80) { u32 tmask; - if ((cnt & 0x03) == 0) + //if ((cnt & 0x03) == 0) tmask = 0x01 << (id&0x3); - else - tmask = 0x10 << (id&0x3); + //else + // tmask = 0x10 << (id&0x3); TimerCheckMask[id>>2] |= tmask; } @@ -1579,7 +1533,7 @@ void debug(u32 param) // printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); /*FILE* - shit = fopen("debug/justbeep.bin", "wb"); + shit = fopen("debug/colourfuck.bin", "wb"); for (u32 i = 0x02000000; i < 0x02400000; i+=4) { u32 val = ARM7Read32(i); @@ -29,14 +29,6 @@ namespace NDS { -#ifdef DEBUG_CHECK_DESYNC -extern u64 dbg_CyclesSys; -extern u64 dbg_CyclesARM9; -extern u64 dbg_CyclesTimer9; -extern u64 dbg_CyclesARM7; -extern u64 dbg_CyclesTimer7; -#endif - enum { Event_LCD = 0, @@ -56,7 +48,7 @@ enum typedef struct { void (*Func)(u32 param); - s32 WaitCycles; + u64 Timestamp; u32 Param; } SchedEvent; @@ -109,6 +101,10 @@ typedef struct extern u8 ARM9MemTimings[0x40000][4]; extern u8 ARM7MemTimings[0x20000][4]; +extern u64 ARM9Timestamp, ARM9Target; +extern u64 ARM7Timestamp, ARM7Target; +extern u32 ARM9ClockShift; + // hax extern u32 IME[2]; extern u32 IE[2]; @@ -182,8 +178,7 @@ bool DMAsRunning(u32 cpu); void CheckDMAs(u32 cpu, u32 mode); void StopDMAs(u32 cpu, u32 mode); -void RunTightTimers(u32 cpu, s32 cycles); -void RunLooseTimers(u32 cpu, s32 cycles); +void RunTimers(u32 cpu); u8 ARM9Read8(u32 addr); u16 ARM9Read16(u32 addr); diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp index a47b7ac..27fa9b5 100644 --- a/src/NDSCart.cpp +++ b/src/NDSCart.cpp @@ -1266,7 +1266,7 @@ void WriteROMCnt(u32 val) if (datasize == 0) NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*cmddelay, ROMEndTransfer, 0); else - NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*(cmddelay+4), ROMPrepareData, 0); + NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*(cmddelay+4), ROMPrepareData, 0); } u32 ReadROMData() @@ -1281,7 +1281,7 @@ u32 ReadROMData() u32 delay = 4; if (!(DataOutPos & 0x1FF)) delay += ((ROMCnt >> 16) & 0x3F); - NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*delay, ROMPrepareData, 0); + NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*delay, ROMPrepareData, 0); } else ROMEndTransfer(0); diff --git a/src/Savestate.h b/src/Savestate.h index be96d78..bfc34b9 100644 --- a/src/Savestate.h +++ b/src/Savestate.h @@ -22,7 +22,7 @@ #include <stdio.h> #include "types.h" -#define SAVESTATE_MAJOR 3 +#define SAVESTATE_MAJOR 4 #define SAVESTATE_MINOR 0 class Savestate diff --git a/src/Wifi.cpp b/src/Wifi.cpp index 0e73422..596c9f0 100644 --- a/src/Wifi.cpp +++ b/src/Wifi.cpp @@ -1350,7 +1350,7 @@ void Write(u32 addr, u16 val) if ((IOPORT(W_PowerUS) & 0x0001) && !(val & 0x0001)) { printf("WIFI ON\n"); - NDS::ScheduleEvent(NDS::Event_Wifi, true, 33, USTimer, 0); + NDS::ScheduleEvent(NDS::Event_Wifi, false, 33, USTimer, 0); if (!MPInited) { Platform::MP_Init(); diff --git a/src/melon_fopen.cpp b/src/melon_fopen.cpp index 04d3caf..007d248 100644 --- a/src/melon_fopen.cpp +++ b/src/melon_fopen.cpp @@ -16,6 +16,8 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ +// TODO: all this should ideally go in Platform.cpp + #include <stdio.h> #include <string.h> #include <stdlib.h> |