diff options
author | StapleButter <thetotalworm@gmail.com> | 2018-11-23 22:21:41 +0100 |
---|---|---|
committer | StapleButter <thetotalworm@gmail.com> | 2018-11-23 22:21:41 +0100 |
commit | a9e7f8bc5bb417de1e2a792c3de4d8f57be7b883 (patch) | |
tree | 763a289d1cf509e16adb4f73338d8384f02c25ee /src | |
parent | 27e1ca41031a0216d7d9a940b336a232217e6abf (diff) |
add proper support for GXFIFO stalls.
bad games that blast the GXFIFO and overflow it:
* Super Mario 64 DS
* Rayman RR2
the latter seems to get its music streaming messed up.
Diffstat (limited to 'src')
-rw-r--r-- | src/ARM.cpp | 1 | ||||
-rw-r--r-- | src/DMA.cpp | 14 | ||||
-rw-r--r-- | src/DMA.h | 8 | ||||
-rw-r--r-- | src/GPU3D.cpp | 107 | ||||
-rw-r--r-- | src/GPU3D.h | 1 | ||||
-rw-r--r-- | src/NDS.cpp | 39 | ||||
-rw-r--r-- | src/NDS.h | 2 | ||||
-rw-r--r-- | src/Savestate.h | 2 |
8 files changed, 140 insertions, 34 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 226b463..d16e193 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -217,6 +217,7 @@ void ARM::JumpTo(u32 addr, bool restorecpsr) // aging cart debug crap //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]); //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); + // R0=DMA# R1=src R2=size u32 oldregion = R[15] >> 23; u32 newregion = addr >> 23; diff --git a/src/DMA.cpp b/src/DMA.cpp index e88814e..432e0f2 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -242,12 +242,14 @@ s32 DMA::Run(s32 cycles) if (!Running) return cycles; + Executing = true; + if (!(Cnt & 0x04000000)) { u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16; void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; - while (IterCount > 0 && cycles > 0) + while (IterCount > 0 && cycles > 0 && !Stall) { writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -264,7 +266,8 @@ s32 DMA::Run(s32 cycles) else { // optimized path for typical GXFIFO DMA - if (IsGXFIFODMA) + // likely not worth it tbh + /*if (IsGXFIFODMA) { while (IterCount > 0 && cycles > 0) { @@ -278,12 +281,12 @@ s32 DMA::Run(s32 cycles) IterCount--; RemCount--; } - } + }*/ u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; void (*writefn)(u32,u32) = CPU ? 
NDS::ARM7Write32 : NDS::ARM9Write32; - while (IterCount > 0 && cycles > 0) + while (IterCount > 0 && cycles > 0 && !Stall) { writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -298,6 +301,9 @@ s32 DMA::Run(s32 cycles) } } + Executing = false; + Stall = false; + if (RemCount) { if (IterCount == 0) @@ -53,6 +53,11 @@ public: Cnt &= ~0x80000000; } + void StallIfRunning() + { + if (Executing) Stall = true; + } + u32 SrcAddr; u32 DstAddr; u32 Cnt; @@ -74,6 +79,9 @@ private: bool Running; bool InProgress; + bool Executing; + bool Stall; + bool IsGXFIFODMA; }; diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 79863ef..0b16192 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -73,6 +73,13 @@ // TODO: check how DISP_1DOT_DEPTH works and whether it's latched +// command execution notes +// +// timings given by GBAtek are for individual commands +// real-life timings are different depending on how commands are combined +// the engine is able to do parallel execution to some extent + + namespace GPU3D { @@ -116,38 +123,38 @@ const u32 CmdNumParams[256] = const s32 CmdNumCycles[256] = { // 0x00 - 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22, - 0, 0, 0, + 1, 1, 1, // 0x20 - 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1, - 0, 0, 0, 0, + 1, 9, 1, 9, 9, 9, 9, 9, 9, 1, 1, 1, + 1, 1, 1, 1, // 0x30 4, 4, 6, 1, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 392, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 103, 9, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; typedef union @@ -164,6 +171,8 @@ typedef union FIFO<CmdFIFOEntry>* CmdFIFO; FIFO<CmdFIFOEntry>* CmdPIPE; +FIFO<CmdFIFOEntry>* CmdStallQueue; + u32 NumCommands, CurCommand, ParamCount, TotalParams; u32 DispCnt; @@ -276,6 +285,8 @@ bool Init() CmdFIFO = new FIFO<CmdFIFOEntry>(256); CmdPIPE = new FIFO<CmdFIFOEntry>(4); + CmdStallQueue = new FIFO<CmdFIFOEntry>(64); + if (!SoftRenderer::Init()) return false; return true; @@ -287,6 +298,8 @@ void DeInit() delete CmdFIFO; delete CmdPIPE; + + delete CmdStallQueue; } void Reset() @@ -294,6 +307,8 @@ void Reset() CmdFIFO->Clear(); CmdPIPE->Clear(); + CmdStallQueue->Clear(); + NumCommands = 0; CurCommand = 0; ParamCount = 0; @@ -514,6 +529,20 @@ void DoSavestate(Savestate* file) // probably not worth storing the vblank-latched Renderxxxxxx variables + if (file->Saving || + file->VersionMajor > 2 || + (file->VersionMajor == 2 && file->VersionMinor >= 1)) + { + // command stall queue, only in version 2.1 and up + CmdStallQueue->DoSavestate(file); + } + else + { + // for version 2.0, just clear it. not having it doesn't matter + // if this comes from older melonDS revisions. 
+ CmdStallQueue->Clear(); + } + if (!file->Saving) { ClipMatrixDirty = true; @@ -1387,17 +1416,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsFull()) { - //printf("!!! GX FIFO FULL\n"); - //return; + // store it to the stall queue. stall the system. + // worst case is if a STMxx opcode causes this, which is why our stall queue + // has 64 entries. this is less complicated than trying to make STMxx stall-able. - // temp. hack - // SM64DS seems to overflow the FIFO occasionally - // either leftover bugs in our implementation, or the game accidentally doing that - // TODO: investigate. - // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) - - while (CmdFIFO->IsFull()) - ExecuteCommand(); + CmdStallQueue->Write(entry); + NDS::GXFIFOStall(); + return; } CmdFIFO->Write(entry); @@ -1426,6 +1451,21 @@ CmdFIFOEntry CmdFIFORead() if (!CmdFIFO->IsEmpty()) CmdPIPE->Write(CmdFIFO->Read()); + // empty stall queue if needed + // CmdFIFO should not be full at this point. 
+ if (!CmdStallQueue->IsEmpty()) + { + while (!CmdStallQueue->IsEmpty()) + { + if (CmdFIFO->IsFull()) break; + CmdFIFOEntry entry = CmdStallQueue->Read(); + CmdFIFOWrite(entry); + } + + if (CmdStallQueue->IsEmpty()) + NDS::GXFIFOUnstall(); + } + CheckFIFODMA(); CheckFIFOIRQ(); } @@ -1450,6 +1490,7 @@ void ExecuteCommand() for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]); printf("\n");*/ CycleCount += CmdNumCycles[entry.Command]; + ExecParamCount = 0; if (CycleCount > 0) @@ -1852,6 +1893,8 @@ void ExecuteCommand() break; case 0x40: // begin polygons + // TODO: check if there was a polygon being defined but incomplete + // such cases seem to freeze the GPU PolygonMode = ExecParams[0] & 0x3; VertexNum = 0; VertexNumInPoly = 0; @@ -1902,6 +1945,12 @@ void ExecuteCommand() } } +s32 CyclesToRunFor() +{ + if (CycleCount < 0) return 0; + return CycleCount; +} + void Run(s32 cycles) { if (FlushRequest) @@ -1924,6 +1973,8 @@ void Run(s32 cycles) if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { + // todo: advance remaining pipeline shit here + CycleCount = 0; GXStat &= ~(1<<27); diff --git a/src/GPU3D.h b/src/GPU3D.h index c997a8f..b74e421 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -90,6 +90,7 @@ void DoSavestate(Savestate* file); void ExecuteCommand(); +s32 CyclesToRunFor(); void Run(s32 cycles); void CheckFIFOIRQ(); void CheckFIFODMA(); diff --git a/src/NDS.cpp b/src/NDS.cpp index f4f2c36..9f19214 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -108,6 +108,7 @@ bool Running; void DivDone(u32 param); void SqrtDone(u32 param); +void RunTimer(u32 tid, s32 cycles); bool Init() @@ -608,12 +609,27 @@ u32 RunFrame() s32 ndscyclestorun; // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1 - // TODO: we need to directly change CurIterationCycles when rescheduling shit CalcIterationCycles(); if (CPUStop & 0x80000000) { // GXFIFO stall + // we just run the GPU and the timers. 
+ // the rest of the hardware is driven by the event scheduler. + + s32 cycles = GPU3D::CyclesToRunFor(); + GPU3D::Run(cycles); + + u32 timermask = TimerCheckMask[0]; + if (timermask & 0x1) RunTimer(0, cycles); + if (timermask & 0x2) RunTimer(1, cycles); + if (timermask & 0x4) RunTimer(2, cycles); + if (timermask & 0x8) RunTimer(3, cycles); + timermask = TimerCheckMask[1]; + if (timermask & 0x1) RunTimer(4, cycles); + if (timermask & 0x2) RunTimer(5, cycles); + if (timermask & 0x4) RunTimer(6, cycles); + if (timermask & 0x8) RunTimer(7, cycles); } else { @@ -818,6 +834,27 @@ void ResumeCPU(u32 cpu, u32 mask) CPUStop &= ~mask; } +void GXFIFOStall() +{ + if (CPUStop & 0x80000000) return; + + CPUStop |= 0x80000000; + + if (CurCPU == 1) ARM9->Halt(2); + else + { + DMAs[0]->StallIfRunning(); + DMAs[1]->StallIfRunning(); + DMAs[2]->StallIfRunning(); + DMAs[3]->StallIfRunning(); + } +} + +void GXFIFOUnstall() +{ + CPUStop &= ~0x80000000; +} + u32 GetPC(u32 cpu) { return cpu ? ARM7->R[15] : ARM9->R[15]; @@ -148,6 +148,8 @@ void ClearIRQ(u32 cpu, u32 irq); bool HaltInterrupted(u32 cpu); void StopCPU(u32 cpu, u32 mask); void ResumeCPU(u32 cpu, u32 mask); +void GXFIFOStall(); +void GXFIFOUnstall(); u32 GetPC(u32 cpu); diff --git a/src/Savestate.h b/src/Savestate.h index 81541f9..e217bc1 100644 --- a/src/Savestate.h +++ b/src/Savestate.h @@ -23,7 +23,7 @@ #include "types.h" #define SAVESTATE_MAJOR 2 -#define SAVESTATE_MINOR 0 +#define SAVESTATE_MINOR 1 class Savestate { |