aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author StapleButter <thetotalworm@gmail.com> 2018-11-23 22:21:41 +0100
committer StapleButter <thetotalworm@gmail.com> 2018-11-23 22:21:41 +0100
commit a9e7f8bc5bb417de1e2a792c3de4d8f57be7b883 (patch)
tree 763a289d1cf509e16adb4f73338d8384f02c25ee /src
parent 27e1ca41031a0216d7d9a940b336a232217e6abf (diff)
add proper support for GXFIFO stalls.
Bad games that blast the GXFIFO and overflow it: * Super Mario 64 DS * Rayman RR2. The latter seems to get its music streaming broken.
Diffstat (limited to 'src')
-rw-r--r-- src/ARM.cpp 1
-rw-r--r-- src/DMA.cpp 14
-rw-r--r-- src/DMA.h 8
-rw-r--r-- src/GPU3D.cpp 107
-rw-r--r-- src/GPU3D.h 1
-rw-r--r-- src/NDS.cpp 39
-rw-r--r-- src/NDS.h 2
-rw-r--r-- src/Savestate.h 2
8 files changed, 140 insertions, 34 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index 226b463..d16e193 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -217,6 +217,7 @@ void ARM::JumpTo(u32 addr, bool restorecpsr)
// aging cart debug crap
//if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);
//if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]);
+ // R0=DMA# R1=src R2=size
u32 oldregion = R[15] >> 23;
u32 newregion = addr >> 23;
diff --git a/src/DMA.cpp b/src/DMA.cpp
index e88814e..432e0f2 100644
--- a/src/DMA.cpp
+++ b/src/DMA.cpp
@@ -242,12 +242,14 @@ s32 DMA::Run(s32 cycles)
if (!Running)
return cycles;
+ Executing = true;
+
if (!(Cnt & 0x04000000))
{
u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;
void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16;
- while (IterCount > 0 && cycles > 0)
+ while (IterCount > 0 && cycles > 0 && !Stall)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
@@ -264,7 +266,8 @@ s32 DMA::Run(s32 cycles)
else
{
// optimized path for typical GXFIFO DMA
- if (IsGXFIFODMA)
+ // likely not worth it tbh
+ /*if (IsGXFIFODMA)
{
while (IterCount > 0 && cycles > 0)
{
@@ -278,12 +281,12 @@ s32 DMA::Run(s32 cycles)
IterCount--;
RemCount--;
}
- }
+ }*/
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
- while (IterCount > 0 && cycles > 0)
+ while (IterCount > 0 && cycles > 0 && !Stall)
{
writefn(CurDstAddr, readfn(CurSrcAddr));
@@ -298,6 +301,9 @@ s32 DMA::Run(s32 cycles)
}
}
+ Executing = false;
+ Stall = false;
+
if (RemCount)
{
if (IterCount == 0)
diff --git a/src/DMA.h b/src/DMA.h
index 6157431..bc72899 100644
--- a/src/DMA.h
+++ b/src/DMA.h
@@ -53,6 +53,11 @@ public:
Cnt &= ~0x80000000;
}
+ void StallIfRunning()
+ {
+ if (Executing) Stall = true;
+ }
+
u32 SrcAddr;
u32 DstAddr;
u32 Cnt;
@@ -74,6 +79,9 @@ private:
bool Running;
bool InProgress;
+ bool Executing;
+ bool Stall;
+
bool IsGXFIFODMA;
};
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 79863ef..0b16192 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -73,6 +73,13 @@
// TODO: check how DISP_1DOT_DEPTH works and whether it's latched
+// command execution notes
+//
+// timings given by GBAtek are for individual commands
+// real-life timings are different depending on how commands are combined
+// the engine is able to do parallel execution to some extent
+
+
namespace GPU3D
{
@@ -116,38 +123,38 @@ const u32 CmdNumParams[256] =
const s32 CmdNumCycles[256] =
{
// 0x00
- 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x10
1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22,
- 0, 0, 0,
+ 1, 1, 1,
// 0x20
- 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1,
- 0, 0, 0, 0,
+ 1, 9, 1, 9, 9, 9, 9, 9, 9, 1, 1, 1,
+ 1, 1, 1, 1,
// 0x30
4, 4, 6, 1, 32,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x40
1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x50
392,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x60
1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x70
103, 9, 5,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
// 0x80+
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
typedef union
@@ -164,6 +171,8 @@ typedef union
FIFO<CmdFIFOEntry>* CmdFIFO;
FIFO<CmdFIFOEntry>* CmdPIPE;
+FIFO<CmdFIFOEntry>* CmdStallQueue;
+
u32 NumCommands, CurCommand, ParamCount, TotalParams;
u32 DispCnt;
@@ -276,6 +285,8 @@ bool Init()
CmdFIFO = new FIFO<CmdFIFOEntry>(256);
CmdPIPE = new FIFO<CmdFIFOEntry>(4);
+ CmdStallQueue = new FIFO<CmdFIFOEntry>(64);
+
if (!SoftRenderer::Init()) return false;
return true;
@@ -287,6 +298,8 @@ void DeInit()
delete CmdFIFO;
delete CmdPIPE;
+
+ delete CmdStallQueue;
}
void Reset()
@@ -294,6 +307,8 @@ void Reset()
CmdFIFO->Clear();
CmdPIPE->Clear();
+ CmdStallQueue->Clear();
+
NumCommands = 0;
CurCommand = 0;
ParamCount = 0;
@@ -514,6 +529,20 @@ void DoSavestate(Savestate* file)
// probably not worth storing the vblank-latched Renderxxxxxx variables
+ if (file->Saving ||
+ file->VersionMajor > 2 ||
+ (file->VersionMajor == 2 && file->VersionMinor >= 1))
+ {
+ // command stall queue, only in version 2.1 and up
+ CmdStallQueue->DoSavestate(file);
+ }
+ else
+ {
+ // for version 2.0, just clear it. not having it doesn't matter
+ // if this comes from older melonDS revisions.
+ CmdStallQueue->Clear();
+ }
+
if (!file->Saving)
{
ClipMatrixDirty = true;
@@ -1387,17 +1416,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)
{
if (CmdFIFO->IsFull())
{
- //printf("!!! GX FIFO FULL\n");
- //return;
+ // store it to the stall queue. stall the system.
+ // worst case is if a STMxx opcode causes this, which is why our stall queue
+ // has 64 entries. this is less complicated than trying to make STMxx stall-able.
- // temp. hack
- // SM64DS seems to overflow the FIFO occasionally
- // either leftover bugs in our implementation, or the game accidentally doing that
- // TODO: investigate.
- // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore)
-
- while (CmdFIFO->IsFull())
- ExecuteCommand();
+ CmdStallQueue->Write(entry);
+ NDS::GXFIFOStall();
+ return;
}
CmdFIFO->Write(entry);
@@ -1426,6 +1451,21 @@ CmdFIFOEntry CmdFIFORead()
if (!CmdFIFO->IsEmpty())
CmdPIPE->Write(CmdFIFO->Read());
+ // empty stall queue if needed
+ // CmdFIFO should not be full at this point.
+ if (!CmdStallQueue->IsEmpty())
+ {
+ while (!CmdStallQueue->IsEmpty())
+ {
+ if (CmdFIFO->IsFull()) break;
+ CmdFIFOEntry entry = CmdStallQueue->Read();
+ CmdFIFOWrite(entry);
+ }
+
+ if (CmdStallQueue->IsEmpty())
+ NDS::GXFIFOUnstall();
+ }
+
CheckFIFODMA();
CheckFIFOIRQ();
}
@@ -1450,6 +1490,7 @@ void ExecuteCommand()
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/
CycleCount += CmdNumCycles[entry.Command];
+
ExecParamCount = 0;
if (CycleCount > 0)
@@ -1852,6 +1893,8 @@ void ExecuteCommand()
break;
case 0x40: // begin polygons
+ // TODO: check if there was a polygon being defined but incomplete
+ // such cases seem to freeze the GPU
PolygonMode = ExecParams[0] & 0x3;
VertexNum = 0;
VertexNumInPoly = 0;
@@ -1902,6 +1945,12 @@ void ExecuteCommand()
}
}
+s32 CyclesToRunFor()
+{
+ if (CycleCount < 0) return 0;
+ return CycleCount;
+}
+
void Run(s32 cycles)
{
if (FlushRequest)
@@ -1924,6 +1973,8 @@ void Run(s32 cycles)
if (CycleCount <= 0 && CmdPIPE->IsEmpty())
{
+ // todo: advance remaining pipeline shit here
+
CycleCount = 0;
GXStat &= ~(1<<27);
diff --git a/src/GPU3D.h b/src/GPU3D.h
index c997a8f..b74e421 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -90,6 +90,7 @@ void DoSavestate(Savestate* file);
void ExecuteCommand();
+s32 CyclesToRunFor();
void Run(s32 cycles);
void CheckFIFOIRQ();
void CheckFIFODMA();
diff --git a/src/NDS.cpp b/src/NDS.cpp
index f4f2c36..9f19214 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -108,6 +108,7 @@ bool Running;
void DivDone(u32 param);
void SqrtDone(u32 param);
+void RunTimer(u32 tid, s32 cycles);
bool Init()
@@ -608,12 +609,27 @@ u32 RunFrame()
s32 ndscyclestorun;
// TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1
- // TODO: we need to directly change CurIterationCycles when rescheduling shit
CalcIterationCycles();
if (CPUStop & 0x80000000)
{
// GXFIFO stall
+ // we just run the GPU and the timers.
+ // the rest of the hardware is driven by the event scheduler.
+
+ s32 cycles = GPU3D::CyclesToRunFor();
+ GPU3D::Run(cycles);
+
+ u32 timermask = TimerCheckMask[0];
+ if (timermask & 0x1) RunTimer(0, cycles);
+ if (timermask & 0x2) RunTimer(1, cycles);
+ if (timermask & 0x4) RunTimer(2, cycles);
+ if (timermask & 0x8) RunTimer(3, cycles);
+ timermask = TimerCheckMask[1];
+ if (timermask & 0x1) RunTimer(4, cycles);
+ if (timermask & 0x2) RunTimer(5, cycles);
+ if (timermask & 0x4) RunTimer(6, cycles);
+ if (timermask & 0x8) RunTimer(7, cycles);
}
else
{
@@ -818,6 +834,27 @@ void ResumeCPU(u32 cpu, u32 mask)
CPUStop &= ~mask;
}
+void GXFIFOStall()
+{
+ if (CPUStop & 0x80000000) return;
+
+ CPUStop |= 0x80000000;
+
+ if (CurCPU == 1) ARM9->Halt(2);
+ else
+ {
+ DMAs[0]->StallIfRunning();
+ DMAs[1]->StallIfRunning();
+ DMAs[2]->StallIfRunning();
+ DMAs[3]->StallIfRunning();
+ }
+}
+
+void GXFIFOUnstall()
+{
+ CPUStop &= ~0x80000000;
+}
+
u32 GetPC(u32 cpu)
{
return cpu ? ARM7->R[15] : ARM9->R[15];
diff --git a/src/NDS.h b/src/NDS.h
index c7913e9..e25b9e4 100644
--- a/src/NDS.h
+++ b/src/NDS.h
@@ -148,6 +148,8 @@ void ClearIRQ(u32 cpu, u32 irq);
bool HaltInterrupted(u32 cpu);
void StopCPU(u32 cpu, u32 mask);
void ResumeCPU(u32 cpu, u32 mask);
+void GXFIFOStall();
+void GXFIFOUnstall();
u32 GetPC(u32 cpu);
diff --git a/src/Savestate.h b/src/Savestate.h
index 81541f9..e217bc1 100644
--- a/src/Savestate.h
+++ b/src/Savestate.h
@@ -23,7 +23,7 @@
#include "types.h"
#define SAVESTATE_MAJOR 2
-#define SAVESTATE_MINOR 0
+#define SAVESTATE_MINOR 1
class Savestate
{