diff options
| author | StapleButter <thetotalworm@gmail.com> | 2018-11-23 22:21:41 +0100 | 
|---|---|---|
| committer | StapleButter <thetotalworm@gmail.com> | 2018-11-23 22:21:41 +0100 | 
| commit | a9e7f8bc5bb417de1e2a792c3de4d8f57be7b883 (patch) | |
| tree | 763a289d1cf509e16adb4f73338d8384f02c25ee /src | |
| parent | 27e1ca41031a0216d7d9a940b336a232217e6abf (diff) | |
add proper support for GXFIFO stalls.
Offending games that flood the GXFIFO and overflow it:
* Super Mario 64 DS
* Rayman RR2
The latter seems to get its music streaming broken.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ARM.cpp | 1 | ||||
| -rw-r--r-- | src/DMA.cpp | 14 | ||||
| -rw-r--r-- | src/DMA.h | 8 | ||||
| -rw-r--r-- | src/GPU3D.cpp | 107 | ||||
| -rw-r--r-- | src/GPU3D.h | 1 | ||||
| -rw-r--r-- | src/NDS.cpp | 39 | ||||
| -rw-r--r-- | src/NDS.h | 2 | ||||
| -rw-r--r-- | src/Savestate.h | 2 | 
8 files changed, 140 insertions, 34 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 226b463..d16e193 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -217,6 +217,7 @@ void ARM::JumpTo(u32 addr, bool restorecpsr)      // aging cart debug crap      //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);      //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); +    // R0=DMA# R1=src R2=size      u32 oldregion = R[15] >> 23;      u32 newregion = addr >> 23; diff --git a/src/DMA.cpp b/src/DMA.cpp index e88814e..432e0f2 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -242,12 +242,14 @@ s32 DMA::Run(s32 cycles)      if (!Running)          return cycles; +    Executing = true; +      if (!(Cnt & 0x04000000))      {          u16 (*readfn)(u32) = CPU ? NDS::ARM7Read16 : NDS::ARM9Read16;          void (*writefn)(u32,u16) = CPU ? NDS::ARM7Write16 : NDS::ARM9Write16; -        while (IterCount > 0 && cycles > 0) +        while (IterCount > 0 && cycles > 0 && !Stall)          {              writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -264,7 +266,8 @@ s32 DMA::Run(s32 cycles)      else      {          // optimized path for typical GXFIFO DMA -        if (IsGXFIFODMA) +        // likely not worth it tbh +        /*if (IsGXFIFODMA)          {              while (IterCount > 0 && cycles > 0)              { @@ -278,12 +281,12 @@ s32 DMA::Run(s32 cycles)                  IterCount--;                  RemCount--;              } -        } +        }*/          u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;          void (*writefn)(u32,u32) = CPU ? 
NDS::ARM7Write32 : NDS::ARM9Write32; -        while (IterCount > 0 && cycles > 0) +        while (IterCount > 0 && cycles > 0 && !Stall)          {              writefn(CurDstAddr, readfn(CurSrcAddr)); @@ -298,6 +301,9 @@ s32 DMA::Run(s32 cycles)          }      } +    Executing = false; +    Stall = false; +      if (RemCount)      {          if (IterCount == 0) @@ -53,6 +53,11 @@ public:              Cnt &= ~0x80000000;      } +    void StallIfRunning() +    { +        if (Executing) Stall = true; +    } +      u32 SrcAddr;      u32 DstAddr;      u32 Cnt; @@ -74,6 +79,9 @@ private:      bool Running;      bool InProgress; +    bool Executing; +    bool Stall; +      bool IsGXFIFODMA;  }; diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 79863ef..0b16192 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -73,6 +73,13 @@  // TODO: check how DISP_1DOT_DEPTH works and whether it's latched +// command execution notes +// +// timings given by GBAtek are for individual commands +// real-life timings are different depending on how commands are combined +// the engine is able to do parallel execution to some extent + +  namespace GPU3D  { @@ -116,38 +123,38 @@ const u32 CmdNumParams[256] =  const s32 CmdNumCycles[256] =  {      // 0x00 -    0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x10      1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22, -    0, 0, 0, +    1, 1, 1,      // 0x20 -    1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1, -    0, 0, 0, 0, +    1, 9, 1, 9, 9, 9, 9, 9, 9, 1, 1, 1, +    1, 1, 1, 1,      // 0x30      4, 4, 6, 1, 32, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x40      1, 1, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x50      392, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x60      1, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x70      103, 9, 5, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,      // 0x80+ -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1  };  typedef union @@ -164,6 +171,8 @@ typedef union  FIFO<CmdFIFOEntry>* CmdFIFO;  FIFO<CmdFIFOEntry>* CmdPIPE; +FIFO<CmdFIFOEntry>* CmdStallQueue; +  u32 NumCommands, CurCommand, ParamCount, TotalParams;  u32 DispCnt; @@ -276,6 +285,8 @@ bool Init()      CmdFIFO = new FIFO<CmdFIFOEntry>(256);      CmdPIPE = new FIFO<CmdFIFOEntry>(4); +    CmdStallQueue = new FIFO<CmdFIFOEntry>(64); +      if (!SoftRenderer::Init()) return false;      return true; @@ -287,6 +298,8 @@ void DeInit()      delete CmdFIFO;      delete CmdPIPE; + +    delete CmdStallQueue;  }  void Reset() @@ -294,6 +307,8 @@ void Reset()      CmdFIFO->Clear();      CmdPIPE->Clear(); +    CmdStallQueue->Clear(); +      NumCommands = 0;      CurCommand = 0;      ParamCount = 0; @@ -514,6 +529,20 @@ void DoSavestate(Savestate* file)      // probably not worth storing the vblank-latched Renderxxxxxx variables +    if (file->Saving || +        file->VersionMajor > 2 || +        
(file->VersionMajor == 2 && file->VersionMinor >= 1)) +    { +        // command stall queue, only in version 2.1 and up +        CmdStallQueue->DoSavestate(file); +    } +    else +    { +        // for version 2.0, just clear it. not having it doesn't matter +        // if this comes from older melonDS revisions. +        CmdStallQueue->Clear(); +    } +      if (!file->Saving)      {          ClipMatrixDirty = true; @@ -1387,17 +1416,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)      {          if (CmdFIFO->IsFull())          { -            //printf("!!! GX FIFO FULL\n"); -            //return; +            // store it to the stall queue. stall the system. +            // worst case is if a STMxx opcode causes this, which is why our stall queue +            // has 64 entries. this is less complicated than trying to make STMxx stall-able. -            // temp. hack -            // SM64DS seems to overflow the FIFO occasionally -            // either leftover bugs in our implementation, or the game accidentally doing that -            // TODO: investigate. -            // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) - -            while (CmdFIFO->IsFull()) -                ExecuteCommand(); +            CmdStallQueue->Write(entry); +            NDS::GXFIFOStall(); +            return;          }          CmdFIFO->Write(entry); @@ -1426,6 +1451,21 @@ CmdFIFOEntry CmdFIFORead()          if (!CmdFIFO->IsEmpty())              CmdPIPE->Write(CmdFIFO->Read()); +        // empty stall queue if needed +        // CmdFIFO should not be full at this point. 
+        if (!CmdStallQueue->IsEmpty()) +        { +            while (!CmdStallQueue->IsEmpty()) +            { +                if (CmdFIFO->IsFull()) break; +                CmdFIFOEntry entry = CmdStallQueue->Read(); +                CmdFIFOWrite(entry); +            } + +            if (CmdStallQueue->IsEmpty()) +                NDS::GXFIFOUnstall(); +        } +          CheckFIFODMA();          CheckFIFOIRQ();      } @@ -1450,6 +1490,7 @@ void ExecuteCommand()          for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);          printf("\n");*/          CycleCount += CmdNumCycles[entry.Command]; +          ExecParamCount = 0;          if (CycleCount > 0) @@ -1852,6 +1893,8 @@ void ExecuteCommand()              break;          case 0x40: // begin polygons +            // TODO: check if there was a polygon being defined but incomplete +            // such cases seem to freeze the GPU              PolygonMode = ExecParams[0] & 0x3;              VertexNum = 0;              VertexNumInPoly = 0; @@ -1902,6 +1945,12 @@ void ExecuteCommand()      }  } +s32 CyclesToRunFor() +{ +    if (CycleCount < 0) return 0; +    return CycleCount; +} +  void Run(s32 cycles)  {      if (FlushRequest) @@ -1924,6 +1973,8 @@ void Run(s32 cycles)      if (CycleCount <= 0 && CmdPIPE->IsEmpty())      { +        // todo: advance remaining pipeline shit here +          CycleCount = 0;          GXStat &= ~(1<<27); diff --git a/src/GPU3D.h b/src/GPU3D.h index c997a8f..b74e421 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -90,6 +90,7 @@ void DoSavestate(Savestate* file);  void ExecuteCommand(); +s32 CyclesToRunFor();  void Run(s32 cycles);  void CheckFIFOIRQ();  void CheckFIFODMA(); diff --git a/src/NDS.cpp b/src/NDS.cpp index f4f2c36..9f19214 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -108,6 +108,7 @@ bool Running;  void DivDone(u32 param);  void SqrtDone(u32 param); +void RunTimer(u32 tid, s32 cycles);  bool Init() @@ -608,12 +609,27 @@ u32 RunFrame()          
s32 ndscyclestorun;          // TODO: give it some margin, so it can directly do 17 cycles instead of 16 then 1 -        // TODO: we need to directly change CurIterationCycles when rescheduling shit          CalcIterationCycles();          if (CPUStop & 0x80000000)          {              // GXFIFO stall +            // we just run the GPU and the timers. +            // the rest of the hardware is driven by the event scheduler. + +            s32 cycles = GPU3D::CyclesToRunFor(); +            GPU3D::Run(cycles); + +            u32 timermask = TimerCheckMask[0]; +            if (timermask & 0x1) RunTimer(0, cycles); +            if (timermask & 0x2) RunTimer(1, cycles); +            if (timermask & 0x4) RunTimer(2, cycles); +            if (timermask & 0x8) RunTimer(3, cycles); +            timermask = TimerCheckMask[1]; +            if (timermask & 0x1) RunTimer(4, cycles); +            if (timermask & 0x2) RunTimer(5, cycles); +            if (timermask & 0x4) RunTimer(6, cycles); +            if (timermask & 0x8) RunTimer(7, cycles);          }          else          { @@ -818,6 +834,27 @@ void ResumeCPU(u32 cpu, u32 mask)      CPUStop &= ~mask;  } +void GXFIFOStall() +{ +    if (CPUStop & 0x80000000) return; + +    CPUStop |= 0x80000000; + +    if (CurCPU == 1) ARM9->Halt(2); +    else +    { +        DMAs[0]->StallIfRunning(); +        DMAs[1]->StallIfRunning(); +        DMAs[2]->StallIfRunning(); +        DMAs[3]->StallIfRunning(); +    } +} + +void GXFIFOUnstall() +{ +    CPUStop &= ~0x80000000; +} +  u32 GetPC(u32 cpu)  {      return cpu ? 
ARM7->R[15] : ARM9->R[15]; @@ -148,6 +148,8 @@ void ClearIRQ(u32 cpu, u32 irq);  bool HaltInterrupted(u32 cpu);  void StopCPU(u32 cpu, u32 mask);  void ResumeCPU(u32 cpu, u32 mask); +void GXFIFOStall(); +void GXFIFOUnstall();  u32 GetPC(u32 cpu); diff --git a/src/Savestate.h b/src/Savestate.h index 81541f9..e217bc1 100644 --- a/src/Savestate.h +++ b/src/Savestate.h @@ -23,7 +23,7 @@  #include "types.h"  #define SAVESTATE_MAJOR 2 -#define SAVESTATE_MINOR 0 +#define SAVESTATE_MINOR 1  class Savestate  {  |