diff options
author | Jesse Talavera <jesse@jesse.tg> | 2024-01-07 17:39:43 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-07 23:39:43 +0100 |
commit | 8143f549566029f366b774d0722c5f16011b9b56 (patch) | |
tree | e097c2bdd11f852a0886c1861c9c70c896a0d371 | |
parent | f68f55d002c412ead4e0ce88dd48b11d6f7900e3 (diff) |
Protect savestates while the threaded software renderer is running (#1864)
* First crack at ensuring the render thread doesn't touch GPU state while it's being serialized
* Get rid of the semaphore wait
* Add some extra fields into GPU3D's serialization
* Oops, TempVertexBuffer is already serialized
* Move vertex serialization into its own method
* Lock the GPU3D state when rendering on the render thread or serializing it
* Revert "Lock the GPU3D state when rendering on the render thread or serializing it"
This reverts commit 2f49a551c13934b9dc815bbda67a45098f0482a7.
* Add comments that describe the synchronization within GPU3D_Soft
- I need to understand it before I can solve my actual problem
- Now I do
* Revert "Revert "Lock the GPU3D state when rendering on the render thread or serializing it""
This reverts commit 1977566a6d8671d72bd94ba4ebf832c3bf08933a.
* Let's try locking the GPU3D state throughout NDS::RunFrame
- Just to see what happens
* Slim down the lock's scope
* Narrow the lock's scope some more
* Remove the lock entirely
* Try protecting the GPU3D state with just a mutex
- I'll clean this up once I know it works
* Remove a duplicate method definition
* Add a missing `noexcept` specifier
* Remove an unused function
* Cut some non-hardware state from `GPU3D`'s savestate
* Assume that the next frame after loading a savestate won't be identical
* Actually, it _is_ worth it
* Don't serialize the clip matrix
- It's recalculated anyway
* Serialize `RenderPolygonRAM` as an array of indexes
* Clean up some comments
- I liked the dialogue style, but oh well
* Try restarting the render thread instead of using the lock
- Let's see what happens
* Put the lock back
* Fix some polygon and vertex indexes being saved incorrectly
- Taking the difference between two pointers results in the number of elements, not the number of bytes
* Remove `SoftRenderer::StateBusy` since it turns out we don't need it
- The real synchronization was the friends we made along the way
-rw-r--r-- | src/GPU3D.cpp | 107 | ||||
-rw-r--r-- | src/GPU3D.h | 6 | ||||
-rw-r--r-- | src/GPU3D_Soft.cpp | 53 | ||||
-rw-r--r-- | src/GPU3D_Soft.h | 9 | ||||
-rw-r--r-- | src/Savestate.h | 2 |
5 files changed, 126 insertions, 51 deletions
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 4787702..47abae2 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -146,6 +146,19 @@ GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr<Renderer3D>&& renderer) noexcept { } +void Vertex::DoSavestate(Savestate* file) noexcept +{ + file->VarArray(Position, sizeof(Position)); + file->VarArray(Color, sizeof(Color)); + file->VarArray(TexCoords, sizeof(TexCoords)); + + file->Bool32(&Clipped); + + file->VarArray(FinalPosition, sizeof(FinalPosition)); + file->VarArray(FinalColor, sizeof(FinalColor)); + file->VarArray(HiresPosition, sizeof(HiresPosition)); +} + void GPU3D::SetCurrentRenderer(std::unique_ptr<Renderer3D>&& renderer) noexcept { CurrentRenderer = std::move(renderer); @@ -297,6 +310,12 @@ void GPU3D::DoSavestate(Savestate* file) noexcept { file->Section("GP3D"); + SoftRenderer* softRenderer = dynamic_cast<SoftRenderer*>(CurrentRenderer.get()); + if (softRenderer && softRenderer->IsThreaded()) + { + softRenderer->SetupRenderThread(NDS.GPU); + } + CmdFIFO.DoSavestate(file); CmdPIPE.DoSavestate(file); @@ -372,33 +391,21 @@ void GPU3D::DoSavestate(Savestate* file) noexcept file->Var32(&VertexNumInPoly); file->Var32(&NumConsecutivePolygons); - for (int i = 0; i < 4; i++) + for (Vertex& vtx : TempVertexBuffer) { - Vertex* vtx = &TempVertexBuffer[i]; - - file->VarArray(vtx->Position, sizeof(s32)*4); - file->VarArray(vtx->Color, sizeof(s32)*3); - file->VarArray(vtx->TexCoords, sizeof(s16)*2); - - file->Bool32(&vtx->Clipped); - - file->VarArray(vtx->FinalPosition, sizeof(s32)*2); - file->VarArray(vtx->FinalColor, sizeof(s32)*3); + vtx.DoSavestate(file); } if (file->Saving) { - u32 id; - if (LastStripPolygon) id = (u32)((LastStripPolygon - (&PolygonRAM[0])) / sizeof(Polygon)); - else id = -1; - file->Var32(&id); + u32 index = LastStripPolygon ? (u32)(LastStripPolygon - &PolygonRAM[0]) : UINT32_MAX; + file->Var32(&index); } else { - u32 id; - file->Var32(&id); - if (id == 0xFFFFFFFF) LastStripPolygon = NULL; - else LastStripPolygon = &PolygonRAM[id]; + u32 index = UINT32_MAX; + file->Var32(&index); + LastStripPolygon = (index == UINT32_MAX) ? nullptr : &PolygonRAM[index]; } file->Var32(&CurRAMBank); @@ -409,18 +416,9 @@ void GPU3D::DoSavestate(Savestate* file) noexcept file->Var32(&FlushRequest); file->Var32(&FlushAttributes); - for (int i = 0; i < 6144*2; i++) + for (Vertex& vtx : VertexRAM) { - Vertex* vtx = &VertexRAM[i]; - - file->VarArray(vtx->Position, sizeof(s32)*4); - file->VarArray(vtx->Color, sizeof(s32)*3); - file->VarArray(vtx->TexCoords, sizeof(s16)*2); - - file->Bool32(&vtx->Clipped); - - file->VarArray(vtx->FinalPosition, sizeof(s32)*2); - file->VarArray(vtx->FinalColor, sizeof(s32)*3); + vtx.DoSavestate(file); } for(int i = 0; i < 2048*2; i++) @@ -434,20 +432,17 @@ void GPU3D::DoSavestate(Savestate* file) noexcept for (int j = 0; j < 10; j++) { Vertex* ptr = poly->Vertices[j]; - u32 id; - if (ptr) id = (u32)((ptr - (&VertexRAM[0])) / sizeof(Vertex)); - else id = -1; - file->Var32(&id); + u32 index = ptr ? (u32)(ptr - &VertexRAM[0]) : UINT32_MAX; + file->Var32(&index); } } else { for (int j = 0; j < 10; j++) { - u32 id = -1; - file->Var32(&id); - if (id == 0xFFFFFFFF) poly->Vertices[j] = NULL; - else poly->Vertices[j] = &VertexRAM[id]; + u32 index = UINT32_MAX; + file->Var32(&index); + poly->Vertices[j] = index == UINT32_MAX ? nullptr : &VertexRAM[index]; } } @@ -495,7 +490,6 @@ void GPU3D::DoSavestate(Savestate* file) noexcept } } - // probably not worth storing the vblank-latched Renderxxxxxx variables CmdStallQueue.DoSavestate(file); file->Var32((u32*)&VertexPipeline); @@ -511,10 +505,27 @@ void GPU3D::DoSavestate(Savestate* file) noexcept CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; + } + + file->Var32(&RenderNumPolygons); + if (file->Saving) + { + for (const Polygon* p : RenderPolygonRAM) + { + u32 index = p ? (p - &PolygonRAM[0]) : UINT32_MAX; + + file->Var32(&index); + } + } + else + { + for (int i = 0; i < RenderPolygonRAM.size(); ++i) + { + u32 index = UINT32_MAX; + file->Var32(&index); - // better safe than sorry, I guess - // might cause a blank frame but atleast it won't shit itself - RenderNumPolygons = 0; + RenderPolygonRAM[i] = index == UINT32_MAX ? nullptr : &PolygonRAM[index]; + } } file->VarArray(CurVertex, sizeof(s16)*3); @@ -534,6 +545,18 @@ void GPU3D::DoSavestate(Savestate* file) noexcept file->VarArray(ShininessTable, 128*sizeof(u8)); file->Bool32(&AbortFrame); + file->Bool32(&GeometryEnabled); + file->Bool32(&RenderingEnabled); + file->Var32(&PolygonMode); + file->Var32(&PolygonAttr); + file->Var32(&CurPolygonAttr); + file->Var32(&TexParam); + file->Var32(&TexPalette); + RenderFrameIdentical = false; + if (softRenderer && softRenderer->IsThreaded()) + { + softRenderer->EnableRenderThread(); + } } diff --git a/src/GPU3D.h b/src/GPU3D.h index 7c42eeb..4a5fe6e 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -47,6 +47,7 @@ struct Vertex // TODO maybe: hi-res color? (that survives clipping) s32 HiresPosition[2]; + void DoSavestate(Savestate* file) noexcept; }; struct Polygon @@ -78,6 +79,7 @@ struct Polygon u32 SortKey; + void DoSavestate(Savestate* file) noexcept; }; class Renderer3D; @@ -269,7 +271,7 @@ public: u32 RenderClearAttr1 = 0; u32 RenderClearAttr2 = 0; - bool RenderFrameIdentical = false; + bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize bool AbortFrame = false; @@ -323,7 +325,7 @@ public: u32 FlushRequest = 0; u32 FlushAttributes = 0; - u32 ScrolledLine[256]; + u32 ScrolledLine[256]; // not part of the hardware state, don't serialize }; class Renderer3D diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index c96464a..74027d5 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -34,8 +34,11 @@ void SoftRenderer::StopRenderThread() { if (RenderThreadRunning.load(std::memory_order_relaxed)) { + // Tell the render thread to stop drawing new frames, and finish up the current one. RenderThreadRunning = false; + Platform::Semaphore_Post(Sema_RenderStart); + Platform::Thread_Wait(RenderThread); Platform::Thread_Free(RenderThread); RenderThread = nullptr; @@ -47,24 +50,36 @@ void SoftRenderer::SetupRenderThread(GPU& gpu) if (Threaded) { if (!RenderThreadRunning.load(std::memory_order_relaxed)) - { - RenderThreadRunning = true; + { // If the render thread isn't already running... + RenderThreadRunning = true; // "Time for work, render thread!" RenderThread = Platform::Thread_Create([this, &gpu]() { RenderThreadFunc(gpu); }); } - // otherwise more than one frame can be queued up at once + // "Be on standby, but don't start rendering until I tell you to!" Platform::Semaphore_Reset(Sema_RenderStart); + // "Oh, sorry, were you already in the middle of a frame from the last iteration?" if (RenderThreadRendering) + // "Tell me when you're done, I'll wait here." Platform::Semaphore_Wait(Sema_RenderDone); + // "All good? Okay, let me give you your training." + // "(Maybe you're still the same thread, but I have to tell you this stuff anyway.)" + + // "This is the signal you'll send when you're done with a frame." + // "I'll listen for it when I need to show something to the frontend." Platform::Semaphore_Reset(Sema_RenderDone); + + // "This is the signal I'll send when I want you to start rendering." + // "Don't do anything until you get the message." Platform::Semaphore_Reset(Sema_RenderStart); - Platform::Semaphore_Reset(Sema_ScanlineCount); - Platform::Semaphore_Post(Sema_RenderStart); + // "This is the signal you'll send every time you finish drawing a line." + // "I might need some of your scanlines before you finish the whole buffer," + // "so let me know as soon as you're done with each one." + Platform::Semaphore_Reset(Sema_ScanlineCount); } else { @@ -72,6 +87,13 @@ void SoftRenderer::SetupRenderThread(GPU& gpu) } } +void SoftRenderer::EnableRenderThread() +{ + if (Threaded && Sema_RenderStart) + { + Platform::Semaphore_Post(Sema_RenderStart); + } +} SoftRenderer::SoftRenderer(bool threaded) noexcept : Renderer3D(false), Threaded(threaded) @@ -103,6 +125,7 @@ void SoftRenderer::Reset(GPU& gpu) PrevIsShadowMask = false; SetupRenderThread(gpu); + EnableRenderThread(); } void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept @@ -111,6 +134,7 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept { Threaded = threaded; SetupRenderThread(gpu); + EnableRenderThread(); } } @@ -1692,12 +1716,14 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg ScanlineFinalPass(gpu.GPU3D, y-1); if (threaded) + // Notify the main thread that we're done with a scanline. Platform::Semaphore_Post(Sema_ScanlineCount); } ScanlineFinalPass(gpu.GPU3D, 191); if (threaded) + // If this renderer is threaded, notify the main thread that we're done with the frame. Platform::Semaphore_Post(Sema_ScanlineCount); } @@ -1719,6 +1745,7 @@ void SoftRenderer::RenderFrame(GPU& gpu) if (RenderThreadRunning.load(std::memory_order_relaxed)) { + // "Render thread, you're up! Get moving." Platform::Semaphore_Post(Sema_RenderStart); } else if (!FrameIdentical) @@ -1731,18 +1758,26 @@ void SoftRenderer::RenderFrame(GPU& gpu) void SoftRenderer::RestartFrame(GPU& gpu) { SetupRenderThread(gpu); + EnableRenderThread(); } void SoftRenderer::RenderThreadFunc(GPU& gpu) { for (;;) { + // Wait for a notice from the main thread to start rendering (or to stop entirely). Platform::Semaphore_Wait(Sema_RenderStart); if (!RenderThreadRunning) return; + // Protect the GPU state from the main thread. + // Some melonDS frontends (though not ours) + // will repeatedly save or load states; + // if they do so while the render thread is busy here, + // the ensuing race conditions may cause a crash + // (since some of the GPU state includes pointers). RenderThreadRendering = true; if (FrameIdentical) - { + { // If no rendering is needed, just say we're done. Platform::Semaphore_Post(Sema_ScanlineCount, 192); } else @@ -1751,7 +1786,10 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu) RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); } + // Tell the main thread that we're done rendering + // and that it's safe to access the GPU state again. Platform::Semaphore_Post(Sema_RenderDone); + RenderThreadRendering = false; } } @@ -1761,6 +1799,9 @@ u32* SoftRenderer::GetLine(int line) if (RenderThreadRunning.load(std::memory_order_relaxed)) { if (line < 192) + // We need a scanline, so let's wait for the render thread to finish it. + // (both threads process scanlines from top-to-bottom, + // so we don't need to wait for a specific row) Platform::Semaphore_Wait(Sema_ScanlineCount); } diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index de65944..9cfdf9a 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -42,8 +42,10 @@ public: u32* GetLine(int line) override; void SetupRenderThread(GPU& gpu); + void EnableRenderThread(); void StopRenderThread(); private: + friend void GPU3D::DoSavestate(Savestate* file) noexcept; // Notes on the interpolator: // // This is a theory on how the DS hardware interpolates values. It matches hardware output @@ -506,8 +508,15 @@ private: Platform::Thread* RenderThread; std::atomic_bool RenderThreadRunning; std::atomic_bool RenderThreadRendering; + + // Used by the main thread to tell the render thread to start rendering a frame Platform::Semaphore* Sema_RenderStart; + + // Used by the render thread to tell the main thread that it's done rendering a frame Platform::Semaphore* Sema_RenderDone; + + // Used to allow the main thread to read some scanlines + // before (the 3D portion of) the entire frame is rasterized. Platform::Semaphore* Sema_ScanlineCount; }; } diff --git a/src/Savestate.h b/src/Savestate.h index 236aa64..2e1400a 100644 --- a/src/Savestate.h +++ b/src/Savestate.h @@ -24,7 +24,7 @@ #include <stdio.h> #include "types.h" -#define SAVESTATE_MAJOR 11 +#define SAVESTATE_MAJOR 12 #define SAVESTATE_MINOR 1 namespace melonDS |