aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJesse Talavera <jesse@jesse.tg>2024-01-07 17:39:43 -0500
committerGitHub <noreply@github.com>2024-01-07 23:39:43 +0100
commit8143f549566029f366b774d0722c5f16011b9b56 (patch)
treee097c2bdd11f852a0886c1861c9c70c896a0d371
parentf68f55d002c412ead4e0ce88dd48b11d6f7900e3 (diff)
Protect savestates while the threaded software renderer is running (#1864)
* First crack at ensuring the render thread doesn't touch GPU state while it's being serialized * Get rid of the semaphore wait * Add some extra fields into GPU3D's serialization * Oops, TempVertexBuffer is already serialized * Move vertex serialization into its own method * Lock the GPU3D state when rendering on the render thread or serializing it * Revert "Lock the GPU3D state when rendering on the render thread or serializing it" This reverts commit 2f49a551c13934b9dc815bbda67a45098f0482a7. * Add comments that describe the synchronization within GPU3D_Soft - I need to understand it before I can solve my actual problem - Now I do * Revert "Revert "Lock the GPU3D state when rendering on the render thread or serializing it"" This reverts commit 1977566a6d8671d72bd94ba4ebf832c3bf08933a. * Let's try locking the GPU3D state throughout NDS::RunFrame - Just to see what happens * Slim down the lock's scope * Narrow the lock's scope some more * Remove the lock entirely * Try protecting the GPU3D state with just a mutex - I'll clean this up once I know it works * Remove a duplicate method definition * Add a missing `noexcept` specifier * Remove an unused function * Cut some non-hardware state from `GPU3D`'s savestate * Assume that the next frame after loading a savestate won't be identical * Actually, it _is_ worth it * Don't serialize the clip matrix - It's recalculated anyway * Serialize `RenderPolygonRAM` as an array of indexes * Clean up some comments - I liked the dialogue style, but oh well * Try restarting the render thread instead of using the lock - Let's see what happens * Put the lock back * Fix some polygon and vertex indexes being saved incorrectly - Taking the difference between two pointers results in the number of elements, not the number of bytes * Remove `SoftRenderer::StateBusy` since it turns out we don't need it - The real synchronization was the friends we made along the way
-rw-r--r--src/GPU3D.cpp107
-rw-r--r--src/GPU3D.h6
-rw-r--r--src/GPU3D_Soft.cpp53
-rw-r--r--src/GPU3D_Soft.h9
-rw-r--r--src/Savestate.h2
5 files changed, 126 insertions, 51 deletions
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 4787702..47abae2 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -146,6 +146,19 @@ GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr<Renderer3D>&& renderer) noexcept
{
}
+void Vertex::DoSavestate(Savestate* file) noexcept
+{
+ file->VarArray(Position, sizeof(Position));
+ file->VarArray(Color, sizeof(Color));
+ file->VarArray(TexCoords, sizeof(TexCoords));
+
+ file->Bool32(&Clipped);
+
+ file->VarArray(FinalPosition, sizeof(FinalPosition));
+ file->VarArray(FinalColor, sizeof(FinalColor));
+ file->VarArray(HiresPosition, sizeof(HiresPosition));
+}
+
void GPU3D::SetCurrentRenderer(std::unique_ptr<Renderer3D>&& renderer) noexcept
{
CurrentRenderer = std::move(renderer);
@@ -297,6 +310,12 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
{
file->Section("GP3D");
+ SoftRenderer* softRenderer = dynamic_cast<SoftRenderer*>(CurrentRenderer.get());
+ if (softRenderer && softRenderer->IsThreaded())
+ {
+ softRenderer->SetupRenderThread(NDS.GPU);
+ }
+
CmdFIFO.DoSavestate(file);
CmdPIPE.DoSavestate(file);
@@ -372,33 +391,21 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&VertexNumInPoly);
file->Var32(&NumConsecutivePolygons);
- for (int i = 0; i < 4; i++)
+ for (Vertex& vtx : TempVertexBuffer)
{
- Vertex* vtx = &TempVertexBuffer[i];
-
- file->VarArray(vtx->Position, sizeof(s32)*4);
- file->VarArray(vtx->Color, sizeof(s32)*3);
- file->VarArray(vtx->TexCoords, sizeof(s16)*2);
-
- file->Bool32(&vtx->Clipped);
-
- file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
- file->VarArray(vtx->FinalColor, sizeof(s32)*3);
+ vtx.DoSavestate(file);
}
if (file->Saving)
{
- u32 id;
- if (LastStripPolygon) id = (u32)((LastStripPolygon - (&PolygonRAM[0])) / sizeof(Polygon));
- else id = -1;
- file->Var32(&id);
+ u32 index = LastStripPolygon ? (u32)(LastStripPolygon - &PolygonRAM[0]) : UINT32_MAX;
+ file->Var32(&index);
}
else
{
- u32 id;
- file->Var32(&id);
- if (id == 0xFFFFFFFF) LastStripPolygon = NULL;
- else LastStripPolygon = &PolygonRAM[id];
+ u32 index = UINT32_MAX;
+ file->Var32(&index);
+ LastStripPolygon = (index == UINT32_MAX) ? nullptr : &PolygonRAM[index];
}
file->Var32(&CurRAMBank);
@@ -409,18 +416,9 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&FlushRequest);
file->Var32(&FlushAttributes);
- for (int i = 0; i < 6144*2; i++)
+ for (Vertex& vtx : VertexRAM)
{
- Vertex* vtx = &VertexRAM[i];
-
- file->VarArray(vtx->Position, sizeof(s32)*4);
- file->VarArray(vtx->Color, sizeof(s32)*3);
- file->VarArray(vtx->TexCoords, sizeof(s16)*2);
-
- file->Bool32(&vtx->Clipped);
-
- file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
- file->VarArray(vtx->FinalColor, sizeof(s32)*3);
+ vtx.DoSavestate(file);
}
for(int i = 0; i < 2048*2; i++)
@@ -434,20 +432,17 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
for (int j = 0; j < 10; j++)
{
Vertex* ptr = poly->Vertices[j];
- u32 id;
- if (ptr) id = (u32)((ptr - (&VertexRAM[0])) / sizeof(Vertex));
- else id = -1;
- file->Var32(&id);
+ u32 index = ptr ? (u32)(ptr - &VertexRAM[0]) : UINT32_MAX;
+ file->Var32(&index);
}
}
else
{
for (int j = 0; j < 10; j++)
{
- u32 id = -1;
- file->Var32(&id);
- if (id == 0xFFFFFFFF) poly->Vertices[j] = NULL;
- else poly->Vertices[j] = &VertexRAM[id];
+ u32 index = UINT32_MAX;
+ file->Var32(&index);
+ poly->Vertices[j] = index == UINT32_MAX ? nullptr : &VertexRAM[index];
}
}
@@ -495,7 +490,6 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
}
}
- // probably not worth storing the vblank-latched Renderxxxxxx variables
CmdStallQueue.DoSavestate(file);
file->Var32((u32*)&VertexPipeline);
@@ -511,10 +505,27 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0];
CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0];
+ }
+
+ file->Var32(&RenderNumPolygons);
+ if (file->Saving)
+ {
+ for (const Polygon* p : RenderPolygonRAM)
+ {
+ u32 index = p ? (p - &PolygonRAM[0]) : UINT32_MAX;
+
+ file->Var32(&index);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < RenderPolygonRAM.size(); ++i)
+ {
+ u32 index = UINT32_MAX;
+ file->Var32(&index);
- // better safe than sorry, I guess
- // might cause a blank frame but atleast it won't shit itself
- RenderNumPolygons = 0;
+ RenderPolygonRAM[i] = index == UINT32_MAX ? nullptr : &PolygonRAM[index];
+ }
}
file->VarArray(CurVertex, sizeof(s16)*3);
@@ -534,6 +545,18 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->VarArray(ShininessTable, 128*sizeof(u8));
file->Bool32(&AbortFrame);
+ file->Bool32(&GeometryEnabled);
+ file->Bool32(&RenderingEnabled);
+ file->Var32(&PolygonMode);
+ file->Var32(&PolygonAttr);
+ file->Var32(&CurPolygonAttr);
+ file->Var32(&TexParam);
+ file->Var32(&TexPalette);
+ RenderFrameIdentical = false;
+ if (softRenderer && softRenderer->IsThreaded())
+ {
+ softRenderer->EnableRenderThread();
+ }
}
diff --git a/src/GPU3D.h b/src/GPU3D.h
index 7c42eeb..4a5fe6e 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -47,6 +47,7 @@ struct Vertex
// TODO maybe: hi-res color? (that survives clipping)
s32 HiresPosition[2];
+ void DoSavestate(Savestate* file) noexcept;
};
struct Polygon
@@ -78,6 +79,7 @@ struct Polygon
u32 SortKey;
+ void DoSavestate(Savestate* file) noexcept;
};
class Renderer3D;
@@ -269,7 +271,7 @@ public:
u32 RenderClearAttr1 = 0;
u32 RenderClearAttr2 = 0;
- bool RenderFrameIdentical = false;
+ bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize
bool AbortFrame = false;
@@ -323,7 +325,7 @@ public:
u32 FlushRequest = 0;
u32 FlushAttributes = 0;
- u32 ScrolledLine[256];
+ u32 ScrolledLine[256]; // not part of the hardware state, don't serialize
};
class Renderer3D
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index c96464a..74027d5 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -34,8 +34,11 @@ void SoftRenderer::StopRenderThread()
{
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
+ // Tell the render thread to stop drawing new frames, and finish up the current one.
RenderThreadRunning = false;
+
Platform::Semaphore_Post(Sema_RenderStart);
+
Platform::Thread_Wait(RenderThread);
Platform::Thread_Free(RenderThread);
RenderThread = nullptr;
@@ -47,24 +50,36 @@ void SoftRenderer::SetupRenderThread(GPU& gpu)
if (Threaded)
{
if (!RenderThreadRunning.load(std::memory_order_relaxed))
- {
- RenderThreadRunning = true;
+ { // If the render thread isn't already running...
+ RenderThreadRunning = true; // "Time for work, render thread!"
RenderThread = Platform::Thread_Create([this, &gpu]() {
RenderThreadFunc(gpu);
});
}
- // otherwise more than one frame can be queued up at once
+ // "Be on standby, but don't start rendering until I tell you to!"
Platform::Semaphore_Reset(Sema_RenderStart);
+ // "Oh, sorry, were you already in the middle of a frame from the last iteration?"
if (RenderThreadRendering)
+ // "Tell me when you're done, I'll wait here."
Platform::Semaphore_Wait(Sema_RenderDone);
+ // "All good? Okay, let me give you your training."
+ // "(Maybe you're still the same thread, but I have to tell you this stuff anyway.)"
+
+ // "This is the signal you'll send when you're done with a frame."
+ // "I'll listen for it when I need to show something to the frontend."
Platform::Semaphore_Reset(Sema_RenderDone);
+
+ // "This is the signal I'll send when I want you to start rendering."
+ // "Don't do anything until you get the message."
Platform::Semaphore_Reset(Sema_RenderStart);
- Platform::Semaphore_Reset(Sema_ScanlineCount);
- Platform::Semaphore_Post(Sema_RenderStart);
+ // "This is the signal you'll send every time you finish drawing a line."
+ // "I might need some of your scanlines before you finish the whole buffer,"
+ // "so let me know as soon as you're done with each one."
+ Platform::Semaphore_Reset(Sema_ScanlineCount);
}
else
{
@@ -72,6 +87,13 @@ void SoftRenderer::SetupRenderThread(GPU& gpu)
}
}
+void SoftRenderer::EnableRenderThread()
+{
+ if (Threaded && Sema_RenderStart)
+ {
+ Platform::Semaphore_Post(Sema_RenderStart);
+ }
+}
SoftRenderer::SoftRenderer(bool threaded) noexcept
: Renderer3D(false), Threaded(threaded)
@@ -103,6 +125,7 @@ void SoftRenderer::Reset(GPU& gpu)
PrevIsShadowMask = false;
SetupRenderThread(gpu);
+ EnableRenderThread();
}
void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
@@ -111,6 +134,7 @@ void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept
{
Threaded = threaded;
SetupRenderThread(gpu);
+ EnableRenderThread();
}
}
@@ -1692,12 +1716,14 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg
ScanlineFinalPass(gpu.GPU3D, y-1);
if (threaded)
+ // Notify the main thread that we're done with a scanline.
Platform::Semaphore_Post(Sema_ScanlineCount);
}
ScanlineFinalPass(gpu.GPU3D, 191);
if (threaded)
+ // If this renderer is threaded, notify the main thread that we're done with the frame.
Platform::Semaphore_Post(Sema_ScanlineCount);
}
@@ -1719,6 +1745,7 @@ void SoftRenderer::RenderFrame(GPU& gpu)
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
+ // "Render thread, you're up! Get moving."
Platform::Semaphore_Post(Sema_RenderStart);
}
else if (!FrameIdentical)
@@ -1731,18 +1758,26 @@ void SoftRenderer::RenderFrame(GPU& gpu)
void SoftRenderer::RestartFrame(GPU& gpu)
{
SetupRenderThread(gpu);
+ EnableRenderThread();
}
void SoftRenderer::RenderThreadFunc(GPU& gpu)
{
for (;;)
{
+ // Wait for a notice from the main thread to start rendering (or to stop entirely).
Platform::Semaphore_Wait(Sema_RenderStart);
if (!RenderThreadRunning) return;
+ // Protect the GPU state from the main thread.
+ // Some melonDS frontends (though not ours)
+ // will repeatedly save or load states;
+ // if they do so while the render thread is busy here,
+ // the ensuing race conditions may cause a crash
+ // (since some of the GPU state includes pointers).
RenderThreadRendering = true;
if (FrameIdentical)
- {
+ { // If no rendering is needed, just say we're done.
Platform::Semaphore_Post(Sema_ScanlineCount, 192);
}
else
@@ -1751,7 +1786,10 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu)
RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons);
}
+ // Tell the main thread that we're done rendering
+ // and that it's safe to access the GPU state again.
Platform::Semaphore_Post(Sema_RenderDone);
+
RenderThreadRendering = false;
}
}
@@ -1761,6 +1799,9 @@ u32* SoftRenderer::GetLine(int line)
if (RenderThreadRunning.load(std::memory_order_relaxed))
{
if (line < 192)
+ // We need a scanline, so let's wait for the render thread to finish it.
+ // (both threads process scanlines from top-to-bottom,
+ // so we don't need to wait for a specific row)
Platform::Semaphore_Wait(Sema_ScanlineCount);
}
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index de65944..9cfdf9a 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -42,8 +42,10 @@ public:
u32* GetLine(int line) override;
void SetupRenderThread(GPU& gpu);
+ void EnableRenderThread();
void StopRenderThread();
private:
+ friend void GPU3D::DoSavestate(Savestate* file) noexcept;
// Notes on the interpolator:
//
// This is a theory on how the DS hardware interpolates values. It matches hardware output
@@ -506,8 +508,15 @@ private:
Platform::Thread* RenderThread;
std::atomic_bool RenderThreadRunning;
std::atomic_bool RenderThreadRendering;
+
+ // Used by the main thread to tell the render thread to start rendering a frame
Platform::Semaphore* Sema_RenderStart;
+
+ // Used by the render thread to tell the main thread that it's done rendering a frame
Platform::Semaphore* Sema_RenderDone;
+
+ // Used to allow the main thread to read some scanlines
+ // before (the 3D portion of) the entire frame is rasterized.
Platform::Semaphore* Sema_ScanlineCount;
};
}
diff --git a/src/Savestate.h b/src/Savestate.h
index 236aa64..2e1400a 100644
--- a/src/Savestate.h
+++ b/src/Savestate.h
@@ -24,7 +24,7 @@
#include <stdio.h>
#include "types.h"
-#define SAVESTATE_MAJOR 11
+#define SAVESTATE_MAJOR 12
#define SAVESTATE_MINOR 1
namespace melonDS