diff options
Diffstat (limited to 'src/GPU3D_Soft.cpp')
-rw-r--r-- | src/GPU3D_Soft.cpp | 531 |
1 files changed, 43 insertions, 488 deletions
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 3d6ace6..f6d27a0 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -16,82 +16,43 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ +#include "GPU3D_Soft.h" + #include <stdio.h> #include <string.h> #include "NDS.h" #include "GPU.h" #include "Config.h" -#include "Platform.h" namespace GPU3D { -namespace SoftRenderer -{ - -// buffer dimensions are 258x194 to add a offscreen 1px border -// which simplifies edge marking tests -// buffer is duplicated to keep track of the two topmost pixels -// TODO: check if the hardware can accidentally plot pixels -// offscreen in that border - -const int ScanlineWidth = 258; -const int NumScanlines = 194; -const int BufferSize = ScanlineWidth * NumScanlines; -const int FirstPixelOffset = ScanlineWidth + 1; - -u32 ColorBuffer[BufferSize * 2]; -u32 DepthBuffer[BufferSize * 2]; -u32 AttrBuffer[BufferSize * 2]; - -// attribute buffer: -// bit0-3: edge flags (left/right/top/bottom) -// bit4: backfacing flag -// bit8-12: antialiasing alpha -// bit15: fog enable -// bit16-21: polygon ID for translucent pixels -// bit22: translucent flag -// bit24-29: polygon ID for opaque pixels - -u8 StencilBuffer[256*2]; -bool PrevIsShadowMask; - -bool Enabled; - -bool FrameIdentical; - -// threading - -bool Threaded; -Platform::Thread* RenderThread; -bool RenderThreadRunning; -bool RenderThreadRendering; -Platform::Semaphore* Sema_RenderStart; -Platform::Semaphore* Sema_RenderDone; -Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); -void StopRenderThread() +void SoftRenderer::StopRenderThread() { if (RenderThreadRunning) { RenderThreadRunning = false; Platform::Semaphore_Post(Sema_RenderStart); - Platform::Thread_Wait(RenderThread); - Platform::Thread_Free(RenderThread); + // Platform::Thread_Wait(RenderThread); + // Platform::Thread_Free(RenderThread); + RenderThread.join(); + } } -void SetupRenderThread() +void SoftRenderer::SetupRenderThread() { if (Threaded) { if (!RenderThreadRunning) { RenderThreadRunning = true; - RenderThread = Platform::Thread_Create(RenderThreadFunc); + //RenderThread = Platform::Thread_Create(RenderThreadFunc); + RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this); } // otherwise more than one frame can be queued up at once @@ -113,7 +74,13 @@ void SetupRenderThread() } -bool Init() +SoftRenderer::SoftRenderer() + : Renderer3D(false) +{ + +} + +bool SoftRenderer::Init() { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -126,7 +93,7 @@ bool Init() return true; } -void DeInit() +void SoftRenderer::DeInit() { StopRenderThread(); @@ -135,7 +102,7 @@ void DeInit() Platform::Semaphore_Free(Sema_ScanlineCount); } -void Reset() +void SoftRenderer::Reset() { memset(ColorBuffer, 0, BufferSize * 2 * 4); memset(DepthBuffer, 0, BufferSize * 2 * 4); @@ -146,428 +113,13 @@ void Reset() SetupRenderThread(); } -void SetRenderSettings(GPU::RenderSettings& settings) +void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings) { Threaded = settings.Soft_Threaded; SetupRenderThread(); } - - -// Notes on the interpolator: -// -// This is a theory on how the DS hardware interpolates values. It matches hardware output -// in the tests I did, but the hardware may be doing it differently. You never know. -// -// Assuming you want to perspective-correctly interpolate a variable named A across two points -// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, -// then divide A/W by 1/W to recover the correct A value. -// -// The DS GPU approximates interpolation by calculating a perspective-correct interpolation -// between 0 and 1, then using the result as a factor to linearly interpolate the actual -// vertex attributes. The factor has 9 bits of precision when interpolating along Y and -// 8 bits along X. -// -// There's a special path for when the two W values are equal: it directly does linear -// interpolation, avoiding precision loss from the aforementioned approximation. -// Which is desirable when using the GPU to draw 2D graphics. - -template<int dir> -class Interpolator -{ -public: - Interpolator() {} - Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) - { - Setup(x0, x1, w0, w1); - } - - void Setup(s32 x0, s32 x1, s32 w0, s32 w1) - { - this->x0 = x0; - this->x1 = x1; - this->xdiff = x1 - x0; - - // calculate reciprocals for linear mode and Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip = (1<<30) / this->xdiff; - else - this->xrecip = 0; - this->xrecip_z = this->xrecip >> 8; - - // linear mode is used if both W values are equal and have - // low-order bits cleared (0-6 along X, 1-6 along Y) - u32 mask = dir ? 0x7E : 0x7F; - if ((w0 == w1) && !(w0 & mask) && !(w1 & mask)) - this->linear = true; - else - this->linear = false; - - if (dir) - { - // along Y - - if ((w0 & 0x1) && !(w1 & 0x1)) - { - this->w0n = w0 - 1; - this->w0d = w0 + 1; - this->w1d = w1; - } - else - { - this->w0n = w0 & 0xFFFE; - this->w0d = w0 & 0xFFFE; - this->w1d = w1 & 0xFFFE; - } - - this->shift = 9; - } - else - { - // along X - - this->w0n = w0; - this->w0d = w0; - this->w1d = w1; - - this->shift = 8; - } - } - - void SetX(s32 x) - { - x -= x0; - this->x = x; - if (xdiff != 0 && !linear) - { - s64 num = ((s64)x * w0n) << shift; - s32 den = (x * w0d) + ((xdiff-x) * w1d); - - // this seems to be a proper division on hardware :/ - // I haven't been able to find cases that produce imperfect output - if (den == 0) yfactor = 0; - else yfactor = (s32)(num / den); - } - } - - s32 Interpolate(s32 y0, s32 y1) - { - if (xdiff == 0 || y0 == y1) return y0; - - if (!linear) - { - // perspective-correct approx. interpolation - if (y0 < y1) - return y0 + (((y1-y0) * yfactor) >> shift); - else - return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift); - } - else - { - // linear interpolation - // checkme: the rounding bias there (3<<24) is a guess - if (y0 < y1) - return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30); - else - return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30); - } - } - - s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) - { - if (xdiff == 0 || z0 == z1) return z0; - - if (wbuffer) - { - // W-buffering: perspective-correct approx. interpolation - if (z0 < z1) - return z0 + (((s64)(z1-z0) * yfactor) >> shift); - else - return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift); - } - else - { - // Z-buffering: linear interpolation - // still doesn't quite match hardware... - s32 base, disp, factor; - - if (z0 < z1) - { - base = z0; - disp = z1 - z0; - factor = x; - } - else - { - base = z1; - disp = z0 - z1, - factor = xdiff - x; - } - - if (dir) - { - int shift = 0; - while (disp > 0x3FF) - { - disp >>= 1; - shift++; - } - - return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); - } - else - { - disp >>= 9; - return base + (((s64)disp * factor * xrecip_z) >> 13); - } - } - } - -private: - s32 x0, x1, xdiff, x; - - int shift; - bool linear; - - s32 xrecip, xrecip_z; - s32 w0n, w0d, w1d; - - u32 yfactor; -}; - - -template<int side> -class Slope -{ -public: - Slope() {} - - s32 SetupDummy(s32 x0) - { - if (side) - { - dx = -0x40000; - x0--; - } - else - { - dx = 0; - } - - this->x0 = x0; - this->xmin = x0; - this->xmax = x0; - - Increment = 0; - XMajor = false; - - Interp.Setup(0, 0, 0, 0); - Interp.SetX(0); - - xcov_incr = 0; - - return x0; - } - - s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) - { - this->x0 = x0; - this->y = y; - - if (x1 > x0) - { - this->xmin = x0; - this->xmax = x1-1; - this->Negative = false; - } - else if (x1 < x0) - { - this->xmin = x1; - this->xmax = x0-1; - this->Negative = true; - } - else - { - this->xmin = x0; - if (side) this->xmin--; - this->xmax = this->xmin; - this->Negative = false; - } - - xlen = xmax+1 - xmin; - ylen = y1 - y0; - - // slope increment has a 18-bit fractional part - // note: for some reason, x/y isn't calculated directly, - // instead, 1/y is calculated and then multiplied by x - // TODO: this is still not perfect (see for example x=169 y=33) - if (ylen == 0) - Increment = 0; - else if (ylen == xlen) - Increment = 0x40000; - else - { - s32 yrecip = (1<<18) / ylen; - Increment = (x1-x0) * yrecip; - if (Increment < 0) Increment = -Increment; - } - - XMajor = (Increment > 0x40000); - - if (side) - { - // right - - if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = -0x40000; - } - else - { - // left - - if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = 0; - } - - dx += (y - y0) * Increment; - - s32 x = XVal(); - - if (XMajor) - { - if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme - else Interp.Setup(x0, x1, w0, w1); - Interp.SetX(x); - - // used for calculating AA coverage - xcov_incr = (ylen << 10) / xlen; - } - else - { - Interp.Setup(y0, y1, w0, w1); - Interp.SetX(y); - } - - return x; - } - - s32 Step() - { - dx += Increment; - y++; - - s32 x = XVal(); - if (XMajor) - { - Interp.SetX(x); - } - else - { - Interp.SetX(y); - } - return x; - } - - s32 XVal() - { - s32 ret; - if (Negative) ret = x0 - (dx >> 18); - else ret = x0 + (dx >> 18); - - if (ret < xmin) ret = xmin; - else if (ret > xmax) ret = xmax; - return ret; - } - - void EdgeParams_XMajor(s32* length, s32* coverage) - { - if (side ^ Negative) - *length = (dx >> 18) - ((dx-Increment) >> 18); - else - *length = ((dx+Increment) >> 18) - (dx >> 18); - - // for X-major edges, we return the coverage - // for the first pixel, and the increment for - // further pixels on the same scanline - s32 startx = dx >> 18; - if (Negative) startx = xlen - startx; - if (side) startx = startx - *length + 1; - - s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; - *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); - } - - void EdgeParams_YMajor(s32* length, s32* coverage) - { - *length = 1; - - if (Increment == 0) - { - *coverage = 31; - } - else - { - s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; - if ((cov >> 5) != (dx >> 18)) cov = 31; - cov &= 0x1F; - if (!(side ^ Negative)) cov = 0x1F - cov; - - *coverage = cov; - } - } - - void EdgeParams(s32* length, s32* coverage) - { - if (XMajor) - return EdgeParams_XMajor(length, coverage); - else - return EdgeParams_YMajor(length, coverage); - } - - s32 Increment; - bool Negative; - bool XMajor; - Interpolator<1> Interp; - -private: - s32 x0, xmin, xmax; - s32 xlen, ylen; - s32 dx; - s32 y; - - s32 xcov_incr; - s32 ycoverage, ycov_incr; -}; - -struct RendererPolygon -{ - Polygon* PolyData; - - Slope<0> SlopeL; - Slope<1> SlopeR; - s32 XL, XR; - u32 CurVL, CurVR; - u32 NextVL, NextVR; - -}; - -RendererPolygon PolygonList[2048]; - -template <typename T> -inline T ReadVRAM_Texture(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; -} -template <typename T> -inline T ReadVRAM_TexPal(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; -} - -void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) +void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { u32 vramaddr = (texparam & 0xFFFF) << 3; @@ -873,7 +425,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24); } -u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) +u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) { u8 r, g, b, a; @@ -981,7 +533,7 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) return r | (g << 8) | (b << 16) | (a << 24); } -void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) +void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) { u32 dstattr = AttrBuffer[pixeladdr]; u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F); @@ -1020,7 +572,7 @@ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 sha AttrBuffer[pixeladdr] = attr; } -void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1047,7 +599,7 @@ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); } -void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1074,7 +626,7 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } -void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) { u32 nverts = polygon->NumVertices; @@ -1127,7 +679,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon) } } -void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1340,7 +892,7 @@ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderPolygonScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1755,7 +1307,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderScanline(s32 y, int npolys) +void SoftRenderer::RenderScanline(s32 y, int npolys) { for (int i = 0; i < npolys; i++) { @@ -1772,8 +1324,7 @@ void RenderScanline(s32 y, int npolys) } } - -u32 CalculateFogDensity(u32 pixeladdr) +u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) { u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; @@ -1812,7 +1363,7 @@ u32 CalculateFogDensity(u32 pixeladdr) return density; } -void ScanlineFinalPass(s32 y) +void SoftRenderer::ScanlineFinalPass(s32 y) { // to consider: // clearing all polygon fog flags if the master flag isn't set? @@ -1981,7 +1532,7 @@ void ScanlineFinalPass(s32 y) } } -void ClearBuffers() +void SoftRenderer::ClearBuffers() { u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID @@ -2055,7 +1606,7 @@ void ClearBuffers() u32 a = (RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (RenderClearAttr1 & 0x8000); + polyid |= (RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -2070,7 +1621,7 @@ void ClearBuffers() } } -void RenderPolygons(bool threaded, Polygon** polygons, int npolys) +void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -2096,13 +1647,13 @@ void RenderPolygons(bool threaded, Polygon** polygons, int npolys) Platform::Semaphore_Post(Sema_ScanlineCount); } -void VCount144() +void SoftRenderer::VCount144() { if (RenderThreadRunning) Platform::Semaphore_Wait(Sema_RenderDone); } -void RenderFrame() +void SoftRenderer::RenderFrame() { auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); @@ -2123,7 +1674,12 @@ void RenderFrame() } } -void RenderThreadFunc() +void SoftRenderer::RestartFrame() +{ + SetupRenderThread(); +} + +void SoftRenderer::RenderThreadFunc() { for (;;) { @@ -2146,7 +1702,7 @@ void RenderThreadFunc() } } -u32* GetLine(int line) +u32* SoftRenderer::GetLine(int line) { if (RenderThreadRunning) { @@ -2158,4 +1714,3 @@ u32* GetLine(int line) } } -} |