diff options
author | Wunk <wunkolo@gmail.com> | 2021-02-09 14:38:51 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-09 23:38:51 +0100 |
commit | a7029aebae2d09c2dd666a5832a90e227305bab1 (patch) | |
tree | 33869de30c9893df4cbc2ffaee5e8f1c68b3f807 /src | |
parent | 891427c75c6c617bf61b2e7f2a3f0d79872f7f3c (diff) |
Allow for more modular renderer backends (#990)
* Draft GPU3D renderer modularization
* Update sources C++ standard to C++17
The top-level `CMakeLists.txt` is already using the C++17 standard.
* Move GLCompositor into class type
Some other misc fixes to push towards better modularity
* Make renderer-implementation types move-only
These types are going to be holding onto handles
of GPU-side resources and shouldn't ever be copied around.
* Fix OSX: Remove 'register' storage class specifier
`register` has been removed in C++17...
But this keyword hasn't done anything in years anyways.
OSX builds consider this "warning" an error and it
stops the whole build.
* Add RestartFrame to Renderer3D interface
* Move Accelerated property to Renderer3D interface
There are points in the code base where we check
`renderer != 0` to know whether we are feeding
an OpenGL renderer. Instead, we can make this
a property of the renderer itself.
With this pattern a renderer can just say how it wants its data to come
in rather than have everyone know that they're talking to an OpenGL
renderer.
* Remove Accelerated flag from GPU
* Move 2D_Soft interface in separate header
Also make the current 2D engine an "owned" unique_ptr.
* Update alignment attribute to standard alignas
Uses standardized `alignas` rather than compiler-specific
attributes.
https://en.cppreference.com/w/cpp/language/alignas
* Fix Clang: alignas specifier
Alignment must be specified before the array to align the entire array.
https://en.cppreference.com/w/cpp/language/alignas
* Converted Renderer3D Accelerated to variable
This flag is checked a lot during scanline rasterization. So rather
than paying for a virtual function call in the main rendering
code, it is now a public `const bool` member that is written only once,
during Renderer3D construction.
Diffstat (limited to 'src')
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/GPU.cpp | 101 | ||||
-rw-r--r-- | src/GPU.h | 32 | ||||
-rw-r--r-- | src/GPU2D.h | 69 | ||||
-rw-r--r-- | src/GPU2D_Soft.cpp | 39 | ||||
-rw-r--r-- | src/GPU2D_Soft.h | 79 | ||||
-rw-r--r-- | src/GPU3D.cpp | 22 | ||||
-rw-r--r-- | src/GPU3D.h | 55 | ||||
-rw-r--r-- | src/GPU3D_OpenGL.cpp | 143 | ||||
-rw-r--r-- | src/GPU3D_OpenGL.h | 152 | ||||
-rw-r--r-- | src/GPU3D_Soft.cpp | 531 | ||||
-rw-r--r-- | src/GPU3D_Soft.h | 516 | ||||
-rw-r--r-- | src/GPU_OpenGL.cpp | 48 | ||||
-rw-r--r-- | src/GPU_OpenGL.h | 68 | ||||
-rw-r--r-- | src/NDS.cpp | 2 | ||||
-rw-r--r-- | src/frontend/qt_sdl/main.cpp | 12 |
16 files changed, 1037 insertions, 834 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3bcecbc..dc32b2a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ project(core) -set (CMAKE_CXX_STANDARD 14) +set (CMAKE_CXX_STANDARD 17) add_library(core STATIC ARCodeFile.cpp diff --git a/src/GPU.cpp b/src/GPU.cpp index ab3a5f9..d5465bb 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -21,6 +21,7 @@ #include "NDS.h" #include "GPU.h" +#include "GPU2D_Soft.h" namespace GPU { @@ -79,11 +80,10 @@ u8* VRAMPtr_BOBJ[0x8]; int FrontBuffer; u32* Framebuffer[2][2]; -int Renderer; -bool Accelerated; +int Renderer = 0; -GPU2D* GPU2D_A; -GPU2D* GPU2D_B; +std::unique_ptr<GPU2D> GPU2D_A = {}; +std::unique_ptr<GPU2D> GPU2D_B = {}; /* VRAM invalidation tracking @@ -145,25 +145,28 @@ u8 VRAMFlat_TexPal[128*1024]; u32 OAMDirty; u32 PaletteDirty; +#ifdef OGLRENDERER_ENABLED +std::unique_ptr<GLCompositor> CurGLCompositor = {}; +#endif + bool Init() { - GPU2D_A = new GPU2D_Soft(0); - GPU2D_B = new GPU2D_Soft(1); + GPU2D_A = std::make_unique<GPU2D_Soft>(0); + GPU2D_B = std::make_unique<GPU2D_Soft>(1); if (!GPU3D::Init()) return false; FrontBuffer = 0; Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL; Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL; Renderer = 0; - Accelerated = false; return true; } void DeInit() { - delete GPU2D_A; - delete GPU2D_B; + GPU2D_A.reset(); + GPU2D_B.reset(); GPU3D::DeInit(); if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; @@ -250,9 +253,12 @@ void Reset() memset(VRAMPtr_BBG, 0, sizeof(VRAMPtr_BBG)); memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ)); - int fbsize; - if (Accelerated) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + size_t fbsize; + if (GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + for (int i = 0; i < fbsize; i++) { Framebuffer[0][0][i] = 0xFFFFFFFF; @@ -283,17 +289,22 @@ void Reset() void Stop() { int fbsize; - if (Accelerated) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + if 
(GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + memset(Framebuffer[0][0], 0, fbsize*4); memset(Framebuffer[0][1], 0, fbsize*4); memset(Framebuffer[1][0], 0, fbsize*4); memset(Framebuffer[1][1], 0, fbsize*4); #ifdef OGLRENDERER_ENABLED - if (Accelerated) - GLCompositor::Stop(); -#endif + // This needs a better way to know that we're + // using the OpenGL renderer specifically + if (GPU3D::CurrentRenderer->Accelerated) + CurGLCompositor->Stop(); +#endif } void DoSavestate(Savestate* file) @@ -382,37 +393,42 @@ void InitRenderer(int renderer) #ifdef OGLRENDERER_ENABLED if (renderer == 1) { - if (!GLCompositor::Init()) + CurGLCompositor = std::make_unique<GLCompositor>(); + // Create opengl rendrerer + if (!CurGLCompositor->Init()) { + // Fallback on software renderer renderer = 0; + GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>(); + GPU3D::CurrentRenderer->Init(); } - else if (!GPU3D::GLRenderer::Init()) + GPU3D::CurrentRenderer = std::make_unique<GPU3D::GLRenderer>(); + if (!GPU3D::CurrentRenderer->Init()) { - GLCompositor::DeInit(); + // Fallback on software renderer + CurGLCompositor->DeInit(); + CurGLCompositor.reset(); renderer = 0; + GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>(); } } else #endif { - GPU3D::SoftRenderer::Init(); + GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>(); + GPU3D::CurrentRenderer->Init(); } Renderer = renderer; - Accelerated = renderer != 0; } void DeInitRenderer() { - if (Renderer == 0) - { - GPU3D::SoftRenderer::DeInit(); - } + GPU3D::CurrentRenderer->DeInit(); #ifdef OGLRENDERER_ENABLED - else + if (Renderer == 1) { - GPU3D::GLRenderer::DeInit(); - GLCompositor::DeInit(); + CurGLCompositor->DeInit(); } #endif } @@ -421,13 +437,13 @@ void ResetRenderer() { if (Renderer == 0) { - GPU3D::SoftRenderer::Reset(); + GPU3D::CurrentRenderer->Reset(); } #ifdef OGLRENDERER_ENABLED else { - GLCompositor::Reset(); - GPU3D::GLRenderer::Reset(); + 
CurGLCompositor->Reset(); + GPU3D::CurrentRenderer->Reset(); } #endif } @@ -440,10 +456,12 @@ void SetRenderSettings(int renderer, RenderSettings& settings) InitRenderer(renderer); } - bool accel = Accelerated; int fbsize; - if (accel) fbsize = (256*3 + 1) * 192; - else fbsize = 256 * 192; + if (GPU3D::CurrentRenderer->Accelerated) + fbsize = (256*3 + 1) * 192; + else + fbsize = 256 * 192; + if (Framebuffer[0][0]) { delete[] Framebuffer[0][0]; Framebuffer[0][0] = nullptr; } if (Framebuffer[1][0]) { delete[] Framebuffer[1][0]; Framebuffer[1][0] = nullptr; } if (Framebuffer[0][1]) { delete[] Framebuffer[0][1]; Framebuffer[0][1] = nullptr; } @@ -461,18 +479,15 @@ void SetRenderSettings(int renderer, RenderSettings& settings) AssignFramebuffers(); - GPU2D_A->SetRenderSettings(accel); - GPU2D_B->SetRenderSettings(accel); - if (Renderer == 0) { - GPU3D::SoftRenderer::SetRenderSettings(settings); + GPU3D::CurrentRenderer->SetRenderSettings(settings); } #ifdef OGLRENDERER_ENABLED else { - GLCompositor::SetRenderSettings(settings); - GPU3D::GLRenderer::SetRenderSettings(settings); + CurGLCompositor->SetRenderSettings(settings); + GPU3D::CurrentRenderer->SetRenderSettings(settings); } #endif } @@ -1149,7 +1164,9 @@ void StartScanline(u32 line) GPU3D::VBlank(); #ifdef OGLRENDERER_ENABLED - if (Accelerated) GLCompositor::RenderFrame(); + // Need a better way to identify the openGL renderer in particular + if (GPU3D::CurrentRenderer->Accelerated) + CurGLCompositor->RenderFrame(); #endif } } @@ -19,9 +19,15 @@ #ifndef GPU_H #define GPU_H +#include <memory> + #include "GPU2D.h" #include "NonStupidBitfield.h" +#ifdef OGLRENDERER_ENABLED +#include "GPU_OpenGL.h" +#endif + namespace GPU { @@ -69,8 +75,8 @@ extern u8* VRAMPtr_BOBJ[0x8]; extern int FrontBuffer; extern u32* Framebuffer[2][2]; -extern GPU2D* GPU2D_A; -extern GPU2D* GPU2D_B; +extern std::unique_ptr<GPU2D> GPU2D_A; +extern std::unique_ptr<GPU2D> GPU2D_B; extern int Renderer; @@ -149,6 +155,10 @@ void SyncDirtyFlags(); 
extern u32 OAMDirty; extern u32 PaletteDirty; +#ifdef OGLRENDERER_ENABLED +extern std::unique_ptr<GLCompositor> CurGLCompositor; +#endif + struct RenderSettings { bool Soft_Threaded; @@ -550,24 +560,6 @@ void DisplayFIFO(u32 x); void SetDispStat(u32 cpu, u16 val); void SetVCount(u16 val); - -#ifdef OGLRENDERER_ENABLED -namespace GLCompositor -{ - -bool Init(); -void DeInit(); -void Reset(); - -void SetRenderSettings(RenderSettings& settings); - -void Stop(); -void RenderFrame(); -void BindOutputTexture(int buf); - -} -#endif - } #include "GPU3D.h" diff --git a/src/GPU2D.h b/src/GPU2D.h index 0f59ae3..e9ce8e1 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -28,13 +28,15 @@ public: GPU2D(u32 num); virtual ~GPU2D() {} + GPU2D(const GPU2D&) = delete; + GPU2D& operator=(const GPU2D&) = delete; + void Reset(); void DoSavestate(Savestate* file); void SetEnabled(bool enable) { Enabled = enable; } void SetFramebuffer(u32* buf); - virtual void SetRenderSettings(bool accel) = 0; u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -115,8 +117,8 @@ protected: u16 MasterBrightness; - u8 WindowMask[256] __attribute__((aligned (8))); - u8 OBJWindow[256] __attribute__((aligned (8))); + alignas(8) u8 WindowMask[256]; + alignas(8) u8 OBJWindow[256]; void UpdateMosaicCounters(u32 line); void CalculateWindowMask(u32 line); @@ -124,65 +126,4 @@ protected: virtual void MosaicXSizeChanged() = 0; }; -class GPU2D_Soft : public GPU2D -{ -public: - GPU2D_Soft(u32 num); - ~GPU2D_Soft() override {} - - void SetRenderSettings(bool accel) override; - - void DrawScanline(u32 line) override; - void DrawSprites(u32 line) override; - void VBlankEnd() override; - -protected: - void MosaicXSizeChanged() override; - -private: - bool Accelerated; - - u32 BGOBJLine[256*3] __attribute__((aligned (8))); - u32* _3DLine; - - u32 OBJLine[256] __attribute__((aligned (8))); - u8 OBJIndex[256] __attribute__((aligned (8))); - - u32 NumSprites; - - u8 MosaicTable[16][256]; - u8* CurBGXMosaicTable; - u8* CurOBJXMosaicTable; 
- - u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); - u32 ColorBlend5(u32 val1, u32 val2); - u32 ColorBrightnessUp(u32 val, u32 factor); - u32 ColorBrightnessDown(u32 val, u32 factor); - u32 ColorComposite(int i, u32 val1, u32 val2); - - template<u32 bgmode> void DrawScanlineBGMode(u32 line); - void DrawScanlineBGMode6(u32 line); - void DrawScanlineBGMode7(u32 line); - void DrawScanline_BGOBJ(u32 line); - - static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); - static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - - typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); - - void DrawBG_3D(); - template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum); - template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum); - template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum); - template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line); - - void ApplySpriteMosaicX(); - template<DrawPixel drawPixel> - void InterleaveSprites(u32 prio); - template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); - template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); - - void DoCapture(u32 line, u32 width); -}; - #endif diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 7cab67a..e455b7c 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -1,4 +1,4 @@ -#include "GPU2D.h" +#include "GPU2D_Soft.h" #include "GPU.h" GPU2D_Soft::GPU2D_Soft(u32 num) @@ -15,11 +15,6 @@ GPU2D_Soft::GPU2D_Soft(u32 num) } } -void GPU2D_Soft::SetRenderSettings(bool accel) -{ - Accelerated = accel; -} - u32 GPU2D_Soft::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) { u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; @@ -152,7 +147,7 @@ u32 GPU2D_Soft::ColorComposite(int i, u32 val1, u32 val2) void GPU2D_Soft::DrawScanline(u32 line) { - int stride = 
Accelerated ? (256*3 + 1) : 256; + int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256; u32* dst = &Framebuffer[stride * line]; int n3dline = line; @@ -192,7 +187,7 @@ void GPU2D_Soft::DrawScanline(u32 line) if (Num == 0) { - if (!Accelerated) + if (!GPU3D::CurrentRenderer->Accelerated) _3DLine = GPU3D::GetLine(n3dline); else if (CaptureLatch && (((CaptureCnt >> 29) & 0x3) != 1)) { @@ -206,7 +201,7 @@ void GPU2D_Soft::DrawScanline(u32 line) for (int i = 0; i < 256; i++) dst[i] = 0xFFFFFFFF; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { dst[256*3] = 0; } @@ -296,7 +291,7 @@ void GPU2D_Soft::DrawScanline(u32 line) DoCapture(line, capwidth); } - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { dst[256*3] = MasterBrightness | (DispCnt & 0x30000); return; @@ -350,11 +345,11 @@ void GPU2D_Soft::VBlankEnd() GPU2D::VBlankEnd(); #ifdef OGLRENDERER_ENABLED - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) { - GPU3D::GLRenderer::PrepareCaptureFrame(); + reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame(); } } #endif @@ -372,7 +367,7 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); - // TODO: handle 3D in accelerated mode!! + // TODO: handle 3D in GPU3D::CurrentRenderer->Accelerated mode!! 
u32* srcA; if (CaptureCnt & (1<<24)) @@ -382,9 +377,9 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) else { srcA = BGOBJLine; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { - // in accelerated mode, compositing is normally done on the GPU + // in GPU3D::CurrentRenderer->Accelerated mode, compositing is normally done on the GPU // but when doing display capture, we do need the composited output // so we do it here @@ -586,12 +581,12 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { \ if ((BGCnt[num] & 0x0040) && (BGMosaicSize[0] > 0)) \ { \ - if (Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ else DrawBG_##type<true, DrawPixel_Normal>(line, num); \ } \ else \ { \ - if (Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ else DrawBG_##type<false, DrawPixel_Normal>(line, num); \ } \ } while (false) @@ -601,18 +596,18 @@ void GPU2D_Soft::DoCapture(u32 line, u32 width) { \ if ((BGCnt[2] & 0x0040) && (BGMosaicSize[0] > 0)) \ { \ - if (Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \ else DrawBG_Large<true, DrawPixel_Normal>(line); \ } \ else \ { \ - if (Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \ + if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \ else DrawBG_Large<false, DrawPixel_Normal>(line); \ } \ } while (false) #define DoInterleaveSprites(prio) \ - if (Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); + if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); template<u32 bgmode> void GPU2D_Soft::DrawScanlineBGMode(u32 line) @@ -773,7 +768,7 @@ void 
GPU2D_Soft::DrawScanline_BGOBJ(u32 line) // color special effects // can likely be optimized - if (!Accelerated) + if (!GPU3D::CurrentRenderer->Accelerated) { for (int i = 0; i < 256; i++) { @@ -919,7 +914,7 @@ void GPU2D_Soft::DrawBG_3D() { int i = 0; - if (Accelerated) + if (GPU3D::CurrentRenderer->Accelerated) { for (i = 0; i < 256; i++) { diff --git a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h new file mode 100644 index 0000000..754f08a --- /dev/null +++ b/src/GPU2D_Soft.h @@ -0,0 +1,79 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include "GPU2D.h" + +class GPU2D_Soft : public GPU2D +{ +public: + GPU2D_Soft(u32 num); + ~GPU2D_Soft() override {} + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + void VBlankEnd() override; + +protected: + void MosaicXSizeChanged() override; + +private: + + alignas(8) u32 BGOBJLine[256*3]; + u32* _3DLine; + + alignas(8) u32 OBJLine[256]; + alignas(8) u8 OBJIndex[256]; + + u32 NumSprites; + + u8 MosaicTable[16][256]; + u8* CurBGXMosaicTable; + u8* CurOBJXMosaicTable; + + u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); + u32 ColorBlend5(u32 val1, u32 val2); + u32 ColorBrightnessUp(u32 val, u32 factor); + u32 ColorBrightnessDown(u32 val, u32 factor); + u32 ColorComposite(int i, u32 val1, u32 val2); + + template<u32 bgmode> void DrawScanlineBGMode(u32 line); + void DrawScanlineBGMode6(u32 line); + void DrawScanlineBGMode7(u32 line); + void DrawScanline_BGOBJ(u32 line); + + static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); + static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); + + typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); + + void DrawBG_3D(); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Affine(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Extended(u32 line, u32 bgnum); + template<bool mosaic, DrawPixel drawPixel> void DrawBG_Large(u32 line); + + void ApplySpriteMosaicX(); + template<DrawPixel drawPixel> + void InterleaveSprites(u32 prio); + template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); + template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); + + void DoCapture(u32 line, u32 width); +};
\ No newline at end of file diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 9b41830..c933c82 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -273,7 +273,7 @@ u32 RenderNumPolygons; u32 FlushRequest; u32 FlushAttributes; - +std::unique_ptr<GPU3D::Renderer3D> CurrentRenderer = {}; bool Init() { @@ -2497,12 +2497,12 @@ void CheckFIFODMA() void VCount144() { - if (GPU::Renderer == 0) SoftRenderer::VCount144(); + CurrentRenderer->VCount144(); } void RestartFrame() { - if (GPU::Renderer == 0) SoftRenderer::SetupRenderThread(); + CurrentRenderer->RestartFrame(); } @@ -2597,10 +2597,7 @@ void VBlank() void VCount215() { - if (GPU::Renderer == 0) SoftRenderer::RenderFrame(); -#ifdef OGLRENDERER_ENABLED - else GLRenderer::RenderFrame(); -#endif + CurrentRenderer->RenderFrame(); } void SetRenderXPos(u16 xpos) @@ -2614,12 +2611,7 @@ u32 ScrolledLine[256]; u32* GetLine(int line) { - u32* rawline = NULL; - - if (GPU::Renderer == 0) rawline = SoftRenderer::GetLine(line); -#ifdef OGLRENDERER_ENABLED - else rawline = GLRenderer::GetLine(line); -#endif + u32* rawline = CurrentRenderer->GetLine(line); if (RenderXPos == 0) return rawline; @@ -3055,5 +3047,9 @@ void Write32(u32 addr, u32 val) printf("unknown GPU3D write32 %08X %08X\n", addr, val); } +Renderer3D::Renderer3D(bool Accelerated) +: Accelerated(Accelerated) +{ } + } diff --git a/src/GPU3D.h b/src/GPU3D.h index e4629b0..1aba0bd 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -20,6 +20,9 @@ #define GPU3D_H #include <array> +#include <memory> + +#include "GPU.h" #include "Savestate.h" namespace GPU3D @@ -96,8 +99,6 @@ extern u32 RenderNumPolygons; extern u64 Timestamp; -extern int Renderer; - bool Init(); void DeInit(); void Reset(); @@ -131,40 +132,42 @@ void Write8(u32 addr, u8 val); void Write16(u32 addr, u16 val); void Write32(u32 addr, u32 val); -namespace SoftRenderer +class Renderer3D { +public: + Renderer3D(bool Accelerated); + virtual ~Renderer3D() {}; -bool Init(); -void DeInit(); -void Reset(); + Renderer3D(const 
Renderer3D&) = delete; + Renderer3D& operator=(const Renderer3D&) = delete; -void SetRenderSettings(GPU::RenderSettings& settings); -void SetupRenderThread(); + virtual bool Init() = 0; + virtual void DeInit() = 0; + virtual void Reset() = 0; -void VCount144(); -void RenderFrame(); -u32* GetLine(int line); + // This "Accelerated" flag currently communicates if the framebuffer should + // be allocated differently and other little misc handlers. Ideally there + // are more detailed "traits" that we can ask of the Renderer3D type + const bool Accelerated; -} + virtual void SetRenderSettings(GPU::RenderSettings& settings) = 0; -#ifdef OGLRENDERER_ENABLED -namespace GLRenderer -{ + virtual void VCount144() {}; -bool Init(); -void DeInit(); -void Reset(); - -void SetRenderSettings(GPU::RenderSettings& settings); + virtual void RenderFrame() = 0; + virtual void RestartFrame() {}; + virtual u32* GetLine(int line) = 0; +}; -void RenderFrame(); -void PrepareCaptureFrame(); -u32* GetLine(int line); -void SetupAccelFrame(); +extern int Renderer; +extern std::unique_ptr<Renderer3D> CurrentRenderer; } -#endif -} +#include "GPU3D_Soft.h" + +#ifdef OGLRENDERER_ENABLED +#include "GPU3D_OpenGL.h" +#endif #endif diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 164f29a..93c1523 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -16,118 +16,19 @@ with melonDS. If not, see http://www.gnu.org/licenses/. 
*/ +#include "GPU3D_OpenGL.h" + #include <stdio.h> #include <string.h> #include "NDS.h" #include "GPU.h" #include "Config.h" -#include "OpenGLSupport.h" #include "GPU3D_OpenGL_shaders.h" namespace GPU3D { -namespace GLRenderer -{ - -using namespace OpenGL; - -// GL version requirements -// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) -// * UBO: 3.1 - - -enum -{ - RenderFlag_WBuffer = 0x01, - RenderFlag_Trans = 0x02, - RenderFlag_ShadowMask = 0x04, - RenderFlag_Edge = 0x08, -}; - - -GLuint ClearShaderPlain[3]; - -GLuint RenderShader[16][3]; -GLuint CurShaderID = -1; - -GLuint FinalPassEdgeShader[3]; -GLuint FinalPassFogShader[3]; - -// std140 compliant structure -struct -{ - float uScreenSize[2]; // vec2 0 / 2 - u32 uDispCnt; // int 2 / 1 - u32 __pad0; - float uToonColors[32][4]; // vec4[32] 4 / 128 - float uEdgeColors[8][4]; // vec4[8] 132 / 32 - float uFogColor[4]; // vec4 164 / 4 - float uFogDensity[34][4]; // float[34] 168 / 136 - u32 uFogOffset; // int 304 / 1 - u32 uFogShift; // int 305 / 1 - u32 _pad1[2]; // int 306 / 2 -} ShaderConfig; - -GLuint ShaderConfigUBO; - -struct RendererPolygon -{ - Polygon* PolyData; - - u32 NumIndices; - u32 IndicesOffset; - GLuint PrimType; - - u32 NumEdgeIndices; - u32 EdgeIndicesOffset; - u32 RenderKey; -}; - -RendererPolygon PolygonList[2048]; -int NumFinalPolys, NumOpaqueFinalPolys; - -GLuint ClearVertexBufferID, ClearVertexArrayID; -GLint ClearUniformLoc[4]; - -// vertex buffer -// * XYZW: 4x16bit -// * RGBA: 4x8bit -// * ST: 2x16bit -// * polygon data: 3x32bit (polygon/texture attributes) -// -// polygon attributes: -// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR -// * bit16-20: Z shift -// * bit8: front-facing (?) -// * bit9: W-buffering (?) 
- -GLuint VertexBufferID; -u32 VertexBuffer[10240 * 7]; -u32 NumVertices; - -GLuint VertexArrayID; -GLuint IndexBufferID; -u16 IndexBuffer[2048 * 40]; -u32 NumIndices, NumEdgeIndices; - -const u32 EdgeIndicesOffset = 2048 * 30; - -GLuint TexMemID; -GLuint TexPalMemID; - -int ScaleFactor; -bool BetterPolygons; -int ScreenW, ScreenH; - -GLuint FramebufferTex[8]; -int FrontBuffer; -GLuint FramebufferID[4], PixelbufferID; -u32 Framebuffer[256*192]; - - - -bool BuildRenderShader(u32 flags, const char* vs, const char* fs) +bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs) { char shadername[32]; sprintf(shadername, "RenderShader%02X", flags); @@ -180,7 +81,7 @@ bool BuildRenderShader(u32 flags, const char* vs, const char* fs) return true; } -void UseRenderShader(u32 flags) +void GLRenderer::UseRenderShader(u32 flags) { if (CurShaderID == flags) return; glUseProgram(RenderShader[flags][2]); @@ -196,7 +97,12 @@ void SetupDefaultTexParams(GLuint tex) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } -bool Init() +GLRenderer::GLRenderer() + : Renderer3D(true) +{ +} + +bool GLRenderer::Init() { GLint uni_id; @@ -382,7 +288,7 @@ bool Init() return true; } -void DeInit() +void GLRenderer::DeInit() { glDeleteTextures(1, &TexMemID); glDeleteTextures(1, &TexPalMemID); @@ -404,11 +310,11 @@ void DeInit() } } -void Reset() +void GLRenderer::Reset() { } -void SetRenderSettings(GPU::RenderSettings& settings) +void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings) { int scale = settings.GL_ScaleFactor; @@ -462,7 +368,7 @@ void SetRenderSettings(GPU::RenderSettings& settings) } -void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon) { rp->PolyData = polygon; @@ -508,7 +414,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon) } } -u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) +u32* 
GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) { u32 z = poly->FinalZ[vid]; u32 w = poly->FinalW[vid]; @@ -569,7 +475,7 @@ u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr) return vptr; } -void BuildPolygons(RendererPolygon* polygons, int npolys) +void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys) { u32* vptr = &VertexBuffer[0]; u32 vidx = 0; @@ -791,7 +697,7 @@ void BuildPolygons(RendererPolygon* polygons, int npolys) NumEdgeIndices = eidx - EdgeIndicesOffset; } -int RenderSinglePolygon(int i) +int GLRenderer::RenderSinglePolygon(int i) { RendererPolygon* rp = &PolygonList[i]; @@ -800,7 +706,7 @@ int RenderSinglePolygon(int i) return 1; } -int RenderPolygonBatch(int i) +int GLRenderer::RenderPolygonBatch(int i) { RendererPolygon* rp = &PolygonList[i]; GLuint primtype = rp->PrimType; @@ -822,7 +728,7 @@ int RenderPolygonBatch(int i) return numpolys; } -int RenderPolygonEdgeBatch(int i) +int GLRenderer::RenderPolygonEdgeBatch(int i) { RendererPolygon* rp = &PolygonList[i]; u32 key = rp->RenderKey; @@ -842,7 +748,7 @@ int RenderPolygonEdgeBatch(int i) return numpolys; } -void RenderSceneChunk(int y, int h) +void GLRenderer::RenderSceneChunk(int y, int h) { u32 flags = 0; if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; @@ -1206,7 +1112,7 @@ void RenderSceneChunk(int y, int h) } -void RenderFrame() +void GLRenderer::RenderFrame() { CurShaderID = -1; @@ -1381,7 +1287,7 @@ void RenderFrame() FrontBuffer = FrontBuffer ? 
0 : 1; } -void PrepareCaptureFrame() +void GLRenderer::PrepareCaptureFrame() { // TODO: make sure this picks the right buffer when doing antialiasing int original_fb = FrontBuffer^1; @@ -1396,7 +1302,7 @@ void PrepareCaptureFrame() glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL); } -u32* GetLine(int line) +u32* GLRenderer::GetLine(int line) { int stride = 256; @@ -1419,10 +1325,9 @@ u32* GetLine(int line) return &Framebuffer[stride * line]; } -void SetupAccelFrame() +void GLRenderer::SetupAccelFrame() { glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]); } } -} diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h new file mode 100644 index 0000000..73e2955 --- /dev/null +++ b/src/GPU3D_OpenGL.h @@ -0,0 +1,152 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include "GPU3D.h" + +#include "OpenGLSupport.h" + + +namespace GPU3D +{ +class GLRenderer : public Renderer3D +{ +public: + GLRenderer(); + virtual ~GLRenderer() override {}; + virtual bool Init() override; + virtual void DeInit() override; + virtual void Reset() override; + + virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + + virtual void VCount144() override {}; + virtual void RenderFrame() override; + virtual u32* GetLine(int line) override; + + void SetupAccelFrame(); + void PrepareCaptureFrame(); +private: + + // GL version requirements + // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) + // * UBO: 3.1 + + struct RendererPolygon + { + Polygon* PolyData; + + u32 NumIndices; + u32 IndicesOffset; + GLuint PrimType; + + u32 NumEdgeIndices; + u32 EdgeIndicesOffset; + + u32 RenderKey; + }; + + RendererPolygon PolygonList[2048]; + + bool BuildRenderShader(u32 flags, const char* vs, const char* fs); + void UseRenderShader(u32 flags); + void SetupPolygon(RendererPolygon* rp, Polygon* polygon); + u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr); + void BuildPolygons(RendererPolygon* polygons, int npolys); + int RenderSinglePolygon(int i); + int RenderPolygonBatch(int i); + int RenderPolygonEdgeBatch(int i); + void RenderSceneChunk(int y, int h); + + enum + { + RenderFlag_WBuffer = 0x01, + RenderFlag_Trans = 0x02, + RenderFlag_ShadowMask = 0x04, + RenderFlag_Edge = 0x08, + }; + + + GLuint ClearShaderPlain[3]; + + GLuint RenderShader[16][3]; + GLuint CurShaderID = -1; + + GLuint FinalPassEdgeShader[3]; + GLuint FinalPassFogShader[3]; + + // std140 compliant structure + struct + { + float uScreenSize[2]; // vec2 0 / 2 + u32 uDispCnt; // int 2 / 1 + u32 __pad0; + float uToonColors[32][4]; // vec4[32] 4 / 128 + float uEdgeColors[8][4]; // vec4[8] 132 / 32 + float uFogColor[4]; // vec4 164 / 4 + float uFogDensity[34][4]; // float[34] 168 / 136 + u32 uFogOffset; // int 304 / 1 + u32 uFogShift; // 
int 305 / 1 + u32 _pad1[2]; // int 306 / 2 + } ShaderConfig; + + GLuint ShaderConfigUBO; + int NumFinalPolys, NumOpaqueFinalPolys; + + GLuint ClearVertexBufferID, ClearVertexArrayID; + GLint ClearUniformLoc[4]; + + // vertex buffer + // * XYZW: 4x16bit + // * RGBA: 4x8bit + // * ST: 2x16bit + // * polygon data: 3x32bit (polygon/texture attributes) + // + // polygon attributes: + // * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR + // * bit16-20: Z shift + // * bit8: front-facing (?) + // * bit9: W-buffering (?) + + GLuint VertexBufferID; + u32 VertexBuffer[10240 * 7]; + u32 NumVertices; + + GLuint VertexArrayID; + GLuint IndexBufferID; + u16 IndexBuffer[2048 * 40]; + u32 NumIndices, NumEdgeIndices; + + const u32 EdgeIndicesOffset = 2048 * 30; + + GLuint TexMemID; + GLuint TexPalMemID; + + int ScaleFactor; + bool BetterPolygons; + int ScreenW, ScreenH; + + GLuint FramebufferTex[8]; + int FrontBuffer; + GLuint FramebufferID[4], PixelbufferID; + u32 Framebuffer[256*192]; + + +}; +}
\ No newline at end of file diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 3d6ace6..f6d27a0 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -16,82 +16,43 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ +#include "GPU3D_Soft.h" + #include <stdio.h> #include <string.h> #include "NDS.h" #include "GPU.h" #include "Config.h" -#include "Platform.h" namespace GPU3D { -namespace SoftRenderer -{ - -// buffer dimensions are 258x194 to add a offscreen 1px border -// which simplifies edge marking tests -// buffer is duplicated to keep track of the two topmost pixels -// TODO: check if the hardware can accidentally plot pixels -// offscreen in that border - -const int ScanlineWidth = 258; -const int NumScanlines = 194; -const int BufferSize = ScanlineWidth * NumScanlines; -const int FirstPixelOffset = ScanlineWidth + 1; - -u32 ColorBuffer[BufferSize * 2]; -u32 DepthBuffer[BufferSize * 2]; -u32 AttrBuffer[BufferSize * 2]; - -// attribute buffer: -// bit0-3: edge flags (left/right/top/bottom) -// bit4: backfacing flag -// bit8-12: antialiasing alpha -// bit15: fog enable -// bit16-21: polygon ID for translucent pixels -// bit22: translucent flag -// bit24-29: polygon ID for opaque pixels - -u8 StencilBuffer[256*2]; -bool PrevIsShadowMask; - -bool Enabled; - -bool FrameIdentical; - -// threading - -bool Threaded; -Platform::Thread* RenderThread; -bool RenderThreadRunning; -bool RenderThreadRendering; -Platform::Semaphore* Sema_RenderStart; -Platform::Semaphore* Sema_RenderDone; -Platform::Semaphore* Sema_ScanlineCount; void RenderThreadFunc(); -void StopRenderThread() +void SoftRenderer::StopRenderThread() { if (RenderThreadRunning) { RenderThreadRunning = false; Platform::Semaphore_Post(Sema_RenderStart); - Platform::Thread_Wait(RenderThread); - Platform::Thread_Free(RenderThread); + // Platform::Thread_Wait(RenderThread); + // Platform::Thread_Free(RenderThread); + RenderThread.join(); + } } -void SetupRenderThread() +void 
SoftRenderer::SetupRenderThread() { if (Threaded) { if (!RenderThreadRunning) { RenderThreadRunning = true; - RenderThread = Platform::Thread_Create(RenderThreadFunc); + //RenderThread = Platform::Thread_Create(RenderThreadFunc); + RenderThread = std::thread(&SoftRenderer::RenderThreadFunc, this); } // otherwise more than one frame can be queued up at once @@ -113,7 +74,13 @@ void SetupRenderThread() } -bool Init() +SoftRenderer::SoftRenderer() + : Renderer3D(false) +{ + +} + +bool SoftRenderer::Init() { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -126,7 +93,7 @@ bool Init() return true; } -void DeInit() +void SoftRenderer::DeInit() { StopRenderThread(); @@ -135,7 +102,7 @@ void DeInit() Platform::Semaphore_Free(Sema_ScanlineCount); } -void Reset() +void SoftRenderer::Reset() { memset(ColorBuffer, 0, BufferSize * 2 * 4); memset(DepthBuffer, 0, BufferSize * 2 * 4); @@ -146,428 +113,13 @@ void Reset() SetupRenderThread(); } -void SetRenderSettings(GPU::RenderSettings& settings) +void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings) { Threaded = settings.Soft_Threaded; SetupRenderThread(); } - - -// Notes on the interpolator: -// -// This is a theory on how the DS hardware interpolates values. It matches hardware output -// in the tests I did, but the hardware may be doing it differently. You never know. -// -// Assuming you want to perspective-correctly interpolate a variable named A across two points -// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, -// then divide A/W by 1/W to recover the correct A value. -// -// The DS GPU approximates interpolation by calculating a perspective-correct interpolation -// between 0 and 1, then using the result as a factor to linearly interpolate the actual -// vertex attributes. The factor has 9 bits of precision when interpolating along Y and -// 8 bits along X. 
-// -// There's a special path for when the two W values are equal: it directly does linear -// interpolation, avoiding precision loss from the aforementioned approximation. -// Which is desirable when using the GPU to draw 2D graphics. - -template<int dir> -class Interpolator -{ -public: - Interpolator() {} - Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) - { - Setup(x0, x1, w0, w1); - } - - void Setup(s32 x0, s32 x1, s32 w0, s32 w1) - { - this->x0 = x0; - this->x1 = x1; - this->xdiff = x1 - x0; - - // calculate reciprocals for linear mode and Z interpolation - // TODO eventually: use a faster reciprocal function? - if (this->xdiff != 0) - this->xrecip = (1<<30) / this->xdiff; - else - this->xrecip = 0; - this->xrecip_z = this->xrecip >> 8; - - // linear mode is used if both W values are equal and have - // low-order bits cleared (0-6 along X, 1-6 along Y) - u32 mask = dir ? 0x7E : 0x7F; - if ((w0 == w1) && !(w0 & mask) && !(w1 & mask)) - this->linear = true; - else - this->linear = false; - - if (dir) - { - // along Y - - if ((w0 & 0x1) && !(w1 & 0x1)) - { - this->w0n = w0 - 1; - this->w0d = w0 + 1; - this->w1d = w1; - } - else - { - this->w0n = w0 & 0xFFFE; - this->w0d = w0 & 0xFFFE; - this->w1d = w1 & 0xFFFE; - } - - this->shift = 9; - } - else - { - // along X - - this->w0n = w0; - this->w0d = w0; - this->w1d = w1; - - this->shift = 8; - } - } - - void SetX(s32 x) - { - x -= x0; - this->x = x; - if (xdiff != 0 && !linear) - { - s64 num = ((s64)x * w0n) << shift; - s32 den = (x * w0d) + ((xdiff-x) * w1d); - - // this seems to be a proper division on hardware :/ - // I haven't been able to find cases that produce imperfect output - if (den == 0) yfactor = 0; - else yfactor = (s32)(num / den); - } - } - - s32 Interpolate(s32 y0, s32 y1) - { - if (xdiff == 0 || y0 == y1) return y0; - - if (!linear) - { - // perspective-correct approx. 
interpolation - if (y0 < y1) - return y0 + (((y1-y0) * yfactor) >> shift); - else - return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift); - } - else - { - // linear interpolation - // checkme: the rounding bias there (3<<24) is a guess - if (y0 < y1) - return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30); - else - return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30); - } - } - - s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) - { - if (xdiff == 0 || z0 == z1) return z0; - - if (wbuffer) - { - // W-buffering: perspective-correct approx. interpolation - if (z0 < z1) - return z0 + (((s64)(z1-z0) * yfactor) >> shift); - else - return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift); - } - else - { - // Z-buffering: linear interpolation - // still doesn't quite match hardware... - s32 base, disp, factor; - - if (z0 < z1) - { - base = z0; - disp = z1 - z0; - factor = x; - } - else - { - base = z1; - disp = z0 - z1, - factor = xdiff - x; - } - - if (dir) - { - int shift = 0; - while (disp > 0x3FF) - { - disp >>= 1; - shift++; - } - - return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); - } - else - { - disp >>= 9; - return base + (((s64)disp * factor * xrecip_z) >> 13); - } - } - } - -private: - s32 x0, x1, xdiff, x; - - int shift; - bool linear; - - s32 xrecip, xrecip_z; - s32 w0n, w0d, w1d; - - u32 yfactor; -}; - - -template<int side> -class Slope -{ -public: - Slope() {} - - s32 SetupDummy(s32 x0) - { - if (side) - { - dx = -0x40000; - x0--; - } - else - { - dx = 0; - } - - this->x0 = x0; - this->xmin = x0; - this->xmax = x0; - - Increment = 0; - XMajor = false; - - Interp.Setup(0, 0, 0, 0); - Interp.SetX(0); - - xcov_incr = 0; - - return x0; - } - - s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) - { - this->x0 = x0; - this->y = y; - - if (x1 > x0) - { - this->xmin = x0; - this->xmax = x1-1; - this->Negative = false; - } - else if (x1 < x0) - { - this->xmin = x1; - this->xmax = x0-1; - this->Negative = true; - 
} - else - { - this->xmin = x0; - if (side) this->xmin--; - this->xmax = this->xmin; - this->Negative = false; - } - - xlen = xmax+1 - xmin; - ylen = y1 - y0; - - // slope increment has a 18-bit fractional part - // note: for some reason, x/y isn't calculated directly, - // instead, 1/y is calculated and then multiplied by x - // TODO: this is still not perfect (see for example x=169 y=33) - if (ylen == 0) - Increment = 0; - else if (ylen == xlen) - Increment = 0x40000; - else - { - s32 yrecip = (1<<18) / ylen; - Increment = (x1-x0) * yrecip; - if (Increment < 0) Increment = -Increment; - } - - XMajor = (Increment > 0x40000); - - if (side) - { - // right - - if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = -0x40000; - } - else - { - // left - - if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; - else if (Increment != 0) dx = Negative ? 0x40000 : 0; - else dx = 0; - } - - dx += (y - y0) * Increment; - - s32 x = XVal(); - - if (XMajor) - { - if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme - else Interp.Setup(x0, x1, w0, w1); - Interp.SetX(x); - - // used for calculating AA coverage - xcov_incr = (ylen << 10) / xlen; - } - else - { - Interp.Setup(y0, y1, w0, w1); - Interp.SetX(y); - } - - return x; - } - - s32 Step() - { - dx += Increment; - y++; - - s32 x = XVal(); - if (XMajor) - { - Interp.SetX(x); - } - else - { - Interp.SetX(y); - } - return x; - } - - s32 XVal() - { - s32 ret; - if (Negative) ret = x0 - (dx >> 18); - else ret = x0 + (dx >> 18); - - if (ret < xmin) ret = xmin; - else if (ret > xmax) ret = xmax; - return ret; - } - - void EdgeParams_XMajor(s32* length, s32* coverage) - { - if (side ^ Negative) - *length = (dx >> 18) - ((dx-Increment) >> 18); - else - *length = ((dx+Increment) >> 18) - (dx >> 18); - - // for X-major edges, we return the coverage - // for the first pixel, and the increment for - // further pixels on the same 
scanline - s32 startx = dx >> 18; - if (Negative) startx = xlen - startx; - if (side) startx = startx - *length + 1; - - s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; - *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); - } - - void EdgeParams_YMajor(s32* length, s32* coverage) - { - *length = 1; - - if (Increment == 0) - { - *coverage = 31; - } - else - { - s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; - if ((cov >> 5) != (dx >> 18)) cov = 31; - cov &= 0x1F; - if (!(side ^ Negative)) cov = 0x1F - cov; - - *coverage = cov; - } - } - - void EdgeParams(s32* length, s32* coverage) - { - if (XMajor) - return EdgeParams_XMajor(length, coverage); - else - return EdgeParams_YMajor(length, coverage); - } - - s32 Increment; - bool Negative; - bool XMajor; - Interpolator<1> Interp; - -private: - s32 x0, xmin, xmax; - s32 xlen, ylen; - s32 dx; - s32 y; - - s32 xcov_incr; - s32 ycoverage, ycov_incr; -}; - -struct RendererPolygon -{ - Polygon* PolyData; - - Slope<0> SlopeL; - Slope<1> SlopeR; - s32 XL, XR; - u32 CurVL, CurVR; - u32 NextVL, NextVR; - -}; - -RendererPolygon PolygonList[2048]; - -template <typename T> -inline T ReadVRAM_Texture(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; -} -template <typename T> -inline T ReadVRAM_TexPal(u32 addr) -{ - return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; -} - -void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) +void SoftRenderer::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { u32 vramaddr = (texparam & 0xFFFF) << 3; @@ -873,7 +425,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24); } -u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) +u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) { u8 r, g, b, a; @@ -981,7 +533,7 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) return r 
| (g << 8) | (b << 16) | (a << 24); } -void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) +void SoftRenderer::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) { u32 dstattr = AttrBuffer[pixeladdr]; u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F); @@ -1020,7 +572,7 @@ void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 sha AttrBuffer[pixeladdr] = attr; } -void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1047,7 +599,7 @@ void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVL], polygon->FinalW[rp->NextVL], y); } -void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) +void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1074,7 +626,7 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y); } -void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) { u32 nverts = polygon->NumVertices; @@ -1127,7 +679,7 @@ void SetupPolygon(RendererPolygon* rp, Polygon* polygon) } } -void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1340,7 +892,7 @@ void RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderPolygonScanline(RendererPolygon* rp, s32 y) +void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1755,7 +1307,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y) rp->XR = rp->SlopeR.Step(); } -void RenderScanline(s32 y, int npolys) +void 
SoftRenderer::RenderScanline(s32 y, int npolys) { for (int i = 0; i < npolys; i++) { @@ -1772,8 +1324,7 @@ void RenderScanline(s32 y, int npolys) } } - -u32 CalculateFogDensity(u32 pixeladdr) +u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) { u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; @@ -1812,7 +1363,7 @@ u32 CalculateFogDensity(u32 pixeladdr) return density; } -void ScanlineFinalPass(s32 y) +void SoftRenderer::ScanlineFinalPass(s32 y) { // to consider: // clearing all polygon fog flags if the master flag isn't set? @@ -1981,7 +1532,7 @@ void ScanlineFinalPass(s32 y) } } -void ClearBuffers() +void SoftRenderer::ClearBuffers() { u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID @@ -2055,7 +1606,7 @@ void ClearBuffers() u32 a = (RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (RenderClearAttr1 & 0x8000); + polyid |= (RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -2070,7 +1621,7 @@ void ClearBuffers() } } -void RenderPolygons(bool threaded, Polygon** polygons, int npolys) +void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -2096,13 +1647,13 @@ void RenderPolygons(bool threaded, Polygon** polygons, int npolys) Platform::Semaphore_Post(Sema_ScanlineCount); } -void VCount144() +void SoftRenderer::VCount144() { if (RenderThreadRunning) Platform::Semaphore_Wait(Sema_RenderDone); } -void RenderFrame() +void SoftRenderer::RenderFrame() { auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); @@ -2123,7 +1674,12 @@ void RenderFrame() } } -void RenderThreadFunc() +void SoftRenderer::RestartFrame() +{ + SetupRenderThread(); +} + +void SoftRenderer::RenderThreadFunc() { for (;;) { @@ -2146,7 +1702,7 @@ void 
RenderThreadFunc() } } -u32* GetLine(int line) +u32* SoftRenderer::GetLine(int line) { if (RenderThreadRunning) { @@ -2158,4 +1714,3 @@ u32* GetLine(int line) } } -} diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h new file mode 100644 index 0000000..851b7c1 --- /dev/null +++ b/src/GPU3D_Soft.h @@ -0,0 +1,516 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#pragma once + +#include "GPU3D.h" +#include "Platform.h" +#include <thread> + +namespace GPU3D +{ +class SoftRenderer : public Renderer3D +{ +public: + SoftRenderer(); + virtual ~SoftRenderer() override {}; + virtual bool Init() override; + virtual void DeInit() override; + virtual void Reset() override; + + virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + + virtual void VCount144() override; + virtual void RenderFrame() override; + virtual void RestartFrame() override; + virtual u32* GetLine(int line) override; + + void SetupRenderThread(); + void StopRenderThread(); +private: + // Notes on the interpolator: + // + // This is a theory on how the DS hardware interpolates values. It matches hardware output + // in the tests I did, but the hardware may be doing it differently. You never know. 
+ // + // Assuming you want to perspective-correctly interpolate a variable named A across two points + // in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, + // then divide A/W by 1/W to recover the correct A value. + // + // The DS GPU approximates interpolation by calculating a perspective-correct interpolation + // between 0 and 1, then using the result as a factor to linearly interpolate the actual + // vertex attributes. The factor has 9 bits of precision when interpolating along Y and + // 8 bits along X. + // + // There's a special path for when the two W values are equal: it directly does linear + // interpolation, avoiding precision loss from the aforementioned approximation. + // Which is desirable when using the GPU to draw 2D graphics. + + template<int dir> + class Interpolator + { + public: + Interpolator() {} + Interpolator(s32 x0, s32 x1, s32 w0, s32 w1) + { + Setup(x0, x1, w0, w1); + } + + void Setup(s32 x0, s32 x1, s32 w0, s32 w1) + { + this->x0 = x0; + this->x1 = x1; + this->xdiff = x1 - x0; + + // calculate reciprocals for linear mode and Z interpolation + // TODO eventually: use a faster reciprocal function? + if (this->xdiff != 0) + this->xrecip = (1<<30) / this->xdiff; + else + this->xrecip = 0; + this->xrecip_z = this->xrecip >> 8; + + // linear mode is used if both W values are equal and have + // low-order bits cleared (0-6 along X, 1-6 along Y) + u32 mask = dir ? 
0x7E : 0x7F; + if ((w0 == w1) && !(w0 & mask) && !(w1 & mask)) + this->linear = true; + else + this->linear = false; + + if (dir) + { + // along Y + + if ((w0 & 0x1) && !(w1 & 0x1)) + { + this->w0n = w0 - 1; + this->w0d = w0 + 1; + this->w1d = w1; + } + else + { + this->w0n = w0 & 0xFFFE; + this->w0d = w0 & 0xFFFE; + this->w1d = w1 & 0xFFFE; + } + + this->shift = 9; + } + else + { + // along X + + this->w0n = w0; + this->w0d = w0; + this->w1d = w1; + + this->shift = 8; + } + } + + void SetX(s32 x) + { + x -= x0; + this->x = x; + if (xdiff != 0 && !linear) + { + s64 num = ((s64)x * w0n) << shift; + s32 den = (x * w0d) + ((xdiff-x) * w1d); + + // this seems to be a proper division on hardware :/ + // I haven't been able to find cases that produce imperfect output + if (den == 0) yfactor = 0; + else yfactor = (s32)(num / den); + } + } + + s32 Interpolate(s32 y0, s32 y1) + { + if (xdiff == 0 || y0 == y1) return y0; + + if (!linear) + { + // perspective-correct approx. interpolation + if (y0 < y1) + return y0 + (((y1-y0) * yfactor) >> shift); + else + return y1 + (((y0-y1) * ((1<<shift)-yfactor)) >> shift); + } + else + { + // linear interpolation + // checkme: the rounding bias there (3<<24) is a guess + if (y0 < y1) + return y0 + ((((s64)(y1-y0) * x * xrecip) + (3<<24)) >> 30); + else + return y1 + ((((s64)(y0-y1) * (xdiff-x) * xrecip) + (3<<24)) >> 30); + } + } + + s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) + { + if (xdiff == 0 || z0 == z1) return z0; + + if (wbuffer) + { + // W-buffering: perspective-correct approx. interpolation + if (z0 < z1) + return z0 + (((s64)(z1-z0) * yfactor) >> shift); + else + return z1 + (((s64)(z0-z1) * ((1<<shift)-yfactor)) >> shift); + } + else + { + // Z-buffering: linear interpolation + // still doesn't quite match hardware... 
+ s32 base, disp, factor; + + if (z0 < z1) + { + base = z0; + disp = z1 - z0; + factor = x; + } + else + { + base = z1; + disp = z0 - z1, + factor = xdiff - x; + } + + if (dir) + { + int shift = 0; + while (disp > 0x3FF) + { + disp >>= 1; + shift++; + } + + return base + ((((s64)disp * factor * xrecip_z) >> 22) << shift); + } + else + { + disp >>= 9; + return base + (((s64)disp * factor * xrecip_z) >> 13); + } + } + } + + private: + s32 x0, x1, xdiff, x; + + int shift; + bool linear; + + s32 xrecip, xrecip_z; + s32 w0n, w0d, w1d; + + u32 yfactor; + }; + + + template<int side> + class Slope + { + public: + Slope() {} + + s32 SetupDummy(s32 x0) + { + if (side) + { + dx = -0x40000; + x0--; + } + else + { + dx = 0; + } + + this->x0 = x0; + this->xmin = x0; + this->xmax = x0; + + Increment = 0; + XMajor = false; + + Interp.Setup(0, 0, 0, 0); + Interp.SetX(0); + + xcov_incr = 0; + + return x0; + } + + s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, s32 y) + { + this->x0 = x0; + this->y = y; + + if (x1 > x0) + { + this->xmin = x0; + this->xmax = x1-1; + this->Negative = false; + } + else if (x1 < x0) + { + this->xmin = x1; + this->xmax = x0-1; + this->Negative = true; + } + else + { + this->xmin = x0; + if (side) this->xmin--; + this->xmax = this->xmin; + this->Negative = false; + } + + xlen = xmax+1 - xmin; + ylen = y1 - y0; + + // slope increment has a 18-bit fractional part + // note: for some reason, x/y isn't calculated directly, + // instead, 1/y is calculated and then multiplied by x + // TODO: this is still not perfect (see for example x=169 y=33) + if (ylen == 0) + Increment = 0; + else if (ylen == xlen) + Increment = 0x40000; + else + { + s32 yrecip = (1<<18) / ylen; + Increment = (x1-x0) * yrecip; + if (Increment < 0) Increment = -Increment; + } + + XMajor = (Increment > 0x40000); + + if (side) + { + // right + + if (XMajor) dx = Negative ? (0x20000 + 0x40000) : (Increment - 0x20000); + else if (Increment != 0) dx = Negative ? 
0x40000 : 0; + else dx = -0x40000; + } + else + { + // left + + if (XMajor) dx = Negative ? ((Increment - 0x20000) + 0x40000) : 0x20000; + else if (Increment != 0) dx = Negative ? 0x40000 : 0; + else dx = 0; + } + + dx += (y - y0) * Increment; + + s32 x = XVal(); + + if (XMajor) + { + if (side) Interp.Setup(x0-1, x1-1, w0, w1); // checkme + else Interp.Setup(x0, x1, w0, w1); + Interp.SetX(x); + + // used for calculating AA coverage + xcov_incr = (ylen << 10) / xlen; + } + else + { + Interp.Setup(y0, y1, w0, w1); + Interp.SetX(y); + } + + return x; + } + + s32 Step() + { + dx += Increment; + y++; + + s32 x = XVal(); + if (XMajor) + { + Interp.SetX(x); + } + else + { + Interp.SetX(y); + } + return x; + } + + s32 XVal() + { + s32 ret; + if (Negative) ret = x0 - (dx >> 18); + else ret = x0 + (dx >> 18); + + if (ret < xmin) ret = xmin; + else if (ret > xmax) ret = xmax; + return ret; + } + + void EdgeParams_XMajor(s32* length, s32* coverage) + { + if (side ^ Negative) + *length = (dx >> 18) - ((dx-Increment) >> 18); + else + *length = ((dx+Increment) >> 18) - (dx >> 18); + + // for X-major edges, we return the coverage + // for the first pixel, and the increment for + // further pixels on the same scanline + s32 startx = dx >> 18; + if (Negative) startx = xlen - startx; + if (side) startx = startx - *length + 1; + + s32 startcov = (((startx << 10) + 0x1FF) * ylen) / xlen; + *coverage = (1<<31) | ((startcov & 0x3FF) << 12) | (xcov_incr & 0x3FF); + } + + void EdgeParams_YMajor(s32* length, s32* coverage) + { + *length = 1; + + if (Increment == 0) + { + *coverage = 31; + } + else + { + s32 cov = ((dx >> 9) + (Increment >> 10)) >> 4; + if ((cov >> 5) != (dx >> 18)) cov = 31; + cov &= 0x1F; + if (!(side ^ Negative)) cov = 0x1F - cov; + + *coverage = cov; + } + } + + void EdgeParams(s32* length, s32* coverage) + { + if (XMajor) + return EdgeParams_XMajor(length, coverage); + else + return EdgeParams_YMajor(length, coverage); + } + + s32 Increment; + bool Negative; + bool 
XMajor; + Interpolator<1> Interp; + + private: + s32 x0, xmin, xmax; + s32 xlen, ylen; + s32 dx; + s32 y; + + s32 xcov_incr; + s32 ycoverage, ycov_incr; + }; + + template <typename T> + inline T ReadVRAM_Texture(u32 addr) + { + return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; + } + template <typename T> + inline T ReadVRAM_TexPal(u32 addr) + { + return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; + } + + struct RendererPolygon + { + Polygon* PolyData; + + Slope<0> SlopeL; + Slope<1> SlopeR; + s32 XL, XR; + u32 CurVL, CurVR; + u32 NextVL, NextVR; + + }; + + RendererPolygon PolygonList[2048]; + void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); + u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); + void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); + void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y); + void SetupPolygonRightEdge(RendererPolygon* rp, s32 y); + void SetupPolygon(RendererPolygon* rp, Polygon* polygon); + void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); + void RenderPolygonScanline(RendererPolygon* rp, s32 y); + void RenderScanline(s32 y, int npolys); + u32 CalculateFogDensity(u32 pixeladdr); + void ScanlineFinalPass(s32 y); + void ClearBuffers(); + void RenderPolygons(bool threaded, Polygon** polygons, int npolys); + + void RenderThreadFunc(); + + // buffer dimensions are 258x194 to add a offscreen 1px border + // which simplifies edge marking tests + // buffer is duplicated to keep track of the two topmost pixels + // TODO: check if the hardware can accidentally plot pixels + // offscreen in that border + + static constexpr int ScanlineWidth = 258; + static constexpr int NumScanlines = 194; + static constexpr int BufferSize = ScanlineWidth * NumScanlines; + static constexpr int FirstPixelOffset = ScanlineWidth + 1; + + u32 ColorBuffer[BufferSize * 2]; + u32 DepthBuffer[BufferSize * 2]; + u32 AttrBuffer[BufferSize * 2]; + + // attribute buffer: + // 
bit0-3: edge flags (left/right/top/bottom) + // bit4: backfacing flag + // bit8-12: antialiasing alpha + // bit15: fog enable + // bit16-21: polygon ID for translucent pixels + // bit22: translucent flag + // bit24-29: polygon ID for opaque pixels + + u8 StencilBuffer[256*2]; + bool PrevIsShadowMask; + + bool Enabled; + + bool FrameIdentical; + + // threading + + bool Threaded; + // Platform::Thread* RenderThread; + std::thread RenderThread; + bool RenderThreadRunning; + bool RenderThreadRendering; + Platform::Semaphore* Sema_RenderStart; + Platform::Semaphore* Sema_RenderDone; + Platform::Semaphore* Sema_ScanlineCount; +}; +}
\ No newline at end of file diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 8f2d5a1..c02d955 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -16,8 +16,11 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ -#include <stdio.h> -#include <string.h> +#include "GPU_OpenGL.h" + +#include <cstdio> +#include <cstring> + #include "NDS.h" #include "GPU.h" #include "Config.h" @@ -26,34 +29,10 @@ namespace GPU { -namespace GLCompositor -{ using namespace OpenGL; -int Scale; -int ScreenH, ScreenW; - -GLuint CompShader[1][3]; -GLuint CompScaleLoc[1]; -GLuint Comp3DXPosLoc[1]; - -GLuint CompVertexBufferID; -GLuint CompVertexArrayID; - -struct CompVertex -{ - float Position[2]; - float Texcoord[2]; -}; -CompVertex CompVertices[2 * 3*2]; - -GLuint CompScreenInputTex; -GLuint CompScreenOutputTex[2]; -GLuint CompScreenOutputFB[2]; - - -bool Init() +bool GLCompositor::Init() { if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Nearest, CompShader[0], "CompositorShader")) //if (!OpenGL::BuildShaderProgram(kCompositorVS, kCompositorFS_Linear, CompShader[0], "CompositorShader")) @@ -144,7 +123,7 @@ bool Init() return true; } -void DeInit() +void GLCompositor::DeInit() { glDeleteFramebuffers(2, CompScreenOutputFB); glDeleteTextures(1, &CompScreenInputTex); @@ -157,12 +136,12 @@ void DeInit() OpenGL::DeleteShaderProgram(CompShader[i]); } -void Reset() +void GLCompositor::Reset() { } -void SetRenderSettings(RenderSettings& settings) +void GLCompositor::SetRenderSettings(RenderSettings& settings) { int scale = settings.GL_ScaleFactor; @@ -188,7 +167,7 @@ void SetRenderSettings(RenderSettings& settings) glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void Stop() +void GLCompositor::Stop() { for (int i = 0; i < 2; i++) { @@ -202,7 +181,7 @@ void Stop() glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void RenderFrame() +void GLCompositor::RenderFrame() { int frontbuf = GPU::FrontBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); @@ -236,17 +215,16 @@ void 
RenderFrame() } glActiveTexture(GL_TEXTURE1); - GPU3D::GLRenderer::SetupAccelFrame(); + reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame(); glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); glBindVertexArray(CompVertexArrayID); glDrawArrays(GL_TRIANGLES, 0, 4*3); } -void BindOutputTexture(int buf) +void GLCompositor::BindOutputTexture(int buf) { glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]); } } -} diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h new file mode 100644 index 0000000..1fcb08f --- /dev/null +++ b/src/GPU_OpenGL.h @@ -0,0 +1,68 @@ +/* + Copyright 2016-2020 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include "OpenGLSupport.h" + +namespace GPU +{ + +struct RenderSettings; + +class GLCompositor +{ +public: + GLCompositor() = default; + GLCompositor(const GLCompositor&) = delete; + GLCompositor& operator=(const GLCompositor&) = delete; + + bool Init(); + void DeInit(); + void Reset(); + + void SetRenderSettings(RenderSettings& settings); + + void Stop(); + void RenderFrame(); + void BindOutputTexture(int buf); +private: + + int Scale; + int ScreenH, ScreenW; + + GLuint CompShader[1][3]; + GLuint CompScaleLoc[1]; + GLuint Comp3DXPosLoc[1]; + + GLuint CompVertexBufferID; + GLuint CompVertexArrayID; + + struct CompVertex + { + float Position[2]; + float Texcoord[2]; + }; + CompVertex CompVertices[2 * 3*2]; + + GLuint CompScreenInputTex; + GLuint CompScreenOutputTex[2]; + GLuint CompScreenOutputFB[2]; +}; + +}
\ No newline at end of file diff --git a/src/NDS.cpp b/src/NDS.cpp index 7c0ecea..6c41cb5 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1564,7 +1564,7 @@ void RunTimer(u32 tid, s32 cycles) void RunTimers(u32 cpu) { - register u32 timermask = TimerCheckMask[cpu]; + u32 timermask = TimerCheckMask[cpu]; s32 cycles; if (cpu == 0) diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index 5aa4959..460457c 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -1013,7 +1013,7 @@ void ScreenPanelGL::paintGL() if (GPU::Renderer != 0) { // hardware-accelerated render - GPU::GLCompositor::BindOutputTexture(frontbuf); + GPU::CurGLCompositor->BindOutputTexture(frontbuf); } else #endif @@ -2536,9 +2536,15 @@ int main(int argc, char** argv) Config::Load(); -#define SANITIZE(var, min, max) { if (var < min) var = min; else if (var > max) var = max; } +#define SANITIZE(var, min, max) { var = std::clamp(var, min, max); } SANITIZE(Config::ConsoleType, 0, 1); - SANITIZE(Config::_3DRenderer, 0, 1); + SANITIZE(Config::_3DRenderer, + 0, + 0 // Minimum, Software renderer + #ifdef OGLRENDERER_ENABLED + + 1 // OpenGL Renderer + #endif + ); SANITIZE(Config::ScreenVSyncInterval, 1, 20); SANITIZE(Config::GL_ScaleFactor, 1, 16); SANITIZE(Config::AudioVolume, 0, 256); |