diff options
author | Jesse Talavera-Greenberg <jesse@jesse.tg> | 2023-11-09 15:54:51 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-09 21:54:51 +0100 |
commit | 4558be0d8eb79d276c89392b9410e6edb649db95 (patch) | |
tree | 3e2b37e31b38337adec64c5391e57ddf45af5d23 /src | |
parent | 88072a02c523e26390af6bd726608b3e567f996f (diff) |
Refactor the GPU to be object-oriented (#1873)
* Refactor GPU3D to be an object
  - Who has two thumbs and is the sworn enemy of global state? This guy!
* Refactor GPU itself to be an object
  - Wow, it's used in a lot of places
  - Also introduce a new `Melon` namespace for a few classes
  - I expect other classes will be moved into `Melon` over time
* Change signature of `Renderer3D::SetRenderSettings`
  - Make it `noexcept`, and its argument `const`
* Remove some stray whitespace
Diffstat (limited to 'src')
-rw-r--r-- | src/ARM.cpp | 11 | ||||
-rw-r--r-- | src/ARM.h | 13 | ||||
-rw-r--r-- | src/ARMJIT_Memory.cpp | 86 | ||||
-rw-r--r-- | src/DMA.cpp | 9 | ||||
-rw-r--r-- | src/DMA.h | 8 | ||||
-rw-r--r-- | src/DSi.cpp | 34 | ||||
-rw-r--r-- | src/DSi_NDMA.cpp | 6 | ||||
-rw-r--r-- | src/DSi_NDMA.h | 8 | ||||
-rw-r--r-- | src/GPU.cpp | 376 | ||||
-rw-r--r-- | src/GPU.h | 1009 | ||||
-rw-r--r-- | src/GPU2D.cpp | 49 | ||||
-rw-r--r-- | src/GPU2D.h | 11 | ||||
-rw-r--r-- | src/GPU2D_Soft.cpp | 119 | ||||
-rw-r--r-- | src/GPU2D_Soft.h | 8 | ||||
-rw-r--r-- | src/GPU3D.cpp | 282 | ||||
-rw-r--r-- | src/GPU3D.h | 276 | ||||
-rw-r--r-- | src/GPU3D_OpenGL.cpp | 86 | ||||
-rw-r--r-- | src/GPU3D_OpenGL.h | 13 | ||||
-rw-r--r-- | src/GPU3D_Soft.cpp | 106 | ||||
-rw-r--r-- | src/GPU3D_Soft.h | 11 | ||||
-rw-r--r-- | src/GPU_OpenGL.cpp | 24 | ||||
-rw-r--r-- | src/GPU_OpenGL.h | 12 | ||||
-rw-r--r-- | src/NDS.cpp | 329 | ||||
-rw-r--r-- | src/NDS.h | 7 | ||||
-rw-r--r-- | src/frontend/qt_sdl/main.cpp | 30 |
25 files changed, 1473 insertions, 1450 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index 4f2f892..18d50fe 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -109,10 +109,11 @@ u32 ARM::ConditionTable[16] = }; -ARM::ARM(u32 num) +ARM::ARM(u32 num, Melon::GPU& gpu) : #ifdef GDBSTUB_ENABLED - : GdbStub(this, Platform::GetConfigInt(num ? Platform::GdbPortARM7 : Platform::GdbPortARM9)) + GdbStub(this, Platform::GetConfigInt(num ? Platform::GdbPortARM7 : Platform::GdbPortARM9)), #endif + GPU(gpu) { // well uh Num = num; @@ -133,7 +134,7 @@ ARM::~ARM() // dorp } -ARMv5::ARMv5() : ARM(0) +ARMv5::ARMv5(Melon::GPU& gpu) : ARM(0, gpu) { #ifndef JIT_ENABLED DTCM = new u8[DTCMPhysicalSize]; @@ -142,7 +143,7 @@ ARMv5::ARMv5() : ARM(0) PU_Map = PU_PrivMap; } -ARMv4::ARMv4() : ARM(1) +ARMv4::ARMv4(Melon::GPU& gpu) : ARM(1, gpu) { // } @@ -1144,7 +1145,7 @@ void ARM::WriteMem(u32 addr, int size, u32 v) void ARM::ResetGdb() { NDS::Reset(); - GPU::StartFrame(); // need this to properly kick off the scheduler & frame output + GPU.StartFrame(); // need this to properly kick off the scheduler & frame output } int ARM::RemoteCmd(const u8* cmd, size_t len) { @@ -42,13 +42,18 @@ enum const u32 ITCMPhysicalSize = 0x8000; const u32 DTCMPhysicalSize = 0x4000; +namespace Melon +{ +class GPU; +} + class ARM #ifdef GDBSTUB_ENABLED : public Gdb::StubCallbacks #endif { public: - ARM(u32 num); + ARM(u32 num, Melon::GPU& gpu); virtual ~ARM(); // destroy shit virtual void Reset(); @@ -209,12 +214,14 @@ protected: void GdbCheckA(); void GdbCheckB(); void GdbCheckC(); +private: + Melon::GPU& GPU; }; class ARMv5 : public ARM { public: - ARMv5(); + ARMv5(Melon::GPU& gpu); ~ARMv5(); void Reset() override; @@ -358,7 +365,7 @@ public: class ARMv4 : public ARM { public: - ARMv4(); + ARMv4(Melon::GPU& gpu); void Reset() override; diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 095fb30..3591a25 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -1228,11 +1228,11 @@ void VRAMWrite(u32 addr, T val) { switch (addr & 0x00E00000) { 
- case 0x00000000: GPU::WriteVRAM_ABG<T>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<T>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<T>(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ<T>(addr, val); return; - default: GPU::WriteVRAM_LCDC<T>(addr, val); return; + case 0x00000000: NDS::GPU->WriteVRAM_ABG<T>(addr, val); return; + case 0x00200000: NDS::GPU->WriteVRAM_BBG<T>(addr, val); return; + case 0x00400000: NDS::GPU->WriteVRAM_AOBJ<T>(addr, val); return; + case 0x00600000: NDS::GPU->WriteVRAM_BOBJ<T>(addr, val); return; + default: NDS::GPU->WriteVRAM_LCDC<T>(addr, val); return; } } template <typename T> @@ -1240,14 +1240,56 @@ T VRAMRead(u32 addr) { switch (addr & 0x00E00000) { - case 0x00000000: return GPU::ReadVRAM_ABG<T>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<T>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<T>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<T>(addr); - default: return GPU::ReadVRAM_LCDC<T>(addr); + case 0x00000000: return NDS::GPU->ReadVRAM_ABG<T>(addr); + case 0x00200000: return NDS::GPU->ReadVRAM_BBG<T>(addr); + case 0x00400000: return NDS::GPU->ReadVRAM_AOBJ<T>(addr); + case 0x00600000: return NDS::GPU->ReadVRAM_BOBJ<T>(addr); + default: return NDS::GPU->ReadVRAM_LCDC<T>(addr); } } +static u8 GPU3D_Read8(u32 addr) noexcept +{ + return NDS::GPU->GPU3D.Read8(addr); +} + +static u16 GPU3D_Read16(u32 addr) noexcept +{ + return NDS::GPU->GPU3D.Read16(addr); +} + +static u32 GPU3D_Read32(u32 addr) noexcept +{ + return NDS::GPU->GPU3D.Read32(addr); +} + +static void GPU3D_Write8(u32 addr, u8 val) noexcept +{ + NDS::GPU->GPU3D.Write8(addr, val); +} + +static void GPU3D_Write16(u32 addr, u16 val) noexcept +{ + NDS::GPU->GPU3D.Write16(addr, val); +} + +static void GPU3D_Write32(u32 addr, u32 val) noexcept +{ + NDS::GPU->GPU3D.Write32(addr, val); +} + +template<class T> +static T GPU_ReadVRAM_ARM7(u32 addr) noexcept +{ + return NDS::GPU->ReadVRAM_ARM7<T>(addr); +} + +template<class T> +static 
void GPU_WriteVRAM_ARM7(u32 addr, T val) noexcept +{ + NDS::GPU->WriteVRAM_ARM7<T>(addr, val); +} + u32 NDSCartSlot_ReadROMData() { // TODO: Add a NDS* parameter, when NDS* is eventually implemented return NDS::NDSCartSlot->ReadROMData(); @@ -1273,12 +1315,12 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) { switch (size | store) { - case 8: return (void*)GPU3D::Read8; - case 9: return (void*)GPU3D::Write8; - case 16: return (void*)GPU3D::Read16; - case 17: return (void*)GPU3D::Write16; - case 32: return (void*)GPU3D::Read32; - case 33: return (void*)GPU3D::Write32; + case 8: return (void*)GPU3D_Read8; + case 9: return (void*)GPU3D_Write8; + case 16: return (void*)GPU3D_Read16; + case 17: return (void*)GPU3D_Write16; + case 32: return (void*)GPU3D_Read32; + case 33: return (void*)GPU3D_Write32; } } @@ -1380,12 +1422,12 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) case 0x06800000: switch (size | store) { - case 8: return (void*)GPU::ReadVRAM_ARM7<u8>; - case 9: return (void*)GPU::WriteVRAM_ARM7<u8>; - case 16: return (void*)GPU::ReadVRAM_ARM7<u16>; - case 17: return (void*)GPU::WriteVRAM_ARM7<u16>; - case 32: return (void*)GPU::ReadVRAM_ARM7<u32>; - case 33: return (void*)GPU::WriteVRAM_ARM7<u32>; + case 8: return (void*)GPU_ReadVRAM_ARM7<u8>; + case 9: return (void*)GPU_WriteVRAM_ARM7<u8>; + case 16: return (void*)GPU_ReadVRAM_ARM7<u16>; + case 17: return (void*)GPU_WriteVRAM_ARM7<u16>; + case 32: return (void*)GPU_ReadVRAM_ARM7<u32>; + case 33: return (void*)GPU_WriteVRAM_ARM7<u32>; } } } diff --git a/src/DMA.cpp b/src/DMA.cpp index b779d26..91f2345 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -47,9 +47,10 @@ using Platform::LogLevel; // TODO: timings are nonseq when address is fixed/decrementing -DMA::DMA(u32 cpu, u32 num) : +DMA::DMA(u32 cpu, u32 num, Melon::GPU& gpu) : CPU(cpu), - Num(num) + Num(num), + GPU(gpu) { if (cpu == 0) CountMask = 0x001FFFFF; @@ -142,7 +143,7 @@ void DMA::WriteCnt(u32 val) if ((StartMode & 0x7) 
== 0) Start(); else if (StartMode == 0x07) - GPU3D::CheckFIFODMA(); + GPU.GPU3D.CheckFIFODMA(); if (StartMode==0x06 || StartMode==0x13) Log(LogLevel::Warn, "UNIMPLEMENTED ARM%d DMA%d START MODE %02X, %08X->%08X\n", CPU?7:9, Num, StartMode, SrcAddr, DstAddr); @@ -609,7 +610,7 @@ void DMA::Run9() NDS::ResumeCPU(0, 1<<Num); if (StartMode == 0x07) - GPU3D::CheckFIFODMA(); + GPU.GPU3D.CheckFIFODMA(); } return; @@ -24,10 +24,15 @@ #include "Savestate.h" #include "DMA_Timings.h" +namespace Melon +{ +class GPU; +} + class DMA { public: - DMA(u32 cpu, u32 num); + DMA(u32 cpu, u32 num, Melon::GPU& gpu); ~DMA() = default; void Reset(); @@ -79,6 +84,7 @@ public: u32 Cnt {}; private: + Melon::GPU& GPU; u32 CPU {}; u32 Num {}; diff --git a/src/DSi.cpp b/src/DSi.cpp index bf7748e..f2937b0 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -105,14 +105,14 @@ bool Init() NWRAM_C = new u8[NWRAMSize]; #endif - NDMAs[0] = new DSi_NDMA(0, 0); - NDMAs[1] = new DSi_NDMA(0, 1); - NDMAs[2] = new DSi_NDMA(0, 2); - NDMAs[3] = new DSi_NDMA(0, 3); - NDMAs[4] = new DSi_NDMA(1, 0); - NDMAs[5] = new DSi_NDMA(1, 1); - NDMAs[6] = new DSi_NDMA(1, 2); - NDMAs[7] = new DSi_NDMA(1, 3); + NDMAs[0] = new DSi_NDMA(0, 0, *NDS::GPU); + NDMAs[1] = new DSi_NDMA(0, 1, *NDS::GPU); + NDMAs[2] = new DSi_NDMA(0, 2, *NDS::GPU); + NDMAs[3] = new DSi_NDMA(0, 3, *NDS::GPU); + NDMAs[4] = new DSi_NDMA(1, 0, *NDS::GPU); + NDMAs[5] = new DSi_NDMA(1, 1, *NDS::GPU); + NDMAs[6] = new DSi_NDMA(1, 2, *NDS::GPU); + NDMAs[7] = new DSi_NDMA(1, 3, *NDS::GPU); SDMMC = new DSi_SDHost(0); SDIO = new DSi_SDHost(1); @@ -205,8 +205,8 @@ void Reset() GPIO_WiFi = 0; // LCD init flag - GPU::DispStat[0] |= (1<<6); - GPU::DispStat[1] |= (1<<6); + NDS::GPU->DispStat[0] |= (1<<6); + NDS::GPU->DispStat[1] |= (1<<6); } void Stop() @@ -730,8 +730,8 @@ void SoftReset() // LCD init flag - GPU::DispStat[0] |= (1<<6); - GPU::DispStat[1] |= (1<<6); + NDS::GPU->DispStat[0] |= (1<<6); + NDS::GPU->DispStat[1] |= (1<<6); } bool LoadNAND() @@ -1528,11 +1528,11 
@@ void ARM9Write8(u32 addr, u8 val) #endif switch (addr & 0x00E00000) { - case 0x00000000: GPU::WriteVRAM_ABG<u8>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<u8>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<u8>(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ<u8>(addr, val); return; - default: GPU::WriteVRAM_LCDC<u8>(addr, val); return; + case 0x00000000: NDS::GPU->WriteVRAM_ABG<u8>(addr, val); return; + case 0x00200000: NDS::GPU->WriteVRAM_BBG<u8>(addr, val); return; + case 0x00400000: NDS::GPU->WriteVRAM_AOBJ<u8>(addr, val); return; + case 0x00600000: NDS::GPU->WriteVRAM_BOBJ<u8>(addr, val); return; + default: NDS::GPU->WriteVRAM_LCDC<u8>(addr, val); return; } case 0x08000000: diff --git a/src/DSi_NDMA.cpp b/src/DSi_NDMA.cpp index f3c3745..0afb201 100644 --- a/src/DSi_NDMA.cpp +++ b/src/DSi_NDMA.cpp @@ -26,7 +26,7 @@ using Platform::Log; using Platform::LogLevel; -DSi_NDMA::DSi_NDMA(u32 cpu, u32 num) +DSi_NDMA::DSi_NDMA(u32 cpu, u32 num, Melon::GPU& gpu) : GPU(gpu) { CPU = cpu; Num = num; @@ -125,7 +125,7 @@ void DSi_NDMA::WriteCnt(u32 val) if ((StartMode & 0x1F) == 0x10) Start(); else if (StartMode == 0x0A) - GPU3D::CheckFIFODMA(); + GPU.GPU3D.CheckFIFODMA(); // TODO: unsupported start modes: // * timers (00-03) @@ -259,7 +259,7 @@ void DSi_NDMA::Run9() NDS::ResumeCPU(0, 1<<(Num+4)); if (StartMode == 0x0A) - GPU3D::CheckFIFODMA(); + GPU.GPU3D.CheckFIFODMA(); } return; diff --git a/src/DSi_NDMA.h b/src/DSi_NDMA.h index e3da93d..732bc9d 100644 --- a/src/DSi_NDMA.h +++ b/src/DSi_NDMA.h @@ -22,10 +22,15 @@ #include "types.h" #include "Savestate.h" +namespace Melon +{ +class GPU; +} + class DSi_NDMA { public: - DSi_NDMA(u32 cpu, u32 num); + DSi_NDMA(u32 cpu, u32 num, Melon::GPU& gpu); ~DSi_NDMA(); void Reset(); @@ -73,6 +78,7 @@ public: u32 Cnt; private: + Melon::GPU& GPU; u32 CPU, Num; u32 StartMode; diff --git a/src/GPU.cpp b/src/GPU.cpp index ccfcb63..987068d 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -25,17 +25,18 @@ 
#endif #include "GPU2D_Soft.h" +#include "GPU3D_Soft.h" +#include "GPU3D_OpenGL.h" using Platform::Log; using Platform::LogLevel; -namespace GPU -{ - #define LINE_CYCLES (355*6) #define HBLANK_CYCLES (48+(256*6)) #define FRAME_CYCLES (LINE_CYCLES * 263) +namespace Melon +{ enum { LCD_StartHBlank = 0, @@ -43,62 +44,6 @@ enum LCD_FinishFrame, }; -u16 VCount; -u32 NextVCount; -u16 TotalScanlines; - -bool RunFIFO; - -u16 DispStat[2], VMatch[2]; - -u8 Palette[2*1024]; -u8 OAM[2*1024]; - -u8 VRAM_A[128*1024]; -u8 VRAM_B[128*1024]; -u8 VRAM_C[128*1024]; -u8 VRAM_D[128*1024]; -u8 VRAM_E[ 64*1024]; -u8 VRAM_F[ 16*1024]; -u8 VRAM_G[ 16*1024]; -u8 VRAM_H[ 32*1024]; -u8 VRAM_I[ 16*1024]; -u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; -u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; - -u8 VRAMCNT[9]; -u8 VRAMSTAT; - -u32 VRAMMap_LCDC; - -u32 VRAMMap_ABG[0x20]; -u32 VRAMMap_AOBJ[0x10]; -u32 VRAMMap_BBG[0x8]; -u32 VRAMMap_BOBJ[0x8]; - -u32 VRAMMap_ABGExtPal[4]; -u32 VRAMMap_AOBJExtPal; -u32 VRAMMap_BBGExtPal[4]; -u32 VRAMMap_BOBJExtPal; - -u32 VRAMMap_Texture[4]; -u32 VRAMMap_TexPal[8]; - -u32 VRAMMap_ARM7[2]; - -u8* VRAMPtr_ABG[0x20]; -u8* VRAMPtr_AOBJ[0x10]; -u8* VRAMPtr_BBG[0x8]; -u8* VRAMPtr_BOBJ[0x8]; - -int FrontBuffer; -u32* Framebuffer[2][2]; -int Renderer = 0; - -GPU2D::Unit GPU2D_A(0); -GPU2D::Unit GPU2D_B(1); - -std::unique_ptr<GPU2D::Renderer2D> GPU2D_Renderer = {}; /* VRAM invalidation tracking @@ -121,64 +66,24 @@ std::unique_ptr<GPU2D::Renderer2D> GPU2D_Renderer = {}; VRAMDirty need to be reset for the respective VRAM bank. 
*/ -VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG; -VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; -VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; -VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; - -VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; -VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; -VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; -VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; - -VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; -VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; - -NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; - -u8 VRAMFlat_ABG[512*1024]; -u8 VRAMFlat_BBG[128*1024]; -u8 VRAMFlat_AOBJ[256*1024]; -u8 VRAMFlat_BOBJ[128*1024]; - -u8 VRAMFlat_ABGExtPal[32*1024]; -u8 VRAMFlat_BBGExtPal[32*1024]; -u8 VRAMFlat_AOBJExtPal[8*1024]; -u8 VRAMFlat_BOBJExtPal[8*1024]; - -u8 VRAMFlat_Texture[512*1024]; -u8 VRAMFlat_TexPal[128*1024]; - -u32 OAMDirty; -u32 PaletteDirty; - -#ifdef OGLRENDERER_ENABLED -std::unique_ptr<GLCompositor> CurGLCompositor = {}; -#endif - -bool Init() +GPU::GPU() noexcept : GPU2D_A(0, *this), GPU2D_B(1, *this) { - NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartHBlank, StartHBlank); - NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartScanline, StartScanline); - NDS::RegisterEventFunc(NDS::Event_LCD, LCD_FinishFrame, FinishFrame); - NDS::RegisterEventFunc(NDS::Event_DisplayFIFO, 0, DisplayFIFO); + NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartHBlank, MemberEventFunc(GPU, StartHBlank)); + NDS::RegisterEventFunc(NDS::Event_LCD, LCD_StartScanline, MemberEventFunc(GPU, StartScanline)); + NDS::RegisterEventFunc(NDS::Event_LCD, LCD_FinishFrame, MemberEventFunc(GPU, FinishFrame)); + NDS::RegisterEventFunc(NDS::Event_DisplayFIFO, 0, MemberEventFunc(GPU, DisplayFIFO)); - GPU2D_Renderer = std::make_unique<GPU2D::SoftRenderer>(); - if (!GPU3D::Init()) return false; + GPU2D_Renderer = std::make_unique<GPU2D::SoftRenderer>(*this); FrontBuffer = 0; Framebuffer[0][0] = NULL; 
Framebuffer[0][1] = NULL; Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL; Renderer = 0; - - return true; } -void DeInit() +GPU::~GPU() noexcept { - GPU2D_Renderer.reset(); - GPU3D::DeInit(); - + // All unique_ptr fields are automatically cleaned up if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; if (Framebuffer[0][1]) delete[] Framebuffer[0][1]; if (Framebuffer[1][0]) delete[] Framebuffer[1][0]; @@ -189,17 +94,13 @@ void DeInit() Framebuffer[1][0] = nullptr; Framebuffer[1][1] = nullptr; -#ifdef OGLRENDERER_ENABLED - CurGLCompositor = nullptr; -#endif - NDS::UnregisterEventFunc(NDS::Event_LCD, LCD_StartHBlank); NDS::UnregisterEventFunc(NDS::Event_LCD, LCD_StartScanline); NDS::UnregisterEventFunc(NDS::Event_LCD, LCD_FinishFrame); NDS::UnregisterEventFunc(NDS::Event_DisplayFIFO, 0); } -void ResetVRAMCache() +void GPU::ResetVRAMCache() noexcept { for (int i = 0; i < 9; i++) VRAMDirty[i] = NonStupidBitField<128*1024/VRAMDirtyGranularity>(); @@ -227,7 +128,7 @@ void ResetVRAMCache() memset(VRAMFlat_TexPal, 0, sizeof(VRAMFlat_TexPal)); } -void Reset() +void GPU::Reset() noexcept { VCount = 0; NextVCount = -1; @@ -278,7 +179,7 @@ void Reset() memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ)); size_t fbsize; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) fbsize = (256*3 + 1) * 192; else fbsize = 256 * 192; @@ -296,7 +197,7 @@ void Reset() GPU2D_A.Reset(); GPU2D_B.Reset(); - GPU3D::Reset(); + GPU3D.Reset(); int backbuf = FrontBuffer ? 
0 : 1; GPU2D_Renderer->SetFramebuffer(Framebuffer[backbuf][1], Framebuffer[backbuf][0]); @@ -309,10 +210,10 @@ void Reset() PaletteDirty = 0xF; } -void Stop() +void GPU::Stop() noexcept { int fbsize; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) fbsize = (256*3 + 1) * 192; else fbsize = 256 * 192; @@ -325,12 +226,12 @@ void Stop() #ifdef OGLRENDERER_ENABLED // This needs a better way to know that we're // using the OpenGL renderer specifically - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) CurGLCompositor->Stop(); #endif } -void DoSavestate(Savestate* file) +void GPU::DoSavestate(Savestate* file) noexcept { file->Section("GPUG"); @@ -391,12 +292,12 @@ void DoSavestate(Savestate* file) GPU2D_A.DoSavestate(file); GPU2D_B.DoSavestate(file); - GPU3D::DoSavestate(file); + GPU3D.DoSavestate(file); ResetVRAMCache(); } -void AssignFramebuffers() +void GPU::AssignFramebuffers() noexcept { int backbuf = FrontBuffer ? 0 : 1; if (NDS::PowerControl9 & (1<<15)) @@ -409,41 +310,41 @@ void AssignFramebuffers() } } -void InitRenderer(int renderer) +void GPU::InitRenderer(int renderer) noexcept { #ifdef OGLRENDERER_ENABLED if (renderer == 1) { - CurGLCompositor = GLCompositor::New(); + CurGLCompositor = GLCompositor::New(*this); // Create opengl renderer if (!CurGLCompositor) { // Fallback on software renderer renderer = 0; - GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>(); + GPU3D.SetCurrentRenderer(std::make_unique<GPU3D::SoftRenderer>(*this)); } - GPU3D::CurrentRenderer = GPU3D::GLRenderer::New(); - if (!GPU3D::CurrentRenderer) + GPU3D.SetCurrentRenderer(GPU3D::GLRenderer::New(*this)); + if (!GPU3D.GetCurrentRenderer()) { // Fallback on software renderer CurGLCompositor.reset(); renderer = 0; - GPU3D::CurrentRenderer = std::make_unique<GPU3D::SoftRenderer>(); + GPU3D.SetCurrentRenderer(std::make_unique<GPU3D::SoftRenderer>(*this)); } } else #endif { - GPU3D::CurrentRenderer = 
std::make_unique<GPU3D::SoftRenderer>(); + GPU3D.SetCurrentRenderer(std::make_unique<GPU3D::SoftRenderer>(*this)); } Renderer = renderer; } -void DeInitRenderer() +void GPU::DeInitRenderer() noexcept { // Delete the 3D renderer, if it exists - GPU3D::CurrentRenderer.reset(); + GPU3D.SetCurrentRenderer(nullptr); #ifdef OGLRENDERER_ENABLED // Delete the compositor, if one exists @@ -451,22 +352,22 @@ void DeInitRenderer() #endif } -void ResetRenderer() +void GPU::ResetRenderer() noexcept { if (Renderer == 0) { - GPU3D::CurrentRenderer->Reset(); + GPU3D.GetCurrentRenderer()->Reset(); } #ifdef OGLRENDERER_ENABLED else { CurGLCompositor->Reset(); - GPU3D::CurrentRenderer->Reset(); + GPU3D.GetCurrentRenderer()->Reset(); } #endif } -void SetRenderSettings(int renderer, RenderSettings& settings) +void GPU::SetRenderSettings(int renderer, RenderSettings& settings) noexcept { if (renderer != Renderer) { @@ -475,7 +376,7 @@ void SetRenderSettings(int renderer, RenderSettings& settings) } int fbsize; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) fbsize = (256*3 + 1) * 192; else fbsize = 256 * 192; @@ -499,13 +400,13 @@ void SetRenderSettings(int renderer, RenderSettings& settings) if (Renderer == 0) { - GPU3D::CurrentRenderer->SetRenderSettings(settings); + GPU3D.GetCurrentRenderer()->SetRenderSettings(settings); } #ifdef OGLRENDERER_ENABLED else { CurGLCompositor->SetRenderSettings(settings); - GPU3D::CurrentRenderer->SetRenderSettings(settings); + GPU3D.GetCurrentRenderer()->SetRenderSettings(settings); } #endif } @@ -541,7 +442,14 @@ void SetRenderSettings(int renderer, RenderSettings& settings) // when reading: values are read from each bank and ORed together // when writing: value is written to each bank -u8* GetUniqueBankPtr(u32 mask, u32 offset) +u8* GPU::GetUniqueBankPtr(u32 mask, u32 offset) noexcept +{ + if (!mask || (mask & (mask - 1)) != 0) return NULL; + int num = __builtin_ctz(mask); + return &VRAM[num][offset & VRAMMask[num]]; 
+} + +const u8* GPU::GetUniqueBankPtr(u32 mask, u32 offset) const noexcept { if (!mask || (mask & (mask - 1)) != 0) return NULL; int num = __builtin_ctz(mask); @@ -556,7 +464,7 @@ u8* GetUniqueBankPtr(u32 mask, u32 offset) #define UNMAP_RANGE_PTR(map, base, n) \ for (int i = 0; i < n; i++) { VRAMMap_##map[(base)+i] &= ~bankmask; VRAMPtr_##map[(base)+i] = GetUniqueBankPtr(VRAMMap_##map[(base)+i], ((base)+i)<<14); } -void MapVRAM_AB(u32 bank, u8 cnt) +void GPU::MapVRAM_AB(u32 bank, u8 cnt) noexcept { cnt &= 0x9B; @@ -616,7 +524,7 @@ void MapVRAM_AB(u32 bank, u8 cnt) } } -void MapVRAM_CD(u32 bank, u8 cnt) +void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept { cnt &= 0x9F; @@ -705,7 +613,7 @@ void MapVRAM_CD(u32 bank, u8 cnt) } } -void MapVRAM_E(u32 bank, u8 cnt) +void GPU::MapVRAM_E(u32 bank, u8 cnt) noexcept { cnt &= 0x87; @@ -769,7 +677,7 @@ void MapVRAM_E(u32 bank, u8 cnt) } } -void MapVRAM_FG(u32 bank, u8 cnt) +void GPU::MapVRAM_FG(u32 bank, u8 cnt) noexcept { cnt &= 0x9F; @@ -869,7 +777,7 @@ void MapVRAM_FG(u32 bank, u8 cnt) } } -void MapVRAM_H(u32 bank, u8 cnt) +void GPU::MapVRAM_H(u32 bank, u8 cnt) noexcept { cnt &= 0x83; @@ -931,7 +839,7 @@ void MapVRAM_H(u32 bank, u8 cnt) } } -void MapVRAM_I(u32 bank, u8 cnt) +void GPU::MapVRAM_I(u32 bank, u8 cnt) noexcept { cnt &= 0x83; @@ -1002,7 +910,7 @@ void MapVRAM_I(u32 bank, u8 cnt) } -void SetPowerCnt(u32 val) +void GPU::SetPowerCnt(u32 val) noexcept { // POWCNT1 effects: // * bit0: asplodes hardware??? not tested. 
@@ -1016,13 +924,13 @@ void SetPowerCnt(u32 val) GPU2D_A.SetEnabled(val & (1<<1)); GPU2D_B.SetEnabled(val & (1<<9)); - GPU3D::SetEnabled(val & (1<<3), val & (1<<2)); + GPU3D.SetEnabled(val & (1<<3), val & (1<<2)); AssignFramebuffers(); } -void DisplayFIFO(u32 x) +void GPU::DisplayFIFO(u32 x) noexcept { // sample the FIFO // as this starts 16 cycles (~3 pixels) before display start, @@ -1045,7 +953,7 @@ void DisplayFIFO(u32 x) GPU2D_A.SampleFIFO(253, 3); // sample the remaining pixels } -void StartFrame() +void GPU::StartFrame() noexcept { // only run the display FIFO if needed: // * if it is used for display or capture @@ -1056,7 +964,7 @@ void StartFrame() StartScanline(0); } -void StartHBlank(u32 line) +void GPU::StartHBlank(u32 line) noexcept { DispStat[0] |= (1<<1); DispStat[1] |= (1<<1); @@ -1082,7 +990,7 @@ void StartHBlank(u32 line) } else if (VCount == 215) { - GPU3D::VCount215(); + GPU3D.VCount215(); } else if (VCount == 262) { @@ -1099,25 +1007,25 @@ void StartHBlank(u32 line) NDS::ScheduleEvent(NDS::Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_FinishFrame, line+1); } -void FinishFrame(u32 lines) +void GPU::FinishFrame(u32 lines) noexcept { FrontBuffer = FrontBuffer ? 0 : 1; AssignFramebuffers(); TotalScanlines = lines; - if (GPU3D::AbortFrame) + if (GPU3D.AbortFrame) { - GPU3D::RestartFrame(); - GPU3D::AbortFrame = false; + GPU3D.RestartFrame(); + GPU3D.AbortFrame = false; } } -void BlankFrame() +void GPU::BlankFrame() noexcept { int backbuf = FrontBuffer ? 0 : 1; int fbsize; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) fbsize = (256*3 + 1) * 192; else fbsize = 256 * 192; @@ -1131,7 +1039,7 @@ void BlankFrame() TotalScanlines = 263; } -void StartScanline(u32 line) +void GPU::StartScanline(u32 line) noexcept { if (line == 0) VCount = 0; @@ -1201,7 +1109,7 @@ void StartScanline(u32 line) // texture memory anyway and only update it before the start //of the next frame. 
// So we can give the rasteriser a bit more headroom - GPU3D::VCount144(); + GPU3D.VCount144(); // VBlank DispStat[0] |= (1<<0); @@ -1217,11 +1125,11 @@ void StartScanline(u32 line) GPU2D_A.VBlank(); GPU2D_B.VBlank(); - GPU3D::VBlank(); + GPU3D.VBlank(); #ifdef OGLRENDERER_ENABLED // Need a better way to identify the openGL renderer in particular - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU3D.IsRendererAccelerated()) CurGLCompositor->RenderFrame(); #endif } @@ -1231,7 +1139,7 @@ void StartScanline(u32 line) } -void SetDispStat(u32 cpu, u16 val) +void GPU::SetDispStat(u32 cpu, u16 val) noexcept { val &= 0xFFB8; DispStat[cpu] &= 0x0047; @@ -1240,7 +1148,7 @@ void SetDispStat(u32 cpu, u16 val) VMatch[cpu] = (val >> 8) | ((val & 0x80) << 1); } -void SetVCount(u16 val) +void GPU::SetVCount(u16 val) noexcept { // VCount write is delayed until the next scanline @@ -1248,12 +1156,12 @@ void SetVCount(u16 val) // 3D engine seems to give up on the current frame in that situation, repeating the last two scanlines // TODO: also check the various DMA types that can be involved - GPU3D::AbortFrame |= NextVCount != val; + GPU3D.AbortFrame |= NextVCount != val; NextVCount = val; } template <u32 Size, u32 MappingGranularity> -NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings) +NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranularity>::DeriveState(u32* currentMappings, GPU& gpu) { NonStupidBitField<Size/VRAMDirtyGranularity> result; u16 banksToBeZeroed = 0; @@ -1282,20 +1190,20 @@ NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranul static_assert(VRAMDirtyGranularity == 512, ""); if (MappingGranularity == 16*1024) { - u32 dirty = ((u32*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 14)]; + u32 dirty = ((u32*)gpu.VRAMDirty[num].Data)[i & (gpu.VRAMMask[num] >> 14)]; result.Data[i / 2] |= (u64)dirty << ((i&1)*32); } else if (MappingGranularity == 8*1024) 
{ - u16 dirty = ((u16*)VRAMDirty[num].Data)[i & (VRAMMask[num] >> 13)]; + u16 dirty = ((u16*)gpu.VRAMDirty[num].Data)[i & (gpu.VRAMMask[num] >> 13)]; result.Data[i / 4] |= (u64)dirty << ((i&3)*16); } else if (MappingGranularity == 128*1024) { - result.Data[i * 4 + 0] |= VRAMDirty[num].Data[0]; - result.Data[i * 4 + 1] |= VRAMDirty[num].Data[1]; - result.Data[i * 4 + 2] |= VRAMDirty[num].Data[2]; - result.Data[i * 4 + 3] |= VRAMDirty[num].Data[3]; + result.Data[i * 4 + 0] |= gpu.VRAMDirty[num].Data[0]; + result.Data[i * 4 + 1] |= gpu.VRAMDirty[num].Data[1]; + result.Data[i * 4 + 2] |= gpu.VRAMDirty[num].Data[2]; + result.Data[i * 4 + 3] |= gpu.VRAMDirty[num].Data[3]; } else { @@ -1310,137 +1218,63 @@ NonStupidBitField<Size/VRAMDirtyGranularity> VRAMTrackingSet<Size, MappingGranul { u32 num = __builtin_ctz(banksToBeZeroed); banksToBeZeroed &= ~(1 << num); - VRAMDirty[num].Clear(); + gpu.VRAMDirty[num].Clear(); } return result; } -template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*); -template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*); -template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*); -template NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*); -template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*); -template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*); +template NonStupidBitField<32*1024/VRAMDirtyGranularity> VRAMTrackingSet<32*1024, 8*1024>::DeriveState(u32*, GPU& gpu); +template NonStupidBitField<8*1024/VRAMDirtyGranularity> VRAMTrackingSet<8*1024, 8*1024>::DeriveState(u32*, GPU& gpu); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 128*1024>::DeriveState(u32*, GPU& gpu); +template 
NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMTrackingSet<128*1024, 16*1024>::DeriveState(u32*, GPU& gpu); +template NonStupidBitField<256*1024/VRAMDirtyGranularity> VRAMTrackingSet<256*1024, 16*1024>::DeriveState(u32*, GPU& gpu); +template NonStupidBitField<512*1024/VRAMDirtyGranularity> VRAMTrackingSet<512*1024, 16*1024>::DeriveState(u32*, GPU& gpu); -template <u32 MappingGranularity, u32 Size> -inline bool CopyLinearVRAM(u8* flat, u32* mappings, NonStupidBitField<Size>& dirty, u64 (*slowAccess)(u32 addr)) -{ - const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; - bool change = false; - - typename NonStupidBitField<Size>::Iterator it = dirty.Begin(); - while (it != dirty.End()) - { - u32 offset = *it * VRAMDirtyGranularity; - u8* dst = flat + offset; - u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset); - if (fastAccess) - { - memcpy(dst, fastAccess, VRAMDirtyGranularity); - } - else - { - for (u32 i = 0; i < VRAMDirtyGranularity; i += 8) - *(u64*)&dst[i] = slowAccess(offset + i); - } - change = true; - it++; - } - return change; -} -bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, ReadVRAM_Texture<u64>); + return CopyLinearVRAM<128*1024>(VRAMFlat_Texture, VRAMMap_Texture, dirty, &GPU::ReadVRAM_Texture<u64>); } -bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, ReadVRAM_TexPal<u64>); + return CopyLinearVRAM<16*1024>(VRAMFlat_TexPal, VRAMMap_TexPal, dirty, &GPU::ReadVRAM_TexPal<u64>); } -bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& 
dirty) +bool GPU::MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, ReadVRAM_ABG<u64>); + return CopyLinearVRAM<16*1024>(VRAMFlat_ABG, VRAMMap_ABG, dirty, &GPU::ReadVRAM_ABG<u64>); } -bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, ReadVRAM_BBG<u64>); + return CopyLinearVRAM<16*1024>(VRAMFlat_BBG, VRAMMap_BBG, dirty, &GPU::ReadVRAM_BBG<u64>); } -bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, ReadVRAM_AOBJ<u64>); + return CopyLinearVRAM<16*1024>(VRAMFlat_AOBJ, VRAMMap_AOBJ, dirty, &GPU::ReadVRAM_AOBJ<u64>); } -bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, ReadVRAM_BOBJ<u64>); + return CopyLinearVRAM<16*1024>(VRAMFlat_BOBJ, VRAMMap_BOBJ, dirty, &GPU::ReadVRAM_BOBJ<u64>); } -template<typename T> -T ReadVRAM_ABGExtPal(u32 addr) +bool GPU::MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) noexcept { - u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3]; - - T ret = 0; - if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF]; - if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; - if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; - - return ret; + return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, &GPU::ReadVRAM_ABGExtPal<u64>); } - -template<typename T> -T 
ReadVRAM_BBGExtPal(u32 addr) +bool GPU::MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) noexcept { - u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3]; - - T ret = 0; - if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; - - return ret; + return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, &GPU::ReadVRAM_BBGExtPal<u64>); } -template<typename T> -T ReadVRAM_AOBJExtPal(u32 addr) +bool GPU::MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) noexcept { - u32 mask = VRAMMap_AOBJExtPal; - - T ret = 0; - if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF]; - if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF]; - - return ret; + return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, &GPU::ReadVRAM_AOBJExtPal<u64>); } - -template<typename T> -T ReadVRAM_BOBJExtPal(u32 addr) -{ - u32 mask = VRAMMap_BOBJExtPal; - - T ret = 0; - if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF]; - - return ret; -} - -bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) +bool GPU::MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) noexcept { - return CopyLinearVRAM<8*1024>(VRAMFlat_ABGExtPal, VRAMMap_ABGExtPal, dirty, ReadVRAM_ABGExtPal<u64>); + return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, &GPU::ReadVRAM_BOBJExtPal<u64>); } -bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) -{ - return CopyLinearVRAM<8*1024>(VRAMFlat_BBGExtPal, VRAMMap_BBGExtPal, dirty, ReadVRAM_BBGExtPal<u64>); -} - -bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) -{ - return CopyLinearVRAM<8*1024>(VRAMFlat_AOBJExtPal, &VRAMMap_AOBJExtPal, dirty, ReadVRAM_AOBJExtPal<u64>); -} -bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) -{ - return 
CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, ReadVRAM_BOBJExtPal<u64>); -} - } @@ -22,67 +22,23 @@ #include <memory> #include "GPU2D.h" +#include "GPU3D.h" #include "NonStupidBitfield.h" #ifdef OGLRENDERER_ENABLED #include "GPU_OpenGL.h" #endif -namespace GPU -{ - -extern u16 VCount; -extern u16 TotalScanlines; - -extern u16 DispStat[2]; - -extern u8 VRAMCNT[9]; -extern u8 VRAMSTAT; - -extern u8 Palette[2*1024]; -extern u8 OAM[2*1024]; - -extern u8 VRAM_A[128*1024]; -extern u8 VRAM_B[128*1024]; -extern u8 VRAM_C[128*1024]; -extern u8 VRAM_D[128*1024]; -extern u8 VRAM_E[ 64*1024]; -extern u8 VRAM_F[ 16*1024]; -extern u8 VRAM_G[ 16*1024]; -extern u8 VRAM_H[ 32*1024]; -extern u8 VRAM_I[ 16*1024]; - -extern u8* const VRAM[9]; - -extern u32 VRAMMap_LCDC; -extern u32 VRAMMap_ABG[0x20]; -extern u32 VRAMMap_AOBJ[0x10]; -extern u32 VRAMMap_BBG[0x8]; -extern u32 VRAMMap_BOBJ[0x8]; -extern u32 VRAMMap_ABGExtPal[4]; -extern u32 VRAMMap_AOBJExtPal; -extern u32 VRAMMap_BBGExtPal[4]; -extern u32 VRAMMap_BOBJExtPal; -extern u32 VRAMMap_Texture[4]; -extern u32 VRAMMap_TexPal[8]; -extern u32 VRAMMap_ARM7[2]; - -extern u8* VRAMPtr_ABG[0x20]; -extern u8* VRAMPtr_AOBJ[0x10]; -extern u8* VRAMPtr_BBG[0x8]; -extern u8* VRAMPtr_BOBJ[0x8]; - -extern int FrontBuffer; -extern u32* Framebuffer[2][2]; - -extern GPU2D::Unit GPU2D_A; -extern GPU2D::Unit GPU2D_B; -extern int Renderer; - -const u32 VRAMDirtyGranularity = 512; +namespace GPU3D +{ +class GPU3D; +} -extern NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9]; +namespace Melon +{ +static constexpr u32 VRAMDirtyGranularity = 512; +class GPU; template <u32 Size, u32 MappingGranularity> struct VRAMTrackingSet @@ -100,524 +56,657 @@ struct VRAMTrackingSet Mapping[i] = 0x8000; } } - NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings); + NonStupidBitField<Size/VRAMDirtyGranularity> DeriveState(u32* currentMappings, GPU& gpu); }; -extern VRAMTrackingSet<512*1024, 16*1024> 
VRAMDirty_ABG; -extern VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ; -extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG; -extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ; +struct RenderSettings +{ + bool Soft_Threaded; -extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal; -extern VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal; -extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal; -extern VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal; + int GL_ScaleFactor; + bool GL_BetterPolygons; +}; -extern VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture; -extern VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal; +class GPU +{ +public: + GPU() noexcept; + ~GPU() noexcept; + void Reset() noexcept; + void Stop() noexcept; -extern u8 VRAMFlat_ABG[512*1024]; -extern u8 VRAMFlat_BBG[128*1024]; -extern u8 VRAMFlat_AOBJ[256*1024]; -extern u8 VRAMFlat_BOBJ[128*1024]; + void DoSavestate(Savestate* file) noexcept; -extern u8 VRAMFlat_ABGExtPal[32*1024]; -extern u8 VRAMFlat_BBGExtPal[32*1024]; + [[deprecated("Set the renderer directly instead of using an integer code")]] void InitRenderer(int renderer) noexcept; + void DeInitRenderer() noexcept; + void ResetRenderer() noexcept; -extern u8 VRAMFlat_AOBJExtPal[8*1024]; -extern u8 VRAMFlat_BOBJExtPal[8*1024]; + void SetRenderSettings(int renderer, RenderSettings& settings) noexcept; -extern u8 VRAMFlat_Texture[512*1024]; -extern u8 VRAMFlat_TexPal[128*1024]; + u8* GetUniqueBankPtr(u32 mask, u32 offset) noexcept; + const u8* GetUniqueBankPtr(u32 mask, u32 offset) const noexcept; -bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); -bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + void MapVRAM_AB(u32 bank, u8 cnt) noexcept; + void MapVRAM_CD(u32 bank, u8 cnt) noexcept; + void MapVRAM_E(u32 bank, u8 cnt) noexcept; + void MapVRAM_FG(u32 bank, u8 cnt) noexcept; + void MapVRAM_H(u32 bank, u8 cnt) noexcept; + void 
MapVRAM_I(u32 bank, u8 cnt) noexcept; -bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty); -bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + template<typename T> + T ReadVRAM_LCDC(u32 addr) const noexcept + { + int bank; -bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); -bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty); + switch (addr & 0xFF8FC000) + { + case 0x06800000: case 0x06804000: case 0x06808000: case 0x0680C000: + case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: + bank = 0; + addr &= 0x1FFFF; + break; + + case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: + case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: + bank = 1; + addr &= 0x1FFFF; + break; + + case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: + case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: + bank = 2; + addr &= 0x1FFFF; + break; + + case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: + case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: + bank = 3; + addr &= 0x1FFFF; + break; + + case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: + bank = 4; + addr &= 0xFFFF; + break; + + case 0x06890000: + bank = 5; + addr &= 0x3FFF; + break; + + case 0x06894000: + bank = 6; + addr &= 0x3FFF; + break; + + case 0x06898000: + case 0x0689C000: + bank = 7; + addr &= 0x7FFF; + break; + + case 0x068A0000: + bank = 8; + addr &= 0x3FFF; + break; + + default: return 0; + } -bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); -bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty); + if (VRAMMap_LCDC & (1<<bank)) return *(T*)&VRAM[bank][addr]; -bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty); -bool 
MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty); + return 0; + } -void SyncDirtyFlags(); + template<typename T> + void WriteVRAM_LCDC(u32 addr, T val) + { + int bank; -extern u32 OAMDirty; -extern u32 PaletteDirty; + switch (addr & 0xFF8FC000) + { + case 0x06800000: case 0x06804000: case 0x06808000: case 0x0680C000: + case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: + bank = 0; + addr &= 0x1FFFF; + break; + + case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: + case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: + bank = 1; + addr &= 0x1FFFF; + break; + + case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: + case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: + bank = 2; + addr &= 0x1FFFF; + break; + + case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: + case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: + bank = 3; + addr &= 0x1FFFF; + break; + + case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: + bank = 4; + addr &= 0xFFFF; + break; + + case 0x06890000: + bank = 5; + addr &= 0x3FFF; + break; + + case 0x06894000: + bank = 6; + addr &= 0x3FFF; + break; + + case 0x06898000: + case 0x0689C000: + bank = 7; + addr &= 0x7FFF; + break; + + case 0x068A0000: + bank = 8; + addr &= 0x3FFF; + break; + + default: return; + } -#ifdef OGLRENDERER_ENABLED -extern std::unique_ptr<GLCompositor> CurGLCompositor; -#endif + if (VRAMMap_LCDC & (1<<bank)) + { + *(T*)&VRAM[bank][addr] = val; + VRAMDirty[bank][addr / VRAMDirtyGranularity] = true; + } + } -struct RenderSettings -{ - bool Soft_Threaded; - int GL_ScaleFactor; - bool GL_BetterPolygons; -}; + template<typename T> + T ReadVRAM_ABG(u32 addr) const noexcept + { + u8* ptr = VRAMPtr_ABG[(addr >> 14) & 0x1F]; + if (ptr) return *(T*)&ptr[addr & 0x3FFF]; + T ret = 0; + u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; -bool Init(); -void DeInit(); -void Reset(); -void 
Stop(); + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; -void DoSavestate(Savestate* file); + return ret; + } -void InitRenderer(int renderer); -void DeInitRenderer(); -void ResetRenderer(); + template<typename T> + void WriteVRAM_ABG(u32 addr, T val) + { + u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; -void SetRenderSettings(int renderer, RenderSettings& settings); + if (mask & (1<<0)) + { + VRAMDirty[0][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_A[addr & 0x1FFFF] = val; + } + if (mask & (1<<1)) + { + VRAMDirty[1][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_B[addr & 0x1FFFF] = val; + } + if (mask & (1<<2)) + { + VRAMDirty[2][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_C[addr & 0x1FFFF] = val; + } + if (mask & (1<<3)) + { + VRAMDirty[3][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_D[addr & 0x1FFFF] = val; + } + if (mask & (1<<4)) + { + VRAMDirty[4][(addr & 0xFFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_E[addr & 0xFFFF] = val; + } + if (mask & (1<<5)) + { + VRAMDirty[5][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_F[addr & 0x3FFF] = val; + } + if (mask & (1<<6)) + { + VRAMDirty[6][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_G[addr & 0x3FFF] = val; + } + } -u8* GetUniqueBankPtr(u32 mask, u32 offset); + template<typename T> + T ReadVRAM_AOBJ(u32 addr) const noexcept + { + u8* ptr = VRAMPtr_AOBJ[(addr >> 14) & 0xF]; + if (ptr) return *(T*)&ptr[addr & 0x3FFF]; -void MapVRAM_AB(u32 bank, u8 cnt); -void MapVRAM_CD(u32 bank, u8 cnt); -void MapVRAM_E(u32 bank, u8 cnt); -void MapVRAM_FG(u32 bank, u8 cnt); -void 
MapVRAM_H(u32 bank, u8 cnt); -void MapVRAM_I(u32 bank, u8 cnt); + T ret = 0; + u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; -template<typename T> -T ReadVRAM_LCDC(u32 addr) -{ - int bank; + return ret; + } - switch (addr & 0xFF8FC000) + template<typename T> + void WriteVRAM_AOBJ(u32 addr, T val) { - case 0x06800000: case 0x06804000: case 0x06808000: case 0x0680C000: - case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: - bank = 0; - addr &= 0x1FFFF; - break; - - case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: - case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: - bank = 1; - addr &= 0x1FFFF; - break; - - case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: - case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: - bank = 2; - addr &= 0x1FFFF; - break; - - case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: - case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: - bank = 3; - addr &= 0x1FFFF; - break; - - case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: - bank = 4; - addr &= 0xFFFF; - break; - - case 0x06890000: - bank = 5; - addr &= 0x3FFF; - break; - - case 0x06894000: - bank = 6; - addr &= 0x3FFF; - break; - - case 0x06898000: - case 0x0689C000: - bank = 7; - addr &= 0x7FFF; - break; - - case 0x068A0000: - bank = 8; - addr &= 0x3FFF; - break; - - default: return 0; + u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + + if (mask & (1<<0)) + { + VRAMDirty[0][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_A[addr & 0x1FFFF] = val; + } + if (mask & (1<<1)) + { + VRAMDirty[1][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_B[addr & 
0x1FFFF] = val; + } + if (mask & (1<<4)) + { + VRAMDirty[4][(addr & 0xFFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_E[addr & 0xFFFF] = val; + } + if (mask & (1<<5)) + { + VRAMDirty[5][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_F[addr & 0x3FFF] = val; + } + if (mask & (1<<6)) + { + VRAMDirty[6][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_G[addr & 0x3FFF] = val; + } } - if (VRAMMap_LCDC & (1<<bank)) return *(T*)&VRAM[bank][addr]; - return 0; -} + template<typename T> + T ReadVRAM_BBG(u32 addr) const noexcept + { + u8* ptr = VRAMPtr_BBG[(addr >> 14) & 0x7]; + if (ptr) return *(T*)&ptr[addr & 0x3FFF]; -template<typename T> -void WriteVRAM_LCDC(u32 addr, T val) -{ - int bank; + T ret = 0; + u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; - switch (addr & 0xFF8FC000) - { - case 0x06800000: case 0x06804000: case 0x06808000: case 0x0680C000: - case 0x06810000: case 0x06814000: case 0x06818000: case 0x0681C000: - bank = 0; - addr &= 0x1FFFF; - break; - - case 0x06820000: case 0x06824000: case 0x06828000: case 0x0682C000: - case 0x06830000: case 0x06834000: case 0x06838000: case 0x0683C000: - bank = 1; - addr &= 0x1FFFF; - break; - - case 0x06840000: case 0x06844000: case 0x06848000: case 0x0684C000: - case 0x06850000: case 0x06854000: case 0x06858000: case 0x0685C000: - bank = 2; - addr &= 0x1FFFF; - break; - - case 0x06860000: case 0x06864000: case 0x06868000: case 0x0686C000: - case 0x06870000: case 0x06874000: case 0x06878000: case 0x0687C000: - bank = 3; - addr &= 0x1FFFF; - break; - - case 0x06880000: case 0x06884000: case 0x06888000: case 0x0688C000: - bank = 4; - addr &= 0xFFFF; - break; - - case 0x06890000: - bank = 5; - addr &= 0x3FFF; - break; - - case 0x06894000: - bank = 6; - addr &= 0x3FFF; - break; - - case 0x06898000: - case 0x0689C000: - bank = 7; - addr &= 0x7FFF; - break; - - case 0x068A0000: - bank = 8; - addr &= 0x3FFF; - break; - - default: return; - } + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if 
(mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; - if (VRAMMap_LCDC & (1<<bank)) - { - *(T*)&VRAM[bank][addr] = val; - VRAMDirty[bank][addr / VRAMDirtyGranularity] = true; + return ret; } -} + template<typename T> + void WriteVRAM_BBG(u32 addr, T val) + { + u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; -template<typename T> -T ReadVRAM_ABG(u32 addr) -{ - u8* ptr = VRAMPtr_ABG[(addr >> 14) & 0x1F]; - if (ptr) return *(T*)&ptr[addr & 0x3FFF]; + if (mask & (1<<2)) + { + VRAMDirty[2][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_C[addr & 0x1FFFF] = val; + } + if (mask & (1<<7)) + { + VRAMDirty[7][(addr & 0x7FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_H[addr & 0x7FFF] = val; + } + if (mask & (1<<8)) + { + VRAMDirty[8][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_I[addr & 0x3FFF] = val; + } + } - T ret = 0; - u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; - if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; - if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; - if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; - if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; - if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; - if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; - if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + template<typename T> + T ReadVRAM_BOBJ(u32 addr) const noexcept + { + u8* ptr = VRAMPtr_BOBJ[(addr >> 14) & 0x7]; + if (ptr) return *(T*)&ptr[addr & 0x3FFF]; - return ret; -} + T ret = 0; + u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; -template<typename T> -void WriteVRAM_ABG(u32 addr, T val) -{ - u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; - if (mask & (1<<0)) - { - VRAMDirty[0][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_A[addr & 0x1FFFF] = val; - } - if (mask & (1<<1)) - { - VRAMDirty[1][(addr & 
0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_B[addr & 0x1FFFF] = val; + return ret; } - if (mask & (1<<2)) + + template<typename T> + void WriteVRAM_BOBJ(u32 addr, T val) { - VRAMDirty[2][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_C[addr & 0x1FFFF] = val; + u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + + if (mask & (1<<3)) + { + VRAMDirty[3][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_D[addr & 0x1FFFF] = val; + } + if (mask & (1<<8)) + { + VRAMDirty[8][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; + *(T*)&VRAM_I[addr & 0x3FFF] = val; + } } - if (mask & (1<<3)) + + template<typename T> + T ReadVRAM_ARM7(u32 addr) const noexcept { - VRAMDirty[3][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_D[addr & 0x1FFFF] = val; + T ret = 0; + u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + + return ret; } - if (mask & (1<<4)) + + template<typename T> + void WriteVRAM_ARM7(u32 addr, T val) { - VRAMDirty[4][(addr & 0xFFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_E[addr & 0xFFFF] = val; + u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + + if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; + if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; } - if (mask & (1<<5)) + + + template<typename T> + T ReadVRAM_BG(u32 addr) const noexcept { - VRAMDirty[5][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_F[addr & 0x3FFF] = val; + if ((addr & 0xFFE00000) == 0x06000000) + return ReadVRAM_ABG<T>(addr); + else + return ReadVRAM_BBG<T>(addr); } - if (mask & (1<<6)) + + template<typename T> + T ReadVRAM_OBJ(u32 addr) const noexcept { - VRAMDirty[6][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_G[addr & 0x3FFF] = val; + if ((addr & 0xFFE00000) == 0x06400000) + return ReadVRAM_AOBJ<T>(addr); + else + return ReadVRAM_BOBJ<T>(addr); } -} -template<typename T> -T ReadVRAM_AOBJ(u32 addr) 
-{ - u8* ptr = VRAMPtr_AOBJ[(addr >> 14) & 0xF]; - if (ptr) return *(T*)&ptr[addr & 0x3FFF]; + template<typename T> + T ReadVRAM_Texture(u32 addr) const noexcept + { + T ret = 0; + u32 mask = VRAMMap_Texture[(addr >> 17) & 0x3]; - T ret = 0; - u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; + if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; + if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; + if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; - if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; - if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; - if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; - if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; - if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + return ret; + } - return ret; -} + template<typename T> + T ReadVRAM_TexPal(u32 addr) const noexcept + { + T ret = 0; + u32 mask = VRAMMap_TexPal[(addr >> 14) & 0x7]; -template<typename T> -void WriteVRAM_AOBJ(u32 addr, T val) -{ - u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; - if (mask & (1<<0)) - { - VRAMDirty[0][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_A[addr & 0x1FFFF] = val; + return ret; } - if (mask & (1<<1)) + + template<typename T> + T ReadPalette(u32 addr) const noexcept { - VRAMDirty[1][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_B[addr & 0x1FFFF] = val; + return *(T*)&Palette[addr & 0x7FF]; } - if (mask & (1<<4)) + + template<typename T> + void WritePalette(u32 addr, T val) { - VRAMDirty[4][(addr & 0xFFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_E[addr & 0xFFFF] = val; + addr &= 0x7FF; + + *(T*)&Palette[addr] = val; + PaletteDirty |= 1 << (addr / VRAMDirtyGranularity); } - if (mask & (1<<5)) + + template<typename T> + T ReadOAM(u32 addr) const 
noexcept { - VRAMDirty[5][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_F[addr & 0x3FFF] = val; + return *(T*)&OAM[addr & 0x7FF]; } - if (mask & (1<<6)) + + template<typename T> + void WriteOAM(u32 addr, T val) { - VRAMDirty[6][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_G[addr & 0x3FFF] = val; + addr &= 0x7FF; + + *(T*)&OAM[addr] = val; + OAMDirty |= 1 << (addr / 1024); } -} + void SetPowerCnt(u32 val) noexcept; -template<typename T> -T ReadVRAM_BBG(u32 addr) -{ - u8* ptr = VRAMPtr_BBG[(addr >> 14) & 0x7]; - if (ptr) return *(T*)&ptr[addr & 0x3FFF]; + void StartFrame() noexcept; + void FinishFrame(u32 lines) noexcept; + void BlankFrame() noexcept; + void StartScanline(u32 line) noexcept; + void StartHBlank(u32 line) noexcept; - T ret = 0; - u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + void DisplayFIFO(u32 x) noexcept; - if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; - if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; - if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; + void SetDispStat(u32 cpu, u16 val) noexcept; - return ret; -} + void SetVCount(u16 val) noexcept; + bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept; -template<typename T> -void WriteVRAM_BBG(u32 addr, T val) -{ - u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + bool MakeVRAMFlat_AOBJCoherent(NonStupidBitField<256*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_BOBJCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept; - if (mask & (1<<2)) - { - VRAMDirty[2][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_C[addr & 0x1FFFF] = val; - } - if (mask & (1<<7)) - { - VRAMDirty[7][(addr & 0x7FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_H[addr & 0x7FFF] = val; - } - if (mask & (1<<8)) - { - VRAMDirty[8][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - 
*(T*)&VRAM_I[addr & 0x3FFF] = val; - } -} + bool MakeVRAMFlat_ABGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_BBGExtPalCoherent(NonStupidBitField<32*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_AOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGranularity>& dirty) noexcept; -template<typename T> -T ReadVRAM_BOBJ(u32 addr) -{ - u8* ptr = VRAMPtr_BOBJ[(addr >> 14) & 0x7]; - if (ptr) return *(T*)&ptr[addr & 0x3FFF]; + bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept; + bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept; - T ret = 0; - u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + void SyncDirtyFlags() noexcept; - if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; - if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x3FFF]; + u16 VCount = 0; + u16 TotalScanlines = 0; + u16 DispStat[2] {}; + u8 VRAMCNT[9] {}; + u8 VRAMSTAT = 0; - return ret; -} + u8 Palette[2*1024] {}; + u8 OAM[2*1024] {}; -template<typename T> -void WriteVRAM_BOBJ(u32 addr, T val) -{ - u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + u8 VRAM_A[128*1024] {}; + u8 VRAM_B[128*1024] {}; + u8 VRAM_C[128*1024] {}; + u8 VRAM_D[128*1024] {}; + u8 VRAM_E[ 64*1024] {}; + u8 VRAM_F[ 16*1024] {}; + u8 VRAM_G[ 16*1024] {}; + u8 VRAM_H[ 32*1024] {}; + u8 VRAM_I[ 16*1024] {}; - if (mask & (1<<3)) - { - VRAMDirty[3][(addr & 0x1FFFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_D[addr & 0x1FFFF] = val; - } - if (mask & (1<<8)) - { - VRAMDirty[8][(addr & 0x3FFF) / VRAMDirtyGranularity] = true; - *(T*)&VRAM_I[addr & 0x3FFF] = val; - } -} + u8* const VRAM[9] = {VRAM_A, VRAM_B, VRAM_C, VRAM_D, VRAM_E, VRAM_F, VRAM_G, VRAM_H, VRAM_I}; + u32 const VRAMMask[9] = {0x1FFFF, 0x1FFFF, 0x1FFFF, 0x1FFFF, 0xFFFF, 0x3FFF, 0x3FFF, 0x7FFF, 0x3FFF}; 
-template<typename T> -T ReadVRAM_ARM7(u32 addr) -{ - T ret = 0; - u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + u32 VRAMMap_LCDC = 0; + u32 VRAMMap_ABG[0x20] {}; + u32 VRAMMap_AOBJ[0x10] {}; + u32 VRAMMap_BBG[0x8] {}; + u32 VRAMMap_BOBJ[0x8] {}; + u32 VRAMMap_ABGExtPal[4] {}; + u32 VRAMMap_AOBJExtPal {}; + u32 VRAMMap_BBGExtPal[4] {}; + u32 VRAMMap_BOBJExtPal {}; + u32 VRAMMap_Texture[4] {}; + u32 VRAMMap_TexPal[8] {}; + u32 VRAMMap_ARM7[2] {}; - if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; - if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + u8* VRAMPtr_ABG[0x20] {}; + u8* VRAMPtr_AOBJ[0x10] {}; + u8* VRAMPtr_BBG[0x8] {}; + u8* VRAMPtr_BOBJ[0x8] {}; - return ret; -} + int FrontBuffer = 0; + u32* Framebuffer[2][2] {}; -template<typename T> -void WriteVRAM_ARM7(u32 addr, T val) -{ - u32 mask = VRAMMap_ARM7[(addr >> 17) & 0x1]; + GPU2D::Unit GPU2D_A; + GPU2D::Unit GPU2D_B; + GPU3D::GPU3D GPU3D {}; - if (mask & (1<<2)) *(T*)&VRAM_C[addr & 0x1FFFF] = val; - if (mask & (1<<3)) *(T*)&VRAM_D[addr & 0x1FFFF] = val; -} + NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9] {}; + VRAMTrackingSet<512*1024, 16*1024> VRAMDirty_ABG {}; + VRAMTrackingSet<256*1024, 16*1024> VRAMDirty_AOBJ {}; + VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BBG {}; + VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_BOBJ {}; + VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_ABGExtPal {}; + VRAMTrackingSet<32*1024, 8*1024> VRAMDirty_BBGExtPal {}; + VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_AOBJExtPal {}; + VRAMTrackingSet<8*1024, 8*1024> VRAMDirty_BOBJExtPal {}; -template<typename T> -T ReadVRAM_BG(u32 addr) -{ - if ((addr & 0xFFE00000) == 0x06000000) - return ReadVRAM_ABG<T>(addr); - else - return ReadVRAM_BBG<T>(addr); -} + VRAMTrackingSet<512*1024, 128*1024> VRAMDirty_Texture {}; + VRAMTrackingSet<128*1024, 16*1024> VRAMDirty_TexPal {}; -template<typename T> -T ReadVRAM_OBJ(u32 addr) -{ - if ((addr & 0xFFE00000) == 0x06400000) - return ReadVRAM_AOBJ<T>(addr); - else - return 
ReadVRAM_BOBJ<T>(addr); -} + u8 VRAMFlat_ABG[512*1024] {}; + u8 VRAMFlat_BBG[128*1024] {}; + u8 VRAMFlat_AOBJ[256*1024] {}; + u8 VRAMFlat_BOBJ[128*1024] {}; + u8 VRAMFlat_ABGExtPal[32*1024] {}; + u8 VRAMFlat_BBGExtPal[32*1024] {}; -template<typename T> -T ReadVRAM_Texture(u32 addr) -{ - T ret = 0; - u32 mask = VRAMMap_Texture[(addr >> 17) & 0x3]; + u8 VRAMFlat_AOBJExtPal[8*1024] {}; + u8 VRAMFlat_BOBJExtPal[8*1024] {}; + + u8 VRAMFlat_Texture[512*1024] {}; + u8 VRAMFlat_TexPal[128*1024] {}; + + int Renderer = 0; +#ifdef OGLRENDERER_ENABLED + std::unique_ptr<GLCompositor> CurGLCompositor = nullptr; +#endif +private: + void ResetVRAMCache() noexcept; + void AssignFramebuffers() noexcept; + template<typename T> + T ReadVRAM_ABGExtPal(u32 addr) const noexcept + { + u32 mask = VRAMMap_ABGExtPal[(addr >> 13) & 0x3]; - if (mask & (1<<0)) ret |= *(T*)&VRAM_A[addr & 0x1FFFF]; - if (mask & (1<<1)) ret |= *(T*)&VRAM_B[addr & 0x1FFFF]; - if (mask & (1<<2)) ret |= *(T*)&VRAM_C[addr & 0x1FFFF]; - if (mask & (1<<3)) ret |= *(T*)&VRAM_D[addr & 0x1FFFF]; + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0x7FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; + if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; - return ret; -} + return ret; + } -template<typename T> -T ReadVRAM_TexPal(u32 addr) -{ - T ret = 0; - u32 mask = VRAMMap_TexPal[(addr >> 14) & 0x7]; + template<typename T> + T ReadVRAM_BBGExtPal(u32 addr) const noexcept + { + u32 mask = VRAMMap_BBGExtPal[(addr >> 13) & 0x3]; - if (mask & (1<<4)) ret |= *(T*)&VRAM_E[addr & 0xFFFF]; - if (mask & (1<<5)) ret |= *(T*)&VRAM_F[addr & 0x3FFF]; - if (mask & (1<<6)) ret |= *(T*)&VRAM_G[addr & 0x3FFF]; + T ret = 0; + if (mask & (1<<7)) ret |= *(T*)&VRAM_H[addr & 0x7FFF]; - return ret; -} + return ret; + } -template<typename T> -T ReadPalette(u32 addr) -{ - return *(T*)&Palette[addr & 0x7FF]; -} + template<typename T> + T ReadVRAM_AOBJExtPal(u32 addr) const noexcept + { + u32 mask = VRAMMap_AOBJExtPal; 
-template<typename T> -void WritePalette(u32 addr, T val) -{ - addr &= 0x7FF; + T ret = 0; + if (mask & (1<<4)) ret |= *(T*)&VRAM_F[addr & 0x1FFF]; + if (mask & (1<<5)) ret |= *(T*)&VRAM_G[addr & 0x1FFF]; - *(T*)&Palette[addr] = val; - PaletteDirty |= 1 << (addr / VRAMDirtyGranularity); -} + return ret; + } -template<typename T> -T ReadOAM(u32 addr) -{ - return *(T*)&OAM[addr & 0x7FF]; -} + template<typename T> + T ReadVRAM_BOBJExtPal(u32 addr) const noexcept + { + u32 mask = VRAMMap_BOBJExtPal; -template<typename T> -void WriteOAM(u32 addr, T val) -{ - addr &= 0x7FF; + T ret = 0; + if (mask & (1<<8)) ret |= *(T*)&VRAM_I[addr & 0x1FFF]; - *(T*)&OAM[addr] = val; - OAMDirty |= 1 << (addr / 1024); -} + return ret; + } -void SetPowerCnt(u32 val); + template <u32 MappingGranularity, u32 Size> + constexpr bool CopyLinearVRAM(u8* flat, const u32* mappings, NonStupidBitField<Size>& dirty, u64 (GPU::* const slowAccess)(u32) const noexcept) noexcept + { + const u32 VRAMBitsPerMapping = MappingGranularity / VRAMDirtyGranularity; -void StartFrame(); -void FinishFrame(u32 lines); -void BlankFrame(); -void StartScanline(u32 line); -void StartHBlank(u32 line); + bool change = false; -void DisplayFIFO(u32 x); + typename NonStupidBitField<Size>::Iterator it = dirty.Begin(); + while (it != dirty.End()) + { + u32 offset = *it * VRAMDirtyGranularity; + u8* dst = flat + offset; + u8* fastAccess = GetUniqueBankPtr(mappings[*it / VRAMBitsPerMapping], offset); + if (fastAccess) + { + memcpy(dst, fastAccess, VRAMDirtyGranularity); + } + else + { + for (u32 i = 0; i < VRAMDirtyGranularity; i += 8) + *(u64*)&dst[i] = (this->*slowAccess)(offset + i); + } + change = true; + it++; + } + return change; + } -void SetDispStat(u32 cpu, u16 val); + u32 NextVCount = 0; -void SetVCount(u16 val); -} + bool RunFIFO = false; -#include "GPU3D.h" + u16 VMatch[2] {}; + + std::unique_ptr<GPU2D::Renderer2D> GPU2D_Renderer = nullptr; + + u32 OAMDirty = 0; + u32 PaletteDirty = 0; +}; +} #endif diff --git 
a/src/GPU2D.cpp b/src/GPU2D.cpp index d1f6beb..e892116 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -85,9 +85,8 @@ using Platform::LogLevel; namespace GPU2D { -Unit::Unit(u32 num) +Unit::Unit(u32 num, Melon::GPU& gpu) : Num(num), GPU(gpu) { - Num = num; } void Unit::Reset() @@ -287,10 +286,10 @@ void Unit::Write8(u32 addr, u8 val) return; case 0x10: - if (!Num) GPU3D::SetRenderXPos((GPU3D::RenderXPos & 0xFF00) | val); + if (!Num) GPU.GPU3D.SetRenderXPos((GPU.GPU3D.GetRenderXPos() & 0xFF00) | val); break; case 0x11: - if (!Num) GPU3D::SetRenderXPos((GPU3D::RenderXPos & 0x00FF) | (val << 8)); + if (!Num) GPU.GPU3D.SetRenderXPos((GPU.GPU3D.GetRenderXPos() & 0x00FF) | (val << 8)); break; } @@ -383,7 +382,7 @@ void Unit::Write16(u32 addr, u16 val) return; case 0x010: - if (!Num) GPU3D::SetRenderXPos(val); + if (!Num) GPU.GPU3D.SetRenderXPos(val); break; case 0x068: @@ -422,21 +421,21 @@ void Unit::Write16(u32 addr, u16 val) case 0x026: BGRotD[0] = val; return; case 0x028: BGXRef[0] = (BGXRef[0] & 0xFFFF0000) | val; - if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; return; case 0x02A: if (val & 0x0800) val |= 0xF000; BGXRef[0] = (BGXRef[0] & 0xFFFF) | (val << 16); - if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; return; case 0x02C: BGYRef[0] = (BGYRef[0] & 0xFFFF0000) | val; - if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; return; case 0x02E: if (val & 0x0800) val |= 0xF000; BGYRef[0] = (BGYRef[0] & 0xFFFF) | (val << 16); - if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; return; case 0x030: BGRotA[1] = val; return; @@ -445,21 +444,21 @@ void Unit::Write16(u32 addr, u16 val) case 0x036: BGRotD[1] = val; return; case 0x038: BGXRef[1] = (BGXRef[1] & 0xFFFF0000) | val; - if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + if 
(GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; return; case 0x03A: if (val & 0x0800) val |= 0xF000; BGXRef[1] = (BGXRef[1] & 0xFFFF) | (val << 16); - if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + if (GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; return; case 0x03C: BGYRef[1] = (BGYRef[1] & 0xFFFF0000) | val; - if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; return; case 0x03E: if (val & 0x0800) val |= 0xF000; BGYRef[1] = (BGYRef[1] & 0xFFFF) | (val << 16); - if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; return; case 0x040: @@ -541,23 +540,23 @@ void Unit::Write32(u32 addr, u32 val) case 0x028: if (val & 0x08000000) val |= 0xF0000000; BGXRef[0] = val; - if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0]; + if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; return; case 0x02C: if (val & 0x08000000) val |= 0xF0000000; BGYRef[0] = val; - if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0]; + if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; return; case 0x038: if (val & 0x08000000) val |= 0xF0000000; BGXRef[1] = val; - if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1]; + if (GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; return; case 0x03C: if (val & 0x08000000) val |= 0xF0000000; BGYRef[1] = val; - if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; + if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; return; } } @@ -628,15 +627,15 @@ u16* Unit::GetBGExtPal(u32 slot, u32 pal) const u32 PaletteSize = 256 * 2; const u32 SlotSize = PaletteSize * 16; return (u16*)&(Num == 0 - ? GPU::VRAMFlat_ABGExtPal - : GPU::VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize]; + ? GPU.VRAMFlat_ABGExtPal + : GPU.VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize]; } u16* Unit::GetOBJExtPal() { return Num == 0 - ? (u16*)GPU::VRAMFlat_AOBJExtPal - : (u16*)GPU::VRAMFlat_BOBJExtPal; + ? 
(u16*)GPU.VRAMFlat_AOBJExtPal + : (u16*)GPU.VRAMFlat_BOBJExtPal; } void Unit::CheckWindows(u32 line) @@ -698,12 +697,12 @@ void Unit::GetBGVRAM(u8*& data, u32& mask) { if (Num == 0) { - data = GPU::VRAMFlat_ABG; + data = GPU.VRAMFlat_ABG; mask = 0x7FFFF; } else { - data = GPU::VRAMFlat_BBG; + data = GPU.VRAMFlat_BBG; mask = 0x1FFFF; } } @@ -712,12 +711,12 @@ void Unit::GetOBJVRAM(u8*& data, u32& mask) { if (Num == 0) { - data = GPU::VRAMFlat_AOBJ; + data = GPU.VRAMFlat_AOBJ; mask = 0x3FFFF; } else { - data = GPU::VRAMFlat_BOBJ; + data = GPU.VRAMFlat_BOBJ; mask = 0x1FFFF; } } diff --git a/src/GPU2D.h b/src/GPU2D.h index ffef57d..ad052bf 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -22,13 +22,20 @@ #include "types.h" #include "Savestate.h" +namespace Melon +{ +class GPU; +} + namespace GPU2D { class Unit { public: - Unit(u32 num); + // take a reference to the GPU so we can access its state + // and ensure that it's not null + Unit(u32 num, Melon::GPU& gpu); Unit(const Unit&) = delete; Unit& operator=(const Unit&) = delete; @@ -116,6 +123,8 @@ public: u32 CaptureCnt; u16 MasterBrightness; +private: + Melon::GPU& GPU; }; class Renderer2D diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 495b747..7447bcf 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -18,12 +18,13 @@ #include "GPU2D_Soft.h" #include "GPU.h" +#include "GPU3D_OpenGL.h" namespace GPU2D { -SoftRenderer::SoftRenderer() - : Renderer2D() +SoftRenderer::SoftRenderer(Melon::GPU& gpu) + : Renderer2D(), GPU(gpu) { // initialize mosaic table for (int m = 0; m < 16; m++) @@ -165,29 +166,29 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) { CurUnit = unit; - int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256; + int stride = GPU.GPU3D.IsRendererAccelerated() ? 
(256*3 + 1) : 256; u32* dst = &Framebuffer[CurUnit->Num][stride * line]; int n3dline = line; - line = GPU::VCount; + line = GPU.VCount; if (CurUnit->Num == 0) { - auto bgDirty = GPU::VRAMDirty_ABG.DeriveState(GPU::VRAMMap_ABG); - GPU::MakeVRAMFlat_ABGCoherent(bgDirty); - auto bgExtPalDirty = GPU::VRAMDirty_ABGExtPal.DeriveState(GPU::VRAMMap_ABGExtPal); - GPU::MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); - auto objExtPalDirty = GPU::VRAMDirty_AOBJExtPal.DeriveState(&GPU::VRAMMap_AOBJExtPal); - GPU::MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + auto bgDirty = GPU.VRAMDirty_ABG.DeriveState(GPU.VRAMMap_ABG, GPU); + GPU.MakeVRAMFlat_ABGCoherent(bgDirty); + auto bgExtPalDirty = GPU.VRAMDirty_ABGExtPal.DeriveState(GPU.VRAMMap_ABGExtPal, GPU); + GPU.MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU.VRAMDirty_AOBJExtPal.DeriveState(&GPU.VRAMMap_AOBJExtPal, GPU); + GPU.MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); } else { - auto bgDirty = GPU::VRAMDirty_BBG.DeriveState(GPU::VRAMMap_BBG); - GPU::MakeVRAMFlat_BBGCoherent(bgDirty); - auto bgExtPalDirty = GPU::VRAMDirty_BBGExtPal.DeriveState(GPU::VRAMMap_BBGExtPal); - GPU::MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); - auto objExtPalDirty = GPU::VRAMDirty_BOBJExtPal.DeriveState(&GPU::VRAMMap_BOBJExtPal); - GPU::MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + auto bgDirty = GPU.VRAMDirty_BBG.DeriveState(GPU.VRAMMap_BBG, GPU); + GPU.MakeVRAMFlat_BBGCoherent(bgDirty); + auto bgExtPalDirty = GPU.VRAMDirty_BBGExtPal.DeriveState(GPU.VRAMMap_BBGExtPal, GPU); + GPU.MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + auto objExtPalDirty = GPU.VRAMDirty_BOBJExtPal.DeriveState(&GPU.VRAMMap_BOBJExtPal, GPU); + GPU.MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); } bool forceblank = false; @@ -205,11 +206,11 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) if (CurUnit->Num == 0) { - if (!GPU3D::CurrentRenderer->Accelerated) - _3DLine = GPU3D::GetLine(n3dline); + if 
(!GPU.GPU3D.IsRendererAccelerated()) + _3DLine = GPU.GPU3D.GetLine(n3dline); else if (CurUnit->CaptureLatch && (((CurUnit->CaptureCnt >> 29) & 0x3) != 1)) { - _3DLine = GPU3D::GetLine(n3dline); + _3DLine = GPU.GPU3D.GetLine(n3dline); //GPU3D::GLRenderer::PrepareCaptureFrame(); } } @@ -219,7 +220,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) for (int i = 0; i < 256; i++) dst[i] = 0xFFFFFFFF; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU.GPU3D.IsRendererAccelerated()) { dst[256*3] = 0; } @@ -253,9 +254,9 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) case 2: // VRAM display { u32 vrambank = (CurUnit->DispCnt >> 18) & 0x3; - if (GPU::VRAMMap_LCDC & (1<<vrambank)) + if (GPU.VRAMMap_LCDC & (1<<vrambank)) { - u16* vram = (u16*)GPU::VRAM[vrambank]; + u16* vram = (u16*)GPU.VRAM[vrambank]; vram = &vram[line * 256]; for (int i = 0; i < 256; i++) @@ -311,7 +312,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) u32 masterBrightness = CurUnit->MasterBrightness; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU.GPU3D.IsRendererAccelerated()) { dst[256*3] = masterBrightness | (CurUnit->DispCnt & 0x30000); return; @@ -363,11 +364,11 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) void SoftRenderer::VBlankEnd(Unit* unitA, Unit* unitB) { #ifdef OGLRENDERER_ENABLED - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU.GPU3D.IsRendererAccelerated()) { if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1)) { - reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->PrepareCaptureFrame(); + reinterpret_cast<GPU3D::GLRenderer*>(GPU.GPU3D.GetCurrentRenderer())->PrepareCaptureFrame(); } } #endif @@ -380,10 +381,10 @@ void SoftRenderer::DoCapture(u32 line, u32 width) // TODO: confirm this // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC - if (!(GPU::VRAMMap_LCDC & (1<<dstvram))) + if (!(GPU.VRAMMap_LCDC & (1<<dstvram))) return; - u16* dst = (u16*)GPU::VRAM[dstvram]; + u16* 
dst = (u16*)GPU.VRAM[dstvram]; u32 dstaddr = (((captureCnt >> 18) & 0x3) << 14) + (line * width); // TODO: handle 3D in GPU3D::CurrentRenderer->Accelerated mode!! @@ -396,7 +397,7 @@ void SoftRenderer::DoCapture(u32 line, u32 width) else { srcA = BGOBJLine; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU.GPU3D.IsRendererAccelerated()) { // in GPU3D::CurrentRenderer->Accelerated mode, compositing is normally done on the GPU // but when doing display capture, we do need the composited output @@ -468,8 +469,8 @@ void SoftRenderer::DoCapture(u32 line, u32 width) else { u32 srcvram = (CurUnit->DispCnt >> 18) & 0x3; - if (GPU::VRAMMap_LCDC & (1<<srcvram)) - srcB = (u16*)GPU::VRAM[srcvram]; + if (GPU.VRAMMap_LCDC & (1<<srcvram)) + srcB = (u16*)GPU.VRAM[srcvram]; if (((CurUnit->DispCnt >> 16) & 0x3) != 2) srcBaddr += ((captureCnt >> 26) & 0x3) << 14; @@ -478,8 +479,8 @@ void SoftRenderer::DoCapture(u32 line, u32 width) dstaddr &= 0xFFFF; srcBaddr &= 0xFFFF; - static_assert(GPU::VRAMDirtyGranularity == 512, ""); - GPU::VRAMDirty[dstvram][(dstaddr * 2) / GPU::VRAMDirtyGranularity] = true; + static_assert(Melon::VRAMDirtyGranularity == 512); + GPU.VRAMDirty[dstvram][(dstaddr * 2) / Melon::VRAMDirtyGranularity] = true; switch ((captureCnt >> 29) & 0x3) { @@ -600,12 +601,12 @@ void SoftRenderer::DoCapture(u32 line, u32 width) { \ if ((bgCnt[num] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \ { \ - if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ + if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_##type<true, DrawPixel_Accel>(line, num); \ else DrawBG_##type<true, DrawPixel_Normal>(line, num); \ } \ else \ { \ - if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ + if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_##type<false, DrawPixel_Accel>(line, num); \ else DrawBG_##type<false, DrawPixel_Normal>(line, num); \ } \ } while (false) @@ -615,18 +616,18 @@ void SoftRenderer::DoCapture(u32 line, u32 
width) { \ if ((bgCnt[2] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \ { \ - if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \ + if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_Large<true, DrawPixel_Accel>(line); \ else DrawBG_Large<true, DrawPixel_Normal>(line); \ } \ else \ { \ - if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \ + if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_Large<false, DrawPixel_Accel>(line); \ else DrawBG_Large<false, DrawPixel_Normal>(line); \ } \ } while (false) #define DoInterleaveSprites(prio) \ - if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); + if (GPU.GPU3D.IsRendererAccelerated()) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio); template<u32 bgmode> void SoftRenderer::DrawScanlineBGMode(u32 line) @@ -756,8 +757,8 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line) } u64 backdrop; - if (CurUnit->Num) backdrop = *(u16*)&GPU::Palette[0x400]; - else backdrop = *(u16*)&GPU::Palette[0]; + if (CurUnit->Num) backdrop = *(u16*)&GPU.Palette[0x400]; + else backdrop = *(u16*)&GPU.Palette[0]; { u8 r = (backdrop & 0x001F) << 1; @@ -794,7 +795,7 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line) // color special effects // can likely be optimized - if (!GPU3D::CurrentRenderer->Accelerated) + if (!GPU.GPU3D.IsRendererAccelerated()) { for (int i = 0; i < 256; i++) { @@ -940,7 +941,7 @@ void SoftRenderer::DrawBG_3D() { int i = 0; - if (GPU3D::CurrentRenderer->Accelerated) + if (GPU.GPU3D.IsRendererAccelerated()) { for (i = 0; i < 256; i++) { @@ -997,14 +998,14 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) tilesetaddr = ((bgcnt & 0x003C) << 12); tilemapaddr = ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0x400]; + pal = (u16*)&GPU.Palette[0x400]; } else { tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilemapaddr 
= ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0]; + pal = (u16*)&GPU.Palette[0]; } // adjust Y position in tilemap @@ -1176,14 +1177,14 @@ void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum) tilesetaddr = ((bgcnt & 0x003C) << 12); tilemapaddr = ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0x400]; + pal = (u16*)&GPU.Palette[0x400]; } else { tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilemapaddr = ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0]; + pal = (u16*)&GPU.Palette[0]; } u16 curtile; @@ -1330,8 +1331,8 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) { // 256-color bitmap - if (CurUnit->Num) pal = (u16*)&GPU::Palette[0x400]; - else pal = (u16*)&GPU::Palette[0]; + if (CurUnit->Num) pal = (u16*)&GPU.Palette[0x400]; + else pal = (u16*)&GPU.Palette[0]; u8 color; @@ -1389,14 +1390,14 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) tilesetaddr = ((bgcnt & 0x003C) << 12); tilemapaddr = ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0x400]; + pal = (u16*)&GPU.Palette[0x400]; } else { tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); tilemapaddr = ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); - pal = (u16*)&GPU::Palette[0]; + pal = (u16*)&GPU.Palette[0]; } u16 curtile; @@ -1507,8 +1508,8 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2 // 256-color bitmap - if (CurUnit->Num) pal = (u16*)&GPU::Palette[0x400]; - else pal = (u16*)&GPU::Palette[0]; + if (CurUnit->Num) pal = (u16*)&GPU.Palette[0x400]; + else pal = (u16*)&GPU.Palette[0]; u8 color; @@ -1581,7 +1582,7 @@ template <SoftRenderer::DrawPixel drawPixel> void SoftRenderer::InterleaveSprites(u32 prio) { u32* objLine = OBJLine[CurUnit->Num]; - u16* pal = (u16*)&GPU::Palette[CurUnit->Num ? 0x600 : 0x200]; + u16* pal = (u16*)&GPU.Palette[CurUnit->Num ? 
0x600 : 0x200]; if (CurUnit->DispCnt & 0x80000000) { @@ -1655,13 +1656,13 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit) if (CurUnit->Num == 0) { - auto objDirty = GPU::VRAMDirty_AOBJ.DeriveState(GPU::VRAMMap_AOBJ); - GPU::MakeVRAMFlat_AOBJCoherent(objDirty); + auto objDirty = GPU.VRAMDirty_AOBJ.DeriveState(GPU.VRAMMap_AOBJ, GPU); + GPU.MakeVRAMFlat_AOBJCoherent(objDirty); } else { - auto objDirty = GPU::VRAMDirty_BOBJ.DeriveState(GPU::VRAMMap_BOBJ); - GPU::MakeVRAMFlat_BOBJCoherent(objDirty); + auto objDirty = GPU.VRAMDirty_BOBJ.DeriveState(GPU.VRAMMap_BOBJ, GPU); + GPU.MakeVRAMFlat_BOBJCoherent(objDirty); } NumSprites[CurUnit->Num] = 0; @@ -1669,7 +1670,7 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit) memset(OBJWindow[CurUnit->Num], 0, 256); if (!(CurUnit->DispCnt & 0x1000)) return; - u16* oam = (u16*)&GPU::OAM[CurUnit->Num ? 0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 0x400 : 0]; const s32 spritewidth[16] = { @@ -1764,7 +1765,7 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit) template<bool window> void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) { - u16* oam = (u16*)&GPU::OAM[CurUnit->Num ? 0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 0x400 : 0]; u16* attrib = &oam[num * 4]; u16* rotparams = &oam[(((attrib[1] >> 9) & 0x1F) * 16) + 3]; @@ -1976,7 +1977,7 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, template<bool window> void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) { - u16* oam = (u16*)&GPU::OAM[CurUnit->Num ? 0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 
0x400 : 0]; u16* attrib = &oam[num * 4]; u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000; diff --git a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h index e1e6eed..7d6d65f 100644 --- a/src/GPU2D_Soft.h +++ b/src/GPU2D_Soft.h @@ -20,19 +20,25 @@ #include "GPU2D.h" +namespace Melon +{ +class GPU; +} + namespace GPU2D { class SoftRenderer : public Renderer2D { public: - SoftRenderer(); + SoftRenderer(Melon::GPU& gpu); ~SoftRenderer() override {} void DrawScanline(u32 line, Unit* unit) override; void DrawSprites(u32 line, Unit* unit) override; void VBlankEnd(Unit* unitA, Unit* unitB) override; private: + Melon::GPU& GPU; alignas(8) u32 BGOBJLine[256*3]; u32* _3DLine; diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 55d6de7..80c5d46 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -138,158 +138,9 @@ const u8 CmdNumParams[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -typedef union -{ - u64 _contents; - struct - { - u32 Param; - u8 Command; - }; - -} CmdFIFOEntry; - -FIFO<CmdFIFOEntry, 256> CmdFIFO; -FIFO<CmdFIFOEntry, 4> CmdPIPE; - -FIFO<CmdFIFOEntry, 64> CmdStallQueue; - -u32 NumCommands, CurCommand, ParamCount, TotalParams; - -bool GeometryEnabled; -bool RenderingEnabled; - -u32 DispCnt; -u8 AlphaRefVal, AlphaRef; - -u16 ToonTable[32]; -u16 EdgeTable[8]; - -u32 FogColor, FogOffset; -u8 FogDensityTable[32]; - -u32 ClearAttr1, ClearAttr2; - -u32 RenderDispCnt; -u8 RenderAlphaRef; - -u16 RenderToonTable[32]; -u16 RenderEdgeTable[8]; - -u32 RenderFogColor, RenderFogOffset, RenderFogShift; -u8 RenderFogDensityTable[34]; - -u32 RenderClearAttr1, RenderClearAttr2; - -bool RenderFrameIdentical; - -u16 RenderXPos; - -u32 ZeroDotWLimit; - -u32 GXStat; - -u32 ExecParams[32]; -u32 ExecParamCount; - -u64 Timestamp; -s32 CycleCount; -s32 VertexPipeline; -s32 NormalPipeline; -s32 PolygonPipeline; -s32 VertexSlotCounter; -u32 VertexSlotsFree; - -u32 NumPushPopCommands; -u32 NumTestCommands; - - -u32 MatrixMode; - -s32 ProjMatrix[16]; -s32 PosMatrix[16]; -s32 VecMatrix[16]; 
-s32 TexMatrix[16]; - -s32 ClipMatrix[16]; -bool ClipMatrixDirty; - -u32 Viewport[6]; - -s32 ProjMatrixStack[16]; -s32 PosMatrixStack[32][16]; -s32 VecMatrixStack[32][16]; -s32 TexMatrixStack[16]; -s32 ProjMatrixStackPointer; -s32 PosMatrixStackPointer; -s32 TexMatrixStackPointer; - void MatrixLoadIdentity(s32* m); -void UpdateClipMatrix(); - - -u32 PolygonMode; -s16 CurVertex[3]; -u8 VertexColor[3]; -s16 TexCoords[2]; -s16 RawTexCoords[2]; -s16 Normal[3]; - -s16 LightDirection[4][3]; -u8 LightColor[4][3]; -u8 MatDiffuse[3]; -u8 MatAmbient[3]; -u8 MatSpecular[3]; -u8 MatEmission[3]; - -bool UseShininessTable; -u8 ShininessTable[128]; - -u32 PolygonAttr; -u32 CurPolygonAttr; -u32 TexParam; -u32 TexPalette; - -s32 PosTestResult[4]; -s16 VecTestResult[3]; - -Vertex TempVertexBuffer[4]; -u32 VertexNum; -u32 VertexNumInPoly; -u32 NumConsecutivePolygons; -Polygon* LastStripPolygon; -u32 NumOpaquePolygons; - -Vertex VertexRAM[6144 * 2]; -Polygon PolygonRAM[2048 * 2]; - -Vertex* CurVertexRAM; -Polygon* CurPolygonRAM; -u32 NumVertices, NumPolygons; -u32 CurRAMBank; - -std::array<Polygon*,2048> RenderPolygonRAM; -u32 RenderNumPolygons; - -u32 FlushRequest; -u32 FlushAttributes; - -std::unique_ptr<GPU3D::Renderer3D> CurrentRenderer = {}; - -bool AbortFrame; - -bool Init() -{ - return true; -} - -void DeInit() -{ - CurrentRenderer = nullptr; -} - -void ResetRenderingState() +void GPU3D::ResetRenderingState() noexcept { RenderNumPolygons = 0; @@ -308,7 +159,7 @@ void ResetRenderingState() RenderClearAttr2 = 0x00007FFF; } -void Reset() +void GPU3D::Reset() noexcept { CmdFIFO.Clear(); CmdPIPE.Clear(); @@ -389,7 +240,7 @@ void Reset() AbortFrame = false; } -void DoSavestate(Savestate* file) +void GPU3D::DoSavestate(Savestate* file) noexcept { file->Section("GP3D"); @@ -634,7 +485,7 @@ void DoSavestate(Savestate* file) -void SetEnabled(bool geometry, bool rendering) +void GPU3D::SetEnabled(bool geometry, bool rendering) noexcept { GeometryEnabled = geometry; RenderingEnabled = 
rendering; @@ -767,7 +618,7 @@ void MatrixTranslate(s32* m, s32* s) m[15] += ((s64)s[0]*m[3] + (s64)s[1]*m[7] + (s64)s[2]*m[11]) >> 12; } -void UpdateClipMatrix() +void GPU3D::UpdateClipMatrix() noexcept { if (!ClipMatrixDirty) return; ClipMatrixDirty = false; @@ -778,7 +629,7 @@ void UpdateClipMatrix() -void AddCycles(s32 num) +void GPU3D::AddCycles(s32 num) noexcept { CycleCount += num; @@ -809,7 +660,7 @@ void AddCycles(s32 num) } } -void NextVertexSlot() +void GPU3D::NextVertexSlot() noexcept { s32 num = (9 - VertexSlotCounter) + 1; @@ -852,7 +703,7 @@ void NextVertexSlot() } } -void StallPolygonPipeline(s32 delay, s32 nonstalldelay) +void GPU3D::StallPolygonPipeline(s32 delay, s32 nonstalldelay) noexcept { if (PolygonPipeline > 0) { @@ -907,7 +758,7 @@ void ClipSegment(Vertex* outbuf, Vertex* vin, Vertex* vout) } template<int comp, bool attribs> -int ClipAgainstPlane(Vertex* vertices, int nverts, int clipstart) +int ClipAgainstPlane(const GPU3D& gpu, Vertex* vertices, int nverts, int clipstart) { Vertex temp[10]; int prev, next; @@ -927,7 +778,7 @@ int ClipAgainstPlane(Vertex* vertices, int nverts, int clipstart) Vertex vtx = vertices[i]; if (vtx.Position[comp] > vtx.Position[3]) { - if ((comp == 2) && (!(CurPolygonAttr & (1<<12)))) return 0; + if ((comp == 2) && (!(gpu.CurPolygonAttr & (1<<12)))) return 0; Vertex* vprev = &vertices[prev]; if (vprev->Position[comp] <= vprev->Position[3]) @@ -988,7 +839,7 @@ int ClipAgainstPlane(Vertex* vertices, int nverts, int clipstart) } template<bool attribs> -int ClipPolygon(Vertex* vertices, int nverts, int clipstart) +int ClipPolygon(GPU3D& gpu, Vertex* vertices, int nverts, int clipstart) { // clip. // for each vertex: @@ -1001,13 +852,13 @@ int ClipPolygon(Vertex* vertices, int nverts, int clipstart) // clipping seems to process the Y plane before the X plane. 
// Z clipping - nverts = ClipAgainstPlane<2, attribs>(vertices, nverts, clipstart); + nverts = ClipAgainstPlane<2, attribs>(gpu, vertices, nverts, clipstart); // Y clipping - nverts = ClipAgainstPlane<1, attribs>(vertices, nverts, clipstart); + nverts = ClipAgainstPlane<1, attribs>(gpu, vertices, nverts, clipstart); // X clipping - nverts = ClipAgainstPlane<0, attribs>(vertices, nverts, clipstart); + nverts = ClipAgainstPlane<0, attribs>(gpu, vertices, nverts, clipstart); return nverts; } @@ -1020,7 +871,7 @@ bool ClipCoordsEqual(Vertex* a, Vertex* b) a->Position[3] == b->Position[3]; } -void SubmitPolygon() +void GPU3D::SubmitPolygon() noexcept { Vertex clippedvertices[10]; Vertex* reusedvertices[2]; @@ -1153,7 +1004,7 @@ void SubmitPolygon() // clipping - nverts = ClipPolygon<true>(clippedvertices, nverts, clipstart); + nverts = ClipPolygon<true>(*this, clippedvertices, nverts, clipstart); if (nverts == 0) { LastStripPolygon = NULL; @@ -1425,7 +1276,7 @@ void SubmitPolygon() LastStripPolygon = NULL; } -void SubmitVertex() +void GPU3D::SubmitVertex() noexcept { s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; @@ -1523,7 +1374,7 @@ void SubmitVertex() AddCycles(3); } -void CalculateLighting() +void GPU3D::CalculateLighting() noexcept { if ((TexParam >> 30) == 2) { @@ -1598,7 +1449,7 @@ void CalculateLighting() } -void BoxTest(u32* params) +void GPU3D::BoxTest(u32* params) noexcept { Vertex cube[8]; Vertex face[10]; @@ -1642,7 +1493,7 @@ void BoxTest(u32* params) // front face (-Z) face[0] = cube[0]; face[1] = cube[1]; face[2] = cube[2]; face[3] = cube[3]; - res = ClipPolygon<false>(face, 4, 0); + res = ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1651,7 +1502,7 @@ void BoxTest(u32* params) // back face (+Z) face[0] = cube[4]; face[1] = cube[5]; face[2] = cube[6]; face[3] = cube[7]; - res = ClipPolygon<false>(face, 4, 0); + res = 
ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1660,7 +1511,7 @@ void BoxTest(u32* params) // left face (-X) face[0] = cube[0]; face[1] = cube[3]; face[2] = cube[4]; face[3] = cube[5]; - res = ClipPolygon<false>(face, 4, 0); + res = ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1669,7 +1520,7 @@ void BoxTest(u32* params) // right face (+X) face[0] = cube[1]; face[1] = cube[2]; face[2] = cube[7]; face[3] = cube[6]; - res = ClipPolygon<false>(face, 4, 0); + res = ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1678,7 +1529,7 @@ void BoxTest(u32* params) // bottom face (-Y) face[0] = cube[0]; face[1] = cube[1]; face[2] = cube[6]; face[3] = cube[5]; - res = ClipPolygon<false>(face, 4, 0); + res = ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1687,7 +1538,7 @@ void BoxTest(u32* params) // top face (+Y) face[0] = cube[2]; face[1] = cube[3]; face[2] = cube[4]; face[3] = cube[7]; - res = ClipPolygon<false>(face, 4, 0); + res = ClipPolygon<false>(*this, face, 4, 0); if (res > 0) { GXStat |= (1<<1); @@ -1695,7 +1546,7 @@ void BoxTest(u32* params) } } -void PosTest() +void GPU3D::PosTest() noexcept { s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; @@ -1708,7 +1559,7 @@ void PosTest() AddCycles(5); } -void VecTest(u32 param) +void GPU3D::VecTest(u32 param) noexcept { // TODO: maybe it overwrites the normal registers, too @@ -1731,7 +1582,7 @@ void VecTest(u32 param) -void CmdFIFOWrite(CmdFIFOEntry& entry) +void GPU3D::CmdFIFOWrite(CmdFIFOEntry& entry) noexcept { if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull()) { @@ -1767,7 +1618,7 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) } } -CmdFIFOEntry CmdFIFORead() +GPU3D::CmdFIFOEntry GPU3D::CmdFIFORead() noexcept { CmdFIFOEntry ret = CmdPIPE.Read(); @@ -1800,39 +1651,7 @@ CmdFIFOEntry CmdFIFORead() return ret; } -inline void VertexPipelineSubmitCmd() -{ - // vertex commands 0x24, 0x25, 0x26, 
0x27, 0x28 - if (!(VertexSlotsFree & 0x1)) NextVertexSlot(); - else AddCycles(1); - NormalPipeline = 0; -} - -inline void VertexPipelineCmdDelayed6() -{ - // commands 0x20, 0x30, 0x31, 0x72 that can run 6 cycles after a vertex - if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1); - else AddCycles(NormalPipeline + 1); - NormalPipeline = 0; -} - -inline void VertexPipelineCmdDelayed8() -{ - // commands 0x29, 0x2A, 0x2B, 0x33, 0x34, 0x41, 0x60, 0x71 that can run 8 cycles after a vertex - if (VertexPipeline > 0) AddCycles(VertexPipeline + 1); - else AddCycles(NormalPipeline + 1); - NormalPipeline = 0; -} - -inline void VertexPipelineCmdDelayed4() -{ - // all other commands can run 4 cycles after a vertex - // no need to do much here since that is the minimum - AddCycles(NormalPipeline + 1); - NormalPipeline = 0; -} - -void ExecuteCommand() +void GPU3D::ExecuteCommand() noexcept { CmdFIFOEntry entry = CmdFIFORead(); @@ -2430,13 +2249,13 @@ void ExecuteCommand() } } -s32 CyclesToRunFor() +s32 GPU3D::CyclesToRunFor() const noexcept { if (CycleCount < 0) return 0; return CycleCount; } -void FinishWork(s32 cycles) +void GPU3D::FinishWork(s32 cycles) noexcept { AddCycles(cycles); if (NormalPipeline) @@ -2450,7 +2269,7 @@ void FinishWork(s32 cycles) GXStat &= ~(1<<27); } -void Run() +void GPU3D::Run() noexcept { if (!GeometryEnabled || FlushRequest || (CmdPIPE.IsEmpty() && !(GXStat & (1<<27)))) @@ -2485,7 +2304,7 @@ void Run() } -void CheckFIFOIRQ() +void GPU3D::CheckFIFOIRQ() noexcept { bool irq = false; switch (GXStat >> 30) @@ -2498,18 +2317,18 @@ void CheckFIFOIRQ() else NDS::ClearIRQ(0, NDS::IRQ_GXFIFO); } -void CheckFIFODMA() +void GPU3D::CheckFIFODMA() noexcept { if (CmdFIFO.Level() < 128) NDS::CheckDMAs(0, 0x07); } -void VCount144() +void GPU3D::VCount144() noexcept { CurrentRenderer->VCount144(); } -void RestartFrame() +void GPU3D::RestartFrame() noexcept { CurrentRenderer->RestartFrame(); } @@ -2527,7 +2346,7 @@ bool YSort(Polygon* a, Polygon* b) return 
a->SortKey < b->SortKey; } -void VBlank() +void GPU3D::VBlank() noexcept { if (GeometryEnabled) { @@ -2604,21 +2423,20 @@ void VBlank() } } -void VCount215() +void GPU3D::VCount215() noexcept { CurrentRenderer->RenderFrame(); } -void SetRenderXPos(u16 xpos) +void GPU3D::SetRenderXPos(u16 xpos) noexcept { if (!RenderingEnabled) return; RenderXPos = xpos & 0x01FF; } -u32 ScrolledLine[256]; -u32* GetLine(int line) +u32* GPU3D::GetLine(int line) noexcept { if (!AbortFrame) { @@ -2653,8 +2471,12 @@ u32* GetLine(int line) return ScrolledLine; } +bool GPU3D::IsRendererAccelerated() const noexcept +{ + return CurrentRenderer && CurrentRenderer->Accelerated; +} -void WriteToGXFIFO(u32 val) +void GPU3D::WriteToGXFIFO(u32 val) noexcept { if (NumCommands == 0) { @@ -2693,7 +2515,7 @@ void WriteToGXFIFO(u32 val) } -u8 Read8(u32 addr) +u8 GPU3D::Read8(u32 addr) noexcept { switch (addr) { @@ -2732,7 +2554,7 @@ u8 Read8(u32 addr) return 0; } -u16 Read16(u32 addr) +u16 GPU3D::Read16(u32 addr) noexcept { switch (addr) { @@ -2776,7 +2598,7 @@ u16 Read16(u32 addr) return 0; } -u32 Read32(u32 addr) +u32 GPU3D::Read32(u32 addr) noexcept { switch (addr) { @@ -2829,7 +2651,7 @@ u32 Read32(u32 addr) return 0; } -void Write8(u32 addr, u8 val) +void GPU3D::Write8(u32 addr, u8 val) noexcept { if (!RenderingEnabled && addr >= 0x04000320 && addr < 0x04000400) return; if (!GeometryEnabled && addr >= 0x04000400 && addr < 0x04000700) return; @@ -2879,7 +2701,7 @@ void Write8(u32 addr, u8 val) Log(LogLevel::Debug, "unknown GPU3D write8 %08X %02X\n", addr, val); } -void Write16(u32 addr, u16 val) +void GPU3D::Write16(u32 addr, u16 val) noexcept { if (!RenderingEnabled && addr >= 0x04000320 && addr < 0x04000400) return; if (!GeometryEnabled && addr >= 0x04000400 && addr < 0x04000700) return; @@ -2966,7 +2788,7 @@ void Write16(u32 addr, u16 val) Log(LogLevel::Debug, "unknown GPU3D write16 %08X %04X\n", addr, val); } -void Write32(u32 addr, u32 val) +void GPU3D::Write32(u32 addr, u32 val) noexcept { if 
(!RenderingEnabled && addr >= 0x04000320 && addr < 0x04000400) return; if (!GeometryEnabled && addr >= 0x04000400 && addr < 0x04000700) return; diff --git a/src/GPU3D.h b/src/GPU3D.h index 1a0bfa0..b8a2d55 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -22,8 +22,13 @@ #include <array> #include <memory> -#include "GPU.h" #include "Savestate.h" +#include "FIFO.h" + +namespace Melon +{ +struct RenderSettings; +} namespace GPU3D { @@ -79,60 +84,246 @@ struct Polygon }; -extern u32 RenderDispCnt; -extern u8 RenderAlphaRef; +class Renderer3D; + +class GPU3D +{ +public: + GPU3D() noexcept = default; + ~GPU3D() noexcept = default; + void Reset() noexcept; + + void DoSavestate(Savestate* file) noexcept; + + void SetEnabled(bool geometry, bool rendering) noexcept; + + void ExecuteCommand() noexcept; + + s32 CyclesToRunFor() const noexcept; + void Run() noexcept; + void CheckFIFOIRQ() noexcept; + void CheckFIFODMA() noexcept; + + void VCount144() noexcept; + void VBlank() noexcept; + void VCount215() noexcept; + + void RestartFrame() noexcept; + + void SetRenderXPos(u16 xpos) noexcept; + [[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; } + u32* GetLine(int line) noexcept; + + void WriteToGXFIFO(u32 val) noexcept; + + [[nodiscard]] bool IsRendererAccelerated() const noexcept; + [[nodiscard]] Renderer3D* GetCurrentRenderer() noexcept { return CurrentRenderer.get(); } + [[nodiscard]] const Renderer3D* GetCurrentRenderer() const noexcept { return CurrentRenderer.get(); } + void SetCurrentRenderer(std::unique_ptr<Renderer3D>&& renderer) noexcept { CurrentRenderer = std::move(renderer); } + + u8 Read8(u32 addr) noexcept; + u16 Read16(u32 addr) noexcept; + u32 Read32(u32 addr) noexcept; + void Write8(u32 addr, u8 val) noexcept; + void Write16(u32 addr, u16 val) noexcept; + void Write32(u32 addr, u32 val) noexcept; +private: + typedef union + { + u64 _contents; + struct + { + u32 Param; + u8 Command; + }; + + } CmdFIFOEntry; + + void UpdateClipMatrix() noexcept; + 
void ResetRenderingState() noexcept; + void AddCycles(s32 num) noexcept; + void NextVertexSlot() noexcept; + void StallPolygonPipeline(s32 delay, s32 nonstalldelay) noexcept; + void SubmitPolygon() noexcept; + void SubmitVertex() noexcept; + void CalculateLighting() noexcept; + void BoxTest(u32* params) noexcept; + void PosTest() noexcept; + void VecTest(u32 param) noexcept; + void CmdFIFOWrite(CmdFIFOEntry& entry) noexcept; + CmdFIFOEntry CmdFIFORead() noexcept; + void FinishWork(s32 cycles) noexcept; + void VertexPipelineSubmitCmd() noexcept + { + // vertex commands 0x24, 0x25, 0x26, 0x27, 0x28 + if (!(VertexSlotsFree & 0x1)) NextVertexSlot(); + else AddCycles(1); + NormalPipeline = 0; + } + + void VertexPipelineCmdDelayed6() noexcept + { + // commands 0x20, 0x30, 0x31, 0x72 that can run 6 cycles after a vertex + if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1); + else AddCycles(NormalPipeline + 1); + NormalPipeline = 0; + } + + void VertexPipelineCmdDelayed8() noexcept + { + // commands 0x29, 0x2A, 0x2B, 0x33, 0x34, 0x41, 0x60, 0x71 that can run 8 cycles after a vertex + if (VertexPipeline > 0) AddCycles(VertexPipeline + 1); + else AddCycles(NormalPipeline + 1); + NormalPipeline = 0; + } + + void VertexPipelineCmdDelayed4() noexcept + { + // all other commands can run 4 cycles after a vertex + // no need to do much here since that is the minimum + AddCycles(NormalPipeline + 1); + NormalPipeline = 0; + } + + std::unique_ptr<Renderer3D> CurrentRenderer = nullptr; + + u16 RenderXPos = 0; + +public: + FIFO<CmdFIFOEntry, 256> CmdFIFO {}; + FIFO<CmdFIFOEntry, 4> CmdPIPE {}; + + FIFO<CmdFIFOEntry, 64> CmdStallQueue {}; + + u32 ZeroDotWLimit = 0; + + u32 GXStat = 0; + + u32 ExecParams[32] {}; + u32 ExecParamCount = 0; + + s32 CycleCount = 0; + s32 VertexPipeline = 0; + s32 NormalPipeline = 0; + s32 PolygonPipeline = 0; + s32 VertexSlotCounter = 0; + u32 VertexSlotsFree = 0; + + u32 NumPushPopCommands = 0; + u32 NumTestCommands = 0; + + + u32 MatrixMode = 0; 
-extern u16 RenderToonTable[32]; -extern u16 RenderEdgeTable[8]; + s32 ProjMatrix[16] {}; + s32 PosMatrix[16] {}; + s32 VecMatrix[16] {}; + s32 TexMatrix[16] {}; -extern u32 RenderFogColor, RenderFogOffset, RenderFogShift; -extern u8 RenderFogDensityTable[34]; + s32 ClipMatrix[16] {}; + bool ClipMatrixDirty = false; -extern u32 RenderClearAttr1, RenderClearAttr2; + u32 Viewport[6] {}; -extern bool RenderFrameIdentical; + s32 ProjMatrixStack[16] {}; + s32 PosMatrixStack[32][16] {}; + s32 VecMatrixStack[32][16] {}; + s32 TexMatrixStack[16] {}; + s32 ProjMatrixStackPointer = 0; + s32 PosMatrixStackPointer = 0; + s32 TexMatrixStackPointer = 0; -extern u16 RenderXPos; + u32 NumCommands = 0; + u32 CurCommand = 0; + u32 ParamCount = 0; + u32 TotalParams = 0; -extern std::array<Polygon*,2048> RenderPolygonRAM; -extern u32 RenderNumPolygons; + bool GeometryEnabled = false; + bool RenderingEnabled = false; -extern bool AbortFrame; + u32 DispCnt = 0; + u8 AlphaRefVal = 0; + u8 AlphaRef = 0; -extern u64 Timestamp; + u16 ToonTable[32] {}; + u16 EdgeTable[8] {}; -bool Init(); -void DeInit(); -void Reset(); + u32 FogColor = 0; + u32 FogOffset = 0; + u8 FogDensityTable[32] {}; -void DoSavestate(Savestate* file); + u32 ClearAttr1 = 0; + u32 ClearAttr2 = 0; + u32 RenderDispCnt = 0; + u8 RenderAlphaRef = 0; -void SetEnabled(bool geometry, bool rendering); + u16 RenderToonTable[32] {}; + u16 RenderEdgeTable[8] {}; -void ExecuteCommand(); + u32 RenderFogColor = 0; + u32 RenderFogOffset = 0; + u32 RenderFogShift = 0; + u8 RenderFogDensityTable[34] {}; -s32 CyclesToRunFor(); -void Run(); -void CheckFIFOIRQ(); -void CheckFIFODMA(); + u32 RenderClearAttr1 = 0; + u32 RenderClearAttr2 = 0; -void VCount144(); -void VBlank(); -void VCount215(); + bool RenderFrameIdentical = false; -void RestartFrame(); + bool AbortFrame = false; -void SetRenderXPos(u16 xpos); -u32* GetLine(int line); + u64 Timestamp = 0; -void WriteToGXFIFO(u32 val); -u8 Read8(u32 addr); -u16 Read16(u32 addr); -u32 Read32(u32 
addr); -void Write8(u32 addr, u8 val); -void Write16(u32 addr, u16 val); -void Write32(u32 addr, u32 val); + u32 PolygonMode = 0; + s16 CurVertex[3] {}; + u8 VertexColor[3] {}; + s16 TexCoords[2] {}; + s16 RawTexCoords[2] {}; + s16 Normal[3] {}; + + s16 LightDirection[4][3] {}; + u8 LightColor[4][3] {}; + u8 MatDiffuse[3] {}; + u8 MatAmbient[3] {}; + u8 MatSpecular[3] {}; + u8 MatEmission[3] {}; + + bool UseShininessTable = false; + u8 ShininessTable[128] {}; + + u32 PolygonAttr = 0; + u32 CurPolygonAttr = 0; + + u32 TexParam = 0; + u32 TexPalette = 0; + + s32 PosTestResult[4] {}; + s16 VecTestResult[3] {}; + + Vertex TempVertexBuffer[4] {}; + u32 VertexNum = 0; + u32 VertexNumInPoly = 0; + u32 NumConsecutivePolygons = 0; + Polygon* LastStripPolygon = nullptr; + u32 NumOpaquePolygons = 0; + + Vertex VertexRAM[6144 * 2] {}; + Polygon PolygonRAM[2048 * 2] {}; + + Vertex* CurVertexRAM = nullptr; + Polygon* CurPolygonRAM = nullptr; + u32 NumVertices = 0; + u32 NumPolygons = 0; + u32 CurRAMBank = 0; + + std::array<Polygon*,2048> RenderPolygonRAM {}; + u32 RenderNumPolygons = 0; + + u32 FlushRequest = 0; + u32 FlushAttributes = 0; + u32 ScrolledLine[256]; +}; class Renderer3D { @@ -149,7 +340,7 @@ public: // are more detailed "traits" that we can ask of the Renderer3D type const bool Accelerated; - virtual void SetRenderSettings(GPU::RenderSettings& settings) = 0; + virtual void SetRenderSettings(const Melon::RenderSettings& settings) noexcept = 0; virtual void VCount144() {}; @@ -160,15 +351,6 @@ protected: Renderer3D(bool Accelerated); }; -extern int Renderer; -extern std::unique_ptr<Renderer3D> CurrentRenderer; - } -#include "GPU3D_Soft.h" - -#ifdef OGLRENDERER_ENABLED -#include "GPU3D_OpenGL.h" -#endif - #endif diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 2d0b15a..629ec15 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -97,20 +97,20 @@ void SetupDefaultTexParams(GLuint tex) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, 
GL_NEAREST); } -GLRenderer::GLRenderer() noexcept : Renderer3D(true) +GLRenderer::GLRenderer(Melon::GPU& gpu) noexcept : Renderer3D(true), GPU(gpu) { // GLRenderer::New() will be used to actually initialize the renderer; // The various glDelete* functions silently ignore invalid IDs, // so we can just let the destructor clean up a half-initialized renderer. } -std::unique_ptr<GLRenderer> GLRenderer::New() noexcept +std::unique_ptr<GLRenderer> GLRenderer::New(Melon::GPU& gpu) noexcept { assert(glEnable != nullptr); // Will be returned if the initialization succeeds, // or cleaned up via RAII if it fails. - std::unique_ptr<GLRenderer> result = std::unique_ptr<GLRenderer>(new GLRenderer()); + std::unique_ptr<GLRenderer> result = std::unique_ptr<GLRenderer>(new GLRenderer(gpu)); glEnable(GL_DEPTH_TEST); glEnable(GL_STENCIL_TEST); @@ -329,7 +329,7 @@ void GLRenderer::Reset() { } -void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings) +void GLRenderer::SetRenderSettings(const Melon::RenderSettings& settings) noexcept { int scale = settings.GL_ScaleFactor; @@ -766,11 +766,11 @@ int GLRenderer::RenderPolygonEdgeBatch(int i) void GLRenderer::RenderSceneChunk(int y, int h) { u32 flags = 0; - if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + if (GPU.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; if (h != 192) glScissor(0, y<<ScaleFactor, 256<<ScaleFactor, h<<ScaleFactor); - GLboolean fogenable = (RenderDispCnt & (1<<7)) ? GL_TRUE : GL_FALSE; + GLboolean fogenable = (GPU.GPU3D.RenderDispCnt & (1<<7)) ? GL_TRUE : GL_FALSE; // TODO: proper 'equal' depth test! 
// (has margin of +-0x200 in Z-buffer mode, +-0xFF in W-buffer mode) @@ -842,7 +842,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) glEnable(GL_BLEND); glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); - if (RenderDispCnt & (1<<3)) + if (GPU.GPU3D.RenderDispCnt & (1<<3)) glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE); else glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ONE); @@ -854,7 +854,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) // pass 2: if needed, render translucent pixels that are against background pixels // when background alpha is zero, those need to be rendered with blending disabled - if ((RenderClearAttr1 & 0x001F0000) == 0) + if ((GPU.GPU3D.RenderClearAttr1 & 0x001F0000) == 0) { glDisable(GL_BLEND); @@ -918,7 +918,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) if (rp->PolyData->IsShadow) { // shadow against clear-plane will only pass if its polyID matches that of the clear plane - u32 clrpolyid = (RenderClearAttr1 >> 24) & 0x3F; + u32 clrpolyid = (GPU.GPU3D.RenderClearAttr1 >> 24) & 0x3F; if (polyid != clrpolyid) { i++; continue; } glEnable(GL_BLEND); @@ -1066,7 +1066,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) } } - if (RenderDispCnt & 0x00A0) // fog/edge enabled + if (GPU.GPU3D.RenderDispCnt & 0x00A0) // fog/edge enabled { glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -1088,7 +1088,7 @@ void GLRenderer::RenderSceneChunk(int y, int h) glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); glBindVertexArray(ClearVertexArrayID); - if (RenderDispCnt & (1<<5)) + if (GPU.GPU3D.RenderDispCnt & (1<<5)) { // edge marking // TODO: depth/polyid values at screen edges @@ -1100,19 +1100,19 @@ void GLRenderer::RenderSceneChunk(int y, int h) glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if (RenderDispCnt & (1<<7)) + if (GPU.GPU3D.RenderDispCnt & (1<<7)) { // fog glUseProgram(FinalPassFogShader[2]); - if (RenderDispCnt & (1<<6)) + if 
(GPU.GPU3D.RenderDispCnt & (1<<6)) glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); else glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); { - u32 c = RenderFogColor; + u32 c = GPU.GPU3D.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1136,11 +1136,11 @@ void GLRenderer::RenderFrame() ShaderConfig.uScreenSize[0] = ScreenW; ShaderConfig.uScreenSize[1] = ScreenH; - ShaderConfig.uDispCnt = RenderDispCnt; + ShaderConfig.uDispCnt = GPU.GPU3D.RenderDispCnt; for (int i = 0; i < 32; i++) { - u16 c = RenderToonTable[i]; + u16 c = GPU.GPU3D.RenderToonTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1152,7 +1152,7 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 8; i++) { - u16 c = RenderEdgeTable[i]; + u16 c = GPU.GPU3D.RenderEdgeTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1163,7 +1163,7 @@ void GLRenderer::RenderFrame() } { - u32 c = RenderFogColor; + u32 c = GPU.GPU3D.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1177,12 +1177,12 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 34; i++) { - u8 d = RenderFogDensityTable[i]; + u8 d = GPU.GPU3D.RenderFogDensityTable[i]; ShaderConfig.uFogDensity[i][0] = (float)d / 127.0; } - ShaderConfig.uFogOffset = RenderFogOffset; - ShaderConfig.uFogShift = RenderFogShift; + ShaderConfig.uFogOffset = GPU.GPU3D.RenderFogOffset; + ShaderConfig.uFogShift = GPU.GPU3D.RenderFogShift; glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); void* unibuf = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); @@ -1195,13 +1195,13 @@ void GLRenderer::RenderFrame() glBindTexture(GL_TEXTURE_2D, TexMemID); for (int i = 0; i < 4; i++) { - u32 mask = GPU::VRAMMap_Texture[i]; + u32 mask = GPU.VRAMMap_Texture[i]; u8* vram; if (!mask) continue; - else if (mask & (1<<0)) vram = GPU::VRAM_A; - else if (mask & (1<<1)) 
vram = GPU::VRAM_B; - else if (mask & (1<<2)) vram = GPU::VRAM_C; - else if (mask & (1<<3)) vram = GPU::VRAM_D; + else if (mask & (1<<0)) vram = GPU.VRAM_A; + else if (mask & (1<<1)) vram = GPU.VRAM_B; + else if (mask & (1<<2)) vram = GPU.VRAM_C; + else if (mask & (1<<3)) vram = GPU.VRAM_D; glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*128, 1024, 128, GL_RED_INTEGER, GL_UNSIGNED_BYTE, vram); } @@ -1211,12 +1211,12 @@ void GLRenderer::RenderFrame() for (int i = 0; i < 6; i++) { // 6 x 16K chunks - u32 mask = GPU::VRAMMap_TexPal[i]; + u32 mask = GPU.VRAMMap_TexPal[i]; u8* vram; if (!mask) continue; - else if (mask & (1<<4)) vram = &GPU::VRAM_E[(i&3)*0x4000]; - else if (mask & (1<<5)) vram = GPU::VRAM_F; - else if (mask & (1<<6)) vram = GPU::VRAM_G; + else if (mask & (1<<4)) vram = &GPU.VRAM_E[(i&3)*0x4000]; + else if (mask & (1<<5)) vram = GPU.VRAM_F; + else if (mask & (1<<6)) vram = GPU.VRAM_G; glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*8, 1024, 8, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, vram); } @@ -1241,13 +1241,13 @@ void GLRenderer::RenderFrame() glUseProgram(ClearShaderPlain[2]); glDepthFunc(GL_ALWAYS); - u32 r = RenderClearAttr1 & 0x1F; - u32 g = (RenderClearAttr1 >> 5) & 0x1F; - u32 b = (RenderClearAttr1 >> 10) & 0x1F; - u32 fog = (RenderClearAttr1 >> 15) & 0x1; - u32 a = (RenderClearAttr1 >> 16) & 0x1F; - u32 polyid = (RenderClearAttr1 >> 24) & 0x3F; - u32 z = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + u32 r = GPU.GPU3D.RenderClearAttr1 & 0x1F; + u32 g = (GPU.GPU3D.RenderClearAttr1 >> 5) & 0x1F; + u32 b = (GPU.GPU3D.RenderClearAttr1 >> 10) & 0x1F; + u32 fog = (GPU.GPU3D.RenderClearAttr1 >> 15) & 0x1; + u32 a = (GPU.GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 polyid = (GPU.GPU3D.RenderClearAttr1 >> 24) & 0x3F; + u32 z = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); @@ -1266,20 +1266,20 @@ void GLRenderer::RenderFrame() glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if 
(RenderNumPolygons) + if (GPU.GPU3D.RenderNumPolygons) { // render shit here u32 flags = 0; - if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + if (GPU.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; int npolys = 0; int firsttrans = -1; - for (u32 i = 0; i < RenderNumPolygons; i++) + for (u32 i = 0; i < GPU.GPU3D.RenderNumPolygons; i++) { - if (RenderPolygonRAM[i]->Degenerate) continue; + if (GPU.GPU3D.RenderPolygonRAM[i]->Degenerate) continue; - SetupPolygon(&PolygonList[npolys], RenderPolygonRAM[i]); - if (firsttrans < 0 && RenderPolygonRAM[i]->Translucent) + SetupPolygon(&PolygonList[npolys], GPU.GPU3D.RenderPolygonRAM[i]); + if (firsttrans < 0 && GPU.GPU3D.RenderPolygonRAM[i]->Translucent) firsttrans = npolys; npolys++; diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h index 02d8303..3657a12 100644 --- a/src/GPU3D_OpenGL.h +++ b/src/GPU3D_OpenGL.h @@ -18,10 +18,15 @@ #pragma once +#ifdef OGLRENDERER_ENABLED #include "GPU3D.h" #include "OpenGLSupport.h" +namespace Melon +{ +class GPU; +} namespace GPU3D { @@ -31,7 +36,7 @@ public: virtual ~GLRenderer() override; virtual void Reset() override; - virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + virtual void SetRenderSettings(const Melon::RenderSettings& settings) noexcept override; virtual void VCount144() override {}; virtual void RenderFrame() override; @@ -40,10 +45,10 @@ public: void SetupAccelFrame(); void PrepareCaptureFrame(); - static std::unique_ptr<GLRenderer> New() noexcept; + static std::unique_ptr<GLRenderer> New(Melon::GPU& gpu) noexcept; private: // Used by New() - GLRenderer() noexcept; + GLRenderer(Melon::GPU& gpu) noexcept; // GL version requirements // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) @@ -63,6 +68,7 @@ private: u32 RenderKey; }; + Melon::GPU& GPU; RendererPolygon PolygonList[2048] {}; bool BuildRenderShader(u32 flags, const char* vs, const char* fs); @@ -151,3 +157,4 @@ private: }; } +#endif
\ No newline at end of file diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 19154ba..823f752 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -72,8 +72,8 @@ void SoftRenderer::SetupRenderThread() } -SoftRenderer::SoftRenderer() noexcept - : Renderer3D(false) +SoftRenderer::SoftRenderer(Melon::GPU& gpu) noexcept + : Renderer3D(false), GPU(gpu) { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -105,7 +105,7 @@ void SoftRenderer::Reset() SetupRenderThread(); } -void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings) +void SoftRenderer::SetRenderSettings(const Melon::RenderSettings& settings) noexcept { Threaded = settings.Soft_Threaded; SetupRenderThread(); @@ -387,7 +387,7 @@ bool DepthTest_LessThan_FrontFacing(s32 dstz, s32 z, u32 dstattr) return false; } -u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) +u32 SoftRenderer::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) noexcept { u32 dstalpha = dstcolor >> 24; @@ -398,7 +398,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) u32 srcG = (srccolor >> 8) & 0x3F; u32 srcB = (srccolor >> 16) & 0x3F; - if (RenderDispCnt & (1<<3)) + if (GPU.GPU3D.RenderDispCnt & (1<<3)) { u32 dstR = dstcolor & 0x3F; u32 dstG = (dstcolor >> 8) & 0x3F; @@ -427,7 +427,7 @@ u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 if (blendmode == 2) { - if (RenderDispCnt & (1<<1)) + if (GPU.GPU3D.RenderDispCnt & (1<<1)) { // highlight mode: color is calculated normally // except all vertex color components are set @@ -441,7 +441,7 @@ u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 { // toon mode: vertex color is replaced by toon color - u16 tooncolor = RenderToonTable[vr >> 1]; + u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -449,7 +449,7 @@ u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 
vr, u8 vg, u8 vb, s16 s, s16 } } - if ((RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) + if ((GPU.GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) { u8 tr, tg, tb; @@ -502,9 +502,9 @@ u32 SoftRenderer::RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 a = polyalpha; } - if ((blendmode == 2) && (RenderDispCnt & (1<<1))) + if ((blendmode == 2) && (GPU.GPU3D.RenderDispCnt & (1<<1))) { - u16 tooncolor = RenderToonTable[vr >> 1]; + u16 tooncolor = GPU.GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -748,7 +748,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) std::swap(zl, zr); // CHECKME: edge fill rules for swapped opaque shadow mask polygons - if ((polyalpha < 31) || (RenderDispCnt & (3<<4))) + if ((polyalpha < 31) || (GPU.GPU3D.RenderDispCnt & (3<<4))) { l_filledge = true; r_filledge = true; @@ -776,7 +776,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) rp->SlopeR.EdgeParams<false>(&r_edgelen, &r_edgecov); // CHECKME: edge fill rules for unswapped opaque shadow mask polygons - if ((polyalpha < 31) || (RenderDispCnt & (3<<4))) + if ((polyalpha < 31) || (GPU.GPU3D.RenderDispCnt & (3<<4))) { l_filledge = true; r_filledge = true; @@ -797,7 +797,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) // similarly, we can perform alpha test early (checkme) if (wireframe) polyalpha = 31; - if (polyalpha <= RenderAlphaRef) return; + if (polyalpha <= GPU.GPU3D.RenderAlphaRef) return; // in wireframe mode, there are special rules for equal Z (TODO) @@ -982,7 +982,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) // * the bottom-most pixel of negative x-major slopes are filled if they are next to a flat bottom edge // edges are always filled if antialiasing/edgemarking are enabled or if the pixels are translucent // checkme: do swapped line 
polygons exist? - if ((polyalpha < 31) || wireframe || (RenderDispCnt & ((1<<4)|(1<<5)))) + if ((polyalpha < 31) || wireframe || (GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5)))) { l_filledge = true; r_filledge = true; @@ -1016,7 +1016,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) // * the bottom-most pixel of negative x-major slopes are filled if they are next to a flat bottom edge // * edges are filled if both sides are identical and fully overlapping // edges are always filled if antialiasing/edgemarking are enabled or if the pixels are translucent - if ((polyalpha < 31) || wireframe || (RenderDispCnt & ((1<<4)|(1<<5)))) + if ((polyalpha < 31) || wireframe || (GPU.GPU3D.RenderDispCnt & ((1<<4)|(1<<5)))) { l_filledge = true; r_filledge = true; @@ -1119,13 +1119,13 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) u8 alpha = color >> 24; // alpha test - if (alpha <= RenderAlphaRef) continue; + if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (RenderDispCnt & (1<<4)) + if (GPU.GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1215,13 +1215,13 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) u8 alpha = color >> 24; // alpha test - if (alpha <= RenderAlphaRef) continue; + if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if ((RenderDispCnt & (1<<4)) && (attr & 0xF)) + if ((GPU.GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF)) { // anti-aliasing: all edges are rendered @@ -1307,13 +1307,13 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y) u8 alpha = color >> 24; // alpha test - if (alpha <= RenderAlphaRef) continue; + if (alpha <= GPU.GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (RenderDispCnt & (1<<4)) + if (GPU.GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1377,7 +1377,7 @@ u32 
SoftRenderer::CalculateFogDensity(u32 pixeladdr) u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; - if (z < RenderFogOffset) + if (z < GPU.GPU3D.RenderFogOffset) { densityid = 0; densityfrac = 0; @@ -1389,8 +1389,8 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) // on hardware, the final value can overflow the 32-bit range with a shift big enough, // causing fog to 'wrap around' and accidentally apply to larger Z ranges - z -= RenderFogOffset; - z = (z >> 2) << RenderFogShift; + z -= GPU.GPU3D.RenderFogOffset; + z = (z >> 2) << GPU.GPU3D.RenderFogShift; densityid = z >> 17; if (densityid >= 32) @@ -1404,8 +1404,8 @@ u32 SoftRenderer::CalculateFogDensity(u32 pixeladdr) // checkme (may be too precise?) u32 density = - ((RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + - (RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; + ((GPU.GPU3D.RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + + (GPU.GPU3D.RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; if (density >= 127) density = 128; return density; @@ -1417,7 +1417,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) // clearing all polygon fog flags if the master flag isn't set? // merging all final pass loops into one? 
- if (RenderDispCnt & (1<<5)) + if (GPU.GPU3D.RenderDispCnt & (1<<5)) { // edge marking // only applied to topmost pixels @@ -1437,7 +1437,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) ((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) || ((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth]))) { - u16 edgecolor = RenderEdgeTable[polyid >> 3]; + u16 edgecolor = GPU.GPU3D.RenderEdgeTable[polyid >> 3]; u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++; u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++; u32 edgeB = (edgecolor >> 9) & 0x3E; if (edgeB) edgeB++; @@ -1450,7 +1450,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) } } - if (RenderDispCnt & (1<<7)) + if (GPU.GPU3D.RenderDispCnt & (1<<7)) { // fog @@ -1463,12 +1463,12 @@ void SoftRenderer::ScanlineFinalPass(s32 y) // TODO: check the 'fog alpha glitch with small Z' GBAtek talks about - bool fogcolor = !(RenderDispCnt & (1<<6)); + bool fogcolor = !(GPU.GPU3D.RenderDispCnt & (1<<6)); - u32 fogR = (RenderFogColor << 1) & 0x3E; if (fogR) fogR++; - u32 fogG = (RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; - u32 fogB = (RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; - u32 fogA = (RenderFogColor >> 16) & 0x1F; + u32 fogR = (GPU.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; + u32 fogG = (GPU.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; + u32 fogB = (GPU.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; + u32 fogA = (GPU.GPU3D.RenderFogColor >> 16) & 0x1F; for (int x = 0; x < 256; x++) { @@ -1528,7 +1528,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y) } } - if (RenderDispCnt & (1<<4)) + if (GPU.GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing @@ -1583,8 +1583,8 @@ void SoftRenderer::ScanlineFinalPass(s32 y) void SoftRenderer::ClearBuffers() { - u32 clearz = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - u32 polyid = RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID + 
u32 clearz = ((GPU.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + u32 polyid = GPU.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID // fill screen borders for edge marking @@ -1614,10 +1614,10 @@ void SoftRenderer::ClearBuffers() // clear the screen - if (RenderDispCnt & (1<<14)) + if (GPU.GPU3D.RenderDispCnt & (1<<14)) { - u8 xoff = (RenderClearAttr2 >> 16) & 0xFF; - u8 yoff = (RenderClearAttr2 >> 24) & 0xFF; + u8 xoff = (GPU.GPU3D.RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (GPU.GPU3D.RenderClearAttr2 >> 24) & 0xFF; for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -1649,13 +1649,13 @@ void SoftRenderer::ClearBuffers() else { // TODO: confirm color conversion - u32 r = (RenderClearAttr1 << 1) & 0x3E; if (r) r++; - u32 g = (RenderClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (RenderClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (RenderClearAttr1 >> 16) & 0x1F; + u32 r = (GPU.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (GPU.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (GPU.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (GPU.GPU3D.RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (RenderClearAttr1 & 0x8000); + polyid |= (GPU.GPU3D.RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -1698,19 +1698,19 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys) void SoftRenderer::VCount144() { - if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU3D::AbortFrame) + if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU.GPU3D.AbortFrame) Platform::Semaphore_Wait(Sema_RenderDone); } void SoftRenderer::RenderFrame() { - auto textureDirty = GPU::VRAMDirty_Texture.DeriveState(GPU::VRAMMap_Texture); - auto texPalDirty = GPU::VRAMDirty_TexPal.DeriveState(GPU::VRAMMap_TexPal); + auto textureDirty = GPU.VRAMDirty_Texture.DeriveState(GPU.VRAMMap_Texture, GPU); + auto 
texPalDirty = GPU.VRAMDirty_TexPal.DeriveState(GPU.VRAMMap_TexPal, GPU); - bool textureChanged = GPU::MakeVRAMFlat_TextureCoherent(textureDirty); - bool texPalChanged = GPU::MakeVRAMFlat_TexPalCoherent(texPalDirty); + bool textureChanged = GPU.MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU.MakeVRAMFlat_TexPalCoherent(texPalDirty); - FrameIdentical = !(textureChanged || texPalChanged) && RenderFrameIdentical; + FrameIdentical = !(textureChanged || texPalChanged) && GPU.GPU3D.RenderFrameIdentical; if (RenderThreadRunning.load(std::memory_order_relaxed)) { @@ -1719,7 +1719,7 @@ void SoftRenderer::RenderFrame() else if (!FrameIdentical) { ClearBuffers(); - RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons); + RenderPolygons(false, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons); } } @@ -1743,7 +1743,7 @@ void SoftRenderer::RenderThreadFunc() else { ClearBuffers(); - RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons); + RenderPolygons(true, &GPU.GPU3D.RenderPolygonRAM[0], GPU.GPU3D.RenderNumPolygons); } Platform::Semaphore_Post(Sema_RenderDone); diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index 16257df..b1cfb2f 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -18,6 +18,7 @@ #pragma once +#include "GPU.h" #include "GPU3D.h" #include "Platform.h" #include <thread> @@ -28,11 +29,11 @@ namespace GPU3D class SoftRenderer : public Renderer3D { public: - SoftRenderer() noexcept; + SoftRenderer(Melon::GPU& gpu) noexcept; virtual ~SoftRenderer() override; virtual void Reset() override; - virtual void SetRenderSettings(GPU::RenderSettings& settings) override; + virtual void SetRenderSettings(const Melon::RenderSettings& settings) noexcept override; virtual void VCount144() override; virtual void RenderFrame() override; @@ -429,13 +430,14 @@ private: template <typename T> inline T ReadVRAM_Texture(u32 addr) { - return *(T*)&GPU::VRAMFlat_Texture[addr & 0x7FFFF]; + return *(T*)&GPU.VRAMFlat_Texture[addr & 
0x7FFFF]; } template <typename T> inline T ReadVRAM_TexPal(u32 addr) { - return *(T*)&GPU::VRAMFlat_TexPal[addr & 0x1FFFF]; + return *(T*)&GPU.VRAMFlat_TexPal[addr & 0x1FFFF]; } + u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) noexcept; struct RendererPolygon { @@ -449,6 +451,7 @@ private: }; + Melon::GPU& GPU; RendererPolygon PolygonList[2048]; void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha); u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t); diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 32633a7..d0649bb 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -28,12 +28,12 @@ #include "OpenGLSupport.h" #include "GPU_OpenGL_shaders.h" -namespace GPU +namespace Melon { using namespace OpenGL; -std::unique_ptr<GLCompositor> GLCompositor::New() noexcept +std::unique_ptr<GLCompositor> GLCompositor::New(Melon::GPU& gpu) noexcept { assert(glBindAttribLocation != nullptr); @@ -50,10 +50,10 @@ std::unique_ptr<GLCompositor> GLCompositor::New() noexcept // if linking the shaders together failed. 
return nullptr; - return std::unique_ptr<GLCompositor>(new GLCompositor(CompShader)); + return std::unique_ptr<GLCompositor>(new GLCompositor(CompShader, gpu)); } -GLCompositor::GLCompositor(std::array<GLuint, 3> compShader) noexcept : CompShader(compShader) +GLCompositor::GLCompositor(std::array<GLuint, 3> compShader, Melon::GPU& gpu) noexcept : CompShader(compShader), GPU(gpu) { CompScaleLoc = glGetUniformLocation(CompShader[2], "u3DScale"); Comp3DXPosLoc = glGetUniformLocation(CompShader[2], "u3DXPos"); @@ -144,7 +144,7 @@ void GLCompositor::Reset() } -void GLCompositor::SetRenderSettings(RenderSettings& settings) +void GLCompositor::SetRenderSettings(const RenderSettings& settings) noexcept { int scale = settings.GL_ScaleFactor; @@ -174,7 +174,7 @@ void GLCompositor::Stop() { for (int i = 0; i < 2; i++) { - int frontbuf = GPU::FrontBuffer; + int frontbuf = GPU.FrontBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]); @@ -186,7 +186,7 @@ void GLCompositor::Stop() void GLCompositor::RenderFrame() { - int frontbuf = GPU::FrontBuffer; + int frontbuf = GPU.FrontBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[frontbuf]); @@ -204,21 +204,21 @@ void GLCompositor::RenderFrame() glUniform1ui(CompScaleLoc, Scale); // TODO: support setting this midframe, if ever needed - glUniform1i(Comp3DXPosLoc, ((int)GPU3D::RenderXPos << 23) >> 23); + glUniform1i(Comp3DXPosLoc, ((int)GPU.GPU3D.GetRenderXPos() << 23) >> 23); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); - if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + if (GPU.Framebuffer[frontbuf][0] && GPU.Framebuffer[frontbuf][1]) { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + GL_UNSIGNED_BYTE, GPU.Framebuffer[frontbuf][0]); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 
+ 1, 192, GL_RGBA_INTEGER, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + GL_UNSIGNED_BYTE, GPU.Framebuffer[frontbuf][1]); } glActiveTexture(GL_TEXTURE1); - reinterpret_cast<GPU3D::GLRenderer*>(GPU3D::CurrentRenderer.get())->SetupAccelFrame(); + reinterpret_cast<GPU3D::GLRenderer*>(GPU.GPU3D.GetCurrentRenderer())->SetupAccelFrame(); glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); glBindVertexArray(CompVertexArrayID); diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h index 3841df1..3763b6b 100644 --- a/src/GPU_OpenGL.h +++ b/src/GPU_OpenGL.h @@ -23,29 +23,29 @@ #include <array> #include <memory> -namespace GPU +namespace Melon { - +class GPU; struct RenderSettings; class GLCompositor { public: - static std::unique_ptr<GLCompositor> New() noexcept; + static std::unique_ptr<GLCompositor> New(Melon::GPU& gpu) noexcept; GLCompositor(const GLCompositor&) = delete; GLCompositor& operator=(const GLCompositor&) = delete; ~GLCompositor(); void Reset(); - void SetRenderSettings(RenderSettings& settings); + void SetRenderSettings(const RenderSettings& settings) noexcept; void Stop(); void RenderFrame(); void BindOutputTexture(int buf); private: - GLCompositor(std::array<GLuint, 3> CompShader) noexcept; - + GLCompositor(std::array<GLuint, 3> CompShader, Melon::GPU& gpu) noexcept; + Melon::GPU& GPU; int Scale; int ScreenH, ScreenW; diff --git a/src/NDS.cpp b/src/NDS.cpp index 7935929..5290423 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -185,6 +185,7 @@ class RTC* RTC; class Wifi* Wifi; std::unique_ptr<NDSCart::NDSCartSlot> NDSCartSlot; std::unique_ptr<GBACart::GBACartSlot> GBACartSlot; +std::unique_ptr<Melon::GPU> GPU; class AREngine* AREngine; bool Running; @@ -204,8 +205,9 @@ bool Init() RegisterEventFunc(Event_Div, 0, DivDone); RegisterEventFunc(Event_Sqrt, 0, SqrtDone); - ARM9 = new ARMv5(); - ARM7 = new ARMv4(); + GPU = std::make_unique<Melon::GPU>(); + ARM9 = new ARMv5(*GPU); + ARM7 = new ARMv4(*GPU); #ifdef JIT_ENABLED ARMJIT::Init(); @@ -215,14 +217,14 @@ bool 
Init() SharedWRAM = new u8[SharedWRAMSize]; #endif - DMAs[0] = new DMA(0, 0); - DMAs[1] = new DMA(0, 1); - DMAs[2] = new DMA(0, 2); - DMAs[3] = new DMA(0, 3); - DMAs[4] = new DMA(1, 0); - DMAs[5] = new DMA(1, 1); - DMAs[6] = new DMA(1, 2); - DMAs[7] = new DMA(1, 3); + DMAs[0] = new DMA(0, 0, *GPU); + DMAs[1] = new DMA(0, 1, *GPU); + DMAs[2] = new DMA(0, 2, *GPU); + DMAs[3] = new DMA(0, 3, *GPU); + DMAs[4] = new DMA(1, 0, *GPU); + DMAs[5] = new DMA(1, 1, *GPU); + DMAs[6] = new DMA(1, 2, *GPU); + DMAs[7] = new DMA(1, 3, *GPU); SPU = new class SPU; SPI = new class SPIHost(); @@ -230,7 +232,6 @@ bool Init() Wifi = new class Wifi(); NDSCartSlot = std::make_unique<NDSCart::NDSCartSlot>(); GBACartSlot = std::make_unique<GBACart::GBACartSlot>(); - if (!GPU::Init()) return false; if (!DSi::Init()) return false; @@ -261,7 +262,7 @@ void DeInit() NDSCartSlot = nullptr; GBACartSlot = nullptr; - GPU::DeInit(); + GPU = nullptr; DSi::DeInit(); @@ -521,7 +522,7 @@ void SetupDirectBoot(const std::string& romname) PostFlag7 = 0x01; PowerControl9 = 0x820F; - GPU::SetPowerCnt(PowerControl9); + GPU->SetPowerCnt(PowerControl9); // checkme RCnt = 0x8000; @@ -644,9 +645,9 @@ void Reset() KeyCnt[1] = 0; RCnt = 0; + GPU->Reset(); NDSCartSlot->Reset(); GBACartSlot->Reset(); - GPU::Reset(); SPU->Reset(); SPI->Reset(); RTC->Reset(); @@ -720,7 +721,7 @@ void Stop(Platform::StopReason reason) Log(level, "Stopping emulated console (Reason: %s)\n", StopReasonName(reason)); Running = false; Platform::SignalStop(reason); - GPU::Stop(); + GPU->Stop(); SPU->Stop(); if (ConsoleType == 1) @@ -848,7 +849,7 @@ bool DoSavestate(Savestate* file) NDSCartSlot->DoSavestate(file); if (ConsoleType == 0) GBACartSlot->DoSavestate(file); - GPU::DoSavestate(file); + GPU->DoSavestate(file); SPU->DoSavestate(file); SPI->DoSavestate(file); RTC->DoSavestate(file); @@ -859,7 +860,7 @@ bool DoSavestate(Savestate* file) if (!file->Saving) { - GPU::SetPowerCnt(PowerControl9); + GPU->SetPowerCnt(PowerControl9); 
SPU->SetPowerCnt(PowerControl7 & 0x0001); Wifi->SetPowerCnt(PowerControl7 & 0x0002); @@ -1071,7 +1072,7 @@ u32 RunFrame() { FrameStartTimestamp = SysTimestamp; - GPU::TotalScanlines = 0; + GPU->TotalScanlines = 0; LagFrameFlag = true; bool runFrame = Running && !(CPUStop & CPUStop_Sleep); @@ -1095,7 +1096,7 @@ u32 RunFrame() ARM7Timestamp = target; TimerTimestamp[0] = target; TimerTimestamp[1] = target; - GPU3D::Timestamp = target; + GPU->GPU3D.Timestamp = target; RunSystemSleep(target); if (!(CPUStop & CPUStop_Sleep)) @@ -1103,7 +1104,7 @@ u32 RunFrame() } if (SysTimestamp >= frametarget) - GPU::BlankFrame(); + GPU->BlankFrame(); } else { @@ -1112,11 +1113,11 @@ u32 RunFrame() if (!(CPUStop & CPUStop_Wakeup)) { - GPU::StartFrame(); + GPU->StartFrame(); } CPUStop &= ~CPUStop_Wakeup; - while (Running && GPU::TotalScanlines==0) + while (Running && GPU->TotalScanlines==0) { u64 target = NextTarget(); ARM9Target = target << ARM9ClockShift; @@ -1125,7 +1126,7 @@ u32 RunFrame() if (CPUStop & CPUStop_GXStall) { // GXFIFO stall - s32 cycles = GPU3D::CyclesToRunFor(); + s32 cycles = GPU->GPU3D.CyclesToRunFor(); ARM9Timestamp = std::min(ARM9Target, ARM9Timestamp+(cycles<<ARM9ClockShift)); } @@ -1148,7 +1149,7 @@ u32 RunFrame() } RunTimers(0); - GPU3D::Run(); + GPU->GPU3D.Run(); target = ARM9Timestamp >> ARM9ClockShift; CurCPU = 1; @@ -1187,7 +1188,7 @@ u32 RunFrame() } } - if (GPU::TotalScanlines == 0) + if (GPU->TotalScanlines == 0) continue; #ifdef DEBUG_CHECK_DESYNC @@ -1208,7 +1209,7 @@ u32 RunFrame() NumLagFrames++; if (Running) - return GPU::TotalScanlines; + return GPU->TotalScanlines; else return 263; } @@ -1531,7 +1532,7 @@ void SetIRQ(u32 cpu, u32 irq) { CPUStop &= ~CPUStop_Sleep; CPUStop |= CPUStop_Wakeup; - GPU3D::RestartFrame(); + GPU->GPU3D.RestartFrame(); } } } @@ -1708,7 +1709,7 @@ void NocashPrint(u32 ncpu, u32 addr) else if (!strcmp(cmd, "lr")) sprintf(subs, "%08X", cpu->R[14]); else if (!strcmp(cmd, "pc")) sprintf(subs, "%08X", cpu->R[15]); else if 
(!strcmp(cmd, "frame")) sprintf(subs, "%u", NumFrames); - else if (!strcmp(cmd, "scanline")) sprintf(subs, "%u", GPU::VCount); + else if (!strcmp(cmd, "scanline")) sprintf(subs, "%u", GPU->VCount); else if (!strcmp(cmd, "totalclks")) sprintf(subs, "%" PRIu64, GetSysClockCycles(0)); else if (!strcmp(cmd, "lastclks")) sprintf(subs, "%" PRIu64, GetSysClockCycles(1)); else if (!strcmp(cmd, "zeroclks")) @@ -2081,7 +2082,7 @@ void debug(u32 param) Log(LogLevel::Debug, "ARM7 IME=%08X IE=%08X IF=%08X IE2=%04X IF2=%04X\n", IME[1], IE[1], IF[1], IE2, IF2); //for (int i = 0; i < 9; i++) - // printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); + // printf("VRAM %c: %02X\n", 'A'+i, GPU->VRAMCNT[i]); FILE* shit = fopen("debug/DSfirmware.bin", "wb"); @@ -2149,21 +2150,21 @@ u8 ARM9Read8(u32 addr) case 0x05000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return 0; - return GPU::ReadPalette<u8>(addr); + return GPU->ReadPalette<u8>(addr); case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU::ReadVRAM_ABG<u8>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u8>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u8>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u8>(addr); - default: return GPU::ReadVRAM_LCDC<u8>(addr); + case 0x00000000: return GPU->ReadVRAM_ABG<u8>(addr); + case 0x00200000: return GPU->ReadVRAM_BBG<u8>(addr); + case 0x00400000: return GPU->ReadVRAM_AOBJ<u8>(addr); + case 0x00600000: return GPU->ReadVRAM_BOBJ<u8>(addr); + default: return GPU->ReadVRAM_LCDC<u8>(addr); } case 0x07000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return 0; - return GPU::ReadOAM<u8>(addr); + return GPU->ReadOAM<u8>(addr); case 0x08000000: case 0x09000000: @@ -2211,21 +2212,21 @@ u16 ARM9Read16(u32 addr) case 0x05000000: if (!(PowerControl9 & ((addr & 0x400) ? 
(1<<9) : (1<<1)))) return 0; - return GPU::ReadPalette<u16>(addr); + return GPU->ReadPalette<u16>(addr); case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU::ReadVRAM_ABG<u16>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u16>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u16>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u16>(addr); - default: return GPU::ReadVRAM_LCDC<u16>(addr); + case 0x00000000: return GPU->ReadVRAM_ABG<u16>(addr); + case 0x00200000: return GPU->ReadVRAM_BBG<u16>(addr); + case 0x00400000: return GPU->ReadVRAM_AOBJ<u16>(addr); + case 0x00600000: return GPU->ReadVRAM_BOBJ<u16>(addr); + default: return GPU->ReadVRAM_LCDC<u16>(addr); } case 0x07000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return 0; - return GPU::ReadOAM<u16>(addr); + return GPU->ReadOAM<u16>(addr); case 0x08000000: case 0x09000000: @@ -2273,21 +2274,21 @@ u32 ARM9Read32(u32 addr) case 0x05000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return 0; - return GPU::ReadPalette<u32>(addr); + return GPU->ReadPalette<u32>(addr); case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU::ReadVRAM_ABG<u32>(addr); - case 0x00200000: return GPU::ReadVRAM_BBG<u32>(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ<u32>(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ<u32>(addr); - default: return GPU::ReadVRAM_LCDC<u32>(addr); + case 0x00000000: return GPU->ReadVRAM_ABG<u32>(addr); + case 0x00200000: return GPU->ReadVRAM_BBG<u32>(addr); + case 0x00400000: return GPU->ReadVRAM_AOBJ<u32>(addr); + case 0x00600000: return GPU->ReadVRAM_BOBJ<u32>(addr); + default: return GPU->ReadVRAM_LCDC<u32>(addr); } case 0x07000000: if (!(PowerControl9 & ((addr & 0x400) ? 
(1<<9) : (1<<1)))) return 0; - return GPU::ReadOAM<u32>(addr & 0x7FF); + return GPU->ReadOAM<u32>(addr & 0x7FF); case 0x08000000: case 0x09000000: @@ -2382,7 +2383,7 @@ void ARM9Write16(u32 addr, u16 val) case 0x05000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return; - GPU::WritePalette<u16>(addr, val); + GPU->WritePalette<u16>(addr, val); return; case 0x06000000: @@ -2391,16 +2392,16 @@ void ARM9Write16(u32 addr, u16 val) #endif switch (addr & 0x00E00000) { - case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return; - default: GPU::WriteVRAM_LCDC<u16>(addr, val); return; + case 0x00000000: GPU->WriteVRAM_ABG<u16>(addr, val); return; + case 0x00200000: GPU->WriteVRAM_BBG<u16>(addr, val); return; + case 0x00400000: GPU->WriteVRAM_AOBJ<u16>(addr, val); return; + case 0x00600000: GPU->WriteVRAM_BOBJ<u16>(addr, val); return; + default: GPU->WriteVRAM_LCDC<u16>(addr, val); return; } case 0x07000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return; - GPU::WriteOAM<u16>(addr, val); + GPU->WriteOAM<u16>(addr, val); return; case 0x08000000: @@ -2450,7 +2451,7 @@ void ARM9Write32(u32 addr, u32 val) case 0x05000000: if (!(PowerControl9 & ((addr & 0x400) ? 
(1<<9) : (1<<1)))) return; - GPU::WritePalette(addr, val); + GPU->WritePalette(addr, val); return; case 0x06000000: @@ -2459,16 +2460,16 @@ void ARM9Write32(u32 addr, u32 val) #endif switch (addr & 0x00E00000) { - case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return; - default: GPU::WriteVRAM_LCDC<u32>(addr, val); return; + case 0x00000000: GPU->WriteVRAM_ABG<u32>(addr, val); return; + case 0x00200000: GPU->WriteVRAM_BBG<u32>(addr, val); return; + case 0x00400000: GPU->WriteVRAM_AOBJ<u32>(addr, val); return; + case 0x00600000: GPU->WriteVRAM_BOBJ<u32>(addr, val); return; + default: GPU->WriteVRAM_LCDC<u32>(addr, val); return; } case 0x07000000: if (!(PowerControl9 & ((addr & 0x400) ? (1<<9) : (1<<1)))) return; - GPU::WriteOAM<u32>(addr, val); + GPU->WriteOAM<u32>(addr, val); return; case 0x08000000: @@ -2570,7 +2571,7 @@ u8 ARM7Read8(u32 addr) case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u8>(addr); + return GPU->ReadVRAM_ARM7<u8>(addr); case 0x08000000: case 0x08800000: @@ -2638,7 +2639,7 @@ u16 ARM7Read16(u32 addr) case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u16>(addr); + return GPU->ReadVRAM_ARM7<u16>(addr); case 0x08000000: case 0x08800000: @@ -2706,7 +2707,7 @@ u32 ARM7Read32(u32 addr) case 0x06000000: case 0x06800000: - return GPU::ReadVRAM_ARM7<u32>(addr); + return GPU->ReadVRAM_ARM7<u32>(addr); case 0x08000000: case 0x08800000: @@ -2777,7 +2778,7 @@ void ARM7Write8(u32 addr, u8 val) #ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); #endif - GPU::WriteVRAM_ARM7<u8>(addr, val); + GPU->WriteVRAM_ARM7<u8>(addr, val); return; case 0x08000000: @@ -2856,7 +2857,7 @@ void ARM7Write16(u32 addr, u16 val) #ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); #endif - 
GPU::WriteVRAM_ARM7<u16>(addr, val); + GPU->WriteVRAM_ARM7<u16>(addr, val); return; case 0x08000000: @@ -2938,7 +2939,7 @@ void ARM7Write32(u32 addr, u32 val) #ifdef JIT_ENABLED ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr); #endif - GPU::WriteVRAM_ARM7<u32>(addr, val); + GPU->WriteVRAM_ARM7<u32>(addr, val); return; case 0x08000000: @@ -3073,16 +3074,16 @@ u8 ARM9IORead8(u32 addr) case 0x04000208: return IME[0]; - case 0x04000240: return GPU::VRAMCNT[0]; - case 0x04000241: return GPU::VRAMCNT[1]; - case 0x04000242: return GPU::VRAMCNT[2]; - case 0x04000243: return GPU::VRAMCNT[3]; - case 0x04000244: return GPU::VRAMCNT[4]; - case 0x04000245: return GPU::VRAMCNT[5]; - case 0x04000246: return GPU::VRAMCNT[6]; + case 0x04000240: return GPU->VRAMCNT[0]; + case 0x04000241: return GPU->VRAMCNT[1]; + case 0x04000242: return GPU->VRAMCNT[2]; + case 0x04000243: return GPU->VRAMCNT[3]; + case 0x04000244: return GPU->VRAMCNT[4]; + case 0x04000245: return GPU->VRAMCNT[5]; + case 0x04000246: return GPU->VRAMCNT[6]; case 0x04000247: return WRAMCnt; - case 0x04000248: return GPU::VRAMCNT[7]; - case 0x04000249: return GPU::VRAMCNT[8]; + case 0x04000248: return GPU->VRAMCNT[7]; + case 0x04000249: return GPU->VRAMCNT[8]; CASE_READ8_16BIT(0x04000280, DivCnt) CASE_READ8_32BIT(0x04000290, DivNumerator[0]) @@ -3104,15 +3105,15 @@ u8 ARM9IORead8(u32 addr) if (addr >= 0x04000000 && addr < 0x04000060) { - return GPU::GPU2D_A.Read8(addr); + return GPU->GPU2D_A.Read8(addr); } if (addr >= 0x04001000 && addr < 0x04001060) { - return GPU::GPU2D_B.Read8(addr); + return GPU->GPU2D_B.Read8(addr); } if (addr >= 0x04000320 && addr < 0x040006A4) { - return GPU3D::Read8(addr); + return GPU->GPU3D.Read8(addr); } // NO$GBA debug register "Emulation ID" if(addr >= 0x04FFFA00 && addr < 0x04FFFA10) @@ -3132,12 +3133,12 @@ u16 ARM9IORead16(u32 addr) { switch (addr) { - case 0x04000004: return GPU::DispStat[0]; - case 0x04000006: return GPU::VCount; + case 0x04000004: return 
GPU->DispStat[0]; + case 0x04000006: return GPU->VCount; - case 0x04000060: return GPU3D::Read16(addr); + case 0x04000060: return GPU->GPU3D.Read16(addr); case 0x04000064: - case 0x04000066: return GPU::GPU2D_A.Read16(addr); + case 0x04000066: return GPU->GPU2D_A.Read16(addr); case 0x040000B8: return DMAs[0]->Cnt & 0xFFFF; case 0x040000BA: return DMAs[0]->Cnt >> 16; @@ -3215,11 +3216,11 @@ u16 ARM9IORead16(u32 addr) case 0x04000210: return IE[0] & 0xFFFF; case 0x04000212: return IE[0] >> 16; - case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8); - case 0x04000242: return GPU::VRAMCNT[2] | (GPU::VRAMCNT[3] << 8); - case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8); - case 0x04000246: return GPU::VRAMCNT[6] | (WRAMCnt << 8); - case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8); + case 0x04000240: return GPU->VRAMCNT[0] | (GPU->VRAMCNT[1] << 8); + case 0x04000242: return GPU->VRAMCNT[2] | (GPU->VRAMCNT[3] << 8); + case 0x04000244: return GPU->VRAMCNT[4] | (GPU->VRAMCNT[5] << 8); + case 0x04000246: return GPU->VRAMCNT[6] | (WRAMCnt << 8); + case 0x04000248: return GPU->VRAMCNT[7] | (GPU->VRAMCNT[8] << 8); case 0x04000280: return DivCnt; case 0x04000290: return DivNumerator[0] & 0xFFFF; @@ -3259,15 +3260,15 @@ u16 ARM9IORead16(u32 addr) if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C)) { - return GPU::GPU2D_A.Read16(addr); + return GPU->GPU2D_A.Read16(addr); } if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C)) { - return GPU::GPU2D_B.Read16(addr); + return GPU->GPU2D_B.Read16(addr); } if (addr >= 0x04000320 && addr < 0x040006A4) { - return GPU3D::Read16(addr); + return GPU->GPU3D.Read16(addr); } if ((addr & 0xFFFFF000) != 0x04004000) @@ -3279,10 +3280,10 @@ u32 ARM9IORead32(u32 addr) { switch (addr) { - case 0x04000004: return GPU::DispStat[0] | (GPU::VCount << 16); + case 0x04000004: return GPU->DispStat[0] | (GPU->VCount << 16); - case 0x04000060: return GPU3D::Read32(addr); - case 
0x04000064: return GPU::GPU2D_A.Read32(addr); + case 0x04000060: return GPU->GPU3D.Read32(addr); + case 0x04000064: return GPU->GPU2D_A.Read32(addr); case 0x040000B0: return DMAs[0]->SrcAddr; case 0x040000B4: return DMAs[0]->DstAddr; @@ -3342,9 +3343,9 @@ u32 ARM9IORead32(u32 addr) case 0x04000210: return IE[0]; case 0x04000214: return IF[0]; - case 0x04000240: return GPU::VRAMCNT[0] | (GPU::VRAMCNT[1] << 8) | (GPU::VRAMCNT[2] << 16) | (GPU::VRAMCNT[3] << 24); - case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24); - case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8); + case 0x04000240: return GPU->VRAMCNT[0] | (GPU->VRAMCNT[1] << 8) | (GPU->VRAMCNT[2] << 16) | (GPU->VRAMCNT[3] << 24); + case 0x04000244: return GPU->VRAMCNT[4] | (GPU->VRAMCNT[5] << 8) | (GPU->VRAMCNT[6] << 16) | (WRAMCnt << 24); + case 0x04000248: return GPU->VRAMCNT[7] | (GPU->VRAMCNT[8] << 8); case 0x04000280: return DivCnt; case 0x04000290: return DivNumerator[0]; @@ -3403,15 +3404,15 @@ u32 ARM9IORead32(u32 addr) if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C)) { - return GPU::GPU2D_A.Read32(addr); + return GPU->GPU2D_A.Read32(addr); } if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C)) { - return GPU::GPU2D_B.Read32(addr); + return GPU->GPU2D_B.Read32(addr); } if (addr >= 0x04000320 && addr < 0x040006A4) { - return GPU3D::Read32(addr); + return GPU->GPU3D.Read32(addr); } if ((addr & 0xFFFFF000) != 0x04004000) @@ -3424,9 +3425,9 @@ void ARM9IOWrite8(u32 addr, u8 val) switch (addr) { case 0x0400006C: - case 0x0400006D: GPU::GPU2D_A.Write8(addr, val); return; + case 0x0400006D: GPU->GPU2D_A.Write8(addr, val); return; case 0x0400106C: - case 0x0400106D: GPU::GPU2D_B.Write8(addr, val); return; + case 0x0400106D: GPU->GPU2D_B.Write8(addr, val); return; case 0x04000132: KeyCnt[0] = (KeyCnt[0] & 0xFF00) | val; @@ -3463,16 +3464,16 @@ void ARM9IOWrite8(u32 addr, u8 val) case 0x04000208: IME[0] 
= val & 0x1; UpdateIRQ(0); return; - case 0x04000240: GPU::MapVRAM_AB(0, val); return; - case 0x04000241: GPU::MapVRAM_AB(1, val); return; - case 0x04000242: GPU::MapVRAM_CD(2, val); return; - case 0x04000243: GPU::MapVRAM_CD(3, val); return; - case 0x04000244: GPU::MapVRAM_E(4, val); return; - case 0x04000245: GPU::MapVRAM_FG(5, val); return; - case 0x04000246: GPU::MapVRAM_FG(6, val); return; + case 0x04000240: GPU->MapVRAM_AB(0, val); return; + case 0x04000241: GPU->MapVRAM_AB(1, val); return; + case 0x04000242: GPU->MapVRAM_CD(2, val); return; + case 0x04000243: GPU->MapVRAM_CD(3, val); return; + case 0x04000244: GPU->MapVRAM_E(4, val); return; + case 0x04000245: GPU->MapVRAM_FG(5, val); return; + case 0x04000246: GPU->MapVRAM_FG(6, val); return; case 0x04000247: MapSharedWRAM(val); return; - case 0x04000248: GPU::MapVRAM_H(7, val); return; - case 0x04000249: GPU::MapVRAM_I(8, val); return; + case 0x04000248: GPU->MapVRAM_H(7, val); return; + case 0x04000249: GPU->MapVRAM_I(8, val); return; case 0x04000300: if (PostFlag9 & 0x01) val |= 0x01; @@ -3482,17 +3483,17 @@ void ARM9IOWrite8(u32 addr, u8 val) if (addr >= 0x04000000 && addr < 0x04000060) { - GPU::GPU2D_A.Write8(addr, val); + GPU->GPU2D_A.Write8(addr, val); return; } if (addr >= 0x04001000 && addr < 0x04001060) { - GPU::GPU2D_B.Write8(addr, val); + GPU->GPU2D_B.Write8(addr, val); return; } if (addr >= 0x04000320 && addr < 0x040006A4) { - GPU3D::Write8(addr, val); + GPU->GPU3D.Write8(addr, val); return; } @@ -3503,16 +3504,16 @@ void ARM9IOWrite16(u32 addr, u16 val) { switch (addr) { - case 0x04000004: GPU::SetDispStat(0, val); return; - case 0x04000006: GPU::SetVCount(val); return; + case 0x04000004: GPU->SetDispStat(0, val); return; + case 0x04000006: GPU->SetVCount(val); return; - case 0x04000060: GPU3D::Write16(addr, val); return; + case 0x04000060: GPU->GPU3D.Write16(addr, val); return; case 0x04000068: - case 0x0400006A: GPU::GPU2D_A.Write16(addr, val); return; + case 0x0400006A: 
GPU->GPU2D_A.Write16(addr, val); return; - case 0x0400006C: GPU::GPU2D_A.Write16(addr, val); return; - case 0x0400106C: GPU::GPU2D_B.Write16(addr, val); return; + case 0x0400006C: GPU->GPU2D_A.Write16(addr, val); return; + case 0x0400106C: GPU->GPU2D_B.Write16(addr, val); return; case 0x040000B8: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0xFFFF0000) | val); return; case 0x040000BA: DMAs[0]->WriteCnt((DMAs[0]->Cnt & 0x0000FFFF) | (val << 16)); return; @@ -3629,24 +3630,24 @@ void ARM9IOWrite16(u32 addr, u16 val) // TODO: what happens when writing to IF this way?? case 0x04000240: - GPU::MapVRAM_AB(0, val & 0xFF); - GPU::MapVRAM_AB(1, val >> 8); + GPU->MapVRAM_AB(0, val & 0xFF); + GPU->MapVRAM_AB(1, val >> 8); return; case 0x04000242: - GPU::MapVRAM_CD(2, val & 0xFF); - GPU::MapVRAM_CD(3, val >> 8); + GPU->MapVRAM_CD(2, val & 0xFF); + GPU->MapVRAM_CD(3, val >> 8); return; case 0x04000244: - GPU::MapVRAM_E(4, val & 0xFF); - GPU::MapVRAM_FG(5, val >> 8); + GPU->MapVRAM_E(4, val & 0xFF); + GPU->MapVRAM_FG(5, val >> 8); return; case 0x04000246: - GPU::MapVRAM_FG(6, val & 0xFF); + GPU->MapVRAM_FG(6, val & 0xFF); MapSharedWRAM(val >> 8); return; case 0x04000248: - GPU::MapVRAM_H(7, val & 0xFF); - GPU::MapVRAM_I(8, val >> 8); + GPU->MapVRAM_H(7, val & 0xFF); + GPU->MapVRAM_I(8, val >> 8); return; case 0x04000280: DivCnt = val; StartDiv(); return; @@ -3660,23 +3661,23 @@ void ARM9IOWrite16(u32 addr, u16 val) case 0x04000304: PowerControl9 = val & 0x820F; - GPU::SetPowerCnt(PowerControl9); + GPU->SetPowerCnt(PowerControl9); return; } if (addr >= 0x04000000 && addr < 0x04000060) { - GPU::GPU2D_A.Write16(addr, val); + GPU->GPU2D_A.Write16(addr, val); return; } if (addr >= 0x04001000 && addr < 0x04001060) { - GPU::GPU2D_B.Write16(addr, val); + GPU->GPU2D_B.Write16(addr, val); return; } if (addr >= 0x04000320 && addr < 0x040006A4) { - GPU3D::Write16(addr, val); + GPU->GPU3D.Write16(addr, val); return; } @@ -3688,16 +3689,16 @@ void ARM9IOWrite32(u32 addr, u32 val) switch (addr) { case 
0x04000004: - GPU::SetDispStat(0, val & 0xFFFF); - GPU::SetVCount(val >> 16); + GPU->SetDispStat(0, val & 0xFFFF); + GPU->SetVCount(val >> 16); return; - case 0x04000060: GPU3D::Write32(addr, val); return; + case 0x04000060: GPU->GPU3D.Write32(addr, val); return; case 0x04000064: - case 0x04000068: GPU::GPU2D_A.Write32(addr, val); return; + case 0x04000068: GPU->GPU2D_A.Write32(addr, val); return; - case 0x0400006C: GPU::GPU2D_A.Write16(addr, val&0xFFFF); return; - case 0x0400106C: GPU::GPU2D_B.Write16(addr, val&0xFFFF); return; + case 0x0400006C: GPU->GPU2D_A.Write16(addr, val&0xFFFF); return; + case 0x0400106C: GPU->GPU2D_B.Write16(addr, val&0xFFFF); return; case 0x040000B0: DMAs[0]->SrcAddr = val; return; case 0x040000B4: DMAs[0]->DstAddr = val; return; @@ -3793,23 +3794,23 @@ void ARM9IOWrite32(u32 addr, u32 val) case 0x04000208: IME[0] = val & 0x1; UpdateIRQ(0); return; case 0x04000210: IE[0] = val; UpdateIRQ(0); return; - case 0x04000214: IF[0] &= ~val; GPU3D::CheckFIFOIRQ(); UpdateIRQ(0); return; + case 0x04000214: IF[0] &= ~val; GPU->GPU3D.CheckFIFOIRQ(); UpdateIRQ(0); return; case 0x04000240: - GPU::MapVRAM_AB(0, val & 0xFF); - GPU::MapVRAM_AB(1, (val >> 8) & 0xFF); - GPU::MapVRAM_CD(2, (val >> 16) & 0xFF); - GPU::MapVRAM_CD(3, val >> 24); + GPU->MapVRAM_AB(0, val & 0xFF); + GPU->MapVRAM_AB(1, (val >> 8) & 0xFF); + GPU->MapVRAM_CD(2, (val >> 16) & 0xFF); + GPU->MapVRAM_CD(3, val >> 24); return; case 0x04000244: - GPU::MapVRAM_E(4, val & 0xFF); - GPU::MapVRAM_FG(5, (val >> 8) & 0xFF); - GPU::MapVRAM_FG(6, (val >> 16) & 0xFF); + GPU->MapVRAM_E(4, val & 0xFF); + GPU->MapVRAM_FG(5, (val >> 8) & 0xFF); + GPU->MapVRAM_FG(6, (val >> 16) & 0xFF); MapSharedWRAM(val >> 24); return; case 0x04000248: - GPU::MapVRAM_H(7, val & 0xFF); - GPU::MapVRAM_I(8, (val >> 8) & 0xFF); + GPU->MapVRAM_H(7, val & 0xFF); + GPU->MapVRAM_I(8, (val >> 8) & 0xFF); return; case 0x04000280: DivCnt = val; StartDiv(); return; @@ -3826,7 +3827,7 @@ void ARM9IOWrite32(u32 addr, u32 val) case 
0x04000304: PowerControl9 = val & 0x820F; - GPU::SetPowerCnt(PowerControl9); + GPU->SetPowerCnt(PowerControl9); return; case 0x04100010: @@ -3864,17 +3865,17 @@ void ARM9IOWrite32(u32 addr, u32 val) if (addr >= 0x04000000 && addr < 0x04000060) { - GPU::GPU2D_A.Write32(addr, val); + GPU->GPU2D_A.Write32(addr, val); return; } if (addr >= 0x04001000 && addr < 0x04001060) { - GPU::GPU2D_B.Write32(addr, val); + GPU->GPU2D_B.Write32(addr, val); return; } if (addr >= 0x04000320 && addr < 0x040006A4) { - GPU3D::Write32(addr, val); + GPU->GPU3D.Write32(addr, val); return; } @@ -3939,7 +3940,7 @@ u8 ARM7IORead8(u32 addr) case 0x04000208: return IME[1]; - case 0x04000240: return GPU::VRAMSTAT; + case 0x04000240: return GPU->VRAMSTAT; case 0x04000241: return WRAMCnt; case 0x04000300: return PostFlag7; @@ -3960,8 +3961,8 @@ u16 ARM7IORead16(u32 addr) { switch (addr) { - case 0x04000004: return GPU::DispStat[1]; - case 0x04000006: return GPU::VCount; + case 0x04000004: return GPU->DispStat[1]; + case 0x04000006: return GPU->VCount; case 0x040000B8: return DMAs[4]->Cnt & 0xFFFF; case 0x040000BA: return DMAs[4]->Cnt >> 16; @@ -4054,7 +4055,7 @@ u32 ARM7IORead32(u32 addr) { switch (addr) { - case 0x04000004: return GPU::DispStat[1] | (GPU::VCount << 16); + case 0x04000004: return GPU->DispStat[1] | (GPU->VCount << 16); case 0x040000B0: return DMAs[4]->SrcAddr; case 0x040000B4: return DMAs[4]->DstAddr; @@ -4232,8 +4233,8 @@ void ARM7IOWrite16(u32 addr, u16 val) { switch (addr) { - case 0x04000004: GPU::SetDispStat(1, val); return; - case 0x04000006: GPU::SetVCount(val); return; + case 0x04000004: GPU->SetDispStat(1, val); return; + case 0x04000006: GPU->SetVCount(val); return; case 0x040000B8: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0xFFFF0000) | val); return; case 0x040000BA: DMAs[4]->WriteCnt((DMAs[4]->Cnt & 0x0000FFFF) | (val << 16)); return; @@ -4388,8 +4389,8 @@ void ARM7IOWrite32(u32 addr, u32 val) switch (addr) { case 0x04000004: - GPU::SetDispStat(1, val & 0xFFFF); - 
GPU::SetVCount(val >> 16); + GPU->SetDispStat(1, val & 0xFFFF); + GPU->SetVCount(val >> 16); return; case 0x040000B0: DMAs[4]->SrcAddr = val; return; @@ -19,6 +19,7 @@ #ifndef NDS_H #define NDS_H +#include <memory> #include <string> #include <memory> #include <functional> @@ -40,6 +41,11 @@ class Wifi; class AREngine; +namespace Melon +{ +class GPU; +} + namespace NDS { @@ -262,6 +268,7 @@ extern class RTC* RTC; extern class Wifi* Wifi; extern std::unique_ptr<NDSCart::NDSCartSlot> NDSCartSlot; extern std::unique_ptr<GBACart::GBACartSlot> GBACartSlot; +extern std::unique_ptr<Melon::GPU> GPU; extern class AREngine* AREngine; const u32 ARM7WRAMSize = 0x10000; diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp index a4e56d4..b820b50 100644 --- a/src/frontend/qt_sdl/main.cpp +++ b/src/frontend/qt_sdl/main.cpp @@ -163,7 +163,7 @@ EmuThread* emuThread; int autoScreenSizing = 0; int videoRenderer; -GPU::RenderSettings videoSettings; +Melon::RenderSettings videoSettings; bool videoSettingsDirty; CameraManager* camManager[2]; @@ -340,8 +340,8 @@ void EmuThread::run() videoRenderer = 0; } - GPU::InitRenderer(videoRenderer); - GPU::SetRenderSettings(videoRenderer, videoSettings); + NDS::GPU->InitRenderer(videoRenderer); + NDS::GPU->SetRenderSettings(videoRenderer, videoSettings); NDS::SPU->SetInterpolation(Config::AudioInterp); @@ -472,7 +472,7 @@ void EmuThread::run() videoSettings.GL_ScaleFactor = Config::GL_ScaleFactor; videoSettings.GL_BetterPolygons = Config::GL_BetterPolygons; - GPU::SetRenderSettings(videoRenderer, videoSettings); + NDS::GPU->SetRenderSettings(videoRenderer, videoSettings); } // process input and hotkeys @@ -534,12 +534,12 @@ void EmuThread::run() if (!oglContext) { FrontBufferLock.lock(); - FrontBuffer = GPU::FrontBuffer; + FrontBuffer = NDS::GPU->FrontBuffer; FrontBufferLock.unlock(); } else { - FrontBuffer = GPU::FrontBuffer; + FrontBuffer = NDS::GPU->FrontBuffer; drawScreenGL(); } @@ -676,7 +676,7 @@ void EmuThread::run() 
EmuStatus = emuStatus_Exit; - GPU::DeInitRenderer(); + NDS::GPU->DeInitRenderer(); NDS::DeInit(); //Platform::LAN_DeInit(); } @@ -780,10 +780,10 @@ void EmuThread::drawScreenGL() glActiveTexture(GL_TEXTURE0); #ifdef OGLRENDERER_ENABLED - if (GPU::Renderer != 0) + if (NDS::GPU->Renderer != 0) { // hardware-accelerated render - GPU::CurGLCompositor->BindOutputTexture(frontbuf); + NDS::GPU->CurGLCompositor->BindOutputTexture(frontbuf); } else #endif @@ -791,12 +791,12 @@ void EmuThread::drawScreenGL() // regular render glBindTexture(GL_TEXTURE_2D, screenTexture); - if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + if (NDS::GPU->Framebuffer[frontbuf][0] && NDS::GPU->Framebuffer[frontbuf][1]) { glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + GL_UNSIGNED_BYTE, NDS::GPU->Framebuffer[frontbuf][0]); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + GL_UNSIGNED_BYTE, NDS::GPU->Framebuffer[frontbuf][1]); } } @@ -1082,14 +1082,14 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event) { emuThread->FrontBufferLock.lock(); int frontbuf = emuThread->FrontBuffer; - if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1]) + if (!NDS::GPU->Framebuffer[frontbuf][0] || !NDS::GPU->Framebuffer[frontbuf][1]) { emuThread->FrontBufferLock.unlock(); return; } - memcpy(screen[0].scanLine(0), GPU::Framebuffer[frontbuf][0], 256 * 192 * 4); - memcpy(screen[1].scanLine(0), GPU::Framebuffer[frontbuf][1], 256 * 192 * 4); + memcpy(screen[0].scanLine(0), NDS::GPU->Framebuffer[frontbuf][0], 256 * 192 * 4); + memcpy(screen[1].scanLine(0), NDS::GPU->Framebuffer[frontbuf][1], 256 * 192 * 4); emuThread->FrontBufferLock.unlock(); QRect screenrc(0, 0, 256, 192); |