diff options
author | Arisotura <thetotalworm@gmail.com> | 2019-05-25 20:42:27 +0200 |
---|---|---|
committer | Arisotura <thetotalworm@gmail.com> | 2019-05-25 20:42:27 +0200 |
commit | 94f5ecb64714c3a4026bebe4f81a99ca4dba0362 (patch) | |
tree | cfc88ac94ce13bb1332aadde9f15c6eb4e61aee5 | |
parent | 63e42bf90fa9d78c92123dcbc9c2b8ca5bb5e3ba (diff) | |
parent | 9ed1dda9ca18e571fc6613885ac944bbb938cd9a (diff) |
Merge branch 'blackmagic'
BAHAHAHHAHAHAHAAHAHAHAHHH
HARK HARK HARK HARK HA-*~
33 files changed, 4218 insertions, 553 deletions
diff --git a/melonDS.cbp b/melonDS.cbp index fc5f8df..bb4d8c7 100644 --- a/melonDS.cbp +++ b/melonDS.cbp @@ -19,22 +19,6 @@ </Compiler> <Linker> <Add option="-m64" /> - <Add library="SDL2" /> - <Add library="shell32" /> - <Add library="comctl32" /> - <Add library="comdlg32" /> - <Add library="advapi32" /> - <Add library="wsock32" /> - <Add library="oleacc" /> - <Add library="ole32" /> - <Add library="usp10" /> - <Add library="gdi32" /> - <Add library="d2d1" /> - <Add library="dwrite" /> - <Add library="uxtheme" /> - <Add library="iphlpapi" /> - <Add library="user32" /> - <Add library="ws2_32" /> </Linker> </Target> <Target title="Release Windows"> @@ -51,22 +35,6 @@ <Linker> <Add option="-s" /> <Add option="-m64" /> - <Add library="SDL2" /> - <Add library="shell32" /> - <Add library="comctl32" /> - <Add library="comdlg32" /> - <Add library="advapi32" /> - <Add library="wsock32" /> - <Add library="oleacc" /> - <Add library="ole32" /> - <Add library="usp10" /> - <Add library="gdi32" /> - <Add library="d2d1" /> - <Add library="dwrite" /> - <Add library="uxtheme" /> - <Add library="iphlpapi" /> - <Add library="user32" /> - <Add library="ws2_32" /> </Linker> </Target> <Target title="DebugFast Windows"> @@ -82,22 +50,6 @@ </Compiler> <Linker> <Add option="-m64" /> - <Add library="SDL2" /> - <Add library="shell32" /> - <Add library="comctl32" /> - <Add library="comdlg32" /> - <Add library="advapi32" /> - <Add library="wsock32" /> - <Add library="oleacc" /> - <Add library="ole32" /> - <Add library="usp10" /> - <Add library="gdi32" /> - <Add library="d2d1" /> - <Add library="dwrite" /> - <Add library="uxtheme" /> - <Add library="iphlpapi" /> - <Add library="user32" /> - <Add library="ws2_32" /> </Linker> </Target> </Build> @@ -107,6 +59,25 @@ <Add option="-pipe" /> <Add directory="src" /> </Compiler> + <Linker> + <Add library="SDL2" /> + <Add library="shell32" /> + <Add library="comctl32" /> + <Add library="comdlg32" /> + <Add library="advapi32" /> + <Add library="wsock32" /> + <Add library="oleacc" /> + <Add library="ole32" /> + <Add library="usp10" /> + <Add library="gdi32" /> + <Add library="d2d1" /> + <Add library="dwrite" /> + <Add library="uxtheme" /> + <Add library="iphlpapi" /> + <Add library="user32" /> + <Add library="ws2_32" /> + <Add library="opengl32" /> + </Linker> <Unit filename="melon.rc"> <Option compilerVar="WINDRES" /> </Unit> @@ -135,11 +106,15 @@ <Unit filename="src/GPU2D.h" /> <Unit filename="src/GPU3D.cpp" /> <Unit filename="src/GPU3D.h" /> + <Unit filename="src/GPU3D_OpenGL.cpp" /> + <Unit filename="src/GPU3D_OpenGL_shaders.h" /> <Unit filename="src/GPU3D_Soft.cpp" /> <Unit filename="src/NDS.cpp" /> <Unit filename="src/NDS.h" /> <Unit filename="src/NDSCart.cpp" /> <Unit filename="src/NDSCart.h" /> + <Unit filename="src/OpenGLSupport.cpp" /> + <Unit filename="src/OpenGLSupport.h" /> <Unit filename="src/Platform.h" /> <Unit filename="src/RTC.cpp" /> <Unit filename="src/RTC.h" /> @@ -159,6 +134,8 @@ <Unit filename="src/libui_sdl/DlgEmuSettings.h" /> <Unit filename="src/libui_sdl/DlgInputConfig.cpp" /> <Unit filename="src/libui_sdl/DlgInputConfig.h" /> + <Unit filename="src/libui_sdl/DlgVideoSettings.cpp" /> + <Unit filename="src/libui_sdl/DlgVideoSettings.h" /> <Unit filename="src/libui_sdl/DlgWifiSettings.cpp" /> <Unit filename="src/libui_sdl/DlgWifiSettings.h" /> <Unit filename="src/libui_sdl/LAN_PCap.cpp" /> @@ -222,6 +199,7 @@ <Unit filename="src/libui_sdl/libui/windows/fontbutton.cpp" /> <Unit filename="src/libui_sdl/libui/windows/fontdialog.cpp" /> <Unit filename="src/libui_sdl/libui/windows/form.cpp" /> + <Unit filename="src/libui_sdl/libui/windows/gl.cpp" /> <Unit filename="src/libui_sdl/libui/windows/graphemes.cpp" /> <Unit filename="src/libui_sdl/libui/windows/grid.cpp" /> <Unit filename="src/libui_sdl/libui/windows/group.cpp" /> @@ -253,6 +231,7 @@ <Unit filename="src/libui_sdl/libui/windows/winpublic.cpp" /> <Unit filename="src/libui_sdl/libui/windows/winutil.cpp" /> <Unit filename="src/libui_sdl/main.cpp" /> + <Unit filename="src/libui_sdl/main_shaders.h" /> <Unit filename="src/pcap/bluetooth.h" /> <Unit filename="src/pcap/bpf.h" /> <Unit filename="src/pcap/can_socketcan.h" /> diff --git a/src/ARM.cpp b/src/ARM.cpp index f1bed5f..69755ff 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -159,7 +159,7 @@ void ARM::SetupCodeMem(u32 addr) //NDS::ARM7GetMemRegion(addr, false, &CodeMem); } } - +extern u64 vbltime; void ARMv5::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) @@ -180,6 +180,16 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) if (R[15]==0x0204BE5E) printf("recvfrom() ret:%d errno:%d %08X\n", R[0], NDS::ARM9Read32(0x217F398), addr); if (R[15]==0x0205038A) printf("sgrecvfrom() ret:%d errno:%d %08X\n", R[0], NDS::ARM9Read32(0x217F398), addr); if (addr==0x02050379 || addr==0x0205036D) printf("morp %08X->%08X, %d\n", R[15], addr, R[7]);*/ + /*if (addr==0x020B5F14) printf("VRAM UNMAP %02X %08X\n", R[0], R[15]); + if (addr==0x020B5FAC) printf("VRAM MAP %2X %08X\n", R[0], R[15]); + if (addr==0x0209F860) printf("VRAM BLORP %02X %08X\n", R[0], R[15]); + if (addr==0x02005A34) printf("VAZAVAZORP %08X. VCOUNT=%d\n", R[15], NDS::ARM9Read16(0x04000006)); + if (addr==0x0209FBEC) printf("COUILLON. %08X %08X\n", R[0], R[1]); + if (addr==0x02004AA8) printf("ANEBATE 1 %d\n", NDS::ARM9Read16(0x04000006)); + if (addr==0x020058C8) printf("ANEBATE 2 %d\n", NDS::ARM9Read16(0x04000006)); + if (addr==0x02005398) printf("ANEBATE 3 %d %d\n", NDS::ARM9Read16(0x04000006), (u32)(NDS::ARM9Timestamp-vbltime)); + if (addr==0x02005A5C) printf("PLAFORP %d\n", NDS::ARM9Read16(0x04000006)); + if (addr==0x209FBDC) printf("ROLOLORP\n");*/ u32 oldregion = R[15] >> 24; u32 newregion = addr >> 24; @@ -508,6 +518,8 @@ void ARMv5::Execute() } else AddCycles_C(); + + //if (R[15]>=0x02005A5C && R[15]<=0x02005A84) printf("NORP %08X %d\n", R[15]-8, NDS::ARM9Read16(0x04000006)); } // TODO optimize this shit!!! diff --git a/src/Config.cpp b/src/Config.cpp index 4182dba..42f18c7 100644 --- a/src/Config.cpp +++ b/src/Config.cpp @@ -28,12 +28,20 @@ namespace Config const char* kConfigFile = "melonDS.ini"; +int _3DRenderer; int Threaded3D; +int GL_ScaleFactor; +int GL_Antialias; + ConfigEntry ConfigFile[] = { + {"3DRenderer", 0, &_3DRenderer, 1, NULL, 0}, {"Threaded3D", 0, &Threaded3D, 1, NULL, 0}, + {"GL_ScaleFactor", 0, &GL_ScaleFactor, 1, NULL, 0}, + {"GL_Antialias", 0, &GL_Antialias, 0, NULL, 0}, + {"", -1, NULL, 0, NULL, 0} }; diff --git a/src/Config.h b/src/Config.h index 88eb202..642e9d5 100644 --- a/src/Config.h +++ b/src/Config.h @@ -40,8 +40,12 @@ bool HasConfigFile(const char* fileName); void Load(); void Save(); +extern int _3DRenderer; extern int Threaded3D; +extern int GL_ScaleFactor; +extern int GL_Antialias; + } #endif // CONFIG_H diff --git a/src/GPU.cpp b/src/GPU.cpp index 14e562e..d1870fd 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -20,7 +20,7 @@ #include <string.h> #include "NDS.h" #include "GPU.h" - +u64 vbltime; namespace GPU { @@ -71,7 +71,9 @@ u32 VRAMMap_TexPal[8]; u32 VRAMMap_ARM7[2]; -u32 Framebuffer[256*192*2]; +int FrontBuffer; +u32* Framebuffer[2][2]; +bool Accelerated; GPU2D* GPU2D_A; GPU2D* GPU2D_B; @@ -83,6 +85,12 @@ bool Init() GPU2D_B = new GPU2D(1); if (!GPU3D::Init()) return false; + FrontBuffer = 0; + Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL; + Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL; + Accelerated = false; + SetDisplaySettings(false); + return true; } @@ -91,6 +99,11 @@ void DeInit() delete GPU2D_A; delete GPU2D_B; GPU3D::DeInit(); + + if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; + if (Framebuffer[0][1]) delete[] Framebuffer[0][1]; + if (Framebuffer[1][0]) delete[] Framebuffer[1][0]; + if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; } void Reset() @@ -137,23 +150,39 @@ void Reset() VRAMMap_ARM7[0] = 0; VRAMMap_ARM7[1] = 0; - - for (int i = 0; i < 256*192*2; i++) +printf("RESET: ACCEL=%d FRAMEBUFFER=%p\n", Accelerated, Framebuffer[0][0]); + int fbsize; + if (Accelerated) fbsize = (256*3 + 1) * 192; + else fbsize = 256 * 192; + for (int i = 0; i < fbsize; i++) + { + Framebuffer[0][0][i] = 0xFFFFFFFF; + Framebuffer[1][0][i] = 0xFFFFFFFF; + } + for (int i = 0; i < fbsize; i++) { - Framebuffer[i] = 0xFFFFFFFF; + Framebuffer[0][1][i] = 0xFFFFFFFF; + Framebuffer[1][1][i] = 0xFFFFFFFF; } GPU2D_A->Reset(); GPU2D_B->Reset(); GPU3D::Reset(); - GPU2D_A->SetFramebuffer(&Framebuffer[256*192]); - GPU2D_B->SetFramebuffer(&Framebuffer[256*0]); + int backbuf = FrontBuffer ? 0 : 1; + GPU2D_A->SetFramebuffer(Framebuffer[backbuf][1]); + GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); } void Stop() { - memset(Framebuffer, 0, 256*192*2*4); + int fbsize; + if (Accelerated) fbsize = (256*3 + 1) * 192; + else fbsize = 256 * 192; + memset(Framebuffer[0][0], 0, fbsize*4); + memset(Framebuffer[0][1], 0, fbsize*4); + memset(Framebuffer[1][0], 0, fbsize*4); + memset(Framebuffer[1][1], 0, fbsize*4); } void DoSavestate(Savestate* file) @@ -208,6 +237,48 @@ void DoSavestate(Savestate* file) GPU3D::DoSavestate(file); } +void AssignFramebuffers() +{ + int backbuf = FrontBuffer ? 0 : 1; + if (NDS::PowerControl9 & (1<<15)) + { + GPU2D_A->SetFramebuffer(Framebuffer[backbuf][0]); + GPU2D_B->SetFramebuffer(Framebuffer[backbuf][1]); + } + else + { + GPU2D_A->SetFramebuffer(Framebuffer[backbuf][1]); + GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]); + } +} + +void SetDisplaySettings(bool accel) +{ + int fbsize; + if (accel) fbsize = (256*3 + 1) * 192; + else fbsize = 256 * 192; + if (Framebuffer[0][0]) delete[] Framebuffer[0][0]; + if (Framebuffer[1][0]) delete[] Framebuffer[1][0]; + if (Framebuffer[0][1]) delete[] Framebuffer[0][1]; + if (Framebuffer[1][1]) delete[] Framebuffer[1][1]; + Framebuffer[0][0] = new u32[fbsize]; + Framebuffer[1][0] = new u32[fbsize]; + Framebuffer[0][1] = new u32[fbsize]; + Framebuffer[1][1] = new u32[fbsize]; + + memset(Framebuffer[0][0], 0, fbsize*4); + memset(Framebuffer[1][0], 0, fbsize*4); + memset(Framebuffer[0][1], 0, fbsize*4); + memset(Framebuffer[1][1], 0, fbsize*4); + + AssignFramebuffers(); + + GPU2D_A->SetDisplaySettings(accel); + GPU2D_B->SetDisplaySettings(accel); + + Accelerated = accel; +} + // VRAM mapping notes // @@ -666,16 +737,7 @@ void SetPowerCnt(u32 val) GPU2D_B->SetEnabled(val & (1<<9)); GPU3D::SetEnabled(val & (1<<3), val & (1<<2)); - if (val & (1<<15)) - { - GPU2D_A->SetFramebuffer(&Framebuffer[256*0]); - GPU2D_B->SetFramebuffer(&Framebuffer[256*192]); - } - else - { - GPU2D_A->SetFramebuffer(&Framebuffer[256*192]); - GPU2D_B->SetFramebuffer(&Framebuffer[256*0]); - } + AssignFramebuffers(); } @@ -746,6 +808,9 @@ void StartHBlank(u32 line) void FinishFrame(u32 lines) { + FrontBuffer = FrontBuffer ? 0 : 1; + AssignFramebuffers(); + TotalScanlines = lines; } @@ -61,7 +61,8 @@ extern u32 VRAMMap_Texture[4]; extern u32 VRAMMap_TexPal[8]; extern u32 VRAMMap_ARM7[2]; -extern u32 Framebuffer[256*192*2]; +extern int FrontBuffer; +extern u32* Framebuffer[2][2]; extern GPU2D* GPU2D_A; extern GPU2D* GPU2D_B; @@ -74,6 +75,8 @@ void Stop(); void DoSavestate(Savestate* file); +void SetDisplaySettings(bool accel); + void MapVRAM_AB(u32 bank, u8 cnt); void MapVRAM_CD(u32 bank, u8 cnt); diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index b120cd8..4fe2209 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -215,6 +215,14 @@ void GPU2D::SetFramebuffer(u32* buf) Framebuffer = buf; } +void GPU2D::SetDisplaySettings(bool accel) +{ + Accelerated = accel; + + if (Accelerated) DrawPixel = DrawPixel_Accel; + else DrawPixel = DrawPixel_Normal; +} + u8 GPU2D::Read8(u32 addr) { @@ -549,17 +557,141 @@ void GPU2D::Write32(u32 addr, u32 val) } -void GPU2D::DrawScanline(u32 line) +u32 GPU2D::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) { - u32* dst = &Framebuffer[256*line]; - u32 mode1gfx[256]; + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000; - // request each 3D scanline in advance - // this is required for the threaded mode of the software renderer - // (alternately we could call GetLine() once and store the result somewhere) - if (Num == 0) - GPU3D::RequestLine(line); + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +u32 GPU2D::ColorBlend5(u32 val1, u32 val2) +{ + u32 eva = ((val1 >> 24) & 0x1F) + 1; + u32 evb = 32 - eva; + + if (eva == 32) return val1; + + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000; + + if (eva <= 16) + { + r += 0x000001; + g += 0x000100; + b += 0x010000; + } + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +u32 GPU2D::ColorBrightnessUp(u32 val, u32 factor) +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb += ((((0x3F003F - rb) * factor) >> 4) & 0x3F003F); + g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00); + + return rb | g | 0xFF000000; +} + +u32 GPU2D::ColorBrightnessDown(u32 val, u32 factor) +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb -= (((rb * factor) >> 4) & 0x3F003F); + g -= (((g * factor) >> 4) & 0x003F00); + return rb | g | 0xFF000000; +} + +u32 GPU2D::ColorComposite(int i, u32 val1, u32 val2) +{ + u32 coloreffect = 0; + u32 eva, evb; + + u32 flag1 = val1 >> 24; + u32 flag2 = val2 >> 24; + + u32 target2; + if (flag2 & 0x80) target2 = 0x1000; + else if (flag2 & 0x40) target2 = 0x0100; + else target2 = flag2 << 8; + + if ((flag1 & 0x80) && (BlendCnt & target2)) + { + // sprite blending + + coloreffect = 1; + + if (flag1 & 0x40) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else + { + eva = EVA; + evb = EVB; + } + } + else if ((flag1 & 0x40) && (BlendCnt & target2)) + { + // 3D layer blending + + coloreffect = 4; + } + else + { + if (flag1 & 0x80) flag1 = 0x10; + else if (flag1 & 0x40) flag1 = 0x01; + + if ((BlendCnt & flag1) && (WindowMask[i] & 0x20)) + { + coloreffect = (BlendCnt >> 6) & 0x3; + + if (coloreffect == 1) + { + if (BlendCnt & target2) + { + eva = EVA; + evb = EVB; + } + else + coloreffect = 0; + } + } + } + + switch (coloreffect) + { + case 0: return val1; + case 1: return ColorBlend4(val1, val2, eva, evb); + case 2: return ColorBrightnessUp(val1, EVY); + case 3: return ColorBrightnessDown(val1, EVY); + case 4: return ColorBlend5(val1, val2); + } +} + + +void GPU2D::DrawScanline(u32 line) +{ + int stride = Accelerated ? (256*3 + 1) : 256; + u32* dst = &Framebuffer[stride * line]; + + int n3dline = line; line = GPU::VCount; bool forceblank = false; @@ -580,28 +712,44 @@ void GPU2D::DrawScanline(u32 line) { for (int i = 0; i < 256; i++) dst[i] = 0xFFFFFFFF; + + if (Accelerated) + { + dst[256*3] = 0; + } return; } u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 0x1 : 0x3); + if (Num == 0) + { + if (!Accelerated) + _3DLine = GPU3D::GetLine(n3dline); + else if ((CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) + { + _3DLine = GPU3D::GetLine(n3dline); + //GPU3D::GLRenderer::PrepareCaptureFrame(); + } + } + // always render regular graphics - DrawScanline_Mode1(line, mode1gfx); + DrawScanline_BGOBJ(line); switch (dispmode) { case 0: // screen off { for (int i = 0; i < 256; i++) - dst[i] = 0xFF3F3F3F; + dst[i] = 0x003F3F3F; } break; case 1: // regular display { - for (int i = 0; i < 256; i++) - dst[i] = mode1gfx[i]; + for (int i = 0; i < stride; i+=2) + *(u64*)&dst[i] = *(u64*)&BGOBJLine[i]; } break; @@ -661,7 +809,13 @@ void GPU2D::DrawScanline(u32 line) } if (line < capheight) - DoCapture(line, capwidth, mode1gfx); + DoCapture(line, capwidth); + } + + if (Accelerated) + { + dst[256*3] = MasterBrightness | (DispCnt & 0x30000); + return; } // master brightness @@ -675,17 +829,7 @@ void GPU2D::DrawScanline(u32 line) for (int i = 0; i < 256; i++) { - u32 val = dst[i]; - - u32 r = val & 0x00003F; - u32 g = val & 0x003F00; - u32 b = val & 0x3F0000; - - r += (((0x00003F - r) * factor) >> 4); - g += ((((0x003F00 - g) * factor) >> 4) & 0x003F00); - b += ((((0x3F0000 - b) * factor) >> 4) & 0x3F0000); - - dst[i] = r | g | b; + dst[i] = ColorBrightnessUp(dst[i], factor); } } else if ((MasterBrightness >> 14) == 2) @@ -696,17 +840,7 @@ void GPU2D::DrawScanline(u32 line) for (int i = 0; i < 256; i++) { - u32 val = dst[i]; - - u32 r = val & 0x00003F; - u32 g = val & 0x003F00; - u32 b = val & 0x3F0000; - - r -= ((r * factor) >> 4); - g -= (((g * factor) >> 4) & 0x003F00); - b -= (((b * factor) >> 4) & 0x3F0000); - - dst[i] = r | g | b; + dst[i] = ColorBrightnessDown(dst[i], factor); } } } @@ -714,16 +848,16 @@ void GPU2D::DrawScanline(u32 line) // convert to 32-bit BGRA // note: 32-bit RGBA would be more straightforward, but // BGRA seems to be more compatible (Direct2D soft, cairo...) - for (int i = 0; i < 256; i++) + for (int i = 0; i < 256; i+=2) { - u32 c = dst[i]; + u64 c = *(u64*)&dst[i]; - u32 r = c << 18; - u32 g = (c << 2) & 0xFC00; - u32 b = (c >> 14) & 0xFC; + u64 r = (c << 18) & 0xFC000000FC0000; + u64 g = (c << 2) & 0xFC000000FC00; + u64 b = (c >> 14) & 0xFC000000FC; c = r | g | b; - dst[i] = c | ((c & 0x00C0C0C0) >> 6) | 0xFF000000; + *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000; } } @@ -747,10 +881,16 @@ void GPU2D::VBlankEnd() BGMosaicYMax = BGMosaicSize[1]; OBJMosaicY = 0; OBJMosaicYMax = OBJMosaicSize[1]; + + // TODO: make optional + if ((Num == 0) && (CaptureCnt & (1<<31)) && (((CaptureCnt >> 29) & 0x3) != 1)) + { + GPU3D::GLRenderer::PrepareCaptureFrame(); + } } -void GPU2D::DoCapture(u32 line, u32 width, u32* src) +void GPU2D::DoCapture(u32 line, u32 width) { u32 dstvram = (CaptureCnt >> 16) & 0x3; @@ -762,8 +902,76 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) u16* dst = (u16*)GPU::VRAM[dstvram]; u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width); + // TODO: handle 3D in accelerated mode!! + + u32* srcA; if (CaptureCnt & (1<<24)) - src = (u32*)GPU3D::GetLine(line); + { + srcA = _3DLine; + } + else + { + srcA = BGOBJLine; + if (Accelerated) + { + // in accelerated mode, compositing is normally done on the GPU + // but when doing display capture, we do need the composited output + // so we do it here + + for (int i = 0; i < 256; i++) + { + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + u32 val3 = BGOBJLine[512+i]; + + u32 compmode = (val3 >> 24) & 0xF; + + if (compmode == 4) + { + // 3D on top, blending + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + val1 = ColorBlend5(_3dval, val1); + else + val1 = val2; + } + else if (compmode == 1) + { + // 3D on bottom, blending + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + { + u32 eva = (val3 >> 8) & 0x1F; + u32 evb = (val3 >> 16) & 0x1F; + + val1 = ColorBlend4(val1, _3dval, eva, evb); + } + else + val1 = val2; + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + u32 _3dval = _3DLine[val3 & 0xFF]; + if ((_3dval >> 24) > 0) + { + u32 evy = (val3 >> 8) & 0x1F; + + val1 = _3dval; + if (compmode == 2) val1 = ColorBrightnessUp(val1, evy); + else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy); + } + else + val1 = val2; + } + + BGOBJLine[i] = val1; + } + } + } u16* srcB = NULL; u32 srcBaddr = line * 256; @@ -792,7 +1000,7 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) { for (u32 i = 0; i < width; i++) { - u32 val = src[i]; + u32 val = srcA[i]; // TODO: check what happens when alpha=0 @@ -843,7 +1051,7 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) { for (u32 i = 0; i < width; i++) { - u32 val = src[i]; + u32 val = srcA[i]; // TODO: check what happens when alpha=0 @@ -877,7 +1085,7 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) { for (u32 i = 0; i < width; i++) { - u32 val = src[i]; + u32 val = srcA[i]; // TODO: check what happens when alpha=0 @@ -1003,15 +1211,15 @@ void GPU2D::CheckWindows(u32 line) else if (line == Win1Coords[2]) Win1Active |= 0x1; } -void GPU2D::CalculateWindowMask(u32 line, u8* mask) +void GPU2D::CalculateWindowMask(u32 line) { for (u32 i = 0; i < 256; i++) - mask[i] = WinCnt[2]; // window outside + WindowMask[i] = WinCnt[2]; // window outside - if (DispCnt & ((1<<15)|(1<<12))) + if ((DispCnt & (1<<15)) && (DispCnt & (1<<12))) { // OBJ window - DrawSpritesWindow(line, mask); + DrawSpritesWindow(line); } if (DispCnt & (1<<14)) @@ -1025,7 +1233,7 @@ void GPU2D::CalculateWindowMask(u32 line, u8* mask) if (i == x2) Win1Active &= ~0x2; else if (i == x1) Win1Active |= 0x2; - if (Win1Active == 0x3) mask[i] = WinCnt[1]; + if (Win1Active == 0x3) WindowMask[i] = WinCnt[1]; } } @@ -1040,14 +1248,14 @@ void GPU2D::CalculateWindowMask(u32 line, u8* mask) if (i == x2) Win0Active &= ~0x2; else if (i == x1) Win0Active |= 0x2; - if (Win0Active == 0x3) mask[i] = WinCnt[0]; + if (Win0Active == 0x3) WindowMask[i] = WinCnt[0]; } } } template<u32 bgmode> -void GPU2D::DrawScanlineBGMode(u32 line, u32 nsprites, u32* spritebuf, u32* dst) +void GPU2D::DrawScanlineBGMode(u32 line, u32 nsprites) { for (int i = 3; i >= 0; i--) { @@ -1056,11 +1264,11 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32 nsprites, u32* spritebuf, u32* dst) if (DispCnt & 0x0800) { if (bgmode >= 3) - DrawBG_Extended(line, dst, 3); + DrawBG_Extended(line, 3); else if (bgmode >= 1) - DrawBG_Affine(line, dst, 3); + DrawBG_Affine(line, 3); else - DrawBG_Text(line, dst, 3); + DrawBG_Text(line, 3); } } if ((BGCnt[2] & 0x3) == i) @@ -1068,18 +1276,18 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32 nsprites, u32* spritebuf, u32* dst) if (DispCnt & 0x0400) { if (bgmode == 5) - DrawBG_Extended(line, dst, 2); + DrawBG_Extended(line, 2); else if (bgmode == 4 || bgmode == 2) - DrawBG_Affine(line, dst, 2); + DrawBG_Affine(line, 2); else - DrawBG_Text(line, dst, 2); + DrawBG_Text(line, 2); } } if ((BGCnt[1] & 0x3) == i) { if (DispCnt & 0x0200) { - DrawBG_Text(line, dst, 1); + DrawBG_Text(line, 1); } } if ((BGCnt[0] & 0x3) == i) @@ -1087,17 +1295,17 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32 nsprites, u32* spritebuf, u32* dst) if (DispCnt & 0x0100) { if ((!Num) && (DispCnt & 0x8)) - DrawBG_3D(line, dst); + DrawBG_3D(); else - DrawBG_Text(line, dst, 0); + DrawBG_Text(line, 0); } } if ((DispCnt & 0x1000) && nsprites) - InterleaveSprites(spritebuf, 0x8000 | (i<<16), dst); + InterleaveSprites(0x8000 | (i<<16)); } } -void GPU2D::DrawScanlineBGMode6(u32 line, u32 nsprites, u32* spritebuf, u32* dst) +void GPU2D::DrawScanlineBGMode6(u32 line, u32 nsprites) { if (Num) { @@ -1111,7 +1319,7 @@ void GPU2D::DrawScanlineBGMode6(u32 line, u32 nsprites, u32* spritebuf, u32* dst { if (DispCnt & 0x0400) { - DrawBG_Large(line, dst); + DrawBG_Large(line); } } if ((BGCnt[0] & 0x3) == i) @@ -1119,20 +1327,17 @@ void GPU2D::DrawScanlineBGMode6(u32 line, u32 nsprites, u32* spritebuf, u32* dst if (DispCnt & 0x0100) { if (DispCnt & 0x8) - DrawBG_3D(line, dst); + DrawBG_3D(); } } if ((DispCnt & 0x1000) && nsprites) - InterleaveSprites(spritebuf, 0x8000 | (i<<16), dst); + InterleaveSprites(0x8000 | (i<<16)); } } -void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) +void GPU2D::DrawScanline_BGOBJ(u32 line) { - u32 linebuf[256*2 + 64]; - u8* windowmask = (u8*)&linebuf[256*2]; - - u32 backdrop; + u64 backdrop; if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; else backdrop = *(u16*)&GPU::Palette[0]; @@ -1142,165 +1347,135 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) u8 b = (backdrop & 0x7C00) >> 9; backdrop = r | (g << 8) | (b << 16) | 0x20000000; + backdrop |= (backdrop << 32); - for (int i = 0; i < 256; i++) - linebuf[i] = backdrop; + for (int i = 0; i < 256; i+=2) + *(u64*)&BGOBJLine[i] = backdrop; } if (DispCnt & 0xE000) - CalculateWindowMask(line, windowmask); + CalculateWindowMask(line); else - memset(windowmask, 0xFF, 256); + memset(WindowMask, 0xFF, 256); // prerender sprites - u32 spritebuf[256]; u32 nsprites = 0; - memset(spritebuf, 0, 256*4); - if (DispCnt & 0x1000) nsprites = DrawSprites(line, spritebuf); + u32 nsprites = 0; + memset(OBJLine, 0, 256*4); + if (DispCnt & 0x1000) nsprites = DrawSprites(line); // TODO: what happens in mode 7? mode 6 on the sub engine? switch (DispCnt & 0x7) { - case 0: DrawScanlineBGMode<0>(line, nsprites, spritebuf, linebuf); break; - case 1: DrawScanlineBGMode<1>(line, nsprites, spritebuf, linebuf); break; - case 2: DrawScanlineBGMode<2>(line, nsprites, spritebuf, linebuf); break; - case 3: DrawScanlineBGMode<3>(line, nsprites, spritebuf, linebuf); break; - case 4: DrawScanlineBGMode<4>(line, nsprites, spritebuf, linebuf); break; - case 5: DrawScanlineBGMode<5>(line, nsprites, spritebuf, linebuf); break; - case 6: DrawScanlineBGMode6(line, nsprites, spritebuf, linebuf); break; + case 0: DrawScanlineBGMode<0>(line, nsprites); break; + case 1: DrawScanlineBGMode<1>(line, nsprites); break; + case 2: DrawScanlineBGMode<2>(line, nsprites); break; + case 3: DrawScanlineBGMode<3>(line, nsprites); break; + case 4: DrawScanlineBGMode<4>(line, nsprites); break; + case 5: DrawScanlineBGMode<5>(line, nsprites); break; + case 6: DrawScanlineBGMode6(line, nsprites); break; } // color special effects // can likely be optimized - u32 bldcnteffect = (BlendCnt >> 6) & 0x3; - - for (int i = 0; i < 256; i++) + if (!Accelerated) { - u32 val1 = linebuf[i]; - u32 val2 = linebuf[256+i]; - - u32 coloreffect, eva, evb; - - u32 flag1 = val1 >> 24; - u32 flag2 = val2 >> 24; - - u32 target2; - if (flag2 & 0x80) target2 = 0x1000; - else if (flag2 & 0x40) target2 = 0x0100; - else target2 = flag2 << 8; - - if ((flag1 & 0x80) && (BlendCnt & target2)) + for (int i = 0; i < 256; i++) { - // sprite blending - - coloreffect = 1; + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; - if (flag1 & 0x40) - { - eva = flag1 & 0x1F; - evb = 16 - eva; - } - else - { - eva = EVA; - evb = EVB; - } + BGOBJLine[i] = ColorComposite(i, val1, val2); } - else if ((flag1 & 0x40) && (BlendCnt & target2)) + } + else + { + if (Num == 0) { - // 3D layer blending - - eva = (flag1 & 0x1F) + 1; - evb = 32 - eva; - - u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 5; - u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 5) & 0x007F00; - u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 5) & 0x7F0000; - - if (eva <= 16) + for (int i = 0; i < 256; i++) { - r += 0x000001; - g += 0x000100; - b += 0x010000; - } + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; + u32 val3 = BGOBJLine[512+i]; - if (r > 0x00003F) r = 0x00003F; - if (g > 0x003F00) g = 0x003F00; - if (b > 0x3F0000) b = 0x3F0000; + u32 flag1 = val1 >> 24; + u32 flag2 = val2 >> 24; - dst[i] = r | g | b | 0xFF000000; + u32 bldcnteffect = (BlendCnt >> 6) & 0x3; - continue; - } - else - { - if (flag1 & 0x80) flag1 = 0x10; - else if (flag1 & 0x40) flag1 = 0x01; + u32 target1; + if (flag1 & 0x80) target1 = 0x0010; + else if (flag1 & 0x40) target1 = 0x0001; + else target1 = flag1; - if ((BlendCnt & flag1) && (windowmask[i] & 0x20)) - { - if ((bldcnteffect == 1) && (BlendCnt & target2)) - { - coloreffect = 1; - eva = EVA; - evb = EVB; - } - else if (bldcnteffect >= 2) - coloreffect = bldcnteffect; - else - coloreffect = 0; - } - else - coloreffect = 0; - } + u32 target2; + if (flag2 & 0x80) target2 = 0x1000; + else if (flag2 & 0x40) target2 = 0x0100; + else target2 = flag2 << 8; - switch (coloreffect) - { - case 0: - dst[i] = val1; - break; + if (((flag1 & 0xC0) == 0x40) && (BlendCnt & target2)) + { + // 3D on top, blending - case 1: - { - u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb)) >> 4; - u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb)) >> 4) & 0x007F00; - u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb)) >> 4) & 0x7F0000; + BGOBJLine[i] = val2; + BGOBJLine[256+i] = ColorComposite(i, val2, val3); + BGOBJLine[512+i] = 0x04000000 | (val1 & 0xFF); + } + else if ((flag1 & 0xC0) == 0x40) + { + // 3D on top, normal/fade - if (r > 0x00003F) r = 0x00003F; - if (g > 0x003F00) g = 0x003F00; - if (b > 0x3F0000) b = 0x3F0000; + if (bldcnteffect == 1) bldcnteffect = 0; + if (!(BlendCnt & 0x0001)) bldcnteffect = 0; + if (!(WindowMask[i] & 0x20)) bldcnteffect = 0; - dst[i] = r | g | b | 0xFF000000; - } - break; + BGOBJLine[i] = val2; + BGOBJLine[256+i] = ColorComposite(i, val2, val3); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVY << 8) | (val1 & 0xFF); + } + else if (((flag2 & 0xC0) == 0x40) && ((BlendCnt & 0x01C0) == 0x0140)) + { + // 3D on bottom, blending - case 2: - { - u32 r = val1 & 0x00003F; - u32 g = val1 & 0x003F00; - u32 b = val1 & 0x3F0000; + u32 eva, evb; + if ((flag1 & 0xC0) == 0xC0) + { + eva = flag1 & 0x1F; + evb = 16 - eva; + } + else if ((BlendCnt & target1) && (WindowMask[i] & 0x20)) + { + eva = EVA; + evb = EVB; + } + else + bldcnteffect = 7; - r += ((0x00003F - r) * EVY) >> 4; - g += (((0x003F00 - g) * EVY) >> 4) & 0x003F00; - b += (((0x3F0000 - b) * EVY) >> 4) & 0x3F0000; + BGOBJLine[i] = val1; + BGOBJLine[256+i] = ColorComposite(i, val1, val3); + BGOBJLine[512+i] = (bldcnteffect << 24) | (EVB << 16) | (EVA << 8) | (val1 & 0xFF); + } + else + { + // no potential 3D pixel involved - dst[i] = r | g | b | 0xFF000000; + BGOBJLine[i] = ColorComposite(i, val1, val2); + BGOBJLine[256+i] = 0; + BGOBJLine[512+i] = 0x07000000; + } } - break; - - case 3: + } + else + { + for (int i = 0; i < 256; i++) { - u32 r = val1 & 0x00003F; - u32 g = val1 & 0x003F00; - u32 b = val1 & 0x3F0000; - - r -= (r * EVY) >> 4; - g -= ((g * EVY) >> 4) & 0x003F00; - b -= ((b * EVY) >> 4) & 0x3F0000; + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; - dst[i] = r | g | b | 0xFF000000; + BGOBJLine[i] = ColorComposite(i, val1, val2); + BGOBJLine[256+i] = 0; + BGOBJLine[512+i] = 0x07000000; } - break; } } @@ -1322,7 +1497,7 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) } -void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag) +void GPU2D::DrawPixel_Normal(u32* dst, u16 color, u32 flag) { u8 r = (color & 0x001F) << 1; u8 g = (color & 0x03E0) >> 4; @@ -1332,11 +1507,19 @@ void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag) *dst = r | (g << 8) | (b << 16) | flag; } -void GPU2D::DrawBG_3D(u32 line, u32* dst) +void GPU2D::DrawPixel_Accel(u32* dst, u16 color, u32 flag) { - u32* src = GPU3D::GetLine(line); - u8* windowmask = (u8*)&dst[256*2]; + u8 r = (color & 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + *(dst+512) = *(dst+256); + *(dst+256) = *dst; + *dst = r | (g << 8) | (b << 16) | flag; +} +void GPU2D::DrawBG_3D() +{ u16 xoff = BGXPos[0]; int i = 0; int iend = 256; @@ -1351,22 +1534,37 @@ void GPU2D::DrawBG_3D(u32 line, u32* dst) iend -= (xoff & 0xFF); } - for (; i < iend; i++) + if (Accelerated) + { + for (; i < iend; i++) + { + int pos = xoff++; + + if (!(WindowMask[i] & 0x01)) continue; + + BGOBJLine[i+512] = BGOBJLine[i+256]; + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = 0x40000000 | pos; // 3D-layer placeholder + } + } + else { - u32 c = src[xoff]; - xoff++; + for (; i < iend; i++) + { + u32 c = _3DLine[xoff]; + xoff++; - if ((c >> 24) == 0) continue; - if (!(windowmask[i] & 0x01)) continue; + if ((c >> 24) == 0) continue; + if (!(WindowMask[i] & 0x01)) continue; - dst[i+256] = dst[i]; - dst[i] = c | 0x40000000; + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = c | 0x40000000; + } } } -void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) +void GPU2D::DrawBG_Text(u32 line, u32 bgnum) { - u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 xmos = 0, xmossize = 0; @@ -1451,7 +1649,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) } // draw pixel - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos == 0) { @@ -1463,7 +1661,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) xmos--; if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1496,7 +1694,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) // draw pixel // TODO: optimize VRAM access - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos == 0) { @@ -1515,7 +1713,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) xmos--; if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); } xoff++; @@ -1523,9 +1721,8 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) } } -void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) +void GPU2D::DrawBG_Affine(u32 line, u32 bgnum) { - u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 xmos = 0, xmossize = 0; @@ -1584,12 +1781,12 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos > 0) { if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); xmos--; } @@ -1605,7 +1802,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); xmos = xmossize; } @@ -1619,9 +1816,8 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) +void GPU2D::DrawBG_Extended(u32 line, u32 bgnum) { - u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 xmos = 0, xmossize = 0; @@ -1684,12 +1880,12 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos > 0) { if (color & 0x8000) - DrawPixel(&dst[i], color, 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); xmos--; } @@ -1699,7 +1895,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & ymask) >> 8) << yshift) + ((rotX & xmask) >> 8)) << 1)); if (color & 0x8000) - DrawPixel(&dst[i], color, 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum); xmos = xmossize; } @@ -1720,12 +1916,12 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos > 0) { if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); xmos--; } @@ -1735,7 +1931,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & ymask) >> 8) << yshift) + ((rotX & xmask) >> 8)); if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum); xmos = xmossize; } @@ -1787,12 +1983,12 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (windowmask[i] & (1<<bgnum)) + if (WindowMask[i] & (1<<bgnum)) { if (xmos > 0) { if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); xmos--; } @@ -1814,7 +2010,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) color = GPU::ReadVRAM_BG<u8>(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff); if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + DrawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum); xmos = xmossize; } @@ -1829,9 +2025,8 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) BGYRefInternal[bgnum-2] += rotD; } -void GPU2D::DrawBG_Large(u32 line, u32* dst) // BG is always BG2 +void GPU2D::DrawBG_Large(u32 line) // BG is always BG2 { - u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[2]; u32 xmos = 0, xmossize = 0; @@ -1888,12 +2083,12 @@ void GPU2D::DrawBG_Large(u32 line, u32* dst) // BG is always BG2 for (int i = 0; i < 256; i++) { - if (windowmask[i] & (1<<2)) + if (WindowMask[i] & (1<<2)) { if (xmos > 0) { if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<2); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); xmos--; } @@ -1903,7 +2098,7 @@ void GPU2D::DrawBG_Large(u32 line, u32* dst) // BG is always BG2 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & ymask) >> 8) << yshift) + ((rotX & xmask) >> 8)); if (color) - DrawPixel(&dst[i], pal[color], 0x01000000<<2); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); } } @@ -1915,20 +2110,18 @@ void GPU2D::DrawBG_Large(u32 line, u32* dst) // BG is always BG2 BGYRefInternal[0] += rotD; } -void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst) +void GPU2D::InterleaveSprites(u32 prio) { - u8* windowmask = (u8*)&dst[256*2]; - for (u32 i = 0; i < 256; i++) { - if (((buf[i] & 0xF8000) == prio) && (windowmask[i] & 0x10)) + if (((OBJLine[i] & 0xF8000) == prio) && (WindowMask[i] & 0x10)) { - DrawPixel(&dst[i], buf[i] & 0x7FFF, buf[i] & 0xFF000000); + DrawPixel(&BGOBJLine[i], OBJLine[i] & 0x7FFF, OBJLine[i] & 0xFF000000); } } } -u32 GPU2D::DrawSprites(u32 line, u32* dst) +u32 GPU2D::DrawSprites(u32 line) { u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; @@ -1986,7 +2179,7 @@ u32 GPU2D::DrawSprites(u32 line, u32* dst) u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; - DrawSprite_Rotscale<false>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst); + DrawSprite_Rotscale<false>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos); nsprites++; } else @@ -2011,7 +2204,7 @@ u32 GPU2D::DrawSprites(u32 line, u32* dst) if (attrib[1] & 0x2000) ypos = height-1 - ypos; - DrawSprite_Normal<false>(attrib, width, xpos, ypos, dst); + DrawSprite_Normal<false>(attrib, width, xpos, ypos); nsprites++; } } @@ -2020,7 +2213,7 @@ u32 GPU2D::DrawSprites(u32 line, u32* dst) return nsprites; } -void GPU2D::DrawSpritesWindow(u32 line, u8* dst) +void GPU2D::DrawSpritesWindow(u32 line) { u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; @@ -2071,7 +2264,7 @@ void GPU2D::DrawSpritesWindow(u32 line, u8* dst) u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; - DrawSprite_Rotscale<true>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, (u32*)dst); + DrawSprite_Rotscale<true>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos); } else { @@ -2095,13 +2288,13 @@ void GPU2D::DrawSpritesWindow(u32 line, u8* dst) if (attrib[1] & 0x2000) ypos = height-1 - ypos; - DrawSprite_Normal<true>(attrib, width, xpos, ypos, (u32*)dst); + DrawSprite_Normal<true>(attrib, width, xpos, ypos); } } } template<bool window> -void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos, u32* dst) +void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; @@ -2203,8 +2396,8 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 if (color & 0x8000) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = color | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = color | prio; } } else @@ -2267,8 +2460,8 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } } else @@ -2325,8 +2518,8 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } } else @@ -2345,7 +2538,7 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 } template<bool window> -void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* dst) +void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; @@ -2439,8 +2632,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color & 0x8000) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = color | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = color | prio; } xoff++; @@ -2466,8 +2659,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color & 0x8000) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = color | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = color | prio; } xoff++; @@ -2526,8 +2719,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } xoff++; @@ -2555,8 +2748,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } xoff++; @@ -2604,8 +2797,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } xoff++; @@ -2638,8 +2831,8 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* d if (color) { - if (window) ((u8*)dst)[xpos] = WinCnt[3]; - else dst[xpos] = pal[color] | prio; + if (window) WindowMask[xpos] = WinCnt[3]; + else OBJLine[xpos] = pal[color] | prio; } xoff++; diff --git a/src/GPU2D.h b/src/GPU2D.h index d7c4e3f..21b43f1 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -31,6 +31,7 @@ public: void SetEnabled(bool enable) { Enabled = enable; } void SetFramebuffer(u32* buf); + void SetDisplaySettings(bool accel); u8 Read8(u32 addr); u16 Read16(u32 addr); @@ -68,6 +69,14 @@ private: bool Enabled; u32* Framebuffer; + bool Accelerated; + + u32 BGOBJLine[256*3]; + u32* _3DLine; + + u8 WindowMask[256]; + u32 OBJLine[256]; + u16 DispFIFO[16]; u32 DispFIFOReadPtr; u32 DispFIFOWritePtr; @@ -114,27 +123,35 @@ private: u32 BGExtPalStatus[4]; u32 OBJExtPalStatus; - template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32 nsprites, u32* spritebuf, u32* dst); - void DrawScanlineBGMode6(u32 line, u32 nsprites, u32* spritebuf, u32* dst); - void DrawScanline_Mode1(u32 line, u32* dst); + u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb); + u32 ColorBlend5(u32 val1, u32 val2); + u32 ColorBrightnessUp(u32 val, u32 factor); + u32 ColorBrightnessDown(u32 val, u32 factor); + u32 ColorComposite(int i, u32 val1, u32 val2); + + template<u32 bgmode> void DrawScanlineBGMode(u32 line, u32 nsprites); + void DrawScanlineBGMode6(u32 line, u32 nsprites); + void DrawScanline_BGOBJ(u32 line); - void DrawPixel(u32* dst, u16 color, u32 flag); + static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); + static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); + void (*DrawPixel)(u32* dst, u16 color, u32 flag); - void DrawBG_3D(u32 line, u32* dst); - void DrawBG_Text(u32 line, u32* dst, u32 bgnum); - void DrawBG_Affine(u32 line, u32* dst, u32 bgnum); - void DrawBG_Extended(u32 line, u32* dst, u32 bgnum); - void DrawBG_Large(u32 line, u32* dst); + void DrawBG_3D(); + void DrawBG_Text(u32 line, u32 bgnum); + void DrawBG_Affine(u32 line, u32 bgnum); + void DrawBG_Extended(u32 line, u32 bgnum); + void DrawBG_Large(u32 line); - void InterleaveSprites(u32* buf, u32 prio, u32* dst); - u32 DrawSprites(u32 line, u32* dst); - void DrawSpritesWindow(u32 line, u8* dst); - template<bool window> void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos, u32* dst); - template<bool window> void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos, u32* dst); + void InterleaveSprites(u32 prio); + u32 DrawSprites(u32 line); + void DrawSpritesWindow(u32 line); + template<bool window> void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); + template<bool window> void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, s32 ypos); - void DoCapture(u32 line, u32 width, u32* src); + void DoCapture(u32 line, u32 width); - void CalculateWindowMask(u32 line, u8* mask); + void CalculateWindowMask(u32 line); }; #endif diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 802c9cd..98aa6eb 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -22,6 +22,7 @@ #include "NDS.h" #include "GPU.h" #include "FIFO.h" +#include "Config.h" // 3D engine notes @@ -156,6 +157,8 @@ u32 NumCommands, CurCommand, ParamCount, TotalParams; bool GeometryEnabled; bool RenderingEnabled; +int Renderer; + u32 DispCnt; u8 AlphaRefVal, AlphaRef; @@ -275,14 +278,16 @@ bool Init() CmdStallQueue = new FIFO<CmdFIFOEntry>(64); - if (!SoftRenderer::Init()) return false; + Renderer = -1; + // SetRenderer() will be called to set it up later return true; } void DeInit() { - SoftRenderer::DeInit(); + if (Renderer == 0) SoftRenderer::DeInit(); + else GLRenderer::DeInit(); delete CmdFIFO; delete CmdPIPE; @@ -382,7 +387,8 @@ void Reset() FlushAttributes = 0; ResetRenderingState(); - SoftRenderer::Reset(); + if (Renderer == 0) SoftRenderer::Reset(); + else GLRenderer::Reset(); } void DoSavestate(Savestate* file) @@ -605,6 +611,43 @@ void SetEnabled(bool geometry, bool rendering) } +int InitRenderer(bool hasGL) +{ + int renderer = hasGL ? Config::_3DRenderer : 0; + + if (renderer == 1) + { + if (!GLRenderer::Init()) + renderer = 0; + } + + if (renderer == 0) SoftRenderer::Init(); + + Renderer = renderer; + UpdateRendererConfig(); + GPU::SetDisplaySettings(Renderer != 0); + return renderer; +} + +void DeInitRenderer() +{ + if (Renderer == 0) SoftRenderer::DeInit(); + else GLRenderer::DeInit(); +} + +void UpdateRendererConfig() +{ + if (Renderer == 0) + { + SoftRenderer::SetupRenderThread(); + } + else + { + GLRenderer::UpdateDisplaySettings(); + } +} + + void MatrixLoadIdentity(s32* m) { @@ -1191,6 +1234,16 @@ void SubmitPolygon() vtx->FinalPosition[0] = posX & 0x1FF; vtx->FinalPosition[1] = posY & 0xFF; + // hi-res positions + if (w != 0) + { + posX = ((((s64)(vtx->Position[0] + w) * Viewport[4]) << 4) / (((s64)w) << 1)) + (Viewport[0] << 4); + posY = ((((s64)(-vtx->Position[1] + w) * Viewport[5]) << 4) / (((s64)w) << 1)) + (Viewport[3] << 4); + + vtx->HiresPosition[0] = posX & 0x1FFF; + vtx->HiresPosition[1] = posY & 0xFFF; + } + vtx->FinalColor[0] = vtx->Color[0] >> 12; if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF); vtx->FinalColor[1] = vtx->Color[1] >> 12; @@ -2331,7 +2384,7 @@ void CheckFIFODMA() void VCount144() { - SoftRenderer::VCount144(); + if (Renderer == 0) SoftRenderer::VCount144(); } @@ -2413,17 +2466,14 @@ void VBlank() void VCount215() { - SoftRenderer::RenderFrame(); -} - -void RequestLine(int line) -{ - return SoftRenderer::RequestLine(line); + if (Renderer == 0) SoftRenderer::RenderFrame(); + else GLRenderer::RenderFrame(); } u32* GetLine(int line) { - return SoftRenderer::GetLine(line); + if (Renderer == 0) return SoftRenderer::GetLine(line); + else return GLRenderer::GetLine(line); } diff --git a/src/GPU3D.h b/src/GPU3D.h index 279494a..4e7c01a 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -39,6 +39,10 @@ typedef struct s32 FinalPosition[2]; s32 FinalColor[3]; + // hi-res position (4-bit fractional part) + // TODO maybe: hi-res color? (that survives clipping) + s32 HiresPosition[2]; + } Vertex; typedef struct @@ -86,6 +90,8 @@ extern u32 RenderNumPolygons; extern u64 Timestamp; +extern int Renderer; + bool Init(); void DeInit(); void Reset(); @@ -94,6 +100,10 @@ void DoSavestate(Savestate* file); void SetEnabled(bool geometry, bool rendering); +int InitRenderer(bool hasGL); +void DeInitRenderer(); +void UpdateRendererConfig(); + void ExecuteCommand(); s32 CyclesToRunFor(); @@ -104,7 +114,6 @@ void CheckFIFODMA(); void VCount144(); void VBlank(); void VCount215(); -void RequestLine(int line); u32* GetLine(int line); void WriteToGXFIFO(u32 val); @@ -127,11 +136,26 @@ void SetupRenderThread(); void VCount144(); void RenderFrame(); -void RequestLine(int line); u32* GetLine(int line); } +namespace GLRenderer +{ + +bool Init(); +void DeInit(); +void Reset(); + +void UpdateDisplaySettings(); + +void RenderFrame(); +void PrepareCaptureFrame(); +u32* GetLine(int line); +void SetupAccelFrame(); + +} + } #endif diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp new file mode 100644 index 0000000..c759147 --- /dev/null +++ b/src/GPU3D_OpenGL.cpp @@ -0,0 +1,1124 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "GPU.h" +#include "Config.h" +#include "OpenGLSupport.h" +#include "GPU3D_OpenGL_shaders.h" + +namespace GPU3D +{ +namespace GLRenderer +{ + +// GL version requirements +// * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) +// * UBO: 3.1 + + +enum +{ + RenderFlag_WBuffer = 0x01, + RenderFlag_Trans = 0x02, + RenderFlag_ShadowMask = 0x04, +}; + + +GLuint ClearShaderPlain[3]; + +GLuint RenderShader[16][3]; +GLuint CurShaderID = -1; + +GLuint FinalPassShader[3]; + +struct +{ + float uScreenSize[2]; + u32 uDispCnt; + u32 __pad0; + float uToonColors[32][4]; + float uEdgeColors[8][4]; + float uFogColor[4]; + float uFogDensity[34][4]; + u32 uFogOffset; + u32 uFogShift; + +} ShaderConfig; + +GLuint ShaderConfigUBO; + +typedef struct +{ + Polygon* PolyData; + + u32 NumIndices; + u16* Indices; + u32 NumEdgeIndices; + u16* EdgeIndices; + + u32 RenderKey; + +} RendererPolygon; + +RendererPolygon PolygonList[2048]; +int NumFinalPolys, NumOpaqueFinalPolys; + +GLuint ClearVertexBufferID, ClearVertexArrayID; +GLint ClearUniformLoc[4]; + +// vertex buffer +// * XYZW: 4x16bit +// * RGBA: 4x8bit +// * ST: 2x16bit +// * polygon data: 3x32bit (polygon/texture attributes) +// +// polygon attributes: +// * bit4-7, 11, 14-15, 24-29: POLYGON_ATTR +// * bit16-20: Z shift +// * bit8: front-facing (?) +// * bit9: W-buffering (?) + +GLuint VertexBufferID; +u32 VertexBuffer[10240 * 7]; +u32 NumVertices; + +GLuint VertexArrayID; +u16 IndexBuffer[2048 * 40]; +u32 NumTriangles; + +GLuint TexMemID; +GLuint TexPalMemID; + +int ScaleFactor; +bool Antialias; +int ScreenW, ScreenH; + +GLuint FramebufferTex[8]; +int FrontBuffer; +GLuint FramebufferID[4], PixelbufferID; +u32 Framebuffer[256*192]; + + + +bool BuildRenderShader(u32 flags, const char* vs, const char* fs) +{ + char shadername[32]; + sprintf(shadername, "RenderShader%02X", flags); + + int headerlen = strlen(kShaderHeader); + + int vslen = strlen(vs); + int vsclen = strlen(kRenderVSCommon); + char* vsbuf = new char[headerlen + vsclen + vslen + 1]; + strcpy(&vsbuf[0], kShaderHeader); + strcpy(&vsbuf[headerlen], kRenderVSCommon); + strcpy(&vsbuf[headerlen + vsclen], vs); + + int fslen = strlen(fs); + int fsclen = strlen(kRenderFSCommon); + char* fsbuf = new char[headerlen + fsclen + fslen + 1]; + strcpy(&fsbuf[0], kShaderHeader); + strcpy(&fsbuf[headerlen], kRenderFSCommon); + strcpy(&fsbuf[headerlen + fsclen], fs); + + bool ret = OpenGL_BuildShaderProgram(vsbuf, fsbuf, RenderShader[flags], shadername); + + delete[] vsbuf; + delete[] fsbuf; + + if (!ret) return false; + + GLuint prog = RenderShader[flags][2]; + + GLint uni_id = glGetUniformBlockIndex(prog, "uConfig"); + glUniformBlockBinding(prog, uni_id, 0); + + glBindAttribLocation(prog, 0, "vPosition"); + glBindAttribLocation(prog, 1, "vColor"); + glBindAttribLocation(prog, 2, "vTexcoord"); + glBindAttribLocation(prog, 3, "vPolygonAttr"); + glBindFragDataLocation(prog, 0, "oColor"); + glBindFragDataLocation(prog, 1, "oAttr"); + + glUseProgram(prog); + + uni_id = glGetUniformLocation(prog, "TexMem"); + glUniform1i(uni_id, 0); + uni_id = glGetUniformLocation(prog, "TexPalMem"); + glUniform1i(uni_id, 1); + + return true; +} + +void UseRenderShader(u32 flags) +{ + if (CurShaderID == flags) return; + glUseProgram(RenderShader[flags][2]); + CurShaderID = flags; +} + +void SetupDefaultTexParams(GLuint tex) +{ + glBindTexture(GL_TEXTURE_2D, tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +} + +bool Init() +{ + GLint uni_id; + + const GLubyte* renderer = glGetString(GL_RENDERER); // get renderer string + const GLubyte* version = glGetString(GL_VERSION); // version as a string + printf("OpenGL: renderer: %s\n", renderer); + printf("OpenGL: version: %s\n", version); + + glEnable(GL_DEPTH_TEST); + glEnable(GL_STENCIL_TEST); + + + glDepthRange(0, 1); + glClearDepth(1.0); + + + if (!OpenGL_BuildShaderProgram(kClearVS, kClearFS, ClearShaderPlain, "ClearShader")) + return false; + + glBindAttribLocation(ClearShaderPlain[2], 0, "vPosition"); + glBindFragDataLocation(ClearShaderPlain[2], 0, "oColor"); + glBindFragDataLocation(ClearShaderPlain[2], 1, "oAttr"); + ClearUniformLoc[0] = glGetUniformLocation(ClearShaderPlain[2], "uColor"); + ClearUniformLoc[1] = glGetUniformLocation(ClearShaderPlain[2], "uDepth"); + ClearUniformLoc[2] = glGetUniformLocation(ClearShaderPlain[2], "uOpaquePolyId"); + ClearUniformLoc[3] = glGetUniformLocation(ClearShaderPlain[2], "uFogFlag"); + + memset(RenderShader, 0, sizeof(RenderShader)); + + if (!BuildRenderShader(0, + kRenderVS_Z, kRenderFS_ZO)) return false; + if (!BuildRenderShader(RenderFlag_WBuffer, + kRenderVS_W, kRenderFS_WO)) return false; + if (!BuildRenderShader(RenderFlag_Trans, + kRenderVS_Z, kRenderFS_ZT)) return false; + if (!BuildRenderShader(RenderFlag_Trans | RenderFlag_WBuffer, + kRenderVS_W, kRenderFS_WT)) return false; + if (!BuildRenderShader(RenderFlag_ShadowMask, + kRenderVS_Z, kRenderFS_ZSM)) return false; + if (!BuildRenderShader(RenderFlag_ShadowMask | RenderFlag_WBuffer, + kRenderVS_W, kRenderFS_WSM)) return false; + + + if (!OpenGL_BuildShaderProgram(kFinalPassVS, kFinalPassFS, FinalPassShader, "FinalPassShader")) + return false; + + uni_id = glGetUniformBlockIndex(FinalPassShader[2], "uConfig"); + glUniformBlockBinding(FinalPassShader[2], uni_id, 0); + + glBindAttribLocation(FinalPassShader[2], 0, "vPosition"); + glBindFragDataLocation(FinalPassShader[2], 0, "oColor"); + + glUseProgram(FinalPassShader[2]); + + uni_id = glGetUniformLocation(FinalPassShader[2], "DepthBuffer"); + glUniform1i(uni_id, 0); + uni_id = glGetUniformLocation(FinalPassShader[2], "AttrBuffer"); + glUniform1i(uni_id, 1); + + + memset(&ShaderConfig, 0, sizeof(ShaderConfig)); + + glGenBuffers(1, &ShaderConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(ShaderConfig), &ShaderConfig, GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ShaderConfigUBO); + + + float clearvtx[6*2] = + { + -1.0, -1.0, + 1.0, 1.0, + -1.0, 1.0, + + -1.0, -1.0, + 1.0, -1.0, + 1.0, 1.0 + }; + + glGenBuffers(1, &ClearVertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); + glBufferData(GL_ARRAY_BUFFER, sizeof(clearvtx), clearvtx, GL_STATIC_DRAW); + + glGenVertexArrays(1, &ClearVertexArrayID); + glBindVertexArray(ClearVertexArrayID); + glEnableVertexAttribArray(0); // position + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (void*)(0)); + + + glGenBuffers(1, &VertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); + glBufferData(GL_ARRAY_BUFFER, sizeof(VertexBuffer), NULL, GL_DYNAMIC_DRAW); + + glGenVertexArrays(1, &VertexArrayID); + glBindVertexArray(VertexArrayID); + glEnableVertexAttribArray(0); // position + glVertexAttribIPointer(0, 4, GL_UNSIGNED_SHORT, 7*4, (void*)(0)); + glEnableVertexAttribArray(1); // color + glVertexAttribIPointer(1, 4, GL_UNSIGNED_BYTE, 7*4, (void*)(2*4)); + glEnableVertexAttribArray(2); // texcoords + glVertexAttribIPointer(2, 2, GL_SHORT, 7*4, (void*)(3*4)); + glEnableVertexAttribArray(3); // attrib + glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4)); + + + glGenFramebuffers(4, &FramebufferID[0]); + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); + + glGenTextures(8, &FramebufferTex[0]); + FrontBuffer = 0; + + // color buffers + SetupDefaultTexParams(FramebufferTex[0]); + SetupDefaultTexParams(FramebufferTex[1]); + + // depth/stencil buffer + SetupDefaultTexParams(FramebufferTex[4]); + SetupDefaultTexParams(FramebufferTex[6]); + + // attribute buffer + // R: opaque polyID (for edgemarking) + // G: edge flag + // B: fog flag + SetupDefaultTexParams(FramebufferTex[5]); + SetupDefaultTexParams(FramebufferTex[7]); + + // downscale framebuffer for antialiased mode + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[2]); + SetupDefaultTexParams(FramebufferTex[2]); + + // downscale framebuffer for display capture (always 256x192) + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[3]); + SetupDefaultTexParams(FramebufferTex[3]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 192, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[3], 0); + + GLenum fbassign[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1}; + + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[0], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[4], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[5], 0); + glDrawBuffers(2, fbassign); + + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[1]); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[1], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[4], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[5], 0); + glDrawBuffers(2, fbassign); + + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[2]); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FramebufferTex[2], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, FramebufferTex[6], 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FramebufferTex[7], 0); + glDrawBuffers(2, fbassign); + + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]); + + glEnable(GL_BLEND); + glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); + + glGenBuffers(1, &PixelbufferID); + + glActiveTexture(GL_TEXTURE0); + glGenTextures(1, &TexMemID); + glBindTexture(GL_TEXTURE_2D, TexMemID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, 1024, 512, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, NULL); + + glActiveTexture(GL_TEXTURE1); + glGenTextures(1, &TexPalMemID); + glBindTexture(GL_TEXTURE_2D, TexPalMemID); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 1024, 48, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, NULL); + + return true; +} + +void DeInit() +{ + glDeleteTextures(1, &TexMemID); + glDeleteTextures(1, &TexPalMemID); + + glDeleteFramebuffers(4, &FramebufferID[0]); + glDeleteTextures(8, &FramebufferTex[0]); + + glDeleteVertexArrays(1, &VertexArrayID); + glDeleteBuffers(1, &VertexBufferID); + glDeleteVertexArrays(1, &ClearVertexArrayID); + glDeleteBuffers(1, &ClearVertexBufferID); + + glDeleteBuffers(1, &ShaderConfigUBO); + + for (int i = 0; i < 16; i++) + { + if (!RenderShader[i][2]) continue; + OpenGL_DeleteShaderProgram(RenderShader[i]); + } +} + +void Reset() +{ + UpdateDisplaySettings(); +} + +void UpdateDisplaySettings() +{ + int scale = Config::GL_ScaleFactor; + bool antialias = false; //Config::GL_Antialias; + + if (antialias) scale *= 2; + + ScaleFactor = scale; + Antialias = antialias; + + ScreenW = 256 * scale; + ScreenH = 192 * scale; + + if (!antialias) + { + glBindTexture(GL_TEXTURE_2D, FramebufferTex[0]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[1]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[2]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[4]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[5]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8UI, ScreenW, ScreenH, 0, GL_RGB_INTEGER, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[6]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, 1, 1, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[7]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8UI, 1, 1, 0, GL_RGB_INTEGER, GL_UNSIGNED_BYTE, NULL); + } + else + { + glBindTexture(GL_TEXTURE_2D, FramebufferTex[0]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW/2, ScreenH/2, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[1]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW/2, ScreenH/2, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[2]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[4]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW/2, ScreenH/2, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[5]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8UI, ScreenW/2, ScreenH/2, 0, GL_RGB_INTEGER, GL_UNSIGNED_BYTE, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[6]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, ScreenW, ScreenH, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[7]); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB8UI, ScreenW, ScreenH, 0, GL_RGB_INTEGER, GL_UNSIGNED_BYTE, NULL); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID); + glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ); + + //glLineWidth(scale); +} + + +void SetupPolygon(RendererPolygon* rp, Polygon* polygon) +{ + rp->PolyData = polygon; + + // render key: depending on what we're drawing + // opaque polygons: + // - depthfunc + // -- alpha=0 + // regular translucent polygons: + // - depthfunc + // -- depthwrite + // --- polyID + // shadow mask polygons: + // - depthfunc????? + // shadow polygons: + // - depthfunc + // -- depthwrite + // --- polyID + + rp->RenderKey = (polygon->Attr >> 14) & 0x1; // bit14 - depth func + if (!polygon->IsShadowMask) + { + if (polygon->Translucent) + { + if (polygon->IsShadow) rp->RenderKey |= 0x20000; + else rp->RenderKey |= 0x10000; + rp->RenderKey |= (polygon->Attr >> 10) & 0x2; // bit11 - depth write + rp->RenderKey |= (polygon->Attr >> 13) & 0x4; // bit15 - fog + rp->RenderKey |= (polygon->Attr & 0x3F000000) >> 16; // polygon ID + } + else + { + if ((polygon->Attr & 0x001F0000) == 0) + rp->RenderKey |= 0x2; + rp->RenderKey |= (polygon->Attr & 0x3F000000) >> 16; // polygon ID + } + } + else + { + rp->RenderKey |= 0x30000; + } +} + +void BuildPolygons(RendererPolygon* polygons, int npolys) +{ + u32* vptr = &VertexBuffer[0]; + u32 vidx = 0; + + u16* iptr = &IndexBuffer[0]; + u16* eiptr = &IndexBuffer[2048*30]; + u32 numtriangles = 0; + + for (int i = 0; i < npolys; i++) + { + RendererPolygon* rp = &polygons[i]; + Polygon* poly = rp->PolyData; + + rp->Indices = iptr; + rp->NumIndices = 0; + + u32 vidx_first = vidx; + + u32 polyattr = poly->Attr; + + u32 alpha = (polyattr >> 16) & 0x1F; + + u32 vtxattr = polyattr & 0x1F00C8F0; + if (poly->FacingView) vtxattr |= (1<<8); + if (poly->WBuffer) vtxattr |= (1<<9); + + // assemble vertices + for (int j = 0; j < poly->NumVertices; j++) + { + Vertex* vtx = poly->Vertices[j]; + + u32 z = poly->FinalZ[j]; + u32 w = poly->FinalW[j]; + + // Z should always fit within 16 bits, so it's okay to do this + u32 zshift = 0; + while (z > 0xFFFF) { z >>= 1; zshift++; } + + u32 x, y; + if (ScaleFactor > 1) + { + x = (vtx->HiresPosition[0] * ScaleFactor) >> 4; + y = (vtx->HiresPosition[1] * ScaleFactor) >> 4; + } + else + { + x = vtx->FinalPosition[0]; + y = vtx->FinalPosition[1]; + } + + *vptr++ = x | (y << 16); + *vptr++ = z | (w << 16); + + *vptr++ = (vtx->FinalColor[0] >> 1) | + ((vtx->FinalColor[1] >> 1) << 8) | + ((vtx->FinalColor[2] >> 1) << 16) | + (alpha << 24); + + *vptr++ = (u16)vtx->TexCoords[0] | ((u16)vtx->TexCoords[1] << 16); + + *vptr++ = vtxattr | (zshift << 16); + *vptr++ = poly->TexParam; + *vptr++ = poly->TexPalette; + + if (j >= 2) + { + // build a triangle + *iptr++ = vidx_first; + *iptr++ = vidx - 1; + *iptr++ = vidx; + numtriangles++; + rp->NumIndices += 3; + } + + vidx++; + } + + rp->EdgeIndices = eiptr; + rp->NumEdgeIndices = 0; + + for (int j = 1; j < poly->NumVertices; j++) + { + *eiptr++ = vidx_first; + *eiptr++ = vidx_first + 1; + vidx_first++; + rp->NumEdgeIndices += 2; + } + } + + NumTriangles = numtriangles; + NumVertices = vidx; +} + +void RenderSinglePolygon(int i) +{ + RendererPolygon* rp = &PolygonList[i]; + + glDrawElements(GL_TRIANGLES, rp->NumIndices, GL_UNSIGNED_SHORT, rp->Indices); +} + +int RenderPolygonBatch(int i) +{ + RendererPolygon* rp = &PolygonList[i]; + u32 key = rp->RenderKey; + int numpolys = 0; + u32 numindices = 0; + + for (int iend = i; iend < NumFinalPolys; iend++) + { + RendererPolygon* cur_rp = &PolygonList[iend]; + if (cur_rp->RenderKey != key) break; + + numpolys++; + numindices += cur_rp->NumIndices; + } + + glDrawElements(GL_TRIANGLES, numindices, GL_UNSIGNED_SHORT, rp->Indices); + return numpolys; +} + +int RenderPolygonEdges() +{ + RendererPolygon* rp = &PolygonList[0]; + int numpolys = 0; + u32 numindices = 0; + + for (int iend = 0; iend < NumOpaqueFinalPolys; iend++) + { + RendererPolygon* cur_rp = &PolygonList[iend]; + + numpolys++; + numindices += cur_rp->NumEdgeIndices; + } + + glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, rp->EdgeIndices); + return numpolys; +} + +void RenderSceneChunk(int y, int h) +{ + u32 flags = 0; + if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + + if (h != 192) glScissor(0, y<<ScaleFactor, 256<<ScaleFactor, h<<ScaleFactor); + + GLboolean fogenable = (RenderDispCnt & (1<<7)) ? GL_TRUE : GL_FALSE; + + // pass 1: opaque pixels + + UseRenderShader(flags); + + glColorMaski(1, GL_TRUE, GL_FALSE, fogenable, GL_FALSE); + + glDepthFunc(GL_LESS); + glDepthMask(GL_TRUE); + + glBindVertexArray(VertexArrayID); + + for (int i = 0; i < NumFinalPolys; ) + { + RendererPolygon* rp = &PolygonList[i]; + + if (rp->PolyData->IsShadowMask) { i++; continue; } + + // zorp + glDepthFunc(GL_LESS); + + u32 polyattr = rp->PolyData->Attr; + u32 polyid = (polyattr >> 24) & 0x3F; + + glStencilFunc(GL_ALWAYS, polyid, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + glStencilMask(0xFF); + + i += RenderPolygonBatch(i); + } + + glEnable(GL_BLEND); + if (RenderDispCnt & (1<<3)) + glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE); + else + glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ONE); + + UseRenderShader(flags | RenderFlag_Trans); + + if (NumOpaqueFinalPolys > -1) + { + // pass 2: if needed, render translucent pixels that are against background pixels + // when background alpha is zero, those need to be rendered with blending disabled + + if ((RenderClearAttr1 & 0x001F0000) == 0) + { + glDisable(GL_BLEND); + + for (int i = 0; i < NumFinalPolys; ) + { + RendererPolygon* rp = &PolygonList[i]; + + if (rp->PolyData->IsShadowMask) + { + // draw actual shadow mask + + UseRenderShader(flags | RenderFlag_ShadowMask); + + glDisable(GL_BLEND); + glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glDepthMask(GL_FALSE); + + glDepthFunc(GL_LESS); + glStencilFunc(GL_EQUAL, 0xFF, 0xFF); + glStencilOp(GL_KEEP, GL_INVERT, GL_KEEP); + glStencilMask(0x01); + + i += RenderPolygonBatch(i); + } + else if (rp->PolyData->Translucent) + { + // zorp + glDepthFunc(GL_LESS); + + u32 polyattr = rp->PolyData->Attr; + u32 polyid = (polyattr >> 24) & 0x3F; + + GLboolean transfog; + if (!(polyattr & (1<<15))) transfog = fogenable; + else transfog = GL_FALSE; + + if (rp->PolyData->IsShadow) + { + // shadow against clear-plane will only pass if its polyID matches that of the clear plane + u32 clrpolyid = (RenderClearAttr1 >> 24) & 0x3F; + if (polyid != clrpolyid) { i++; continue; } + + glEnable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + + glStencilFunc(GL_EQUAL, 0xFE, 0xFF); + glStencilOp(GL_KEEP, GL_KEEP, GL_INVERT); + glStencilMask(~(0x40|polyid)); // heheh + + if (polyattr & (1<<11)) glDepthMask(GL_TRUE); + else glDepthMask(GL_FALSE); + + i += RenderPolygonBatch(i); + } + else + { + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + + glStencilFunc(GL_EQUAL, 0xFF, 0xFE); + glStencilOp(GL_KEEP, GL_KEEP, GL_INVERT); + glStencilMask(~(0x40|polyid)); // heheh + + if (polyattr & (1<<11)) glDepthMask(GL_TRUE); + else glDepthMask(GL_FALSE); + + i += RenderPolygonBatch(i); + } + } + else + i++; + } + + glEnable(GL_BLEND); + glStencilMask(0xFF); + } + + // pass 3: translucent pixels + + for (int i = 0; i < NumFinalPolys; ) + { + RendererPolygon* rp = &PolygonList[i]; + + if (rp->PolyData->IsShadowMask) + { + // clear shadow bits in stencil buffer + + glStencilMask(0x80); + glClear(GL_STENCIL_BUFFER_BIT); + + // draw actual shadow mask + + UseRenderShader(flags | RenderFlag_ShadowMask); + + glDisable(GL_BLEND); + glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glDepthMask(GL_FALSE); + + glDepthFunc(GL_LESS); + glStencilFunc(GL_ALWAYS, 0x80, 0x80); + glStencilOp(GL_KEEP, GL_REPLACE, GL_KEEP); + + i += RenderPolygonBatch(i); + } + else if (rp->PolyData->Translucent) + { + UseRenderShader(flags | RenderFlag_Trans); + + u32 polyattr = rp->PolyData->Attr; + u32 polyid = (polyattr >> 24) & 0x3F; + + GLboolean transfog; + if (!(polyattr & (1<<15))) transfog = fogenable; + else transfog = GL_FALSE; + + // zorp + glDepthFunc(GL_LESS); + + if (rp->PolyData->IsShadow) + { + glDisable(GL_BLEND); + glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glDepthMask(GL_FALSE); + glStencilFunc(GL_EQUAL, polyid, 0x3F); + glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO); + glStencilMask(0x80); + + RenderSinglePolygon(i); + + glEnable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + + glStencilFunc(GL_EQUAL, 0xC0|polyid, 0x80); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + glStencilMask(0x7F); + + if (polyattr & (1<<11)) glDepthMask(GL_TRUE); + else glDepthMask(GL_FALSE); + + RenderSinglePolygon(i); + i++; + } + else + { + glEnable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + + glStencilFunc(GL_NOTEQUAL, 0x40|polyid, 0x7F); + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + glStencilMask(0x7F); + + if (polyattr & (1<<11)) glDepthMask(GL_TRUE); + else glDepthMask(GL_FALSE); + + i += RenderPolygonBatch(i); + } + } + else + i++; + } + } + + glFlush(); + + if (RenderDispCnt & 0x00A0) // fog/edge enabled + { + glUseProgram(FinalPassShader[2]); + + glEnable(GL_BLEND); + if (RenderDispCnt & (1<<6)) + glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); + else + glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); + + { + u32 c = RenderFogColor; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + u32 a = (c >> 16) & 0x1F; + + glBlendColor((float)b/31.0, (float)g/31.0, (float)r/31.0, (float)a/31.0); + } + + glDepthFunc(GL_ALWAYS); + glDepthMask(GL_FALSE); + glStencilFunc(GL_ALWAYS, 0, 0); + glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); + glStencilMask(0); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[4]); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, FramebufferTex[5]); + + glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); + glBindVertexArray(ClearVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 2*3); + + glFlush(); + } +} + + +void RenderFrame() +{ + CurShaderID = -1; + + ShaderConfig.uScreenSize[0] = ScreenW; + ShaderConfig.uScreenSize[1] = ScreenH; + ShaderConfig.uDispCnt = RenderDispCnt; + + for (int i = 0; i < 32; i++) + { + u16 c = RenderToonTable[i]; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + + ShaderConfig.uToonColors[i][0] = (float)r / 31.0; + ShaderConfig.uToonColors[i][1] = (float)g / 31.0; + ShaderConfig.uToonColors[i][2] = (float)b / 31.0; + } + + for (int i = 0; i < 8; i++) + { + u16 c = RenderEdgeTable[i]; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + + ShaderConfig.uEdgeColors[i][0] = (float)r / 31.0; + ShaderConfig.uEdgeColors[i][1] = (float)g / 31.0; + ShaderConfig.uEdgeColors[i][2] = (float)b / 31.0; + } + + { + u32 c = RenderFogColor; + u32 r = c & 0x1F; + u32 g = (c >> 5) & 0x1F; + u32 b = (c >> 10) & 0x1F; + u32 a = (c >> 16) & 0x1F; + + ShaderConfig.uFogColor[0] = (float)r / 31.0; + ShaderConfig.uFogColor[1] = (float)g / 31.0; + ShaderConfig.uFogColor[2] = (float)b / 31.0; + ShaderConfig.uFogColor[3] = (float)a / 31.0; + } + + for (int i = 0; i < 34; i++) + { + u8 d = RenderFogDensityTable[i]; + ShaderConfig.uFogDensity[i][0] = (float)d / 127.0; + } + + ShaderConfig.uFogOffset = RenderFogOffset; + ShaderConfig.uFogShift = RenderFogShift; + + glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); + void* unibuf = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); + if (unibuf) memcpy(unibuf, &ShaderConfig, sizeof(ShaderConfig)); + glUnmapBuffer(GL_UNIFORM_BUFFER); + + // SUCKY!!!!!!!!!!!!!!!!!! + // TODO: detect when VRAM blocks are modified! + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, TexMemID); + for (int i = 0; i < 4; i++) + { + u32 mask = GPU::VRAMMap_Texture[i]; + u8* vram; + if (!mask) continue; + else if (mask & (1<<0)) vram = GPU::VRAM_A; + else if (mask & (1<<1)) vram = GPU::VRAM_B; + else if (mask & (1<<2)) vram = GPU::VRAM_C; + else if (mask & (1<<3)) vram = GPU::VRAM_D; + + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*128, 1024, 128, GL_RED_INTEGER, GL_UNSIGNED_BYTE, vram); + } + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, TexPalMemID); + for (int i = 0; i < 6; i++) + { + // 6 x 16K chunks + u32 mask = GPU::VRAMMap_TexPal[i]; + u8* vram; + if (!mask) continue; + else if (mask & (1<<4)) vram = &GPU::VRAM_E[(i&3)*0x4000]; + else if (mask & (1<<5)) vram = GPU::VRAM_F; + else if (mask & (1<<6)) vram = GPU::VRAM_G; + + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*8, 1024, 8, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, vram); + } + + glDisable(GL_SCISSOR_TEST); + glEnable(GL_DEPTH_TEST); + glEnable(GL_STENCIL_TEST); + + glViewport(0, 0, ScreenW, ScreenH); + + if (Antialias) glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[2]); + else glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[FrontBuffer]); + + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_TRUE); + glStencilMask(0xFF); + + // clear buffers + // TODO: clear bitmap + // TODO: check whether 'clear polygon ID' affects translucent polyID + // (for example when alpha is 1..30) + { + glUseProgram(ClearShaderPlain[2]); + glDepthFunc(GL_ALWAYS); + + u32 r = RenderClearAttr1 & 0x1F; + u32 g = (RenderClearAttr1 >> 5) & 0x1F; + u32 b = (RenderClearAttr1 >> 10) & 0x1F; + u32 fog = (RenderClearAttr1 >> 15) & 0x1; + u32 a = (RenderClearAttr1 >> 16) & 0x1F; + u32 polyid = (RenderClearAttr1 >> 24) & 0x3F; + u32 z = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + + glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); + glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + + /*if (r) r = r*2 + 1; + if (g) g = g*2 + 1; + if (b) b = b*2 + 1;*/ + + glUniform4ui(ClearUniformLoc[0], r, g, b, a); + glUniform1ui(ClearUniformLoc[1], z); + glUniform1ui(ClearUniformLoc[2], polyid); + glUniform1ui(ClearUniformLoc[3], fog); + + glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); + glBindVertexArray(ClearVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 2*3); + } + + if (RenderNumPolygons) + { + // render shit here + u32 flags = 0; + if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + + int npolys = 0; + int firsttrans = -1; + for (int i = 0; i < RenderNumPolygons; i++) + { + if (RenderPolygonRAM[i]->Degenerate) continue; + + // zog. + //if (RenderPolygonRAM[i]->YBottom <= 96 || RenderPolygonRAM[i]->YTop >= 144) continue; + + SetupPolygon(&PolygonList[npolys], RenderPolygonRAM[i]); + if (firsttrans < 0 && RenderPolygonRAM[i]->Translucent) + firsttrans = npolys; + + npolys++; + } + NumFinalPolys = npolys; + NumOpaqueFinalPolys = firsttrans; + + BuildPolygons(&PolygonList[0], npolys); + glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); + glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer); + + RenderSceneChunk(0, 192); + } + + if (Antialias) + { + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[2]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[FrontBuffer]); + glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, ScreenW/2, ScreenH/2, GL_COLOR_BUFFER_BIT, GL_LINEAR); + } + + glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[FrontBuffer]); + FrontBuffer = FrontBuffer ? 0 : 1; +} + +void PrepareCaptureFrame() +{ + // TODO: make sure this picks the right buffer when doing antialiasing + int original_fb = FrontBuffer^1; + + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[original_fb]); + glReadBuffer(GL_COLOR_ATTACHMENT0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[3]); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, 256, 192, GL_COLOR_BUFFER_BIT, GL_NEAREST); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[3]); + glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL); +} + +u32* GetLine(int line) +{ + int stride = 256; + + if (line == 0) + { + u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); + if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*192); + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + } + + u64* ptr = (u64*)&Framebuffer[stride * line]; + for (int i = 0; i < stride; i+=2) + { + u64 rgb = *ptr & 0x00FCFCFC00FCFCFC; + u64 a = *ptr & 0xF8000000F8000000; + + *ptr++ = (rgb >> 2) | (a >> 3); + } + + return &Framebuffer[stride * line]; +} + +void SetupAccelFrame() +{ + glBindTexture(GL_TEXTURE_2D, FramebufferTex[FrontBuffer]); +} + +} +} diff --git a/src/GPU3D_OpenGL_shaders.h b/src/GPU3D_OpenGL_shaders.h new file mode 100644 index 0000000..a1aa95a --- /dev/null +++ b/src/GPU3D_OpenGL_shaders.h @@ -0,0 +1,690 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU3D_OPENGL_SHADERS_H +#define GPU3D_OPENGL_SHADERS_H + +#define kShaderHeader "#version 140" + + +const char* kClearVS = kShaderHeader R"( + +in vec2 vPosition; + +uniform uint uDepth; + +void main() +{ + float fdepth = (float(uDepth) / 8388608.0) - 1.0; + gl_Position = vec4(vPosition, fdepth, 1.0); +} +)"; + +const char* kClearFS = kShaderHeader R"( + +uniform uvec4 uColor; +uniform uint uOpaquePolyID; +uniform uint uFogFlag; + +out vec4 oColor; +out uvec3 oAttr; + +void main() +{ + oColor = vec4(uColor).bgra / 31.0; + oAttr.r = uOpaquePolyID; + oAttr.g = uint(0); + oAttr.b = uFogFlag; +} +)"; + + + +const char* kFinalPassVS = kShaderHeader R"( + +in vec2 vPosition; + +void main() +{ + // heh + gl_Position = vec4(vPosition, 0.0, 1.0); +} +)"; + +const char* kFinalPassFS = kShaderHeader R"( + +uniform sampler2D DepthBuffer; +uniform usampler2D AttrBuffer; + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +out vec4 oColor; + +vec4 CalculateFog(float depth) +{ + int idepth = int(depth * 16777216.0); + int densityid, densityfrac; + + if (idepth < uFogOffset) + { + densityid = 0; + densityfrac = 0; + } + else + { + uint udepth = uint(idepth); + udepth -= uint(uFogOffset); + udepth = (udepth >> 2) << uint(uFogShift); + + densityid = int(udepth >> 17); + if (densityid >= 32) + { + densityid = 32; + densityfrac = 0; + } + else + densityfrac = int(udepth & uint(0x1FFFF)); + } + + float density = mix(uFogDensity[densityid], uFogDensity[densityid+1], float(densityfrac)/131072.0); + + return vec4(density, density, density, density); +} + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + + vec4 ret = vec4(0,0,0,0); + vec4 depth = texelFetch(DepthBuffer, coord, 0); + ivec4 attr = ivec4(texelFetch(AttrBuffer, coord, 0)); + + if (attr.b != 0) ret = CalculateFog(depth.r); + + oColor = ret; +} +)"; + + + +const char* kRenderVSCommon = R"( + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +in uvec4 vPosition; +in uvec4 vColor; +in ivec2 vTexcoord; +in ivec3 vPolygonAttr; + +smooth out vec4 fColor; +smooth out vec2 fTexcoord; +flat out ivec3 fPolygonAttr; +)"; + +const char* kRenderFSCommon = R"( + +uniform usampler2D TexMem; +uniform sampler2D TexPalMem; + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +smooth in vec4 fColor; +smooth in vec2 fTexcoord; +flat in ivec3 fPolygonAttr; + +out vec4 oColor; +out uvec3 oAttr; + +int TexcoordWrap(int c, int maxc, int mode) +{ + if ((mode & (1<<0)) != 0) + { + if ((mode & (1<<2)) != 0 && (c & maxc) != 0) + return (maxc-1) - (c & (maxc-1)); + else + return (c & (maxc-1)); + } + else + return clamp(c, 0, maxc-1); +} + +vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, int wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + + pixel.a = (pixel.r & 0xE0); + pixel.a = (pixel.a >> 3) + (pixel.a >> 6); + pixel.r &= 0x1F; + + addr.y = (addr.y << 3) + pixel.r; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, float(pixel.a)/31.0); +} + +vec4 TextureFetch_I2(ivec2 addr, ivec4 st, int wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) >> 2; + ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + pixel.r >>= (2 * (st.x & 3)); + pixel.r &= 0x03; + + addr.y = (addr.y << 2) + pixel.r; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, (pixel.r>0)?1:alpha0); +} + +vec4 TextureFetch_I4(ivec2 addr, ivec4 st, int wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) >> 1; + ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + if ((st.x & 1) != 0) pixel.r >>= 4; + else pixel.r &= 0x0F; + + addr.y = (addr.y << 3) + pixel.r; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, (pixel.r>0)?1:alpha0); +} + +vec4 TextureFetch_I8(ivec2 addr, ivec4 st, int wrapmode, float alpha0) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + + addr.y = (addr.y << 3) + pixel.r; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, (pixel.r>0)?1:alpha0); +} + +vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, int wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3); + ivec4 p = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + int val = (p.r >> (2 * (st.x & 0x3))) & 0x3; + + int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1); + if (addr.x >= 0x40000) slot1addr += 0x10000; + + int palinfo; + p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0)); + palinfo = p.r; + slot1addr++; + p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0)); + palinfo |= (p.r << 8); + + addr.y = (addr.y << 3) + ((palinfo & 0x3FFF) << 1); + palinfo >>= 14; + + if (val == 0) + { + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (val == 1) + { + addr.y++; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (val == 2) + { + if (palinfo == 1) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb + color1.rgb) / 2.0, 1.0); + } + else if (palinfo == 3) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0); + } + else + { + addr.y += 2; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + } + else + { + if (palinfo == 2) + { + addr.y += 3; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4(color.rgb, 1.0); + } + else if (palinfo == 3) + { + vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + addr.y++; + vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0); + } + else + { + return vec4(0.0); + } + } +} + +vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, int wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x); + ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + + pixel.a = (pixel.r & 0xF8) >> 3; + pixel.r &= 0x07; + + addr.y = (addr.y << 3) + pixel.r; + vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); + + return vec4(color.rgb, float(pixel.a)/31.0); +} + +vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, int wrapmode) +{ + st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); + st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); + + addr.x += ((st.y * st.z) + st.x) << 1; + ivec4 pixelL = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + addr.x++; + ivec4 pixelH = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); + + vec4 color; + color.r = float(pixelL.r & 0x1F) / 31.0; + color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0; + color.b = float((pixelH.r & 0x7C) >> 2) / 31.0; + color.a = float(pixelH.r >> 7); + + return color; +} + +vec4 TextureLookup_Nearest(vec2 st) +{ + int attr = int(fPolygonAttr.y); + int paladdr = int(fPolygonAttr.z); + + float alpha0; + if ((attr & (1<<29)) != 0) alpha0 = 0.0; + else alpha0 = 1.0; + + int tw = 8 << ((attr >> 20) & 0x7); + int th = 8 << ((attr >> 23) & 0x7); + ivec4 st_full = ivec4(ivec2(st), tw, th); + + ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr); + int wrapmode = (attr >> 16); + + int type = (attr >> 26) & 0x7; + if (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode); + else if (type == 2) return TextureFetch_I2 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 3) return TextureFetch_I4 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 4) return TextureFetch_I8 (vramaddr, st_full, wrapmode, alpha0); + else if (type == 1) return TextureFetch_A3I5 (vramaddr, st_full, wrapmode); + else if (type == 6) return TextureFetch_A5I3 (vramaddr, st_full, wrapmode); + else return TextureFetch_Direct (vramaddr, st_full, wrapmode); +} + +vec4 TextureLookup_Linear(vec2 texcoord) +{ + ivec2 intpart = ivec2(texcoord); + vec2 fracpart = fract(texcoord); + + int attr = int(fPolygonAttr.y); + int paladdr = int(fPolygonAttr.z); + + float alpha0; + if ((attr & (1<<29)) != 0) alpha0 = 0.0; + else alpha0 = 1.0; + + int tw = 8 << ((attr >> 20) & 0x7); + int th = 8 << ((attr >> 23) & 0x7); + ivec4 st_full = ivec4(intpart, tw, th); + + ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr); + int wrapmode = (attr >> 16); + + vec4 A, B, C, D; + int type = (attr >> 26) & 0x7; + if (type == 5) + { + A = TextureFetch_Compressed(vramaddr, st_full , wrapmode); + B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else if (type == 2) + { + A = TextureFetch_I2(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 3) + { + A = TextureFetch_I4(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 4) + { + A = TextureFetch_I8(vramaddr, st_full , wrapmode, alpha0); + B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); + C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); + D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); + } + else if (type == 1) + { + A = TextureFetch_A3I5(vramaddr, st_full , wrapmode); + B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else if (type == 6) + { + A = TextureFetch_A5I3(vramaddr, st_full , wrapmode); + B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + else + { + A = TextureFetch_Direct(vramaddr, st_full , wrapmode); + B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); + C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); + D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); + } + + float fx = fracpart.x; + vec4 AB; + if (A.a < (0.5/31.0) && B.a < (0.5/31.0)) + AB = vec4(0); + else + { + //if (A.a < (0.5/31.0) || B.a < (0.5/31.0)) + // fx = step(0.5, fx); + + AB = mix(A, B, fx); + } + + fx = fracpart.x; + vec4 CD; + if (C.a < (0.5/31.0) && D.a < (0.5/31.0)) + CD = vec4(0); + else + { + //if (C.a < (0.5/31.0) || D.a < (0.5/31.0)) + // fx = step(0.5, fx); + + CD = mix(C, D, fx); + } + + fx = fracpart.y; + vec4 ret; + if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0)) + ret = vec4(0); + else + { + //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0)) + // fx = step(0.5, fx); + + ret = mix(AB, CD, fx); + } + + return ret; +} + +vec4 FinalColor() +{ + vec4 col; + vec4 vcol = fColor; + int blendmode = (fPolygonAttr.x >> 4) & 0x3; + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) == 0) + { + // toon + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + vcol.rgb = tooncolor; + } + else + { + // highlight + vcol.rgb = vcol.rrr; + } + } + + if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0)) + { + // no texture + col = vcol; + } + else + { + vec4 tcol = TextureLookup_Nearest(fTexcoord); + //vec4 tcol = TextureLookup_Linear(fTexcoord); + + if ((blendmode & 1) != 0) + { + // decal + col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a)); + col.a = vcol.a; + } + else + { + // modulate + col = vcol * tcol; + } + } + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) != 0) + { + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + col.rgb = min(col.rgb + tooncolor, 1.0); + } + } + + return col.bgra; +} +)"; + + +const char* kRenderVS_Z = R"( + +void main() +{ + int attr = vPolygonAttr.x; + int zshift = (attr >> 16) & 0x1F; + + vec4 fpos; + fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; + fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0; + fpos.w = float(vPosition.w) / 65536.0f; + fpos.xyz *= fpos.w; + + fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); + fTexcoord = vec2(vTexcoord) / 16.0; + fPolygonAttr = vPolygonAttr; + + gl_Position = fpos; +} +)"; + +const char* kRenderVS_W = R"( + +smooth out float fZ; + +void main() +{ + int attr = vPolygonAttr.x; + int zshift = (attr >> 16) & 0x1F; + + vec4 fpos; + fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0; + fZ = float(vPosition.z << zshift) / 16777216.0; + fpos.w = float(vPosition.w) / 65536.0f; + fpos.xy *= fpos.w; + + fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); + fTexcoord = vec2(vTexcoord) / 16.0; + fPolygonAttr = vPolygonAttr; + + gl_Position = fpos; +} +)"; + + +const char* kRenderFS_ZO = R"( + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 30.5/31) discard; + + oColor = col; + oAttr.r = uint((fPolygonAttr.x >> 24) & 0x3F); + oAttr.b = uint((fPolygonAttr.x >> 15) & 0x1); +} +)"; + +const char* kRenderFS_WO = R"( + +smooth in float fZ; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 30.5/31) discard; + + oColor = col; + oAttr.r = uint((fPolygonAttr.x >> 24) & 0x3F); + oAttr.b = uint((fPolygonAttr.x >> 15) & 0x1); + gl_FragDepth = fZ; +} +)"; + +const char* kRenderFS_ZT = R"( + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + oColor = col; + oAttr.b = uint(0); +} +)"; + +const char* kRenderFS_WT = R"( + +smooth in float fZ; + +void main() +{ + vec4 col = FinalColor(); + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + oColor = col; + oAttr.b = uint(0); + gl_FragDepth = fZ; +} +)"; + +const char* kRenderFS_ZSM = R"( + +void main() +{ + oColor = vec4(0,0,0,1); +} +)"; + +const char* kRenderFS_WSM = R"( + +smooth in float fZ; + +void main() +{ + oColor = vec4(0,0,0,1); + gl_FragDepth = fZ; +} +)"; + +#endif // GPU3D_OPENGL_SHADERS_H diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 29b46e1..932b5d4 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -129,9 +129,9 @@ void DeInit() void Reset() { - memset(ColorBuffer, 0, 256*192 * 4); - memset(DepthBuffer, 0, 256*192 * 4); - memset(AttrBuffer, 0, 256*192 * 4); + memset(ColorBuffer, 0, BufferSize * 2 * 4); + memset(DepthBuffer, 0, BufferSize * 2 * 4); + memset(AttrBuffer, 0, BufferSize * 2 * 4); PrevIsShadowMask = false; @@ -2106,17 +2106,14 @@ void RenderThreadFunc() } } -void RequestLine(int line) +u32* GetLine(int line) { if (RenderThreadRunning) { if (line < 192) Platform::Semaphore_Wait(Sema_ScanlineCount); } -} -u32* GetLine(int line) -{ return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset]; } diff --git a/src/NDS.cpp b/src/NDS.cpp index ae71ad6..a814dfd 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -1532,7 +1532,7 @@ void debug(u32 param) // printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); FILE* - shit = fopen("debug/lmnts.bin", "wb"); + shit = fopen("debug/card.bin", "wb"); for (u32 i = 0x02000000; i < 0x02400000; i+=4) { u32 val = ARM7Read32(i); diff --git a/src/OpenGLSupport.cpp b/src/OpenGLSupport.cpp new file mode 100644 index 0000000..0204835 --- /dev/null +++ b/src/OpenGLSupport.cpp @@ -0,0 +1,119 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include "OpenGLSupport.h" + + +DO_PROCLIST(DECLPROC); + + +bool OpenGL_Init() +{ + DO_PROCLIST(LOADPROC); + + return true; +} + +bool OpenGL_BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name) +{ + int len; + int res; + + ids[0] = glCreateShader(GL_VERTEX_SHADER); + len = strlen(vs); + glShaderSource(ids[0], 1, &vs, &len); + glCompileShader(ids[0]); + + glGetShaderiv(ids[0], GL_COMPILE_STATUS, &res); + if (res != GL_TRUE) + { + glGetShaderiv(ids[0], GL_INFO_LOG_LENGTH, &res); + if (res < 1) res = 1024; + char* log = new char[res+1]; + glGetShaderInfoLog(ids[0], res+1, NULL, log); + printf("OpenGL: failed to compile vertex shader %s: %s\n", name, log); + printf("shader source:\n--\n%s\n--\n", vs); + delete[] log; + + glDeleteShader(ids[0]); + + return false; + } + + ids[1] = glCreateShader(GL_FRAGMENT_SHADER); + len = strlen(fs); + glShaderSource(ids[1], 1, &fs, &len); + glCompileShader(ids[1]); + + glGetShaderiv(ids[1], GL_COMPILE_STATUS, &res); + if (res != GL_TRUE) + { + glGetShaderiv(ids[1], GL_INFO_LOG_LENGTH, &res); + if (res < 1) res = 1024; + char* log = new char[res+1]; + glGetShaderInfoLog(ids[1], res+1, NULL, log); + printf("OpenGL: failed to compile fragment shader %s: %s\n", name, log); + //printf("shader source:\n--\n%s\n--\n", fs); + delete[] log; + + FILE* logf = fopen("shaderfail.log", "w"); + fwrite(fs, len+1, 1, logf); + fclose(logf); + + glDeleteShader(ids[0]); + glDeleteShader(ids[1]); + + return false; + } + + ids[2] = glCreateProgram(); + glAttachShader(ids[2], ids[0]); + glAttachShader(ids[2], ids[1]); + glLinkProgram(ids[2]); + + glGetProgramiv(ids[2], GL_LINK_STATUS, &res); + if (res != GL_TRUE) + { + glGetProgramiv(ids[2], GL_INFO_LOG_LENGTH, &res); + if (res < 1) res = 1024; + char* log = new char[res+1]; + glGetProgramInfoLog(ids[2], res+1, NULL, log); + printf("OpenGL: failed to link program %s: %s\n", name, log); + delete[] log; + + glDeleteShader(ids[0]); + glDeleteShader(ids[1]); + glDeleteProgram(ids[2]); + + return false; + } + + return true; +} + +void OpenGL_DeleteShaderProgram(GLuint* ids) +{ + glDeleteShader(ids[0]); + glDeleteShader(ids[1]); + glDeleteProgram(ids[2]); +} + +void OpenGL_UseShaderProgram(GLuint* ids) +{ + glUseProgram(ids[2]); +} diff --git a/src/OpenGLSupport.h b/src/OpenGLSupport.h new file mode 100644 index 0000000..1ae0a35 --- /dev/null +++ b/src/OpenGLSupport.h @@ -0,0 +1,123 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef OPENGLSUPPORT_H +#define OPENGLSUPPORT_H + +#include <stdio.h> +#include <GL/gl.h> +#include <GL/glext.h> + +#include "Platform.h" + + +// here, have some macro magic +// we at the melonDS company really love macro magic +// also, suggestion to the fine folks who write the OpenGL headers: +// pls make the type names follow the same capitalization as their +// matching function names, so this is more convenient to deal with + +#define DECLPROC(type, name) \ + PFN##type##PROC name ; + +#define DECLPROC_EXT(type, name) \ + extern PFN##type##PROC name ; + +#define LOADPROC(type, name) \ + name = (PFN##type##PROC)Platform::GL_GetProcAddress(#name); \ + if (!name) { printf("OpenGL: " #name " not found\n"); return false; } + + +// if you need more OpenGL functions, add them to the macronator here +// TODO: handle conditionally loading certain functions for different GL versions + +#define DO_PROCLIST(func) \ + func(GLGENFRAMEBUFFERS, glGenFramebuffers); \ + func(GLDELETEFRAMEBUFFERS, glDeleteFramebuffers); \ + func(GLBINDFRAMEBUFFER, glBindFramebuffer); \ + func(GLFRAMEBUFFERTEXTURE, glFramebufferTexture); \ + func(GLBLITFRAMEBUFFER, glBlitFramebuffer); \ + func(GLCHECKFRAMEBUFFERSTATUS, glCheckFramebufferStatus); \ + \ + func(GLGENBUFFERS, glGenBuffers); \ + func(GLDELETEBUFFERS, glDeleteBuffers); \ + func(GLBINDBUFFER, glBindBuffer); \ + func(GLMAPBUFFER, glMapBuffer); \ + func(GLMAPBUFFERRANGE, glMapBufferRange); \ + func(GLUNMAPBUFFER, glUnmapBuffer); \ + func(GLBUFFERDATA, glBufferData); \ + func(GLBUFFERSUBDATA, glBufferSubData); \ + func(GLBINDBUFFERBASE, glBindBufferBase); \ + \ + func(GLGENVERTEXARRAYS, glGenVertexArrays); \ + func(GLDELETEVERTEXARRAYS, glDeleteVertexArrays); \ + func(GLBINDVERTEXARRAY, glBindVertexArray); \ + func(GLENABLEVERTEXATTRIBARRAY, glEnableVertexAttribArray); \ + func(GLDISABLEVERTEXATTRIBARRAY, glDisableVertexAttribArray); \ + func(GLVERTEXATTRIBPOINTER, glVertexAttribPointer); \ + func(GLVERTEXATTRIBIPOINTER, glVertexAttribIPointer); \ + func(GLBINDATTRIBLOCATION, glBindAttribLocation); \ + func(GLBINDFRAGDATALOCATION, glBindFragDataLocation); \ + \ + func(GLCREATESHADER, glCreateShader); \ + func(GLSHADERSOURCE, glShaderSource); \ + func(GLCOMPILESHADER, glCompileShader); \ + func(GLCREATEPROGRAM, glCreateProgram); \ + func(GLATTACHSHADER, glAttachShader); \ + func(GLLINKPROGRAM, glLinkProgram); \ + func(GLUSEPROGRAM, glUseProgram); \ + func(GLGETSHADERIV, glGetShaderiv); \ + func(GLGETSHADERINFOLOG, glGetShaderInfoLog); \ + func(GLGETPROGRAMIV, glGetProgramiv); \ + func(GLGETPROGRAMINFOLOG, glGetProgramInfoLog); \ + func(GLDELETESHADER, glDeleteShader); \ + func(GLDELETEPROGRAM, glDeleteProgram); \ + \ + func(GLUNIFORM1I, glUniform1i); \ + func(GLUNIFORM1UI, glUniform1ui); \ + func(GLUNIFORM4UI, glUniform4ui); \ + func(GLUNIFORMBLOCKBINDING, glUniformBlockBinding); \ + func(GLGETUNIFORMLOCATION, glGetUniformLocation); \ + func(GLGETUNIFORMBLOCKINDEX, glGetUniformBlockIndex); \ + \ + func(GLACTIVETEXTURE, glActiveTexture); \ + func(GLBINDIMAGETEXTURE, glBindImageTexture); \ + \ + func(GLDRAWBUFFERS, glDrawBuffers); \ + \ + func(GLBLENDFUNCSEPARATE, glBlendFuncSeparate); \ + func(GLBLENDEQUATIONSEPARATE, glBlendEquationSeparate); \ + func(GLBLENDCOLOR, glBlendColor); \ + \ + func(GLCOLORMASKI, glColorMaski); \ + \ + func(GLMEMORYBARRIER, glMemoryBarrier); \ + \ + func(GLGETSTRINGI, glGetStringi); \ + + +DO_PROCLIST(DECLPROC_EXT); + + +bool OpenGL_Init(); + +bool OpenGL_BuildShaderProgram(const char* vs, const char* fs, GLuint* ids, const char* name); +void OpenGL_DeleteShaderProgram(GLuint* ids); +void OpenGL_UseShaderProgram(GLuint* ids); + +#endif // OPENGLSUPPORT_H diff --git a/src/Platform.h b/src/Platform.h index df3335b..ca6971e 100644 --- a/src/Platform.h +++ b/src/Platform.h @@ -68,6 +68,8 @@ void Semaphore_Reset(void* sema); void Semaphore_Wait(void* sema); void Semaphore_Post(void* sema); +void* GL_GetProcAddress(const char* proc); + // local multiplayer comm interface // packet type: DS-style TX header (12 bytes) + original 802.11 frame bool MP_Init(); diff --git a/src/SPU.cpp b/src/SPU.cpp index 93fdb00..ee9237f 100644 --- a/src/SPU.cpp +++ b/src/SPU.cpp @@ -23,7 +23,9 @@ // SPU TODO -// * loop mode 3, what does it do? +// * capture addition modes, overflow bugs +// * channel hold +// * 'length less than 4' glitch namespace SPU diff --git a/src/libui_sdl/DlgVideoSettings.cpp b/src/libui_sdl/DlgVideoSettings.cpp new file mode 100644 index 0000000..329c79e --- /dev/null +++ b/src/libui_sdl/DlgVideoSettings.cpp @@ -0,0 +1,309 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "libui/ui.h" + +#include "../types.h" +#include "PlatformConfig.h" + +#include "DlgVideoSettings.h" + + +void ApplyNewSettings(int type); + + +namespace DlgVideoSettings +{ + +bool opened; +uiWindow* win; + +uiRadioButtons* rbRenderer; +uiCheckbox* cbGLDisplay; +uiCheckbox* cbThreaded3D; +uiCombobox* cbResolution; +uiCheckbox* cbAntialias; + +int old_renderer; +int old_gldisplay; +int old_threaded3D; +int old_resolution; +int old_antialias; + + +void UpdateControls() +{ + int renderer = uiRadioButtonsSelected(rbRenderer); + + if (renderer == 0) + { + uiControlEnable(uiControl(cbGLDisplay)); + uiControlEnable(uiControl(cbThreaded3D)); + uiControlDisable(uiControl(cbResolution)); + uiControlDisable(uiControl(cbAntialias)); + } + else + { + uiControlDisable(uiControl(cbGLDisplay)); + uiControlDisable(uiControl(cbThreaded3D)); + uiControlEnable(uiControl(cbResolution)); + uiControlEnable(uiControl(cbAntialias)); + } +} + + +int OnCloseWindow(uiWindow* window, void* blarg) +{ + opened = false; + return 1; +} + +void OnRendererChanged(uiRadioButtons* rb, void* blarg) +{ + int id = uiRadioButtonsSelected(rb); + bool old_usegl = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0); + + Config::_3DRenderer = id; + UpdateControls(); + + bool new_usegl = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0); + if (new_usegl != old_usegl) + ApplyNewSettings(2); + else + ApplyNewSettings(3); +} + +void OnGLDisplayChanged(uiCheckbox* cb, void* blarg) +{ + Config::ScreenUseGL = uiCheckboxChecked(cb); + ApplyNewSettings(2); + uiControlSetFocus(uiControl(cb)); +} + +void OnThreaded3DChanged(uiCheckbox* cb, void* blarg) +{ + Config::Threaded3D = uiCheckboxChecked(cb); + ApplyNewSettings(0); +} + +void OnResolutionChanged(uiCombobox* cb, void* blarg) +{ + int id = uiComboboxSelected(cb); + + Config::GL_ScaleFactor = id+1; + ApplyNewSettings(0); +} + +void OnAntialiasChanged(uiCheckbox* cb, void* blarg) +{ + Config::GL_Antialias = uiCheckboxChecked(cb); + ApplyNewSettings(0); +} + +void OnCancel(uiButton* btn, void* blarg) +{ + bool apply0 = false; + bool apply2 = false; + bool apply3 = false; + + bool old_usegl = (old_gldisplay != 0) || (old_renderer != 0); + bool new_usegl = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0); + + if (old_renderer != Config::_3DRenderer) + { + Config::_3DRenderer = old_renderer; + apply3 = true; + } + + if (old_gldisplay != Config::ScreenUseGL) + { + Config::ScreenUseGL = old_gldisplay; + } + if (old_usegl != new_usegl) + { + apply2 = true; + } + + if (old_threaded3D != Config::Threaded3D) + { + Config::Threaded3D = old_threaded3D; + apply0 = true; + } + + if (old_resolution != Config::GL_ScaleFactor || + old_antialias != Config::GL_Antialias) + { + Config::GL_ScaleFactor = old_resolution; + Config::GL_Antialias = old_antialias; + apply0 = true; + } + + if (apply2) ApplyNewSettings(2); + else if (apply3) ApplyNewSettings(3); + if (apply0) ApplyNewSettings(0); + + uiControlDestroy(uiControl(win)); + opened = false; +} + +void OnOk(uiButton* btn, void* blarg) +{ + Config::Save(); + + uiControlDestroy(uiControl(win)); + opened = false; +} + +void Open() +{ + if (opened) + { + uiControlSetFocus(uiControl(win)); + return; + } + + opened = true; + win = uiNewWindow("Video settings - melonDS", 400, 100, 0, 0, 0); + uiWindowSetMargined(win, 1); + uiWindowOnClosing(win, OnCloseWindow, NULL); + + uiBox* top = uiNewVerticalBox(); + uiWindowSetChild(win, uiControl(top)); + uiBoxSetPadded(top, 1); + + uiBox* splitter = uiNewHorizontalBox(); + uiBoxAppend(top, uiControl(splitter), 0); + uiBoxSetPadded(splitter, 1); + + uiBox* left = uiNewVerticalBox(); + uiBoxAppend(splitter, uiControl(left), 1); + uiBoxSetPadded(left, 1); + uiBox* right = uiNewVerticalBox(); + uiBoxAppend(splitter, uiControl(right), 1); + uiBoxSetPadded(right, 1); + + { + uiGroup* grp = uiNewGroup("Display settings"); + uiBoxAppend(left, uiControl(grp), 0); + uiGroupSetMargined(grp, 1); + + uiBox* in_ctrl = uiNewVerticalBox(); + uiGroupSetChild(grp, uiControl(in_ctrl)); + + uiLabel* lbl = uiNewLabel("3D renderer:"); + uiBoxAppend(in_ctrl, uiControl(lbl), 0); + + rbRenderer = uiNewRadioButtons(); + uiRadioButtonsAppend(rbRenderer, "Software"); + uiRadioButtonsAppend(rbRenderer, "OpenGL"); + uiRadioButtonsOnSelected(rbRenderer, OnRendererChanged, NULL); + uiBoxAppend(in_ctrl, uiControl(rbRenderer), 0); + + lbl = uiNewLabel(""); + uiBoxAppend(in_ctrl, uiControl(lbl), 0); + + cbGLDisplay = uiNewCheckbox("OpenGL display"); + uiCheckboxOnToggled(cbGLDisplay, OnGLDisplayChanged, NULL); + uiBoxAppend(in_ctrl, uiControl(cbGLDisplay), 0); + } + + { + uiGroup* grp = uiNewGroup("Software renderer"); + uiBoxAppend(right, uiControl(grp), 0); + uiGroupSetMargined(grp, 1); + + uiBox* in_ctrl = uiNewVerticalBox(); + uiGroupSetChild(grp, uiControl(in_ctrl)); + + cbThreaded3D = uiNewCheckbox("Threaded"); + uiCheckboxOnToggled(cbThreaded3D, OnThreaded3DChanged, NULL); + uiBoxAppend(in_ctrl, uiControl(cbThreaded3D), 0); + } + + { + uiGroup* grp = uiNewGroup("OpenGL renderer"); + uiBoxAppend(right, uiControl(grp), 0); + uiGroupSetMargined(grp, 1); + + uiBox* in_ctrl = uiNewVerticalBox(); + uiGroupSetChild(grp, uiControl(in_ctrl)); + + uiLabel* lbl = uiNewLabel("Internal resolution:"); + uiBoxAppend(in_ctrl, uiControl(lbl), 0); + + cbResolution = uiNewCombobox(); + uiComboboxOnSelected(cbResolution, OnResolutionChanged, NULL); + for (int i = 1; i <= 8; i++) + { + char txt[64]; + sprintf(txt, "%dx native (%dx%d)", i, 256*i, 192*i); + uiComboboxAppend(cbResolution, txt); + } + uiBoxAppend(in_ctrl, uiControl(cbResolution), 0); + + lbl = uiNewLabel(""); + uiBoxAppend(in_ctrl, uiControl(lbl), 0); + + cbAntialias = uiNewCheckbox("Antialiasing"); + uiCheckboxOnToggled(cbAntialias, OnAntialiasChanged, NULL); + uiBoxAppend(in_ctrl, uiControl(cbAntialias), 0); + } + + { + uiBox* in_ctrl = uiNewHorizontalBox(); + uiBoxSetPadded(in_ctrl, 1); + uiBoxAppend(top, uiControl(in_ctrl), 0); + + uiLabel* dummy = uiNewLabel(""); + uiBoxAppend(in_ctrl, uiControl(dummy), 1); + + uiButton* btncancel = uiNewButton("Cancel"); + uiButtonOnClicked(btncancel, OnCancel, NULL); + uiBoxAppend(in_ctrl, uiControl(btncancel), 0); + + uiButton* btnok = uiNewButton("Ok"); + uiButtonOnClicked(btnok, OnOk, NULL); + uiBoxAppend(in_ctrl, uiControl(btnok), 0); + } + + Config::_3DRenderer = Config::_3DRenderer ? 1 : 0; + + if (Config::GL_ScaleFactor < 1) Config::GL_ScaleFactor = 1; + else if (Config::GL_ScaleFactor > 8) Config::GL_ScaleFactor = 8; + + old_renderer = Config::_3DRenderer; + old_gldisplay = Config::ScreenUseGL; + old_threaded3D = Config::Threaded3D; + old_resolution = Config::GL_ScaleFactor; + old_antialias = Config::GL_Antialias; + + uiCheckboxSetChecked(cbGLDisplay, Config::ScreenUseGL); + uiCheckboxSetChecked(cbThreaded3D, Config::Threaded3D); + uiComboboxSetSelected(cbResolution, Config::GL_ScaleFactor-1); + uiCheckboxSetChecked(cbAntialias, Config::GL_Antialias); + uiRadioButtonsSetSelected(rbRenderer, Config::_3DRenderer); + UpdateControls(); + + uiControlShow(uiControl(win)); +} + +} diff --git a/src/libui_sdl/DlgVideoSettings.h b/src/libui_sdl/DlgVideoSettings.h new file mode 100644 index 0000000..cd3d1b1 --- /dev/null +++ b/src/libui_sdl/DlgVideoSettings.h @@ -0,0 +1,29 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef DLGVIDEOSETTINGS_H +#define DLGVIDEOSETTINGS_H + +namespace DlgVideoSettings +{ + +void Open(); + +} + +#endif // DLGVIDEOSETTINGS_H diff --git a/src/libui_sdl/Platform.cpp b/src/libui_sdl/Platform.cpp index 6ebe8c3..e3035b3 100644 --- a/src/libui_sdl/Platform.cpp +++ b/src/libui_sdl/Platform.cpp @@ -24,6 +24,7 @@ #include "PlatformConfig.h" #include "LAN_Socket.h" #include "LAN_PCap.h" +#include "libui/ui.h" #include <string> #ifdef __WIN32__ @@ -302,6 +303,12 @@ void Semaphore_Post(void* sema) } +void* GL_GetProcAddress(const char* proc) +{ + return uiGLGetProcAddress(proc); +} + + bool MP_Init() { int opt_true = 1; diff --git a/src/libui_sdl/PlatformConfig.cpp b/src/libui_sdl/PlatformConfig.cpp index 2daf746..f700ecb 100644 --- a/src/libui_sdl/PlatformConfig.cpp +++ b/src/libui_sdl/PlatformConfig.cpp @@ -40,6 +40,9 @@ int ScreenLayout; int ScreenSizing; int ScreenFilter; +int ScreenUseGL; +int ScreenRatio; + int LimitFPS; int DirectBoot; @@ -101,6 +104,9 @@ ConfigEntry PlatformConfigFile[] = {"ScreenSizing", 0, &ScreenSizing, 0, NULL, 0}, {"ScreenFilter", 0, &ScreenFilter, 1, NULL, 0}, + {"ScreenUseGL", 0, &ScreenUseGL, 1, NULL, 0}, + {"ScreenRatio", 0, &ScreenRatio, 0, NULL, 0}, + {"LimitFPS", 0, &LimitFPS, 1, NULL, 0}, {"DirectBoot", 0, &DirectBoot, 1, NULL, 0}, diff --git a/src/libui_sdl/PlatformConfig.h b/src/libui_sdl/PlatformConfig.h index ed31e18..013a0a7 100644 --- a/src/libui_sdl/PlatformConfig.h +++ b/src/libui_sdl/PlatformConfig.h @@ -48,6 +48,9 @@ extern int ScreenLayout; extern int ScreenSizing; extern int ScreenFilter; +extern int ScreenUseGL; +extern int ScreenRatio; + extern int LimitFPS; extern int DirectBoot; diff --git a/src/libui_sdl/libui/ui.h b/src/libui_sdl/libui/ui.h index 5f40aff..d2e9960 100644 --- a/src/libui_sdl/libui/ui.h +++ b/src/libui_sdl/libui/ui.h @@ -108,6 +108,8 @@ typedef struct uiWindow uiWindow; #define uiWindow(this) ((uiWindow *) (this)) _UI_EXTERN char *uiWindowTitle(uiWindow *w); _UI_EXTERN void uiWindowSetTitle(uiWindow *w, const char *title); +_UI_EXTERN void uiWindowPosition(uiWindow *w, int *x, int *y); +_UI_EXTERN void uiWindowSetPosition(uiWindow *w, int x, int y); _UI_EXTERN void uiWindowContentSize(uiWindow *w, int *width, int *height); _UI_EXTERN void uiWindowSetContentSize(uiWindow *w, int width, int height); _UI_EXTERN int uiWindowMinimized(uiWindow *w); @@ -326,6 +328,10 @@ _UI_ENUM(uiWindowResizeEdge) { // TODO way to bring up the system menu instead? }; +#define uiGLVersion(major, minor) ((major) | ((minor)<<16)) +#define uiGLVerMajor(ver) ((ver) & 0xFFFF) +#define uiGLVerMinor(ver) ((ver) >> 16) + #define uiArea(this) ((uiArea *) (this)) // TODO give a better name // TODO document the types of width and height @@ -342,6 +348,7 @@ _UI_EXTERN void uiAreaBeginUserWindowMove(uiArea *a); _UI_EXTERN void uiAreaBeginUserWindowResize(uiArea *a, uiWindowResizeEdge edge); _UI_EXTERN void uiAreaSetBackgroundColor(uiArea *a, int r, int g, int b); _UI_EXTERN uiArea *uiNewArea(uiAreaHandler *ah); +_UI_EXTERN uiArea *uiNewGLArea(uiAreaHandler *ah, const unsigned int* req_versions); _UI_EXTERN uiArea *uiNewScrollingArea(uiAreaHandler *ah, int width, int height); struct uiAreaDrawParams { @@ -599,6 +606,18 @@ _UI_EXTERN void uiDrawTextLayoutSetColor(uiDrawTextLayout *layout, int startChar _UI_EXTERN void uiDrawText(uiDrawContext *c, double x, double y, uiDrawTextLayout *layout); + +// OpenGL support + +typedef struct uiGLContext uiGLContext; + +_UI_EXTERN uiGLContext *uiAreaGetGLContext(uiArea* a); +_UI_EXTERN void uiGLMakeContextCurrent(uiGLContext* ctx); +_UI_EXTERN unsigned int uiGLGetVersion(uiGLContext* ctx); +_UI_EXTERN void *uiGLGetProcAddress(const char* proc); +_UI_EXTERN void uiGLSwapBuffers(uiGLContext* ctx); + + _UI_ENUM(uiModifiers) { uiModifierCtrl = 1 << 0, uiModifierAlt = 1 << 1, diff --git a/src/libui_sdl/libui/windows/area.cpp b/src/libui_sdl/libui/windows/area.cpp index 2185f25..72d5145 100644 --- a/src/libui_sdl/libui/windows/area.cpp +++ b/src/libui_sdl/libui/windows/area.cpp @@ -25,8 +25,11 @@ static LRESULT CALLBACK areaWndProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM } // always recreate the render target if necessary - if (a->rt == NULL) - a->rt = makeHWNDRenderTarget(a->hwnd); + if (!a->openGL) + { + if (a->rt == NULL) + a->rt = makeHWNDRenderTarget(a->hwnd); + } if (areaDoDraw(a, uMsg, wParam, lParam, &lResult) != FALSE) return lResult; @@ -34,12 +37,14 @@ static LRESULT CALLBACK areaWndProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM if (uMsg == WM_WINDOWPOSCHANGED) { if ((wp->flags & SWP_NOSIZE) != 0) return DefWindowProcW(hwnd, uMsg, wParam, lParam); + a->width = -1; + a->height = -1; uiWindowsEnsureGetClientRect(a->hwnd, &client); areaDrawOnResize(a, &client); areaScrollOnResize(a, &client); { double w, h; - loadAreaSize(a, a->rt, &w, &h); + loadAreaSize(a, &w, &h); a->ah->Resize(a->ah, a, (int)w, (int)h); } return 0; @@ -56,7 +61,15 @@ static LRESULT CALLBACK areaWndProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM // control implementation -uiWindowsControlAllDefaults(uiArea) +uiWindowsControlAllDefaultsExceptDestroy(uiArea) + +static void uiAreaDestroy(uiControl *c) +{ + uiArea* a = uiArea(c); + if (a->openGL && a->glcontext) freeGLContext(a->glcontext); + uiWindowsEnsureDestroyWindow(a->hwnd); + uiFreeControl(c); +} static void uiAreaMinimumSize(uiWindowsControl *c, int *width, int *height) { @@ -182,6 +195,9 @@ uiArea *uiNewArea(uiAreaHandler *ah) uiWindowsNewControl(uiArea, a); + a->width = -1; + a->height = -1; + a->ah = ah; a->scrolling = FALSE; clickCounterReset(&(a->cc)); @@ -195,6 +211,50 @@ uiArea *uiNewArea(uiAreaHandler *ah) uiAreaSetBackgroundColor(a, -1, -1, -1); + a->openGL = 0; + + return a; +} + +uiGLContext *uiAreaGetGLContext(uiArea* a) +{ + if (!a->openGL) userbug("trying to get GL context from non-GL area"); + + return a->glcontext; +} + +uiArea *uiNewGLArea(uiAreaHandler *ah, const unsigned int* req_versions) +{ + uiArea *a; + + uiWindowsNewControl(uiArea, a); + + a->width = -1; + a->height = -1; + + a->ah = ah; + a->scrolling = FALSE; + clickCounterReset(&(a->cc)); + + // a->hwnd is assigned in areaWndProc() + uiWindowsEnsureCreateControlHWND(0, + areaClass, L"", + 0, + hInstance, a, + FALSE); + + uiAreaSetBackgroundColor(a, -1, -1, -1); + + a->openGL = 1; + + for (int i = 0; req_versions[i]; i++) + { + int major = uiGLVerMajor(req_versions[i]); + int minor = uiGLVerMinor(req_versions[i]); + a->glcontext = createGLContext(a, major, minor); + if (a->glcontext) break; + } + return a; } @@ -204,6 +264,9 @@ uiArea *uiNewScrollingArea(uiAreaHandler *ah, int width, int height) uiWindowsNewControl(uiArea, a); + a->width = -1; + a->height = -1; + a->ah = ah; a->scrolling = TRUE; a->scrollWidth = width; @@ -219,6 +282,8 @@ uiArea *uiNewScrollingArea(uiAreaHandler *ah, int width, int height) uiAreaSetBackgroundColor(a, -1, -1, -1); + a->openGL = 0; // TODO, eventually??? + // set initial scrolling parameters areaUpdateScroll(a); diff --git a/src/libui_sdl/libui/windows/area.hpp b/src/libui_sdl/libui/windows/area.hpp index add62dd..cfc45a4 100644 --- a/src/libui_sdl/libui/windows/area.hpp +++ b/src/libui_sdl/libui/windows/area.hpp @@ -10,6 +10,8 @@ struct uiArea { HWND hwnd; uiAreaHandler *ah; + int width, height; + BOOL scrolling; int scrollWidth; int scrollHeight; @@ -26,6 +28,9 @@ struct uiArea { int bgR, bgG, bgB; + int openGL; + uiGLContext* glcontext; + ID2D1HwndRenderTarget *rt; }; @@ -42,6 +47,10 @@ extern void areaUpdateScroll(uiArea *a); extern BOOL areaDoEvents(uiArea *a, UINT uMsg, WPARAM wParam, LPARAM lParam, LRESULT *lResult); // areautil.cpp -extern void loadAreaSize(uiArea *a, ID2D1RenderTarget *rt, double *width, double *height); +extern void loadAreaSize(uiArea *a, double *width, double *height); extern void pixelsToDIP(uiArea *a, double *x, double *y); extern void dipToPixels(uiArea *a, double *x, double *y); + +// gl.cpp +extern uiGLContext* createGLContext(uiArea* a, int vermajor, int verminor); +extern void freeGLContext(uiGLContext* c); diff --git a/src/libui_sdl/libui/windows/areadraw.cpp b/src/libui_sdl/libui/windows/areadraw.cpp index a9ad477..f369255 100644 --- a/src/libui_sdl/libui/windows/areadraw.cpp +++ b/src/libui_sdl/libui/windows/areadraw.cpp @@ -6,6 +6,13 @@ static HRESULT doPaint(uiArea *a, ID2D1RenderTarget *rt, RECT *clip) { uiAreaHandler *ah = a->ah; uiAreaDrawParams dp; + + if (a->openGL) + { + //(*(ah->Draw))(ah, a, &dp); + return S_OK; + } + COLORREF bgcolorref; D2D1_COLOR_F bgcolor; D2D1_MATRIX_3X2_F scrollTransform; @@ -13,7 +20,7 @@ static HRESULT doPaint(uiArea *a, ID2D1RenderTarget *rt, RECT *clip) // no need to save or restore the graphics state to reset transformations; it's handled by resetTarget() in draw.c, called during the following dp.Context = newContext(rt); - loadAreaSize(a, rt, &(dp.AreaWidth), &(dp.AreaHeight)); + loadAreaSize(a, &(dp.AreaWidth), &(dp.AreaHeight)); dp.ClipX = clip->left; dp.ClipY = clip->top; @@ -113,6 +120,9 @@ static void onWM_PAINT(uiArea *a) static void onWM_PRINTCLIENT(uiArea *a, HDC dc) { + // TODO???? + if (a->openGL) return; + ID2D1DCRenderTarget *rt; RECT client; HRESULT hr; @@ -143,13 +153,16 @@ BOOL areaDoDraw(uiArea *a, UINT uMsg, WPARAM wParam, LPARAM lParam, LRESULT *lRe // TODO only if the render target wasn't just created? void areaDrawOnResize(uiArea *a, RECT *newClient) { - D2D1_SIZE_U size; + if (!a->openGL) + { + D2D1_SIZE_U size; - size.width = newClient->right - newClient->left; - size.height = newClient->bottom - newClient->top; - // don't track the error; we'll get that in EndDraw() - // see https://msdn.microsoft.com/en-us/library/windows/desktop/dd370994%28v=vs.85%29.aspx - a->rt->Resize(&size); + size.width = newClient->right - newClient->left; + size.height = newClient->bottom - newClient->top; + // don't track the error; we'll get that in EndDraw() + // see https://msdn.microsoft.com/en-us/library/windows/desktop/dd370994%28v=vs.85%29.aspx + a->rt->Resize(&size); + } // according to Rick Brewster, we must always redraw the entire client area after calling ID2D1RenderTarget::Resize() (see http://stackoverflow.com/a/33222983/3408572) // we used to have a uiAreaHandler.RedrawOnResize() method to decide this; now you know why we don't anymore diff --git a/src/libui_sdl/libui/windows/areaevents.cpp b/src/libui_sdl/libui/windows/areaevents.cpp index 3ff7a47..46d6ab9 100644 --- a/src/libui_sdl/libui/windows/areaevents.cpp +++ b/src/libui_sdl/libui/windows/areaevents.cpp @@ -109,7 +109,7 @@ static void areaMouseEvent(uiArea *a, int down, int up, WPARAM wParam, LPARAM l me.Y += a->vscrollpos; } - loadAreaSize(a, NULL, &(me.AreaWidth), &(me.AreaHeight)); + loadAreaSize(a, &(me.AreaWidth), &(me.AreaHeight)); me.Down = down; me.Up = up; diff --git a/src/libui_sdl/libui/windows/areautil.cpp b/src/libui_sdl/libui/windows/areautil.cpp index 212ea42..ea13221 100644 --- a/src/libui_sdl/libui/windows/areautil.cpp +++ b/src/libui_sdl/libui/windows/areautil.cpp @@ -2,20 +2,35 @@ #include "uipriv_windows.hpp" #include "area.hpp" -void loadAreaSize(uiArea *a, ID2D1RenderTarget *rt, double *width, double *height) +// TODO: make those int rather than double +void loadAreaSize(uiArea *a, double *width, double *height) { D2D1_SIZE_F size; + if (a->width != -1) + { + *width = (double)a->width; + *height = (double)a->height; + return; + } + *width = 0; *height = 0; if (!a->scrolling) { - if (rt == NULL) + /*if (rt == NULL) rt = a->rt; size = realGetSize(rt); *width = size.width; *height = size.height; - dipToPixels(a, width, height); + dipToPixels(a, width, height);*/ + RECT rect; + GetWindowRect(a->hwnd, &rect); + *width = (double)(rect.right - rect.left); + *height = (double)(rect.bottom - rect.top); } + + a->width = (int)*width; + a->height = (int)*height; } void pixelsToDIP(uiArea *a, double *x, double *y) diff --git a/src/libui_sdl/libui/windows/gl.cpp b/src/libui_sdl/libui/windows/gl.cpp new file mode 100644 index 0000000..1e3732c --- /dev/null +++ b/src/libui_sdl/libui/windows/gl.cpp @@ -0,0 +1,142 @@ +// 31 march 2019 +#include "uipriv_windows.hpp" +#include "area.hpp" + +#include <GL/gl.h> +#include <GL/wglext.h> + +struct uiGLContext +{ + uiArea* a; + + HWND hwnd; + HDC dc; + HGLRC rc; + + unsigned int version; +}; + + +uiGLContext* createGLContext(uiArea* a, int vermajor, int verminor) +{ + uiGLContext* ctx; + BOOL res; + + ctx = uiNew(uiGLContext); + + ctx->a = a; + ctx->hwnd = a->hwnd; + + PIXELFORMATDESCRIPTOR pfd; + memset(&pfd, 0, sizeof(pfd)); + pfd.nSize = sizeof(pfd); + pfd.nVersion = 1; + pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + pfd.cAlphaBits = 8; + pfd.cDepthBits = 24; + pfd.cStencilBits = 8; + pfd.iLayerType = PFD_MAIN_PLANE; + + ctx->dc = GetDC(ctx->hwnd); + if (!ctx->dc) + { + uiFree(ctx); + return NULL; + } + + int pixelformat = ChoosePixelFormat(ctx->dc, &pfd); + res = SetPixelFormat(ctx->dc, pixelformat, &pfd); + if (!res) + { + ReleaseDC(ctx->hwnd, ctx->dc); + uiFree(ctx); + return NULL; + } + + ctx->rc = wglCreateContext(ctx->dc); + if (!ctx->rc) + { + ReleaseDC(ctx->hwnd, ctx->dc); + uiFree(ctx); + return NULL; + } + + wglMakeCurrent(ctx->dc, ctx->rc); + + if (vermajor >= 3) + { + HGLRC (*wglCreateContextAttribsARB)(HDC,HGLRC,const int*); + HGLRC rc_better = NULL; + + wglCreateContextAttribsARB = (HGLRC(*)(HDC,HGLRC,const int*))wglGetProcAddress("wglCreateContextAttribsARB"); + if (wglCreateContextAttribsARB) + { + int attribs[15]; + int i = 0; + + attribs[i++] = WGL_CONTEXT_MAJOR_VERSION_ARB; + attribs[i++] = vermajor; + attribs[i++] = WGL_CONTEXT_MINOR_VERSION_ARB; + attribs[i++] = verminor; + + attribs[i] = 0; + rc_better = wglCreateContextAttribsARB(ctx->dc, NULL, attribs); + } + + wglMakeCurrent(NULL, NULL); + wglDeleteContext(ctx->rc); + + if (!rc_better) + { + ReleaseDC(ctx->hwnd, ctx->dc); + uiFree(ctx); + return NULL; + } + + ctx->version = uiGLVersion(vermajor, verminor); + ctx->rc = rc_better; + wglMakeCurrent(ctx->dc, ctx->rc); + } + + return ctx; +} + +void freeGLContext(uiGLContext* ctx) +{ + if (ctx == NULL) return; + wglMakeCurrent(NULL, NULL); + wglDeleteContext(ctx->rc); + ReleaseDC(ctx->hwnd, ctx->dc); + uiFree(ctx); +} + +void uiGLMakeContextCurrent(uiGLContext* ctx) +{ + if (ctx == NULL) + { + wglMakeCurrent(NULL, NULL); + return; + } + + if (wglGetCurrentContext() == ctx->rc) return; + int res = wglMakeCurrent(ctx->dc, ctx->rc); +} + +unsigned int uiGLGetVersion(uiGLContext* ctx) +{ + if (ctx == NULL) return 0; + return ctx->version; +} + +void *uiGLGetProcAddress(const char* proc) +{ + return (void*)wglGetProcAddress(proc); +} + +void uiGLSwapBuffers(uiGLContext* ctx) +{ + if (ctx == NULL) return; + SwapBuffers(ctx->dc); +} diff --git a/src/libui_sdl/libui/windows/window.cpp b/src/libui_sdl/libui/windows/window.cpp index f52e2f6..18d1171 100644 --- a/src/libui_sdl/libui/windows/window.cpp +++ b/src/libui_sdl/libui/windows/window.cpp @@ -363,6 +363,21 @@ static void windowMonitorRect(HWND hwnd, RECT *r) *r = mi.rcMonitor; } +void uiWindowPosition(uiWindow *w, int *x, int *y) +{ + RECT rect; + if (GetWindowRect(w->hwnd, &rect) == 0) + logLastError(L"error getting window position"); + *x = rect.left; + *y = rect.top; +} + +void uiWindowSetPosition(uiWindow *w, int x, int y) +{ + if (SetWindowPos(w->hwnd, NULL, x, y, 0, 0, SWP_NOACTIVATE | SWP_NOSIZE | SWP_NOOWNERZORDER | SWP_NOZORDER) == 0) + logLastError(L"error moving window"); +} + void uiWindowContentSize(uiWindow *w, int *width, int *height) { RECT r; diff --git a/src/libui_sdl/main.cpp b/src/libui_sdl/main.cpp index 3eea39c..c086bbf 100644 --- a/src/libui_sdl/main.cpp +++ b/src/libui_sdl/main.cpp @@ -24,12 +24,16 @@ #include <SDL2/SDL.h> #include "libui/ui.h" +#include "../OpenGLSupport.h" +#include "main_shaders.h" + #include "../types.h" #include "../version.h" #include "PlatformConfig.h" #include "DlgEmuSettings.h" #include "DlgInputConfig.h" +#include "DlgVideoSettings.h" #include "DlgAudioSettings.h" #include "DlgWifiSettings.h" @@ -60,6 +64,10 @@ char* EmuDirectory; uiWindow* MainWindow; uiArea* MainDrawArea; +uiAreaHandler MainDrawAreaHandler; + +const u32 kGLVersions[] = {uiGLVersion(3,1), 0}; +uiGLContext* GLContext; int WindowWidth, WindowHeight; @@ -81,6 +89,9 @@ uiMenuItem* MenuItem_ScreenGap[6]; uiMenuItem* MenuItem_ScreenLayout[3]; uiMenuItem* MenuItem_ScreenSizing[4]; +uiMenuItem* MenuItem_ScreenFilter; +uiMenuItem* MenuItem_LimitFPS; + SDL_Thread* EmuThread; int EmuRunning; volatile int EmuStatus; @@ -92,9 +103,27 @@ char PrevSRAMPath[1024]; // for savestate 'undo load' bool SavestateLoaded; +bool Screen_UseGL; + bool ScreenDrawInited = false; -uiDrawBitmap* ScreenBitmap = NULL; -u32 ScreenBuffer[256*384]; +uiDrawBitmap* ScreenBitmap[2] = {NULL,NULL}; + +GLuint GL_ScreenShader[3]; +GLuint GL_ScreenShaderAccel[3]; +struct +{ + float uScreenSize[2]; + u32 u3DScale; + u32 uFilterMode; + +} GL_ShaderConfig; +GLuint GL_ShaderConfigUBO; +GLuint GL_ScreenVertexArrayID, GL_ScreenVertexBufferID; +float GL_ScreenVertices[2 * 3*2 * 4]; // position/texcoord +GLuint GL_ScreenTexture; +bool GL_ScreenSizeDirty; + +int GL_3DScale; int ScreenGap = 0; int ScreenLayout = 0; @@ -135,7 +164,262 @@ void LoadState(int slot); void UndoStateLoad(); void GetSavestateName(int slot, char* filename, int len); +void CreateMainWindow(bool opengl); +void DestroyMainWindow(); +void RecreateMainWindow(bool opengl); + + + +bool GLScreen_InitShader(GLuint* shader, const char* fs) +{ + if (!OpenGL_BuildShaderProgram(kScreenVS, fs, shader, "ScreenShader")) + return false; + + GLuint uni_id; + + uni_id = glGetUniformBlockIndex(shader[2], "uConfig"); + glUniformBlockBinding(shader[2], uni_id, 16); + + glUseProgram(shader[2]); + uni_id = glGetUniformLocation(shader[2], "ScreenTex"); + glUniform1i(uni_id, 0); + uni_id = glGetUniformLocation(shader[2], "_3DTex"); + glUniform1i(uni_id, 1); + + glBindAttribLocation(shader[2], 0, "vPosition"); + glBindAttribLocation(shader[2], 1, "vTexcoord"); + glBindFragDataLocation(shader[2], 0, "oColor"); + + return true; +} + +bool GLScreen_Init() +{ + if (!OpenGL_Init()) + return false; + + if (!GLScreen_InitShader(GL_ScreenShader, kScreenFS)) + return false; + if (!GLScreen_InitShader(GL_ScreenShaderAccel, kScreenFS_Accel)) + return false; + + memset(&GL_ShaderConfig, 0, sizeof(GL_ShaderConfig)); + + glGenBuffers(1, &GL_ShaderConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, GL_ShaderConfigUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GL_ShaderConfig), &GL_ShaderConfig, GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 16, GL_ShaderConfigUBO); + + glGenBuffers(1, &GL_ScreenVertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, GL_ScreenVertexBufferID); + glBufferData(GL_ARRAY_BUFFER, sizeof(GL_ScreenVertices), NULL, GL_STATIC_DRAW); + + glGenVertexArrays(1, &GL_ScreenVertexArrayID); + glBindVertexArray(GL_ScreenVertexArrayID); + glEnableVertexAttribArray(0); // position + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4*4, (void*)(0)); + glEnableVertexAttribArray(1); // texcoord + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4*4, (void*)(2*4)); + + glGenTextures(1, &GL_ScreenTexture); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, GL_ScreenTexture); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL); + + GL_ScreenSizeDirty = true; + + return true; +} + +void GLScreen_DeInit() +{ + glDeleteTextures(1, &GL_ScreenTexture); + + glDeleteVertexArrays(1, &GL_ScreenVertexArrayID); + glDeleteBuffers(1, &GL_ScreenVertexBufferID); + + OpenGL_DeleteShaderProgram(GL_ScreenShader); + OpenGL_DeleteShaderProgram(GL_ScreenShaderAccel); +} + +void GLScreen_DrawScreen() +{ + if (GL_ScreenSizeDirty) + { + GL_ScreenSizeDirty = false; + + GL_ShaderConfig.uScreenSize[0] = WindowWidth; + GL_ShaderConfig.uScreenSize[1] = WindowHeight; + GL_ShaderConfig.u3DScale = GL_3DScale; + + glBindBuffer(GL_UNIFORM_BUFFER, GL_ShaderConfigUBO); + void* unibuf = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); + if (unibuf) memcpy(unibuf, &GL_ShaderConfig, sizeof(GL_ShaderConfig)); + glUnmapBuffer(GL_UNIFORM_BUFFER); + + float scwidth, scheight; + + float x0, y0, x1, y1; + float s0, s1, s2, s3; + float t0, t1, t2, t3; + +#define SETVERTEX(i, x, y, s, t) \ + GL_ScreenVertices[4*(i) + 0] = x; \ + GL_ScreenVertices[4*(i) + 1] = y; \ + GL_ScreenVertices[4*(i) + 2] = s; \ + GL_ScreenVertices[4*(i) + 3] = t; + + x0 = TopScreenRect.X; + y0 = TopScreenRect.Y; + x1 = TopScreenRect.X + TopScreenRect.Width; + y1 = TopScreenRect.Y + TopScreenRect.Height; + + scwidth = 256; + scheight = 192; + + switch (ScreenRotation) + { + case 0: + s0 = 0; t0 = 0; + s1 = scwidth; t1 = 0; + s2 = 0; t2 = scheight; + s3 = scwidth; t3 = scheight; + break; + + case 1: + s0 = 0; t0 = scheight; + s1 = 0; t1 = 0; + s2 = scwidth; t2 = scheight; + s3 = scwidth; t3 = 0; + break; + + case 2: + s0 = scwidth; t0 = scheight; + s1 = 0; t1 = scheight; + s2 = scwidth; t2 = 0; + s3 = 0; t3 = 0; + break; + + case 3: + s0 = scwidth; t0 = 0; + s1 = scwidth; t1 = scheight; + s2 = 0; t2 = 0; + s3 = 0; t3 = scheight; + break; + } + + SETVERTEX(0, x0, y0, s0, t0); + SETVERTEX(1, x1, y1, s3, t3); + SETVERTEX(2, x1, y0, s1, t1); + SETVERTEX(3, x0, y0, s0, t0); + SETVERTEX(4, x0, y1, s2, t2); + SETVERTEX(5, x1, y1, s3, t3); + + x0 = BottomScreenRect.X; + y0 = BottomScreenRect.Y; + x1 = BottomScreenRect.X + BottomScreenRect.Width; + y1 = BottomScreenRect.Y + BottomScreenRect.Height; + + scwidth = 256; + scheight = 192; + + switch (ScreenRotation) + { + case 0: + s0 = 0; t0 = 192; + s1 = scwidth; t1 = 192; + s2 = 0; t2 = 192+scheight; + s3 = scwidth; t3 = 192+scheight; + break; + + case 1: + s0 = 0; t0 = 192+scheight; + s1 = 0; t1 = 192; + s2 = scwidth; t2 = 192+scheight; + s3 = scwidth; t3 = 192; + break; + + case 2: + s0 = scwidth; t0 = 192+scheight; + s1 = 0; t1 = 192+scheight; + s2 = scwidth; t2 = 192; + s3 = 0; t3 = 192; + break; + + case 3: + s0 = scwidth; t0 = 192; + s1 = scwidth; t1 = 192+scheight; + s2 = 0; t2 = 192; + s3 = 0; t3 = 192+scheight; + break; + } + SETVERTEX(6, x0, y0, s0, t0); + SETVERTEX(7, x1, y1, s3, t3); + SETVERTEX(8, x1, y0, s1, t1); + SETVERTEX(9, x0, y0, s0, t0); + SETVERTEX(10, x0, y1, s2, t2); + SETVERTEX(11, x1, y1, s3, t3); + +#undef SETVERTEX + + glBindBuffer(GL_ARRAY_BUFFER, GL_ScreenVertexBufferID); + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(GL_ScreenVertices), GL_ScreenVertices); + } + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glViewport(0, 0, WindowWidth, WindowHeight); + + if (GPU3D::Renderer == 0) + OpenGL_UseShaderProgram(GL_ScreenShader); + else + OpenGL_UseShaderProgram(GL_ScreenShaderAccel); + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glClearColor(0, 0, 0, 1); + glClear(GL_COLOR_BUFFER_BIT); + + int frontbuf = GPU::FrontBuffer; + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, GL_ScreenTexture); + + if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1]) + { + if (GPU3D::Renderer == 0) + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER, + GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]); + } + } + + glActiveTexture(GL_TEXTURE1); + if (GPU3D::Renderer != 0) + GPU3D::GLRenderer::SetupAccelFrame(); + + glBindBuffer(GL_ARRAY_BUFFER, GL_ScreenVertexBufferID); + glBindVertexArray(GL_ScreenVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 4*3); + + glFlush(); + uiGLSwapBuffers(GLContext); +} void MicLoadWav(char* name) { @@ -399,7 +683,18 @@ int EmuThreadFunc(void* burp) MainScreenPos[2] = 0; AutoScreenSizing = 0; - ScreenDrawInited = false; + if (Screen_UseGL) + { + uiGLMakeContextCurrent(GLContext); + GPU3D::InitRenderer(true); + uiGLMakeContextCurrent(NULL); + } + else + { + GPU3D::InitRenderer(false); + GPU::SetDisplaySettings(false); + } + Touching = false; KeyInputMask = 0xFFF; HotkeyMask = 0; @@ -513,10 +808,7 @@ int EmuThreadFunc(void* burp) // microphone input FeedMicInput(); - // emulate - u32 nlines = NDS::RunFrame(); - - if (EmuRunning == 0) break; + if (Screen_UseGL) uiGLMakeContextCurrent(GLContext); // auto screen layout { @@ -547,13 +839,16 @@ int EmuThreadFunc(void* burp) } } - memcpy(ScreenBuffer, GPU::Framebuffer, 256*384*4); + // emulate + u32 nlines = NDS::RunFrame(); + + if (EmuRunning == 0) break; + + if (Screen_UseGL) GLScreen_DrawScreen(); uiAreaQueueRedrawAll(MainDrawArea); // framerate limiter based off SDL2_gfx - float framerate; - if (nlines == 263) framerate = 1000.0f / 60.0f; - else framerate = ((1000.0f * nlines) / 263.0f) / 60.0f; + float framerate = (1000.0f * nlines) / (60.0f * 263.0f); fpslimitcount++; u32 curtick = SDL_GetTicks(); @@ -602,9 +897,16 @@ int EmuThreadFunc(void* burp) if (EmuRunning == 2) { + if (Screen_UseGL) + { + uiGLMakeContextCurrent(GLContext); + GLScreen_DrawScreen(); + } uiAreaQueueRedrawAll(MainDrawArea); } + if (Screen_UseGL) uiGLMakeContextCurrent(NULL); + EmuStatus = EmuRunning; SDL_Delay(100); @@ -618,6 +920,8 @@ int EmuThreadFunc(void* burp) NDS::DeInit(); Platform::LAN_DeInit(); + if (Screen_UseGL) GLScreen_DeInit(); + return 44203; } @@ -626,25 +930,32 @@ void OnAreaDraw(uiAreaHandler* handler, uiArea* area, uiAreaDrawParams* params) { if (!ScreenDrawInited) { - ScreenBitmap = uiDrawNewBitmap(params->Context, 256, 384); + if (ScreenBitmap[0]) uiDrawFreeBitmap(ScreenBitmap[0]); + if (ScreenBitmap[1]) uiDrawFreeBitmap(ScreenBitmap[1]); + ScreenDrawInited = true; + ScreenBitmap[0] = uiDrawNewBitmap(params->Context, 256, 192); + ScreenBitmap[1] = uiDrawNewBitmap(params->Context, 256, 192); } - if (!ScreenBitmap) return; + int frontbuf = GPU::FrontBuffer; + if (!ScreenBitmap[0] || !ScreenBitmap[1]) return; + if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1]) return; uiRect top = {0, 0, 256, 192}; - uiRect bot = {0, 192, 256, 192}; + uiRect bot = {0, 0, 256, 192}; - uiDrawBitmapUpdate(ScreenBitmap, ScreenBuffer); + uiDrawBitmapUpdate(ScreenBitmap[0], GPU::Framebuffer[frontbuf][0]); + uiDrawBitmapUpdate(ScreenBitmap[1], GPU::Framebuffer[frontbuf][1]); uiDrawSave(params->Context); uiDrawTransform(params->Context, &TopScreenTrans); - uiDrawBitmapDraw(params->Context, ScreenBitmap, &top, &TopScreenRect, Config::ScreenFilter==1); + uiDrawBitmapDraw(params->Context, ScreenBitmap[0], &top, &TopScreenRect, Config::ScreenFilter==1); uiDrawRestore(params->Context); uiDrawSave(params->Context); uiDrawTransform(params->Context, &BottomScreenTrans); - uiDrawBitmapDraw(params->Context, ScreenBitmap, &bot, &BottomScreenRect, Config::ScreenFilter==1); + uiDrawBitmapDraw(params->Context, ScreenBitmap[1], &bot, &BottomScreenRect, Config::ScreenFilter==1); uiDrawRestore(params->Context); } @@ -804,7 +1115,7 @@ void SetupScreenRects(int width, int height) else sizemode = ScreenSizing; - int screenW, screenH; + int screenW, screenH, gap; if (sideways) { screenW = 192; @@ -816,6 +1127,8 @@ void SetupScreenRects(int width, int height) screenH = 192; } + gap = ScreenGap; + uiRect *topscreen, *bottomscreen; if (ScreenRotation == 1 || ScreenRotation == 2) { @@ -835,7 +1148,7 @@ void SetupScreenRects(int width, int height) int heightreq; int startX = 0; - width -= ScreenGap; + width -= gap; if (sizemode == 0) // even { @@ -873,7 +1186,7 @@ void SetupScreenRects(int width, int height) topscreen->X = startX; topscreen->Y = ((height - heightreq) / 2) + (heightreq - topscreen->Height); - bottomscreen->X = topscreen->X + topscreen->Width + ScreenGap; + bottomscreen->X = topscreen->X + topscreen->Width + gap; if (sizemode == 1) { @@ -894,7 +1207,7 @@ void SetupScreenRects(int width, int height) int widthreq; int startY = 0; - height -= ScreenGap; + height -= gap; if (sizemode == 0) // even { @@ -932,7 +1245,7 @@ void SetupScreenRects(int width, int height) topscreen->Y = startY; topscreen->X = (width - topscreen->Width) / 2; - bottomscreen->Y = topscreen->Y + topscreen->Height + ScreenGap; + bottomscreen->Y = topscreen->Y + topscreen->Height + gap; if (sizemode == 1) { @@ -1002,6 +1315,8 @@ void SetupScreenRects(int width, int height) } break; } + + GL_ScreenSizeDirty = true; } void SetMinSize(int w, int h) @@ -1092,7 +1407,6 @@ void Stop(bool internal) uiMenuItemDisable(MenuItem_Stop); uiMenuItemSetChecked(MenuItem_Pause, 0); - memset(ScreenBuffer, 0, 256*384*4); uiAreaQueueRedrawAll(MainDrawArea); SDL_PauseAudioDevice(AudioDevice, 1); @@ -1364,7 +1678,7 @@ void OnCloseByMenu(uiMenuItem* item, uiWindow* window, void* blarg) EmuRunning = 3; while (EmuStatus != 3); - uiControlDestroy(uiControl(window)); + DestroyMainWindow(); uiQuit(); } @@ -1485,6 +1799,11 @@ void OnOpenHotkeyConfig(uiMenuItem* item, uiWindow* window, void* blarg) DlgInputConfig::Open(1); } +void OnOpenVideoSettings(uiMenuItem* item, uiWindow* window, void* blarg) +{ + DlgVideoSettings::Open(); +} + void OnOpenAudioSettings(uiMenuItem* item, uiWindow* window, void* blarg) { DlgAudioSettings::Open(); @@ -1506,26 +1825,31 @@ void EnsureProperMinSize() { bool isHori = (ScreenRotation == 1 || ScreenRotation == 3); + int w0 = 256; + int h0 = 192; + int w1 = 256; + int h1 = 192; + if (ScreenLayout == 0) // natural { if (isHori) - SetMinSize(384+ScreenGap, 256); + SetMinSize(h0+ScreenGap+h1, std::max(w0,w1)); else - SetMinSize(256, 384+ScreenGap); + SetMinSize(std::max(w0,w1), h0+ScreenGap+h1); } else if (ScreenLayout == 1) // vertical { if (isHori) - SetMinSize(192, 512+ScreenGap); + SetMinSize(std::max(h0,h1), w0+ScreenGap+w1); else - SetMinSize(256, 384+ScreenGap); + SetMinSize(std::max(w0,w1), h0+ScreenGap+h1); } else // horizontal { if (isHori) - SetMinSize(384+ScreenGap, 256); + SetMinSize(h0+ScreenGap+h1, std::max(w0,w1)); else - SetMinSize(512+ScreenGap, 192); + SetMinSize(w0+ScreenGap+w1, std::max(h0,h1)); } } @@ -1537,6 +1861,8 @@ void OnSetScreenSize(uiMenuItem* item, uiWindow* window, void* param) int w = 256*factor; int h = 192*factor; + // FIXME + if (ScreenLayout == 0) // natural { if (isHori) @@ -1646,15 +1972,20 @@ void OnSetLimitFPS(uiMenuItem* item, uiWindow* window, void* blarg) void ApplyNewSettings(int type) { - if (!RunningSomething) return; + if (!RunningSomething && type != 2) return; int prevstatus = EmuRunning; - EmuRunning = 2; - while (EmuStatus != 2); + EmuRunning = 3; + while (EmuStatus != 3); - if (type == 0) // general emu settings + if (type == 0) // 3D renderer settings { - GPU3D::SoftRenderer::SetupRenderThread(); + if (Screen_UseGL) uiGLMakeContextCurrent(GLContext); + GPU3D::UpdateRendererConfig(); + if (Screen_UseGL) uiGLMakeContextCurrent(NULL); + + GL_3DScale = Config::GL_ScaleFactor; // dorp + GL_ScreenSizeDirty = true; } else if (type == 1) // wifi settings { @@ -1667,112 +1998,44 @@ void ApplyNewSettings(int type) Platform::LAN_DeInit(); Platform::LAN_Init(); } - - EmuRunning = prevstatus; -} - - -int main(int argc, char** argv) -{ - srand(time(NULL)); - - printf("melonDS " MELONDS_VERSION "\n"); - printf(MELONDS_URL "\n"); - - if (argc > 0 && strlen(argv[0]) > 0) + else if (type == 2) // video output method { - int len = strlen(argv[0]); - while (len > 0) - { - if (argv[0][len] == '/') break; - if (argv[0][len] == '\\') break; - len--; - } - if (len > 0) - { - EmuDirectory = new char[len+1]; - strncpy(EmuDirectory, argv[0], len); - EmuDirectory[len] = '\0'; - } - else + bool usegl = Config::ScreenUseGL || (Config::_3DRenderer != 0); + if (usegl != Screen_UseGL) { - EmuDirectory = new char[2]; - strcpy(EmuDirectory, "."); - } - } - else - { - EmuDirectory = new char[2]; - strcpy(EmuDirectory, "."); - } - - // http://stackoverflow.com/questions/14543333/joystick-wont-work-using-sdl - SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1"); + Screen_UseGL = usegl; - if (SDL_Init(SDL_INIT_HAPTIC) < 0) - { - printf("SDL couldn't init rumble\n"); - } - if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_JOYSTICK) < 0) - { - printf("SDL shat itself :(\n"); - return 1; - } + if (RunningSomething) + { + if (usegl) uiGLMakeContextCurrent(GLContext); + GPU3D::DeInitRenderer(); + if (usegl) uiGLMakeContextCurrent(NULL); + } - SDL_JoystickEventState(SDL_ENABLE); + RecreateMainWindow(usegl); - uiInitOptions ui_opt; - memset(&ui_opt, 0, sizeof(uiInitOptions)); - const char* ui_err = uiInit(&ui_opt); - if (ui_err != NULL) - { - printf("libui shat itself :( %s\n", ui_err); - uiFreeInitError(ui_err); - return 1; + if (RunningSomething) + { + if (Screen_UseGL) uiGLMakeContextCurrent(GLContext); + GPU3D::InitRenderer(Screen_UseGL); + if (Screen_UseGL) uiGLMakeContextCurrent(NULL); + } + } } - - Config::Load(); - - if (Config::AudioVolume < 0) Config::AudioVolume = 0; - else if (Config::AudioVolume > 256) Config::AudioVolume = 256; - - if (!Platform::LocalFileExists("bios7.bin") || - !Platform::LocalFileExists("bios9.bin") || - !Platform::LocalFileExists("firmware.bin")) + else if (type == 3) // 3D renderer { - uiMsgBoxError( - NULL, - "BIOS/Firmware not found", - "One or more of the following required files don't exist or couldn't be accessed:\n\n" - "bios7.bin -- ARM7 BIOS\n" - "bios9.bin -- ARM9 BIOS\n" - "firmware.bin -- firmware image\n\n" - "Dump the files from your DS and place them in the directory you run melonDS from.\n" - "Make sure that the files can be accessed."); - - uiUninit(); - SDL_Quit(); - return 0; + if (Screen_UseGL) uiGLMakeContextCurrent(GLContext); + GPU3D::DeInitRenderer(); + GPU3D::InitRenderer(Screen_UseGL); + if (Screen_UseGL) uiGLMakeContextCurrent(NULL); } - { - FILE* f = Platform::OpenLocalFile("romlist.bin", "rb"); - if (f) - { - u32 data; - fread(&data, 4, 1, f); - fclose(f); + EmuRunning = prevstatus; +} - if ((data >> 24) == 0) // old CRC-based list - { - uiMsgBoxError(NULL, - "Your version of romlist.bin is outdated.", - "Save memory type detection will not work correctly.\n\n" - "You should use the latest version of romlist.bin (provided in melonDS release packages)."); - } - } - } +void CreateMainWindowMenu() +{ uiMenu* menu; uiMenuItem* menuitem; @@ -1847,6 +2110,8 @@ int main(int argc, char** argv) uiMenuItemOnClicked(menuitem, OnOpenInputConfig, NULL); menuitem = uiMenuAppendItem(menu, "Hotkey config"); uiMenuItemOnClicked(menuitem, OnOpenHotkeyConfig, NULL); + menuitem = uiMenuAppendItem(menu, "Video settings"); + uiMenuItemOnClicked(menuitem, OnOpenVideoSettings, NULL); menuitem = uiMenuAppendItem(menu, "Audio settings"); uiMenuItemOnClicked(menuitem, OnOpenAudioSettings, NULL); menuitem = uiMenuAppendItem(menu, "Wifi settings"); @@ -1928,24 +2193,18 @@ int main(int argc, char** argv) uiMenuAppendSubmenu(menu, submenu); } - menuitem = uiMenuAppendCheckItem(menu, "Screen filtering"); - uiMenuItemOnClicked(menuitem, OnSetScreenFiltering, NULL); - uiMenuItemSetChecked(menuitem, Config::ScreenFilter==1); - - menuitem = uiMenuAppendCheckItem(menu, "Limit framerate"); - uiMenuItemOnClicked(menuitem, OnSetLimitFPS, NULL); - uiMenuItemSetChecked(menuitem, Config::LimitFPS==1); + MenuItem_ScreenFilter = uiMenuAppendCheckItem(menu, "Screen filtering"); + uiMenuItemOnClicked(MenuItem_ScreenFilter, OnSetScreenFiltering, NULL); + MenuItem_LimitFPS = uiMenuAppendCheckItem(menu, "Limit framerate"); + uiMenuItemOnClicked(MenuItem_LimitFPS, OnSetLimitFPS, NULL); +} - int w = Config::WindowWidth; - int h = Config::WindowHeight; - //if (w < 256) w = 256; - //if (h < 384) h = 384; - - WindowWidth = w; - WindowHeight = h; - - MainWindow = uiNewWindow("melonDS " MELONDS_VERSION, w, h, Config::WindowMaximized, 1, 1); +void CreateMainWindow(bool opengl) +{ + MainWindow = uiNewWindow("melonDS " MELONDS_VERSION, + WindowWidth, WindowHeight, + Config::WindowMaximized, 1, 1); uiWindowOnClosing(MainWindow, OnCloseWindow, NULL); uiWindowSetDropTarget(MainWindow, 1); @@ -1954,28 +2213,182 @@ int main(int argc, char** argv) uiWindowOnGetFocus(MainWindow, OnGetFocus, NULL); uiWindowOnLoseFocus(MainWindow, OnLoseFocus, NULL); - //uiMenuItemDisable(MenuItem_SaveState); - //uiMenuItemDisable(MenuItem_LoadState); - for (int i = 0; i < 9; i++) uiMenuItemDisable(MenuItem_SaveStateSlot[i]); - for (int i = 0; i < 9; i++) uiMenuItemDisable(MenuItem_LoadStateSlot[i]); - uiMenuItemDisable(MenuItem_UndoStateLoad); - - uiMenuItemDisable(MenuItem_Pause); - uiMenuItemDisable(MenuItem_Reset); - uiMenuItemDisable(MenuItem_Stop); + ScreenDrawInited = false; + bool opengl_good = opengl; - uiAreaHandler areahandler; - areahandler.Draw = OnAreaDraw; - areahandler.MouseEvent = OnAreaMouseEvent; - areahandler.MouseCrossed = OnAreaMouseCrossed; - areahandler.DragBroken = OnAreaDragBroken; - areahandler.KeyEvent = OnAreaKeyEvent; - areahandler.Resize = OnAreaResize; + if (!opengl) MainDrawArea = uiNewArea(&MainDrawAreaHandler); + else MainDrawArea = uiNewGLArea(&MainDrawAreaHandler, kGLVersions); - MainDrawArea = uiNewArea(&areahandler); uiWindowSetChild(MainWindow, uiControl(MainDrawArea)); uiControlSetMinSize(uiControl(MainDrawArea), 256, 384); - uiAreaSetBackgroundColor(MainDrawArea, 0, 0, 0); // TODO: make configurable? + uiAreaSetBackgroundColor(MainDrawArea, 0, 0, 0); + + uiControlShow(uiControl(MainWindow)); + uiControlSetFocus(uiControl(MainDrawArea)); + + if (opengl_good) + { + GLContext = uiAreaGetGLContext(MainDrawArea); + if (!GLContext) opengl_good = false; + } + if (opengl_good) + { + uiGLMakeContextCurrent(GLContext); + if (!GLScreen_Init()) opengl_good = false; + uiGLMakeContextCurrent(NULL); + } + + if (opengl && !opengl_good) + { + printf("OpenGL: initialization failed\n"); + RecreateMainWindow(false); + Screen_UseGL = false; + } +} + +void DestroyMainWindow() +{ + uiControlDestroy(uiControl(MainWindow)); + + if (ScreenBitmap[0]) uiDrawFreeBitmap(ScreenBitmap[0]); + if (ScreenBitmap[1]) uiDrawFreeBitmap(ScreenBitmap[1]); + + ScreenBitmap[0] = NULL; + ScreenBitmap[1] = NULL; +} + +void RecreateMainWindow(bool opengl) +{ + int winX, winY, maxi; + uiWindowPosition(MainWindow, &winX, &winY); + maxi = uiWindowMaximized(MainWindow); + DestroyMainWindow(); + CreateMainWindow(opengl); + uiWindowSetPosition(MainWindow, winX, winY); + uiWindowSetMaximized(MainWindow, maxi); +} + + +int main(int argc, char** argv) +{ + srand(time(NULL)); + + printf("melonDS " MELONDS_VERSION "\n"); + printf(MELONDS_URL "\n"); + + if (argc > 0 && strlen(argv[0]) > 0) + { + int len = strlen(argv[0]); + while (len > 0) + { + if (argv[0][len] == '/') break; + if (argv[0][len] == '\\') break; + len--; + } + if (len > 0) + { + EmuDirectory = new char[len+1]; + strncpy(EmuDirectory, argv[0], len); + EmuDirectory[len] = '\0'; + } + else + { + EmuDirectory = new char[2]; + strcpy(EmuDirectory, "."); + } + } + else + { + EmuDirectory = new char[2]; + strcpy(EmuDirectory, "."); + } + + // http://stackoverflow.com/questions/14543333/joystick-wont-work-using-sdl + SDL_SetHint(SDL_HINT_JOYSTICK_ALLOW_BACKGROUND_EVENTS, "1"); + + if (SDL_Init(SDL_INIT_HAPTIC) < 0) + { + printf("SDL couldn't init rumble\n"); + } + if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_JOYSTICK) < 0) + { + printf("SDL shat itself :(\n"); + return 1; + } + + SDL_JoystickEventState(SDL_ENABLE); + + uiInitOptions ui_opt; + memset(&ui_opt, 0, sizeof(uiInitOptions)); + const char* ui_err = uiInit(&ui_opt); + if (ui_err != NULL) + { + printf("libui shat itself :( %s\n", ui_err); + uiFreeInitError(ui_err); + return 1; + } + + Config::Load(); + + if (Config::AudioVolume < 0) Config::AudioVolume = 0; + else if (Config::AudioVolume > 256) Config::AudioVolume = 256; + + if (!Platform::LocalFileExists("bios7.bin") || + !Platform::LocalFileExists("bios9.bin") || + !Platform::LocalFileExists("firmware.bin")) + { + uiMsgBoxError( + NULL, + "BIOS/Firmware not found", + "One or more of the following required files don't exist or couldn't be accessed:\n\n" + "bios7.bin -- ARM7 BIOS\n" + "bios9.bin -- ARM9 BIOS\n" + "firmware.bin -- firmware image\n\n" + "Dump the files from your DS and place them in the directory you run melonDS from.\n" + "Make sure that the files can be accessed."); + + uiUninit(); + SDL_Quit(); + return 0; + } + + { + FILE* f = Platform::OpenLocalFile("romlist.bin", "rb"); + if (f) + { + u32 data; + fread(&data, 4, 1, f); + fclose(f); + + if ((data >> 24) == 0) // old CRC-based list + { + uiMsgBoxError(NULL, + "Your version of romlist.bin is outdated.", + "Save memory type detection will not work correctly.\n\n" + "You should use the latest version of romlist.bin (provided in melonDS release packages)."); + } + } + } + + CreateMainWindowMenu(); + + MainDrawAreaHandler.Draw = OnAreaDraw; + MainDrawAreaHandler.MouseEvent = OnAreaMouseEvent; + MainDrawAreaHandler.MouseCrossed = OnAreaMouseCrossed; + MainDrawAreaHandler.DragBroken = OnAreaDragBroken; + MainDrawAreaHandler.KeyEvent = OnAreaKeyEvent; + MainDrawAreaHandler.Resize = OnAreaResize; + + WindowWidth = Config::WindowWidth; + WindowHeight = Config::WindowHeight; + + Screen_UseGL = Config::ScreenUseGL || (Config::_3DRenderer != 0); + + GL_3DScale = Config::GL_ScaleFactor; + if (GL_3DScale < 1) GL_3DScale = 1; + else if (GL_3DScale > 8) GL_3DScale = 8; + + CreateMainWindow(Screen_UseGL); ScreenRotation = Config::ScreenRotation; ScreenGap = Config::ScreenGap; @@ -1988,6 +2401,14 @@ int main(int argc, char** argv) SANITIZE(ScreenSizing, 0, 3); #undef SANITIZE + for (int i = 0; i < 9; i++) uiMenuItemDisable(MenuItem_SaveStateSlot[i]); + for (int i = 0; i < 9; i++) uiMenuItemDisable(MenuItem_LoadStateSlot[i]); + uiMenuItemDisable(MenuItem_UndoStateLoad); + + uiMenuItemDisable(MenuItem_Pause); + uiMenuItemDisable(MenuItem_Reset); + uiMenuItemDisable(MenuItem_Stop); + uiMenuItemSetChecked(MenuItem_SavestateSRAMReloc, Config::SavestateRelocSRAM?1:0); uiMenuItemSetChecked(MenuItem_ScreenRot[ScreenRotation], 1); @@ -2002,6 +2423,9 @@ int main(int argc, char** argv) OnSetScreenRotation(MenuItem_ScreenRot[ScreenRotation], MainWindow, (void*)&kScreenRot[ScreenRotation]); + uiMenuItemSetChecked(MenuItem_ScreenFilter, Config::ScreenFilter==1); + uiMenuItemSetChecked(MenuItem_LimitFPS, Config::LimitFPS==1); + SDL_AudioSpec whatIwant, whatIget; memset(&whatIwant, 0, sizeof(SDL_AudioSpec)); whatIwant.freq = 47340; @@ -2070,8 +2494,6 @@ int main(int argc, char** argv) } } - uiControlShow(uiControl(MainWindow)); - uiControlSetFocus(uiControl(MainDrawArea)); uiMain(); EmuRunning = 0; @@ -2090,8 +2512,6 @@ int main(int argc, char** argv) Config::Save(); - if (ScreenBitmap) uiDrawFreeBitmap(ScreenBitmap); - uiUninit(); SDL_Quit(); delete[] EmuDirectory; diff --git a/src/libui_sdl/main_shaders.h b/src/libui_sdl/main_shaders.h new file mode 100644 index 0000000..f03931c --- /dev/null +++ b/src/libui_sdl/main_shaders.h @@ -0,0 +1,201 @@ +/* + Copyright 2016-2019 Arisotura + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef MAIN_SHADERS_H +#define MAIN_SHADERS_H + +const char* kScreenVS = R"(#version 140 + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + uint u3DScale; + uint uFilterMode; +}; + +in vec2 vPosition; +in vec2 vTexcoord; + +smooth out vec2 fTexcoord; + +void main() +{ + vec4 fpos; + fpos.xy = ((vPosition.xy * 2.0) / uScreenSize) - 1.0; + fpos.y *= -1; + fpos.z = 0.0; + fpos.w = 1.0; + + gl_Position = fpos; + fTexcoord = vTexcoord; +} +)"; + +const char* kScreenFS = R"(#version 140 + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + uint u3DScale; + uint uFilterMode; +}; + +uniform usampler2D ScreenTex; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +void main() +{ + ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + + // TODO: filters + + oColor = vec4(vec3(pixel.bgr) / 255.0, 1.0); +} +)"; + +const char* kScreenFS_Accel = R"(#version 140 + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + uint u3DScale; + uint uFilterMode; +}; + +uniform usampler2D ScreenTex; +uniform sampler2D _3DTex; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +void main() +{ + ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); + + ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); + int dispmode = mbright.b & 0x3; + + if (dispmode == 1) + { + ivec4 val1 = pixel; + ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); + ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); + + int compmode = val3.a & 0xF; + int eva, evb, evy; + + if (compmode == 4) + { + // 3D on top, blending + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + eva = (_3dpix.a & 0x1F) + 1; + evb = 32 - eva; + + val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; + if (eva <= 16) val1 += ivec4(1,1,1,0); + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode == 1) + { + // 3D on bottom, blending + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + eva = val3.g; + evb = val3.b; + + val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; + val1 = min(val1, 0x3F); + } + else + val1 = val2; + } + else if (compmode <= 3) + { + // 3D on top, normal/fade + + float xpos = val3.r + fract(fTexcoord.x); + float ypos = mod(fTexcoord.y, 192); + ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra + * vec4(63,63,63,31)); + + if (_3dpix.a > 0) + { + evy = val3.g; + + val1 = _3dpix; + if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; + else if (compmode == 3) val1 -= (val1 * evy) >> 4; + } + else + val1 = val2; + } + + pixel = val1; + } + + if (dispmode != 0) + { + int brightmode = mbright.g >> 6; + if (brightmode == 1) + { + // up + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; + } + else if (brightmode == 2) + { + // down + int evy = mbright.r & 0x1F; + if (evy > 16) evy = 16; + + pixel -= (pixel * evy) >> 4; + } + } + + pixel.rgb <<= 2; + pixel.rgb |= (pixel.rgb >> 6); + + // TODO: filters + + oColor = vec4(vec3(pixel.rgb) / 255.0, 1.0); +} +)"; + +#endif // MAIN_SHADERS_H |