From c81bcccadc9ac8394ba8d4a836d7c954dd528751 Mon Sep 17 00:00:00 2001
From: Arisotura <thetotalworm@gmail.com>
Date: Thu, 16 May 2019 16:27:45 +0200
Subject: BAHAHAHAHAHAHAHAA

---
 src/GPU.cpp                  |  40 +--
 src/GPU.h                    |   2 +-
 src/GPU2D.cpp                | 169 +++++++-----
 src/GPU2D.h                  |   4 +-
 src/GPU3D.cpp                |   4 +-
 src/GPU3D.h                  |   9 +-
 src/GPU3D_OpenGL43.cpp       | 641 +-----------------------------------------
 src/GPU3D_OpenGL43_shaders.h | 645 +++++++++++++++++++++++++++++++++++++++++++
 src/GPU3D_Soft.cpp           |   7 +-
 src/libui_sdl/main.cpp       |  19 +-
 10 files changed, 812 insertions(+), 728 deletions(-)
 create mode 100644 src/GPU3D_OpenGL43_shaders.h

(limited to 'src')

diff --git a/src/GPU.cpp b/src/GPU.cpp
index f4e9fd4..1d073f5 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -74,6 +74,7 @@ u32 VRAMMap_ARM7[2];
 int FrontBuffer;
 u32* Framebuffer[2][2];
 int FBScale[2];
+bool Accelerated;
 
 GPU2D* GPU2D_A;
 GPU2D* GPU2D_B;
@@ -88,9 +89,8 @@ bool Init()
     FrontBuffer = 0;
     Framebuffer[0][0] = NULL; Framebuffer[0][1] = NULL;
     Framebuffer[1][0] = NULL; Framebuffer[1][1] = NULL;
-    FBScale[0] = -1; FBScale[1] = -1;
-    //SetFramebufferScale(1);
-    SetFramebufferScale(1, 1);
+    FBScale[0] = -1; FBScale[1] = -1; Accelerated = false;
+    SetDisplaySettings(0, 0, false);
 
     return true;
 }
@@ -247,13 +247,15 @@ void AssignFramebuffers()
     }
 }
 
-void SetFramebufferScale(int top, int bottom)
-{
-    if (top != FBScale[0])
+void SetDisplaySettings(int topscale, int bottomscale, bool accel) // per-screen scale plus accelerated-mode switch
+{   // accel is used exactly as passed by the caller (the forced accel=true override was removed)
+    if (topscale != FBScale[0] || accel != Accelerated)
     {
-        FBScale[0] = top;
+        FBScale[0] = accel ? 0 : topscale; // accelerated mode stores scale 0 for the 2D engines
 
-        int fbsize = (256 * 192) << (FBScale[0] * 2);
+        int fbsize;
+        if (accel) fbsize = 256*3 * 192; // accelerated mode: 256*3 entries per line, 192 lines
+        else       fbsize = (256 * 192) << (FBScale[0] * 2);
         if (Framebuffer[0][0]) delete[] Framebuffer[0][0];
         if (Framebuffer[1][0]) delete[] Framebuffer[1][0];
         Framebuffer[0][0] = new u32[fbsize];
@@ -266,21 +268,23 @@ void SetFramebufferScale(int top, int bottom)
         if (NDS::PowerControl9 & (1<<15))
         {
             GPU2D_A->SetFramebuffer(Framebuffer[backbuf][0]);
-            GPU2D_A->SetScale(FBScale[0]);
-            GPU3D::SetScale(FBScale[0]);
+            GPU2D_A->SetDisplaySettings(FBScale[0], accel);
+            GPU3D::SetDisplaySettings(topscale, accel); // 3D side receives the requested scale, not FBScale[0]
         }
         else
         {
             GPU2D_B->SetFramebuffer(Framebuffer[backbuf][0]);
-            GPU2D_B->SetScale(FBScale[0]);
+            GPU2D_B->SetDisplaySettings(FBScale[0], accel);
         }
     }
 
-    if (bottom != FBScale[1])
+    if (bottomscale != FBScale[1] || accel != Accelerated)
     {
-        FBScale[1] = bottom;
+        FBScale[1] = accel ? 0 : bottomscale; // same rule as the top screen
 
-        int fbsize = (256 * 192) << (FBScale[1] * 2);
+        int fbsize;
+        if (accel) fbsize = 256*3 * 192;
+        else       fbsize = (256 * 192) << (FBScale[1] * 2);
         if (Framebuffer[0][1]) delete[] Framebuffer[0][1];
         if (Framebuffer[1][1]) delete[] Framebuffer[1][1];
         Framebuffer[0][1] = new u32[fbsize];
@@ -293,15 +297,17 @@ void SetFramebufferScale(int top, int bottom)
         if (NDS::PowerControl9 & (1<<15))
         {
             GPU2D_B->SetFramebuffer(Framebuffer[backbuf][1]);
-            GPU2D_B->SetScale(FBScale[1]);
+            GPU2D_B->SetDisplaySettings(FBScale[1], accel);
         }
         else
         {
             GPU2D_A->SetFramebuffer(Framebuffer[backbuf][1]);
-            GPU2D_A->SetScale(FBScale[1]);
-            GPU3D::SetScale(FBScale[1]);
+            GPU2D_A->SetDisplaySettings(FBScale[1], accel);
+            GPU3D::SetDisplaySettings(bottomscale, accel);
         }
     }
+
+    Accelerated = accel; // remembered so the next call can detect a mode change
 }
 
 
diff --git a/src/GPU.h b/src/GPU.h
index 2e47fc4..cfa8d7d 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -75,7 +75,7 @@ void Stop();
 
 void DoSavestate(Savestate* file);
 
-void SetFramebufferScale(int top, int bottom);
+void SetDisplaySettings(int topscale, int bottomscale, bool accel);
 
 
 void MapVRAM_AB(u32 bank, u8 cnt);
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index 3a99964..34c17ff 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -82,7 +82,6 @@
 GPU2D::GPU2D(u32 num)
 {
     Num = num;
-    SetScale(0);
 }
 
 GPU2D::~GPU2D()
@@ -216,12 +215,16 @@ void GPU2D::SetFramebuffer(u32* buf)
     Framebuffer = buf;
 }
 
-void GPU2D::SetScale(int scale)
+void GPU2D::SetDisplaySettings(int scale, bool accel) // sets line scale/stride and picks the pixel writer
 {
+    if (accel) scale = 0; // accelerated mode ignores the requested scale
+    Accelerated = accel;
+
     LineScale = scale;
     LineStride = 256 << (scale*2);
 
-    if      (scale == 1) DrawPixel = DrawPixel_2x;
+    if (Accelerated)     DrawPixel = DrawPixel_Accel; // accelerated writer takes priority over the scaled ones
+    else if (scale == 1) DrawPixel = DrawPixel_2x;
     else if (scale == 2) DrawPixel = DrawPixel_4x;
     else                 DrawPixel = DrawPixel_1x;
 }
@@ -623,7 +626,8 @@ u32 GPU2D::ColorBrightnessDown(u32 val, u32 factor)
 
 void GPU2D::DrawScanline(u32 line)
 {
-    u32* dst = &Framebuffer[LineStride * line];
+    int stride = Accelerated ? (256*3) : LineStride;
+    u32* dst = &Framebuffer[stride * line];
 
     int n3dline = line;
     line = GPU::VCount;
@@ -652,7 +656,7 @@ void GPU2D::DrawScanline(u32 line)
     u32 dispmode = DispCnt >> 16;
     dispmode &= (Num ? 0x1 : 0x3);
 
-    if (Num == 0)
+    if (Num == 0 && !Accelerated)
         _3DLine = GPU3D::GetLine(n3dline);
 
     // always render regular graphics
@@ -663,13 +667,13 @@ void GPU2D::DrawScanline(u32 line)
     case 0: // screen off
         {
             for (int i = 0; i < LineStride; i++)
-                dst[i] = 0xFF3F3F3F;
+                dst[i] = 0x003F3F3F;
         }
         break;
 
     case 1: // regular display
         {
-            for (int i = 0; i < LineStride; i+=2)
+            for (int i = 0; i < stride; i+=2)
                 *(u64*)&dst[i] = *(u64*)&BGOBJLine[i];
         }
         break;
@@ -783,6 +787,8 @@ void GPU2D::DrawScanline(u32 line)
             DoCapture(line, capwidth);
     }
 
+    if (Accelerated) return;
+
     // master brightness
     if (dispmode != 0)
     {
@@ -861,6 +867,8 @@ void GPU2D::DoCapture(u32 line, u32 width)
     u16* dst = (u16*)GPU::VRAM[dstvram];
     u32 dstaddr = (((CaptureCnt >> 18) & 0x3) << 14) + (line * width);
 
+    // TODO: handle 3D in accelerated mode!!
+
     u32* srcA;
     if (CaptureCnt & (1<<24))
         srcA = _3DLine;
@@ -1272,88 +1280,91 @@ void GPU2D::DrawScanline_Mode1(u32 line)
     // color special effects
     // can likely be optimized
 
-    u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
-
-    for (int i = 0; i < LineStride; i++)
+    if (!Accelerated)
     {
-        int j = (i >> LineScale) & 0xFF;
+        u32 bldcnteffect = (BlendCnt >> 6) & 0x3;
 
-        u32 val1 = BGOBJLine[i];
-        u32 val2 = BGOBJLine[4096+i];
+        for (int i = 0; i < LineStride; i++)
+        {
+            int j = (i >> LineScale) & 0xFF;
 
-        u32 coloreffect, eva, evb;
+            u32 val1 = BGOBJLine[i];
+            u32 val2 = BGOBJLine[4096+i];
 
-        u32 flag1 = val1 >> 24;
-        u32 flag2 = val2 >> 24;
+            u32 coloreffect, eva, evb;
 
-        u32 target2;
-        if (flag2 & 0x80)      target2 = 0x1000;
-        else if (flag2 & 0x40) target2 = 0x0100;
-        else                   target2 = flag2 << 8;
+            u32 flag1 = val1 >> 24;
+            u32 flag2 = val2 >> 24;
 
-        if ((flag1 & 0x80) && (BlendCnt & target2))
-        {
-            // sprite blending
+            u32 target2;
+            if (flag2 & 0x80)      target2 = 0x1000;
+            else if (flag2 & 0x40) target2 = 0x0100;
+            else                   target2 = flag2 << 8;
 
-            coloreffect = 1;
+            if ((flag1 & 0x80) && (BlendCnt & target2))
+            {
+                // sprite blending
+
+                coloreffect = 1;
 
-            if (flag1 & 0x40)
+                if (flag1 & 0x40)
+                {
+                    eva = flag1 & 0x1F;
+                    evb = 16 - eva;
+                }
+                else
+                {
+                    eva = EVA;
+                    evb = EVB;
+                }
+            }
+            else if ((flag1 & 0x40) && (BlendCnt & target2))
             {
-                eva = flag1 & 0x1F;
-                evb = 16 - eva;
+                // 3D layer blending
+
+                BGOBJLine[i] = ColorBlend5(val1, val2);
+                continue;
             }
             else
             {
-                eva = EVA;
-                evb = EVB;
-            }
-        }
-        else if ((flag1 & 0x40) && (BlendCnt & target2))
-        {
-            // 3D layer blending
-
-            BGOBJLine[i] = ColorBlend5(val1, val2);
-            continue;
-        }
-        else
-        {
-            if (flag1 & 0x80)      flag1 = 0x10;
-            else if (flag1 & 0x40) flag1 = 0x01;
+                if (flag1 & 0x80)      flag1 = 0x10;
+                else if (flag1 & 0x40) flag1 = 0x01;
 
-            if ((BlendCnt & flag1) && (WindowMask[j] & 0x20))
-            {
-                if ((bldcnteffect == 1) && (BlendCnt & target2))
+                if ((BlendCnt & flag1) && (WindowMask[j] & 0x20))
                 {
-                    coloreffect = 1;
-                    eva = EVA;
-                    evb = EVB;
+                    if ((bldcnteffect == 1) && (BlendCnt & target2))
+                    {
+                        coloreffect = 1;
+                        eva = EVA;
+                        evb = EVB;
+                    }
+                    else if (bldcnteffect >= 2)
+                        coloreffect = bldcnteffect;
+                    else
+                        coloreffect = 0;
                 }
-                else if (bldcnteffect >= 2)
-                    coloreffect = bldcnteffect;
                 else
                     coloreffect = 0;
             }
-            else
-                coloreffect = 0;
-        }
 
-        switch (coloreffect)
-        {
-        case 0:
-            BGOBJLine[i] = val1;
-            break;
+            switch (coloreffect)
+            {
+            case 0:
+                BGOBJLine[i] = val1;
+                break;
 
-        case 1:
-            BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb);
-            break;
+            case 1:
+                BGOBJLine[i] = ColorBlend4(val1, val2, eva, evb);
+                break;
 
-        case 2:
-            BGOBJLine[i] = ColorBrightnessUp(val1, EVY);
-            break;
+            case 2:
+                BGOBJLine[i] = ColorBrightnessUp(val1, EVY);
+                break;
 
-        case 3:
-            BGOBJLine[i] = ColorBrightnessDown(val1, EVY);
-            break;
+            case 3:
+                BGOBJLine[i] = ColorBrightnessDown(val1, EVY);
+                break;
+            }
         }
     }
 
@@ -1429,6 +1440,17 @@ void GPU2D::DrawPixel_4x(u32* dst, u16 color, u32 flag)
     *(u64*)(dst+3072+2) = val;
 }
 
+void GPU2D::DrawPixel_Accel(u32* dst, u16 color, u32 flag)
+{   // move the two entries already stored for this pixel one slot (256 words) further back
+    dst[512] = dst[256];
+    dst[256] = dst[0];
+    // widen each 5-bit field of the 15-bit color by one bit, then pack the fields with the flag bits
+    u32 red   = (color & 0x001F) << 1;
+    u32 green = ((color & 0x03E0) >> 4) << 8;
+    u32 blue  = ((color & 0x7C00) >> 9) << 16;
+    dst[0] = red | green | blue | flag;
+}
+
 void GPU2D::DrawBG_3D()
 {
     u16 xoff = BGXPos[0];
@@ -1445,7 +1467,18 @@ void GPU2D::DrawBG_3D()
         iend -= (xoff & 0xFF);
     }
 
-    if (LineScale == 1)
+    if (Accelerated)
+    {
+        for (; i < iend; i++)
+        {
+            if (!(WindowMask[i] & 0x01)) continue;
+
+            BGOBJLine[i+512] = BGOBJLine[i+256];
+            BGOBJLine[i+256] = BGOBJLine[i];
+            BGOBJLine[i] = 0x40000000; // 3D-layer placeholder
+        }
+    }
+    else if (LineScale == 1)
     {
         for (; i < iend; i++)
         {
diff --git a/src/GPU2D.h b/src/GPU2D.h
index aca1f7e..fc420a5 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -31,7 +31,7 @@ public:
 
     void SetEnabled(bool enable) { Enabled = enable; }
     void SetFramebuffer(u32* buf);
-    void SetScale(int scale);
+    void SetDisplaySettings(int scale, bool accel);
 
     u8 Read8(u32 addr);
     u16 Read16(u32 addr);
@@ -71,6 +71,7 @@ private:
 
     u32 LineStride;
     u32 LineScale;
+    bool Accelerated;
 
     u32 BGOBJLine[1024*4 * 2];
     u32* _3DLine;
@@ -136,6 +137,7 @@ private:
     static void DrawPixel_1x(u32* dst, u16 color, u32 flag);
     static void DrawPixel_2x(u32* dst, u16 color, u32 flag);
     static void DrawPixel_4x(u32* dst, u16 color, u32 flag);
+    static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
     void (*DrawPixel)(u32* dst, u16 color, u32 flag);
 
     void DrawBG_3D();
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 8c0588d..61629a7 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -607,9 +607,9 @@ void SetEnabled(bool geometry, bool rendering)
     if (!rendering) ResetRenderingState();
 }
 
-void SetScale(int scale)
+void SetDisplaySettings(int scale, bool accel) // forwards both settings unchanged to the GLRenderer43 backend
 {
-    GLRenderer43::SetScale(scale);
+    GLRenderer43::SetDisplaySettings(scale, accel);
 }
 
 
diff --git a/src/GPU3D.h b/src/GPU3D.h
index 68bc696..2fe5bee 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -97,7 +97,7 @@ void Reset();
 void DoSavestate(Savestate* file);
 
 void SetEnabled(bool geometry, bool rendering);
-void SetScale(int scale);
+void SetDisplaySettings(int scale, bool accel);
 
 void ExecuteCommand();
 
@@ -110,6 +110,7 @@ void VCount144();
 void VBlank();
 void VCount215();
 u32* GetLine(int line);
+void SetupAccelFrame();
 
 void WriteToGXFIFO(u32 val);
 
@@ -127,13 +128,14 @@ bool Init();
 void DeInit();
 void Reset();
 
-void SetScale(int scale);
+void SetDisplaySettings(int scale, bool accel);
 
 void SetupRenderThread();
 
 void VCount144();
 void RenderFrame();
 u32* GetLine(int line);
+void SetupAccelFrame();
 
 }
 
@@ -144,11 +146,12 @@ bool Init();
 void DeInit();
 void Reset();
 
-void SetScale(int scale);
+void SetDisplaySettings(int scale, bool accel);
 
 void VCount144();
 void RenderFrame();
 u32* GetLine(int line);
+void SetupAccelFrame();
 
 }
 
diff --git a/src/GPU3D_OpenGL43.cpp b/src/GPU3D_OpenGL43.cpp
index e76db7f..6db9a18 100644
--- a/src/GPU3D_OpenGL43.cpp
+++ b/src/GPU3D_OpenGL43.cpp
@@ -21,6 +21,7 @@
 #include "NDS.h"
 #include "GPU.h"
 #include "OpenGLSupport.h"
+#include "GPU3D_OpenGL43_shaders.h"
 
 namespace GPU3D
 {
@@ -33,631 +34,6 @@ namespace GLRenderer43
 // * UBO: 3.1
 // * glMemoryBarrier: 4.2
 
-// TODO: consider other way to handle uniforms (UBO?)
-
-#define kShaderHeader "#version 430"
-
-
-const char* kClearVS = kShaderHeader R"(
-
-layout(location=0) in vec2 vPosition;
-
-layout(location=1) uniform uint uDepth;
-
-void main()
-{
-    float fdepth = (float(uDepth) / 8388608.0) - 1.0;
-    gl_Position = vec4(vPosition, fdepth, 1.0);
-}
-)";
-
-const char* kClearFS = kShaderHeader R"(
-
-layout(location=0) uniform uvec4 uColor;
-layout(location=2) uniform uint uOpaquePolyID;
-layout(location=3) uniform uint uFogFlag;
-
-layout(location=0) out vec4 oColor;
-layout(location=1) out uvec3 oAttr;
-
-void main()
-{
-    oColor = vec4(uColor).bgra / 31.0;
-    oAttr.r = 0;
-    oAttr.g = uOpaquePolyID;
-    oAttr.b = 0;
-}
-)";
-
-
-const char* kRenderVSCommon = R"(
-
-layout(std140, binding=0) uniform uConfig
-{
-    vec2 uScreenSize;
-    uint uDispCnt;
-    vec4 uToonColors[32];
-};
-
-layout(location=0) in uvec4 vPosition;
-layout(location=1) in uvec4 vColor;
-layout(location=2) in ivec2 vTexcoord;
-layout(location=3) in uvec3 vPolygonAttr;
-
-smooth out vec4 fColor;
-smooth out vec2 fTexcoord;
-flat out uvec3 fPolygonAttr;
-)";
-
-const char* kRenderFSCommon = R"(
-
-layout(binding=0) uniform usampler2D TexMem;
-layout(binding=1) uniform sampler2D TexPalMem;
-
-layout(std140, binding=0) uniform uConfig
-{
-    vec2 uScreenSize;
-    uint uDispCnt;
-    vec4 uToonColors[32];
-};
-
-smooth in vec4 fColor;
-smooth in vec2 fTexcoord;
-flat in uvec3 fPolygonAttr;
-
-layout(location=0) out vec4 oColor;
-layout(location=1) out uvec3 oAttr;
-
-int TexcoordWrap(int c, int maxc, uint mode)
-{
-    if ((mode & (1<<0)) != 0)
-    {
-        if ((mode & (1<<2)) != 0 && (c & maxc) != 0)
-            return (maxc-1) - (c & (maxc-1));
-        else
-            return (c & (maxc-1));
-    }
-    else
-        return clamp(c, 0, maxc-1);
-}
-
-vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, uint wrapmode)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x);
-    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-
-    pixel.a = (pixel.r & 0xE0);
-    pixel.a = (pixel.a >> 3) + (pixel.a >> 6);
-    pixel.r &= 0x1F;
-
-    addr.y = (addr.y << 3) + int(pixel.r);
-    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-
-    return vec4(color.rgb, float(pixel.a)/31.0);
-}
-
-vec4 TextureFetch_I2(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x) >> 2;
-    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-    pixel.r >>= (2 * (st.x & 3));
-    pixel.r &= 0x03;
-
-    addr.y = (addr.y << 2) + int(pixel.r);
-    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-
-    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
-}
-
-vec4 TextureFetch_I4(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x) >> 1;
-    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-    if ((st.x & 1) != 0) pixel.r >>= 4;
-    else                 pixel.r &= 0x0F;
-
-    addr.y = (addr.y << 3) + int(pixel.r);
-    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-
-    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
-}
-
-vec4 TextureFetch_I8(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x);
-    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-
-    addr.y = (addr.y << 3) + int(pixel.r);
-    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-
-    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
-}
-
-vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, uint wrapmode)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3);
-    uvec4 p = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-    uint val = (p.r >> (2 * (st.x & 0x3))) & 0x3;
-
-    int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1);
-    if (addr.x >= 0x40000) slot1addr += 0x10000;
-
-    uint palinfo;
-    p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0);
-    palinfo = p.r;
-    slot1addr++;
-    p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0);
-    palinfo |= (p.r << 8);
-
-    addr.y = (addr.y << 3) + ((int(palinfo) & 0x3FFF) << 1);
-    palinfo >>= 14;
-
-    if (val == 0)
-    {
-        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-        return vec4(color.rgb, 1.0);
-    }
-    else if (val == 1)
-    {
-        addr.y++;
-        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-        return vec4(color.rgb, 1.0);
-    }
-    else if (val == 2)
-    {
-        if (palinfo == 1)
-        {
-            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            addr.y++;
-            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            return vec4((color0.rgb + color1.rgb) / 2.0, 1.0);
-        }
-        else if (palinfo == 3)
-        {
-            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            addr.y++;
-            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0);
-        }
-        else
-        {
-            addr.y += 2;
-            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            return vec4(color.rgb, 1.0);
-        }
-    }
-    else
-    {
-        if (palinfo == 2)
-        {
-            addr.y += 3;
-            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            return vec4(color.rgb, 1.0);
-        }
-        else if (palinfo == 3)
-        {
-            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            addr.y++;
-            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-            return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0);
-        }
-        else
-        {
-            return vec4(0.0);
-        }
-    }
-}
-
-vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, uint wrapmode)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x);
-    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-
-    pixel.a = (pixel.r & 0xF8) >> 3;
-    pixel.r &= 0x07;
-
-    addr.y = (addr.y << 3) + int(pixel.r);
-    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
-
-    return vec4(color.rgb, float(pixel.a)/31.0);
-}
-
-vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, uint wrapmode)
-{
-    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
-    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
-
-    addr.x += ((st.y * st.z) + st.x) << 1;
-    uvec4 pixelL = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-    addr.x++;
-    uvec4 pixelH = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
-
-    vec4 color;
-    color.r = float(pixelL.r & 0x1F) / 31.0;
-    color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0;
-    color.b = float((pixelH.r & 0x7C) >> 2) / 31.0;
-    color.a = float(pixelH.r >> 7);
-
-    return color;
-}
-
-vec4 TextureLookup_Nearest(vec2 st)
-{
-    uint attr = fPolygonAttr.y;
-    uint paladdr = fPolygonAttr.z;
-
-    float alpha0;
-    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
-    else                       alpha0 = 1.0;
-
-    int tw = 8 << int((attr >> 20) & 0x7);
-    int th = 8 << int((attr >> 23) & 0x7);
-    ivec4 st_full = ivec4(ivec2(st), tw, th);
-
-    ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr));
-    uint wrapmode = attr >> 16;
-
-    uint type = (attr >> 26) & 0x7;
-    if      (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode);
-    else if (type == 2) return TextureFetch_I2        (vramaddr, st_full, wrapmode, alpha0);
-    else if (type == 3) return TextureFetch_I4        (vramaddr, st_full, wrapmode, alpha0);
-    else if (type == 4) return TextureFetch_I8        (vramaddr, st_full, wrapmode, alpha0);
-    else if (type == 1) return TextureFetch_A3I5      (vramaddr, st_full, wrapmode);
-    else if (type == 6) return TextureFetch_A5I3      (vramaddr, st_full, wrapmode);
-    else                return TextureFetch_Direct    (vramaddr, st_full, wrapmode);
-}
-
-vec4 TextureLookup_Linear(vec2 texcoord)
-{
-    ivec2 intpart = ivec2(texcoord);
-    vec2 fracpart = fract(texcoord);
-
-    uint attr = fPolygonAttr.y;
-    uint paladdr = fPolygonAttr.z;
-
-    float alpha0;
-    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
-    else                       alpha0 = 1.0;
-
-    int tw = 8 << int((attr >> 20) & 0x7);
-    int th = 8 << int((attr >> 23) & 0x7);
-    ivec4 st_full = ivec4(intpart, tw, th);
-
-    ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr));
-    uint wrapmode = attr >> 16;
-
-    vec4 A, B, C, D;
-    uint type = (attr >> 26) & 0x7;
-    if (type == 5)
-    {
-        A = TextureFetch_Compressed(vramaddr, st_full                 , wrapmode);
-        B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
-        C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
-        D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
-    }
-    else if (type == 2)
-    {
-        A = TextureFetch_I2(vramaddr, st_full                 , wrapmode, alpha0);
-        B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
-        C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
-        D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
-    }
-    else if (type == 3)
-    {
-        A = TextureFetch_I4(vramaddr, st_full                 , wrapmode, alpha0);
-        B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
-        C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
-        D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
-    }
-    else if (type == 4)
-    {
-        A = TextureFetch_I8(vramaddr, st_full                 , wrapmode, alpha0);
-        B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
-        C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
-        D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
-    }
-    else if (type == 1)
-    {
-        A = TextureFetch_A3I5(vramaddr, st_full                 , wrapmode);
-        B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
-        C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
-        D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
-    }
-    else if (type == 6)
-    {
-        A = TextureFetch_A5I3(vramaddr, st_full                 , wrapmode);
-        B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
-        C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
-        D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
-    }
-    else
-    {
-        A = TextureFetch_Direct(vramaddr, st_full                 , wrapmode);
-        B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
-        C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
-        D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
-    }
-
-    float fx = fracpart.x;
-    vec4 AB;
-    if (A.a < (0.5/31.0) && B.a < (0.5/31.0))
-        AB = vec4(0);
-    else
-    {
-        //if (A.a < (0.5/31.0) || B.a < (0.5/31.0))
-        //    fx = step(0.5, fx);
-
-        AB = mix(A, B, fx);
-    }
-
-    fx = fracpart.x;
-    vec4 CD;
-    if (C.a < (0.5/31.0) && D.a < (0.5/31.0))
-        CD = vec4(0);
-    else
-    {
-        //if (C.a < (0.5/31.0) || D.a < (0.5/31.0))
-        //    fx = step(0.5, fx);
-
-        CD = mix(C, D, fx);
-    }
-
-    fx = fracpart.y;
-    vec4 ret;
-    if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0))
-        ret = vec4(0);
-    else
-    {
-        //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0))
-        //    fx = step(0.5, fx);
-
-        ret = mix(AB, CD, fx);
-    }
-
-    return ret;
-}
-
-vec4 FinalColor()
-{
-    vec4 col;
-    vec4 vcol = fColor;
-    uint blendmode = (fPolygonAttr.x >> 4) & 0x3;
-
-    if (blendmode == 2)
-    {
-        if ((uDispCnt & (1<<1)) == 0)
-        {
-            // toon
-            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
-            vcol.rgb = tooncolor;
-        }
-        else
-        {
-            // highlight
-            vcol.rgb = vcol.rrr;
-        }
-    }
-
-    if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0))
-    {
-        // no texture
-        col = vcol;
-    }
-    else
-    {
-        vec4 tcol = TextureLookup_Nearest(fTexcoord);
-        //vec4 tcol = TextureLookup_Linear(fTexcoord);
-
-        if ((blendmode & 1) != 0)
-        {
-            // decal
-            col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a));
-            col.a = vcol.a;
-        }
-        else
-        {
-            // modulate
-            col = vcol * tcol;
-        }
-    }
-
-    if (blendmode == 2)
-    {
-        if ((uDispCnt & (1<<1)) != 0)
-        {
-            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
-            col.rgb = min(col.rgb + tooncolor, 1.0);
-        }
-    }
-
-    return col.bgra;
-}
-)";
-
-
-const char* kRenderVS_Z = R"(
-
-void main()
-{
-    uint attr = vPolygonAttr.x;
-    uint zshift = (attr >> 16) & 0x1F;
-
-    vec4 fpos;
-    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
-    fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0;
-    fpos.w = float(vPosition.w) / 65536.0f;
-    fpos.xyz *= fpos.w;
-
-    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
-    fTexcoord = vec2(vTexcoord) / 16.0;
-    fPolygonAttr = vPolygonAttr;
-
-    gl_Position = fpos;
-}
-)";
-
-const char* kRenderVS_W = R"(
-
-smooth out float fZ;
-
-void main()
-{
-    uint attr = vPolygonAttr.x;
-    uint zshift = (attr >> 16) & 0x1F;
-
-    vec4 fpos;
-    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
-    fZ = float(vPosition.z << zshift) / 16777216.0;
-    fpos.w = float(vPosition.w) / 65536.0f;
-    fpos.xy *= fpos.w;
-
-    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
-    fTexcoord = vec2(vTexcoord) / 16.0;
-    fPolygonAttr = vPolygonAttr;
-
-    gl_Position = fpos;
-}
-)";
-
-
-const char* kRenderFS_ZO = R"(
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 30.5/31) discard;
-
-    oColor = col;
-    oAttr.g = (fPolygonAttr.x >> 24) & 0x3F;
-}
-)";
-
-const char* kRenderFS_WO = R"(
-
-smooth in float fZ;
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 30.5/31) discard;
-
-    oColor = col;
-    oAttr.g = (fPolygonAttr.x >> 24) & 0x3F;
-    gl_FragDepth = fZ;
-}
-)";
-
-const char* kRenderFS_ZT = R"(
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 0.5/31) discard;
-    if (col.a >= 30.5/31) discard;
-
-    oColor = col;
-    oAttr.g = 0xFF;
-}
-)";
-
-const char* kRenderFS_WT = R"(
-
-smooth in float fZ;
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 0.5/31) discard;
-    if (col.a >= 30.5/31) discard;
-
-    oColor = col;
-    oAttr.g = 0xFF;
-    gl_FragDepth = fZ;
-}
-)";
-
-const char* kRenderFS_ZSM = R"(
-
-void main()
-{
-    oColor = vec4(0,0,0,1);
-    oAttr.g = 0xFF;
-    oAttr.b = 1;
-}
-)";
-
-const char* kRenderFS_WSM = R"(
-
-smooth in float fZ;
-
-void main()
-{
-    oColor = vec4(0,0,0,1);
-    oAttr.g = 0xFF;
-    oAttr.b = 1;
-    gl_FragDepth = fZ;
-}
-)";
-
-const char* kRenderFS_ZS = R"(
-
-layout(binding=2) uniform usampler2D iAttrTex;
-//layout(origin_upper_left) in vec4 gl_FragCoord;
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 0.5/31) discard;
-    if (col.a >= 30.5/31) discard;
-
-    uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0);
-    if (iAttr.b != 1) discard;
-    if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard;
-
-    oColor = col;
-}
-)";
-
-const char* kRenderFS_WS = R"(
-
-layout(binding=2) uniform usampler2D iAttrTex;
-//layout(origin_upper_left) in vec4 gl_FragCoord;
-
-smooth in float fZ;
-
-void main()
-{
-    vec4 col = FinalColor();
-    if (col.a < 0.5/31) discard;
-    if (col.a >= 30.5/31) discard;
-
-    uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0);
-    if (iAttr.b != 1) discard;
-    if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard;
-
-    oColor = col;
-    gl_FragDepth = fZ;
-}
-)";
-
 
 enum
 {
@@ -721,6 +97,7 @@ GLuint TexMemID;
 GLuint TexPalMemID;
 
 int ScaleFactor;
+bool Accelerated; // assigned from the 'accel' argument of SetDisplaySettings()
 int ScreenW, ScreenH;
 
 GLuint FramebufferTex[4];
@@ -968,9 +345,10 @@ void Reset()
     //
 }
 
-void SetScale(int scale)
+void SetDisplaySettings(int scale, bool accel)
 {
     ScaleFactor = scale;
+    Accelerated = accel;
 
     // TODO: antialiasing setting
     ScreenW = 256 << scale;
@@ -986,10 +364,12 @@ void SetScale(int scale)
     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW/2, ScreenH/2, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
-    glBufferData(GL_PIXEL_PACK_BUFFER, ScreenW*ScreenH*4, NULL, GL_DYNAMIC_READ);
+    if (accel) glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ);         // accel: pack buffer sized in bytes for the u32[256*192] Framebuffer allocated below
+    else       glBufferData(GL_PIXEL_PACK_BUFFER, ScreenW*ScreenH*4, NULL, GL_DYNAMIC_READ); // otherwise: sized in bytes for the u32[ScreenW*ScreenH] Framebuffer allocated below
 
     if (Framebuffer) delete[] Framebuffer;
-    Framebuffer = new u32[ScreenW*ScreenH];
+    if (accel) Framebuffer = new u32[256*192];
+    else       Framebuffer = new u32[ScreenW*ScreenH];
 }
 
 
@@ -1533,5 +913,10 @@ u32* GetLine(int line)
     return &Framebuffer[stride * line];
 }
 
+void SetupAccelFrame()
+{
+    // Placeholder: this function has no body yet.
+}
+
 }
 }
diff --git a/src/GPU3D_OpenGL43_shaders.h b/src/GPU3D_OpenGL43_shaders.h
new file mode 100644
index 0000000..8a69566
--- /dev/null
+++ b/src/GPU3D_OpenGL43_shaders.h
@@ -0,0 +1,645 @@
+/*
+    Copyright 2016-2019 Arisotura
+
+    This file is part of melonDS.
+
+    melonDS is free software: you can redistribute it and/or modify it under
+    the terms of the GNU General Public License as published by the Free
+    Software Foundation, either version 3 of the License, or (at your option)
+    any later version.
+
+    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef GPU3D_OPENGL43_SHADERS_H
+#define GPU3D_OPENGL43_SHADERS_H
+
+#define kShaderHeader "#version 430"
+
+// kClearVS: vertex shader source (prefixed with kShaderHeader). Passes vPosition through as XY and derives clip-space Z from the uDepth uniform as uDepth/8388608 - 1.
+const char* kClearVS = kShaderHeader R"(
+
+layout(location=0) in vec2 vPosition;
+
+layout(location=1) uniform uint uDepth;
+
+void main()
+{
+    float fdepth = (float(uDepth) / 8388608.0) - 1.0;
+    gl_Position = vec4(vPosition, fdepth, 1.0);
+}
+)";
+// kClearFS: fragment shader source (prefixed with kShaderHeader). Writes uColor, swizzled to BGRA and divided by 31, to oColor and (0, uOpaquePolyID, 0) to oAttr. uFogFlag is declared but not read.
+const char* kClearFS = kShaderHeader R"(
+
+layout(location=0) uniform uvec4 uColor;
+layout(location=2) uniform uint uOpaquePolyID;
+layout(location=3) uniform uint uFogFlag;
+
+layout(location=0) out vec4 oColor;
+layout(location=1) out uvec3 oAttr;
+
+void main()
+{
+    oColor = vec4(uColor).bgra / 31.0;
+    oAttr.r = 0;
+    oAttr.g = uOpaquePolyID;
+    oAttr.b = 0;
+}
+)";
+
+// kRenderVSCommon: declarations only (uConfig block, vertex inputs, outputs to the fragment stage); no #version line and no main(). Presumably joined with kShaderHeader and a kRenderVS_* body by the shader loader - TODO confirm.
+const char* kRenderVSCommon = R"(
+
+layout(std140, binding=0) uniform uConfig
+{
+    vec2 uScreenSize;
+    uint uDispCnt;
+    vec4 uToonColors[32];
+};
+
+layout(location=0) in uvec4 vPosition;
+layout(location=1) in uvec4 vColor;
+layout(location=2) in ivec2 vTexcoord;
+layout(location=3) in uvec3 vPolygonAttr;
+
+smooth out vec4 fColor;
+smooth out vec2 fTexcoord;
+flat out uvec3 fPolygonAttr;
+)";
+// kRenderFSCommon: shared fragment-stage code (samplers, uConfig, TexcoordWrap, per-format TextureFetch_*, TextureLookup_Nearest/Linear, FinalColor); no #version line and no main(). NOTE(review): ivec2(st) truncates toward zero - confirm that is intended for negative texture coordinates.
+const char* kRenderFSCommon = R"(
+
+layout(binding=0) uniform usampler2D TexMem;
+layout(binding=1) uniform sampler2D TexPalMem;
+
+layout(std140, binding=0) uniform uConfig
+{
+    vec2 uScreenSize;
+    uint uDispCnt;
+    vec4 uToonColors[32];
+};
+
+smooth in vec4 fColor;
+smooth in vec2 fTexcoord;
+flat in uvec3 fPolygonAttr;
+
+layout(location=0) out vec4 oColor;
+layout(location=1) out uvec3 oAttr;
+
+int TexcoordWrap(int c, int maxc, uint mode)
+{
+    if ((mode & (1<<0)) != 0)
+    {
+        if ((mode & (1<<2)) != 0 && (c & maxc) != 0)
+            return (maxc-1) - (c & (maxc-1));
+        else
+            return (c & (maxc-1));
+    }
+    else
+        return clamp(c, 0, maxc-1);
+}
+
+vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, uint wrapmode)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x);
+    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+
+    pixel.a = (pixel.r & 0xE0);
+    pixel.a = (pixel.a >> 3) + (pixel.a >> 6);
+    pixel.r &= 0x1F;
+
+    addr.y = (addr.y << 3) + int(pixel.r);
+    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+
+    return vec4(color.rgb, float(pixel.a)/31.0);
+}
+
+vec4 TextureFetch_I2(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x) >> 2;
+    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+    pixel.r >>= (2 * (st.x & 3));
+    pixel.r &= 0x03;
+
+    addr.y = (addr.y << 2) + int(pixel.r);
+    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+
+    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
+}
+
+vec4 TextureFetch_I4(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x) >> 1;
+    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+    if ((st.x & 1) != 0) pixel.r >>= 4;
+    else                 pixel.r &= 0x0F;
+
+    addr.y = (addr.y << 3) + int(pixel.r);
+    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+
+    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
+}
+
+vec4 TextureFetch_I8(ivec2 addr, ivec4 st, uint wrapmode, float alpha0)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x);
+    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+
+    addr.y = (addr.y << 3) + int(pixel.r);
+    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+
+    return vec4(color.rgb, max(step(1,pixel.r),alpha0));
+}
+
+vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, uint wrapmode)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3);
+    uvec4 p = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+    uint val = (p.r >> (2 * (st.x & 0x3))) & 0x3;
+
+    int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1);
+    if (addr.x >= 0x40000) slot1addr += 0x10000;
+
+    uint palinfo;
+    p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0);
+    palinfo = p.r;
+    slot1addr++;
+    p = texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0);
+    palinfo |= (p.r << 8);
+
+    addr.y = (addr.y << 3) + ((int(palinfo) & 0x3FFF) << 1);
+    palinfo >>= 14;
+
+    if (val == 0)
+    {
+        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+        return vec4(color.rgb, 1.0);
+    }
+    else if (val == 1)
+    {
+        addr.y++;
+        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+        return vec4(color.rgb, 1.0);
+    }
+    else if (val == 2)
+    {
+        if (palinfo == 1)
+        {
+            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            addr.y++;
+            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            return vec4((color0.rgb + color1.rgb) / 2.0, 1.0);
+        }
+        else if (palinfo == 3)
+        {
+            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            addr.y++;
+            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0);
+        }
+        else
+        {
+            addr.y += 2;
+            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            return vec4(color.rgb, 1.0);
+        }
+    }
+    else
+    {
+        if (palinfo == 2)
+        {
+            addr.y += 3;
+            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            return vec4(color.rgb, 1.0);
+        }
+        else if (palinfo == 3)
+        {
+            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            addr.y++;
+            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+            return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0);
+        }
+        else
+        {
+            return vec4(0.0);
+        }
+    }
+}
+
+vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, uint wrapmode)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x);
+    uvec4 pixel = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+
+    pixel.a = (pixel.r & 0xF8) >> 3;
+    pixel.r &= 0x07;
+
+    addr.y = (addr.y << 3) + int(pixel.r);
+    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
+
+    return vec4(color.rgb, float(pixel.a)/31.0);
+}
+
+vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, uint wrapmode)
+{
+    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
+    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);
+
+    addr.x += ((st.y * st.z) + st.x) << 1;
+    uvec4 pixelL = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+    addr.x++;
+    uvec4 pixelH = texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0);
+
+    vec4 color;
+    color.r = float(pixelL.r & 0x1F) / 31.0;
+    color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0;
+    color.b = float((pixelH.r & 0x7C) >> 2) / 31.0;
+    color.a = float(pixelH.r >> 7);
+
+    return color;
+}
+
+vec4 TextureLookup_Nearest(vec2 st)
+{
+    uint attr = fPolygonAttr.y;
+    uint paladdr = fPolygonAttr.z;
+
+    float alpha0;
+    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
+    else                       alpha0 = 1.0;
+
+    int tw = 8 << int((attr >> 20) & 0x7);
+    int th = 8 << int((attr >> 23) & 0x7);
+    ivec4 st_full = ivec4(ivec2(st), tw, th);
+
+    ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr));
+    uint wrapmode = attr >> 16;
+
+    uint type = (attr >> 26) & 0x7;
+    if      (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode);
+    else if (type == 2) return TextureFetch_I2        (vramaddr, st_full, wrapmode, alpha0);
+    else if (type == 3) return TextureFetch_I4        (vramaddr, st_full, wrapmode, alpha0);
+    else if (type == 4) return TextureFetch_I8        (vramaddr, st_full, wrapmode, alpha0);
+    else if (type == 1) return TextureFetch_A3I5      (vramaddr, st_full, wrapmode);
+    else if (type == 6) return TextureFetch_A5I3      (vramaddr, st_full, wrapmode);
+    else                return TextureFetch_Direct    (vramaddr, st_full, wrapmode);
+}
+
+vec4 TextureLookup_Linear(vec2 texcoord)
+{
+    ivec2 intpart = ivec2(texcoord);
+    vec2 fracpart = fract(texcoord);
+
+    uint attr = fPolygonAttr.y;
+    uint paladdr = fPolygonAttr.z;
+
+    float alpha0;
+    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
+    else                       alpha0 = 1.0;
+
+    int tw = 8 << int((attr >> 20) & 0x7);
+    int th = 8 << int((attr >> 23) & 0x7);
+    ivec4 st_full = ivec4(intpart, tw, th);
+
+    ivec2 vramaddr = ivec2(int(attr & 0xFFFF) << 3, int(paladdr));
+    uint wrapmode = attr >> 16;
+
+    vec4 A, B, C, D;
+    uint type = (attr >> 26) & 0x7;
+    if (type == 5)
+    {
+        A = TextureFetch_Compressed(vramaddr, st_full                 , wrapmode);
+        B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
+        C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
+        D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
+    }
+    else if (type == 2)
+    {
+        A = TextureFetch_I2(vramaddr, st_full                 , wrapmode, alpha0);
+        B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
+        C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
+        D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
+    }
+    else if (type == 3)
+    {
+        A = TextureFetch_I4(vramaddr, st_full                 , wrapmode, alpha0);
+        B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
+        C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
+        D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
+    }
+    else if (type == 4)
+    {
+        A = TextureFetch_I8(vramaddr, st_full                 , wrapmode, alpha0);
+        B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
+        C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
+        D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
+    }
+    else if (type == 1)
+    {
+        A = TextureFetch_A3I5(vramaddr, st_full                 , wrapmode);
+        B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
+        C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
+        D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
+    }
+    else if (type == 6)
+    {
+        A = TextureFetch_A5I3(vramaddr, st_full                 , wrapmode);
+        B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
+        C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
+        D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
+    }
+    else
+    {
+        A = TextureFetch_Direct(vramaddr, st_full                 , wrapmode);
+        B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
+        C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
+        D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
+    }
+
+    float fx = fracpart.x;
+    vec4 AB;
+    if (A.a < (0.5/31.0) && B.a < (0.5/31.0))
+        AB = vec4(0);
+    else
+    {
+        //if (A.a < (0.5/31.0) || B.a < (0.5/31.0))
+        //    fx = step(0.5, fx);
+
+        AB = mix(A, B, fx);
+    }
+
+    fx = fracpart.x;
+    vec4 CD;
+    if (C.a < (0.5/31.0) && D.a < (0.5/31.0))
+        CD = vec4(0);
+    else
+    {
+        //if (C.a < (0.5/31.0) || D.a < (0.5/31.0))
+        //    fx = step(0.5, fx);
+
+        CD = mix(C, D, fx);
+    }
+
+    fx = fracpart.y;
+    vec4 ret;
+    if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0))
+        ret = vec4(0);
+    else
+    {
+        //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0))
+        //    fx = step(0.5, fx);
+
+        ret = mix(AB, CD, fx);
+    }
+
+    return ret;
+}
+
+vec4 FinalColor()
+{
+    vec4 col;
+    vec4 vcol = fColor;
+    uint blendmode = (fPolygonAttr.x >> 4) & 0x3;
+
+    if (blendmode == 2)
+    {
+        if ((uDispCnt & (1<<1)) == 0)
+        {
+            // toon
+            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
+            vcol.rgb = tooncolor;
+        }
+        else
+        {
+            // highlight
+            vcol.rgb = vcol.rrr;
+        }
+    }
+
+    if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0))
+    {
+        // no texture
+        col = vcol;
+    }
+    else
+    {
+        vec4 tcol = TextureLookup_Nearest(fTexcoord);
+        //vec4 tcol = TextureLookup_Linear(fTexcoord);
+
+        if ((blendmode & 1) != 0)
+        {
+            // decal
+            col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a));
+            col.a = vcol.a;
+        }
+        else
+        {
+            // modulate
+            col = vcol * tcol;
+        }
+    }
+
+    if (blendmode == 2)
+    {
+        if ((uDispCnt & (1<<1)) != 0)
+        {
+            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
+            col.rgb = min(col.rgb + tooncolor, 1.0);
+        }
+    }
+
+    return col.bgra;
+}
+)";
+
+// kRenderVS_Z: main() body that builds clip-space XYZW from vPosition (Z is left-shifted by bits 16-20 of vPolygonAttr.x), multiplies XYZ by W, and forwards colour, texcoord/16 and polygon attributes. Uses the declarations in kRenderVSCommon.
+const char* kRenderVS_Z = R"(
+
+void main()
+{
+    uint attr = vPolygonAttr.x;
+    uint zshift = (attr >> 16) & 0x1F;
+
+    vec4 fpos;
+    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
+    fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0;
+    fpos.w = float(vPosition.w) / 65536.0f;
+    fpos.xyz *= fpos.w;
+
+    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
+    fTexcoord = vec2(vTexcoord) / 16.0;
+    fPolygonAttr = vPolygonAttr;
+
+    gl_Position = fpos;
+}
+)";
+// kRenderVS_W: main() variant that exports depth as the smooth varying fZ instead of clip-space Z. fpos starts zeroed so gl_Position.z is a defined 0 (an unwritten component is undefined yet still takes part in clipping). Uses the declarations in kRenderVSCommon.
+const char* kRenderVS_W = R"(
+
+smooth out float fZ;
+
+void main()
+{
+    uint attr = vPolygonAttr.x;
+    uint zshift = (attr >> 16) & 0x1F;
+
+    vec4 fpos = vec4(0.0);
+    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
+    fZ = float(vPosition.z << zshift) / 16777216.0;
+    fpos.w = float(vPosition.w) / 65536.0f;
+    fpos.xy *= fpos.w;
+
+    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
+    fTexcoord = vec2(vTexcoord) / 16.0;
+    fPolygonAttr = vPolygonAttr;
+
+    gl_Position = fpos;
+}
+)";
+
+// kRenderFS_ZO: main() that discards fragments whose FinalColor() alpha is below 30.5/31, then writes the colour and bits 24-29 of fPolygonAttr.x to oAttr.g. Relies on kRenderFSCommon.
+const char* kRenderFS_ZO = R"(
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 30.5/31) discard;
+
+    oColor = col;
+    oAttr.g = (fPolygonAttr.x >> 24) & 0x3F;
+}
+)";
+// kRenderFS_WO: same alpha test and outputs as kRenderFS_ZO, and additionally writes the interpolated fZ input to gl_FragDepth. Relies on kRenderFSCommon.
+const char* kRenderFS_WO = R"(
+
+smooth in float fZ;
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 30.5/31) discard;
+
+    oColor = col;
+    oAttr.g = (fPolygonAttr.x >> 24) & 0x3F;
+    gl_FragDepth = fZ;
+}
+)";
+// kRenderFS_ZT: main() that keeps only fragments with 0.5/31 <= alpha < 30.5/31, writes the colour and sets oAttr.g to 0xFF. Relies on kRenderFSCommon.
+const char* kRenderFS_ZT = R"(
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 0.5/31) discard;
+    if (col.a >= 30.5/31) discard;
+
+    oColor = col;
+    oAttr.g = 0xFF;
+}
+)";
+// kRenderFS_WT: same alpha window and outputs as kRenderFS_ZT, and additionally writes the interpolated fZ input to gl_FragDepth. Relies on kRenderFSCommon.
+const char* kRenderFS_WT = R"(
+
+smooth in float fZ;
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 0.5/31) discard;
+    if (col.a >= 30.5/31) discard;
+
+    oColor = col;
+    oAttr.g = 0xFF;
+    gl_FragDepth = fZ;
+}
+)";
+// kRenderFS_ZSM: main() that writes a constant colour (0,0,0,1), oAttr.g = 0xFF and oAttr.b = 1 without calling FinalColor(). Presumably the shadow-mask pass, going by the name - TODO confirm.
+const char* kRenderFS_ZSM = R"(
+
+void main()
+{
+    oColor = vec4(0,0,0,1);
+    oAttr.g = 0xFF;
+    oAttr.b = 1;
+}
+)";
+// kRenderFS_WSM: same constant outputs as kRenderFS_ZSM, and additionally writes the interpolated fZ input to gl_FragDepth.
+const char* kRenderFS_WSM = R"(
+
+smooth in float fZ;
+
+void main()
+{
+    oColor = vec4(0,0,0,1);
+    oAttr.g = 0xFF;
+    oAttr.b = 1;
+    gl_FragDepth = fZ;
+}
+)";
+// kRenderFS_ZS: main() that applies the 0.5/31 <= alpha < 30.5/31 window, then reads iAttrTex at the fragment position and discards unless its .b is 1 and its .g differs from bits 24-29 of fPolygonAttr.x. Writes only oColor. Relies on kRenderFSCommon.
+const char* kRenderFS_ZS = R"(
+
+layout(binding=2) uniform usampler2D iAttrTex;
+//layout(origin_upper_left) in vec4 gl_FragCoord;
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 0.5/31) discard;
+    if (col.a >= 30.5/31) discard;
+
+    uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0);
+    if (iAttr.b != 1) discard;
+    if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard;
+
+    oColor = col;
+}
+)";
+// kRenderFS_WS: same alpha window and iAttrTex tests as kRenderFS_ZS, and additionally writes the interpolated fZ input to gl_FragDepth. Relies on kRenderFSCommon.
+const char* kRenderFS_WS = R"(
+
+layout(binding=2) uniform usampler2D iAttrTex;
+//layout(origin_upper_left) in vec4 gl_FragCoord;
+
+smooth in float fZ;
+
+void main()
+{
+    vec4 col = FinalColor();
+    if (col.a < 0.5/31) discard;
+    if (col.a >= 30.5/31) discard;
+
+    uvec4 iAttr = texelFetch(iAttrTex, ivec2(gl_FragCoord.xy), 0);
+    if (iAttr.b != 1) discard;
+    if (iAttr.g == ((fPolygonAttr.x >> 24) & 0x3F)) discard;
+
+    oColor = col;
+    gl_FragDepth = fZ;
+}
+)";
+
+#endif // GPU3D_OPENGL43_SHADERS_H
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index ac5cd93..20283d3 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -138,7 +138,7 @@ void Reset()
     SetupRenderThread();
 }
 
-void SetScale(int scale)
+void SetDisplaySettings(int scale, bool accel) // neither argument is used; the body only prints a TODO notice
 {
     printf("SOFT RENDERER SCALE FACTOR: TODO!!!\n");
 }
@@ -2122,5 +2122,10 @@ u32* GetLine(int line)
     return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset];
 }
 
+void SetupAccelFrame()
+{
+    // TODO: no-op in the software renderer for now
+}
+
 }
 }
diff --git a/src/libui_sdl/main.cpp b/src/libui_sdl/main.cpp
index 47f5f13..b77d67f 100644
--- a/src/libui_sdl/main.cpp
+++ b/src/libui_sdl/main.cpp
@@ -240,8 +240,8 @@ void GLDrawing_DrawScreen()
         x1 = TopScreenRect.X + TopScreenRect.Width;
         y1 = TopScreenRect.Y + TopScreenRect.Height;
 
-        scwidth = 256 << ScreenScale[0];
-        scheight = 192 << ScreenScale[0];
+        scwidth = 256;// << ScreenScale[0];
+        scheight = 192;// << ScreenScale[0];
 
         switch (ScreenRotation)
         {
@@ -286,8 +286,8 @@ void GLDrawing_DrawScreen()
         x1 = BottomScreenRect.X + BottomScreenRect.Width;
         y1 = BottomScreenRect.Y + BottomScreenRect.Height;
 
-        scwidth = 256 << ScreenScale[1];
-        scheight = 192 << ScreenScale[1];
+        scwidth = 256;// << ScreenScale[1];
+        scheight = 192;// << ScreenScale[1];
 
         switch (ScreenRotation)
         {
@@ -349,9 +349,13 @@ void GLDrawing_DrawScreen()
     int frontbuf = GPU::FrontBuffer;
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, GL_ScreenTexture);
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256<<ScreenScale[0], 192<<ScreenScale[0], GL_RGBA_INTEGER,
+    /*glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256<<ScreenScale[0], 192<<ScreenScale[0], GL_RGBA_INTEGER,
                     GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 768, 256<<ScreenScale[1], 192<<ScreenScale[1], GL_RGBA_INTEGER,
+                    GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);*/
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3, 192, GL_RGBA_INTEGER,
+                    GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 768, 256*3, 192, GL_RGBA_INTEGER,
                     GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
 
     glBindBuffer(GL_ARRAY_BUFFER, GL_ScreenVertexBufferID);
@@ -633,7 +637,8 @@ int EmuThreadFunc(void* burp)
     ScreenScale[0] = ScreenScale[2];
     ScreenScale[1] = ScreenScale[2];
 
-    int lastscale[2] = {-1, -1};
+    int lastscale[2] = {ScreenScale[0], ScreenScale[1]};
+    GPU::SetDisplaySettings(ScreenScale[0], ScreenScale[1], false);
 
     Touching = false;
     KeyInputMask = 0xFFF;
@@ -782,7 +787,7 @@ int EmuThreadFunc(void* burp)
             if (ScreenScale[0] != lastscale[0] ||
                 ScreenScale[1] != lastscale[1])
             {
-                GPU::SetFramebufferScale(ScreenScale[0], ScreenScale[1]);
+                GPU::SetDisplaySettings(ScreenScale[0], ScreenScale[1], false);
                 ScreenDrawInited = false;
             }
 
-- 
cgit v1.2.3