/*
    Copyright 2016-2019 Arisotura

    This file is part of melonDS.

    melonDS is free software: you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with melonDS. If not, see http://www.gnu.org/licenses/.
*/

// GLSL shader sources, embedded as C++11 raw string literals.
//
// NOTE: the strings below are ordinary (non-static) definitions, so this
// header must be included from exactly one translation unit; including it
// from several would produce duplicate symbols at link time.
//
// NOTE: line structure inside the raw strings is significant. GLSL's
// "#version" directive and "//" comments both extend to the end of the
// line, so the shaders must not be collapsed onto a single line.

#ifndef GPU3D_OPENGL_SHADERS_H
#define GPU3D_OPENGL_SHADERS_H

// Version directive placed in front of a shader. It carries no trailing
// newline of its own: the newline directly after R"( in each string that
// uses it is what terminates the directive.
#define kShaderHeader "#version 140"


// Clear pass, vertex stage: passes vPosition through and derives the clip
// depth from uDepth as (uDepth / 2^23) - 1.
const char* kClearVS = kShaderHeader R"(

in vec2 vPosition;

uniform uint uDepth;

void main()
{
    float fdepth = (float(uDepth) / 8388608.0) - 1.0;
    gl_Position = vec4(vPosition, fdepth, 1.0);
}
)";

// Clear pass, fragment stage: writes uColor (scaled by 1/31, swizzled to
// BGRA) to oColor, and the opaque polygon ID (scaled by 1/63) plus the fog
// flag to oAttr.
const char* kClearFS = kShaderHeader R"(

uniform uvec4 uColor;
uniform uint uOpaquePolyID;
uniform uint uFogFlag;

out vec4 oColor;
out vec4 oAttr;

void main()
{
    oColor = vec4(uColor).bgra / 31.0;
    oAttr.r = float(uOpaquePolyID) / 63.0;
    oAttr.g = 0;
    oAttr.b = float(uFogFlag);
    oAttr.a = 1;
}
)";


// Final pass, vertex stage: passes vPosition through at depth 0.
const char* kFinalPassVS = kShaderHeader R"(

in vec2 vPosition;

void main()
{
    // heh
    gl_Position = vec4(vPosition, 0.0, 1.0);
}
)";

// Final pass, fragment stage: fetches the depth and attribute texels at the
// current fragment and, when the attribute's blue channel is non-zero,
// outputs the interpolated fog density in all four channels; otherwise
// outputs zero.
const char* kFinalPassFS = kShaderHeader R"(

uniform sampler2D DepthBuffer;
uniform sampler2D AttrBuffer;

layout(std140) uniform uConfig
{
    vec2 uScreenSize;
    int uDispCnt;
    vec4 uToonColors[32];
    vec4 uEdgeColors[8];
    vec4 uFogColor;
    float uFogDensity[34];
    int uFogOffset;
    int uFogShift;
};

out vec4 oColor;

vec4 CalculateFog(float depth)
{
    int idepth = int(depth * 16777216.0);
    int densityid, densityfrac;

    if (idepth < uFogOffset)
    {
        densityid = 0;
        densityfrac = 0;
    }
    else
    {
        uint udepth = uint(idepth);
        udepth -= uint(uFogOffset);
        udepth = (udepth >> 2) << uint(uFogShift);

        densityid = int(udepth >> 17);
        if (densityid >= 32)
        {
            densityid = 32;
            densityfrac = 0;
        }
        else
            densityfrac = int(udepth & uint(0x1FFFF));
    }

    float density = mix(uFogDensity[densityid], uFogDensity[densityid+1], float(densityfrac)/131072.0);

    return vec4(density, density, density, density);
}

void main()
{
    ivec2 coord = ivec2(gl_FragCoord.xy);

    vec4 ret = vec4(0,0,0,0);
    vec4 depth = texelFetch(DepthBuffer, coord, 0);
    vec4 attr = texelFetch(AttrBuffer, coord, 0);

    if (attr.b != 0) ret = CalculateFog(depth.r);

    oColor = ret;
}
)";


// Declarations shared by the kRenderVS_* vertex shaders: the uConfig block,
// the vertex inputs and the outputs handed to the fragment stage. It has no
// version directive and no main().
// NOTE(review): presumably the loader concatenates kShaderHeader, this
// string and one kRenderVS_* body -- confirm against the code that builds
// the programs.
const char* kRenderVSCommon = R"(

layout(std140) uniform uConfig
{
    vec2 uScreenSize;
    int uDispCnt;
    vec4 uToonColors[32];
    vec4 uEdgeColors[8];
    vec4 uFogColor;
    float uFogDensity[34];
    int uFogOffset;
    int uFogShift;
};

in uvec4 vPosition;
in uvec4 vColor;
in ivec2 vTexcoord;
in ivec3 vPolygonAttr;

smooth out vec4 fColor;
smooth out vec2 fTexcoord;
flat out ivec3 fPolygonAttr;
)";

// Declarations and helpers shared by the kRenderFS_* fragment shaders:
//   - TexcoordWrap:      wraps/mirrors or clamps one texel coordinate
//                        according to the mode bits.
//   - TextureFetch_*:    decode one texel of the named format from TexMem,
//                        looking colours up in TexPalMem where the format is
//                        paletted. Both are addressed as 1024-wide 2D
//                        textures (x = addr & 0x3FF, y = addr >> 10).
//   - TextureLookup_*:   pick the fetch routine from the format field in
//                        fPolygonAttr.y (bits 26..28); the Linear variant
//                        blends four neighbouring texels.
//   - FinalColor:        combines vertex colour, texture colour and the
//                        toon table according to the blend mode in
//                        fPolygonAttr.x (bits 4..5) and uDispCnt, and returns
//                        the result swizzled to BGRA.
// Like kRenderVSCommon it has no version directive and no main().
const char* kRenderFSCommon = R"(

uniform usampler2D TexMem;
uniform sampler2D TexPalMem;

layout(std140) uniform uConfig
{
    vec2 uScreenSize;
    int uDispCnt;
    vec4 uToonColors[32];
    vec4 uEdgeColors[8];
    vec4 uFogColor;
    float uFogDensity[34];
    int uFogOffset;
    int uFogShift;
};

smooth in vec4 fColor;
smooth in vec2 fTexcoord;
flat in ivec3 fPolygonAttr;

out vec4 oColor;
out vec4 oAttr;

int TexcoordWrap(int c, int maxc, int mode)
{
    if ((mode & (1<<0)) != 0)
    {
        if ((mode & (1<<2)) != 0 && (c & maxc) != 0)
            return (maxc-1) - (c & (maxc-1));
        else
            return (c & (maxc-1));
    }
    else
        return clamp(c, 0, maxc-1);
}

vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, int wrapmode)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x);
    ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));

    pixel.a = (pixel.r & 0xE0);
    pixel.a = (pixel.a >> 3) + (pixel.a >> 6);
    pixel.r &= 0x1F;

    addr.y = (addr.y << 3) + pixel.r;
    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);

    return vec4(color.rgb, float(pixel.a)/31.0);
}

vec4 TextureFetch_I2(ivec2 addr, ivec4 st, int wrapmode, float alpha0)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x) >> 2;
    ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));
    pixel.r >>= (2 * (st.x & 3));
    pixel.r &= 0x03;

    addr.y = (addr.y << 2) + pixel.r;
    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);

    return vec4(color.rgb, (pixel.r>0)?1:alpha0);
}

vec4 TextureFetch_I4(ivec2 addr, ivec4 st, int wrapmode, float alpha0)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x) >> 1;
    ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));
    if ((st.x & 1) != 0) pixel.r >>= 4;
    else                 pixel.r &= 0x0F;

    addr.y = (addr.y << 3) + pixel.r;
    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);

    return vec4(color.rgb, (pixel.r>0)?1:alpha0);
}

vec4 TextureFetch_I8(ivec2 addr, ivec4 st, int wrapmode, float alpha0)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x);
    ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));

    addr.y = (addr.y << 3) + pixel.r;
    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);

    return vec4(color.rgb, (pixel.r>0)?1:alpha0);
}

vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, int wrapmode)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3);
    ivec4 p = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));
    int val = (p.r >> (2 * (st.x & 0x3))) & 0x3;

    int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1);
    if (addr.x >= 0x40000) slot1addr += 0x10000;

    int palinfo;
    p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0));
    palinfo = p.r;
    slot1addr++;
    p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0));
    palinfo |= (p.r << 8);

    addr.y = (addr.y << 3) + ((palinfo & 0x3FFF) << 1);
    palinfo >>= 14;

    if (val == 0)
    {
        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
        return vec4(color.rgb, 1.0);
    }
    else if (val == 1)
    {
        addr.y++;
        vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
        return vec4(color.rgb, 1.0);
    }
    else if (val == 2)
    {
        if (palinfo == 1)
        {
            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            addr.y++;
            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            return vec4((color0.rgb + color1.rgb) / 2.0, 1.0);
        }
        else if (palinfo == 3)
        {
            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            addr.y++;
            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0);
        }
        else
        {
            addr.y += 2;
            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            return vec4(color.rgb, 1.0);
        }
    }
    else
    {
        if (palinfo == 2)
        {
            addr.y += 3;
            vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            return vec4(color.rgb, 1.0);
        }
        else if (palinfo == 3)
        {
            vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            addr.y++;
            vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);
            return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0);
        }
        else
        {
            return vec4(0.0);
        }
    }
}

vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, int wrapmode)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x);
    ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));

    pixel.a = (pixel.r & 0xF8) >> 3;
    pixel.r &= 0x07;

    addr.y = (addr.y << 3) + pixel.r;
    vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0);

    return vec4(color.rgb, float(pixel.a)/31.0);
}

vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, int wrapmode)
{
    st.x = TexcoordWrap(st.x, st.z, wrapmode>>0);
    st.y = TexcoordWrap(st.y, st.w, wrapmode>>1);

    addr.x += ((st.y * st.z) + st.x) << 1;
    ivec4 pixelL = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));
    addr.x++;
    ivec4 pixelH = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0));

    vec4 color;
    color.r = float(pixelL.r & 0x1F) / 31.0;
    color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0;
    color.b = float((pixelH.r & 0x7C) >> 2) / 31.0;
    color.a = float(pixelH.r >> 7);

    return color;
}

vec4 TextureLookup_Nearest(vec2 st)
{
    int attr = int(fPolygonAttr.y);
    int paladdr = int(fPolygonAttr.z);

    float alpha0;
    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
    else                       alpha0 = 1.0;

    int tw = 8 << ((attr >> 20) & 0x7);
    int th = 8 << ((attr >> 23) & 0x7);
    ivec4 st_full = ivec4(ivec2(st), tw, th);

    ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr);
    int wrapmode = (attr >> 16);

    int type = (attr >> 26) & 0x7;
    if      (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode);
    else if (type == 2) return TextureFetch_I2        (vramaddr, st_full, wrapmode, alpha0);
    else if (type == 3) return TextureFetch_I4        (vramaddr, st_full, wrapmode, alpha0);
    else if (type == 4) return TextureFetch_I8        (vramaddr, st_full, wrapmode, alpha0);
    else if (type == 1) return TextureFetch_A3I5      (vramaddr, st_full, wrapmode);
    else if (type == 6) return TextureFetch_A5I3      (vramaddr, st_full, wrapmode);
    else                return TextureFetch_Direct    (vramaddr, st_full, wrapmode);
}

vec4 TextureLookup_Linear(vec2 texcoord)
{
    ivec2 intpart = ivec2(texcoord);
    vec2 fracpart = fract(texcoord);

    int attr = int(fPolygonAttr.y);
    int paladdr = int(fPolygonAttr.z);

    float alpha0;
    if ((attr & (1<<29)) != 0) alpha0 = 0.0;
    else                       alpha0 = 1.0;

    int tw = 8 << ((attr >> 20) & 0x7);
    int th = 8 << ((attr >> 23) & 0x7);
    ivec4 st_full = ivec4(intpart, tw, th);

    ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr);
    int wrapmode = (attr >> 16);

    vec4 A, B, C, D;
    int type = (attr >> 26) & 0x7;
    if (type == 5)
    {
        A = TextureFetch_Compressed(vramaddr, st_full                 , wrapmode);
        B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
        C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
        D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
    }
    else if (type == 2)
    {
        A = TextureFetch_I2(vramaddr, st_full                 , wrapmode, alpha0);
        B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
        C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
        D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
    }
    else if (type == 3)
    {
        A = TextureFetch_I4(vramaddr, st_full                 , wrapmode, alpha0);
        B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
        C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
        D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
    }
    else if (type == 4)
    {
        A = TextureFetch_I8(vramaddr, st_full                 , wrapmode, alpha0);
        B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0);
        C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0);
        D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0);
    }
    else if (type == 1)
    {
        A = TextureFetch_A3I5(vramaddr, st_full                 , wrapmode);
        B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
        C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
        D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
    }
    else if (type == 6)
    {
        A = TextureFetch_A5I3(vramaddr, st_full                 , wrapmode);
        B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
        C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
        D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
    }
    else
    {
        A = TextureFetch_Direct(vramaddr, st_full                 , wrapmode);
        B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode);
        C = TextureFetch_Direct(vramaddr, st_full + ivec4(0,1,0,0), wrapmode);
        D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode);
    }

    float fx = fracpart.x;
    vec4 AB;
    if (A.a < (0.5/31.0) && B.a < (0.5/31.0))
        AB = vec4(0);
    else
    {
        //if (A.a < (0.5/31.0) || B.a < (0.5/31.0))
        //    fx = step(0.5, fx);

        AB = mix(A, B, fx);
    }

    fx = fracpart.x;
    vec4 CD;
    if (C.a < (0.5/31.0) && D.a < (0.5/31.0))
        CD = vec4(0);
    else
    {
        //if (C.a < (0.5/31.0) || D.a < (0.5/31.0))
        //    fx = step(0.5, fx);

        CD = mix(C, D, fx);
    }

    fx = fracpart.y;
    vec4 ret;
    if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0))
        ret = vec4(0);
    else
    {
        //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0))
        //    fx = step(0.5, fx);

        ret = mix(AB, CD, fx);
    }

    return ret;
}

vec4 FinalColor()
{
    vec4 col;
    vec4 vcol = fColor;
    int blendmode = (fPolygonAttr.x >> 4) & 0x3;

    if (blendmode == 2)
    {
        if ((uDispCnt & (1<<1)) == 0)
        {
            // toon
            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
            vcol.rgb = tooncolor;
        }
        else
        {
            // highlight
            vcol.rgb = vcol.rrr;
        }
    }

    if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0))
    {
        // no texture
        col = vcol;
    }
    else
    {
        vec4 tcol = TextureLookup_Nearest(fTexcoord);
        //vec4 tcol = TextureLookup_Linear(fTexcoord);

        if ((blendmode & 1) != 0)
        {
            // decal
            col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a));
            col.a = vcol.a;
        }
        else
        {
            // modulate
            col = vcol * tcol;
        }
    }

    if (blendmode == 2)
    {
        if ((uDispCnt & (1<<1)) != 0)
        {
            vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb;
            col.rgb = min(col.rgb + tooncolor, 1.0);
        }
    }

    return col.bgra;
}
)";


// Render vertex stage, Z-buffer variant: maps vPosition.xy to clip space
// using uScreenSize, derives clip z from vPosition.z shifted by the zshift
// field of vPolygonAttr.x (bits 16..20), and forwards colour, texcoord and
// polygon attributes. Relies on the declarations in kRenderVSCommon.
const char* kRenderVS_Z = R"(

void main()
{
    int attr = vPolygonAttr.x;
    int zshift = (attr >> 16) & 0x1F;

    vec4 fpos;
    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
    fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0;
    fpos.w = float(vPosition.w) / 65536.0f;
    fpos.xyz *= fpos.w;

    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
    fTexcoord = vec2(vTexcoord) / 16.0;
    fPolygonAttr = vPolygonAttr;

    gl_Position = fpos;
}
)";

// Render vertex stage, W-buffer variant: same as kRenderVS_Z except that the
// depth value is passed to the fragment stage in fZ (the kRenderFS_W*
// shaders write it to gl_FragDepth) instead of going through gl_Position.z.
// Relies on the declarations in kRenderVSCommon.
const char* kRenderVS_W = R"(

smooth out float fZ;

void main()
{
    int attr = vPolygonAttr.x;
    int zshift = (attr >> 16) & 0x1F;

    vec4 fpos;
    fpos.xy = ((vec2(vPosition.xy) * 2.0) / uScreenSize) - 1.0;
    fZ = float(vPosition.z << zshift) / 16777216.0;
    // Depth comes from fZ, but gl_Position.z still takes part in clipping,
    // so give it a defined value that is always within [-w, w].
    fpos.z = 0.0;
    fpos.w = float(vPosition.w) / 65536.0f;
    fpos.xy *= fpos.w;

    fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0);
    fTexcoord = vec2(vTexcoord) / 16.0;
    fPolygonAttr = vPolygonAttr;

    gl_Position = fpos;
}
)";


// Render fragment stage, Z-buffer, opaque: discards fragments whose alpha is
// below 30.5/31, then writes the colour plus the polygon ID (bits 24..29 of
// fPolygonAttr.x) and bit 15 of fPolygonAttr.x to oAttr.
// Relies on kRenderFSCommon.
const char* kRenderFS_ZO = R"(

void main()
{
    vec4 col = FinalColor();
    if (col.a < 30.5/31) discard;

    oColor = col;
    oAttr.r = float((fPolygonAttr.x >> 24) & 0x3F) / 63.0;
    oAttr.b = float((fPolygonAttr.x >> 15) & 0x1);
    oAttr.a = 1;
}
)";

// Render fragment stage, W-buffer, opaque: as kRenderFS_ZO, and additionally
// writes fZ to gl_FragDepth. Relies on kRenderFSCommon.
const char* kRenderFS_WO = R"(

smooth in float fZ;

void main()
{
    vec4 col = FinalColor();
    if (col.a < 30.5/31) discard;

    oColor = col;
    oAttr.r = float((fPolygonAttr.x >> 24) & 0x3F) / 63.0;
    oAttr.b = float((fPolygonAttr.x >> 15) & 0x1);
    oAttr.a = 1;
    gl_FragDepth = fZ;
}
)";

// Render fragment stage, Z-buffer, translucent: keeps only fragments whose
// alpha lies in [0.5/31, 30.5/31).
// NOTE(review): oAttr.r and oAttr.g are not written here -- presumably the
// host masks those channels for this pass; confirm against the renderer.
// Relies on kRenderFSCommon.
const char* kRenderFS_ZT = R"(

void main()
{
    vec4 col = FinalColor();
    if (col.a < 0.5/31) discard;
    if (col.a >= 30.5/31) discard;

    oColor = col;
    oAttr.b = 0;
    oAttr.a = 1;
}
)";

// Render fragment stage, W-buffer, translucent: as kRenderFS_ZT, and
// additionally writes fZ to gl_FragDepth. Relies on kRenderFSCommon.
const char* kRenderFS_WT = R"(

smooth in float fZ;

void main()
{
    vec4 col = FinalColor();
    if (col.a < 0.5/31) discard;
    if (col.a >= 30.5/31) discard;

    oColor = col;
    oAttr.b = 0;
    oAttr.a = 1;
    gl_FragDepth = fZ;
}
)";

// Render fragment stage, Z-buffer, "SM" variant: outputs constant opaque
// black and ignores the interpolated inputs.
// NOTE(review): "SM" presumably stands for shadow mask -- confirm.
const char* kRenderFS_ZSM = R"(

void main()
{
    oColor = vec4(0,0,0,1);
}
)";

// Render fragment stage, W-buffer, "SM" variant: as kRenderFS_ZSM, and
// additionally writes fZ to gl_FragDepth.
const char* kRenderFS_WSM = R"(

smooth in float fZ;

void main()
{
    oColor = vec4(0,0,0,1);
    gl_FragDepth = fZ;
}
)";

#endif // GPU3D_OPENGL_SHADERS_H