diff options
author | StapleButter <thetotalworm@gmail.com> | 2017-02-13 14:59:51 +0100 |
---|---|---|
committer | StapleButter <thetotalworm@gmail.com> | 2017-02-13 14:59:51 +0100 |
commit | fb53fd5195f52365dd802e54412b4af5e049b677 (patch) | |
tree | cf35ea99670822c7998edb7826c870c6e3a252f3 | |
parent | 361ddd7595671a907ebdecfaf6b0fcba499c9e07 (diff) |
* fix overflows during fixed-point multiply
* small fix to SwapBuffers
-rw-r--r-- | GPU3D.cpp | 170 | ||||
-rw-r--r-- | GPU3D_Soft.cpp | 20 | ||||
-rw-r--r-- | melonDS.depend | 8 |
3 files changed, 113 insertions, 85 deletions
@@ -270,25 +270,25 @@ void MatrixMult4x4(s32* m, s32* s) memcpy(tmp, m, 16*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8] + s[3]*tmp[12]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9] + s[3]*tmp[13]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10] + s[3]*tmp[14]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11] + s[3]*tmp[15]) >> 12; - - m[4] = (s[4]*tmp[0] + s[5]*tmp[4] + s[6]*tmp[8] + s[7]*tmp[12]) >> 12; - m[5] = (s[4]*tmp[1] + s[5]*tmp[5] + s[6]*tmp[9] + s[7]*tmp[13]) >> 12; - m[6] = (s[4]*tmp[2] + s[5]*tmp[6] + s[6]*tmp[10] + s[7]*tmp[14]) >> 12; - m[7] = (s[4]*tmp[3] + s[5]*tmp[7] + s[6]*tmp[11] + s[7]*tmp[15]) >> 12; - - m[8] = (s[8]*tmp[0] + s[9]*tmp[4] + s[10]*tmp[8] + s[11]*tmp[12]) >> 12; - m[9] = (s[8]*tmp[1] + s[9]*tmp[5] + s[10]*tmp[9] + s[11]*tmp[13]) >> 12; - m[10] = (s[8]*tmp[2] + s[9]*tmp[6] + s[10]*tmp[10] + s[11]*tmp[14]) >> 12; - m[11] = (s[8]*tmp[3] + s[9]*tmp[7] + s[10]*tmp[11] + s[11]*tmp[15]) >> 12; - - m[12] = (s[12]*tmp[0] + s[13]*tmp[4] + s[14]*tmp[8] + s[15]*tmp[12]) >> 12; - m[13] = (s[12]*tmp[1] + s[13]*tmp[5] + s[14]*tmp[9] + s[15]*tmp[13]) >> 12; - m[14] = (s[12]*tmp[2] + s[13]*tmp[6] + s[14]*tmp[10] + s[15]*tmp[14]) >> 12; - m[15] = (s[12]*tmp[3] + s[13]*tmp[7] + s[14]*tmp[11] + s[15]*tmp[15]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8] + (s64)s[3]*tmp[12]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9] + (s64)s[3]*tmp[13]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10] + (s64)s[3]*tmp[14]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11] + (s64)s[3]*tmp[15]) >> 12; + + m[4] = ((s64)s[4]*tmp[0] + (s64)s[5]*tmp[4] + (s64)s[6]*tmp[8] + (s64)s[7]*tmp[12]) >> 12; + m[5] = ((s64)s[4]*tmp[1] + (s64)s[5]*tmp[5] + (s64)s[6]*tmp[9] + (s64)s[7]*tmp[13]) >> 12; + m[6] = ((s64)s[4]*tmp[2] + (s64)s[5]*tmp[6] + (s64)s[6]*tmp[10] + (s64)s[7]*tmp[14]) >> 12; + m[7] = ((s64)s[4]*tmp[3] + (s64)s[5]*tmp[7] + (s64)s[6]*tmp[11] + (s64)s[7]*tmp[15]) >> 12; + + m[8] = ((s64)s[8]*tmp[0] + (s64)s[9]*tmp[4] + (s64)s[10]*tmp[8] + (s64)s[11]*tmp[12]) >> 12; + m[9] = ((s64)s[8]*tmp[1] + (s64)s[9]*tmp[5] + (s64)s[10]*tmp[9] + (s64)s[11]*tmp[13]) >> 12; + m[10] = ((s64)s[8]*tmp[2] + (s64)s[9]*tmp[6] + (s64)s[10]*tmp[10] + (s64)s[11]*tmp[14]) >> 12; + m[11] = ((s64)s[8]*tmp[3] + (s64)s[9]*tmp[7] + (s64)s[10]*tmp[11] + (s64)s[11]*tmp[15]) >> 12; + + m[12] = ((s64)s[12]*tmp[0] + (s64)s[13]*tmp[4] + (s64)s[14]*tmp[8] + (s64)s[15]*tmp[12]) >> 12; + m[13] = ((s64)s[12]*tmp[1] + (s64)s[13]*tmp[5] + (s64)s[14]*tmp[9] + (s64)s[15]*tmp[13]) >> 12; + m[14] = ((s64)s[12]*tmp[2] + (s64)s[13]*tmp[6] + (s64)s[14]*tmp[10] + (s64)s[15]*tmp[14]) >> 12; + m[15] = ((s64)s[12]*tmp[3] + (s64)s[13]*tmp[7] + (s64)s[14]*tmp[11] + (s64)s[15]*tmp[15]) >> 12; } void MatrixMult4x3(s32* m, s32* s) @@ -296,26 +296,34 @@ void MatrixMult4x3(s32* m, s32* s) s32 tmp[16]; memcpy(tmp, m, 16*4); + /*printf("4x3 matrix\n"); + for (int j = 0; j < 12; j += 3) + { + for (int i = 0; i < 3; i++) + printf("%f ", s[i]/4096.0f); + printf("\n"); + }*/ + // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; - - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; - - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; - - m[12] = (s[9]*tmp[0] + s[10]*tmp[4] + s[11]*tmp[8] + 0x1000*tmp[12]) >> 12; - m[13] = (s[9]*tmp[1] + s[10]*tmp[5] + s[11]*tmp[9] + 0x1000*tmp[13]) >> 12; - m[14] = (s[9]*tmp[2] + s[10]*tmp[6] + s[11]*tmp[10] + 0x1000*tmp[14]) >> 12; - m[15] = (s[9]*tmp[3] + s[10]*tmp[7] + s[11]*tmp[11] + 0x1000*tmp[15]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; + + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; + + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; + + m[12] = ((s64)s[9]*tmp[0] + (s64)s[10]*tmp[4] + (s64)s[11]*tmp[8] + (s64)0x1000*tmp[12]) >> 12; + m[13] = ((s64)s[9]*tmp[1] + (s64)s[10]*tmp[5] + (s64)s[11]*tmp[9] + (s64)0x1000*tmp[13]) >> 12; + m[14] = ((s64)s[9]*tmp[2] + (s64)s[10]*tmp[6] + (s64)s[11]*tmp[10] + (s64)0x1000*tmp[14]) >> 12; + m[15] = ((s64)s[9]*tmp[3] + (s64)s[10]*tmp[7] + (s64)s[11]*tmp[11] + (s64)0x1000*tmp[15]) >> 12; } void MatrixMult3x3(s32* m, s32* s) @@ -324,45 +332,45 @@ void MatrixMult3x3(s32* m, s32* s) memcpy(tmp, m, 12*4); // m = s*m - m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; - m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; - m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; - m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; - - m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; - m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; - m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; - m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; - - m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; - m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; - m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; - m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + m[0] = ((s64)s[0]*tmp[0] + (s64)s[1]*tmp[4] + (s64)s[2]*tmp[8]) >> 12; + m[1] = ((s64)s[0]*tmp[1] + (s64)s[1]*tmp[5] + (s64)s[2]*tmp[9]) >> 12; + m[2] = ((s64)s[0]*tmp[2] + (s64)s[1]*tmp[6] + (s64)s[2]*tmp[10]) >> 12; + m[3] = ((s64)s[0]*tmp[3] + (s64)s[1]*tmp[7] + (s64)s[2]*tmp[11]) >> 12; + + m[4] = ((s64)s[3]*tmp[0] + (s64)s[4]*tmp[4] + (s64)s[5]*tmp[8]) >> 12; + m[5] = ((s64)s[3]*tmp[1] + (s64)s[4]*tmp[5] + (s64)s[5]*tmp[9]) >> 12; + m[6] = ((s64)s[3]*tmp[2] + (s64)s[4]*tmp[6] + (s64)s[5]*tmp[10]) >> 12; + m[7] = ((s64)s[3]*tmp[3] + (s64)s[4]*tmp[7] + (s64)s[5]*tmp[11]) >> 12; + + m[8] = ((s64)s[6]*tmp[0] + (s64)s[7]*tmp[4] + (s64)s[8]*tmp[8]) >> 12; + m[9] = ((s64)s[6]*tmp[1] + (s64)s[7]*tmp[5] + (s64)s[8]*tmp[9]) >> 12; + m[10] = ((s64)s[6]*tmp[2] + (s64)s[7]*tmp[6] + (s64)s[8]*tmp[10]) >> 12; + m[11] = ((s64)s[6]*tmp[3] + (s64)s[7]*tmp[7] + (s64)s[8]*tmp[11]) >> 12; } void MatrixScale(s32* m, s32* s) { - m[0] = (s[0]*m[0]) >> 12; - m[1] = (s[0]*m[1]) >> 12; - m[2] = (s[0]*m[2]) >> 12; - m[3] = (s[0]*m[3]) >> 12; - - m[4] = (s[1]*m[4]) >> 12; - m[5] = (s[1]*m[5]) >> 12; - m[6] = (s[1]*m[6]) >> 12; - m[7] = (s[1]*m[7]) >> 12; - - m[8] = (s[2]*m[8]) >> 12; - m[9] = (s[2]*m[9]) >> 12; - m[10] = (s[2]*m[10]) >> 12; - m[11] = (s[2]*m[11]) >> 12; + m[0] = ((s64)s[0]*m[0]) >> 12; + m[1] = ((s64)s[0]*m[1]) >> 12; + m[2] = ((s64)s[0]*m[2]) >> 12; + m[3] = ((s64)s[0]*m[3]) >> 12; + + m[4] = ((s64)s[1]*m[4]) >> 12; + m[5] = ((s64)s[1]*m[5]) >> 12; + m[6] = ((s64)s[1]*m[6]) >> 12; + m[7] = ((s64)s[1]*m[7]) >> 12; + + m[8] = ((s64)s[2]*m[8]) >> 12; + m[9] = ((s64)s[2]*m[9]) >> 12; + m[10] = ((s64)s[2]*m[10]) >> 12; + m[11] = ((s64)s[2]*m[11]) >> 12; } void MatrixTranslate(s32* m, s32* s) { - m[12] += (s[0]*m[0] + s[1]*m[4] + s[2]*m[8]) >> 12; - m[13] += (s[0]*m[1] + s[1]*m[5] + s[2]*m[9]) >> 12; - m[14] += (s[0]*m[2] + s[1]*m[6] + s[2]*m[10]) >> 12; + m[12] += ((s64)s[0]*m[0] + (s64)s[1]*m[4] + (s64)s[2]*m[8]) >> 12; + m[13] += ((s64)s[0]*m[1] + (s64)s[1]*m[5] + (s64)s[2]*m[9]) >> 12; + m[14] += ((s64)s[0]*m[2] + (s64)s[1]*m[6] + (s64)s[2]*m[10]) >> 12; } void UpdateClipMatrix() @@ -379,7 +387,7 @@ void UpdateClipMatrix() template<int comp, s32 plane> void ClipSegment(Vertex* outbuf, int num, Vertex* vout, Vertex* vin) { - s32 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / + s64 factor = ((vin->Position[3] - (plane*vin->Position[comp])) << 12) / ((vin->Position[3] - (plane*vin->Position[comp])) - (vout->Position[3] - (plane*vout->Position[comp]))); Vertex mid; @@ -412,6 +420,15 @@ void SubmitPolygon() int prev, next; int c; + /*if (NumPolygons == 91) + for (int i = 0; i < nverts; i++) + { + Vertex vtx = TempVertexBuffer[i]; + printf("pre-clip v%d: %f %f %f %f\n", i, + vtx.Position[0]/4096.0f, vtx.Position[1]/4096.0f, + vtx.Position[2]/4096.0f, vtx.Position[3]/4096.0f); + }*/ + // X clipping prev = nverts-1; next = 1; c = 0; @@ -603,7 +620,7 @@ void SubmitPolygon() void SubmitVertex() { - s32 vertex[4] = {(s32)CurVertex[0], (s32)CurVertex[1], (s32)CurVertex[2], 0x1000}; + s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; //s32 vertextrans[4]; Vertex* vertextrans = &TempVertexBuffer[VertexNumInPoly]; @@ -769,9 +786,12 @@ void ExecuteCommand() ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; + //if ((entry.Command&0xF0)==0x10) + // printf("MATRIX CMD %02X %08X\n", entry.Command, entry.Param); + if (ExecParamCount >= CmdNumParams[entry.Command]) { - CycleCount += CmdNumCycles[entry.Command]; + //CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; GXStat &= ~(1<<14); @@ -1129,6 +1149,9 @@ void ExecuteCommand() void Run(s32 cycles) { + if (FlushRequest) + return; + if (CycleCount <= 0) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) @@ -1140,8 +1163,7 @@ void Run(s32 cycles) if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { CycleCount = 0; - if (!FlushRequest) - GXStat &= ~(1<<27); + GXStat &= ~(1<<27); } } diff --git a/GPU3D_Soft.cpp b/GPU3D_Soft.cpp index d47975d..6ac4e81 100644 --- a/GPU3D_Soft.cpp +++ b/GPU3D_Soft.cpp @@ -102,7 +102,7 @@ void RenderPolygon(Polygon* polygon) vbot = i; } //if (vtx->Color[0]==63 && vtx->Color[1]==0 && vtx->Color[2]==0) - //printf("v%d: %d,%d W=%d\n", i, scrX, 191-scrY, vtx->Position[3]); + //printf("v%d: %d,%d Z=%f W=%f\n", i, scrX, 191-scrY, vtx->Position[2]/4096.0f, vtx->Position[3]/4096.0f); } // draw, line per line @@ -176,11 +176,16 @@ void RenderPolygon(Polygon* polygon) s32 xl = scrcoords[lcur][0] + (((scrcoords[lnext][0] - scrcoords[lcur][0]) * lfactor) >> 12); s32 xr = scrcoords[rcur][0] + (((scrcoords[rnext][0] - scrcoords[rcur][0]) * rfactor) >> 12); + if (xl<0 || xr>255) continue; // hax + //if (vlcur->Color[0]==0 && vlcur->Color[1]==63 && vlcur->Color[2]==0) - // printf("y:%d xleft:%d xright:%d %d,%d %d,%d\n", y, xl, xr, lcur, rcur, vtop, vbot); + /*printf("y:%d xleft:%d xright:%d %d,%d %d,%d | left: %d to %d right: %d to %d\n", + y, xl, xr, lcur, rcur, vtop, vbot, + scrcoords[lcur][0], scrcoords[lnext][0], + scrcoords[rcur][0], scrcoords[rnext][0]);*/ - //s32 zl = scrcoords[lcur][3] + (((scrcoords[lnext][3] - scrcoords[lcur][3]) * lfactor) >> 12); - //s32 zr = scrcoords[rcur][3] + (((scrcoords[rnext][3] - scrcoords[rcur][3]) * rfactor) >> 12); + //s32 zl = scrcoords[lcur][2] + (((scrcoords[lnext][2] - scrcoords[lcur][2]) * lfactor) >> 12); + //s32 zr = scrcoords[rcur][2] + (((scrcoords[rnext][2] - scrcoords[rcur][2]) * rfactor) >> 12); u8 rl = vlcur->Color[0] + (((vlnext->Color[0] - vlcur->Color[0]) * lfactor) >> 12); u8 gl = vlcur->Color[1] + (((vlnext->Color[1] - vlcur->Color[1]) * lfactor) >> 12); @@ -200,12 +205,12 @@ void RenderPolygon(Polygon* polygon) { s32 xfactor = (x - xl) * xdiv; - //s32 z = (zl << 12) + ((zr - zl) * xfactor); - //z = zl + (((zr - zl) * xfactor) >> 12); + //s32 z = (((zr - zl) * xfactor) >> 12); + //if (zr!=zl) z = (z << 12) / (zr - zl); //s32 z_inv = ((z>>12)==0) ? 0x1000 : 0x1000000 / (z >> 12); //xfactor = (xfactor * z_inv) >> 12; - //xfactor = (xfactor << 12) / z; + //if (z) xfactor = (xfactor << 12) / z; // TODO: get rid of this shit if (x<0 || x>255 || y<0 || y>191) @@ -248,6 +253,7 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) polygons[i].Vertices[j]->Position[1]/4096.0f, polygons[i].Vertices[j]->Position[2]/4096.0f); */ + //printf("polygon %d\n", i); RenderPolygon(&polygons[i]); } } diff --git a/melonDS.depend b/melonDS.depend index 08e52b7..51e832c 100644 --- a/melonDS.depend +++ b/melonDS.depend @@ -1,5 +1,5 @@ # depslib dependency file v1.0 -1486824787 source:c:\documents\sources\melonds\main.cpp +1486993536 source:c:\documents\sources\melonds\main.cpp <stdio.h> <windows.h> "NDS.h" @@ -10,7 +10,7 @@ 1481161027 c:\documents\sources\melonds\types.h -1486947856 source:c:\documents\sources\melonds\nds.cpp +1486994139 source:c:\documents\sources\melonds\nds.cpp <stdio.h> <string.h> "NDS.h" @@ -148,14 +148,14 @@ 1486777933 c:\documents\sources\melonds\gpu3d.h -1486947978 source:c:\documents\sources\melonds\gpu3d.cpp +1486993935 source:c:\documents\sources\melonds\gpu3d.cpp <stdio.h> <string.h> "NDS.h" "GPU.h" "FIFO.h" -1486947027 source:c:\documents\sources\melonds\gpu3d_soft.cpp +1486994049 source:c:\documents\sources\melonds\gpu3d_soft.cpp <stdio.h> <string.h> "NDS.h" |