aboutsummaryrefslogtreecommitdiff
path: root/src/GPU3D.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/GPU3D.cpp')
-rw-r--r--src/GPU3D.cpp521
1 files changed, 397 insertions, 124 deletions
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 6e4d26a..df27913 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -71,6 +71,27 @@
// clear attributes
//
// TODO: check how DISP_1DOT_DEPTH works and whether it's latched
+//
+// TODO: emulate GPU hanging
+// * when calling BEGIN with an incomplete polygon defined
+// * probably same with BOXTEST
+// * when sending vertices immediately after a BOXTEST
+//
+// TODO: test results should probably not be presented immediately, even if we set the busy flag
+
+
+// command execution notes
+//
+// timings given by GBAtek are for individual commands
+// actual display lists have different timing characteristics
+// * vertex pipeline: individual vertex commands are able to execute in parallel
+// with certain other commands
+// * similarly, the normal command can execute in parallel with a subsequent vertex
+// * polygon pipeline: each vertex which completes a polygon takes longer to run
+// and imposes rules on when further vertex commands can run
+// (one every 9-cycle time slot during polygon setup)
+// polygon setup time is 27 cycles for a triangle and 36 for a quad
+// * additionally, some commands (BEGIN, LIGHT_VECTOR, BOXTEST) stall the polygon pipeline
namespace GPU3D
@@ -113,43 +134,6 @@ const u32 CmdNumParams[256] =
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-const s32 CmdNumCycles[256] =
-{
- // 0x00
- 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x10
- 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22,
- 0, 0, 0,
- // 0x20
- 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1,
- 0, 0, 0, 0,
- // 0x30
- 4, 4, 6, 1, 32,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x40
- 1, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x50
- 392,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x60
- 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x70
- 103, 9, 5,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- // 0x80+
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
typedef union
{
u64 _contents;
@@ -164,6 +148,8 @@ typedef union
FIFO<CmdFIFOEntry>* CmdFIFO;
FIFO<CmdFIFOEntry>* CmdPIPE;
+FIFO<CmdFIFOEntry>* CmdStallQueue;
+
u32 NumCommands, CurCommand, ParamCount, TotalParams;
u32 DispCnt;
@@ -192,7 +178,13 @@ u32 GXStat;
u32 ExecParams[32];
u32 ExecParamCount;
+
s32 CycleCount;
+s32 VertexPipeline;
+s32 NormalPipeline;
+s32 PolygonPipeline;
+s32 VertexSlotCounter;
+u32 VertexSlotsFree;
u32 NumPushPopCommands;
u32 NumTestCommands;
@@ -211,8 +203,8 @@ bool ClipMatrixDirty;
u32 Viewport[6];
s32 ProjMatrixStack[16];
-s32 PosMatrixStack[31][16];
-s32 VecMatrixStack[31][16];
+s32 PosMatrixStack[32][16];
+s32 VecMatrixStack[32][16];
s32 TexMatrixStack[16];
s32 ProjMatrixStackPointer;
s32 PosMatrixStackPointer;
@@ -276,6 +268,8 @@ bool Init()
CmdFIFO = new FIFO<CmdFIFOEntry>(256);
CmdPIPE = new FIFO<CmdFIFOEntry>(4);
+ CmdStallQueue = new FIFO<CmdFIFOEntry>(64);
+
if (!SoftRenderer::Init()) return false;
return true;
@@ -287,6 +281,8 @@ void DeInit()
delete CmdFIFO;
delete CmdPIPE;
+
+ delete CmdStallQueue;
}
void Reset()
@@ -294,6 +290,8 @@ void Reset()
CmdFIFO->Clear();
CmdPIPE->Clear();
+ CmdStallQueue->Clear();
+
NumCommands = 0;
CurCommand = 0;
ParamCount = 0;
@@ -309,7 +307,13 @@ void Reset()
memset(ExecParams, 0, 32*4);
ExecParamCount = 0;
+
CycleCount = 0;
+ VertexPipeline = 0;
+ NormalPipeline = 0;
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 1;
MatrixMode = 0;
@@ -387,8 +391,8 @@ void DoSavestate(Savestate* file)
file->VarArray(TexMatrix, 16*4);
file->VarArray(ProjMatrixStack, 16*4);
- file->VarArray(PosMatrixStack, 31*16*4);
- file->VarArray(VecMatrixStack, 31*16*4);
+ file->VarArray(PosMatrixStack, 32*16*4);
+ file->VarArray(VecMatrixStack, 32*16*4);
file->VarArray(TexMatrixStack, 16*4);
file->Var32((u32*)&ProjMatrixStackPointer);
@@ -514,6 +518,28 @@ void DoSavestate(Savestate* file)
// probably not worth storing the vblank-latched Renderxxxxxx variables
+ if (file->IsAtleastVersion(2, 1))
+ {
+ // command stall queue, only in version 2.1 and up
+ CmdStallQueue->DoSavestate(file);
+ file->Var32((u32*)&VertexPipeline);
+ file->Var32((u32*)&NormalPipeline);
+ file->Var32((u32*)&PolygonPipeline);
+ file->Var32((u32*)&VertexSlotCounter);
+ file->Var32(&VertexSlotsFree);
+ }
+ else
+ {
+ // for version 2.0, just clear it. not having it doesn't matter
+ // if this comes from older melonDS revisions.
+ CmdStallQueue->Clear();
+ VertexPipeline = 0;
+ NormalPipeline = 0;
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 1;
+ }
+
if (!file->Saving)
{
ClipMatrixDirty = true;
@@ -664,6 +690,105 @@ void UpdateClipMatrix()
+void AddCycles(s32 num)
+{
+ CycleCount += num;
+
+ if (VertexPipeline > 0)
+ {
+ if (VertexPipeline > num) VertexPipeline -= num;
+ else VertexPipeline = 0;
+ }
+
+ if (PolygonPipeline > 0)
+ {
+ if (PolygonPipeline > num)
+ {
+ PolygonPipeline -= num;
+ VertexSlotCounter += num;
+ while (VertexSlotCounter > 9)
+ {
+ VertexSlotCounter -= 9;
+ VertexSlotsFree >>= 1;
+ }
+ }
+ else
+ {
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 0x1;
+ }
+ }
+}
+
+void NextVertexSlot()
+{
+ s32 num = (9 - VertexSlotCounter) + 1;
+
+ for (;;)
+ {
+ CycleCount += num;
+
+ if (VertexPipeline > 0)
+ {
+ if (VertexPipeline > num) VertexPipeline -= num;
+ else VertexPipeline = 0;
+ }
+
+ if (PolygonPipeline > 0)
+ {
+ if (PolygonPipeline > num)
+ {
+ PolygonPipeline -= num;
+ VertexSlotCounter = 1;
+ VertexSlotsFree >>= 1;
+ if (VertexSlotsFree & 0x1)
+ {
+ VertexSlotsFree &= ~0x1;
+ break;
+ }
+ else
+ {
+ num = 9;
+ continue;
+ }
+ }
+ else
+ {
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 1;
+ break;
+ }
+ }
+ }
+}
+
+void StallPolygonPipeline(s32 delay, s32 nonstalldelay)
+{
+ if (PolygonPipeline > 0)
+ {
+ CycleCount += PolygonPipeline + delay;
+
+ // can be safely assumed those two will go to zero
+ VertexPipeline = 0;
+ NormalPipeline = 0;
+
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 1;
+ }
+ else
+ {
+ if (VertexPipeline > nonstalldelay)
+ AddCycles((VertexPipeline - nonstalldelay) + 1);
+ else
+ AddCycles(NormalPipeline + 1);
+ }
+}
+
+
+
template<int comp, s32 plane, bool attribs>
void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin)
{
@@ -811,6 +936,22 @@ void SubmitPolygon()
int nverts = PolygonMode & 0x1 ? 4:3;
int prev, next;
+ // submitting a polygon starts the polygon pipeline
+ if (nverts == 4)
+ {
+ PolygonPipeline = 35;
+ VertexSlotCounter = 1;
+ if (PolygonMode & 0x2) VertexSlotsFree = 0b11100;
+ else VertexSlotsFree = 0b11110;
+ }
+ else
+ {
+ PolygonPipeline = 26;
+ VertexSlotCounter = 1;
+ if (PolygonMode & 0x2) VertexSlotsFree = 0b1000;
+ else VertexSlotsFree = 0b1110;
+ }
+
// culling
// TODO: work out how it works on the real thing
// the normalization part is a wild guess
@@ -860,7 +1001,7 @@ void SubmitPolygon()
}
// for strips, check whether we can attach to the previous polygon
- // this requires two vertices shared with the previous polygon, and that
+ // this requires two original vertices shared with the previous polygon, and that
// the two polygons be of the same type
if (PolygonMode >= 2 && LastStripPolygon)
@@ -1093,6 +1234,8 @@ void SubmitVertex()
vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12;
vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12;
+ // this probably shouldn't be.
+ // the way color is handled during clipping needs investigation. TODO
vertextrans->Color[0] = (VertexColor[0] << 12) + 0xFFF;
vertextrans->Color[1] = (VertexColor[1] << 12) + 0xFFF;
vertextrans->Color[2] = (VertexColor[2] << 12) + 0xFFF;
@@ -1173,9 +1316,12 @@ void SubmitVertex()
}
break;
}
+
+ VertexPipeline = 7;
+ AddCycles(3);
}
-s32 CalculateLighting()
+void CalculateLighting()
{
if ((TexParam >> 30) == 2)
{
@@ -1244,8 +1390,9 @@ s32 CalculateLighting()
c++;
}
- // checkme: cycle count
- return c;
+ if (c < 1) c = 1;
+ NormalPipeline = 7;
+ AddCycles(c);
}
@@ -1255,6 +1402,7 @@ void BoxTest(u32* params)
Vertex face[10];
int res;
+ AddCycles(254);
GXStat &= ~(1<<1);
s16 x0 = (s16)(params[0] & 0xFFFF);
@@ -1354,6 +1502,8 @@ void PosTest()
PosTestResult[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12;
PosTestResult[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12;
PosTestResult[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12;
+
+ AddCycles(5);
}
void VecTest(u32* params)
@@ -1373,6 +1523,8 @@ void VecTest(u32* params)
if (VecTestResult[0] & 0x1000) VecTestResult[0] |= 0xF000;
if (VecTestResult[1] & 0x1000) VecTestResult[1] |= 0xF000;
if (VecTestResult[2] & 0x1000) VecTestResult[2] |= 0xF000;
+
+ AddCycles(4);
}
@@ -1387,17 +1539,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)
{
if (CmdFIFO->IsFull())
{
- //printf("!!! GX FIFO FULL\n");
- //return;
+ // store it to the stall queue. stall the system.
+ // worst case is if a STMxx opcode causes this, which is why our stall queue
+ // has 64 entries. this is less complicated than trying to make STMxx stall-able.
- // temp. hack
- // SM64DS seems to overflow the FIFO occasionally
- // either leftover bugs in our implementation, or the game accidentally doing that
- // TODO: investigate.
- // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore)
-
- while (CmdFIFO->IsFull())
- ExecuteCommand();
+ CmdStallQueue->Write(entry);
+ NDS::GXFIFOStall();
+ return;
}
CmdFIFO->Write(entry);
@@ -1426,6 +1574,21 @@ CmdFIFOEntry CmdFIFORead()
if (!CmdFIFO->IsEmpty())
CmdPIPE->Write(CmdFIFO->Read());
+ // empty stall queue if needed
+ // CmdFIFO should not be full at this point.
+ if (!CmdStallQueue->IsEmpty())
+ {
+ while (!CmdStallQueue->IsEmpty())
+ {
+ if (CmdFIFO->IsFull()) break;
+ CmdFIFOEntry entry = CmdStallQueue->Read();
+ CmdFIFOWrite(entry);
+ }
+
+ if (CmdStallQueue->IsEmpty())
+ NDS::GXFIFOUnstall();
+ }
+
CheckFIFODMA();
CheckFIFOIRQ();
}
@@ -1441,6 +1604,63 @@ void ExecuteCommand()
//printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level());
+ // each FIFO entry takes 1 cycle to be processed
+ // commands (presumably) run when all the needed parameters have been read
+ // which is where we add the remaining cycles if any
+ if (ExecParamCount == 0)
+ {
+ // delay the first command entry as needed
+ switch (entry.Command)
+ {
+ // commands that stall the polygon pipeline
+ case 0x32: StallPolygonPipeline(8 + 1, 2); break; // 32 can run 6 cycles after a vertex
+ case 0x40: StallPolygonPipeline(1, 0); break;
+ case 0x70: StallPolygonPipeline(10 + 1, 0); break;
+
+ case 0x23:
+ case 0x24:
+ case 0x25:
+ case 0x26:
+ case 0x27:
+ case 0x28:
+ // vertex
+ if (!(VertexSlotsFree & 0x1)) NextVertexSlot();
+ else AddCycles(1);
+ NormalPipeline = 0;
+ break;
+
+ case 0x20:
+ case 0x30:
+ case 0x31:
+ case 0x72:
+ // commands that can run 6 cycles after a vertex
+ if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1);
+ else AddCycles(NormalPipeline + 1);
+ break;
+
+ case 0x29:
+ case 0x2A:
+ case 0x2B:
+ case 0x33:
+ case 0x34:
+ case 0x41:
+ case 0x60:
+ case 0x71:
+ // command that can run 8 cycles after a vertex
+ if (VertexPipeline > 0) AddCycles(VertexPipeline + 1);
+ else AddCycles(NormalPipeline + 1);
+ break;
+
+ default:
+ // all other commands can run 4 cycles after a vertex
+ // no need to do much here since that is the minimum
+ AddCycles(NormalPipeline + 1);
+ break;
+ }
+ }
+ else
+ AddCycles(1);
+
ExecParams[ExecParamCount] = entry.Param;
ExecParamCount++;
@@ -1449,11 +1669,8 @@ void ExecuteCommand()
/*printf("0x%02X, ", entry.Command);
for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]);
printf("\n");*/
- CycleCount += CmdNumCycles[entry.Command];
- ExecParamCount = 0;
- if (CycleCount > 0)
- GXStat |= (1<<27);
+ ExecParamCount = 0;
switch (entry.Command)
{
@@ -1465,86 +1682,65 @@ void ExecuteCommand()
NumPushPopCommands--;
if (MatrixMode == 0)
{
- if (ProjMatrixStackPointer > 0)
- {
- printf("!! PROJ MATRIX STACK OVERFLOW\n");
- GXStat |= (1<<15);
- break;
- }
+ if (ProjMatrixStackPointer > 0) GXStat |= (1<<15);
memcpy(ProjMatrixStack, ProjMatrix, 16*4);
ProjMatrixStackPointer++;
+ ProjMatrixStackPointer &= 0x1;
}
else if (MatrixMode == 3)
{
- if (TexMatrixStackPointer > 0)
- {
- printf("!! TEX MATRIX STACK OVERFLOW\n");
- GXStat |= (1<<15);
- break;
- }
+ if (TexMatrixStackPointer > 0) GXStat |= (1<<15);
memcpy(TexMatrixStack, TexMatrix, 16*4);
TexMatrixStackPointer++;
+ TexMatrixStackPointer &= 0x1;
}
else
{
- if (PosMatrixStackPointer > 30)
- {
- printf("!! POS MATRIX STACK OVERFLOW\n");
- GXStat |= (1<<15);
- break;
- }
+ if (PosMatrixStackPointer > 30) GXStat |= (1<<15);
- memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4);
- memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4);
+ memcpy(PosMatrixStack[PosMatrixStackPointer & 0x1F], PosMatrix, 16*4);
+ memcpy(VecMatrixStack[PosMatrixStackPointer & 0x1F], VecMatrix, 16*4);
PosMatrixStackPointer++;
+ PosMatrixStackPointer &= 0x3F;
}
+ AddCycles(16);
break;
case 0x12: // pop matrix
NumPushPopCommands--;
if (MatrixMode == 0)
{
- if (ProjMatrixStackPointer <= 0)
- {
- printf("!! PROJ MATRIX STACK UNDERFLOW\n");
- GXStat |= (1<<15);
- break;
- }
+ if (ProjMatrixStackPointer == 0) GXStat |= (1<<15);
ProjMatrixStackPointer--;
+ ProjMatrixStackPointer &= 0x1;
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
ClipMatrixDirty = true;
+ AddCycles(35);
}
else if (MatrixMode == 3)
{
- if (TexMatrixStackPointer <= 0)
- {
- printf("!! TEX MATRIX STACK UNDERFLOW\n");
- GXStat |= (1<<15);
- break;
- }
+ if (TexMatrixStackPointer == 0) GXStat |= (1<<15);
TexMatrixStackPointer--;
+ TexMatrixStackPointer &= 0x1;
memcpy(TexMatrix, TexMatrixStack, 16*4);
+ AddCycles(17);
}
else
{
s32 offset = (s32)(ExecParams[0] << 26) >> 26;
PosMatrixStackPointer -= offset;
+ PosMatrixStackPointer &= 0x3F;
- if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30)
- {
- //printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer);
- PosMatrixStackPointer += offset;
- GXStat |= (1<<15);
- break;
- }
+ if (PosMatrixStackPointer > 30) GXStat |= (1<<15);
- memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4);
- memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4);
+ memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer & 0x1F], 16*4);
+ memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer & 0x1F], 16*4);
ClipMatrixDirty = true;
+ AddCycles(35);
}
break;
@@ -1560,16 +1756,12 @@ void ExecuteCommand()
else
{
u32 addr = ExecParams[0] & 0x1F;
- if (addr > 30)
- {
- printf("!! POS MATRIX STORE ADDR 31\n");
- GXStat |= (1<<15);
- break;
- }
+ if (addr > 30) GXStat |= (1<<15);
memcpy(PosMatrixStack[addr], PosMatrix, 16*4);
memcpy(VecMatrixStack[addr], VecMatrix, 16*4);
}
+ AddCycles(16);
break;
case 0x14: // restore matrix
@@ -1577,24 +1769,22 @@ void ExecuteCommand()
{
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
ClipMatrixDirty = true;
+ AddCycles(35);
}
else if (MatrixMode == 3)
{
memcpy(TexMatrix, TexMatrixStack, 16*4);
+ AddCycles(17);
}
else
{
u32 addr = ExecParams[0] & 0x1F;
- if (addr > 30)
- {
- printf("!! POS MATRIX STORE ADDR 31\n");
- GXStat |= (1<<15);
- break;
- }
+ if (addr > 30) GXStat |= (1<<15);
memcpy(PosMatrix, PosMatrixStack[addr], 16*4);
memcpy(VecMatrix, VecMatrixStack[addr], 16*4);
ClipMatrixDirty = true;
+ AddCycles(35);
}
break;
@@ -1603,6 +1793,7 @@ void ExecuteCommand()
{
MatrixLoadIdentity(ProjMatrix);
ClipMatrixDirty = true;
+ AddCycles(18);
}
else if (MatrixMode == 3)
MatrixLoadIdentity(TexMatrix);
@@ -1612,6 +1803,7 @@ void ExecuteCommand()
if (MatrixMode == 2)
MatrixLoadIdentity(VecMatrix);
ClipMatrixDirty = true;
+ AddCycles(18);
}
break;
@@ -1620,15 +1812,20 @@ void ExecuteCommand()
{
MatrixLoad4x4(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(18);
}
else if (MatrixMode == 3)
+ {
MatrixLoad4x4(TexMatrix, (s32*)ExecParams);
+ AddCycles(10);
+ }
else
{
MatrixLoad4x4(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
MatrixLoad4x4(VecMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(18);
}
break;
@@ -1637,15 +1834,20 @@ void ExecuteCommand()
{
MatrixLoad4x3(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(18);
}
else if (MatrixMode == 3)
+ {
MatrixLoad4x3(TexMatrix, (s32*)ExecParams);
+ AddCycles(7);
+ }
else
{
MatrixLoad4x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
MatrixLoad4x3(VecMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(18);
}
break;
@@ -1654,17 +1856,22 @@ void ExecuteCommand()
{
MatrixMult4x4(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 16);
}
else if (MatrixMode == 3)
+ {
MatrixMult4x4(TexMatrix, (s32*)ExecParams);
+ AddCycles(33 - 16);
+ }
else
{
MatrixMult4x4(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult4x4(VecMatrix, (s32*)ExecParams);
- CycleCount += 30;
+ AddCycles(35 + 30 - 16);
}
+ else AddCycles(35 - 16);
ClipMatrixDirty = true;
}
break;
@@ -1674,17 +1881,22 @@ void ExecuteCommand()
{
MatrixMult4x3(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 12);
}
else if (MatrixMode == 3)
+ {
MatrixMult4x3(TexMatrix, (s32*)ExecParams);
+ AddCycles(33 - 12);
+ }
else
{
MatrixMult4x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult4x3(VecMatrix, (s32*)ExecParams);
- CycleCount += 30;
+ AddCycles(35 + 30 - 12);
}
+ else AddCycles(35 - 12);
ClipMatrixDirty = true;
}
break;
@@ -1694,17 +1906,22 @@ void ExecuteCommand()
{
MatrixMult3x3(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 9);
}
else if (MatrixMode == 3)
+ {
MatrixMult3x3(TexMatrix, (s32*)ExecParams);
+ AddCycles(33 - 9);
+ }
else
{
MatrixMult3x3(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
{
MatrixMult3x3(VecMatrix, (s32*)ExecParams);
- CycleCount += 30;
+ AddCycles(35 + 30 - 9);
}
+ else AddCycles(35 - 9);
ClipMatrixDirty = true;
}
break;
@@ -1714,13 +1931,18 @@ void ExecuteCommand()
{
MatrixScale(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 3);
}
else if (MatrixMode == 3)
+ {
MatrixScale(TexMatrix, (s32*)ExecParams);
+ AddCycles(33 - 3);
+ }
else
{
MatrixScale(PosMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 3);
}
break;
@@ -1729,14 +1951,22 @@ void ExecuteCommand()
{
MatrixTranslate(ProjMatrix, (s32*)ExecParams);
ClipMatrixDirty = true;
+ AddCycles(35 - 3);
}
else if (MatrixMode == 3)
+ {
MatrixTranslate(TexMatrix, (s32*)ExecParams);
+ AddCycles(33 - 3);
+ }
else
{
MatrixTranslate(PosMatrix, (s32*)ExecParams);
if (MatrixMode == 2)
+ {
MatrixTranslate(VecMatrix, (s32*)ExecParams);
+ AddCycles(35 + 30 - 3);
+ }
+ else AddCycles(35 - 3);
ClipMatrixDirty = true;
}
break;
@@ -1757,7 +1987,7 @@ void ExecuteCommand()
Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6;
Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6;
Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6;
- CycleCount += CalculateLighting();
+ CalculateLighting();
break;
case 0x22: // texcoord
@@ -1839,6 +2069,7 @@ void ExecuteCommand()
VertexColor[1] = MatDiffuse[1];
VertexColor[2] = MatDiffuse[2];
}
+ AddCycles(3);
break;
case 0x31: // specular/emission material
@@ -1849,6 +2080,7 @@ void ExecuteCommand()
MatEmission[1] = (ExecParams[0] >> 21) & 0x1F;
MatEmission[2] = (ExecParams[0] >> 26) & 0x1F;
UseShininessTable = (ExecParams[0] & 0x8000) != 0;
+ AddCycles(3);
break;
case 0x32: // light direction
@@ -1862,6 +2094,7 @@ void ExecuteCommand()
LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12;
LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12;
}
+ AddCycles(5);
break;
case 0x33: // light color
@@ -1871,6 +2104,7 @@ void ExecuteCommand()
LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F;
LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F;
}
+ AddCycles(1);
break;
case 0x34: // shininess table
@@ -1887,6 +2121,8 @@ void ExecuteCommand()
break;
case 0x40: // begin polygons
+ // TODO: check if there was a polygon being defined but incomplete
+ // such cases seem to freeze the GPU
PolygonMode = ExecParams[0] & 0x3;
VertexNum = 0;
VertexNumInPoly = 0;
@@ -1895,10 +2131,24 @@ void ExecuteCommand()
CurPolygonAttr = PolygonAttr;
break;
+ case 0x41: // end polygons
+ // TODO: research this?
+ // it doesn't seem to have any effect whatsoever, but
+ // its timing characteristics are different from those of other
+ // no-op commands
+ break;
+
case 0x50: // flush
FlushRequest = 1;
FlushAttributes = ExecParams[0] & 0x3;
CycleCount = 392;
+ // probably safe to just reset all pipelines
+ // but needs checked
+ VertexPipeline = 0;
+ NormalPipeline = 0;
+ PolygonPipeline = 0;
+ VertexSlotCounter = 0;
+ VertexSlotsFree = 1;
break;
case 0x60: // viewport x1,y1,x2,y2
@@ -1930,18 +2180,41 @@ void ExecuteCommand()
break;
default:
- //if (entry.Command != 0x41)
- //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param);
+ //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param);
break;
}
}
+
+ if (CycleCount > 0 || !CmdPIPE->IsEmpty() ||
+ VertexPipeline || NormalPipeline || PolygonPipeline)
+ GXStat |= (1<<27);
+}
+
+s32 CyclesToRunFor()
+{
+ if (CycleCount < 0) return 0;
+ return CycleCount;
+}
+
+void FinishWork(s32 cycles)
+{
+ AddCycles(cycles);
+ if (NormalPipeline)
+ NormalPipeline -= std::min(NormalPipeline, cycles);
+
+ CycleCount = 0;
+
+ if (VertexPipeline || NormalPipeline || PolygonPipeline)
+ return;
+
+ GXStat &= ~(1<<27);
}
void Run(s32 cycles)
{
if (FlushRequest)
return;
- if (CycleCount <= 0 && CmdPIPE->IsEmpty())
+ if (CmdPIPE->IsEmpty() && !(GXStat & (1<<27)))
return;
CycleCount -= cycles;
@@ -1959,8 +2232,8 @@ void Run(s32 cycles)
if (CycleCount <= 0 && CmdPIPE->IsEmpty())
{
- CycleCount = 0;
- GXStat &= ~(1<<27);
+ if (GXStat & (1<<27)) FinishWork(-CycleCount);
+ else CycleCount = 0;
if (NumPushPopCommands == 0) GXStat &= ~(1<<14);
if (NumTestCommands == 0) GXStat &= ~(1<<0);
@@ -2254,7 +2527,7 @@ void Write8(u32 addr, u8 val)
GXStat &= ~0x8000;
ProjMatrixStackPointer = 0;
//PosMatrixStackPointer = 0;
- TexMatrixStackPointer = 0;
+ TexMatrixStackPointer = 0; // CHECKME
}
return;
case 0x04000603:
@@ -2319,7 +2592,7 @@ void Write16(u32 addr, u16 val)
GXStat &= ~0x8000;
ProjMatrixStackPointer = 0;
//PosMatrixStackPointer = 0;
- TexMatrixStackPointer = 0;
+ TexMatrixStackPointer = 0; // CHECKME
}
return;
case 0x04000602:
@@ -2389,7 +2662,7 @@ void Write32(u32 addr, u32 val)
GXStat &= ~0x8000;
ProjMatrixStackPointer = 0;
//PosMatrixStackPointer = 0;
- TexMatrixStackPointer = 0;
+ TexMatrixStackPointer = 0; // CHECKME
}
val &= 0xC0000000;
GXStat &= 0x3FFFFFFF;