diff options
Diffstat (limited to 'src/GPU3D.cpp')
-rw-r--r-- | src/GPU3D.cpp | 521 |
1 files changed, 397 insertions, 124 deletions
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 6e4d26a..df27913 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -71,6 +71,27 @@ // clear attributes // // TODO: check how DISP_1DOT_DEPTH works and whether it's latched +// +// TODO: emulate GPU hanging +// * when calling BEGIN with an incomplete polygon defined +// * probably same with BOXTEST +// * when sending vertices immediately after a BOXTEST +// +// TODO: test results should probably not be presented immediately, even if we set the busy flag + + +// command execution notes +// +// timings given by GBAtek are for individual commands +// actual display lists have different timing characteristics +// * vertex pipeline: individual vertex commands are able to execute in parallel +// with certain other commands +// * similarly, the normal command can execute in parallel with a subsequent vertex +// * polygon pipeline: each vertex which completes a polygon takes longer to run +// and imposes rules on when further vertex commands can run +// (one every 9-cycle time slot during polygon setup) +// polygon setup time is 27 cycles for a triangle and 36 for a quad +// * additionally, some commands (BEGIN, LIGHT_VECTOR, BOXTEST) stall the polygon pipeline namespace GPU3D @@ -113,43 +134,6 @@ const u32 CmdNumParams[256] = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -const s32 CmdNumCycles[256] = -{ - // 0x00 - 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x10 - 1, 17, 36, 17, 36, 19, 34, 30, 35, 31, 28, 22, 22, - 0, 0, 0, - // 0x20 - 1, 9, 1, 9, 8, 8, 8, 8, 8, 1, 1, 1, - 0, 0, 0, 0, - // 0x30 - 4, 4, 6, 1, 32, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x40 - 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x50 - 392, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x60 - 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x70 - 103, 9, 5, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // 0x80+ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - typedef union { u64 _contents; @@ -164,6 +148,8 @@ typedef union FIFO<CmdFIFOEntry>* CmdFIFO; FIFO<CmdFIFOEntry>* CmdPIPE; +FIFO<CmdFIFOEntry>* CmdStallQueue; + u32 NumCommands, CurCommand, ParamCount, TotalParams; u32 DispCnt; @@ -192,7 +178,13 @@ u32 GXStat; u32 ExecParams[32]; u32 ExecParamCount; + s32 CycleCount; +s32 VertexPipeline; +s32 NormalPipeline; +s32 PolygonPipeline; +s32 VertexSlotCounter; +u32 VertexSlotsFree; u32 NumPushPopCommands; u32 NumTestCommands; @@ -211,8 +203,8 @@ bool ClipMatrixDirty; u32 Viewport[6]; s32 ProjMatrixStack[16]; -s32 PosMatrixStack[31][16]; -s32 VecMatrixStack[31][16]; +s32 PosMatrixStack[32][16]; +s32 VecMatrixStack[32][16]; s32 TexMatrixStack[16]; s32 ProjMatrixStackPointer; s32 PosMatrixStackPointer; @@ -276,6 +268,8 @@ bool Init() CmdFIFO = new FIFO<CmdFIFOEntry>(256); CmdPIPE = new FIFO<CmdFIFOEntry>(4); + CmdStallQueue = new FIFO<CmdFIFOEntry>(64); + if (!SoftRenderer::Init()) return false; return true; @@ -287,6 +281,8 @@ void DeInit() delete CmdFIFO; delete CmdPIPE; + + delete CmdStallQueue; } void Reset() @@ -294,6 +290,8 @@ void Reset() CmdFIFO->Clear(); CmdPIPE->Clear(); + CmdStallQueue->Clear(); + NumCommands = 0; CurCommand = 0; ParamCount = 0; @@ -309,7 +307,13 @@ void Reset() memset(ExecParams, 0, 32*4); ExecParamCount = 0; + CycleCount = 0; + VertexPipeline = 0; + NormalPipeline = 0; + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 1; MatrixMode = 0; @@ -387,8 +391,8 @@ void DoSavestate(Savestate* file) file->VarArray(TexMatrix, 16*4); file->VarArray(ProjMatrixStack, 16*4); - file->VarArray(PosMatrixStack, 31*16*4); - file->VarArray(VecMatrixStack, 31*16*4); + file->VarArray(PosMatrixStack, 32*16*4); + file->VarArray(VecMatrixStack, 32*16*4); file->VarArray(TexMatrixStack, 16*4); file->Var32((u32*)&ProjMatrixStackPointer); @@ -514,6 +518,28 @@ void DoSavestate(Savestate* file) // probably not worth storing the vblank-latched Renderxxxxxx variables + if (file->IsAtleastVersion(2, 1)) + { + // command stall queue, only in version 2.1 and up + CmdStallQueue->DoSavestate(file); + file->Var32((u32*)&VertexPipeline); + file->Var32((u32*)&NormalPipeline); + file->Var32((u32*)&PolygonPipeline); + file->Var32((u32*)&VertexSlotCounter); + file->Var32(&VertexSlotsFree); + } + else + { + // for version 2.0, just clear it. not having it doesn't matter + // if this comes from older melonDS revisions. + CmdStallQueue->Clear(); + VertexPipeline = 0; + NormalPipeline = 0; + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 1; + } + if (!file->Saving) { ClipMatrixDirty = true; @@ -664,6 +690,105 @@ void UpdateClipMatrix() +void AddCycles(s32 num) +{ + CycleCount += num; + + if (VertexPipeline > 0) + { + if (VertexPipeline > num) VertexPipeline -= num; + else VertexPipeline = 0; + } + + if (PolygonPipeline > 0) + { + if (PolygonPipeline > num) + { + PolygonPipeline -= num; + VertexSlotCounter += num; + while (VertexSlotCounter > 9) + { + VertexSlotCounter -= 9; + VertexSlotsFree >>= 1; + } + } + else + { + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 0x1; + } + } +} + +void NextVertexSlot() +{ + s32 num = (9 - VertexSlotCounter) + 1; + + for (;;) + { + CycleCount += num; + + if (VertexPipeline > 0) + { + if (VertexPipeline > num) VertexPipeline -= num; + else VertexPipeline = 0; + } + + if (PolygonPipeline > 0) + { + if (PolygonPipeline > num) + { + PolygonPipeline -= num; + VertexSlotCounter = 1; + VertexSlotsFree >>= 1; + if (VertexSlotsFree & 0x1) + { + VertexSlotsFree &= ~0x1; + break; + } + else + { + num = 9; + continue; + } + } + else + { + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 1; + break; + } + } + } +} + +void StallPolygonPipeline(s32 delay, s32 nonstalldelay) +{ + if (PolygonPipeline > 0) + { + CycleCount += PolygonPipeline + delay; + + // can be safely assumed those two will go to zero + VertexPipeline = 0; + NormalPipeline = 0; + + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 1; + } + else + { + if (VertexPipeline > nonstalldelay) + AddCycles((VertexPipeline - nonstalldelay) + 1); + else + AddCycles(NormalPipeline + 1); + } +} + + + template<int comp, s32 plane, bool attribs> void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) { @@ -811,6 +936,22 @@ void SubmitPolygon() int nverts = PolygonMode & 0x1 ? 4:3; int prev, next; + // submitting a polygon starts the polygon pipeline + if (nverts == 4) + { + PolygonPipeline = 35; + VertexSlotCounter = 1; + if (PolygonMode & 0x2) VertexSlotsFree = 0b11100; + else VertexSlotsFree = 0b11110; + } + else + { + PolygonPipeline = 26; + VertexSlotCounter = 1; + if (PolygonMode & 0x2) VertexSlotsFree = 0b1000; + else VertexSlotsFree = 0b1110; + } + // culling // TODO: work out how it works on the real thing // the normalization part is a wild guess @@ -860,7 +1001,7 @@ void SubmitPolygon() } // for strips, check whether we can attach to the previous polygon - // this requires two vertices shared with the previous polygon, and that + // this requires two original vertices shared with the previous polygon, and that // the two polygons be of the same type if (PolygonMode >= 2 && LastStripPolygon) @@ -1093,6 +1234,8 @@ void SubmitVertex() vertextrans->Position[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; vertextrans->Position[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; + // this probably shouldn't be. + // the way color is handled during clipping needs investigation. TODO vertextrans->Color[0] = (VertexColor[0] << 12) + 0xFFF; vertextrans->Color[1] = (VertexColor[1] << 12) + 0xFFF; vertextrans->Color[2] = (VertexColor[2] << 12) + 0xFFF; @@ -1173,9 +1316,12 @@ void SubmitVertex() } break; } + + VertexPipeline = 7; + AddCycles(3); } -s32 CalculateLighting() +void CalculateLighting() { if ((TexParam >> 30) == 2) { @@ -1244,8 +1390,9 @@ s32 CalculateLighting() c++; } - // checkme: cycle count - return c; + if (c < 1) c = 1; + NormalPipeline = 7; + AddCycles(c); } @@ -1255,6 +1402,7 @@ void BoxTest(u32* params) Vertex face[10]; int res; + AddCycles(254); GXStat &= ~(1<<1); s16 x0 = (s16)(params[0] & 0xFFFF); @@ -1354,6 +1502,8 @@ void PosTest() PosTestResult[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; PosTestResult[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; PosTestResult[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; + + AddCycles(5); } void VecTest(u32* params) @@ -1373,6 +1523,8 @@ void VecTest(u32* params) if (VecTestResult[0] & 0x1000) VecTestResult[0] |= 0xF000; if (VecTestResult[1] & 0x1000) VecTestResult[1] |= 0xF000; if (VecTestResult[2] & 0x1000) VecTestResult[2] |= 0xF000; + + AddCycles(4); } @@ -1387,17 +1539,13 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) { if (CmdFIFO->IsFull()) { - //printf("!!! GX FIFO FULL\n"); - //return; + // store it to the stall queue. stall the system. + // worst case is if a STMxx opcode causes this, which is why our stall queue + // has 64 entries. this is less complicated than trying to make STMxx stall-able. - // temp. hack - // SM64DS seems to overflow the FIFO occasionally - // either leftover bugs in our implementation, or the game accidentally doing that - // TODO: investigate. - // TODO: implement this behavior properly (freezes the bus until the FIFO isn't full anymore) - - while (CmdFIFO->IsFull()) - ExecuteCommand(); + CmdStallQueue->Write(entry); + NDS::GXFIFOStall(); + return; } CmdFIFO->Write(entry); @@ -1426,6 +1574,21 @@ CmdFIFOEntry CmdFIFORead() if (!CmdFIFO->IsEmpty()) CmdPIPE->Write(CmdFIFO->Read()); + // empty stall queue if needed + // CmdFIFO should not be full at this point. + if (!CmdStallQueue->IsEmpty()) + { + while (!CmdStallQueue->IsEmpty()) + { + if (CmdFIFO->IsFull()) break; + CmdFIFOEntry entry = CmdStallQueue->Read(); + CmdFIFOWrite(entry); + } + + if (CmdStallQueue->IsEmpty()) + NDS::GXFIFOUnstall(); + } + CheckFIFODMA(); CheckFIFOIRQ(); } @@ -1441,6 +1604,63 @@ void ExecuteCommand() //printf("FIFO: processing %02X %08X. Levels: FIFO=%d, PIPE=%d\n", entry.Command, entry.Param, CmdFIFO->Level(), CmdPIPE->Level()); + // each FIFO entry takes 1 cycle to be processed + // commands (presumably) run when all the needed parameters have been read + // which is where we add the remaining cycles if any + if (ExecParamCount == 0) + { + // delay the first command entry as needed + switch (entry.Command) + { + // commands that stall the polygon pipeline + case 0x32: StallPolygonPipeline(8 + 1, 2); break; // 32 can run 6 cycles after a vertex + case 0x40: StallPolygonPipeline(1, 0); break; + case 0x70: StallPolygonPipeline(10 + 1, 0); break; + + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + // vertex + if (!(VertexSlotsFree & 0x1)) NextVertexSlot(); + else AddCycles(1); + NormalPipeline = 0; + break; + + case 0x20: + case 0x30: + case 0x31: + case 0x72: + // commands that can run 6 cycles after a vertex + if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1); + else AddCycles(NormalPipeline + 1); + break; + + case 0x29: + case 0x2A: + case 0x2B: + case 0x33: + case 0x34: + case 0x41: + case 0x60: + case 0x71: + // command that can run 8 cycles after a vertex + if (VertexPipeline > 0) AddCycles(VertexPipeline + 1); + else AddCycles(NormalPipeline + 1); + break; + + default: + // all other commands can run 4 cycles after a vertex + // no need to do much here since that is the minimum + AddCycles(NormalPipeline + 1); + break; + } + } + else + AddCycles(1); + ExecParams[ExecParamCount] = entry.Param; ExecParamCount++; @@ -1449,11 +1669,8 @@ void ExecuteCommand() /*printf("0x%02X, ", entry.Command); for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]); printf("\n");*/ - CycleCount += CmdNumCycles[entry.Command]; - ExecParamCount = 0; - if (CycleCount > 0) - GXStat |= (1<<27); + ExecParamCount = 0; switch (entry.Command) { @@ -1465,86 +1682,65 @@ void ExecuteCommand() NumPushPopCommands--; if (MatrixMode == 0) { - if (ProjMatrixStackPointer > 0) - { - printf("!! PROJ MATRIX STACK OVERFLOW\n"); - GXStat |= (1<<15); - break; - } + if (ProjMatrixStackPointer > 0) GXStat |= (1<<15); memcpy(ProjMatrixStack, ProjMatrix, 16*4); ProjMatrixStackPointer++; + ProjMatrixStackPointer &= 0x1; } else if (MatrixMode == 3) { - if (TexMatrixStackPointer > 0) - { - printf("!! TEX MATRIX STACK OVERFLOW\n"); - GXStat |= (1<<15); - break; - } + if (TexMatrixStackPointer > 0) GXStat |= (1<<15); memcpy(TexMatrixStack, TexMatrix, 16*4); TexMatrixStackPointer++; + TexMatrixStackPointer &= 0x1; } else { - if (PosMatrixStackPointer > 30) - { - printf("!! POS MATRIX STACK OVERFLOW\n"); - GXStat |= (1<<15); - break; - } + if (PosMatrixStackPointer > 30) GXStat |= (1<<15); - memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4); - memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4); + memcpy(PosMatrixStack[PosMatrixStackPointer & 0x1F], PosMatrix, 16*4); + memcpy(VecMatrixStack[PosMatrixStackPointer & 0x1F], VecMatrix, 16*4); PosMatrixStackPointer++; + PosMatrixStackPointer &= 0x3F; } + AddCycles(16); break; case 0x12: // pop matrix NumPushPopCommands--; if (MatrixMode == 0) { - if (ProjMatrixStackPointer <= 0) - { - printf("!! PROJ MATRIX STACK UNDERFLOW\n"); - GXStat |= (1<<15); - break; - } + if (ProjMatrixStackPointer == 0) GXStat |= (1<<15); ProjMatrixStackPointer--; + ProjMatrixStackPointer &= 0x1; memcpy(ProjMatrix, ProjMatrixStack, 16*4); ClipMatrixDirty = true; + AddCycles(35); } else if (MatrixMode == 3) { - if (TexMatrixStackPointer <= 0) - { - printf("!! TEX MATRIX STACK UNDERFLOW\n"); - GXStat |= (1<<15); - break; - } + if (TexMatrixStackPointer == 0) GXStat |= (1<<15); TexMatrixStackPointer--; + TexMatrixStackPointer &= 0x1; memcpy(TexMatrix, TexMatrixStack, 16*4); + AddCycles(17); } else { s32 offset = (s32)(ExecParams[0] << 26) >> 26; PosMatrixStackPointer -= offset; + PosMatrixStackPointer &= 0x3F; - if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30) - { - //printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer); - PosMatrixStackPointer += offset; - GXStat |= (1<<15); - break; - } + if (PosMatrixStackPointer > 30) GXStat |= (1<<15); - memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); - memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4); + memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer & 0x1F], 16*4); + memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer & 0x1F], 16*4); ClipMatrixDirty = true; + AddCycles(35); } break; @@ -1560,16 +1756,12 @@ void ExecuteCommand() else { u32 addr = ExecParams[0] & 0x1F; - if (addr > 30) - { - printf("!! POS MATRIX STORE ADDR 31\n"); - GXStat |= (1<<15); - break; - } + if (addr > 30) GXStat |= (1<<15); memcpy(PosMatrixStack[addr], PosMatrix, 16*4); memcpy(VecMatrixStack[addr], VecMatrix, 16*4); } + AddCycles(16); break; case 0x14: // restore matrix @@ -1577,24 +1769,22 @@ void ExecuteCommand() { memcpy(ProjMatrix, ProjMatrixStack, 16*4); ClipMatrixDirty = true; + AddCycles(35); } else if (MatrixMode == 3) { memcpy(TexMatrix, TexMatrixStack, 16*4); + AddCycles(17); } else { u32 addr = ExecParams[0] & 0x1F; - if (addr > 30) - { - printf("!! POS MATRIX STORE ADDR 31\n"); - GXStat |= (1<<15); - break; - } + if (addr > 30) GXStat |= (1<<15); memcpy(PosMatrix, PosMatrixStack[addr], 16*4); memcpy(VecMatrix, VecMatrixStack[addr], 16*4); ClipMatrixDirty = true; + AddCycles(35); } break; @@ -1603,6 +1793,7 @@ void ExecuteCommand() { MatrixLoadIdentity(ProjMatrix); ClipMatrixDirty = true; + AddCycles(18); } else if (MatrixMode == 3) MatrixLoadIdentity(TexMatrix); @@ -1612,6 +1803,7 @@ void ExecuteCommand() if (MatrixMode == 2) MatrixLoadIdentity(VecMatrix); ClipMatrixDirty = true; + AddCycles(18); } break; @@ -1620,15 +1812,20 @@ void ExecuteCommand() { MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(18); } else if (MatrixMode == 3) + { MatrixLoad4x4(TexMatrix, (s32*)ExecParams); + AddCycles(10); + } else { MatrixLoad4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x4(VecMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(18); } break; @@ -1637,15 +1834,20 @@ void ExecuteCommand() { MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(18); } else if (MatrixMode == 3) + { MatrixLoad4x3(TexMatrix, (s32*)ExecParams); + AddCycles(7); + } else { MatrixLoad4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) MatrixLoad4x3(VecMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(18); } break; @@ -1654,17 +1856,22 @@ void ExecuteCommand() { MatrixMult4x4(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 16); } else if (MatrixMode == 3) + { MatrixMult4x4(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 16); + } else { MatrixMult4x4(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult4x4(VecMatrix, (s32*)ExecParams); - CycleCount += 30; + AddCycles(35 + 30 - 16); } + else AddCycles(35 - 16); ClipMatrixDirty = true; } break; @@ -1674,17 +1881,22 @@ void ExecuteCommand() { MatrixMult4x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 12); } else if (MatrixMode == 3) + { MatrixMult4x3(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 12); + } else { MatrixMult4x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult4x3(VecMatrix, (s32*)ExecParams); - CycleCount += 30; + AddCycles(35 + 30 - 12); } + else AddCycles(35 - 12); ClipMatrixDirty = true; } break; @@ -1694,17 +1906,22 @@ void ExecuteCommand() { MatrixMult3x3(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 9); } else if (MatrixMode == 3) + { MatrixMult3x3(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 9); + } else { MatrixMult3x3(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) { MatrixMult3x3(VecMatrix, (s32*)ExecParams); - CycleCount += 30; + AddCycles(35 + 30 - 9); } + else AddCycles(35 - 9); ClipMatrixDirty = true; } break; @@ -1714,13 +1931,18 @@ void ExecuteCommand() { MatrixScale(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 3); } else if (MatrixMode == 3) + { MatrixScale(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 3); + } else { MatrixScale(PosMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 3); } break; @@ -1729,14 +1951,22 @@ void ExecuteCommand() { MatrixTranslate(ProjMatrix, (s32*)ExecParams); ClipMatrixDirty = true; + AddCycles(35 - 3); } else if (MatrixMode == 3) + { MatrixTranslate(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 3); + } else { MatrixTranslate(PosMatrix, (s32*)ExecParams); if (MatrixMode == 2) + { MatrixTranslate(VecMatrix, (s32*)ExecParams); + AddCycles(35 + 30 - 3); + } + else AddCycles(35 - 3); ClipMatrixDirty = true; } break; @@ -1757,7 +1987,7 @@ void ExecuteCommand() Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; - CycleCount += CalculateLighting(); + CalculateLighting(); break; case 0x22: // texcoord @@ -1839,6 +2069,7 @@ void ExecuteCommand() VertexColor[1] = MatDiffuse[1]; VertexColor[2] = MatDiffuse[2]; } + AddCycles(3); break; case 0x31: // specular/emission material @@ -1849,6 +2080,7 @@ void ExecuteCommand() MatEmission[1] = (ExecParams[0] >> 21) & 0x1F; MatEmission[2] = (ExecParams[0] >> 26) & 0x1F; UseShininessTable = (ExecParams[0] & 0x8000) != 0; + AddCycles(3); break; case 0x32: // light direction @@ -1862,6 +2094,7 @@ void ExecuteCommand() LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12; LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12; } + AddCycles(5); break; case 0x33: // light color @@ -1871,6 +2104,7 @@ void ExecuteCommand() LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F; LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F; } + AddCycles(1); break; case 0x34: // shininess table @@ -1887,6 +2121,8 @@ void ExecuteCommand() break; case 0x40: // begin polygons + // TODO: check if there was a polygon being defined but incomplete + // such cases seem to freeze the GPU PolygonMode = ExecParams[0] & 0x3; VertexNum = 0; VertexNumInPoly = 0; @@ -1895,10 +2131,24 @@ void ExecuteCommand() CurPolygonAttr = PolygonAttr; break; + case 0x41: // end polygons + // TODO: research this? + // it doesn't seem to have any effect whatsoever, but + // its timing characteristics are different from those of other + // no-op commands + break; + case 0x50: // flush FlushRequest = 1; FlushAttributes = ExecParams[0] & 0x3; CycleCount = 392; + // probably safe to just reset all pipelines + // but needs checked + VertexPipeline = 0; + NormalPipeline = 0; + PolygonPipeline = 0; + VertexSlotCounter = 0; + VertexSlotsFree = 1; break; case 0x60: // viewport x1,y1,x2,y2 @@ -1930,18 +2180,41 @@ void ExecuteCommand() break; default: - //if (entry.Command != 0x41) - //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param); + //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param); break; } } + + if (CycleCount > 0 || !CmdPIPE->IsEmpty() || + VertexPipeline || NormalPipeline || PolygonPipeline) + GXStat |= (1<<27); +} + +s32 CyclesToRunFor() +{ + if (CycleCount < 0) return 0; + return CycleCount; +} + +void FinishWork(s32 cycles) +{ + AddCycles(cycles); + if (NormalPipeline) + NormalPipeline -= std::min(NormalPipeline, cycles); + + CycleCount = 0; + + if (VertexPipeline || NormalPipeline || PolygonPipeline) + return; + + GXStat &= ~(1<<27); } void Run(s32 cycles) { if (FlushRequest) return; - if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + if (CmdPIPE->IsEmpty() && !(GXStat & (1<<27))) return; CycleCount -= cycles; @@ -1959,8 +2232,8 @@ void Run(s32 cycles) if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { - CycleCount = 0; - GXStat &= ~(1<<27); + if (GXStat & (1<<27)) FinishWork(-CycleCount); + else CycleCount = 0; if (NumPushPopCommands == 0) GXStat &= ~(1<<14); if (NumTestCommands == 0) GXStat &= ~(1<<0); @@ -2254,7 +2527,7 @@ void Write8(u32 addr, u8 val) GXStat &= ~0x8000; ProjMatrixStackPointer = 0; //PosMatrixStackPointer = 0; - TexMatrixStackPointer = 0; + TexMatrixStackPointer = 0; // CHECKME } return; case 0x04000603: @@ -2319,7 +2592,7 @@ void Write16(u32 addr, u16 val) GXStat &= ~0x8000; ProjMatrixStackPointer = 0; //PosMatrixStackPointer = 0; - TexMatrixStackPointer = 0; + TexMatrixStackPointer = 0; // CHECKME } return; case 0x04000602: @@ -2389,7 +2662,7 @@ void Write32(u32 addr, u32 val) GXStat &= ~0x8000; ProjMatrixStackPointer = 0; //PosMatrixStackPointer = 0; - TexMatrixStackPointer = 0; + TexMatrixStackPointer = 0; // CHECKME } val &= 0xC0000000; GXStat &= 0x3FFFFFFF; |