diff options
Diffstat (limited to 'src/GPU3D.cpp')
-rw-r--r-- | src/GPU3D.cpp | 758 |
1 files changed, 394 insertions, 364 deletions
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 7b30426..53fafb3 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -99,7 +99,7 @@ namespace GPU3D { -const u32 CmdNumParams[256] = +const u8 CmdNumParams[256] = { // 0x00 0, @@ -147,10 +147,10 @@ typedef union } CmdFIFOEntry; -FIFO<CmdFIFOEntry>* CmdFIFO; -FIFO<CmdFIFOEntry>* CmdPIPE; +FIFO<CmdFIFOEntry, 256> CmdFIFO; +FIFO<CmdFIFOEntry, 4> CmdPIPE; -FIFO<CmdFIFOEntry>* CmdStallQueue; +FIFO<CmdFIFOEntry, 64> CmdStallQueue; u32 NumCommands, CurCommand, ParamCount, TotalParams; @@ -277,20 +277,11 @@ u32 FlushAttributes; bool Init() { - CmdFIFO = new FIFO<CmdFIFOEntry>(256); - CmdPIPE = new FIFO<CmdFIFOEntry>(4); - - CmdStallQueue = new FIFO<CmdFIFOEntry>(64); - return true; } void DeInit() { - delete CmdFIFO; - delete CmdPIPE; - - delete CmdStallQueue; } void ResetRenderingState() @@ -314,10 +305,10 @@ void ResetRenderingState() void Reset() { - CmdFIFO->Clear(); - CmdPIPE->Clear(); + CmdFIFO.Clear(); + CmdPIPE.Clear(); - CmdStallQueue->Clear(); + CmdStallQueue.Clear(); NumCommands = 0; CurCommand = 0; @@ -395,8 +386,8 @@ void DoSavestate(Savestate* file) { file->Section("GP3D"); - CmdFIFO->DoSavestate(file); - CmdPIPE->DoSavestate(file); + CmdFIFO.DoSavestate(file); + CmdPIPE.DoSavestate(file); file->Var32(&NumCommands); file->Var32(&CurCommand); @@ -593,7 +584,9 @@ void DoSavestate(Savestate* file) } } - CmdStallQueue->DoSavestate(file); + // probably not worth storing the vblank-latched Renderxxxxxx variables + CmdStallQueue.DoSavestate(file); + file->Var32((u32*)&VertexPipeline); file->Var32((u32*)&NormalPipeline); file->Var32((u32*)&PolygonPipeline); @@ -1731,24 +1724,24 @@ void VecTest(u32* params) void CmdFIFOWrite(CmdFIFOEntry& entry) { - if (CmdFIFO->IsEmpty() && !CmdPIPE->IsFull()) + if (CmdFIFO.IsEmpty() && !CmdPIPE.IsFull()) { - CmdPIPE->Write(entry); + CmdPIPE.Write(entry); } else { - if (CmdFIFO->IsFull()) + if (CmdFIFO.IsFull()) { // store it to the stall queue. stall the system. // worst case is if a STMxx opcode causes this, which is why our stall queue // has 64 entries. this is less complicated than trying to make STMxx stall-able. - CmdStallQueue->Write(entry); + CmdStallQueue.Write(entry); NDS::GXFIFOStall(); return; } - CmdFIFO->Write(entry); + CmdFIFO.Write(entry); } GXStat |= (1<<27); @@ -1767,27 +1760,27 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) CmdFIFOEntry CmdFIFORead() { - CmdFIFOEntry ret = CmdPIPE->Read(); + CmdFIFOEntry ret = CmdPIPE.Read(); - if (CmdPIPE->Level() <= 2) + if (CmdPIPE.Level() <= 2) { - if (!CmdFIFO->IsEmpty()) - CmdPIPE->Write(CmdFIFO->Read()); - if (!CmdFIFO->IsEmpty()) - CmdPIPE->Write(CmdFIFO->Read()); + if (!CmdFIFO.IsEmpty()) + CmdPIPE.Write(CmdFIFO.Read()); + if (!CmdFIFO.IsEmpty()) + CmdPIPE.Write(CmdFIFO.Read()); // empty stall queue if needed // CmdFIFO should not be full at this point. - if (!CmdStallQueue->IsEmpty()) + if (!CmdStallQueue.IsEmpty()) { - while (!CmdStallQueue->IsEmpty()) + while (!CmdStallQueue.IsEmpty()) { - if (CmdFIFO->IsFull()) break; - CmdFIFOEntry entry = CmdStallQueue->Read(); + if (CmdFIFO.IsFull()) break; + CmdFIFOEntry entry = CmdStallQueue.Read(); CmdFIFOWrite(entry); } - if (CmdStallQueue->IsEmpty()) + if (CmdStallQueue.IsEmpty()) NDS::GXFIFOUnstall(); } @@ -1798,7 +1791,37 @@ CmdFIFOEntry CmdFIFORead() return ret; } +inline void VertexPipelineSubmitCmd() +{ + // vertex commands 0x24, 0x25, 0x26, 0x27, 0x28 + if (!(VertexSlotsFree & 0x1)) NextVertexSlot(); + else AddCycles(1); + NormalPipeline = 0; +} + +inline void VertexPipelineCmdDelayed6() +{ + // commands 0x20, 0x30, 0x31, 0x72 that can run 6 cycles after a vertex + if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1); + else AddCycles(NormalPipeline + 1); + NormalPipeline = 0; +} +inline void VertexPipelineCmdDelayed8() +{ + // commands 0x29, 0x2A, 0x2B, 0x33, 0x34, 0x41, 0x60, 0x71 that can run 8 cycles after a vertex + if (VertexPipeline > 0) AddCycles(VertexPipeline + 1); + else AddCycles(NormalPipeline + 1); + NormalPipeline = 0; +} + +inline void VertexPipelineCmdDelayed4() +{ + // all other commands can run 4 cycles after a vertex + // no need to do much here since that is the minimum + AddCycles(NormalPipeline + 1); + NormalPipeline = 0; +} void ExecuteCommand() { @@ -1809,81 +1832,23 @@ void ExecuteCommand() // each FIFO entry takes 1 cycle to be processed // commands (presumably) run when all the needed parameters have been read // which is where we add the remaining cycles if any - if (ExecParamCount == 0) - { - // delay the first command entry as needed - switch (entry.Command) - { - // commands that stall the polygon pipeline - case 0x32: StallPolygonPipeline(8 + 1, 2); break; // 32 can run 6 cycles after a vertex - case 0x40: StallPolygonPipeline(1, 0); break; - case 0x70: StallPolygonPipeline(10 + 1, 0); break; - - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - // vertex - if (!(VertexSlotsFree & 0x1)) NextVertexSlot(); - else AddCycles(1); - NormalPipeline = 0; - break; - - case 0x20: - case 0x30: - case 0x31: - case 0x72: - // commands that can run 6 cycles after a vertex - if (VertexPipeline > 2) AddCycles((VertexPipeline - 2) + 1); - else AddCycles(NormalPipeline + 1); - NormalPipeline = 0; - break; - - case 0x29: - case 0x2A: - case 0x2B: - case 0x33: - case 0x34: - case 0x41: - case 0x60: - case 0x71: - // command that can run 8 cycles after a vertex - if (VertexPipeline > 0) AddCycles(VertexPipeline + 1); - else AddCycles(NormalPipeline + 1); - NormalPipeline = 0; - break; - - default: - // all other commands can run 4 cycles after a vertex - // no need to do much here since that is the minimum - AddCycles(NormalPipeline + 1); - NormalPipeline = 0; - break; - } - } - else - AddCycles(1); - ExecParams[ExecParamCount] = entry.Param; - ExecParamCount++; - - if (ExecParamCount >= CmdNumParams[entry.Command]) + u32 paramsRequiredCount = CmdNumParams[entry.Command]; + if (paramsRequiredCount <= 1) { - /*printf("[GXS:%08X] 0x%02X, ", GXStat, entry.Command); - for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]); - printf("\n");*/ + // fast path for command which only have a single parameter - ExecParamCount = 0; + /*printf("[GXS:%08X] 0x%02X, 0x%08X", GXStat, entry.Command, entry.Param);*/ switch (entry.Command) { case 0x10: // matrix mode - MatrixMode = ExecParams[0] & 0x3; + VertexPipelineCmdDelayed4(); + MatrixMode = entry.Param & 0x3; break; case 0x11: // push matrix + VertexPipelineCmdDelayed4(); NumPushPopCommands--; if (MatrixMode == 0) { @@ -1914,6 +1879,7 @@ void ExecuteCommand() break; case 0x12: // pop matrix + VertexPipelineCmdDelayed4(); NumPushPopCommands--; if (MatrixMode == 0) { @@ -1936,7 +1902,7 @@ void ExecuteCommand() } else { - s32 offset = (s32)(ExecParams[0] << 26) >> 26; + s32 offset = (s32)(entry.Param << 26) >> 26; PosMatrixStackPointer -= offset; PosMatrixStackPointer &= 0x3F; @@ -1950,6 +1916,7 @@ void ExecuteCommand() break; case 0x13: // store matrix + VertexPipelineCmdDelayed4(); if (MatrixMode == 0) { memcpy(ProjMatrixStack, ProjMatrix, 16*4); @@ -1960,7 +1927,7 @@ void ExecuteCommand() } else { - u32 addr = ExecParams[0] & 0x1F; + u32 addr = entry.Param & 0x1F; if (addr > 30) GXStat |= (1<<15); memcpy(PosMatrixStack[addr], PosMatrix, 16*4); @@ -1970,6 +1937,7 @@ void ExecuteCommand() break; case 0x14: // restore matrix + VertexPipelineCmdDelayed4(); if (MatrixMode == 0) { memcpy(ProjMatrix, ProjMatrixStack, 16*4); @@ -1983,7 +1951,7 @@ void ExecuteCommand() } else { - u32 addr = ExecParams[0] & 0x1F; + u32 addr = entry.Param & 0x1F; if (addr > 30) GXStat |= (1<<15); memcpy(PosMatrix, PosMatrixStack[addr], 16*4); @@ -1994,6 +1962,7 @@ void ExecuteCommand() break; case 0x15: // identity + VertexPipelineCmdDelayed4(); if (MatrixMode == 0) { MatrixLoadIdentity(ProjMatrix); @@ -2012,173 +1981,10 @@ void ExecuteCommand() } break; - case 0x16: // load 4x4 - if (MatrixMode == 0) - { - MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(18); - } - else if (MatrixMode == 3) - { - MatrixLoad4x4(TexMatrix, (s32*)ExecParams); - AddCycles(10); - } - else - { - MatrixLoad4x4(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - MatrixLoad4x4(VecMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(18); - } - break; - - case 0x17: // load 4x3 - if (MatrixMode == 0) - { - MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(18); - } - else if (MatrixMode == 3) - { - MatrixLoad4x3(TexMatrix, (s32*)ExecParams); - AddCycles(7); - } - else - { - MatrixLoad4x3(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - MatrixLoad4x3(VecMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(18); - } - break; - - case 0x18: // mult 4x4 - if (MatrixMode == 0) - { - MatrixMult4x4(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 16); - } - else if (MatrixMode == 3) - { - MatrixMult4x4(TexMatrix, (s32*)ExecParams); - AddCycles(33 - 16); - } - else - { - MatrixMult4x4(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - { - MatrixMult4x4(VecMatrix, (s32*)ExecParams); - AddCycles(35 + 30 - 16); - } - else AddCycles(35 - 16); - ClipMatrixDirty = true; - } - break; - - case 0x19: // mult 4x3 - if (MatrixMode == 0) - { - MatrixMult4x3(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 12); - } - else if (MatrixMode == 3) - { - MatrixMult4x3(TexMatrix, (s32*)ExecParams); - AddCycles(33 - 12); - } - else - { - MatrixMult4x3(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - { - MatrixMult4x3(VecMatrix, (s32*)ExecParams); - AddCycles(35 + 30 - 12); - } - else AddCycles(35 - 12); - ClipMatrixDirty = true; - } - break; - - case 0x1A: // mult 3x3 - if (MatrixMode == 0) - { - MatrixMult3x3(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 9); - } - else if (MatrixMode == 3) - { - MatrixMult3x3(TexMatrix, (s32*)ExecParams); - AddCycles(33 - 9); - } - else - { - MatrixMult3x3(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - { - MatrixMult3x3(VecMatrix, (s32*)ExecParams); - AddCycles(35 + 30 - 9); - } - else AddCycles(35 - 9); - ClipMatrixDirty = true; - } - break; - - case 0x1B: // scale - if (MatrixMode == 0) - { - MatrixScale(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 3); - } - else if (MatrixMode == 3) - { - MatrixScale(TexMatrix, (s32*)ExecParams); - AddCycles(33 - 3); - } - else - { - MatrixScale(PosMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 3); - } - break; - - case 0x1C: // translate - if (MatrixMode == 0) - { - MatrixTranslate(ProjMatrix, (s32*)ExecParams); - ClipMatrixDirty = true; - AddCycles(35 - 3); - } - else if (MatrixMode == 3) - { - MatrixTranslate(TexMatrix, (s32*)ExecParams); - AddCycles(33 - 3); - } - else - { - MatrixTranslate(PosMatrix, (s32*)ExecParams); - if (MatrixMode == 2) - { - MatrixTranslate(VecMatrix, (s32*)ExecParams); - AddCycles(35 + 30 - 3); - } - else AddCycles(35 - 3); - ClipMatrixDirty = true; - } - break; - case 0x20: // vertex color + VertexPipelineCmdDelayed6(); { - u32 c = ExecParams[0]; + u32 c = entry.Param; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -2189,15 +1995,17 @@ void ExecuteCommand() break; case 0x21: // normal - Normal[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; - Normal[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; - Normal[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + VertexPipelineCmdDelayed4(); + Normal[0] = (s16)((entry.Param & 0x000003FF) << 6) >> 6; + Normal[1] = (s16)((entry.Param & 0x000FFC00) >> 4) >> 6; + Normal[2] = (s16)((entry.Param & 0x3FF00000) >> 14) >> 6; CalculateLighting(); break; case 0x22: // texcoord - RawTexCoords[0] = ExecParams[0] & 0xFFFF; - RawTexCoords[1] = ExecParams[0] >> 16; + VertexPipelineCmdDelayed4(); + RawTexCoords[0] = entry.Param & 0xFFFF; + RawTexCoords[1] = entry.Param >> 16; if ((TexParam >> 30) == 1) { TexCoords[0] = (RawTexCoords[0]*TexMatrix[0] + RawTexCoords[1]*TexMatrix[4] + TexMatrix[8] + TexMatrix[12]) >> 12; @@ -2210,65 +2018,67 @@ void ExecuteCommand() } break; - case 0x23: // full vertex - CurVertex[0] = ExecParams[0] & 0xFFFF; - CurVertex[1] = ExecParams[0] >> 16; - CurVertex[2] = ExecParams[1] & 0xFFFF; - SubmitVertex(); - break; - case 0x24: // 10-bit vertex - CurVertex[0] = (ExecParams[0] & 0x000003FF) << 6; - CurVertex[1] = (ExecParams[0] & 0x000FFC00) >> 4; - CurVertex[2] = (ExecParams[0] & 0x3FF00000) >> 14; + VertexPipelineSubmitCmd(); + CurVertex[0] = (entry.Param & 0x000003FF) << 6; + CurVertex[1] = (entry.Param & 0x000FFC00) >> 4; + CurVertex[2] = (entry.Param & 0x3FF00000) >> 14; SubmitVertex(); break; case 0x25: // vertex XY - CurVertex[0] = ExecParams[0] & 0xFFFF; - CurVertex[1] = ExecParams[0] >> 16; + VertexPipelineSubmitCmd(); + CurVertex[0] = entry.Param & 0xFFFF; + CurVertex[1] = entry.Param >> 16; SubmitVertex(); break; case 0x26: // vertex XZ - CurVertex[0] = ExecParams[0] & 0xFFFF; - CurVertex[2] = ExecParams[0] >> 16; + VertexPipelineSubmitCmd(); + CurVertex[0] = entry.Param & 0xFFFF; + CurVertex[2] = entry.Param >> 16; SubmitVertex(); break; case 0x27: // vertex YZ - CurVertex[1] = ExecParams[0] & 0xFFFF; - CurVertex[2] = ExecParams[0] >> 16; + VertexPipelineSubmitCmd(); + CurVertex[1] = entry.Param & 0xFFFF; + CurVertex[2] = entry.Param >> 16; SubmitVertex(); break; case 0x28: // 10-bit delta vertex - CurVertex[0] += (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; - CurVertex[1] += (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; - CurVertex[2] += (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + VertexPipelineSubmitCmd(); + CurVertex[0] += (s16)((entry.Param & 0x000003FF) << 6) >> 6; + CurVertex[1] += (s16)((entry.Param & 0x000FFC00) >> 4) >> 6; + CurVertex[2] += (s16)((entry.Param & 0x3FF00000) >> 14) >> 6; SubmitVertex(); break; case 0x29: // polygon attributes - PolygonAttr = ExecParams[0]; + VertexPipelineCmdDelayed8(); + PolygonAttr = entry.Param; break; case 0x2A: // texture param - TexParam = ExecParams[0]; + VertexPipelineCmdDelayed8(); + TexParam = entry.Param; break; case 0x2B: // texture palette - TexPalette = ExecParams[0] & 0x1FFF; + VertexPipelineCmdDelayed8(); + TexPalette = entry.Param & 0x1FFF; break; case 0x30: // diffuse/ambient material - MatDiffuse[0] = ExecParams[0] & 0x1F; - MatDiffuse[1] = (ExecParams[0] >> 5) & 0x1F; - MatDiffuse[2] = (ExecParams[0] >> 10) & 0x1F; - MatAmbient[0] = (ExecParams[0] >> 16) & 0x1F; - MatAmbient[1] = (ExecParams[0] >> 21) & 0x1F; - MatAmbient[2] = (ExecParams[0] >> 26) & 0x1F; - if (ExecParams[0] & 0x8000) + VertexPipelineCmdDelayed6(); + MatDiffuse[0] = entry.Param & 0x1F; + MatDiffuse[1] = (entry.Param >> 5) & 0x1F; + MatDiffuse[2] = (entry.Param >> 10) & 0x1F; + MatAmbient[0] = (entry.Param >> 16) & 0x1F; + MatAmbient[1] = (entry.Param >> 21) & 0x1F; + MatAmbient[2] = (entry.Param >> 26) & 0x1F; + if (entry.Param & 0x8000) { VertexColor[0] = MatDiffuse[0]; VertexColor[1] = MatDiffuse[1]; @@ -2278,23 +2088,25 @@ void ExecuteCommand() break; case 0x31: // specular/emission material - MatSpecular[0] = ExecParams[0] & 0x1F; - MatSpecular[1] = (ExecParams[0] >> 5) & 0x1F; - MatSpecular[2] = (ExecParams[0] >> 10) & 0x1F; - MatEmission[0] = (ExecParams[0] >> 16) & 0x1F; - MatEmission[1] = (ExecParams[0] >> 21) & 0x1F; - MatEmission[2] = (ExecParams[0] >> 26) & 0x1F; - UseShininessTable = (ExecParams[0] & 0x8000) != 0; + VertexPipelineCmdDelayed6(); + MatSpecular[0] = entry.Param & 0x1F; + MatSpecular[1] = (entry.Param >> 5) & 0x1F; + MatSpecular[2] = (entry.Param >> 10) & 0x1F; + MatEmission[0] = (entry.Param >> 16) & 0x1F; + MatEmission[1] = (entry.Param >> 21) & 0x1F; + MatEmission[2] = (entry.Param >> 26) & 0x1F; + UseShininessTable = (entry.Param & 0x8000) != 0; AddCycles(3); break; case 0x32: // light direction + StallPolygonPipeline(8 + 1, 2); // 0x32 can run 6 cycles after a vertex { - u32 l = ExecParams[0] >> 30; + u32 l = entry.Param >> 30; s16 dir[3]; - dir[0] = (s16)((ExecParams[0] & 0x000003FF) << 6) >> 6; - dir[1] = (s16)((ExecParams[0] & 0x000FFC00) >> 4) >> 6; - dir[2] = (s16)((ExecParams[0] & 0x3FF00000) >> 14) >> 6; + dir[0] = (s16)((entry.Param & 0x000003FF) << 6) >> 6; + dir[1] = (s16)((entry.Param & 0x000FFC00) >> 4) >> 6; + dir[2] = (s16)((entry.Param & 0x3FF00000) >> 14) >> 6; LightDirection[l][0] = (dir[0]*VecMatrix[0] + dir[1]*VecMatrix[4] + dir[2]*VecMatrix[8]) >> 12; LightDirection[l][1] = (dir[0]*VecMatrix[1] + dir[1]*VecMatrix[5] + dir[2]*VecMatrix[9]) >> 12; LightDirection[l][2] = (dir[0]*VecMatrix[2] + dir[1]*VecMatrix[6] + dir[2]*VecMatrix[10]) >> 12; @@ -2303,32 +2115,21 @@ void ExecuteCommand() break; case 0x33: // light color + VertexPipelineCmdDelayed8(); { - u32 l = ExecParams[0] >> 30; - LightColor[l][0] = ExecParams[0] & 0x1F; - LightColor[l][1] = (ExecParams[0] >> 5) & 0x1F; - LightColor[l][2] = (ExecParams[0] >> 10) & 0x1F; + u32 l = entry.Param >> 30; + LightColor[l][0] = entry.Param & 0x1F; + LightColor[l][1] = (entry.Param >> 5) & 0x1F; + LightColor[l][2] = (entry.Param >> 10) & 0x1F; } AddCycles(1); break; - case 0x34: // shininess table - { - for (int i = 0; i < 128; i += 4) - { - u32 val = ExecParams[i >> 2]; - ShininessTable[i + 0] = val & 0xFF; - ShininessTable[i + 1] = (val >> 8) & 0xFF; - ShininessTable[i + 2] = (val >> 16) & 0xFF; - ShininessTable[i + 3] = val >> 24; - } - } - break; - case 0x40: // begin polygons + StallPolygonPipeline(1, 0); // TODO: check if there was a polygon being defined but incomplete // such cases seem to freeze the GPU - PolygonMode = ExecParams[0] & 0x3; + PolygonMode = entry.Param & 0x3; VertexNum = 0; VertexNumInPoly = 0; NumConsecutivePolygons = 0; @@ -2337,6 +2138,7 @@ void ExecuteCommand() break; case 0x41: // end polygons + VertexPipelineCmdDelayed8(); // TODO: research this? // it doesn't seem to have any effect whatsoever, but // its timing characteristics are different from those of other @@ -2344,8 +2146,9 @@ void ExecuteCommand() break; case 0x50: // flush + VertexPipelineCmdDelayed4(); FlushRequest = 1; - FlushAttributes = ExecParams[0] & 0x3; + FlushAttributes = entry.Param & 0x3; CycleCount = 325; // probably safe to just reset all pipelines // but needs checked @@ -2355,38 +2158,265 @@ void ExecuteCommand() VertexSlotCounter = 0; VertexSlotsFree = 1; break; - + case 0x60: // viewport x1,y1,x2,y2 + VertexPipelineCmdDelayed8(); // note: viewport Y coordinates are upside-down - Viewport[0] = ExecParams[0] & 0xFF; // x0 - Viewport[1] = (191 - ((ExecParams[0] >> 8) & 0xFF)) & 0xFF; // y0 - Viewport[2] = (ExecParams[0] >> 16) & 0xFF; // x1 - Viewport[3] = (191 - (ExecParams[0] >> 24)) & 0xFF; // y1 + Viewport[0] = entry.Param & 0xFF; // x0 + Viewport[1] = (191 - ((entry.Param >> 8) & 0xFF)) & 0xFF; // y0 + Viewport[2] = (entry.Param >> 16) & 0xFF; // x1 + Viewport[3] = (191 - (entry.Param >> 24)) & 0xFF; // y1 Viewport[4] = (Viewport[2] - Viewport[0] + 1) & 0x1FF; // width Viewport[5] = (Viewport[1] - Viewport[3] + 1) & 0xFF; // height break; - case 0x70: // box test - NumTestCommands -= 3; - BoxTest(ExecParams); + default: + VertexPipelineCmdDelayed4(); + //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param); break; + } + } + else + { + ExecParams[ExecParamCount] = entry.Param; + ExecParamCount++; - case 0x71: // pos test - NumTestCommands -= 2; - CurVertex[0] = ExecParams[0] & 0xFFFF; - CurVertex[1] = ExecParams[0] >> 16; - CurVertex[2] = ExecParams[1] & 0xFFFF; - PosTest(); - break; + if (ExecParamCount == 1) + { + // delay the first command entry as needed + switch (entry.Command) + { + // commands that stall the polygon pipeline + case 0x23: VertexPipelineSubmitCmd(); break; + case 0x34: + case 0x71: + VertexPipelineCmdDelayed8(); + break; + case 0x70: StallPolygonPipeline(10 + 1, 0); break; + case 0x72: VertexPipelineCmdDelayed6(); break; + default: VertexPipelineCmdDelayed4(); break; + } + } + else + { + AddCycles(1); - case 0x72: // vec test - NumTestCommands--; - VecTest(ExecParams); - break; + if (ExecParamCount >= paramsRequiredCount) + { + /*printf("[GXS:%08X] 0x%02X, ", GXStat, entry.Command); + for (int k = 0; k < ExecParamCount; k++) printf("0x%08X, ", ExecParams[k]); + printf("\n");*/ - default: - //printf("!! UNKNOWN GX COMMAND %02X %08X\n", entry.Command, entry.Param); - break; + ExecParamCount = 0; + + switch (entry.Command) + { + case 0x16: // load 4x4 + if (MatrixMode == 0) + { + MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(18); + } + else if (MatrixMode == 3) + { + MatrixLoad4x4(TexMatrix, (s32*)ExecParams); + AddCycles(10); + } + else + { + MatrixLoad4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x4(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(18); + } + break; + + case 0x17: // load 4x3 + if (MatrixMode == 0) + { + MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(18); + } + else if (MatrixMode == 3) + { + MatrixLoad4x3(TexMatrix, (s32*)ExecParams); + AddCycles(7); + } + else + { + MatrixLoad4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x3(VecMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(18); + } + break; + + case 0x18: // mult 4x4 + if (MatrixMode == 0) + { + MatrixMult4x4(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 16); + } + else if (MatrixMode == 3) + { + MatrixMult4x4(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 16); + } + else + { + MatrixMult4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x4(VecMatrix, (s32*)ExecParams); + AddCycles(35 + 30 - 16); + } + else AddCycles(35 - 16); + ClipMatrixDirty = true; + } + break; + + case 0x19: // mult 4x3 + if (MatrixMode == 0) + { + MatrixMult4x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 12); + } + else if (MatrixMode == 3) + { + MatrixMult4x3(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 12); + } + else + { + MatrixMult4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x3(VecMatrix, (s32*)ExecParams); + AddCycles(35 + 30 - 12); + } + else AddCycles(35 - 12); + ClipMatrixDirty = true; + } + break; + + case 0x1A: // mult 3x3 + if (MatrixMode == 0) + { + MatrixMult3x3(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 9); + } + else if (MatrixMode == 3) + { + MatrixMult3x3(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 9); + } + else + { + MatrixMult3x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult3x3(VecMatrix, (s32*)ExecParams); + AddCycles(35 + 30 - 9); + } + else AddCycles(35 - 9); + ClipMatrixDirty = true; + } + break; + + case 0x1B: // scale + if (MatrixMode == 0) + { + MatrixScale(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 3); + } + else if (MatrixMode == 3) + { + MatrixScale(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 3); + } + else + { + MatrixScale(PosMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 3); + } + break; + + case 0x1C: // translate + if (MatrixMode == 0) + { + MatrixTranslate(ProjMatrix, (s32*)ExecParams); + ClipMatrixDirty = true; + AddCycles(35 - 3); + } + else if (MatrixMode == 3) + { + MatrixTranslate(TexMatrix, (s32*)ExecParams); + AddCycles(33 - 3); + } + else + { + MatrixTranslate(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixTranslate(VecMatrix, (s32*)ExecParams); + AddCycles(35 + 30 - 3); + } + else AddCycles(35 - 3); + ClipMatrixDirty = true; + } + break; + + case 0x23: // full vertex + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + SubmitVertex(); + break; + + case 0x34: // shininess table + { + for (int i = 0; i < 128; i += 4) + { + u32 val = ExecParams[i >> 2]; + ShininessTable[i + 0] = val & 0xFF; + ShininessTable[i + 1] = (val >> 8) & 0xFF; + ShininessTable[i + 2] = (val >> 16) & 0xFF; + ShininessTable[i + 3] = val >> 24; + } + } + break; + + case 0x71: // pos test + NumTestCommands -= 2; + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + PosTest(); + break; + + case 0x70: // box test + NumTestCommands -= 3; + BoxTest(ExecParams); + break; + + case 0x72: // vec test + NumTestCommands--; + VecTest(ExecParams); + break; + + default: + __builtin_unreachable(); + } + } } } } @@ -2414,7 +2444,7 @@ void FinishWork(s32 cycles) void Run() { if (!GeometryEnabled || FlushRequest || - (CmdPIPE->IsEmpty() && !(GXStat & (1<<27)))) + (CmdPIPE.IsEmpty() && !(GXStat & (1<<27)))) { Timestamp = NDS::ARM9Timestamp >> NDS::ARM9ClockShift; return; @@ -2426,7 +2456,7 @@ void Run() if (CycleCount <= 0) { - while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) + while (CycleCount <= 0 && !CmdPIPE.IsEmpty()) { if (NumPushPopCommands == 0) GXStat &= ~(1<<14); if (NumTestCommands == 0) GXStat &= ~(1<<0); @@ -2435,7 +2465,7 @@ void Run() } } - if (CycleCount <= 0 && CmdPIPE->IsEmpty()) + if (CycleCount <= 0 && CmdPIPE.IsEmpty()) { if (GXStat & (1<<27)) FinishWork(-CycleCount); else CycleCount = 0; @@ -2451,8 +2481,8 @@ void CheckFIFOIRQ() bool irq = false; switch (GXStat >> 30) { - case 1: irq = (CmdFIFO->Level() < 128); break; - case 2: irq = CmdFIFO->IsEmpty(); break; + case 1: irq = (CmdFIFO.Level() < 128); break; + case 2: irq = CmdFIFO.IsEmpty(); break; } if (irq) NDS::SetIRQ(0, NDS::IRQ_GXFIFO); @@ -2461,7 +2491,7 @@ void CheckFIFOIRQ() void CheckFIFODMA() { - if (CmdFIFO->Level() < 128) + if (CmdFIFO.Level() < 128) NDS::CheckDMAs(0, 0x07); } @@ -2668,7 +2698,7 @@ u8 Read8(u32 addr) { Run(); - u32 fifolevel = CmdFIFO->Level(); + u32 fifolevel = CmdFIFO.Level(); return fifolevel & 0xFF; } @@ -2676,7 +2706,7 @@ u8 Read8(u32 addr) { Run(); - u32 fifolevel = CmdFIFO->Level(); + u32 fifolevel = CmdFIFO.Level(); return ((GXStat >> 24) & 0xFF) | (fifolevel >> 8) | @@ -2711,7 +2741,7 @@ u16 Read16(u32 addr) { Run(); - u32 fifolevel = CmdFIFO->Level(); + u32 fifolevel = CmdFIFO.Level(); return (GXStat >> 16) | fifolevel | @@ -2747,7 +2777,7 @@ u32 Read32(u32 addr) { Run(); - u32 fifolevel = CmdFIFO->Level(); + u32 fifolevel = CmdFIFO.Level(); return GXStat | ((PosMatrixStackPointer & 0x1F) << 8) | |