diff options
Diffstat (limited to 'GPU3D.cpp')
-rw-r--r-- | GPU3D.cpp | 413 |
1 files changed, 406 insertions, 7 deletions
@@ -119,6 +119,142 @@ u32 ExecParamCount; s32 CycleCount; +u32 MatrixMode; + +s32 ProjMatrix[16]; +s32 PosMatrix[16]; +s32 VecMatrix[16]; +s32 TexMatrix[16]; + +s32 ProjMatrixStack[16]; +s32 PosMatrixStack[31][16]; +s32 ProjMatrixStackPointer; +s32 PosMatrixStackPointer; + + +void MatrixLoadIdentity(s32* m) +{ + m[0] = 0x1000; m[1] = 0; m[2] = 0; m[3] = 0; + m[4] = 0; m[5] = 0x1000; m[6] = 0; m[7] = 0; + m[8] = 0; m[9] = 0; m[10] = 0x1000; m[11] = 0; + m[12] = 0; m[13] = 0; m[14] = 0; m[15] = 0x1000; +} + +void MatrixLoad4x4(s32* m, s32* s) +{ + memcpy(m, s, 16*4); +} + +void MatrixLoad4x3(s32* m, s32* s) +{ + m[0] = s[0]; m[1] = s[1]; m[2] = s[2]; m[3] = 0; + m[4] = s[3]; m[5] = s[4]; m[6] = s[5]; m[7] = 0; + m[8] = s[6]; m[9] = s[7]; m[10] = s[8]; m[11] = 0; + m[12] = s[9]; m[13] = s[10]; m[14] = s[11]; m[15] = 0x1000; +} + +void MatrixMult4x4(s32* m, s32* s) +{ + s32 tmp[16]; + memcpy(tmp, m, 16*4); + + // m = s*m + m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8] + s[3]*tmp[12]) >> 12; + m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9] + s[3]*tmp[13]) >> 12; + m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10] + s[3]*tmp[14]) >> 12; + m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11] + s[3]*tmp[15]) >> 12; + + m[4] = (s[4]*tmp[0] + s[5]*tmp[4] + s[6]*tmp[8] + s[7]*tmp[12]) >> 12; + m[5] = (s[4]*tmp[1] + s[5]*tmp[5] + s[6]*tmp[9] + s[7]*tmp[13]) >> 12; + m[6] = (s[4]*tmp[2] + s[5]*tmp[6] + s[6]*tmp[10] + s[7]*tmp[14]) >> 12; + m[7] = (s[4]*tmp[3] + s[5]*tmp[7] + s[6]*tmp[11] + s[7]*tmp[15]) >> 12; + + m[8] = (s[8]*tmp[0] + s[9]*tmp[4] + s[10]*tmp[8] + s[11]*tmp[12]) >> 12; + m[9] = (s[8]*tmp[1] + s[9]*tmp[5] + s[10]*tmp[9] + s[11]*tmp[13]) >> 12; + m[10] = (s[8]*tmp[2] + s[9]*tmp[6] + s[10]*tmp[10] + s[11]*tmp[14]) >> 12; + m[11] = (s[8]*tmp[3] + s[9]*tmp[7] + s[10]*tmp[11] + s[11]*tmp[15]) >> 12; + + m[12] = (s[12]*tmp[0] + s[13]*tmp[4] + s[14]*tmp[8] + s[15]*tmp[12]) >> 12; + m[13] = (s[12]*tmp[1] + s[13]*tmp[5] + s[14]*tmp[9] + s[15]*tmp[13]) >> 12; + m[14] = (s[12]*tmp[2] + s[13]*tmp[6] + s[14]*tmp[10] + s[15]*tmp[14]) >> 12; + m[15] = (s[12]*tmp[3] + s[13]*tmp[7] + s[14]*tmp[11] + s[15]*tmp[15]) >> 12; +} + +void MatrixMult4x3(s32* m, s32* s) +{ + s32 tmp[16]; + memcpy(tmp, m, 16*4); + + // m = s*m + m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; + m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; + m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; + m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + + m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; + m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; + m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; + m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + + m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; + m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; + m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; + m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; + + m[12] = (s[9]*tmp[0] + s[10]*tmp[4] + s[11]*tmp[8] + 0x1000*tmp[12]) >> 12; + m[13] = (s[9]*tmp[1] + s[10]*tmp[5] + s[11]*tmp[9] + 0x1000*tmp[13]) >> 12; + m[14] = (s[9]*tmp[2] + s[10]*tmp[6] + s[11]*tmp[10] + 0x1000*tmp[14]) >> 12; + m[15] = (s[9]*tmp[3] + s[10]*tmp[7] + s[11]*tmp[11] + 0x1000*tmp[15]) >> 12; +} + +void MatrixMult3x3(s32* m, s32* s) +{ + s32 tmp[12]; + memcpy(tmp, m, 12*4); + + // m = s*m + m[0] = (s[0]*tmp[0] + s[1]*tmp[4] + s[2]*tmp[8]) >> 12; + m[1] = (s[0]*tmp[1] + s[1]*tmp[5] + s[2]*tmp[9]) >> 12; + m[2] = (s[0]*tmp[2] + s[1]*tmp[6] + s[2]*tmp[10]) >> 12; + m[3] = (s[0]*tmp[3] + s[1]*tmp[7] + s[2]*tmp[11]) >> 12; + + m[4] = (s[3]*tmp[0] + s[4]*tmp[4] + s[5]*tmp[8]) >> 12; + m[5] = (s[3]*tmp[1] + s[4]*tmp[5] + s[5]*tmp[9]) >> 12; + m[6] = (s[3]*tmp[2] + s[4]*tmp[6] + s[5]*tmp[10]) >> 12; + m[7] = (s[3]*tmp[3] + s[4]*tmp[7] + s[5]*tmp[11]) >> 12; + + m[8] = (s[6]*tmp[0] + s[7]*tmp[4] + s[8]*tmp[8]) >> 12; + m[9] = (s[6]*tmp[1] + s[7]*tmp[5] + s[8]*tmp[9]) >> 12; + m[10] = (s[6]*tmp[2] + s[7]*tmp[6] + s[8]*tmp[10]) >> 12; + m[11] = (s[6]*tmp[3] + s[7]*tmp[7] + s[8]*tmp[11]) >> 12; +} + +void MatrixScale(s32* m, s32* s) +{ + m[0] = (s[0]*m[0]) >> 12; + m[1] = (s[0]*m[1]) >> 12; + m[2] = (s[0]*m[2]) >> 12; + m[3] = (s[0]*m[3]) >> 12; + + m[4] = (s[1]*m[4]) >> 12; + m[5] = (s[1]*m[5]) >> 12; + m[6] = (s[1]*m[6]) >> 12; + m[7] = (s[1]*m[7]) >> 12; + + m[8] = (s[2]*m[8]) >> 12; + m[9] = (s[2]*m[9]) >> 12; + m[10] = (s[2]*m[10]) >> 12; + m[11] = (s[2]*m[11]) >> 12; +} + +void MatrixTranslate(s32* m, s32* s) +{ + m[12] += (s[0]*m[0] + s[1]*m[4] + s[2]*m[8]) >> 12; + m[13] += (s[0]*m[1] + s[1]*m[5] + s[2]*m[9]) >> 12; + m[14] += (s[0]*m[2] + s[1]*m[6] + s[2]*m[10]) >> 12; +} + + bool Init() { CmdFIFO = new FIFO<CmdFIFOEntry>(256); @@ -148,6 +284,19 @@ void Reset() memset(ExecParams, 0, 32*4); ExecParamCount = 0; CycleCount = 0; + + + MatrixMode = 0; + + MatrixLoadIdentity(ProjMatrix); + MatrixLoadIdentity(PosMatrix); + MatrixLoadIdentity(VecMatrix); + MatrixLoadIdentity(TexMatrix); + + memset(ProjMatrixStack, 0, 16*4); + memset(PosMatrixStack, 0, 31 * 16*4); + ProjMatrixStackPointer = 0; + PosMatrixStackPointer = 0; } @@ -179,9 +328,11 @@ CmdFIFOEntry CmdFIFORead() CmdPIPE->Write(CmdFIFO->Read()); if (!CmdFIFO->IsEmpty()) CmdPIPE->Write(CmdFIFO->Read()); + + CheckFIFODMA(); + CheckFIFOIRQ(); } - CheckFIFOIRQ(); return ret; } @@ -200,15 +351,242 @@ void ExecuteCommand() CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; - // TODO: actually execute the command, maybe + GXStat &= ~(1<<14); + //printf("3D CMD %02X\n", entry.Command); switch (entry.Command) { - case 0x18: - case 0x19: - case 0x1A: - // TODO: more cycles if MTX_MODE=2 + case 0x10: // matrix mode + MatrixMode = ExecParams[0] & 0x3; + break; + + case 0x11: // push matrix + if (MatrixMode == 0) + { + if (ProjMatrixStackPointer > 0) + { + printf("!! PROJ MATRIX STACK OVERFLOW\n"); + GXStat |= (1<<15); + break; + } + + memcpy(ProjMatrixStack, ProjMatrix, 16*4); + ProjMatrixStackPointer++; + GXStat |= (1<<14); + } + else if (MatrixMode == 3) + { + printf("!! CAN'T PUSH TEXTURE MATRIX\n"); + GXStat |= (1<<15); // CHECKME + } + else + { + if (PosMatrixStackPointer > 30) + { + printf("!! POS MATRIX STACK OVERFLOW\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4); + PosMatrixStackPointer++; + GXStat |= (1<<14); + } + break; + + case 0x12: // pop matrix + if (MatrixMode == 0) + { + if (ProjMatrixStackPointer <= 0) + { + printf("!! PROJ MATRIX STACK UNDERFLOW\n"); + GXStat |= (1<<15); + break; + } + + ProjMatrixStackPointer--; + memcpy(ProjMatrix, ProjMatrixStack, 16*4); + GXStat |= (1<<14); + } + else if (MatrixMode == 3) + { + printf("!! CAN'T POP TEXTURE MATRIX\n"); + GXStat |= (1<<15); // CHECKME + } + else + { + s32 offset = (s32)(ExecParams[0] << 26) >> 26; + PosMatrixStackPointer -= offset; + + if (PosMatrixStackPointer < 0 || PosMatrixStackPointer > 30) + { + printf("!! POS MATRIX STACK UNDER/OVERFLOW %d\n", PosMatrixStackPointer); + PosMatrixStackPointer += offset; + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); + GXStat |= (1<<14); + } + break; + + case 0x13: // store matrix + if (MatrixMode == 0) + { + memcpy(ProjMatrixStack, ProjMatrix, 16*4); + } + else if (MatrixMode == 3) + { + printf("!! CAN'T STORE TEXTURE MATRIX\n"); + GXStat |= (1<<15); // CHECKME + } + else + { + u32 addr = ExecParams[0] & 0x1F; + if (addr > 30) + { + printf("!! POS MATRIX STORE ADDR 31\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrixStack[addr], PosMatrix, 16*4); + } + break; + + case 0x14: // restore matrix + if (MatrixMode == 0) + { + memcpy(ProjMatrix, ProjMatrixStack, 16*4); + } + else if (MatrixMode == 3) + { + printf("!! CAN'T RESTORE TEXTURE MATRIX\n"); + GXStat |= (1<<15); // CHECKME + } + else + { + u32 addr = ExecParams[0] & 0x1F; + if (addr > 30) + { + printf("!! POS MATRIX STORE ADDR 31\n"); + GXStat |= (1<<15); + break; + } + + memcpy(PosMatrix, PosMatrixStack[addr], 16*4); + } + break; + + case 0x15: // identity + if (MatrixMode == 0) + MatrixLoadIdentity(ProjMatrix); + else if (MatrixMode == 3) + MatrixLoadIdentity(TexMatrix); + else + { + MatrixLoadIdentity(PosMatrix); + if (MatrixMode == 2) + MatrixLoadIdentity(VecMatrix); + } + break; + + case 0x16: // load 4x4 + if (MatrixMode == 0) + MatrixLoad4x4(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixLoad4x4(TexMatrix, (s32*)ExecParams); + else + { + MatrixLoad4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x4(VecMatrix, (s32*)ExecParams); + } + break; + + case 0x17: // load 4x3 + if (MatrixMode == 0) + MatrixLoad4x3(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixLoad4x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixLoad4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixLoad4x3(VecMatrix, (s32*)ExecParams); + } + break; + + case 0x18: // mult 4x4 + if (MatrixMode == 0) + MatrixMult4x4(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixMult4x4(TexMatrix, (s32*)ExecParams); + else + { + MatrixMult4x4(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x4(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + } + break; + + case 0x19: // mult 4x3 + if (MatrixMode == 0) + MatrixMult4x3(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixMult4x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixMult4x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult4x3(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + } + break; + + case 0x1A: // mult 3x3 + if (MatrixMode == 0) + MatrixMult3x3(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixMult3x3(TexMatrix, (s32*)ExecParams); + else + { + MatrixMult3x3(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + { + MatrixMult3x3(VecMatrix, (s32*)ExecParams); + CycleCount += 30; + } + } + break; + + case 0x1B: // scale + if (MatrixMode == 0) + MatrixScale(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixScale(TexMatrix, (s32*)ExecParams); + else + MatrixScale(PosMatrix, (s32*)ExecParams); + break; + + case 0x1C: // translate + if (MatrixMode == 0) + MatrixTranslate(ProjMatrix, (s32*)ExecParams); + else if (MatrixMode == 3) + MatrixTranslate(TexMatrix, (s32*)ExecParams); + else + { + MatrixTranslate(PosMatrix, (s32*)ExecParams); + if (MatrixMode == 2) + MatrixTranslate(VecMatrix, (s32*)ExecParams); + } break; case 0x21: @@ -249,6 +627,12 @@ void CheckFIFOIRQ() if (irq) NDS::TriggerIRQ(0, NDS::IRQ_GXFIFO); } +void CheckFIFODMA() +{ + if (CmdFIFO->Level() < 128) + NDS::CheckDMAs(0, 0x07); +} + u8 Read8(u32 addr) { @@ -272,13 +656,26 @@ u32 Read32(u32 addr) u32 fifolevel = CmdFIFO->Level(); return GXStat | - // matrix stack levels, TODO + ((PosMatrixStackPointer & 0x1F) << 8) | + ((ProjMatrixStackPointer & 0x1) << 13) | (fifolevel << 16) | (fifolevel < 128 ? (1<<25) : 0) | (fifolevel == 0 ? (1<<26) : 0) | (CycleCount > 0 ? (1<<27) : 0); } } + + if (addr >= 0x04000640 && addr < 0x04000680) + { + printf("!! CLIPMTX READ\n"); + return 0; + } + if (addr >= 0x04000680 && addr < 0x040006A4) + { + printf("!! VECMTX READ\n"); + return 0; + } + return 0; } @@ -330,6 +727,8 @@ void Write32(u32 addr, u32 val) ParamCount = 0; TotalParams = CmdNumParams[CurCommand & 0xFF]; } + + return; } if (addr >= 0x04000440 && addr < 0x040005CC) |