about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/ARM.cpp 14
-rw-r--r-- src/ARM.h 10
-rw-r--r-- src/DMA.cpp 31
-rw-r--r-- src/DMA.h 2
-rw-r--r-- src/GPU.cpp 3
-rw-r--r-- src/GPU2D.cpp 476
-rw-r--r-- src/GPU2D.h 17
-rw-r--r-- src/GPU3D.cpp 733
-rw-r--r-- src/GPU3D.h 19
-rw-r--r-- src/GPU3D_Soft.cpp 798
-rw-r--r-- src/NDS.cpp 142
-rw-r--r-- src/NDS.h 31
-rw-r--r-- src/NDSCart.cpp 87
-rw-r--r-- src/NDSCart.h 1
-rw-r--r-- src/RTC.cpp 43
-rw-r--r-- src/SPI.cpp 4
-rw-r--r-- src/SPI.h 4
-rw-r--r-- src/SPU.cpp 811
-rw-r--r-- src/SPU.h 160
-rw-r--r-- src/Wifi.cpp 235
-rw-r--r-- src/Wifi.h 108
-rw-r--r-- src/wx/main.cpp 40
-rw-r--r-- src/wx/main.h 6
23 files changed, 2936 insertions, 839 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp
index a2e0066..bbfb7f7 100644
--- a/src/ARM.cpp
+++ b/src/ARM.cpp
@@ -322,10 +322,14 @@ s32 ARM::Execute()
{
if (Halted)
{
- if (NDS::HaltInterrupted(Num))
+ if (Halted == 2)
{
Halted = 0;
- if (NDS::IME[Num]&1)
+ }
+ else if (NDS::HaltInterrupted(Num))
+ {
+ Halted = 0;
+ if (NDS::IME[Num] & 0x1)
TriggerIRQ();
}
else
@@ -376,8 +380,6 @@ s32 ARM::Execute()
}
}
- //if (R[15]==0x037F9364) printf("R8=%08X R9=%08X\n", R[8], R[9]);
-
if (Num==0)
{
s32 diff = Cycles - lastcycles;
@@ -398,9 +400,9 @@ s32 ARM::Execute()
Cycles = CyclesToRun;
break;
}
- if (NDS::HaltInterrupted(Num))
+ if (NDS::IF[Num] & NDS::IE[Num])
{
- if (NDS::IME[Num]&1)
+ if (NDS::IME[Num] & 0x1)
TriggerIRQ();
}
}
diff --git a/src/ARM.h b/src/ARM.h
index 79c2bce..a51c3ff 100644
--- a/src/ARM.h
+++ b/src/ARM.h
@@ -43,9 +43,19 @@ public:
void Halt(u32 halt)
{
+ if (halt==2 && Halted==1) return;
Halted = halt;
}
+ void CheckIRQ() // calls TriggerIRQ() for this CPU (Num) when IME bit 0 is set and (IF & IE) is nonzero
+ {
+ if (!(NDS::IME[Num] & 0x1)) return; // IME bit 0 clear: nothing to do
+ if (NDS::IF[Num] & NDS::IE[Num]) // at least one bit is set in both IF and IE
+ {
+ TriggerIRQ();
+ }
+ }
+
s32 Execute();
bool CheckCondition(u32 code)
diff --git a/src/DMA.cpp b/src/DMA.cpp
index 9a17f41..edd6f8b 100644
--- a/src/DMA.cpp
+++ b/src/DMA.cpp
@@ -186,18 +186,6 @@ void DMA::Start()
//printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16);
- // special path for cart DMA. this is a gross hack.
- // emulating it properly requires emulating cart transfer delays, so uh... TODO
- if (CurSrcAddr==0x04100010 && RemCount==1 && (Cnt & 0x07E00000)==0x07000000 &&
- (StartMode==0x05 || StartMode==0x12))
- {
- NDSCart::DMA(CurDstAddr);
- Cnt &= ~0x80000000;
- if (Cnt & 0x40000000)
- NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num);
- return;
- }
-
// special path for the display FIFO. another gross hack.
// the display FIFO seems to be more like a circular buffer that holds 16 pixels
// from which the display controller reads. DMA is triggered every 8 pixels to fill it
@@ -212,6 +200,8 @@ void DMA::Start()
return;
}
+ IsGXFIFODMA = (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0);
+
// TODO eventually: not stop if we're running code in ITCM
Running = true;
@@ -245,6 +235,23 @@ s32 DMA::Run(s32 cycles)
}
else
{
+ // optimized path for typical GXFIFO DMA
+ if (IsGXFIFODMA)
+ {
+ while (IterCount > 0 && cycles > 0)
+ {
+ GPU3D::WriteToGXFIFO(*(u32*)&NDS::MainRAM[CurSrcAddr&0x3FFFFF]);
+
+ s32 c = (Waitstates[1][0x2] + Waitstates[1][0x4]);
+ cycles -= c;
+ NDS::RunTimingCriticalDevices(0, c);
+
+ CurSrcAddr += SrcAddrInc<<2;
+ IterCount--;
+ RemCount--;
+ }
+ }
+
u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32;
void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32;
diff --git a/src/DMA.h b/src/DMA.h
index 6b443be..17c075e 100644
--- a/src/DMA.h
+++ b/src/DMA.h
@@ -66,6 +66,8 @@ private:
bool Running;
bool InProgress;
+
+ bool IsGXFIFODMA;
};
#endif
diff --git a/src/GPU.cpp b/src/GPU.cpp
index d486e0c..680f08c 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -649,6 +649,9 @@ void StartScanline(u32 line)
else
DispStat[1] &= ~(1<<2);
+ GPU2D_A->CheckWindows(line);
+ GPU2D_B->CheckWindows(line);
+
if (line >= 2 && line < 194)
NDS::CheckDMAs(0, 0x03);
else if (line == 194)
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index ff9072d..77b79b4 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -32,6 +32,8 @@
// * VRAM/FIFO display modes convert colors the same way
// * 3D engine converts colors differently (18bit = 15bit * 2 + 1, except 0 = 0)
// * 'screen disabled' white is 63,63,63
+// * [Gericom] bit15 is used as bottom green bit for palettes. TODO: check where this applies.
+// tested on the normal BG palette and applies there
//
// oh also, changing DISPCNT bit16-17 midframe doesn't work (ignored? applied for next frame?)
// TODO, eventually: check whether other DISPCNT bits can be changed midframe
@@ -68,6 +70,9 @@
// * if BG0 is selected as 1st target, destination not selected as 2nd target:
// brightness up/down effect is applied if selected. if blending is selected, it doesn't apply.
// * 3D layer pixels with alpha=0 are always transparent.
+//
+// mosaic:
+// * mosaic grid starts at 0,0 regardless of the BG/sprite position
GPU2D::GPU2D(u32 num)
@@ -94,6 +99,10 @@ void GPU2D::Reset()
memset(BGRotC, 0, 2*2);
memset(BGRotD, 0, 2*2);
+ memset(Win0Coords, 0, 4);
+ memset(Win1Coords, 0, 4);
+ memset(WinCnt, 0, 4);
+
BlendCnt = 0;
EVA = 16;
EVB = 0;
@@ -120,7 +129,15 @@ void GPU2D::SetFramebuffer(u32* buf)
u8 GPU2D::Read8(u32 addr)
{
- printf("!! GPU2D READ8 %08X\n", addr);
+ switch (addr & 0x00000FFF)
+ {
+ case 0x048: return WinCnt[0];
+ case 0x049: return WinCnt[1];
+ case 0x04A: return WinCnt[2];
+ case 0x04B: return WinCnt[3];
+ }
+
+ printf("unknown GPU read8 %08X\n", addr);
return 0;
}
@@ -136,10 +153,15 @@ u16 GPU2D::Read16(u32 addr)
case 0x00C: return BGCnt[2];
case 0x00E: return BGCnt[3];
+ case 0x048: return WinCnt[0] | (WinCnt[1] << 8);
+ case 0x04A: return WinCnt[2] | (WinCnt[3] << 8);
+
case 0x050: return BlendCnt;
case 0x064: return CaptureCnt & 0xFFFF;
case 0x066: return CaptureCnt >> 16;
+
+ case 0x06C: return MasterBrightness;
}
printf("unknown GPU read16 %08X\n", addr);
@@ -160,7 +182,40 @@ u32 GPU2D::Read32(u32 addr)
void GPU2D::Write8(u32 addr, u8 val)
{
- printf("!! GPU2D WRITE8 %08X %02X\n", addr, val);
+ switch (addr & 0x00000FFF)
+ {
+ case 0x040: Win0Coords[1] = val; return;
+ case 0x041: Win0Coords[0] = val; return;
+ case 0x042: Win1Coords[1] = val; return;
+ case 0x043: Win1Coords[0] = val; return;
+
+ case 0x044: Win0Coords[3] = val; return;
+ case 0x045: Win0Coords[2] = val; return;
+ case 0x046: Win1Coords[3] = val; return;
+ case 0x047: Win1Coords[2] = val; return;
+
+ case 0x048: WinCnt[0] = val; return;
+ case 0x049: WinCnt[1] = val; return;
+ case 0x04A: WinCnt[2] = val; return;
+ case 0x04B: WinCnt[3] = val; return;
+
+ case 0x050: BlendCnt = (BlendCnt & 0xFF00) | val; return;
+ case 0x051: BlendCnt = (BlendCnt & 0x00FF) | (val << 8); return;
+ case 0x052:
+ EVA = val & 0x1F;
+ if (EVA > 16) EVA = 16;
+ return;
+ case 0x53:
+ EVB = val & 0x1F;
+ if (EVB > 16) EVB = 16;
+ return;
+ case 0x054:
+ EVY = val & 0x1F;
+ if (EVY > 16) EVY = 16;
+ return;
+ }
+
+ printf("unknown GPU write8 %08X %02X\n", addr, val);
}
void GPU2D::Write16(u32 addr, u16 val)
@@ -234,6 +289,33 @@ void GPU2D::Write16(u32 addr, u16 val)
if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1];
return;
+ case 0x040:
+ Win0Coords[1] = val & 0xFF;
+ Win0Coords[0] = val >> 8;
+ return;
+ case 0x042:
+ Win1Coords[1] = val & 0xFF;
+ Win1Coords[0] = val >> 8;
+ return;
+
+ case 0x044:
+ Win0Coords[3] = val & 0xFF;
+ Win0Coords[2] = val >> 8;
+ return;
+ case 0x046:
+ Win1Coords[3] = val & 0xFF;
+ Win1Coords[2] = val >> 8;
+ return;
+
+ case 0x048:
+ WinCnt[0] = val & 0xFF;
+ WinCnt[1] = val >> 8;
+ return;
+ case 0x04A:
+ WinCnt[2] = val & 0xFF;
+ WinCnt[3] = val >> 8;
+ return;
+
case 0x050: BlendCnt = val; return;
case 0x052:
EVA = val & 0x1F;
@@ -301,6 +383,25 @@ void GPU2D::DrawScanline(u32 line)
u32 dispmode = DispCnt >> 16;
dispmode &= (Num ? 0x1 : 0x3);
+ // always render regular graphics
+ DrawScanline_Mode1(line, dst);
+
+ // capture
+ if ((Num == 0) && (CaptureCnt & (1<<31)))
+ {
+ u32 capwidth, capheight;
+ switch ((CaptureCnt >> 20) & 0x3)
+ {
+ case 0: capwidth = 128; capheight = 128; break;
+ case 1: capwidth = 256; capheight = 64; break;
+ case 2: capwidth = 256; capheight = 128; break;
+ case 3: capwidth = 256; capheight = 192; break;
+ }
+
+ if (line < capheight)
+ DoCapture(line, capwidth, dst);
+ }
+
switch (dispmode)
{
case 0: // screen off
@@ -310,10 +411,7 @@ void GPU2D::DrawScanline(u32 line)
}
break;
- case 1: // regular display
- {
- DrawScanline_Mode1(line, dst);
- }
+ case 1: // regular display, already taken care of
break;
case 2: // VRAM display
@@ -359,22 +457,6 @@ void GPU2D::DrawScanline(u32 line)
break;
}
- // capture
- if ((!Num) && (CaptureCnt & (1<<31)))
- {
- u32 capwidth, capheight;
- switch ((CaptureCnt >> 20) & 0x3)
- {
- case 0: capwidth = 128; capheight = 128; break;
- case 1: capwidth = 256; capheight = 64; break;
- case 2: capwidth = 256; capheight = 128; break;
- case 3: capwidth = 256; capheight = 192; break;
- }
-
- if (line < capheight)
- DoCapture(line, capwidth, dst);
- }
-
// master brightness
if (dispmode != 0)
{
@@ -480,7 +562,7 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src)
dstaddr &= 0xFFFF;
srcBaddr &= 0xFFFF;
- switch ((DispCnt >> 29) & 0x3)
+ switch ((CaptureCnt >> 29) & 0x3)
{
case 0: // source A
{
@@ -526,8 +608,8 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src)
case 2: // sources A+B
case 3:
{
- u32 eva = DispCnt & 0x1F;
- u32 evb = (DispCnt >> 8) & 0x1F;
+ u32 eva = CaptureCnt & 0x1F;
+ u32 evb = (CaptureCnt >> 8) & 0x1F;
// checkme
if (eva > 16) eva = 16;
@@ -558,6 +640,10 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src)
u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4;
u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0);
+ if (rD > 0x1F) rD = 0x1F;
+ if (gD > 0x1F) gD = 0x1F;
+ if (bD > 0x1F) bD = 0x1F;
+
dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15);
srcBaddr = (srcBaddr + 1) & 0xFFFF;
dstaddr = (dstaddr + 1) & 0xFFFF;
@@ -683,6 +769,59 @@ u16* GPU2D::GetOBJExtPal(u32 pal)
}
+void GPU2D::CheckWindows(u32 line) // updates Win0Active/Win1Active for the given scanline; GPU::StartScanline calls this for both 2D engines
+{
+ line &= 0xFF; // only the low 8 bits are compared, since the stored coordinates are u8
+ if (line == Win0Coords[3]) Win0Active = false; // Win0Coords[3] (low byte of register 0x044) turns window 0 off; checked first, so it wins when [2] == [3]
+ else if (line == Win0Coords[2]) Win0Active = true; // Win0Coords[2] (high byte of register 0x044) turns window 0 on
+ if (line == Win1Coords[3]) Win1Active = false; // same scheme for window 1, using register 0x046
+ else if (line == Win1Coords[2]) Win1Active = true; // on any other line the previous state is kept
+}
+
+void GPU2D::CalculateWindowMask(u32 line, u8* mask) // fills mask[0..255] with one window-control byte per pixel; callers test bits 0-3 per BG, 0x10 for sprites, 0x20 for color effects
+{
+ for (u32 i = 0; i < 256; i++)
+ mask[i] = WinCnt[2]; // default for every pixel: the "outside all windows" control byte
+
+ if ((DispCnt & (1<<15)) && (DispCnt & (1<<12))) // requires DispCnt bits 15 and 12 (presumably OBJ window enable and OBJ enable -- confirm)
+ {
+ // OBJ window: rendered into a scratch byte buffer, nonzero bytes mark covered pixels
+ u8 objwin[256];
+ memset(objwin, 0, 256);
+ DrawSpritesWindow(line, objwin);
+
+ for (u32 i = 0; i < 256; i++)
+ {
+ if (objwin[i]) mask[i] = WinCnt[3];
+ }
+ }
+
+ if ((DispCnt & (1<<14)) && Win1Active) // written after the OBJ window, so window 1 overrides it
+ {
+ // window 1: covers pixels x1 <= i < x2
+ u32 x1 = Win1Coords[0];
+ u32 x2 = Win1Coords[1];
+ if (x2 == 0 && x1 > 0) x2 = 256; // a right edge of 0 with a nonzero left edge extends to the end of the line
+ if (x1 > x2) x2 = 255; // checkme -- NOTE(review): with x2 = 255 the loop below stops at pixel 254, leaving pixel 255 outside the window; confirm
+
+ for (u32 i = x1; i < x2; i++)
+ mask[i] = WinCnt[1];
+ }
+
+ if ((DispCnt & (1<<13)) && Win0Active) // written last, so window 0 overrides window 1 and the OBJ window
+ {
+ // window 0: covers pixels x1 <= i < x2
+ u32 x1 = Win0Coords[0];
+ u32 x2 = Win0Coords[1];
+ if (x2 == 0 && x1 > 0) x2 = 256; // a right edge of 0 with a nonzero left edge extends to the end of the line
+ if (x1 > x2) x2 = 255; // checkme -- NOTE(review): same pixel-255 question as for window 1
+
+ for (u32 i = x1; i < x2; i++)
+ mask[i] = WinCnt[0];
+ }
+}
+
+
template<u32 bgmode>
void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst)
{
@@ -736,7 +875,8 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst)
void GPU2D::DrawScanline_Mode1(u32 line, u32* dst)
{
- u32 linebuf[256*2];
+ u32 linebuf[256*2 + 64];
+ u8* windowmask = (u8*)&linebuf[256*2];
u32 backdrop;
if (Num) backdrop = *(u16*)&GPU::Palette[0x400];
@@ -753,6 +893,11 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst)
linebuf[i] = backdrop;
}
+ if (DispCnt & 0xE000)
+ CalculateWindowMask(line, windowmask);
+ else
+ memset(windowmask, 0xFF, 256);
+
// prerender sprites
u32 spritebuf[256];
memset(spritebuf, 0, 256*4);
@@ -781,7 +926,11 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst)
u32 coloreffect, eva, evb;
u32 flag1 = val1 >> 24;
- if ((flag1 & 0x80) && (BlendCnt & ((val2 >> 16) & 0xFF00)))
+ if (!(windowmask[i] & 0x20))
+ {
+ coloreffect = 0;
+ }
+ else if ((flag1 & 0x80) && (BlendCnt & ((val2 >> 16) & 0xFF00)))
{
// sprite blending
@@ -904,10 +1053,10 @@ void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag)
void GPU2D::DrawBG_3D(u32 line, u32* dst)
{
- // TODO: window, as for everything
- // also check if window can prevent blending from happening
+ // TODO: check if window can prevent blending from happening
u32* src = GPU3D::GetLine(line);
+ u8* windowmask = (u8*)&dst[256*2];
u16 xoff = BGXPos[0];
int i = 0;
@@ -929,6 +1078,7 @@ void GPU2D::DrawBG_3D(u32 line, u32* dst)
xoff++;
if ((c >> 24) == 0) continue;
+ if (!(windowmask[i] & 0x01)) continue;
dst[i+256] = dst[i];
dst[i] = c | 0x40000000;
@@ -937,6 +1087,7 @@ void GPU2D::DrawBG_3D(u32 line, u32* dst)
void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum)
{
+ u8* windowmask = (u8*)&dst[256*2];
u16 bgcnt = BGCnt[bgnum];
u32 tilesetaddr, tilemapaddr;
@@ -1012,12 +1163,15 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum)
}
// draw pixel
- u8 color;
- u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7);
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff);
+ if (windowmask[i] & (1<<bgnum))
+ {
+ u8 color;
+ u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7);
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff);
- if (color)
- DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+ if (color)
+ DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+ }
xoff++;
}
@@ -1049,19 +1203,22 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum)
// draw pixel
// TODO: optimize VRAM access
- u8 color;
- u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7);
- if (tilexoff & 0x1)
+ if (windowmask[i] & (1<<bgnum))
{
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4;
- }
- else
- {
- color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F;
- }
+ u8 color;
+ u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7);
+ if (tilexoff & 0x1)
+ {
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4;
+ }
+ else
+ {
+ color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F;
+ }
- if (color)
- DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+ if (color)
+ DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum);
+ }
xoff++;
}
@@ -1070,6 +1227,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum)
void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum)
{
+ u8* windowmask = (u8*)&dst[256*2];
u16 bgcnt = BGCnt[bgnum];
u32 tilesetaddr, tilemapaddr;
@@ -1118,7 +1276,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum)
for (int i = 0; i < 256; i++)
{
- if (!((rotX|rotY) & overflowmask))
+ if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum)))
{
curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)));
@@ -1143,6 +1301,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum)
void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum)
{
+ u8* windowmask = (u8*)&dst[256*2];
u16 bgcnt = BGCnt[bgnum];
u32 tilesetaddr, tilemapaddr;
@@ -1188,7 +1347,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum)
for (int i = 0; i < 256; i++)
{
- if (!((rotX|rotY) & overflowmask))
+ if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum)))
{
u16 color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)) << 1));
@@ -1209,7 +1368,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum)
for (int i = 0; i < 256; i++)
{
- if (!((rotX|rotY) & overflowmask))
+ if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum)))
{
u8 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8));
@@ -1248,7 +1407,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum)
for (int i = 0; i < 256; i++)
{
- if (!((rotX|rotY) & overflowmask))
+ if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum)))
{
curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)) << 1));
@@ -1280,9 +1439,11 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum)
void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst)
{
+ u8* windowmask = (u8*)&dst[256*2];
+
for (u32 i = 0; i < 256; i++)
{
- if ((buf[i] & 0xF8000) == prio)
+ if (((buf[i] & 0xF8000) == prio) && (windowmask[i] & 0x10))
{
u32 blendfunc = 0;
DrawPixel(&dst[i], buf[i] & 0x7FFF, buf[i] & 0xFF000000);
@@ -1318,6 +1479,9 @@ void GPU2D::DrawSprites(u32 line, u32* dst)
if ((attrib[2] & 0x0C00) != bgnum)
continue;
+ if (((attrib[0] >> 10) & 0x3) == 2)
+ continue;
+
if (attrib[0] & 0x0100)
{
u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12);
@@ -1343,7 +1507,7 @@ void GPU2D::DrawSprites(u32 line, u32* dst)
u32 rotparamgroup = (attrib[1] >> 9) & 0x1F;
- DrawSprite_Rotscale(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst);
+ DrawSprite_Rotscale<false>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst);
}
else
{
@@ -1367,17 +1531,98 @@ void GPU2D::DrawSprites(u32 line, u32* dst)
if (attrib[1] & 0x2000)
ypos = height-1 - ypos;
- DrawSprite_Normal(attrib, width, xpos, ypos, dst);
+ DrawSprite_Normal<false>(attrib, width, xpos, ypos, dst);
+ }
+ }
+ }
+}
+
+void GPU2D::DrawSpritesWindow(u32 line, u8* dst) // renders the sprites whose attrib[0] bits 10-11 equal 2 (the ones DrawSprites skips) for this scanline into the byte buffer dst
+{
+ u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; // this engine's OAM: offset 0x400 when Num is nonzero, 0 otherwise
+
+ const s32 spritewidth[16] = // indexed by sizeparam; entries whose low two index bits are 3 are 0
+ {
+ 8, 16, 8, 0,
+ 16, 32, 8, 0,
+ 32, 32, 16, 0,
+ 64, 64, 32, 0
+ };
+ const s32 spriteheight[16] = // same indexing as spritewidth
+ {
+ 8, 8, 16, 0,
+ 16, 8, 32, 0,
+ 32, 16, 32, 0,
+ 64, 32, 64, 0
+ };
+
+ for (int sprnum = 127; sprnum >= 0; sprnum--) // walks all 128 entries, highest index first
+ {
+ u16* attrib = &oam[sprnum*4]; // four u16 words per OAM entry
+
+ if (((attrib[0] >> 10) & 0x3) != 2) // only mode-2 sprites contribute to the window
+ continue;
+
+ if (attrib[0] & 0x0100) // bit 8 set: rotscale path
+ {
+ u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); // low 2 bits from attrib[0] bits 14-15, high 2 bits from attrib[1] bits 14-15
+ s32 width = spritewidth[sizeparam];
+ s32 height = spriteheight[sizeparam];
+ s32 boundwidth = width;
+ s32 boundheight = height;
+
+ if (attrib[0] & 0x0200) // bit 9 set on a rotscale sprite: bounding box doubled in both directions
+ {
+ boundwidth <<= 1;
+ boundheight <<= 1;
}
+
+ u32 ypos = attrib[0] & 0xFF;
+ ypos = (line - ypos) & 0xFF; // row inside the bounding box, wrapping modulo 256
+ if (ypos >= (u32)boundheight) // scanline does not cross this sprite
+ continue;
+
+ s32 xpos = (s32)(attrib[1] << 23) >> 23; // sign-extends the low 9 bits of attrib[1]
+ if (xpos <= -boundwidth) // box ends at or before x = 0
+ continue;
+
+ u32 rotparamgroup = (attrib[1] >> 9) & 0x1F;
+
+ DrawSprite_Rotscale<true>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, (u32*)dst); // window=true: the callee writes bytes through a u8* cast of dst
+ }
+ else
+ {
+ if (attrib[0] & 0x0200) // bit 9 set on a non-rotscale sprite: sprite is skipped
+ continue;
+
+ u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); // same size lookup as the rotscale path
+ s32 width = spritewidth[sizeparam];
+ s32 height = spriteheight[sizeparam];
+
+ u32 ypos = attrib[0] & 0xFF;
+ ypos = (line - ypos) & 0xFF; // row inside the sprite, wrapping modulo 256
+ if (ypos >= (u32)height) // scanline does not cross this sprite
+ continue;
+
+ s32 xpos = (s32)(attrib[1] << 23) >> 23; // sign-extends the low 9 bits of attrib[1]
+ if (xpos <= -width) // sprite ends at or before x = 0
+ continue;
+
+ // yflip: attrib[1] bit 13 mirrors the row index
+ if (attrib[1] & 0x2000)
+ ypos = height-1 - ypos;
+
+ DrawSprite_Normal<true>(attrib, width, xpos, ypos, (u32*)dst); // window=true: the callee writes bytes through a u8* cast of dst
+ }
}
}
+template<bool window>
void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst)
{
u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000;
u32 tilenum = attrib[2] & 0x03FF;
- u32 spritemode = (attrib[0] >> 10) & 0x3;
+ u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
u32 ytilefactor;
@@ -1448,13 +1693,13 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32
{
if ((u32)rotX < width && (u32)rotY < height)
{
- u8 color;
-
- // blaaaarg
- color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
+ u8 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1));
if (color & 0x8000)
- dst[xpos] = color | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = color | prio;
+ }
}
rotX += rotA;
@@ -1488,20 +1733,23 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32
u32 extpal = (DispCnt & 0x80000000);
u16* pal;
- if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
- else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ if (!window)
+ {
+ if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
+ else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ }
for (; xoff < boundwidth;)
{
if ((u32)rotX < width && (u32)rotY < height)
{
- u8 color;
-
- // blaaaarg
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
+ u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8));
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
}
rotX += rotA;
@@ -1517,17 +1765,18 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32
ytilefactor <<= 5;
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
- u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
- pal += (attrib[2] & 0xF000) >> 8;
+ u16* pal;
+ if (!window)
+ {
+ pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ pal += (attrib[2] & 0xF000) >> 8;
+ }
for (; xoff < boundwidth;)
{
if ((u32)rotX < width && (u32)rotY < height)
{
- u8 color;
-
- // blaaaarg
- color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
+ u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9));
if (rotX & 0x100)
color >>= 4;
@@ -1535,7 +1784,10 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32
color &= 0x0F;
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
}
rotX += rotA;
@@ -1547,11 +1799,12 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32
}
}
+template<bool window>
void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst)
{
u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000;
u32 tilenum = attrib[2] & 0x03FF;
- u32 spritemode = (attrib[0] >> 10) & 0x3;
+ u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3);
u32 wmask = width - 8; // really ((width - 1) & ~0x7)
@@ -1606,18 +1859,44 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
}
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
- pixelsaddr += (xoff << 1);
- for (; xoff < xend;)
+ if (attrib[1] & 0x1000)
{
- u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
- pixelsaddr += 2;
+ pixelsaddr += ((width-1 - xoff) << 1);
- if (color & 0x8000)
- dst[xpos] = color | prio;
+ for (; xoff < xend;)
+ {
+ u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
+ pixelsaddr -= 2;
- xoff++;
- xpos++;
+ if (color & 0x8000)
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = color | prio;
+ }
+
+ xoff++;
+ xpos++;
+ }
+ }
+ else
+ {
+ pixelsaddr += (xoff << 1);
+
+ for (; xoff < xend;)
+ {
+ u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr);
+ pixelsaddr += 2;
+
+ if (color & 0x8000)
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = color | prio;
+ }
+
+ xoff++;
+ xpos++;
+ }
}
}
else
@@ -1645,8 +1924,11 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
u32 extpal = (DispCnt & 0x80000000);
u16* pal;
- if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
- else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ if (!window)
+ {
+ if (extpal) pal = GetOBJExtPal(attrib[2] >> 12);
+ else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ }
if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works
{
@@ -1659,7 +1941,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
pixelsaddr--;
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
xoff++;
xpos++;
@@ -1677,7 +1962,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
pixelsaddr++;
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
xoff++;
xpos++;
@@ -1692,8 +1980,12 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum;
pixelsaddr += ((ypos & 0x7) << 2);
- u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
- pal += (attrib[2] & 0xF000) >> 8;
+ u16* pal;
+ if (!window)
+ {
+ pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200];
+ pal += (attrib[2] & 0xF000) >> 8;
+ }
if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works
{
@@ -1714,7 +2006,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
}
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
xoff++;
xpos++;
@@ -1740,7 +2035,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d
}
if (color)
- dst[xpos] = pal[color] | prio;
+ {
+ if (window) ((u8*)dst)[xpos] = 1;
+ else dst[xpos] = pal[color] | prio;
+ }
xoff++;
xpos++;
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 38cbe7e..4bf698d 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -40,6 +40,8 @@ public:
void VBlank();
void VBlankEnd();
+ void CheckWindows(u32 line);
+
void BGExtPalDirty(u32 base);
void OBJExtPalDirty();
@@ -69,6 +71,12 @@ private:
s16 BGRotC[2];
s16 BGRotD[2];
+ u8 Win0Coords[4];
+ u8 Win1Coords[4];
+ u8 WinCnt[4];
+ bool Win0Active;
+ bool Win1Active;
+
u16 BlendCnt;
u8 EVA, EVB;
u8 EVY;
@@ -88,16 +96,19 @@ private:
void DrawPixel(u32* dst, u16 color, u32 flag);
void DrawBG_3D(u32 line, u32* dst);
- void DrawBG_Text(u32 line, u32* dst, u32 num);
+ void DrawBG_Text(u32 line, u32* dst, u32 bgnum);
void DrawBG_Affine(u32 line, u32* dst, u32 bgnum);
void DrawBG_Extended(u32 line, u32* dst, u32 bgnum);
void InterleaveSprites(u32* buf, u32 prio, u32* dst);
void DrawSprites(u32 line, u32* dst);
- void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst);
- void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst);
+ void DrawSpritesWindow(u32 line, u8* dst);
+ template<bool window> void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst);
+ template<bool window> void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst);
void DoCapture(u32 line, u32 width, u32* src);
+
+ void CalculateWindowMask(u32 line, u8* mask);
};
#endif
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index 9881760..72e7179 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -49,7 +49,6 @@
//
// formula for clear depth: (GBAtek is wrong there)
// clearZ = (val * 0x200) + 0x1FF;
-// if (clearZ >= 0x010000 && clearZ < 0xFFFFFF) clearZ++;
//
// alpha is 5-bit
//
@@ -58,6 +57,10 @@
// (the idea is that each position matrix has an associated vector matrix)
//
// TODO: check if translate works on the vector matrix? seems pointless
+//
+// viewport Y coordinates are upside-down
+//
+// clear color/depth/bitmap/etc registers (04000350/04000354) are double-buffered
namespace GPU3D
@@ -150,7 +153,8 @@ FIFO<CmdFIFOEntry>* CmdPIPE;
u32 NumCommands, CurCommand, ParamCount, TotalParams;
u32 DispCnt;
-u32 AlphaRef;
+u8 AlphaRefVal;
+u8 AlphaRef;
u16 ToonTable[32];
u16 EdgeTable[8];
@@ -165,6 +169,9 @@ u32 ExecParams[32];
u32 ExecParamCount;
s32 CycleCount;
+u32 NumPushPopCommands;
+u32 NumTestCommands;
+
u32 MatrixMode;
@@ -213,6 +220,9 @@ u32 CurPolygonAttr;
u32 TexParam;
u32 TexPalette;
+s32 PosTestResult[4];
+s16 VecTestResult[3];
+
Vertex TempVertexBuffer[4];
u32 VertexNum;
u32 VertexNumInPoly;
@@ -232,6 +242,7 @@ Polygon* RenderPolygonRAM;
u32 RenderNumPolygons;
u32 ClearAttr1, ClearAttr2;
+u32 RenderClearAttr1, RenderClearAttr2;
u32 FlushRequest;
u32 FlushAttributes;
@@ -266,6 +277,9 @@ void Reset()
ParamCount = 0;
TotalParams = 0;
+ NumPushPopCommands = 0;
+ NumTestCommands = 0;
+
DispCnt = 0;
AlphaRef = 0;
@@ -296,6 +310,9 @@ void Reset()
PosMatrixStackPointer = 0;
TexMatrixStackPointer = 0;
+ memset(PosTestResult, 0, 4*4);
+ memset(VecTestResult, 0, 2*3);
+
VertexNum = 0;
VertexNumInPoly = 0;
@@ -449,44 +466,148 @@ void UpdateClipMatrix()
-template<int comp, s32 plane>
+template<int comp, s32 plane, bool attribs>
void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin)
{
s64 factor_num = vin->Position[3] - (plane*vin->Position[comp]);
s32 factor_den = factor_num - (vout->Position[3] - (plane*vout->Position[comp]));
- Vertex mid;
-#define INTERPOLATE(var) { mid.var = (vin->var + ((vout->var - vin->var) * factor_num) / factor_den); }
+#define INTERPOLATE(var) { outbuf->var = (vin->var + ((vout->var - vin->var) * factor_num) / factor_den); }
if (comp != 0) INTERPOLATE(Position[0]);
if (comp != 1) INTERPOLATE(Position[1]);
if (comp != 2) INTERPOLATE(Position[2]);
INTERPOLATE(Position[3]);
- mid.Position[comp] = plane*mid.Position[3];
+ outbuf->Position[comp] = plane*outbuf->Position[3];
- INTERPOLATE(Color[0]);
- INTERPOLATE(Color[1]);
- INTERPOLATE(Color[2]);
+ if (attribs)
+ {
+ INTERPOLATE(Color[0]);
+ INTERPOLATE(Color[1]);
+ INTERPOLATE(Color[2]);
- INTERPOLATE(TexCoords[0]);
- INTERPOLATE(TexCoords[1]);
+ INTERPOLATE(TexCoords[0]);
+ INTERPOLATE(TexCoords[1]);
+ }
- mid.Clipped = true;
+ outbuf->Clipped = true;
#undef INTERPOLATE
- *outbuf = mid;
+}
+
+template<int comp, bool attribs> // comp: index into Vertex::Position to clip on; attribs: forwarded to ClipSegment, which only interpolates Color/TexCoords when it is true
+int ClipAgainstPlane(Vertex* vertices, int nverts, int clipstart) // clips vertices[clipstart..nverts) in place against Position[comp] > Position[3], then against Position[comp] < -Position[3]; returns the new vertex count
+{
+ Vertex temp[10]; // output of the first pass, input of the second
+ int prev, next;
+ int c = clipstart; // output index; entries below clipstart are never rewritten by the clipping passes
+
+ if (clipstart == 2)
+ {
+ temp[0] = vertices[0]; // the two leading vertices are copied unclipped so the second pass can read them as neighbours
+ temp[1] = vertices[1];
+ }
+
+ for (int i = clipstart; i < nverts; i++) // first pass: vertices -> temp, upper bound (+Position[3])
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1; // neighbour indices wrap around the polygon
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = vertices[i];
+ if (vtx.Position[comp] > vtx.Position[3]) // vertex is beyond the upper bound
+ {
+ if ((comp == 2) && (!(CurPolygonAttr & (1<<12)))) return 0; // comp 2 only: with CurPolygonAttr bit 12 clear the whole polygon is rejected (0 vertices) instead of clipped
+
+ Vertex* vprev = &vertices[prev];
+ if (vprev->Position[comp] <= vprev->Position[3]) // previous neighbour is inside: emit a vertex on the edge between them
+ {
+ ClipSegment<comp, 1, attribs>(&temp[c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &vertices[next];
+ if (vnext->Position[comp] <= vnext->Position[3]) // next neighbour is inside: emit a vertex on the edge between them
+ {
+ ClipSegment<comp, 1, attribs>(&temp[c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ temp[c++] = vtx; // inside: kept unchanged
+ }
+
+ nverts = c; c = clipstart; // second pass works on the first pass's output
+ for (int i = clipstart; i < nverts; i++) // second pass: temp -> vertices, lower bound (-Position[3])
+ {
+ prev = i-1; if (prev < 0) prev = nverts-1;
+ next = i+1; if (next >= nverts) next = 0;
+
+ Vertex vtx = temp[i];
+ if (vtx.Position[comp] < -vtx.Position[3]) // vertex is beyond the lower bound
+ {
+ Vertex* vprev = &temp[prev];
+ if (vprev->Position[comp] >= -vprev->Position[3]) // previous neighbour is inside
+ {
+ ClipSegment<comp, -1, attribs>(&vertices[c], &vtx, vprev);
+ c++;
+ }
+
+ Vertex* vnext = &temp[next];
+ if (vnext->Position[comp] >= -vnext->Position[3]) // next neighbour is inside
+ {
+ ClipSegment<comp, -1, attribs>(&vertices[c], &vtx, vnext);
+ c++;
+ }
+ }
+ else
+ vertices[c++] = vtx; // inside: kept unchanged
+ }
+
+ // checkme -- forces the low 12 bits of every color component to 0xFFF for all output vertices, including those below clipstart, regardless of attribs
+ for (int i = 0; i < c; i++)
+ {
+ Vertex* vtx = &vertices[i];
+
+ vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
+ vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
+ vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
+ }
+
+ return c; // number of vertices now stored in vertices[]
+}
+
+template<bool attribs> // attribs is passed through unchanged to each ClipAgainstPlane call
+int ClipPolygon(Vertex* vertices, int nverts, int clipstart) // clips the polygon in place on Position components 0, 1 and 2 in turn; returns the final vertex count
+{
+ // clip.
+ // for each vertex:
+ // if it's outside, check if the previous and next vertices are inside
+ // if so, place a new vertex at the edge of the view volume
+
+ // TODO: check for 1-dot polygons
+ // TODO: the hardware seems to use a different algorithm. it reacts differently to vertices with W=0
+
+ // X clipping
+ nverts = ClipAgainstPlane<0, attribs>(vertices, nverts, clipstart); // each stage's vertex count feeds the next stage
+
+ // Y clipping
+ nverts = ClipAgainstPlane<1, attribs>(vertices, nverts, clipstart);
+
+ // Z clipping
+ nverts = ClipAgainstPlane<2, attribs>(vertices, nverts, clipstart); // the Z stage can return 0 (see its CurPolygonAttr bit 12 check)
+
+ return nverts;
}
void SubmitPolygon()
{
- Vertex clippedvertices[2][10];
+ Vertex clippedvertices[10];
Vertex* reusedvertices[2];
int clipstart = 0;
int lastpolyverts = 0;
int nverts = PolygonMode & 0x1 ? 4:3;
int prev, next;
- int c;
// culling
@@ -500,7 +621,7 @@ void SubmitPolygon()
normalX = (((s64)v0->Position[1] * v2->Position[3]) - ((s64)v0->Position[3] * v2->Position[1])) >> 12;
normalY = (((s64)v0->Position[3] * v2->Position[0]) - ((s64)v0->Position[0] * v2->Position[3])) >> 12;
normalZ = (((s64)v0->Position[0] * v2->Position[1]) - ((s64)v0->Position[1] * v2->Position[0])) >> 12;
- dot = ((s64)(v1->Position[0] >> 0) * normalX) + ((s64)(v1->Position[1] >> 0) * normalY) + ((s64)(v1->Position[3] >> 0) * normalZ);
+ dot = ((s64)v1->Position[0] * normalX) + ((s64)v1->Position[1] * normalY) + ((s64)v1->Position[3] * normalZ);
bool facingview = (dot < 0);
@@ -558,225 +679,21 @@ void SubmitPolygon()
reusedvertices[0] = LastStripPolygon->Vertices[id0];
reusedvertices[1] = LastStripPolygon->Vertices[id1];
- clippedvertices[0][0] = *reusedvertices[0];
- clippedvertices[0][1] = *reusedvertices[1];
- clippedvertices[1][0] = *reusedvertices[0];
- clippedvertices[1][1] = *reusedvertices[1];
+ clippedvertices[0] = *reusedvertices[0];
+ clippedvertices[1] = *reusedvertices[1];
clipstart = 2;
}
}
- // clip.
- // for each vertex:
- // if it's outside, check if the previous and next vertices are inside
- // if so, place a new vertex at the edge of the view volume
-
- // X clipping
-
- c = clipstart;
- for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
-
- Vertex vtx = TempVertexBuffer[i];
- if (vtx.Position[0] > vtx.Position[3])
- {
- Vertex* vprev = &TempVertexBuffer[prev];
- if (vprev->Position[0] <= vprev->Position[3])
- {
- ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev);
- c++;
- }
-
- Vertex* vnext = &TempVertexBuffer[next];
- if (vnext->Position[0] <= vnext->Position[3])
- {
- ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[0][c++] = vtx;
- }
-
- nverts = c; c = clipstart;
for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
+ clippedvertices[i] = TempVertexBuffer[i];
- Vertex vtx = clippedvertices[0][i];
- if (vtx.Position[0] < -vtx.Position[3])
- {
- Vertex* vprev = &clippedvertices[0][prev];
- if (vprev->Position[0] >= -vprev->Position[3])
- {
- ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev);
- c++;
- }
+ // clipping
- Vertex* vnext = &clippedvertices[0][next];
- if (vnext->Position[0] >= -vnext->Position[3])
- {
- ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[1][c++] = vtx;
- }
+ nverts = ClipPolygon<true>(clippedvertices, nverts, clipstart);
- for (int i = 0; i < c; i++)
- {
- Vertex* vtx = &clippedvertices[1][i];
-
- vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
- vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
- vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
- }
-
- // Y clipping
-
- nverts = c; c = clipstart;
- for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
-
- Vertex vtx = clippedvertices[1][i];
- if (vtx.Position[1] > vtx.Position[3])
- {
- Vertex* vprev = &clippedvertices[1][prev];
- if (vprev->Position[1] <= vprev->Position[3])
- {
- ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev);
- c++;
- }
-
- Vertex* vnext = &clippedvertices[1][next];
- if (vnext->Position[1] <= vnext->Position[3])
- {
- ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[0][c++] = vtx;
- }
-
- nverts = c; c = clipstart;
- for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
-
- Vertex vtx = clippedvertices[0][i];
- if (vtx.Position[1] < -vtx.Position[3])
- {
- Vertex* vprev = &clippedvertices[0][prev];
- if (vprev->Position[1] >= -vprev->Position[3])
- {
- ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev);
- c++;
- }
-
- Vertex* vnext = &clippedvertices[0][next];
- if (vnext->Position[1] >= -vnext->Position[3])
- {
- ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[1][c++] = vtx;
- }
-
- for (int i = 0; i < c; i++)
- {
- Vertex* vtx = &clippedvertices[1][i];
-
- vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
- vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
- vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
- }
-
- // Z clipping
-
- bool farplaneclip = false;
- nverts = c; c = clipstart;
- for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
-
- Vertex vtx = clippedvertices[1][i];
- if (vtx.Position[2] > vtx.Position[3])
- {
- farplaneclip = true;
-
- Vertex* vprev = &clippedvertices[1][prev];
- if (vprev->Position[2] <= vprev->Position[3])
- {
- ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev);
- c++;
- }
-
- Vertex* vnext = &clippedvertices[1][next];
- if (vnext->Position[2] <= vnext->Position[3])
- {
- ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[0][c++] = vtx;
- }
-
- if (farplaneclip && (!(CurPolygonAttr & (1<<12))))
- {
- LastStripPolygon = NULL;
- return;
- }
-
- nverts = c; c = clipstart;
- for (int i = clipstart; i < nverts; i++)
- {
- prev = i-1; if (prev < 0) prev = nverts-1;
- next = i+1; if (next >= nverts) next = 0;
-
- Vertex vtx = clippedvertices[0][i];
- if (vtx.Position[2] < -vtx.Position[3])
- {
- Vertex* vprev = &clippedvertices[0][prev];
- if (vprev->Position[2] >= -vprev->Position[3])
- {
- ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev);
- c++;
- }
-
- Vertex* vnext = &clippedvertices[0][next];
- if (vnext->Position[2] >= -vnext->Position[3])
- {
- ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext);
- c++;
- }
- }
- else
- clippedvertices[1][c++] = vtx;
- }
-
- for (int i = 0; i < c; i++)
- {
- Vertex* vtx = &clippedvertices[1][i];
-
- vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF;
- vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF;
- vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF;
- }
-
- if (c == 0)
+ if (nverts == 0)
{
LastStripPolygon = NULL;
return;
@@ -784,10 +701,10 @@ void SubmitPolygon()
// build the actual polygon
- if (NumPolygons >= 2048 || NumVertices+c > 6144)
+ if (NumPolygons >= 2048 || NumVertices+nverts > 6144)
{
LastStripPolygon = NULL;
- // TODO: set DISP3DCNT overflow flag
+ DispCnt |= (1<<13);
return;
}
@@ -802,11 +719,19 @@ void SubmitPolygon()
u32 texfmt = (TexParam >> 26) & 0x7;
u32 polyalpha = (CurPolygonAttr >> 16) & 0x1F;
- poly->Translucent = (texfmt == 1 || texfmt == 6 || (polyalpha > 0 && polyalpha < 31));
+ poly->Translucent = ((texfmt == 1 || texfmt == 6) && !(CurPolygonAttr & 0x10)) || (polyalpha > 0 && polyalpha < 31);
+
+ poly->IsShadowMask = ((CurPolygonAttr & 0x3F000030) == 0x00000030);
+ if ((NumPolygons == 1) || (!CurPolygonRAM[NumPolygons-2].IsShadowMask))
+ poly->ClearStencil = poly->IsShadowMask;
+ else
+ poly->ClearStencil = false;
+
+ poly->IsShadow = ((CurPolygonAttr & 0x30) == 0x30) && !poly->IsShadowMask;
if (LastStripPolygon && clipstart > 0)
{
- if (c == lastpolyverts)
+ if (nverts == lastpolyverts)
{
poly->Vertices[0] = reusedvertices[0];
poly->Vertices[1] = reusedvertices[1];
@@ -826,10 +751,10 @@ void SubmitPolygon()
poly->NumVertices += 2;
}
- for (int i = clipstart; i < c; i++)
+ for (int i = clipstart; i < nverts; i++)
{
Vertex* vtx = &CurVertexRAM[NumVertices];
- *vtx = clippedvertices[1][i];
+ *vtx = clippedvertices[i];
poly->Vertices[i] = vtx;
NumVertices++;
@@ -847,24 +772,29 @@ void SubmitPolygon()
}
else
{
+ // W is normalized, such that all the polygon's W values fit within 16 bits
+ // the viewport transform for X and Y uses the original W values, but
+ // the transform for Z uses the normalized W values
+ // W normalization is applied to separate polygons, even within strips
+
posX = (((s64)(vtx->Position[0] + w) * Viewport[2]) / (((s64)w) << 1)) + Viewport[0];
posY = (((s64)(-vtx->Position[1] + w) * Viewport[3]) / (((s64)w) << 1)) + Viewport[1];
- if (FlushAttributes & 0x2) posZ = w;
- else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF;
+ //if (FlushAttributes & 0x2) posZ = w;
+ //else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF;
}
if (posX < 0) posX = 0;
else if (posX > 256) posX = 256;
if (posY < 0) posY = 0;
else if (posY > 192) posY = 192;
- if (posZ < 0) posZ = 0;
- else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF;
+ //if (posZ < 0) posZ = 0;
+ //else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF;
vtx->FinalPosition[0] = posX;
vtx->FinalPosition[1] = posY;
- vtx->FinalPosition[2] = posZ;
- vtx->FinalPosition[3] = w;
+ //vtx->FinalPosition[2] = posZ;
+ //vtx->FinalPosition[3] = w;
vtx->FinalColor[0] = vtx->Color[0] >> 12;
if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF);
@@ -875,11 +805,15 @@ void SubmitPolygon()
}
// determine bounds of the polygon
+ // also determine the W shift and normalize W
+ // TODO: normalization works both ways
+
u32 vtop = 0, vbot = 0;
s32 ytop = 192, ybot = 0;
s32 xtop = 256, xbot = 0;
+ u32 wshift = 0;
- for (int i = 0; i < c; i++)
+ for (int i = 0; i < nverts; i++)
{
Vertex* vtx = poly->Vertices[i];
@@ -895,11 +829,36 @@ void SubmitPolygon()
ybot = vtx->FinalPosition[1];
vbot = i;
}
+
+ u32 w = (u32)vtx->Position[3];
+ while ((w >> wshift) & 0xFFFF0000)
+ wshift += 4;
}
poly->VTop = vtop; poly->VBottom = vbot;
poly->YTop = ytop; poly->YBottom = ybot;
poly->XTop = xtop; poly->XBottom = xbot;
+ poly->WShift = wshift;
+ poly->WBuffer = (FlushAttributes & 0x2);
+
+ for (int i = 0; i < nverts; i++)
+ {
+ Vertex* vtx = poly->Vertices[i];
+ s32 w = vtx->Position[3] >> wshift;
+
+ s32 z;
+ if (FlushAttributes & 0x2)
+ z = w << wshift;
+ else
+ z = (((s64)vtx->Position[2] * 0x800000) / (w << wshift)) + 0x7FFEFF;
+
+ // checkme
+ if (z < 0) z = 0;
+ else if (z > 0xFFFFFF) z = 0xFFFFFF;
+
+ poly->FinalZ[i] = z;
+ poly->FinalW[i] = w;
+ }
if (PolygonMode >= 2)
LastStripPolygon = poly;
@@ -926,8 +885,8 @@ void SubmitVertex()
if ((TexParam >> 30) == 3)
{
- vertextrans->TexCoords[0] = (vertex[0]*TexMatrix[0] + vertex[1]*TexMatrix[4] + vertex[2]*TexMatrix[8] + vertex[3]*(RawTexCoords[0]<<8)) >> 20;
- vertextrans->TexCoords[1] = (vertex[0]*TexMatrix[1] + vertex[1]*TexMatrix[5] + vertex[2]*TexMatrix[9] + vertex[3]*(RawTexCoords[1]<<8)) >> 20;
+ vertextrans->TexCoords[0] = ((vertex[0]*TexMatrix[0] + vertex[1]*TexMatrix[4] + vertex[2]*TexMatrix[8]) >> 24) + RawTexCoords[0];
+ vertextrans->TexCoords[1] = ((vertex[0]*TexMatrix[1] + vertex[1]*TexMatrix[5] + vertex[2]*TexMatrix[9]) >> 24) + RawTexCoords[1];
}
else
{
@@ -1004,6 +963,10 @@ void SubmitVertex()
s32 CalculateLighting()
{
+ // TODO: this requires matrix mode 2, apparently
+ // hardware seems to read garbage when matrix mode isn't 2
+ // also, non-normal normals seem to be treated as zero? or overflow to negative?
+
if ((TexParam >> 30) == 2)
{
TexCoords[0] = RawTexCoords[0] + (((s64)Normal[0]*TexMatrix[0] + (s64)Normal[1]*TexMatrix[4] + (s64)Normal[2]*TexMatrix[8]) >> 21);
@@ -1070,6 +1033,134 @@ s32 CalculateLighting()
}
+// Executes the box test.
+//
+// params[0..2] pack six signed 16-bit values, low half first: the X, Y, Z of
+// one corner of the box, followed by the box's extent along X, Y and Z (the
+// extents are added to the corner to obtain the opposite corner).
+//
+// The eight corners are transformed by the clip matrix, then each of the six
+// faces is run through ClipPolygon. Bit 1 of GXStat is cleared on entry and set
+// as soon as one face still has vertices left after clipping.
+void BoxTest(u32* params)
+{
+    // cube[] indices of the four corners of each face, in the order the faces are tested
+    static const int facecorners[6][4] =
+    {
+        {0, 1, 2, 3}, // front face (-Z)
+        {4, 5, 6, 7}, // back face (+Z)
+        {0, 3, 4, 5}, // left face (-X)
+        {1, 2, 7, 6}, // right face (+X)
+        {0, 1, 6, 5}, // bottom face (-Y)
+        {2, 3, 4, 7}, // top face (+Y)
+    };
+
+    Vertex cube[8];
+    Vertex face[10];
+    int res;
+
+    GXStat &= ~(1<<1);
+
+    s32 x0 = (s32)(s16)(params[0] & 0xFFFF);
+    s32 y0 = ((s32)params[0]) >> 16;
+    s32 z0 = (s32)(s16)(params[1] & 0xFFFF);
+    s32 x1 = ((s32)params[1]) >> 16;
+    s32 y1 = (s32)(s16)(params[2] & 0xFFFF);
+    s32 z1 = ((s32)params[2]) >> 16;
+
+    // x1/y1/z1 arrive as extents; turn them into the opposite corner
+    x1 += x0;
+    y1 += y0;
+    z1 += z0;
+
+    cube[0].Position[0] = x0; cube[0].Position[1] = y0; cube[0].Position[2] = z0;
+    cube[1].Position[0] = x1; cube[1].Position[1] = y0; cube[1].Position[2] = z0;
+    cube[2].Position[0] = x1; cube[2].Position[1] = y1; cube[2].Position[2] = z0;
+    cube[3].Position[0] = x0; cube[3].Position[1] = y1; cube[3].Position[2] = z0;
+    cube[4].Position[0] = x0; cube[4].Position[1] = y1; cube[4].Position[2] = z1;
+    cube[5].Position[0] = x0; cube[5].Position[1] = y0; cube[5].Position[2] = z1;
+    cube[6].Position[0] = x1; cube[6].Position[1] = y0; cube[6].Position[2] = z1;
+    cube[7].Position[0] = x1; cube[7].Position[1] = y1; cube[7].Position[2] = z1;
+
+    UpdateClipMatrix();
+    for (int i = 0; i < 8; i++)
+    {
+        s32 x = cube[i].Position[0];
+        s32 y = cube[i].Position[1];
+        s32 z = cube[i].Position[2];
+
+        for (int j = 0; j < 4; j++)
+        {
+            // every product is widened to s64, including the W=0x1000 translation term:
+            // left as a plain int multiply it could overflow for large matrix entries
+            cube[i].Position[j] = ((s64)x*ClipMatrix[j] + (s64)y*ClipMatrix[4+j] + (s64)z*ClipMatrix[8+j] + (s64)0x1000*ClipMatrix[12+j]) >> 12;
+        }
+    }
+
+    for (int f = 0; f < 6; f++)
+    {
+        // ClipPolygon works in place, so the face is rebuilt from cube[] every time
+        for (int v = 0; v < 4; v++)
+            face[v] = cube[facecorners[f][v]];
+
+        res = ClipPolygon<false>(face, 4, 0);
+        if (res > 0)
+        {
+            // at least part of this face is inside the view volume
+            GXStat |= (1<<1);
+            return;
+        }
+    }
+}
+
+// Executes the position test: transforms CurVertex (extended with a fourth
+// component of 0x1000) by the clip matrix and stores the four resulting
+// components, shifted right by 12, in PosTestResult.
+void PosTest()
+{
+    const s64 in[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000};
+
+    UpdateClipMatrix();
+    for (int col = 0; col < 4; col++)
+    {
+        // dot product of the input vector with column 'col' of the matrix
+        s64 sum = 0;
+        for (int row = 0; row < 4; row++)
+            sum += in[row] * ClipMatrix[(row*4) + col];
+
+        PosTestResult[col] = sum >> 12;
+    }
+}
+
+// Executes the vector test: unpacks three 10-bit components from params[0],
+// multiplies them by the upper-left 3x3 part of VecMatrix and stores the
+// results, shifted right by 9, in VecTestResult.
+void VecTest(u32* params)
+{
+    // TODO: apparently requires matrix mode 2
+    // TODO: maybe it overwrites the normal registers, too
+
+    s16 n[3];
+    for (int i = 0; i < 3; i++)
+    {
+        // place the 10-bit field in the top bits of an s16, then shift it back
+        // down so that its top bit is propagated as the sign
+        n[i] = (s16)(((params[0] >> (10*i)) & 0x3FF) << 6) >> 6;
+    }
+
+    for (int i = 0; i < 3; i++)
+    {
+        VecTestResult[i] = (n[0]*VecMatrix[i] + n[1]*VecMatrix[4+i] + n[2]*VecMatrix[8+i]) >> 9;
+
+        // when bit 12 is set, force bits 12-15 on as well
+        if (VecTestResult[i] & 0x1000) VecTestResult[i] |= 0xF000;
+    }
+}
+
+
void CmdFIFOWrite(CmdFIFOEntry& entry)
{
@@ -1096,6 +1187,17 @@ void CmdFIFOWrite(CmdFIFOEntry& entry)
CmdFIFO->Write(entry);
}
+
+ if (entry.Command == 0x11 || entry.Command == 0x12)
+ {
+ GXStat |= (1<<14); // push/pop matrix
+ NumPushPopCommands++;
+ }
+ else if (entry.Command == 0x70 || entry.Command == 0x71 || entry.Command == 0x72)
+ {
+ GXStat |= (1<<0); // box/pos/vec test
+ NumTestCommands++;
+ }
}
CmdFIFOEntry CmdFIFORead()
@@ -1132,7 +1234,6 @@ void ExecuteCommand()
CycleCount += CmdNumCycles[entry.Command];
ExecParamCount = 0;
- GXStat &= ~(1<<14);
if (CycleCount > 0)
GXStat |= (1<<27);
@@ -1143,6 +1244,7 @@ void ExecuteCommand()
break;
case 0x11: // push matrix
+ NumPushPopCommands--;
if (MatrixMode == 0)
{
if (ProjMatrixStackPointer > 0)
@@ -1154,7 +1256,6 @@ void ExecuteCommand()
memcpy(ProjMatrixStack, ProjMatrix, 16*4);
ProjMatrixStackPointer++;
- GXStat |= (1<<14);
}
else if (MatrixMode == 3)
{
@@ -1167,7 +1268,6 @@ void ExecuteCommand()
memcpy(TexMatrixStack, TexMatrix, 16*4);
TexMatrixStackPointer++;
- GXStat |= (1<<14);
}
else
{
@@ -1181,11 +1281,11 @@ void ExecuteCommand()
memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4);
memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4);
PosMatrixStackPointer++;
- GXStat |= (1<<14);
}
break;
case 0x12: // pop matrix
+ NumPushPopCommands--;
if (MatrixMode == 0)
{
if (ProjMatrixStackPointer <= 0)
@@ -1197,7 +1297,6 @@ void ExecuteCommand()
ProjMatrixStackPointer--;
memcpy(ProjMatrix, ProjMatrixStack, 16*4);
- GXStat |= (1<<14);
ClipMatrixDirty = true;
}
else if (MatrixMode == 3)
@@ -1211,7 +1310,6 @@ void ExecuteCommand()
TexMatrixStackPointer--;
memcpy(TexMatrix, TexMatrixStack, 16*4);
- GXStat |= (1<<14);
}
else
{
@@ -1228,7 +1326,6 @@ void ExecuteCommand()
memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4);
memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4);
- GXStat |= (1<<14);
ClipMatrixDirty = true;
}
break;
@@ -1587,10 +1684,29 @@ void ExecuteCommand()
break;
case 0x60: // viewport x1,y1,x2,y2
+ // note: viewport Y coordinates are upside-down
Viewport[0] = ExecParams[0] & 0xFF;
- Viewport[1] = (ExecParams[0] >> 8) & 0xFF;
+ Viewport[1] = 191 - (ExecParams[0] >> 24);
Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1;
- Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1;
+ Viewport[3] = (191 - ((ExecParams[0] >> 8) & 0xFF)) - Viewport[1] + 1;
+ break;
+
+ case 0x70: // box test
+ NumTestCommands -= 3;
+ BoxTest(ExecParams);
+ break;
+
+ case 0x71: // pos test
+ NumTestCommands -= 2;
+ CurVertex[0] = ExecParams[0] & 0xFFFF;
+ CurVertex[1] = ExecParams[0] >> 16;
+ CurVertex[2] = ExecParams[1] & 0xFFFF;
+ PosTest();
+ break;
+
+ case 0x72: // vec test
+ NumTestCommands--;
+ VecTest(ExecParams);
break;
default:
@@ -1613,13 +1729,21 @@ void Run(s32 cycles)
if (CycleCount <= 0)
{
while (CycleCount <= 0 && !CmdPIPE->IsEmpty())
+ {
+ if (NumPushPopCommands == 0) GXStat &= ~(1<<14);
+ if (NumTestCommands == 0) GXStat &= ~(1<<0);
+
ExecuteCommand();
+ }
}
if (CycleCount <= 0 && CmdPIPE->IsEmpty())
{
CycleCount = 0;
- GXStat &= ~((1<<27)|(1<<14));
+ GXStat &= ~(1<<27);
+
+ if (NumPushPopCommands == 0) GXStat &= ~(1<<14);
+ if (NumTestCommands == 0) GXStat &= ~(1<<0);
}
}
@@ -1652,6 +1776,10 @@ void VBlank()
RenderPolygonRAM = CurPolygonRAM;
RenderNumPolygons = NumPolygons;
+ // TODO: find out which other registers are latched for rendering
+ RenderClearAttr1 = ClearAttr1;
+ RenderClearAttr2 = ClearAttr2;
+
CurRAMBank = CurRAMBank?0:1;
CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0];
CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0];
@@ -1683,6 +1811,45 @@ u32* GetLine(int line)
}
+// Feeds one 32-bit word into the packed-command unpacker and queues the
+// resulting entries through CmdFIFOWrite.
+//
+// State carried between calls:
+//   CurCommand  - command bytes still to process (current command in the low byte)
+//   NumCommands - command slots left in the current word; 0 means the next
+//                 word written is a new command word
+//   ParamCount  - parameter words received so far for the current command
+//   TotalParams - parameter words the current command needs (from CmdNumParams)
+void WriteToGXFIFO(u32 val)
+{
+ if (NumCommands == 0)
+ {
+ // no command pending: val is a new word holding up to four command bytes
+ NumCommands = 4;
+ CurCommand = val;
+ ParamCount = 0;
+ TotalParams = CmdNumParams[CurCommand & 0xFF];
+
+ // the first command takes parameters: wait for the next write
+ if (TotalParams > 0) return;
+ }
+ else
+ ParamCount++;
+
+ for (;;)
+ {
+ // queue an entry for the current command byte. zero bytes are skipped,
+ // except when the whole command word is zero and this is its first slot
+ if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
+ {
+ CmdFIFOEntry entry;
+ entry.Command = CurCommand & 0xFF;
+ entry.Param = val;
+ CmdFIFOWrite(entry);
+ }
+
+ if (ParamCount >= TotalParams)
+ {
+ // current command is complete: move on to the next command byte
+ CurCommand >>= 8;
+ NumCommands--;
+ if (NumCommands == 0) break;
+
+ ParamCount = 0;
+ TotalParams = CmdNumParams[CurCommand & 0xFF];
+ }
+ // the (possibly new) current command still needs parameters: wait for the
+ // next write. otherwise loop again to process a parameterless command
+ if (ParamCount < TotalParams)
+ break;
+ }
+}
+
+
u8 Read8(u32 addr)
{
printf("unknown GPU3D read8 %08X\n", addr);
@@ -1703,6 +1870,10 @@ u16 Read16(u32 addr)
return NumPolygons;
case 0x04000606:
return NumVertices;
+
+ case 0x04000630: return VecTestResult[0];
+ case 0x04000632: return VecTestResult[1];
+ case 0x04000634: return VecTestResult[2];
}
printf("unknown GPU3D read16 %08X\n", addr);
@@ -1734,6 +1905,11 @@ u32 Read32(u32 addr)
case 0x04000604:
return NumPolygons | (NumVertices << 16);
+ case 0x04000620: return PosTestResult[0];
+ case 0x04000624: return PosTestResult[1];
+ case 0x04000628: return PosTestResult[2];
+ case 0x0400062C: return PosTestResult[3];
+
case 0x04000680: return VecMatrix[0];
case 0x04000684: return VecMatrix[1];
case 0x04000688: return VecMatrix[2];
@@ -1760,7 +1936,8 @@ void Write8(u32 addr, u8 val)
switch (addr)
{
case 0x04000340:
- AlphaRef = val & 0x1F;
+ AlphaRefVal = val & 0x1F;
+ AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0;
return;
}
@@ -1778,11 +1955,15 @@ void Write16(u32 addr, u16 val)
switch (addr)
{
case 0x04000060:
- DispCnt = val;
+ DispCnt = (val & 0x4FFF) | (DispCnt & 0x3000);
+ if (val & (1<<12)) DispCnt &= ~(1<<12);
+ if (val & (1<<13)) DispCnt &= ~(1<<13);
+ AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0;
return;
case 0x04000340:
- AlphaRef = val & 0x1F;
+ AlphaRefVal = val & 0x1F;
+ AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0;
return;
case 0x04000350:
@@ -1837,11 +2018,15 @@ void Write32(u32 addr, u32 val)
switch (addr)
{
case 0x04000060:
- DispCnt = val & 0xFFFF;
+ DispCnt = (val & 0x4FFF) | (DispCnt & 0x3000);
+ if (val & (1<<12)) DispCnt &= ~(1<<12);
+ if (val & (1<<13)) DispCnt &= ~(1<<13);
+ AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0;
return;
case 0x04000340:
- AlphaRef = val & 0x1F;
+ AlphaRefVal = val & 0x1F;
+ AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0;
return;
case 0x04000350:
@@ -1875,41 +2060,7 @@ void Write32(u32 addr, u32 val)
if (addr >= 0x04000400 && addr < 0x04000440)
{
- if (NumCommands == 0)
- {
- NumCommands = 4;
- CurCommand = val;
- ParamCount = 0;
- TotalParams = CmdNumParams[CurCommand & 0xFF];
-
- if (TotalParams > 0) return;
- }
- else
- ParamCount++;
-
- for (;;)
- {
- if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0))
- {
- CmdFIFOEntry entry;
- entry.Command = CurCommand & 0xFF;
- entry.Param = val;
- CmdFIFOWrite(entry);
- }
-
- if (ParamCount >= TotalParams)
- {
- CurCommand >>= 8;
- NumCommands--;
- if (NumCommands == 0) break;
-
- ParamCount = 0;
- TotalParams = CmdNumParams[CurCommand & 0xFF];
- }
- if (ParamCount < TotalParams)
- break;
- }
-
+ WriteToGXFIFO(val);
return;
}
diff --git a/src/GPU3D.h b/src/GPU3D.h
index be121bf..fdb85f8 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -33,7 +33,7 @@ typedef struct
// final vertex attributes.
// allows them to be reused in polygon strips.
- s32 FinalPosition[4];
+ s32 FinalPosition[2];
s32 FinalColor[3];
} Vertex;
@@ -43,6 +43,11 @@ typedef struct
Vertex* Vertices[10];
u32 NumVertices;
+ s32 FinalZ[10];
+ s32 FinalW[10];
+ u8 WShift;
+ bool WBuffer;
+
u32 Attr;
u32 TexParam;
u32 TexPalette;
@@ -50,6 +55,10 @@ typedef struct
bool FacingView;
bool Translucent;
+ bool IsShadowMask;
+ bool IsShadow;
+ bool ClearStencil;
+
u32 VTop, VBottom; // vertex indices
s32 YTop, YBottom; // Y coords
s32 XTop, XBottom; // associated X coords
@@ -57,9 +66,11 @@ typedef struct
} Polygon;
extern u32 DispCnt;
-extern u32 AlphaRef;
+extern u8 AlphaRef;
extern s32 Viewport[4];
-extern u32 ClearAttr1, ClearAttr2;
+extern u32 RenderClearAttr1, RenderClearAttr2;
+
+extern u16 ToonTable[32];
bool Init();
void DeInit();
@@ -75,6 +86,8 @@ void VBlank();
void VCount215();
u32* GetLine(int line);
+void WriteToGXFIFO(u32 val);
+
u8 Read8(u32 addr);
u16 Read16(u32 addr);
u32 Read32(u32 addr);
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index a981bd5..55dd906 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -32,8 +32,15 @@ u32 DepthBuffer[256*192];
u32 AttrBuffer[256*192];
// attribute buffer:
-// bit0-5: polygon ID
-// bit8: fog enable
+// bit15: fog enable
+// bit24-29: polygon ID
+// bit30: translucent flag
+
+u8 StencilBuffer[256*192];
+
+// note: the stencil buffer isn't emulated properly.
+// emulating it properly would require rendering polygons per-scanline
+// the stencil buffer is normally limited to 2 scanlines
bool Init()
@@ -53,6 +60,243 @@ void Reset()
}
+// Notes on the interpolator:
+//
+// This is a theory on how the DS hardware interpolates values. It matches hardware output
+// in the tests I did, but the hardware may be doing it differently. You never know.
+//
+// Assuming you want to perspective-correctly interpolate a variable named A across two points
+// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly,
+// then divide A/W by 1/W to recover the correct A value.
+//
+// The DS GPU approximates interpolation by calculating a perspective-correct interpolation
+// between 0 and 1, then using the result as a factor to linearly interpolate the actual
+// vertex attributes. The factor has 9 bits of precision when interpolating along Y and
+// 8 bits along X.
+//
+// There's a special path for when the two W values are equal: it directly does linear
+// interpolation, avoiding precision loss from the aforementioned approximation.
+// Which is desirable when using the GPU to draw 2D graphics.
+
+// Interpolates attribute values between two points, following the scheme
+// described in the notes above: a perspective-weighted factor is computed once
+// per position (SetX), then reused to interpolate any number of attributes
+// linearly (Interpolate / InterpolateZ).
+//
+// Usage: Setup() for the span, SetX() for the current position, then
+// Interpolate()/InterpolateZ() for each attribute.
+class Interpolator
+{
+public:
+    Interpolator() {}
+    Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
+    {
+        Setup(x0, x1, w0, w1, shift);
+    }
+
+    // x0, x1: positions of the two end points along the interpolation axis
+    // w0, w1: W values at those end points
+    // shift:  number of fractional bits in the interpolation factor
+    void Setup(s32 x0, s32 x1, s32 w0, s32 w1, int shift)
+    {
+        this->x0 = x0;
+        this->x1 = x1;
+        this->xdiff = x1 - x0;
+        this->shift = shift;
+
+        this->w0factor = (s64)w0 * xdiff;
+        this->w1factor = (s64)w1 * xdiff;
+        this->wdiff = w1 - w0;
+    }
+
+    // Sets the current position and, when the span is non-empty and the two W
+    // values differ, recomputes the perspective-weighted factor.
+    void SetX(s32 x)
+    {
+        x -= x0;
+        this->x = x;
+        if (xdiff != 0 && wdiff != 0)
+        {
+            // a W of zero at either end makes the matching factor zero. dividing by
+            // it would trap, so fall back to the same result as a zero denominator
+            // NOTE(review): hardware behavior for W=0 is not known here -- confirm
+            if (w0factor == 0 || w1factor == 0)
+            {
+                yfactor = 0;
+                return;
+            }
+
+            s64 num = ((s64)x << (shift + 40)) / w1factor;
+            s64 denw0 = ((s64)(xdiff-x) << 40) / w0factor;
+            s64 denw1 = num >> shift;
+
+            s64 denom = denw0 + denw1;
+            if (denom == 0)
+                yfactor = 0;
+            else
+            {
+                yfactor = (s32)(num / denom);
+            }
+        }
+    }
+
+    // Interpolates between y0 and y1 at the current position.
+    // Uses the perspective-weighted factor when the W values differ, plain
+    // linear interpolation otherwise. Returns y0 for an empty span.
+    s32 Interpolate(s32 y0, s32 y1)
+    {
+        if (xdiff == 0) return y0;
+
+        if (wdiff != 0)
+            return y0 + (((y1 - y0) * yfactor) >> shift);
+        else
+            return y0 + (((y1 - y0) * x) / xdiff);
+    }
+
+    // Same as Interpolate(), with 64-bit intermediates. The perspective-weighted
+    // factor is only used when 'wbuffer' is set; otherwise Z is interpolated linearly.
+    s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer)
+    {
+        if (xdiff == 0) return z0;
+
+        if ((wdiff != 0) && wbuffer)
+            return z0 + (((s64)(z1 - z0) * yfactor) >> shift);
+        else
+            return z0 + (((s64)(z1 - z0) * x) / xdiff);
+    }
+
+private:
+    s32 x0, x1, xdiff, x;     // x is stored relative to x0
+    s64 w0factor, w1factor;   // w0*xdiff and w1*xdiff
+    s32 wdiff;
+    int shift;
+
+    s32 yfactor;              // interpolation factor, 'shift' fractional bits
+};
+
+
+// Tracks the X position of one polygon edge while stepping down one scanline
+// at a time, and keeps an Interpolator positioned along that edge.
+//
+// dx and Increment are fixed-point values with 16 fractional bits (they are
+// shifted right by 16 to obtain whole X offsets). Increment is kept positive;
+// the direction is stored in Negative.
+class Slope
+{
+public:
+    Slope() {}
+
+    // Sets up a zero-length edge at x0 (moved one to the left for side != 0).
+    // Returns the X position of the edge.
+    s32 SetupDummy(s32 x0, int side)
+    {
+        if (side)
+        {
+            dx = -0x10000;
+            x0--;
+        }
+        else
+        {
+            dx = 0;
+        }
+
+        this->x0 = x0;
+        this->xmin = x0;
+        this->xmax = x0;
+
+        // Step(), XVal() and EdgeLimit() read these two members, so they must not
+        // be left indeterminate. Negative=false matches what Setup() selects for
+        // a zero Increment
+        this->y = 0;
+        Negative = false;
+
+        Increment = 0;
+        XMajor = false;
+
+        Interp.Setup(0, 0, 0, 0, 9);
+        Interp.SetX(0);
+
+        return x0;
+    }
+
+    // Sets up the edge going from (x0,y0) to (x1,y1), with W values w0 and w1 at
+    // its end points. side: 0 = left edge, nonzero = right edge.
+    // Returns the X position of the edge on its first scanline.
+    s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, int side)
+    {
+        this->x0 = x0;
+        this->y = y0;
+
+        // range XVal() is clamped to
+        if (x1 > x0)
+        {
+            this->xmin = x0;
+            this->xmax = x1-1;
+        }
+        else if (x1 < x0)
+        {
+            this->xmin = x1;
+            this->xmax = x0-1;
+        }
+        else
+        {
+            this->xmin = x0;
+            if (side) this->xmin--;
+            this->xmax = this->xmin;
+        }
+
+        if (y0 == y1)
+            Increment = 0;
+        else
+            Increment = ((x1 - x0) << 16) / (y1 - y0);
+
+        // store the magnitude and the direction separately
+        if (Increment < 0)
+        {
+            Increment = -Increment;
+            Negative = true;
+        }
+        else
+            Negative = false;
+
+        // more than one pixel of X per scanline
+        XMajor = (Increment > 0x10000);
+
+        // starting offset, depending on edge side, direction and steepness
+        if (side)
+        {
+            // right
+
+            if (XMajor) dx = Negative ? (0x8000 + 0x10000) : (Increment - 0x8000);
+            else if (Increment != 0) dx = Negative ? 0x10000 : 0;
+            else dx = -0x10000;
+        }
+        else
+        {
+            // left
+
+            if (XMajor) dx = Negative ? ((Increment - 0x8000) + 0x10000) : 0x8000;
+            else if (Increment != 0) dx = Negative ? 0x10000 : 0;
+            else dx = 0;
+        }
+
+        // X-major edges interpolate along X, all others along Y
+        if (XMajor)
+        {
+            if (side) Interp.Setup(x0-1, x1-1, w0, w1, 9); // checkme
+            else Interp.Setup(x0, x1, w0, w1, 9);
+        }
+        else Interp.Setup(y0, y1, w0, w1, 9);
+
+        s32 x = XVal();
+        if (XMajor) Interp.SetX(x);
+        else Interp.SetX(y);
+        return x;
+    }
+
+    // Advances the edge by one scanline, repositions the interpolator and
+    // returns the new X position.
+    s32 Step()
+    {
+        dx += Increment;
+        y++;
+
+        s32 x = XVal();
+        if (XMajor) Interp.SetX(x);
+        else Interp.SetX(y);
+        return x;
+    }
+
+    // Returns the current X position of the edge, clamped to [xmin, xmax].
+    s32 XVal()
+    {
+        s32 ret;
+        if (Negative) ret = x0 - (dx >> 16);
+        else ret = x0 + (dx >> 16);
+
+        if (ret < xmin) ret = xmin;
+        else if (ret > xmax) ret = xmax;
+        return ret;
+    }
+
+    // Returns the (unclamped) X position one Increment before or after the
+    // current one; the direction depends on 'side' and on Negative.
+    s32 EdgeLimit(int side)
+    {
+        s32 ret;
+        if (side)
+        {
+            if (Negative) ret = x0 - ((dx+Increment) >> 16);
+            else ret = x0 + ((dx-Increment) >> 16);
+        }
+        else
+        {
+            if (Negative) ret = x0 - ((dx-Increment) >> 16);
+            else ret = x0 + ((dx+Increment) >> 16);
+        }
+
+        return ret;
+    }
+
+    s32 Increment;       // absolute X change per scanline
+    bool Negative;       // true when X decreases as Y increases
+    bool XMajor;         // true when Increment is above 0x10000
+    Interpolator Interp;
+
+private:
+    s32 x0, xmin, xmax;
+    s32 dx;              // accumulated X offset from x0
+    s32 y;               // current scanline
+};
+
+
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
{
u32 vramaddr = (texparam & 0xFFFF) << 3;
@@ -65,6 +309,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
// texture wrapping
// TODO: optimize this somehow
+ // testing shows that it's hardly worth optimizing, actually
if (texparam & (1<<16))
{
@@ -278,11 +523,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
}
}
-bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z)
+template<bool func_equal>
+bool DepthTest(s32 oldz, s32 z)
{
- u32 oldz = DepthBuffer[(256*y) + x];
-
- if (polygon->Attr & (1<<14))
+ if (func_equal)
{
s32 diff = oldz - z;
if ((u32)(diff + 0x200) <= 0x400)
@@ -295,14 +539,23 @@ bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z)
return false;
}
-u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
+u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
{
- u32 attr = polygon->Attr;
u8 r, g, b, a;
+ u32 blendmode = (polygon->Attr >> 4) & 0x3;
u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
bool wireframe = (polyalpha == 0);
+ if (blendmode == 2)
+ {
+ u16 tooncolor = ToonTable[vr >> 1];
+
+ vr = (tooncolor << 1) & 0x3E; if (vr) vr++;
+ vg = (tooncolor >> 4) & 0x3E; if (vg) vg++;
+ vb = (tooncolor >> 9) & 0x3E; if (vb) vb++;
+ }
+
if ((DispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0))
{
u8 tr, tg, tb;
@@ -314,11 +567,39 @@ u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16
tg = (tcolor >> 4) & 0x3E; if (tg) tg++;
tb = (tcolor >> 9) & 0x3E; if (tb) tb++;
- // TODO: other blending modes
- r = ((tr+1) * (vr+1) - 1) >> 6;
- g = ((tg+1) * (vg+1) - 1) >> 6;
- b = ((tb+1) * (vb+1) - 1) >> 6;
- a = ((talpha+1) * (polyalpha+1) - 1) >> 5;
+ if (blendmode & 0x1)
+ {
+ // decal
+
+ if (talpha == 0)
+ {
+ r = vr;
+ g = vg;
+ b = vb;
+ }
+ else if (talpha == 31)
+ {
+ r = tr;
+ g = tg;
+ b = tb;
+ }
+ else
+ {
+ r = ((tr * talpha) + (vr * (31-talpha))) >> 5;
+ g = ((tg * talpha) + (vg * (31-talpha))) >> 5;
+ b = ((tb * talpha) + (vb * (31-talpha))) >> 5;
+ }
+ a = polyalpha;
+ }
+ else
+ {
+ // modulate
+
+ r = ((tr+1) * (vr+1) - 1) >> 6;
+ g = ((tg+1) * (vg+1) - 1) >> 6;
+ b = ((tb+1) * (vb+1) - 1) >> 6;
+ a = ((talpha+1) * (polyalpha+1) - 1) >> 5;
+ }
}
else
{
@@ -328,6 +609,18 @@ u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16
a = polyalpha;
}
+ if ((blendmode == 2) && (DispCnt & (1<<1)))
+ {
+ r += vr;
+ g += vg;
+ b += vb;
+
+ if (r > 63) r = 63;
+ if (g > 63) g = 63;
+ if (b > 63) b = 63;
+ }
+
+ // checkme: can wireframe polygons use texture alpha?
if (wireframe) a = 31;
return r | (g << 8) | (b << 16) | (a << 24);
@@ -349,11 +642,33 @@ void RenderPolygon(Polygon* polygon)
u32 polyalpha = (polygon->Attr >> 16) & 0x1F;
bool wireframe = (polyalpha == 0);
+ bool (*fnDepthTest)(s32 oldz, s32 z);
+ if (polygon->Attr & (1<<14))
+ fnDepthTest = DepthTest<true>;
+ else
+ fnDepthTest = DepthTest<false>;
+
+
int lcur = vtop, rcur = vtop;
int lnext, rnext;
- s32 dxl, dxr;
- s32 lslope, rslope;
+ if (polygon->FacingView)
+ {
+ lnext = lcur + 1;
+ if (lnext >= nverts) lnext = 0;
+ rnext = rcur - 1;
+ if (rnext < 0) rnext = nverts - 1;
+ }
+ else
+ {
+ lnext = lcur - 1;
+ if (lnext < 0) lnext = nverts - 1;
+ rnext = rcur + 1;
+ if (rnext >= nverts) rnext = 0;
+ }
+
+ Slope slopeL, slopeR;
+ s32 xL, xR;
bool l_xmajor, r_xmajor;
if (ybot == ytop)
@@ -376,52 +691,59 @@ void RenderPolygon(Polygon* polygon)
lcur = vtop; lnext = vtop;
rcur = vbot; rnext = vbot;
- lslope = 0; l_xmajor = false;
- rslope = 0; r_xmajor = false;
+ xL = slopeL.SetupDummy(polygon->Vertices[lcur]->FinalPosition[0], 0);
+ xR = slopeR.SetupDummy(polygon->Vertices[rcur]->FinalPosition[0], 1);
}
else
{
- //while (polygon->Vertices[lnext]->FinalPosition[1] )
- if (polygon->FacingView)
- {
- lnext = lcur + 1;
- if (lnext >= nverts) lnext = 0;
- rnext = rcur - 1;
- if (rnext < 0) rnext = nverts - 1;
- }
- else
+ while (ytop >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot)
{
- lnext = lcur - 1;
- if (lnext < 0) lnext = nverts - 1;
- rnext = rcur + 1;
- if (rnext >= nverts) rnext = 0;
+ lcur = lnext;
+
+ if (polygon->FacingView)
+ {
+ lnext = lcur + 1;
+ if (lnext >= nverts) lnext = 0;
+ }
+ else
+ {
+ lnext = lcur - 1;
+ if (lnext < 0) lnext = nverts - 1;
+ }
}
- if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1])
- lslope = 0;
- else
- lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) /
- (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]);
+ xL = slopeL.Setup(polygon->Vertices[lcur]->FinalPosition[0], polygon->Vertices[lnext]->FinalPosition[0],
+ polygon->Vertices[lcur]->FinalPosition[1], polygon->Vertices[lnext]->FinalPosition[1],
+ polygon->FinalW[lcur], polygon->FinalW[lnext], 0);
- if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1])
- rslope = 0;
- else
- rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) /
- (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]);
+ while (ytop >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot)
+ {
+ rcur = rnext;
- l_xmajor = (lslope < -0x1000) || (lslope > 0x1000);
- r_xmajor = (rslope < -0x1000) || (rslope > 0x1000);
+ if (polygon->FacingView)
+ {
+ rnext = rcur - 1;
+ if (rnext < 0) rnext = nverts - 1;
+ }
+ else
+ {
+ rnext = rcur + 1;
+ if (rnext >= nverts) rnext = 0;
+ }
+ }
+
+ xR = slopeR.Setup(polygon->Vertices[rcur]->FinalPosition[0], polygon->Vertices[rnext]->FinalPosition[0],
+ polygon->Vertices[rcur]->FinalPosition[1], polygon->Vertices[rnext]->FinalPosition[1],
+ polygon->FinalW[rcur], polygon->FinalW[rnext], 1);
}
- if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000;
- else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000;
- else dxl = 0;
+ if (ybot > 192) ybot = 192;
- if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000;
- else if (rslope) dxr = (rslope > 0) ? 0 : 0x1000;
- else dxr = 0x1000;
+ if (polygon->ClearStencil)
+ {
+ memset(StencilBuffer, 0, 192*256);
+ }
- if (ybot > 192) ybot = 192;
for (s32 y = ytop; y < ybot; y++)
{
if (!isline)
@@ -444,17 +766,9 @@ void RenderPolygon(Polygon* polygon)
}
}
- if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1])
- lslope = 0;
- else
- lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) /
- (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]);
-
- l_xmajor = (lslope < -0x1000) || (lslope > 0x1000);
-
- if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000;
- else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000;
- else dxl = 0;
+ xL = slopeL.Setup(polygon->Vertices[lcur]->FinalPosition[0], polygon->Vertices[lnext]->FinalPosition[0],
+ polygon->Vertices[lcur]->FinalPosition[1], polygon->Vertices[lnext]->FinalPosition[1],
+ polygon->FinalW[lcur], polygon->FinalW[lnext], 0);
}
if (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot)
@@ -475,71 +789,25 @@ void RenderPolygon(Polygon* polygon)
}
}
- if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1])
- rslope = 0;
- else
- rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) /
- (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]);
-
- r_xmajor = (rslope < -0x1000) || (rslope > 0x1000);
-
- if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000;
- else if (rslope) dxr = (rslope > 0) ? 0 : 0x1000;
- else dxr = 0x1000;
+ xR = slopeR.Setup(polygon->Vertices[rcur]->FinalPosition[0], polygon->Vertices[rnext]->FinalPosition[0],
+ polygon->Vertices[rcur]->FinalPosition[1], polygon->Vertices[rnext]->FinalPosition[1],
+ polygon->FinalW[rcur], polygon->FinalW[rnext], 1);
}
}
Vertex *vlcur, *vlnext, *vrcur, *vrnext;
s32 xstart, xend;
- s32 xstart_int, xend_int;
- s32 slope_start, slope_end;
+ Slope* slope_start;
+ Slope* slope_end;
- if (lslope == 0 && rslope == 0 &&
- polygon->Vertices[lcur]->FinalPosition[0] == polygon->Vertices[rcur]->FinalPosition[0])
- {
- xstart = polygon->Vertices[lcur]->FinalPosition[0];
- xend = xstart;
- }
- else
- {
- if (lslope > 0)
- {
- xstart = polygon->Vertices[lcur]->FinalPosition[0] + (dxl >> 12);
- if (xstart < polygon->Vertices[lcur]->FinalPosition[0])
- xstart = polygon->Vertices[lcur]->FinalPosition[0];
- else if (xstart > polygon->Vertices[lnext]->FinalPosition[0]-1)
- xstart = polygon->Vertices[lnext]->FinalPosition[0]-1;
- }
- else if (lslope < 0)
- {
- xstart = polygon->Vertices[lcur]->FinalPosition[0] - (dxl >> 12);
- if (xstart < polygon->Vertices[lnext]->FinalPosition[0])
- xstart = polygon->Vertices[lnext]->FinalPosition[0];
- else if (xstart > polygon->Vertices[lcur]->FinalPosition[0]-1)
- xstart = polygon->Vertices[lcur]->FinalPosition[0]-1;
- }
- else
- xstart = polygon->Vertices[lcur]->FinalPosition[0];
+ xstart = xL;
+ xend = xR;
- if (rslope > 0)
- {
- xend = polygon->Vertices[rcur]->FinalPosition[0] + (dxr >> 12);
- if (xend < polygon->Vertices[rcur]->FinalPosition[0])
- xend = polygon->Vertices[rcur]->FinalPosition[0];
- else if (xend > polygon->Vertices[rnext]->FinalPosition[0]-1)
- xend = polygon->Vertices[rnext]->FinalPosition[0]-1;
- }
- else if (rslope < 0)
- {
- xend = polygon->Vertices[rcur]->FinalPosition[0] - (dxr >> 12);
- if (xend < polygon->Vertices[rnext]->FinalPosition[0])
- xend = polygon->Vertices[rnext]->FinalPosition[0];
- else if (xend > polygon->Vertices[rcur]->FinalPosition[0]-1)
- xend = polygon->Vertices[rcur]->FinalPosition[0]-1;
- }
- else
- xend = polygon->Vertices[rcur]->FinalPosition[0] - 1;
- }
+ s32 wl = slopeL.Interp.Interpolate(polygon->FinalW[lcur], polygon->FinalW[lnext]);
+ s32 wr = slopeR.Interp.Interpolate(polygon->FinalW[rcur], polygon->FinalW[rnext]);
+
+ s32 zl = slopeL.Interp.InterpolateZ(polygon->FinalZ[lcur], polygon->FinalZ[lnext], polygon->WBuffer);
+ s32 zr = slopeR.Interp.InterpolateZ(polygon->FinalZ[rcur], polygon->FinalZ[rnext], polygon->WBuffer);
// if the left and right edges are swapped, render backwards.
// note: we 'forget' to swap the xmajor flags, on purpose
@@ -551,10 +819,13 @@ void RenderPolygon(Polygon* polygon)
vrcur = polygon->Vertices[lcur];
vrnext = polygon->Vertices[lnext];
- slope_start = rslope;
- slope_end = lslope;
+ slope_start = &slopeR;
+ slope_end = &slopeL;
- s32 tmp = xstart; xstart = xend; xend = tmp;
+ s32 tmp;
+ tmp = xstart; xstart = xend; xend = tmp;
+ tmp = wl; wl = wr; wr = tmp;
+ tmp = zl; zl = zr; zr = tmp;
}
else
{
@@ -563,222 +834,215 @@ void RenderPolygon(Polygon* polygon)
vrcur = polygon->Vertices[rcur];
vrnext = polygon->Vertices[rnext];
- slope_start = lslope;
- slope_end = rslope;
+ slope_start = &slopeL;
+ slope_end = &slopeR;
}
// interpolate attributes along Y
- s64 lfactor1, lfactor2;
- s64 rfactor1, rfactor2;
- if (l_xmajor)
- {
- lfactor1 = (vlnext->FinalPosition[0] - xstart) * vlnext->FinalPosition[3];
- lfactor2 = (xstart - vlcur->FinalPosition[0]) * vlcur->FinalPosition[3];
- }
- else
- {
- lfactor1 = (vlnext->FinalPosition[1] - y) * vlnext->FinalPosition[3];
- lfactor2 = (y - vlcur->FinalPosition[1]) * vlcur->FinalPosition[3];
- }
+ s32 rl = slope_start->Interp.Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]);
+ s32 gl = slope_start->Interp.Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]);
+ s32 bl = slope_start->Interp.Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]);
- s64 ldenom = lfactor1 + lfactor2;
- if (ldenom == 0)
- {
- lfactor1 = 0x1000;
- lfactor2 = 0;
- ldenom = 0x1000;
- }
+ s32 sl = slope_start->Interp.Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]);
+ s32 tl = slope_start->Interp.Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]);
- if (r_xmajor)
- {
- rfactor1 = (vrnext->FinalPosition[0] - xend+1) * vrnext->FinalPosition[3];
- rfactor2 = (xend+1 - vrcur->FinalPosition[0]) * vrcur->FinalPosition[3];
- }
- else
- {
- rfactor1 = (vrnext->FinalPosition[1] - y) * vrnext->FinalPosition[3];
- rfactor2 = (y - vrcur->FinalPosition[1]) * vrcur->FinalPosition[3];
- }
-
- s64 rdenom = rfactor1 + rfactor2;
- if (rdenom == 0)
- {
- rfactor1 = 0x1000;
- rfactor2 = 0;
- rdenom = 0x1000;
- }
-
- s32 zl = ((lfactor1 * vlcur->FinalPosition[2]) + (lfactor2 * vlnext->FinalPosition[2])) / ldenom;
- s32 zr = ((rfactor1 * vrcur->FinalPosition[2]) + (rfactor2 * vrnext->FinalPosition[2])) / rdenom;
-
- s32 wl = ((lfactor1 * vlcur->FinalPosition[3]) + (lfactor2 * vlnext->FinalPosition[3])) / ldenom;
- s32 wr = ((rfactor1 * vrcur->FinalPosition[3]) + (rfactor2 * vrnext->FinalPosition[3])) / rdenom;
-
- s32 rl = ((lfactor1 * vlcur->FinalColor[0]) + (lfactor2 * vlnext->FinalColor[0])) / ldenom;
- s32 gl = ((lfactor1 * vlcur->FinalColor[1]) + (lfactor2 * vlnext->FinalColor[1])) / ldenom;
- s32 bl = ((lfactor1 * vlcur->FinalColor[2]) + (lfactor2 * vlnext->FinalColor[2])) / ldenom;
-
- s32 sl = ((lfactor1 * vlcur->TexCoords[0]) + (lfactor2 * vlnext->TexCoords[0])) / ldenom;
- s32 tl = ((lfactor1 * vlcur->TexCoords[1]) + (lfactor2 * vlnext->TexCoords[1])) / ldenom;
+ s32 rr = slope_end->Interp.Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]);
+ s32 gr = slope_end->Interp.Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]);
+ s32 br = slope_end->Interp.Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]);
- s32 rr = ((rfactor1 * vrcur->FinalColor[0]) + (rfactor2 * vrnext->FinalColor[0])) / rdenom;
- s32 gr = ((rfactor1 * vrcur->FinalColor[1]) + (rfactor2 * vrnext->FinalColor[1])) / rdenom;
- s32 br = ((rfactor1 * vrcur->FinalColor[2]) + (rfactor2 * vrnext->FinalColor[2])) / rdenom;
-
- s32 sr = ((rfactor1 * vrcur->TexCoords[0]) + (rfactor2 * vrnext->TexCoords[0])) / rdenom;
- s32 tr = ((rfactor1 * vrcur->TexCoords[1]) + (rfactor2 * vrnext->TexCoords[1])) / rdenom;
+ s32 sr = slope_end->Interp.Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]);
+ s32 tr = slope_end->Interp.Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]);
// calculate edges
+ //
+ // edge fill rules for opaque pixels:
+ // * right edge is filled if slope > 1
+ // * left edge is filled if slope <= 1
+ // * edges with slope = 0 are always filled
+ // edges are always filled if the pixels are translucent
+ // in wireframe mode, there are special rules for equal Z (TODO)
+
s32 l_edgeend, r_edgestart;
+ bool l_filledge, r_filledge;
- if (l_xmajor)
+ if (slopeL.XMajor)
{
- if (slope_start > 0) l_edgeend = vlcur->FinalPosition[0] + ((dxl + slope_start) >> 12);
- else l_edgeend = vlcur->FinalPosition[0] - ((dxl - slope_start) >> 12);
-
+ l_edgeend = slope_start->EdgeLimit(0);
if (l_edgeend == xstart) l_edgeend++;
+
+ l_filledge = slope_start->Negative;
}
else
+ {
l_edgeend = xstart + 1;
- if (r_xmajor)
+ l_filledge = true;
+ }
+
+ if (slopeR.XMajor)
{
- if (slope_end > 0) r_edgestart = vrcur->FinalPosition[0] + ((dxr + slope_end) >> 12);
- else r_edgestart = vrcur->FinalPosition[0] - ((dxr - slope_end) >> 12);
+ r_edgestart = slope_end->EdgeLimit(1);
+ if (r_edgestart == xend) r_edgestart--;
- if (r_edgestart == xend_int) r_edgestart--;
+ r_filledge = !slope_end->Negative;
}
else
+ {
r_edgestart = xend - 1;
- // edge fill rules for opaque pixels:
- // * right edge is filled if slope > 1
- // * left edge is filled if slope <= 1
- // * edges with slope = 0 are always filled
- // edges are always filled if the pixels are translucent
- // in wireframe mode, there are special rules for equal Z (TODO)
+ r_filledge = slope_end->Increment==0;
+ }
+
+ int yedge = 0;
+ if (y == ytop) yedge = 0x4;
+ else if (y == ybot-1) yedge = 0x8;
+
+ Interpolator interpX(xstart, xend+1, wl, wr, 8);
for (s32 x = xstart; x <= xend; x++)
{
if (x < 0) continue;
if (x > 255) break;
- int edge = 0;
- if (y == ytop) edge |= 0x4;
- else if (y == ybot-1) edge |= 0x8;
+ int edge = yedge;
if (x < l_edgeend) edge |= 0x1;
else if (x > r_edgestart) edge |= 0x2;
// wireframe polygons. really ugly, but works
- if (wireframe && edge==0) continue;
+ if (wireframe && edge==0)
+ {
+ x = r_edgestart + 1;
+ continue;
+ }
- s64 factor1 = (xend+1 - x) * wr;
- s64 factor2 = (x - xstart) * wl;
- s64 denom = factor1 + factor2;
- if (denom == 0)
+ u32 pixeladdr = (y*256) + x;
+ u32 attr = polygon->Attr & 0x3F008000;
+
+ // check stencil buffer for shadows
+ if (polygon->IsShadow)
{
- factor1 = 0x1000;
- factor2 = 0;
- denom = 0x1000;
+ if (StencilBuffer[pixeladdr] == 0)
+ continue;
}
- s32 z = ((factor1 * zl) + (factor2 * zr)) / denom;
- if (!DepthTest(polygon, x, y, z)) continue;
+ interpX.SetX(x);
- u32 vr = ((factor1 * rl) + (factor2 * rr)) / denom;
- u32 vg = ((factor1 * gl) + (factor2 * gr)) / denom;
- u32 vb = ((factor1 * bl) + (factor2 * br)) / denom;
+ s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer);
- s16 s = ((factor1 * sl) + (factor2 * sr)) / denom;
- s16 t = ((factor1 * tl) + (factor2 * tr)) / denom;
+ if (polygon->IsShadowMask)
+ {
+ // for shadow masks: set stencil bits where the depth test fails.
+ // draw nothing.
- u32 color = RenderPixel(polygon, x, y, z, vr>>3, vg>>3, vb>>3, s, t);
- u32 attr = 0;
- u32 pixeladdr = (y*256) + x;
+ // checkme
+ if (polyalpha == 31)
+ {
+ if (!wireframe)
+ {
+ if ((edge & 0x1) && !l_filledge)
+ continue;
+ if ((edge & 0x2) && !r_filledge)
+ continue;
+ }
+ }
- u8 alpha = color >> 24;
+ if (!fnDepthTest(DepthBuffer[pixeladdr], z))
+ StencilBuffer[pixeladdr] = 1;
- // alpha test
- if (DispCnt & (1<<2))
- {
- if (alpha <= AlphaRef) continue;
- }
- else
- {
- if (alpha == 0) continue;
+ continue;
}
- // alpha blending disable
- // TODO: check alpha test when blending is disabled
- if (!(DispCnt & (1<<3)))
- alpha = 31;
+ if (!fnDepthTest(DepthBuffer[pixeladdr], z))
+ continue;
+
+ u32 vr = interpX.Interpolate(rl, rr);
+ u32 vg = interpX.Interpolate(gl, gr);
+ u32 vb = interpX.Interpolate(bl, br);
- u32 dstcolor = ColorBuffer[pixeladdr];
- u32 dstalpha = dstcolor >> 24;
+ s16 s = interpX.Interpolate(sl, sr);
+ s16 t = interpX.Interpolate(tl, tr);
+
+ u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
+ u8 alpha = color >> 24;
+
+ // alpha test
+ // TODO: check alpha test when blending is disabled
+ if (alpha <= AlphaRef) continue;
if (alpha == 31)
{
// edge fill rules for opaque pixels
// TODO, eventually: antialiasing
- if (!wireframe)// && !(edge & 0x4))
+ if (!wireframe)
{
- if ((edge & 0x1) && slope_start > 0x1000)
+ if ((edge & 0x1) && !l_filledge)
continue;
- if ((edge & 0x2) && (slope_end != 0 && slope_end <= 0x1000))
+ if ((edge & 0x2) && !r_filledge)
continue;
}
DepthBuffer[pixeladdr] = z;
}
- else if (dstalpha == 0)
- {
- // TODO: conditional Z-buffer update
- DepthBuffer[pixeladdr] = z;
- }
else
{
- u32 srcR = color & 0x3F;
- u32 srcG = (color >> 8) & 0x3F;
- u32 srcB = (color >> 16) & 0x3F;
+ u32 dstattr = AttrBuffer[pixeladdr];
+ attr |= (1<<30);
+ if (polygon->IsShadow) dstattr |= (1<<30);
+
+ // skip if polygon IDs are equal
+ // note: this only happens if the destination pixel was translucent
+ // or always when drawing a shadow
+ // (the GPU keeps track of which pixels are translucent, regardless of
+ // the destination alpha)
+ if ((dstattr & 0x7F000000) == (attr & 0x7F000000))
+ continue;
+
+ u32 dstcolor = ColorBuffer[pixeladdr];
+ u32 dstalpha = dstcolor >> 24;
+
+ if ((dstalpha > 0) && (DispCnt & (1<<3)))
+ {
+ u32 srcR = color & 0x3F;
+ u32 srcG = (color >> 8) & 0x3F;
+ u32 srcB = (color >> 16) & 0x3F;
- u32 dstR = dstcolor & 0x3F;
- u32 dstG = (dstcolor >> 8) & 0x3F;
- u32 dstB = (dstcolor >> 16) & 0x3F;
+ u32 dstR = dstcolor & 0x3F;
+ u32 dstG = (dstcolor >> 8) & 0x3F;
+ u32 dstB = (dstcolor >> 16) & 0x3F;
- alpha++;
- dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5;
- dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5;
- dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5;
+ alpha++;
+ dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5;
+ dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5;
+ dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5;
- alpha--;
- if (alpha > dstalpha) dstalpha = alpha;
+ alpha--;
+ if (alpha > dstalpha) dstalpha = alpha;
- color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24);
+ color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24);
+ }
- // TODO: conditional Z-buffer update
- DepthBuffer[pixeladdr] = z;
+ if (polygon->Attr & (1<<11))
+ DepthBuffer[pixeladdr] = z;
}
ColorBuffer[pixeladdr] = color;
AttrBuffer[pixeladdr] = attr;
}
- if (lslope > 0) dxl += lslope;
- else dxl -= lslope;
- if (rslope > 0) dxr += rslope;
- else dxr -= rslope;
+ xL = slopeL.Step();
+ xR = slopeR.Step();
}
}
void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys)
{
- u32 polyid = (ClearAttr1 >> 24) & 0x3F;
+ u32 polyid = RenderClearAttr1 & 0x3F000000;
if (DispCnt & (1<<14))
{
- u8 xoff = (ClearAttr2 >> 16) & 0xFF;
- u8 yoff = (ClearAttr2 >> 24) & 0xFF;
+ u8 xoff = (RenderClearAttr2 >> 16) & 0xFF;
+ u8 yoff = (RenderClearAttr2 >> 24) & 0xFF;
for (int y = 0; y < 256*192; y += 256)
{
@@ -795,11 +1059,10 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys)
u32 color = r | (g << 8) | (b << 16) | a;
u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF;
- if (z >= 0x10000 && z < 0xFFFFFF) z++;
ColorBuffer[y+x] = color;
DepthBuffer[y+x] = z;
- AttrBuffer[y+x] = polyid | ((val3 & 0x8000) >> 7);
+ AttrBuffer[y+x] = polyid | (val3 & 0x8000);
xoff++;
}
@@ -810,16 +1073,15 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys)
else
{
// TODO: confirm color conversion
- u32 r = (ClearAttr1 << 1) & 0x3E; if (r) r++;
- u32 g = (ClearAttr1 >> 4) & 0x3E; if (g) g++;
- u32 b = (ClearAttr1 >> 9) & 0x3E; if (b) b++;
- u32 a = (ClearAttr1 >> 16) & 0x1F;
+ u32 r = (RenderClearAttr1 << 1) & 0x3E; if (r) r++;
+ u32 g = (RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
+ u32 b = (RenderClearAttr1 >> 9) & 0x3E; if (b) b++;
+ u32 a = (RenderClearAttr1 >> 16) & 0x1F;
u32 color = r | (g << 8) | (b << 16) | (a << 24);
- u32 z = ((ClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
- if (z >= 0x10000 && z < 0xFFFFFF) z++;
+ u32 z = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
- polyid |= ((ClearAttr1 & 0x8000) >> 7);
+ polyid |= (RenderClearAttr1 & 0x8000);
for (int i = 0; i < 256*192; i++)
{
diff --git a/src/NDS.cpp b/src/NDS.cpp
index d8f346e..18a6f46 100644
--- a/src/NDS.cpp
+++ b/src/NDS.cpp
@@ -25,6 +25,7 @@
#include "DMA.h"
#include "FIFO.h"
#include "GPU.h"
+#include "SPU.h"
#include "SPI.h"
#include "RTC.h"
#include "Wifi.h"
@@ -37,11 +38,6 @@ namespace NDS
// * stick all the variables in a big structure?
// would make it easier to deal with savestates
-/*SchedEvent SchedBuffer[SCHED_BUF_LEN];
-SchedEvent* SchedQueue;
-
-bool NeedReschedule;*/
-
ARM* ARM9;
ARM* ARM7;
@@ -87,6 +83,7 @@ u16 PowerControl7;
u16 ARM7BIOSProt;
Timer Timers[8];
+u8 TimerCheckMask[2];
DMA* DMAs[8];
u32 DMA9Fill[4];
@@ -108,8 +105,6 @@ u32 SqrtRes;
u32 KeyInput;
-u16 _soundbias; // temp
-
bool Running;
@@ -132,6 +127,7 @@ bool Init()
if (!NDSCart::Init()) return false;
if (!GPU::Init()) return false;
+ if (!SPU::Init()) return false;
if (!SPI::Init()) return false;
if (!RTC::Init()) return false;
@@ -151,6 +147,7 @@ void DeInit()
NDSCart::DeInit();
GPU::DeInit();
+ SPU::DeInit();
SPI::DeInit();
RTC::DeInit();
}
@@ -221,9 +218,14 @@ void SetupDirectBoot()
ARM9->JumpTo(bootparams[1]);
ARM7->JumpTo(bootparams[5]);
+ PostFlag9 = 0x01;
+ PostFlag7 = 0x01;
+
PowerControl9 = 0x820F;
GPU::DisplaySwap(PowerControl9);
+ SPU::SetBias(0x200);
+
ARM7BIOSProt = 0x1204;
SPI_Firmware::SetupDirectBoot();
@@ -296,18 +298,12 @@ void Reset()
CPUStop = 0;
memset(Timers, 0, 8*sizeof(Timer));
+ TimerCheckMask[0] = 0;
+ TimerCheckMask[1] = 0;
for (i = 0; i < 8; i++) DMAs[i]->Reset();
memset(DMA9Fill, 0, 4*4);
- NDSCart::Reset();
- GPU::Reset();
- SPI::Reset();
- RTC::Reset();
- Wifi::Reset();
-
- // memset(SchedBuffer, 0, sizeof(SchedEvent)*SCHED_BUF_LEN);
- // SchedQueue = NULL;
memset(SchedList, 0, sizeof(SchedList));
SchedListMask = 0;
@@ -319,7 +315,12 @@ void Reset()
KeyInput = 0x007F03FF;
- _soundbias = 0;
+ NDSCart::Reset();
+ GPU::Reset();
+ SPU::Reset();
+ SPI::Reset();
+ RTC::Reset();
+ Wifi::Reset();
}
void LoadROM(const char* path, bool direct)
@@ -563,8 +564,16 @@ bool HaltInterrupted(u32 cpu)
void StopCPU(u32 cpu, u32 mask)
{
- if (cpu) mask <<= 16;
- CPUStop |= mask;
+ if (cpu)
+ {
+ CPUStop |= (mask << 16);
+ ARM7->Halt(2);
+ }
+ else
+ {
+ CPUStop |= mask;
+ ARM9->Halt(2);
+ }
}
void ResumeCPU(u32 cpu, u32 mask)
@@ -611,8 +620,8 @@ void HandleTimerOverflow(u32 tid)
void RunTimer(u32 tid, s32 cycles)
{
Timer* timer = &Timers[tid];
- if ((timer->Cnt & 0x84) != 0x80)
- return;
+ //if ((timer->Cnt & 0x84) != 0x80)
+ // return;
u32 oldcount = timer->Counter;
timer->Counter += (cycles << timer->CycleShift);
@@ -622,10 +631,12 @@ void RunTimer(u32 tid, s32 cycles)
void RunTimingCriticalDevices(u32 cpu, s32 cycles)
{
- RunTimer((cpu<<2)+0, cycles);
- RunTimer((cpu<<2)+1, cycles);
- RunTimer((cpu<<2)+2, cycles);
- RunTimer((cpu<<2)+3, cycles);
+ register u32 timermask = TimerCheckMask[cpu];
+
+ if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles);
+ if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles);
+ if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles);
+ if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles);
if (cpu == 0)
{
@@ -678,6 +689,11 @@ void TimerStart(u32 id, u16 cnt)
{
timer->Counter = timer->Reload << 16;
}
+
+ if ((cnt & 0x84) == 0x80)
+ TimerCheckMask[id>>2] |= (1<<(id&0x3));
+ else
+ TimerCheckMask[id>>2] &= ~(1<<(id&0x3));
}
@@ -808,8 +824,19 @@ void debug(u32 param)
printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]);
printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]);
- for (int i = 0; i < 9; i++)
- printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]);
+ printf("ARM9 IME=%08X IE=%08X IF=%08X\n", IME[0], IE[0], IF[0]);
+ printf("ARM7 IME=%08X IE=%08X IF=%08X\n", IME[1], IE[1], IF[1]);
+
+ //for (int i = 0; i < 9; i++)
+ // printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]);
+
+ /*FILE* shit = fopen("debug/poke7.bin", "wb");
+ for (u32 i = 0x02000000; i < 0x03810000; i+=4)
+ {
+ u32 val = ARM7Read32(i);
+ fwrite(&val, 4, 1, shit);
+ }
+ fclose(shit);*/
}
@@ -904,7 +931,7 @@ u16 ARM9Read16(u32 addr)
return 0xFFFF;
}
- //printf("unknown arm9 read16 %08X %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[1], ARM9->R[2]);
+ //printf("unknown arm9 read16 %08X %08X\n", addr, ARM9->R[15]);
return 0;
}
@@ -1202,7 +1229,7 @@ void ARM7Write8(u32 addr, u8 val)
return;
}
- printf("unknown arm7 write8 %08X %02X | %08X | %08X %08X %08X %08X\n", addr, val, ARM7->R[15], IME[1], IE[1], ARM7->R[0], ARM7->R[1]);
+ printf("unknown arm7 write8 %08X %02X @ %08X\n", addr, val, ARM7->R[15]);
}
void ARM7Write16(u32 addr, u16 val)
@@ -1237,7 +1264,7 @@ void ARM7Write16(u32 addr, u16 val)
return;
}
- printf("unknown arm7 write16 %08X %04X | %08X\n", addr, val, ARM7->R[15]);
+ printf("unknown arm7 write16 %08X %04X @ %08X\n", addr, val, ARM7->R[15]);
}
void ARM7Write32(u32 addr, u32 val)
@@ -1268,7 +1295,7 @@ void ARM7Write32(u32 addr, u32 val)
return;
}
- printf("unknown arm7 write32 %08X %08X | %08X %08X\n", addr, val, ARM7->R[15], ARM7->CurInstr);
+ printf("unknown arm7 write32 %08X %08X @ %08X\n", addr, val, ARM7->R[15]);
}
@@ -1278,6 +1305,9 @@ u8 ARM9IORead8(u32 addr)
{
switch (addr)
{
+ case 0x04000130: return KeyInput & 0xFF;
+ case 0x04000131: return (KeyInput >> 8) & 0xFF;
+
case 0x040001A2: return NDSCart::ReadSPIData();
case 0x040001A8: return NDSCart::ROMCommand[0];
@@ -1402,11 +1432,11 @@ u16 ARM9IORead16(u32 addr)
case 0x04000304: return PowerControl9;
}
- if (addr >= 0x04000000 && addr < 0x04000060)
+ if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C))
{
return GPU::GPU2D_A->Read16(addr);
}
- if (addr >= 0x04001000 && addr < 0x04001060)
+ if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C))
{
return GPU::GPU2D_B->Read16(addr);
}
@@ -1471,6 +1501,7 @@ u32 ARM9IORead32(u32 addr)
case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24);
case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8);
+ case 0x04000280: return DivCnt;
case 0x04000290: return DivNumerator[0];
case 0x04000294: return DivNumerator[1];
case 0x04000298: return DivDenominator[0];
@@ -1480,6 +1511,7 @@ u32 ARM9IORead32(u32 addr)
case 0x040002A8: return DivRemainder[0];
case 0x040002AC: return DivRemainder[1];
+ case 0x040002B0: return SqrtCnt;
case 0x040002B4: return SqrtRes;
case 0x040002B8: return SqrtVal[0];
case 0x040002BC: return SqrtVal[1];
@@ -1510,11 +1542,11 @@ u32 ARM9IORead32(u32 addr)
return 0;
}
- if (addr >= 0x04000000 && addr < 0x04000060)
+ if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C))
{
return GPU::GPU2D_A->Read32(addr);
}
- if (addr >= 0x04001000 && addr < 0x04001060)
+ if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C))
{
return GPU::GPU2D_B->Read32(addr);
}
@@ -1611,6 +1643,15 @@ void ARM9IOWrite16(u32 addr, u16 val)
case 0x040000DC: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0xFFFF0000) | val); return;
case 0x040000DE: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0x0000FFFF) | (val << 16)); return;
+ case 0x040000E0: DMA9Fill[0] = (DMA9Fill[0] & 0xFFFF0000) | val; return;
+ case 0x040000E2: DMA9Fill[0] = (DMA9Fill[0] & 0x0000FFFF) | (val << 16); return;
+ case 0x040000E4: DMA9Fill[1] = (DMA9Fill[1] & 0xFFFF0000) | val; return;
+ case 0x040000E6: DMA9Fill[1] = (DMA9Fill[1] & 0x0000FFFF) | (val << 16); return;
+ case 0x040000E8: DMA9Fill[2] = (DMA9Fill[2] & 0xFFFF0000) | val; return;
+ case 0x040000EA: DMA9Fill[2] = (DMA9Fill[2] & 0x0000FFFF) | (val << 16); return;
+ case 0x040000EC: DMA9Fill[3] = (DMA9Fill[3] & 0xFFFF0000) | val; return;
+ case 0x040000EE: DMA9Fill[3] = (DMA9Fill[3] & 0x0000FFFF) | (val << 16); return;
+
case 0x04000100: Timers[0].Reload = val; return;
case 0x04000102: TimerStart(0, val); return;
case 0x04000104: Timers[1].Reload = val; return;
@@ -1629,7 +1670,6 @@ void ARM9IOWrite16(u32 addr, u16 val)
{
SetIRQ(1, IRQ_IPCSync);
}
- //CompensateARM7();
return;
case 0x04000184:
@@ -1838,6 +1878,10 @@ void ARM9IOWrite32(u32 addr, u32 val)
GPU::MapVRAM_I(8, (val >> 8) & 0xFF);
return;
+ case 0x04000280: DivCnt = val; StartDiv(); return;
+
+ case 0x040002B0: SqrtCnt = val; StartSqrt(); return;
+
case 0x04000290: DivNumerator[0] = val; StartDiv(); return;
case 0x04000294: DivNumerator[1] = val; StartDiv(); return;
case 0x04000298: DivDenominator[0] = val; StartDiv(); return;
@@ -1876,6 +1920,11 @@ u8 ARM7IORead8(u32 addr)
{
switch (addr)
{
+ case 0x04000130: return KeyInput & 0xFF;
+ case 0x04000131: return (KeyInput >> 8) & 0xFF;
+ case 0x04000136: return (KeyInput >> 16) & 0xFF;
+ case 0x04000137: return KeyInput >> 24;
+
case 0x04000138: return RTC::Read() & 0xFF;
case 0x040001A2: return NDSCart::ReadSPIData();
@@ -1901,8 +1950,7 @@ u8 ARM7IORead8(u32 addr)
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
- return 0;
+ return SPU::Read8(addr);
}
printf("unknown ARM7 IO read8 %08X\n", addr);
@@ -1972,14 +2020,11 @@ u16 ARM7IORead16(u32 addr)
case 0x04000300: return PostFlag7;
case 0x04000304: return PowerControl7;
case 0x04000308: return ARM7BIOSProt;
-
- case 0x04000504: return _soundbias;
}
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
- return 0;
+ return SPU::Read16(addr);
}
printf("unknown ARM7 IO read16 %08X %08X\n", addr, ARM9->R[15]);
@@ -2057,8 +2102,7 @@ u32 ARM7IORead32(u32 addr)
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
- return 0;
+ return SPU::Read32(addr);
}
printf("unknown ARM7 IO read32 %08X\n", addr);
@@ -2116,7 +2160,7 @@ void ARM7IOWrite8(u32 addr, u8 val)
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
+ SPU::Write8(addr, val);
return;
}
@@ -2147,7 +2191,7 @@ void ARM7IOWrite16(u32 addr, u16 val)
case 0x0400010C: Timers[7].Reload = val; return;
case 0x0400010E: TimerStart(7, val); return;
- case 0x04000134: return;printf("set debug port %04X %08X\n", val, ARM7Read32(ARM7->R[13]+4)); return;
+ case 0x04000134: /* TODO? */ return;
case 0x04000138: RTC::Write(val, false); return;
@@ -2228,15 +2272,11 @@ void ARM7IOWrite16(u32 addr, u16 val)
if (ARM7BIOSProt == 0)
ARM7BIOSProt = val;
return;
-
- case 0x04000504: // removeme
- _soundbias = val & 0x3FF;
- return;
}
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
+ SPU::Write16(addr, val);
return;
}
@@ -2326,7 +2366,7 @@ void ARM7IOWrite32(u32 addr, u32 val)
if (addr >= 0x04000400 && addr < 0x04000520)
{
- // sound I/O
+ SPU::Write32(addr, val);
return;
}
diff --git a/src/NDS.h b/src/NDS.h
index 4fec117..480341f 100644
--- a/src/NDS.h
+++ b/src/NDS.h
@@ -24,30 +24,12 @@
namespace NDS
{
-/*#define SCHED_BUF_LEN 64
-
-typedef struct _SchedEvent
-{
- u32 Delay;
- void (*Func)(u32);
- u32 Param;
- struct _SchedEvent* PrevEvent;
- struct _SchedEvent* NextEvent;
-
-} SchedEvent;*/
-
enum
{
Event_LCD = 0,
+ Event_SPU,
- /*Event_Timer9_0,
- Event_Timer9_1,
- Event_Timer9_2,
- Event_Timer9_3,
- Event_Timer7_0,
- Event_Timer7_1,
- Event_Timer7_2,
- Event_Timer7_3,*/
+ Event_ROMTransfer,
Event_MAX
};
@@ -95,7 +77,6 @@ typedef struct
u16 Cnt;
u32 Counter;
u32 CycleShift;
- //SchedEvent* Event;
} Timer;
@@ -112,6 +93,8 @@ extern u8 ROMSeed1[2*8];
extern u8 ARM9BIOS[0x1000];
extern u8 ARM7BIOS[0x4000];
+extern u8 MainRAM[0x400000];
+
bool Init();
void DeInit();
void Reset();
@@ -127,15 +110,9 @@ void ReleaseKey(u32 key);
void TouchScreen(u16 x, u16 y);
void ReleaseScreen();
-/*SchedEvent* ScheduleEvent(s32 Delay, void (*Func)(u32), u32 Param);
-void CancelEvent(SchedEvent* event);
-void RunEvents(s32 cycles);*/
void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param);
void CancelEvent(u32 id);
-// DO NOT CALL FROM ARM7!!
-void CompensateARM7();
-
void debug(u32 p);
void Halt();
diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp
index f291d7b..696666b 100644
--- a/src/NDSCart.cpp
+++ b/src/NDSCart.cpp
@@ -246,7 +246,7 @@ void Write_Discover(u8 val, bool islast)
{
Discover_MemoryType = 5;
}
- else if (len > 2+128) // Flash
+ else if ((len > 2+128) || (len > 1+16 && CurCmd == 0xA)) // Flash
{
Discover_MemoryType = 4;
}
@@ -284,7 +284,7 @@ void Write_EEPROMTiny(u8 val, bool islast)
}
else
{
- SRAM[(Addr & 0xFF) | ((CurCmd==0x0A)?0x100:0)] = val;
+ SRAM[(Addr + ((CurCmd==0x0A)?0x100:0)) & 0x1FF] = val;
Addr++;
}
break;
@@ -298,7 +298,7 @@ void Write_EEPROMTiny(u8 val, bool islast)
}
else
{
- Data = SRAM[(Addr & 0xFF) | ((CurCmd==0x0B)?0x100:0)];
+ Data = SRAM[(Addr + ((CurCmd==0x0B)?0x100:0)) & 0x1FF];
Addr++;
}
break;
@@ -471,11 +471,20 @@ void Write(u8 val, u32 hold)
switch (CurCmd)
{
+ case 0x00:
+ // Pokémon carts have an IR transceiver thing, and send this
+ // to bypass it and access SRAM.
+ // TODO: design better
+ CurCmd = val;
+ break;
+
case 0x02:
case 0x03:
case 0x0A:
case 0x0B:
case 0x9F:
+ case 0xD8:
+ case 0xDB:
WriteFunc(val, islast);
DataPos++;
break;
@@ -496,7 +505,7 @@ void Write(u8 val, u32 hold)
default:
if (DataPos==0)
- printf("unknown save SPI command %02X\n", CurCmd);
+ // Only CurCmd is supplied, so the format string must contain exactly one conversion.
+ printf("unknown save SPI command %02X\n", CurCmd);
break;
}
@@ -801,9 +810,8 @@ void ReadROM_B7(u32 addr, u32 len, u32 offset)
}
-void EndTransfer()
+void ROMEndTransfer(u32 param)
{
- ROMCnt &= ~(1<<23);
ROMCnt &= ~(1<<31);
if (SPICnt & (1<<14))
@@ -820,16 +828,16 @@ void ROMPrepareData(u32 param)
DataOutPos += 4;
ROMCnt |= (1<<23);
- NDS::CheckDMAs(0, 0x06);
- NDS::CheckDMAs(1, 0x12);
- //if (DataOutPos < DataOutLen)
- // NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0);
+ if (NDS::ExMemCnt[0] & (1<<11))
+ NDS::CheckDMAs(1, 0x12);
+ else
+ NDS::CheckDMAs(0, 0x05);
}
void WriteROMCnt(u32 val)
{
- ROMCnt = val & 0xFF7F7FFF;
+ ROMCnt = (val & 0xFF7F7FFF) | (ROMCnt & 0x00800000);
if (!(SPICnt & (1<<15))) return;
@@ -958,52 +966,43 @@ void WriteROMCnt(u32 val)
break;
}
- //ROMCnt &= ~(1<<23);
- ROMCnt |= (1<<23);
+ ROMCnt &= ~(1<<23);
+
+ // ROM transfer timings
+ // the bus is parallel with 8 bits
+ // thus a command would take 8 cycles to be transferred
+ // and it would take 4 cycles to receive a word of data
+ // TODO: advance read position if bit28 is set
+
+ u32 xfercycle = (ROMCnt & (1<<27)) ? 8 : 5;
+ u32 cmddelay = 8 + (ROMCnt & 0x1FFF);
+ if (datasize) cmddelay += ((ROMCnt >> 16) & 0x3F);
if (datasize == 0)
- EndTransfer();
+ NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*cmddelay, ROMEndTransfer, 0);
else
- {
- NDS::CheckDMAs(0, 0x05);
- NDS::CheckDMAs(1, 0x12);
- }
- //NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0);
+ NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*(cmddelay+4), ROMPrepareData, 0);
}
u32 ReadROMData()
{
- /*if (ROMCnt & (1<<23))
+ if (ROMCnt & (1<<23))
{
ROMCnt &= ~(1<<23);
- if (DataOutPos >= DataOutLen)
- EndTransfer();
- }
-
- return ROMDataOut;*/
- u32 ret;
- if (DataOutPos >= DataOutLen)
- ret = 0;
- else
- ret = *(u32*)&DataOut[DataOutPos];
-
- DataOutPos += 4;
- if (DataOutPos == DataOutLen)
- EndTransfer();
-
- return ret;
-}
+ if (DataOutPos < DataOutLen)
+ {
+ u32 xfercycle = (ROMCnt & (1<<27)) ? 8 : 5;
+ u32 delay = 4;
+ if (!(DataOutPos & 0x1FF)) delay += ((ROMCnt >> 16) & 0x3F);
-void DMA(u32 addr)
-{
- void (*writefn)(u32,u32) = (NDS::ExMemCnt[0] & (1<<11)) ? NDS::ARM7Write32 : NDS::ARM9Write32;
- for (u32 i = 0; i < DataOutLen; i+=4)
- {
- writefn(addr+i, *(u32*)&DataOut[i]);
+ NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*delay, ROMPrepareData, 0);
+ }
+ else
+ ROMEndTransfer(0);
}
- EndTransfer();
+ return ROMDataOut;
}
diff --git a/src/NDSCart.h b/src/NDSCart.h
index 5125ffa..5bec38d 100644
--- a/src/NDSCart.h
+++ b/src/NDSCart.h
@@ -44,7 +44,6 @@ bool LoadROM(const char* path, bool direct);
void WriteROMCnt(u32 val);
u32 ReadROMData();
-void DMA(u32 addr);
void WriteSPICnt(u16 val);
u8 ReadSPIData();
diff --git a/src/RTC.cpp b/src/RTC.cpp
index 842fdae..3d45bef 100644
--- a/src/RTC.cpp
+++ b/src/RTC.cpp
@@ -18,6 +18,7 @@
#include <stdio.h>
#include <string.h>
+#include <time.h>
#include "RTC.h"
@@ -73,6 +74,12 @@ void Reset()
}
+// Packs a value into binary-coded decimal: the tens digit goes in the high
+// nibble and the ones digit in the low nibble.
+u8 BCD(u8 val)
+{
+    u8 tens = val / 10;
+    u8 ones = val % 10;
+    return (tens << 4) | ones;
+}
+
+
void ByteIn(u8 val)
{
//printf("RTC IN: %02X\n", val);
@@ -94,21 +101,33 @@ void ByteIn(u8 val)
case 0x40: Output[0] = StatusReg2; break;
case 0x20:
- // TODO: get actual system time
- Output[0] = 0x17;
- Output[1] = 0x01;
- Output[2] = 0x19;
- Output[3] = 0x04; // day of week. checkme. apparently 04=Thursday
- Output[4] = 0x06;
- Output[5] = 0x30;
- Output[6] = 0x30;
+ {
+ time_t timestamp;
+ struct tm* timedata;
+ time(&timestamp);
+ timedata = localtime(&timestamp);
+
+ Output[0] = BCD(timedata->tm_year - 100);
+ Output[1] = BCD(timedata->tm_mon + 1);
+ Output[2] = BCD(timedata->tm_mday);
+ Output[3] = BCD(timedata->tm_wday);
+ Output[4] = BCD(timedata->tm_hour);
+ Output[5] = BCD(timedata->tm_min);
+ Output[6] = BCD(timedata->tm_sec);
+ }
break;
case 0x60:
- // TODO: get actual system time
- Output[0] = 0x06;
- Output[1] = 0x30;
- Output[2] = 0x30;
+ {
+ time_t timestamp;
+ struct tm* timedata;
+ time(&timestamp);
+ timedata = localtime(&timestamp);
+
+ Output[0] = BCD(timedata->tm_hour);
+ Output[1] = BCD(timedata->tm_min);
+ Output[2] = BCD(timedata->tm_sec);
+ }
break;
case 0x10:
diff --git a/src/SPI.cpp b/src/SPI.cpp
index 3e77027..2c88197 100644
--- a/src/SPI.cpp
+++ b/src/SPI.cpp
@@ -164,6 +164,10 @@ void SetupDirectBoot()
NDS::ARM9Write32(0x027FFC80+i, *(u32*)&Firmware[UserSettings+i]);
}
+// Returns the byte stored at offset 0x1D of the loaded firmware image.
+u8 GetConsoleType()
+{
+    return Firmware[0x1D];
+}
+// Returns the byte stored at offset 0x2F of the loaded firmware image.
+u8 GetWifiVersion()
+{
+    return Firmware[0x2F];
+}
+// Returns the byte stored at offset 0x40 of the loaded firmware image.
+u8 GetRFVersion()
+{
+    return Firmware[0x40];
+}
+
u8 Read()
{
return Data;
diff --git a/src/SPI.h b/src/SPI.h
index 4304b1f..d122da8 100644
--- a/src/SPI.h
+++ b/src/SPI.h
@@ -24,6 +24,10 @@ namespace SPI_Firmware
void SetupDirectBoot();
+u8 GetConsoleType();
+u8 GetWifiVersion();
+u8 GetRFVersion();
+
}
namespace SPI_TSC
diff --git a/src/SPU.cpp b/src/SPU.cpp
new file mode 100644
index 0000000..002cde6
--- /dev/null
+++ b/src/SPU.cpp
@@ -0,0 +1,811 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "NDS.h"
+#include "SPU.h"
+
+
+namespace SPU
+{
+
+const s8 ADPCMIndexTable[8] = {-1, -1, -1, -1, 2, 4, 6, 8}; // step-index delta, looked up with the low 3 bits of each ADPCM nibble
+
+const u16 ADPCMTable[89] = // step sizes, indexed by Channel::ADPCMIndex (kept within 0..88 by the decoder)
+{
+    0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E,
+    0x0010, 0x0011, 0x0013, 0x0015, 0x0017, 0x0019, 0x001C, 0x001F,
+    0x0022, 0x0025, 0x0029, 0x002D, 0x0032, 0x0037, 0x003C, 0x0042,
+    0x0049, 0x0050, 0x0058, 0x0061, 0x006B, 0x0076, 0x0082, 0x008F,
+    0x009D, 0x00AD, 0x00BE, 0x00D1, 0x00E6, 0x00FD, 0x0117, 0x0133,
+    0x0151, 0x0173, 0x0198, 0x01C1, 0x01EE, 0x0220, 0x0256, 0x0292,
+    0x02D4, 0x031C, 0x036C, 0x03C3, 0x0424, 0x048E, 0x0502, 0x0583,
+    0x0610, 0x06AB, 0x0756, 0x0812, 0x08E0, 0x09C3, 0x0ABD, 0x0BD0,
+    0x0CFF, 0x0E4C, 0x0FBA, 0x114C, 0x1307, 0x14EE, 0x1706, 0x1954,
+    0x1BDC, 0x1EA5, 0x21B6, 0x2515, 0x28CA, 0x2CDF, 0x315B, 0x364B,
+    0x3BB9, 0x41B2, 0x4844, 0x4F7E, 0x5771, 0x602F, 0x69CE, 0x7462,
+    0x7FFF
+};
+
+const s16 PSGTable[8][8] = // square-wave levels, indexed as [(Cnt >> 24) & 7][Pos & 7] by NextSample_PSG()
+{
+    {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF},
+    {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF}
+};
+
+const u32 OutputBufferSize = 2*1024; // the buffer below holds 2*OutputBufferSize s16 entries
+s16 OutputBuffer[2 * OutputBufferSize]; // ring buffer; Mix() stores left at [offset] and right at [offset+1]
+u32 OutputReadOffset; // index into OutputBuffer advanced by ReadOutput()
+u32 OutputWriteOffset; // index into OutputBuffer advanced by Mix()
+
+
+u16 Cnt;
+u8 MasterVolume;
+u16 Bias;
+
+Channel* Channels[16];
+CaptureUnit* Capture[2];
+
+
+// Allocates the 16 sound channels and the two capture units.
+// Always returns true.
+bool Init()
+{
+    int num = 0;
+    while (num < 16)
+    {
+        Channels[num] = new Channel(num);
+        num++;
+    }
+
+    for (int unit = 0; unit < 2; unit++)
+        Capture[unit] = new CaptureUnit(unit);
+
+    return true;
+}
+
+// Frees the channel and capture unit objects allocated by Init().
+void DeInit()
+{
+    int num = 0;
+    while (num < 16)
+    {
+        delete Channels[num];
+        num++;
+    }
+
+    for (int unit = 0; unit < 2; unit++)
+        delete Capture[unit];
+}
+
+// Clears the output ring buffer and the global registers, resets every
+// channel and capture unit, then schedules the periodic Mix() event.
+void Reset()
+{
+    OutputWriteOffset = 0;
+    OutputReadOffset = 0;
+    memset(OutputBuffer, 0, sizeof(OutputBuffer));
+
+    Bias = 0;
+    MasterVolume = 0;
+    Cnt = 0;
+
+    int num = 0;
+    while (num < 16)
+    {
+        Channels[num]->Reset();
+        num++;
+    }
+
+    for (int unit = 0; unit < 2; unit++)
+        Capture[unit]->Reset();
+
+    // Mix() gets 16 as its parameter and is scheduled 1024*16 cycles ahead
+    NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*16, Mix, 16);
+}
+
+
+void SetBias(u16 bias) // stores the value unmodified; unlike the 0x04000504 register writes, no 0x3FF mask is applied here
+{
+    Bias = bias;
+}
+
+
+// Records the channel index; no other member is touched here.
+Channel::Channel(u32 num)
+    : Num(num)
+{
+}
+
+Channel::~Channel() // empty: nothing is released here
+{
+}
+
+// Zeroes the channel's registers (through SetCnt for the control word,
+// so Volume/VolumeShift/Pan are refreshed too) and clears the sample timer.
+void Channel::Reset()
+{
+    Timer = 0;
+
+    Length = 0;
+    LoopPos = 0;
+    TimerReload = 0;
+    SrcAddr = 0;
+    SetCnt(0);
+}
+
+// Prepares playback state when the channel gets enabled: reloads the timer,
+// rewinds the position (format 3 starts at -1, all others at -3) and resets
+// the noise generator and the current sample.
+void Channel::Start()
+{
+    const u32 format = (Cnt >> 29) & 0x3;
+
+    CurSample = 0;
+    NoiseVal = 0x7FFF;
+    Pos = (format == 3) ? -1 : -3;
+    Timer = TimerReload;
+}
+
+// Advances one 8-bit PCM sample. Negative positions are the startup delay
+// set by Start(). At the end of the data, repeat mode 1 jumps back to
+// LoopPos and repeat mode 2 silences and disables the channel.
+void Channel::NextSample_PCM8()
+{
+    Pos++;
+    if (Pos < 0) return;
+
+    if (Pos >= (LoopPos + Length))
+    {
+        // TODO: what happens when mode 3 is used?
+        switch ((Cnt >> 27) & 0x3)
+        {
+        case 1:
+            Pos = LoopPos;
+            break;
+
+        case 2:
+            CurSample = 0;
+            Cnt &= ~(1<<31);
+            return;
+        }
+    }
+
+    // widen the signed byte to the 16-bit sample range
+    s8 raw = (s8)NDS::ARM7Read8(SrcAddr + Pos);
+    CurSample = raw << 8;
+}
+
+// Advances one 16-bit PCM sample. Pos counts samples, so it is doubled
+// wherever it is compared against or converted to a byte offset.
+// End-of-data handling follows the repeat mode as in the 8-bit variant.
+void Channel::NextSample_PCM16()
+{
+    Pos++;
+    if (Pos < 0) return;
+
+    if ((Pos<<1) >= (LoopPos + Length))
+    {
+        // TODO: what happens when mode 3 is used?
+        switch ((Cnt >> 27) & 0x3)
+        {
+        case 1:
+            Pos = LoopPos>>1;
+            break;
+
+        case 2:
+            CurSample = 0;
+            Cnt &= ~(1<<31);
+            return;
+        }
+    }
+
+    CurSample = (s16)NDS::ARM7Read16(SrcAddr + (Pos<<1));
+}
+
+void Channel::NextSample_ADPCM() // advances the ADPCM decoder by one 4-bit sample (Pos counts nibbles)
+{
+    Pos++;
+    if (Pos < 8) // nibble positions 0..7 are the 4-byte header: nothing is decoded there
+    {
+        if (Pos == 0)
+        {
+            // setup ADPCM
+            u32 header = NDS::ARM7Read32(SrcAddr);
+            ADPCMVal = header & 0xFFFF; // NOTE(review): stored without sign extension into the s32 — confirm this is intended
+            ADPCMIndex = (header >> 16) & 0x7F;
+            if (ADPCMIndex > 88) ADPCMIndex = 88; // ADPCMTable has 89 entries
+
+            ADPCMValLoop = ADPCMVal; // provisional loop state, replaced once Pos reaches the loop point
+            ADPCMIndexLoop = ADPCMIndex;
+        }
+
+        return;
+    }
+
+    if ((Pos>>1) >= (LoopPos + Length)) // Pos>>1 is the byte offset of the current nibble
+    {
+        // TODO: what happens when mode 3 is used?
+        u32 repeat = (Cnt >> 27) & 0x3;
+        if (repeat == 2)
+        {
+            CurSample = 0;
+            Cnt &= ~(1<<31); // clears the enable bit of the channel
+            return;
+        }
+        else if (repeat == 1)
+        {
+            Pos = LoopPos<<1; // back to the loop point and restore the decoder state saved there
+            ADPCMVal = ADPCMValLoop;
+            ADPCMIndex = ADPCMIndexLoop;
+        }
+    }
+    else
+    {
+        if (!(Pos & 0x1))
+            ADPCMCurByte = NDS::ARM7Read8(SrcAddr + (Pos>>1)); // even position: fetch a new byte, low nibble is used first
+        else
+            ADPCMCurByte >>= 4; // odd position: move the high nibble down
+
+        u16 val = ADPCMTable[ADPCMIndex];
+        u16 diff = val >> 3;
+        if (ADPCMCurByte & 0x1) diff += (val >> 2);
+        if (ADPCMCurByte & 0x2) diff += (val >> 1);
+        if (ADPCMCurByte & 0x4) diff += val;
+
+        if (ADPCMCurByte & 0x8) // bit 3 selects subtraction
+        {
+            ADPCMVal -= diff;
+            if (ADPCMVal < -0x7FFF) ADPCMVal = -0x7FFF;
+        }
+        else
+        {
+            ADPCMVal += diff;
+            if (ADPCMVal > 0x7FFF) ADPCMVal = 0x7FFF;
+        }
+
+        ADPCMIndex += ADPCMIndexTable[ADPCMCurByte & 0x7];
+        if (ADPCMIndex < 0) ADPCMIndex = 0;
+        else if (ADPCMIndex > 88) ADPCMIndex = 88;
+
+        if (Pos == (LoopPos<<1)) // remember the decoder state at the loop point for later restarts
+        {
+            ADPCMValLoop = ADPCMVal;
+            ADPCMIndexLoop = ADPCMIndex;
+        }
+    }
+
+    CurSample = ADPCMVal;
+}
+
+// Steps the square-wave generator: the row of PSGTable is picked by bits
+// 24-26 of Cnt and the column by the low 3 bits of the new position.
+void Channel::NextSample_PSG()
+{
+    Pos++;
+
+    const u32 duty = (Cnt >> 24) & 0x7;
+    const u32 step = Pos & 0x7;
+    CurSample = PSGTable[duty][step];
+}
+
+// Steps the noise shift register. The bit shifted out decides the output
+// level; when it is set, 0x6000 is XORed into the shifted value.
+void Channel::NextSample_Noise()
+{
+    const bool carry = (NoiseVal & 0x1) != 0;
+
+    NoiseVal >>= 1;
+    if (carry) NoiseVal ^= 0x6000;
+
+    CurSample = carry ? -0x7FFF : 0x7FFF;
+}
+
+// Produces up to `samples` volume-scaled output values into buf.
+// `type` picks the sample fetcher (0=PCM8, 1=PCM16, 2=ADPCM, 3=PSG, 4=noise).
+// buf is zeroed first, so entries after an early stop (channel disabled
+// itself) remain 0.
+template<u32 type>
+void Channel::Run(s32* buf, u32 samples)
+{
+    memset(buf, 0, samples * sizeof(s32));
+
+    for (u32 i = 0; i < samples; i++)
+    {
+        Timer += 512; // 1 sample = 512 cycles at 16MHz
+
+        // each overflow of the 16-bit timer fetches one source sample
+        for (; Timer >> 16; )
+        {
+            Timer = TimerReload + (Timer - 0x10000);
+
+            if      (type == 0) NextSample_PCM8();
+            else if (type == 1) NextSample_PCM16();
+            else if (type == 2) NextSample_ADPCM();
+            else if (type == 3) NextSample_PSG();
+            else if (type == 4) NextSample_Noise();
+        }
+
+        s32 scaled = (s32)CurSample;
+        scaled <<= VolumeShift;
+        buf[i] = scaled * Volume;
+
+        if (!(Cnt & (1<<31))) break;
+    }
+}
+
+
+// Records the capture unit index; no other member is touched here.
+CaptureUnit::CaptureUnit(u32 num)
+    : Num(num)
+{
+}
+
+CaptureUnit::~CaptureUnit() // empty: nothing is released here
+{
+}
+
+// Returns the capture unit to a known state: control, destination, timer
+// reload and length registers are zeroed, and so are the running timer and
+// the write position.
+void CaptureUnit::Reset()
+{
+    SetCnt(0);
+    DstAddr = 0;
+    TimerReload = 0;
+    Length = 0;
+
+    Timer = 0;
+    // Neither the constructor nor Start() sets Pos, yet Run() uses it as the
+    // write offset, so it has to be given a defined value here.
+    Pos = 0;
+}
+
+// Feeds one mixed sample to the capture unit. Every overflow of the 16-bit
+// timer stores the sample at DstAddr+Pos: as one byte (bits 8-15 of the
+// sample) when Cnt bit 3 is set, otherwise as a 16-bit value. When Pos
+// reaches Length, the unit either stops (Cnt bit 2 set: bit 7 is cleared)
+// or wraps back to the start of the buffer.
+void CaptureUnit::Run(s32 sample)
+{
+    Timer += 512;
+
+    // the format is sampled once, before any write takes place
+    const bool bytemode = (Cnt & 0x08) != 0;
+
+    while (Timer >> 16)
+    {
+        Timer = TimerReload + (Timer - 0x10000);
+
+        if (bytemode)
+        {
+            NDS::ARM7Write8(DstAddr + Pos, (u8)(sample >> 8));
+            Pos++;
+        }
+        else
+        {
+            NDS::ARM7Write16(DstAddr + Pos, (u16)sample);
+            Pos += 2;
+        }
+
+        if (Pos >= Length)
+        {
+            if (Cnt & 0x04)
+            {
+                Cnt &= 0x7F;
+                return;
+            }
+
+            Pos = 0;
+        }
+    }
+}
+
+
+void Mix(u32 samples) // scheduler callback: mixes `samples` stereo frames into OutputBuffer, then reschedules itself
+{
+    s32 channelbuf[32]; // NOTE(review): all scratch buffers hold 32 entries while the callers visible here pass 16; a larger count would overflow them
+    s32 leftbuf[32], rightbuf[32];
+    s32 ch1buf[32], ch3buf[32];
+    s32 leftoutput[32], rightoutput[32];
+
+    for (u32 s = 0; s < samples; s++)
+    {
+        leftbuf[s] = 0; rightbuf[s] = 0;
+        leftoutput[s] = 0; rightoutput[s] = 0;
+    }
+
+    if (Cnt & (1<<15)) // with bit 15 clear nothing is mixed and the output stays at zero
+    {
+        u32 mixermask = 0xFFFF;
+        if (Cnt & (1<<12)) mixermask &= ~(1<<1); // bit 12 removes channel 1 from the mixer
+        if (Cnt & (1<<13)) mixermask &= ~(1<<3); // bit 13 removes channel 3 from the mixer
+
+        for (int i = 0; i < 16; i++)
+        {
+            if (!(mixermask & (1<<i))) continue;
+            Channel* chan = Channels[i];
+            if (!(chan->Cnt & (1<<31))) continue; // skip channels whose enable bit is clear
+
+            // TODO: what happens if we use type 3 on channels 0-7??
+            chan->DoRun(channelbuf, samples);
+
+            for (u32 s = 0; s < samples; s++)
+            {
+                s32 val = (s32)channelbuf[s];
+
+                s32 l = ((s64)val * (128-chan->Pan)) >> 10; // Pan ranges 0..128 (SetCnt bumps 127 to 128)
+                s32 r = ((s64)val * chan->Pan) >> 10;
+
+                leftbuf[s] += l;
+                rightbuf[s] += r;
+            }
+        }
+
+        // sound capture
+        // TODO: other sound capture sources, along with their bugs
+
+        if (Capture[0]->Cnt & (1<<7))
+        {
+            for (u32 s = 0; s < samples; s++)
+            {
+                s32 val = leftbuf[s];
+
+                val >>= 8;
+                if (val < -0x8000) val = -0x8000;
+                else if (val > 0x7FFF) val = 0x7FFF;
+
+                Capture[0]->Run(val);
+                if (!((Capture[0]->Cnt & (1<<7)))) break; // the unit cleared its own enable bit: stop feeding it
+            }
+        }
+
+        if (Capture[1]->Cnt & (1<<7))
+        {
+            for (u32 s = 0; s < samples; s++)
+            {
+                s32 val = rightbuf[s];
+
+                val >>= 8;
+                if (val < -0x8000) val = -0x8000;
+                else if (val > 0x7FFF) val = 0x7FFF;
+
+                Capture[1]->Run(val);
+                if (!((Capture[1]->Cnt & (1<<7)))) break;
+            }
+        }
+
+        // final output
+
+        if (Cnt & 0x0500)
+        {
+            // mix channel 1 if needed
+            Channels[1]->DoRun(ch1buf, samples);
+        }
+        if (Cnt & 0x0A00)
+        {
+            // mix channel 3 if needed
+            Channels[3]->DoRun(ch3buf, samples);
+        }
+
+        switch (Cnt & 0x0300) // bits 8-9 select the source of the left output
+        {
+        case 0x0000: // left mixer
+            {
+                for (u32 s = 0; s < samples; s++)
+                    leftoutput[s] = leftbuf[s];
+            }
+            break;
+        case 0x0100: // channel 1
+            {
+                s32 pan = 128 - Channels[1]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
+            }
+            break;
+        case 0x0200: // channel 3
+            {
+                s32 pan = 128 - Channels[3]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
+            }
+            break;
+        case 0x0300: // channel 1+3
+            {
+                s32 pan1 = 128 - Channels[1]->Pan;
+                s32 pan3 = 128 - Channels[3]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
+            }
+            break;
+        }
+
+        switch (Cnt & 0x0C00) // bits 10-11 select the source of the right output
+        {
+        case 0x0000: // right mixer
+            {
+                for (u32 s = 0; s < samples; s++)
+                    rightoutput[s] = rightbuf[s];
+            }
+            break;
+        case 0x0400: // channel 1
+            {
+                s32 pan = Channels[1]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10;
+            }
+            break;
+        case 0x0800: // channel 3
+            {
+                s32 pan = Channels[3]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10;
+            }
+            break;
+        case 0x0C00: // channel 1+3
+            {
+                s32 pan1 = Channels[1]->Pan;
+                s32 pan3 = Channels[3]->Pan;
+                for (u32 s = 0; s < samples; s++)
+                    rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10);
+            }
+            break;
+        }
+    }
+
+    for (u32 s = 0; s < samples; s++)
+    {
+        s32 l = leftoutput[s];
+        s32 r = rightoutput[s];
+
+        l = ((s64)l * MasterVolume) >> 7; // MasterVolume ranges 0..128 (127 is bumped to 128 on register writes)
+        r = ((s64)r * MasterVolume) >> 7;
+
+        l >>= 8;
+        if (l < -0x8000) l = -0x8000;
+        else if (l > 0x7FFF) l = 0x7FFF;
+        r >>= 8;
+        if (r < -0x8000) r = -0x8000;
+        else if (r > 0x7FFF) r = 0x7FFF;
+
+        OutputBuffer[OutputWriteOffset ] = l >> 1; // the clamped sample is halved before being stored
+        OutputBuffer[OutputWriteOffset + 1] = r >> 1;
+        OutputWriteOffset += 2;
+        OutputWriteOffset &= ((2*OutputBufferSize)-1); // wrap around the ring buffer
+    }
+
+
+    NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*16, Mix, 16); // same interval and sample count as the initial scheduling in Reset()
+}
+
+
+// Copies `samples` stereo frames (left, right) from the output ring buffer
+// into data. Once the read offset has caught up with the write offset it
+// stops advancing, so the frame at that position is repeated.
+void ReadOutput(s16* data, int samples)
+{
+    const u32 wrapmask = (2*OutputBufferSize) - 1;
+    s16* out = data;
+
+    for (int left = samples; left > 0; left--)
+    {
+        out[0] = OutputBuffer[OutputReadOffset];
+        out[1] = OutputBuffer[OutputReadOffset + 1];
+        out += 2;
+
+        if (OutputReadOffset == OutputWriteOffset) continue;
+
+        OutputReadOffset = (OutputReadOffset + 2) & wrapmask;
+    }
+}
+
+
+// 8-bit register read. Below 0x04000500 only the four bytes of a channel's
+// control word are readable; above it, the two bytes of the global control
+// register and the two capture control bytes. Everything else is logged and
+// reads as 0.
+u8 Read8(u32 addr)
+{
+    if (addr >= 0x04000500)
+    {
+        if (addr == 0x04000500) return Cnt & 0x7F;
+        if (addr == 0x04000501) return Cnt >> 8;
+
+        if (addr == 0x04000508) return Capture[0]->Cnt;
+        if (addr == 0x04000509) return Capture[1]->Cnt;
+    }
+    else
+    {
+        const u32 reg = addr & 0xF;
+        if (reg < 0x4)
+        {
+            // select byte `reg` of the 32-bit channel control word
+            Channel* ch = Channels[(addr >> 4) & 0xF];
+            return (ch->Cnt >> (reg * 8)) & 0xFF;
+        }
+    }
+
+    printf("unknown SPU read8 %08X\n", addr);
+    return 0;
+}
+
+// 16-bit register read: either half of a channel's control word, the global
+// control register, the bias register, or both capture control bytes packed
+// together. Everything else is logged and reads as 0.
+u16 Read16(u32 addr)
+{
+    if (addr >= 0x04000500)
+    {
+        if (addr == 0x04000500) return Cnt;
+        if (addr == 0x04000504) return Bias;
+
+        if (addr == 0x04000508) return Capture[0]->Cnt | (Capture[1]->Cnt << 8);
+    }
+    else
+    {
+        Channel* ch = Channels[(addr >> 4) & 0xF];
+        const u32 reg = addr & 0xF;
+
+        if (reg == 0x0) return ch->Cnt & 0xFFFF;
+        if (reg == 0x2) return ch->Cnt >> 16;
+    }
+
+    printf("unknown SPU read16 %08X\n", addr);
+    return 0;
+}
+
+// 32-bit register read: a channel's control word, the global control and
+// bias registers, the packed capture control bytes, or a capture unit's
+// destination address. Everything else is logged and reads as 0.
+u32 Read32(u32 addr)
+{
+    if (addr >= 0x04000500)
+    {
+        if (addr == 0x04000500) return Cnt;
+        if (addr == 0x04000504) return Bias;
+
+        if (addr == 0x04000508) return Capture[0]->Cnt | (Capture[1]->Cnt << 8);
+
+        if (addr == 0x04000510) return Capture[0]->DstAddr;
+        if (addr == 0x04000518) return Capture[1]->DstAddr;
+    }
+    else
+    {
+        Channel* ch = Channels[(addr >> 4) & 0xF];
+
+        if ((addr & 0xF) == 0x0) return ch->Cnt;
+    }
+
+    printf("unknown SPU read32 %08X\n", addr);
+    return 0;
+}
+
+// 8-bit register write. Channel control bytes are merged into the existing
+// control word and applied through SetCnt(); the global control bytes and
+// the capture control bytes are handled individually. Unhandled addresses
+// are logged.
+void Write8(u32 addr, u8 val)
+{
+    if (addr >= 0x04000500)
+    {
+        switch (addr)
+        {
+        case 0x04000500:
+            Cnt = (Cnt & 0xBF00) | (val & 0x7F);
+            MasterVolume = Cnt & 0x7F;
+            if (MasterVolume == 127) MasterVolume = 128;
+            return;
+
+        case 0x04000501:
+            Cnt = (Cnt & 0x007F) | ((val & 0xBF) << 8);
+            return;
+
+        case 0x04000508:
+        case 0x04000509:
+            // the low address bit picks the capture unit
+            Capture[addr & 0x1]->SetCnt(val);
+            if (val & 0x03) printf("!! UNSUPPORTED SPU CAPTURE MODE %02X\n", val);
+            return;
+        }
+    }
+    else
+    {
+        const u32 reg = addr & 0xF;
+        if (reg < 0x4)
+        {
+            // replace byte `reg` of the channel control word
+            Channel* ch = Channels[(addr >> 4) & 0xF];
+            const u32 shift = reg * 8;
+            const u32 keep = ~((u32)0xFF << shift);
+            ch->SetCnt((ch->Cnt & keep) | ((u32)val << shift));
+            return;
+        }
+    }
+
+    printf("unknown SPU write8 %08X %02X\n", addr, val);
+}
+
+// 16-bit register write. Covers the channel control halves, timer reload,
+// loop position and length halves, plus the global control, bias, capture
+// control and capture length registers. A timer write to channel 1 or 3 is
+// mirrored to capture unit 0 or 1. Unhandled addresses are logged.
+void Write16(u32 addr, u16 val)
+{
+    if (addr >= 0x04000500)
+    {
+        switch (addr)
+        {
+        case 0x04000500:
+            Cnt = val & 0xBF7F;
+            MasterVolume = Cnt & 0x7F;
+            if (MasterVolume == 127) MasterVolume = 128;
+            return;
+
+        case 0x04000504:
+            Bias = val & 0x3FF;
+            return;
+
+        case 0x04000508:
+            Capture[0]->SetCnt(val & 0xFF);
+            Capture[1]->SetCnt(val >> 8);
+            if (val & 0x0303) printf("!! UNSUPPORTED SPU CAPTURE MODE %04X\n", val);
+            return;
+
+        case 0x04000514: Capture[0]->SetLength(val); return;
+        case 0x0400051C: Capture[1]->SetLength(val); return;
+        }
+    }
+    else
+    {
+        const u32 chnum = (addr >> 4) & 0xF;
+        Channel* ch = Channels[chnum];
+        const u32 reg = addr & 0xF;
+
+        if (reg == 0x0) { ch->SetCnt((ch->Cnt & 0xFFFF0000) | val); return; }
+        if (reg == 0x2) { ch->SetCnt((ch->Cnt & 0x0000FFFF) | (val << 16)); return; }
+        if (reg == 0x8)
+        {
+            ch->SetTimerReload(val);
+            if (chnum == 1) Capture[0]->SetTimerReload(val);
+            else if (chnum == 3) Capture[1]->SetTimerReload(val);
+            return;
+        }
+        if (reg == 0xA) { ch->SetLoopPos(val); return; }
+
+        if (reg == 0xC) { ch->SetLength((ch->Length & 0xFFFF0000) | val); return; }
+        if (reg == 0xE) { ch->SetLength((ch->Length & 0x0000FFFF) | (val << 16)); return; }
+    }
+
+    printf("unknown SPU write16 %08X %04X\n", addr, val);
+}
+
+// 32-bit register write. Covers the channel control, source address,
+// timer/loop position pair and length, plus the global control, bias,
+// capture control, capture destination and capture length registers.
+// A timer write to channel 1 or 3 is mirrored to capture unit 0 or 1.
+// Unhandled addresses are silently ignored.
+void Write32(u32 addr, u32 val)
+{
+    if (addr >= 0x04000500)
+    {
+        switch (addr)
+        {
+        case 0x04000500:
+            Cnt = val & 0xBF7F;
+            MasterVolume = Cnt & 0x7F;
+            if (MasterVolume == 127) MasterVolume = 128;
+            return;
+
+        case 0x04000504:
+            Bias = val & 0x3FF;
+            return;
+
+        case 0x04000508:
+            Capture[0]->SetCnt(val & 0xFF);
+            Capture[1]->SetCnt(val >> 8);
+            if (val & 0x0303) printf("!! UNSUPPORTED SPU CAPTURE MODE %04X\n", val);
+            return;
+
+        case 0x04000510: Capture[0]->SetDstAddr(val); return;
+        case 0x04000514: Capture[0]->SetLength(val & 0xFFFF); return;
+        case 0x04000518: Capture[1]->SetDstAddr(val); return;
+        case 0x0400051C: Capture[1]->SetLength(val & 0xFFFF); return;
+        }
+    }
+    else
+    {
+        const u32 chnum = (addr >> 4) & 0xF;
+        Channel* ch = Channels[chnum];
+        const u32 reg = addr & 0xF;
+
+        if (reg == 0x0) { ch->SetCnt(val); return; }
+        if (reg == 0x4) { ch->SetSrcAddr(val); return; }
+        if (reg == 0x8)
+        {
+            // high half is the loop position, low half the timer reload
+            ch->SetLoopPos(val >> 16);
+            const u32 reload = val & 0xFFFF;
+            ch->SetTimerReload(reload);
+            if (chnum == 1) Capture[0]->SetTimerReload(reload);
+            else if (chnum == 3) Capture[1]->SetTimerReload(reload);
+            return;
+        }
+        if (reg == 0xC) { ch->SetLength(val); return; }
+    }
+}
+
+}
diff --git a/src/SPU.h b/src/SPU.h
new file mode 100644
index 0000000..a00e094
--- /dev/null
+++ b/src/SPU.h
@@ -0,0 +1,160 @@
+/*
+ Copyright 2016-2017 StapleButter
+
+ This file is part of melonDS.
+
+ melonDS is free software: you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with melonDS. If not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef SPU_H
+#define SPU_H
+
+namespace SPU
+{
+
+bool Init();
+void DeInit();
+void Reset();
+
+void SetBias(u16 bias);
+
+void Mix(u32 samples);
+
+void ReadOutput(s16* data, int samples);
+
+u8 Read8(u32 addr);
+u16 Read16(u32 addr);
+u32 Read32(u32 addr);
+void Write8(u32 addr, u8 val);
+void Write16(u32 addr, u16 val);
+void Write32(u32 addr, u32 val);
+
+// One of the 16 sound channels: holds the register state, the values derived
+// from the control word, and the running playback state.
+class Channel
+{
+public:
+    Channel(u32 num);
+    ~Channel();
+    void Reset();
+
+    u32 Num;            // channel index, set by the constructor
+
+    // register state
+    u32 Cnt;
+    u32 SrcAddr;
+    u16 TimerReload;
+    u32 LoopPos;        // stored in bytes (register value << 2)
+    u32 Length;         // stored in bytes (register value << 2)
+
+    // derived from Cnt by SetCnt()
+    u8 Volume;          // 0..128 (127 is bumped to 128)
+    u8 VolumeShift;
+    u8 Pan;             // 0..128 (127 is bumped to 128)
+
+    // playback state
+    u32 Timer;
+    s32 Pos;
+    s16 CurSample;
+    u16 NoiseVal;
+
+    // ADPCM decoder state, plus the copy saved at the loop point
+    s32 ADPCMVal;
+    s32 ADPCMIndex;
+    s32 ADPCMValLoop;
+    s32 ADPCMIndexLoop;
+    u8 ADPCMCurByte;
+
+    // Stores a new control word, refreshes the derived volume/pan values and
+    // calls Start() when the enable bit (31) goes from clear to set.
+    void SetCnt(u32 val)
+    {
+        u32 oldcnt = Cnt;
+        Cnt = val & 0xFF7F837F;
+
+        Volume = Cnt & 0x7F;
+        if (Volume == 127) Volume++;
+
+        const u8 volshift[4] = {4, 3, 2, 0};
+        VolumeShift = volshift[(Cnt >> 8) & 0x3];
+
+        Pan = (Cnt >> 16) & 0x7F;
+        if (Pan == 127) Pan++;
+
+        if ((val & (1<<31)) && !(oldcnt & (1<<31)))
+        {
+            Start();
+        }
+    }
+
+    void SetSrcAddr(u32 val) { SrcAddr = val & 0x07FFFFFC; }
+    void SetTimerReload(u32 val) { TimerReload = val & 0xFFFF; }
+    void SetLoopPos(u32 val) { LoopPos = (val & 0xFFFF) << 2; }
+    void SetLength(u32 val) { Length = (val & 0x001FFFFF) << 2; }
+
+    void Start();
+
+    void NextSample_PCM8();
+    void NextSample_PCM16();
+    void NextSample_ADPCM();
+    void NextSample_PSG();
+    void NextSample_Noise();
+
+    template<u32 type> void Run(s32* buf, u32 samples);
+
+    // Fills buf with `samples` output values using the generator selected by
+    // the format field (bits 29-30 of Cnt). Format 3 is PSG on channels 8-13
+    // and noise on channels 14-15.
+    void DoRun(s32* buf, u32 samples)
+    {
+        switch ((Cnt >> 29) & 0x3)
+        {
+        case 0: Run<0>(buf, samples); break;
+        case 1: Run<1>(buf, samples); break;
+        case 2: Run<2>(buf, samples); break;
+        case 3:
+            if (Num >= 14) Run<4>(buf, samples);
+            else if (Num >= 8) Run<3>(buf, samples);
+            else
+            {
+                // No generator handles format 3 on channels 0-7 here. Output
+                // silence so the caller never reads an unwritten buffer
+                // (Run<> likewise zeroes buf before filling it).
+                for (u32 s = 0; s < samples; s++)
+                    buf[s] = 0;
+            }
+            break;
+        }
+    }
+};
+
+// One of the two sound capture units: stores mixed samples to memory at
+// DstAddr, paced by its own timer.
+class CaptureUnit
+{
+public:
+    CaptureUnit(u32 num);
+    ~CaptureUnit();
+    void Reset();
+
+    u32 Num;            // unit index, set by the constructor
+
+    // register state
+    u8 Cnt;
+    u32 DstAddr;
+    u16 TimerReload;
+    u32 Length;         // stored in bytes (register value << 2), never 0
+
+    // running state
+    u32 Timer;
+    s32 Pos;
+
+    // Stores a new control byte. Start() is called when bit 7 goes from
+    // clear to set; only bits 0-3 and 7 are kept, and bit 0 is dropped
+    // whenever bit 7 is clear.
+    void SetCnt(u8 val)
+    {
+        const bool enabling = (val & 0x80) && !(Cnt & 0x80);
+        if (enabling)
+            Start();
+
+        u8 newcnt = val & 0x8F;
+        if (!(newcnt & 0x80)) newcnt &= ~0x01;
+        Cnt = newcnt;
+    }
+
+    void SetDstAddr(u32 val) { DstAddr = val & 0x07FFFFFC; }
+    void SetTimerReload(u32 val) { TimerReload = val & 0xFFFF; }
+
+    // A length of 0 is treated as 4 bytes.
+    void SetLength(u32 val)
+    {
+        const u32 bytes = val << 2;
+        Length = bytes ? bytes : 4;
+    }
+
+    void Start() { Timer = TimerReload; }
+
+    void Run(s32 sample);
+};
+
+}
+
+#endif // SPU_H
diff --git a/src/Wifi.cpp b/src/Wifi.cpp
index 0f1c239..99d0017 100644
--- a/src/Wifi.cpp
+++ b/src/Wifi.cpp
@@ -19,22 +19,39 @@
#include <stdio.h>
#include <string.h>
#include "NDS.h"
+#include "SPI.h"
#include "Wifi.h"
namespace Wifi
{
+u8 RAM[0x2000];
+u16 IO[0x1000>>1];
+
+#define IOPORT(x) IO[(x)>>1]
+
+u16 Random;
+
u16 BBCnt;
u8 BBWrite;
u8 BBRegs[0x100];
u8 BBRegsRO[0x100];
+u8 RFVersion;
+u16 RFCnt;
+u16 RFData1;
+u16 RFData2;
+u32 RFRegs[0x40];
+
void Reset()
{
- BBCnt = 0;
- BBWrite = 0;
+ memset(RAM, 0, 0x2000);
+ memset(IO, 0, 0x1000);
+
+ Random = 1;
+
memset(BBRegs, 0, 0x100);
memset(BBRegsRO, 0, 0x100);
@@ -65,56 +82,230 @@ void Reset()
BBREG_FIXED(i, 0x00);
}
#undef BBREG_FIXED
+
+ RFVersion = SPI_Firmware::GetRFVersion();
+ memset(RFRegs, 0, 4*0x40);
+
+ memset(&IOPORT(0x018), 0xFF, 6);
+ memset(&IOPORT(0x020), 0xFF, 6);
}
+// RF transfer for the non-type-3 chip. The register index comes from bits
+// 2-6 of W_RFData2. With bit 7 set, the 18-bit register value is copied out
+// to W_RFData1 (low 16 bits) and to bits 0-1 of W_RFData2; otherwise the
+// register is loaded from those same fields.
+void RFTransfer_Type2()
+{
+    const u16 cmd = IOPORT(W_RFData2);
+    const u32 id = (cmd >> 2) & 0x1F;
+
+    if (!(cmd & 0x0080))
+    {
+        RFRegs[id] = IOPORT(W_RFData1) | ((cmd & 0x0003) << 16);
+        return;
+    }
+
+    const u32 data = RFRegs[id];
+    IOPORT(W_RFData1) = data & 0xFFFF;
+    IOPORT(W_RFData2) = (cmd & 0xFFFC) | ((data >> 16) & 0x3);
+}
+
+// RF transfer for the type-3 chip. The register index is bits 8-13 of
+// W_RFData1 and the command is the low nibble of W_RFData2: 5 stores the
+// low byte of W_RFData1 into the register, 6 returns the register's low
+// byte in the low byte of W_RFData1. Other commands do nothing.
+void RFTransfer_Type3()
+{
+    const u32 id = (IOPORT(W_RFData1) >> 8) & 0x3F;
+
+    switch (IOPORT(W_RFData2) & 0xF)
+    {
+    case 5:
+        RFRegs[id] = IOPORT(W_RFData1) & 0xFF;
+        break;
+
+    case 6:
+        IOPORT(W_RFData1) = (IOPORT(W_RFData1) & 0xFF00) | (RFRegs[id] & 0xFF);
+        break;
+    }
+}
+
+
+// TODO: wifi waitstates
+
u16 Read(u32 addr)
{
- addr &= 0x7FFF;
+ addr &= 0x7FFE;
+ //printf("WIFI: read %08X\n", addr);
+ if (addr >= 0x4000 && addr < 0x6000)
+ {
+ return *(u16*)&RAM[addr & 0x1FFE];
+ }
switch (addr)
{
- case 0x158:
- return BBCnt;
+ case W_Random: // random generator. not accurate
+ Random = (Random & 0x1) ^ (((Random & 0x3FF) << 1) | (Random >> 10));
+ return Random;
- case 0x15C:
- if ((BBCnt & 0xF000) != 0x6000)
+ case W_Preamble:
+ return IOPORT(W_Preamble) & 0x0003;
+
+ case W_BBRead:
+ if ((IOPORT(W_BBCnt) & 0xF000) != 0x6000)
{
- printf("WIFI: bad BB read, CNT=%04X\n", BBCnt);
+ printf("WIFI: bad BB read, CNT=%04X\n", IOPORT(W_BBCnt));
return 0;
}
- return BBRegs[BBCnt & 0xFF];
+ return BBRegs[IOPORT(W_BBCnt) & 0xFF];
- case 0x15E:
- return 0; // cheap
+ case W_BBBusy:
+ return 0; // TODO eventually (BB busy flag)
+ case W_RFBusy:
+ return 0; // TODO eventually (RF busy flag)
}
- printf("WIFI: unknown read %08X\n", addr);
- return 0;
+ //printf("WIFI: read %08X\n", addr);
+ return IOPORT(addr&0xFFF);
}
void Write(u32 addr, u16 val)
{
- addr &= 0x7FFF;
+ addr &= 0x7FFE;
+ //printf("WIFI: write %08X %04X\n", addr, val);
+ if (addr >= 0x4000 && addr < 0x6000)
+ {
+ *(u16*)&RAM[addr & 0x1FFE] = val;
+ return;
+ }
switch (addr)
{
- case 0x158:
- BBCnt = val;
- if ((BBCnt & 0xF000) == 0x5000)
+ case W_ModeReset:
+ {
+ u16 oldval = IOPORT(W_ModeReset);
+
+ if (!(oldval & 0x0001) && (val & 0x0001))
+ {
+ IOPORT(0x034) = 0x0002;
+ IOPORT(W_RFPins) = 0x0046;
+ IOPORT(W_RFStatus) = 9;
+ IOPORT(0x27C) = 0x0005;
+ // TODO: 02A2??
+ }
+ else if ((oldval & 0x0001) && !(val & 0x0001))
+ {
+ IOPORT(0x27C) = 0x000A;
+ }
+
+ if (val & 0x2000)
+ {
+ IOPORT(W_RXBufWriteAddr) = 0;
+ IOPORT(W_CmdTotalTime) = 0;
+ IOPORT(W_CmdReplyTime) = 0;
+ IOPORT(0x1A4) = 0;
+ IOPORT(0x278) = 0x000F;
+ // TODO: other ports??
+ }
+ if (val & 0x4000)
+ {
+ IOPORT(W_ModeWEP) = 0;
+ IOPORT(W_TXStatCnt) = 0;
+ IOPORT(0x00A) = 0;
+ IOPORT(W_MACAddr0) = 0;
+ IOPORT(W_MACAddr1) = 0;
+ IOPORT(W_MACAddr2) = 0;
+ IOPORT(W_BSSID0) = 0;
+ IOPORT(W_BSSID1) = 0;
+ IOPORT(W_BSSID2) = 0;
+ IOPORT(W_AIDLow) = 0;
+ IOPORT(W_AIDFull) = 0;
+ IOPORT(W_TXRetryLimit) = 0x0707;
+ IOPORT(0x02E) = 0;
+ IOPORT(W_RXBufBegin) = 0x4000;
+ IOPORT(W_RXBufEnd) = 0x4800;
+ IOPORT(W_TXBeaconTIM) = 0;
+ IOPORT(W_Preamble) = 0x0001;
+ IOPORT(W_RXFilter) = 0x0401;
+ IOPORT(0x0D4) = 0x0001;
+ IOPORT(W_RXFilter2) = 0x0008;
+ IOPORT(0x0EC) = 0x3F03;
+ IOPORT(W_TXHeaderCnt) = 0;
+ IOPORT(0x198) = 0;
+ IOPORT(0x1A2) = 0x0001;
+ IOPORT(0x224) = 0x0003;
+ IOPORT(0x230) = 0x0047;
+
+ }
+ }
+ break;
+
+ case W_ModeWEP:
+ val &= 0x007F;
+ break;
+
+ case W_IF:
+ // IF: TODO
+ return;
+ case W_IE:
+ printf("WIFI IE=%04X\n", val);
+ break;
+
+ case W_PowerState:
+ if (val & 0x0002)
{
- u32 regid = BBCnt & 0xFF;
+ // TODO: IRQ11
+ IOPORT(W_PowerState) = 0x0000;
+ }
+ return;
+ case W_PowerForce:
+ printf("WIFI: forcing power %04X\n", val);
+ val &= 0x8001;
+ if (val == 0x8001)
+ {
+ IOPORT(0x034) = 0x0002;
+ IOPORT(W_PowerState) = 0x0200;
+ IOPORT(W_TXReqRead) = 0;
+ IOPORT(W_RFPins) = 0x0046; // hex, as in the W_ModeReset path; a literal with a leading 0 would be parsed as octal
+ IOPORT(W_RFStatus) = 9;
+ }
+ break;
+
+ case W_BBCnt:
+ IOPORT(W_BBCnt) = val;
+ if ((IOPORT(W_BBCnt) & 0xF000) == 0x5000)
+ {
+ u32 regid = IOPORT(W_BBCnt) & 0xFF;
if (!BBRegsRO[regid])
- BBRegs[regid] = val & 0xFF;
+ BBRegs[regid] = IOPORT(W_BBWrite) & 0xFF;
}
return;
- case 0x15A:
- BBWrite = val;
+ case W_RFData2:
+ IOPORT(W_RFData2) = val;
+ if (RFVersion == 3) RFTransfer_Type3();
+ else RFTransfer_Type2();
+ return;
+ case W_RFCnt:
+ val &= 0x413F;
+ break;
+
+ // read-only ports
+ case 0x000:
+ case 0x044:
+ case 0x054:
+ case 0x0B0:
+ case 0x0B6:
+ case 0x0B8:
+ case 0x15C:
+ case 0x15E:
+ case 0x180:
+ case 0x19C:
+ case 0x1A8:
+ case 0x1AC:
+ case 0x1C4:
+ case 0x210:
+ case 0x214:
+ case 0x268:
return;
}
- printf("WIFI: unknown write %08X %04X\n", addr, val);
+ //printf("WIFI: write %08X %04X\n", addr, val);
+ IOPORT(addr&0xFFF) = val;
}
}
diff --git a/src/Wifi.h b/src/Wifi.h
index a1755ea..a351f7a 100644
--- a/src/Wifi.h
+++ b/src/Wifi.h
@@ -22,7 +22,113 @@
namespace Wifi
{
-//
+enum
+{
+ W_ID = 0x000,
+
+ W_ModeReset = 0x004,
+ W_ModeWEP = 0x006,
+ W_TXStatCnt = 0x008,
+ W_IF = 0x010,
+ W_IE = 0x012,
+
+ W_MACAddr0 = 0x018,
+ W_MACAddr1 = 0x01A,
+ W_MACAddr2 = 0x01C,
+ W_BSSID0 = 0x020,
+ W_BSSID1 = 0x022,
+ W_BSSID2 = 0x024,
+ W_AIDLow = 0x028,
+ W_AIDFull = 0x02A,
+
+ W_TXRetryLimit = 0x02C,
+ W_RXCnt = 0x030,
+ W_WEPCnt = 0x032,
+
+ W_PowerUS = 0x036,
+ W_PowerTX = 0x038,
+ W_PowerState = 0x03C,
+ W_PowerForce = 0x040,
+
+ W_Random = 0x044,
+
+ W_RXBufBegin = 0x050,
+ W_RXBufEnd = 0x052,
+ W_RXBufWriteCursor = 0x054,
+ W_RXBufWriteAddr = 0x056,
+ W_RXBufReadAddr = 0x058,
+ W_RXBufReadCursor = 0x05A,
+ W_RXBufCount = 0x05C,
+ W_RXBufDataRead = 0x060,
+ W_RXBufGapAddr = 0x062,
+ W_RXBufGapSize = 0x064,
+
+ W_TXBufWriteAddr = 0x068,
+ W_TXBufCount = 0x06C,
+ W_TXBufDataWrite = 0x070,
+ W_TXBufGapAddr = 0x074,
+ W_TXBufGapSize = 0x076,
+
+ W_TXSlotBeacon = 0x080,
+ W_TXBeaconTIM = 0x084,
+ W_ListenCount = 0x088,
+ W_BeaconInterval = 0x08C,
+ W_ListenInterval = 0x08E,
+ W_TXSlotCmd = 0x090,
+ W_TXSlotReply1 = 0x094,
+ W_TXSlotReply2 = 0x098,
+ W_TXSlotLoc1 = 0x0A0,
+ W_TXSlotLoc2 = 0x0A4,
+ W_TXSlotLoc3 = 0x0A8,
+ W_TXReqReset = 0x0AC,
+ W_TXReqSet = 0x0AE,
+ W_TXReqRead = 0x0B0,
+ W_TXSlotReset = 0x0B4,
+ W_TXBusy = 0x0B6,
+ W_TXStat = 0x0B8,
+ W_Preamble = 0x0BC,
+ W_CmdTotalTime = 0x0C0,
+ W_CmdReplyTime = 0x0C4,
+ W_RXFilter = 0x0D0,
+ W_RXFilter2 = 0x0E0,
+
+ W_USCountCnt = 0x0E8,
+ W_USCompareCnt = 0x0EA,
+ W_CmdCountCnt = 0x0EE,
+
+ W_ContentFree = 0x10C,
+ W_PreBeacon = 0x110,
+ W_CmdCount = 0x118,
+ W_BeaconCount1 = 0x11C,
+ W_BeaconCount2 = 0x134,
+
+ W_BBCnt = 0x158,
+ W_BBWrite = 0x15A,
+ W_BBRead = 0x15C,
+ W_BBBusy = 0x15E,
+ W_BBMode = 0x160,
+ W_BBPower = 0x168,
+
+ W_RFData2 = 0x17C,
+ W_RFData1 = 0x17E,
+ W_RFBusy = 0x180,
+ W_RFCnt = 0x184,
+
+ W_TXHeaderCnt = 0x194,
+ W_RFPins = 0x19C,
+
+ W_RXStatIncIF = 0x1A8,
+ W_RXStatIncIE = 0x1AA,
+ W_RXStatHalfIF = 0x1AC,
+ W_RXStatHalfIE = 0x1AE,
+ W_TXErrorCount = 0x1C0,
+ W_RXCount = 0x1C4,
+
+ W_TXSeqNo = 0x210,
+ W_RFStatus = 0x214,
+ W_IFSet = 0x21C,
+ W_RXTXAddr = 0x268,
+};
void Reset();
diff --git a/src/wx/main.cpp b/src/wx/main.cpp
index 85568a4..ff22090 100644
--- a/src/wx/main.cpp
+++ b/src/wx/main.cpp
@@ -22,6 +22,7 @@
#include "../Config.h"
#include "../NDS.h"
#include "../GPU.h"
+#include "../SPU.h"
#include "InputConfig.h"
#include "EmuConfig.h"
@@ -86,7 +87,7 @@ bool wxApp_melonDS::OnInit()
printf("melonDS " MELONDS_VERSION "\n" MELONDS_URL "\n");
Config::Load();
-
+
emuthread = new EmuThread();
if (emuthread->Run() != wxTHREAD_NO_ERROR)
{
@@ -97,7 +98,7 @@ bool wxApp_melonDS::OnInit()
MainFrame* melon = new MainFrame();
melon->Show(true);
-
+
melon->emuthread = emuthread;
emuthread->parent = melon;
@@ -108,7 +109,7 @@ int wxApp_melonDS::OnExit()
{
emuthread->Wait();
delete emuthread;
-
+
return wxApp::OnExit();
}
@@ -169,7 +170,7 @@ void MainFrame::OnClose(wxCloseEvent& event)
{
emuthread->EmuPause();
emuthread->EmuExit();
-
+
NDS::DeInit();
if (joy)
@@ -313,6 +314,11 @@ EmuThread::~EmuThread()
{
}
+static void AudioCallback(void* data, Uint8* stream, int len) // SDL audio callback (assigned to whatIwant.callback in EmuThread::Entry); hands SDL's output buffer to SPU::ReadOutput. `data` is not used here.
+{
+ SPU::ReadOutput((s16*)stream, len>>2); // len>>2 == len/4: the device is requested as AUDIO_S16LSB with 2 channels, i.e. 4 bytes per stereo frame, so this is presumably a frame count -- confirm against SPU::ReadOutput's contract
+}
+
wxThread::ExitCode EmuThread::Entry()
{
emustatus = 3;
@@ -344,6 +350,23 @@ wxThread::ExitCode EmuThread::Entry()
botdst.x = 0; botdst.y = 192;
botdst.w = 256; botdst.h = 192;
+ SDL_AudioSpec whatIwant, whatIget;
+ memset(&whatIwant, 0, sizeof(SDL_AudioSpec));
+ whatIwant.freq = 32824; // 32823.6328125
+ whatIwant.format = AUDIO_S16LSB;
+ whatIwant.channels = 2;
+ whatIwant.samples = 1024;
+ whatIwant.callback = AudioCallback;
+ audio = SDL_OpenAudioDevice(NULL, 0, &whatIwant, &whatIget, 0);
+ if (!audio)
+ {
+ printf("Audio init failed: %s\n", SDL_GetError());
+ }
+ else
+ {
+ SDL_PauseAudioDevice(audio, 0);
+ }
+
Touching = false;
axismask = 0;
@@ -430,9 +453,11 @@ wxThread::ExitCode EmuThread::Entry()
emupaused = true;
}
}
-
+
emupaused = true;
+ if (audio) SDL_CloseAudioDevice(audio);
+
SDL_DestroyTexture(sdltex);
SDL_DestroyRenderer(sdlrend);
SDL_DestroyWindow(sdlwin);
@@ -462,7 +487,7 @@ void EmuThread::ProcessEvents()
{
int w = evt.window.data1;
int h = evt.window.data2;
-
+
// SDL_SetWindowMinimumSize() doesn't seem to work on Linux. oh well
if ((w < 256) || (h < 384))
{
@@ -514,7 +539,7 @@ void EmuThread::ProcessEvents()
{
Touching = true;
NDS::PressKey(16+6);
-
+
int mx, my;
SDL_GetGlobalMouseState(&mx, &my);
txoffset = mx - evt.button.x;
@@ -529,6 +554,7 @@ void EmuThread::ProcessEvents()
if (evt.key.keysym.scancode == Config::KeyMapping[i]) NDS::PressKey(i);
if (evt.key.keysym.scancode == Config::KeyMapping[10]) NDS::PressKey(16);
if (evt.key.keysym.scancode == Config::KeyMapping[11]) NDS::PressKey(17);
+ if (evt.key.keysym.scancode == SDL_SCANCODE_F12) NDS::debug(0);
break;
case SDL_KEYUP:
diff --git a/src/wx/main.h b/src/wx/main.h
index 851a061..0219ff7 100644
--- a/src/wx/main.h
+++ b/src/wx/main.h
@@ -46,7 +46,7 @@ class wxApp_melonDS : public wxApp
public:
virtual bool OnInit();
virtual int OnExit();
-
+
EmuThread* emuthread;
};
@@ -91,7 +91,7 @@ public:
bool EmuIsRunning() { return (emustatus == 1) || (emustatus == 2); }
bool EmuIsPaused() { return (emustatus == 2) && emupaused; }
-
+
MainFrame* parent;
protected:
@@ -105,6 +105,8 @@ protected:
SDL_Rect topsrc, topdst;
SDL_Rect botsrc, botdst;
+ SDL_AudioDeviceID audio;
+
bool Touching;
int txoffset, tyoffset;