diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/ARM.cpp | 14 | ||||
-rw-r--r-- | src/ARM.h | 10 | ||||
-rw-r--r-- | src/DMA.cpp | 31 | ||||
-rw-r--r-- | src/DMA.h | 2 | ||||
-rw-r--r-- | src/GPU.cpp | 3 | ||||
-rw-r--r-- | src/GPU2D.cpp | 476 | ||||
-rw-r--r-- | src/GPU2D.h | 17 | ||||
-rw-r--r-- | src/GPU3D.cpp | 733 | ||||
-rw-r--r-- | src/GPU3D.h | 19 | ||||
-rw-r--r-- | src/GPU3D_Soft.cpp | 798 | ||||
-rw-r--r-- | src/NDS.cpp | 142 | ||||
-rw-r--r-- | src/NDS.h | 31 | ||||
-rw-r--r-- | src/NDSCart.cpp | 87 | ||||
-rw-r--r-- | src/NDSCart.h | 1 | ||||
-rw-r--r-- | src/RTC.cpp | 43 | ||||
-rw-r--r-- | src/SPI.cpp | 4 | ||||
-rw-r--r-- | src/SPI.h | 4 | ||||
-rw-r--r-- | src/SPU.cpp | 811 | ||||
-rw-r--r-- | src/SPU.h | 160 | ||||
-rw-r--r-- | src/Wifi.cpp | 235 | ||||
-rw-r--r-- | src/Wifi.h | 108 | ||||
-rw-r--r-- | src/wx/main.cpp | 40 | ||||
-rw-r--r-- | src/wx/main.h | 6 |
23 files changed, 2936 insertions, 839 deletions
diff --git a/src/ARM.cpp b/src/ARM.cpp index a2e0066..bbfb7f7 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -322,10 +322,14 @@ s32 ARM::Execute() { if (Halted) { - if (NDS::HaltInterrupted(Num)) + if (Halted == 2) { Halted = 0; - if (NDS::IME[Num]&1) + } + else if (NDS::HaltInterrupted(Num)) + { + Halted = 0; + if (NDS::IME[Num] & 0x1) TriggerIRQ(); } else @@ -376,8 +380,6 @@ s32 ARM::Execute() } } - //if (R[15]==0x037F9364) printf("R8=%08X R9=%08X\n", R[8], R[9]); - if (Num==0) { s32 diff = Cycles - lastcycles; @@ -398,9 +400,9 @@ s32 ARM::Execute() Cycles = CyclesToRun; break; } - if (NDS::HaltInterrupted(Num)) + if (NDS::IF[Num] & NDS::IE[Num]) { - if (NDS::IME[Num]&1) + if (NDS::IME[Num] & 0x1) TriggerIRQ(); } } @@ -43,9 +43,19 @@ public: void Halt(u32 halt) { + if (halt==2 && Halted==1) return; Halted = halt; } + void CheckIRQ() + { + if (!(NDS::IME[Num] & 0x1)) return; + if (NDS::IF[Num] & NDS::IE[Num]) + { + TriggerIRQ(); + } + } + s32 Execute(); bool CheckCondition(u32 code) diff --git a/src/DMA.cpp b/src/DMA.cpp index 9a17f41..edd6f8b 100644 --- a/src/DMA.cpp +++ b/src/DMA.cpp @@ -186,18 +186,6 @@ void DMA::Start() //printf("ARM%d DMA%d %08X %02X %08X->%08X %d bytes %dbit\n", CPU?7:9, Num, Cnt, StartMode, CurSrcAddr, CurDstAddr, RemCount*((Cnt&0x04000000)?4:2), (Cnt&0x04000000)?32:16); - // special path for cart DMA. this is a gross hack. - // emulating it properly requires emulating cart transfer delays, so uh... TODO - if (CurSrcAddr==0x04100010 && RemCount==1 && (Cnt & 0x07E00000)==0x07000000 && - (StartMode==0x05 || StartMode==0x12)) - { - NDSCart::DMA(CurDstAddr); - Cnt &= ~0x80000000; - if (Cnt & 0x40000000) - NDS::SetIRQ(CPU, NDS::IRQ_DMA0 + Num); - return; - } - // special path for the display FIFO. another gross hack. // the display FIFO seems to be more like a circular buffer that holds 16 pixels // from which the display controller reads. 
DMA is triggered every 8 pixels to fill it @@ -212,6 +200,8 @@ void DMA::Start() return; } + IsGXFIFODMA = (CPU == 0 && (CurSrcAddr>>24) == 0x02 && CurDstAddr == 0x04000400 && DstAddrInc == 0); + // TODO eventually: not stop if we're running code in ITCM Running = true; @@ -245,6 +235,23 @@ s32 DMA::Run(s32 cycles) } else { + // optimized path for typical GXFIFO DMA + if (IsGXFIFODMA) + { + while (IterCount > 0 && cycles > 0) + { + GPU3D::WriteToGXFIFO(*(u32*)&NDS::MainRAM[CurSrcAddr&0x3FFFFF]); + + s32 c = (Waitstates[1][0x2] + Waitstates[1][0x4]); + cycles -= c; + NDS::RunTimingCriticalDevices(0, c); + + CurSrcAddr += SrcAddrInc<<2; + IterCount--; + RemCount--; + } + } + u32 (*readfn)(u32) = CPU ? NDS::ARM7Read32 : NDS::ARM9Read32; void (*writefn)(u32,u32) = CPU ? NDS::ARM7Write32 : NDS::ARM9Write32; @@ -66,6 +66,8 @@ private: bool Running; bool InProgress; + + bool IsGXFIFODMA; }; #endif diff --git a/src/GPU.cpp b/src/GPU.cpp index d486e0c..680f08c 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -649,6 +649,9 @@ void StartScanline(u32 line) else DispStat[1] &= ~(1<<2); + GPU2D_A->CheckWindows(line); + GPU2D_B->CheckWindows(line); + if (line >= 2 && line < 194) NDS::CheckDMAs(0, 0x03); else if (line == 194) diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index ff9072d..77b79b4 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -32,6 +32,8 @@ // * VRAM/FIFO display modes convert colors the same way // * 3D engine converts colors differently (18bit = 15bit * 2 + 1, except 0 = 0) // * 'screen disabled' white is 63,63,63 +// * [Gericom] bit15 is used as bottom green bit for palettes. TODO: check where this applies. +// tested on the normal BG palette and applies there // // oh also, changing DISPCNT bit16-17 midframe doesn't work (ignored? applied for next frame?) 
// TODO, eventually: check whether other DISPCNT bits can be changed midframe @@ -68,6 +70,9 @@ // * if BG0 is selected as 1st target, destination not selected as 2nd target: // brightness up/down effect is applied if selected. if blending is selected, it doesn't apply. // * 3D layer pixels with alpha=0 are always transparent. +// +// mosaic: +// * mosaic grid starts at 0,0 regardless of the BG/sprite position GPU2D::GPU2D(u32 num) @@ -94,6 +99,10 @@ void GPU2D::Reset() memset(BGRotC, 0, 2*2); memset(BGRotD, 0, 2*2); + memset(Win0Coords, 0, 4); + memset(Win1Coords, 0, 4); + memset(WinCnt, 0, 4); + BlendCnt = 0; EVA = 16; EVB = 0; @@ -120,7 +129,15 @@ void GPU2D::SetFramebuffer(u32* buf) u8 GPU2D::Read8(u32 addr) { - printf("!! GPU2D READ8 %08X\n", addr); + switch (addr & 0x00000FFF) + { + case 0x048: return WinCnt[0]; + case 0x049: return WinCnt[1]; + case 0x04A: return WinCnt[2]; + case 0x04B: return WinCnt[3]; + } + + printf("unknown GPU read8 %08X\n", addr); return 0; } @@ -136,10 +153,15 @@ u16 GPU2D::Read16(u32 addr) case 0x00C: return BGCnt[2]; case 0x00E: return BGCnt[3]; + case 0x048: return WinCnt[0] | (WinCnt[1] << 8); + case 0x04A: return WinCnt[2] | (WinCnt[3] << 8); + case 0x050: return BlendCnt; case 0x064: return CaptureCnt & 0xFFFF; case 0x066: return CaptureCnt >> 16; + + case 0x06C: return MasterBrightness; } printf("unknown GPU read16 %08X\n", addr); @@ -160,7 +182,40 @@ u32 GPU2D::Read32(u32 addr) void GPU2D::Write8(u32 addr, u8 val) { - printf("!! 
GPU2D WRITE8 %08X %02X\n", addr, val); + switch (addr & 0x00000FFF) + { + case 0x040: Win0Coords[1] = val; return; + case 0x041: Win0Coords[0] = val; return; + case 0x042: Win1Coords[1] = val; return; + case 0x043: Win1Coords[0] = val; return; + + case 0x044: Win0Coords[3] = val; return; + case 0x045: Win0Coords[2] = val; return; + case 0x046: Win1Coords[3] = val; return; + case 0x047: Win1Coords[2] = val; return; + + case 0x048: WinCnt[0] = val; return; + case 0x049: WinCnt[1] = val; return; + case 0x04A: WinCnt[2] = val; return; + case 0x04B: WinCnt[3] = val; return; + + case 0x050: BlendCnt = (BlendCnt & 0xFF00) | val; return; + case 0x051: BlendCnt = (BlendCnt & 0x00FF) | (val << 8); return; + case 0x052: + EVA = val & 0x1F; + if (EVA > 16) EVA = 16; + return; + case 0x53: + EVB = val & 0x1F; + if (EVB > 16) EVB = 16; + return; + case 0x054: + EVY = val & 0x1F; + if (EVY > 16) EVY = 16; + return; + } + + printf("unknown GPU write8 %08X %02X\n", addr, val); } void GPU2D::Write16(u32 addr, u16 val) @@ -234,6 +289,33 @@ void GPU2D::Write16(u32 addr, u16 val) if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1]; return; + case 0x040: + Win0Coords[1] = val & 0xFF; + Win0Coords[0] = val >> 8; + return; + case 0x042: + Win1Coords[1] = val & 0xFF; + Win1Coords[0] = val >> 8; + return; + + case 0x044: + Win0Coords[3] = val & 0xFF; + Win0Coords[2] = val >> 8; + return; + case 0x046: + Win1Coords[3] = val & 0xFF; + Win1Coords[2] = val >> 8; + return; + + case 0x048: + WinCnt[0] = val & 0xFF; + WinCnt[1] = val >> 8; + return; + case 0x04A: + WinCnt[2] = val & 0xFF; + WinCnt[3] = val >> 8; + return; + case 0x050: BlendCnt = val; return; case 0x052: EVA = val & 0x1F; @@ -301,6 +383,25 @@ void GPU2D::DrawScanline(u32 line) u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 
0x1 : 0x3); + // always render regular graphics + DrawScanline_Mode1(line, dst); + + // capture + if ((Num == 0) && (CaptureCnt & (1<<31))) + { + u32 capwidth, capheight; + switch ((CaptureCnt >> 20) & 0x3) + { + case 0: capwidth = 128; capheight = 128; break; + case 1: capwidth = 256; capheight = 64; break; + case 2: capwidth = 256; capheight = 128; break; + case 3: capwidth = 256; capheight = 192; break; + } + + if (line < capheight) + DoCapture(line, capwidth, dst); + } + switch (dispmode) { case 0: // screen off @@ -310,10 +411,7 @@ void GPU2D::DrawScanline(u32 line) } break; - case 1: // regular display - { - DrawScanline_Mode1(line, dst); - } + case 1: // regular display, already taken care of break; case 2: // VRAM display @@ -359,22 +457,6 @@ void GPU2D::DrawScanline(u32 line) break; } - // capture - if ((!Num) && (CaptureCnt & (1<<31))) - { - u32 capwidth, capheight; - switch ((CaptureCnt >> 20) & 0x3) - { - case 0: capwidth = 128; capheight = 128; break; - case 1: capwidth = 256; capheight = 64; break; - case 2: capwidth = 256; capheight = 128; break; - case 3: capwidth = 256; capheight = 192; break; - } - - if (line < capheight) - DoCapture(line, capwidth, dst); - } - // master brightness if (dispmode != 0) { @@ -480,7 +562,7 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) dstaddr &= 0xFFFF; srcBaddr &= 0xFFFF; - switch ((DispCnt >> 29) & 0x3) + switch ((CaptureCnt >> 29) & 0x3) { case 0: // source A { @@ -526,8 +608,8 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) case 2: // sources A+B case 3: { - u32 eva = DispCnt & 0x1F; - u32 evb = (DispCnt >> 8) & 0x1F; + u32 eva = CaptureCnt & 0x1F; + u32 evb = (CaptureCnt >> 8) & 0x1F; // checkme if (eva > 16) eva = 16; @@ -558,6 +640,10 @@ void GPU2D::DoCapture(u32 line, u32 width, u32* src) u32 bD = ((bA * aA * eva) + (bB * aB * evb)) >> 4; u32 aD = (eva>0 ? aA : 0) | (evb>0 ? 
aB : 0); + if (rD > 0x1F) rD = 0x1F; + if (gD > 0x1F) gD = 0x1F; + if (bD > 0x1F) bD = 0x1F; + dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); srcBaddr = (srcBaddr + 1) & 0xFFFF; dstaddr = (dstaddr + 1) & 0xFFFF; @@ -683,6 +769,59 @@ u16* GPU2D::GetOBJExtPal(u32 pal) } +void GPU2D::CheckWindows(u32 line) +{ + line &= 0xFF; + if (line == Win0Coords[3]) Win0Active = false; + else if (line == Win0Coords[2]) Win0Active = true; + if (line == Win1Coords[3]) Win1Active = false; + else if (line == Win1Coords[2]) Win1Active = true; +} + +void GPU2D::CalculateWindowMask(u32 line, u8* mask) +{ + for (u32 i = 0; i < 256; i++) + mask[i] = WinCnt[2]; // window outside + + if ((DispCnt & (1<<15)) && (DispCnt & (1<<12))) + { + // OBJ window + u8 objwin[256]; + memset(objwin, 0, 256); + DrawSpritesWindow(line, objwin); + + for (u32 i = 0; i < 256; i++) + { + if (objwin[i]) mask[i] = WinCnt[3]; + } + } + + if ((DispCnt & (1<<14)) && Win1Active) + { + // window 1 + u32 x1 = Win1Coords[0]; + u32 x2 = Win1Coords[1]; + if (x2 == 0 && x1 > 0) x2 = 256; + if (x1 > x2) x2 = 255; // checkme + + for (u32 i = x1; i < x2; i++) + mask[i] = WinCnt[1]; + } + + if ((DispCnt & (1<<13)) && Win0Active) + { + // window 0 + u32 x1 = Win0Coords[0]; + u32 x2 = Win0Coords[1]; + if (x2 == 0 && x1 > 0) x2 = 256; + if (x1 > x2) x2 = 255; // checkme + + for (u32 i = x1; i < x2; i++) + mask[i] = WinCnt[0]; + } +} + + template<u32 bgmode> void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst) { @@ -736,7 +875,8 @@ void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u32* dst) void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) { - u32 linebuf[256*2]; + u32 linebuf[256*2 + 64]; + u8* windowmask = (u8*)&linebuf[256*2]; u32 backdrop; if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; @@ -753,6 +893,11 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) linebuf[i] = backdrop; } + if (DispCnt & 0xE000) + CalculateWindowMask(line, windowmask); + else + memset(windowmask, 0xFF, 256); + // 
prerender sprites u32 spritebuf[256]; memset(spritebuf, 0, 256*4); @@ -781,7 +926,11 @@ void GPU2D::DrawScanline_Mode1(u32 line, u32* dst) u32 coloreffect, eva, evb; u32 flag1 = val1 >> 24; - if ((flag1 & 0x80) && (BlendCnt & ((val2 >> 16) & 0xFF00))) + if (!(windowmask[i] & 0x20)) + { + coloreffect = 0; + } + else if ((flag1 & 0x80) && (BlendCnt & ((val2 >> 16) & 0xFF00))) { // sprite blending @@ -904,10 +1053,10 @@ void GPU2D::DrawPixel(u32* dst, u16 color, u32 flag) void GPU2D::DrawBG_3D(u32 line, u32* dst) { - // TODO: window, as for everything - // also check if window can prevent blending from happening + // TODO: check if window can prevent blending from happening u32* src = GPU3D::GetLine(line); + u8* windowmask = (u8*)&dst[256*2]; u16 xoff = BGXPos[0]; int i = 0; @@ -929,6 +1078,7 @@ void GPU2D::DrawBG_3D(u32 line, u32* dst) xoff++; if ((c >> 24) == 0) continue; + if (!(windowmask[i] & 0x01)) continue; dst[i+256] = dst[i]; dst[i] = c | 0x40000000; @@ -937,6 +1087,7 @@ void GPU2D::DrawBG_3D(u32 line, u32* dst) void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) { + u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; @@ -1012,12 +1163,15 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) } // draw pixel - u8 color; - u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); + if (windowmask[i] & (1<<bgnum)) + { + u8 color; + u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + tilexoff); - if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + if (color) + DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + } xoff++; } @@ -1049,19 +1203,22 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) // draw pixel // TODO: optimize VRAM access - u8 color; - u32 tilexoff = (curtile & 0x0400) ? 
(7-(xoff&0x7)) : (xoff&0x7); - if (tilexoff & 0x1) + if (windowmask[i] & (1<<bgnum)) { - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; - } - else - { - color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; - } + u8 color; + u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); + if (tilexoff & 0x1) + { + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) >> 4; + } + else + { + color = GPU::ReadVRAM_BG<u8>(pixelsaddr + (tilexoff >> 1)) & 0x0F; + } - if (color) - DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + if (color) + DrawPixel(&dst[i], curpal[color], 0x01000000<<bgnum); + } xoff++; } @@ -1070,6 +1227,7 @@ void GPU2D::DrawBG_Text(u32 line, u32* dst, u32 bgnum) void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) { + u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; @@ -1118,7 +1276,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (!((rotX|rotY) & overflowmask)) + if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum))) { curtile = GPU::ReadVRAM_BG<u8>(tilemapaddr + ((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11))); @@ -1143,6 +1301,7 @@ void GPU2D::DrawBG_Affine(u32 line, u32* dst, u32 bgnum) void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) { + u8* windowmask = (u8*)&dst[256*2]; u16 bgcnt = BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; @@ -1188,7 +1347,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (!((rotX|rotY) & overflowmask)) + if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum))) { u16 color = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)) << 1)); @@ -1209,7 +1368,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (!((rotX|rotY) & overflowmask)) + if ((!((rotX|rotY) & overflowmask)) && 
(windowmask[i] & (1<<bgnum))) { u8 color = GPU::ReadVRAM_BG<u8>(tilemapaddr + (((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)); @@ -1248,7 +1407,7 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) for (int i = 0; i < 256; i++) { - if (!((rotX|rotY) & overflowmask)) + if ((!((rotX|rotY) & overflowmask)) && (windowmask[i] & (1<<bgnum))) { curtile = GPU::ReadVRAM_BG<u16>(tilemapaddr + (((((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)) << 1)); @@ -1280,9 +1439,11 @@ void GPU2D::DrawBG_Extended(u32 line, u32* dst, u32 bgnum) void GPU2D::InterleaveSprites(u32* buf, u32 prio, u32* dst) { + u8* windowmask = (u8*)&dst[256*2]; + for (u32 i = 0; i < 256; i++) { - if ((buf[i] & 0xF8000) == prio) + if (((buf[i] & 0xF8000) == prio) && (windowmask[i] & 0x10)) { u32 blendfunc = 0; DrawPixel(&dst[i], buf[i] & 0x7FFF, buf[i] & 0xFF000000); @@ -1318,6 +1479,9 @@ void GPU2D::DrawSprites(u32 line, u32* dst) if ((attrib[2] & 0x0C00) != bgnum) continue; + if (((attrib[0] >> 10) & 0x3) == 2) + continue; + if (attrib[0] & 0x0100) { u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); @@ -1343,7 +1507,7 @@ void GPU2D::DrawSprites(u32 line, u32* dst) u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; - DrawSprite_Rotscale(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst); + DrawSprite_Rotscale<false>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst); } else { @@ -1367,17 +1531,98 @@ void GPU2D::DrawSprites(u32 line, u32* dst) if (attrib[1] & 0x2000) ypos = height-1 - ypos; - DrawSprite_Normal(attrib, width, xpos, ypos, dst); + DrawSprite_Normal<false>(attrib, width, xpos, ypos, dst); + } + } + } +} + +void GPU2D::DrawSpritesWindow(u32 line, u8* dst) +{ + u16* oam = (u16*)&GPU::OAM[Num ? 
0x400 : 0]; + + const s32 spritewidth[16] = + { + 8, 16, 8, 0, + 16, 32, 8, 0, + 32, 32, 16, 0, + 64, 64, 32, 0 + }; + const s32 spriteheight[16] = + { + 8, 8, 16, 0, + 16, 8, 32, 0, + 32, 16, 32, 0, + 64, 32, 64, 0 + }; + + for (int sprnum = 127; sprnum >= 0; sprnum--) + { + u16* attrib = &oam[sprnum*4]; + + if (((attrib[0] >> 10) & 0x3) != 2) + continue; + + if (attrib[0] & 0x0100) + { + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + s32 boundwidth = width; + s32 boundheight = height; + + if (attrib[0] & 0x0200) + { + boundwidth <<= 1; + boundheight <<= 1; } + + u32 ypos = attrib[0] & 0xFF; + ypos = (line - ypos) & 0xFF; + if (ypos >= (u32)boundheight) + continue; + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -boundwidth) + continue; + + u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; + + DrawSprite_Rotscale<true>(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, (u32*)dst); + } + else + { + if (attrib[0] & 0x0200) + continue; + + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + + u32 ypos = attrib[0] & 0xFF; + ypos = (line - ypos) & 0xFF; + if (ypos >= (u32)height) + continue; + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -width) + continue; + + // yflip + if (attrib[1] & 0x2000) + ypos = height-1 - ypos; + + DrawSprite_Normal<true>(attrib, width, xpos, ypos, (u32*)dst); } } } +template<bool window> void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; - u32 spritemode = (attrib[0] >> 10) & 0x3; + u32 spritemode = window ? 
0 : ((attrib[0] >> 10) & 0x3); u32 ytilefactor; @@ -1448,13 +1693,13 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 { if ((u32)rotX < width && (u32)rotY < height) { - u8 color; - - // blaaaarg - color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); + u8 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)); if (color & 0x8000) - dst[xpos] = color | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = color | prio; + } } rotX += rotA; @@ -1488,20 +1733,23 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 u32 extpal = (DispCnt & 0x80000000); u16* pal; - if (extpal) pal = GetOBJExtPal(attrib[2] >> 12); - else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + if (!window) + { + if (extpal) pal = GetOBJExtPal(attrib[2] >> 12); + else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + } for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { - u8 color; - - // blaaaarg - color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); + u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)); if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } } rotX += rotA; @@ -1517,17 +1765,18 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 ytilefactor <<= 5; u32 pixelsaddr = (Num ? 0x06600000 : 0x06400000) + tilenum; - u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; - pal += (attrib[2] & 0xF000) >> 8; + u16* pal; + if (!window) + { + pal = (u16*)&GPU::Palette[Num ? 
0x600 : 0x200]; + pal += (attrib[2] & 0xF000) >> 8; + } for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { - u8 color; - - // blaaaarg - color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); + u8 color = GPU::ReadVRAM_OBJ<u8>(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)); if (rotX & 0x100) color >>= 4; @@ -1535,7 +1784,10 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 color &= 0x0F; if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } } rotX += rotA; @@ -1547,11 +1799,12 @@ void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 } } +template<bool window> void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; - u32 spritemode = (attrib[0] >> 10) & 0x3; + u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3); u32 wmask = width - 8; // really ((width - 1) & ~0x7) @@ -1606,18 +1859,44 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d } u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; - pixelsaddr += (xoff << 1); - for (; xoff < xend;) + if (attrib[1] & 0x1000) { - u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); - pixelsaddr += 2; + pixelsaddr += ((width-1 - xoff) << 1); - if (color & 0x8000) - dst[xpos] = color | prio; + for (; xoff < xend;) + { + u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); + pixelsaddr -= 2; - xoff++; - xpos++; + if (color & 0x8000) + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = color | prio; + } + + xoff++; + xpos++; + } + } + else + { + pixelsaddr += (xoff << 1); + + for (; xoff < xend;) + { + u16 color = GPU::ReadVRAM_OBJ<u16>(pixelsaddr); + pixelsaddr += 2; + + if (color & 0x8000) + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = color | prio; + } + + xoff++; + xpos++; + } } } else @@ -1645,8 +1924,11 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d u32 extpal = (DispCnt & 0x80000000); u16* pal; - if (extpal) pal = GetOBJExtPal(attrib[2] >> 12); - else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + if (!window) + { + if (extpal) pal = GetOBJExtPal(attrib[2] >> 12); + else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + } if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works { @@ -1659,7 +1941,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d pixelsaddr--; if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } xoff++; xpos++; @@ -1677,7 +1962,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d pixelsaddr++; if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } xoff++; xpos++; @@ -1692,8 +1980,12 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d u32 pixelsaddr = (Num ? 
0x06600000 : 0x06400000) + tilenum; pixelsaddr += ((ypos & 0x7) << 2); - u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; - pal += (attrib[2] & 0xF000) >> 8; + u16* pal; + if (!window) + { + pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; + pal += (attrib[2] & 0xF000) >> 8; + } if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works { @@ -1714,7 +2006,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d } if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } xoff++; xpos++; @@ -1740,7 +2035,10 @@ void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* d } if (color) - dst[xpos] = pal[color] | prio; + { + if (window) ((u8*)dst)[xpos] = 1; + else dst[xpos] = pal[color] | prio; + } xoff++; xpos++; diff --git a/src/GPU2D.h b/src/GPU2D.h index 38cbe7e..4bf698d 100644 --- a/src/GPU2D.h +++ b/src/GPU2D.h @@ -40,6 +40,8 @@ public: void VBlank(); void VBlankEnd(); + void CheckWindows(u32 line); + void BGExtPalDirty(u32 base); void OBJExtPalDirty(); @@ -69,6 +71,12 @@ private: s16 BGRotC[2]; s16 BGRotD[2]; + u8 Win0Coords[4]; + u8 Win1Coords[4]; + u8 WinCnt[4]; + bool Win0Active; + bool Win1Active; + u16 BlendCnt; u8 EVA, EVB; u8 EVY; @@ -88,16 +96,19 @@ private: void DrawPixel(u32* dst, u16 color, u32 flag); void DrawBG_3D(u32 line, u32* dst); - void DrawBG_Text(u32 line, u32* dst, u32 num); + void DrawBG_Text(u32 line, u32* dst, u32 bgnum); void DrawBG_Affine(u32 line, u32* dst, u32 bgnum); void DrawBG_Extended(u32 line, u32* dst, u32 bgnum); void InterleaveSprites(u32* buf, u32 prio, u32* dst); void DrawSprites(u32 line, u32* dst); - void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst); - void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst); + void DrawSpritesWindow(u32 line, u8* dst); + template<bool window> 
void DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst); + template<bool window> void DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst); void DoCapture(u32 line, u32 width, u32* src); + + void CalculateWindowMask(u32 line, u8* mask); }; #endif diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 9881760..72e7179 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -49,7 +49,6 @@ // // formula for clear depth: (GBAtek is wrong there) // clearZ = (val * 0x200) + 0x1FF; -// if (clearZ >= 0x010000 && clearZ < 0xFFFFFF) clearZ++; // // alpha is 5-bit // @@ -58,6 +57,10 @@ // (the idea is that each position matrix has an associated vector matrix) // // TODO: check if translate works on the vector matrix? seems pointless +// +// viewport Y coordinates are upside-down +// +// clear color/depth/bitmap/etc registers (04000350/04000354) are double-buffered namespace GPU3D @@ -150,7 +153,8 @@ FIFO<CmdFIFOEntry>* CmdPIPE; u32 NumCommands, CurCommand, ParamCount, TotalParams; u32 DispCnt; -u32 AlphaRef; +u8 AlphaRefVal; +u8 AlphaRef; u16 ToonTable[32]; u16 EdgeTable[8]; @@ -165,6 +169,9 @@ u32 ExecParams[32]; u32 ExecParamCount; s32 CycleCount; +u32 NumPushPopCommands; +u32 NumTestCommands; + u32 MatrixMode; @@ -213,6 +220,9 @@ u32 CurPolygonAttr; u32 TexParam; u32 TexPalette; +s32 PosTestResult[4]; +s16 VecTestResult[3]; + Vertex TempVertexBuffer[4]; u32 VertexNum; u32 VertexNumInPoly; @@ -232,6 +242,7 @@ Polygon* RenderPolygonRAM; u32 RenderNumPolygons; u32 ClearAttr1, ClearAttr2; +u32 RenderClearAttr1, RenderClearAttr2; u32 FlushRequest; u32 FlushAttributes; @@ -266,6 +277,9 @@ void Reset() ParamCount = 0; TotalParams = 0; + NumPushPopCommands = 0; + NumTestCommands = 0; + DispCnt = 0; AlphaRef = 0; @@ -296,6 +310,9 @@ void Reset() PosMatrixStackPointer = 0; TexMatrixStackPointer = 0; + memset(PosTestResult, 0, 4*4); + memset(VecTestResult, 0, 2*3); + VertexNum = 0; 
VertexNumInPoly = 0; @@ -449,44 +466,148 @@ void UpdateClipMatrix() -template<int comp, s32 plane> +template<int comp, s32 plane, bool attribs> void ClipSegment(Vertex* outbuf, Vertex* vout, Vertex* vin) { s64 factor_num = vin->Position[3] - (plane*vin->Position[comp]); s32 factor_den = factor_num - (vout->Position[3] - (plane*vout->Position[comp])); - Vertex mid; -#define INTERPOLATE(var) { mid.var = (vin->var + ((vout->var - vin->var) * factor_num) / factor_den); } +#define INTERPOLATE(var) { outbuf->var = (vin->var + ((vout->var - vin->var) * factor_num) / factor_den); } if (comp != 0) INTERPOLATE(Position[0]); if (comp != 1) INTERPOLATE(Position[1]); if (comp != 2) INTERPOLATE(Position[2]); INTERPOLATE(Position[3]); - mid.Position[comp] = plane*mid.Position[3]; + outbuf->Position[comp] = plane*outbuf->Position[3]; - INTERPOLATE(Color[0]); - INTERPOLATE(Color[1]); - INTERPOLATE(Color[2]); + if (attribs) + { + INTERPOLATE(Color[0]); + INTERPOLATE(Color[1]); + INTERPOLATE(Color[2]); - INTERPOLATE(TexCoords[0]); - INTERPOLATE(TexCoords[1]); + INTERPOLATE(TexCoords[0]); + INTERPOLATE(TexCoords[1]); + } - mid.Clipped = true; + outbuf->Clipped = true; #undef INTERPOLATE - *outbuf = mid; +} + +template<int comp, bool attribs> +int ClipAgainstPlane(Vertex* vertices, int nverts, int clipstart) +{ + Vertex temp[10]; + int prev, next; + int c = clipstart; + + if (clipstart == 2) + { + temp[0] = vertices[0]; + temp[1] = vertices[1]; + } + + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = vertices[i]; + if (vtx.Position[comp] > vtx.Position[3]) + { + if ((comp == 2) && (!(CurPolygonAttr & (1<<12)))) return 0; + + Vertex* vprev = &vertices[prev]; + if (vprev->Position[comp] <= vprev->Position[3]) + { + ClipSegment<comp, 1, attribs>(&temp[c], &vtx, vprev); + c++; + } + + Vertex* vnext = &vertices[next]; + if (vnext->Position[comp] <= vnext->Position[3]) + { + 
ClipSegment<comp, 1, attribs>(&temp[c], &vtx, vnext); + c++; + } + } + else + temp[c++] = vtx; + } + + nverts = c; c = clipstart; + for (int i = clipstart; i < nverts; i++) + { + prev = i-1; if (prev < 0) prev = nverts-1; + next = i+1; if (next >= nverts) next = 0; + + Vertex vtx = temp[i]; + if (vtx.Position[comp] < -vtx.Position[3]) + { + Vertex* vprev = &temp[prev]; + if (vprev->Position[comp] >= -vprev->Position[3]) + { + ClipSegment<comp, -1, attribs>(&vertices[c], &vtx, vprev); + c++; + } + + Vertex* vnext = &temp[next]; + if (vnext->Position[comp] >= -vnext->Position[3]) + { + ClipSegment<comp, -1, attribs>(&vertices[c], &vtx, vnext); + c++; + } + } + else + vertices[c++] = vtx; + } + + // checkme + for (int i = 0; i < c; i++) + { + Vertex* vtx = &vertices[i]; + + vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; + vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; + vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; + } + + return c; +} + +template<bool attribs> +int ClipPolygon(Vertex* vertices, int nverts, int clipstart) +{ + // clip. + // for each vertex: + // if it's outside, check if the previous and next vertices are inside + // if so, place a new vertex at the edge of the view volume + + // TODO: check for 1-dot polygons + // TODO: the hardware seems to use a different algorithm. it reacts differently to vertices with W=0 + + // X clipping + nverts = ClipAgainstPlane<0, attribs>(vertices, nverts, clipstart); + + // Y clipping + nverts = ClipAgainstPlane<1, attribs>(vertices, nverts, clipstart); + + // Z clipping + nverts = ClipAgainstPlane<2, attribs>(vertices, nverts, clipstart); + + return nverts; } void SubmitPolygon() { - Vertex clippedvertices[2][10]; + Vertex clippedvertices[10]; Vertex* reusedvertices[2]; int clipstart = 0; int lastpolyverts = 0; int nverts = PolygonMode & 0x1 ? 
4:3; int prev, next; - int c; // culling @@ -500,7 +621,7 @@ void SubmitPolygon() normalX = (((s64)v0->Position[1] * v2->Position[3]) - ((s64)v0->Position[3] * v2->Position[1])) >> 12; normalY = (((s64)v0->Position[3] * v2->Position[0]) - ((s64)v0->Position[0] * v2->Position[3])) >> 12; normalZ = (((s64)v0->Position[0] * v2->Position[1]) - ((s64)v0->Position[1] * v2->Position[0])) >> 12; - dot = ((s64)(v1->Position[0] >> 0) * normalX) + ((s64)(v1->Position[1] >> 0) * normalY) + ((s64)(v1->Position[3] >> 0) * normalZ); + dot = ((s64)v1->Position[0] * normalX) + ((s64)v1->Position[1] * normalY) + ((s64)v1->Position[3] * normalZ); bool facingview = (dot < 0); @@ -558,225 +679,21 @@ void SubmitPolygon() reusedvertices[0] = LastStripPolygon->Vertices[id0]; reusedvertices[1] = LastStripPolygon->Vertices[id1]; - clippedvertices[0][0] = *reusedvertices[0]; - clippedvertices[0][1] = *reusedvertices[1]; - clippedvertices[1][0] = *reusedvertices[0]; - clippedvertices[1][1] = *reusedvertices[1]; + clippedvertices[0] = *reusedvertices[0]; + clippedvertices[1] = *reusedvertices[1]; clipstart = 2; } } - // clip. 
- // for each vertex: - // if it's outside, check if the previous and next vertices are inside - // if so, place a new vertex at the edge of the view volume - - // X clipping - - c = clipstart; - for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; - - Vertex vtx = TempVertexBuffer[i]; - if (vtx.Position[0] > vtx.Position[3]) - { - Vertex* vprev = &TempVertexBuffer[prev]; - if (vprev->Position[0] <= vprev->Position[3]) - { - ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vprev); - c++; - } - - Vertex* vnext = &TempVertexBuffer[next]; - if (vnext->Position[0] <= vnext->Position[3]) - { - ClipSegment<0, 1>(&clippedvertices[0][c], &vtx, vnext); - c++; - } - } - else - clippedvertices[0][c++] = vtx; - } - - nverts = c; c = clipstart; for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; + clippedvertices[i] = TempVertexBuffer[i]; - Vertex vtx = clippedvertices[0][i]; - if (vtx.Position[0] < -vtx.Position[3]) - { - Vertex* vprev = &clippedvertices[0][prev]; - if (vprev->Position[0] >= -vprev->Position[3]) - { - ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vprev); - c++; - } + // clipping - Vertex* vnext = &clippedvertices[0][next]; - if (vnext->Position[0] >= -vnext->Position[3]) - { - ClipSegment<0, -1>(&clippedvertices[1][c], &vtx, vnext); - c++; - } - } - else - clippedvertices[1][c++] = vtx; - } + nverts = ClipPolygon<true>(clippedvertices, nverts, clipstart); - for (int i = 0; i < c; i++) - { - Vertex* vtx = &clippedvertices[1][i]; - - vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; - vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; - vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; - } - - // Y clipping - - nverts = c; c = clipstart; - for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; - - Vertex vtx = 
clippedvertices[1][i]; - if (vtx.Position[1] > vtx.Position[3]) - { - Vertex* vprev = &clippedvertices[1][prev]; - if (vprev->Position[1] <= vprev->Position[3]) - { - ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vprev); - c++; - } - - Vertex* vnext = &clippedvertices[1][next]; - if (vnext->Position[1] <= vnext->Position[3]) - { - ClipSegment<1, 1>(&clippedvertices[0][c], &vtx, vnext); - c++; - } - } - else - clippedvertices[0][c++] = vtx; - } - - nverts = c; c = clipstart; - for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; - - Vertex vtx = clippedvertices[0][i]; - if (vtx.Position[1] < -vtx.Position[3]) - { - Vertex* vprev = &clippedvertices[0][prev]; - if (vprev->Position[1] >= -vprev->Position[3]) - { - ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vprev); - c++; - } - - Vertex* vnext = &clippedvertices[0][next]; - if (vnext->Position[1] >= -vnext->Position[3]) - { - ClipSegment<1, -1>(&clippedvertices[1][c], &vtx, vnext); - c++; - } - } - else - clippedvertices[1][c++] = vtx; - } - - for (int i = 0; i < c; i++) - { - Vertex* vtx = &clippedvertices[1][i]; - - vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; - vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; - vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; - } - - // Z clipping - - bool farplaneclip = false; - nverts = c; c = clipstart; - for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; - - Vertex vtx = clippedvertices[1][i]; - if (vtx.Position[2] > vtx.Position[3]) - { - farplaneclip = true; - - Vertex* vprev = &clippedvertices[1][prev]; - if (vprev->Position[2] <= vprev->Position[3]) - { - ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vprev); - c++; - } - - Vertex* vnext = &clippedvertices[1][next]; - if (vnext->Position[2] <= vnext->Position[3]) - { - ClipSegment<2, 1>(&clippedvertices[0][c], &vtx, vnext); - c++; - } - } - else - 
clippedvertices[0][c++] = vtx; - } - - if (farplaneclip && (!(CurPolygonAttr & (1<<12)))) - { - LastStripPolygon = NULL; - return; - } - - nverts = c; c = clipstart; - for (int i = clipstart; i < nverts; i++) - { - prev = i-1; if (prev < 0) prev = nverts-1; - next = i+1; if (next >= nverts) next = 0; - - Vertex vtx = clippedvertices[0][i]; - if (vtx.Position[2] < -vtx.Position[3]) - { - Vertex* vprev = &clippedvertices[0][prev]; - if (vprev->Position[2] >= -vprev->Position[3]) - { - ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vprev); - c++; - } - - Vertex* vnext = &clippedvertices[0][next]; - if (vnext->Position[2] >= -vnext->Position[3]) - { - ClipSegment<2, -1>(&clippedvertices[1][c], &vtx, vnext); - c++; - } - } - else - clippedvertices[1][c++] = vtx; - } - - for (int i = 0; i < c; i++) - { - Vertex* vtx = &clippedvertices[1][i]; - - vtx->Color[0] &= ~0xFFF; vtx->Color[0] += 0xFFF; - vtx->Color[1] &= ~0xFFF; vtx->Color[1] += 0xFFF; - vtx->Color[2] &= ~0xFFF; vtx->Color[2] += 0xFFF; - } - - if (c == 0) + if (nverts == 0) { LastStripPolygon = NULL; return; @@ -784,10 +701,10 @@ void SubmitPolygon() // build the actual polygon - if (NumPolygons >= 2048 || NumVertices+c > 6144) + if (NumPolygons >= 2048 || NumVertices+nverts > 6144) { LastStripPolygon = NULL; - // TODO: set DISP3DCNT overflow flag + DispCnt |= (1<<13); return; } @@ -802,11 +719,19 @@ void SubmitPolygon() u32 texfmt = (TexParam >> 26) & 0x7; u32 polyalpha = (CurPolygonAttr >> 16) & 0x1F; - poly->Translucent = (texfmt == 1 || texfmt == 6 || (polyalpha > 0 && polyalpha < 31)); + poly->Translucent = ((texfmt == 1 || texfmt == 6) && !(CurPolygonAttr & 0x10)) || (polyalpha > 0 && polyalpha < 31); + + poly->IsShadowMask = ((CurPolygonAttr & 0x3F000030) == 0x00000030); + if ((NumPolygons == 1) || (!CurPolygonRAM[NumPolygons-2].IsShadowMask)) + poly->ClearStencil = poly->IsShadowMask; + else + poly->ClearStencil = false; + + poly->IsShadow = ((CurPolygonAttr & 0x30) == 0x30) && !poly->IsShadowMask; if 
(LastStripPolygon && clipstart > 0) { - if (c == lastpolyverts) + if (nverts == lastpolyverts) { poly->Vertices[0] = reusedvertices[0]; poly->Vertices[1] = reusedvertices[1]; @@ -826,10 +751,10 @@ void SubmitPolygon() poly->NumVertices += 2; } - for (int i = clipstart; i < c; i++) + for (int i = clipstart; i < nverts; i++) { Vertex* vtx = &CurVertexRAM[NumVertices]; - *vtx = clippedvertices[1][i]; + *vtx = clippedvertices[i]; poly->Vertices[i] = vtx; NumVertices++; @@ -847,24 +772,29 @@ void SubmitPolygon() } else { + // W is normalized, such that all the polygon's W values fit within 16 bits + // the viewport transform for X and Y uses the original W values, but + // the transform for Z uses the normalized W values + // W normalization is applied to separate polygons, even within strips + posX = (((s64)(vtx->Position[0] + w) * Viewport[2]) / (((s64)w) << 1)) + Viewport[0]; posY = (((s64)(-vtx->Position[1] + w) * Viewport[3]) / (((s64)w) << 1)) + Viewport[1]; - if (FlushAttributes & 0x2) posZ = w; - else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF; + //if (FlushAttributes & 0x2) posZ = w; + //else posZ = (((s64)vtx->Position[2] * 0x800000) / w) + 0x7FFEFF; } if (posX < 0) posX = 0; else if (posX > 256) posX = 256; if (posY < 0) posY = 0; else if (posY > 192) posY = 192; - if (posZ < 0) posZ = 0; - else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF; + //if (posZ < 0) posZ = 0; + //else if (posZ > 0xFFFFFF) posZ = 0xFFFFFF; vtx->FinalPosition[0] = posX; vtx->FinalPosition[1] = posY; - vtx->FinalPosition[2] = posZ; - vtx->FinalPosition[3] = w; + //vtx->FinalPosition[2] = posZ; + //vtx->FinalPosition[3] = w; vtx->FinalColor[0] = vtx->Color[0] >> 12; if (vtx->FinalColor[0]) vtx->FinalColor[0] = ((vtx->FinalColor[0] << 4) + 0xF); @@ -875,11 +805,15 @@ void SubmitPolygon() } // determine bounds of the polygon + // also determine the W shift and normalize W + // TODO: normalization works both ways + u32 vtop = 0, vbot = 0; s32 ytop = 192, ybot = 0; s32 xtop = 256, 
xbot = 0; + u32 wshift = 0; - for (int i = 0; i < c; i++) + for (int i = 0; i < nverts; i++) { Vertex* vtx = poly->Vertices[i]; @@ -895,11 +829,36 @@ void SubmitPolygon() ybot = vtx->FinalPosition[1]; vbot = i; } + + u32 w = (u32)vtx->Position[3]; + while ((w >> wshift) & 0xFFFF0000) + wshift += 4; } poly->VTop = vtop; poly->VBottom = vbot; poly->YTop = ytop; poly->YBottom = ybot; poly->XTop = xtop; poly->XBottom = xbot; + poly->WShift = wshift; + poly->WBuffer = (FlushAttributes & 0x2); + + for (int i = 0; i < nverts; i++) + { + Vertex* vtx = poly->Vertices[i]; + s32 w = vtx->Position[3] >> wshift; + + s32 z; + if (FlushAttributes & 0x2) + z = w << wshift; + else + z = (((s64)vtx->Position[2] * 0x800000) / (w << wshift)) + 0x7FFEFF; + + // checkme + if (z < 0) z = 0; + else if (z > 0xFFFFFF) z = 0xFFFFFF; + + poly->FinalZ[i] = z; + poly->FinalW[i] = w; + } if (PolygonMode >= 2) LastStripPolygon = poly; @@ -926,8 +885,8 @@ void SubmitVertex() if ((TexParam >> 30) == 3) { - vertextrans->TexCoords[0] = (vertex[0]*TexMatrix[0] + vertex[1]*TexMatrix[4] + vertex[2]*TexMatrix[8] + vertex[3]*(RawTexCoords[0]<<8)) >> 20; - vertextrans->TexCoords[1] = (vertex[0]*TexMatrix[1] + vertex[1]*TexMatrix[5] + vertex[2]*TexMatrix[9] + vertex[3]*(RawTexCoords[1]<<8)) >> 20; + vertextrans->TexCoords[0] = ((vertex[0]*TexMatrix[0] + vertex[1]*TexMatrix[4] + vertex[2]*TexMatrix[8]) >> 24) + RawTexCoords[0]; + vertextrans->TexCoords[1] = ((vertex[0]*TexMatrix[1] + vertex[1]*TexMatrix[5] + vertex[2]*TexMatrix[9]) >> 24) + RawTexCoords[1]; } else { @@ -1004,6 +963,10 @@ void SubmitVertex() s32 CalculateLighting() { + // TODO: this requires matrix mode 2, apparently + // hardware seems to read garbage when matrix mode isn't 2 + // also, non-normal normals seem to be treated as zero? or overflow to negative? 
+ if ((TexParam >> 30) == 2) { TexCoords[0] = RawTexCoords[0] + (((s64)Normal[0]*TexMatrix[0] + (s64)Normal[1]*TexMatrix[4] + (s64)Normal[2]*TexMatrix[8]) >> 21); @@ -1070,6 +1033,134 @@ s32 CalculateLighting() } +void BoxTest(u32* params) +{ + Vertex cube[8]; + Vertex face[10]; + int res; + + GXStat &= ~(1<<1); + + s32 x0 = (s32)(s16)(params[0] & 0xFFFF); + s32 y0 = ((s32)params[0]) >> 16; + s32 z0 = (s32)(s16)(params[1] & 0xFFFF); + s32 x1 = ((s32)params[1]) >> 16; + s32 y1 = (s32)(s16)(params[2] & 0xFFFF); + s32 z1 = ((s32)params[2]) >> 16; + + x1 += x0; + y1 += y0; + z1 += z0; + + cube[0].Position[0] = x0; cube[0].Position[1] = y0; cube[0].Position[2] = z0; + cube[1].Position[0] = x1; cube[1].Position[1] = y0; cube[1].Position[2] = z0; + cube[2].Position[0] = x1; cube[2].Position[1] = y1; cube[2].Position[2] = z0; + cube[3].Position[0] = x0; cube[3].Position[1] = y1; cube[3].Position[2] = z0; + cube[4].Position[0] = x0; cube[4].Position[1] = y1; cube[4].Position[2] = z1; + cube[5].Position[0] = x0; cube[5].Position[1] = y0; cube[5].Position[2] = z1; + cube[6].Position[0] = x1; cube[6].Position[1] = y0; cube[6].Position[2] = z1; + cube[7].Position[0] = x1; cube[7].Position[1] = y1; cube[7].Position[2] = z1; + + UpdateClipMatrix(); + for (int i = 0; i < 8; i++) + { + s32 x = cube[i].Position[0]; + s32 y = cube[i].Position[1]; + s32 z = cube[i].Position[2]; + + cube[i].Position[0] = ((s64)x*ClipMatrix[0] + (s64)y*ClipMatrix[4] + (s64)z*ClipMatrix[8] + 0x1000*ClipMatrix[12]) >> 12; + cube[i].Position[1] = ((s64)x*ClipMatrix[1] + (s64)y*ClipMatrix[5] + (s64)z*ClipMatrix[9] + 0x1000*ClipMatrix[13]) >> 12; + cube[i].Position[2] = ((s64)x*ClipMatrix[2] + (s64)y*ClipMatrix[6] + (s64)z*ClipMatrix[10] + 0x1000*ClipMatrix[14]) >> 12; + cube[i].Position[3] = ((s64)x*ClipMatrix[3] + (s64)y*ClipMatrix[7] + (s64)z*ClipMatrix[11] + 0x1000*ClipMatrix[15]) >> 12; + } + + // front face (-Z) + face[0] = cube[0]; face[1] = cube[1]; face[2] = cube[2]; face[3] = cube[3]; + res = 
ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } + + // back face (+Z) + face[0] = cube[4]; face[1] = cube[5]; face[2] = cube[6]; face[3] = cube[7]; + res = ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } + + // left face (-X) + face[0] = cube[0]; face[1] = cube[3]; face[2] = cube[4]; face[3] = cube[5]; + res = ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } + + // right face (+X) + face[0] = cube[1]; face[1] = cube[2]; face[2] = cube[7]; face[3] = cube[6]; + res = ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } + + // bottom face (-Y) + face[0] = cube[0]; face[1] = cube[1]; face[2] = cube[6]; face[3] = cube[5]; + res = ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } + + // top face (+Y) + face[0] = cube[2]; face[1] = cube[3]; face[2] = cube[4]; face[3] = cube[7]; + res = ClipPolygon<false>(face, 4, 0); + if (res > 0) + { + GXStat |= (1<<1); + return; + } +} + +void PosTest() +{ + s64 vertex[4] = {(s64)CurVertex[0], (s64)CurVertex[1], (s64)CurVertex[2], 0x1000}; + + UpdateClipMatrix(); + PosTestResult[0] = (vertex[0]*ClipMatrix[0] + vertex[1]*ClipMatrix[4] + vertex[2]*ClipMatrix[8] + vertex[3]*ClipMatrix[12]) >> 12; + PosTestResult[1] = (vertex[0]*ClipMatrix[1] + vertex[1]*ClipMatrix[5] + vertex[2]*ClipMatrix[9] + vertex[3]*ClipMatrix[13]) >> 12; + PosTestResult[2] = (vertex[0]*ClipMatrix[2] + vertex[1]*ClipMatrix[6] + vertex[2]*ClipMatrix[10] + vertex[3]*ClipMatrix[14]) >> 12; + PosTestResult[3] = (vertex[0]*ClipMatrix[3] + vertex[1]*ClipMatrix[7] + vertex[2]*ClipMatrix[11] + vertex[3]*ClipMatrix[15]) >> 12; +} + +void VecTest(u32* params) +{ + // TODO: apparently requires matrix mode 2 + // TODO: maybe it overwrites the normal registers, too + + s16 normal[3]; + + normal[0] = (s16)((params[0] & 0x000003FF) << 6) >> 6; + normal[1] = (s16)((params[0] & 0x000FFC00) >> 4) >> 6; + 
normal[2] = (s16)((params[0] & 0x3FF00000) >> 14) >> 6; + + VecTestResult[0] = (normal[0]*VecMatrix[0] + normal[1]*VecMatrix[4] + normal[2]*VecMatrix[8]) >> 9; + VecTestResult[1] = (normal[0]*VecMatrix[1] + normal[1]*VecMatrix[5] + normal[2]*VecMatrix[9]) >> 9; + VecTestResult[2] = (normal[0]*VecMatrix[2] + normal[1]*VecMatrix[6] + normal[2]*VecMatrix[10]) >> 9; + + if (VecTestResult[0] & 0x1000) VecTestResult[0] |= 0xF000; + if (VecTestResult[1] & 0x1000) VecTestResult[1] |= 0xF000; + if (VecTestResult[2] & 0x1000) VecTestResult[2] |= 0xF000; +} + + void CmdFIFOWrite(CmdFIFOEntry& entry) { @@ -1096,6 +1187,17 @@ void CmdFIFOWrite(CmdFIFOEntry& entry) CmdFIFO->Write(entry); } + + if (entry.Command == 0x11 || entry.Command == 0x12) + { + GXStat |= (1<<14); // push/pop matrix + NumPushPopCommands++; + } + else if (entry.Command == 0x70 || entry.Command == 0x71 || entry.Command == 0x72) + { + GXStat |= (1<<0); // box/pos/vec test + NumTestCommands++; + } } CmdFIFOEntry CmdFIFORead() @@ -1132,7 +1234,6 @@ void ExecuteCommand() CycleCount += CmdNumCycles[entry.Command]; ExecParamCount = 0; - GXStat &= ~(1<<14); if (CycleCount > 0) GXStat |= (1<<27); @@ -1143,6 +1244,7 @@ void ExecuteCommand() break; case 0x11: // push matrix + NumPushPopCommands--; if (MatrixMode == 0) { if (ProjMatrixStackPointer > 0) @@ -1154,7 +1256,6 @@ void ExecuteCommand() memcpy(ProjMatrixStack, ProjMatrix, 16*4); ProjMatrixStackPointer++; - GXStat |= (1<<14); } else if (MatrixMode == 3) { @@ -1167,7 +1268,6 @@ void ExecuteCommand() memcpy(TexMatrixStack, TexMatrix, 16*4); TexMatrixStackPointer++; - GXStat |= (1<<14); } else { @@ -1181,11 +1281,11 @@ void ExecuteCommand() memcpy(PosMatrixStack[PosMatrixStackPointer], PosMatrix, 16*4); memcpy(VecMatrixStack[PosMatrixStackPointer], VecMatrix, 16*4); PosMatrixStackPointer++; - GXStat |= (1<<14); } break; case 0x12: // pop matrix + NumPushPopCommands--; if (MatrixMode == 0) { if (ProjMatrixStackPointer <= 0) @@ -1197,7 +1297,6 @@ void 
ExecuteCommand() ProjMatrixStackPointer--; memcpy(ProjMatrix, ProjMatrixStack, 16*4); - GXStat |= (1<<14); ClipMatrixDirty = true; } else if (MatrixMode == 3) @@ -1211,7 +1310,6 @@ void ExecuteCommand() TexMatrixStackPointer--; memcpy(TexMatrix, TexMatrixStack, 16*4); - GXStat |= (1<<14); } else { @@ -1228,7 +1326,6 @@ void ExecuteCommand() memcpy(PosMatrix, PosMatrixStack[PosMatrixStackPointer], 16*4); memcpy(VecMatrix, VecMatrixStack[PosMatrixStackPointer], 16*4); - GXStat |= (1<<14); ClipMatrixDirty = true; } break; @@ -1587,10 +1684,29 @@ void ExecuteCommand() break; case 0x60: // viewport x1,y1,x2,y2 + // note: viewport Y coordinates are upside-down Viewport[0] = ExecParams[0] & 0xFF; - Viewport[1] = (ExecParams[0] >> 8) & 0xFF; + Viewport[1] = 191 - (ExecParams[0] >> 24); Viewport[2] = ((ExecParams[0] >> 16) & 0xFF) - Viewport[0] + 1; - Viewport[3] = (ExecParams[0] >> 24) - Viewport[1] + 1; + Viewport[3] = (191 - ((ExecParams[0] >> 8) & 0xFF)) - Viewport[1] + 1; + break; + + case 0x70: // box test + NumTestCommands -= 3; + BoxTest(ExecParams); + break; + + case 0x71: // pos test + NumTestCommands -= 2; + CurVertex[0] = ExecParams[0] & 0xFFFF; + CurVertex[1] = ExecParams[0] >> 16; + CurVertex[2] = ExecParams[1] & 0xFFFF; + PosTest(); + break; + + case 0x72: // vec test + NumTestCommands--; + VecTest(ExecParams); break; default: @@ -1613,13 +1729,21 @@ void Run(s32 cycles) if (CycleCount <= 0) { while (CycleCount <= 0 && !CmdPIPE->IsEmpty()) + { + if (NumPushPopCommands == 0) GXStat &= ~(1<<14); + if (NumTestCommands == 0) GXStat &= ~(1<<0); + ExecuteCommand(); + } } if (CycleCount <= 0 && CmdPIPE->IsEmpty()) { CycleCount = 0; - GXStat &= ~((1<<27)|(1<<14)); + GXStat &= ~(1<<27); + + if (NumPushPopCommands == 0) GXStat &= ~(1<<14); + if (NumTestCommands == 0) GXStat &= ~(1<<0); } } @@ -1652,6 +1776,10 @@ void VBlank() RenderPolygonRAM = CurPolygonRAM; RenderNumPolygons = NumPolygons; + // TODO: find out which other registers are latched for rendering + 
RenderClearAttr1 = ClearAttr1; + RenderClearAttr2 = ClearAttr2; + CurRAMBank = CurRAMBank?0:1; CurVertexRAM = &VertexRAM[CurRAMBank ? 6144 : 0]; CurPolygonRAM = &PolygonRAM[CurRAMBank ? 2048 : 0]; @@ -1683,6 +1811,45 @@ u32* GetLine(int line) } +void WriteToGXFIFO(u32 val) +{ + if (NumCommands == 0) + { + NumCommands = 4; + CurCommand = val; + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 0xFF]; + + if (TotalParams > 0) return; + } + else + ParamCount++; + + for (;;) + { + if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0)) + { + CmdFIFOEntry entry; + entry.Command = CurCommand & 0xFF; + entry.Param = val; + CmdFIFOWrite(entry); + } + + if (ParamCount >= TotalParams) + { + CurCommand >>= 8; + NumCommands--; + if (NumCommands == 0) break; + + ParamCount = 0; + TotalParams = CmdNumParams[CurCommand & 0xFF]; + } + if (ParamCount < TotalParams) + break; + } +} + + u8 Read8(u32 addr) { printf("unknown GPU3D read8 %08X\n", addr); @@ -1703,6 +1870,10 @@ u16 Read16(u32 addr) return NumPolygons; case 0x04000606: return NumVertices; + + case 0x04000630: return VecTestResult[0]; + case 0x04000632: return VecTestResult[1]; + case 0x04000634: return VecTestResult[2]; } printf("unknown GPU3D read16 %08X\n", addr); @@ -1734,6 +1905,11 @@ u32 Read32(u32 addr) case 0x04000604: return NumPolygons | (NumVertices << 16); + case 0x04000620: return PosTestResult[0]; + case 0x04000624: return PosTestResult[1]; + case 0x04000628: return PosTestResult[2]; + case 0x0400062C: return PosTestResult[3]; + case 0x04000680: return VecMatrix[0]; case 0x04000684: return VecMatrix[1]; case 0x04000688: return VecMatrix[2]; @@ -1760,7 +1936,8 @@ void Write8(u32 addr, u8 val) switch (addr) { case 0x04000340: - AlphaRef = val & 0x1F; + AlphaRefVal = val & 0x1F; + AlphaRef = (DispCnt & (1<<2)) ? 
AlphaRefVal : 0; return; } @@ -1778,11 +1955,15 @@ void Write16(u32 addr, u16 val) switch (addr) { case 0x04000060: - DispCnt = val; + DispCnt = (val & 0x4FFF) | (DispCnt & 0x3000); + if (val & (1<<12)) DispCnt &= ~(1<<12); + if (val & (1<<13)) DispCnt &= ~(1<<13); + AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0; return; case 0x04000340: - AlphaRef = val & 0x1F; + AlphaRefVal = val & 0x1F; + AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0; return; case 0x04000350: @@ -1837,11 +2018,15 @@ void Write32(u32 addr, u32 val) switch (addr) { case 0x04000060: - DispCnt = val & 0xFFFF; + DispCnt = (val & 0x4FFF) | (DispCnt & 0x3000); + if (val & (1<<12)) DispCnt &= ~(1<<12); + if (val & (1<<13)) DispCnt &= ~(1<<13); + AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0; return; case 0x04000340: - AlphaRef = val & 0x1F; + AlphaRefVal = val & 0x1F; + AlphaRef = (DispCnt & (1<<2)) ? AlphaRefVal : 0; return; case 0x04000350: @@ -1875,41 +2060,7 @@ void Write32(u32 addr, u32 val) if (addr >= 0x04000400 && addr < 0x04000440) { - if (NumCommands == 0) - { - NumCommands = 4; - CurCommand = val; - ParamCount = 0; - TotalParams = CmdNumParams[CurCommand & 0xFF]; - - if (TotalParams > 0) return; - } - else - ParamCount++; - - for (;;) - { - if ((CurCommand & 0xFF) || (NumCommands == 4 && CurCommand == 0)) - { - CmdFIFOEntry entry; - entry.Command = CurCommand & 0xFF; - entry.Param = val; - CmdFIFOWrite(entry); - } - - if (ParamCount >= TotalParams) - { - CurCommand >>= 8; - NumCommands--; - if (NumCommands == 0) break; - - ParamCount = 0; - TotalParams = CmdNumParams[CurCommand & 0xFF]; - } - if (ParamCount < TotalParams) - break; - } - + WriteToGXFIFO(val); return; } diff --git a/src/GPU3D.h b/src/GPU3D.h index be121bf..fdb85f8 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -33,7 +33,7 @@ typedef struct // final vertex attributes. // allows them to be reused in polygon strips. 
- s32 FinalPosition[4]; + s32 FinalPosition[2]; s32 FinalColor[3]; } Vertex; @@ -43,6 +43,11 @@ typedef struct Vertex* Vertices[10]; u32 NumVertices; + s32 FinalZ[10]; + s32 FinalW[10]; + u8 WShift; + bool WBuffer; + u32 Attr; u32 TexParam; u32 TexPalette; @@ -50,6 +55,10 @@ typedef struct bool FacingView; bool Translucent; + bool IsShadowMask; + bool IsShadow; + bool ClearStencil; + u32 VTop, VBottom; // vertex indices s32 YTop, YBottom; // Y coords s32 XTop, XBottom; // associated X coords @@ -57,9 +66,11 @@ typedef struct } Polygon; extern u32 DispCnt; -extern u32 AlphaRef; +extern u8 AlphaRef; extern s32 Viewport[4]; -extern u32 ClearAttr1, ClearAttr2; +extern u32 RenderClearAttr1, RenderClearAttr2; + +extern u16 ToonTable[32]; bool Init(); void DeInit(); @@ -75,6 +86,8 @@ void VBlank(); void VCount215(); u32* GetLine(int line); +void WriteToGXFIFO(u32 val); + u8 Read8(u32 addr); u16 Read16(u32 addr); u32 Read32(u32 addr); diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index a981bd5..55dd906 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -32,8 +32,15 @@ u32 DepthBuffer[256*192]; u32 AttrBuffer[256*192]; // attribute buffer: -// bit0-5: polygon ID -// bit8: fog enable +// bit15: fog enable +// bit24-29: polygon ID +// bit30: translucent flag + +u8 StencilBuffer[256*192]; + +// note: the stencil buffer isn't emulated properly. +// emulating it properly would require rendering polygons per-scanline +// the stencil buffer is normally limited to 2 scanlines bool Init() @@ -53,6 +60,243 @@ void Reset() } +// Notes on the interpolator: +// +// This is a theory on how the DS hardware interpolates values. It matches hardware output +// in the tests I did, but the hardware may be doing it differently. You never know. 
+// +// Assuming you want to perspective-correctly interpolate a variable named A across two points +// in a typical rasterizer, you would calculate A/W and 1/W at each point, interpolate linearly, +// then divide A/W by 1/W to recover the correct A value. +// +// The DS GPU approximates interpolation by calculating a perspective-correct interpolation +// between 0 and 1, then using the result as a factor to linearly interpolate the actual +// vertex attributes. The factor has 9 bits of precision when interpolating along Y and +// 8 bits along X. +// +// There's a special path for when the two W values are equal: it directly does linear +// interpolation, avoiding precision loss from the aforementioned approximation. +// Which is desirable when using the GPU to draw 2D graphics. + +class Interpolator +{ +public: + Interpolator() {} + Interpolator(s32 x0, s32 x1, s32 w0, s32 w1, int shift) + { + Setup(x0, x1, w0, w1, shift); + } + + void Setup(s32 x0, s32 x1, s32 w0, s32 w1, int shift) + { + this->x0 = x0; + this->x1 = x1; + this->xdiff = x1 - x0; + this->shift = shift; + + this->w0factor = (s64)w0 * xdiff; + this->w1factor = (s64)w1 * xdiff; + this->wdiff = w1 - w0; + } + + void SetX(s32 x) + { + x -= x0; + this->x = x; + if (xdiff != 0 && wdiff != 0) + { + s64 num = ((s64)x << (shift + 40)) / w1factor; + s64 denw0 = ((s64)(xdiff-x) << 40) / w0factor; + s64 denw1 = num >> shift; + + s64 denom = denw0 + denw1; + if (denom == 0) + yfactor = 0; + else + { + yfactor = (s32)(num / denom); + } + } + } + + s32 Interpolate(s32 y0, s32 y1) + { + if (xdiff == 0) return y0; + + if (wdiff != 0) + return y0 + (((y1 - y0) * yfactor) >> shift); + else + return y0 + (((y1 - y0) * x) / xdiff); + } + + s32 InterpolateZ(s32 z0, s32 z1, bool wbuffer) + { + if (xdiff == 0) return z0; + + if ((wdiff != 0) && wbuffer) + return z0 + (((s64)(z1 - z0) * yfactor) >> shift); + else + return z0 + (((s64)(z1 - z0) * x) / xdiff); + } + +private: + s32 x0, x1, xdiff, x; + s64 w0factor, w1factor; 
+ s32 wdiff; + int shift; + + s32 yfactor; +}; + + +class Slope +{ +public: + Slope() {} + + s32 SetupDummy(s32 x0, int side) + { + if (side) + { + dx = -0x10000; + x0--; + } + else + { + dx = 0; + } + + this->x0 = x0; + this->xmin = x0; + this->xmax = x0; + + Increment = 0; + XMajor = false; + + Interp.Setup(0, 0, 0, 0, 9); + Interp.SetX(0); + + return x0; + } + + s32 Setup(s32 x0, s32 x1, s32 y0, s32 y1, s32 w0, s32 w1, int side) + { + this->x0 = x0; + this->y = y0; + + if (x1 > x0) + { + this->xmin = x0; + this->xmax = x1-1; + } + else if (x1 < x0) + { + this->xmin = x1; + this->xmax = x0-1; + } + else + { + this->xmin = x0; + if (side) this->xmin--; + this->xmax = this->xmin; + } + + if (y0 == y1) + Increment = 0; + else + Increment = ((x1 - x0) << 16) / (y1 - y0); + + if (Increment < 0) + { + Increment = -Increment; + Negative = true; + } + else + Negative = false; + + XMajor = (Increment > 0x10000); + + if (side) + { + // right + + if (XMajor) dx = Negative ? (0x8000 + 0x10000) : (Increment - 0x8000); + else if (Increment != 0) dx = Negative ? 0x10000 : 0; + else dx = -0x10000; + } + else + { + // left + + if (XMajor) dx = Negative ? ((Increment - 0x8000) + 0x10000) : 0x8000; + else if (Increment != 0) dx = Negative ? 
0x10000 : 0; + else dx = 0; + } + + if (XMajor) + { + if (side) Interp.Setup(x0-1, x1-1, w0, w1, 9); // checkme + else Interp.Setup(x0, x1, w0, w1, 9); + } + else Interp.Setup(y0, y1, w0, w1, 9); + + s32 x = XVal(); + if (XMajor) Interp.SetX(x); + else Interp.SetX(y); + return x; + } + + s32 Step() + { + dx += Increment; + y++; + + s32 x = XVal(); + if (XMajor) Interp.SetX(x); + else Interp.SetX(y); + return x; + } + + s32 XVal() + { + s32 ret; + if (Negative) ret = x0 - (dx >> 16); + else ret = x0 + (dx >> 16); + + if (ret < xmin) ret = xmin; + else if (ret > xmax) ret = xmax; + return ret; + } + + s32 EdgeLimit(int side) + { + s32 ret; + if (side) + { + if (Negative) ret = x0 - ((dx+Increment) >> 16); + else ret = x0 + ((dx-Increment) >> 16); + } + else + { + if (Negative) ret = x0 - ((dx-Increment) >> 16); + else ret = x0 + ((dx+Increment) >> 16); + } + + return ret; + } + + s32 Increment; + bool Negative; + bool XMajor; + Interpolator Interp; + +private: + s32 x0, xmin, xmax; + s32 dx; + s32 y; +}; + + void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) { u32 vramaddr = (texparam & 0xFFFF) << 3; @@ -65,6 +309,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha // texture wrapping // TODO: optimize this somehow + // testing shows that it's hardly worth optimizing, actually if (texparam & (1<<16)) { @@ -278,11 +523,10 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha } } -bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z) +template<bool func_equal> +bool DepthTest(s32 oldz, s32 z) { - u32 oldz = DepthBuffer[(256*y) + x]; - - if (polygon->Attr & (1<<14)) + if (func_equal) { s32 diff = oldz - z; if ((u32)(diff + 0x200) <= 0x400) @@ -295,14 +539,23 @@ bool DepthTest(Polygon* polygon, s32 x, s32 y, s32 z) return false; } -u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 s, s16 t) +u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, 
s16 s, s16 t) { - u32 attr = polygon->Attr; u8 r, g, b, a; + u32 blendmode = (polygon->Attr >> 4) & 0x3; u32 polyalpha = (polygon->Attr >> 16) & 0x1F; bool wireframe = (polyalpha == 0); + if (blendmode == 2) + { + u16 tooncolor = ToonTable[vr >> 1]; + + vr = (tooncolor << 1) & 0x3E; if (vr) vr++; + vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; + vb = (tooncolor >> 9) & 0x3E; if (vb) vb++; + } + if ((DispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) { u8 tr, tg, tb; @@ -314,11 +567,39 @@ u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 tg = (tcolor >> 4) & 0x3E; if (tg) tg++; tb = (tcolor >> 9) & 0x3E; if (tb) tb++; - // TODO: other blending modes - r = ((tr+1) * (vr+1) - 1) >> 6; - g = ((tg+1) * (vg+1) - 1) >> 6; - b = ((tb+1) * (vb+1) - 1) >> 6; - a = ((talpha+1) * (polyalpha+1) - 1) >> 5; + if (blendmode & 0x1) + { + // decal + + if (talpha == 0) + { + r = vr; + g = vg; + b = vb; + } + else if (talpha == 31) + { + r = tr; + g = tg; + b = tb; + } + else + { + r = ((tr * talpha) + (vr * (31-talpha))) >> 5; + g = ((tg * talpha) + (vg * (31-talpha))) >> 5; + b = ((tb * talpha) + (vb * (31-talpha))) >> 5; + } + a = polyalpha; + } + else + { + // modulate + + r = ((tr+1) * (vr+1) - 1) >> 6; + g = ((tg+1) * (vg+1) - 1) >> 6; + b = ((tb+1) * (vb+1) - 1) >> 6; + a = ((talpha+1) * (polyalpha+1) - 1) >> 5; + } } else { @@ -328,6 +609,18 @@ u32 RenderPixel(Polygon* polygon, s32 x, s32 y, s32 z, u8 vr, u8 vg, u8 vb, s16 a = polyalpha; } + if ((blendmode == 2) && (DispCnt & (1<<1))) + { + r += vr; + g += vg; + b += vb; + + if (r > 63) r = 63; + if (g > 63) g = 63; + if (b > 63) b = 63; + } + + // checkme: can wireframe polygons use texture alpha? 
if (wireframe) a = 31; return r | (g << 8) | (b << 16) | (a << 24); @@ -349,11 +642,33 @@ void RenderPolygon(Polygon* polygon) u32 polyalpha = (polygon->Attr >> 16) & 0x1F; bool wireframe = (polyalpha == 0); + bool (*fnDepthTest)(s32 oldz, s32 z); + if (polygon->Attr & (1<<14)) + fnDepthTest = DepthTest<true>; + else + fnDepthTest = DepthTest<false>; + + int lcur = vtop, rcur = vtop; int lnext, rnext; - s32 dxl, dxr; - s32 lslope, rslope; + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + + Slope slopeL, slopeR; + s32 xL, xR; bool l_xmajor, r_xmajor; if (ybot == ytop) @@ -376,52 +691,59 @@ void RenderPolygon(Polygon* polygon) lcur = vtop; lnext = vtop; rcur = vbot; rnext = vbot; - lslope = 0; l_xmajor = false; - rslope = 0; r_xmajor = false; + xL = slopeL.SetupDummy(polygon->Vertices[lcur]->FinalPosition[0], 0); + xR = slopeR.SetupDummy(polygon->Vertices[rcur]->FinalPosition[0], 1); } else { - //while (polygon->Vertices[lnext]->FinalPosition[1] ) - if (polygon->FacingView) - { - lnext = lcur + 1; - if (lnext >= nverts) lnext = 0; - rnext = rcur - 1; - if (rnext < 0) rnext = nverts - 1; - } - else + while (ytop >= polygon->Vertices[lnext]->FinalPosition[1] && lcur != vbot) { - lnext = lcur - 1; - if (lnext < 0) lnext = nverts - 1; - rnext = rcur + 1; - if (rnext >= nverts) rnext = 0; + lcur = lnext; + + if (polygon->FacingView) + { + lnext = lcur + 1; + if (lnext >= nverts) lnext = 0; + } + else + { + lnext = lcur - 1; + if (lnext < 0) lnext = nverts - 1; + } } - if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1]) - lslope = 0; - else - lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) / - (polygon->Vertices[lnext]->FinalPosition[1] - 
polygon->Vertices[lcur]->FinalPosition[1]); + xL = slopeL.Setup(polygon->Vertices[lcur]->FinalPosition[0], polygon->Vertices[lnext]->FinalPosition[0], + polygon->Vertices[lcur]->FinalPosition[1], polygon->Vertices[lnext]->FinalPosition[1], + polygon->FinalW[lcur], polygon->FinalW[lnext], 0); - if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1]) - rslope = 0; - else - rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) / - (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]); + while (ytop >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot) + { + rcur = rnext; - l_xmajor = (lslope < -0x1000) || (lslope > 0x1000); - r_xmajor = (rslope < -0x1000) || (rslope > 0x1000); + if (polygon->FacingView) + { + rnext = rcur - 1; + if (rnext < 0) rnext = nverts - 1; + } + else + { + rnext = rcur + 1; + if (rnext >= nverts) rnext = 0; + } + } + + xR = slopeR.Setup(polygon->Vertices[rcur]->FinalPosition[0], polygon->Vertices[rnext]->FinalPosition[0], + polygon->Vertices[rcur]->FinalPosition[1], polygon->Vertices[rnext]->FinalPosition[1], + polygon->FinalW[rcur], polygon->FinalW[rnext], 1); } - if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000; - else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000; - else dxl = 0; + if (ybot > 192) ybot = 192; - if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000; - else if (rslope) dxr = (rslope > 0) ? 
0 : 0x1000; - else dxr = 0x1000; + if (polygon->ClearStencil) + { + memset(StencilBuffer, 0, 192*256); + } - if (ybot > 192) ybot = 192; for (s32 y = ytop; y < ybot; y++) { if (!isline) @@ -444,17 +766,9 @@ void RenderPolygon(Polygon* polygon) } } - if (polygon->Vertices[lnext]->FinalPosition[1] == polygon->Vertices[lcur]->FinalPosition[1]) - lslope = 0; - else - lslope = ((polygon->Vertices[lnext]->FinalPosition[0] - polygon->Vertices[lcur]->FinalPosition[0]) << 12) / - (polygon->Vertices[lnext]->FinalPosition[1] - polygon->Vertices[lcur]->FinalPosition[1]); - - l_xmajor = (lslope < -0x1000) || (lslope > 0x1000); - - if (l_xmajor) dxl = (lslope > 0) ? 0x800 : (-lslope-0x800)+0x1000; - else if (lslope) dxl = (lslope > 0) ? 0 : 0x1000; - else dxl = 0; + xL = slopeL.Setup(polygon->Vertices[lcur]->FinalPosition[0], polygon->Vertices[lnext]->FinalPosition[0], + polygon->Vertices[lcur]->FinalPosition[1], polygon->Vertices[lnext]->FinalPosition[1], + polygon->FinalW[lcur], polygon->FinalW[lnext], 0); } if (y >= polygon->Vertices[rnext]->FinalPosition[1] && rcur != vbot) @@ -475,71 +789,25 @@ void RenderPolygon(Polygon* polygon) } } - if (polygon->Vertices[rnext]->FinalPosition[1] == polygon->Vertices[rcur]->FinalPosition[1]) - rslope = 0; - else - rslope = ((polygon->Vertices[rnext]->FinalPosition[0] - polygon->Vertices[rcur]->FinalPosition[0]) << 12) / - (polygon->Vertices[rnext]->FinalPosition[1] - polygon->Vertices[rcur]->FinalPosition[1]); - - r_xmajor = (rslope < -0x1000) || (rslope > 0x1000); - - if (r_xmajor) dxr = (rslope > 0) ? rslope-0x800 : 0x800+0x1000; - else if (rslope) dxr = (rslope > 0) ? 
0 : 0x1000; - else dxr = 0x1000; + xR = slopeR.Setup(polygon->Vertices[rcur]->FinalPosition[0], polygon->Vertices[rnext]->FinalPosition[0], + polygon->Vertices[rcur]->FinalPosition[1], polygon->Vertices[rnext]->FinalPosition[1], + polygon->FinalW[rcur], polygon->FinalW[rnext], 1); } } Vertex *vlcur, *vlnext, *vrcur, *vrnext; s32 xstart, xend; - s32 xstart_int, xend_int; - s32 slope_start, slope_end; + Slope* slope_start; + Slope* slope_end; - if (lslope == 0 && rslope == 0 && - polygon->Vertices[lcur]->FinalPosition[0] == polygon->Vertices[rcur]->FinalPosition[0]) - { - xstart = polygon->Vertices[lcur]->FinalPosition[0]; - xend = xstart; - } - else - { - if (lslope > 0) - { - xstart = polygon->Vertices[lcur]->FinalPosition[0] + (dxl >> 12); - if (xstart < polygon->Vertices[lcur]->FinalPosition[0]) - xstart = polygon->Vertices[lcur]->FinalPosition[0]; - else if (xstart > polygon->Vertices[lnext]->FinalPosition[0]-1) - xstart = polygon->Vertices[lnext]->FinalPosition[0]-1; - } - else if (lslope < 0) - { - xstart = polygon->Vertices[lcur]->FinalPosition[0] - (dxl >> 12); - if (xstart < polygon->Vertices[lnext]->FinalPosition[0]) - xstart = polygon->Vertices[lnext]->FinalPosition[0]; - else if (xstart > polygon->Vertices[lcur]->FinalPosition[0]-1) - xstart = polygon->Vertices[lcur]->FinalPosition[0]-1; - } - else - xstart = polygon->Vertices[lcur]->FinalPosition[0]; + xstart = xL; + xend = xR; - if (rslope > 0) - { - xend = polygon->Vertices[rcur]->FinalPosition[0] + (dxr >> 12); - if (xend < polygon->Vertices[rcur]->FinalPosition[0]) - xend = polygon->Vertices[rcur]->FinalPosition[0]; - else if (xend > polygon->Vertices[rnext]->FinalPosition[0]-1) - xend = polygon->Vertices[rnext]->FinalPosition[0]-1; - } - else if (rslope < 0) - { - xend = polygon->Vertices[rcur]->FinalPosition[0] - (dxr >> 12); - if (xend < polygon->Vertices[rnext]->FinalPosition[0]) - xend = polygon->Vertices[rnext]->FinalPosition[0]; - else if (xend > polygon->Vertices[rcur]->FinalPosition[0]-1) - 
xend = polygon->Vertices[rcur]->FinalPosition[0]-1; - } - else - xend = polygon->Vertices[rcur]->FinalPosition[0] - 1; - } + s32 wl = slopeL.Interp.Interpolate(polygon->FinalW[lcur], polygon->FinalW[lnext]); + s32 wr = slopeR.Interp.Interpolate(polygon->FinalW[rcur], polygon->FinalW[rnext]); + + s32 zl = slopeL.Interp.InterpolateZ(polygon->FinalZ[lcur], polygon->FinalZ[lnext], polygon->WBuffer); + s32 zr = slopeR.Interp.InterpolateZ(polygon->FinalZ[rcur], polygon->FinalZ[rnext], polygon->WBuffer); // if the left and right edges are swapped, render backwards. // note: we 'forget' to swap the xmajor flags, on purpose @@ -551,10 +819,13 @@ void RenderPolygon(Polygon* polygon) vrcur = polygon->Vertices[lcur]; vrnext = polygon->Vertices[lnext]; - slope_start = rslope; - slope_end = lslope; + slope_start = &slopeR; + slope_end = &slopeL; - s32 tmp = xstart; xstart = xend; xend = tmp; + s32 tmp; + tmp = xstart; xstart = xend; xend = tmp; + tmp = wl; wl = wr; wr = tmp; + tmp = zl; zl = zr; zr = tmp; } else { @@ -563,222 +834,215 @@ void RenderPolygon(Polygon* polygon) vrcur = polygon->Vertices[rcur]; vrnext = polygon->Vertices[rnext]; - slope_start = lslope; - slope_end = rslope; + slope_start = &slopeL; + slope_end = &slopeR; } // interpolate attributes along Y - s64 lfactor1, lfactor2; - s64 rfactor1, rfactor2; - if (l_xmajor) - { - lfactor1 = (vlnext->FinalPosition[0] - xstart) * vlnext->FinalPosition[3]; - lfactor2 = (xstart - vlcur->FinalPosition[0]) * vlcur->FinalPosition[3]; - } - else - { - lfactor1 = (vlnext->FinalPosition[1] - y) * vlnext->FinalPosition[3]; - lfactor2 = (y - vlcur->FinalPosition[1]) * vlcur->FinalPosition[3]; - } + s32 rl = slope_start->Interp.Interpolate(vlcur->FinalColor[0], vlnext->FinalColor[0]); + s32 gl = slope_start->Interp.Interpolate(vlcur->FinalColor[1], vlnext->FinalColor[1]); + s32 bl = slope_start->Interp.Interpolate(vlcur->FinalColor[2], vlnext->FinalColor[2]); - s64 ldenom = lfactor1 + lfactor2; - if (ldenom == 0) - { - lfactor1 = 
0x1000; - lfactor2 = 0; - ldenom = 0x1000; - } + s32 sl = slope_start->Interp.Interpolate(vlcur->TexCoords[0], vlnext->TexCoords[0]); + s32 tl = slope_start->Interp.Interpolate(vlcur->TexCoords[1], vlnext->TexCoords[1]); - if (r_xmajor) - { - rfactor1 = (vrnext->FinalPosition[0] - xend+1) * vrnext->FinalPosition[3]; - rfactor2 = (xend+1 - vrcur->FinalPosition[0]) * vrcur->FinalPosition[3]; - } - else - { - rfactor1 = (vrnext->FinalPosition[1] - y) * vrnext->FinalPosition[3]; - rfactor2 = (y - vrcur->FinalPosition[1]) * vrcur->FinalPosition[3]; - } - - s64 rdenom = rfactor1 + rfactor2; - if (rdenom == 0) - { - rfactor1 = 0x1000; - rfactor2 = 0; - rdenom = 0x1000; - } - - s32 zl = ((lfactor1 * vlcur->FinalPosition[2]) + (lfactor2 * vlnext->FinalPosition[2])) / ldenom; - s32 zr = ((rfactor1 * vrcur->FinalPosition[2]) + (rfactor2 * vrnext->FinalPosition[2])) / rdenom; - - s32 wl = ((lfactor1 * vlcur->FinalPosition[3]) + (lfactor2 * vlnext->FinalPosition[3])) / ldenom; - s32 wr = ((rfactor1 * vrcur->FinalPosition[3]) + (rfactor2 * vrnext->FinalPosition[3])) / rdenom; - - s32 rl = ((lfactor1 * vlcur->FinalColor[0]) + (lfactor2 * vlnext->FinalColor[0])) / ldenom; - s32 gl = ((lfactor1 * vlcur->FinalColor[1]) + (lfactor2 * vlnext->FinalColor[1])) / ldenom; - s32 bl = ((lfactor1 * vlcur->FinalColor[2]) + (lfactor2 * vlnext->FinalColor[2])) / ldenom; - - s32 sl = ((lfactor1 * vlcur->TexCoords[0]) + (lfactor2 * vlnext->TexCoords[0])) / ldenom; - s32 tl = ((lfactor1 * vlcur->TexCoords[1]) + (lfactor2 * vlnext->TexCoords[1])) / ldenom; + s32 rr = slope_end->Interp.Interpolate(vrcur->FinalColor[0], vrnext->FinalColor[0]); + s32 gr = slope_end->Interp.Interpolate(vrcur->FinalColor[1], vrnext->FinalColor[1]); + s32 br = slope_end->Interp.Interpolate(vrcur->FinalColor[2], vrnext->FinalColor[2]); - s32 rr = ((rfactor1 * vrcur->FinalColor[0]) + (rfactor2 * vrnext->FinalColor[0])) / rdenom; - s32 gr = ((rfactor1 * vrcur->FinalColor[1]) + (rfactor2 * vrnext->FinalColor[1])) / rdenom; - 
s32 br = ((rfactor1 * vrcur->FinalColor[2]) + (rfactor2 * vrnext->FinalColor[2])) / rdenom; - - s32 sr = ((rfactor1 * vrcur->TexCoords[0]) + (rfactor2 * vrnext->TexCoords[0])) / rdenom; - s32 tr = ((rfactor1 * vrcur->TexCoords[1]) + (rfactor2 * vrnext->TexCoords[1])) / rdenom; + s32 sr = slope_end->Interp.Interpolate(vrcur->TexCoords[0], vrnext->TexCoords[0]); + s32 tr = slope_end->Interp.Interpolate(vrcur->TexCoords[1], vrnext->TexCoords[1]); // calculate edges + // + // edge fill rules for opaque pixels: + // * right edge is filled if slope > 1 + // * left edge is filled if slope <= 1 + // * edges with slope = 0 are always filled + // edges are always filled if the pixels are translucent + // in wireframe mode, there are special rules for equal Z (TODO) + s32 l_edgeend, r_edgestart; + bool l_filledge, r_filledge; - if (l_xmajor) + if (slopeL.XMajor) { - if (slope_start > 0) l_edgeend = vlcur->FinalPosition[0] + ((dxl + slope_start) >> 12); - else l_edgeend = vlcur->FinalPosition[0] - ((dxl - slope_start) >> 12); - + l_edgeend = slope_start->EdgeLimit(0); if (l_edgeend == xstart) l_edgeend++; + + l_filledge = slope_start->Negative; } else + { l_edgeend = xstart + 1; - if (r_xmajor) + l_filledge = true; + } + + if (slopeR.XMajor) { - if (slope_end > 0) r_edgestart = vrcur->FinalPosition[0] + ((dxr + slope_end) >> 12); - else r_edgestart = vrcur->FinalPosition[0] - ((dxr - slope_end) >> 12); + r_edgestart = slope_end->EdgeLimit(1); + if (r_edgestart == xend) r_edgestart--; - if (r_edgestart == xend_int) r_edgestart--; + r_filledge = !slope_end->Negative; } else + { r_edgestart = xend - 1; - // edge fill rules for opaque pixels: - // * right edge is filled if slope > 1 - // * left edge is filled if slope <= 1 - // * edges with slope = 0 are always filled - // edges are always filled if the pixels are translucent - // in wireframe mode, there are special rules for equal Z (TODO) + r_filledge = slope_end->Increment==0; + } + + int yedge = 0; + if (y == ytop) yedge = 
0x4; + else if (y == ybot-1) yedge = 0x8; + + Interpolator interpX(xstart, xend+1, wl, wr, 8); for (s32 x = xstart; x <= xend; x++) { if (x < 0) continue; if (x > 255) break; - int edge = 0; - if (y == ytop) edge |= 0x4; - else if (y == ybot-1) edge |= 0x8; + int edge = yedge; if (x < l_edgeend) edge |= 0x1; else if (x > r_edgestart) edge |= 0x2; // wireframe polygons. really ugly, but works - if (wireframe && edge==0) continue; + if (wireframe && edge==0) + { + x = r_edgestart + 1; + continue; + } - s64 factor1 = (xend+1 - x) * wr; - s64 factor2 = (x - xstart) * wl; - s64 denom = factor1 + factor2; - if (denom == 0) + u32 pixeladdr = (y*256) + x; + u32 attr = polygon->Attr & 0x3F008000; + + // check stencil buffer for shadows + if (polygon->IsShadow) { - factor1 = 0x1000; - factor2 = 0; - denom = 0x1000; + if (StencilBuffer[pixeladdr] == 0) + continue; } - s32 z = ((factor1 * zl) + (factor2 * zr)) / denom; - if (!DepthTest(polygon, x, y, z)) continue; + interpX.SetX(x); - u32 vr = ((factor1 * rl) + (factor2 * rr)) / denom; - u32 vg = ((factor1 * gl) + (factor2 * gr)) / denom; - u32 vb = ((factor1 * bl) + (factor2 * br)) / denom; + s32 z = interpX.InterpolateZ(zl, zr, polygon->WBuffer); - s16 s = ((factor1 * sl) + (factor2 * sr)) / denom; - s16 t = ((factor1 * tl) + (factor2 * tr)) / denom; + if (polygon->IsShadowMask) + { + // for shadow masks: set stencil bits where the depth test fails. + // draw nothing. 
- u32 color = RenderPixel(polygon, x, y, z, vr>>3, vg>>3, vb>>3, s, t); - u32 attr = 0; - u32 pixeladdr = (y*256) + x; + // checkme + if (polyalpha == 31) + { + if (!wireframe) + { + if ((edge & 0x1) && !l_filledge) + continue; + if ((edge & 0x2) && !r_filledge) + continue; + } + } - u8 alpha = color >> 24; + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + StencilBuffer[pixeladdr] = 1; - // alpha test - if (DispCnt & (1<<2)) - { - if (alpha <= AlphaRef) continue; - } - else - { - if (alpha == 0) continue; + continue; } - // alpha blending disable - // TODO: check alpha test when blending is disabled - if (!(DispCnt & (1<<3))) - alpha = 31; + if (!fnDepthTest(DepthBuffer[pixeladdr], z)) + continue; + + u32 vr = interpX.Interpolate(rl, rr); + u32 vg = interpX.Interpolate(gl, gr); + u32 vb = interpX.Interpolate(bl, br); - u32 dstcolor = ColorBuffer[pixeladdr]; - u32 dstalpha = dstcolor >> 24; + s16 s = interpX.Interpolate(sl, sr); + s16 t = interpX.Interpolate(tl, tr); + + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); + u8 alpha = color >> 24; + + // alpha test + // TODO: check alpha test when blending is disabled + if (alpha <= AlphaRef) continue; if (alpha == 31) { // edge fill rules for opaque pixels // TODO, eventually: antialiasing - if (!wireframe)// && !(edge & 0x4)) + if (!wireframe) { - if ((edge & 0x1) && slope_start > 0x1000) + if ((edge & 0x1) && !l_filledge) continue; - if ((edge & 0x2) && (slope_end != 0 && slope_end <= 0x1000)) + if ((edge & 0x2) && !r_filledge) continue; } DepthBuffer[pixeladdr] = z; } - else if (dstalpha == 0) - { - // TODO: conditional Z-buffer update - DepthBuffer[pixeladdr] = z; - } else { - u32 srcR = color & 0x3F; - u32 srcG = (color >> 8) & 0x3F; - u32 srcB = (color >> 16) & 0x3F; + u32 dstattr = AttrBuffer[pixeladdr]; + attr |= (1<<30); + if (polygon->IsShadow) dstattr |= (1<<30); + + // skip if polygon IDs are equal + // note: this only happens if the destination pixel was translucent + // or always when drawing 
a shadow + // (the GPU keeps track of which pixels are translucent, regardless of + // the destination alpha) + if ((dstattr & 0x7F000000) == (attr & 0x7F000000)) + continue; + + u32 dstcolor = ColorBuffer[pixeladdr]; + u32 dstalpha = dstcolor >> 24; + + if ((dstalpha > 0) && (DispCnt & (1<<3))) + { + u32 srcR = color & 0x3F; + u32 srcG = (color >> 8) & 0x3F; + u32 srcB = (color >> 16) & 0x3F; - u32 dstR = dstcolor & 0x3F; - u32 dstG = (dstcolor >> 8) & 0x3F; - u32 dstB = (dstcolor >> 16) & 0x3F; + u32 dstR = dstcolor & 0x3F; + u32 dstG = (dstcolor >> 8) & 0x3F; + u32 dstB = (dstcolor >> 16) & 0x3F; - alpha++; - dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5; - dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5; - dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5; + alpha++; + dstR = ((srcR * alpha) + (dstR * (32-alpha))) >> 5; + dstG = ((srcG * alpha) + (dstG * (32-alpha))) >> 5; + dstB = ((srcB * alpha) + (dstB * (32-alpha))) >> 5; - alpha--; - if (alpha > dstalpha) dstalpha = alpha; + alpha--; + if (alpha > dstalpha) dstalpha = alpha; - color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24); + color = dstR | (dstG << 8) | (dstB << 16) | (dstalpha << 24); + } - // TODO: conditional Z-buffer update - DepthBuffer[pixeladdr] = z; + if (polygon->Attr & (1<<11)) + DepthBuffer[pixeladdr] = z; } ColorBuffer[pixeladdr] = color; AttrBuffer[pixeladdr] = attr; } - if (lslope > 0) dxl += lslope; - else dxl -= lslope; - if (rslope > 0) dxr += rslope; - else dxr -= rslope; + xL = slopeL.Step(); + xR = slopeR.Step(); } } void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) { - u32 polyid = (ClearAttr1 >> 24) & 0x3F; + u32 polyid = RenderClearAttr1 & 0x3F000000; if (DispCnt & (1<<14)) { - u8 xoff = (ClearAttr2 >> 16) & 0xFF; - u8 yoff = (ClearAttr2 >> 24) & 0xFF; + u8 xoff = (RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (RenderClearAttr2 >> 24) & 0xFF; for (int y = 0; y < 256*192; y += 256) { @@ -795,11 +1059,10 @@ void RenderFrame(Vertex* vertices, 
Polygon* polygons, int npolys) u32 color = r | (g << 8) | (b << 16) | a; u32 z = ((val3 & 0x7FFF) * 0x200) + 0x1FF; - if (z >= 0x10000 && z < 0xFFFFFF) z++; ColorBuffer[y+x] = color; DepthBuffer[y+x] = z; - AttrBuffer[y+x] = polyid | ((val3 & 0x8000) >> 7); + AttrBuffer[y+x] = polyid | (val3 & 0x8000); xoff++; } @@ -810,16 +1073,15 @@ void RenderFrame(Vertex* vertices, Polygon* polygons, int npolys) else { // TODO: confirm color conversion - u32 r = (ClearAttr1 << 1) & 0x3E; if (r) r++; - u32 g = (ClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (ClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (ClearAttr1 >> 16) & 0x1F; + u32 r = (RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - u32 z = ((ClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - if (z >= 0x10000 && z < 0xFFFFFF) z++; + u32 z = ((RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - polyid |= ((ClearAttr1 & 0x8000) >> 7); + polyid |= (RenderClearAttr1 & 0x8000); for (int i = 0; i < 256*192; i++) { diff --git a/src/NDS.cpp b/src/NDS.cpp index d8f346e..18a6f46 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -25,6 +25,7 @@ #include "DMA.h" #include "FIFO.h" #include "GPU.h" +#include "SPU.h" #include "SPI.h" #include "RTC.h" #include "Wifi.h" @@ -37,11 +38,6 @@ namespace NDS // * stick all the variables in a big structure? 
// would make it easier to deal with savestates -/*SchedEvent SchedBuffer[SCHED_BUF_LEN]; -SchedEvent* SchedQueue; - -bool NeedReschedule;*/ - ARM* ARM9; ARM* ARM7; @@ -87,6 +83,7 @@ u16 PowerControl7; u16 ARM7BIOSProt; Timer Timers[8]; +u8 TimerCheckMask[2]; DMA* DMAs[8]; u32 DMA9Fill[4]; @@ -108,8 +105,6 @@ u32 SqrtRes; u32 KeyInput; -u16 _soundbias; // temp - bool Running; @@ -132,6 +127,7 @@ bool Init() if (!NDSCart::Init()) return false; if (!GPU::Init()) return false; + if (!SPU::Init()) return false; if (!SPI::Init()) return false; if (!RTC::Init()) return false; @@ -151,6 +147,7 @@ void DeInit() NDSCart::DeInit(); GPU::DeInit(); + SPU::DeInit(); SPI::DeInit(); RTC::DeInit(); } @@ -221,9 +218,14 @@ void SetupDirectBoot() ARM9->JumpTo(bootparams[1]); ARM7->JumpTo(bootparams[5]); + PostFlag9 = 0x01; + PostFlag7 = 0x01; + PowerControl9 = 0x820F; GPU::DisplaySwap(PowerControl9); + SPU::SetBias(0x200); + ARM7BIOSProt = 0x1204; SPI_Firmware::SetupDirectBoot(); @@ -296,18 +298,12 @@ void Reset() CPUStop = 0; memset(Timers, 0, 8*sizeof(Timer)); + TimerCheckMask[0] = 0; + TimerCheckMask[1] = 0; for (i = 0; i < 8; i++) DMAs[i]->Reset(); memset(DMA9Fill, 0, 4*4); - NDSCart::Reset(); - GPU::Reset(); - SPI::Reset(); - RTC::Reset(); - Wifi::Reset(); - - // memset(SchedBuffer, 0, sizeof(SchedEvent)*SCHED_BUF_LEN); - // SchedQueue = NULL; memset(SchedList, 0, sizeof(SchedList)); SchedListMask = 0; @@ -319,7 +315,12 @@ void Reset() KeyInput = 0x007F03FF; - _soundbias = 0; + NDSCart::Reset(); + GPU::Reset(); + SPU::Reset(); + SPI::Reset(); + RTC::Reset(); + Wifi::Reset(); } void LoadROM(const char* path, bool direct) @@ -563,8 +564,16 @@ bool HaltInterrupted(u32 cpu) void StopCPU(u32 cpu, u32 mask) { - if (cpu) mask <<= 16; - CPUStop |= mask; + if (cpu) + { + CPUStop |= (mask << 16); + ARM7->Halt(2); + } + else + { + CPUStop |= mask; + ARM9->Halt(2); + } } void ResumeCPU(u32 cpu, u32 mask) @@ -611,8 +620,8 @@ void HandleTimerOverflow(u32 tid) void RunTimer(u32 tid, s32 
cycles) { Timer* timer = &Timers[tid]; - if ((timer->Cnt & 0x84) != 0x80) - return; + //if ((timer->Cnt & 0x84) != 0x80) + // return; u32 oldcount = timer->Counter; timer->Counter += (cycles << timer->CycleShift); @@ -622,10 +631,12 @@ void RunTimer(u32 tid, s32 cycles) void RunTimingCriticalDevices(u32 cpu, s32 cycles) { - RunTimer((cpu<<2)+0, cycles); - RunTimer((cpu<<2)+1, cycles); - RunTimer((cpu<<2)+2, cycles); - RunTimer((cpu<<2)+3, cycles); + register u32 timermask = TimerCheckMask[cpu]; + + if (timermask & 0x1) RunTimer((cpu<<2)+0, cycles); + if (timermask & 0x2) RunTimer((cpu<<2)+1, cycles); + if (timermask & 0x4) RunTimer((cpu<<2)+2, cycles); + if (timermask & 0x8) RunTimer((cpu<<2)+3, cycles); if (cpu == 0) { @@ -678,6 +689,11 @@ void TimerStart(u32 id, u16 cnt) { timer->Counter = timer->Reload << 16; } + + if ((cnt & 0x84) == 0x80) + TimerCheckMask[id>>2] |= (1<<(id&0x3)); + else + TimerCheckMask[id>>2] &= ~(1<<(id&0x3)); } @@ -808,8 +824,19 @@ void debug(u32 param) printf("ARM9 PC=%08X LR=%08X %08X\n", ARM9->R[15], ARM9->R[14], ARM9->R_IRQ[1]); printf("ARM7 PC=%08X LR=%08X %08X\n", ARM7->R[15], ARM7->R[14], ARM7->R_IRQ[1]); - for (int i = 0; i < 9; i++) - printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); + printf("ARM9 IME=%08X IE=%08X IF=%08X\n", IME[0], IE[0], IF[0]); + printf("ARM7 IME=%08X IE=%08X IF=%08X\n", IME[1], IE[1], IF[1]); + + //for (int i = 0; i < 9; i++) + // printf("VRAM %c: %02X\n", 'A'+i, GPU::VRAMCNT[i]); + + /*FILE* shit = fopen("debug/poke7.bin", "wb"); + for (u32 i = 0x02000000; i < 0x03810000; i+=4) + { + u32 val = ARM7Read32(i); + fwrite(&val, 4, 1, shit); + } + fclose(shit);*/ } @@ -904,7 +931,7 @@ u16 ARM9Read16(u32 addr) return 0xFFFF; } - //printf("unknown arm9 read16 %08X %08X %08X %08X\n", addr, ARM9->R[15], ARM9->R[1], ARM9->R[2]); + //printf("unknown arm9 read16 %08X %08X\n", addr, ARM9->R[15]); return 0; } @@ -1202,7 +1229,7 @@ void ARM7Write8(u32 addr, u8 val) return; } - printf("unknown arm7 write8 %08X %02X | %08X | 
%08X %08X %08X %08X\n", addr, val, ARM7->R[15], IME[1], IE[1], ARM7->R[0], ARM7->R[1]); + printf("unknown arm7 write8 %08X %02X @ %08X\n", addr, val, ARM7->R[15]); } void ARM7Write16(u32 addr, u16 val) @@ -1237,7 +1264,7 @@ void ARM7Write16(u32 addr, u16 val) return; } - printf("unknown arm7 write16 %08X %04X | %08X\n", addr, val, ARM7->R[15]); + printf("unknown arm7 write16 %08X %04X @ %08X\n", addr, val, ARM7->R[15]); } void ARM7Write32(u32 addr, u32 val) @@ -1268,7 +1295,7 @@ void ARM7Write32(u32 addr, u32 val) return; } - printf("unknown arm7 write32 %08X %08X | %08X %08X\n", addr, val, ARM7->R[15], ARM7->CurInstr); + printf("unknown arm7 write32 %08X %08X @ %08X\n", addr, val, ARM7->R[15]); } @@ -1278,6 +1305,9 @@ u8 ARM9IORead8(u32 addr) { switch (addr) { + case 0x04000130: return KeyInput & 0xFF; + case 0x04000131: return (KeyInput >> 8) & 0xFF; + case 0x040001A2: return NDSCart::ReadSPIData(); case 0x040001A8: return NDSCart::ROMCommand[0]; @@ -1402,11 +1432,11 @@ u16 ARM9IORead16(u32 addr) case 0x04000304: return PowerControl9; } - if (addr >= 0x04000000 && addr < 0x04000060) + if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C)) { return GPU::GPU2D_A->Read16(addr); } - if (addr >= 0x04001000 && addr < 0x04001060) + if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C)) { return GPU::GPU2D_B->Read16(addr); } @@ -1471,6 +1501,7 @@ u32 ARM9IORead32(u32 addr) case 0x04000244: return GPU::VRAMCNT[4] | (GPU::VRAMCNT[5] << 8) | (GPU::VRAMCNT[6] << 16) | (WRAMCnt << 24); case 0x04000248: return GPU::VRAMCNT[7] | (GPU::VRAMCNT[8] << 8); + case 0x04000280: return DivCnt; case 0x04000290: return DivNumerator[0]; case 0x04000294: return DivNumerator[1]; case 0x04000298: return DivDenominator[0]; @@ -1480,6 +1511,7 @@ u32 ARM9IORead32(u32 addr) case 0x040002A8: return DivRemainder[0]; case 0x040002AC: return DivRemainder[1]; + case 0x040002B0: return SqrtCnt; case 0x040002B4: return SqrtRes; case 0x040002B8: return SqrtVal[0]; case 
0x040002BC: return SqrtVal[1]; @@ -1510,11 +1542,11 @@ u32 ARM9IORead32(u32 addr) return 0; } - if (addr >= 0x04000000 && addr < 0x04000060) + if ((addr >= 0x04000000 && addr < 0x04000060) || (addr == 0x0400006C)) { return GPU::GPU2D_A->Read32(addr); } - if (addr >= 0x04001000 && addr < 0x04001060) + if ((addr >= 0x04001000 && addr < 0x04001060) || (addr == 0x0400106C)) { return GPU::GPU2D_B->Read32(addr); } @@ -1611,6 +1643,15 @@ void ARM9IOWrite16(u32 addr, u16 val) case 0x040000DC: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0xFFFF0000) | val); return; case 0x040000DE: DMAs[3]->WriteCnt((DMAs[3]->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x040000E0: DMA9Fill[0] = (DMA9Fill[0] & 0xFFFF0000) | val; return; + case 0x040000E2: DMA9Fill[0] = (DMA9Fill[0] & 0x0000FFFF) | (val << 16); return; + case 0x040000E4: DMA9Fill[1] = (DMA9Fill[1] & 0xFFFF0000) | val; return; + case 0x040000E6: DMA9Fill[1] = (DMA9Fill[1] & 0x0000FFFF) | (val << 16); return; + case 0x040000E8: DMA9Fill[2] = (DMA9Fill[2] & 0xFFFF0000) | val; return; + case 0x040000EA: DMA9Fill[2] = (DMA9Fill[2] & 0x0000FFFF) | (val << 16); return; + case 0x040000EC: DMA9Fill[3] = (DMA9Fill[3] & 0xFFFF0000) | val; return; + case 0x040000EE: DMA9Fill[3] = (DMA9Fill[3] & 0x0000FFFF) | (val << 16); return; + case 0x04000100: Timers[0].Reload = val; return; case 0x04000102: TimerStart(0, val); return; case 0x04000104: Timers[1].Reload = val; return; @@ -1629,7 +1670,6 @@ void ARM9IOWrite16(u32 addr, u16 val) { SetIRQ(1, IRQ_IPCSync); } - //CompensateARM7(); return; case 0x04000184: @@ -1838,6 +1878,10 @@ void ARM9IOWrite32(u32 addr, u32 val) GPU::MapVRAM_I(8, (val >> 8) & 0xFF); return; + case 0x04000280: DivCnt = val; StartDiv(); return; + + case 0x040002B0: SqrtCnt = val; StartSqrt(); return; + case 0x04000290: DivNumerator[0] = val; StartDiv(); return; case 0x04000294: DivNumerator[1] = val; StartDiv(); return; case 0x04000298: DivDenominator[0] = val; StartDiv(); return; @@ -1876,6 +1920,11 @@ u8 ARM7IORead8(u32 addr) 
{ switch (addr) { + case 0x04000130: return KeyInput & 0xFF; + case 0x04000131: return (KeyInput >> 8) & 0xFF; + case 0x04000136: return (KeyInput >> 16) & 0xFF; + case 0x04000137: return KeyInput >> 24; + case 0x04000138: return RTC::Read() & 0xFF; case 0x040001A2: return NDSCart::ReadSPIData(); @@ -1901,8 +1950,7 @@ u8 ARM7IORead8(u32 addr) if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O - return 0; + return SPU::Read8(addr); } printf("unknown ARM7 IO read8 %08X\n", addr); @@ -1972,14 +2020,11 @@ u16 ARM7IORead16(u32 addr) case 0x04000300: return PostFlag7; case 0x04000304: return PowerControl7; case 0x04000308: return ARM7BIOSProt; - - case 0x04000504: return _soundbias; } if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O - return 0; + return SPU::Read16(addr); } printf("unknown ARM7 IO read16 %08X %08X\n", addr, ARM9->R[15]); @@ -2057,8 +2102,7 @@ u32 ARM7IORead32(u32 addr) if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O - return 0; + return SPU::Read32(addr); } printf("unknown ARM7 IO read32 %08X\n", addr); @@ -2116,7 +2160,7 @@ void ARM7IOWrite8(u32 addr, u8 val) if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O + SPU::Write8(addr, val); return; } @@ -2147,7 +2191,7 @@ void ARM7IOWrite16(u32 addr, u16 val) case 0x0400010C: Timers[7].Reload = val; return; case 0x0400010E: TimerStart(7, val); return; - case 0x04000134: return;printf("set debug port %04X %08X\n", val, ARM7Read32(ARM7->R[13]+4)); return; + case 0x04000134: /* TODO? 
*/ return; case 0x04000138: RTC::Write(val, false); return; @@ -2228,15 +2272,11 @@ void ARM7IOWrite16(u32 addr, u16 val) if (ARM7BIOSProt == 0) ARM7BIOSProt = val; return; - - case 0x04000504: // removeme - _soundbias = val & 0x3FF; - return; } if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O + SPU::Write16(addr, val); return; } @@ -2326,7 +2366,7 @@ void ARM7IOWrite32(u32 addr, u32 val) if (addr >= 0x04000400 && addr < 0x04000520) { - // sound I/O + SPU::Write32(addr, val); return; } @@ -24,30 +24,12 @@ namespace NDS { -/*#define SCHED_BUF_LEN 64 - -typedef struct _SchedEvent -{ - u32 Delay; - void (*Func)(u32); - u32 Param; - struct _SchedEvent* PrevEvent; - struct _SchedEvent* NextEvent; - -} SchedEvent;*/ - enum { Event_LCD = 0, + Event_SPU, - /*Event_Timer9_0, - Event_Timer9_1, - Event_Timer9_2, - Event_Timer9_3, - Event_Timer7_0, - Event_Timer7_1, - Event_Timer7_2, - Event_Timer7_3,*/ + Event_ROMTransfer, Event_MAX }; @@ -95,7 +77,6 @@ typedef struct u16 Cnt; u32 Counter; u32 CycleShift; - //SchedEvent* Event; } Timer; @@ -112,6 +93,8 @@ extern u8 ROMSeed1[2*8]; extern u8 ARM9BIOS[0x1000]; extern u8 ARM7BIOS[0x4000]; +extern u8 MainRAM[0x400000]; + bool Init(); void DeInit(); void Reset(); @@ -127,15 +110,9 @@ void ReleaseKey(u32 key); void TouchScreen(u16 x, u16 y); void ReleaseScreen(); -/*SchedEvent* ScheduleEvent(s32 Delay, void (*Func)(u32), u32 Param); -void CancelEvent(SchedEvent* event); -void RunEvents(s32 cycles);*/ void ScheduleEvent(u32 id, bool periodic, s32 delay, void (*func)(u32), u32 param); void CancelEvent(u32 id); -// DO NOT CALL FROM ARM7!! 
-void CompensateARM7(); - void debug(u32 p); void Halt(); diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp index f291d7b..696666b 100644 --- a/src/NDSCart.cpp +++ b/src/NDSCart.cpp @@ -246,7 +246,7 @@ void Write_Discover(u8 val, bool islast) { Discover_MemoryType = 5; } - else if (len > 2+128) // Flash + else if ((len > 2+128) || (len > 1+16 && CurCmd == 0xA)) // Flash { Discover_MemoryType = 4; } @@ -284,7 +284,7 @@ void Write_EEPROMTiny(u8 val, bool islast) } else { - SRAM[(Addr & 0xFF) | ((CurCmd==0x0A)?0x100:0)] = val; + SRAM[(Addr + ((CurCmd==0x0A)?0x100:0)) & 0x1FF] = val; Addr++; } break; @@ -298,7 +298,7 @@ void Write_EEPROMTiny(u8 val, bool islast) } else { - Data = SRAM[(Addr & 0xFF) | ((CurCmd==0x0B)?0x100:0)]; + Data = SRAM[(Addr + ((CurCmd==0x0B)?0x100:0)) & 0x1FF]; Addr++; } break; @@ -471,11 +471,20 @@ void Write(u8 val, u32 hold) switch (CurCmd) { + case 0x00: + // Pokémon carts have an IR transceiver thing, and send this + // to bypass it and access SRAM. + // TODO: design better + CurCmd = val; + break; + case 0x02: case 0x03: case 0x0A: case 0x0B: case 0x9F: + case 0xD8: + case 0xDB: WriteFunc(val, islast); DataPos++; break; @@ -496,7 +505,7 @@ void Write(u8 val, u32 hold) default: if (DataPos==0) - printf("unknown save SPI command %02X\n", CurCmd); + printf("unknown save SPI command %02X %08X\n", CurCmd); break; } @@ -801,9 +810,8 @@ void ReadROM_B7(u32 addr, u32 len, u32 offset) } -void EndTransfer() +void ROMEndTransfer(u32 param) { - ROMCnt &= ~(1<<23); ROMCnt &= ~(1<<31); if (SPICnt & (1<<14)) @@ -820,16 +828,16 @@ void ROMPrepareData(u32 param) DataOutPos += 4; ROMCnt |= (1<<23); - NDS::CheckDMAs(0, 0x06); - NDS::CheckDMAs(1, 0x12); - //if (DataOutPos < DataOutLen) - // NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 
8:5, ROMPrepareData, 0); + if (NDS::ExMemCnt[0] & (1<<11)) + NDS::CheckDMAs(1, 0x12); + else + NDS::CheckDMAs(0, 0x05); } void WriteROMCnt(u32 val) { - ROMCnt = val & 0xFF7F7FFF; + ROMCnt = (val & 0xFF7F7FFF) | (ROMCnt & 0x00800000); if (!(SPICnt & (1<<15))) return; @@ -958,52 +966,43 @@ void WriteROMCnt(u32 val) break; } - //ROMCnt &= ~(1<<23); - ROMCnt |= (1<<23); + ROMCnt &= ~(1<<23); + + // ROM transfer timings + // the bus is parallel with 8 bits + // thus a command would take 8 cycles to be transferred + // and it would take 4 cycles to receive a word of data + // TODO: advance read position if bit28 is set + + u32 xfercycle = (ROMCnt & (1<<27)) ? 8 : 5; + u32 cmddelay = 8 + (ROMCnt & 0x1FFF); + if (datasize) cmddelay += ((ROMCnt >> 16) & 0x3F); if (datasize == 0) - EndTransfer(); + NDS::ScheduleEvent(NDS::Event_ROMTransfer, false, xfercycle*cmddelay, ROMEndTransfer, 0); else - { - NDS::CheckDMAs(0, 0x05); - NDS::CheckDMAs(1, 0x12); - } - //NDS::ScheduleEvent((ROMCnt & (1<<27)) ? 8:5, ROMPrepareData, 0); + NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*(cmddelay+4), ROMPrepareData, 0); } u32 ReadROMData() { - /*if (ROMCnt & (1<<23)) + if (ROMCnt & (1<<23)) { ROMCnt &= ~(1<<23); - if (DataOutPos >= DataOutLen) - EndTransfer(); - } - - return ROMDataOut;*/ - u32 ret; - if (DataOutPos >= DataOutLen) - ret = 0; - else - ret = *(u32*)&DataOut[DataOutPos]; - - DataOutPos += 4; - if (DataOutPos == DataOutLen) - EndTransfer(); - - return ret; -} + if (DataOutPos < DataOutLen) + { + u32 xfercycle = (ROMCnt & (1<<27)) ? 8 : 5; + u32 delay = 4; + if (!(DataOutPos & 0x1FF)) delay += ((ROMCnt >> 16) & 0x3F); -void DMA(u32 addr) -{ - void (*writefn)(u32,u32) = (NDS::ExMemCnt[0] & (1<<11)) ? 
NDS::ARM7Write32 : NDS::ARM9Write32; - for (u32 i = 0; i < DataOutLen; i+=4) - { - writefn(addr+i, *(u32*)&DataOut[i]); + NDS::ScheduleEvent(NDS::Event_ROMTransfer, true, xfercycle*delay, ROMPrepareData, 0); + } + else + ROMEndTransfer(0); } - EndTransfer(); + return ROMDataOut; } diff --git a/src/NDSCart.h b/src/NDSCart.h index 5125ffa..5bec38d 100644 --- a/src/NDSCart.h +++ b/src/NDSCart.h @@ -44,7 +44,6 @@ bool LoadROM(const char* path, bool direct); void WriteROMCnt(u32 val); u32 ReadROMData(); -void DMA(u32 addr); void WriteSPICnt(u16 val); u8 ReadSPIData(); diff --git a/src/RTC.cpp b/src/RTC.cpp index 842fdae..3d45bef 100644 --- a/src/RTC.cpp +++ b/src/RTC.cpp @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> +#include <time.h> #include "RTC.h" @@ -73,6 +74,12 @@ void Reset() } +u8 BCD(u8 val) +{ + return (val % 10) | ((val / 10) << 4); +} + + void ByteIn(u8 val) { //printf("RTC IN: %02X\n", val); @@ -94,21 +101,33 @@ void ByteIn(u8 val) case 0x40: Output[0] = StatusReg2; break; case 0x20: - // TODO: get actual system time - Output[0] = 0x17; - Output[1] = 0x01; - Output[2] = 0x19; - Output[3] = 0x04; // day of week. checkme. 
apparently 04=Thursday - Output[4] = 0x06; - Output[5] = 0x30; - Output[6] = 0x30; + { + time_t timestamp; + struct tm* timedata; + time(&timestamp); + timedata = localtime(&timestamp); + + Output[0] = BCD(timedata->tm_year - 100); + Output[1] = BCD(timedata->tm_mon + 1); + Output[2] = BCD(timedata->tm_mday); + Output[3] = BCD(timedata->tm_wday); + Output[4] = BCD(timedata->tm_hour); + Output[5] = BCD(timedata->tm_min); + Output[6] = BCD(timedata->tm_sec); + } break; case 0x60: - // TODO: get actual system time - Output[0] = 0x06; - Output[1] = 0x30; - Output[2] = 0x30; + { + time_t timestamp; + struct tm* timedata; + time(&timestamp); + timedata = localtime(&timestamp); + + Output[0] = BCD(timedata->tm_hour); + Output[1] = BCD(timedata->tm_min); + Output[2] = BCD(timedata->tm_sec); + } break; case 0x10: diff --git a/src/SPI.cpp b/src/SPI.cpp index 3e77027..2c88197 100644 --- a/src/SPI.cpp +++ b/src/SPI.cpp @@ -164,6 +164,10 @@ void SetupDirectBoot() NDS::ARM9Write32(0x027FFC80+i, *(u32*)&Firmware[UserSettings+i]); } +u8 GetConsoleType() { return Firmware[0x1D]; } +u8 GetWifiVersion() { return Firmware[0x2F]; } +u8 GetRFVersion() { return Firmware[0x40]; } + u8 Read() { return Data; @@ -24,6 +24,10 @@ namespace SPI_Firmware void SetupDirectBoot(); +u8 GetConsoleType(); +u8 GetWifiVersion(); +u8 GetRFVersion(); + } namespace SPI_TSC diff --git a/src/SPU.cpp b/src/SPU.cpp new file mode 100644 index 0000000..002cde6 --- /dev/null +++ b/src/SPU.cpp @@ -0,0 +1,811 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#include <stdio.h> +#include <string.h> +#include "NDS.h" +#include "SPU.h" + + +namespace SPU +{ + +const s8 ADPCMIndexTable[8] = {-1, -1, -1, -1, 2, 4, 6, 8}; + +const u16 ADPCMTable[89] = +{ + 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, + 0x0010, 0x0011, 0x0013, 0x0015, 0x0017, 0x0019, 0x001C, 0x001F, + 0x0022, 0x0025, 0x0029, 0x002D, 0x0032, 0x0037, 0x003C, 0x0042, + 0x0049, 0x0050, 0x0058, 0x0061, 0x006B, 0x0076, 0x0082, 0x008F, + 0x009D, 0x00AD, 0x00BE, 0x00D1, 0x00E6, 0x00FD, 0x0117, 0x0133, + 0x0151, 0x0173, 0x0198, 0x01C1, 0x01EE, 0x0220, 0x0256, 0x0292, + 0x02D4, 0x031C, 0x036C, 0x03C3, 0x0424, 0x048E, 0x0502, 0x0583, + 0x0610, 0x06AB, 0x0756, 0x0812, 0x08E0, 0x09C3, 0x0ABD, 0x0BD0, + 0x0CFF, 0x0E4C, 0x0FBA, 0x114C, 0x1307, 0x14EE, 0x1706, 0x1954, + 0x1BDC, 0x1EA5, 0x21B6, 0x2515, 0x28CA, 0x2CDF, 0x315B, 0x364B, + 0x3BB9, 0x41B2, 0x4844, 0x4F7E, 0x5771, 0x602F, 0x69CE, 0x7462, + 0x7FFF +}; + +const s16 PSGTable[8][8] = +{ + {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF}, + {-0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF, -0x7FFF} +}; + +const u32 OutputBufferSize = 2*1024; +s16 OutputBuffer[2 * OutputBufferSize]; +u32 OutputReadOffset; +u32 OutputWriteOffset; + + +u16 Cnt; +u8 MasterVolume; +u16 Bias; + +Channel* Channels[16]; +CaptureUnit* Capture[2]; + + +bool Init() +{ + for (int i 
= 0; i < 16; i++) + Channels[i] = new Channel(i); + + Capture[0] = new CaptureUnit(0); + Capture[1] = new CaptureUnit(1); + + return true; +} + +void DeInit() +{ + for (int i = 0; i < 16; i++) + delete Channels[i]; + + delete Capture[0]; + delete Capture[1]; +} + +void Reset() +{ + memset(OutputBuffer, 0, 2*OutputBufferSize*2); + OutputReadOffset = 0; + OutputWriteOffset = 0; + + Cnt = 0; + MasterVolume = 0; + Bias = 0; + + for (int i = 0; i < 16; i++) + Channels[i]->Reset(); + + Capture[0]->Reset(); + Capture[1]->Reset(); + + NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*16, Mix, 16); +} + + +void SetBias(u16 bias) +{ + Bias = bias; +} + + +Channel::Channel(u32 num) +{ + Num = num; +} + +Channel::~Channel() +{ +} + +void Channel::Reset() +{ + SetCnt(0); + SrcAddr = 0; + TimerReload = 0; + LoopPos = 0; + Length = 0; + + Timer = 0; +} + +void Channel::Start() +{ + Timer = TimerReload; + + if (((Cnt >> 29) & 0x3) == 3) + Pos = -1; + else + Pos = -3; + + NoiseVal = 0x7FFF; + CurSample = 0; +} + +void Channel::NextSample_PCM8() +{ + Pos++; + if (Pos < 0) return; + if (Pos >= (LoopPos + Length)) + { + // TODO: what happens when mode 3 is used? + u32 repeat = (Cnt >> 27) & 0x3; + if (repeat == 2) + { + CurSample = 0; + Cnt &= ~(1<<31); + return; + } + else if (repeat == 1) + { + Pos = LoopPos; + } + } + + s8 val = (s8)NDS::ARM7Read8(SrcAddr + Pos); + CurSample = val << 8; +} + +void Channel::NextSample_PCM16() +{ + Pos++; + if (Pos < 0) return; + if ((Pos<<1) >= (LoopPos + Length)) + { + // TODO: what happens when mode 3 is used? 
+ u32 repeat = (Cnt >> 27) & 0x3; + if (repeat == 2) + { + CurSample = 0; + Cnt &= ~(1<<31); + return; + } + else if (repeat == 1) + { + Pos = LoopPos>>1; + } + } + + s16 val = (s16)NDS::ARM7Read16(SrcAddr + (Pos<<1)); + CurSample = val; +} + +void Channel::NextSample_ADPCM() +{ + Pos++; + if (Pos < 8) + { + if (Pos == 0) + { + // setup ADPCM + u32 header = NDS::ARM7Read32(SrcAddr); + ADPCMVal = header & 0xFFFF; + ADPCMIndex = (header >> 16) & 0x7F; + if (ADPCMIndex > 88) ADPCMIndex = 88; + + ADPCMValLoop = ADPCMVal; + ADPCMIndexLoop = ADPCMIndex; + } + + return; + } + + if ((Pos>>1) >= (LoopPos + Length)) + { + // TODO: what happens when mode 3 is used? + u32 repeat = (Cnt >> 27) & 0x3; + if (repeat == 2) + { + CurSample = 0; + Cnt &= ~(1<<31); + return; + } + else if (repeat == 1) + { + Pos = LoopPos<<1; + ADPCMVal = ADPCMValLoop; + ADPCMIndex = ADPCMIndexLoop; + } + } + else + { + if (!(Pos & 0x1)) + ADPCMCurByte = NDS::ARM7Read8(SrcAddr + (Pos>>1)); + else + ADPCMCurByte >>= 4; + + u16 val = ADPCMTable[ADPCMIndex]; + u16 diff = val >> 3; + if (ADPCMCurByte & 0x1) diff += (val >> 2); + if (ADPCMCurByte & 0x2) diff += (val >> 1); + if (ADPCMCurByte & 0x4) diff += val; + + if (ADPCMCurByte & 0x8) + { + ADPCMVal -= diff; + if (ADPCMVal < -0x7FFF) ADPCMVal = -0x7FFF; + } + else + { + ADPCMVal += diff; + if (ADPCMVal > 0x7FFF) ADPCMVal = 0x7FFF; + } + + ADPCMIndex += ADPCMIndexTable[ADPCMCurByte & 0x7]; + if (ADPCMIndex < 0) ADPCMIndex = 0; + else if (ADPCMIndex > 88) ADPCMIndex = 88; + + if (Pos == (LoopPos<<1)) + { + ADPCMValLoop = ADPCMVal; + ADPCMIndexLoop = ADPCMIndex; + } + } + + CurSample = ADPCMVal; +} + +void Channel::NextSample_PSG() +{ + Pos++; + CurSample = PSGTable[(Cnt >> 24) & 0x7][Pos & 0x7]; +} + +void Channel::NextSample_Noise() +{ + if (NoiseVal & 0x1) + { + NoiseVal = (NoiseVal >> 1) ^ 0x6000; + CurSample = -0x7FFF; + } + else + { + NoiseVal >>= 1; + CurSample = 0x7FFF; + } +} + +template<u32 type> +void Channel::Run(s32* buf, u32 samples) +{ + 
for (u32 s = 0; s < samples; s++) + buf[s] = 0; + + for (u32 s = 0; s < samples; s++) + { + Timer += 512; // 1 sample = 512 cycles at 16MHz + + while (Timer >> 16) + { + Timer = TimerReload + (Timer - 0x10000); + + switch (type) + { + case 0: NextSample_PCM8(); break; + case 1: NextSample_PCM16(); break; + case 2: NextSample_ADPCM(); break; + case 3: NextSample_PSG(); break; + case 4: NextSample_Noise(); break; + } + } + + s32 val = (s32)CurSample; + val <<= VolumeShift; + val *= Volume; + buf[s] = val; + + if (!(Cnt & (1<<31))) break; + } +} + + +CaptureUnit::CaptureUnit(u32 num) +{ + Num = num; +} + +CaptureUnit::~CaptureUnit() +{ +} + +void CaptureUnit::Reset() +{ + SetCnt(0); + DstAddr = 0; + TimerReload = 0; + Length = 0; + + Timer = 0; +} + +void CaptureUnit::Run(s32 sample) +{ + Timer += 512; + + if (Cnt & 0x08) + { + while (Timer >> 16) + { + Timer = TimerReload + (Timer - 0x10000); + + NDS::ARM7Write8(DstAddr + Pos, (u8)(sample >> 8)); + Pos++; + if (Pos >= Length) + { + if (Cnt & 0x04) + { + Cnt &= 0x7F; + return; + } + else + Pos = 0; + } + } + } + else + { + while (Timer >> 16) + { + Timer = TimerReload + (Timer - 0x10000); + + NDS::ARM7Write16(DstAddr + Pos, (u16)sample); + Pos += 2; + if (Pos >= Length) + { + if (Cnt & 0x04) + { + Cnt &= 0x7F; + return; + } + else + Pos = 0; + } + } + } +} + + +void Mix(u32 samples) +{ + s32 channelbuf[32]; + s32 leftbuf[32], rightbuf[32]; + s32 ch1buf[32], ch3buf[32]; + s32 leftoutput[32], rightoutput[32]; + + for (u32 s = 0; s < samples; s++) + { + leftbuf[s] = 0; rightbuf[s] = 0; + leftoutput[s] = 0; rightoutput[s] = 0; + } + + if (Cnt & (1<<15)) + { + u32 mixermask = 0xFFFF; + if (Cnt & (1<<12)) mixermask &= ~(1<<1); + if (Cnt & (1<<13)) mixermask &= ~(1<<3); + + for (int i = 0; i < 16; i++) + { + if (!(mixermask & (1<<i))) continue; + Channel* chan = Channels[i]; + if (!(chan->Cnt & (1<<31))) continue; + + // TODO: what happens if we use type 3 on channels 0-7?? 
+ chan->DoRun(channelbuf, samples); + + for (u32 s = 0; s < samples; s++) + { + s32 val = (s32)channelbuf[s]; + + s32 l = ((s64)val * (128-chan->Pan)) >> 10; + s32 r = ((s64)val * chan->Pan) >> 10; + + leftbuf[s] += l; + rightbuf[s] += r; + } + } + + // sound capture + // TODO: other sound capture sources, along with their bugs + + if (Capture[0]->Cnt & (1<<7)) + { + for (u32 s = 0; s < samples; s++) + { + s32 val = leftbuf[s]; + + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; + + Capture[0]->Run(val); + if (!((Capture[0]->Cnt & (1<<7)))) break; + } + } + + if (Capture[1]->Cnt & (1<<7)) + { + for (u32 s = 0; s < samples; s++) + { + s32 val = rightbuf[s]; + + val >>= 8; + if (val < -0x8000) val = -0x8000; + else if (val > 0x7FFF) val = 0x7FFF; + + Capture[1]->Run(val); + if (!((Capture[1]->Cnt & (1<<7)))) break; + } + } + + // final output + + if (Cnt & 0x0500) + { + // mix channel 1 if needed + Channels[1]->DoRun(ch1buf, samples); + } + if (Cnt & 0x0A00) + { + // mix channel 3 if needed + Channels[3]->DoRun(ch3buf, samples); + } + + switch (Cnt & 0x0300) + { + case 0x0000: // left mixer + { + for (u32 s = 0; s < samples; s++) + leftoutput[s] = leftbuf[s]; + } + break; + case 0x0100: // channel 1 + { + s32 pan = 128 - Channels[1]->Pan; + for (u32 s = 0; s < samples; s++) + leftoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + } + break; + case 0x0200: // channel 3 + { + s32 pan = 128 - Channels[3]->Pan; + for (u32 s = 0; s < samples; s++) + leftoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + } + break; + case 0x0300: // channel 1+3 + { + s32 pan1 = 128 - Channels[1]->Pan; + s32 pan3 = 128 - Channels[3]->Pan; + for (u32 s = 0; s < samples; s++) + leftoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + } + break; + } + + switch (Cnt & 0x0C00) + { + case 0x0000: // right mixer + { + for (u32 s = 0; s < samples; s++) + rightoutput[s] = rightbuf[s]; + } + break; + case 0x0400: // channel 1 + { + s32 pan = 
Channels[1]->Pan; + for (u32 s = 0; s < samples; s++) + rightoutput[s] = ((s64)ch1buf[s] * pan) >> 10; + } + break; + case 0x0800: // channel 3 + { + s32 pan = Channels[3]->Pan; + for (u32 s = 0; s < samples; s++) + rightoutput[s] = ((s64)ch3buf[s] * pan) >> 10; + } + break; + case 0x0C00: // channel 1+3 + { + s32 pan1 = Channels[1]->Pan; + s32 pan3 = Channels[3]->Pan; + for (u32 s = 0; s < samples; s++) + rightoutput[s] = (((s64)ch1buf[s] * pan1) >> 10) + (((s64)ch3buf[s] * pan3) >> 10); + } + break; + } + } + + for (u32 s = 0; s < samples; s++) + { + s32 l = leftoutput[s]; + s32 r = rightoutput[s]; + + l = ((s64)l * MasterVolume) >> 7; + r = ((s64)r * MasterVolume) >> 7; + + l >>= 8; + if (l < -0x8000) l = -0x8000; + else if (l > 0x7FFF) l = 0x7FFF; + r >>= 8; + if (r < -0x8000) r = -0x8000; + else if (r > 0x7FFF) r = 0x7FFF; + + OutputBuffer[OutputWriteOffset ] = l >> 1; + OutputBuffer[OutputWriteOffset + 1] = r >> 1; + OutputWriteOffset += 2; + OutputWriteOffset &= ((2*OutputBufferSize)-1); + } + + + NDS::ScheduleEvent(NDS::Event_SPU, true, 1024*16, Mix, 16); +} + + +void ReadOutput(s16* data, int samples) +{ + for (int i = 0; i < samples; i++) + { + *data++ = OutputBuffer[OutputReadOffset]; + *data++ = OutputBuffer[OutputReadOffset + 1]; + + if (OutputReadOffset != OutputWriteOffset) + { + OutputReadOffset += 2; + OutputReadOffset &= ((2*OutputBufferSize)-1); + } + } +} + + +u8 Read8(u32 addr) +{ + if (addr < 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: return chan->Cnt & 0xFF; + case 0x1: return (chan->Cnt >> 8) & 0xFF; + case 0x2: return (chan->Cnt >> 16) & 0xFF; + case 0x3: return chan->Cnt >> 24; + } + } + else + { + switch (addr) + { + case 0x04000500: return Cnt & 0x7F; + case 0x04000501: return Cnt >> 8; + + case 0x04000508: return Capture[0]->Cnt; + case 0x04000509: return Capture[1]->Cnt; + } + } + + printf("unknown SPU read8 %08X\n", addr); + return 0; +} + +u16 Read16(u32 addr) +{ + if (addr 
< 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: return chan->Cnt & 0xFFFF; + case 0x2: return chan->Cnt >> 16; + } + } + else + { + switch (addr) + { + case 0x04000500: return Cnt; + case 0x04000504: return Bias; + + case 0x04000508: return Capture[0]->Cnt | (Capture[1]->Cnt << 8); + } + } + + printf("unknown SPU read16 %08X\n", addr); + return 0; +} + +u32 Read32(u32 addr) +{ + if (addr < 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: return chan->Cnt; + } + } + else + { + switch (addr) + { + case 0x04000500: return Cnt; + case 0x04000504: return Bias; + + case 0x04000508: return Capture[0]->Cnt | (Capture[1]->Cnt << 8); + + case 0x04000510: return Capture[0]->DstAddr; + case 0x04000518: return Capture[1]->DstAddr; + } + } + + printf("unknown SPU read32 %08X\n", addr); + return 0; +} + +void Write8(u32 addr, u8 val) +{ + if (addr < 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: chan->SetCnt((chan->Cnt & 0xFFFFFF00) | val); return; + case 0x1: chan->SetCnt((chan->Cnt & 0xFFFF00FF) | (val << 8)); return; + case 0x2: chan->SetCnt((chan->Cnt & 0xFF00FFFF) | (val << 16)); return; + case 0x3: chan->SetCnt((chan->Cnt & 0x00FFFFFF) | (val << 24)); return; + } + } + else + { + switch (addr) + { + case 0x04000500: + Cnt = (Cnt & 0xBF00) | (val & 0x7F); + MasterVolume = Cnt & 0x7F; + if (MasterVolume == 127) MasterVolume++; + return; + case 0x04000501: + Cnt = (Cnt & 0x007F) | ((val & 0xBF) << 8); + return; + + case 0x04000508: + Capture[0]->SetCnt(val); + if (val & 0x03) printf("!! UNSUPPORTED SPU CAPTURE MODE %02X\n", val); + return; + case 0x04000509: + Capture[1]->SetCnt(val); + if (val & 0x03) printf("!! 
UNSUPPORTED SPU CAPTURE MODE %02X\n", val); + return; + } + } + + printf("unknown SPU write8 %08X %02X\n", addr, val); +} + +void Write16(u32 addr, u16 val) +{ + if (addr < 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: chan->SetCnt((chan->Cnt & 0xFFFF0000) | val); return; + case 0x2: chan->SetCnt((chan->Cnt & 0x0000FFFF) | (val << 16)); return; + case 0x8: + chan->SetTimerReload(val); + if ((addr & 0xF0) == 0x10) Capture[0]->SetTimerReload(val); + else if ((addr & 0xF0) == 0x30) Capture[1]->SetTimerReload(val); + return; + case 0xA: chan->SetLoopPos(val); return; + + case 0xC: chan->SetLength((chan->Length & 0xFFFF0000) | val); return; + case 0xE: chan->SetLength((chan->Length & 0x0000FFFF) | (val << 16)); return; + } + } + else + { + switch (addr) + { + case 0x04000500: + Cnt = val & 0xBF7F; + MasterVolume = Cnt & 0x7F; + if (MasterVolume == 127) MasterVolume++; + return; + + case 0x04000504: + Bias = val & 0x3FF; + return; + + case 0x04000508: + Capture[0]->SetCnt(val & 0xFF); + Capture[1]->SetCnt(val >> 8); + if (val & 0x0303) printf("!! 
UNSUPPORTED SPU CAPTURE MODE %04X\n", val); + return; + + case 0x04000514: Capture[0]->SetLength(val); return; + case 0x0400051C: Capture[1]->SetLength(val); return; + } + } + + printf("unknown SPU write16 %08X %04X\n", addr, val); +} + +void Write32(u32 addr, u32 val) +{ + if (addr < 0x04000500) + { + Channel* chan = Channels[(addr >> 4) & 0xF]; + + switch (addr & 0xF) + { + case 0x0: chan->SetCnt(val); return; + case 0x4: chan->SetSrcAddr(val); return; + case 0x8: + chan->SetLoopPos(val >> 16); + val &= 0xFFFF; + chan->SetTimerReload(val); + if ((addr & 0xF0) == 0x10) Capture[0]->SetTimerReload(val); + else if ((addr & 0xF0) == 0x30) Capture[1]->SetTimerReload(val); + return; + case 0xC: chan->SetLength(val); return; + } + } + else + { + switch (addr) + { + case 0x04000500: + Cnt = val & 0xBF7F; + MasterVolume = Cnt & 0x7F; + if (MasterVolume == 127) MasterVolume++; + return; + + case 0x04000504: + Bias = val & 0x3FF; + return; + + case 0x04000508: + Capture[0]->SetCnt(val & 0xFF); + Capture[1]->SetCnt(val >> 8); + if (val & 0x0303) printf("!! UNSUPPORTED SPU CAPTURE MODE %04X\n", val); + return; + + case 0x04000510: Capture[0]->SetDstAddr(val); return; + case 0x04000514: Capture[0]->SetLength(val & 0xFFFF); return; + case 0x04000518: Capture[1]->SetDstAddr(val); return; + case 0x0400051C: Capture[1]->SetLength(val & 0xFFFF); return; + } + } +} + +} diff --git a/src/SPU.h b/src/SPU.h new file mode 100644 index 0000000..a00e094 --- /dev/null +++ b/src/SPU.h @@ -0,0 +1,160 @@ +/* + Copyright 2016-2017 StapleButter + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef SPU_H +#define SPU_H + +namespace SPU +{ + +bool Init(); +void DeInit(); +void Reset(); + +void SetBias(u16 bias); + +void Mix(u32 samples); + +void ReadOutput(s16* data, int samples); + +u8 Read8(u32 addr); +u16 Read16(u32 addr); +u32 Read32(u32 addr); +void Write8(u32 addr, u8 val); +void Write16(u32 addr, u16 val); +void Write32(u32 addr, u32 val); + +class Channel +{ +public: + Channel(u32 num); + ~Channel(); + void Reset(); + + u32 Num; + + u32 Cnt; + u32 SrcAddr; + u16 TimerReload; + u32 LoopPos; + u32 Length; + + u8 Volume; + u8 VolumeShift; + u8 Pan; + + u32 Timer; + s32 Pos; + s16 CurSample; + u16 NoiseVal; + + s32 ADPCMVal; + s32 ADPCMIndex; + s32 ADPCMValLoop; + s32 ADPCMIndexLoop; + u8 ADPCMCurByte; + + void SetCnt(u32 val) + { + u32 oldcnt = Cnt; + Cnt = val & 0xFF7F837F; + + Volume = Cnt & 0x7F; + if (Volume == 127) Volume++; + + const u8 volshift[4] = {4, 3, 2, 0}; + VolumeShift = volshift[(Cnt >> 8) & 0x3]; + + Pan = (Cnt >> 16) & 0x7F; + if (Pan == 127) Pan++; + + if ((val & (1<<31)) && !(oldcnt & (1<<31))) + { + Start(); + } + } + + void SetSrcAddr(u32 val) { SrcAddr = val & 0x07FFFFFC; } + void SetTimerReload(u32 val) { TimerReload = val & 0xFFFF; } + void SetLoopPos(u32 val) { LoopPos = (val & 0xFFFF) << 2; } + void SetLength(u32 val) { Length = (val & 0x001FFFFF) << 2; } + + void Start(); + + void NextSample_PCM8(); + void NextSample_PCM16(); + void NextSample_ADPCM(); + void NextSample_PSG(); + void NextSample_Noise(); + + template<u32 type> void Run(s32* buf, u32 samples); + + void DoRun(s32* buf, u32 samples) + { + switch ((Cnt >> 29) & 0x3) + { + case 0: 
Run<0>(buf, samples); break; + case 1: Run<1>(buf, samples); break; + case 2: Run<2>(buf, samples); break; + case 3: + if (Num >= 14) Run<4>(buf, samples); + else if (Num >= 8) Run<3>(buf, samples); + break; + } + } +}; + +class CaptureUnit +{ +public: + CaptureUnit(u32 num); + ~CaptureUnit(); + void Reset(); + + u32 Num; + + u8 Cnt; + u32 DstAddr; + u16 TimerReload; + u32 Length; + + u32 Timer; + s32 Pos; + + void SetCnt(u8 val) + { + if ((val & 0x80) && !(Cnt & 0x80)) + Start(); + + val &= 0x8F; + if (!(val & 0x80)) val &= ~0x01; + Cnt = val; + } + + void SetDstAddr(u32 val) { DstAddr = val & 0x07FFFFFC; } + void SetTimerReload(u32 val) { TimerReload = val & 0xFFFF; } + void SetLength(u32 val) { Length = val << 2; if (Length == 0) Length = 4; } + + void Start() { Timer = TimerReload; } + + void Run(s32 sample); +}; + +} + +#endif // SPU_H diff --git a/src/Wifi.cpp b/src/Wifi.cpp index 0f1c239..99d0017 100644 --- a/src/Wifi.cpp +++ b/src/Wifi.cpp @@ -19,22 +19,39 @@ #include <stdio.h> #include <string.h> #include "NDS.h" +#include "SPI.h" #include "Wifi.h" namespace Wifi { +u8 RAM[0x2000]; +u16 IO[0x1000>>1]; + +#define IOPORT(x) IO[(x)>>1] + +u16 Random; + u16 BBCnt; u8 BBWrite; u8 BBRegs[0x100]; u8 BBRegsRO[0x100]; +u8 RFVersion; +u16 RFCnt; +u16 RFData1; +u16 RFData2; +u32 RFRegs[0x40]; + void Reset() { - BBCnt = 0; - BBWrite = 0; + memset(RAM, 0, 0x2000); + memset(IO, 0, 0x1000); + + Random = 1; + memset(BBRegs, 0, 0x100); memset(BBRegsRO, 0, 0x100); @@ -65,56 +82,230 @@ void Reset() BBREG_FIXED(i, 0x00); } #undef BBREG_FIXED + + RFVersion = SPI_Firmware::GetRFVersion(); + memset(RFRegs, 0, 4*0x40); + + memset(&IOPORT(0x018), 0xFF, 6); + memset(&IOPORT(0x020), 0xFF, 6); } +void RFTransfer_Type2() +{ + u32 id = (IOPORT(W_RFData2) >> 2) & 0x1F; + + if (IOPORT(W_RFData2) & 0x0080) + { + u32 data = RFRegs[id]; + IOPORT(W_RFData1) = data & 0xFFFF; + IOPORT(W_RFData2) = (IOPORT(W_RFData2) & 0xFFFC) | ((data >> 16) & 0x3); + } + else + { + u32 data = 
IOPORT(W_RFData1) | ((IOPORT(W_RFData2) & 0x0003) << 16); + RFRegs[id] = data; + } +} + +void RFTransfer_Type3() +{ + u32 id = (IOPORT(W_RFData1) >> 8) & 0x3F; + + u32 cmd = IOPORT(W_RFData2) & 0xF; + if (cmd == 6) + { + IOPORT(W_RFData1) = (IOPORT(W_RFData1) & 0xFF00) | (RFRegs[id] & 0xFF); + } + else if (cmd == 5) + { + u32 data = IOPORT(W_RFData1) & 0xFF; + RFRegs[id] = data; + } +} + + +// TODO: wifi waitstates + u16 Read(u32 addr) { - addr &= 0x7FFF; + addr &= 0x7FFE; + //printf("WIFI: read %08X\n", addr); + if (addr >= 0x4000 && addr < 0x6000) + { + return *(u16*)&RAM[addr & 0x1FFE]; + } switch (addr) { - case 0x158: - return BBCnt; + case W_Random: // random generator. not accurate + Random = (Random & 0x1) ^ (((Random & 0x3FF) << 1) | (Random >> 10)); + return Random; - case 0x15C: - if ((BBCnt & 0xF000) != 0x6000) + case W_Preamble: + return IOPORT(W_Preamble) & 0x0003; + + case W_BBRead: + if ((IOPORT(W_BBCnt) & 0xF000) != 0x6000) { - printf("WIFI: bad BB read, CNT=%04X\n", BBCnt); + printf("WIFI: bad BB read, CNT=%04X\n", IOPORT(W_BBCnt)); return 0; } - return BBRegs[BBCnt & 0xFF]; + return BBRegs[IOPORT(W_BBCnt) & 0xFF]; - case 0x15E: - return 0; // cheap + case W_BBBusy: + return 0; // TODO eventually (BB busy flag) + case W_RFBusy: + return 0; // TODO eventually (RF busy flag) } - printf("WIFI: unknown read %08X\n", addr); - return 0; + //printf("WIFI: read %08X\n", addr); + return IOPORT(addr&0xFFF); } void Write(u32 addr, u16 val) { - addr &= 0x7FFF; + addr &= 0x7FFE; + //printf("WIFI: write %08X %04X\n", addr, val); + if (addr >= 0x4000 && addr < 0x6000) + { + *(u16*)&RAM[addr & 0x1FFE] = val; + return; + } switch (addr) { - case 0x158: - BBCnt = val; - if ((BBCnt & 0xF000) == 0x5000) + case W_ModeReset: + { + u16 oldval = IOPORT(W_ModeReset); + + if (!(oldval & 0x0001) && (val & 0x0001)) + { + IOPORT(0x034) = 0x0002; + IOPORT(W_RFPins) = 0x0046; + IOPORT(W_RFStatus) = 9; + IOPORT(0x27C) = 0x0005; + // TODO: 02A2?? 
+ } + else if ((oldval & 0x0001) && !(val & 0x0001)) + { + IOPORT(0x27C) = 0x000A; + } + + if (val & 0x2000) + { + IOPORT(W_RXBufWriteAddr) = 0; + IOPORT(W_CmdTotalTime) = 0; + IOPORT(W_CmdReplyTime) = 0; + IOPORT(0x1A4) = 0; + IOPORT(0x278) = 0x000F; + // TODO: other ports?? + } + if (val & 0x4000) + { + IOPORT(W_ModeWEP) = 0; + IOPORT(W_TXStatCnt) = 0; + IOPORT(0x00A) = 0; + IOPORT(W_MACAddr0) = 0; + IOPORT(W_MACAddr1) = 0; + IOPORT(W_MACAddr2) = 0; + IOPORT(W_BSSID0) = 0; + IOPORT(W_BSSID1) = 0; + IOPORT(W_BSSID2) = 0; + IOPORT(W_AIDLow) = 0; + IOPORT(W_AIDFull) = 0; + IOPORT(W_TXRetryLimit) = 0x0707; + IOPORT(0x02E) = 0; + IOPORT(W_RXBufBegin) = 0x4000; + IOPORT(W_RXBufEnd) = 0x4800; + IOPORT(W_TXBeaconTIM) = 0; + IOPORT(W_Preamble) = 0x0001; + IOPORT(W_RXFilter) = 0x0401; + IOPORT(0x0D4) = 0x0001; + IOPORT(W_RXFilter2) = 0x0008; + IOPORT(0x0EC) = 0x3F03; + IOPORT(W_TXHeaderCnt) = 0; + IOPORT(0x198) = 0; + IOPORT(0x1A2) = 0x0001; + IOPORT(0x224) = 0x0003; + IOPORT(0x230) = 0x0047; + + } + } + break; + + case W_ModeWEP: + val &= 0x007F; + break; + + case W_IF: + // IF: TODO + return; + case W_IE: + printf("WIFI IE=%04X\n", val); + break; + + case W_PowerState: + if (val & 0x0002) { - u32 regid = BBCnt & 0xFF; + // TODO: IRQ11 + IOPORT(W_PowerState) = 0x0000; + } + return; + case W_PowerForce: + printf("WIFI: forcing power %04X\n", val); + val &= 0x8001; + if (val == 0x8001) + { + IOPORT(0x034) = 0x0002; + IOPORT(W_PowerState) = 0x0200; + IOPORT(W_TXReqRead) = 0; + IOPORT(W_RFPins) = 0x0046; + IOPORT(W_RFStatus) = 9; + } + break; + + case W_BBCnt: + IOPORT(W_BBCnt) = val; + if ((IOPORT(W_BBCnt) & 0xF000) == 0x5000) + { + u32 regid = IOPORT(W_BBCnt) & 0xFF; if (!BBRegsRO[regid]) - BBRegs[regid] = val & 0xFF; + BBRegs[regid] = IOPORT(W_BBWrite) & 0xFF; } return; - case 0x15A: - BBWrite = val; + case W_RFData2: + IOPORT(W_RFData2) = val; + if (RFVersion == 3) RFTransfer_Type3(); + else RFTransfer_Type2(); + return; + case W_RFCnt: + val &= 0x413F; + break; + + // 
read-only ports + case 0x000: + case 0x044: + case 0x054: + case 0x0B0: + case 0x0B6: + case 0x0B8: + case 0x15C: + case 0x15E: + case 0x180: + case 0x19C: + case 0x1A8: + case 0x1AC: + case 0x1C4: + case 0x210: + case 0x214: + case 0x268: return; } - printf("WIFI: unknown write %08X %04X\n", addr, val); + //printf("WIFI: write %08X %04X\n", addr, val); + IOPORT(addr&0xFFF) = val; } } @@ -22,7 +22,113 @@ namespace Wifi { -// +enum +{ + W_ID = 0x000, + + W_ModeReset = 0x004, + W_ModeWEP = 0x006, + W_TXStatCnt = 0x008, + W_IF = 0x010, + W_IE = 0x012, + + W_MACAddr0 = 0x018, + W_MACAddr1 = 0x01A, + W_MACAddr2 = 0x01C, + W_BSSID0 = 0x020, + W_BSSID1 = 0x022, + W_BSSID2 = 0x024, + W_AIDLow = 0x028, + W_AIDFull = 0x02A, + + W_TXRetryLimit = 0x02C, + W_RXCnt = 0x030, + W_WEPCnt = 0x032, + + W_PowerUS = 0x036, + W_PowerTX = 0x038, + W_PowerState = 0x03C, + W_PowerForce = 0x040, + + W_Random = 0x044, + + W_RXBufBegin = 0x050, + W_RXBufEnd = 0x052, + W_RXBufWriteCursor = 0x054, + W_RXBufWriteAddr = 0x056, + W_RXBufReadAddr = 0x058, + W_RXBufReadCursor = 0x05A, + W_RXBufCount = 0x05C, + W_RXBufDataRead = 0x060, + W_RXBufGapAddr = 0x062, + W_RXBufGapSize = 0x064, + + W_TXBufWriteAddr = 0x068, + W_TXBufCount = 0x06C, + W_TXBufDataWrite = 0x070, + W_TXBufGapAddr = 0x074, + W_TXBufGapSize = 0x076, + + W_TXSlotBeacon = 0x080, + W_TXBeaconTIM = 0x084, + W_ListenCount = 0x088, + W_BeaconInterval = 0x08C, + W_ListenInterval = 0x08E, + W_TXSlotCmd = 0x090, + W_TXSlotReply1 = 0x094, + W_TXSlotReply2 = 0x098, + W_TXSlotLoc1 = 0x0A0, + W_TXSlotLoc2 = 0x0A4, + W_TXSlotLoc3 = 0x0A8, + W_TXReqReset = 0x0AC, + W_TXReqSet = 0x0AE, + W_TXReqRead = 0x0B0, + W_TXSlotReset = 0x0B4, + W_TXBusy = 0x0B6, + W_TXStat = 0x0B8, + W_Preamble = 0x0BC, + W_CmdTotalTime = 0x0C0, + W_CmdReplyTime = 0x0C4, + W_RXFilter = 0x0D0, + W_RXFilter2 = 0x0E0, + + W_USCountCnt = 0x0E8, + W_USCompareCnt = 0x0EA, + W_CmdCountCnt = 0x0EE, + + W_ContentFree = 0x10C, + W_PreBeacon = 0x110, + W_CmdCount = 0x118, + 
W_BeaconCount1 = 0x11C, + W_BeaconCount2 = 0x134, + + W_BBCnt = 0x158, + W_BBWrite = 0x15A, + W_BBRead = 0x15C, + W_BBBusy = 0x15E, + W_BBMode = 0x160, + W_BBPower = 0x168, + + W_RFData2 = 0x17C, + W_RFData1 = 0x17E, + W_RFBusy = 0x180, + W_RFCnt = 0x184, + + W_TXHeaderCnt = 0x194, + W_RFPins = 0x19C, + + W_RXStatIncIF = 0x1A8, + W_RXStatIncIE = 0x1AA, + W_RXStatHalfIF = 0x1AC, + W_RXStatHalfIE = 0x1AE, + W_TXErrorCount = 0x1C0, + W_RXCount = 0x1C4, + + W_TXSeqNo = 0x210, + W_RFStatus = 0x214, + W_IFSet = 0x21C, + W_RXTXAddr = 0x268, +}; void Reset(); diff --git a/src/wx/main.cpp b/src/wx/main.cpp index 85568a4..ff22090 100644 --- a/src/wx/main.cpp +++ b/src/wx/main.cpp @@ -22,6 +22,7 @@ #include "../Config.h" #include "../NDS.h" #include "../GPU.h" +#include "../SPU.h" #include "InputConfig.h" #include "EmuConfig.h" @@ -86,7 +87,7 @@ bool wxApp_melonDS::OnInit() printf("melonDS " MELONDS_VERSION "\n" MELONDS_URL "\n"); Config::Load(); - + emuthread = new EmuThread(); if (emuthread->Run() != wxTHREAD_NO_ERROR) { @@ -97,7 +98,7 @@ bool wxApp_melonDS::OnInit() MainFrame* melon = new MainFrame(); melon->Show(true); - + melon->emuthread = emuthread; emuthread->parent = melon; @@ -108,7 +109,7 @@ int wxApp_melonDS::OnExit() { emuthread->Wait(); delete emuthread; - + return wxApp::OnExit(); } @@ -169,7 +170,7 @@ void MainFrame::OnClose(wxCloseEvent& event) { emuthread->EmuPause(); emuthread->EmuExit(); - + NDS::DeInit(); if (joy) @@ -313,6 +314,11 @@ EmuThread::~EmuThread() { } +static void AudioCallback(void* data, Uint8* stream, int len) +{ + SPU::ReadOutput((s16*)stream, len>>2); +} + wxThread::ExitCode EmuThread::Entry() { emustatus = 3; @@ -344,6 +350,23 @@ wxThread::ExitCode EmuThread::Entry() botdst.x = 0; botdst.y = 192; botdst.w = 256; botdst.h = 192; + SDL_AudioSpec whatIwant, whatIget; + memset(&whatIwant, 0, sizeof(SDL_AudioSpec)); + whatIwant.freq = 32824; // 32823.6328125 + whatIwant.format = AUDIO_S16LSB; + whatIwant.channels = 2; + whatIwant.samples = 
1024; + whatIwant.callback = AudioCallback; + audio = SDL_OpenAudioDevice(NULL, 0, &whatIwant, &whatIget, 0); + if (!audio) + { + printf("Audio init failed: %s\n", SDL_GetError()); + } + else + { + SDL_PauseAudioDevice(audio, 0); + } + Touching = false; axismask = 0; @@ -430,9 +453,11 @@ wxThread::ExitCode EmuThread::Entry() emupaused = true; } } - + emupaused = true; + if (audio) SDL_CloseAudioDevice(audio); + SDL_DestroyTexture(sdltex); SDL_DestroyRenderer(sdlrend); SDL_DestroyWindow(sdlwin); @@ -462,7 +487,7 @@ void EmuThread::ProcessEvents() { int w = evt.window.data1; int h = evt.window.data2; - + // SDL_SetWindowMinimumSize() doesn't seem to work on Linux. oh well if ((w < 256) || (h < 384)) { @@ -514,7 +539,7 @@ void EmuThread::ProcessEvents() { Touching = true; NDS::PressKey(16+6); - + int mx, my; SDL_GetGlobalMouseState(&mx, &my); txoffset = mx - evt.button.x; @@ -529,6 +554,7 @@ void EmuThread::ProcessEvents() if (evt.key.keysym.scancode == Config::KeyMapping[i]) NDS::PressKey(i); if (evt.key.keysym.scancode == Config::KeyMapping[10]) NDS::PressKey(16); if (evt.key.keysym.scancode == Config::KeyMapping[11]) NDS::PressKey(17); + if (evt.key.keysym.scancode == SDL_SCANCODE_F12) NDS::debug(0); break; case SDL_KEYUP: diff --git a/src/wx/main.h b/src/wx/main.h index 851a061..0219ff7 100644 --- a/src/wx/main.h +++ b/src/wx/main.h @@ -46,7 +46,7 @@ class wxApp_melonDS : public wxApp public: virtual bool OnInit(); virtual int OnExit(); - + EmuThread* emuthread; }; @@ -91,7 +91,7 @@ public: bool EmuIsRunning() { return (emustatus == 1) || (emustatus == 2); } bool EmuIsPaused() { return (emustatus == 2) && emupaused; } - + MainFrame* parent; protected: @@ -105,6 +105,8 @@ protected: SDL_Rect topsrc, topdst; SDL_Rect botsrc, botdst; + SDL_AudioDeviceID audio; + bool Touching; int txoffset, tyoffset; |